From d68f45f8aaca461167907c07e8c161be14e87157 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Fri, 25 Aug 2017 13:52:07 -0700 Subject: plumbing: use `seen` map in tree walker This helps avoids iterating down the same trees for every commit. For a big-ish repo with 35K objects (17K commits), this reduced the time for calling `revlist.Objects` during a push (with 0 hashes to ignore) from more than ten minutes to less than a minute. --- plumbing/object/file.go | 2 +- plumbing/object/tree.go | 16 +++++++++++----- plumbing/object/tree_test.go | 32 +++++++++++++++++++++++++++++--- plumbing/object/treenoder.go | 2 +- plumbing/revlist/revlist.go | 2 +- 5 files changed, 43 insertions(+), 11 deletions(-) diff --git a/plumbing/object/file.go b/plumbing/object/file.go index 79f57fe..40b5206 100644 --- a/plumbing/object/file.go +++ b/plumbing/object/file.go @@ -81,7 +81,7 @@ type FileIter struct { // NewFileIter takes a storer.EncodedObjectStorer and a Tree and returns a // *FileIter that iterates over all files contained in the tree, recursively. func NewFileIter(s storer.EncodedObjectStorer, t *Tree) *FileIter { - return &FileIter{s: s, w: *NewTreeWalker(t, true)} + return &FileIter{s: s, w: *NewTreeWalker(t, true, nil)} } // Next moves the iterator to the next file and returns a pointer to it. If diff --git a/plumbing/object/tree.go b/plumbing/object/tree.go index 512db9f..44ac720 100644 --- a/plumbing/object/tree.go +++ b/plumbing/object/tree.go @@ -297,9 +297,10 @@ func (iter *treeEntryIter) Next() (TreeEntry, error) { // TreeWalker provides a means of walking through all of the entries in a Tree. type TreeWalker struct { - stack []treeEntryIter + stack []*treeEntryIter base string recursive bool + seen map[plumbing.Hash]bool s storer.EncodedObjectStorer t *Tree @@ -309,13 +310,14 @@ type TreeWalker struct { // // It is the caller's responsibility to call Close() when finished with the // tree walker. -func NewTreeWalker(t *Tree, recursive bool) *TreeWalker { - stack := make([]treeEntryIter, 0, startingStackSize) - stack = append(stack, treeEntryIter{t, 0}) +func NewTreeWalker(t *Tree, recursive bool, seen map[plumbing.Hash]bool) *TreeWalker { + stack := make([]*treeEntryIter, 0, startingStackSize) + stack = append(stack, &treeEntryIter{t, 0}) return &TreeWalker{ stack: stack, recursive: recursive, + seen: seen, s: t.s, t: t, @@ -358,6 +360,10 @@ func (w *TreeWalker) Next() (name string, entry TreeEntry, err error) { return } + if w.seen[entry.Hash] { + continue + } + if entry.Mode == filemode.Dir { obj, err = GetTree(w.s, entry.Hash) } @@ -377,7 +383,7 @@ func (w *TreeWalker) Next() (name string, entry TreeEntry, err error) { } if t, ok := obj.(*Tree); ok { - w.stack = append(w.stack, treeEntryIter{t, 0}) + w.stack = append(w.stack, &treeEntryIter{t, 0}) w.base = path.Join(w.base, entry.Name) } diff --git a/plumbing/object/tree_test.go b/plumbing/object/tree_test.go index aa86517..796d979 100644 --- a/plumbing/object/tree_test.go +++ b/plumbing/object/tree_test.go @@ -228,7 +228,7 @@ func (s *TreeSuite) TestTreeWalkerNext(c *C) { tree, err := commit.Tree() c.Assert(err, IsNil) - walker := NewTreeWalker(tree, true) + walker := NewTreeWalker(tree, true, nil) for _, e := range treeWalkerExpects { name, entry, err := walker.Next() if err == io.EOF { @@ -245,13 +245,39 @@ func (s *TreeSuite) TestTreeWalkerNext(c *C) { } } +func (s *TreeSuite) TestTreeWalkerNextSkipSeen(c *C) { + commit, err := GetCommit(s.Storer, plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")) + c.Assert(err, IsNil) + tree, err := commit.Tree() + c.Assert(err, IsNil) + + seen := map[plumbing.Hash]bool{ + plumbing.NewHash(treeWalkerExpects[0].Hash): true, + } + walker := NewTreeWalker(tree, true, seen) + for _, e := range treeWalkerExpects[1:] { + name, entry, err := walker.Next() + if err == io.EOF { + break + } + + c.Assert(err, IsNil) + c.Assert(name, Equals, e.Path) + c.Assert(entry.Name, Equals, e.Name) + c.Assert(entry.Mode, Equals, e.Mode) + c.Assert(entry.Hash.String(), Equals, e.Hash) + + c.Assert(walker.Tree().ID().String(), Equals, e.Tree) + } +} + func (s *TreeSuite) TestTreeWalkerNextNonRecursive(c *C) { commit := s.commit(c, plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")) tree, err := commit.Tree() c.Assert(err, IsNil) var count int - walker := NewTreeWalker(tree, false) + walker := NewTreeWalker(tree, false, nil) for { name, entry, err := walker.Next() if err == io.EOF { @@ -290,7 +316,7 @@ func (s *TreeSuite) TestTreeWalkerNextSubmodule(c *C) { } var count int - walker := NewTreeWalker(tree, true) + walker := NewTreeWalker(tree, true, nil) defer walker.Close() for { diff --git a/plumbing/object/treenoder.go b/plumbing/object/treenoder.go index bd65abc..52f0e61 100644 --- a/plumbing/object/treenoder.go +++ b/plumbing/object/treenoder.go @@ -99,7 +99,7 @@ func transformChildren(t *Tree) ([]noder.Noder, error) { // is bigger than needed. ret := make([]noder.Noder, 0, len(t.Entries)) - walker := NewTreeWalker(t, false) // don't recurse + walker := NewTreeWalker(t, false, nil) // don't recurse // don't defer walker.Close() for efficiency reasons. for { _, e, err = walker.Next() diff --git a/plumbing/revlist/revlist.go b/plumbing/revlist/revlist.go index f56cf28..5b2ff99 100644 --- a/plumbing/revlist/revlist.go +++ b/plumbing/revlist/revlist.go @@ -137,7 +137,7 @@ func iterateCommitTrees( cb(tree.Hash) - treeWalker := object.NewTreeWalker(tree, true) + treeWalker := object.NewTreeWalker(tree, true, seen) for { _, e, err := treeWalker.Next() -- cgit