aboutsummaryrefslogtreecommitdiffstats
path: root/object_walker.go
diff options
context:
space:
mode:
author Taru Karttunen <taruti@taruti.net> 2017-11-21 17:33:15 +0200
committer Jeremy Stribling <strib@alum.mit.edu> 2017-11-29 11:01:41 -0800
commit f28e4477dfe49a36dbd55027f8d1133c324bdac5 (patch)
tree 2b3bb5031fc9c6658dfe650bf14d0ffcab85abff /object_walker.go
parent 9dcb096416b6ad16994763fcbc029bcfa95730e8 (diff)
download go-git-f28e4477dfe49a36dbd55027f8d1133c324bdac5.tar.gz
Make prune object walker generic
Diffstat (limited to 'object_walker.go')
-rw-r--r-- object_walker.go 105
1 files changed, 105 insertions, 0 deletions
diff --git a/object_walker.go b/object_walker.go
new file mode 100644
index 0000000..8bae1fa
--- /dev/null
+++ b/object_walker.go
@@ -0,0 +1,105 @@
+package git
+
+import (
+ "fmt"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/filemode"
+ "gopkg.in/src-d/go-git.v4/plumbing/object"
+ "gopkg.in/src-d/go-git.v4/storage"
+)
+
+// objectWalker walks the objects held by a Storer and records the hash
+// of every object it visits.
+type objectWalker struct {
+	// Storer is where references and objects are read from.
+	Storer storage.Storer
+	// seen holds the hash of every object visited so far. The set can
+	// become huge when walking large repos, which is why the value type
+	// is the empty struct{}.
+	seen map[plumbing.Hash]struct{}
+}
+
+// newObjectWalker returns an objectWalker that reads from s and starts
+// with an empty set of seen objects.
+func newObjectWalker(s storage.Storer) *objectWalker {
+	return &objectWalker{
+		Storer: s,
+		seen:   make(map[plumbing.Hash]struct{}),
+	}
+}
+
+// walkAllRefs walks the object graph reachable from every hash
+// reference in the repo. References of any other type are skipped.
+// The first error from listing references or from walking an object
+// stops the walk and is returned.
+func (p *objectWalker) walkAllRefs() error {
+	refs, err := p.Storer.IterReferences()
+	if err != nil {
+		return err
+	}
+	defer refs.Close()
+
+	return refs.ForEach(func(ref *plumbing.Reference) error {
+		// Only hash references name an object to start walking from.
+		if ref.Type() == plumbing.HashReference {
+			return p.walkObjectTree(ref.Hash())
+		}
+		return nil
+	})
+}
+
+// isSeen reports whether hash has already been recorded by add.
+func (p *objectWalker) isSeen(hash plumbing.Hash) bool {
+	_, ok := p.seen[hash]
+	return ok
+}
+
+func (p *objectWalker) add(hash plumbing.Hash) {
+ p.seen[hash] = struct{}{}
+}
+
+// walkObjectTree marks the object identified by hash as seen and then
+// recursively walks everything reachable from it: a commit leads to its
+// tree and its parents, a tree to its entries, and an annotated tag to
+// its target. Blobs have no children. This is used instead of the
+// revlist walks because memory usage is tight with huge repos.
+//
+// It returns an error when an object cannot be fetched from the Storer
+// or when the fetched object is of a type the walker cannot traverse.
+func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error {
+	// Mark the object before descending so it is fetched at most once.
+	if p.isSeen(hash) {
+		return nil
+	}
+	p.add(hash)
+	// Fetch the object.
+	obj, err := object.GetObject(p.Storer, hash)
+	if err != nil {
+		return fmt.Errorf("Getting object %s failed: %v", hash, err)
+	}
+	// Walk all children depending on object type.
+	switch obj := obj.(type) {
+	case *object.Commit:
+		if err := p.walkObjectTree(obj.TreeHash); err != nil {
+			return err
+		}
+		for _, parent := range obj.ParentHashes {
+			if err := p.walkObjectTree(parent); err != nil {
+				return err
+			}
+		}
+	case *object.Tree:
+		for i := range obj.Entries {
+			entry := &obj.Entries[i]
+			// A submodule entry names a commit of another repository;
+			// that object is not expected in this Storer, so trying to
+			// fetch it would fail the whole walk.
+			if entry.Mode == filemode.Submodule {
+				continue
+			}
+			// Shortcut for plain files: 'or' the permission bits into
+			// the mode and compare with filemode.Executable. The type
+			// information is in the higher bits, so this matches
+			// regular files whatever their permissions and lets us
+			// record the blob without fetching it.
+			if entry.Mode|0755 == filemode.Executable {
+				p.add(entry.Hash)
+				continue
+			}
+			// Normal walk for sub-trees (and symlinks etc).
+			if err := p.walkObjectTree(entry.Hash); err != nil {
+				return err
+			}
+		}
+	case *object.Tag:
+		// Annotated tag: keep walking from the object it points at.
+		return p.walkObjectTree(obj.Target)
+	case *object.Blob:
+		// Blobs are leaves. They get here via symlink entries or via
+		// references that point directly at a blob.
+	default:
+		// Error out on unhandled object types.
+		return fmt.Errorf("Unknown object %X %s %T", obj.ID(), obj.Type(), obj)
+	}
+	return nil
+}