diff options
Diffstat (limited to 'object_walker.go')
-rw-r--r-- | object_walker.go | 105 |
1 files changed, 105 insertions, 0 deletions
diff --git a/object_walker.go b/object_walker.go new file mode 100644 index 0000000..8bae1fa --- /dev/null +++ b/object_walker.go @@ -0,0 +1,105 @@ +package git + +import ( + "fmt" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/filemode" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/storage" +) + +type objectWalker struct { + Storer storage.Storer + // seen is the set of objects seen in the repo. + // seen map can become huge if walking over large + // repos. Thus using struct{} as the value type. + seen map[plumbing.Hash]struct{} +} + +func newObjectWalker(s storage.Storer) *objectWalker { + return &objectWalker{s, map[plumbing.Hash]struct{}{}} +} + +// walkAllRefs walks all (hash) refererences from the repo. +func (p *objectWalker) walkAllRefs() error { + // Walk over all the references in the repo. + it, err := p.Storer.IterReferences() + if err != nil { + return err + } + defer it.Close() + err = it.ForEach(func(ref *plumbing.Reference) error { + // Exit this iteration early for non-hash references. + if ref.Type() != plumbing.HashReference { + return nil + } + return p.walkObjectTree(ref.Hash()) + }) + if err != nil { + return err + } + return nil +} + +func (p *objectWalker) isSeen(hash plumbing.Hash) bool { + _, seen := p.seen[hash] + return seen +} + +func (p *objectWalker) add(hash plumbing.Hash) { + p.seen[hash] = struct{}{} +} + +// walkObjectTree walks over all objects and remembers references +// to them in the objectWalker. This is used instead of the revlist +// walks because memory usage is tight with huge repos. +func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error { + // Check if we have already seen, and mark this object + if p.isSeen(hash) { + return nil + } + p.add(hash) + // Fetch the object. + obj, err := object.GetObject(p.Storer, hash) + if err != nil { + return fmt.Errorf("Getting object %s failed: %v", hash, err) + } + // Walk all children depending on object type. + switch obj := obj.(type) { + case *object.Commit: + err = p.walkObjectTree(obj.TreeHash) + if err != nil { + return err + } + for _, h := range obj.ParentHashes { + err = p.walkObjectTree(h) + if err != nil { + return err + } + } + case *object.Tree: + for i := range obj.Entries { + // Shortcut for blob objects: + // 'or' the lower bits of a mode and check that it + // it matches a filemode.Executable. The type information + // is in the higher bits, but this is the cleanest way + // to handle plain files with different modes. + // Other non-tree objects are somewhat rare, so they + // are not special-cased. + if obj.Entries[i].Mode|0755 == filemode.Executable { + p.add(obj.Entries[i].Hash) + continue + } + // Normal walk for sub-trees (and symlinks etc). + err = p.walkObjectTree(obj.Entries[i].Hash) + if err != nil { + return err + } + } + default: + // Error out on unhandled object types. + return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) + } + return nil +} |