aboutsummaryrefslogblamecommitdiffstats
path: root/object_walker.go
blob: 8bae1faea607e4ae281a11fd99f2aa2763572aa5 (plain) (tree)








































































































                                                                                         
package git

import (
	"fmt"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/filemode"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/go-git.v4/storage"
)

type objectWalker struct {
	Storer storage.Storer
	// seen is the set of objects seen in the repo.
	// seen map can become huge if walking over large
	// repos. Thus using struct{} as the value type.
	seen map[plumbing.Hash]struct{}
}

func newObjectWalker(s storage.Storer) *objectWalker {
	return &objectWalker{s, map[plumbing.Hash]struct{}{}}
}

// walkAllRefs walks all (hash) refererences from the repo.
func (p *objectWalker) walkAllRefs() error {
	// Walk over all the references in the repo.
	it, err := p.Storer.IterReferences()
	if err != nil {
		return err
	}
	defer it.Close()
	err = it.ForEach(func(ref *plumbing.Reference) error {
		// Exit this iteration early for non-hash references.
		if ref.Type() != plumbing.HashReference {
			return nil
		}
		return p.walkObjectTree(ref.Hash())
	})
	if err != nil {
		return err
	}
	return nil
}

func (p *objectWalker) isSeen(hash plumbing.Hash) bool {
	_, seen := p.seen[hash]
	return seen
}

func (p *objectWalker) add(hash plumbing.Hash) {
	p.seen[hash] = struct{}{}
}

// walkObjectTree walks over all objects and remembers references
// to them in the objectWalker. This is used instead of the revlist
// walks because memory usage is tight with huge repos.
func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error {
	// Check if we have already seen, and mark this object
	if p.isSeen(hash) {
		return nil
	}
	p.add(hash)
	// Fetch the object.
	obj, err := object.GetObject(p.Storer, hash)
	if err != nil {
		return fmt.Errorf("Getting object %s failed: %v", hash, err)
	}
	// Walk all children depending on object type.
	switch obj := obj.(type) {
	case *object.Commit:
		err = p.walkObjectTree(obj.TreeHash)
		if err != nil {
			return err
		}
		for _, h := range obj.ParentHashes {
			err = p.walkObjectTree(h)
			if err != nil {
				return err
			}
		}
	case *object.Tree:
		for i := range obj.Entries {
			// Shortcut for blob objects:
			// 'or' the lower bits of a mode and check that it
			// it matches a filemode.Executable. The type information
			// is in the higher bits, but this is the cleanest way
			// to handle plain files with different modes.
			// Other non-tree objects are somewhat rare, so they
			// are not special-cased.
			if obj.Entries[i].Mode|0755 == filemode.Executable {
				p.add(obj.Entries[i].Hash)
				continue
			}
			// Normal walk for sub-trees (and symlinks etc).
			err = p.walkObjectTree(obj.Entries[i].Hash)
			if err != nil {
				return err
			}
		}
	default:
		// Error out on unhandled object types.
		return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj)
	}
	return nil
}