aboutsummaryrefslogblamecommitdiffstats
path: root/prune.go
blob: 9c3c4ff40e25b6f8ba140f51b1e75bc46edfa127 (plain) (tree)
















































































































































                                                                                         
package git

import (
	"fmt"
	"time"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/filemode"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/go-git.v4/storage"
)

type PruneHandler func(unreferencedObjectHash plumbing.Hash) error
type PruneOptions struct {
	// OnlyObjectsOlderThan if set to non-zero value
	// selects only objects older than the time provided.
	OnlyObjectsOlderThan time.Time
	// Handler is called on matching objects
	Handler PruneHandler
}

// DeleteObject deletes an object from a repository.
// The type conveniently matches PruneHandler.
func (r *Repository) DeleteObject(hash plumbing.Hash) error {
	return r.Storer.DeleteLooseObject(hash)
}

func (r *Repository) Prune(opt PruneOptions) error {
	pw := &pruneWalker{
		Storer: r.Storer,
		seen:   map[plumbing.Hash]struct{}{},
	}
	// Walk over all the references in the repo.
	it, err := r.Storer.IterReferences()
	if err != nil {
		return nil
	}
	defer it.Close()
	err = it.ForEach(func(ref *plumbing.Reference) error {
		// Exit this iteration early for non-hash references.
		if ref.Type() != plumbing.HashReference {
			return nil
		}
		return pw.walkObjectTree(ref.Hash())
	})
	if err != nil {
		return err
	}
	// Now walk all (loose) objects in storage.
	err = r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error {
		// Get out if we have seen this object.
		if pw.isSeen(hash) {
			return nil
		}
		// Otherwise it is a candidate for pruning.
		// Check out for too new objects next.
		if opt.OnlyObjectsOlderThan != (time.Time{}) {
			// Errors here are non-fatal. The object may be e.g. packed.
			// Or concurrently deleted. Skip such objects.
			t, err := r.Storer.LooseObjectTime(hash)
			if err != nil {
				return nil
			}
			// Skip too new objects.
			if !t.Before(opt.OnlyObjectsOlderThan) {
				return nil
			}
		}
		return opt.Handler(hash)
	})
	if err != nil {
		return err
	}
	return nil
}

type pruneWalker struct {
	Storer storage.Storer
	// seen is the set of objects seen in the repo.
	// seen map can become huge if walking over large
	// repos. Thus using struct{} as the value type.
	seen map[plumbing.Hash]struct{}
}

func (p *pruneWalker) isSeen(hash plumbing.Hash) bool {
	_, seen := p.seen[hash]
	return seen
}

func (p *pruneWalker) add(hash plumbing.Hash) {
	p.seen[hash] = struct{}{}
}

// walkObjectTree walks over all objects and remembers references
// to them in the pruneWalker. This is used instead of the revlist
// walks because memory usage is tight with huge repos.
func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error {
	// Check if we have already seen, and mark this object
	if p.isSeen(hash) {
		return nil
	}
	p.add(hash)
	// Fetch the object.
	obj, err := object.GetObject(p.Storer, hash)
	if err != nil {
		return fmt.Errorf("Getting object %s failed: %v", hash, err)
	}
	// Walk all children depending on object type.
	switch obj := obj.(type) {
	case *object.Commit:
		err = p.walkObjectTree(obj.TreeHash)
		if err != nil {
			return err
		}
		for _, h := range obj.ParentHashes {
			err = p.walkObjectTree(h)
			if err != nil {
				return err
			}
		}
	case *object.Tree:
		for i := range obj.Entries {
			// Shortcut for blob objects:
			// 'or' the lower bits of a mode and check that it
			// it matches a filemode.Executable. The type information
			// is in the higher bits, but this is the cleanest way
			// to handle plain files with different modes.
			// Other non-tree objects are somewhat rare, so they
			// are not special-cased.
			if obj.Entries[i].Mode|0755 == filemode.Executable {
				p.add(obj.Entries[i].Hash)
				continue
			}
			// Normal walk for sub-trees (and symlinks etc).
			err = p.walkObjectTree(obj.Entries[i].Hash)
			if err != nil {
				return err
			}
		}
	default:
		// Error out on unhandled object types.
		return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj)
	}
	return nil
}