diff options
author | Taru Karttunen <taruti@taruti.net> | 2017-11-01 21:06:37 +0200 |
---|---|---|
committer | Jeremy Stribling <strib@alum.mit.edu> | 2017-11-29 10:40:46 -0800 |
commit | ac1914eac3c20efa63de8809229994364ad9639b (patch) | |
tree | 4595bfceec10aea603517e5dc7f493b93e068d30 | |
parent | a6202cacd12aa5ee20c54aff799b0e91330526c5 (diff) | |
download | go-git-ac1914eac3c20efa63de8809229994364ad9639b.tar.gz |
First pass of prune design
-rw-r--r-- | plumbing/storer/object.go | 14 | ||||
-rw-r--r-- | plumbing/storer/object_test.go | 13 | ||||
-rw-r--r-- | repository.go | 124 | ||||
-rw-r--r-- | storage/filesystem/internal/dotgit/dotgit.go | 48 | ||||
-rw-r--r-- | storage/filesystem/object.go | 24 | ||||
-rw-r--r-- | storage/memory/storage.go | 23 |
6 files changed, 236 insertions, 10 deletions
diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index e793211..5d043cb 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -3,6 +3,7 @@ package storer import ( "errors" "io" + "time" "gopkg.in/src-d/go-git.v4/plumbing" ) @@ -36,6 +37,19 @@ type EncodedObjectStorer interface { // // Valid plumbing.ObjectType values are CommitObject, BlobObject, TagObject, IterEncodedObjects(plumbing.ObjectType) (EncodedObjectIter, error) + // HasEncodedObject returns ErrObjNotFound if the object doesn't + // exist. If the object does exist, it returns nil. + HasEncodedObject(plumbing.Hash) error + // ForEachObjectHash iterates over all the (loose) object hashes + // in the repository without necessarily having to read those objects. + // Objects only inside pack files may be omitted. + ForEachObjectHash(func(plumbing.Hash) error) error + // LooseObjectTime looks up the (m)time associated with the + // loose object (that is not in a pack file). Implementations + // may + LooseObjectTime(plumbing.Hash) (time.Time, error) + // DeleteLooseObject deletes a loose object if it exists. + DeleteLooseObject(plumbing.Hash) error } // DeltaObjectStorer is an EncodedObjectStorer that can return delta diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index 6bdd25c..f8b1f73 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -3,6 +3,7 @@ package storer import ( "fmt" "testing" + "time" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git.v4/plumbing" @@ -148,3 +149,15 @@ func (o *MockObjectStorage) IterEncodedObjects(t plumbing.ObjectType) (EncodedOb func (o *MockObjectStorage) Begin() Transaction { return nil } + +func (o *MockObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { + return nil +} + +func (o *MockObjectStorage) LooseObjectTime(plumbing.Hash) (time.Time, error) { + return time.Time{}, plumbing.ErrObjectNotFound +} + +func (o *MockObjectStorage) DeleteLooseObject(plumbing.Hash) error { + return plumbing.ErrObjectNotFound +} diff --git a/repository.go b/repository.go index b159ff0..7079fd1 100644 --- a/repository.go +++ b/repository.go @@ -8,10 +8,12 @@ import ( "os" "path/filepath" "strings" + "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/internal/revision" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/filemode" "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage" @@ -1011,3 +1013,125 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err return &commit.Hash, nil } + +type PruneHandler func(unreferencedObjectHash plumbing.Hash) error +type PruneOptions struct { + // OnlyObjectsOlderThan if set to non-zero value + // selects only objects older than the time provided. + OnlyObjectsOlderThan time.Time + // Handler is called on matching objects + Handler PruneHandler +} + +// DeleteObject deletes an object from a repository. +// The type conveniently matches PruneHandler. +func (r *Repository) DeleteObject(hash plumbing.Hash) error { + return r.Storer.DeleteLooseObject(hash) +} + +func (r *Repository) Prune(opt PruneOptions) error { + pw := &pruneWalker{ + r: r, + seen: map[plumbing.Hash]struct{}{}, + } + // Walk over all the references in the repo. + it, err := r.Storer.IterReferences() + if err != nil { + return nil + } + defer it.Close() + err = it.ForEach(func(ref *plumbing.Reference) error { + // Exit this iteration early for non-hash references. + if ref.Type() != plumbing.HashReference { + return nil + } + return pw.walkObjectTree(ref.Hash()) + }) + if err != nil { + return err + } + // Now walk all (loose) objects in storage. + err = r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { + // Get out if we have seen this object. + if pw.isSeen(hash) { + return nil + } + // Otherwise it is a candidate for pruning. + // Check out for too new objects next. + if opt.OnlyObjectsOlderThan != (time.Time{}) { + // Errors here are non-fatal. The object may be e.g. packed. + // Or concurrently deleted. Skip such objects. + t, err := r.Storer.LooseObjectTime(hash) + if err != nil { + return nil + } + // Skip too new objects. + if !t.Before(opt.OnlyObjectsOlderThan) { + return nil + } + } + return opt.Handler(hash) + }) + if err != nil { + return err + } + return nil +} + +type pruneWalker struct { + r *Repository + seen map[plumbing.Hash]struct{} +} + +func (p *pruneWalker) isSeen(hash plumbing.Hash) bool { + _, seen := p.seen[hash] + return seen +} + +func (p *pruneWalker) add(hash plumbing.Hash) { + p.seen[hash] = struct{}{} +} + +func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error { + // Check if we have already seen, and mark this object + if p.isSeen(hash) { + return nil + } + p.add(hash) + // Fetch the object. + obj, err := object.GetObject(p.r.Storer, hash) + if err != nil { + return fmt.Errorf("Getting object %s failed: %v", hash, err) + } + // Walk all children depending on object type. + switch obj := obj.(type) { + case *object.Commit: + err = p.walkObjectTree(obj.TreeHash) + if err != nil { + return err + } + for _, h := range obj.ParentHashes { + err = p.walkObjectTree(h) + if err != nil { + return err + } + } + case *object.Tree: + for i := range obj.Entries { + // Shortcut for blob objects: + if obj.Entries[i].Mode|0755 == filemode.Executable { + p.add(obj.Entries[i].Hash) + continue + } + // Normal walk for sub-trees (and symlinks etc). + err = p.walkObjectTree(obj.Entries[i].Hash) + if err != nil { + return err + } + } + default: + // Error out on unhandled object types. + return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) + } + return nil +} diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 9d49945..83fe159 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -205,39 +205,67 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) { // Objects returns a slice with the hashes of objects found under the // .git/objects/ directory. func (d *DotGit) Objects() ([]plumbing.Hash, error) { + var objects []plumbing.Hash + err := d.ForEachObjectHash(func(hash plumbing.Hash) error { + objects = append(objects, hash) + return nil + }) + if err != nil { + return nil, err + } + return objects, nil +} + +// Objects returns a slice with the hashes of objects found under the +// .git/objects/ directory. +func (d *DotGit) ForEachObjectHash(fun func(plumbing.Hash) error) error { files, err := d.fs.ReadDir(objectsPath) if err != nil { if os.IsNotExist(err) { - return nil, nil + return nil } - return nil, err + return err } - var objects []plumbing.Hash for _, f := range files { if f.IsDir() && len(f.Name()) == 2 && isHex(f.Name()) { base := f.Name() d, err := d.fs.ReadDir(d.fs.Join(objectsPath, base)) if err != nil { - return nil, err + return err } for _, o := range d { - objects = append(objects, plumbing.NewHash(base+o.Name())) + err = fun(plumbing.NewHash(base + o.Name())) + if err != nil { + return err + } } } } - return objects, nil + return nil } -// Object return a fs.File pointing the object file, if exists -func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) { +func (d *DotGit) objectPath(h plumbing.Hash) string { hash := h.String() - file := d.fs.Join(objectsPath, hash[0:2], hash[2:40]) + return d.fs.Join(objectsPath, hash[0:2], hash[2:40]) +} + +// Object returns a fs.File pointing the object file, if exists +func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) { + return d.fs.Open(d.objectPath(h)) +} + +// ObjectStat returns a os.FileInfo pointing the object file, if exists +func (d *DotGit) ObjectStat(h plumbing.Hash) (os.FileInfo, error) { + return d.fs.Stat(d.objectPath(h)) +} - return d.fs.Open(file) +// ObjectDelete removes the object file, if exists +func (d *DotGit) ObjectDelete(h plumbing.Hash) error { + return d.fs.Remove(d.objectPath(h)) } func (d *DotGit) readReferenceFrom(rd io.Reader, name string) (ref *plumbing.Reference, err error) { diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 9690c0e..9b2790f 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -3,6 +3,7 @@ package filesystem import ( "io" "os" + "time" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" @@ -479,3 +480,26 @@ func hashListAsMap(l []plumbing.Hash) map[plumbing.Hash]bool { return m } + +func (s *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { + err := s.dir.ForEachObjectHash(fun) + if err != nil { + if err == storer.ErrStop { + return nil + } + return err + } + return nil +} + +func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { + fi, err := s.dir.ObjectStat(hash) + if err != nil { + return time.Time{}, err + } + return fi.ModTime(), nil +} + +func (s *ObjectStorage) DeleteLooseObject(hash plumbing.Hash) error { + return s.dir.ObjectDelete(hash) +} diff --git a/storage/memory/storage.go b/storage/memory/storage.go index 3d4e84a..bfcad34 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -3,6 +3,7 @@ package memory import ( "fmt" + "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/plumbing" @@ -156,6 +157,28 @@ func (o *ObjectStorage) Begin() storer.Transaction { } } +func (o *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { + for h, _ := range o.Objects { + err := fun(h) + if err != nil { + if err == storer.ErrStop { + return nil + } + return err + } + } + return nil +} + +var errNotSupported = fmt.Errorf("Not supported") + +func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { + return time.Time{}, errNotSupported +} +func (s *ObjectStorage) DeleteLooseObject(plumbing.Hash) error { + return errNotSupported +} + type TxObjectStorage struct { Storage *ObjectStorage Objects map[plumbing.Hash]plumbing.EncodedObject |