aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTaru Karttunen <taruti@taruti.net>2017-11-06 13:30:42 +0200
committerJeremy Stribling <strib@alum.mit.edu>2017-11-29 10:41:19 -0800
commit3f0b1ff37b64108cfed1b57ea4ae1f1566592905 (patch)
treee27f0a736634febf14d9186ea6bdb11a897bd9f5
parentac1914eac3c20efa63de8809229994364ad9639b (diff)
downloadgo-git-3f0b1ff37b64108cfed1b57ea4ae1f1566592905.tar.gz
Address CI and move code around
-rw-r--r--plumbing/storer/object.go6
-rw-r--r--prune.go145
-rw-r--r--repository.go124
3 files changed, 149 insertions, 126 deletions
diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go
index 5d043cb..bd34be8 100644
--- a/plumbing/storer/object.go
+++ b/plumbing/storer/object.go
@@ -43,10 +43,12 @@ type EncodedObjectStorer interface {
// ForEachObjectHash iterates over all the (loose) object hashes
// in the repository without necessarily having to read those objects.
// Objects only inside pack files may be omitted.
+ // If ErrStop is sent the iteration is stopped but no error is returned.
ForEachObjectHash(func(plumbing.Hash) error) error
// LooseObjectTime looks up the (m)time associated with the
- // loose object (that is not in a pack file). Implementations
- // may
+ // loose object (that is not in a pack file). Some
+ // implementations (e.g. without loose objects)
+ // always return an error.
LooseObjectTime(plumbing.Hash) (time.Time, error)
// DeleteLooseObject deletes a loose object if it exists.
DeleteLooseObject(plumbing.Hash) error
diff --git a/prune.go b/prune.go
new file mode 100644
index 0000000..9c3c4ff
--- /dev/null
+++ b/prune.go
@@ -0,0 +1,145 @@
+package git
+
+import (
+ "fmt"
+ "time"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/filemode"
+ "gopkg.in/src-d/go-git.v4/plumbing/object"
+ "gopkg.in/src-d/go-git.v4/storage"
+)
+
+// PruneHandler is the callback type used by Prune. It receives the hash
+// of an object that Prune did not reach from any reference.
+type PruneHandler func(unreferencedObjectHash plumbing.Hash) error
+// PruneOptions describes which unreferenced objects Prune selects and
+// what is done with each of them.
+type PruneOptions struct {
+ // OnlyObjectsOlderThan if set to non-zero value
+ // selects only objects older than the time provided.
+ OnlyObjectsOlderThan time.Time
+ // Handler is called on matching objects. Prune calls it without
+ // a nil check, so it must be set.
+ Handler PruneHandler
+}
+
+// DeleteObject deletes an object from a repository by delegating to the
+// storer's DeleteLooseObject.
+// The type conveniently matches PruneHandler, so the method value can be
+// used as PruneOptions.Handler.
+func (r *Repository) DeleteObject(hash plumbing.Hash) error {
+ return r.Storer.DeleteLooseObject(hash)
+}
+
+// Prune finds objects that are not reachable from any hash reference of
+// the repository and passes each of them to opt.Handler.
+//
+// It first walks the object graph starting at every hash reference and
+// records what it reaches, then iterates over the (loose) object hashes in
+// storage and calls opt.Handler for every hash that was not recorded. If
+// opt.OnlyObjectsOlderThan is non-zero, objects whose loose-object time is
+// not strictly before it are skipped, as are objects whose time cannot be
+// looked up. opt.Handler must not be nil.
+//
+// An error from listing references, from walking the object graph, or
+// from the storage iteration (including one returned by opt.Handler) is
+// returned to the caller.
+func (r *Repository) Prune(opt PruneOptions) error {
+	pw := &pruneWalker{
+		Storer: r.Storer,
+		seen:   map[plumbing.Hash]struct{}{},
+	}
+	// Walk over all the references in the repo.
+	it, err := r.Storer.IterReferences()
+	if err != nil {
+		// Report the failure instead of claiming success: without the
+		// reference walk nothing is known about which objects are in use.
+		return err
+	}
+	defer it.Close()
+	err = it.ForEach(func(ref *plumbing.Reference) error {
+		// Exit this iteration early for non-hash references.
+		if ref.Type() != plumbing.HashReference {
+			return nil
+		}
+		return pw.walkObjectTree(ref.Hash())
+	})
+	if err != nil {
+		return err
+	}
+	// Now walk all (loose) objects in storage.
+	return r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error {
+		// Get out if we have seen this object.
+		if pw.isSeen(hash) {
+			return nil
+		}
+		// Otherwise it is a candidate for pruning.
+		// Check out for too new objects next. IsZero is used rather than
+		// comparing against time.Time{} with !=, which also compares the
+		// Location and so can treat a zero instant as "set".
+		if !opt.OnlyObjectsOlderThan.IsZero() {
+			// Errors here are non-fatal. The object may be e.g. packed.
+			// Or concurrently deleted. Skip such objects.
+			t, err := r.Storer.LooseObjectTime(hash)
+			if err != nil {
+				return nil
+			}
+			// Skip too new objects.
+			if !t.Before(opt.OnlyObjectsOlderThan) {
+				return nil
+			}
+		}
+		return opt.Handler(hash)
+	})
+}
+
+// pruneWalker records the hashes of the objects reached while walking
+// the object graph for Prune.
+type pruneWalker struct {
+ // Storer is the storage that walkObjectTree fetches objects from.
+ Storer storage.Storer
+ // seen is the set of objects seen in the repo.
+ // seen map can become huge if walking over large
+ // repos. Thus using struct{} as the value type.
+ seen map[plumbing.Hash]struct{}
+}
+
+// isSeen reports whether hash is already present in the seen set.
+func (p *pruneWalker) isSeen(hash plumbing.Hash) bool {
+	_, present := p.seen[hash]
+	return present
+}
+
+// add records hash in the seen set.
+func (p *pruneWalker) add(hash plumbing.Hash) {
+ p.seen[hash] = struct{}{}
+}
+
+// walkObjectTree walks over all objects reachable from hash and remembers
+// them in the pruneWalker. Commits lead to their tree and their parents,
+// trees to their entries, and tag objects to their target; blobs have no
+// children. This is used instead of the revlist walks because memory
+// usage is tight with huge repos.
+//
+// It returns an error if an object cannot be fetched from the storer or
+// has a type that is not handled here.
+func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error {
+	// Check if we have already seen, and mark this object
+	if p.isSeen(hash) {
+		return nil
+	}
+	p.add(hash)
+	// Fetch the object.
+	obj, err := object.GetObject(p.Storer, hash)
+	if err != nil {
+		return fmt.Errorf("Getting object %s failed: %v", hash, err)
+	}
+	// Walk all children depending on object type.
+	switch obj := obj.(type) {
+	case *object.Commit:
+		if err := p.walkObjectTree(obj.TreeHash); err != nil {
+			return err
+		}
+		for _, parent := range obj.ParentHashes {
+			if err := p.walkObjectTree(parent); err != nil {
+				return err
+			}
+		}
+	case *object.Tree:
+		for i := range obj.Entries {
+			entry := &obj.Entries[i]
+			// Submodule entries name a commit that belongs to another
+			// repository, so there is nothing to fetch or mark here.
+			if entry.Mode == filemode.Submodule {
+				continue
+			}
+			// Shortcut for blob objects:
+			// 'or' the lower bits of a mode and check that
+			// it matches a filemode.Executable. The type information
+			// is in the higher bits, but this is the cleanest way
+			// to handle plain files with different modes.
+			// Other non-tree objects are somewhat rare, so they
+			// are not special-cased.
+			if entry.Mode|0755 == filemode.Executable {
+				p.add(entry.Hash)
+				continue
+			}
+			// Normal walk for sub-trees (and symlinks etc).
+			if err := p.walkObjectTree(entry.Hash); err != nil {
+				return err
+			}
+		}
+	case *object.Tag:
+		// A tag object keeps its target alive.
+		return p.walkObjectTree(obj.Target)
+	case *object.Blob:
+		// Blobs reached through the normal walk (e.g. symlink entries)
+		// have no children; they were marked as seen above.
+	default:
+		// Error out on unhandled object types.
+		return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj)
+	}
+	return nil
+}
diff --git a/repository.go b/repository.go
index 7079fd1..b159ff0 100644
--- a/repository.go
+++ b/repository.go
@@ -8,12 +8,10 @@ import (
"os"
"path/filepath"
"strings"
- "time"
"gopkg.in/src-d/go-git.v4/config"
"gopkg.in/src-d/go-git.v4/internal/revision"
"gopkg.in/src-d/go-git.v4/plumbing"
- "gopkg.in/src-d/go-git.v4/plumbing/filemode"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
"gopkg.in/src-d/go-git.v4/storage"
@@ -1013,125 +1011,3 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err
return &commit.Hash, nil
}
-
-type PruneHandler func(unreferencedObjectHash plumbing.Hash) error
-type PruneOptions struct {
- // OnlyObjectsOlderThan if set to non-zero value
- // selects only objects older than the time provided.
- OnlyObjectsOlderThan time.Time
- // Handler is called on matching objects
- Handler PruneHandler
-}
-
-// DeleteObject deletes an object from a repository.
-// The type conveniently matches PruneHandler.
-func (r *Repository) DeleteObject(hash plumbing.Hash) error {
- return r.Storer.DeleteLooseObject(hash)
-}
-
-func (r *Repository) Prune(opt PruneOptions) error {
- pw := &pruneWalker{
- r: r,
- seen: map[plumbing.Hash]struct{}{},
- }
- // Walk over all the references in the repo.
- it, err := r.Storer.IterReferences()
- if err != nil {
- return nil
- }
- defer it.Close()
- err = it.ForEach(func(ref *plumbing.Reference) error {
- // Exit this iteration early for non-hash references.
- if ref.Type() != plumbing.HashReference {
- return nil
- }
- return pw.walkObjectTree(ref.Hash())
- })
- if err != nil {
- return err
- }
- // Now walk all (loose) objects in storage.
- err = r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error {
- // Get out if we have seen this object.
- if pw.isSeen(hash) {
- return nil
- }
- // Otherwise it is a candidate for pruning.
- // Check out for too new objects next.
- if opt.OnlyObjectsOlderThan != (time.Time{}) {
- // Errors here are non-fatal. The object may be e.g. packed.
- // Or concurrently deleted. Skip such objects.
- t, err := r.Storer.LooseObjectTime(hash)
- if err != nil {
- return nil
- }
- // Skip too new objects.
- if !t.Before(opt.OnlyObjectsOlderThan) {
- return nil
- }
- }
- return opt.Handler(hash)
- })
- if err != nil {
- return err
- }
- return nil
-}
-
-type pruneWalker struct {
- r *Repository
- seen map[plumbing.Hash]struct{}
-}
-
-func (p *pruneWalker) isSeen(hash plumbing.Hash) bool {
- _, seen := p.seen[hash]
- return seen
-}
-
-func (p *pruneWalker) add(hash plumbing.Hash) {
- p.seen[hash] = struct{}{}
-}
-
-func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error {
- // Check if we have already seen, and mark this object
- if p.isSeen(hash) {
- return nil
- }
- p.add(hash)
- // Fetch the object.
- obj, err := object.GetObject(p.r.Storer, hash)
- if err != nil {
- return fmt.Errorf("Getting object %s failed: %v", hash, err)
- }
- // Walk all children depending on object type.
- switch obj := obj.(type) {
- case *object.Commit:
- err = p.walkObjectTree(obj.TreeHash)
- if err != nil {
- return err
- }
- for _, h := range obj.ParentHashes {
- err = p.walkObjectTree(h)
- if err != nil {
- return err
- }
- }
- case *object.Tree:
- for i := range obj.Entries {
- // Shortcut for blob objects:
- if obj.Entries[i].Mode|0755 == filemode.Executable {
- p.add(obj.Entries[i].Hash)
- continue
- }
- // Normal walk for sub-trees (and symlinks etc).
- err = p.walkObjectTree(obj.Entries[i].Hash)
- if err != nil {
- return err
- }
- }
- default:
- // Error out on unhandled object types.
- return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj)
- }
- return nil
-}