From 3f0b1ff37b64108cfed1b57ea4ae1f1566592905 Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Mon, 6 Nov 2017 13:30:42 +0200 Subject: Address CI and move code around --- prune.go | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 prune.go (limited to 'prune.go') diff --git a/prune.go b/prune.go new file mode 100644 index 0000000..9c3c4ff --- /dev/null +++ b/prune.go @@ -0,0 +1,145 @@ +package git + +import ( + "fmt" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/filemode" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/storage" +) + +type PruneHandler func(unreferencedObjectHash plumbing.Hash) error +type PruneOptions struct { + // OnlyObjectsOlderThan if set to non-zero value + // selects only objects older than the time provided. + OnlyObjectsOlderThan time.Time + // Handler is called on matching objects + Handler PruneHandler +} + +// DeleteObject deletes an object from a repository. +// The type conveniently matches PruneHandler. +func (r *Repository) DeleteObject(hash plumbing.Hash) error { + return r.Storer.DeleteLooseObject(hash) +} + +func (r *Repository) Prune(opt PruneOptions) error { + pw := &pruneWalker{ + Storer: r.Storer, + seen: map[plumbing.Hash]struct{}{}, + } + // Walk over all the references in the repo. + it, err := r.Storer.IterReferences() + if err != nil { + return nil + } + defer it.Close() + err = it.ForEach(func(ref *plumbing.Reference) error { + // Exit this iteration early for non-hash references. + if ref.Type() != plumbing.HashReference { + return nil + } + return pw.walkObjectTree(ref.Hash()) + }) + if err != nil { + return err + } + // Now walk all (loose) objects in storage. + err = r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { + // Get out if we have seen this object. + if pw.isSeen(hash) { + return nil + } + // Otherwise it is a candidate for pruning. + // Check out for too new objects next. + if opt.OnlyObjectsOlderThan != (time.Time{}) { + // Errors here are non-fatal. The object may be e.g. packed. + // Or concurrently deleted. Skip such objects. + t, err := r.Storer.LooseObjectTime(hash) + if err != nil { + return nil + } + // Skip too new objects. + if !t.Before(opt.OnlyObjectsOlderThan) { + return nil + } + } + return opt.Handler(hash) + }) + if err != nil { + return err + } + return nil +} + +type pruneWalker struct { + Storer storage.Storer + // seen is the set of objects seen in the repo. + // seen map can become huge if walking over large + // repos. Thus using struct{} as the value type. + seen map[plumbing.Hash]struct{} +} + +func (p *pruneWalker) isSeen(hash plumbing.Hash) bool { + _, seen := p.seen[hash] + return seen +} + +func (p *pruneWalker) add(hash plumbing.Hash) { + p.seen[hash] = struct{}{} +} + +// walkObjectTree walks over all objects and remembers references +// to them in the pruneWalker. This is used instead of the revlist +// walks because memory usage is tight with huge repos. +func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error { + // Check if we have already seen, and mark this object + if p.isSeen(hash) { + return nil + } + p.add(hash) + // Fetch the object. + obj, err := object.GetObject(p.Storer, hash) + if err != nil { + return fmt.Errorf("Getting object %s failed: %v", hash, err) + } + // Walk all children depending on object type. + switch obj := obj.(type) { + case *object.Commit: + err = p.walkObjectTree(obj.TreeHash) + if err != nil { + return err + } + for _, h := range obj.ParentHashes { + err = p.walkObjectTree(h) + if err != nil { + return err + } + } + case *object.Tree: + for i := range obj.Entries { + // Shortcut for blob objects: + // 'or' the lower bits of a mode and check that it + // it matches a filemode.Executable. The type information + // is in the higher bits, but this is the cleanest way + // to handle plain files with different modes. + // Other non-tree objects are somewhat rare, so they + // are not special-cased. + if obj.Entries[i].Mode|0755 == filemode.Executable { + p.add(obj.Entries[i].Hash) + continue + } + // Normal walk for sub-trees (and symlinks etc). + err = p.walkObjectTree(obj.Entries[i].Hash) + if err != nil { + return err + } + } + default: + // Error out on unhandled object types. + return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) + } + return nil +} -- cgit From f28e4477dfe49a36dbd55027f8d1133c324bdac5 Mon Sep 17 00:00:00 2001 From: Taru Karttunen Date: Tue, 21 Nov 2017 17:33:15 +0200 Subject: Make prune object walker generic --- prune.go | 93 ++-------------------------------------------------------------- 1 file changed, 2 insertions(+), 91 deletions(-) (limited to 'prune.go') diff --git a/prune.go b/prune.go index 9c3c4ff..fce3bfd 100644 --- a/prune.go +++ b/prune.go @@ -1,13 +1,9 @@ package git import ( - "fmt" "time" "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/filemode" - "gopkg.in/src-d/go-git.v4/plumbing/object" - "gopkg.in/src-d/go-git.v4/storage" ) type PruneHandler func(unreferencedObjectHash plumbing.Hash) error @@ -26,23 +22,8 @@ func (r *Repository) DeleteObject(hash plumbing.Hash) error { } func (r *Repository) Prune(opt PruneOptions) error { - pw := &pruneWalker{ - Storer: r.Storer, - seen: map[plumbing.Hash]struct{}{}, - } - // Walk over all the references in the repo. - it, err := r.Storer.IterReferences() - if err != nil { - return nil - } - defer it.Close() - err = it.ForEach(func(ref *plumbing.Reference) error { - // Exit this iteration early for non-hash references. - if ref.Type() != plumbing.HashReference { - return nil - } - return pw.walkObjectTree(ref.Hash()) - }) + pw := newObjectWalker(r.Storer) + err := pw.walkAllRefs() if err != nil { return err } @@ -73,73 +54,3 @@ func (r *Repository) Prune(opt PruneOptions) error { } return nil } - -type pruneWalker struct { - Storer storage.Storer - // seen is the set of objects seen in the repo. - // seen map can become huge if walking over large - // repos. Thus using struct{} as the value type. - seen map[plumbing.Hash]struct{} -} - -func (p *pruneWalker) isSeen(hash plumbing.Hash) bool { - _, seen := p.seen[hash] - return seen -} - -func (p *pruneWalker) add(hash plumbing.Hash) { - p.seen[hash] = struct{}{} -} - -// walkObjectTree walks over all objects and remembers references -// to them in the pruneWalker. This is used instead of the revlist -// walks because memory usage is tight with huge repos. -func (p *pruneWalker) walkObjectTree(hash plumbing.Hash) error { - // Check if we have already seen, and mark this object - if p.isSeen(hash) { - return nil - } - p.add(hash) - // Fetch the object. - obj, err := object.GetObject(p.Storer, hash) - if err != nil { - return fmt.Errorf("Getting object %s failed: %v", hash, err) - } - // Walk all children depending on object type. - switch obj := obj.(type) { - case *object.Commit: - err = p.walkObjectTree(obj.TreeHash) - if err != nil { - return err - } - for _, h := range obj.ParentHashes { - err = p.walkObjectTree(h) - if err != nil { - return err - } - } - case *object.Tree: - for i := range obj.Entries { - // Shortcut for blob objects: - // 'or' the lower bits of a mode and check that it - // it matches a filemode.Executable. The type information - // is in the higher bits, but this is the cleanest way - // to handle plain files with different modes. - // Other non-tree objects are somewhat rare, so they - // are not special-cased. - if obj.Entries[i].Mode|0755 == filemode.Executable { - p.add(obj.Entries[i].Hash) - continue - } - // Normal walk for sub-trees (and symlinks etc). - err = p.walkObjectTree(obj.Entries[i].Hash) - if err != nil { - return err - } - } - default: - // Error out on unhandled object types. - return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) - } - return nil -} -- cgit From b18457df6a1f75283d95999fde5c162ba1a19651 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Wed, 29 Nov 2017 13:57:36 -0800 Subject: storage: some minor code cleanup Suggested by mcuadros. Issue: #669 --- prune.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'prune.go') diff --git a/prune.go b/prune.go index fce3bfd..81f2582 100644 --- a/prune.go +++ b/prune.go @@ -28,7 +28,7 @@ func (r *Repository) Prune(opt PruneOptions) error { return err } // Now walk all (loose) objects in storage. - err = r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { + return r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { // Get out if we have seen this object. if pw.isSeen(hash) { return nil @@ -49,8 +49,4 @@ func (r *Repository) Prune(opt PruneOptions) error { } return opt.Handler(hash) }) - if err != nil { - return err - } - return nil } -- cgit From 4c1569511db5e1d26e42e9cd8dadb9e65ccafb20 Mon Sep 17 00:00:00 2001 From: Jeremy Stribling Date: Wed, 29 Nov 2017 14:15:32 -0800 Subject: storer: separate loose and packed object mgmt into optional ifaces Suggested by mcuadros. --- prune.go | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'prune.go') diff --git a/prune.go b/prune.go index 81f2582..04913d6 100644 --- a/prune.go +++ b/prune.go @@ -1,9 +1,11 @@ package git import ( + "errors" "time" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" ) type PruneHandler func(unreferencedObjectHash plumbing.Hash) error @@ -15,20 +17,32 @@ type PruneOptions struct { Handler PruneHandler } +var ErrLooseObjectsNotSupported = errors.New("Loose objects not supported") + // DeleteObject deletes an object from a repository. // The type conveniently matches PruneHandler. func (r *Repository) DeleteObject(hash plumbing.Hash) error { - return r.Storer.DeleteLooseObject(hash) + los, ok := r.Storer.(storer.LooseObjectStorer) + if !ok { + return ErrLooseObjectsNotSupported + } + + return los.DeleteLooseObject(hash) } func (r *Repository) Prune(opt PruneOptions) error { + los, ok := r.Storer.(storer.LooseObjectStorer) + if !ok { + return ErrLooseObjectsNotSupported + } + pw := newObjectWalker(r.Storer) err := pw.walkAllRefs() if err != nil { return err } // Now walk all (loose) objects in storage. - return r.Storer.ForEachObjectHash(func(hash plumbing.Hash) error { + return los.ForEachObjectHash(func(hash plumbing.Hash) error { // Get out if we have seen this object. if pw.isSeen(hash) { return nil @@ -38,7 +52,7 @@ func (r *Repository) Prune(opt PruneOptions) error { if opt.OnlyObjectsOlderThan != (time.Time{}) { // Errors here are non-fatal. The object may be e.g. packed. // Or concurrently deleted. Skip such objects. - t, err := r.Storer.LooseObjectTime(hash) + t, err := los.LooseObjectTime(hash) if err != nil { return nil } -- cgit