aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2017-12-01 00:45:11 +0100
committerGitHub <noreply@github.com>2017-12-01 00:45:11 +0100
commitb0f6b4786b58b4add6e54d354311fc1084764b36 (patch)
treeebc3ad2f840da271aeac9cf33cc3912feb9f9e12
parent174fd8e5b2150dbd4cf522bb4a98fb9d79ebc6f4 (diff)
parentd53264806f0d5ddef259f45f4490a19398a102ba (diff)
downloadgo-git-b0f6b4786b58b4add6e54d354311fc1084764b36.tar.gz
Merge pull request #669 from keybase/strib/gh-gc
storage/repository: add new functions for garbage collection
-rw-r--r--object_walker.go105
-rw-r--r--plumbing/storer/object.go32
-rw-r--r--plumbing/storer/object_test.go9
-rw-r--r--plumbing/storer/reference.go2
-rw-r--r--prune.go66
-rw-r--r--prune_test.go73
-rw-r--r--repository.go112
-rw-r--r--repository_test.go61
-rw-r--r--storage/filesystem/internal/dotgit/dotgit.go312
-rw-r--r--storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go11
-rw-r--r--storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go39
-rw-r--r--storage/filesystem/internal/dotgit/dotgit_test.go72
-rw-r--r--storage/filesystem/object.go55
-rw-r--r--storage/filesystem/reference.go8
-rw-r--r--storage/memory/storage.go45
15 files changed, 928 insertions, 74 deletions
diff --git a/object_walker.go b/object_walker.go
new file mode 100644
index 0000000..8bae1fa
--- /dev/null
+++ b/object_walker.go
@@ -0,0 +1,105 @@
+package git
+
+import (
+ "fmt"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/filemode"
+ "gopkg.in/src-d/go-git.v4/plumbing/object"
+ "gopkg.in/src-d/go-git.v4/storage"
+)
+
+type objectWalker struct {
+ Storer storage.Storer
+ // seen is the set of objects seen in the repo.
+ // seen map can become huge if walking over large
+ // repos. Thus using struct{} as the value type.
+ seen map[plumbing.Hash]struct{}
+}
+
+func newObjectWalker(s storage.Storer) *objectWalker {
+ return &objectWalker{s, map[plumbing.Hash]struct{}{}}
+}
+
+// walkAllRefs walks the objects reachable from every hash reference in the repo; it returns the first error encountered.
+func (p *objectWalker) walkAllRefs() error {
+ // Obtain an iterator over all the references in the repo.
+ it, err := p.Storer.IterReferences()
+ if err != nil {
+ return err
+ }
+ defer it.Close()
+ err = it.ForEach(func(ref *plumbing.Reference) error {
+ // Skip non-hash references; only hash references are walked.
+ if ref.Type() != plumbing.HashReference {
+ return nil
+ }
+ return p.walkObjectTree(ref.Hash())
+ })
+ if err != nil {
+ return err
+ }
+ return nil
+}
+
+func (p *objectWalker) isSeen(hash plumbing.Hash) bool {
+ _, seen := p.seen[hash]
+ return seen
+}
+
+func (p *objectWalker) add(hash plumbing.Hash) {
+ p.seen[hash] = struct{}{}
+}
+
+// walkObjectTree marks hash as seen and then recursively walks every
+// object reachable from it, remembering each visited hash in the
+// objectWalker. This is used instead of the revlist walks because
+// memory usage is tight with huge repos.
+//
+// It returns an error if an object cannot be fetched from the storer
+// or if an object of an unhandled type is encountered.
+func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error {
+ // Check if we have already seen, and mark this object
+ if p.isSeen(hash) {
+ return nil
+ }
+ p.add(hash)
+ // Fetch the object.
+ obj, err := object.GetObject(p.Storer, hash)
+ if err != nil {
+ return fmt.Errorf("Getting object %s failed: %v", hash, err)
+ }
+ // Walk all children depending on object type.
+ switch obj := obj.(type) {
+ case *object.Commit:
+ err = p.walkObjectTree(obj.TreeHash)
+ if err != nil {
+ return err
+ }
+ for _, h := range obj.ParentHashes {
+ err = p.walkObjectTree(h)
+ if err != nil {
+ return err
+ }
+ }
+ case *object.Tree:
+ for i := range obj.Entries {
+ // Shortcut for blob objects:
+ // 'or' the lower bits of a mode and check that
+ // it matches a filemode.Executable. The type information
+ // is in the higher bits, but this is the cleanest way
+ // to handle plain files with different modes.
+ // Other non-tree objects are somewhat rare, so they
+ // are not special-cased.
+ if obj.Entries[i].Mode|0755 == filemode.Executable {
+ p.add(obj.Entries[i].Hash)
+ continue
+ }
+ // Submodule entries name a commit that belongs to another
+ // repository, so there is nothing to fetch or walk here.
+ if obj.Entries[i].Mode == filemode.Submodule {
+ continue
+ }
+ // Normal walk for sub-trees (and symlinks etc).
+ err = p.walkObjectTree(obj.Entries[i].Hash)
+ if err != nil {
+ return err
+ }
+ }
+ case *object.Tag:
+ // Annotated tag: keep walking from the object it points at.
+ return p.walkObjectTree(obj.Target)
+ case *object.Blob:
+ // Blobs reached through the normal walk (e.g. symlink entries)
+ // have no children; the hash was already marked as seen above.
+ default:
+ // Error out on unhandled object types.
+ return fmt.Errorf("Unknown object %X %s %T", obj.ID(), obj.Type(), obj)
+ }
+ return nil
+}
diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go
index e793211..f1d19ef 100644
--- a/plumbing/storer/object.go
+++ b/plumbing/storer/object.go
@@ -3,6 +3,7 @@ package storer
import (
"errors"
"io"
+ "time"
"gopkg.in/src-d/go-git.v4/plumbing"
)
@@ -36,6 +37,9 @@ type EncodedObjectStorer interface {
//
// Valid plumbing.ObjectType values are CommitObject, BlobObject, TagObject,
IterEncodedObjects(plumbing.ObjectType) (EncodedObjectIter, error)
+ // HasEncodedObject returns plumbing.ErrObjectNotFound if the object
+ // doesn't exist. If the object does exist, it returns nil.
+ HasEncodedObject(plumbing.Hash) error
}
// DeltaObjectStorer is an EncodedObjectStorer that can return delta
@@ -53,6 +57,34 @@ type Transactioner interface {
Begin() Transaction
}
+// LooseObjectStorer is an optional interface for managing "loose"
+// objects, i.e. those not in packfiles.
+type LooseObjectStorer interface {
+ // ForEachObjectHash iterates over all the (loose) object hashes
+ // in the repository without necessarily having to read those objects.
+ // Objects only inside pack files may be omitted.
+ // If ErrStop is sent the iteration is stopped but no error is returned.
+ ForEachObjectHash(func(plumbing.Hash) error) error
+ // LooseObjectTime looks up the (m)time associated with the
+ // loose object (that is not in a pack file). Some
+ // implementations (e.g. without loose objects)
+ // always return an error.
+ LooseObjectTime(plumbing.Hash) (time.Time, error)
+ // DeleteLooseObject deletes a loose object if it exists.
+ DeleteLooseObject(plumbing.Hash) error
+}
+
+// PackedObjectStorer is an optional interface for managing objects in
+// packfiles.
+type PackedObjectStorer interface {
+ // ObjectPacks returns hashes of object packs if the underlying
+ // implementation has pack files.
+ ObjectPacks() ([]plumbing.Hash, error)
+ // DeleteOldObjectPackAndIndex deletes an object pack and the corresponding index file if they exist.
+ // Deletion is only performed if the pack is older than the supplied time (or the time is zero).
+ DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error
+}
+
// PackfileWriter is an optional method for ObjectStorer; it enables direct
// writes of packfiles to the storage.
type PackfileWriter interface {
diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go
index 6bdd25c..6b4fe0f 100644
--- a/plumbing/storer/object_test.go
+++ b/plumbing/storer/object_test.go
@@ -132,6 +132,15 @@ func (o *MockObjectStorage) SetEncodedObject(obj plumbing.EncodedObject) (plumbi
return plumbing.ZeroHash, nil
}
+func (o *MockObjectStorage) HasEncodedObject(h plumbing.Hash) error {
+ for _, o := range o.db {
+ if o.Hash() == h {
+ return nil
+ }
+ }
+ return plumbing.ErrObjectNotFound
+}
+
func (o *MockObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) {
for _, o := range o.db {
if o.Hash() == h {
diff --git a/plumbing/storer/reference.go b/plumbing/storer/reference.go
index ae80a39..5e85a3b 100644
--- a/plumbing/storer/reference.go
+++ b/plumbing/storer/reference.go
@@ -24,6 +24,8 @@ type ReferenceStorer interface {
Reference(plumbing.ReferenceName) (*plumbing.Reference, error)
IterReferences() (ReferenceIter, error)
RemoveReference(plumbing.ReferenceName) error
+ CountLooseRefs() (int, error)
+ PackRefs() error
}
// ReferenceIter is a generic closable interface for iterating over references.
diff --git a/prune.go b/prune.go
new file mode 100644
index 0000000..04913d6
--- /dev/null
+++ b/prune.go
@@ -0,0 +1,66 @@
+package git
+
+import (
+ "errors"
+ "time"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/storer"
+)
+
+type PruneHandler func(unreferencedObjectHash plumbing.Hash) error
+type PruneOptions struct {
+ // OnlyObjectsOlderThan if set to non-zero value
+ // selects only objects older than the time provided.
+ OnlyObjectsOlderThan time.Time
+ // Handler is called on matching objects
+ Handler PruneHandler
+}
+
+var ErrLooseObjectsNotSupported = errors.New("Loose objects not supported")
+
+// DeleteObject deletes an object from a repository.
+// The type conveniently matches PruneHandler.
+func (r *Repository) DeleteObject(hash plumbing.Hash) error {
+ los, ok := r.Storer.(storer.LooseObjectStorer)
+ if !ok {
+ return ErrLooseObjectsNotSupported
+ }
+
+ return los.DeleteLooseObject(hash)
+}
+
+// Prune walks all references to find the reachable objects and then calls
+// opt.Handler for every loose object in storage that was not reached.
+// If opt.OnlyObjectsOlderThan is non-zero, only objects whose loose-object
+// time is before it are handed to the handler. It returns
+// ErrLooseObjectsNotSupported when the storer does not implement
+// storer.LooseObjectStorer, and otherwise the first error from the walk,
+// the iteration or the handler.
+func (r *Repository) Prune(opt PruneOptions) error {
+ los, ok := r.Storer.(storer.LooseObjectStorer)
+ if !ok {
+ return ErrLooseObjectsNotSupported
+ }
+
+ pw := newObjectWalker(r.Storer)
+ err := pw.walkAllRefs()
+ if err != nil {
+ return err
+ }
+ // Now walk all (loose) objects in storage.
+ return los.ForEachObjectHash(func(hash plumbing.Hash) error {
+ // Get out if we have seen this object.
+ if pw.isSeen(hash) {
+ return nil
+ }
+ // Otherwise it is a candidate for pruning.
+ // Check out for too new objects next.
+ // IsZero is used rather than comparing against time.Time{} so that
+ // a zero instant is treated as "not set" regardless of its location.
+ if !opt.OnlyObjectsOlderThan.IsZero() {
+ // Errors here are non-fatal. The object may be e.g. packed.
+ // Or concurrently deleted. Skip such objects.
+ t, err := los.LooseObjectTime(hash)
+ if err != nil {
+ return nil
+ }
+ // Skip too new objects.
+ if !t.Before(opt.OnlyObjectsOlderThan) {
+ return nil
+ }
+ }
+ return opt.Handler(hash)
+ })
+}
diff --git a/prune_test.go b/prune_test.go
new file mode 100644
index 0000000..60652ec
--- /dev/null
+++ b/prune_test.go
@@ -0,0 +1,73 @@
+package git
+
+import (
+ "time"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/storer"
+ "gopkg.in/src-d/go-git.v4/storage"
+ "gopkg.in/src-d/go-git.v4/storage/filesystem"
+
+ . "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git-fixtures.v3"
+)
+
+type PruneSuite struct {
+ BaseSuite
+}
+
+var _ = Suite(&PruneSuite{})
+
+// testPrune opens the "unpacked" fixture, removes the v4 branch references so
+// that some objects become unreferenced, runs Prune with DeleteObject as the
+// handler and compares the loose object count before and after. With a zero
+// deleteTime the count must shrink; otherwise it must stay the same.
+func (s *PruneSuite) testPrune(c *C, deleteTime time.Time) {
+ srcFs := fixtures.ByTag("unpacked").One().DotGit()
+ var sto storage.Storer
+ var err error
+ sto, err = filesystem.NewStorage(srcFs)
+ c.Assert(err, IsNil)
+
+ los := sto.(storer.LooseObjectStorer)
+ c.Assert(los, NotNil)
+
+ count := 0
+ err = los.ForEachObjectHash(func(_ plumbing.Hash) error {
+ count++
+ return nil
+ })
+ c.Assert(err, IsNil)
+
+ r, err := Open(sto, srcFs)
+ c.Assert(err, IsNil)
+ c.Assert(r, NotNil)
+
+ // Remove a branch so we can prune some objects.
+ err = sto.RemoveReference(plumbing.ReferenceName("refs/heads/v4"))
+ c.Assert(err, IsNil)
+ err = sto.RemoveReference(plumbing.ReferenceName("refs/remotes/origin/v4"))
+ c.Assert(err, IsNil)
+
+ err = r.Prune(PruneOptions{
+ OnlyObjectsOlderThan: deleteTime,
+ Handler: r.DeleteObject,
+ })
+ c.Assert(err, IsNil)
+
+ newCount := 0
+ err = los.ForEachObjectHash(func(_ plumbing.Hash) error {
+ newCount++
+ return nil
+ })
+ // A failed iteration would leave newCount meaningless, so check it.
+ c.Assert(err, IsNil)
+ if deleteTime.IsZero() {
+ c.Assert(newCount < count, Equals, true)
+ } else {
+ // Assume a delete time older than any of the objects was passed in.
+ c.Assert(newCount, Equals, count)
+ }
+}
+
+func (s *PruneSuite) TestPrune(c *C) {
+ s.testPrune(c, time.Time{})
+}
+
+func (s *PruneSuite) TestPruneWithNoDelete(c *C) {
+ s.testPrune(c, time.Unix(0, 1))
+}
diff --git a/repository.go b/repository.go
index b159ff0..7cdc0d5 100644
--- a/repository.go
+++ b/repository.go
@@ -8,10 +8,12 @@ import (
"os"
"path/filepath"
"strings"
+ "time"
"gopkg.in/src-d/go-git.v4/config"
"gopkg.in/src-d/go-git.v4/internal/revision"
"gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
"gopkg.in/src-d/go-git.v4/plumbing/object"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
"gopkg.in/src-d/go-git.v4/storage"
@@ -23,14 +25,15 @@ import (
)
var (
- ErrInvalidReference = errors.New("invalid reference, should be a tag or a branch")
- ErrRepositoryNotExists = errors.New("repository does not exist")
- ErrRepositoryAlreadyExists = errors.New("repository already exists")
- ErrRemoteNotFound = errors.New("remote not found")
- ErrRemoteExists = errors.New("remote already exists ")
- ErrWorktreeNotProvided = errors.New("worktree should be provided")
- ErrIsBareRepository = errors.New("worktree not available in a bare repository")
- ErrUnableToResolveCommit = errors.New("unable to resolve commit")
+ ErrInvalidReference = errors.New("invalid reference, should be a tag or a branch")
+ ErrRepositoryNotExists = errors.New("repository does not exist")
+ ErrRepositoryAlreadyExists = errors.New("repository already exists")
+ ErrRemoteNotFound = errors.New("remote not found")
+ ErrRemoteExists = errors.New("remote already exists ")
+ ErrWorktreeNotProvided = errors.New("worktree should be provided")
+ ErrIsBareRepository = errors.New("worktree not available in a bare repository")
+ ErrUnableToResolveCommit = errors.New("unable to resolve commit")
+ ErrPackedObjectsNotSupported = errors.New("Packed objects not supported")
)
// Repository represents a git repository
@@ -1011,3 +1014,96 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err
return &commit.Hash, nil
}
+
+type RepackConfig struct {
+ // UseRefDeltas configures whether packfile encoder will use reference deltas.
+ // By default OFSDeltaObject is used.
+ UseRefDeltas bool
+ // OnlyDeletePacksOlderThan if set to non-zero value
+ // deletes only old packs older than the time provided.
+ OnlyDeletePacksOlderThan time.Time
+}
+
+// RepackObjects writes a new pack through createNewObjectPack and then
+// deletes the packs that existed beforehand, subject to
+// cfg.OnlyDeletePacksOlderThan. A nil cfg is treated as the zero
+// RepackConfig. It returns ErrPackedObjectsNotSupported when the storer
+// does not implement storer.PackedObjectStorer.
+func (r *Repository) RepackObjects(cfg *RepackConfig) (err error) {
+ // Fall back to the defaults instead of dereferencing a nil config.
+ if cfg == nil {
+ cfg = &RepackConfig{}
+ }
+
+ pos, ok := r.Storer.(storer.PackedObjectStorer)
+ if !ok {
+ return ErrPackedObjectsNotSupported
+ }
+
+ // Get the existing object packs.
+ hs, err := pos.ObjectPacks()
+ if err != nil {
+ return err
+ }
+
+ // Create a new pack.
+ nh, err := r.createNewObjectPack(cfg)
+ if err != nil {
+ return err
+ }
+
+ // Delete old packs.
+ for _, h := range hs {
+ // Skip if new hash is the same as an old one.
+ if h == nh {
+ continue
+ }
+ err = pos.DeleteOldObjectPackAndIndex(h, cfg.OnlyDeletePacksOlderThan)
+ if err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// createNewObjectPack is a helper for RepackObjects taking care
+// of creating a new pack. It is used so that the PackfileWriter
+// deferred close has the right scope.
+func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, err error) {
+ ow := newObjectWalker(r.Storer)
+ err = ow.walkAllRefs()
+ if err != nil {
+ return h, err
+ }
+ objs := make([]plumbing.Hash, 0, len(ow.seen))
+ for h := range ow.seen {
+ objs = append(objs, h)
+ }
+ pfw, ok := r.Storer.(storer.PackfileWriter)
+ if !ok {
+ return h, fmt.Errorf("Repository storer is not a storer.PackfileWriter")
+ }
+ wc, err := pfw.PackfileWriter()
+ if err != nil {
+ return h, err
+ }
+ defer ioutil.CheckClose(wc, &err)
+ scfg, err := r.Storer.Config()
+ if err != nil {
+ return h, err
+ }
+ enc := packfile.NewEncoder(wc, r.Storer, cfg.UseRefDeltas)
+ h, err = enc.Encode(objs, scfg.Pack.Window)
+ if err != nil {
+ return h, err
+ }
+
+ // Delete the packed, loose objects.
+ if los, ok := r.Storer.(storer.LooseObjectStorer); ok {
+ err = los.ForEachObjectHash(func(hash plumbing.Hash) error {
+ if ow.isSeen(hash) {
+ err := los.DeleteLooseObject(hash)
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+ })
+ if err != nil {
+ return h, err
+ }
+ }
+
+ return h, err
+}
diff --git a/repository_test.go b/repository_test.go
index 9d82651..2ebc597 100644
--- a/repository_test.go
+++ b/repository_test.go
@@ -10,10 +10,13 @@ import (
"os/exec"
"path/filepath"
"strings"
+ "time"
"gopkg.in/src-d/go-git.v4/config"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
+ "gopkg.in/src-d/go-git.v4/plumbing/storer"
+ "gopkg.in/src-d/go-git.v4/storage"
"gopkg.in/src-d/go-git.v4/storage/filesystem"
"gopkg.in/src-d/go-git.v4/storage/memory"
@@ -1313,6 +1316,64 @@ func (s *RepositorySuite) TestResolveRevisionWithErrors(c *C) {
}
}
+// testRepackObjects repacks the "unpacked" fixture with the given
+// OnlyDeletePacksOlderThan time and checks that no loose objects remain
+// and that exactly expectedPacks packs are left afterwards.
+func (s *RepositorySuite) testRepackObjects(
+ c *C, deleteTime time.Time, expectedPacks int) {
+ srcFs := fixtures.ByTag("unpacked").One().DotGit()
+ var sto storage.Storer
+ var err error
+ sto, err = filesystem.NewStorage(srcFs)
+ c.Assert(err, IsNil)
+
+ los := sto.(storer.LooseObjectStorer)
+ c.Assert(los, NotNil)
+
+ numLooseStart := 0
+ err = los.ForEachObjectHash(func(_ plumbing.Hash) error {
+ numLooseStart++
+ return nil
+ })
+ c.Assert(err, IsNil)
+ c.Assert(numLooseStart > 0, Equals, true)
+
+ pos := sto.(storer.PackedObjectStorer)
+ c.Assert(pos, NotNil)
+
+ packs, err := pos.ObjectPacks()
+ c.Assert(err, IsNil)
+ numPacksStart := len(packs)
+ c.Assert(numPacksStart > 1, Equals, true)
+
+ r, err := Open(sto, srcFs)
+ c.Assert(err, IsNil)
+ c.Assert(r, NotNil)
+
+ err = r.RepackObjects(&RepackConfig{
+ OnlyDeletePacksOlderThan: deleteTime,
+ })
+ c.Assert(err, IsNil)
+
+ numLooseEnd := 0
+ err = los.ForEachObjectHash(func(_ plumbing.Hash) error {
+ numLooseEnd++
+ return nil
+ })
+ c.Assert(err, IsNil)
+ c.Assert(numLooseEnd, Equals, 0)
+
+ packs, err = pos.ObjectPacks()
+ c.Assert(err, IsNil)
+ numPacksEnd := len(packs)
+ c.Assert(numPacksEnd, Equals, expectedPacks)
+}
+
+func (s *RepositorySuite) TestRepackObjects(c *C) {
+ s.testRepackObjects(c, time.Time{}, 1)
+}
+
+func (s *RepositorySuite) TestRepackObjectsWithNoDelete(c *C) {
+ s.testRepackObjects(c, time.Unix(0, 1), 3)
+}
+
func ExecuteOnPath(c *C, path string, cmds ...string) error {
for _, cmd := range cmds {
err := executeOnPath(path, cmd)
diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go
index 1cb97bd..5e23e66 100644
--- a/storage/filesystem/internal/dotgit/dotgit.go
+++ b/storage/filesystem/internal/dotgit/dotgit.go
@@ -9,6 +9,7 @@ import (
stdioutil "io/ioutil"
"os"
"strings"
+ "time"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/utils/ioutil"
@@ -166,11 +167,12 @@ func (d *DotGit) ObjectPacks() ([]plumbing.Hash, error) {
return packs, nil
}
-// ObjectPack returns a fs.File of the given packfile
-func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) {
- file := d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.pack", hash.String()))
+func (d *DotGit) objectPackPath(hash plumbing.Hash, extension string) string {
+ return d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.%s", hash.String(), extension))
+}
- pack, err := d.fs.Open(file)
+func (d *DotGit) objectPackOpen(hash plumbing.Hash, extension string) (billy.File, error) {
+ pack, err := d.fs.Open(d.objectPackPath(hash, extension))
if err != nil {
if os.IsNotExist(err) {
return nil, ErrPackfileNotFound
@@ -182,19 +184,37 @@ func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) {
return pack, nil
}
+// ObjectPack returns a fs.File of the given packfile
+func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) {
+ return d.objectPackOpen(hash, `pack`)
+}
+
// ObjectPackIdx returns a fs.File of the index file for a given packfile
func (d *DotGit) ObjectPackIdx(hash plumbing.Hash) (billy.File, error) {
- file := d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.idx", hash.String()))
- idx, err := d.fs.Open(file)
- if err != nil {
- if os.IsNotExist(err) {
- return nil, ErrPackfileNotFound
- }
+ return d.objectPackOpen(hash, `idx`)
+}
- return nil, err
+func (d *DotGit) DeleteOldObjectPackAndIndex(hash plumbing.Hash, t time.Time) error {
+ path := d.objectPackPath(hash, `pack`)
+ if !t.IsZero() {
+ fi, err := d.fs.Stat(path)
+ if err != nil {
+ return err
+ }
+ // too new, skip deletion.
+ if !fi.ModTime().Before(t) {
+ return nil
+ }
}
-
- return idx, nil
+ err := d.fs.Remove(path)
+ if err != nil {
+ return err
+ }
+ err = d.fs.Remove(d.objectPackPath(hash, `idx`))
+ if err != nil {
+ return err
+ }
+ return nil
}
// NewObject return a writer for a new object file.
@@ -205,39 +225,67 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) {
// Objects returns a slice with the hashes of objects found under the
// .git/objects/ directory.
func (d *DotGit) Objects() ([]plumbing.Hash, error) {
+ var objects []plumbing.Hash
+ err := d.ForEachObjectHash(func(hash plumbing.Hash) error {
+ objects = append(objects, hash)
+ return nil
+ })
+ if err != nil {
+ return nil, err
+ }
+ return objects, nil
+}
+
+// ForEachObjectHash calls fun with the hash of each object found under
+// the .git/objects/ directory, stopping at the first error fun returns.
+func (d *DotGit) ForEachObjectHash(fun func(plumbing.Hash) error) error {
files, err := d.fs.ReadDir(objectsPath)
if err != nil {
if os.IsNotExist(err) {
- return nil, nil
+ return nil
}
- return nil, err
+ return err
}
- var objects []plumbing.Hash
for _, f := range files {
if f.IsDir() && len(f.Name()) == 2 && isHex(f.Name()) {
base := f.Name()
d, err := d.fs.ReadDir(d.fs.Join(objectsPath, base))
if err != nil {
- return nil, err
+ return err
}
for _, o := range d {
- objects = append(objects, plumbing.NewHash(base+o.Name()))
+ err = fun(plumbing.NewHash(base + o.Name()))
+ if err != nil {
+ return err
+ }
}
}
}
- return objects, nil
+ return nil
}
-// Object return a fs.File pointing the object file, if exists
-func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) {
+func (d *DotGit) objectPath(h plumbing.Hash) string {
hash := h.String()
- file := d.fs.Join(objectsPath, hash[0:2], hash[2:40])
+ return d.fs.Join(objectsPath, hash[0:2], hash[2:40])
+}
+
+// Object returns a fs.File pointing the object file, if exists
+func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) {
+ return d.fs.Open(d.objectPath(h))
+}
+
+// ObjectStat returns the os.FileInfo of the object file, if it exists
+func (d *DotGit) ObjectStat(h plumbing.Hash) (os.FileInfo, error) {
+ return d.fs.Stat(d.objectPath(h))
+}
- return d.fs.Open(file)
+// ObjectDelete removes the object file, if exists
+func (d *DotGit) ObjectDelete(h plumbing.Hash) error {
+ return d.fs.Remove(d.objectPath(h))
}
func (d *DotGit) readReferenceFrom(rd io.Reader, name string) (ref *plumbing.Reference, err error) {
@@ -339,17 +387,7 @@ func (d *DotGit) Ref(name plumbing.ReferenceName) (*plumbing.Reference, error) {
return d.packedRef(name)
}
-func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) {
- f, err := d.fs.Open(packedRefsPath)
- if err != nil {
- if os.IsNotExist(err) {
- return nil, nil
- }
- return nil, err
- }
-
- defer ioutil.CheckClose(f, &err)
-
+func (d *DotGit) findPackedRefsInFile(f billy.File) ([]*plumbing.Reference, error) {
s := bufio.NewScanner(f)
var refs []*plumbing.Reference
for s.Scan() {
@@ -366,6 +404,19 @@ func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) {
return refs, s.Err()
}
+func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) {
+ f, err := d.fs.Open(packedRefsPath)
+ if err != nil {
+ if os.IsNotExist(err) {
+ return nil, nil
+ }
+ return nil, err
+ }
+
+ defer ioutil.CheckClose(f, &err)
+ return d.findPackedRefsInFile(f)
+}
+
func (d *DotGit) packedRef(name plumbing.ReferenceName) (*plumbing.Reference, error) {
refs, err := d.findPackedRefs()
if err != nil {
@@ -412,26 +463,82 @@ func (d *DotGit) addRefsFromPackedRefs(refs *[]*plumbing.Reference, seen map[plu
return nil
}
-func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err error) {
- f, err := d.fs.Open(packedRefsPath)
+func (d *DotGit) addRefsFromPackedRefsFile(refs *[]*plumbing.Reference, f billy.File, seen map[plumbing.ReferenceName]bool) (err error) {
+ packedRefs, err := d.findPackedRefsInFile(f)
if err != nil {
- if os.IsNotExist(err) {
- return nil
- }
-
return err
}
- doCloseF := true
+
+ for _, ref := range packedRefs {
+ if !seen[ref.Name()] {
+ *refs = append(*refs, ref)
+ seen[ref.Name()] = true
+ }
+ }
+ return nil
+}
+
+func (d *DotGit) openAndLockPackedRefs(doCreate bool) (
+ pr billy.File, err error) {
+ var f billy.File
defer func() {
- if doCloseF {
+ if err != nil && f != nil {
ioutil.CheckClose(f, &err)
}
}()
- err = f.Lock()
+ openFlags := os.O_RDWR
+ if doCreate {
+ openFlags |= os.O_CREATE
+ }
+
+ // Keep trying to open and lock the file until we're sure the file
+ // didn't change between the open and the lock.
+ for {
+ f, err = d.fs.OpenFile(packedRefsPath, openFlags, 0600)
+ if err != nil {
+ if os.IsNotExist(err) && !doCreate {
+ return nil, nil
+ }
+
+ return nil, err
+ }
+ fi, err := d.fs.Stat(packedRefsPath)
+ if err != nil {
+ return nil, err
+ }
+ mtime := fi.ModTime()
+
+ err = f.Lock()
+ if err != nil {
+ return nil, err
+ }
+
+ fi, err = d.fs.Stat(packedRefsPath)
+ if err != nil {
+ return nil, err
+ }
+ if mtime == fi.ModTime() {
+ break
+ }
+ // The file has changed since we opened it. Close and retry.
+ err = f.Close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ return f, nil
+}
+
+func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err error) {
+ pr, err := d.openAndLockPackedRefs(false)
if err != nil {
return err
}
+ if pr == nil {
+ return nil
+ }
+ defer ioutil.CheckClose(pr, &err)
// Creating the temp file in the same directory as the target file
// improves our chances for rename operation to be atomic.
@@ -439,14 +546,13 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e
if err != nil {
return err
}
- doCloseTmp := true
+ tmpName := tmp.Name()
defer func() {
- if doCloseTmp {
- ioutil.CheckClose(tmp, &err)
- }
+ ioutil.CheckClose(tmp, &err)
+ _ = d.fs.Remove(tmpName) // don't check err, we might have renamed it
}()
- s := bufio.NewScanner(f)
+ s := bufio.NewScanner(pr)
found := false
for s.Scan() {
line := s.Text()
@@ -470,26 +576,10 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e
}
if !found {
- doCloseTmp = false
- ioutil.CheckClose(tmp, &err)
- if err != nil {
- return err
- }
- // Delete the temp file if nothing needed to be removed.
- return d.fs.Remove(tmp.Name())
- }
-
- doCloseF = false
- if err := f.Close(); err != nil {
- return err
- }
-
- doCloseTmp = false
- if err := tmp.Close(); err != nil {
- return err
+ return nil
}
- return d.fs.Rename(tmp.Name(), packedRefsPath)
+ return d.rewritePackedRefsWhileLocked(tmp, pr)
}
// process lines from a packed-refs file
@@ -576,6 +666,96 @@ func (d *DotGit) readReferenceFile(path, name string) (ref *plumbing.Reference,
return d.readReferenceFrom(f, name)
}
+func (d *DotGit) CountLooseRefs() (int, error) {
+ var refs []*plumbing.Reference
+ var seen = make(map[plumbing.ReferenceName]bool)
+ if err := d.addRefsFromRefDir(&refs, seen); err != nil {
+ return 0, err
+ }
+
+ return len(refs), nil
+}
+
+// PackRefs packs all loose refs into the packed-refs file.
+//
+// This implementation only works under the assumption that the view
+// of the file system won't be updated during this operation. This
+// strategy would not work on a general file system though, without
+// locking each loose reference and checking it again before deleting
+// the file, because otherwise an updated reference could sneak in and
+// then be deleted by the packed-refs process. Alternatively, every
+// ref update could also lock packed-refs, so only one lock is
+// required during ref-packing. But that would worsen performance in
+// the common case.
+//
+// TODO: add an "all" boolean like the `git pack-refs --all` flag.
+// When `all` is false, it would only pack refs that have already been
+// packed, plus all tags.
+func (d *DotGit) PackRefs() (err error) {
+ // Lock packed-refs, and create it if it doesn't exist yet.
+ f, err := d.openAndLockPackedRefs(true)
+ if err != nil {
+ return err
+ }
+ defer ioutil.CheckClose(f, &err)
+
+ // Gather all refs using addRefsFromRefDir and addRefsFromPackedRefs.
+ var refs []*plumbing.Reference
+ seen := make(map[plumbing.ReferenceName]bool)
+ if err := d.addRefsFromRefDir(&refs, seen); err != nil {
+ return err
+ }
+ if len(refs) == 0 {
+ // Nothing to do!
+ return nil
+ }
+ numLooseRefs := len(refs)
+ if err := d.addRefsFromPackedRefsFile(&refs, f, seen); err != nil {
+ return err
+ }
+
+ // Write them all to a new temp packed-refs file.
+ tmp, err := d.fs.TempFile("", tmpPackedRefsPrefix)
+ if err != nil {
+ return err
+ }
+ tmpName := tmp.Name()
+ defer func() {
+ ioutil.CheckClose(tmp, &err)
+ _ = d.fs.Remove(tmpName) // don't check err, we might have renamed it
+ }()
+
+ w := bufio.NewWriter(tmp)
+ for _, ref := range refs {
+ _, err := w.WriteString(ref.String() + "\n")
+ if err != nil {
+ return err
+ }
+ }
+ err = w.Flush()
+ if err != nil {
+ return err
+ }
+
+ // Rename the temp packed-refs file.
+ err = d.rewritePackedRefsWhileLocked(tmp, f)
+ if err != nil {
+ return err
+ }
+
+ // Delete all the loose refs, while still holding the packed-refs
+ // lock.
+ for _, ref := range refs[:numLooseRefs] {
+ path := d.fs.Join(".", ref.Name().String())
+ err = d.fs.Remove(path)
+ if err != nil && !os.IsNotExist(err) {
+ return err
+ }
+ }
+
+ return nil
+}
+
// Module returns a billy.Filesystem pointing to the module folder
func (d *DotGit) Module(name string) (billy.Filesystem, error) {
return d.fs.Chroot(d.fs.Join(modulePath, name))
diff --git a/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go
new file mode 100644
index 0000000..af96196
--- /dev/null
+++ b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go
@@ -0,0 +1,11 @@
+// +build !windows
+
+package dotgit
+
+import "gopkg.in/src-d/go-billy.v4"
+
+func (d *DotGit) rewritePackedRefsWhileLocked(
+ tmp billy.File, pr billy.File) error {
+ // On non-Windows platforms, we can have atomic rename.
+ return d.fs.Rename(tmp.Name(), pr.Name())
+}
diff --git a/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go
new file mode 100644
index 0000000..bcdb93e
--- /dev/null
+++ b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go
@@ -0,0 +1,39 @@
+// +build windows
+
+package dotgit
+
+import (
+ "io"
+
+ "gopkg.in/src-d/go-billy.v4"
+)
+
+func (d *DotGit) rewritePackedRefsWhileLocked(
+ tmp billy.File, pr billy.File) error {
+ // If we aren't using the bare Windows filesystem as the storage
+ // layer, we might be able to get away with a rename over a locked
+ // file.
+ err := d.fs.Rename(tmp.Name(), pr.Name())
+ if err == nil {
+ return nil
+ }
+
+ // Otherwise, Windows doesn't let us rename over a locked file, so
+ // we have to do a straight copy. Unfortunately this could result
+ // in a partially-written file if the process fails before the
+ // copy completes.
+ _, err = pr.Seek(0, io.SeekStart)
+ if err != nil {
+ return err
+ }
+ err = pr.Truncate(0)
+ if err != nil {
+ return err
+ }
+ _, err = tmp.Seek(0, io.SeekStart)
+ if err != nil {
+ return err
+ }
+ _, err = io.Copy(pr, tmp)
+ return err
+}
diff --git a/storage/filesystem/internal/dotgit/dotgit_test.go b/storage/filesystem/internal/dotgit/dotgit_test.go
index 446a204..0ed9ec5 100644
--- a/storage/filesystem/internal/dotgit/dotgit_test.go
+++ b/storage/filesystem/internal/dotgit/dotgit_test.go
@@ -543,3 +543,75 @@ func (s *SuiteDotGit) TestSubmodules(c *C) {
c.Assert(err, IsNil)
c.Assert(strings.HasSuffix(m.Root(), m.Join(".git", "modules", "basic")), Equals, true)
}
+
+// TestPackRefs checks PackRefs on a fresh on-disk DotGit: it creates two
+// references, verifies both are counted as loose, packs them, and verifies
+// they remain readable with their original hashes while the loose count drops
+// to zero. It then updates one reference, confirms exactly one loose ref
+// exists, packs again, and verifies the updated hash is the one returned.
+func (s *SuiteDotGit) TestPackRefs(c *C) {
+ tmp, err := ioutil.TempDir("", "dot-git")
+ c.Assert(err, IsNil)
+ defer os.RemoveAll(tmp)
+
+ fs := osfs.New(tmp)
+ dir := New(fs)
+
+ err = dir.SetRef(plumbing.NewReferenceFromStrings(
+ "refs/heads/foo",
+ "e8d3ffab552895c19b9fcf7aa264d277cde33881",
+ ), nil)
+ c.Assert(err, IsNil)
+ err = dir.SetRef(plumbing.NewReferenceFromStrings(
+ "refs/heads/bar",
+ "a8d3ffab552895c19b9fcf7aa264d277cde33881",
+ ), nil)
+ c.Assert(err, IsNil)
+
+ refs, err := dir.Refs()
+ c.Assert(err, IsNil)
+ c.Assert(refs, HasLen, 2)
+ looseCount, err := dir.CountLooseRefs()
+ c.Assert(err, IsNil)
+ c.Assert(looseCount, Equals, 2)
+
+ err = dir.PackRefs()
+ c.Assert(err, IsNil)
+
+ // Make sure the refs are still there, but no longer loose.
+ refs, err = dir.Refs()
+ c.Assert(err, IsNil)
+ c.Assert(refs, HasLen, 2)
+ looseCount, err = dir.CountLooseRefs()
+ c.Assert(err, IsNil)
+ c.Assert(looseCount, Equals, 0)
+
+ ref, err := dir.Ref("refs/heads/foo")
+ c.Assert(err, IsNil)
+ c.Assert(ref, NotNil)
+ c.Assert(ref.Hash().String(), Equals, "e8d3ffab552895c19b9fcf7aa264d277cde33881")
+ ref, err = dir.Ref("refs/heads/bar")
+ c.Assert(err, IsNil)
+ c.Assert(ref, NotNil)
+ c.Assert(ref.Hash().String(), Equals, "a8d3ffab552895c19b9fcf7aa264d277cde33881")
+
+ // Now update one of them, re-pack, and check again.
+ err = dir.SetRef(plumbing.NewReferenceFromStrings(
+ "refs/heads/foo",
+ "b8d3ffab552895c19b9fcf7aa264d277cde33881",
+ ), nil)
+ c.Assert(err, IsNil)
+ looseCount, err = dir.CountLooseRefs()
+ c.Assert(err, IsNil)
+ c.Assert(looseCount, Equals, 1)
+ err = dir.PackRefs()
+ c.Assert(err, IsNil)
+
+ // Make sure the refs are still there, but no longer loose.
+ refs, err = dir.Refs()
+ c.Assert(err, IsNil)
+ c.Assert(refs, HasLen, 2)
+ looseCount, err = dir.CountLooseRefs()
+ c.Assert(err, IsNil)
+ c.Assert(looseCount, Equals, 0)
+
+ ref, err = dir.Ref("refs/heads/foo")
+ c.Assert(err, IsNil)
+ c.Assert(ref, NotNil)
+ c.Assert(ref.Hash().String(), Equals, "b8d3ffab552895c19b9fcf7aa264d277cde33881")
+}
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index 9690c0e..6ca67cc 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -3,6 +3,7 @@ package filesystem
import (
"io"
"os"
+ "time"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
@@ -125,6 +126,32 @@ func (s *ObjectStorage) SetEncodedObject(o plumbing.EncodedObject) (plumbing.Has
return o.Hash(), err
}
+// HasEncodedObject reports whether an object with hash h is present, without
+// decoding its contents. It returns nil when the object is found either as a
+// loose object file or in a packfile, plumbing.ErrObjectNotFound when it is
+// in neither, and any other error hit while probing storage.
+func (s *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) {
+	// Loose objects first: being able to open the file is proof enough.
+	f, err := s.dir.Object(h)
+	switch {
+	case err == nil:
+		// Found; a failure to close the handle is surfaced through err.
+		defer ioutil.CheckClose(f, &err)
+		return nil
+	case !os.IsNotExist(err):
+		return err
+	}
+
+	// Not loose: consult the pack indexes instead.
+	if idxErr := s.requireIndex(); idxErr != nil {
+		return idxErr
+	}
+	if _, _, offset := s.findObjectInPackfile(h); offset == -1 {
+		return plumbing.ErrObjectNotFound
+	}
+	return nil
+}
+
// EncodedObject returns the object with the given hash, by searching for it in
// the packfile and the git object directories.
func (s *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) {
@@ -479,3 +506,31 @@ func hashListAsMap(l []plumbing.Hash) map[plumbing.Hash]bool {
return m
}
+
+// ForEachObjectHash forwards fun to s.dir.ForEachObjectHash. A
+// storer.ErrStop result is treated as a normal early exit and reported as
+// nil; every other result (including nil) is returned unchanged.
+func (s *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error {
+	if walkErr := s.dir.ForEachObjectHash(fun); walkErr != storer.ErrStop {
+		return walkErr
+	}
+	return nil
+}
+
+// LooseObjectTime returns the modification time reported by
+// s.dir.ObjectStat for the object with the given hash. When the stat fails,
+// the zero time.Time and the stat error are returned.
+func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) {
+	info, statErr := s.dir.ObjectStat(hash)
+	if statErr == nil {
+		return info.ModTime(), nil
+	}
+	return time.Time{}, statErr
+}
+
+// DeleteLooseObject delegates to s.dir.ObjectDelete for the given hash and
+// returns its error unchanged.
+func (s *ObjectStorage) DeleteLooseObject(hash plumbing.Hash) error {
+ return s.dir.ObjectDelete(hash)
+}
+
+// ObjectPacks returns the hashes reported by s.dir.ObjectPacks, along with
+// any error it produces.
+// NOTE(review): presumably one hash per packfile in the repository — confirm
+// against DotGit.ObjectPacks.
+func (s *ObjectStorage) ObjectPacks() ([]plumbing.Hash, error) {
+ return s.dir.ObjectPacks()
+}
+
+// DeleteOldObjectPackAndIndex delegates to s.dir.DeleteOldObjectPackAndIndex
+// with the pack hash h and time t, returning its error unchanged.
+// NOTE(review): presumably t is an age cutoff for the pack being removed —
+// confirm against the DotGit implementation.
+func (s *ObjectStorage) DeleteOldObjectPackAndIndex(h plumbing.Hash, t time.Time) error {
+ return s.dir.DeleteOldObjectPackAndIndex(h, t)
+}
diff --git a/storage/filesystem/reference.go b/storage/filesystem/reference.go
index 54cdf56..7313f05 100644
--- a/storage/filesystem/reference.go
+++ b/storage/filesystem/reference.go
@@ -34,3 +34,11 @@ func (r *ReferenceStorage) IterReferences() (storer.ReferenceIter, error) {
func (r *ReferenceStorage) RemoveReference(n plumbing.ReferenceName) error {
return r.dir.RemoveRef(n)
}
+
+// CountLooseRefs returns the loose-reference count reported by
+// r.dir.CountLooseRefs, along with any error it produces.
+func (r *ReferenceStorage) CountLooseRefs() (int, error) {
+ return r.dir.CountLooseRefs()
+}
+
+// PackRefs delegates to r.dir.PackRefs and returns its error unchanged.
+func (r *ReferenceStorage) PackRefs() error {
+ return r.dir.PackRefs()
+}
diff --git a/storage/memory/storage.go b/storage/memory/storage.go
index 927ec41..0f66f1e 100644
--- a/storage/memory/storage.go
+++ b/storage/memory/storage.go
@@ -3,6 +3,7 @@ package memory
import (
"fmt"
+ "time"
"gopkg.in/src-d/go-git.v4/config"
"gopkg.in/src-d/go-git.v4/plumbing"
@@ -114,6 +115,13 @@ func (o *ObjectStorage) SetEncodedObject(obj plumbing.EncodedObject) (plumbing.H
return h, nil
}
+// HasEncodedObject returns nil when h is a key of o.Objects and
+// plumbing.ErrObjectNotFound otherwise.
+func (o *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) {
+	if _, found := o.Objects[h]; found {
+		return nil
+	}
+	return plumbing.ErrObjectNotFound
+}
+
func (o *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) {
obj, ok := o.Objects[h]
if !ok || (plumbing.AnyObject != t && obj.Type() != t) {
@@ -156,6 +164,35 @@ func (o *ObjectStorage) Begin() storer.Transaction {
}
}
+// ForEachObjectHash calls fun once for every hash key in o.Objects. Go map
+// iteration order is unspecified, so callers must not rely on any ordering.
+// If fun returns storer.ErrStop the walk ends early and nil is returned; any
+// other non-nil error aborts the walk and is returned as-is.
+func (o *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error {
+	for h := range o.Objects {
+		if err := fun(h); err != nil {
+			if err == storer.ErrStop {
+				return nil
+			}
+			return err
+		}
+	}
+	return nil
+}
+
+// ObjectPacks always returns a nil slice and a nil error in the in-memory
+// storage.
+func (o *ObjectStorage) ObjectPacks() ([]plumbing.Hash, error) {
+ return nil, nil
+}
+// DeleteOldObjectPackAndIndex ignores its arguments and always returns nil
+// in the in-memory storage.
+func (o *ObjectStorage) DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error {
+ return nil
+}
+
+var errNotSupported = fmt.Errorf("Not supported")
+
+// LooseObjectTime is not supported by the in-memory storage: it always
+// returns the zero time.Time together with errNotSupported.
+func (o *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) {
+	return time.Time{}, errNotSupported
+}
+// DeleteLooseObject is not supported by the in-memory storage: it always
+// returns errNotSupported.
+func (o *ObjectStorage) DeleteLooseObject(plumbing.Hash) error {
+	return errNotSupported
+}
+
type TxObjectStorage struct {
Storage *ObjectStorage
Objects map[plumbing.Hash]plumbing.EncodedObject
@@ -236,6 +273,14 @@ func (r ReferenceStorage) IterReferences() (storer.ReferenceIter, error) {
return storer.NewReferenceSliceIter(refs), nil
}
+// CountLooseRefs returns len(r), i.e. the number of entries currently held
+// in the in-memory reference storage, and a nil error.
+func (r ReferenceStorage) CountLooseRefs() (int, error) {
+ return len(r), nil
+}
+
+// PackRefs does nothing in the in-memory storage and always returns nil.
+func (r ReferenceStorage) PackRefs() error {
+ return nil
+}
+
func (r ReferenceStorage) RemoveReference(n plumbing.ReferenceName) error {
delete(r, n)
return nil