diff options
-rw-r--r-- | object_walker.go | 105 | ||||
-rw-r--r-- | plumbing/storer/object.go | 32 | ||||
-rw-r--r-- | plumbing/storer/object_test.go | 9 | ||||
-rw-r--r-- | plumbing/storer/reference.go | 2 | ||||
-rw-r--r-- | prune.go | 66 | ||||
-rw-r--r-- | prune_test.go | 73 | ||||
-rw-r--r-- | repository.go | 112 | ||||
-rw-r--r-- | repository_test.go | 61 | ||||
-rw-r--r-- | storage/filesystem/internal/dotgit/dotgit.go | 312 | ||||
-rw-r--r-- | storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go | 11 | ||||
-rw-r--r-- | storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go | 39 | ||||
-rw-r--r-- | storage/filesystem/internal/dotgit/dotgit_test.go | 72 | ||||
-rw-r--r-- | storage/filesystem/object.go | 55 | ||||
-rw-r--r-- | storage/filesystem/reference.go | 8 | ||||
-rw-r--r-- | storage/memory/storage.go | 45 |
15 files changed, 928 insertions, 74 deletions
diff --git a/object_walker.go b/object_walker.go new file mode 100644 index 0000000..8bae1fa --- /dev/null +++ b/object_walker.go @@ -0,0 +1,105 @@ +package git + +import ( + "fmt" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/filemode" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/storage" +) + +type objectWalker struct { + Storer storage.Storer + // seen is the set of objects seen in the repo. + // seen map can become huge if walking over large + // repos. Thus using struct{} as the value type. + seen map[plumbing.Hash]struct{} +} + +func newObjectWalker(s storage.Storer) *objectWalker { + return &objectWalker{s, map[plumbing.Hash]struct{}{}} +} + +// walkAllRefs walks all (hash) refererences from the repo. +func (p *objectWalker) walkAllRefs() error { + // Walk over all the references in the repo. + it, err := p.Storer.IterReferences() + if err != nil { + return err + } + defer it.Close() + err = it.ForEach(func(ref *plumbing.Reference) error { + // Exit this iteration early for non-hash references. + if ref.Type() != plumbing.HashReference { + return nil + } + return p.walkObjectTree(ref.Hash()) + }) + if err != nil { + return err + } + return nil +} + +func (p *objectWalker) isSeen(hash plumbing.Hash) bool { + _, seen := p.seen[hash] + return seen +} + +func (p *objectWalker) add(hash plumbing.Hash) { + p.seen[hash] = struct{}{} +} + +// walkObjectTree walks over all objects and remembers references +// to them in the objectWalker. This is used instead of the revlist +// walks because memory usage is tight with huge repos. +func (p *objectWalker) walkObjectTree(hash plumbing.Hash) error { + // Check if we have already seen, and mark this object + if p.isSeen(hash) { + return nil + } + p.add(hash) + // Fetch the object. + obj, err := object.GetObject(p.Storer, hash) + if err != nil { + return fmt.Errorf("Getting object %s failed: %v", hash, err) + } + // Walk all children depending on object type. + switch obj := obj.(type) { + case *object.Commit: + err = p.walkObjectTree(obj.TreeHash) + if err != nil { + return err + } + for _, h := range obj.ParentHashes { + err = p.walkObjectTree(h) + if err != nil { + return err + } + } + case *object.Tree: + for i := range obj.Entries { + // Shortcut for blob objects: + // 'or' the lower bits of a mode and check that it + // it matches a filemode.Executable. The type information + // is in the higher bits, but this is the cleanest way + // to handle plain files with different modes. + // Other non-tree objects are somewhat rare, so they + // are not special-cased. + if obj.Entries[i].Mode|0755 == filemode.Executable { + p.add(obj.Entries[i].Hash) + continue + } + // Normal walk for sub-trees (and symlinks etc). + err = p.walkObjectTree(obj.Entries[i].Hash) + if err != nil { + return err + } + } + default: + // Error out on unhandled object types. + return fmt.Errorf("Unknown object %X %s %T\n", obj.ID(), obj.Type(), obj) + } + return nil +} diff --git a/plumbing/storer/object.go b/plumbing/storer/object.go index e793211..f1d19ef 100644 --- a/plumbing/storer/object.go +++ b/plumbing/storer/object.go @@ -3,6 +3,7 @@ package storer import ( "errors" "io" + "time" "gopkg.in/src-d/go-git.v4/plumbing" ) @@ -36,6 +37,9 @@ type EncodedObjectStorer interface { // // Valid plumbing.ObjectType values are CommitObject, BlobObject, TagObject, IterEncodedObjects(plumbing.ObjectType) (EncodedObjectIter, error) + // HasEncodedObject returns ErrObjNotFound if the object doesn't + // exist. If the object does exist, it returns nil. + HasEncodedObject(plumbing.Hash) error } // DeltaObjectStorer is an EncodedObjectStorer that can return delta @@ -53,6 +57,34 @@ type Transactioner interface { Begin() Transaction } +// LooseObjectStorer is an optional interface for managing "loose" +// objects, i.e. those not in packfiles. +type LooseObjectStorer interface { + // ForEachObjectHash iterates over all the (loose) object hashes + // in the repository without necessarily having to read those objects. + // Objects only inside pack files may be omitted. + // If ErrStop is sent the iteration is stop but no error is returned. + ForEachObjectHash(func(plumbing.Hash) error) error + // LooseObjectTime looks up the (m)time associated with the + // loose object (that is not in a pack file). Some + // implementations (e.g. without loose objects) + // always return an error. + LooseObjectTime(plumbing.Hash) (time.Time, error) + // DeleteLooseObject deletes a loose object if it exists. + DeleteLooseObject(plumbing.Hash) error +} + +// PackedObjectStorer is an optional interface for managing objects in +// packfiles. +type PackedObjectStorer interface { + // ObjectPacks returns hashes of object packs if the underlying + // implementation has pack files. + ObjectPacks() ([]plumbing.Hash, error) + // DeleteOldObjectPackAndIndex deletes an object pack and the corresponding index file if they exist. + // Deletion is only performed if the pack is older than the supplied time (or the time is zero). + DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error +} + // PackfileWriter is a optional method for ObjectStorer, it enable direct write // of packfile to the storage type PackfileWriter interface { diff --git a/plumbing/storer/object_test.go b/plumbing/storer/object_test.go index 6bdd25c..6b4fe0f 100644 --- a/plumbing/storer/object_test.go +++ b/plumbing/storer/object_test.go @@ -132,6 +132,15 @@ func (o *MockObjectStorage) SetEncodedObject(obj plumbing.EncodedObject) (plumbi return plumbing.ZeroHash, nil } +func (o *MockObjectStorage) HasEncodedObject(h plumbing.Hash) error { + for _, o := range o.db { + if o.Hash() == h { + return nil + } + } + return plumbing.ErrObjectNotFound +} + func (o *MockObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { for _, o := range o.db { if o.Hash() == h { diff --git a/plumbing/storer/reference.go b/plumbing/storer/reference.go index ae80a39..5e85a3b 100644 --- a/plumbing/storer/reference.go +++ b/plumbing/storer/reference.go @@ -24,6 +24,8 @@ type ReferenceStorer interface { Reference(plumbing.ReferenceName) (*plumbing.Reference, error) IterReferences() (ReferenceIter, error) RemoveReference(plumbing.ReferenceName) error + CountLooseRefs() (int, error) + PackRefs() error } // ReferenceIter is a generic closable interface for iterating over references. diff --git a/prune.go b/prune.go new file mode 100644 index 0000000..04913d6 --- /dev/null +++ b/prune.go @@ -0,0 +1,66 @@ +package git + +import ( + "errors" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +type PruneHandler func(unreferencedObjectHash plumbing.Hash) error +type PruneOptions struct { + // OnlyObjectsOlderThan if set to non-zero value + // selects only objects older than the time provided. + OnlyObjectsOlderThan time.Time + // Handler is called on matching objects + Handler PruneHandler +} + +var ErrLooseObjectsNotSupported = errors.New("Loose objects not supported") + +// DeleteObject deletes an object from a repository. +// The type conveniently matches PruneHandler. +func (r *Repository) DeleteObject(hash plumbing.Hash) error { + los, ok := r.Storer.(storer.LooseObjectStorer) + if !ok { + return ErrLooseObjectsNotSupported + } + + return los.DeleteLooseObject(hash) +} + +func (r *Repository) Prune(opt PruneOptions) error { + los, ok := r.Storer.(storer.LooseObjectStorer) + if !ok { + return ErrLooseObjectsNotSupported + } + + pw := newObjectWalker(r.Storer) + err := pw.walkAllRefs() + if err != nil { + return err + } + // Now walk all (loose) objects in storage. + return los.ForEachObjectHash(func(hash plumbing.Hash) error { + // Get out if we have seen this object. + if pw.isSeen(hash) { + return nil + } + // Otherwise it is a candidate for pruning. + // Check out for too new objects next. + if opt.OnlyObjectsOlderThan != (time.Time{}) { + // Errors here are non-fatal. The object may be e.g. packed. + // Or concurrently deleted. Skip such objects. + t, err := los.LooseObjectTime(hash) + if err != nil { + return nil + } + // Skip too new objects. + if !t.Before(opt.OnlyObjectsOlderThan) { + return nil + } + } + return opt.Handler(hash) + }) +} diff --git a/prune_test.go b/prune_test.go new file mode 100644 index 0000000..60652ec --- /dev/null +++ b/prune_test.go @@ -0,0 +1,73 @@ +package git + +import ( + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" + "gopkg.in/src-d/go-git.v4/storage" + "gopkg.in/src-d/go-git.v4/storage/filesystem" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" +) + +type PruneSuite struct { + BaseSuite +} + +var _ = Suite(&PruneSuite{}) + +func (s *PruneSuite) testPrune(c *C, deleteTime time.Time) { + srcFs := fixtures.ByTag("unpacked").One().DotGit() + var sto storage.Storer + var err error + sto, err = filesystem.NewStorage(srcFs) + c.Assert(err, IsNil) + + los := sto.(storer.LooseObjectStorer) + c.Assert(los, NotNil) + + count := 0 + err = los.ForEachObjectHash(func(_ plumbing.Hash) error { + count++ + return nil + }) + c.Assert(err, IsNil) + + r, err := Open(sto, srcFs) + c.Assert(err, IsNil) + c.Assert(r, NotNil) + + // Remove a branch so we can prune some objects. + err = sto.RemoveReference(plumbing.ReferenceName("refs/heads/v4")) + c.Assert(err, IsNil) + err = sto.RemoveReference(plumbing.ReferenceName("refs/remotes/origin/v4")) + c.Assert(err, IsNil) + + err = r.Prune(PruneOptions{ + OnlyObjectsOlderThan: deleteTime, + Handler: r.DeleteObject, + }) + c.Assert(err, IsNil) + + newCount := 0 + err = los.ForEachObjectHash(func(_ plumbing.Hash) error { + newCount++ + return nil + }) + if deleteTime.IsZero() { + c.Assert(newCount < count, Equals, true) + } else { + // Assume a delete time older than any of the objects was passed in. + c.Assert(newCount, Equals, count) + } +} + +func (s *PruneSuite) TestPrune(c *C) { + s.testPrune(c, time.Time{}) +} + +func (s *PruneSuite) TestPruneWithNoDelete(c *C) { + s.testPrune(c, time.Unix(0, 1)) +} diff --git a/repository.go b/repository.go index b159ff0..7cdc0d5 100644 --- a/repository.go +++ b/repository.go @@ -8,10 +8,12 @@ import ( "os" "path/filepath" "strings" + "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/internal/revision" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/object" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage" @@ -23,14 +25,15 @@ import ( ) var ( - ErrInvalidReference = errors.New("invalid reference, should be a tag or a branch") - ErrRepositoryNotExists = errors.New("repository does not exist") - ErrRepositoryAlreadyExists = errors.New("repository already exists") - ErrRemoteNotFound = errors.New("remote not found") - ErrRemoteExists = errors.New("remote already exists ") - ErrWorktreeNotProvided = errors.New("worktree should be provided") - ErrIsBareRepository = errors.New("worktree not available in a bare repository") - ErrUnableToResolveCommit = errors.New("unable to resolve commit") + ErrInvalidReference = errors.New("invalid reference, should be a tag or a branch") + ErrRepositoryNotExists = errors.New("repository does not exist") + ErrRepositoryAlreadyExists = errors.New("repository already exists") + ErrRemoteNotFound = errors.New("remote not found") + ErrRemoteExists = errors.New("remote already exists ") + ErrWorktreeNotProvided = errors.New("worktree should be provided") + ErrIsBareRepository = errors.New("worktree not available in a bare repository") + ErrUnableToResolveCommit = errors.New("unable to resolve commit") + ErrPackedObjectsNotSupported = errors.New("Packed objects not supported") ) // Repository represents a git repository @@ -1011,3 +1014,96 @@ func (r *Repository) ResolveRevision(rev plumbing.Revision) (*plumbing.Hash, err return &commit.Hash, nil } + +type RepackConfig struct { + // UseRefDeltas configures whether packfile encoder will use reference deltas. + // By default OFSDeltaObject is used. + UseRefDeltas bool + // OnlyDeletePacksOlderThan if set to non-zero value + // selects only objects older than the time provided. + OnlyDeletePacksOlderThan time.Time +} + +func (r *Repository) RepackObjects(cfg *RepackConfig) (err error) { + pos, ok := r.Storer.(storer.PackedObjectStorer) + if !ok { + return ErrPackedObjectsNotSupported + } + + // Get the existing object packs. + hs, err := pos.ObjectPacks() + if err != nil { + return err + } + + // Create a new pack. + nh, err := r.createNewObjectPack(cfg) + if err != nil { + return err + } + + // Delete old packs. + for _, h := range hs { + // Skip if new hash is the same as an old one. + if h == nh { + continue + } + err = pos.DeleteOldObjectPackAndIndex(h, cfg.OnlyDeletePacksOlderThan) + if err != nil { + return err + } + } + + return nil +} + +// createNewObjectPack is a helper for RepackObjects taking care +// of creating a new pack. It is used so the the PackfileWriter +// deferred close has the right scope. +func (r *Repository) createNewObjectPack(cfg *RepackConfig) (h plumbing.Hash, err error) { + ow := newObjectWalker(r.Storer) + err = ow.walkAllRefs() + if err != nil { + return h, err + } + objs := make([]plumbing.Hash, 0, len(ow.seen)) + for h := range ow.seen { + objs = append(objs, h) + } + pfw, ok := r.Storer.(storer.PackfileWriter) + if !ok { + return h, fmt.Errorf("Repository storer is not a storer.PackfileWriter") + } + wc, err := pfw.PackfileWriter() + if err != nil { + return h, err + } + defer ioutil.CheckClose(wc, &err) + scfg, err := r.Storer.Config() + if err != nil { + return h, err + } + enc := packfile.NewEncoder(wc, r.Storer, cfg.UseRefDeltas) + h, err = enc.Encode(objs, scfg.Pack.Window) + if err != nil { + return h, err + } + + // Delete the packed, loose objects. + if los, ok := r.Storer.(storer.LooseObjectStorer); ok { + err = los.ForEachObjectHash(func(hash plumbing.Hash) error { + if ow.isSeen(hash) { + err := los.DeleteLooseObject(hash) + if err != nil { + return err + } + } + return nil + }) + if err != nil { + return h, err + } + } + + return h, err +} diff --git a/repository_test.go b/repository_test.go index 9d82651..2ebc597 100644 --- a/repository_test.go +++ b/repository_test.go @@ -10,10 +10,13 @@ import ( "os/exec" "path/filepath" "strings" + "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" + "gopkg.in/src-d/go-git.v4/storage" "gopkg.in/src-d/go-git.v4/storage/filesystem" "gopkg.in/src-d/go-git.v4/storage/memory" @@ -1313,6 +1316,64 @@ func (s *RepositorySuite) TestResolveRevisionWithErrors(c *C) { } } +func (s *RepositorySuite) testRepackObjects( + c *C, deleteTime time.Time, expectedPacks int) { + srcFs := fixtures.ByTag("unpacked").One().DotGit() + var sto storage.Storer + var err error + sto, err = filesystem.NewStorage(srcFs) + c.Assert(err, IsNil) + + los := sto.(storer.LooseObjectStorer) + c.Assert(los, NotNil) + + numLooseStart := 0 + err = los.ForEachObjectHash(func(_ plumbing.Hash) error { + numLooseStart++ + return nil + }) + c.Assert(err, IsNil) + c.Assert(numLooseStart > 0, Equals, true) + + pos := sto.(storer.PackedObjectStorer) + c.Assert(los, NotNil) + + packs, err := pos.ObjectPacks() + c.Assert(err, IsNil) + numPacksStart := len(packs) + c.Assert(numPacksStart > 1, Equals, true) + + r, err := Open(sto, srcFs) + c.Assert(err, IsNil) + c.Assert(r, NotNil) + + err = r.RepackObjects(&RepackConfig{ + OnlyDeletePacksOlderThan: deleteTime, + }) + c.Assert(err, IsNil) + + numLooseEnd := 0 + err = los.ForEachObjectHash(func(_ plumbing.Hash) error { + numLooseEnd++ + return nil + }) + c.Assert(err, IsNil) + c.Assert(numLooseEnd, Equals, 0) + + packs, err = pos.ObjectPacks() + c.Assert(err, IsNil) + numPacksEnd := len(packs) + c.Assert(numPacksEnd, Equals, expectedPacks) +} + +func (s *RepositorySuite) TestRepackObjects(c *C) { + s.testRepackObjects(c, time.Time{}, 1) +} + +func (s *RepositorySuite) TestRepackObjectsWithNoDelete(c *C) { + s.testRepackObjects(c, time.Unix(0, 1), 3) +} + func ExecuteOnPath(c *C, path string, cmds ...string) error { for _, cmd := range cmds { err := executeOnPath(path, cmd) diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 1cb97bd..5e23e66 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -9,6 +9,7 @@ import ( stdioutil "io/ioutil" "os" "strings" + "time" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/utils/ioutil" @@ -166,11 +167,12 @@ func (d *DotGit) ObjectPacks() ([]plumbing.Hash, error) { return packs, nil } -// ObjectPack returns a fs.File of the given packfile -func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) { - file := d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.pack", hash.String())) +func (d *DotGit) objectPackPath(hash plumbing.Hash, extension string) string { + return d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.%s", hash.String(), extension)) +} - pack, err := d.fs.Open(file) +func (d *DotGit) objectPackOpen(hash plumbing.Hash, extension string) (billy.File, error) { + pack, err := d.fs.Open(d.objectPackPath(hash, extension)) if err != nil { if os.IsNotExist(err) { return nil, ErrPackfileNotFound @@ -182,19 +184,37 @@ func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) { return pack, nil } +// ObjectPack returns a fs.File of the given packfile +func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) { + return d.objectPackOpen(hash, `pack`) +} + // ObjectPackIdx returns a fs.File of the index file for a given packfile func (d *DotGit) ObjectPackIdx(hash plumbing.Hash) (billy.File, error) { - file := d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.idx", hash.String())) - idx, err := d.fs.Open(file) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrPackfileNotFound - } + return d.objectPackOpen(hash, `idx`) +} - return nil, err +func (d *DotGit) DeleteOldObjectPackAndIndex(hash plumbing.Hash, t time.Time) error { + path := d.objectPackPath(hash, `pack`) + if !t.IsZero() { + fi, err := d.fs.Stat(path) + if err != nil { + return err + } + // too new, skip deletion. + if !fi.ModTime().Before(t) { + return nil + } } - - return idx, nil + err := d.fs.Remove(path) + if err != nil { + return err + } + err = d.fs.Remove(d.objectPackPath(hash, `idx`)) + if err != nil { + return err + } + return nil } // NewObject return a writer for a new object file. @@ -205,39 +225,67 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) { // Objects returns a slice with the hashes of objects found under the // .git/objects/ directory. func (d *DotGit) Objects() ([]plumbing.Hash, error) { + var objects []plumbing.Hash + err := d.ForEachObjectHash(func(hash plumbing.Hash) error { + objects = append(objects, hash) + return nil + }) + if err != nil { + return nil, err + } + return objects, nil +} + +// Objects returns a slice with the hashes of objects found under the +// .git/objects/ directory. +func (d *DotGit) ForEachObjectHash(fun func(plumbing.Hash) error) error { files, err := d.fs.ReadDir(objectsPath) if err != nil { if os.IsNotExist(err) { - return nil, nil + return nil } - return nil, err + return err } - var objects []plumbing.Hash for _, f := range files { if f.IsDir() && len(f.Name()) == 2 && isHex(f.Name()) { base := f.Name() d, err := d.fs.ReadDir(d.fs.Join(objectsPath, base)) if err != nil { - return nil, err + return err } for _, o := range d { - objects = append(objects, plumbing.NewHash(base+o.Name())) + err = fun(plumbing.NewHash(base + o.Name())) + if err != nil { + return err + } } } } - return objects, nil + return nil } -// Object return a fs.File pointing the object file, if exists -func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) { +func (d *DotGit) objectPath(h plumbing.Hash) string { hash := h.String() - file := d.fs.Join(objectsPath, hash[0:2], hash[2:40]) + return d.fs.Join(objectsPath, hash[0:2], hash[2:40]) +} + +// Object returns a fs.File pointing the object file, if exists +func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) { + return d.fs.Open(d.objectPath(h)) +} + +// ObjectStat returns a os.FileInfo pointing the object file, if exists +func (d *DotGit) ObjectStat(h plumbing.Hash) (os.FileInfo, error) { + return d.fs.Stat(d.objectPath(h)) +} - return d.fs.Open(file) +// ObjectDelete removes the object file, if exists +func (d *DotGit) ObjectDelete(h plumbing.Hash) error { + return d.fs.Remove(d.objectPath(h)) } func (d *DotGit) readReferenceFrom(rd io.Reader, name string) (ref *plumbing.Reference, err error) { @@ -339,17 +387,7 @@ func (d *DotGit) Ref(name plumbing.ReferenceName) (*plumbing.Reference, error) { return d.packedRef(name) } -func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) { - f, err := d.fs.Open(packedRefsPath) - if err != nil { - if os.IsNotExist(err) { - return nil, nil - } - return nil, err - } - - defer ioutil.CheckClose(f, &err) - +func (d *DotGit) findPackedRefsInFile(f billy.File) ([]*plumbing.Reference, error) { s := bufio.NewScanner(f) var refs []*plumbing.Reference for s.Scan() { @@ -366,6 +404,19 @@ func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) { return refs, s.Err() } +func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) { + f, err := d.fs.Open(packedRefsPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + defer ioutil.CheckClose(f, &err) + return d.findPackedRefsInFile(f) +} + func (d *DotGit) packedRef(name plumbing.ReferenceName) (*plumbing.Reference, error) { refs, err := d.findPackedRefs() if err != nil { @@ -412,26 +463,82 @@ func (d *DotGit) addRefsFromPackedRefs(refs *[]*plumbing.Reference, seen map[plu return nil } -func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err error) { - f, err := d.fs.Open(packedRefsPath) +func (d *DotGit) addRefsFromPackedRefsFile(refs *[]*plumbing.Reference, f billy.File, seen map[plumbing.ReferenceName]bool) (err error) { + packedRefs, err := d.findPackedRefsInFile(f) if err != nil { - if os.IsNotExist(err) { - return nil - } - return err } - doCloseF := true + + for _, ref := range packedRefs { + if !seen[ref.Name()] { + *refs = append(*refs, ref) + seen[ref.Name()] = true + } + } + return nil +} + +func (d *DotGit) openAndLockPackedRefs(doCreate bool) ( + pr billy.File, err error) { + var f billy.File defer func() { - if doCloseF { + if err != nil && f != nil { ioutil.CheckClose(f, &err) } }() - err = f.Lock() + openFlags := os.O_RDWR + if doCreate { + openFlags |= os.O_CREATE + } + + // Keep trying to open and lock the file until we're sure the file + // didn't change between the open and the lock. + for { + f, err = d.fs.OpenFile(packedRefsPath, openFlags, 0600) + if err != nil { + if os.IsNotExist(err) && !doCreate { + return nil, nil + } + + return nil, err + } + fi, err := d.fs.Stat(packedRefsPath) + if err != nil { + return nil, err + } + mtime := fi.ModTime() + + err = f.Lock() + if err != nil { + return nil, err + } + + fi, err = d.fs.Stat(packedRefsPath) + if err != nil { + return nil, err + } + if mtime == fi.ModTime() { + break + } + // The file has changed since we opened it. Close and retry. + err = f.Close() + if err != nil { + return nil, err + } + } + return f, nil +} + +func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err error) { + pr, err := d.openAndLockPackedRefs(false) if err != nil { return err } + if pr == nil { + return nil + } + defer ioutil.CheckClose(pr, &err) // Creating the temp file in the same directory as the target file // improves our chances for rename operation to be atomic. @@ -439,14 +546,13 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e if err != nil { return err } - doCloseTmp := true + tmpName := tmp.Name() defer func() { - if doCloseTmp { - ioutil.CheckClose(tmp, &err) - } + ioutil.CheckClose(tmp, &err) + _ = d.fs.Remove(tmpName) // don't check err, we might have renamed it }() - s := bufio.NewScanner(f) + s := bufio.NewScanner(pr) found := false for s.Scan() { line := s.Text() @@ -470,26 +576,10 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e } if !found { - doCloseTmp = false - ioutil.CheckClose(tmp, &err) - if err != nil { - return err - } - // Delete the temp file if nothing needed to be removed. - return d.fs.Remove(tmp.Name()) - } - - doCloseF = false - if err := f.Close(); err != nil { - return err - } - - doCloseTmp = false - if err := tmp.Close(); err != nil { - return err + return nil } - return d.fs.Rename(tmp.Name(), packedRefsPath) + return d.rewritePackedRefsWhileLocked(tmp, pr) } // process lines from a packed-refs file @@ -576,6 +666,96 @@ func (d *DotGit) readReferenceFile(path, name string) (ref *plumbing.Reference, return d.readReferenceFrom(f, name) } +func (d *DotGit) CountLooseRefs() (int, error) { + var refs []*plumbing.Reference + var seen = make(map[plumbing.ReferenceName]bool) + if err := d.addRefsFromRefDir(&refs, seen); err != nil { + return 0, err + } + + return len(refs), nil +} + +// PackRefs packs all loose refs into the packed-refs file. +// +// This implementation only works under the assumption that the view +// of the file system won't be updated during this operation. This +// strategy would not work on a general file system though, without +// locking each loose reference and checking it again before deleting +// the file, because otherwise an updated reference could sneak in and +// then be deleted by the packed-refs process. Alternatively, every +// ref update could also lock packed-refs, so only one lock is +// required during ref-packing. But that would worsen performance in +// the common case. +// +// TODO: add an "all" boolean like the `git pack-refs --all` flag. +// When `all` is false, it would only pack refs that have already been +// packed, plus all tags. +func (d *DotGit) PackRefs() (err error) { + // Lock packed-refs, and create it if it doesn't exist yet. + f, err := d.openAndLockPackedRefs(true) + if err != nil { + return err + } + defer ioutil.CheckClose(f, &err) + + // Gather all refs using addRefsFromRefDir and addRefsFromPackedRefs. + var refs []*plumbing.Reference + seen := make(map[plumbing.ReferenceName]bool) + if err := d.addRefsFromRefDir(&refs, seen); err != nil { + return err + } + if len(refs) == 0 { + // Nothing to do! + return nil + } + numLooseRefs := len(refs) + if err := d.addRefsFromPackedRefsFile(&refs, f, seen); err != nil { + return err + } + + // Write them all to a new temp packed-refs file. + tmp, err := d.fs.TempFile("", tmpPackedRefsPrefix) + if err != nil { + return err + } + tmpName := tmp.Name() + defer func() { + ioutil.CheckClose(tmp, &err) + _ = d.fs.Remove(tmpName) // don't check err, we might have renamed it + }() + + w := bufio.NewWriter(tmp) + for _, ref := range refs { + _, err := w.WriteString(ref.String() + "\n") + if err != nil { + return err + } + } + err = w.Flush() + if err != nil { + return err + } + + // Rename the temp packed-refs file. + err = d.rewritePackedRefsWhileLocked(tmp, f) + if err != nil { + return err + } + + // Delete all the loose refs, while still holding the packed-refs + // lock. + for _, ref := range refs[:numLooseRefs] { + path := d.fs.Join(".", ref.Name().String()) + err = d.fs.Remove(path) + if err != nil && !os.IsNotExist(err) { + return err + } + } + + return nil +} + // Module return a billy.Filesystem poiting to the module folder func (d *DotGit) Module(name string) (billy.Filesystem, error) { return d.fs.Chroot(d.fs.Join(modulePath, name)) diff --git a/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go new file mode 100644 index 0000000..af96196 --- /dev/null +++ b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go @@ -0,0 +1,11 @@ +// +build !windows + +package dotgit + +import "gopkg.in/src-d/go-billy.v4" + +func (d *DotGit) rewritePackedRefsWhileLocked( + tmp billy.File, pr billy.File) error { + // On non-Windows platforms, we can have atomic rename. + return d.fs.Rename(tmp.Name(), pr.Name()) +} diff --git a/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go new file mode 100644 index 0000000..bcdb93e --- /dev/null +++ b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go @@ -0,0 +1,39 @@ +// +build windows + +package dotgit + +import ( + "io" + + "gopkg.in/src-d/go-billy.v4" +) + +func (d *DotGit) rewritePackedRefsWhileLocked( + tmp billy.File, pr billy.File) error { + // If we aren't using the bare Windows filesystem as the storage + // layer, we might be able to get away with a rename over a locked + // file. + err := d.fs.Rename(tmp.Name(), pr.Name()) + if err == nil { + return nil + } + + // Otherwise, Windows doesn't let us rename over a locked file, so + // we have to do a straight copy. Unfortunately this could result + // in a partially-written file if the process fails before the + // copy completes. + _, err = pr.Seek(0, io.SeekStart) + if err != nil { + return err + } + err = pr.Truncate(0) + if err != nil { + return err + } + _, err = tmp.Seek(0, io.SeekStart) + if err != nil { + return err + } + _, err = io.Copy(pr, tmp) + return err +} diff --git a/storage/filesystem/internal/dotgit/dotgit_test.go b/storage/filesystem/internal/dotgit/dotgit_test.go index 446a204..0ed9ec5 100644 --- a/storage/filesystem/internal/dotgit/dotgit_test.go +++ b/storage/filesystem/internal/dotgit/dotgit_test.go @@ -543,3 +543,75 @@ func (s *SuiteDotGit) TestSubmodules(c *C) { c.Assert(err, IsNil) c.Assert(strings.HasSuffix(m.Root(), m.Join(".git", "modules", "basic")), Equals, true) } + +func (s *SuiteDotGit) TestPackRefs(c *C) { + tmp, err := ioutil.TempDir("", "dot-git") + c.Assert(err, IsNil) + defer os.RemoveAll(tmp) + + fs := osfs.New(tmp) + dir := New(fs) + + err = dir.SetRef(plumbing.NewReferenceFromStrings( + "refs/heads/foo", + "e8d3ffab552895c19b9fcf7aa264d277cde33881", + ), nil) + c.Assert(err, IsNil) + err = dir.SetRef(plumbing.NewReferenceFromStrings( + "refs/heads/bar", + "a8d3ffab552895c19b9fcf7aa264d277cde33881", + ), nil) + c.Assert(err, IsNil) + + refs, err := dir.Refs() + c.Assert(err, IsNil) + c.Assert(refs, HasLen, 2) + looseCount, err := dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 2) + + err = dir.PackRefs() + c.Assert(err, IsNil) + + // Make sure the refs are still there, but no longer loose. + refs, err = dir.Refs() + c.Assert(err, IsNil) + c.Assert(refs, HasLen, 2) + looseCount, err = dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 0) + + ref, err := dir.Ref("refs/heads/foo") + c.Assert(err, IsNil) + c.Assert(ref, NotNil) + c.Assert(ref.Hash().String(), Equals, "e8d3ffab552895c19b9fcf7aa264d277cde33881") + ref, err = dir.Ref("refs/heads/bar") + c.Assert(err, IsNil) + c.Assert(ref, NotNil) + c.Assert(ref.Hash().String(), Equals, "a8d3ffab552895c19b9fcf7aa264d277cde33881") + + // Now update one of them, re-pack, and check again. + err = dir.SetRef(plumbing.NewReferenceFromStrings( + "refs/heads/foo", + "b8d3ffab552895c19b9fcf7aa264d277cde33881", + ), nil) + c.Assert(err, IsNil) + looseCount, err = dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 1) + err = dir.PackRefs() + c.Assert(err, IsNil) + + // Make sure the refs are still there, but no longer loose. + refs, err = dir.Refs() + c.Assert(err, IsNil) + c.Assert(refs, HasLen, 2) + looseCount, err = dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 0) + + ref, err = dir.Ref("refs/heads/foo") + c.Assert(err, IsNil) + c.Assert(ref, NotNil) + c.Assert(ref.Hash().String(), Equals, "b8d3ffab552895c19b9fcf7aa264d277cde33881") +} diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 9690c0e..6ca67cc 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -3,6 +3,7 @@ package filesystem import ( "io" "os" + "time" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" @@ -125,6 +126,32 @@ func (s *ObjectStorage) SetEncodedObject(o plumbing.EncodedObject) (plumbing.Has return o.Hash(), err } +// HasEncodedObject returns nil if the object exists, without actually +// reading the object data from storage. +func (s *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { + // Check unpacked objects + f, err := s.dir.Object(h) + if err != nil { + if !os.IsNotExist(err) { + return err + } + // Fall through to check packed objects. + } else { + defer ioutil.CheckClose(f, &err) + return nil + } + + // Check packed objects. + if err := s.requireIndex(); err != nil { + return err + } + _, _, offset := s.findObjectInPackfile(h) + if offset == -1 { + return plumbing.ErrObjectNotFound + } + return nil +} + // EncodedObject returns the object with the given hash, by searching for it in // the packfile and the git object directories. func (s *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { @@ -479,3 +506,31 @@ func hashListAsMap(l []plumbing.Hash) map[plumbing.Hash]bool { return m } + +func (s *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { + err := s.dir.ForEachObjectHash(fun) + if err == storer.ErrStop { + return nil + } + return err +} + +func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { + fi, err := s.dir.ObjectStat(hash) + if err != nil { + return time.Time{}, err + } + return fi.ModTime(), nil +} + +func (s *ObjectStorage) DeleteLooseObject(hash plumbing.Hash) error { + return s.dir.ObjectDelete(hash) +} + +func (s *ObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { + return s.dir.ObjectPacks() +} + +func (s *ObjectStorage) DeleteOldObjectPackAndIndex(h plumbing.Hash, t time.Time) error { + return s.dir.DeleteOldObjectPackAndIndex(h, t) +} diff --git a/storage/filesystem/reference.go b/storage/filesystem/reference.go index 54cdf56..7313f05 100644 --- a/storage/filesystem/reference.go +++ b/storage/filesystem/reference.go @@ -34,3 +34,11 @@ func (r *ReferenceStorage) IterReferences() (storer.ReferenceIter, error) { func (r *ReferenceStorage) RemoveReference(n plumbing.ReferenceName) error { return r.dir.RemoveRef(n) } + +func (r *ReferenceStorage) CountLooseRefs() (int, error) { + return r.dir.CountLooseRefs() +} + +func (r *ReferenceStorage) PackRefs() error { + return r.dir.PackRefs() +} diff --git a/storage/memory/storage.go b/storage/memory/storage.go index 927ec41..0f66f1e 100644 --- a/storage/memory/storage.go +++ b/storage/memory/storage.go @@ -3,6 +3,7 @@ package memory import ( "fmt" + "time" "gopkg.in/src-d/go-git.v4/config" "gopkg.in/src-d/go-git.v4/plumbing" @@ -114,6 +115,13 @@ func (o *ObjectStorage) SetEncodedObject(obj plumbing.EncodedObject) (plumbing.H return h, nil } +func (o *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { + if _, ok := o.Objects[h]; !ok { + return plumbing.ErrObjectNotFound + } + return nil +} + func (o *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { obj, ok := o.Objects[h] if !ok || (plumbing.AnyObject != t && obj.Type() != t) { @@ -156,6 +164,35 @@ func (o *ObjectStorage) Begin() storer.Transaction { } } +func (o *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { + for h, _ := range o.Objects { + err := fun(h) + if err != nil { + if err == storer.ErrStop { + return nil + } + return err + } + } + return nil +} + +func (o *ObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { + return nil, nil +} +func (o *ObjectStorage) DeleteOldObjectPackAndIndex(plumbing.Hash, time.Time) error { + return nil +} + +var errNotSupported = fmt.Errorf("Not supported") + +func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { + return time.Time{}, errNotSupported +} +func (s *ObjectStorage) DeleteLooseObject(plumbing.Hash) error { + return errNotSupported +} + type TxObjectStorage struct { Storage *ObjectStorage Objects map[plumbing.Hash]plumbing.EncodedObject @@ -236,6 +273,14 @@ func (r ReferenceStorage) IterReferences() (storer.ReferenceIter, error) { return storer.NewReferenceSliceIter(refs), nil } +func (r ReferenceStorage) CountLooseRefs() (int, error) { + return len(r), nil +} + +func (r ReferenceStorage) PackRefs() error { + return nil +} + func (r ReferenceStorage) RemoveReference(n plumbing.ReferenceName) error { delete(r, n) return nil |