diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2017-12-01 00:45:11 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-12-01 00:45:11 +0100 |
commit | b0f6b4786b58b4add6e54d354311fc1084764b36 (patch) | |
tree | ebc3ad2f840da271aeac9cf33cc3912feb9f9e12 /storage/filesystem | |
parent | 174fd8e5b2150dbd4cf522bb4a98fb9d79ebc6f4 (diff) | |
parent | d53264806f0d5ddef259f45f4490a19398a102ba (diff) | |
download | go-git-b0f6b4786b58b4add6e54d354311fc1084764b36.tar.gz |
Merge pull request #669 from keybase/strib/gh-gc
storage/repository: add new functions for garbage collection
Diffstat (limited to 'storage/filesystem')
-rw-r--r-- | storage/filesystem/internal/dotgit/dotgit.go | 312 | ||||
-rw-r--r-- | storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go | 11 | ||||
-rw-r--r-- | storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go | 39 | ||||
-rw-r--r-- | storage/filesystem/internal/dotgit/dotgit_test.go | 72 | ||||
-rw-r--r-- | storage/filesystem/object.go | 55 | ||||
-rw-r--r-- | storage/filesystem/reference.go | 8 |
6 files changed, 431 insertions, 66 deletions
diff --git a/storage/filesystem/internal/dotgit/dotgit.go b/storage/filesystem/internal/dotgit/dotgit.go index 1cb97bd..5e23e66 100644 --- a/storage/filesystem/internal/dotgit/dotgit.go +++ b/storage/filesystem/internal/dotgit/dotgit.go @@ -9,6 +9,7 @@ import ( stdioutil "io/ioutil" "os" "strings" + "time" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/utils/ioutil" @@ -166,11 +167,12 @@ func (d *DotGit) ObjectPacks() ([]plumbing.Hash, error) { return packs, nil } -// ObjectPack returns a fs.File of the given packfile -func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) { - file := d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.pack", hash.String())) +func (d *DotGit) objectPackPath(hash plumbing.Hash, extension string) string { + return d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.%s", hash.String(), extension)) +} - pack, err := d.fs.Open(file) +func (d *DotGit) objectPackOpen(hash plumbing.Hash, extension string) (billy.File, error) { + pack, err := d.fs.Open(d.objectPackPath(hash, extension)) if err != nil { if os.IsNotExist(err) { return nil, ErrPackfileNotFound @@ -182,19 +184,37 @@ func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) { return pack, nil } +// ObjectPack returns a fs.File of the given packfile +func (d *DotGit) ObjectPack(hash plumbing.Hash) (billy.File, error) { + return d.objectPackOpen(hash, `pack`) +} + // ObjectPackIdx returns a fs.File of the index file for a given packfile func (d *DotGit) ObjectPackIdx(hash plumbing.Hash) (billy.File, error) { - file := d.fs.Join(objectsPath, packPath, fmt.Sprintf("pack-%s.idx", hash.String())) - idx, err := d.fs.Open(file) - if err != nil { - if os.IsNotExist(err) { - return nil, ErrPackfileNotFound - } + return d.objectPackOpen(hash, `idx`) +} - return nil, err +func (d *DotGit) DeleteOldObjectPackAndIndex(hash plumbing.Hash, t time.Time) error { + path := d.objectPackPath(hash, `pack`) + if !t.IsZero() { + fi, err := d.fs.Stat(path) + if err != nil { + return err + } + // too new, skip deletion. + if !fi.ModTime().Before(t) { + return nil + } } - - return idx, nil + err := d.fs.Remove(path) + if err != nil { + return err + } + err = d.fs.Remove(d.objectPackPath(hash, `idx`)) + if err != nil { + return err + } + return nil } // NewObject return a writer for a new object file. @@ -205,39 +225,67 @@ func (d *DotGit) NewObject() (*ObjectWriter, error) { // Objects returns a slice with the hashes of objects found under the // .git/objects/ directory. func (d *DotGit) Objects() ([]plumbing.Hash, error) { + var objects []plumbing.Hash + err := d.ForEachObjectHash(func(hash plumbing.Hash) error { + objects = append(objects, hash) + return nil + }) + if err != nil { + return nil, err + } + return objects, nil +} + +// Objects returns a slice with the hashes of objects found under the +// .git/objects/ directory. +func (d *DotGit) ForEachObjectHash(fun func(plumbing.Hash) error) error { files, err := d.fs.ReadDir(objectsPath) if err != nil { if os.IsNotExist(err) { - return nil, nil + return nil } - return nil, err + return err } - var objects []plumbing.Hash for _, f := range files { if f.IsDir() && len(f.Name()) == 2 && isHex(f.Name()) { base := f.Name() d, err := d.fs.ReadDir(d.fs.Join(objectsPath, base)) if err != nil { - return nil, err + return err } for _, o := range d { - objects = append(objects, plumbing.NewHash(base+o.Name())) + err = fun(plumbing.NewHash(base + o.Name())) + if err != nil { + return err + } } } } - return objects, nil + return nil } -// Object return a fs.File pointing the object file, if exists -func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) { +func (d *DotGit) objectPath(h plumbing.Hash) string { hash := h.String() - file := d.fs.Join(objectsPath, hash[0:2], hash[2:40]) + return d.fs.Join(objectsPath, hash[0:2], hash[2:40]) +} + +// Object returns a fs.File pointing the object file, if exists +func (d *DotGit) Object(h plumbing.Hash) (billy.File, error) { + return d.fs.Open(d.objectPath(h)) +} + +// ObjectStat returns a os.FileInfo pointing the object file, if exists +func (d *DotGit) ObjectStat(h plumbing.Hash) (os.FileInfo, error) { + return d.fs.Stat(d.objectPath(h)) +} - return d.fs.Open(file) +// ObjectDelete removes the object file, if exists +func (d *DotGit) ObjectDelete(h plumbing.Hash) error { + return d.fs.Remove(d.objectPath(h)) } func (d *DotGit) readReferenceFrom(rd io.Reader, name string) (ref *plumbing.Reference, err error) { @@ -339,17 +387,7 @@ func (d *DotGit) Ref(name plumbing.ReferenceName) (*plumbing.Reference, error) { return d.packedRef(name) } -func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) { - f, err := d.fs.Open(packedRefsPath) - if err != nil { - if os.IsNotExist(err) { - return nil, nil - } - return nil, err - } - - defer ioutil.CheckClose(f, &err) - +func (d *DotGit) findPackedRefsInFile(f billy.File) ([]*plumbing.Reference, error) { s := bufio.NewScanner(f) var refs []*plumbing.Reference for s.Scan() { @@ -366,6 +404,19 @@ func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) { return refs, s.Err() } +func (d *DotGit) findPackedRefs() ([]*plumbing.Reference, error) { + f, err := d.fs.Open(packedRefsPath) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, err + } + + defer ioutil.CheckClose(f, &err) + return d.findPackedRefsInFile(f) +} + func (d *DotGit) packedRef(name plumbing.ReferenceName) (*plumbing.Reference, error) { refs, err := d.findPackedRefs() if err != nil { @@ -412,26 +463,82 @@ func (d *DotGit) addRefsFromPackedRefs(refs *[]*plumbing.Reference, seen map[plu return nil } -func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err error) { - f, err := d.fs.Open(packedRefsPath) +func (d *DotGit) addRefsFromPackedRefsFile(refs *[]*plumbing.Reference, f billy.File, seen map[plumbing.ReferenceName]bool) (err error) { + packedRefs, err := d.findPackedRefsInFile(f) if err != nil { - if os.IsNotExist(err) { - return nil - } - return err } - doCloseF := true + + for _, ref := range packedRefs { + if !seen[ref.Name()] { + *refs = append(*refs, ref) + seen[ref.Name()] = true + } + } + return nil +} + +func (d *DotGit) openAndLockPackedRefs(doCreate bool) ( + pr billy.File, err error) { + var f billy.File defer func() { - if doCloseF { + if err != nil && f != nil { ioutil.CheckClose(f, &err) } }() - err = f.Lock() + openFlags := os.O_RDWR + if doCreate { + openFlags |= os.O_CREATE + } + + // Keep trying to open and lock the file until we're sure the file + // didn't change between the open and the lock. + for { + f, err = d.fs.OpenFile(packedRefsPath, openFlags, 0600) + if err != nil { + if os.IsNotExist(err) && !doCreate { + return nil, nil + } + + return nil, err + } + fi, err := d.fs.Stat(packedRefsPath) + if err != nil { + return nil, err + } + mtime := fi.ModTime() + + err = f.Lock() + if err != nil { + return nil, err + } + + fi, err = d.fs.Stat(packedRefsPath) + if err != nil { + return nil, err + } + if mtime == fi.ModTime() { + break + } + // The file has changed since we opened it. Close and retry. + err = f.Close() + if err != nil { + return nil, err + } + } + return f, nil +} + +func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err error) { + pr, err := d.openAndLockPackedRefs(false) if err != nil { return err } + if pr == nil { + return nil + } + defer ioutil.CheckClose(pr, &err) // Creating the temp file in the same directory as the target file // improves our chances for rename operation to be atomic. @@ -439,14 +546,13 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e if err != nil { return err } - doCloseTmp := true + tmpName := tmp.Name() defer func() { - if doCloseTmp { - ioutil.CheckClose(tmp, &err) - } + ioutil.CheckClose(tmp, &err) + _ = d.fs.Remove(tmpName) // don't check err, we might have renamed it }() - s := bufio.NewScanner(f) + s := bufio.NewScanner(pr) found := false for s.Scan() { line := s.Text() @@ -470,26 +576,10 @@ func (d *DotGit) rewritePackedRefsWithoutRef(name plumbing.ReferenceName) (err e } if !found { - doCloseTmp = false - ioutil.CheckClose(tmp, &err) - if err != nil { - return err - } - // Delete the temp file if nothing needed to be removed. - return d.fs.Remove(tmp.Name()) - } - - doCloseF = false - if err := f.Close(); err != nil { - return err - } - - doCloseTmp = false - if err := tmp.Close(); err != nil { - return err + return nil } - return d.fs.Rename(tmp.Name(), packedRefsPath) + return d.rewritePackedRefsWhileLocked(tmp, pr) } // process lines from a packed-refs file @@ -576,6 +666,96 @@ func (d *DotGit) readReferenceFile(path, name string) (ref *plumbing.Reference, return d.readReferenceFrom(f, name) } +func (d *DotGit) CountLooseRefs() (int, error) { + var refs []*plumbing.Reference + var seen = make(map[plumbing.ReferenceName]bool) + if err := d.addRefsFromRefDir(&refs, seen); err != nil { + return 0, err + } + + return len(refs), nil +} + +// PackRefs packs all loose refs into the packed-refs file. +// +// This implementation only works under the assumption that the view +// of the file system won't be updated during this operation. This +// strategy would not work on a general file system though, without +// locking each loose reference and checking it again before deleting +// the file, because otherwise an updated reference could sneak in and +// then be deleted by the packed-refs process. Alternatively, every +// ref update could also lock packed-refs, so only one lock is +// required during ref-packing. But that would worsen performance in +// the common case. +// +// TODO: add an "all" boolean like the `git pack-refs --all` flag. +// When `all` is false, it would only pack refs that have already been +// packed, plus all tags. +func (d *DotGit) PackRefs() (err error) { + // Lock packed-refs, and create it if it doesn't exist yet. + f, err := d.openAndLockPackedRefs(true) + if err != nil { + return err + } + defer ioutil.CheckClose(f, &err) + + // Gather all refs using addRefsFromRefDir and addRefsFromPackedRefs. + var refs []*plumbing.Reference + seen := make(map[plumbing.ReferenceName]bool) + if err := d.addRefsFromRefDir(&refs, seen); err != nil { + return err + } + if len(refs) == 0 { + // Nothing to do! + return nil + } + numLooseRefs := len(refs) + if err := d.addRefsFromPackedRefsFile(&refs, f, seen); err != nil { + return err + } + + // Write them all to a new temp packed-refs file. + tmp, err := d.fs.TempFile("", tmpPackedRefsPrefix) + if err != nil { + return err + } + tmpName := tmp.Name() + defer func() { + ioutil.CheckClose(tmp, &err) + _ = d.fs.Remove(tmpName) // don't check err, we might have renamed it + }() + + w := bufio.NewWriter(tmp) + for _, ref := range refs { + _, err := w.WriteString(ref.String() + "\n") + if err != nil { + return err + } + } + err = w.Flush() + if err != nil { + return err + } + + // Rename the temp packed-refs file. + err = d.rewritePackedRefsWhileLocked(tmp, f) + if err != nil { + return err + } + + // Delete all the loose refs, while still holding the packed-refs + // lock. + for _, ref := range refs[:numLooseRefs] { + path := d.fs.Join(".", ref.Name().String()) + err = d.fs.Remove(path) + if err != nil && !os.IsNotExist(err) { + return err + } + } + + return nil +} + // Module return a billy.Filesystem poiting to the module folder func (d *DotGit) Module(name string) (billy.Filesystem, error) { return d.fs.Chroot(d.fs.Join(modulePath, name)) diff --git a/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go new file mode 100644 index 0000000..af96196 --- /dev/null +++ b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_nix.go @@ -0,0 +1,11 @@ +// +build !windows + +package dotgit + +import "gopkg.in/src-d/go-billy.v4" + +func (d *DotGit) rewritePackedRefsWhileLocked( + tmp billy.File, pr billy.File) error { + // On non-Windows platforms, we can have atomic rename. + return d.fs.Rename(tmp.Name(), pr.Name()) +} diff --git a/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go new file mode 100644 index 0000000..bcdb93e --- /dev/null +++ b/storage/filesystem/internal/dotgit/dotgit_rewrite_packed_refs_windows.go @@ -0,0 +1,39 @@ +// +build windows + +package dotgit + +import ( + "io" + + "gopkg.in/src-d/go-billy.v4" +) + +func (d *DotGit) rewritePackedRefsWhileLocked( + tmp billy.File, pr billy.File) error { + // If we aren't using the bare Windows filesystem as the storage + // layer, we might be able to get away with a rename over a locked + // file. + err := d.fs.Rename(tmp.Name(), pr.Name()) + if err == nil { + return nil + } + + // Otherwise, Windows doesn't let us rename over a locked file, so + // we have to do a straight copy. Unfortunately this could result + // in a partially-written file if the process fails before the + // copy completes. + _, err = pr.Seek(0, io.SeekStart) + if err != nil { + return err + } + err = pr.Truncate(0) + if err != nil { + return err + } + _, err = tmp.Seek(0, io.SeekStart) + if err != nil { + return err + } + _, err = io.Copy(pr, tmp) + return err +} diff --git a/storage/filesystem/internal/dotgit/dotgit_test.go b/storage/filesystem/internal/dotgit/dotgit_test.go index 446a204..0ed9ec5 100644 --- a/storage/filesystem/internal/dotgit/dotgit_test.go +++ b/storage/filesystem/internal/dotgit/dotgit_test.go @@ -543,3 +543,75 @@ func (s *SuiteDotGit) TestSubmodules(c *C) { c.Assert(err, IsNil) c.Assert(strings.HasSuffix(m.Root(), m.Join(".git", "modules", "basic")), Equals, true) } + +func (s *SuiteDotGit) TestPackRefs(c *C) { + tmp, err := ioutil.TempDir("", "dot-git") + c.Assert(err, IsNil) + defer os.RemoveAll(tmp) + + fs := osfs.New(tmp) + dir := New(fs) + + err = dir.SetRef(plumbing.NewReferenceFromStrings( + "refs/heads/foo", + "e8d3ffab552895c19b9fcf7aa264d277cde33881", + ), nil) + c.Assert(err, IsNil) + err = dir.SetRef(plumbing.NewReferenceFromStrings( + "refs/heads/bar", + "a8d3ffab552895c19b9fcf7aa264d277cde33881", + ), nil) + c.Assert(err, IsNil) + + refs, err := dir.Refs() + c.Assert(err, IsNil) + c.Assert(refs, HasLen, 2) + looseCount, err := dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 2) + + err = dir.PackRefs() + c.Assert(err, IsNil) + + // Make sure the refs are still there, but no longer loose. + refs, err = dir.Refs() + c.Assert(err, IsNil) + c.Assert(refs, HasLen, 2) + looseCount, err = dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 0) + + ref, err := dir.Ref("refs/heads/foo") + c.Assert(err, IsNil) + c.Assert(ref, NotNil) + c.Assert(ref.Hash().String(), Equals, "e8d3ffab552895c19b9fcf7aa264d277cde33881") + ref, err = dir.Ref("refs/heads/bar") + c.Assert(err, IsNil) + c.Assert(ref, NotNil) + c.Assert(ref.Hash().String(), Equals, "a8d3ffab552895c19b9fcf7aa264d277cde33881") + + // Now update one of them, re-pack, and check again. + err = dir.SetRef(plumbing.NewReferenceFromStrings( + "refs/heads/foo", + "b8d3ffab552895c19b9fcf7aa264d277cde33881", + ), nil) + c.Assert(err, IsNil) + looseCount, err = dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 1) + err = dir.PackRefs() + c.Assert(err, IsNil) + + // Make sure the refs are still there, but no longer loose. + refs, err = dir.Refs() + c.Assert(err, IsNil) + c.Assert(refs, HasLen, 2) + looseCount, err = dir.CountLooseRefs() + c.Assert(err, IsNil) + c.Assert(looseCount, Equals, 0) + + ref, err = dir.Ref("refs/heads/foo") + c.Assert(err, IsNil) + c.Assert(ref, NotNil) + c.Assert(ref.Hash().String(), Equals, "b8d3ffab552895c19b9fcf7aa264d277cde33881") +} diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 9690c0e..6ca67cc 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -3,6 +3,7 @@ package filesystem import ( "io" "os" + "time" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" @@ -125,6 +126,32 @@ func (s *ObjectStorage) SetEncodedObject(o plumbing.EncodedObject) (plumbing.Has return o.Hash(), err } +// HasEncodedObject returns nil if the object exists, without actually +// reading the object data from storage. +func (s *ObjectStorage) HasEncodedObject(h plumbing.Hash) (err error) { + // Check unpacked objects + f, err := s.dir.Object(h) + if err != nil { + if !os.IsNotExist(err) { + return err + } + // Fall through to check packed objects. + } else { + defer ioutil.CheckClose(f, &err) + return nil + } + + // Check packed objects. + if err := s.requireIndex(); err != nil { + return err + } + _, _, offset := s.findObjectInPackfile(h) + if offset == -1 { + return plumbing.ErrObjectNotFound + } + return nil +} + // EncodedObject returns the object with the given hash, by searching for it in // the packfile and the git object directories. func (s *ObjectStorage) EncodedObject(t plumbing.ObjectType, h plumbing.Hash) (plumbing.EncodedObject, error) { @@ -479,3 +506,31 @@ func hashListAsMap(l []plumbing.Hash) map[plumbing.Hash]bool { return m } + +func (s *ObjectStorage) ForEachObjectHash(fun func(plumbing.Hash) error) error { + err := s.dir.ForEachObjectHash(fun) + if err == storer.ErrStop { + return nil + } + return err +} + +func (s *ObjectStorage) LooseObjectTime(hash plumbing.Hash) (time.Time, error) { + fi, err := s.dir.ObjectStat(hash) + if err != nil { + return time.Time{}, err + } + return fi.ModTime(), nil +} + +func (s *ObjectStorage) DeleteLooseObject(hash plumbing.Hash) error { + return s.dir.ObjectDelete(hash) +} + +func (s *ObjectStorage) ObjectPacks() ([]plumbing.Hash, error) { + return s.dir.ObjectPacks() +} + +func (s *ObjectStorage) DeleteOldObjectPackAndIndex(h plumbing.Hash, t time.Time) error { + return s.dir.DeleteOldObjectPackAndIndex(h, t) +} diff --git a/storage/filesystem/reference.go b/storage/filesystem/reference.go index 54cdf56..7313f05 100644 --- a/storage/filesystem/reference.go +++ b/storage/filesystem/reference.go @@ -34,3 +34,11 @@ func (r *ReferenceStorage) IterReferences() (storer.ReferenceIter, error) { func (r *ReferenceStorage) RemoveReference(n plumbing.ReferenceName) error { return r.dir.RemoveRef(n) } + +func (r *ReferenceStorage) CountLooseRefs() (int, error) { + return r.dir.CountLooseRefs() +} + +func (r *ReferenceStorage) PackRefs() error { + return r.dir.PackRefs() +} |