author    Máximo Cuadros <mcuadros@gmail.com>  2018-08-14 09:57:46 +0200
committer GitHub <noreply@github.com>          2018-08-14 09:57:46 +0200
commit    a28c2ce44695f13ddf28748958f236afd8e0b544 (patch)
tree      107dd441cd96b44b4f3994d26faf5f0bfae933fc /storage
parent    c3740924da0d1929cb523c85ae9da3b456b901ea (diff)
parent    8d75d239e93474e4287870e4e5143da14e2c360d (diff)
download  go-git-a28c2ce44695f13ddf28748958f236afd8e0b544.tar.gz
Merge pull request #906 from src-d/perf/packfile-reads
Improve packfile reading performance
Diffstat (limited to 'storage')
-rw-r--r--  storage/filesystem/dotgit/dotgit.go        |   5
-rw-r--r--  storage/filesystem/dotgit/writers.go       |  30
-rw-r--r--  storage/filesystem/dotgit/writers_test.go  |   3
-rw-r--r--  storage/filesystem/object.go               | 163
-rw-r--r--  storage/filesystem/object_test.go          | 186
5 files changed, 290 insertions(+), 97 deletions(-)
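
This merge replaces the old packfile.Decoder-based code with two pieces: a packfile.Parser feeding an idxfile.Writer while a pack is being written, and a packfile.Packfile reader backed by an idxfile.Index for random access. As orientation for the hunks below, here is a hedged, self-contained sketch of the new read path using only APIs that appear in this diff; the pack and idx file names and the object hash are hypothetical placeholders.

// Hedged sketch (not part of this diff): reading one object from an on-disk
// pack through the new packfile.Packfile reader. File names and the object
// hash are hypothetical placeholders.
package main

import (
    "fmt"

    "gopkg.in/src-d/go-billy.v4/osfs"
    "gopkg.in/src-d/go-git.v4/plumbing"
    "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
    "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
)

func main() {
    fs := osfs.New("/path/to/repo/.git/objects/pack")

    // Decode the .idx into the in-memory index the new reader works against.
    idxFile, err := fs.Open("pack-xxxx.idx")
    if err != nil {
        panic(err)
    }
    idx := idxfile.NewMemoryIndex()
    if err := idxfile.NewDecoder(idxFile).Decode(idx); err != nil {
        panic(err)
    }
    idxFile.Close()

    // Open the pack and resolve hash -> offset -> object, the same steps
    // ObjectStorage.findObjectInPackfile and decodeObjectAt perform below.
    packFile, err := fs.Open("pack-xxxx.pack")
    if err != nil {
        panic(err)
    }
    defer packFile.Close()

    h := plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
    offset, err := idx.FindOffset(h)
    if err != nil {
        panic(err)
    }

    p := packfile.NewPackfile(idx, fs, packFile)
    obj, err := p.GetByOffset(offset)
    if err != nil {
        panic(err)
    }
    fmt.Println(obj.Type(), obj.Size())
}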
diff --git a/storage/filesystem/dotgit/dotgit.go b/storage/filesystem/dotgit/dotgit.go
index dc12f23..af07eb5 100644
--- a/storage/filesystem/dotgit/dotgit.go
+++ b/storage/filesystem/dotgit/dotgit.go
@@ -784,6 +784,11 @@ func (d *DotGit) Alternates() ([]*DotGit, error) {
return alternates, nil
}
+// Fs returns the underlying filesystem of the DotGit folder.
+func (d *DotGit) Fs() billy.Filesystem {
+ return d.fs
+}
+
func isHex(s string) bool {
for _, b := range []byte(s) {
if isNum(b) {
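
The only change to dotgit itself is the exported Fs accessor; it exists so the object storage (see object.go below) can hand the DotGit's filesystem to packfile.NewPackfile and newPackfileIter. A minimal hedged sketch of what the accessor returns; the repository path is hypothetical.

// Hedged sketch (not in the diff): DotGit.Fs exposes the billy.Filesystem
// the DotGit wraps. The repository path is a hypothetical placeholder.
package main

import (
    "fmt"

    "gopkg.in/src-d/go-billy.v4/osfs"
    "gopkg.in/src-d/go-git.v4/storage/filesystem/dotgit"
)

func main() {
    fs := osfs.New("/path/to/repo/.git")
    dg := dotgit.New(fs)
    fmt.Printf("%T\n", dg.Fs()) // the same filesystem handed to dotgit.New
}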
diff --git a/storage/filesystem/dotgit/writers.go b/storage/filesystem/dotgit/writers.go
index c2b420f..93d2d8c 100644
--- a/storage/filesystem/dotgit/writers.go
+++ b/storage/filesystem/dotgit/writers.go
@@ -20,13 +20,14 @@ import (
// is renamed/moved (depends on the Filesystem implementation) to the final
// location, if the PackWriter is not used, nothing is written
type PackWriter struct {
- Notify func(plumbing.Hash, *packfile.Index)
+ Notify func(plumbing.Hash, *idxfile.Writer)
fs billy.Filesystem
fr, fw billy.File
synced *syncedReader
checksum plumbing.Hash
- index *packfile.Index
+ parser *packfile.Parser
+ writer *idxfile.Writer
result chan error
}
@@ -55,20 +56,21 @@ func newPackWrite(fs billy.Filesystem) (*PackWriter, error) {
func (w *PackWriter) buildIndex() {
s := packfile.NewScanner(w.synced)
- d, err := packfile.NewDecoder(s, nil)
+ w.writer = new(idxfile.Writer)
+ var err error
+ w.parser, err = packfile.NewParser(s, w.writer)
if err != nil {
w.result <- err
return
}
- checksum, err := d.Decode()
+ checksum, err := w.parser.Parse()
if err != nil {
w.result <- err
return
}
w.checksum = checksum
- w.index = d.Index()
w.result <- err
}
@@ -92,8 +94,8 @@ func (w *PackWriter) Write(p []byte) (int, error) {
// was written, the tempfiles are deleted without writing a packfile.
func (w *PackWriter) Close() error {
defer func() {
- if w.Notify != nil && w.index != nil && w.index.Size() > 0 {
- w.Notify(w.checksum, w.index)
+ if w.Notify != nil && w.writer != nil && w.writer.Finished() {
+ w.Notify(w.checksum, w.writer)
}
close(w.result)
@@ -115,7 +117,7 @@ func (w *PackWriter) Close() error {
return err
}
- if w.index == nil || w.index.Size() == 0 {
+ if w.writer == nil || !w.writer.Finished() {
return w.clean()
}
@@ -145,11 +147,13 @@ func (w *PackWriter) save() error {
}
func (w *PackWriter) encodeIdx(writer io.Writer) error {
- idx := w.index.ToIdxFile()
- idx.PackfileChecksum = w.checksum
- idx.Version = idxfile.VersionSupported
+ idx, err := w.writer.Index()
+ if err != nil {
+ return err
+ }
+
e := idxfile.NewEncoder(writer)
- _, err := e.Encode(idx)
+ _, err = e.Encode(idx)
return err
}
@@ -209,7 +213,6 @@ func (s *syncedReader) isBlocked() bool {
func (s *syncedReader) wake() {
if s.isBlocked() {
- // fmt.Println("wake")
atomic.StoreUint32(&s.blocked, 0)
s.news <- true
}
@@ -220,7 +223,6 @@ func (s *syncedReader) sleep() {
written := atomic.LoadUint64(&s.written)
if read >= written {
atomic.StoreUint32(&s.blocked, 1)
- // fmt.Println("sleep", read, written)
<-s.news
}
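
PackWriter no longer decodes the whole pack to obtain a packfile.Index; buildIndex now streams the pack once through packfile.Parser with an idxfile.Writer registered as observer, and encodeIdx serialises the resulting index. A hedged sketch of that pipeline in isolation, assuming only the APIs visible above; the file paths are hypothetical placeholders.

// Hedged sketch (not in the diff): producing a .idx for an existing .pack with
// the Parser/idxfile.Writer pipeline that buildIndex and encodeIdx now use.
package main

import (
    "fmt"
    "os"

    "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
    "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
)

func main() {
    pack, err := os.Open("objects/pack/pack-xxxx.pack")
    if err != nil {
        panic(err)
    }
    defer pack.Close()

    // The idxfile.Writer observes the parse and accumulates the index entries.
    w := new(idxfile.Writer)
    parser, err := packfile.NewParser(packfile.NewScanner(pack), w)
    if err != nil {
        panic(err)
    }
    checksum, err := parser.Parse() // single streaming pass over the pack
    if err != nil {
        panic(err)
    }
    fmt.Println("pack checksum:", checksum)

    idx, err := w.Index() // only valid once the parse has finished
    if err != nil {
        panic(err)
    }

    out, err := os.Create("objects/pack/pack-xxxx.idx")
    if err != nil {
        panic(err)
    }
    defer out.Close()

    if _, err := idxfile.NewEncoder(out).Encode(idx); err != nil {
        panic(err)
    }
}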
diff --git a/storage/filesystem/dotgit/writers_test.go b/storage/filesystem/dotgit/writers_test.go
index bf00762..5a5f7b4 100644
--- a/storage/filesystem/dotgit/writers_test.go
+++ b/storage/filesystem/dotgit/writers_test.go
@@ -9,6 +9,7 @@ import (
"strconv"
"gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
. "gopkg.in/check.v1"
@@ -148,7 +149,7 @@ func (s *SuiteDotGit) TestPackWriterUnusedNotify(c *C) {
w, err := newPackWrite(fs)
c.Assert(err, IsNil)
- w.Notify = func(h plumbing.Hash, idx *packfile.Index) {
+ w.Notify = func(h plumbing.Hash, idx *idxfile.Writer) {
c.Fatal("unexpected call to PackWriter.Notify")
}
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index 9ffe4dc..6958e32 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -12,7 +12,6 @@ import (
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
"gopkg.in/src-d/go-git.v4/storage/filesystem/dotgit"
- "gopkg.in/src-d/go-git.v4/storage/memory"
"gopkg.in/src-d/go-git.v4/utils/ioutil"
"gopkg.in/src-d/go-billy.v4"
@@ -23,7 +22,7 @@ type ObjectStorage struct {
deltaBaseCache cache.Object
dir *dotgit.DotGit
- index map[plumbing.Hash]*packfile.Index
+ index map[plumbing.Hash]idxfile.Index
}
// NewObjectStorage creates a new ObjectStorage with the given .git directory.
@@ -41,7 +40,7 @@ func (s *ObjectStorage) requireIndex() error {
return nil
}
- s.index = make(map[plumbing.Hash]*packfile.Index)
+ s.index = make(map[plumbing.Hash]idxfile.Index)
packs, err := s.dir.ObjectPacks()
if err != nil {
return err
@@ -63,13 +62,13 @@ func (s *ObjectStorage) loadIdxFile(h plumbing.Hash) (err error) {
}
defer ioutil.CheckClose(f, &err)
- idxf := idxfile.NewIdxfile()
+ idxf := idxfile.NewMemoryIndex()
d := idxfile.NewDecoder(f)
if err = d.Decode(idxf); err != nil {
return err
}
- s.index[h] = packfile.NewIndexFromIdxFile(idxf)
+ s.index[h] = idxf
return err
}
@@ -87,8 +86,11 @@ func (s *ObjectStorage) PackfileWriter() (io.WriteCloser, error) {
return nil, err
}
- w.Notify = func(h plumbing.Hash, idx *packfile.Index) {
- s.index[h] = idx
+ w.Notify = func(h plumbing.Hash, writer *idxfile.Writer) {
+ index, err := writer.Index()
+ if err == nil {
+ s.index[h] = index
+ }
}
return w, nil
@@ -278,30 +280,30 @@ func (s *ObjectStorage) getFromPackfile(h plumbing.Hash, canBeDelta bool) (
func (s *ObjectStorage) decodeObjectAt(
f billy.File,
- idx *packfile.Index,
- offset int64) (plumbing.EncodedObject, error) {
- if _, err := f.Seek(0, io.SeekStart); err != nil {
- return nil, err
+ idx idxfile.Index,
+ offset int64,
+) (plumbing.EncodedObject, error) {
+ hash, err := idx.FindHash(offset)
+ if err == nil {
+ obj, ok := s.deltaBaseCache.Get(hash)
+ if ok {
+ return obj, nil
+ }
}
- p := packfile.NewScanner(f)
-
- d, err := packfile.NewDecoderWithCache(p, memory.NewStorage(),
- s.deltaBaseCache)
- if err != nil {
+ if err != nil && err != plumbing.ErrObjectNotFound {
return nil, err
}
- d.SetIndex(idx)
- obj, err := d.DecodeObjectAt(offset)
- return obj, err
+ return packfile.NewPackfile(idx, s.dir.Fs(), f).GetByOffset(offset)
}
func (s *ObjectStorage) decodeDeltaObjectAt(
f billy.File,
- idx *packfile.Index,
+ idx idxfile.Index,
offset int64,
- hash plumbing.Hash) (plumbing.EncodedObject, error) {
+ hash plumbing.Hash,
+) (plumbing.EncodedObject, error) {
if _, err := f.Seek(0, io.SeekStart); err != nil {
return nil, err
}
@@ -324,12 +326,10 @@ func (s *ObjectStorage) decodeDeltaObjectAt(
case plumbing.REFDeltaObject:
base = header.Reference
case plumbing.OFSDeltaObject:
- e, ok := idx.LookupOffset(uint64(header.OffsetReference))
- if !ok {
- return nil, plumbing.ErrObjectNotFound
+ base, err = idx.FindHash(header.OffsetReference)
+ if err != nil {
+ return nil, err
}
-
- base = e.Hash
default:
return s.decodeObjectAt(f, idx, offset)
}
@@ -350,8 +350,9 @@ func (s *ObjectStorage) decodeDeltaObjectAt(
func (s *ObjectStorage) findObjectInPackfile(h plumbing.Hash) (plumbing.Hash, plumbing.Hash, int64) {
for packfile, index := range s.index {
- if e, ok := index.LookupHash(h); ok {
- return packfile, e.Hash, int64(e.Offset)
+ offset, err := index.FindOffset(h)
+ if err == nil {
+ return packfile, h, offset
}
}
@@ -398,7 +399,7 @@ func (s *ObjectStorage) buildPackfileIters(t plumbing.ObjectType, seen map[plumb
if err != nil {
return nil, err
}
- return newPackfileIter(pack, t, seen, s.index[h], s.deltaBaseCache)
+ return newPackfileIter(s.dir.Fs(), pack, t, seen, s.index[h], s.deltaBaseCache)
},
}, nil
}
@@ -451,76 +452,87 @@ func (it *lazyPackfilesIter) Close() {
}
type packfileIter struct {
- f billy.File
- d *packfile.Decoder
- t plumbing.ObjectType
-
- seen map[plumbing.Hash]struct{}
- position uint32
- total uint32
+ pack billy.File
+ iter storer.EncodedObjectIter
+ seen map[plumbing.Hash]struct{}
}
-func NewPackfileIter(f billy.File, t plumbing.ObjectType) (storer.EncodedObjectIter, error) {
- return newPackfileIter(f, t, make(map[plumbing.Hash]struct{}), nil, nil)
-}
+// NewPackfileIter returns a new EncodedObjectIter for the provided packfile
+// and object type. Packfile and index file will be closed after they're
+// used.
+func NewPackfileIter(
+ fs billy.Filesystem,
+ f billy.File,
+ idxFile billy.File,
+ t plumbing.ObjectType,
+) (storer.EncodedObjectIter, error) {
+ idx := idxfile.NewMemoryIndex()
+ if err := idxfile.NewDecoder(idxFile).Decode(idx); err != nil {
+ return nil, err
+ }
-func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash]struct{},
- index *packfile.Index, cache cache.Object) (storer.EncodedObjectIter, error) {
- s := packfile.NewScanner(f)
- _, total, err := s.Header()
- if err != nil {
+ if err := idxFile.Close(); err != nil {
return nil, err
}
- d, err := packfile.NewDecoderForType(s, memory.NewStorage(), t, cache)
+ return newPackfileIter(fs, f, t, make(map[plumbing.Hash]struct{}), idx, nil)
+}
+
+func newPackfileIter(
+ fs billy.Filesystem,
+ f billy.File,
+ t plumbing.ObjectType,
+ seen map[plumbing.Hash]struct{},
+ index idxfile.Index,
+ cache cache.Object,
+) (storer.EncodedObjectIter, error) {
+ iter, err := packfile.NewPackfile(index, fs, f).GetByType(t)
if err != nil {
return nil, err
}
- d.SetIndex(index)
-
return &packfileIter{
- f: f,
- d: d,
- t: t,
-
- total: total,
- seen: seen,
+ pack: f,
+ iter: iter,
+ seen: seen,
}, nil
}
func (iter *packfileIter) Next() (plumbing.EncodedObject, error) {
for {
- if iter.position >= iter.total {
- return nil, io.EOF
- }
-
- obj, err := iter.d.DecodeObject()
+ obj, err := iter.iter.Next()
if err != nil {
return nil, err
}
- iter.position++
- if obj == nil {
- continue
- }
-
if _, ok := iter.seen[obj.Hash()]; ok {
- return iter.Next()
+ continue
}
return obj, nil
}
}
-// ForEach is never called since is used inside of a MultiObjectIterator
func (iter *packfileIter) ForEach(cb func(plumbing.EncodedObject) error) error {
- return nil
+ for {
+ o, err := iter.Next()
+ if err != nil {
+ if err == io.EOF {
+ iter.Close()
+ return nil
+ }
+ return err
+ }
+
+ if err := cb(o); err != nil {
+ return err
+ }
+ }
}
func (iter *packfileIter) Close() {
- iter.f.Close()
- iter.d.Close()
+ iter.iter.Close()
+ _ = iter.pack.Close()
}
type objectsIter struct {
@@ -548,9 +560,20 @@ func (iter *objectsIter) Next() (plumbing.EncodedObject, error) {
return obj, err
}
-// ForEach is never called since is used inside of a MultiObjectIterator
func (iter *objectsIter) ForEach(cb func(plumbing.EncodedObject) error) error {
- return nil
+ for {
+ o, err := iter.Next()
+ if err != nil {
+ if err == io.EOF {
+ return nil
+ }
+ return err
+ }
+
+ if err := cb(o); err != nil {
+ return err
+ }
+ }
}
func (iter *objectsIter) Close() {
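
On the read side, ObjectStorage now keeps an idxfile.Index per pack and serves objects through packfile.NewPackfile, while the exported NewPackfileIter gained filesystem and idx-file parameters so the new reader can seek inside the pack. A hedged sketch of walking one object type over every pack of a repository with the new signature; the repository path is a hypothetical placeholder.

// Hedged sketch (not in the diff): iterating every commit of every pack
// through the reworked NewPackfileIter.
package main

import (
    "fmt"

    "gopkg.in/src-d/go-billy.v4/osfs"
    "gopkg.in/src-d/go-git.v4/plumbing"
    "gopkg.in/src-d/go-git.v4/storage/filesystem"
    "gopkg.in/src-d/go-git.v4/storage/filesystem/dotgit"
)

func main() {
    fs := osfs.New("/path/to/repo/.git")
    dg := dotgit.New(fs)

    packs, err := dg.ObjectPacks()
    if err != nil {
        panic(err)
    }

    for _, h := range packs {
        pack, err := dg.ObjectPack(h)
        if err != nil {
            panic(err)
        }
        idx, err := dg.ObjectPackIdx(h)
        if err != nil {
            panic(err)
        }

        // Per the new doc comment, both files are closed after they are used.
        iter, err := filesystem.NewPackfileIter(fs, pack, idx, plumbing.CommitObject)
        if err != nil {
            panic(err)
        }
        err = iter.ForEach(func(o plumbing.EncodedObject) error {
            fmt.Println(o.Hash())
            return nil
        })
        if err != nil {
            panic(err)
        }
    }
}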
diff --git a/storage/filesystem/object_test.go b/storage/filesystem/object_test.go
index ecd6beb..b1408b7 100644
--- a/storage/filesystem/object_test.go
+++ b/storage/filesystem/object_test.go
@@ -1,6 +1,9 @@
package filesystem
import (
+ "io/ioutil"
+ "testing"
+
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/storage/filesystem/dotgit"
@@ -10,17 +13,16 @@ import (
type FsSuite struct {
fixtures.Suite
- Types []plumbing.ObjectType
}
-var _ = Suite(&FsSuite{
- Types: []plumbing.ObjectType{
- plumbing.CommitObject,
- plumbing.TagObject,
- plumbing.TreeObject,
- plumbing.BlobObject,
- },
-})
+var objectTypes = []plumbing.ObjectType{
+ plumbing.CommitObject,
+ plumbing.TagObject,
+ plumbing.TreeObject,
+ plumbing.BlobObject,
+}
+
+var _ = Suite(&FsSuite{})
func (s *FsSuite) TestGetFromObjectFile(c *C) {
fs := fixtures.ByTag(".git").ByTag("unpacked").One().DotGit()
@@ -84,7 +86,7 @@ func (s *FsSuite) TestIter(c *C) {
func (s *FsSuite) TestIterWithType(c *C) {
fixtures.ByTag(".git").Test(c, func(f *fixtures.Fixture) {
- for _, t := range s.Types {
+ for _, t := range objectTypes {
fs := f.DotGit()
o, err := NewObjectStorage(dotgit.New(fs))
c.Assert(err, IsNil)
@@ -108,14 +110,18 @@ func (s *FsSuite) TestPackfileIter(c *C) {
fs := f.DotGit()
dg := dotgit.New(fs)
- for _, t := range s.Types {
+ for _, t := range objectTypes {
ph, err := dg.ObjectPacks()
c.Assert(err, IsNil)
for _, h := range ph {
f, err := dg.ObjectPack(h)
c.Assert(err, IsNil)
- iter, err := NewPackfileIter(f, t)
+
+ idxf, err := dg.ObjectPackIdx(h)
+ c.Assert(err, IsNil)
+
+ iter, err := NewPackfileIter(fs, f, idxf, t)
c.Assert(err, IsNil)
err = iter.ForEach(func(o plumbing.EncodedObject) error {
c.Assert(o.Type(), Equals, t)
@@ -128,3 +134,159 @@ func (s *FsSuite) TestPackfileIter(c *C) {
})
}
+
+func BenchmarkPackfileIter(b *testing.B) {
+ if err := fixtures.Init(); err != nil {
+ b.Fatal(err)
+ }
+
+ defer func() {
+ if err := fixtures.Clean(); err != nil {
+ b.Fatal(err)
+ }
+ }()
+
+ for _, f := range fixtures.ByTag(".git") {
+ b.Run(f.URL, func(b *testing.B) {
+ fs := f.DotGit()
+ dg := dotgit.New(fs)
+
+ for i := 0; i < b.N; i++ {
+ for _, t := range objectTypes {
+ ph, err := dg.ObjectPacks()
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ for _, h := range ph {
+ f, err := dg.ObjectPack(h)
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ idxf, err := dg.ObjectPackIdx(h)
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ iter, err := NewPackfileIter(fs, f, idxf, t)
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ err = iter.ForEach(func(o plumbing.EncodedObject) error {
+ if o.Type() != t {
+ b.Errorf("expecting %s, got %s", t, o.Type())
+ }
+ return nil
+ })
+
+ if err != nil {
+ b.Fatal(err)
+ }
+ }
+ }
+ }
+ })
+ }
+}
+
+func BenchmarkPackfileIterReadContent(b *testing.B) {
+ if err := fixtures.Init(); err != nil {
+ b.Fatal(err)
+ }
+
+ defer func() {
+ if err := fixtures.Clean(); err != nil {
+ b.Fatal(err)
+ }
+ }()
+
+ for _, f := range fixtures.ByTag(".git") {
+ b.Run(f.URL, func(b *testing.B) {
+ fs := f.DotGit()
+ dg := dotgit.New(fs)
+
+ for i := 0; i < b.N; i++ {
+ for _, t := range objectTypes {
+ ph, err := dg.ObjectPacks()
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ for _, h := range ph {
+ f, err := dg.ObjectPack(h)
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ idxf, err := dg.ObjectPackIdx(h)
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ iter, err := NewPackfileIter(fs, f, idxf, t)
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ err = iter.ForEach(func(o plumbing.EncodedObject) error {
+ if o.Type() != t {
+ b.Errorf("expecting %s, got %s", t, o.Type())
+ }
+
+ r, err := o.Reader()
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ if _, err := ioutil.ReadAll(r); err != nil {
+ b.Fatal(err)
+ }
+
+ return r.Close()
+ })
+
+ if err != nil {
+ b.Fatal(err)
+ }
+ }
+ }
+ }
+ })
+ }
+}
+
+func BenchmarkGetObjectFromPackfile(b *testing.B) {
+ if err := fixtures.Init(); err != nil {
+ b.Fatal(err)
+ }
+
+ defer func() {
+ if err := fixtures.Clean(); err != nil {
+ b.Fatal(err)
+ }
+ }()
+
+ for _, f := range fixtures.Basic() {
+ b.Run(f.URL, func(b *testing.B) {
+ fs := f.DotGit()
+ o, err := NewObjectStorage(dotgit.New(fs))
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ for i := 0; i < b.N; i++ {
+ expected := plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
+ obj, err := o.EncodedObject(plumbing.AnyObject, expected)
+ if err != nil {
+ b.Fatal(err)
+ }
+
+ if obj.Hash() != expected {
+ b.Errorf("expecting %s, got %s", expected, obj.Hash())
+ }
+ }
+ })
+ }
+}
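
The benchmarks added above handle fixture setup and teardown themselves through fixtures.Init and fixtures.Clean, so they should be runnable with the standard Go tooling alone, e.g. go test -run=NONE -bench=. ./storage/filesystem/ from the repository root (the exact invocation is an assumption, not part of this change).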