From 962eeb399ce322daa4dd6522cdc91d277fbdba2c Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Wed, 20 Dec 2017 15:11:38 +0100 Subject: Enforce the use of cache in packfile decoder Decoder object can make use of an object cache to speed up processing. Previously the only way to specify it was changing manually the struct generated by NewDecodeForFile. This lead to some instances to be created without it and penalized performance. Now the cache should be explicitly passed to the constructor function. NewDecoder now creates objects with a cache using the default size. A new helper function was added to create cache objects with the default size as this becomes a common task now: cache.NewObjectLRUDefault() Signed-off-by: Javi Fontan --- plumbing/cache/common.go | 2 ++ plumbing/cache/object_lru.go | 5 +++++ plumbing/format/packfile/decoder.go | 16 +++++++++++----- plumbing/format/packfile/decoder_test.go | 23 ++++++++++++++++------- storage/filesystem/object.go | 7 ++----- 5 files changed, 36 insertions(+), 17 deletions(-) diff --git a/plumbing/cache/common.go b/plumbing/cache/common.go index 9efc26c..e77baf0 100644 --- a/plumbing/cache/common.go +++ b/plumbing/cache/common.go @@ -11,6 +11,8 @@ const ( type FileSize int64 +const DefaultMaxSize FileSize = 96 * MiByte + // Object is an interface to a object cache. type Object interface { // Put puts the given object into the cache. Whether this object will diff --git a/plumbing/cache/object_lru.go b/plumbing/cache/object_lru.go index e8414ab..d99a5c9 100644 --- a/plumbing/cache/object_lru.go +++ b/plumbing/cache/object_lru.go @@ -24,6 +24,11 @@ func NewObjectLRU(maxSize FileSize) *ObjectLRU { return &ObjectLRU{MaxSize: maxSize} } +// NewObjectLRUDefault creates a new ObjectLRU with the default cache size. +func NewObjectLRUDefault() *ObjectLRU { + return &ObjectLRU{MaxSize: DefaultMaxSize} +} + // Put puts an object into the cache. If the object is already in the cache, it // will be marked as used. Otherwise, it will be inserted. A single object might // be evicted to make room for the new object. diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index ad72ea0..24493ec 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -80,15 +80,20 @@ type Decoder struct { // If the ObjectStorer implements storer.Transactioner, a transaction is created // during the Decode execution. If anything fails, Rollback is called func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) { - return NewDecoderForType(s, o, plumbing.AnyObject) + return NewDecoderForType(s, o, plumbing.AnyObject, + cache.NewObjectLRUDefault()) } // NewDecoderForType returns a new Decoder but in this case for a specific object type. // When an object is read using this Decoder instance and it is not of the same type of // the specified one, nil will be returned. This is intended to avoid the content -// deserialization of all the objects +// deserialization of all the objects. +// +// cacheObject is an ObjectLRU that is used to speed up the process. If cache +// is not needed you can pass nil. To create a cache object with the default +// size you an use the helper cache.ObjectLRUDefault(). func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, - t plumbing.ObjectType) (*Decoder, error) { + t plumbing.ObjectType, cacheObject cache.Object) (*Decoder, error) { if t == plumbing.OFSDeltaObject || t == plumbing.REFDeltaObject || @@ -101,8 +106,9 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, } return &Decoder{ - s: s, - o: o, + s: s, + o: o, + DeltaBaseCache: cacheObject, idx: NewIndex(0), offsetToType: make(map[int64]plumbing.ObjectType), diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go index 1a1a74a..b5bc7b7 100644 --- a/plumbing/format/packfile/decoder_test.go +++ b/plumbing/format/packfile/decoder_test.go @@ -4,6 +4,7 @@ import ( "io" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" @@ -51,7 +52,8 @@ func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) { storage := memory.NewStorage() scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, plumbing.CommitObject) + d, err := packfile.NewDecoderForType(scanner, storage, plumbing.CommitObject, + cache.NewObjectLRUDefault()) c.Assert(err, IsNil) // Index required to decode by ref-delta. @@ -77,7 +79,8 @@ func (s *ReaderSuite) TestDecodeByTypeRefDeltaError(c *C) { fixtures.Basic().ByTag("ref-delta").Test(c, func(f *fixtures.Fixture) { storage := memory.NewStorage() scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, plumbing.CommitObject) + d, err := packfile.NewDecoderForType(scanner, storage, + plumbing.CommitObject, cache.NewObjectLRUDefault()) c.Assert(err, IsNil) defer d.Close() @@ -111,7 +114,8 @@ func (s *ReaderSuite) TestDecodeByType(c *C) { for _, t := range ts { storage := memory.NewStorage() scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, t) + d, err := packfile.NewDecoderForType(scanner, storage, t, + cache.NewObjectLRUDefault()) c.Assert(err, IsNil) // when the packfile is ref-delta based, the offsets are required @@ -141,13 +145,17 @@ func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) { storage := memory.NewStorage() scanner := packfile.NewScanner(f.Packfile()) - _, err := packfile.NewDecoderForType(scanner, storage, plumbing.OFSDeltaObject) + _, err := packfile.NewDecoderForType(scanner, storage, + plumbing.OFSDeltaObject, cache.NewObjectLRUDefault()) c.Assert(err, Equals, plumbing.ErrInvalidType) - _, err = packfile.NewDecoderForType(scanner, storage, plumbing.REFDeltaObject) + _, err = packfile.NewDecoderForType(scanner, storage, + plumbing.REFDeltaObject, cache.NewObjectLRUDefault()) + c.Assert(err, Equals, plumbing.ErrInvalidType) - _, err = packfile.NewDecoderForType(scanner, storage, plumbing.InvalidObject) + _, err = packfile.NewDecoderForType(scanner, storage, plumbing.InvalidObject, + cache.NewObjectLRUDefault()) c.Assert(err, Equals, plumbing.ErrInvalidType) } @@ -313,7 +321,8 @@ func (s *ReaderSuite) TestDecodeObjectAt(c *C) { func (s *ReaderSuite) TestDecodeObjectAtForType(c *C) { f := fixtures.Basic().One() scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, nil, plumbing.TreeObject) + d, err := packfile.NewDecoderForType(scanner, nil, plumbing.TreeObject, + cache.NewObjectLRUDefault()) c.Assert(err, IsNil) // when the packfile is ref-delta based, the offsets are required diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index fd52ed5..9c5ca21 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -18,8 +18,6 @@ import ( "gopkg.in/src-d/go-billy.v4" ) -const DefaultMaxDeltaBaseCacheSize = 92 * cache.MiByte - type ObjectStorage struct { // DeltaBaseCache is an object cache uses to cache delta's bases when DeltaBaseCache cache.Object @@ -30,7 +28,7 @@ type ObjectStorage struct { func newObjectStorage(dir *dotgit.DotGit) (ObjectStorage, error) { s := ObjectStorage{ - DeltaBaseCache: cache.NewObjectLRU(DefaultMaxDeltaBaseCacheSize), + DeltaBaseCache: cache.NewObjectLRUDefault(), dir: dir, } @@ -433,13 +431,12 @@ func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash return nil, err } - d, err := packfile.NewDecoderForType(s, memory.NewStorage(), t) + d, err := packfile.NewDecoderForType(s, memory.NewStorage(), t, cache) if err != nil { return nil, err } d.SetIndex(index) - d.DeltaBaseCache = cache return &packfileIter{ f: f, -- cgit From 2787bd71e6564e21cf225971376ba9b62b6d451b Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Wed, 20 Dec 2017 15:58:25 +0100 Subject: Fix typo and documentation of NewDecoderForType Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 24493ec..dc39bf1 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -89,9 +89,9 @@ func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) { // the specified one, nil will be returned. This is intended to avoid the content // deserialization of all the objects. // -// cacheObject is an ObjectLRU that is used to speed up the process. If cache -// is not needed you can pass nil. To create a cache object with the default -// size you an use the helper cache.ObjectLRUDefault(). +// cacheObject is a cache.Object implementation that is used to speed up the +// process. If cache is not needed you can pass nil. To create an LRU cache +// object with the default size you can use the helper cache.ObjectLRUDefault(). func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, t plumbing.ObjectType, cacheObject cache.Object) (*Decoder, error) { -- cgit From 8f82387beb7ae4ac8936d893f58c6a29992a7050 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Wed, 20 Dec 2017 16:39:45 +0100 Subject: Make DeltaBaseCache private Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 19 +++++++++++++------ storage/filesystem/object.go | 12 ++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index dc39bf1..cb78701 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -52,7 +52,7 @@ var ( // is destroyed. The Offsets and CRCs are calculated whether an // ObjectStorer was provided or not. type Decoder struct { - DeltaBaseCache cache.Object + deltaBaseCache cache.Object s *Scanner o storer.EncodedObjectStorer @@ -84,6 +84,13 @@ func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) { cache.NewObjectLRUDefault()) } +// NewDecoderWithCache is a version of NewDecoder where cache can be specified. +func NewDecoderWithCache(s *Scanner, o storer.EncodedObjectStorer, + cacheObject cache.Object) (*Decoder, error) { + + return NewDecoderForType(s, o, plumbing.AnyObject, cacheObject) +} + // NewDecoderForType returns a new Decoder but in this case for a specific object type. // When an object is read using this Decoder instance and it is not of the same type of // the specified one, nil will be returned. This is intended to avoid the content @@ -108,7 +115,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, return &Decoder{ s: s, o: o, - DeltaBaseCache: cacheObject, + deltaBaseCache: cacheObject, idx: NewIndex(0), offsetToType: make(map[int64]plumbing.ObjectType), @@ -410,19 +417,19 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i } func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { - if d.DeltaBaseCache == nil { + if d.deltaBaseCache == nil { return nil, false } - return d.DeltaBaseCache.Get(h) + return d.deltaBaseCache.Get(h) } func (d *Decoder) cachePut(obj plumbing.EncodedObject) { - if d.DeltaBaseCache == nil { + if d.deltaBaseCache == nil { return } - d.DeltaBaseCache.Put(obj) + d.deltaBaseCache.Put(obj) } func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index 9c5ca21..3ec7304 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -19,8 +19,8 @@ import ( ) type ObjectStorage struct { - // DeltaBaseCache is an object cache uses to cache delta's bases when - DeltaBaseCache cache.Object + // deltaBaseCache is an object cache uses to cache delta's bases when + deltaBaseCache cache.Object dir *dotgit.DotGit index map[plumbing.Hash]*packfile.Index @@ -28,7 +28,7 @@ type ObjectStorage struct { func newObjectStorage(dir *dotgit.DotGit) (ObjectStorage, error) { s := ObjectStorage{ - DeltaBaseCache: cache.NewObjectLRUDefault(), + deltaBaseCache: cache.NewObjectLRUDefault(), dir: dir, } @@ -285,13 +285,13 @@ func (s *ObjectStorage) decodeObjectAt( p := packfile.NewScanner(f) - d, err := packfile.NewDecoder(p, memory.NewStorage()) + d, err := packfile.NewDecoderWithCache(p, memory.NewStorage(), + s.deltaBaseCache) if err != nil { return nil, err } d.SetIndex(idx) - d.DeltaBaseCache = s.DeltaBaseCache obj, err := d.DecodeObjectAt(offset) return obj, err } @@ -398,7 +398,7 @@ func (s *ObjectStorage) buildPackfileIters(t plumbing.ObjectType, seen map[plumb return nil, err } - iter, err := newPackfileIter(pack, t, seen, s.index[h], s.DeltaBaseCache) + iter, err := newPackfileIter(pack, t, seen, s.index[h], s.deltaBaseCache) if err != nil { return nil, err } -- cgit From e39559fba6ea936c5f4659c03c22f9a5a256a4d7 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Wed, 20 Dec 2017 19:01:21 +0100 Subject: Exercise NewObjectLRUDefault in testing Signed-off-by: Javi Fontan --- plumbing/cache/object_test.go | 101 +++++++++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 41 deletions(-) diff --git a/plumbing/cache/object_test.go b/plumbing/cache/object_test.go index b38272f..ec01d60 100644 --- a/plumbing/cache/object_test.go +++ b/plumbing/cache/object_test.go @@ -14,7 +14,7 @@ import ( func Test(t *testing.T) { TestingT(t) } type ObjectSuite struct { - c Object + c map[string]Object aObject plumbing.EncodedObject bObject plumbing.EncodedObject cObject plumbing.EncodedObject @@ -29,70 +29,89 @@ func (s *ObjectSuite) SetUpTest(c *C) { s.cObject = newObject("cccccccccccccccccccccccccccccccccccccccc", 1*Byte) s.dObject = newObject("dddddddddddddddddddddddddddddddddddddddd", 1*Byte) - s.c = NewObjectLRU(2 * Byte) + s.c = make(map[string]Object) + s.c["two_bytes"] = NewObjectLRU(2 * Byte) + s.c["default_lru"] = NewObjectLRUDefault() } func (s *ObjectSuite) TestPutSameObject(c *C) { - s.c.Put(s.aObject) - s.c.Put(s.aObject) - _, ok := s.c.Get(s.aObject.Hash()) - c.Assert(ok, Equals, true) + for _, o := range s.c { + o.Put(s.aObject) + o.Put(s.aObject) + _, ok := o.Get(s.aObject.Hash()) + c.Assert(ok, Equals, true) + } } func (s *ObjectSuite) TestPutBigObject(c *C) { - s.c.Put(s.bObject) - _, ok := s.c.Get(s.aObject.Hash()) - c.Assert(ok, Equals, false) + for _, o := range s.c { + o.Put(s.bObject) + _, ok := o.Get(s.aObject.Hash()) + c.Assert(ok, Equals, false) + } } func (s *ObjectSuite) TestPutCacheOverflow(c *C) { - s.c.Put(s.aObject) - s.c.Put(s.cObject) - s.c.Put(s.dObject) + // this test only works with an specific size + o := s.c["two_bytes"] + + o.Put(s.aObject) + o.Put(s.cObject) + o.Put(s.dObject) - obj, ok := s.c.Get(s.aObject.Hash()) + obj, ok := o.Get(s.aObject.Hash()) c.Assert(ok, Equals, false) c.Assert(obj, IsNil) - obj, ok = s.c.Get(s.cObject.Hash()) + obj, ok = o.Get(s.cObject.Hash()) c.Assert(ok, Equals, true) c.Assert(obj, NotNil) - obj, ok = s.c.Get(s.dObject.Hash()) + obj, ok = o.Get(s.dObject.Hash()) c.Assert(ok, Equals, true) c.Assert(obj, NotNil) } func (s *ObjectSuite) TestClear(c *C) { - s.c.Put(s.aObject) - s.c.Clear() - obj, ok := s.c.Get(s.aObject.Hash()) - c.Assert(ok, Equals, false) - c.Assert(obj, IsNil) + for _, o := range s.c { + o.Put(s.aObject) + o.Clear() + obj, ok := o.Get(s.aObject.Hash()) + c.Assert(ok, Equals, false) + c.Assert(obj, IsNil) + } } func (s *ObjectSuite) TestConcurrentAccess(c *C) { - var wg sync.WaitGroup - - for i := 0; i < 1000; i++ { - wg.Add(3) - go func(i int) { - s.c.Put(newObject(fmt.Sprint(i), FileSize(i))) - wg.Done() - }(i) - - go func(i int) { - if i%30 == 0 { - s.c.Clear() - } - wg.Done() - }(i) - - go func(i int) { - s.c.Get(plumbing.NewHash(fmt.Sprint(i))) - wg.Done() - }(i) + for _, o := range s.c { + var wg sync.WaitGroup + + for i := 0; i < 1000; i++ { + wg.Add(3) + go func(i int) { + o.Put(newObject(fmt.Sprint(i), FileSize(i))) + wg.Done() + }(i) + + go func(i int) { + if i%30 == 0 { + o.Clear() + } + wg.Done() + }(i) + + go func(i int) { + o.Get(plumbing.NewHash(fmt.Sprint(i))) + wg.Done() + }(i) + } + + wg.Wait() } +} + +func (s *ObjectSuite) TestDefaultLRU(c *C) { + defaultLRU := s.c["default_lru"].(*ObjectLRU) - wg.Wait() + c.Assert(defaultLRU.MaxSize, Equals, DefaultMaxSize) } type dummyObject struct { -- cgit