diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2017-07-27 14:55:47 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-27 14:55:47 +0200 |
commit | 8c57f96d4678087e06035095e67ad1239d6509ce (patch) | |
tree | a1468ab8a942435435c644abdc9bac3338990bc2 | |
parent | 854ffa16f650706200a6ebb5505bb448b5c64035 (diff) | |
parent | ae1c4f3df729c3a7fed4cd5a1f530c95d640497a (diff) | |
download | go-git-8c57f96d4678087e06035095e67ad1239d6509ce.tar.gz |
Merge pull request #514 from smola/use-cache-delta
cache: reuse object cache for delta resolution, use LRU policy
-rw-r--r-- | plumbing/cache/common.go | 10 | ||||
-rw-r--r-- | plumbing/cache/object.go | 68 | ||||
-rw-r--r-- | plumbing/cache/object_lru.go | 84 | ||||
-rw-r--r-- | plumbing/cache/object_test.go | 58 | ||||
-rw-r--r-- | plumbing/format/packfile/decoder.go | 37 | ||||
-rw-r--r-- | storage/filesystem/object.go | 10 |
6 files changed, 154 insertions, 113 deletions
diff --git a/plumbing/cache/common.go b/plumbing/cache/common.go index 7b90c55..9efc26c 100644 --- a/plumbing/cache/common.go +++ b/plumbing/cache/common.go @@ -11,8 +11,14 @@ const ( type FileSize int64 +// Object is an interface to a object cache. type Object interface { - Add(o plumbing.EncodedObject) - Get(k plumbing.Hash) plumbing.EncodedObject + // Put puts the given object into the cache. Whether this object will + // actually be put into the cache or not is implementation specific. + Put(o plumbing.EncodedObject) + // Get gets an object from the cache given its hash. The second return value + // is true if the object was returned, and false otherwise. + Get(k plumbing.Hash) (plumbing.EncodedObject, bool) + // Clear clears every object from the cache. Clear() } diff --git a/plumbing/cache/object.go b/plumbing/cache/object.go deleted file mode 100644 index 44238ce..0000000 --- a/plumbing/cache/object.go +++ /dev/null @@ -1,68 +0,0 @@ -package cache - -import "gopkg.in/src-d/go-git.v4/plumbing" - -const ( - initialQueueSize = 20 - MaxSize = 10 * MiByte -) - -type ObjectFIFO struct { - objects map[plumbing.Hash]plumbing.EncodedObject - order *queue - - maxSize FileSize - actualSize FileSize -} - -// NewObjectFIFO returns an Object cache that keeps the newest objects that fit -// into the specific memory size -func NewObjectFIFO(size FileSize) *ObjectFIFO { - return &ObjectFIFO{ - objects: make(map[plumbing.Hash]plumbing.EncodedObject), - order: newQueue(initialQueueSize), - maxSize: size, - } -} - -// Add adds a new object to the cache. If the object size is greater than the -// cache size, the object is not added. -func (c *ObjectFIFO) Add(o plumbing.EncodedObject) { - // if the size of the object is bigger or equal than the cache size, - // skip it - if FileSize(o.Size()) >= c.maxSize { - return - } - - // if the object is into the cache, do not add it again - if _, ok := c.objects[o.Hash()]; ok { - return - } - - // delete the oldest object if cache is full - if c.actualSize >= c.maxSize { - h := c.order.Pop() - o := c.objects[h] - if o != nil { - c.actualSize -= FileSize(o.Size()) - delete(c.objects, h) - } - } - - c.objects[o.Hash()] = o - c.order.Push(o.Hash()) - c.actualSize += FileSize(o.Size()) -} - -// Get returns an object by his hash. If the object is not found in the cache, it -// returns nil -func (c *ObjectFIFO) Get(k plumbing.Hash) plumbing.EncodedObject { - return c.objects[k] -} - -// Clear the content of this object cache -func (c *ObjectFIFO) Clear() { - c.objects = make(map[plumbing.Hash]plumbing.EncodedObject) - c.order = newQueue(initialQueueSize) - c.actualSize = 0 -} diff --git a/plumbing/cache/object_lru.go b/plumbing/cache/object_lru.go new file mode 100644 index 0000000..e4c3160 --- /dev/null +++ b/plumbing/cache/object_lru.go @@ -0,0 +1,84 @@ +package cache + +import ( + "container/list" + + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// ObjectLRU implements an object cache with an LRU eviction policy and a +// maximum size (measured in object size). +type ObjectLRU struct { + MaxSize FileSize + + actualSize FileSize + ll *list.List + cache map[interface{}]*list.Element +} + +// NewObjectLRU creates a new ObjectLRU with the given maximum size. The maximum +// size will never be exceeded. +func NewObjectLRU(maxSize FileSize) *ObjectLRU { + return &ObjectLRU{MaxSize: maxSize} +} + +// Put puts an object into the cache. If the object is already in the cache, it +// will be marked as used. Otherwise, it will be inserted. A single object might +// be evicted to make room for the new object. +func (c *ObjectLRU) Put(obj plumbing.EncodedObject) { + if c.cache == nil { + c.actualSize = 0 + c.cache = make(map[interface{}]*list.Element, 1000) + c.ll = list.New() + } + + key := obj.Hash() + if ee, ok := c.cache[key]; ok { + c.ll.MoveToFront(ee) + ee.Value = obj + return + } + + objSize := FileSize(obj.Size()) + + if objSize >= c.MaxSize { + return + } + + if c.actualSize+objSize > c.MaxSize { + last := c.ll.Back() + lastObj := last.Value.(plumbing.EncodedObject) + lastSize := FileSize(lastObj.Size()) + + c.ll.Remove(last) + delete(c.cache, lastObj.Hash()) + c.actualSize -= lastSize + + if c.actualSize+objSize > c.MaxSize { + return + } + } + + ee := c.ll.PushFront(obj) + c.cache[key] = ee + c.actualSize += objSize +} + +// Get returns an object by its hash. It marks the object as used. If the object +// is not in the cache, (nil, false) will be returned. +func (c *ObjectLRU) Get(k plumbing.Hash) (plumbing.EncodedObject, bool) { + ee, ok := c.cache[k] + if !ok { + return nil, false + } + + c.ll.MoveToFront(ee) + return ee.Value.(plumbing.EncodedObject), true +} + +// Clear the content of this object cache. +func (c *ObjectLRU) Clear() { + c.ll = nil + c.cache = nil + c.actualSize = 0 +} diff --git a/plumbing/cache/object_test.go b/plumbing/cache/object_test.go index 80cd17b..9359455 100644 --- a/plumbing/cache/object_test.go +++ b/plumbing/cache/object_test.go @@ -12,7 +12,7 @@ import ( func Test(t *testing.T) { TestingT(t) } type ObjectSuite struct { - c *ObjectFIFO + c Object aObject plumbing.EncodedObject bObject plumbing.EncodedObject cObject plumbing.EncodedObject @@ -27,44 +27,44 @@ func (s *ObjectSuite) SetUpTest(c *C) { s.cObject = newObject("cccccccccccccccccccccccccccccccccccccccc", 1*Byte) s.dObject = newObject("dddddddddddddddddddddddddddddddddddddddd", 1*Byte) - s.c = NewObjectFIFO(2 * Byte) + s.c = NewObjectLRU(2 * Byte) } -func (s *ObjectSuite) TestAdd_SameObject(c *C) { - s.c.Add(s.aObject) - c.Assert(s.c.actualSize, Equals, 1*Byte) - s.c.Add(s.aObject) - c.Assert(s.c.actualSize, Equals, 1*Byte) +func (s *ObjectSuite) TestPutSameObject(c *C) { + s.c.Put(s.aObject) + s.c.Put(s.aObject) + _, ok := s.c.Get(s.aObject.Hash()) + c.Assert(ok, Equals, true) } -func (s *ObjectSuite) TestAdd_BigObject(c *C) { - s.c.Add(s.bObject) - c.Assert(s.c.actualSize, Equals, 0*Byte) - c.Assert(s.c.actualSize, Equals, 0*KiByte) - c.Assert(s.c.actualSize, Equals, 0*MiByte) - c.Assert(s.c.actualSize, Equals, 0*GiByte) - c.Assert(len(s.c.objects), Equals, 0) +func (s *ObjectSuite) TestPutBigObject(c *C) { + s.c.Put(s.bObject) + _, ok := s.c.Get(s.aObject.Hash()) + c.Assert(ok, Equals, false) } -func (s *ObjectSuite) TestAdd_CacheOverflow(c *C) { - s.c.Add(s.aObject) - c.Assert(s.c.actualSize, Equals, 1*Byte) - s.c.Add(s.cObject) - c.Assert(len(s.c.objects), Equals, 2) - s.c.Add(s.dObject) - c.Assert(len(s.c.objects), Equals, 2) - - c.Assert(s.c.Get(s.aObject.Hash()), IsNil) - c.Assert(s.c.Get(s.cObject.Hash()), NotNil) - c.Assert(s.c.Get(s.dObject.Hash()), NotNil) +func (s *ObjectSuite) TestPutCacheOverflow(c *C) { + s.c.Put(s.aObject) + s.c.Put(s.cObject) + s.c.Put(s.dObject) + + obj, ok := s.c.Get(s.aObject.Hash()) + c.Assert(ok, Equals, false) + c.Assert(obj, IsNil) + obj, ok = s.c.Get(s.cObject.Hash()) + c.Assert(ok, Equals, true) + c.Assert(obj, NotNil) + obj, ok = s.c.Get(s.dObject.Hash()) + c.Assert(ok, Equals, true) + c.Assert(obj, NotNil) } func (s *ObjectSuite) TestClear(c *C) { - s.c.Add(s.aObject) - c.Assert(s.c.actualSize, Equals, 1*Byte) + s.c.Put(s.aObject) s.c.Clear() - c.Assert(s.c.actualSize, Equals, 0*Byte) - c.Assert(s.c.Get(s.aObject.Hash()), IsNil) + obj, ok := s.c.Get(s.aObject.Hash()) + c.Assert(ok, Equals, false) + c.Assert(obj, IsNil) } type dummyObject struct { diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 39680a3..e49de51 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -52,6 +52,8 @@ var ( // is destroyed. The Offsets and CRCs are calculated whether an // ObjectStorer was provided or not. type Decoder struct { + DeltaBaseCache cache.Object + s *Scanner o storer.EncodedObjectStorer tx storer.Transaction @@ -65,8 +67,6 @@ type Decoder struct { offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType - - cache cache.Object } // NewDecoder returns a new Decoder that decodes a Packfile using the given @@ -107,8 +107,6 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, idx: NewIndex(0), offsetToType: make(map[int64]plumbing.ObjectType, 0), decoderType: t, - - cache: cache.NewObjectFIFO(cache.MaxSize), }, nil } @@ -355,9 +353,8 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum return 0, err } - base := d.cache.Get(ref) - - if base == nil { + base, ok := d.cacheGet(ref) + if !ok { base, err = d.recallByHash(ref) if err != nil { return 0, err @@ -366,7 +363,7 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) - d.cache.Add(obj) + d.cachePut(obj) return crc, err } @@ -381,10 +378,10 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i e, ok := d.idx.LookupOffset(uint64(offset)) var base plumbing.EncodedObject if ok { - base = d.cache.Get(e.Hash) + base, ok = d.cacheGet(e.Hash) } - if base == nil { + if !ok { base, err = d.recallByOffset(offset) if err != nil { return 0, err @@ -393,11 +390,27 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) - d.cache.Add(obj) + d.cachePut(obj) return crc, err } +func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { + if d.DeltaBaseCache == nil { + return nil, false + } + + return d.DeltaBaseCache.Get(h) +} + +func (d *Decoder) cachePut(obj plumbing.EncodedObject) { + if d.DeltaBaseCache == nil { + return + } + + d.DeltaBaseCache.Put(obj) +} + func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { if d.s.IsSeekable { return d.DecodeObjectAt(o) @@ -455,7 +468,5 @@ func (d *Decoder) Index() *Index { // Close closes the Scanner. usually this mean that the whole reader is read and // discarded func (d *Decoder) Close() error { - d.cache.Clear() - return d.s.Close() } diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index e235b33..6dd910b 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -5,6 +5,7 @@ import ( "os" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/format/objfile" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" @@ -16,14 +17,20 @@ import ( "gopkg.in/src-d/go-billy.v3" ) +const DefaultMaxDeltaBaseCacheSize = 92 * cache.MiByte + type ObjectStorage struct { + // DeltaBaseCache is an object cache uses to cache delta's bases when + DeltaBaseCache cache.Object + dir *dotgit.DotGit index map[plumbing.Hash]*packfile.Index } func newObjectStorage(dir *dotgit.DotGit) (ObjectStorage, error) { s := ObjectStorage{ - dir: dir, + DeltaBaseCache: cache.NewObjectLRU(DefaultMaxDeltaBaseCacheSize), + dir: dir, } return s, nil @@ -198,6 +205,7 @@ func (s *ObjectStorage) getFromPackfile(h plumbing.Hash) (plumbing.EncodedObject return nil, err } + d.DeltaBaseCache = s.DeltaBaseCache d.SetIndex(s.index[pack]) obj, err := d.DecodeObjectAt(offset) return obj, err |