From f07672f5c3cad2e73596ab3d7ca16660f6881df6 Mon Sep 17 00:00:00 2001 From: "Santiago M. Mola" Date: Mon, 24 Jul 2017 13:36:41 +0200 Subject: plumbing/cache: use more explicit interface * renamed Add to Put * Get returns a second bool value to indicate if there was hit or miss. --- plumbing/cache/common.go | 10 ++++++++-- plumbing/cache/object.go | 17 +++++++++++------ plumbing/cache/object_test.go | 30 +++++++++++++++++++----------- plumbing/format/packfile/decoder.go | 13 ++++++------- 4 files changed, 44 insertions(+), 26 deletions(-) (limited to 'plumbing') diff --git a/plumbing/cache/common.go b/plumbing/cache/common.go index 7b90c55..9efc26c 100644 --- a/plumbing/cache/common.go +++ b/plumbing/cache/common.go @@ -11,8 +11,14 @@ const ( type FileSize int64 +// Object is an interface to a object cache. type Object interface { - Add(o plumbing.EncodedObject) - Get(k plumbing.Hash) plumbing.EncodedObject + // Put puts the given object into the cache. Whether this object will + // actually be put into the cache or not is implementation specific. + Put(o plumbing.EncodedObject) + // Get gets an object from the cache given its hash. The second return value + // is true if the object was returned, and false otherwise. + Get(k plumbing.Hash) (plumbing.EncodedObject, bool) + // Clear clears every object from the cache. Clear() } diff --git a/plumbing/cache/object.go b/plumbing/cache/object.go index 44238ce..44e0d32 100644 --- a/plumbing/cache/object.go +++ b/plumbing/cache/object.go @@ -1,6 +1,8 @@ package cache -import "gopkg.in/src-d/go-git.v4/plumbing" +import ( + "gopkg.in/src-d/go-git.v4/plumbing" +) const ( initialQueueSize = 20 @@ -25,12 +27,14 @@ func NewObjectFIFO(size FileSize) *ObjectFIFO { } } -// Add adds a new object to the cache. If the object size is greater than the +// Put puts a new object to the cache. If the object size is greater than the // cache size, the object is not added. -func (c *ObjectFIFO) Add(o plumbing.EncodedObject) { +func (c *ObjectFIFO) Put(o plumbing.EncodedObject) { + objSize := FileSize(o.Size()) + // if the size of the object is bigger or equal than the cache size, // skip it - if FileSize(o.Size()) >= c.maxSize { + if objSize >= c.maxSize { return } @@ -56,8 +60,9 @@ func (c *ObjectFIFO) Add(o plumbing.EncodedObject) { // Get returns an object by his hash. If the object is not found in the cache, it // returns nil -func (c *ObjectFIFO) Get(k plumbing.Hash) plumbing.EncodedObject { - return c.objects[k] +func (c *ObjectFIFO) Get(k plumbing.Hash) (plumbing.EncodedObject, bool) { + obj, ok := c.objects[k] + return obj, ok } // Clear the content of this object cache diff --git a/plumbing/cache/object_test.go b/plumbing/cache/object_test.go index 80cd17b..2a55acf 100644 --- a/plumbing/cache/object_test.go +++ b/plumbing/cache/object_test.go @@ -31,14 +31,14 @@ func (s *ObjectSuite) SetUpTest(c *C) { } func (s *ObjectSuite) TestAdd_SameObject(c *C) { - s.c.Add(s.aObject) + s.c.Put(s.aObject) c.Assert(s.c.actualSize, Equals, 1*Byte) - s.c.Add(s.aObject) + s.c.Put(s.aObject) c.Assert(s.c.actualSize, Equals, 1*Byte) } func (s *ObjectSuite) TestAdd_BigObject(c *C) { - s.c.Add(s.bObject) + s.c.Put(s.bObject) c.Assert(s.c.actualSize, Equals, 0*Byte) c.Assert(s.c.actualSize, Equals, 0*KiByte) c.Assert(s.c.actualSize, Equals, 0*MiByte) @@ -47,24 +47,32 @@ func (s *ObjectSuite) TestAdd_BigObject(c *C) { } func (s *ObjectSuite) TestAdd_CacheOverflow(c *C) { - s.c.Add(s.aObject) + s.c.Put(s.aObject) c.Assert(s.c.actualSize, Equals, 1*Byte) - s.c.Add(s.cObject) + s.c.Put(s.cObject) c.Assert(len(s.c.objects), Equals, 2) - s.c.Add(s.dObject) + s.c.Put(s.dObject) c.Assert(len(s.c.objects), Equals, 2) - c.Assert(s.c.Get(s.aObject.Hash()), IsNil) - c.Assert(s.c.Get(s.cObject.Hash()), NotNil) - c.Assert(s.c.Get(s.dObject.Hash()), NotNil) + obj, ok := s.c.Get(s.aObject.Hash()) + c.Assert(ok, Equals, false) + c.Assert(obj, IsNil) + obj, ok = s.c.Get(s.cObject.Hash()) + c.Assert(ok, Equals, true) + c.Assert(obj, NotNil) + obj, ok = s.c.Get(s.dObject.Hash()) + c.Assert(ok, Equals, true) + c.Assert(obj, NotNil) } func (s *ObjectSuite) TestClear(c *C) { - s.c.Add(s.aObject) + s.c.Put(s.aObject) c.Assert(s.c.actualSize, Equals, 1*Byte) s.c.Clear() c.Assert(s.c.actualSize, Equals, 0*Byte) - c.Assert(s.c.Get(s.aObject.Hash()), IsNil) + obj, ok := s.c.Get(s.aObject.Hash()) + c.Assert(ok, Equals, false) + c.Assert(obj, IsNil) } type dummyObject struct { diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 39680a3..3d2eb3b 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -355,9 +355,8 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum return 0, err } - base := d.cache.Get(ref) - - if base == nil { + base, ok := d.cache.Get(ref) + if !ok { base, err = d.recallByHash(ref) if err != nil { return 0, err @@ -366,7 +365,7 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) - d.cache.Add(obj) + d.cache.Put(obj) return crc, err } @@ -381,10 +380,10 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i e, ok := d.idx.LookupOffset(uint64(offset)) var base plumbing.EncodedObject if ok { - base = d.cache.Get(e.Hash) + base, ok = d.cache.Get(e.Hash) } - if base == nil { + if !ok { base, err = d.recallByOffset(offset) if err != nil { return 0, err @@ -393,7 +392,7 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) - d.cache.Add(obj) + d.cache.Put(obj) return crc, err } -- cgit From b3fc7760ba332306bb1faa64c8a101a2e605077f Mon Sep 17 00:00:00 2001 From: "Santiago M. Mola" Date: Mon, 24 Jul 2017 14:04:51 +0200 Subject: storage/filesystem: reuse delta cache Reuse delta base object cache for packfile decoders across multiple instances. --- plumbing/format/packfile/decoder.go | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'plumbing') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 3d2eb3b..e49de51 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -52,6 +52,8 @@ var ( // is destroyed. The Offsets and CRCs are calculated whether an // ObjectStorer was provided or not. type Decoder struct { + DeltaBaseCache cache.Object + s *Scanner o storer.EncodedObjectStorer tx storer.Transaction @@ -65,8 +67,6 @@ type Decoder struct { offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType - - cache cache.Object } // NewDecoder returns a new Decoder that decodes a Packfile using the given @@ -107,8 +107,6 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, idx: NewIndex(0), offsetToType: make(map[int64]plumbing.ObjectType, 0), decoderType: t, - - cache: cache.NewObjectFIFO(cache.MaxSize), }, nil } @@ -355,7 +353,7 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum return 0, err } - base, ok := d.cache.Get(ref) + base, ok := d.cacheGet(ref) if !ok { base, err = d.recallByHash(ref) if err != nil { @@ -365,7 +363,7 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) - d.cache.Put(obj) + d.cachePut(obj) return crc, err } @@ -380,7 +378,7 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i e, ok := d.idx.LookupOffset(uint64(offset)) var base plumbing.EncodedObject if ok { - base, ok = d.cache.Get(e.Hash) + base, ok = d.cacheGet(e.Hash) } if !ok { @@ -392,11 +390,27 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) - d.cache.Put(obj) + d.cachePut(obj) return crc, err } +func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { + if d.DeltaBaseCache == nil { + return nil, false + } + + return d.DeltaBaseCache.Get(h) +} + +func (d *Decoder) cachePut(obj plumbing.EncodedObject) { + if d.DeltaBaseCache == nil { + return + } + + d.DeltaBaseCache.Put(obj) +} + func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { if d.s.IsSeekable { return d.DecodeObjectAt(o) @@ -454,7 +468,5 @@ func (d *Decoder) Index() *Index { // Close closes the Scanner. usually this mean that the whole reader is read and // discarded func (d *Decoder) Close() error { - d.cache.Clear() - return d.s.Close() } -- cgit From ae1c4f3df729c3a7fed4cd5a1f530c95d640497a Mon Sep 17 00:00:00 2001 From: "Santiago M. Mola" Date: Mon, 24 Jul 2017 14:19:21 +0200 Subject: plumbing/cache: change FIFO to LRU cache --- plumbing/cache/object.go | 73 ------------------------------------- plumbing/cache/object_lru.go | 84 +++++++++++++++++++++++++++++++++++++++++++ plumbing/cache/object_test.go | 26 +++++--------- 3 files changed, 93 insertions(+), 90 deletions(-) delete mode 100644 plumbing/cache/object.go create mode 100644 plumbing/cache/object_lru.go (limited to 'plumbing') diff --git a/plumbing/cache/object.go b/plumbing/cache/object.go deleted file mode 100644 index 44e0d32..0000000 --- a/plumbing/cache/object.go +++ /dev/null @@ -1,73 +0,0 @@ -package cache - -import ( - "gopkg.in/src-d/go-git.v4/plumbing" -) - -const ( - initialQueueSize = 20 - MaxSize = 10 * MiByte -) - -type ObjectFIFO struct { - objects map[plumbing.Hash]plumbing.EncodedObject - order *queue - - maxSize FileSize - actualSize FileSize -} - -// NewObjectFIFO returns an Object cache that keeps the newest objects that fit -// into the specific memory size -func NewObjectFIFO(size FileSize) *ObjectFIFO { - return &ObjectFIFO{ - objects: make(map[plumbing.Hash]plumbing.EncodedObject), - order: newQueue(initialQueueSize), - maxSize: size, - } -} - -// Put puts a new object to the cache. If the object size is greater than the -// cache size, the object is not added. -func (c *ObjectFIFO) Put(o plumbing.EncodedObject) { - objSize := FileSize(o.Size()) - - // if the size of the object is bigger or equal than the cache size, - // skip it - if objSize >= c.maxSize { - return - } - - // if the object is into the cache, do not add it again - if _, ok := c.objects[o.Hash()]; ok { - return - } - - // delete the oldest object if cache is full - if c.actualSize >= c.maxSize { - h := c.order.Pop() - o := c.objects[h] - if o != nil { - c.actualSize -= FileSize(o.Size()) - delete(c.objects, h) - } - } - - c.objects[o.Hash()] = o - c.order.Push(o.Hash()) - c.actualSize += FileSize(o.Size()) -} - -// Get returns an object by his hash. If the object is not found in the cache, it -// returns nil -func (c *ObjectFIFO) Get(k plumbing.Hash) (plumbing.EncodedObject, bool) { - obj, ok := c.objects[k] - return obj, ok -} - -// Clear the content of this object cache -func (c *ObjectFIFO) Clear() { - c.objects = make(map[plumbing.Hash]plumbing.EncodedObject) - c.order = newQueue(initialQueueSize) - c.actualSize = 0 -} diff --git a/plumbing/cache/object_lru.go b/plumbing/cache/object_lru.go new file mode 100644 index 0000000..e4c3160 --- /dev/null +++ b/plumbing/cache/object_lru.go @@ -0,0 +1,84 @@ +package cache + +import ( + "container/list" + + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// ObjectLRU implements an object cache with an LRU eviction policy and a +// maximum size (measured in object size). +type ObjectLRU struct { + MaxSize FileSize + + actualSize FileSize + ll *list.List + cache map[interface{}]*list.Element +} + +// NewObjectLRU creates a new ObjectLRU with the given maximum size. The maximum +// size will never be exceeded. +func NewObjectLRU(maxSize FileSize) *ObjectLRU { + return &ObjectLRU{MaxSize: maxSize} +} + +// Put puts an object into the cache. If the object is already in the cache, it +// will be marked as used. Otherwise, it will be inserted. A single object might +// be evicted to make room for the new object. +func (c *ObjectLRU) Put(obj plumbing.EncodedObject) { + if c.cache == nil { + c.actualSize = 0 + c.cache = make(map[interface{}]*list.Element, 1000) + c.ll = list.New() + } + + key := obj.Hash() + if ee, ok := c.cache[key]; ok { + c.ll.MoveToFront(ee) + ee.Value = obj + return + } + + objSize := FileSize(obj.Size()) + + if objSize >= c.MaxSize { + return + } + + if c.actualSize+objSize > c.MaxSize { + last := c.ll.Back() + lastObj := last.Value.(plumbing.EncodedObject) + lastSize := FileSize(lastObj.Size()) + + c.ll.Remove(last) + delete(c.cache, lastObj.Hash()) + c.actualSize -= lastSize + + if c.actualSize+objSize > c.MaxSize { + return + } + } + + ee := c.ll.PushFront(obj) + c.cache[key] = ee + c.actualSize += objSize +} + +// Get returns an object by its hash. It marks the object as used. If the object +// is not in the cache, (nil, false) will be returned. +func (c *ObjectLRU) Get(k plumbing.Hash) (plumbing.EncodedObject, bool) { + ee, ok := c.cache[k] + if !ok { + return nil, false + } + + c.ll.MoveToFront(ee) + return ee.Value.(plumbing.EncodedObject), true +} + +// Clear the content of this object cache. +func (c *ObjectLRU) Clear() { + c.ll = nil + c.cache = nil + c.actualSize = 0 +} diff --git a/plumbing/cache/object_test.go b/plumbing/cache/object_test.go index 2a55acf..9359455 100644 --- a/plumbing/cache/object_test.go +++ b/plumbing/cache/object_test.go @@ -12,7 +12,7 @@ import ( func Test(t *testing.T) { TestingT(t) } type ObjectSuite struct { - c *ObjectFIFO + c Object aObject plumbing.EncodedObject bObject plumbing.EncodedObject cObject plumbing.EncodedObject @@ -27,32 +27,26 @@ func (s *ObjectSuite) SetUpTest(c *C) { s.cObject = newObject("cccccccccccccccccccccccccccccccccccccccc", 1*Byte) s.dObject = newObject("dddddddddddddddddddddddddddddddddddddddd", 1*Byte) - s.c = NewObjectFIFO(2 * Byte) + s.c = NewObjectLRU(2 * Byte) } -func (s *ObjectSuite) TestAdd_SameObject(c *C) { +func (s *ObjectSuite) TestPutSameObject(c *C) { s.c.Put(s.aObject) - c.Assert(s.c.actualSize, Equals, 1*Byte) s.c.Put(s.aObject) - c.Assert(s.c.actualSize, Equals, 1*Byte) + _, ok := s.c.Get(s.aObject.Hash()) + c.Assert(ok, Equals, true) } -func (s *ObjectSuite) TestAdd_BigObject(c *C) { +func (s *ObjectSuite) TestPutBigObject(c *C) { s.c.Put(s.bObject) - c.Assert(s.c.actualSize, Equals, 0*Byte) - c.Assert(s.c.actualSize, Equals, 0*KiByte) - c.Assert(s.c.actualSize, Equals, 0*MiByte) - c.Assert(s.c.actualSize, Equals, 0*GiByte) - c.Assert(len(s.c.objects), Equals, 0) + _, ok := s.c.Get(s.aObject.Hash()) + c.Assert(ok, Equals, false) } -func (s *ObjectSuite) TestAdd_CacheOverflow(c *C) { +func (s *ObjectSuite) TestPutCacheOverflow(c *C) { s.c.Put(s.aObject) - c.Assert(s.c.actualSize, Equals, 1*Byte) s.c.Put(s.cObject) - c.Assert(len(s.c.objects), Equals, 2) s.c.Put(s.dObject) - c.Assert(len(s.c.objects), Equals, 2) obj, ok := s.c.Get(s.aObject.Hash()) c.Assert(ok, Equals, false) @@ -67,9 +61,7 @@ func (s *ObjectSuite) TestAdd_CacheOverflow(c *C) { func (s *ObjectSuite) TestClear(c *C) { s.c.Put(s.aObject) - c.Assert(s.c.actualSize, Equals, 1*Byte) s.c.Clear() - c.Assert(s.c.actualSize, Equals, 0*Byte) obj, ok := s.c.Get(s.aObject.Hash()) c.Assert(ok, Equals, false) c.Assert(obj, IsNil) -- cgit