aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2017-07-27 14:55:47 +0200
committerGitHub <noreply@github.com>2017-07-27 14:55:47 +0200
commit8c57f96d4678087e06035095e67ad1239d6509ce (patch)
treea1468ab8a942435435c644abdc9bac3338990bc2
parent854ffa16f650706200a6ebb5505bb448b5c64035 (diff)
parentae1c4f3df729c3a7fed4cd5a1f530c95d640497a (diff)
downloadgo-git-8c57f96d4678087e06035095e67ad1239d6509ce.tar.gz
Merge pull request #514 from smola/use-cache-delta
cache: reuse object cache for delta resolution, use LRU policy
-rw-r--r--plumbing/cache/common.go10
-rw-r--r--plumbing/cache/object.go68
-rw-r--r--plumbing/cache/object_lru.go84
-rw-r--r--plumbing/cache/object_test.go58
-rw-r--r--plumbing/format/packfile/decoder.go37
-rw-r--r--storage/filesystem/object.go10
6 files changed, 154 insertions, 113 deletions
diff --git a/plumbing/cache/common.go b/plumbing/cache/common.go
index 7b90c55..9efc26c 100644
--- a/plumbing/cache/common.go
+++ b/plumbing/cache/common.go
@@ -11,8 +11,14 @@ const (
type FileSize int64
+// Object is an interface to a object cache.
type Object interface {
- Add(o plumbing.EncodedObject)
- Get(k plumbing.Hash) plumbing.EncodedObject
+ // Put puts the given object into the cache. Whether this object will
+ // actually be put into the cache or not is implementation specific.
+ Put(o plumbing.EncodedObject)
+ // Get gets an object from the cache given its hash. The second return value
+ // is true if the object was returned, and false otherwise.
+ Get(k plumbing.Hash) (plumbing.EncodedObject, bool)
+ // Clear clears every object from the cache.
Clear()
}
diff --git a/plumbing/cache/object.go b/plumbing/cache/object.go
deleted file mode 100644
index 44238ce..0000000
--- a/plumbing/cache/object.go
+++ /dev/null
@@ -1,68 +0,0 @@
-package cache
-
-import "gopkg.in/src-d/go-git.v4/plumbing"
-
-const (
- initialQueueSize = 20
- MaxSize = 10 * MiByte
-)
-
-type ObjectFIFO struct {
- objects map[plumbing.Hash]plumbing.EncodedObject
- order *queue
-
- maxSize FileSize
- actualSize FileSize
-}
-
-// NewObjectFIFO returns an Object cache that keeps the newest objects that fit
-// into the specific memory size
-func NewObjectFIFO(size FileSize) *ObjectFIFO {
- return &ObjectFIFO{
- objects: make(map[plumbing.Hash]plumbing.EncodedObject),
- order: newQueue(initialQueueSize),
- maxSize: size,
- }
-}
-
-// Add adds a new object to the cache. If the object size is greater than the
-// cache size, the object is not added.
-func (c *ObjectFIFO) Add(o plumbing.EncodedObject) {
- // if the size of the object is bigger or equal than the cache size,
- // skip it
- if FileSize(o.Size()) >= c.maxSize {
- return
- }
-
- // if the object is into the cache, do not add it again
- if _, ok := c.objects[o.Hash()]; ok {
- return
- }
-
- // delete the oldest object if cache is full
- if c.actualSize >= c.maxSize {
- h := c.order.Pop()
- o := c.objects[h]
- if o != nil {
- c.actualSize -= FileSize(o.Size())
- delete(c.objects, h)
- }
- }
-
- c.objects[o.Hash()] = o
- c.order.Push(o.Hash())
- c.actualSize += FileSize(o.Size())
-}
-
-// Get returns an object by his hash. If the object is not found in the cache, it
-// returns nil
-func (c *ObjectFIFO) Get(k plumbing.Hash) plumbing.EncodedObject {
- return c.objects[k]
-}
-
-// Clear the content of this object cache
-func (c *ObjectFIFO) Clear() {
- c.objects = make(map[plumbing.Hash]plumbing.EncodedObject)
- c.order = newQueue(initialQueueSize)
- c.actualSize = 0
-}
diff --git a/plumbing/cache/object_lru.go b/plumbing/cache/object_lru.go
new file mode 100644
index 0000000..e4c3160
--- /dev/null
+++ b/plumbing/cache/object_lru.go
@@ -0,0 +1,84 @@
+package cache
+
+import (
+ "container/list"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+)
+
+// ObjectLRU implements an object cache with an LRU eviction policy and a
+// maximum size (measured in object size).
+type ObjectLRU struct {
+ MaxSize FileSize
+
+ actualSize FileSize
+ ll *list.List
+ cache map[interface{}]*list.Element
+}
+
+// NewObjectLRU creates a new ObjectLRU with the given maximum size. The maximum
+// size will never be exceeded.
+func NewObjectLRU(maxSize FileSize) *ObjectLRU {
+ return &ObjectLRU{MaxSize: maxSize}
+}
+
+// Put puts an object into the cache. If the object is already in the cache, it
+// will be marked as used. Otherwise, it will be inserted. A single object might
+// be evicted to make room for the new object.
+func (c *ObjectLRU) Put(obj plumbing.EncodedObject) {
+ if c.cache == nil {
+ c.actualSize = 0
+ c.cache = make(map[interface{}]*list.Element, 1000)
+ c.ll = list.New()
+ }
+
+ key := obj.Hash()
+ if ee, ok := c.cache[key]; ok {
+ c.ll.MoveToFront(ee)
+ ee.Value = obj
+ return
+ }
+
+ objSize := FileSize(obj.Size())
+
+ if objSize >= c.MaxSize {
+ return
+ }
+
+ if c.actualSize+objSize > c.MaxSize {
+ last := c.ll.Back()
+ lastObj := last.Value.(plumbing.EncodedObject)
+ lastSize := FileSize(lastObj.Size())
+
+ c.ll.Remove(last)
+ delete(c.cache, lastObj.Hash())
+ c.actualSize -= lastSize
+
+ if c.actualSize+objSize > c.MaxSize {
+ return
+ }
+ }
+
+ ee := c.ll.PushFront(obj)
+ c.cache[key] = ee
+ c.actualSize += objSize
+}
+
+// Get returns an object by its hash. It marks the object as used. If the object
+// is not in the cache, (nil, false) will be returned.
+func (c *ObjectLRU) Get(k plumbing.Hash) (plumbing.EncodedObject, bool) {
+ ee, ok := c.cache[k]
+ if !ok {
+ return nil, false
+ }
+
+ c.ll.MoveToFront(ee)
+ return ee.Value.(plumbing.EncodedObject), true
+}
+
+// Clear the content of this object cache.
+func (c *ObjectLRU) Clear() {
+ c.ll = nil
+ c.cache = nil
+ c.actualSize = 0
+}
diff --git a/plumbing/cache/object_test.go b/plumbing/cache/object_test.go
index 80cd17b..9359455 100644
--- a/plumbing/cache/object_test.go
+++ b/plumbing/cache/object_test.go
@@ -12,7 +12,7 @@ import (
func Test(t *testing.T) { TestingT(t) }
type ObjectSuite struct {
- c *ObjectFIFO
+ c Object
aObject plumbing.EncodedObject
bObject plumbing.EncodedObject
cObject plumbing.EncodedObject
@@ -27,44 +27,44 @@ func (s *ObjectSuite) SetUpTest(c *C) {
s.cObject = newObject("cccccccccccccccccccccccccccccccccccccccc", 1*Byte)
s.dObject = newObject("dddddddddddddddddddddddddddddddddddddddd", 1*Byte)
- s.c = NewObjectFIFO(2 * Byte)
+ s.c = NewObjectLRU(2 * Byte)
}
-func (s *ObjectSuite) TestAdd_SameObject(c *C) {
- s.c.Add(s.aObject)
- c.Assert(s.c.actualSize, Equals, 1*Byte)
- s.c.Add(s.aObject)
- c.Assert(s.c.actualSize, Equals, 1*Byte)
+func (s *ObjectSuite) TestPutSameObject(c *C) {
+ s.c.Put(s.aObject)
+ s.c.Put(s.aObject)
+ _, ok := s.c.Get(s.aObject.Hash())
+ c.Assert(ok, Equals, true)
}
-func (s *ObjectSuite) TestAdd_BigObject(c *C) {
- s.c.Add(s.bObject)
- c.Assert(s.c.actualSize, Equals, 0*Byte)
- c.Assert(s.c.actualSize, Equals, 0*KiByte)
- c.Assert(s.c.actualSize, Equals, 0*MiByte)
- c.Assert(s.c.actualSize, Equals, 0*GiByte)
- c.Assert(len(s.c.objects), Equals, 0)
+func (s *ObjectSuite) TestPutBigObject(c *C) {
+ s.c.Put(s.bObject)
+ _, ok := s.c.Get(s.aObject.Hash())
+ c.Assert(ok, Equals, false)
}
-func (s *ObjectSuite) TestAdd_CacheOverflow(c *C) {
- s.c.Add(s.aObject)
- c.Assert(s.c.actualSize, Equals, 1*Byte)
- s.c.Add(s.cObject)
- c.Assert(len(s.c.objects), Equals, 2)
- s.c.Add(s.dObject)
- c.Assert(len(s.c.objects), Equals, 2)
-
- c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
- c.Assert(s.c.Get(s.cObject.Hash()), NotNil)
- c.Assert(s.c.Get(s.dObject.Hash()), NotNil)
+func (s *ObjectSuite) TestPutCacheOverflow(c *C) {
+ s.c.Put(s.aObject)
+ s.c.Put(s.cObject)
+ s.c.Put(s.dObject)
+
+ obj, ok := s.c.Get(s.aObject.Hash())
+ c.Assert(ok, Equals, false)
+ c.Assert(obj, IsNil)
+ obj, ok = s.c.Get(s.cObject.Hash())
+ c.Assert(ok, Equals, true)
+ c.Assert(obj, NotNil)
+ obj, ok = s.c.Get(s.dObject.Hash())
+ c.Assert(ok, Equals, true)
+ c.Assert(obj, NotNil)
}
func (s *ObjectSuite) TestClear(c *C) {
- s.c.Add(s.aObject)
- c.Assert(s.c.actualSize, Equals, 1*Byte)
+ s.c.Put(s.aObject)
s.c.Clear()
- c.Assert(s.c.actualSize, Equals, 0*Byte)
- c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
+ obj, ok := s.c.Get(s.aObject.Hash())
+ c.Assert(ok, Equals, false)
+ c.Assert(obj, IsNil)
}
type dummyObject struct {
diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go
index 39680a3..e49de51 100644
--- a/plumbing/format/packfile/decoder.go
+++ b/plumbing/format/packfile/decoder.go
@@ -52,6 +52,8 @@ var (
// is destroyed. The Offsets and CRCs are calculated whether an
// ObjectStorer was provided or not.
type Decoder struct {
+ DeltaBaseCache cache.Object
+
s *Scanner
o storer.EncodedObjectStorer
tx storer.Transaction
@@ -65,8 +67,6 @@ type Decoder struct {
offsetToType map[int64]plumbing.ObjectType
decoderType plumbing.ObjectType
-
- cache cache.Object
}
// NewDecoder returns a new Decoder that decodes a Packfile using the given
@@ -107,8 +107,6 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
idx: NewIndex(0),
offsetToType: make(map[int64]plumbing.ObjectType, 0),
decoderType: t,
-
- cache: cache.NewObjectFIFO(cache.MaxSize),
}, nil
}
@@ -355,9 +353,8 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum
return 0, err
}
- base := d.cache.Get(ref)
-
- if base == nil {
+ base, ok := d.cacheGet(ref)
+ if !ok {
base, err = d.recallByHash(ref)
if err != nil {
return 0, err
@@ -366,7 +363,7 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum
obj.SetType(base.Type())
err = ApplyDelta(obj, base, buf.Bytes())
- d.cache.Add(obj)
+ d.cachePut(obj)
return crc, err
}
@@ -381,10 +378,10 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i
e, ok := d.idx.LookupOffset(uint64(offset))
var base plumbing.EncodedObject
if ok {
- base = d.cache.Get(e.Hash)
+ base, ok = d.cacheGet(e.Hash)
}
- if base == nil {
+ if !ok {
base, err = d.recallByOffset(offset)
if err != nil {
return 0, err
@@ -393,11 +390,27 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i
obj.SetType(base.Type())
err = ApplyDelta(obj, base, buf.Bytes())
- d.cache.Add(obj)
+ d.cachePut(obj)
return crc, err
}
+func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) {
+ if d.DeltaBaseCache == nil {
+ return nil, false
+ }
+
+ return d.DeltaBaseCache.Get(h)
+}
+
+func (d *Decoder) cachePut(obj plumbing.EncodedObject) {
+ if d.DeltaBaseCache == nil {
+ return
+ }
+
+ d.DeltaBaseCache.Put(obj)
+}
+
func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) {
if d.s.IsSeekable {
return d.DecodeObjectAt(o)
@@ -455,7 +468,5 @@ func (d *Decoder) Index() *Index {
// Close closes the Scanner. usually this mean that the whole reader is read and
// discarded
func (d *Decoder) Close() error {
- d.cache.Clear()
-
return d.s.Close()
}
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index e235b33..6dd910b 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -5,6 +5,7 @@ import (
"os"
"gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/cache"
"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
"gopkg.in/src-d/go-git.v4/plumbing/format/objfile"
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
@@ -16,14 +17,20 @@ import (
"gopkg.in/src-d/go-billy.v3"
)
+const DefaultMaxDeltaBaseCacheSize = 92 * cache.MiByte
+
type ObjectStorage struct {
+ // DeltaBaseCache is an object cache uses to cache delta's bases when
+ DeltaBaseCache cache.Object
+
dir *dotgit.DotGit
index map[plumbing.Hash]*packfile.Index
}
func newObjectStorage(dir *dotgit.DotGit) (ObjectStorage, error) {
s := ObjectStorage{
- dir: dir,
+ DeltaBaseCache: cache.NewObjectLRU(DefaultMaxDeltaBaseCacheSize),
+ dir: dir,
}
return s, nil
@@ -198,6 +205,7 @@ func (s *ObjectStorage) getFromPackfile(h plumbing.Hash) (plumbing.EncodedObject
return nil, err
}
+ d.DeltaBaseCache = s.DeltaBaseCache
d.SetIndex(s.index[pack])
obj, err := d.DecodeObjectAt(offset)
return obj, err