aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntonio Navarro Perez <antnavper@gmail.com>2017-01-25 14:29:39 +0100
committerSantiago M. Mola <santi@mola.io>2017-01-25 14:29:39 +0100
commitec28bd3808d42f523eeb05e23909dbfc54eb9bcd (patch)
treec7c5470a3f0089f01c5c9e2d80fef60da16a3c94
parentdc45de29f87a43078356a5be4c4b5aa24f626ee0 (diff)
downloadgo-git-ec28bd3808d42f523eeb05e23909dbfc54eb9bcd.tar.gz
packfile: cache undeltified objects to improve decode performance (#218)
* Simple object cache that keeps in memory the last undeltified objects. When no more objects can be kept into memory, the oldest one is deleted (FIFO). This speeds up packfile operations preventing redundant seeks and decodes.
-rw-r--r--cache/common.go16
-rw-r--r--cache/object.go68
-rw-r--r--cache/object_test.go85
-rw-r--r--cache/queue.go46
-rw-r--r--plumbing/format/packfile/decoder.go41
5 files changed, 248 insertions, 8 deletions
diff --git a/cache/common.go b/cache/common.go
new file mode 100644
index 0000000..bd2f1c3
--- /dev/null
+++ b/cache/common.go
@@ -0,0 +1,16 @@
package cache

import "gopkg.in/src-d/go-git.v4/plumbing"

// Binary size units used to express cache capacities in bytes.
const (
	Byte = 1 << (iota * 10)
	KiByte
	MiByte
	GiByte
)

// Object is the interface implemented by object caches: they store encoded
// objects, retrieve them by hash (nil when absent), and can be emptied.
type Object interface {
	Add(o plumbing.EncodedObject)
	Get(k plumbing.Hash) plumbing.EncodedObject
	Clear()
}
diff --git a/cache/object.go b/cache/object.go
new file mode 100644
index 0000000..d6cd49b
--- /dev/null
+++ b/cache/object.go
@@ -0,0 +1,68 @@
package cache

import "gopkg.in/src-d/go-git.v4/plumbing"

const (
	// initialQueueSize is the starting capacity of the insertion-order queue.
	initialQueueSize = 20
	// MaxSize is the default cache memory budget: 10 MiB.
	MaxSize = 10 * MiByte
)

// ObjectFIFO is an Object cache with FIFO eviction: when the configured
// memory budget is reached, the oldest inserted object is dropped first.
type ObjectFIFO struct {
	// objects indexes the cached objects by their hash.
	objects map[plumbing.Hash]plumbing.EncodedObject
	// order records insertion order so eviction can pick the oldest entry.
	order *queue

	// maxSize is the memory budget in bytes; actualSize is the sum of the
	// sizes of the objects currently held.
	maxSize    int64
	actualSize int64
}
+
+// NewObjectFIFO returns an Object cache that keeps the newest objects that fit
+// into the specific memory size
+func NewObjectFIFO(size int64) *ObjectFIFO {
+ return &ObjectFIFO{
+ objects: make(map[plumbing.Hash]plumbing.EncodedObject),
+ order: newQueue(initialQueueSize),
+ maxSize: size,
+ }
+}
+
+// Add adds a new object to the cache. If the object size is greater than the
+// cache size, the object is not added.
+func (c *ObjectFIFO) Add(o plumbing.EncodedObject) {
+ // if the size of the object is bigger or equal than the cache size,
+ // skip it
+ if o.Size() >= c.maxSize {
+ return
+ }
+
+ // if the object is into the cache, do not add it again
+ if _, ok := c.objects[o.Hash()]; ok {
+ return
+ }
+
+ // delete the oldest object if cache is full
+ if c.actualSize >= c.maxSize {
+ h := c.order.Pop()
+ o := c.objects[h]
+ if o != nil {
+ c.actualSize -= o.Size()
+ delete(c.objects, h)
+ }
+ }
+
+ c.objects[o.Hash()] = o
+ c.order.Push(o.Hash())
+ c.actualSize += o.Size()
+}
+
+// Get returns an object by his hash. If the object is not into the cache, it
+// returns nil
+func (c *ObjectFIFO) Get(k plumbing.Hash) plumbing.EncodedObject {
+ return c.objects[k]
+}
+
+// Clear the content of this cache object
+func (c *ObjectFIFO) Clear() {
+ c.objects = make(map[plumbing.Hash]plumbing.EncodedObject)
+ c.order = newQueue(initialQueueSize)
+ c.actualSize = 0
+}
diff --git a/cache/object_test.go b/cache/object_test.go
new file mode 100644
index 0000000..91aba8f
--- /dev/null
+++ b/cache/object_test.go
@@ -0,0 +1,85 @@
package cache

import (
	"io"
	"testing"

	"gopkg.in/src-d/go-git.v4/plumbing"

	. "gopkg.in/check.v1"
)

// Test hooks gopkg.in/check.v1 into the standard "go test" runner.
func Test(t *testing.T) { TestingT(t) }
+
// ObjectSuite exercises ObjectFIFO with a 2-byte cache and four fixture
// objects (a, c, d are 1 byte each; b is 3 bytes and never fits).
type ObjectSuite struct {
	c       *ObjectFIFO
	aObject plumbing.EncodedObject
	bObject plumbing.EncodedObject
	cObject plumbing.EncodedObject
	dObject plumbing.EncodedObject
}

var _ = Suite(&ObjectSuite{})
+
+func (s *ObjectSuite) SetUpTest(c *C) {
+ s.aObject = newObject("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1*Byte)
+ s.bObject = newObject("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", 3*Byte)
+ s.cObject = newObject("cccccccccccccccccccccccccccccccccccccccc", 1*Byte)
+ s.dObject = newObject("dddddddddddddddddddddddddddddddddddddddd", 1*Byte)
+
+ s.c = NewObjectFIFO(2 * Byte)
+}
+
+func (s *ObjectSuite) TestAdd_SameObject(c *C) {
+ s.c.Add(s.aObject)
+ c.Assert(s.c.actualSize, Equals, int64(1*Byte))
+ s.c.Add(s.aObject)
+ c.Assert(s.c.actualSize, Equals, int64(1*Byte))
+}
+
// TestAdd_BigObject verifies that an object as large as the whole cache is
// rejected outright: nothing is stored and no size is accounted.
func (s *ObjectSuite) TestAdd_BigObject(c *C) {
	s.c.Add(s.bObject)
	c.Assert(s.c.actualSize, Equals, int64(0))
	c.Assert(len(s.c.objects), Equals, 0)
}
+
// TestAdd_CacheOverflow verifies FIFO eviction: with a 2-byte cache, adding a
// third 1-byte object drops the oldest one (a), keeping the two newest.
func (s *ObjectSuite) TestAdd_CacheOverflow(c *C) {
	s.c.Add(s.aObject)
	c.Assert(s.c.actualSize, Equals, int64(1*Byte))
	s.c.Add(s.cObject)
	c.Assert(len(s.c.objects), Equals, 2)
	s.c.Add(s.dObject)
	c.Assert(len(s.c.objects), Equals, 2)

	// a (the oldest) was evicted; c and d remain
	c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
	c.Assert(s.c.Get(s.cObject.Hash()), NotNil)
	c.Assert(s.c.Get(s.dObject.Hash()), NotNil)
}
+
// TestClear verifies that Clear empties the cache: accounted size returns to
// zero and previously cached objects are no longer retrievable.
func (s *ObjectSuite) TestClear(c *C) {
	s.c.Add(s.aObject)
	c.Assert(s.c.actualSize, Equals, int64(1*Byte))
	s.c.Clear()
	c.Assert(s.c.actualSize, Equals, int64(0))
	c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
}
+
+type dummyObject struct {
+ hash plumbing.Hash
+ size int64
+}
+
+func newObject(hash string, size int64) plumbing.EncodedObject {
+ return &dummyObject{
+ hash: plumbing.NewHash(hash),
+ size: size,
+ }
+}
+
+func (d *dummyObject) Hash() plumbing.Hash { return d.hash }
+func (*dummyObject) Type() plumbing.ObjectType { return plumbing.InvalidObject }
+func (*dummyObject) SetType(plumbing.ObjectType) {}
+func (d *dummyObject) Size() int64 { return d.size }
+func (*dummyObject) SetSize(s int64) {}
+func (*dummyObject) Reader() (io.ReadCloser, error) { return nil, nil }
+func (*dummyObject) Writer() (io.WriteCloser, error) { return nil, nil }
diff --git a/cache/queue.go b/cache/queue.go
new file mode 100644
index 0000000..85413e0
--- /dev/null
+++ b/cache/queue.go
@@ -0,0 +1,46 @@
package cache

import "gopkg.in/src-d/go-git.v4/plumbing"

// queue is a basic FIFO queue based on a circular list that resize as needed.
type queue struct {
	// elements is the circular buffer backing the queue.
	elements []plumbing.Hash
	// size is the initial capacity; it is also the growth increment used
	// when the buffer must be resized.
	size int
	// head indexes the next element to Pop; tail the next free slot.
	head int
	tail int
	// count is the number of elements currently queued.
	count int
}
+
+// newQueue returns a queue with the specified initial size
+func newQueue(size int) *queue {
+ return &queue{
+ elements: make([]plumbing.Hash, size),
+ size: size,
+ }
+}
+
// Push adds a node to the queue.
func (q *queue) Push(h plumbing.Hash) {
	// head == tail with a non-zero count means the circular buffer is
	// full: grow it by the initial size and linearize the contents so
	// that the oldest element moves back to index 0.
	if q.head == q.tail && q.count > 0 {
		elements := make([]plumbing.Hash, len(q.elements)+q.size)
		// copy the wrapped buffer in two parts — [head:] first, then
		// [:head] — so the new slice holds the elements in FIFO order
		// starting at index 0.
		copy(elements, q.elements[q.head:])
		copy(elements[len(q.elements)-q.head:], q.elements[:q.head])
		q.head = 0
		q.tail = len(q.elements)
		q.elements = elements
	}
	q.elements[q.tail] = h
	q.tail = (q.tail + 1) % len(q.elements)
	q.count++
}
+
+// Pop removes and returns a Hash from the queue in first to last order.
+func (q *queue) Pop() plumbing.Hash {
+ if q.count == 0 {
+ return plumbing.ZeroHash
+ }
+ node := q.elements[q.head]
+ q.head = (q.head + 1) % len(q.elements)
+ q.count--
+ return node
+}
diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go
index 70d41ef..5793fdd 100644
--- a/plumbing/format/packfile/decoder.go
+++ b/plumbing/format/packfile/decoder.go
@@ -3,6 +3,7 @@ package packfile
import (
"bytes"
+ "gopkg.in/src-d/go-git.v4/cache"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
@@ -62,6 +63,8 @@ type Decoder struct {
offsetToType map[int64]plumbing.ObjectType
decoderType plumbing.ObjectType
+
+ cache cache.Object
}
// NewDecoder returns a new Decoder that decodes a Packfile using the given
@@ -105,6 +108,8 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
offsetToType: make(map[int64]plumbing.ObjectType, 0),
decoderType: t,
+
+ cache: cache.NewObjectFIFO(cache.MaxSize),
}, nil
}
@@ -341,13 +346,20 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum
return 0, err
}
- base, err := d.recallByHash(ref)
- if err != nil {
- return 0, err
+ base := d.cache.Get(ref)
+
+ if base == nil {
+ base, err = d.recallByHash(ref)
+ if err != nil {
+ return 0, err
+ }
}
obj.SetType(base.Type())
- return crc, ApplyDelta(obj, base, buf.Bytes())
+ err = ApplyDelta(obj, base, buf.Bytes())
+ d.cache.Add(obj)
+
+ return crc, err
}
func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) {
@@ -357,13 +369,24 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i
return 0, err
}
- base, err := d.recallByOffset(offset)
- if err != nil {
- return 0, err
+ h := d.offsetToHash[offset]
+ var base plumbing.EncodedObject
+ if h != plumbing.ZeroHash {
+ base = d.cache.Get(h)
+ }
+
+ if base == nil {
+ base, err = d.recallByOffset(offset)
+ if err != nil {
+ return 0, err
+ }
}
obj.SetType(base.Type())
- return crc, ApplyDelta(obj, base, buf.Bytes())
+ err = ApplyDelta(obj, base, buf.Bytes())
+ d.cache.Add(obj)
+
+ return crc, err
}
func (d *Decoder) setOffset(h plumbing.Hash, offset int64) {
@@ -434,5 +457,7 @@ func (d *Decoder) CRCs() map[plumbing.Hash]uint32 {
// Close close the Scanner, usually this mean that the whole reader is read and
// discarded
func (d *Decoder) Close() error {
	// drop the cached objects before closing the underlying scanner
	d.cache.Clear()

	return d.s.Close()
}