aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Symonds <dsymonds@golang.org> 2018-05-30 11:06:44 +1000
committer David Symonds <dsymonds@golang.org> 2018-05-30 11:34:26 +1000
commit cf532f99e3e7632bc1d813245a4c79ae38b4d320 (patch)
tree 702b13e6e462ca39fcfff119c1be949963f32705
parent 57570e84f8c5739f0f4a59387493e590e709dde9 (diff)
download go-git-cf532f99e3e7632bc1d813245a4c79ae38b4d320.tar.gz
packfile: improve Index memory representation to be more compact
Instead of using a map for offset indexing, use a sorted slice.
Binary searching is fast, and a slice is much more compact.
This has a negligible hit on speed, but has a significant impact on
memory usage, especially for larger repos.

benchmark                         old ns/op     new ns/op     delta
BenchmarkIndexConstruction-12     15506506      14056098      -9.35%

benchmark                         old allocs     new allocs     delta
BenchmarkIndexConstruction-12     60764          60385          -0.62%

benchmark                         old bytes     new bytes     delta
BenchmarkIndexConstruction-12     4318145       3913169       -9.38%

Signed-off-by: David Symonds <dsymonds@golang.org>
-rw-r--r-- plumbing/format/packfile/index.go 53
-rw-r--r-- plumbing/format/packfile/index_test.go 37
2 files changed, 67 insertions, 23 deletions
diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go
index 2c5f98f..7d8f2ad 100644
--- a/plumbing/format/packfile/index.go
+++ b/plumbing/format/packfile/index.go
@@ -1,6 +1,8 @@
package packfile
import (
+ "sort"
+
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)
@@ -10,7 +12,7 @@ import (
// or to store them.
type Index struct {
byHash map[plumbing.Hash]*idxfile.Entry
- byOffset map[uint64]*idxfile.Entry
+ byOffset []*idxfile.Entry // sorted by their offset
}
// NewIndex creates a new empty index with the given size. Size is a hint and
@@ -19,7 +21,7 @@ type Index struct {
func NewIndex(size int) *Index {
return &Index{
byHash: make(map[plumbing.Hash]*idxfile.Entry, size),
- byOffset: make(map[uint64]*idxfile.Entry, size),
+ byOffset: make([]*idxfile.Entry, 0, size),
}
}
@@ -27,28 +29,54 @@ func NewIndex(size int) *Index {
func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index {
idx := &Index{
byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount),
- byOffset: make(map[uint64]*idxfile.Entry, idxf.ObjectCount),
+ byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount),
}
for _, e := range idxf.Entries {
- idx.add(e)
+ idx.addUnsorted(e)
}
+ sort.Sort(orderByOffset(idx.byOffset))
return idx
}
+// orderByOffset is a sort.Interface adapter that arranges
+// a slice of entries by their offset.
+type orderByOffset []*idxfile.Entry
+
+func (o orderByOffset) Len() int { return len(o) }
+func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset }
+func (o orderByOffset) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
+
// Add adds a new Entry with the given values to the index.
func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
- e := idxfile.Entry{
+ e := &idxfile.Entry{
Hash: h,
Offset: offset,
CRC32: crc32,
}
- idx.add(&e)
+ idx.byHash[e.Hash] = e
+
+ // Find the right position in byOffset.
+ // Look for the first position whose offset is *greater* than e.Offset.
+ i := sort.Search(len(idx.byOffset), func(i int) bool {
+ return idx.byOffset[i].Offset > offset
+ })
+ if i == len(idx.byOffset) {
+ // Simple case: add it to the end.
+ idx.byOffset = append(idx.byOffset, e)
+ return
+ }
+ // Harder case: shift existing entries down by one to make room.
+ // Append a nil entry first so we can use existing capacity in case
+ // the index was carefully preallocated.
+ idx.byOffset = append(idx.byOffset, nil)
+ copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1])
+ idx.byOffset[i] = e
}
-func (idx *Index) add(e *idxfile.Entry) {
+func (idx *Index) addUnsorted(e *idxfile.Entry) {
idx.byHash[e.Hash] = e
- idx.byOffset[e.Offset] = e
+ idx.byOffset = append(idx.byOffset, e)
}
// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and
@@ -61,8 +89,13 @@ func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) {
// LookupOffset looks an entry up by its offset in the packfile. An idxfile.Entry
// is returned and a bool, which is true if it was found or false if it wasn't.
func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) {
- e, ok := idx.byOffset[offset]
- return e, ok
+ i := sort.Search(len(idx.byOffset), func(i int) bool {
+ return idx.byOffset[i].Offset >= offset
+ })
+ if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset {
+ return nil, false // not present
+ }
+ return idx.byOffset[i], true
}
// Size returns the number of entries in the index.
diff --git a/plumbing/format/packfile/index_test.go b/plumbing/format/packfile/index_test.go
index 6714704..8de886d 100644
--- a/plumbing/format/packfile/index_test.go
+++ b/plumbing/format/packfile/index_test.go
@@ -3,6 +3,7 @@ package packfile
import (
"strconv"
"strings"
+ "testing"
"gopkg.in/src-d/go-git.v4/plumbing"
@@ -26,12 +27,12 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
e, ok := idx.LookupOffset(uint64(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
- c.Assert(e.Hash, Equals, s.toHash(o2))
+ c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}
- h1 := s.toHash(o1)
+ h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)
for o2 := 0; o2 < 10000; o2 += 100 {
@@ -43,7 +44,7 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
e, ok := idx.LookupOffset(uint64(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
- c.Assert(e.Hash, Equals, s.toHash(o2))
+ c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}
@@ -56,31 +57,31 @@ func (s *IndexSuite) TestLookupHash(c *C) {
for o1 := 0; o1 < 10000; o1 += 100 {
for o2 := 0; o2 < 10000; o2 += 100 {
if o2 >= o1 {
- e, ok := idx.LookupHash(s.toHash(o2))
+ e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, false)
c.Assert(e, IsNil)
} else {
- e, ok := idx.LookupHash(s.toHash(o2))
+ e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
- c.Assert(e.Hash, Equals, s.toHash(o2))
+ c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}
- h1 := s.toHash(o1)
+ h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)
for o2 := 0; o2 < 10000; o2 += 100 {
if o2 > o1 {
- e, ok := idx.LookupHash(s.toHash(o2))
+ e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, false)
c.Assert(e, IsNil)
} else {
- e, ok := idx.LookupHash(s.toHash(o2))
+ e, ok := idx.LookupHash(toHash(o2))
c.Assert(ok, Equals, true)
c.Assert(e, NotNil)
- c.Assert(e.Hash, Equals, s.toHash(o2))
+ c.Assert(e.Hash, Equals, toHash(o2))
c.Assert(e.Offset, Equals, uint64(o2))
}
}
@@ -92,7 +93,7 @@ func (s *IndexSuite) TestSize(c *C) {
for o1 := 0; o1 < 1000; o1++ {
c.Assert(idx.Size(), Equals, o1)
- h1 := s.toHash(o1)
+ h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)
}
}
@@ -107,7 +108,7 @@ func (s *IndexSuite) TestIdxFileEmpty(c *C) {
func (s *IndexSuite) TestIdxFile(c *C) {
idx := NewIndex(0)
for o1 := 0; o1 < 1000; o1++ {
- h1 := s.toHash(o1)
+ h1 := toHash(o1)
idx.Add(h1, uint64(o1), 0)
}
@@ -115,8 +116,18 @@ func (s *IndexSuite) TestIdxFile(c *C) {
c.Assert(idx, DeepEquals, idx2)
}
-func (s *IndexSuite) toHash(i int) plumbing.Hash {
+func toHash(i int) plumbing.Hash {
is := strconv.Itoa(i)
padding := strings.Repeat("a", 40-len(is))
return plumbing.NewHash(padding + is)
}
+
+func BenchmarkIndexConstruction(b *testing.B) {
+ b.ReportAllocs()
+
+ idx := NewIndex(0)
+ for o := 0; o < 1e6*b.N; o += 100 {
+ h1 := toHash(o)
+ idx.Add(h1, uint64(o), 0)
+ }
+}