From cf532f99e3e7632bc1d813245a4c79ae38b4d320 Mon Sep 17 00:00:00 2001
From: David Symonds
Date: Wed, 30 May 2018 11:06:44 +1000
Subject: packfile: improve Index memory representation to be more compact

Instead of using a map for offset indexing, use a sorted slice.
Binary searching is fast, and a slice is much more compact.

This has a negligible hit on speed, but has a significant impact on
memory usage, especially for larger repos.

benchmark                         old ns/op     new ns/op     delta
BenchmarkIndexConstruction-12     15506506      14056098      -9.35%

benchmark                         old allocs     new allocs     delta
BenchmarkIndexConstruction-12     60764         60385          -0.62%

benchmark                         old bytes     new bytes     delta
BenchmarkIndexConstruction-12     4318145       3913169       -9.38%

Signed-off-by: David Symonds
---
 plumbing/format/packfile/index_test.go | 37 ++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 13 deletions(-)

(limited to 'plumbing/format/packfile/index_test.go')

diff --git a/plumbing/format/packfile/index_test.go b/plumbing/format/packfile/index_test.go
index 6714704..8de886d 100644
--- a/plumbing/format/packfile/index_test.go
+++ b/plumbing/format/packfile/index_test.go
@@ -3,6 +3,7 @@ package packfile
 import (
 	"strconv"
 	"strings"
+	"testing"
 
 	"gopkg.in/src-d/go-git.v4/plumbing"
 
@@ -26,12 +27,12 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
 			e, ok := idx.LookupOffset(uint64(o2))
 			c.Assert(ok, Equals, true)
 			c.Assert(e, NotNil)
-			c.Assert(e.Hash, Equals, s.toHash(o2))
+			c.Assert(e.Hash, Equals, toHash(o2))
 			c.Assert(e.Offset, Equals, uint64(o2))
 		}
 	}
 
-	h1 := s.toHash(o1)
+	h1 := toHash(o1)
 	idx.Add(h1, uint64(o1), 0)
 
 	for o2 := 0; o2 < 10000; o2 += 100 {
@@ -43,7 +44,7 @@ func (s *IndexSuite) TestLookupOffset(c *C) {
 			e, ok := idx.LookupOffset(uint64(o2))
 			c.Assert(ok, Equals, true)
 			c.Assert(e, NotNil)
-			c.Assert(e.Hash, Equals, s.toHash(o2))
+			c.Assert(e.Hash, Equals, toHash(o2))
 			c.Assert(e.Offset, Equals, uint64(o2))
 		}
 	}
@@ -56,31 +57,31 @@ func (s *IndexSuite) TestLookupHash(c *C) {
 	for o1 := 0; o1 < 10000; o1 += 100 {
 		for o2 := 0; o2 < 10000; o2 += 100 {
 			if o2 >= o1 {
-				e, ok := idx.LookupHash(s.toHash(o2))
+				e, ok := idx.LookupHash(toHash(o2))
 				c.Assert(ok, Equals, false)
 				c.Assert(e, IsNil)
 			} else {
-				e, ok := idx.LookupHash(s.toHash(o2))
+				e, ok := idx.LookupHash(toHash(o2))
 				c.Assert(ok, Equals, true)
 				c.Assert(e, NotNil)
-				c.Assert(e.Hash, Equals, s.toHash(o2))
+				c.Assert(e.Hash, Equals, toHash(o2))
 				c.Assert(e.Offset, Equals, uint64(o2))
 			}
 		}
 
-		h1 := s.toHash(o1)
+		h1 := toHash(o1)
 		idx.Add(h1, uint64(o1), 0)
 
 		for o2 := 0; o2 < 10000; o2 += 100 {
 			if o2 > o1 {
-				e, ok := idx.LookupHash(s.toHash(o2))
+				e, ok := idx.LookupHash(toHash(o2))
 				c.Assert(ok, Equals, false)
 				c.Assert(e, IsNil)
 			} else {
-				e, ok := idx.LookupHash(s.toHash(o2))
+				e, ok := idx.LookupHash(toHash(o2))
 				c.Assert(ok, Equals, true)
 				c.Assert(e, NotNil)
-				c.Assert(e.Hash, Equals, s.toHash(o2))
+				c.Assert(e.Hash, Equals, toHash(o2))
 				c.Assert(e.Offset, Equals, uint64(o2))
 			}
 		}
@@ -92,7 +93,7 @@ func (s *IndexSuite) TestSize(c *C) {
 
 	for o1 := 0; o1 < 1000; o1++ {
 		c.Assert(idx.Size(), Equals, o1)
-		h1 := s.toHash(o1)
+		h1 := toHash(o1)
 		idx.Add(h1, uint64(o1), 0)
 	}
 }
@@ -107,7 +108,7 @@ func (s *IndexSuite) TestIdxFileEmpty(c *C) {
 func (s *IndexSuite) TestIdxFile(c *C) {
 	idx := NewIndex(0)
 	for o1 := 0; o1 < 1000; o1++ {
-		h1 := s.toHash(o1)
+		h1 := toHash(o1)
 		idx.Add(h1, uint64(o1), 0)
 	}
 
@@ -115,8 +116,18 @@ func (s *IndexSuite) TestIdxFile(c *C) {
 	c.Assert(idx, DeepEquals, idx2)
 }
 
-func (s *IndexSuite) toHash(i int) plumbing.Hash {
+func toHash(i int) plumbing.Hash {
 	is := strconv.Itoa(i)
 	padding := strings.Repeat("a", 40-len(is))
 	return plumbing.NewHash(padding + is)
 }
+
+func BenchmarkIndexConstruction(b *testing.B) {
+	b.ReportAllocs()
+
+	idx := NewIndex(0)
+	for o := 0; o < 1e6*b.N; o += 100 {
+		h1 := toHash(o)
+		idx.Add(h1, uint64(o), 0)
+	}
+}
-- cgit
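
Note: the patch above only touches the test file; the commit message describes replacing a map keyed by offset with a sorted slice searched via binary search. The following is a minimal, self-contained Go sketch of that general layout, not go-git's actual Index implementation; the entry and index types and their method names are made up for illustration.

// sortedindex_sketch.go: illustrative sorted-slice index with binary search.
package main

import (
	"fmt"
	"sort"
)

// entry is a hypothetical stand-in for an index record: an object offset
// plus whatever payload the index carries (here just a string hash).
type entry struct {
	Offset uint64
	Hash   string
}

// index keeps entries sorted by Offset, so lookups cost O(log n) and the
// backing array is far more compact than a map with per-bucket overhead.
type index struct {
	entries []entry
}

// add inserts e while keeping the slice sorted by Offset.
func (idx *index) add(e entry) {
	i := sort.Search(len(idx.entries), func(i int) bool {
		return idx.entries[i].Offset >= e.Offset
	})
	idx.entries = append(idx.entries, entry{}) // grow by one
	copy(idx.entries[i+1:], idx.entries[i:])   // shift the tail right
	idx.entries[i] = e
}

// lookupOffset binary searches the sorted slice for an exact offset match.
func (idx *index) lookupOffset(o uint64) (entry, bool) {
	i := sort.Search(len(idx.entries), func(i int) bool {
		return idx.entries[i].Offset >= o
	})
	if i < len(idx.entries) && idx.entries[i].Offset == o {
		return idx.entries[i], true
	}
	return entry{}, false
}

func main() {
	idx := &index{}
	for _, o := range []uint64{300, 100, 200} {
		idx.add(entry{Offset: o, Hash: fmt.Sprintf("obj-%d", o)})
	}
	if e, ok := idx.lookupOffset(200); ok {
		fmt.Println(e.Hash) // prints "obj-200"
	}
}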