From cf532f99e3e7632bc1d813245a4c79ae38b4d320 Mon Sep 17 00:00:00 2001
From: David Symonds <dsymonds@golang.org>
Date: Wed, 30 May 2018 11:06:44 +1000
Subject: packfile: improve Index memory representation to be more compact

Instead of using a map for offset indexing, use a sorted slice.
Binary searching is fast, and a slice is much more compact.
This has a negligible hit on speed, but has a significant impact on
memory usage, especially for larger repos.

benchmark                         old ns/op     new ns/op     delta
BenchmarkIndexConstruction-12     15506506      14056098      -9.35%

benchmark                         old allocs     new allocs     delta
BenchmarkIndexConstruction-12     60764          60385          -0.62%

benchmark                         old bytes     new bytes     delta
BenchmarkIndexConstruction-12     4318145       3913169       -9.38%

Signed-off-by: David Symonds <dsymonds@golang.org>
---
 plumbing/format/packfile/index.go | 53 +++++++++++++++++++++++++++++++--------
 1 file changed, 43 insertions(+), 10 deletions(-)

(limited to 'plumbing/format/packfile/index.go')

diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go
index 2c5f98f..7d8f2ad 100644
--- a/plumbing/format/packfile/index.go
+++ b/plumbing/format/packfile/index.go
@@ -1,6 +1,8 @@
 package packfile
 
 import (
+	"sort"
+
 	"gopkg.in/src-d/go-git.v4/plumbing"
 	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
 )
@@ -10,7 +12,7 @@ import (
 // or to store them.
 type Index struct {
 	byHash   map[plumbing.Hash]*idxfile.Entry
-	byOffset map[uint64]*idxfile.Entry
+	byOffset []*idxfile.Entry // sorted by their offset
 }
 
 // NewIndex creates a new empty index with the given size. Size is a hint and
@@ -19,7 +21,7 @@ type Index struct {
 func NewIndex(size int) *Index {
 	return &Index{
 		byHash:   make(map[plumbing.Hash]*idxfile.Entry, size),
-		byOffset: make(map[uint64]*idxfile.Entry, size),
+		byOffset: make([]*idxfile.Entry, 0, size),
 	}
 }
 
@@ -27,28 +29,54 @@ func NewIndex(size int) *Index {
 func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index {
 	idx := &Index{
 		byHash:   make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount),
-		byOffset: make(map[uint64]*idxfile.Entry, idxf.ObjectCount),
+		byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount),
 	}
 	for _, e := range idxf.Entries {
-		idx.add(e)
+		idx.addUnsorted(e)
 	}
+	sort.Sort(orderByOffset(idx.byOffset))
 
 	return idx
 }
 
+// orderByOffset is a sort.Interface adapter that arranges
+// a slice of entries by their offset.
+type orderByOffset []*idxfile.Entry
+
+func (o orderByOffset) Len() int           { return len(o) }
+func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset }
+func (o orderByOffset) Swap(i, j int)      { o[i], o[j] = o[j], o[i] }
+
 // Add adds a new Entry with the given values to the index.
 func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
-	e := idxfile.Entry{
+	e := &idxfile.Entry{
 		Hash:   h,
 		Offset: offset,
 		CRC32:  crc32,
 	}
-	idx.add(&e)
+	idx.byHash[e.Hash] = e
+
+	// Find the right position in byOffset.
+	// Look for the first position whose offset is *greater* than e.Offset.
+	i := sort.Search(len(idx.byOffset), func(i int) bool {
+		return idx.byOffset[i].Offset > offset
+	})
+	if i == len(idx.byOffset) {
+		// Simple case: add it to the end.
+		idx.byOffset = append(idx.byOffset, e)
+		return
+	}
+	// Harder case: shift existing entries down by one to make room.
+	// Append a nil entry first so we can use existing capacity in case
+	// the index was carefully preallocated.
+	idx.byOffset = append(idx.byOffset, nil)
+	copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1])
+	idx.byOffset[i] = e
 }
 
-func (idx *Index) add(e *idxfile.Entry) {
+func (idx *Index) addUnsorted(e *idxfile.Entry) {
 	idx.byHash[e.Hash] = e
-	idx.byOffset[e.Offset] = e
+	idx.byOffset = append(idx.byOffset, e)
 }
 
 // LookupHash looks an entry up by its hash. An idxfile.Entry is returned and
@@ -61,8 +89,13 @@ func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) {
 // LookupHash looks an entry up by its offset in the packfile. An idxfile.Entry
 // is returned and a bool, which is true if it was found or false if it wasn't.
 func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) {
-	e, ok := idx.byOffset[offset]
-	return e, ok
+	i := sort.Search(len(idx.byOffset), func(i int) bool {
+		return idx.byOffset[i].Offset >= offset
+	})
+	if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset {
+		return nil, false // not present
+	}
+	return idx.byOffset[i], true
 }
 
 // Size returns the number of entries in the index.
-- 
cgit 


From 2d9816a5e7daea58a1419fef70bfc8d220ffd6a2 Mon Sep 17 00:00:00 2001
From: David Symonds <dsymonds@golang.org>
Date: Thu, 21 Jun 2018 13:24:03 +1000
Subject: packfile: optimise NewIndexFromIdxFile for a very common case

Loading from an on-disk idxfile will usually already have the idxfile
entries in order, so check that before wasting time on sorting.

Signed-off-by: David Symonds <dsymonds@golang.org>
---
 plumbing/format/packfile/index.go | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'plumbing/format/packfile/index.go')

diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go
index 7d8f2ad..021b2d1 100644
--- a/plumbing/format/packfile/index.go
+++ b/plumbing/format/packfile/index.go
@@ -31,10 +31,20 @@ func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index {
 		byHash:   make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount),
 		byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount),
 	}
-	for _, e := range idxf.Entries {
+	sorted := true
+	for i, e := range idxf.Entries {
 		idx.addUnsorted(e)
+		if i > 0 && idx.byOffset[i-1].Offset >= e.Offset {
+			sorted = false
+		}
+	}
+
+	// If the idxfile was loaded from a regular packfile index
+	// then it will already be in offset order, in which case we
+	// can avoid doing a relatively expensive idempotent sort.
+	if !sorted {
+		sort.Sort(orderByOffset(idx.byOffset))
 	}
-	sort.Sort(orderByOffset(idx.byOffset))
 
 	return idx
 }
-- 
cgit 


From 009f1069a1248c1e9189a9e4c342f6d017156ec4 Mon Sep 17 00:00:00 2001
From: Miguel Molina <miguel@erizocosmi.co>
Date: Thu, 19 Jul 2018 15:20:10 +0200
Subject: plumbing/format/idxfile: add new Index and MemoryIndex

Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
---
 plumbing/format/packfile/index.go | 125 --------------------------------------
 1 file changed, 125 deletions(-)
 delete mode 100644 plumbing/format/packfile/index.go

(limited to 'plumbing/format/packfile/index.go')

diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go
deleted file mode 100644
index 021b2d1..0000000
--- a/plumbing/format/packfile/index.go
+++ /dev/null
@@ -1,125 +0,0 @@
-package packfile
-
-import (
-	"sort"
-
-	"gopkg.in/src-d/go-git.v4/plumbing"
-	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
-)
-
-// Index is an in-memory representation of a packfile index.
-// This uses idxfile.Idxfile under the hood to obtain indexes from .idx files
-// or to store them.
-type Index struct {
-	byHash   map[plumbing.Hash]*idxfile.Entry
-	byOffset []*idxfile.Entry // sorted by their offset
-}
-
-// NewIndex creates a new empty index with the given size. Size is a hint and
-// can be 0. It is recommended to set it to the number of objects to be indexed
-// if it is known beforehand (e.g. reading from a packfile).
-func NewIndex(size int) *Index {
-	return &Index{
-		byHash:   make(map[plumbing.Hash]*idxfile.Entry, size),
-		byOffset: make([]*idxfile.Entry, 0, size),
-	}
-}
-
-// NewIndexFromIdxFile creates a new Index from an idxfile.IdxFile.
-func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index {
-	idx := &Index{
-		byHash:   make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount),
-		byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount),
-	}
-	sorted := true
-	for i, e := range idxf.Entries {
-		idx.addUnsorted(e)
-		if i > 0 && idx.byOffset[i-1].Offset >= e.Offset {
-			sorted = false
-		}
-	}
-
-	// If the idxfile was loaded from a regular packfile index
-	// then it will already be in offset order, in which case we
-	// can avoid doing a relatively expensive idempotent sort.
-	if !sorted {
-		sort.Sort(orderByOffset(idx.byOffset))
-	}
-
-	return idx
-}
-
-// orderByOffset is a sort.Interface adapter that arranges
-// a slice of entries by their offset.
-type orderByOffset []*idxfile.Entry
-
-func (o orderByOffset) Len() int           { return len(o) }
-func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset }
-func (o orderByOffset) Swap(i, j int)      { o[i], o[j] = o[j], o[i] }
-
-// Add adds a new Entry with the given values to the index.
-func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
-	e := &idxfile.Entry{
-		Hash:   h,
-		Offset: offset,
-		CRC32:  crc32,
-	}
-	idx.byHash[e.Hash] = e
-
-	// Find the right position in byOffset.
-	// Look for the first position whose offset is *greater* than e.Offset.
-	i := sort.Search(len(idx.byOffset), func(i int) bool {
-		return idx.byOffset[i].Offset > offset
-	})
-	if i == len(idx.byOffset) {
-		// Simple case: add it to the end.
-		idx.byOffset = append(idx.byOffset, e)
-		return
-	}
-	// Harder case: shift existing entries down by one to make room.
-	// Append a nil entry first so we can use existing capacity in case
-	// the index was carefully preallocated.
-	idx.byOffset = append(idx.byOffset, nil)
-	copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1])
-	idx.byOffset[i] = e
-}
-
-func (idx *Index) addUnsorted(e *idxfile.Entry) {
-	idx.byHash[e.Hash] = e
-	idx.byOffset = append(idx.byOffset, e)
-}
-
-// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and
-// a bool, which is true if it was found or false if it wasn't.
-func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) {
-	e, ok := idx.byHash[h]
-	return e, ok
-}
-
-// LookupHash looks an entry up by its offset in the packfile. An idxfile.Entry
-// is returned and a bool, which is true if it was found or false if it wasn't.
-func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) {
-	i := sort.Search(len(idx.byOffset), func(i int) bool {
-		return idx.byOffset[i].Offset >= offset
-	})
-	if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset {
-		return nil, false // not present
-	}
-	return idx.byOffset[i], true
-}
-
-// Size returns the number of entries in the index.
-func (idx *Index) Size() int {
-	return len(idx.byHash)
-}
-
-// ToIdxFile converts the index to an idxfile.Idxfile, which can then be used
-// to serialize.
-func (idx *Index) ToIdxFile() *idxfile.Idxfile {
-	idxf := idxfile.NewIdxfile()
-	for _, e := range idx.byHash {
-		idxf.Entries = append(idxf.Entries, e)
-	}
-
-	return idxf
-}
-- 
cgit