author     Máximo Cuadros <mcuadros@gmail.com>  2018-08-14 09:57:46 +0200
committer  GitHub <noreply@github.com>          2018-08-14 09:57:46 +0200
commit     a28c2ce44695f13ddf28748958f236afd8e0b544 (patch)
tree       107dd441cd96b44b4f3994d26faf5f0bfae933fc /plumbing/format/idxfile
parent     c3740924da0d1929cb523c85ae9da3b456b901ea (diff)
parent     8d75d239e93474e4287870e4e5143da14e2c360d (diff)
download   go-git-a28c2ce44695f13ddf28748958f236afd8e0b544.tar.gz
Merge pull request #906 from src-d/perf/packfile-reads
Improve packfile reading performance
Diffstat (limited to 'plumbing/format/idxfile')
-rw-r--r--  plumbing/format/idxfile/decoder.go       | 109
-rw-r--r--  plumbing/format/idxfile/decoder_test.go  | 106
-rw-r--r--  plumbing/format/idxfile/encoder.go       | 101
-rw-r--r--  plumbing/format/idxfile/encoder_test.go  |  21
-rw-r--r--  plumbing/format/idxfile/idxfile.go       | 280
-rw-r--r--  plumbing/format/idxfile/idxfile_test.go  | 154
-rw-r--r--  plumbing/format/idxfile/writer.go        | 186
-rw-r--r--  plumbing/format/idxfile/writer_test.go   |  98
8 files changed, 851 insertions(+), 204 deletions(-)
diff --git a/plumbing/format/idxfile/decoder.go b/plumbing/format/idxfile/decoder.go
index 45afb1e..5b92782 100644
--- a/plumbing/format/idxfile/decoder.go
+++ b/plumbing/format/idxfile/decoder.go
@@ -17,6 +17,11 @@ var (
ErrMalformedIdxFile = errors.New("Malformed IDX file")
)
+const (
+ fanout = 256
+ objectIDLength = 20
+)
+
// Decoder reads and decodes idx files from an input stream.
type Decoder struct {
*bufio.Reader
@@ -27,13 +32,13 @@ func NewDecoder(r io.Reader) *Decoder {
return &Decoder{bufio.NewReader(r)}
}
-// Decode reads from the stream and decode the content into the Idxfile struct.
-func (d *Decoder) Decode(idx *Idxfile) error {
+// Decode reads from the stream and decode the content into the MemoryIndex struct.
+func (d *Decoder) Decode(idx *MemoryIndex) error {
if err := validateHeader(d); err != nil {
return err
}
- flow := []func(*Idxfile, io.Reader) error{
+ flow := []func(*MemoryIndex, io.Reader) error{
readVersion,
readFanout,
readObjectNames,
@@ -48,10 +53,6 @@ func (d *Decoder) Decode(idx *Idxfile) error {
}
}
- if !idx.isValid() {
- return ErrMalformedIdxFile
- }
-
return nil
}
@@ -68,7 +69,7 @@ func validateHeader(r io.Reader) error {
return nil
}
-func readVersion(idx *Idxfile, r io.Reader) error {
+func readVersion(idx *MemoryIndex, r io.Reader) error {
v, err := binary.ReadUint32(r)
if err != nil {
return err
@@ -82,74 +83,92 @@ func readVersion(idx *Idxfile, r io.Reader) error {
return nil
}
-func readFanout(idx *Idxfile, r io.Reader) error {
- var err error
- for i := 0; i < 255; i++ {
- idx.Fanout[i], err = binary.ReadUint32(r)
+func readFanout(idx *MemoryIndex, r io.Reader) error {
+ for k := 0; k < fanout; k++ {
+ n, err := binary.ReadUint32(r)
if err != nil {
return err
}
+
+ idx.Fanout[k] = n
+ idx.FanoutMapping[k] = noMapping
}
- idx.ObjectCount, err = binary.ReadUint32(r)
- return err
+ return nil
}
-func readObjectNames(idx *Idxfile, r io.Reader) error {
- c := int(idx.ObjectCount)
- new := make([]Entry, c)
- for i := 0; i < c; i++ {
- e := &new[i]
- if _, err := io.ReadFull(r, e.Hash[:]); err != nil {
+func readObjectNames(idx *MemoryIndex, r io.Reader) error {
+ for k := 0; k < fanout; k++ {
+ var buckets uint32
+ if k == 0 {
+ buckets = idx.Fanout[k]
+ } else {
+ buckets = idx.Fanout[k] - idx.Fanout[k-1]
+ }
+
+ if buckets == 0 {
+ continue
+ }
+
+ if buckets < 0 {
+ return ErrMalformedIdxFile
+ }
+
+ idx.FanoutMapping[k] = len(idx.Names)
+
+ nameLen := int(buckets * objectIDLength)
+ bin := make([]byte, nameLen)
+ if _, err := io.ReadFull(r, bin); err != nil {
return err
}
- idx.Entries = append(idx.Entries, e)
+ idx.Names = append(idx.Names, bin)
+ idx.Offset32 = append(idx.Offset32, make([]byte, buckets*4))
+ idx.CRC32 = append(idx.CRC32, make([]byte, buckets*4))
}
return nil
}
-func readCRC32(idx *Idxfile, r io.Reader) error {
- c := int(idx.ObjectCount)
- for i := 0; i < c; i++ {
- if err := binary.Read(r, &idx.Entries[i].CRC32); err != nil {
- return err
+func readCRC32(idx *MemoryIndex, r io.Reader) error {
+ for k := 0; k < fanout; k++ {
+ if pos := idx.FanoutMapping[k]; pos != noMapping {
+ if _, err := io.ReadFull(r, idx.CRC32[pos]); err != nil {
+ return err
+ }
}
}
return nil
}
-func readOffsets(idx *Idxfile, r io.Reader) error {
- c := int(idx.ObjectCount)
-
- for i := 0; i < c; i++ {
- o, err := binary.ReadUint32(r)
- if err != nil {
- return err
+func readOffsets(idx *MemoryIndex, r io.Reader) error {
+ var o64cnt int
+ for k := 0; k < fanout; k++ {
+ if pos := idx.FanoutMapping[k]; pos != noMapping {
+ if _, err := io.ReadFull(r, idx.Offset32[pos]); err != nil {
+ return err
+ }
+
+ for p := 0; p < len(idx.Offset32[pos]); p += 4 {
+ if idx.Offset32[pos][p]&(byte(1)<<7) > 0 {
+ o64cnt++
+ }
+ }
}
-
- idx.Entries[i].Offset = uint64(o)
}
- for i := 0; i < c; i++ {
- if idx.Entries[i].Offset <= offsetLimit {
- continue
- }
-
- o, err := binary.ReadUint64(r)
- if err != nil {
+ if o64cnt > 0 {
+ idx.Offset64 = make([]byte, o64cnt*8)
+ if _, err := io.ReadFull(r, idx.Offset64); err != nil {
return err
}
-
- idx.Entries[i].Offset = o
}
return nil
}
-func readChecksums(idx *Idxfile, r io.Reader) error {
+func readChecksums(idx *MemoryIndex, r io.Reader) error {
if _, err := io.ReadFull(r, idx.PackfileChecksum[:]); err != nil {
return err
}
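
A minimal usage sketch of the reworked decoder API, which now fills a MemoryIndex rather than the removed Idxfile struct; the idx file path below is hypothetical and not taken from this change:

    package main

    import (
        "fmt"
        "os"

        "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
    )

    func main() {
        // Open any v2 pack index; the file name here is a placeholder.
        f, err := os.Open("objects/pack/pack-example.idx")
        if err != nil {
            panic(err)
        }
        defer f.Close()

        // Decode now targets MemoryIndex instead of the old Idxfile struct.
        idx := new(idxfile.MemoryIndex)
        if err := idxfile.NewDecoder(f).Decode(idx); err != nil {
            panic(err)
        }

        // Count reads the last fanout entry, so it needs no full scan.
        count, _ := idx.Count()
        fmt.Println("objects indexed:", count)
    }
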
diff --git a/plumbing/format/idxfile/decoder_test.go b/plumbing/format/idxfile/decoder_test.go
index 20d6859..b43d7c5 100644
--- a/plumbing/format/idxfile/decoder_test.go
+++ b/plumbing/format/idxfile/decoder_test.go
@@ -4,11 +4,12 @@ import (
"bytes"
"encoding/base64"
"fmt"
+ "io"
+ "io/ioutil"
"testing"
+ "gopkg.in/src-d/go-git.v4/plumbing"
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
- "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
- "gopkg.in/src-d/go-git.v4/storage/memory"
. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
@@ -26,51 +27,34 @@ func (s *IdxfileSuite) TestDecode(c *C) {
f := fixtures.Basic().One()
d := NewDecoder(f.Idx())
- idx := &Idxfile{}
+ idx := new(MemoryIndex)
err := d.Decode(idx)
c.Assert(err, IsNil)
- c.Assert(idx.Entries, HasLen, 31)
- c.Assert(idx.Entries[0].Hash.String(), Equals, "1669dce138d9b841a518c64b10914d88f5e488ea")
- c.Assert(idx.Entries[0].Offset, Equals, uint64(615))
- c.Assert(idx.Entries[0].CRC32, Equals, uint32(3645019190))
+ count, _ := idx.Count()
+ c.Assert(count, Equals, int64(31))
- c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9")
- c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String())
-}
-
-func (s *IdxfileSuite) TestDecodeCRCs(c *C) {
- f := fixtures.Basic().ByTag("ofs-delta").One()
-
- scanner := packfile.NewScanner(f.Packfile())
- storage := memory.NewStorage()
-
- pd, err := packfile.NewDecoder(scanner, storage)
+ hash := plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea")
+ ok, err := idx.Contains(hash)
c.Assert(err, IsNil)
- _, err = pd.Decode()
- c.Assert(err, IsNil)
-
- i := pd.Index().ToIdxFile()
- i.Version = VersionSupported
+ c.Assert(ok, Equals, true)
- buf := bytes.NewBuffer(nil)
- e := NewEncoder(buf)
- _, err = e.Encode(i)
+ offset, err := idx.FindOffset(hash)
c.Assert(err, IsNil)
+ c.Assert(offset, Equals, int64(615))
- idx := &Idxfile{}
-
- d := NewDecoder(buf)
- err = d.Decode(idx)
+ crc32, err := idx.FindCRC32(hash)
c.Assert(err, IsNil)
+ c.Assert(crc32, Equals, uint32(3645019190))
- c.Assert(idx.Entries, DeepEquals, i.Entries)
+ c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9")
+ c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String())
}
func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) {
f := bytes.NewBufferString(fixtureLarge4GB)
- idx := &Idxfile{}
+ idx := new(MemoryIndex)
d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
err := d.Decode(idx)
@@ -88,29 +72,22 @@ func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) {
"35858be9c6f5914cbe6768489c41eb6809a2bceb": 5924278919,
}
- for _, e := range idx.Entries {
- c.Assert(expected[e.Hash.String()], Equals, e.Offset)
- }
-}
-
-func (s *IdxfileSuite) TestDecode64bitsOffsetsIdempotent(c *C) {
- f := bytes.NewBufferString(fixtureLarge4GB)
-
- expected := &Idxfile{}
-
- d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
- err := d.Decode(expected)
+ iter, err := idx.Entries()
c.Assert(err, IsNil)
- buf := bytes.NewBuffer(nil)
- _, err = NewEncoder(buf).Encode(expected)
- c.Assert(err, IsNil)
+ var entries int
+ for {
+ e, err := iter.Next()
+ if err == io.EOF {
+ break
+ }
+ c.Assert(err, IsNil)
+ entries++
- idx := &Idxfile{}
- err = NewDecoder(buf).Decode(idx)
- c.Assert(err, IsNil)
+ c.Assert(expected[e.Hash.String()], Equals, e.Offset)
+ }
- c.Assert(idx.Entries, DeepEquals, expected.Entries)
+ c.Assert(entries, Equals, len(expected))
}
const fixtureLarge4GB = `/3RPYwAAAAIAAAAAAAAAAAAAAAAAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEA
@@ -139,3 +116,30 @@ AAAAAAAMgAAAAQAAAI6AAAACgAAAA4AAAASAAAAFAAAAAV9Qam8AAAABYR1ShwAAAACdxfYxAAAA
ANz1Di4AAAABPUnxJAAAAADNxzlGr6vCJpIFz4XaG/fi/f9C9zgQ8ptKSQpfQ1NMJBGTDTxxYGGp
ch2xUA==
`
+
+func BenchmarkDecode(b *testing.B) {
+ if err := fixtures.Init(); err != nil {
+ b.Errorf("unexpected error initializing fixtures: %s", err)
+ }
+
+ f := fixtures.Basic().One()
+ fixture, err := ioutil.ReadAll(f.Idx())
+ if err != nil {
+ b.Errorf("unexpected error reading idx file: %s", err)
+ }
+
+ defer func() {
+ if err := fixtures.Clean(); err != nil {
+ b.Errorf("unexpected error cleaning fixtures: %s", err)
+ }
+ }()
+
+ for i := 0; i < b.N; i++ {
+ f := bytes.NewBuffer(fixture)
+ idx := new(MemoryIndex)
+ d := NewDecoder(f)
+ if err := d.Decode(idx); err != nil {
+ b.Errorf("unexpected error decoding: %s", err)
+ }
+ }
+}
diff --git a/plumbing/format/idxfile/encoder.go b/plumbing/format/idxfile/encoder.go
index 40abfb8..e479511 100644
--- a/plumbing/format/idxfile/encoder.go
+++ b/plumbing/format/idxfile/encoder.go
@@ -4,12 +4,11 @@ import (
"crypto/sha1"
"hash"
"io"
- "sort"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
-// Encoder writes Idxfile structs to an output stream.
+// Encoder writes MemoryIndex structs to an output stream.
type Encoder struct {
io.Writer
hash hash.Hash
@@ -22,11 +21,9 @@ func NewEncoder(w io.Writer) *Encoder {
return &Encoder{mw, h}
}
-// Encode encodes an Idxfile to the encoder writer.
-func (e *Encoder) Encode(idx *Idxfile) (int, error) {
- idx.Entries.Sort()
-
- flow := []func(*Idxfile) (int, error){
+// Encode encodes an MemoryIndex to the encoder writer.
+func (e *Encoder) Encode(idx *MemoryIndex) (int, error) {
+ flow := []func(*MemoryIndex) (int, error){
e.encodeHeader,
e.encodeFanout,
e.encodeHashes,
@@ -48,7 +45,7 @@ func (e *Encoder) Encode(idx *Idxfile) (int, error) {
return sz, nil
}
-func (e *Encoder) encodeHeader(idx *Idxfile) (int, error) {
+func (e *Encoder) encodeHeader(idx *MemoryIndex) (int, error) {
c, err := e.Write(idxHeader)
if err != nil {
return c, err
@@ -57,75 +54,81 @@ func (e *Encoder) encodeHeader(idx *Idxfile) (int, error) {
return c + 4, binary.WriteUint32(e, idx.Version)
}
-func (e *Encoder) encodeFanout(idx *Idxfile) (int, error) {
- fanout := idx.calculateFanout()
- for _, c := range fanout {
+func (e *Encoder) encodeFanout(idx *MemoryIndex) (int, error) {
+ for _, c := range idx.Fanout {
if err := binary.WriteUint32(e, c); err != nil {
return 0, err
}
}
- return 1024, nil
+ return fanout * 4, nil
}
-func (e *Encoder) encodeHashes(idx *Idxfile) (int, error) {
- sz := 0
- for _, ent := range idx.Entries {
- i, err := e.Write(ent.Hash[:])
- sz += i
+func (e *Encoder) encodeHashes(idx *MemoryIndex) (int, error) {
+ var size int
+ for k := 0; k < fanout; k++ {
+ pos := idx.FanoutMapping[k]
+ if pos == noMapping {
+ continue
+ }
+ n, err := e.Write(idx.Names[pos])
if err != nil {
- return sz, err
+ return size, err
}
+ size += n
}
-
- return sz, nil
+ return size, nil
}
-func (e *Encoder) encodeCRC32(idx *Idxfile) (int, error) {
- sz := 0
- for _, ent := range idx.Entries {
- err := binary.Write(e, ent.CRC32)
- sz += 4
+func (e *Encoder) encodeCRC32(idx *MemoryIndex) (int, error) {
+ var size int
+ for k := 0; k < fanout; k++ {
+ pos := idx.FanoutMapping[k]
+ if pos == noMapping {
+ continue
+ }
+ n, err := e.Write(idx.CRC32[pos])
if err != nil {
- return sz, err
+ return size, err
}
+
+ size += n
}
- return sz, nil
+ return size, nil
}
-func (e *Encoder) encodeOffsets(idx *Idxfile) (int, error) {
- sz := 0
-
- var o64bits []uint64
- for _, ent := range idx.Entries {
- o := ent.Offset
- if o > offsetLimit {
- o64bits = append(o64bits, o)
- o = offsetLimit + uint64(len(o64bits))
+func (e *Encoder) encodeOffsets(idx *MemoryIndex) (int, error) {
+ var size int
+ for k := 0; k < fanout; k++ {
+ pos := idx.FanoutMapping[k]
+ if pos == noMapping {
+ continue
}
- if err := binary.WriteUint32(e, uint32(o)); err != nil {
- return sz, err
+ n, err := e.Write(idx.Offset32[pos])
+ if err != nil {
+ return size, err
}
- sz += 4
+ size += n
}
- for _, o := range o64bits {
- if err := binary.WriteUint64(e, o); err != nil {
- return sz, err
+ if len(idx.Offset64) > 0 {
+ n, err := e.Write(idx.Offset64)
+ if err != nil {
+ return size, err
}
- sz += 8
+ size += n
}
- return sz, nil
+ return size, nil
}
-func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) {
+func (e *Encoder) encodeChecksums(idx *MemoryIndex) (int, error) {
if _, err := e.Write(idx.PackfileChecksum[:]); err != nil {
return 0, err
}
@@ -137,11 +140,3 @@ func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) {
return 40, nil
}
-
-// EntryList implements sort.Interface allowing sorting in increasing order.
-type EntryList []*Entry
-
-func (p EntryList) Len() int { return len(p) }
-func (p EntryList) Less(i, j int) bool { return p[i].Hash.String() < p[j].Hash.String() }
-func (p EntryList) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
-func (p EntryList) Sort() { sort.Sort(p) }
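
Since the encoder now writes the pre-built Names, Offset32, CRC32 and Offset64 buffers directly (no sort step), a round trip is just decode then encode. A small sketch, assuming idx is a populated *MemoryIndex; encodeIndex is an illustrative helper, not part of this change:

    import (
        "bytes"

        "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
    )

    // encodeIndex serializes a populated MemoryIndex back to idx wire format.
    func encodeIndex(idx *idxfile.MemoryIndex) ([]byte, error) {
        buf := new(bytes.Buffer)
        if _, err := idxfile.NewEncoder(buf).Encode(idx); err != nil {
            return nil, err
        }
        return buf.Bytes(), nil
    }
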
diff --git a/plumbing/format/idxfile/encoder_test.go b/plumbing/format/idxfile/encoder_test.go
index e5b96b7..e8deeea 100644
--- a/plumbing/format/idxfile/encoder_test.go
+++ b/plumbing/format/idxfile/encoder_test.go
@@ -4,37 +4,18 @@ import (
"bytes"
"io/ioutil"
- "gopkg.in/src-d/go-git.v4/plumbing"
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
)
-func (s *IdxfileSuite) TestEncode(c *C) {
- expected := &Idxfile{}
- expected.Add(plumbing.NewHash("4bfc730165c370df4a012afbb45ba3f9c332c0d4"), 82, 82)
- expected.Add(plumbing.NewHash("8fa2238efdae08d83c12ee176fae65ff7c99af46"), 42, 42)
-
- buf := bytes.NewBuffer(nil)
- e := NewEncoder(buf)
- _, err := e.Encode(expected)
- c.Assert(err, IsNil)
-
- idx := &Idxfile{}
- d := NewDecoder(buf)
- err = d.Decode(idx)
- c.Assert(err, IsNil)
-
- c.Assert(idx.Entries, DeepEquals, expected.Entries)
-}
-
func (s *IdxfileSuite) TestDecodeEncode(c *C) {
fixtures.ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
expected, err := ioutil.ReadAll(f.Idx())
c.Assert(err, IsNil)
- idx := &Idxfile{}
+ idx := new(MemoryIndex)
d := NewDecoder(bytes.NewBuffer(expected))
err = d.Decode(idx)
c.Assert(err, IsNil)
diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go
index 6b05eaa..c977bee 100644
--- a/plumbing/format/idxfile/idxfile.go
+++ b/plumbing/format/idxfile/idxfile.go
@@ -1,68 +1,278 @@
package idxfile
-import "gopkg.in/src-d/go-git.v4/plumbing"
+import (
+ "bytes"
+ "io"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
const (
// VersionSupported is the only idx version supported.
VersionSupported = 2
- offsetLimit = 0x7fffffff
+ noMapping = -1
)
var (
idxHeader = []byte{255, 't', 'O', 'c'}
)
-// Idxfile is the in memory representation of an idx file.
-type Idxfile struct {
- Version uint32
- Fanout [255]uint32
- ObjectCount uint32
- Entries EntryList
+// Index represents an index of a packfile.
+type Index interface {
+ // Contains checks whether the given hash is in the index.
+ Contains(h plumbing.Hash) (bool, error)
+ // FindOffset finds the offset in the packfile for the object with
+ // the given hash.
+ FindOffset(h plumbing.Hash) (int64, error)
+ // FindCRC32 finds the CRC32 of the object with the given hash.
+ FindCRC32(h plumbing.Hash) (uint32, error)
+ // FindHash finds the hash for the object with the given offset.
+ FindHash(o int64) (plumbing.Hash, error)
+ // Count returns the number of entries in the index.
+ Count() (int64, error)
+ // Entries returns an iterator to retrieve all index entries.
+ Entries() (EntryIter, error)
+}
+
+// MemoryIndex is the in memory representation of an idx file.
+type MemoryIndex struct {
+ Version uint32
+ Fanout [256]uint32
+ // FanoutMapping maps the position in the fanout table to the position
+ // in the Names, Offset32 and CRC32 slices. This improves the memory
+ // usage by not needing an array with unnecessary empty slots.
+ FanoutMapping [256]int
+ Names [][]byte
+ Offset32 [][]byte
+ CRC32 [][]byte
+ Offset64 []byte
PackfileChecksum [20]byte
IdxChecksum [20]byte
+
+ offsetHash map[int64]plumbing.Hash
}
-func NewIdxfile() *Idxfile {
- return &Idxfile{}
+var _ Index = (*MemoryIndex)(nil)
+
+// NewMemoryIndex returns an instance of a new MemoryIndex.
+func NewMemoryIndex() *MemoryIndex {
+ return &MemoryIndex{}
}
-// Entry is the in memory representation of an object entry in the idx file.
-type Entry struct {
- Hash plumbing.Hash
- CRC32 uint32
- Offset uint64
+func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) (int, bool) {
+ k := idx.FanoutMapping[h[0]]
+ if k == noMapping {
+ return 0, false
+ }
+
+ if len(idx.Names) <= k {
+ return 0, false
+ }
+
+ data := idx.Names[k]
+ high := uint64(len(idx.Offset32[k])) >> 2
+ if high == 0 {
+ return 0, false
+ }
+
+ low := uint64(0)
+ for {
+ mid := (low + high) >> 1
+ offset := mid * objectIDLength
+
+ cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength])
+ if cmp < 0 {
+ high = mid
+ } else if cmp == 0 {
+ return int(mid), true
+ } else {
+ low = mid + 1
+ }
+
+ if low >= high {
+ break
+ }
+ }
+
+ return 0, false
}
-// Add adds a new Entry with the given values to the Idxfile.
-func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
- idx.Entries = append(idx.Entries, &Entry{
- Hash: h,
- Offset: offset,
- CRC32: crc32,
- })
+// Contains implements the Index interface.
+func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) {
+ _, ok := idx.findHashIndex(h)
+ return ok, nil
+}
+
+// FindOffset implements the Index interface.
+func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) {
+ if len(idx.FanoutMapping) <= int(h[0]) {
+ return 0, plumbing.ErrObjectNotFound
+ }
+
+ k := idx.FanoutMapping[h[0]]
+ i, ok := idx.findHashIndex(h)
+ if !ok {
+ return 0, plumbing.ErrObjectNotFound
+ }
+
+ return idx.getOffset(k, i)
}
-func (idx *Idxfile) isValid() bool {
- fanout := idx.calculateFanout()
- for k, c := range idx.Fanout {
- if fanout[k] != c {
- return false
+const isO64Mask = uint64(1) << 31
+
+func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) (int64, error) {
+ offset := secondLevel << 2
+ buf := bytes.NewBuffer(idx.Offset32[firstLevel][offset : offset+4])
+ ofs, err := binary.ReadUint32(buf)
+ if err != nil {
+ return -1, err
+ }
+
+ if (uint64(ofs) & isO64Mask) != 0 {
+ offset := 8 * (uint64(ofs) & ^isO64Mask)
+ buf := bytes.NewBuffer(idx.Offset64[offset : offset+8])
+ n, err := binary.ReadUint64(buf)
+ if err != nil {
+ return -1, err
}
+
+ return int64(n), nil
+ }
+
+ return int64(ofs), nil
+}
+
+// FindCRC32 implements the Index interface.
+func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) {
+ k := idx.FanoutMapping[h[0]]
+ i, ok := idx.findHashIndex(h)
+ if !ok {
+ return 0, plumbing.ErrObjectNotFound
}
- return true
+ return idx.getCRC32(k, i)
+}
+
+func (idx *MemoryIndex) getCRC32(firstLevel, secondLevel int) (uint32, error) {
+ offset := secondLevel << 2
+ buf := bytes.NewBuffer(idx.CRC32[firstLevel][offset : offset+4])
+ return binary.ReadUint32(buf)
}
-func (idx *Idxfile) calculateFanout() [256]uint32 {
- fanout := [256]uint32{}
- for _, e := range idx.Entries {
- fanout[e.Hash[0]]++
+// FindHash implements the Index interface.
+func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) {
+ // Lazily generate the reverse offset/hash map if required.
+ if idx.offsetHash == nil {
+ if err := idx.genOffsetHash(); err != nil {
+ return plumbing.ZeroHash, err
+ }
}
- for i := 1; i < 256; i++ {
- fanout[i] += fanout[i-1]
+ hash, ok := idx.offsetHash[o]
+ if !ok {
+ return plumbing.ZeroHash, plumbing.ErrObjectNotFound
}
- return fanout
+ return hash, nil
+}
+
+// genOffsetHash generates the offset/hash mapping for reverse search.
+func (idx *MemoryIndex) genOffsetHash() error {
+ count, err := idx.Count()
+ if err != nil {
+ return err
+ }
+
+ idx.offsetHash = make(map[int64]plumbing.Hash, count)
+
+ iter, err := idx.Entries()
+ if err != nil {
+ return err
+ }
+
+ for {
+ entry, err := iter.Next()
+ if err != nil {
+ if err == io.EOF {
+ return nil
+ }
+ return err
+ }
+
+ idx.offsetHash[int64(entry.Offset)] = entry.Hash
+ }
+}
+
+// Count implements the Index interface.
+func (idx *MemoryIndex) Count() (int64, error) {
+ return int64(idx.Fanout[fanout-1]), nil
+}
+
+// Entries implements the Index interface.
+func (idx *MemoryIndex) Entries() (EntryIter, error) {
+ return &idxfileEntryIter{idx, 0, 0, 0}, nil
+}
+
+// EntryIter is an iterator that will return the entries in a packfile index.
+type EntryIter interface {
+ // Next returns the next entry in the packfile index.
+ Next() (*Entry, error)
+ // Close closes the iterator.
+ Close() error
+}
+
+type idxfileEntryIter struct {
+ idx *MemoryIndex
+ total int
+ firstLevel, secondLevel int
+}
+
+func (i *idxfileEntryIter) Next() (*Entry, error) {
+ for {
+ if i.firstLevel >= fanout {
+ return nil, io.EOF
+ }
+
+ if i.total >= int(i.idx.Fanout[i.firstLevel]) {
+ i.firstLevel++
+ i.secondLevel = 0
+ continue
+ }
+
+ entry := new(Entry)
+ ofs := i.secondLevel * objectIDLength
+ copy(entry.Hash[:], i.idx.Names[i.idx.FanoutMapping[i.firstLevel]][ofs:])
+
+ pos := i.idx.FanoutMapping[entry.Hash[0]]
+
+ offset, err := i.idx.getOffset(pos, i.secondLevel)
+ if err != nil {
+ return nil, err
+ }
+ entry.Offset = uint64(offset)
+
+ entry.CRC32, err = i.idx.getCRC32(pos, i.secondLevel)
+ if err != nil {
+ return nil, err
+ }
+
+ i.secondLevel++
+ i.total++
+
+ return entry, nil
+ }
+}
+
+func (i *idxfileEntryIter) Close() error {
+ i.firstLevel = fanout
+ return nil
+}
+
+// Entry is the in memory representation of an object entry in the idx file.
+type Entry struct {
+ Hash plumbing.Hash
+ CRC32 uint32
+ Offset uint64
}
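
MemoryIndex satisfies the new Index interface, so callers query it instead of walking an Entries slice. A hedged sketch of the lookup side, assuming idx was decoded as above; describeObject is an illustrative helper, not part of this change:

    import (
        "fmt"

        "gopkg.in/src-d/go-git.v4/plumbing"
        "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
    )

    // describeObject exercises the Index query methods for one hash.
    func describeObject(idx idxfile.Index, h plumbing.Hash) error {
        ok, err := idx.Contains(h) // binary search in the fanout bucket for h[0]
        if err != nil {
            return err
        }
        if !ok {
            return plumbing.ErrObjectNotFound
        }

        offset, err := idx.FindOffset(h) // 32-bit slot, or 64-bit table if the MSB is set
        if err != nil {
            return err
        }

        crc, err := idx.FindCRC32(h)
        if err != nil {
            return err
        }

        // FindHash is the reverse lookup; it lazily builds the offset->hash map.
        back, err := idx.FindHash(offset)
        if err != nil {
            return err
        }

        fmt.Printf("%s at offset %d, crc %08x (reverse lookup: %s)\n", h, offset, crc, back)
        return nil
    }
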
diff --git a/plumbing/format/idxfile/idxfile_test.go b/plumbing/format/idxfile/idxfile_test.go
new file mode 100644
index 0000000..d15accf
--- /dev/null
+++ b/plumbing/format/idxfile/idxfile_test.go
@@ -0,0 +1,154 @@
+package idxfile_test
+
+import (
+ "bytes"
+ "encoding/base64"
+ "fmt"
+ "io"
+ "testing"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
+
+ . "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git-fixtures.v3"
+)
+
+func BenchmarkFindOffset(b *testing.B) {
+ idx, err := fixtureIndex()
+ if err != nil {
+ b.Fatalf(err.Error())
+ }
+
+ for i := 0; i < b.N; i++ {
+ for _, h := range fixtureHashes {
+ _, err := idx.FindOffset(h)
+ if err != nil {
+ b.Fatalf("error getting offset: %s", err)
+ }
+ }
+ }
+}
+
+func BenchmarkFindCRC32(b *testing.B) {
+ idx, err := fixtureIndex()
+ if err != nil {
+ b.Fatalf(err.Error())
+ }
+
+ for i := 0; i < b.N; i++ {
+ for _, h := range fixtureHashes {
+ _, err := idx.FindCRC32(h)
+ if err != nil {
+ b.Fatalf("error getting crc32: %s", err)
+ }
+ }
+ }
+}
+
+func BenchmarkContains(b *testing.B) {
+ idx, err := fixtureIndex()
+ if err != nil {
+ b.Fatalf(err.Error())
+ }
+
+ for i := 0; i < b.N; i++ {
+ for _, h := range fixtureHashes {
+ ok, err := idx.Contains(h)
+ if err != nil {
+ b.Fatalf("error checking if hash is in index: %s", err)
+ }
+
+ if !ok {
+ b.Error("expected hash to be in index")
+ }
+ }
+ }
+}
+
+func BenchmarkEntries(b *testing.B) {
+ idx, err := fixtureIndex()
+ if err != nil {
+ b.Fatalf(err.Error())
+ }
+
+ for i := 0; i < b.N; i++ {
+ iter, err := idx.Entries()
+ if err != nil {
+ b.Fatalf("unexpected error getting entries: %s", err)
+ }
+
+ var entries int
+ for {
+ _, err := iter.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+
+ b.Errorf("unexpected error getting entry: %s", err)
+ }
+
+ entries++
+ }
+
+ if entries != len(fixtureHashes) {
+ b.Errorf("expecting entries to be %d, got %d", len(fixtureHashes), entries)
+ }
+ }
+}
+
+type IndexSuite struct {
+ fixtures.Suite
+}
+
+var _ = Suite(&IndexSuite{})
+
+func (s *IndexSuite) TestFindHash(c *C) {
+ idx, err := fixtureIndex()
+ c.Assert(err, IsNil)
+
+ for i, pos := range fixtureOffsets {
+ hash, err := idx.FindHash(pos)
+ c.Assert(err, IsNil)
+ c.Assert(hash, Equals, fixtureHashes[i])
+ }
+}
+
+var fixtureHashes = []plumbing.Hash{
+ plumbing.NewHash("303953e5aa461c203a324821bc1717f9b4fff895"),
+ plumbing.NewHash("5296768e3d9f661387ccbff18c4dea6c997fd78c"),
+ plumbing.NewHash("03fc8d58d44267274edef4585eaeeb445879d33f"),
+ plumbing.NewHash("8f3ceb4ea4cb9e4a0f751795eb41c9a4f07be772"),
+ plumbing.NewHash("e0d1d625010087f79c9e01ad9d8f95e1628dda02"),
+ plumbing.NewHash("90eba326cdc4d1d61c5ad25224ccbf08731dd041"),
+ plumbing.NewHash("bab53055add7bc35882758a922c54a874d6b1272"),
+ plumbing.NewHash("1b8995f51987d8a449ca5ea4356595102dc2fbd4"),
+ plumbing.NewHash("35858be9c6f5914cbe6768489c41eb6809a2bceb"),
+}
+
+var fixtureOffsets = []int64{
+ 12,
+ 142,
+ 1601322837,
+ 2646996529,
+ 3452385606,
+ 3707047470,
+ 5323223332,
+ 5894072943,
+ 5924278919,
+}
+
+func fixtureIndex() (*idxfile.MemoryIndex, error) {
+ f := bytes.NewBufferString(fixtureLarge4GB)
+
+ idx := new(idxfile.MemoryIndex)
+
+ d := idxfile.NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
+ err := d.Decode(idx)
+ if err != nil {
+ return nil, fmt.Errorf("unexpected error decoding index: %s", err)
+ }
+
+ return idx, nil
+}
diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go
new file mode 100644
index 0000000..aa919e7
--- /dev/null
+++ b/plumbing/format/idxfile/writer.go
@@ -0,0 +1,186 @@
+package idxfile
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+ "sort"
+ "sync"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
+
+// objects implements sort.Interface and uses hash as sorting key.
+type objects []Entry
+
+// Writer implements a packfile Observer interface and is used to generate
+// indexes.
+type Writer struct {
+ m sync.Mutex
+
+ count uint32
+ checksum plumbing.Hash
+ objects objects
+ offset64 uint32
+ finished bool
+ index *MemoryIndex
+ added map[plumbing.Hash]struct{}
+}
+
+// Index returns a previously created MemoryIndex or creates a new one if
+// needed.
+func (w *Writer) Index() (*MemoryIndex, error) {
+ w.m.Lock()
+ defer w.m.Unlock()
+
+ if w.index == nil {
+ return w.createIndex()
+ }
+
+ return w.index, nil
+}
+
+// Add appends new object data.
+func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) {
+ w.m.Lock()
+ defer w.m.Unlock()
+
+ if w.added == nil {
+ w.added = make(map[plumbing.Hash]struct{})
+ }
+
+ if _, ok := w.added[h]; !ok {
+ w.added[h] = struct{}{}
+ w.objects = append(w.objects, Entry{h, crc, pos})
+ }
+
+}
+
+func (w *Writer) Finished() bool {
+ return w.finished
+}
+
+// OnHeader implements packfile.Observer interface.
+func (w *Writer) OnHeader(count uint32) error {
+ w.count = count
+ w.objects = make(objects, 0, count)
+ return nil
+}
+
+// OnInflatedObjectHeader implements packfile.Observer interface.
+func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error {
+ return nil
+}
+
+// OnInflatedObjectContent implements packfile.Observer interface.
+func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, _ []byte) error {
+ w.Add(h, uint64(pos), crc)
+ return nil
+}
+
+// OnFooter implements packfile.Observer interface.
+func (w *Writer) OnFooter(h plumbing.Hash) error {
+ w.checksum = h
+ w.finished = true
+ _, err := w.createIndex()
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// creatIndex returns a filled MemoryIndex with the information filled by
+// the observer callbacks.
+func (w *Writer) createIndex() (*MemoryIndex, error) {
+ if !w.finished {
+ return nil, fmt.Errorf("the index still hasn't finished building")
+ }
+
+ idx := new(MemoryIndex)
+ w.index = idx
+
+ sort.Sort(w.objects)
+
+ // unmap all fans by default
+ for i := range idx.FanoutMapping {
+ idx.FanoutMapping[i] = noMapping
+ }
+
+ buf := new(bytes.Buffer)
+
+ last := -1
+ bucket := -1
+ for i, o := range w.objects {
+ fan := o.Hash[0]
+
+ // fill the gaps between fans
+ for j := last + 1; j < int(fan); j++ {
+ idx.Fanout[j] = uint32(i)
+ }
+
+ // update the number of objects for this position
+ idx.Fanout[fan] = uint32(i + 1)
+
+ // we move from one bucket to another, update counters and allocate
+ // memory
+ if last != int(fan) {
+ bucket++
+ idx.FanoutMapping[fan] = bucket
+ last = int(fan)
+
+ idx.Names = append(idx.Names, make([]byte, 0))
+ idx.Offset32 = append(idx.Offset32, make([]byte, 0))
+ idx.CRC32 = append(idx.CRC32, make([]byte, 0))
+ }
+
+ idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...)
+
+ offset := o.Offset
+ if offset > math.MaxInt32 {
+ offset = w.addOffset64(offset)
+ }
+
+ buf.Truncate(0)
+ binary.WriteUint32(buf, uint32(offset))
+ idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...)
+
+ buf.Truncate(0)
+ binary.WriteUint32(buf, uint32(o.CRC32))
+ idx.CRC32[bucket] = append(idx.CRC32[bucket], buf.Bytes()...)
+ }
+
+ for j := last + 1; j < 256; j++ {
+ idx.Fanout[j] = uint32(len(w.objects))
+ }
+
+ idx.Version = VersionSupported
+ idx.PackfileChecksum = w.checksum
+
+ return idx, nil
+}
+
+func (w *Writer) addOffset64(pos uint64) uint64 {
+ buf := new(bytes.Buffer)
+ binary.WriteUint64(buf, pos)
+ w.index.Offset64 = append(w.index.Offset64, buf.Bytes()...)
+
+ index := uint64(w.offset64 | (1 << 31))
+ w.offset64++
+
+ return index
+}
+
+func (o objects) Len() int {
+ return len(o)
+}
+
+func (o objects) Less(i int, j int) bool {
+ cmp := bytes.Compare(o[i].Hash[:], o[j].Hash[:])
+ return cmp < 0
+}
+
+func (o objects) Swap(i int, j int) {
+ o[i], o[j] = o[j], o[i]
+}
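
The Writer is meant to be driven by the packfile parser as an observer, which the new writer_test.go below also does. A condensed sketch of that flow, assuming the parser API used in the test; buildIndex is an illustrative helper, not part of this change:

    import (
        "io"

        "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
        "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
    )

    // buildIndex streams a packfile and lets the Writer observer collect the
    // hash/offset/CRC triples, then materializes the MemoryIndex.
    func buildIndex(r io.Reader) (*idxfile.MemoryIndex, error) {
        w := new(idxfile.Writer)

        parser, err := packfile.NewParser(packfile.NewScanner(r), w)
        if err != nil {
            return nil, err
        }

        // Parse drives OnHeader, OnInflatedObjectContent and OnFooter.
        if _, err := parser.Parse(); err != nil {
            return nil, err
        }

        // Index only succeeds once OnFooter has marked the writer as finished.
        return w.Index()
    }
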
diff --git a/plumbing/format/idxfile/writer_test.go b/plumbing/format/idxfile/writer_test.go
new file mode 100644
index 0000000..912211d
--- /dev/null
+++ b/plumbing/format/idxfile/writer_test.go
@@ -0,0 +1,98 @@
+package idxfile_test
+
+import (
+ "bytes"
+ "encoding/base64"
+ "io/ioutil"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
+ "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
+
+ . "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git-fixtures.v3"
+)
+
+type WriterSuite struct {
+ fixtures.Suite
+}
+
+var _ = Suite(&WriterSuite{})
+
+func (s *WriterSuite) TestWriter(c *C) {
+ f := fixtures.Basic().One()
+ scanner := packfile.NewScanner(f.Packfile())
+
+ obs := new(idxfile.Writer)
+ parser, err := packfile.NewParser(scanner, obs)
+ c.Assert(err, IsNil)
+
+ _, err = parser.Parse()
+ c.Assert(err, IsNil)
+
+ idx, err := obs.Index()
+ c.Assert(err, IsNil)
+
+ idxFile := f.Idx()
+ expected, err := ioutil.ReadAll(idxFile)
+ c.Assert(err, IsNil)
+ idxFile.Close()
+
+ buf := new(bytes.Buffer)
+ encoder := idxfile.NewEncoder(buf)
+ n, err := encoder.Encode(idx)
+ c.Assert(err, IsNil)
+ c.Assert(n, Equals, len(expected))
+
+ c.Assert(buf.Bytes(), DeepEquals, expected)
+}
+
+func (s *WriterSuite) TestWriterLarge(c *C) {
+ writer := new(idxfile.Writer)
+ err := writer.OnHeader(uint32(len(fixture4GbEntries)))
+ c.Assert(err, IsNil)
+
+ for _, o := range fixture4GbEntries {
+ err = writer.OnInflatedObjectContent(plumbing.NewHash(o.hash), o.offset, o.crc, nil)
+ c.Assert(err, IsNil)
+ }
+
+ err = writer.OnFooter(fixture4GbChecksum)
+ c.Assert(err, IsNil)
+
+ idx, err := writer.Index()
+ c.Assert(err, IsNil)
+
+ // load fixture index
+ f := bytes.NewBufferString(fixtureLarge4GB)
+ expected, err := ioutil.ReadAll(base64.NewDecoder(base64.StdEncoding, f))
+ c.Assert(err, IsNil)
+
+ buf := new(bytes.Buffer)
+ encoder := idxfile.NewEncoder(buf)
+ n, err := encoder.Encode(idx)
+ c.Assert(err, IsNil)
+ c.Assert(n, Equals, len(expected))
+
+ c.Assert(buf.Bytes(), DeepEquals, expected)
+}
+
+var (
+ fixture4GbChecksum = plumbing.NewHash("afabc2269205cf85da1bf7e2fdff42f73810f29b")
+
+ fixture4GbEntries = []struct {
+ offset int64
+ hash string
+ crc uint32
+ }{
+ {12, "303953e5aa461c203a324821bc1717f9b4fff895", 0xbc347c4c},
+ {142, "5296768e3d9f661387ccbff18c4dea6c997fd78c", 0xcdc22842},
+ {1601322837, "03fc8d58d44267274edef4585eaeeb445879d33f", 0x929dfaaa},
+ {2646996529, "8f3ceb4ea4cb9e4a0f751795eb41c9a4f07be772", 0xa61def8a},
+ {3452385606, "e0d1d625010087f79c9e01ad9d8f95e1628dda02", 0x06bea180},
+ {3707047470, "90eba326cdc4d1d61c5ad25224ccbf08731dd041", 0x7193f3ba},
+ {5323223332, "bab53055add7bc35882758a922c54a874d6b1272", 0xac269b8e},
+ {5894072943, "1b8995f51987d8a449ca5ea4356595102dc2fbd4", 0x2187c056},
+ {5924278919, "35858be9c6f5914cbe6768489c41eb6809a2bceb", 0x9c89d9d2},
+ }
+)