author    Miguel Molina <miguel@erizocosmi.co>  2018-07-26 14:10:19 +0200
committer GitHub <noreply@github.com>           2018-07-26 14:10:19 +0200
commit    a8ff3e599b3ee998a8b8626cd9fe9fa68490d354 (patch)
tree      ac984b72e7c54160b0b154bffd4474f96ae5028e
parent    9f00789688d26191a987fdec8bc2678362ec4453 (diff)
parent    009f1069a1248c1e9189a9e4c342f6d017156ec4 (diff)
download  go-git-a8ff3e599b3ee998a8b8626cd9fe9fa68490d354.tar.gz
Merge pull request #896 from erizocosmico/feature/new-index-decoder
plumbing/format/idxfile: add new Index and MemoryIndex
-rw-r--r--  plumbing/format/idxfile/decoder.go       | 109
-rw-r--r--  plumbing/format/idxfile/decoder_test.go  | 106
-rw-r--r--  plumbing/format/idxfile/encoder.go       | 101
-rw-r--r--  plumbing/format/idxfile/encoder_test.go  |  21
-rw-r--r--  plumbing/format/idxfile/idxfile.go       | 224
-rw-r--r--  plumbing/format/idxfile/idxfile_test.go  | 109
-rw-r--r--  plumbing/format/packfile/decoder.go      |  27
-rw-r--r--  plumbing/format/packfile/index.go        | 125
-rw-r--r--  storage/filesystem/index.go              |  47
-rw-r--r--  storage/filesystem/object.go             |   2
10 files changed, 484 insertions(+), 387 deletions(-)
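The merge replaces the old flat Idxfile entry list (and the separate packfile.Index) with an idxfile.Index interface backed by MemoryIndex. A minimal sketch of how a caller can query the new API after decoding an .idx file; the path below is a placeholder, everything else mirrors the functions added in this diff:

// Sketch: querying the new idxfile.Index API introduced by this merge.
package main

import (
	"fmt"
	"os"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
)

func main() {
	f, err := os.Open(".git/objects/pack/pack-example.idx") // placeholder path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	idx := idxfile.NewMemoryIndex()
	if err := idxfile.NewDecoder(f).Decode(idx); err != nil {
		panic(err)
	}

	h := plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea")
	if ok, _ := idx.Contains(h); ok {
		offset, _ := idx.FindOffset(h) // int64 offset inside the packfile
		crc, _ := idx.FindCRC32(h)     // uint32 CRC of the packed object
		fmt.Println(offset, crc)
	}
}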
diff --git a/plumbing/format/idxfile/decoder.go b/plumbing/format/idxfile/decoder.go
index 45afb1e..25ff88e 100644
--- a/plumbing/format/idxfile/decoder.go
+++ b/plumbing/format/idxfile/decoder.go
@@ -17,6 +17,11 @@ var (
ErrMalformedIdxFile = errors.New("Malformed IDX file")
)
+const (
+ fanout = 256
+ objectIDLength = 20
+)
+
// Decoder reads and decodes idx files from an input stream.
type Decoder struct {
*bufio.Reader
@@ -27,13 +32,13 @@ func NewDecoder(r io.Reader) *Decoder {
return &Decoder{bufio.NewReader(r)}
}
-// Decode reads from the stream and decode the content into the Idxfile struct.
-func (d *Decoder) Decode(idx *Idxfile) error {
+// Decode reads from the stream and decodes the content into the MemoryIndex struct.
+func (d *Decoder) Decode(idx *MemoryIndex) error {
if err := validateHeader(d); err != nil {
return err
}
- flow := []func(*Idxfile, io.Reader) error{
+ flow := []func(*MemoryIndex, io.Reader) error{
readVersion,
readFanout,
readObjectNames,
@@ -48,10 +53,6 @@ func (d *Decoder) Decode(idx *Idxfile) error {
}
}
- if !idx.isValid() {
- return ErrMalformedIdxFile
- }
-
return nil
}
@@ -68,7 +69,7 @@ func validateHeader(r io.Reader) error {
return nil
}
-func readVersion(idx *Idxfile, r io.Reader) error {
+func readVersion(idx *MemoryIndex, r io.Reader) error {
v, err := binary.ReadUint32(r)
if err != nil {
return err
@@ -82,74 +83,92 @@ func readVersion(idx *Idxfile, r io.Reader) error {
return nil
}
-func readFanout(idx *Idxfile, r io.Reader) error {
- var err error
- for i := 0; i < 255; i++ {
- idx.Fanout[i], err = binary.ReadUint32(r)
+func readFanout(idx *MemoryIndex, r io.Reader) error {
+ for k := 0; k < fanout; k++ {
+ n, err := binary.ReadUint32(r)
if err != nil {
return err
}
+
+ idx.Fanout[k] = n
+ idx.FanoutMapping[k] = noMapping
}
- idx.ObjectCount, err = binary.ReadUint32(r)
- return err
+ return nil
}
-func readObjectNames(idx *Idxfile, r io.Reader) error {
- c := int(idx.ObjectCount)
- new := make([]Entry, c)
- for i := 0; i < c; i++ {
- e := &new[i]
- if _, err := io.ReadFull(r, e.Hash[:]); err != nil {
+func readObjectNames(idx *MemoryIndex, r io.Reader) error {
+ for k := 0; k < fanout; k++ {
+ var buckets uint32
+ if k == 0 {
+ buckets = idx.Fanout[k]
+ } else {
+ buckets = idx.Fanout[k] - idx.Fanout[k-1]
+ }
+
+ if buckets == 0 {
+ continue
+ }
+
+ if buckets < 0 {
+ return ErrMalformedIdxFile
+ }
+
+ idx.FanoutMapping[k] = len(idx.Names)
+
+ nameLen := int(buckets * objectIDLength)
+ bin := make([]byte, nameLen)
+ if _, err := io.ReadFull(r, bin); err != nil {
return err
}
- idx.Entries = append(idx.Entries, e)
+ idx.Names = append(idx.Names, bin)
+ idx.Offset32 = append(idx.Offset32, make([]byte, buckets*4))
+ idx.Crc32 = append(idx.Crc32, make([]byte, buckets*4))
}
return nil
}
-func readCRC32(idx *Idxfile, r io.Reader) error {
- c := int(idx.ObjectCount)
- for i := 0; i < c; i++ {
- if err := binary.Read(r, &idx.Entries[i].CRC32); err != nil {
- return err
+func readCRC32(idx *MemoryIndex, r io.Reader) error {
+ for k := 0; k < fanout; k++ {
+ if pos := idx.FanoutMapping[k]; pos != noMapping {
+ if _, err := io.ReadFull(r, idx.Crc32[pos]); err != nil {
+ return err
+ }
}
}
return nil
}
-func readOffsets(idx *Idxfile, r io.Reader) error {
- c := int(idx.ObjectCount)
-
- for i := 0; i < c; i++ {
- o, err := binary.ReadUint32(r)
- if err != nil {
- return err
+func readOffsets(idx *MemoryIndex, r io.Reader) error {
+ var o64cnt int
+ for k := 0; k < fanout; k++ {
+ if pos := idx.FanoutMapping[k]; pos != noMapping {
+ if _, err := io.ReadFull(r, idx.Offset32[pos]); err != nil {
+ return err
+ }
+
+ for p := 0; p < len(idx.Offset32[pos]); p += 4 {
+ if idx.Offset32[pos][p]&(byte(1)<<7) > 0 {
+ o64cnt++
+ }
+ }
}
-
- idx.Entries[i].Offset = uint64(o)
}
- for i := 0; i < c; i++ {
- if idx.Entries[i].Offset <= offsetLimit {
- continue
- }
-
- o, err := binary.ReadUint64(r)
- if err != nil {
+ if o64cnt > 0 {
+ idx.Offset64 = make([]byte, o64cnt*8)
+ if _, err := io.ReadFull(r, idx.Offset64); err != nil {
return err
}
-
- idx.Entries[i].Offset = o
}
return nil
}
-func readChecksums(idx *Idxfile, r io.Reader) error {
+func readChecksums(idx *MemoryIndex, r io.Reader) error {
if _, err := io.ReadFull(r, idx.PackfileChecksum[:]); err != nil {
return err
}
diff --git a/plumbing/format/idxfile/decoder_test.go b/plumbing/format/idxfile/decoder_test.go
index 20d6859..b43d7c5 100644
--- a/plumbing/format/idxfile/decoder_test.go
+++ b/plumbing/format/idxfile/decoder_test.go
@@ -4,11 +4,12 @@ import (
"bytes"
"encoding/base64"
"fmt"
+ "io"
+ "io/ioutil"
"testing"
+ "gopkg.in/src-d/go-git.v4/plumbing"
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
- "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
- "gopkg.in/src-d/go-git.v4/storage/memory"
. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
@@ -26,51 +27,34 @@ func (s *IdxfileSuite) TestDecode(c *C) {
f := fixtures.Basic().One()
d := NewDecoder(f.Idx())
- idx := &Idxfile{}
+ idx := new(MemoryIndex)
err := d.Decode(idx)
c.Assert(err, IsNil)
- c.Assert(idx.Entries, HasLen, 31)
- c.Assert(idx.Entries[0].Hash.String(), Equals, "1669dce138d9b841a518c64b10914d88f5e488ea")
- c.Assert(idx.Entries[0].Offset, Equals, uint64(615))
- c.Assert(idx.Entries[0].CRC32, Equals, uint32(3645019190))
+ count, _ := idx.Count()
+ c.Assert(count, Equals, int64(31))
- c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9")
- c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String())
-}
-
-func (s *IdxfileSuite) TestDecodeCRCs(c *C) {
- f := fixtures.Basic().ByTag("ofs-delta").One()
-
- scanner := packfile.NewScanner(f.Packfile())
- storage := memory.NewStorage()
-
- pd, err := packfile.NewDecoder(scanner, storage)
+ hash := plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea")
+ ok, err := idx.Contains(hash)
c.Assert(err, IsNil)
- _, err = pd.Decode()
- c.Assert(err, IsNil)
-
- i := pd.Index().ToIdxFile()
- i.Version = VersionSupported
+ c.Assert(ok, Equals, true)
- buf := bytes.NewBuffer(nil)
- e := NewEncoder(buf)
- _, err = e.Encode(i)
+ offset, err := idx.FindOffset(hash)
c.Assert(err, IsNil)
+ c.Assert(offset, Equals, int64(615))
- idx := &Idxfile{}
-
- d := NewDecoder(buf)
- err = d.Decode(idx)
+ crc32, err := idx.FindCRC32(hash)
c.Assert(err, IsNil)
+ c.Assert(crc32, Equals, uint32(3645019190))
- c.Assert(idx.Entries, DeepEquals, i.Entries)
+ c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9")
+ c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String())
}
func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) {
f := bytes.NewBufferString(fixtureLarge4GB)
- idx := &Idxfile{}
+ idx := new(MemoryIndex)
d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
err := d.Decode(idx)
@@ -88,29 +72,22 @@ func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) {
"35858be9c6f5914cbe6768489c41eb6809a2bceb": 5924278919,
}
- for _, e := range idx.Entries {
- c.Assert(expected[e.Hash.String()], Equals, e.Offset)
- }
-}
-
-func (s *IdxfileSuite) TestDecode64bitsOffsetsIdempotent(c *C) {
- f := bytes.NewBufferString(fixtureLarge4GB)
-
- expected := &Idxfile{}
-
- d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
- err := d.Decode(expected)
+ iter, err := idx.Entries()
c.Assert(err, IsNil)
- buf := bytes.NewBuffer(nil)
- _, err = NewEncoder(buf).Encode(expected)
- c.Assert(err, IsNil)
+ var entries int
+ for {
+ e, err := iter.Next()
+ if err == io.EOF {
+ break
+ }
+ c.Assert(err, IsNil)
+ entries++
- idx := &Idxfile{}
- err = NewDecoder(buf).Decode(idx)
- c.Assert(err, IsNil)
+ c.Assert(expected[e.Hash.String()], Equals, e.Offset)
+ }
- c.Assert(idx.Entries, DeepEquals, expected.Entries)
+ c.Assert(entries, Equals, len(expected))
}
const fixtureLarge4GB = `/3RPYwAAAAIAAAAAAAAAAAAAAAAAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEA
@@ -139,3 +116,30 @@ AAAAAAAMgAAAAQAAAI6AAAACgAAAA4AAAASAAAAFAAAAAV9Qam8AAAABYR1ShwAAAACdxfYxAAAA
ANz1Di4AAAABPUnxJAAAAADNxzlGr6vCJpIFz4XaG/fi/f9C9zgQ8ptKSQpfQ1NMJBGTDTxxYGGp
ch2xUA==
`
+
+func BenchmarkDecode(b *testing.B) {
+ if err := fixtures.Init(); err != nil {
+ b.Errorf("unexpected error initializing fixtures: %s", err)
+ }
+
+ f := fixtures.Basic().One()
+ fixture, err := ioutil.ReadAll(f.Idx())
+ if err != nil {
+ b.Errorf("unexpected error reading idx file: %s", err)
+ }
+
+ defer func() {
+ if err := fixtures.Clean(); err != nil {
+ b.Errorf("unexpected error cleaning fixtures: %s", err)
+ }
+ }()
+
+ for i := 0; i < b.N; i++ {
+ f := bytes.NewBuffer(fixture)
+ idx := new(MemoryIndex)
+ d := NewDecoder(f)
+ if err := d.Decode(idx); err != nil {
+ b.Errorf("unexpected error decoding: %s", err)
+ }
+ }
+}
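The rewritten test walks the index through the new EntryIter rather than indexing a slice. A sketch of that consumption pattern as a helper, assuming the same imports the test uses ("io", plumbing, idxfile):

// Sketch: draining an EntryIter into a hash -> offset map, the same
// pattern the updated TestDecode64bitsOffsets above follows.
func entriesByHash(idx idxfile.Index) (map[plumbing.Hash]uint64, error) {
	iter, err := idx.Entries()
	if err != nil {
		return nil, err
	}
	defer iter.Close()

	out := make(map[plumbing.Hash]uint64)
	for {
		e, err := iter.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		out[e.Hash] = e.Offset
	}
	return out, nil
}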
diff --git a/plumbing/format/idxfile/encoder.go b/plumbing/format/idxfile/encoder.go
index 40abfb8..55df466 100644
--- a/plumbing/format/idxfile/encoder.go
+++ b/plumbing/format/idxfile/encoder.go
@@ -4,12 +4,11 @@ import (
"crypto/sha1"
"hash"
"io"
- "sort"
"gopkg.in/src-d/go-git.v4/utils/binary"
)
-// Encoder writes Idxfile structs to an output stream.
+// Encoder writes MemoryIndex structs to an output stream.
type Encoder struct {
io.Writer
hash hash.Hash
@@ -22,11 +21,9 @@ func NewEncoder(w io.Writer) *Encoder {
return &Encoder{mw, h}
}
-// Encode encodes an Idxfile to the encoder writer.
-func (e *Encoder) Encode(idx *Idxfile) (int, error) {
- idx.Entries.Sort()
-
- flow := []func(*Idxfile) (int, error){
+// Encode encodes a MemoryIndex to the encoder writer.
+func (e *Encoder) Encode(idx *MemoryIndex) (int, error) {
+ flow := []func(*MemoryIndex) (int, error){
e.encodeHeader,
e.encodeFanout,
e.encodeHashes,
@@ -48,7 +45,7 @@ func (e *Encoder) Encode(idx *Idxfile) (int, error) {
return sz, nil
}
-func (e *Encoder) encodeHeader(idx *Idxfile) (int, error) {
+func (e *Encoder) encodeHeader(idx *MemoryIndex) (int, error) {
c, err := e.Write(idxHeader)
if err != nil {
return c, err
@@ -57,75 +54,81 @@ func (e *Encoder) encodeHeader(idx *Idxfile) (int, error) {
return c + 4, binary.WriteUint32(e, idx.Version)
}
-func (e *Encoder) encodeFanout(idx *Idxfile) (int, error) {
- fanout := idx.calculateFanout()
- for _, c := range fanout {
+func (e *Encoder) encodeFanout(idx *MemoryIndex) (int, error) {
+ for _, c := range idx.Fanout {
if err := binary.WriteUint32(e, c); err != nil {
return 0, err
}
}
- return 1024, nil
+ return fanout * 4, nil
}
-func (e *Encoder) encodeHashes(idx *Idxfile) (int, error) {
- sz := 0
- for _, ent := range idx.Entries {
- i, err := e.Write(ent.Hash[:])
- sz += i
+func (e *Encoder) encodeHashes(idx *MemoryIndex) (int, error) {
+ var size int
+ for k := 0; k < fanout; k++ {
+ pos := idx.FanoutMapping[k]
+ if pos == noMapping {
+ continue
+ }
+ n, err := e.Write(idx.Names[pos])
if err != nil {
- return sz, err
+ return size, err
}
+ size += n
}
-
- return sz, nil
+ return size, nil
}
-func (e *Encoder) encodeCRC32(idx *Idxfile) (int, error) {
- sz := 0
- for _, ent := range idx.Entries {
- err := binary.Write(e, ent.CRC32)
- sz += 4
+func (e *Encoder) encodeCRC32(idx *MemoryIndex) (int, error) {
+ var size int
+ for k := 0; k < fanout; k++ {
+ pos := idx.FanoutMapping[k]
+ if pos == noMapping {
+ continue
+ }
+ n, err := e.Write(idx.Crc32[pos])
if err != nil {
- return sz, err
+ return size, err
}
+
+ size += n
}
- return sz, nil
+ return size, nil
}
-func (e *Encoder) encodeOffsets(idx *Idxfile) (int, error) {
- sz := 0
-
- var o64bits []uint64
- for _, ent := range idx.Entries {
- o := ent.Offset
- if o > offsetLimit {
- o64bits = append(o64bits, o)
- o = offsetLimit + uint64(len(o64bits))
+func (e *Encoder) encodeOffsets(idx *MemoryIndex) (int, error) {
+ var size int
+ for k := 0; k < fanout; k++ {
+ pos := idx.FanoutMapping[k]
+ if pos == noMapping {
+ continue
}
- if err := binary.WriteUint32(e, uint32(o)); err != nil {
- return sz, err
+ n, err := e.Write(idx.Offset32[pos])
+ if err != nil {
+ return size, err
}
- sz += 4
+ size += n
}
- for _, o := range o64bits {
- if err := binary.WriteUint64(e, o); err != nil {
- return sz, err
+ if len(idx.Offset64) > 0 {
+ n, err := e.Write(idx.Offset64)
+ if err != nil {
+ return size, err
}
- sz += 8
+ size += n
}
- return sz, nil
+ return size, nil
}
-func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) {
+func (e *Encoder) encodeChecksums(idx *MemoryIndex) (int, error) {
if _, err := e.Write(idx.PackfileChecksum[:]); err != nil {
return 0, err
}
@@ -137,11 +140,3 @@ func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) {
return 40, nil
}
-
-// EntryList implements sort.Interface allowing sorting in increasing order.
-type EntryList []*Entry
-
-func (p EntryList) Len() int { return len(p) }
-func (p EntryList) Less(i, j int) bool { return p[i].Hash.String() < p[j].Hash.String() }
-func (p EntryList) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
-func (p EntryList) Sort() { sort.Sort(p) }
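Because the encoder now emits the decoder's byte buckets verbatim, the size of a version 2 idx file is determined only by the object count and by how many offsets need a 64-bit slot. A sketch of that accounting, assuming the standard v2 layout the encoder above writes:

// Sketch: byte size of a version-2 idx file for n objects, n64 of which
// have packfile offsets that overflow the 31-bit encoding and spill into
// the 64-bit offset table written by encodeOffsets.
func idxV2Size(n, n64 int) int {
	const (
		header   = 4 + 4   // \377tOc magic + version
		fanoutSz = 256 * 4 // cumulative fanout table
		name     = 20      // SHA-1 object name
		crc      = 4       // CRC32 per object
		ofs32    = 4       // 32-bit offset slot per object
		ofs64    = 8       // extra slot per large offset
		trailers = 20 + 20 // packfile checksum + idx checksum
	)
	return header + fanoutSz + n*(name+crc+ofs32) + n64*ofs64 + trailers
}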
diff --git a/plumbing/format/idxfile/encoder_test.go b/plumbing/format/idxfile/encoder_test.go
index e5b96b7..e8deeea 100644
--- a/plumbing/format/idxfile/encoder_test.go
+++ b/plumbing/format/idxfile/encoder_test.go
@@ -4,37 +4,18 @@ import (
"bytes"
"io/ioutil"
- "gopkg.in/src-d/go-git.v4/plumbing"
. "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
. "gopkg.in/check.v1"
"gopkg.in/src-d/go-git-fixtures.v3"
)
-func (s *IdxfileSuite) TestEncode(c *C) {
- expected := &Idxfile{}
- expected.Add(plumbing.NewHash("4bfc730165c370df4a012afbb45ba3f9c332c0d4"), 82, 82)
- expected.Add(plumbing.NewHash("8fa2238efdae08d83c12ee176fae65ff7c99af46"), 42, 42)
-
- buf := bytes.NewBuffer(nil)
- e := NewEncoder(buf)
- _, err := e.Encode(expected)
- c.Assert(err, IsNil)
-
- idx := &Idxfile{}
- d := NewDecoder(buf)
- err = d.Decode(idx)
- c.Assert(err, IsNil)
-
- c.Assert(idx.Entries, DeepEquals, expected.Entries)
-}
-
func (s *IdxfileSuite) TestDecodeEncode(c *C) {
fixtures.ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
expected, err := ioutil.ReadAll(f.Idx())
c.Assert(err, IsNil)
- idx := &Idxfile{}
+ idx := new(MemoryIndex)
d := NewDecoder(bytes.NewBuffer(expected))
err = d.Decode(idx)
c.Assert(err, IsNil)
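The surviving TestDecodeEncode round-trips every packfile fixture; since MemoryIndex keeps the on-disk byte layout, encode(decode(x)) should reproduce x byte for byte. A minimal sketch of that round trip outside the test suite, assuming the bytes and idxfile imports:

// Sketch: decode/encode round trip over raw idx bytes, mirroring the
// remaining TestDecodeEncode above.
func roundTrip(data []byte) ([]byte, error) {
	idx := idxfile.NewMemoryIndex()
	if err := idxfile.NewDecoder(bytes.NewReader(data)).Decode(idx); err != nil {
		return nil, err
	}

	var out bytes.Buffer
	if _, err := idxfile.NewEncoder(&out).Encode(idx); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}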
diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go
index 6b05eaa..b196608 100644
--- a/plumbing/format/idxfile/idxfile.go
+++ b/plumbing/format/idxfile/idxfile.go
@@ -1,68 +1,222 @@
package idxfile
-import "gopkg.in/src-d/go-git.v4/plumbing"
+import (
+ "bytes"
+ "io"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
const (
// VersionSupported is the only idx version supported.
VersionSupported = 2
- offsetLimit = 0x7fffffff
+ noMapping = -1
)
var (
idxHeader = []byte{255, 't', 'O', 'c'}
)
-// Idxfile is the in memory representation of an idx file.
-type Idxfile struct {
- Version uint32
- Fanout [255]uint32
- ObjectCount uint32
- Entries EntryList
+// Index represents an index of a packfile.
+type Index interface {
+ // Contains checks whether the given hash is in the index.
+ Contains(h plumbing.Hash) (bool, error)
+ // FindOffset finds the offset in the packfile for the object with
+ // the given hash.
+ FindOffset(h plumbing.Hash) (int64, error)
+ // FindCRC32 finds the CRC32 of the object with the given hash.
+ FindCRC32(h plumbing.Hash) (uint32, error)
+ // Count returns the number of entries in the index.
+ Count() (int64, error)
+ // Entries returns an iterator to retrieve all index entries.
+ Entries() (EntryIter, error)
+}
+
+// MemoryIndex is the in memory representation of an idx file.
+type MemoryIndex struct {
+ Version uint32
+ Fanout [256]uint32
+ // FanoutMapping maps the position in the fanout table to the position
+ // in the Names, Offset32 and Crc32 slices. This improves the memory
+ // usage by not needing an array with unnecessary empty slots.
+ FanoutMapping [256]int
+ Names [][]byte
+ Offset32 [][]byte
+ Crc32 [][]byte
+ Offset64 []byte
PackfileChecksum [20]byte
IdxChecksum [20]byte
}
-func NewIdxfile() *Idxfile {
- return &Idxfile{}
+var _ Index = (*MemoryIndex)(nil)
+
+// NewMemoryIndex returns an instance of a new MemoryIndex.
+func NewMemoryIndex() *MemoryIndex {
+ return &MemoryIndex{}
}
-// Entry is the in memory representation of an object entry in the idx file.
-type Entry struct {
- Hash plumbing.Hash
- CRC32 uint32
- Offset uint64
+func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int {
+ k := idx.FanoutMapping[h[0]]
+ if k == noMapping {
+ return -1
+ }
+
+ data := idx.Names[k]
+ high := uint64(len(idx.Offset32[k])) >> 2
+ if high == 0 {
+ return -1
+ }
+
+ low := uint64(0)
+ for {
+ mid := (low + high) >> 1
+ offset := mid + (mid << 2)
+
+ cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength])
+ if cmp < 0 {
+ high = mid
+ } else if cmp == 0 {
+ return int(mid)
+ } else {
+ low = mid + 1
+ }
+
+ if low < high {
+ break
+ }
+ }
+
+ return -1
}
-// Add adds a new Entry with the given values to the Idxfile.
-func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
- idx.Entries = append(idx.Entries, &Entry{
- Hash: h,
- Offset: offset,
- CRC32: crc32,
- })
+// Contains implements the Index interface.
+func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) {
+ i := idx.findHashIndex(h)
+ return i >= 0, nil
}
-func (idx *Idxfile) isValid() bool {
- fanout := idx.calculateFanout()
- for k, c := range idx.Fanout {
- if fanout[k] != c {
- return false
+// FindOffset implements the Index interface.
+func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) {
+ k := idx.FanoutMapping[h[0]]
+ i := idx.findHashIndex(h)
+ if i < 0 {
+ return 0, plumbing.ErrObjectNotFound
+ }
+
+ return idx.getOffset(k, i)
+}
+
+const isO64Mask = uint64(1) << 31
+
+func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) (int64, error) {
+ offset := secondLevel << 2
+ buf := bytes.NewBuffer(idx.Offset32[firstLevel][offset : offset+4])
+ ofs, err := binary.ReadUint32(buf)
+ if err != nil {
+ return -1, err
+ }
+
+ if (uint64(ofs) & isO64Mask) != 0 {
+ offset := 8 * (uint64(ofs) & ^isO64Mask)
+ buf := bytes.NewBuffer(idx.Offset64[offset : offset+8])
+ n, err := binary.ReadUint64(buf)
+ if err != nil {
+ return -1, err
}
+
+ return int64(n), nil
}
- return true
+ return int64(ofs), nil
}
-func (idx *Idxfile) calculateFanout() [256]uint32 {
- fanout := [256]uint32{}
- for _, e := range idx.Entries {
- fanout[e.Hash[0]]++
+// FindCRC32 implements the Index interface.
+func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) {
+ k := idx.FanoutMapping[h[0]]
+ i := idx.findHashIndex(h)
+ if i < 0 {
+ return 0, plumbing.ErrObjectNotFound
}
- for i := 1; i < 256; i++ {
- fanout[i] += fanout[i-1]
+ return idx.getCrc32(k, i)
+}
+
+func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) {
+ offset := secondLevel << 2
+ buf := bytes.NewBuffer(idx.Crc32[firstLevel][offset : offset+4])
+ return binary.ReadUint32(buf)
+}
+
+// Count implements the Index interface.
+func (idx *MemoryIndex) Count() (int64, error) {
+ return int64(idx.Fanout[fanout-1]), nil
+}
+
+// Entries implements the Index interface.
+func (idx *MemoryIndex) Entries() (EntryIter, error) {
+ return &idxfileEntryIter{idx, 0, 0, 0}, nil
+}
+
+// EntryIter is an iterator that will return the entries in a packfile index.
+type EntryIter interface {
+ // Next returns the next entry in the packfile index.
+ Next() (*Entry, error)
+ // Close closes the iterator.
+ Close() error
+}
+
+type idxfileEntryIter struct {
+ idx *MemoryIndex
+ total int
+ firstLevel, secondLevel int
+}
+
+func (i *idxfileEntryIter) Next() (*Entry, error) {
+ for {
+ if i.firstLevel >= fanout {
+ return nil, io.EOF
+ }
+
+ if i.total >= int(i.idx.Fanout[i.firstLevel]) {
+ i.firstLevel++
+ i.secondLevel = 0
+ continue
+ }
+
+ entry := new(Entry)
+ ofs := i.secondLevel * objectIDLength
+ copy(entry.Hash[:], i.idx.Names[i.idx.FanoutMapping[i.firstLevel]][ofs:])
+
+ pos := i.idx.FanoutMapping[entry.Hash[0]]
+
+ offset, err := i.idx.getOffset(pos, i.secondLevel)
+ if err != nil {
+ return nil, err
+ }
+ entry.Offset = uint64(offset)
+
+ entry.CRC32, err = i.idx.getCrc32(pos, i.secondLevel)
+ if err != nil {
+ return nil, err
+ }
+
+ i.secondLevel++
+ i.total++
+
+ return entry, nil
}
+}
- return fanout
+func (i *idxfileEntryIter) Close() error {
+ i.firstLevel = fanout
+ return nil
+}
+
+// Entry is the in memory representation of an object entry in the idx file.
+type Entry struct {
+ Hash plumbing.Hash
+ CRC32 uint32
+ Offset uint64
}
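FindOffset resolves a hash in two steps: FanoutMapping[h[0]] selects the bucket, a binary search over the 20-byte names finds the position, and getOffset reads the 32-bit slot, falling back to the 64-bit table when the high bit is set. A standalone sketch of that last step using the standard encoding/binary package (the code above uses go-git's own utils/binary reader instead):

// Sketch: resolving one big-endian 32-bit offset slot the way getOffset
// above does. A set high bit means the low 31 bits index into the 8-byte
// entries of the Offset64 table instead of being the offset itself.
func resolveOffset(slot uint32, offset64 []byte) uint64 {
	const isO64Mask = uint64(1) << 31
	v := uint64(slot)
	if v&isO64Mask == 0 {
		return v
	}
	pos := 8 * (v &^ isO64Mask)
	return binary.BigEndian.Uint64(offset64[pos : pos+8])
}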
diff --git a/plumbing/format/idxfile/idxfile_test.go b/plumbing/format/idxfile/idxfile_test.go
new file mode 100644
index 0000000..f42a419
--- /dev/null
+++ b/plumbing/format/idxfile/idxfile_test.go
@@ -0,0 +1,109 @@
+package idxfile_test
+
+import (
+ "bytes"
+ "encoding/base64"
+ "io"
+ "testing"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
+)
+
+func BenchmarkFindOffset(b *testing.B) {
+ idx := fixtureIndex(b)
+
+ for i := 0; i < b.N; i++ {
+ for _, h := range fixtureHashes {
+ _, err := idx.FindOffset(h)
+ if err != nil {
+ b.Fatalf("error getting offset: %s", err)
+ }
+ }
+ }
+}
+
+func BenchmarkFindCRC32(b *testing.B) {
+ idx := fixtureIndex(b)
+
+ for i := 0; i < b.N; i++ {
+ for _, h := range fixtureHashes {
+ _, err := idx.FindCRC32(h)
+ if err != nil {
+ b.Fatalf("error getting crc32: %s", err)
+ }
+ }
+ }
+}
+
+func BenchmarkContains(b *testing.B) {
+ idx := fixtureIndex(b)
+
+ for i := 0; i < b.N; i++ {
+ for _, h := range fixtureHashes {
+ ok, err := idx.Contains(h)
+ if err != nil {
+ b.Fatalf("error checking if hash is in index: %s", err)
+ }
+
+ if !ok {
+ b.Error("expected hash to be in index")
+ }
+ }
+ }
+}
+
+func BenchmarkEntries(b *testing.B) {
+ idx := fixtureIndex(b)
+
+ for i := 0; i < b.N; i++ {
+ iter, err := idx.Entries()
+ if err != nil {
+ b.Fatalf("unexpected error getting entries: %s", err)
+ }
+
+ var entries int
+ for {
+ _, err := iter.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+
+ b.Errorf("unexpected error getting entry: %s", err)
+ }
+
+ entries++
+ }
+
+ if entries != len(fixtureHashes) {
+ b.Errorf("expecting entries to be %d, got %d", len(fixtureHashes), entries)
+ }
+ }
+}
+
+var fixtureHashes = []plumbing.Hash{
+ plumbing.NewHash("303953e5aa461c203a324821bc1717f9b4fff895"),
+ plumbing.NewHash("5296768e3d9f661387ccbff18c4dea6c997fd78c"),
+ plumbing.NewHash("03fc8d58d44267274edef4585eaeeb445879d33f"),
+ plumbing.NewHash("8f3ceb4ea4cb9e4a0f751795eb41c9a4f07be772"),
+ plumbing.NewHash("e0d1d625010087f79c9e01ad9d8f95e1628dda02"),
+ plumbing.NewHash("90eba326cdc4d1d61c5ad25224ccbf08731dd041"),
+ plumbing.NewHash("bab53055add7bc35882758a922c54a874d6b1272"),
+ plumbing.NewHash("1b8995f51987d8a449ca5ea4356595102dc2fbd4"),
+ plumbing.NewHash("35858be9c6f5914cbe6768489c41eb6809a2bceb"),
+}
+
+func fixtureIndex(t testing.TB) *idxfile.MemoryIndex {
+ f := bytes.NewBufferString(fixtureLarge4GB)
+
+ idx := new(idxfile.MemoryIndex)
+
+ d := idxfile.NewDecoder(base64.NewDecoder(base64.StdEncoding, f))
+ err := d.Decode(idx)
+ if err != nil {
+ t.Fatalf("unexpected error decoding index: %s", err)
+ }
+
+ return idx
+}
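The new benchmarks can be exercised with the standard Go tooling; something along these lines (exact flags are a matter of preference) skips the tests and runs only the benchmarks:

go test -run=NONE -bench=. gopkg.in/src-d/go-git.v4/plumbing/format/idxfile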
diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go
index f706e5d..765401f 100644
--- a/plumbing/format/packfile/decoder.go
+++ b/plumbing/format/packfile/decoder.go
@@ -5,6 +5,7 @@ import (
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
+ "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
@@ -63,7 +64,7 @@ type Decoder struct {
// hasBuiltIndex indicates if the index is fully built or not. If it is not,
// will be built incrementally while decoding.
hasBuiltIndex bool
- idx *Index
+ idx idxfile.Index
offsetToType map[int64]plumbing.ObjectType
decoderType plumbing.ObjectType
@@ -117,7 +118,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
o: o,
deltaBaseCache: cacheObject,
- idx: NewIndex(0),
+ idx: idxfile.NewMemoryIndex(),
offsetToType: make(map[int64]plumbing.ObjectType),
decoderType: t,
}, nil
@@ -150,7 +151,8 @@ func (d *Decoder) doDecode() error {
}
if !d.hasBuiltIndex {
- d.idx = NewIndex(int(count))
+ // TODO: MemoryIndex is not writable, change to something else
+ d.idx = idxfile.NewMemoryIndex()
}
defer func() { d.hasBuiltIndex = true }()
@@ -284,12 +286,12 @@ func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) {
}
func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) {
- e, ok := d.idx.LookupHash(ref)
- if !ok {
+ offset, err := d.idx.FindOffset(ref)
+ if err != nil {
return plumbing.InvalidObject, plumbing.ErrObjectNotFound
}
- return d.ofsDeltaType(int64(e.Offset))
+ return d.ofsDeltaType(offset)
}
func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) {
@@ -314,9 +316,14 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error
return obj, err
}
+ // TODO: remove this
+ _ = crc
+
+ /* Add is no longer available
if !d.hasBuiltIndex {
d.idx.Add(obj.Hash(), uint64(h.Offset), crc)
}
+ */
return obj, nil
}
@@ -448,8 +455,8 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) {
func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) {
if d.s.IsSeekable {
- if e, ok := d.idx.LookupHash(h); ok {
- return d.DecodeObjectAt(int64(e.Offset))
+ if offset, err := d.idx.FindOffset(h); err != nil {
+ return d.DecodeObjectAt(offset)
}
}
@@ -475,7 +482,7 @@ func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.Encoded
// SetIndex sets an index for the packfile. It is recommended to set this.
// The index might be read from a file or reused from a previous Decoder usage
// (see Index function).
-func (d *Decoder) SetIndex(idx *Index) {
+func (d *Decoder) SetIndex(idx idxfile.Index) {
d.hasBuiltIndex = true
d.idx = idx
}
@@ -484,7 +491,7 @@ func (d *Decoder) SetIndex(idx *Index) {
// Index will return it. Otherwise, it will return an index that is built while
// decoding. If neither SetIndex was called with a full index or Decode called
// for the whole packfile, then the returned index will be incomplete.
-func (d *Decoder) Index() *Index {
+func (d *Decoder) Index() idxfile.Index {
return d.idx
}
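The packfile decoder now works against the idxfile.Index interface, so a previously written .idx can be decoded and injected directly instead of rebuilding the index while scanning. A sketch of that wiring, assuming the readers, the object storer, and the packfile/idxfile/storer imports already exist:

// Sketch: feeding a decoded MemoryIndex to the packfile decoder through
// the new SetIndex(idxfile.Index) signature shown above.
func decodeWithIndex(packReader, idxReader io.Reader, storage storer.EncodedObjectStorer) error {
	scanner := packfile.NewScanner(packReader)
	dec, err := packfile.NewDecoder(scanner, storage)
	if err != nil {
		return err
	}

	idx := idxfile.NewMemoryIndex()
	if err := idxfile.NewDecoder(idxReader).Decode(idx); err != nil {
		return err
	}

	dec.SetIndex(idx) // index is complete, so decoding will not rebuild it

	_, err = dec.Decode()
	return err
}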
diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go
deleted file mode 100644
index 021b2d1..0000000
--- a/plumbing/format/packfile/index.go
+++ /dev/null
@@ -1,125 +0,0 @@
-package packfile
-
-import (
- "sort"
-
- "gopkg.in/src-d/go-git.v4/plumbing"
- "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
-)
-
-// Index is an in-memory representation of a packfile index.
-// This uses idxfile.Idxfile under the hood to obtain indexes from .idx files
-// or to store them.
-type Index struct {
- byHash map[plumbing.Hash]*idxfile.Entry
- byOffset []*idxfile.Entry // sorted by their offset
-}
-
-// NewIndex creates a new empty index with the given size. Size is a hint and
-// can be 0. It is recommended to set it to the number of objects to be indexed
-// if it is known beforehand (e.g. reading from a packfile).
-func NewIndex(size int) *Index {
- return &Index{
- byHash: make(map[plumbing.Hash]*idxfile.Entry, size),
- byOffset: make([]*idxfile.Entry, 0, size),
- }
-}
-
-// NewIndexFromIdxFile creates a new Index from an idxfile.IdxFile.
-func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index {
- idx := &Index{
- byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount),
- byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount),
- }
- sorted := true
- for i, e := range idxf.Entries {
- idx.addUnsorted(e)
- if i > 0 && idx.byOffset[i-1].Offset >= e.Offset {
- sorted = false
- }
- }
-
- // If the idxfile was loaded from a regular packfile index
- // then it will already be in offset order, in which case we
- // can avoid doing a relatively expensive idempotent sort.
- if !sorted {
- sort.Sort(orderByOffset(idx.byOffset))
- }
-
- return idx
-}
-
-// orderByOffset is a sort.Interface adapter that arranges
-// a slice of entries by their offset.
-type orderByOffset []*idxfile.Entry
-
-func (o orderByOffset) Len() int { return len(o) }
-func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset }
-func (o orderByOffset) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
-
-// Add adds a new Entry with the given values to the index.
-func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
- e := &idxfile.Entry{
- Hash: h,
- Offset: offset,
- CRC32: crc32,
- }
- idx.byHash[e.Hash] = e
-
- // Find the right position in byOffset.
- // Look for the first position whose offset is *greater* than e.Offset.
- i := sort.Search(len(idx.byOffset), func(i int) bool {
- return idx.byOffset[i].Offset > offset
- })
- if i == len(idx.byOffset) {
- // Simple case: add it to the end.
- idx.byOffset = append(idx.byOffset, e)
- return
- }
- // Harder case: shift existing entries down by one to make room.
- // Append a nil entry first so we can use existing capacity in case
- // the index was carefully preallocated.
- idx.byOffset = append(idx.byOffset, nil)
- copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1])
- idx.byOffset[i] = e
-}
-
-func (idx *Index) addUnsorted(e *idxfile.Entry) {
- idx.byHash[e.Hash] = e
- idx.byOffset = append(idx.byOffset, e)
-}
-
-// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and
-// a bool, which is true if it was found or false if it wasn't.
-func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) {
- e, ok := idx.byHash[h]
- return e, ok
-}
-
-// LookupHash looks an entry up by its offset in the packfile. An idxfile.Entry
-// is returned and a bool, which is true if it was found or false if it wasn't.
-func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) {
- i := sort.Search(len(idx.byOffset), func(i int) bool {
- return idx.byOffset[i].Offset >= offset
- })
- if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset {
- return nil, false // not present
- }
- return idx.byOffset[i], true
-}
-
-// Size returns the number of entries in the index.
-func (idx *Index) Size() int {
- return len(idx.byHash)
-}
-
-// ToIdxFile converts the index to an idxfile.Idxfile, which can then be used
-// to serialize.
-func (idx *Index) ToIdxFile() *idxfile.Idxfile {
- idxf := idxfile.NewIdxfile()
- for _, e := range idx.byHash {
- idxf.Entries = append(idxf.Entries, e)
- }
-
- return idxf
-}
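The deleted packfile.Index kept a hash map plus an offset-sorted slice; callers that relied on LookupOffset-style access can rebuild an offset-ordered view from the new Entries iterator. A sketch, assuming the io and sort imports:

// Sketch: recovering the deleted offset-ordered view from the new
// iterator-based API.
func entriesByOffset(idx idxfile.Index) ([]*idxfile.Entry, error) {
	iter, err := idx.Entries()
	if err != nil {
		return nil, err
	}
	defer iter.Close()

	var out []*idxfile.Entry
	for {
		e, err := iter.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		out = append(out, e)
	}

	sort.Slice(out, func(i, j int) bool { return out[i].Offset < out[j].Offset })
	return out, nil
}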
diff --git a/storage/filesystem/index.go b/storage/filesystem/index.go
deleted file mode 100644
index 2ebf57e..0000000
--- a/storage/filesystem/index.go
+++ /dev/null
@@ -1,47 +0,0 @@
-package filesystem
-
-import (
- "os"
-
- "gopkg.in/src-d/go-git.v4/plumbing/format/index"
- "gopkg.in/src-d/go-git.v4/storage/filesystem/dotgit"
- "gopkg.in/src-d/go-git.v4/utils/ioutil"
-)
-
-type IndexStorage struct {
- dir *dotgit.DotGit
-}
-
-func (s *IndexStorage) SetIndex(idx *index.Index) (err error) {
- f, err := s.dir.IndexWriter()
- if err != nil {
- return err
- }
-
- defer ioutil.CheckClose(f, &err)
-
- e := index.NewEncoder(f)
- err = e.Encode(idx)
- return err
-}
-
-func (s *IndexStorage) Index() (i *index.Index, err error) {
- idx := &index.Index{
- Version: 2,
- }
-
- f, err := s.dir.Index()
- if err != nil {
- if os.IsNotExist(err) {
- return idx, nil
- }
-
- return nil, err
- }
-
- defer ioutil.CheckClose(f, &err)
-
- d := index.NewDecoder(f)
- err = d.Decode(idx)
- return idx, err
-}
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index 9ffe4dc..ef67f50 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -63,7 +63,7 @@ func (s *ObjectStorage) loadIdxFile(h plumbing.Hash) (err error) {
}
defer ioutil.CheckClose(f, &err)
- idxf := idxfile.NewIdxfile()
+ idxf := idxfile.NewMemoryIndex()
d := idxfile.NewDecoder(f)
if err = d.Decode(idxf); err != nil {
return err