diff options
author | Miguel Molina <miguel@erizocosmi.co> | 2018-07-19 15:20:10 +0200 |
---|---|---|
committer | Miguel Molina <miguel@erizocosmi.co> | 2018-07-19 15:20:10 +0200 |
commit | 009f1069a1248c1e9189a9e4c342f6d017156ec4 (patch) | |
tree | ac984b72e7c54160b0b154bffd4474f96ae5028e /plumbing | |
parent | 9f00789688d26191a987fdec8bc2678362ec4453 (diff) | |
download | go-git-009f1069a1248c1e9189a9e4c342f6d017156ec4.tar.gz |
plumbing/format/idxfile: add new Index and MemoryIndex
Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
Diffstat (limited to 'plumbing')
-rw-r--r-- | plumbing/format/idxfile/decoder.go | 109 | ||||
-rw-r--r-- | plumbing/format/idxfile/decoder_test.go | 106 | ||||
-rw-r--r-- | plumbing/format/idxfile/encoder.go | 101 | ||||
-rw-r--r-- | plumbing/format/idxfile/encoder_test.go | 21 | ||||
-rw-r--r-- | plumbing/format/idxfile/idxfile.go | 224 | ||||
-rw-r--r-- | plumbing/format/idxfile/idxfile_test.go | 109 | ||||
-rw-r--r-- | plumbing/format/packfile/decoder.go | 27 | ||||
-rw-r--r-- | plumbing/format/packfile/index.go | 125 |
8 files changed, 483 insertions, 339 deletions
diff --git a/plumbing/format/idxfile/decoder.go b/plumbing/format/idxfile/decoder.go index 45afb1e..25ff88e 100644 --- a/plumbing/format/idxfile/decoder.go +++ b/plumbing/format/idxfile/decoder.go @@ -17,6 +17,11 @@ var ( ErrMalformedIdxFile = errors.New("Malformed IDX file") ) +const ( + fanout = 256 + objectIDLength = 20 +) + // Decoder reads and decodes idx files from an input stream. type Decoder struct { *bufio.Reader @@ -27,13 +32,13 @@ func NewDecoder(r io.Reader) *Decoder { return &Decoder{bufio.NewReader(r)} } -// Decode reads from the stream and decode the content into the Idxfile struct. -func (d *Decoder) Decode(idx *Idxfile) error { +// Decode reads from the stream and decode the content into the MemoryIndex struct. +func (d *Decoder) Decode(idx *MemoryIndex) error { if err := validateHeader(d); err != nil { return err } - flow := []func(*Idxfile, io.Reader) error{ + flow := []func(*MemoryIndex, io.Reader) error{ readVersion, readFanout, readObjectNames, @@ -48,10 +53,6 @@ func (d *Decoder) Decode(idx *Idxfile) error { } } - if !idx.isValid() { - return ErrMalformedIdxFile - } - return nil } @@ -68,7 +69,7 @@ func validateHeader(r io.Reader) error { return nil } -func readVersion(idx *Idxfile, r io.Reader) error { +func readVersion(idx *MemoryIndex, r io.Reader) error { v, err := binary.ReadUint32(r) if err != nil { return err @@ -82,74 +83,92 @@ func readVersion(idx *Idxfile, r io.Reader) error { return nil } -func readFanout(idx *Idxfile, r io.Reader) error { - var err error - for i := 0; i < 255; i++ { - idx.Fanout[i], err = binary.ReadUint32(r) +func readFanout(idx *MemoryIndex, r io.Reader) error { + for k := 0; k < fanout; k++ { + n, err := binary.ReadUint32(r) if err != nil { return err } + + idx.Fanout[k] = n + idx.FanoutMapping[k] = noMapping } - idx.ObjectCount, err = binary.ReadUint32(r) - return err + return nil } -func readObjectNames(idx *Idxfile, r io.Reader) error { - c := int(idx.ObjectCount) - new := make([]Entry, c) - for i := 0; i < c; i++ { - e := &new[i] - if _, err := io.ReadFull(r, e.Hash[:]); err != nil { +func readObjectNames(idx *MemoryIndex, r io.Reader) error { + for k := 0; k < fanout; k++ { + var buckets uint32 + if k == 0 { + buckets = idx.Fanout[k] + } else { + buckets = idx.Fanout[k] - idx.Fanout[k-1] + } + + if buckets == 0 { + continue + } + + if buckets < 0 { + return ErrMalformedIdxFile + } + + idx.FanoutMapping[k] = len(idx.Names) + + nameLen := int(buckets * objectIDLength) + bin := make([]byte, nameLen) + if _, err := io.ReadFull(r, bin); err != nil { return err } - idx.Entries = append(idx.Entries, e) + idx.Names = append(idx.Names, bin) + idx.Offset32 = append(idx.Offset32, make([]byte, buckets*4)) + idx.Crc32 = append(idx.Crc32, make([]byte, buckets*4)) } return nil } -func readCRC32(idx *Idxfile, r io.Reader) error { - c := int(idx.ObjectCount) - for i := 0; i < c; i++ { - if err := binary.Read(r, &idx.Entries[i].CRC32); err != nil { - return err +func readCRC32(idx *MemoryIndex, r io.Reader) error { + for k := 0; k < fanout; k++ { + if pos := idx.FanoutMapping[k]; pos != noMapping { + if _, err := io.ReadFull(r, idx.Crc32[pos]); err != nil { + return err + } } } return nil } -func readOffsets(idx *Idxfile, r io.Reader) error { - c := int(idx.ObjectCount) - - for i := 0; i < c; i++ { - o, err := binary.ReadUint32(r) - if err != nil { - return err +func readOffsets(idx *MemoryIndex, r io.Reader) error { + var o64cnt int + for k := 0; k < fanout; k++ { + if pos := idx.FanoutMapping[k]; pos != noMapping { + if _, err := io.ReadFull(r, idx.Offset32[pos]); err != nil { + return err + } + + for p := 0; p < len(idx.Offset32[pos]); p += 4 { + if idx.Offset32[pos][p]&(byte(1)<<7) > 0 { + o64cnt++ + } + } } - - idx.Entries[i].Offset = uint64(o) } - for i := 0; i < c; i++ { - if idx.Entries[i].Offset <= offsetLimit { - continue - } - - o, err := binary.ReadUint64(r) - if err != nil { + if o64cnt > 0 { + idx.Offset64 = make([]byte, o64cnt*8) + if _, err := io.ReadFull(r, idx.Offset64); err != nil { return err } - - idx.Entries[i].Offset = o } return nil } -func readChecksums(idx *Idxfile, r io.Reader) error { +func readChecksums(idx *MemoryIndex, r io.Reader) error { if _, err := io.ReadFull(r, idx.PackfileChecksum[:]); err != nil { return err } diff --git a/plumbing/format/idxfile/decoder_test.go b/plumbing/format/idxfile/decoder_test.go index 20d6859..b43d7c5 100644 --- a/plumbing/format/idxfile/decoder_test.go +++ b/plumbing/format/idxfile/decoder_test.go @@ -4,11 +4,12 @@ import ( "bytes" "encoding/base64" "fmt" + "io" + "io/ioutil" "testing" + "gopkg.in/src-d/go-git.v4/plumbing" . "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" - "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" - "gopkg.in/src-d/go-git.v4/storage/memory" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" @@ -26,51 +27,34 @@ func (s *IdxfileSuite) TestDecode(c *C) { f := fixtures.Basic().One() d := NewDecoder(f.Idx()) - idx := &Idxfile{} + idx := new(MemoryIndex) err := d.Decode(idx) c.Assert(err, IsNil) - c.Assert(idx.Entries, HasLen, 31) - c.Assert(idx.Entries[0].Hash.String(), Equals, "1669dce138d9b841a518c64b10914d88f5e488ea") - c.Assert(idx.Entries[0].Offset, Equals, uint64(615)) - c.Assert(idx.Entries[0].CRC32, Equals, uint32(3645019190)) + count, _ := idx.Count() + c.Assert(count, Equals, int64(31)) - c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9") - c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String()) -} - -func (s *IdxfileSuite) TestDecodeCRCs(c *C) { - f := fixtures.Basic().ByTag("ofs-delta").One() - - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - pd, err := packfile.NewDecoder(scanner, storage) + hash := plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea") + ok, err := idx.Contains(hash) c.Assert(err, IsNil) - _, err = pd.Decode() - c.Assert(err, IsNil) - - i := pd.Index().ToIdxFile() - i.Version = VersionSupported + c.Assert(ok, Equals, true) - buf := bytes.NewBuffer(nil) - e := NewEncoder(buf) - _, err = e.Encode(i) + offset, err := idx.FindOffset(hash) c.Assert(err, IsNil) + c.Assert(offset, Equals, int64(615)) - idx := &Idxfile{} - - d := NewDecoder(buf) - err = d.Decode(idx) + crc32, err := idx.FindCRC32(hash) c.Assert(err, IsNil) + c.Assert(crc32, Equals, uint32(3645019190)) - c.Assert(idx.Entries, DeepEquals, i.Entries) + c.Assert(fmt.Sprintf("%x", idx.IdxChecksum), Equals, "fb794f1ec720b9bc8e43257451bd99c4be6fa1c9") + c.Assert(fmt.Sprintf("%x", idx.PackfileChecksum), Equals, f.PackfileHash.String()) } func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) { f := bytes.NewBufferString(fixtureLarge4GB) - idx := &Idxfile{} + idx := new(MemoryIndex) d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f)) err := d.Decode(idx) @@ -88,29 +72,22 @@ func (s *IdxfileSuite) TestDecode64bitsOffsets(c *C) { "35858be9c6f5914cbe6768489c41eb6809a2bceb": 5924278919, } - for _, e := range idx.Entries { - c.Assert(expected[e.Hash.String()], Equals, e.Offset) - } -} - -func (s *IdxfileSuite) TestDecode64bitsOffsetsIdempotent(c *C) { - f := bytes.NewBufferString(fixtureLarge4GB) - - expected := &Idxfile{} - - d := NewDecoder(base64.NewDecoder(base64.StdEncoding, f)) - err := d.Decode(expected) + iter, err := idx.Entries() c.Assert(err, IsNil) - buf := bytes.NewBuffer(nil) - _, err = NewEncoder(buf).Encode(expected) - c.Assert(err, IsNil) + var entries int + for { + e, err := iter.Next() + if err == io.EOF { + break + } + c.Assert(err, IsNil) + entries++ - idx := &Idxfile{} - err = NewDecoder(buf).Decode(idx) - c.Assert(err, IsNil) + c.Assert(expected[e.Hash.String()], Equals, e.Offset) + } - c.Assert(idx.Entries, DeepEquals, expected.Entries) + c.Assert(entries, Equals, len(expected)) } const fixtureLarge4GB = `/3RPYwAAAAIAAAAAAAAAAAAAAAAAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEAAAABAAAAAQAAAAEA @@ -139,3 +116,30 @@ AAAAAAAMgAAAAQAAAI6AAAACgAAAA4AAAASAAAAFAAAAAV9Qam8AAAABYR1ShwAAAACdxfYxAAAA ANz1Di4AAAABPUnxJAAAAADNxzlGr6vCJpIFz4XaG/fi/f9C9zgQ8ptKSQpfQ1NMJBGTDTxxYGGp ch2xUA== ` + +func BenchmarkDecode(b *testing.B) { + if err := fixtures.Init(); err != nil { + b.Errorf("unexpected error initializing fixtures: %s", err) + } + + f := fixtures.Basic().One() + fixture, err := ioutil.ReadAll(f.Idx()) + if err != nil { + b.Errorf("unexpected error reading idx file: %s", err) + } + + defer func() { + if err := fixtures.Clean(); err != nil { + b.Errorf("unexpected error cleaning fixtures: %s", err) + } + }() + + for i := 0; i < b.N; i++ { + f := bytes.NewBuffer(fixture) + idx := new(MemoryIndex) + d := NewDecoder(f) + if err := d.Decode(idx); err != nil { + b.Errorf("unexpected error decoding: %s", err) + } + } +} diff --git a/plumbing/format/idxfile/encoder.go b/plumbing/format/idxfile/encoder.go index 40abfb8..55df466 100644 --- a/plumbing/format/idxfile/encoder.go +++ b/plumbing/format/idxfile/encoder.go @@ -4,12 +4,11 @@ import ( "crypto/sha1" "hash" "io" - "sort" "gopkg.in/src-d/go-git.v4/utils/binary" ) -// Encoder writes Idxfile structs to an output stream. +// Encoder writes MemoryIndex structs to an output stream. type Encoder struct { io.Writer hash hash.Hash @@ -22,11 +21,9 @@ func NewEncoder(w io.Writer) *Encoder { return &Encoder{mw, h} } -// Encode encodes an Idxfile to the encoder writer. -func (e *Encoder) Encode(idx *Idxfile) (int, error) { - idx.Entries.Sort() - - flow := []func(*Idxfile) (int, error){ +// Encode encodes an MemoryIndex to the encoder writer. +func (e *Encoder) Encode(idx *MemoryIndex) (int, error) { + flow := []func(*MemoryIndex) (int, error){ e.encodeHeader, e.encodeFanout, e.encodeHashes, @@ -48,7 +45,7 @@ func (e *Encoder) Encode(idx *Idxfile) (int, error) { return sz, nil } -func (e *Encoder) encodeHeader(idx *Idxfile) (int, error) { +func (e *Encoder) encodeHeader(idx *MemoryIndex) (int, error) { c, err := e.Write(idxHeader) if err != nil { return c, err @@ -57,75 +54,81 @@ func (e *Encoder) encodeHeader(idx *Idxfile) (int, error) { return c + 4, binary.WriteUint32(e, idx.Version) } -func (e *Encoder) encodeFanout(idx *Idxfile) (int, error) { - fanout := idx.calculateFanout() - for _, c := range fanout { +func (e *Encoder) encodeFanout(idx *MemoryIndex) (int, error) { + for _, c := range idx.Fanout { if err := binary.WriteUint32(e, c); err != nil { return 0, err } } - return 1024, nil + return fanout * 4, nil } -func (e *Encoder) encodeHashes(idx *Idxfile) (int, error) { - sz := 0 - for _, ent := range idx.Entries { - i, err := e.Write(ent.Hash[:]) - sz += i +func (e *Encoder) encodeHashes(idx *MemoryIndex) (int, error) { + var size int + for k := 0; k < fanout; k++ { + pos := idx.FanoutMapping[k] + if pos == noMapping { + continue + } + n, err := e.Write(idx.Names[pos]) if err != nil { - return sz, err + return size, err } + size += n } - - return sz, nil + return size, nil } -func (e *Encoder) encodeCRC32(idx *Idxfile) (int, error) { - sz := 0 - for _, ent := range idx.Entries { - err := binary.Write(e, ent.CRC32) - sz += 4 +func (e *Encoder) encodeCRC32(idx *MemoryIndex) (int, error) { + var size int + for k := 0; k < fanout; k++ { + pos := idx.FanoutMapping[k] + if pos == noMapping { + continue + } + n, err := e.Write(idx.Crc32[pos]) if err != nil { - return sz, err + return size, err } + + size += n } - return sz, nil + return size, nil } -func (e *Encoder) encodeOffsets(idx *Idxfile) (int, error) { - sz := 0 - - var o64bits []uint64 - for _, ent := range idx.Entries { - o := ent.Offset - if o > offsetLimit { - o64bits = append(o64bits, o) - o = offsetLimit + uint64(len(o64bits)) +func (e *Encoder) encodeOffsets(idx *MemoryIndex) (int, error) { + var size int + for k := 0; k < fanout; k++ { + pos := idx.FanoutMapping[k] + if pos == noMapping { + continue } - if err := binary.WriteUint32(e, uint32(o)); err != nil { - return sz, err + n, err := e.Write(idx.Offset32[pos]) + if err != nil { + return size, err } - sz += 4 + size += n } - for _, o := range o64bits { - if err := binary.WriteUint64(e, o); err != nil { - return sz, err + if len(idx.Offset64) > 0 { + n, err := e.Write(idx.Offset64) + if err != nil { + return size, err } - sz += 8 + size += n } - return sz, nil + return size, nil } -func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) { +func (e *Encoder) encodeChecksums(idx *MemoryIndex) (int, error) { if _, err := e.Write(idx.PackfileChecksum[:]); err != nil { return 0, err } @@ -137,11 +140,3 @@ func (e *Encoder) encodeChecksums(idx *Idxfile) (int, error) { return 40, nil } - -// EntryList implements sort.Interface allowing sorting in increasing order. -type EntryList []*Entry - -func (p EntryList) Len() int { return len(p) } -func (p EntryList) Less(i, j int) bool { return p[i].Hash.String() < p[j].Hash.String() } -func (p EntryList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -func (p EntryList) Sort() { sort.Sort(p) } diff --git a/plumbing/format/idxfile/encoder_test.go b/plumbing/format/idxfile/encoder_test.go index e5b96b7..e8deeea 100644 --- a/plumbing/format/idxfile/encoder_test.go +++ b/plumbing/format/idxfile/encoder_test.go @@ -4,37 +4,18 @@ import ( "bytes" "io/ioutil" - "gopkg.in/src-d/go-git.v4/plumbing" . "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" ) -func (s *IdxfileSuite) TestEncode(c *C) { - expected := &Idxfile{} - expected.Add(plumbing.NewHash("4bfc730165c370df4a012afbb45ba3f9c332c0d4"), 82, 82) - expected.Add(plumbing.NewHash("8fa2238efdae08d83c12ee176fae65ff7c99af46"), 42, 42) - - buf := bytes.NewBuffer(nil) - e := NewEncoder(buf) - _, err := e.Encode(expected) - c.Assert(err, IsNil) - - idx := &Idxfile{} - d := NewDecoder(buf) - err = d.Decode(idx) - c.Assert(err, IsNil) - - c.Assert(idx.Entries, DeepEquals, expected.Entries) -} - func (s *IdxfileSuite) TestDecodeEncode(c *C) { fixtures.ByTag("packfile").Test(c, func(f *fixtures.Fixture) { expected, err := ioutil.ReadAll(f.Idx()) c.Assert(err, IsNil) - idx := &Idxfile{} + idx := new(MemoryIndex) d := NewDecoder(bytes.NewBuffer(expected)) err = d.Decode(idx) c.Assert(err, IsNil) diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index 6b05eaa..b196608 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -1,68 +1,222 @@ package idxfile -import "gopkg.in/src-d/go-git.v4/plumbing" +import ( + "bytes" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) const ( // VersionSupported is the only idx version supported. VersionSupported = 2 - offsetLimit = 0x7fffffff + noMapping = -1 ) var ( idxHeader = []byte{255, 't', 'O', 'c'} ) -// Idxfile is the in memory representation of an idx file. -type Idxfile struct { - Version uint32 - Fanout [255]uint32 - ObjectCount uint32 - Entries EntryList +// Index represents an index of a packfile. +type Index interface { + // Contains checks whether the given hash is in the index. + Contains(h plumbing.Hash) (bool, error) + // FindOffset finds the offset in the packfile for the object with + // the given hash. + FindOffset(h plumbing.Hash) (int64, error) + // FindCRC32 finds the CRC32 of the object with the given hash. + FindCRC32(h plumbing.Hash) (uint32, error) + // Count returns the number of entries in the index. + Count() (int64, error) + // Entries returns an iterator to retrieve all index entries. + Entries() (EntryIter, error) +} + +// MemoryIndex is the in memory representation of an idx file. +type MemoryIndex struct { + Version uint32 + Fanout [256]uint32 + // FanoutMapping maps the position in the fanout table to the position + // in the Names, Offset32 and Crc32 slices. This improves the memory + // usage by not needing an array with unnecessary empty slots. + FanoutMapping [256]int + Names [][]byte + Offset32 [][]byte + Crc32 [][]byte + Offset64 []byte PackfileChecksum [20]byte IdxChecksum [20]byte } -func NewIdxfile() *Idxfile { - return &Idxfile{} +var _ Index = (*MemoryIndex)(nil) + +// NewMemoryIndex returns an instance of a new MemoryIndex. +func NewMemoryIndex() *MemoryIndex { + return &MemoryIndex{} } -// Entry is the in memory representation of an object entry in the idx file. -type Entry struct { - Hash plumbing.Hash - CRC32 uint32 - Offset uint64 +func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { + k := idx.FanoutMapping[h[0]] + if k == noMapping { + return -1 + } + + data := idx.Names[k] + high := uint64(len(idx.Offset32[k])) >> 2 + if high == 0 { + return -1 + } + + low := uint64(0) + for { + mid := (low + high) >> 1 + offset := mid + (mid << 2) + + cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength]) + if cmp < 0 { + high = mid + } else if cmp == 0 { + return int(mid) + } else { + low = mid + 1 + } + + if low < high { + break + } + } + + return -1 } -// Add adds a new Entry with the given values to the Idxfile. -func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) { - idx.Entries = append(idx.Entries, &Entry{ - Hash: h, - Offset: offset, - CRC32: crc32, - }) +// Contains implements the Index interface. +func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) { + i := idx.findHashIndex(h) + return i >= 0, nil } -func (idx *Idxfile) isValid() bool { - fanout := idx.calculateFanout() - for k, c := range idx.Fanout { - if fanout[k] != c { - return false +// FindOffset implements the Index interface. +func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) { + k := idx.FanoutMapping[h[0]] + i := idx.findHashIndex(h) + if i < 0 { + return 0, plumbing.ErrObjectNotFound + } + + return idx.getOffset(k, i) +} + +const isO64Mask = uint64(1) << 31 + +func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) (int64, error) { + offset := secondLevel << 2 + buf := bytes.NewBuffer(idx.Offset32[firstLevel][offset : offset+4]) + ofs, err := binary.ReadUint32(buf) + if err != nil { + return -1, err + } + + if (uint64(ofs) & isO64Mask) != 0 { + offset := 8 * (uint64(ofs) & ^isO64Mask) + buf := bytes.NewBuffer(idx.Offset64[offset : offset+8]) + n, err := binary.ReadUint64(buf) + if err != nil { + return -1, err } + + return int64(n), nil } - return true + return int64(ofs), nil } -func (idx *Idxfile) calculateFanout() [256]uint32 { - fanout := [256]uint32{} - for _, e := range idx.Entries { - fanout[e.Hash[0]]++ +// FindCRC32 implements the Index interface. +func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) { + k := idx.FanoutMapping[h[0]] + i := idx.findHashIndex(h) + if i < 0 { + return 0, plumbing.ErrObjectNotFound } - for i := 1; i < 256; i++ { - fanout[i] += fanout[i-1] + return idx.getCrc32(k, i) +} + +func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) { + offset := secondLevel << 2 + buf := bytes.NewBuffer(idx.Crc32[firstLevel][offset : offset+4]) + return binary.ReadUint32(buf) +} + +// Count implements the Index interface. +func (idx *MemoryIndex) Count() (int64, error) { + return int64(idx.Fanout[fanout-1]), nil +} + +// Entries implements the Index interface. +func (idx *MemoryIndex) Entries() (EntryIter, error) { + return &idxfileEntryIter{idx, 0, 0, 0}, nil +} + +// EntryIter is an iterator that will return the entries in a packfile index. +type EntryIter interface { + // Next returns the next entry in the packfile index. + Next() (*Entry, error) + // Close closes the iterator. + Close() error +} + +type idxfileEntryIter struct { + idx *MemoryIndex + total int + firstLevel, secondLevel int +} + +func (i *idxfileEntryIter) Next() (*Entry, error) { + for { + if i.firstLevel >= fanout { + return nil, io.EOF + } + + if i.total >= int(i.idx.Fanout[i.firstLevel]) { + i.firstLevel++ + i.secondLevel = 0 + continue + } + + entry := new(Entry) + ofs := i.secondLevel * objectIDLength + copy(entry.Hash[:], i.idx.Names[i.idx.FanoutMapping[i.firstLevel]][ofs:]) + + pos := i.idx.FanoutMapping[entry.Hash[0]] + + offset, err := i.idx.getOffset(pos, i.secondLevel) + if err != nil { + return nil, err + } + entry.Offset = uint64(offset) + + entry.CRC32, err = i.idx.getCrc32(pos, i.secondLevel) + if err != nil { + return nil, err + } + + i.secondLevel++ + i.total++ + + return entry, nil } +} - return fanout +func (i *idxfileEntryIter) Close() error { + i.firstLevel = fanout + return nil +} + +// Entry is the in memory representation of an object entry in the idx file. +type Entry struct { + Hash plumbing.Hash + CRC32 uint32 + Offset uint64 } diff --git a/plumbing/format/idxfile/idxfile_test.go b/plumbing/format/idxfile/idxfile_test.go new file mode 100644 index 0000000..f42a419 --- /dev/null +++ b/plumbing/format/idxfile/idxfile_test.go @@ -0,0 +1,109 @@ +package idxfile_test + +import ( + "bytes" + "encoding/base64" + "io" + "testing" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" +) + +func BenchmarkFindOffset(b *testing.B) { + idx := fixtureIndex(b) + + for i := 0; i < b.N; i++ { + for _, h := range fixtureHashes { + _, err := idx.FindOffset(h) + if err != nil { + b.Fatalf("error getting offset: %s", err) + } + } + } +} + +func BenchmarkFindCRC32(b *testing.B) { + idx := fixtureIndex(b) + + for i := 0; i < b.N; i++ { + for _, h := range fixtureHashes { + _, err := idx.FindCRC32(h) + if err != nil { + b.Fatalf("error getting crc32: %s", err) + } + } + } +} + +func BenchmarkContains(b *testing.B) { + idx := fixtureIndex(b) + + for i := 0; i < b.N; i++ { + for _, h := range fixtureHashes { + ok, err := idx.Contains(h) + if err != nil { + b.Fatalf("error checking if hash is in index: %s", err) + } + + if !ok { + b.Error("expected hash to be in index") + } + } + } +} + +func BenchmarkEntries(b *testing.B) { + idx := fixtureIndex(b) + + for i := 0; i < b.N; i++ { + iter, err := idx.Entries() + if err != nil { + b.Fatalf("unexpected error getting entries: %s", err) + } + + var entries int + for { + _, err := iter.Next() + if err != nil { + if err == io.EOF { + break + } + + b.Errorf("unexpected error getting entry: %s", err) + } + + entries++ + } + + if entries != len(fixtureHashes) { + b.Errorf("expecting entries to be %d, got %d", len(fixtureHashes), entries) + } + } +} + +var fixtureHashes = []plumbing.Hash{ + plumbing.NewHash("303953e5aa461c203a324821bc1717f9b4fff895"), + plumbing.NewHash("5296768e3d9f661387ccbff18c4dea6c997fd78c"), + plumbing.NewHash("03fc8d58d44267274edef4585eaeeb445879d33f"), + plumbing.NewHash("8f3ceb4ea4cb9e4a0f751795eb41c9a4f07be772"), + plumbing.NewHash("e0d1d625010087f79c9e01ad9d8f95e1628dda02"), + plumbing.NewHash("90eba326cdc4d1d61c5ad25224ccbf08731dd041"), + plumbing.NewHash("bab53055add7bc35882758a922c54a874d6b1272"), + plumbing.NewHash("1b8995f51987d8a449ca5ea4356595102dc2fbd4"), + plumbing.NewHash("35858be9c6f5914cbe6768489c41eb6809a2bceb"), +} + +func fixtureIndex(t testing.TB) *idxfile.MemoryIndex { + f := bytes.NewBufferString(fixtureLarge4GB) + + idx := new(idxfile.MemoryIndex) + + d := idxfile.NewDecoder(base64.NewDecoder(base64.StdEncoding, f)) + err := d.Decode(idx) + if err != nil { + t.Fatalf("unexpected error decoding index: %s", err) + } + + return idx +} diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index f706e5d..765401f 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -5,6 +5,7 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) @@ -63,7 +64,7 @@ type Decoder struct { // hasBuiltIndex indicates if the index is fully built or not. If it is not, // will be built incrementally while decoding. hasBuiltIndex bool - idx *Index + idx idxfile.Index offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType @@ -117,7 +118,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, o: o, deltaBaseCache: cacheObject, - idx: NewIndex(0), + idx: idxfile.NewMemoryIndex(), offsetToType: make(map[int64]plumbing.ObjectType), decoderType: t, }, nil @@ -150,7 +151,8 @@ func (d *Decoder) doDecode() error { } if !d.hasBuiltIndex { - d.idx = NewIndex(int(count)) + // TODO: MemoryIndex is not writable, change to something else + d.idx = idxfile.NewMemoryIndex() } defer func() { d.hasBuiltIndex = true }() @@ -284,12 +286,12 @@ func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) { } func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) { - e, ok := d.idx.LookupHash(ref) - if !ok { + offset, err := d.idx.FindOffset(ref) + if err != nil { return plumbing.InvalidObject, plumbing.ErrObjectNotFound } - return d.ofsDeltaType(int64(e.Offset)) + return d.ofsDeltaType(offset) } func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) { @@ -314,9 +316,14 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error return obj, err } + // TODO: remove this + _ = crc + + /* Add is no longer available if !d.hasBuiltIndex { d.idx.Add(obj.Hash(), uint64(h.Offset), crc) } + */ return obj, nil } @@ -448,8 +455,8 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { if d.s.IsSeekable { - if e, ok := d.idx.LookupHash(h); ok { - return d.DecodeObjectAt(int64(e.Offset)) + if offset, err := d.idx.FindOffset(h); err != nil { + return d.DecodeObjectAt(offset) } } @@ -475,7 +482,7 @@ func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.Encoded // SetIndex sets an index for the packfile. It is recommended to set this. // The index might be read from a file or reused from a previous Decoder usage // (see Index function). -func (d *Decoder) SetIndex(idx *Index) { +func (d *Decoder) SetIndex(idx idxfile.Index) { d.hasBuiltIndex = true d.idx = idx } @@ -484,7 +491,7 @@ func (d *Decoder) SetIndex(idx *Index) { // Index will return it. Otherwise, it will return an index that is built while // decoding. If neither SetIndex was called with a full index or Decode called // for the whole packfile, then the returned index will be incomplete. -func (d *Decoder) Index() *Index { +func (d *Decoder) Index() idxfile.Index { return d.idx } diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go deleted file mode 100644 index 021b2d1..0000000 --- a/plumbing/format/packfile/index.go +++ /dev/null @@ -1,125 +0,0 @@ -package packfile - -import ( - "sort" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" -) - -// Index is an in-memory representation of a packfile index. -// This uses idxfile.Idxfile under the hood to obtain indexes from .idx files -// or to store them. -type Index struct { - byHash map[plumbing.Hash]*idxfile.Entry - byOffset []*idxfile.Entry // sorted by their offset -} - -// NewIndex creates a new empty index with the given size. Size is a hint and -// can be 0. It is recommended to set it to the number of objects to be indexed -// if it is known beforehand (e.g. reading from a packfile). -func NewIndex(size int) *Index { - return &Index{ - byHash: make(map[plumbing.Hash]*idxfile.Entry, size), - byOffset: make([]*idxfile.Entry, 0, size), - } -} - -// NewIndexFromIdxFile creates a new Index from an idxfile.IdxFile. -func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index { - idx := &Index{ - byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount), - byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount), - } - sorted := true - for i, e := range idxf.Entries { - idx.addUnsorted(e) - if i > 0 && idx.byOffset[i-1].Offset >= e.Offset { - sorted = false - } - } - - // If the idxfile was loaded from a regular packfile index - // then it will already be in offset order, in which case we - // can avoid doing a relatively expensive idempotent sort. - if !sorted { - sort.Sort(orderByOffset(idx.byOffset)) - } - - return idx -} - -// orderByOffset is a sort.Interface adapter that arranges -// a slice of entries by their offset. -type orderByOffset []*idxfile.Entry - -func (o orderByOffset) Len() int { return len(o) } -func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset } -func (o orderByOffset) Swap(i, j int) { o[i], o[j] = o[j], o[i] } - -// Add adds a new Entry with the given values to the index. -func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) { - e := &idxfile.Entry{ - Hash: h, - Offset: offset, - CRC32: crc32, - } - idx.byHash[e.Hash] = e - - // Find the right position in byOffset. - // Look for the first position whose offset is *greater* than e.Offset. - i := sort.Search(len(idx.byOffset), func(i int) bool { - return idx.byOffset[i].Offset > offset - }) - if i == len(idx.byOffset) { - // Simple case: add it to the end. - idx.byOffset = append(idx.byOffset, e) - return - } - // Harder case: shift existing entries down by one to make room. - // Append a nil entry first so we can use existing capacity in case - // the index was carefully preallocated. - idx.byOffset = append(idx.byOffset, nil) - copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1]) - idx.byOffset[i] = e -} - -func (idx *Index) addUnsorted(e *idxfile.Entry) { - idx.byHash[e.Hash] = e - idx.byOffset = append(idx.byOffset, e) -} - -// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and -// a bool, which is true if it was found or false if it wasn't. -func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) { - e, ok := idx.byHash[h] - return e, ok -} - -// LookupHash looks an entry up by its offset in the packfile. An idxfile.Entry -// is returned and a bool, which is true if it was found or false if it wasn't. -func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) { - i := sort.Search(len(idx.byOffset), func(i int) bool { - return idx.byOffset[i].Offset >= offset - }) - if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset { - return nil, false // not present - } - return idx.byOffset[i], true -} - -// Size returns the number of entries in the index. -func (idx *Index) Size() int { - return len(idx.byHash) -} - -// ToIdxFile converts the index to an idxfile.Idxfile, which can then be used -// to serialize. -func (idx *Index) ToIdxFile() *idxfile.Idxfile { - idxf := idxfile.NewIdxfile() - for _, e := range idx.byHash { - idxf.Entries = append(idxf.Entries, e) - } - - return idxf -} |