aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format/idxfile/idxfile.go
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2018-08-14 09:57:46 +0200
committerGitHub <noreply@github.com>2018-08-14 09:57:46 +0200
commita28c2ce44695f13ddf28748958f236afd8e0b544 (patch)
tree107dd441cd96b44b4f3994d26faf5f0bfae933fc /plumbing/format/idxfile/idxfile.go
parentc3740924da0d1929cb523c85ae9da3b456b901ea (diff)
parent8d75d239e93474e4287870e4e5143da14e2c360d (diff)
downloadgo-git-a28c2ce44695f13ddf28748958f236afd8e0b544.tar.gz
Merge pull request #906 from src-d/perf/packfile-reads
Improve packfile reading performance
Diffstat (limited to 'plumbing/format/idxfile/idxfile.go')
-rw-r--r--plumbing/format/idxfile/idxfile.go280
1 files changed, 245 insertions, 35 deletions
diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go
index 6b05eaa..c977bee 100644
--- a/plumbing/format/idxfile/idxfile.go
+++ b/plumbing/format/idxfile/idxfile.go
@@ -1,68 +1,278 @@
package idxfile
-import "gopkg.in/src-d/go-git.v4/plumbing"
+import (
+ "bytes"
+ "io"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
const (
// VersionSupported is the only idx version supported.
VersionSupported = 2
- offsetLimit = 0x7fffffff
+ noMapping = -1
)
var (
idxHeader = []byte{255, 't', 'O', 'c'}
)
-// Idxfile is the in memory representation of an idx file.
-type Idxfile struct {
- Version uint32
- Fanout [255]uint32
- ObjectCount uint32
- Entries EntryList
+// Index represents an index of a packfile.
+type Index interface {
+ // Contains checks whether the given hash is in the index.
+ Contains(h plumbing.Hash) (bool, error)
+ // FindOffset finds the offset in the packfile for the object with
+ // the given hash.
+ FindOffset(h plumbing.Hash) (int64, error)
+ // FindCRC32 finds the CRC32 of the object with the given hash.
+ FindCRC32(h plumbing.Hash) (uint32, error)
+ // FindHash finds the hash for the object with the given offset.
+ FindHash(o int64) (plumbing.Hash, error)
+ // Count returns the number of entries in the index.
+ Count() (int64, error)
+ // Entries returns an iterator to retrieve all index entries.
+ Entries() (EntryIter, error)
+}
+
+// MemoryIndex is the in memory representation of an idx file.
+type MemoryIndex struct {
+ Version uint32
+ Fanout [256]uint32
+ // FanoutMapping maps the position in the fanout table to the position
+ // in the Names, Offset32 and CRC32 slices. This improves the memory
+ // usage by not needing an array with unnecessary empty slots.
+ FanoutMapping [256]int
+ Names [][]byte
+ Offset32 [][]byte
+ CRC32 [][]byte
+ Offset64 []byte
PackfileChecksum [20]byte
IdxChecksum [20]byte
+
+ offsetHash map[int64]plumbing.Hash
}
-func NewIdxfile() *Idxfile {
- return &Idxfile{}
+var _ Index = (*MemoryIndex)(nil)
+
+// NewMemoryIndex returns an instance of a new MemoryIndex.
+func NewMemoryIndex() *MemoryIndex {
+ return &MemoryIndex{}
}
-// Entry is the in memory representation of an object entry in the idx file.
-type Entry struct {
- Hash plumbing.Hash
- CRC32 uint32
- Offset uint64
+func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) (int, bool) {
+ k := idx.FanoutMapping[h[0]]
+ if k == noMapping {
+ return 0, false
+ }
+
+ if len(idx.Names) <= k {
+ return 0, false
+ }
+
+ data := idx.Names[k]
+ high := uint64(len(idx.Offset32[k])) >> 2
+ if high == 0 {
+ return 0, false
+ }
+
+ low := uint64(0)
+ for {
+ mid := (low + high) >> 1
+ offset := mid * objectIDLength
+
+ cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength])
+ if cmp < 0 {
+ high = mid
+ } else if cmp == 0 {
+ return int(mid), true
+ } else {
+ low = mid + 1
+ }
+
+ if low >= high {
+ break
+ }
+ }
+
+ return 0, false
}
-// Add adds a new Entry with the given values to the Idxfile.
-func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
- idx.Entries = append(idx.Entries, &Entry{
- Hash: h,
- Offset: offset,
- CRC32: crc32,
- })
+// Contains implements the Index interface.
+func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) {
+ _, ok := idx.findHashIndex(h)
+ return ok, nil
+}
+
+// FindOffset implements the Index interface.
+func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) {
+ if len(idx.FanoutMapping) <= int(h[0]) {
+ return 0, plumbing.ErrObjectNotFound
+ }
+
+ k := idx.FanoutMapping[h[0]]
+ i, ok := idx.findHashIndex(h)
+ if !ok {
+ return 0, plumbing.ErrObjectNotFound
+ }
+
+ return idx.getOffset(k, i)
}
-func (idx *Idxfile) isValid() bool {
- fanout := idx.calculateFanout()
- for k, c := range idx.Fanout {
- if fanout[k] != c {
- return false
+const isO64Mask = uint64(1) << 31
+
+func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) (int64, error) {
+ offset := secondLevel << 2
+ buf := bytes.NewBuffer(idx.Offset32[firstLevel][offset : offset+4])
+ ofs, err := binary.ReadUint32(buf)
+ if err != nil {
+ return -1, err
+ }
+
+ if (uint64(ofs) & isO64Mask) != 0 {
+ offset := 8 * (uint64(ofs) & ^isO64Mask)
+ buf := bytes.NewBuffer(idx.Offset64[offset : offset+8])
+ n, err := binary.ReadUint64(buf)
+ if err != nil {
+ return -1, err
}
+
+ return int64(n), nil
+ }
+
+ return int64(ofs), nil
+}
+
+// FindCRC32 implements the Index interface.
+func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) {
+ k := idx.FanoutMapping[h[0]]
+ i, ok := idx.findHashIndex(h)
+ if !ok {
+ return 0, plumbing.ErrObjectNotFound
}
- return true
+ return idx.getCRC32(k, i)
+}
+
+func (idx *MemoryIndex) getCRC32(firstLevel, secondLevel int) (uint32, error) {
+ offset := secondLevel << 2
+ buf := bytes.NewBuffer(idx.CRC32[firstLevel][offset : offset+4])
+ return binary.ReadUint32(buf)
}
-func (idx *Idxfile) calculateFanout() [256]uint32 {
- fanout := [256]uint32{}
- for _, e := range idx.Entries {
- fanout[e.Hash[0]]++
+// FindHash implements the Index interface.
+func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) {
+ // Lazily generate the reverse offset/hash map if required.
+ if idx.offsetHash == nil {
+ if err := idx.genOffsetHash(); err != nil {
+ return plumbing.ZeroHash, err
+ }
}
- for i := 1; i < 256; i++ {
- fanout[i] += fanout[i-1]
+ hash, ok := idx.offsetHash[o]
+ if !ok {
+ return plumbing.ZeroHash, plumbing.ErrObjectNotFound
}
- return fanout
+ return hash, nil
+}
+
+// genOffsetHash generates the offset/hash mapping for reverse search.
+func (idx *MemoryIndex) genOffsetHash() error {
+ count, err := idx.Count()
+ if err != nil {
+ return err
+ }
+
+ idx.offsetHash = make(map[int64]plumbing.Hash, count)
+
+ iter, err := idx.Entries()
+ if err != nil {
+ return err
+ }
+
+ for {
+ entry, err := iter.Next()
+ if err != nil {
+ if err == io.EOF {
+ return nil
+ }
+ return err
+ }
+
+ idx.offsetHash[int64(entry.Offset)] = entry.Hash
+ }
+}
+
+// Count implements the Index interface.
+func (idx *MemoryIndex) Count() (int64, error) {
+ return int64(idx.Fanout[fanout-1]), nil
+}
+
+// Entries implements the Index interface.
+func (idx *MemoryIndex) Entries() (EntryIter, error) {
+ return &idxfileEntryIter{idx, 0, 0, 0}, nil
+}
+
+// EntryIter is an iterator that will return the entries in a packfile index.
+type EntryIter interface {
+ // Next returns the next entry in the packfile index.
+ Next() (*Entry, error)
+ // Close closes the iterator.
+ Close() error
+}
+
+type idxfileEntryIter struct {
+ idx *MemoryIndex
+ total int
+ firstLevel, secondLevel int
+}
+
+func (i *idxfileEntryIter) Next() (*Entry, error) {
+ for {
+ if i.firstLevel >= fanout {
+ return nil, io.EOF
+ }
+
+ if i.total >= int(i.idx.Fanout[i.firstLevel]) {
+ i.firstLevel++
+ i.secondLevel = 0
+ continue
+ }
+
+ entry := new(Entry)
+ ofs := i.secondLevel * objectIDLength
+ copy(entry.Hash[:], i.idx.Names[i.idx.FanoutMapping[i.firstLevel]][ofs:])
+
+ pos := i.idx.FanoutMapping[entry.Hash[0]]
+
+ offset, err := i.idx.getOffset(pos, i.secondLevel)
+ if err != nil {
+ return nil, err
+ }
+ entry.Offset = uint64(offset)
+
+ entry.CRC32, err = i.idx.getCRC32(pos, i.secondLevel)
+ if err != nil {
+ return nil, err
+ }
+
+ i.secondLevel++
+ i.total++
+
+ return entry, nil
+ }
+}
+
+func (i *idxfileEntryIter) Close() error {
+ i.firstLevel = fanout
+ return nil
+}
+
+// Entry is the in memory representation of an object entry in the idx file.
+type Entry struct {
+ Hash plumbing.Hash
+ CRC32 uint32
+ Offset uint64
}