aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format/idxfile/idxfile.go
diff options
context:
space:
mode:
authorMiguel Molina <miguel@erizocosmi.co>2018-07-26 14:10:19 +0200
committerGitHub <noreply@github.com>2018-07-26 14:10:19 +0200
commita8ff3e599b3ee998a8b8626cd9fe9fa68490d354 (patch)
treeac984b72e7c54160b0b154bffd4474f96ae5028e /plumbing/format/idxfile/idxfile.go
parent9f00789688d26191a987fdec8bc2678362ec4453 (diff)
parent009f1069a1248c1e9189a9e4c342f6d017156ec4 (diff)
downloadgo-git-a8ff3e599b3ee998a8b8626cd9fe9fa68490d354.tar.gz
Merge pull request #896 from erizocosmico/feature/new-index-decoder
plumbing/format/idxfile: add new Index and MemoryIndex
Diffstat (limited to 'plumbing/format/idxfile/idxfile.go')
-rw-r--r--plumbing/format/idxfile/idxfile.go224
1 files changed, 189 insertions, 35 deletions
diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go
index 6b05eaa..b196608 100644
--- a/plumbing/format/idxfile/idxfile.go
+++ b/plumbing/format/idxfile/idxfile.go
@@ -1,68 +1,222 @@
package idxfile
-import "gopkg.in/src-d/go-git.v4/plumbing"
+import (
+ "bytes"
+ "io"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/utils/binary"
+)
const (
// VersionSupported is the only idx version supported.
VersionSupported = 2
- offsetLimit = 0x7fffffff
+ noMapping = -1
)
var (
idxHeader = []byte{255, 't', 'O', 'c'}
)
-// Idxfile is the in memory representation of an idx file.
-type Idxfile struct {
- Version uint32
- Fanout [255]uint32
- ObjectCount uint32
- Entries EntryList
+// Index represents an index of a packfile.
+type Index interface {
+ // Contains checks whether the given hash is in the index.
+ Contains(h plumbing.Hash) (bool, error)
+ // FindOffset finds the offset in the packfile for the object with
+ // the given hash.
+ FindOffset(h plumbing.Hash) (int64, error)
+ // FindCRC32 finds the CRC32 of the object with the given hash.
+ FindCRC32(h plumbing.Hash) (uint32, error)
+ // Count returns the number of entries in the index.
+ Count() (int64, error)
+ // Entries returns an iterator to retrieve all index entries.
+ Entries() (EntryIter, error)
+}
+
+// MemoryIndex is the in memory representation of an idx file.
+type MemoryIndex struct {
+ Version uint32
+ Fanout [256]uint32
+ // FanoutMapping maps the position in the fanout table to the position
+ // in the Names, Offset32 and Crc32 slices. This improves the memory
+ // usage by not needing an array with unnecessary empty slots.
+ FanoutMapping [256]int
+ Names [][]byte
+ Offset32 [][]byte
+ Crc32 [][]byte
+ Offset64 []byte
PackfileChecksum [20]byte
IdxChecksum [20]byte
}
-func NewIdxfile() *Idxfile {
- return &Idxfile{}
+var _ Index = (*MemoryIndex)(nil)
+
+// NewMemoryIndex returns an instance of a new MemoryIndex.
+func NewMemoryIndex() *MemoryIndex {
+ return &MemoryIndex{}
}
-// Entry is the in memory representation of an object entry in the idx file.
-type Entry struct {
- Hash plumbing.Hash
- CRC32 uint32
- Offset uint64
+func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int {
+ k := idx.FanoutMapping[h[0]]
+ if k == noMapping {
+ return -1
+ }
+
+ data := idx.Names[k]
+ high := uint64(len(idx.Offset32[k])) >> 2
+ if high == 0 {
+ return -1
+ }
+
+ low := uint64(0)
+ for {
+ mid := (low + high) >> 1
+ offset := mid + (mid << 2)
+
+ cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength])
+ if cmp < 0 {
+ high = mid
+ } else if cmp == 0 {
+ return int(mid)
+ } else {
+ low = mid + 1
+ }
+
+ if low < high {
+ break
+ }
+ }
+
+ return -1
}
-// Add adds a new Entry with the given values to the Idxfile.
-func (idx *Idxfile) Add(h plumbing.Hash, offset uint64, crc32 uint32) {
- idx.Entries = append(idx.Entries, &Entry{
- Hash: h,
- Offset: offset,
- CRC32: crc32,
- })
+// Contains implements the Index interface.
+func (idx *MemoryIndex) Contains(h plumbing.Hash) (bool, error) {
+ i := idx.findHashIndex(h)
+ return i >= 0, nil
}
-func (idx *Idxfile) isValid() bool {
- fanout := idx.calculateFanout()
- for k, c := range idx.Fanout {
- if fanout[k] != c {
- return false
+// FindOffset implements the Index interface.
+func (idx *MemoryIndex) FindOffset(h plumbing.Hash) (int64, error) {
+ k := idx.FanoutMapping[h[0]]
+ i := idx.findHashIndex(h)
+ if i < 0 {
+ return 0, plumbing.ErrObjectNotFound
+ }
+
+ return idx.getOffset(k, i)
+}
+
+const isO64Mask = uint64(1) << 31
+
+func (idx *MemoryIndex) getOffset(firstLevel, secondLevel int) (int64, error) {
+ offset := secondLevel << 2
+ buf := bytes.NewBuffer(idx.Offset32[firstLevel][offset : offset+4])
+ ofs, err := binary.ReadUint32(buf)
+ if err != nil {
+ return -1, err
+ }
+
+ if (uint64(ofs) & isO64Mask) != 0 {
+ offset := 8 * (uint64(ofs) & ^isO64Mask)
+ buf := bytes.NewBuffer(idx.Offset64[offset : offset+8])
+ n, err := binary.ReadUint64(buf)
+ if err != nil {
+ return -1, err
}
+
+ return int64(n), nil
}
- return true
+ return int64(ofs), nil
}
-func (idx *Idxfile) calculateFanout() [256]uint32 {
- fanout := [256]uint32{}
- for _, e := range idx.Entries {
- fanout[e.Hash[0]]++
+// FindCRC32 implements the Index interface.
+func (idx *MemoryIndex) FindCRC32(h plumbing.Hash) (uint32, error) {
+ k := idx.FanoutMapping[h[0]]
+ i := idx.findHashIndex(h)
+ if i < 0 {
+ return 0, plumbing.ErrObjectNotFound
}
- for i := 1; i < 256; i++ {
- fanout[i] += fanout[i-1]
+ return idx.getCrc32(k, i)
+}
+
+func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) {
+ offset := secondLevel << 2
+ buf := bytes.NewBuffer(idx.Crc32[firstLevel][offset : offset+4])
+ return binary.ReadUint32(buf)
+}
+
+// Count implements the Index interface.
+func (idx *MemoryIndex) Count() (int64, error) {
+ return int64(idx.Fanout[fanout-1]), nil
+}
+
+// Entries implements the Index interface.
+func (idx *MemoryIndex) Entries() (EntryIter, error) {
+ return &idxfileEntryIter{idx, 0, 0, 0}, nil
+}
+
+// EntryIter is an iterator that will return the entries in a packfile index.
+type EntryIter interface {
+ // Next returns the next entry in the packfile index.
+ Next() (*Entry, error)
+ // Close closes the iterator.
+ Close() error
+}
+
+type idxfileEntryIter struct {
+ idx *MemoryIndex
+ total int
+ firstLevel, secondLevel int
+}
+
+func (i *idxfileEntryIter) Next() (*Entry, error) {
+ for {
+ if i.firstLevel >= fanout {
+ return nil, io.EOF
+ }
+
+ if i.total >= int(i.idx.Fanout[i.firstLevel]) {
+ i.firstLevel++
+ i.secondLevel = 0
+ continue
+ }
+
+ entry := new(Entry)
+ ofs := i.secondLevel * objectIDLength
+ copy(entry.Hash[:], i.idx.Names[i.idx.FanoutMapping[i.firstLevel]][ofs:])
+
+ pos := i.idx.FanoutMapping[entry.Hash[0]]
+
+ offset, err := i.idx.getOffset(pos, i.secondLevel)
+ if err != nil {
+ return nil, err
+ }
+ entry.Offset = uint64(offset)
+
+ entry.CRC32, err = i.idx.getCrc32(pos, i.secondLevel)
+ if err != nil {
+ return nil, err
+ }
+
+ i.secondLevel++
+ i.total++
+
+ return entry, nil
}
+}
- return fanout
+func (i *idxfileEntryIter) Close() error {
+ i.firstLevel = fanout
+ return nil
+}
+
+// Entry is the in memory representation of an object entry in the idx file.
+type Entry struct {
+ Hash plumbing.Hash
+ CRC32 uint32
+ Offset uint64
}