diff options
author | Javi Fontan <jfontan@gmail.com> | 2018-07-26 14:19:32 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-07-26 14:19:32 +0200 |
commit | a08061aa298ec4b92bbe470a1834465f25a0ad0d (patch) | |
tree | 8804820221e05096433b2e0cef5249e4a3e94c7b /plumbing/format/idxfile | |
parent | a8ff3e599b3ee998a8b8626cd9fe9fa68490d354 (diff) | |
parent | 79f249465b24104b73c9dc220d9098cecdab4d77 (diff) | |
download | go-git-a08061aa298ec4b92bbe470a1834465f25a0ad0d.tar.gz |
Merge pull request #898 from jfontan/feature/new-packfile-parser
Feature/new packfile parser
Diffstat (limited to 'plumbing/format/idxfile')
-rw-r--r-- | plumbing/format/idxfile/idxfile.go | 55 | ||||
-rw-r--r-- | plumbing/format/idxfile/writer.go | 177 | ||||
-rw-r--r-- | plumbing/format/idxfile/writer_test.go | 45 |
3 files changed, 275 insertions, 2 deletions
diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index b196608..f8debb1 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -28,6 +28,8 @@ type Index interface { FindOffset(h plumbing.Hash) (int64, error) // FindCRC32 finds the CRC32 of the object with the given hash. FindCRC32(h plumbing.Hash) (uint32, error) + // FindHash finds the hash for the object with the given offset. + FindHash(o int64) (plumbing.Hash, error) // Count returns the number of entries in the index. Count() (int64, error) // Entries returns an iterator to retrieve all index entries. @@ -48,6 +50,8 @@ type MemoryIndex struct { Offset64 []byte PackfileChecksum [20]byte IdxChecksum [20]byte + + offsetHash map[int64]plumbing.Hash } var _ Index = (*MemoryIndex)(nil) @@ -72,7 +76,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { low := uint64(0) for { mid := (low + high) >> 1 - offset := mid + (mid << 2) + offset := mid * objectIDLength cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength]) if cmp < 0 { @@ -83,7 +87,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { low = mid + 1 } - if low < high { + if low > high { break } } @@ -149,6 +153,53 @@ func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) { return binary.ReadUint32(buf) } +// FindHash implements the Index interface. +func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) { + // Lazily generate the reverse offset/hash map if required. + if idx.offsetHash == nil { + err := idx.genOffsetHash() + if err != nil { + return plumbing.ZeroHash, nil + } + } + + hash, ok := idx.offsetHash[o] + if !ok { + return plumbing.ZeroHash, plumbing.ErrObjectNotFound + } + + return hash, nil +} + +// genOffsetHash generates the offset/hash mapping for reverse search. +func (idx *MemoryIndex) genOffsetHash() error { + count, err := idx.Count() + if err != nil { + return err + } + + idx.offsetHash = make(map[int64]plumbing.Hash, count) + + iter, err := idx.Entries() + if err != nil { + return err + } + + var entry *Entry + for err != nil { + entry, err = iter.Next() + if err == nil { + idx.offsetHash[int64(entry.Offset)] = entry.Hash + } + } + + if err == io.EOF { + return nil + } + + return err +} + // Count implements the Index interface. func (idx *MemoryIndex) Count() (int64, error) { return int64(idx.Fanout[fanout-1]), nil diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go new file mode 100644 index 0000000..efcdcc6 --- /dev/null +++ b/plumbing/format/idxfile/writer.go @@ -0,0 +1,177 @@ +package idxfile + +import ( + "bytes" + "fmt" + "math" + "sort" + "sync" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +// objects implements sort.Interface and uses hash as sorting key. +type objects []Entry + +// Writer implements a packfile Observer interface and is used to generate +// indexes. +type Writer struct { + m sync.Mutex + + count uint32 + checksum plumbing.Hash + objects objects + offset64 uint32 + finished bool + index *MemoryIndex +} + +// Index returns a previously created MemoryIndex or creates a new one if +// needed. +func (w *Writer) Index() (*MemoryIndex, error) { + w.m.Lock() + defer w.m.Unlock() + + if w.index == nil { + return w.createIndex() + } + + return w.index, nil +} + +// Add appends new object data. +func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { + w.m.Lock() + defer w.m.Unlock() + + w.objects = append(w.objects, Entry{h, crc, pos}) +} + +func (w *Writer) Finished() bool { + return w.finished +} + +// OnHeader implements packfile.Observer interface. +func (w *Writer) OnHeader(count uint32) error { + w.count = count + w.objects = make(objects, 0, count) + return nil +} + +// OnInflatedObjectHeader implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error { + return nil +} + +// OnInflatedObjectContent implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { + w.Add(h, uint64(pos), crc) + return nil +} + +// OnFooter implements packfile.Observer interface. +func (w *Writer) OnFooter(h plumbing.Hash) error { + w.checksum = h + w.finished = true + _, err := w.createIndex() + if err != nil { + return err + } + + return nil +} + +// creatIndex returns a filled MemoryIndex with the information filled by +// the observer callbacks. +func (w *Writer) createIndex() (*MemoryIndex, error) { + if !w.finished { + return nil, fmt.Errorf("the index still hasn't finished building") + } + + idx := new(MemoryIndex) + w.index = idx + + sort.Sort(w.objects) + + // unmap all fans by default + for i := range idx.FanoutMapping { + idx.FanoutMapping[i] = noMapping + } + + buf := new(bytes.Buffer) + + last := -1 + bucket := -1 + for i, o := range w.objects { + fan := o.Hash[0] + + // fill the gaps between fans + for j := last + 1; j < int(fan); j++ { + idx.Fanout[j] = uint32(i) + } + + // update the number of objects for this position + idx.Fanout[fan] = uint32(i + 1) + + // we move from one bucket to another, update counters and allocate + // memory + if last != int(fan) { + bucket++ + idx.FanoutMapping[fan] = bucket + last = int(fan) + + idx.Names = append(idx.Names, make([]byte, 0)) + idx.Offset32 = append(idx.Offset32, make([]byte, 0)) + idx.Crc32 = append(idx.Crc32, make([]byte, 0)) + } + + idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...) + + offset := o.Offset + if offset > math.MaxInt32 { + offset = w.addOffset64(offset) + } + + buf.Truncate(0) + binary.WriteUint32(buf, uint32(offset)) + idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...) + + buf.Truncate(0) + binary.WriteUint32(buf, uint32(o.CRC32)) + idx.Crc32[bucket] = append(idx.Crc32[bucket], buf.Bytes()...) + } + + for j := last + 1; j < 256; j++ { + idx.Fanout[j] = uint32(len(w.objects)) + } + + idx.Version = VersionSupported + idx.PackfileChecksum = w.checksum + + return idx, nil +} + +func (w *Writer) addOffset64(pos uint64) uint64 { + buf := new(bytes.Buffer) + binary.WriteUint64(buf, pos) + w.index.Offset64 = append(w.index.Offset64, buf.Bytes()...) + + index := uint64(w.offset64 | (1 << 31)) + w.offset64++ + + return index +} + +func (o objects) Len() int { + return len(o) +} + +func (o objects) Less(i int, j int) bool { + cmp := bytes.Compare(o[i].Hash[:], o[j].Hash[:]) + return cmp < 0 +} + +func (o objects) Swap(i int, j int) { + o[i], o[j] = o[j], o[i] +} diff --git a/plumbing/format/idxfile/writer_test.go b/plumbing/format/idxfile/writer_test.go new file mode 100644 index 0000000..51273a3 --- /dev/null +++ b/plumbing/format/idxfile/writer_test.go @@ -0,0 +1,45 @@ +package idxfile_test + +import ( + "bytes" + "io/ioutil" + + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" +) + +type IndexSuite struct { + fixtures.Suite +} + +var _ = Suite(&IndexSuite{}) + +func (s *IndexSuite) TestIndexWriter(c *C) { + f := fixtures.Basic().One() + scanner := packfile.NewScanner(f.Packfile()) + + obs := new(idxfile.Writer) + parser := packfile.NewParser(scanner, obs) + + _, err := parser.Parse() + c.Assert(err, IsNil) + + idx, err := obs.Index() + c.Assert(err, IsNil) + + idxFile := f.Idx() + expected, err := ioutil.ReadAll(idxFile) + c.Assert(err, IsNil) + idxFile.Close() + + buf := new(bytes.Buffer) + encoder := idxfile.NewEncoder(buf) + n, err := encoder.Encode(idx) + c.Assert(err, IsNil) + c.Assert(n, Equals, len(expected)) + + c.Assert(buf.Bytes(), DeepEquals, expected) +} |