From da5677f5ba3970d585d5955b15a6a1c3c262c07b Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 19 Jul 2018 17:05:45 +0200 Subject: plumbing/packfile: add new packfile parser Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 359 ++++++++++++++++++++++++++++++++ plumbing/format/packfile/parser_test.go | 139 +++++++++++++ 2 files changed, 498 insertions(+) create mode 100644 plumbing/format/packfile/parser.go create mode 100644 plumbing/format/packfile/parser_test.go diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go new file mode 100644 index 0000000..460fc3f --- /dev/null +++ b/plumbing/format/packfile/parser.go @@ -0,0 +1,359 @@ +package packfile + +import ( + "bytes" + "errors" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" +) + +// Observer interface is implemented by index encoders. +type Observer interface { + // OnHeader is called when a new packfile is opened. + OnHeader(count uint32) error + // OnInflatedObjectHeader is called for each object header read. + OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error + // OnInflatedObjectContent is called for each decoded object. + OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error + // OnFooter is called when decoding is done. + OnFooter(h plumbing.Hash) error +} + +// Parser decodes a packfile and calls any observer associated to it. Is used +// to generate indexes. +type Parser struct { + scanner *Scanner + count uint32 + oi []*objectInfo + oiByHash map[plumbing.Hash]*objectInfo + oiByOffset map[int64]*objectInfo + hashOffset map[plumbing.Hash]int64 + checksum plumbing.Hash + + cache *cache.ObjectLRU + + ob []Observer +} + +// NewParser creates a new Parser struct. +func NewParser(scanner *Scanner, ob ...Observer) *Parser { + return &Parser{ + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + } +} + +// Parse start decoding phase of the packfile. +func (p *Parser) Parse() (plumbing.Hash, error) { + err := p.init() + if err != nil { + return plumbing.ZeroHash, err + } + + err = p.firstPass() + if err != nil { + return plumbing.ZeroHash, err + } + + err = p.resolveDeltas() + if err != nil { + return plumbing.ZeroHash, err + } + + for _, o := range p.ob { + err := o.OnFooter(p.checksum) + if err != nil { + return plumbing.ZeroHash, err + } + } + + return p.checksum, nil +} + +func (p *Parser) init() error { + _, c, err := p.scanner.Header() + if err != nil { + return err + } + + for _, o := range p.ob { + err := o.OnHeader(c) + if err != nil { + return err + } + } + + p.count = c + p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count) + p.oiByOffset = make(map[int64]*objectInfo, p.count) + p.oi = make([]*objectInfo, p.count) + + return nil +} + +func (p *Parser) firstPass() error { + buf := new(bytes.Buffer) + + for i := uint32(0); i < p.count; i++ { + buf.Truncate(0) + + oh, err := p.scanner.NextObjectHeader() + if err != nil { + return err + } + + delta := false + var ota *objectInfo + switch t := oh.Type; t { + case plumbing.OFSDeltaObject, plumbing.REFDeltaObject: + delta = true + + var parent *objectInfo + var ok bool + + if t == plumbing.OFSDeltaObject { + parent, ok = p.oiByOffset[oh.OffsetReference] + } else { + parent, ok = p.oiByHash[oh.Reference] + } + + if !ok { + // TODO improve error + return errors.New("Reference delta not found") + } + + ota = newDeltaObject(oh.Offset, oh.Length, t, parent) + + parent.Children = append(parent.Children, ota) + default: + ota = newBaseObject(oh.Offset, oh.Length, t) + } + + size, crc, err := p.scanner.NextObject(buf) + if err != nil { + return err + } + + ota.Crc32 = crc + ota.PackSize = size + ota.Length = oh.Length + + if !delta { + ota.Write(buf.Bytes()) + ota.SHA1 = ota.Sum() + } + + p.oiByOffset[oh.Offset] = ota + p.oiByHash[oh.Reference] = ota + + p.oi[i] = ota + } + + checksum, err := p.scanner.Checksum() + p.checksum = checksum + + if err == io.EOF { + return nil + } + + return err +} + +func (p *Parser) resolveDeltas() error { + for _, obj := range p.oi { + for _, o := range p.ob { + err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) + if err != nil { + return err + } + + err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32) + if err != nil { + return err + } + } + + if !obj.IsDelta() && len(obj.Children) > 0 { + var err error + base, err := p.get(obj) + if err != nil { + return err + } + + for _, child := range obj.Children { + _, err = p.resolveObject(child, base) + if err != nil { + return err + } + } + } + } + + return nil +} + +func (p *Parser) get(o *objectInfo) ([]byte, error) { + e, ok := p.cache.Get(o.SHA1) + if ok { + r, err := e.Reader() + if err != nil { + return nil, err + } + + buf := make([]byte, e.Size()) + _, err = r.Read(buf) + if err != nil { + return nil, err + } + + return buf, nil + } + + // Read from disk + if o.DiskType.IsDelta() { + base, err := p.get(o.Parent) + if err != nil { + return nil, err + } + + data, err := p.resolveObject(o, base) + if err != nil { + return nil, err + } + + if len(o.Children) > 0 { + m := &plumbing.MemoryObject{} + m.Write(data) + m.SetType(o.Type) + m.SetSize(o.Size()) + p.cache.Put(m) + } + + return data, nil + } + + data, err := p.readData(o) + if err != nil { + return nil, err + } + + if len(o.Children) > 0 { + m := &plumbing.MemoryObject{} + m.Write(data) + m.SetType(o.Type) + m.SetSize(o.Size()) + p.cache.Put(m) + } + + return data, nil +} + +func (p *Parser) resolveObject( + o *objectInfo, + base []byte) ([]byte, error) { + + if !o.DiskType.IsDelta() { + return nil, nil + } + + data, err := p.readData(o) + if err != nil { + return nil, err + } + + data, err = applyPatchBase(o, data, base) + if err != nil { + return nil, err + } + + return data, nil +} + +func (p *Parser) readData(o *objectInfo) ([]byte, error) { + buf := new(bytes.Buffer) + + // TODO: skip header. Header size can be calculated with the offset of the + // next offset in the first pass. + p.scanner.SeekFromStart(o.Offset) + _, err := p.scanner.NextObjectHeader() + if err != nil { + return nil, err + } + + buf.Truncate(0) + + _, _, err = p.scanner.NextObject(buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { + patched, err := PatchDelta(base, data) + if err != nil { + return nil, err + } + + ota.Type = ota.Parent.Type + hash := plumbing.ComputeHash(ota.Type, patched) + + ota.SHA1 = hash + + return patched, nil +} + +type objectInfo struct { + plumbing.Hasher + + Offset int64 + Length int64 + PackSize int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType + + Crc32 uint32 + + Parent *objectInfo + Children []*objectInfo + SHA1 plumbing.Hash +} + +func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { + return newDeltaObject(offset, length, t, nil) +} + +func newDeltaObject( + offset, length int64, + t plumbing.ObjectType, + parent *objectInfo, +) *objectInfo { + children := make([]*objectInfo, 0) + + obj := &objectInfo{ + Hasher: plumbing.NewHasher(t, length), + Offset: offset, + Length: length, + PackSize: 0, + Type: t, + DiskType: t, + Crc32: 0, + Parent: parent, + Children: children, + } + + return obj +} + +func (o *objectInfo) IsDelta() bool { + return o.Type.IsDelta() +} + +func (o *objectInfo) Size() int64 { + return o.Length +} diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go new file mode 100644 index 0000000..87a8804 --- /dev/null +++ b/plumbing/format/packfile/parser_test.go @@ -0,0 +1,139 @@ +package packfile_test + +import ( + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" +) + +type ParserSuite struct { + fixtures.Suite +} + +var _ = Suite(&ParserSuite{}) + +func (s *ParserSuite) TestParserHashes(c *C) { + f := fixtures.Basic().One() + scanner := packfile.NewScanner(f.Packfile()) + + obs := new(testObserver) + parser := packfile.NewParser(scanner, obs) + + ch, err := parser.Parse() + c.Assert(err, IsNil) + + checksum := "a3fed42da1e8189a077c0e6846c040dcf73fc9dd" + c.Assert(ch.String(), Equals, checksum) + + c.Assert(obs.checksum, Equals, checksum) + c.Assert(int(obs.count), Equals, int(31)) + + commit := plumbing.CommitObject + blob := plumbing.BlobObject + tree := plumbing.TreeObject + + objs := []observerObject{ + {"e8d3ffab552895c19b9fcf7aa264d277cde33881", commit, 254, 12, 0xaa07ba4b}, + {"6ecf0ef2c2dffb796033e5a02219af86ec6584e5", commit, 93, 186, 0xf706df58}, + {"918c48b83bd081e863dbe1b80f8998f058cd8294", commit, 242, 286, 0x12438846}, + {"af2d6a6954d532f8ffb47615169c8fdf9d383a1a", commit, 242, 449, 0x2905a38c}, + {"1669dce138d9b841a518c64b10914d88f5e488ea", commit, 333, 615, 0xd9429436}, + {"a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", commit, 332, 838, 0xbecfde4e}, + {"35e85108805c84807bc66a02d91535e1e24b38b9", commit, 244, 1063, 0x780e4b3e}, + {"b8e471f58bcbca63b07bda20e428190409c2db47", commit, 243, 1230, 0xdc18344f}, + {"b029517f6300c2da0f4b651b8642506cd6aaf45d", commit, 187, 1392, 0xcf4e4280}, + {"32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", blob, 189, 1524, 0x1f08118a}, + {"d3ff53e0564a9f87d8e84b6e28e5060e517008aa", blob, 18, 1685, 0xafded7b8}, + {"c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", blob, 1072, 1713, 0xcc1428ed}, + {"d5c0f4ab811897cadf03aec358ae60d21f91c50d", blob, 76110, 2351, 0x1631d22f}, + {"880cd14280f4b9b6ed3986d6671f907d7cc2a198", blob, 2780, 78050, 0xbfff5850}, + {"49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", blob, 217848, 78882, 0xd108e1d8}, + {"c8f1d8c61f9da76f4cb49fd86322b6e685dba956", blob, 706, 80725, 0x8e97ba25}, + {"9a48f23120e880dfbe41f7c9b7b708e9ee62a492", blob, 11488, 80998, 0x7316ff70}, + {"9dea2395f5403188298c1dabe8bdafe562c491e3", blob, 78, 84032, 0xdb4fce56}, + {"dbd3641b371024f44d0e469a9c8f5457b0660de1", tree, 272, 84115, 0x901cce2c}, + {"a8d315b2b1c615d43042c3a62402b8a54288cf5c", tree, 43, 84375, 0xec4552b0}, + {"a39771a7651f97faf5c72e08224d857fc35133db", tree, 38, 84430, 0x847905bf}, + {"5a877e6a906a2743ad6e45d99c1793642aaf8eda", tree, 75, 84479, 0x3689459a}, + {"586af567d0bb5e771e49bdd9434f5e0fb76d25fa", tree, 38, 84559, 0xe67af94a}, + {"cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", tree, 34, 84608, 0xc2314a2e}, + {"7e59600739c96546163833214c36459e324bad0a", blob, 9, 84653, 0xcd987848}, + {"fb72698cab7617ac416264415f13224dfd7a165e", tree, 6, 84671, 0x8a853a6d}, + {"4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", tree, 9, 84688, 0x70c6518}, + {"eba74343e2f15d62adedfd8c883ee0262b5c8021", tree, 6, 84708, 0x4f4108e2}, + {"c2d30fa8ef288618f65f6eed6e168e0d514886f4", tree, 5, 84725, 0xd6fe09e9}, + {"8dcef98b1d52143e1e2dbc458ffe38f925786bf2", tree, 8, 84741, 0xf07a2804}, + {"aa9b383c260e1d05fbbf6b30a02914555e20c725", tree, 4, 84760, 0x1d75d6be}, + } + + c.Assert(obs.objects, DeepEquals, objs) +} + +type observerObject struct { + hash string + otype plumbing.ObjectType + size int64 + offset int64 + crc uint32 +} + +type testObserver struct { + count uint32 + checksum string + objects []observerObject + pos map[int64]int +} + +func (t *testObserver) OnHeader(count uint32) error { + t.count = count + t.pos = make(map[int64]int, count) + return nil +} + +func (t *testObserver) OnInflatedObjectHeader(otype plumbing.ObjectType, objSize int64, pos int64) error { + o := t.get(pos) + o.otype = otype + o.size = objSize + o.offset = pos + + t.put(pos, o) + + return nil +} + +func (t *testObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { + o := t.get(pos) + o.hash = h.String() + o.crc = crc + + t.put(pos, o) + + return nil +} + +func (t *testObserver) OnFooter(h plumbing.Hash) error { + t.checksum = h.String() + return nil +} + +func (t *testObserver) get(pos int64) observerObject { + i, ok := t.pos[pos] + if ok { + return t.objects[i] + } + + return observerObject{} +} + +func (t *testObserver) put(pos int64, o observerObject) { + i, ok := t.pos[pos] + if ok { + t.objects[i] = o + return + } + + t.pos[pos] = len(t.objects) + t.objects = append(t.objects, o) +} -- cgit From ce91d71f96097ede2bb77d2af444aee6fff73183 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 19 Jul 2018 23:25:14 +0200 Subject: plumbing/packfile: disable lookup by offset In one case it disables the cache and the other disables lookup when the scanner is not seekable. Could be added back later. Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 765401f..9bfd69b 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -403,12 +403,13 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i return 0, err } - e, ok := d.idx.LookupOffset(uint64(offset)) - var base plumbing.EncodedObject - if ok { - base, ok = d.cacheGet(e.Hash) - } + // e, ok := d.idx.LookupOffset(uint64(offset)) + // if ok { + // base, ok = d.cacheGet(e.Hash) + // } + var base plumbing.EncodedObject + ok := false if !ok { base, err = d.recallByOffset(offset) if err != nil { @@ -446,9 +447,9 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { return d.DecodeObjectAt(o) } - if e, ok := d.idx.LookupOffset(uint64(o)); ok { - return d.recallByHashNonSeekable(e.Hash) - } + // if e, ok := d.idx.LookupOffset(uint64(o)); ok { + // return d.recallByHashNonSeekable(e.Hash) + // } return nil, plumbing.ErrObjectNotFound } -- cgit From 355cfc3df3a64d1bd438e0e17e1c4ba21350badf Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 19 Jul 2018 23:27:16 +0200 Subject: plumbing: idxfile, add idxfile.Writer with Observer interface It's still not complete: * 64 bit offsets * IdxChecksum Signed-off-by: Javi Fontan --- plumbing/format/idxfile/writer.go | 132 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 plumbing/format/idxfile/writer.go diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go new file mode 100644 index 0000000..aac68b5 --- /dev/null +++ b/plumbing/format/idxfile/writer.go @@ -0,0 +1,132 @@ +package idxfile + +import ( + "bytes" + "math" + "sort" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +type object struct { + hash plumbing.Hash + offset int64 + crc uint32 +} + +type objects []object + +// Writer implements a packfile Observer interface and is used to generate +// indexes. +type Writer struct { + count uint32 + checksum plumbing.Hash + objects objects +} + +// Create index returns a filled MemoryIndex with the information filled by +// the observer callbacks. +func (w *Writer) CreateIndex() (*MemoryIndex, error) { + idx := new(MemoryIndex) + sort.Sort(w.objects) + + // unmap all fans by default + for i := range idx.FanoutMapping { + idx.FanoutMapping[i] = noMapping + } + + buf := new(bytes.Buffer) + + last := -1 + bucket := -1 + for i, o := range w.objects { + fan := o.hash[0] + + // fill the gaps between fans + for j := last + 1; j < int(fan); j++ { + idx.Fanout[j] = uint32(i) + } + + // update the number of objects for this position + idx.Fanout[fan] = uint32(i + 1) + + // we move from one bucket to another, update counters and allocate + // memory + if last != int(fan) { + bucket++ + idx.FanoutMapping[fan] = bucket + last = int(fan) + + idx.Names = append(idx.Names, make([]byte, 0)) + idx.Offset32 = append(idx.Offset32, make([]byte, 0)) + idx.Crc32 = append(idx.Crc32, make([]byte, 0)) + } + + idx.Names[bucket] = append(idx.Names[bucket], o.hash[:]...) + + // TODO: implement 64 bit offsets + if o.offset > math.MaxInt32 { + panic("64 bit offsets not implemented") + } + + buf.Truncate(0) + binary.WriteUint32(buf, uint32(o.offset)) + idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...) + + buf.Truncate(0) + binary.WriteUint32(buf, uint32(o.crc)) + idx.Crc32[bucket] = append(idx.Crc32[bucket], buf.Bytes()...) + } + + for j := last + 1; j < 256; j++ { + idx.Fanout[j] = uint32(len(w.objects)) + } + + idx.PackfileChecksum = w.checksum + // TODO: fill IdxChecksum + + return idx, nil +} + +// Add appends new object data. +func (w *Writer) Add(h plumbing.Hash, pos int64, crc uint32) { + w.objects = append(w.objects, object{h, pos, crc}) +} + +// OnHeader implements packfile.Observer interface. +func (w *Writer) OnHeader(count uint32) error { + w.count = count + w.objects = make(objects, 0, count) + return nil +} + +// OnInflatedObjectHeader implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error { + return nil +} + +// OnInflatedObjectContent implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { + w.Add(h, pos, crc) + return nil +} + +// OnFooter implements packfile.Observer interface. +func (w *Writer) OnFooter(h plumbing.Hash) error { + w.checksum = h + return nil +} + +func (o objects) Len() int { + return len(o) +} + +func (o objects) Less(i int, j int) bool { + cmp := bytes.Compare(o[i].hash[:], o[j].hash[:]) + return cmp < 0 +} + +func (o objects) Swap(i int, j int) { + o[i], o[j] = o[j], o[i] +} -- cgit From 4e3765aef344eae2fbcd977fefd66b6571638d59 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Fri, 20 Jul 2018 12:22:55 +0200 Subject: plumbing/idxfile: use Entry to hold object data Signed-off-by: Javi Fontan --- plumbing/format/idxfile/writer.go | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go index aac68b5..3c5a00e 100644 --- a/plumbing/format/idxfile/writer.go +++ b/plumbing/format/idxfile/writer.go @@ -9,13 +9,8 @@ import ( "gopkg.in/src-d/go-git.v4/utils/binary" ) -type object struct { - hash plumbing.Hash - offset int64 - crc uint32 -} - -type objects []object +// objects implements sort.Interface and uses hash as sorting key. +type objects []Entry // Writer implements a packfile Observer interface and is used to generate // indexes. @@ -41,7 +36,7 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { last := -1 bucket := -1 for i, o := range w.objects { - fan := o.hash[0] + fan := o.Hash[0] // fill the gaps between fans for j := last + 1; j < int(fan); j++ { @@ -63,19 +58,19 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { idx.Crc32 = append(idx.Crc32, make([]byte, 0)) } - idx.Names[bucket] = append(idx.Names[bucket], o.hash[:]...) + idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...) // TODO: implement 64 bit offsets - if o.offset > math.MaxInt32 { + if o.Offset > math.MaxInt32 { panic("64 bit offsets not implemented") } buf.Truncate(0) - binary.WriteUint32(buf, uint32(o.offset)) + binary.WriteUint32(buf, uint32(o.Offset)) idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...) buf.Truncate(0) - binary.WriteUint32(buf, uint32(o.crc)) + binary.WriteUint32(buf, uint32(o.CRC32)) idx.Crc32[bucket] = append(idx.Crc32[bucket], buf.Bytes()...) } @@ -90,8 +85,8 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { } // Add appends new object data. -func (w *Writer) Add(h plumbing.Hash, pos int64, crc uint32) { - w.objects = append(w.objects, object{h, pos, crc}) +func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { + w.objects = append(w.objects, Entry{h, crc, pos}) } // OnHeader implements packfile.Observer interface. @@ -108,7 +103,7 @@ func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, po // OnInflatedObjectContent implements packfile.Observer interface. func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { - w.Add(h, pos, crc) + w.Add(h, uint64(pos), crc) return nil } @@ -123,7 +118,7 @@ func (o objects) Len() int { } func (o objects) Less(i int, j int) bool { - cmp := bytes.Compare(o[i].hash[:], o[j].hash[:]) + cmp := bytes.Compare(o[i].Hash[:], o[j].Hash[:]) return cmp < 0 } -- cgit From 65e8359db00ae79838d19e19f69594f6a262c3d4 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Fri, 20 Jul 2018 13:01:27 +0200 Subject: plumbing/idxfile: support offset64 generating indexes Signed-off-by: Javi Fontan --- plumbing/format/idxfile/writer.go | 25 +++++++++++++++---- plumbing/format/idxfile/writer_test.go | 45 ++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 plumbing/format/idxfile/writer_test.go diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go index 3c5a00e..ea54081 100644 --- a/plumbing/format/idxfile/writer.go +++ b/plumbing/format/idxfile/writer.go @@ -18,12 +18,16 @@ type Writer struct { count uint32 checksum plumbing.Hash objects objects + offset64 uint32 + idx *MemoryIndex } // Create index returns a filled MemoryIndex with the information filled by // the observer callbacks. func (w *Writer) CreateIndex() (*MemoryIndex, error) { idx := new(MemoryIndex) + w.idx = idx + sort.Sort(w.objects) // unmap all fans by default @@ -60,13 +64,13 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { idx.Names[bucket] = append(idx.Names[bucket], o.Hash[:]...) - // TODO: implement 64 bit offsets - if o.Offset > math.MaxInt32 { - panic("64 bit offsets not implemented") + offset := o.Offset + if offset > math.MaxInt32 { + offset = w.addOffset64(offset) } buf.Truncate(0) - binary.WriteUint32(buf, uint32(o.Offset)) + binary.WriteUint32(buf, uint32(offset)) idx.Offset32[bucket] = append(idx.Offset32[bucket], buf.Bytes()...) buf.Truncate(0) @@ -78,12 +82,23 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { idx.Fanout[j] = uint32(len(w.objects)) } + idx.Version = VersionSupported idx.PackfileChecksum = w.checksum - // TODO: fill IdxChecksum return idx, nil } +func (w *Writer) addOffset64(pos uint64) uint64 { + buf := new(bytes.Buffer) + binary.WriteUint64(buf, pos) + w.idx.Offset64 = append(w.idx.Offset64, buf.Bytes()...) + + index := uint64(w.offset64 | (1 << 31)) + w.offset64++ + + return index +} + // Add appends new object data. func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { w.objects = append(w.objects, Entry{h, crc, pos}) diff --git a/plumbing/format/idxfile/writer_test.go b/plumbing/format/idxfile/writer_test.go new file mode 100644 index 0000000..92d2046 --- /dev/null +++ b/plumbing/format/idxfile/writer_test.go @@ -0,0 +1,45 @@ +package idxfile_test + +import ( + "bytes" + "io/ioutil" + + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" +) + +type IndexSuite struct { + fixtures.Suite +} + +var _ = Suite(&IndexSuite{}) + +func (s *IndexSuite) TestIndexWriter(c *C) { + f := fixtures.Basic().One() + scanner := packfile.NewScanner(f.Packfile()) + + obs := new(idxfile.Writer) + parser := packfile.NewParser(scanner, obs) + + _, err := parser.Parse() + c.Assert(err, IsNil) + + idx, err := obs.CreateIndex() + c.Assert(err, IsNil) + + idxFile := f.Idx() + expected, err := ioutil.ReadAll(idxFile) + c.Assert(err, IsNil) + idxFile.Close() + + buf := new(bytes.Buffer) + encoder := idxfile.NewEncoder(buf) + n, err := encoder.Encode(idx) + c.Assert(err, IsNil) + c.Assert(n, Equals, len(expected)) + + c.Assert(buf.Bytes(), DeepEquals, expected) +} -- cgit From a716126aa7f9b77030d2e697db24d206d944f05d Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 24 Jul 2018 17:36:21 +0200 Subject: plumbing/packfile: preallocate memory in PatchDelta Signed-off-by: Javi Fontan --- plumbing/format/packfile/patch_delta.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go index c604851..a972f1c 100644 --- a/plumbing/format/packfile/patch_delta.go +++ b/plumbing/format/packfile/patch_delta.go @@ -63,8 +63,8 @@ func PatchDelta(src, delta []byte) ([]byte, error) { targetSz, delta := decodeLEB128(delta) remainingTargetSz := targetSz - var dest []byte var cmd byte + dest := make([]byte, 0, targetSz) for { if len(delta) == 0 { return nil, ErrInvalidDelta -- cgit From 7418b411660aaa3d8d54eb602fda8accaed2833f Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 12:24:26 +0200 Subject: plumbing/idxfile: fix bug searching in MemoryIndex Signed-off-by: Javi Fontan --- plumbing/format/idxfile/idxfile.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index b196608..adeba44 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -72,7 +72,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { low := uint64(0) for { mid := (low + high) >> 1 - offset := mid + (mid << 2) + offset := mid * objectIDLength cmp := bytes.Compare(h[:], data[offset:offset+objectIDLength]) if cmp < 0 { @@ -83,7 +83,7 @@ func (idx *MemoryIndex) findHashIndex(h plumbing.Hash) int { low = mid + 1 } - if low < high { + if low > high { break } } -- cgit From 4ddd6783cf9707f8b72ebb00e5bb4705f5fd436a Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 12:27:53 +0200 Subject: plumbing/idxfile: add offset/hash mapping to index This functionality may be moved elsewhere in the future but is needed now to fit filesystem.ObjectStorage and the new index. Signed-off-by: Javi Fontan --- plumbing/format/idxfile/idxfile.go | 51 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/plumbing/format/idxfile/idxfile.go b/plumbing/format/idxfile/idxfile.go index adeba44..f8debb1 100644 --- a/plumbing/format/idxfile/idxfile.go +++ b/plumbing/format/idxfile/idxfile.go @@ -28,6 +28,8 @@ type Index interface { FindOffset(h plumbing.Hash) (int64, error) // FindCRC32 finds the CRC32 of the object with the given hash. FindCRC32(h plumbing.Hash) (uint32, error) + // FindHash finds the hash for the object with the given offset. + FindHash(o int64) (plumbing.Hash, error) // Count returns the number of entries in the index. Count() (int64, error) // Entries returns an iterator to retrieve all index entries. @@ -48,6 +50,8 @@ type MemoryIndex struct { Offset64 []byte PackfileChecksum [20]byte IdxChecksum [20]byte + + offsetHash map[int64]plumbing.Hash } var _ Index = (*MemoryIndex)(nil) @@ -149,6 +153,53 @@ func (idx *MemoryIndex) getCrc32(firstLevel, secondLevel int) (uint32, error) { return binary.ReadUint32(buf) } +// FindHash implements the Index interface. +func (idx *MemoryIndex) FindHash(o int64) (plumbing.Hash, error) { + // Lazily generate the reverse offset/hash map if required. + if idx.offsetHash == nil { + err := idx.genOffsetHash() + if err != nil { + return plumbing.ZeroHash, nil + } + } + + hash, ok := idx.offsetHash[o] + if !ok { + return plumbing.ZeroHash, plumbing.ErrObjectNotFound + } + + return hash, nil +} + +// genOffsetHash generates the offset/hash mapping for reverse search. +func (idx *MemoryIndex) genOffsetHash() error { + count, err := idx.Count() + if err != nil { + return err + } + + idx.offsetHash = make(map[int64]plumbing.Hash, count) + + iter, err := idx.Entries() + if err != nil { + return err + } + + var entry *Entry + for err != nil { + entry, err = iter.Next() + if err == nil { + idx.offsetHash[int64(entry.Offset)] = entry.Hash + } + } + + if err == io.EOF { + return nil + } + + return err +} + // Count implements the Index interface. func (idx *MemoryIndex) Count() (int64, error) { return int64(idx.Fanout[fanout-1]), nil -- cgit From 74f56f388bbe8072bfcd976add2373f9a7e20341 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 13:14:02 +0200 Subject: plumbing/idxfile: index is created only once and retrieved with Index Index is also automatically generated when OnFooter is called. Signed-off-by: Javi Fontan --- plumbing/format/idxfile/writer.go | 103 ++++++++++++++++++++++----------- plumbing/format/idxfile/writer_test.go | 2 +- 2 files changed, 70 insertions(+), 35 deletions(-) diff --git a/plumbing/format/idxfile/writer.go b/plumbing/format/idxfile/writer.go index ea54081..efcdcc6 100644 --- a/plumbing/format/idxfile/writer.go +++ b/plumbing/format/idxfile/writer.go @@ -2,8 +2,10 @@ package idxfile import ( "bytes" + "fmt" "math" "sort" + "sync" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/utils/binary" @@ -15,18 +17,80 @@ type objects []Entry // Writer implements a packfile Observer interface and is used to generate // indexes. type Writer struct { + m sync.Mutex + count uint32 checksum plumbing.Hash objects objects offset64 uint32 - idx *MemoryIndex + finished bool + index *MemoryIndex +} + +// Index returns a previously created MemoryIndex or creates a new one if +// needed. +func (w *Writer) Index() (*MemoryIndex, error) { + w.m.Lock() + defer w.m.Unlock() + + if w.index == nil { + return w.createIndex() + } + + return w.index, nil +} + +// Add appends new object data. +func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { + w.m.Lock() + defer w.m.Unlock() + + w.objects = append(w.objects, Entry{h, crc, pos}) +} + +func (w *Writer) Finished() bool { + return w.finished +} + +// OnHeader implements packfile.Observer interface. +func (w *Writer) OnHeader(count uint32) error { + w.count = count + w.objects = make(objects, 0, count) + return nil +} + +// OnInflatedObjectHeader implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error { + return nil +} + +// OnInflatedObjectContent implements packfile.Observer interface. +func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { + w.Add(h, uint64(pos), crc) + return nil } -// Create index returns a filled MemoryIndex with the information filled by +// OnFooter implements packfile.Observer interface. +func (w *Writer) OnFooter(h plumbing.Hash) error { + w.checksum = h + w.finished = true + _, err := w.createIndex() + if err != nil { + return err + } + + return nil +} + +// creatIndex returns a filled MemoryIndex with the information filled by // the observer callbacks. -func (w *Writer) CreateIndex() (*MemoryIndex, error) { +func (w *Writer) createIndex() (*MemoryIndex, error) { + if !w.finished { + return nil, fmt.Errorf("the index still hasn't finished building") + } + idx := new(MemoryIndex) - w.idx = idx + w.index = idx sort.Sort(w.objects) @@ -91,7 +155,7 @@ func (w *Writer) CreateIndex() (*MemoryIndex, error) { func (w *Writer) addOffset64(pos uint64) uint64 { buf := new(bytes.Buffer) binary.WriteUint64(buf, pos) - w.idx.Offset64 = append(w.idx.Offset64, buf.Bytes()...) + w.index.Offset64 = append(w.index.Offset64, buf.Bytes()...) index := uint64(w.offset64 | (1 << 31)) w.offset64++ @@ -99,35 +163,6 @@ func (w *Writer) addOffset64(pos uint64) uint64 { return index } -// Add appends new object data. -func (w *Writer) Add(h plumbing.Hash, pos uint64, crc uint32) { - w.objects = append(w.objects, Entry{h, crc, pos}) -} - -// OnHeader implements packfile.Observer interface. -func (w *Writer) OnHeader(count uint32) error { - w.count = count - w.objects = make(objects, 0, count) - return nil -} - -// OnInflatedObjectHeader implements packfile.Observer interface. -func (w *Writer) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error { - return nil -} - -// OnInflatedObjectContent implements packfile.Observer interface. -func (w *Writer) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { - w.Add(h, uint64(pos), crc) - return nil -} - -// OnFooter implements packfile.Observer interface. -func (w *Writer) OnFooter(h plumbing.Hash) error { - w.checksum = h - return nil -} - func (o objects) Len() int { return len(o) } diff --git a/plumbing/format/idxfile/writer_test.go b/plumbing/format/idxfile/writer_test.go index 92d2046..51273a3 100644 --- a/plumbing/format/idxfile/writer_test.go +++ b/plumbing/format/idxfile/writer_test.go @@ -27,7 +27,7 @@ func (s *IndexSuite) TestIndexWriter(c *C) { _, err := parser.Parse() c.Assert(err, IsNil) - idx, err := obs.CreateIndex() + idx, err := obs.Index() c.Assert(err, IsNil) idxFile := f.Idx() -- cgit From 79f249465b24104b73c9dc220d9098cecdab4d77 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 13:42:51 +0200 Subject: plumbing, storage: integrate new index Now dotgit.PackWriter uses the new packfile.Parser and index. Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 9 +++--- plumbing/format/packfile/parser.go | 11 ++++---- storage/filesystem/dotgit/writers.go | 33 ++++++++++------------ storage/filesystem/dotgit/writers_test.go | 3 +- storage/filesystem/object.go | 46 +++++++++++++++++++------------ 5 files changed, 57 insertions(+), 45 deletions(-) diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 9bfd69b..69aef2d 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -447,11 +447,12 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { return d.DecodeObjectAt(o) } - // if e, ok := d.idx.LookupOffset(uint64(o)); ok { - // return d.recallByHashNonSeekable(e.Hash) - // } + hash, err := d.idx.FindHash(o) + if err != nil { + return nil, err + } - return nil, plumbing.ErrObjectNotFound + return d.recallByHashNonSeekable(hash) } func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 460fc3f..696f5ba 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -311,11 +311,12 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { type objectInfo struct { plumbing.Hasher - Offset int64 - Length int64 - PackSize int64 - Type plumbing.ObjectType - DiskType plumbing.ObjectType + Offset int64 + Length int64 + HeaderLength int64 + PackSize int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType Crc32 uint32 diff --git a/storage/filesystem/dotgit/writers.go b/storage/filesystem/dotgit/writers.go index c2b420f..e1ede3c 100644 --- a/storage/filesystem/dotgit/writers.go +++ b/storage/filesystem/dotgit/writers.go @@ -20,13 +20,14 @@ import ( // is renamed/moved (depends on the Filesystem implementation) to the final // location, if the PackWriter is not used, nothing is written type PackWriter struct { - Notify func(plumbing.Hash, *packfile.Index) + Notify func(plumbing.Hash, *idxfile.Writer) fs billy.Filesystem fr, fw billy.File synced *syncedReader checksum plumbing.Hash - index *packfile.Index + parser *packfile.Parser + writer *idxfile.Writer result chan error } @@ -55,20 +56,16 @@ func newPackWrite(fs billy.Filesystem) (*PackWriter, error) { func (w *PackWriter) buildIndex() { s := packfile.NewScanner(w.synced) - d, err := packfile.NewDecoder(s, nil) - if err != nil { - w.result <- err - return - } + w.writer = new(idxfile.Writer) + w.parser = packfile.NewParser(s, w.writer) - checksum, err := d.Decode() + checksum, err := w.parser.Parse() if err != nil { w.result <- err return } w.checksum = checksum - w.index = d.Index() w.result <- err } @@ -92,8 +89,8 @@ func (w *PackWriter) Write(p []byte) (int, error) { // was written, the tempfiles are deleted without writing a packfile. func (w *PackWriter) Close() error { defer func() { - if w.Notify != nil && w.index != nil && w.index.Size() > 0 { - w.Notify(w.checksum, w.index) + if w.Notify != nil && w.writer != nil && w.writer.Finished() { + w.Notify(w.checksum, w.writer) } close(w.result) @@ -115,7 +112,7 @@ func (w *PackWriter) Close() error { return err } - if w.index == nil || w.index.Size() == 0 { + if w.writer == nil || !w.writer.Finished() { return w.clean() } @@ -145,11 +142,13 @@ func (w *PackWriter) save() error { } func (w *PackWriter) encodeIdx(writer io.Writer) error { - idx := w.index.ToIdxFile() - idx.PackfileChecksum = w.checksum - idx.Version = idxfile.VersionSupported + idx, err := w.writer.Index() + if err != nil { + return err + } + e := idxfile.NewEncoder(writer) - _, err := e.Encode(idx) + _, err = e.Encode(idx) return err } @@ -209,7 +208,6 @@ func (s *syncedReader) isBlocked() bool { func (s *syncedReader) wake() { if s.isBlocked() { - // fmt.Println("wake") atomic.StoreUint32(&s.blocked, 0) s.news <- true } @@ -220,7 +218,6 @@ func (s *syncedReader) sleep() { written := atomic.LoadUint64(&s.written) if read >= written { atomic.StoreUint32(&s.blocked, 1) - // fmt.Println("sleep", read, written) <-s.news } diff --git a/storage/filesystem/dotgit/writers_test.go b/storage/filesystem/dotgit/writers_test.go index bf00762..5a5f7b4 100644 --- a/storage/filesystem/dotgit/writers_test.go +++ b/storage/filesystem/dotgit/writers_test.go @@ -9,6 +9,7 @@ import ( "strconv" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" . "gopkg.in/check.v1" @@ -148,7 +149,7 @@ func (s *SuiteDotGit) TestPackWriterUnusedNotify(c *C) { w, err := newPackWrite(fs) c.Assert(err, IsNil) - w.Notify = func(h plumbing.Hash, idx *packfile.Index) { + w.Notify = func(h plumbing.Hash, idx *idxfile.Writer) { c.Fatal("unexpected call to PackWriter.Notify") } diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go index ef67f50..b73b309 100644 --- a/storage/filesystem/object.go +++ b/storage/filesystem/object.go @@ -23,7 +23,7 @@ type ObjectStorage struct { deltaBaseCache cache.Object dir *dotgit.DotGit - index map[plumbing.Hash]*packfile.Index + index map[plumbing.Hash]idxfile.Index } // NewObjectStorage creates a new ObjectStorage with the given .git directory. @@ -41,7 +41,7 @@ func (s *ObjectStorage) requireIndex() error { return nil } - s.index = make(map[plumbing.Hash]*packfile.Index) + s.index = make(map[plumbing.Hash]idxfile.Index) packs, err := s.dir.ObjectPacks() if err != nil { return err @@ -69,7 +69,7 @@ func (s *ObjectStorage) loadIdxFile(h plumbing.Hash) (err error) { return err } - s.index[h] = packfile.NewIndexFromIdxFile(idxf) + s.index[h] = idxf return err } @@ -87,8 +87,11 @@ func (s *ObjectStorage) PackfileWriter() (io.WriteCloser, error) { return nil, err } - w.Notify = func(h plumbing.Hash, idx *packfile.Index) { - s.index[h] = idx + w.Notify = func(h plumbing.Hash, writer *idxfile.Writer) { + index, err := writer.Index() + if err == nil { + s.index[h] = index + } } return w, nil @@ -278,7 +281,7 @@ func (s *ObjectStorage) getFromPackfile(h plumbing.Hash, canBeDelta bool) ( func (s *ObjectStorage) decodeObjectAt( f billy.File, - idx *packfile.Index, + idx idxfile.Index, offset int64) (plumbing.EncodedObject, error) { if _, err := f.Seek(0, io.SeekStart); err != nil { return nil, err @@ -299,7 +302,7 @@ func (s *ObjectStorage) decodeObjectAt( func (s *ObjectStorage) decodeDeltaObjectAt( f billy.File, - idx *packfile.Index, + idx idxfile.Index, offset int64, hash plumbing.Hash) (plumbing.EncodedObject, error) { if _, err := f.Seek(0, io.SeekStart); err != nil { @@ -324,12 +327,10 @@ func (s *ObjectStorage) decodeDeltaObjectAt( case plumbing.REFDeltaObject: base = header.Reference case plumbing.OFSDeltaObject: - e, ok := idx.LookupOffset(uint64(header.OffsetReference)) - if !ok { - return nil, plumbing.ErrObjectNotFound + base, err = idx.FindHash(header.OffsetReference) + if err != nil { + return nil, err } - - base = e.Hash default: return s.decodeObjectAt(f, idx, offset) } @@ -350,8 +351,9 @@ func (s *ObjectStorage) decodeDeltaObjectAt( func (s *ObjectStorage) findObjectInPackfile(h plumbing.Hash) (plumbing.Hash, plumbing.Hash, int64) { for packfile, index := range s.index { - if e, ok := index.LookupHash(h); ok { - return packfile, e.Hash, int64(e.Offset) + offset, err := index.FindOffset(h) + if err == nil { + return packfile, h, offset } } @@ -460,12 +462,22 @@ type packfileIter struct { total uint32 } -func NewPackfileIter(f billy.File, t plumbing.ObjectType) (storer.EncodedObjectIter, error) { +// NewPackfileIter returns a new EncodedObjectIter for the provided packfile +// and object type. +func NewPackfileIter( + f billy.File, + t plumbing.ObjectType, +) (storer.EncodedObjectIter, error) { return newPackfileIter(f, t, make(map[plumbing.Hash]struct{}), nil, nil) } -func newPackfileIter(f billy.File, t plumbing.ObjectType, seen map[plumbing.Hash]struct{}, - index *packfile.Index, cache cache.Object) (storer.EncodedObjectIter, error) { +func newPackfileIter( + f billy.File, + t plumbing.ObjectType, + seen map[plumbing.Hash]struct{}, + index idxfile.Index, + cache cache.Object, +) (storer.EncodedObjectIter, error) { s := packfile.NewScanner(f) _, total, err := s.Header() if err != nil { -- cgit