From cf532f99e3e7632bc1d813245a4c79ae38b4d320 Mon Sep 17 00:00:00 2001 From: David Symonds Date: Wed, 30 May 2018 11:06:44 +1000 Subject: packfile: improve Index memory representation to be more compact Instead of using a map for offset indexing, use a sorted slice. Binary searching is fast, and a slice is much more compact. This has a negligible hit on speed, but has a significant impact on memory usage, especially for larger repos. benchmark old ns/op new ns/op delta BenchmarkIndexConstruction-12 15506506 14056098 -9.35% benchmark old allocs new allocs delta BenchmarkIndexConstruction-12 60764 60385 -0.62% benchmark old bytes new bytes delta BenchmarkIndexConstruction-12 4318145 3913169 -9.38% Signed-off-by: David Symonds --- plumbing/format/packfile/index.go | 53 +++++++++++++++++++++++++++------- plumbing/format/packfile/index_test.go | 37 +++++++++++++++--------- 2 files changed, 67 insertions(+), 23 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go index 2c5f98f..7d8f2ad 100644 --- a/plumbing/format/packfile/index.go +++ b/plumbing/format/packfile/index.go @@ -1,6 +1,8 @@ package packfile import ( + "sort" + "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" ) @@ -10,7 +12,7 @@ import ( // or to store them. type Index struct { byHash map[plumbing.Hash]*idxfile.Entry - byOffset map[uint64]*idxfile.Entry + byOffset []*idxfile.Entry // sorted by their offset } // NewIndex creates a new empty index with the given size. Size is a hint and @@ -19,7 +21,7 @@ type Index struct { func NewIndex(size int) *Index { return &Index{ byHash: make(map[plumbing.Hash]*idxfile.Entry, size), - byOffset: make(map[uint64]*idxfile.Entry, size), + byOffset: make([]*idxfile.Entry, 0, size), } } @@ -27,28 +29,54 @@ func NewIndex(size int) *Index { func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index { idx := &Index{ byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount), - byOffset: make(map[uint64]*idxfile.Entry, idxf.ObjectCount), + byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount), } for _, e := range idxf.Entries { - idx.add(e) + idx.addUnsorted(e) } + sort.Sort(orderByOffset(idx.byOffset)) return idx } +// orderByOffset is a sort.Interface adapter that arranges +// a slice of entries by their offset. +type orderByOffset []*idxfile.Entry + +func (o orderByOffset) Len() int { return len(o) } +func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset } +func (o orderByOffset) Swap(i, j int) { o[i], o[j] = o[j], o[i] } + // Add adds a new Entry with the given values to the index. func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) { - e := idxfile.Entry{ + e := &idxfile.Entry{ Hash: h, Offset: offset, CRC32: crc32, } - idx.add(&e) + idx.byHash[e.Hash] = e + + // Find the right position in byOffset. + // Look for the first position whose offset is *greater* than e.Offset. + i := sort.Search(len(idx.byOffset), func(i int) bool { + return idx.byOffset[i].Offset > offset + }) + if i == len(idx.byOffset) { + // Simple case: add it to the end. + idx.byOffset = append(idx.byOffset, e) + return + } + // Harder case: shift existing entries down by one to make room. + // Append a nil entry first so we can use existing capacity in case + // the index was carefully preallocated. 
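+ // The copy below then shifts byOffset[i:] one slot to the right,
+ // leaving slot i free for the new entry.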
+ idx.byOffset = append(idx.byOffset, nil) + copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1]) + idx.byOffset[i] = e } -func (idx *Index) add(e *idxfile.Entry) { +func (idx *Index) addUnsorted(e *idxfile.Entry) { idx.byHash[e.Hash] = e - idx.byOffset[e.Offset] = e + idx.byOffset = append(idx.byOffset, e) } // LookupHash looks an entry up by its hash. An idxfile.Entry is returned and @@ -61,8 +89,13 @@ func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) { // LookupHash looks an entry up by its offset in the packfile. An idxfile.Entry // is returned and a bool, which is true if it was found or false if it wasn't. func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) { - e, ok := idx.byOffset[offset] - return e, ok + i := sort.Search(len(idx.byOffset), func(i int) bool { + return idx.byOffset[i].Offset >= offset + }) + if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset { + return nil, false // not present + } + return idx.byOffset[i], true } // Size returns the number of entries in the index. diff --git a/plumbing/format/packfile/index_test.go b/plumbing/format/packfile/index_test.go index 6714704..8de886d 100644 --- a/plumbing/format/packfile/index_test.go +++ b/plumbing/format/packfile/index_test.go @@ -3,6 +3,7 @@ package packfile import ( "strconv" "strings" + "testing" "gopkg.in/src-d/go-git.v4/plumbing" @@ -26,12 +27,12 @@ func (s *IndexSuite) TestLookupOffset(c *C) { e, ok := idx.LookupOffset(uint64(o2)) c.Assert(ok, Equals, true) c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) + c.Assert(e.Hash, Equals, toHash(o2)) c.Assert(e.Offset, Equals, uint64(o2)) } } - h1 := s.toHash(o1) + h1 := toHash(o1) idx.Add(h1, uint64(o1), 0) for o2 := 0; o2 < 10000; o2 += 100 { @@ -43,7 +44,7 @@ func (s *IndexSuite) TestLookupOffset(c *C) { e, ok := idx.LookupOffset(uint64(o2)) c.Assert(ok, Equals, true) c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) + c.Assert(e.Hash, Equals, toHash(o2)) c.Assert(e.Offset, Equals, uint64(o2)) } } @@ -56,31 +57,31 @@ func (s *IndexSuite) TestLookupHash(c *C) { for o1 := 0; o1 < 10000; o1 += 100 { for o2 := 0; o2 < 10000; o2 += 100 { if o2 >= o1 { - e, ok := idx.LookupHash(s.toHash(o2)) + e, ok := idx.LookupHash(toHash(o2)) c.Assert(ok, Equals, false) c.Assert(e, IsNil) } else { - e, ok := idx.LookupHash(s.toHash(o2)) + e, ok := idx.LookupHash(toHash(o2)) c.Assert(ok, Equals, true) c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) + c.Assert(e.Hash, Equals, toHash(o2)) c.Assert(e.Offset, Equals, uint64(o2)) } } - h1 := s.toHash(o1) + h1 := toHash(o1) idx.Add(h1, uint64(o1), 0) for o2 := 0; o2 < 10000; o2 += 100 { if o2 > o1 { - e, ok := idx.LookupHash(s.toHash(o2)) + e, ok := idx.LookupHash(toHash(o2)) c.Assert(ok, Equals, false) c.Assert(e, IsNil) } else { - e, ok := idx.LookupHash(s.toHash(o2)) + e, ok := idx.LookupHash(toHash(o2)) c.Assert(ok, Equals, true) c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) + c.Assert(e.Hash, Equals, toHash(o2)) c.Assert(e.Offset, Equals, uint64(o2)) } } @@ -92,7 +93,7 @@ func (s *IndexSuite) TestSize(c *C) { for o1 := 0; o1 < 1000; o1++ { c.Assert(idx.Size(), Equals, o1) - h1 := s.toHash(o1) + h1 := toHash(o1) idx.Add(h1, uint64(o1), 0) } } @@ -107,7 +108,7 @@ func (s *IndexSuite) TestIdxFileEmpty(c *C) { func (s *IndexSuite) TestIdxFile(c *C) { idx := NewIndex(0) for o1 := 0; o1 < 1000; o1++ { - h1 := s.toHash(o1) + h1 := toHash(o1) idx.Add(h1, uint64(o1), 0) } @@ -115,8 +116,18 @@ func (s *IndexSuite) TestIdxFile(c *C) { 
c.Assert(idx, DeepEquals, idx2) } -func (s *IndexSuite) toHash(i int) plumbing.Hash { +func toHash(i int) plumbing.Hash { is := strconv.Itoa(i) padding := strings.Repeat("a", 40-len(is)) return plumbing.NewHash(padding + is) } + +func BenchmarkIndexConstruction(b *testing.B) { + b.ReportAllocs() + + idx := NewIndex(0) + for o := 0; o < 1e6*b.N; o += 100 { + h1 := toHash(o) + idx.Add(h1, uint64(o), 0) + } +} -- cgit From 88f0dc3d89a0391f9f52d913207556d15a4c2a77 Mon Sep 17 00:00:00 2001 From: kuba-- Date: Thu, 7 Jun 2018 22:23:58 +0200 Subject: plumbing: packfile, Don't push empty objects. Fixes #840 Signed-off-by: kuba-- --- plumbing/format/packfile/delta_test.go | 21 ++++++++++++++++++--- plumbing/format/packfile/diff_delta.go | 2 +- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/delta_test.go b/plumbing/format/packfile/delta_test.go index 42b777a..98f53f6 100644 --- a/plumbing/format/packfile/delta_test.go +++ b/plumbing/format/packfile/delta_test.go @@ -62,7 +62,7 @@ func (s *DeltaSuite) SetUpSuite(c *C) { target: []piece{{"1", 30}, {"2", 20}, {"7", 40}, {"4", 400}, {"5", 10}}, }, { - description: "A copy operation bigger tan 64kb", + description: "A copy operation bigger than 64kb", base: []piece{{bigRandStr, 1}, {"1", 200}}, target: []piece{{bigRandStr, 1}}, }} @@ -72,12 +72,16 @@ var bigRandStr = randStringBytes(100 * 1024) const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" -func randStringBytes(n int) string { +func randBytes(n int) []byte { b := make([]byte, n) for i := range b { b[i] = letterBytes[rand.Intn(len(letterBytes))] } - return string(b) + return b +} + +func randStringBytes(n int) string { + return string(randBytes(n)) } func (s *DeltaSuite) TestAddDelta(c *C) { @@ -110,3 +114,14 @@ func (s *DeltaSuite) TestIncompleteDelta(c *C) { c.Assert(err, NotNil) c.Assert(result, IsNil) } + +func (s *DeltaSuite) TestMaxCopySizeDelta(c *C) { + baseBuf := randBytes(maxCopySize) + targetBuf := baseBuf[0:] + targetBuf = append(targetBuf, byte(1)) + + delta := DiffDelta(baseBuf, targetBuf) + result, err := PatchDelta(baseBuf, delta) + c.Assert(err, IsNil) + c.Assert(result, DeepEquals, targetBuf) +} diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go index 4d56dc1..d35e78a 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -111,7 +111,7 @@ func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte { rl := l aOffset := offset - for { + for rl > 0 { if rl < maxCopySize { buf.Write(encodeCopyOperation(aOffset, rl)) break -- cgit From 2d9816a5e7daea58a1419fef70bfc8d220ffd6a2 Mon Sep 17 00:00:00 2001 From: David Symonds Date: Thu, 21 Jun 2018 13:24:03 +1000 Subject: packfile: optimise NewIndexFromIdxFile for a very common case Loading from an on-disk idxfile will usually already have the idxfile entries in order, so check that before wasting time on sorting. 
Signed-off-by: David Symonds --- plumbing/format/packfile/index.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go index 7d8f2ad..021b2d1 100644 --- a/plumbing/format/packfile/index.go +++ b/plumbing/format/packfile/index.go @@ -31,10 +31,20 @@ func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index { byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount), byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount), } - for _, e := range idxf.Entries { + sorted := true + for i, e := range idxf.Entries { idx.addUnsorted(e) + if i > 0 && idx.byOffset[i-1].Offset >= e.Offset { + sorted = false + } + } + + // If the idxfile was loaded from a regular packfile index + // then it will already be in offset order, in which case we + // can avoid doing a relatively expensive idempotent sort. + if !sorted { + sort.Sort(orderByOffset(idx.byOffset)) } - sort.Sort(orderByOffset(idx.byOffset)) return idx } -- cgit From 009f1069a1248c1e9189a9e4c342f6d017156ec4 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 19 Jul 2018 15:20:10 +0200 Subject: plumbing/format/idxfile: add new Index and MemoryIndex Signed-off-by: Miguel Molina --- plumbing/format/packfile/decoder.go | 27 +++++--- plumbing/format/packfile/index.go | 125 ------------------------------------ 2 files changed, 17 insertions(+), 135 deletions(-) delete mode 100644 plumbing/format/packfile/index.go (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index f706e5d..765401f 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -5,6 +5,7 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) @@ -63,7 +64,7 @@ type Decoder struct { // hasBuiltIndex indicates if the index is fully built or not. If it is not, // will be built incrementally while decoding. 
hasBuiltIndex bool - idx *Index + idx idxfile.Index offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType @@ -117,7 +118,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, o: o, deltaBaseCache: cacheObject, - idx: NewIndex(0), + idx: idxfile.NewMemoryIndex(), offsetToType: make(map[int64]plumbing.ObjectType), decoderType: t, }, nil @@ -150,7 +151,8 @@ func (d *Decoder) doDecode() error { } if !d.hasBuiltIndex { - d.idx = NewIndex(int(count)) + // TODO: MemoryIndex is not writable, change to something else + d.idx = idxfile.NewMemoryIndex() } defer func() { d.hasBuiltIndex = true }() @@ -284,12 +286,12 @@ func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) { } func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) { - e, ok := d.idx.LookupHash(ref) - if !ok { + offset, err := d.idx.FindOffset(ref) + if err != nil { return plumbing.InvalidObject, plumbing.ErrObjectNotFound } - return d.ofsDeltaType(int64(e.Offset)) + return d.ofsDeltaType(offset) } func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) { @@ -314,9 +316,14 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error return obj, err } + // TODO: remove this + _ = crc + + /* Add is no longer available if !d.hasBuiltIndex { d.idx.Add(obj.Hash(), uint64(h.Offset), crc) } + */ return obj, nil } @@ -448,8 +455,8 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { if d.s.IsSeekable { - if e, ok := d.idx.LookupHash(h); ok { - return d.DecodeObjectAt(int64(e.Offset)) + if offset, err := d.idx.FindOffset(h); err != nil { + return d.DecodeObjectAt(offset) } } @@ -475,7 +482,7 @@ func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.Encoded // SetIndex sets an index for the packfile. It is recommended to set this. // The index might be read from a file or reused from a previous Decoder usage // (see Index function). -func (d *Decoder) SetIndex(idx *Index) { +func (d *Decoder) SetIndex(idx idxfile.Index) { d.hasBuiltIndex = true d.idx = idx } @@ -484,7 +491,7 @@ func (d *Decoder) SetIndex(idx *Index) { // Index will return it. Otherwise, it will return an index that is built while // decoding. If neither SetIndex was called with a full index or Decode called // for the whole packfile, then the returned index will be incomplete. -func (d *Decoder) Index() *Index { +func (d *Decoder) Index() idxfile.Index { return d.idx } diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go deleted file mode 100644 index 021b2d1..0000000 --- a/plumbing/format/packfile/index.go +++ /dev/null @@ -1,125 +0,0 @@ -package packfile - -import ( - "sort" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" -) - -// Index is an in-memory representation of a packfile index. -// This uses idxfile.Idxfile under the hood to obtain indexes from .idx files -// or to store them. -type Index struct { - byHash map[plumbing.Hash]*idxfile.Entry - byOffset []*idxfile.Entry // sorted by their offset -} - -// NewIndex creates a new empty index with the given size. Size is a hint and -// can be 0. It is recommended to set it to the number of objects to be indexed -// if it is known beforehand (e.g. reading from a packfile). 
-func NewIndex(size int) *Index { - return &Index{ - byHash: make(map[plumbing.Hash]*idxfile.Entry, size), - byOffset: make([]*idxfile.Entry, 0, size), - } -} - -// NewIndexFromIdxFile creates a new Index from an idxfile.IdxFile. -func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index { - idx := &Index{ - byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount), - byOffset: make([]*idxfile.Entry, 0, idxf.ObjectCount), - } - sorted := true - for i, e := range idxf.Entries { - idx.addUnsorted(e) - if i > 0 && idx.byOffset[i-1].Offset >= e.Offset { - sorted = false - } - } - - // If the idxfile was loaded from a regular packfile index - // then it will already be in offset order, in which case we - // can avoid doing a relatively expensive idempotent sort. - if !sorted { - sort.Sort(orderByOffset(idx.byOffset)) - } - - return idx -} - -// orderByOffset is a sort.Interface adapter that arranges -// a slice of entries by their offset. -type orderByOffset []*idxfile.Entry - -func (o orderByOffset) Len() int { return len(o) } -func (o orderByOffset) Less(i, j int) bool { return o[i].Offset < o[j].Offset } -func (o orderByOffset) Swap(i, j int) { o[i], o[j] = o[j], o[i] } - -// Add adds a new Entry with the given values to the index. -func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) { - e := &idxfile.Entry{ - Hash: h, - Offset: offset, - CRC32: crc32, - } - idx.byHash[e.Hash] = e - - // Find the right position in byOffset. - // Look for the first position whose offset is *greater* than e.Offset. - i := sort.Search(len(idx.byOffset), func(i int) bool { - return idx.byOffset[i].Offset > offset - }) - if i == len(idx.byOffset) { - // Simple case: add it to the end. - idx.byOffset = append(idx.byOffset, e) - return - } - // Harder case: shift existing entries down by one to make room. - // Append a nil entry first so we can use existing capacity in case - // the index was carefully preallocated. - idx.byOffset = append(idx.byOffset, nil) - copy(idx.byOffset[i+1:], idx.byOffset[i:len(idx.byOffset)-1]) - idx.byOffset[i] = e -} - -func (idx *Index) addUnsorted(e *idxfile.Entry) { - idx.byHash[e.Hash] = e - idx.byOffset = append(idx.byOffset, e) -} - -// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and -// a bool, which is true if it was found or false if it wasn't. -func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) { - e, ok := idx.byHash[h] - return e, ok -} - -// LookupHash looks an entry up by its offset in the packfile. An idxfile.Entry -// is returned and a bool, which is true if it was found or false if it wasn't. -func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) { - i := sort.Search(len(idx.byOffset), func(i int) bool { - return idx.byOffset[i].Offset >= offset - }) - if i >= len(idx.byOffset) || idx.byOffset[i].Offset != offset { - return nil, false // not present - } - return idx.byOffset[i], true -} - -// Size returns the number of entries in the index. -func (idx *Index) Size() int { - return len(idx.byHash) -} - -// ToIdxFile converts the index to an idxfile.Idxfile, which can then be used -// to serialize. 
-func (idx *Index) ToIdxFile() *idxfile.Idxfile { - idxf := idxfile.NewIdxfile() - for _, e := range idx.byHash { - idxf.Entries = append(idxf.Entries, e) - } - - return idxf -} -- cgit From da5677f5ba3970d585d5955b15a6a1c3c262c07b Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 19 Jul 2018 17:05:45 +0200 Subject: plumbing/packfile: add new packfile parser Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 359 ++++++++++++++++++++++++++++++++ plumbing/format/packfile/parser_test.go | 139 +++++++++++++ 2 files changed, 498 insertions(+) create mode 100644 plumbing/format/packfile/parser.go create mode 100644 plumbing/format/packfile/parser_test.go (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go new file mode 100644 index 0000000..460fc3f --- /dev/null +++ b/plumbing/format/packfile/parser.go @@ -0,0 +1,359 @@ +package packfile + +import ( + "bytes" + "errors" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" +) + +// Observer interface is implemented by index encoders. +type Observer interface { + // OnHeader is called when a new packfile is opened. + OnHeader(count uint32) error + // OnInflatedObjectHeader is called for each object header read. + OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error + // OnInflatedObjectContent is called for each decoded object. + OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error + // OnFooter is called when decoding is done. + OnFooter(h plumbing.Hash) error +} + +// Parser decodes a packfile and calls any observer associated to it. Is used +// to generate indexes. +type Parser struct { + scanner *Scanner + count uint32 + oi []*objectInfo + oiByHash map[plumbing.Hash]*objectInfo + oiByOffset map[int64]*objectInfo + hashOffset map[plumbing.Hash]int64 + checksum plumbing.Hash + + cache *cache.ObjectLRU + + ob []Observer +} + +// NewParser creates a new Parser struct. +func NewParser(scanner *Scanner, ob ...Observer) *Parser { + return &Parser{ + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + } +} + +// Parse start decoding phase of the packfile. 
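+// It runs in three phases: init reads the pack header and notifies
+// observers through OnHeader; firstPass scans every object, recording
+// offsets, sizes and CRCs; resolveDeltas then reconstructs delta objects
+// from their bases, reporting each inflated object to the observers.
+// The pack checksum is finally handed to OnFooter and returned.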
+func (p *Parser) Parse() (plumbing.Hash, error) { + err := p.init() + if err != nil { + return plumbing.ZeroHash, err + } + + err = p.firstPass() + if err != nil { + return plumbing.ZeroHash, err + } + + err = p.resolveDeltas() + if err != nil { + return plumbing.ZeroHash, err + } + + for _, o := range p.ob { + err := o.OnFooter(p.checksum) + if err != nil { + return plumbing.ZeroHash, err + } + } + + return p.checksum, nil +} + +func (p *Parser) init() error { + _, c, err := p.scanner.Header() + if err != nil { + return err + } + + for _, o := range p.ob { + err := o.OnHeader(c) + if err != nil { + return err + } + } + + p.count = c + p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count) + p.oiByOffset = make(map[int64]*objectInfo, p.count) + p.oi = make([]*objectInfo, p.count) + + return nil +} + +func (p *Parser) firstPass() error { + buf := new(bytes.Buffer) + + for i := uint32(0); i < p.count; i++ { + buf.Truncate(0) + + oh, err := p.scanner.NextObjectHeader() + if err != nil { + return err + } + + delta := false + var ota *objectInfo + switch t := oh.Type; t { + case plumbing.OFSDeltaObject, plumbing.REFDeltaObject: + delta = true + + var parent *objectInfo + var ok bool + + if t == plumbing.OFSDeltaObject { + parent, ok = p.oiByOffset[oh.OffsetReference] + } else { + parent, ok = p.oiByHash[oh.Reference] + } + + if !ok { + // TODO improve error + return errors.New("Reference delta not found") + } + + ota = newDeltaObject(oh.Offset, oh.Length, t, parent) + + parent.Children = append(parent.Children, ota) + default: + ota = newBaseObject(oh.Offset, oh.Length, t) + } + + size, crc, err := p.scanner.NextObject(buf) + if err != nil { + return err + } + + ota.Crc32 = crc + ota.PackSize = size + ota.Length = oh.Length + + if !delta { + ota.Write(buf.Bytes()) + ota.SHA1 = ota.Sum() + } + + p.oiByOffset[oh.Offset] = ota + p.oiByHash[oh.Reference] = ota + + p.oi[i] = ota + } + + checksum, err := p.scanner.Checksum() + p.checksum = checksum + + if err == io.EOF { + return nil + } + + return err +} + +func (p *Parser) resolveDeltas() error { + for _, obj := range p.oi { + for _, o := range p.ob { + err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) + if err != nil { + return err + } + + err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32) + if err != nil { + return err + } + } + + if !obj.IsDelta() && len(obj.Children) > 0 { + var err error + base, err := p.get(obj) + if err != nil { + return err + } + + for _, child := range obj.Children { + _, err = p.resolveObject(child, base) + if err != nil { + return err + } + } + } + } + + return nil +} + +func (p *Parser) get(o *objectInfo) ([]byte, error) { + e, ok := p.cache.Get(o.SHA1) + if ok { + r, err := e.Reader() + if err != nil { + return nil, err + } + + buf := make([]byte, e.Size()) + _, err = r.Read(buf) + if err != nil { + return nil, err + } + + return buf, nil + } + + // Read from disk + if o.DiskType.IsDelta() { + base, err := p.get(o.Parent) + if err != nil { + return nil, err + } + + data, err := p.resolveObject(o, base) + if err != nil { + return nil, err + } + + if len(o.Children) > 0 { + m := &plumbing.MemoryObject{} + m.Write(data) + m.SetType(o.Type) + m.SetSize(o.Size()) + p.cache.Put(m) + } + + return data, nil + } + + data, err := p.readData(o) + if err != nil { + return nil, err + } + + if len(o.Children) > 0 { + m := &plumbing.MemoryObject{} + m.Write(data) + m.SetType(o.Type) + m.SetSize(o.Size()) + p.cache.Put(m) + } + + return data, nil +} + +func (p *Parser) resolveObject( + o *objectInfo, 
+ base []byte) ([]byte, error) { + + if !o.DiskType.IsDelta() { + return nil, nil + } + + data, err := p.readData(o) + if err != nil { + return nil, err + } + + data, err = applyPatchBase(o, data, base) + if err != nil { + return nil, err + } + + return data, nil +} + +func (p *Parser) readData(o *objectInfo) ([]byte, error) { + buf := new(bytes.Buffer) + + // TODO: skip header. Header size can be calculated with the offset of the + // next offset in the first pass. + p.scanner.SeekFromStart(o.Offset) + _, err := p.scanner.NextObjectHeader() + if err != nil { + return nil, err + } + + buf.Truncate(0) + + _, _, err = p.scanner.NextObject(buf) + if err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { + patched, err := PatchDelta(base, data) + if err != nil { + return nil, err + } + + ota.Type = ota.Parent.Type + hash := plumbing.ComputeHash(ota.Type, patched) + + ota.SHA1 = hash + + return patched, nil +} + +type objectInfo struct { + plumbing.Hasher + + Offset int64 + Length int64 + PackSize int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType + + Crc32 uint32 + + Parent *objectInfo + Children []*objectInfo + SHA1 plumbing.Hash +} + +func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { + return newDeltaObject(offset, length, t, nil) +} + +func newDeltaObject( + offset, length int64, + t plumbing.ObjectType, + parent *objectInfo, +) *objectInfo { + children := make([]*objectInfo, 0) + + obj := &objectInfo{ + Hasher: plumbing.NewHasher(t, length), + Offset: offset, + Length: length, + PackSize: 0, + Type: t, + DiskType: t, + Crc32: 0, + Parent: parent, + Children: children, + } + + return obj +} + +func (o *objectInfo) IsDelta() bool { + return o.Type.IsDelta() +} + +func (o *objectInfo) Size() int64 { + return o.Length +} diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go new file mode 100644 index 0000000..87a8804 --- /dev/null +++ b/plumbing/format/packfile/parser_test.go @@ -0,0 +1,139 @@ +package packfile_test + +import ( + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + + . 
"gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" +) + +type ParserSuite struct { + fixtures.Suite +} + +var _ = Suite(&ParserSuite{}) + +func (s *ParserSuite) TestParserHashes(c *C) { + f := fixtures.Basic().One() + scanner := packfile.NewScanner(f.Packfile()) + + obs := new(testObserver) + parser := packfile.NewParser(scanner, obs) + + ch, err := parser.Parse() + c.Assert(err, IsNil) + + checksum := "a3fed42da1e8189a077c0e6846c040dcf73fc9dd" + c.Assert(ch.String(), Equals, checksum) + + c.Assert(obs.checksum, Equals, checksum) + c.Assert(int(obs.count), Equals, int(31)) + + commit := plumbing.CommitObject + blob := plumbing.BlobObject + tree := plumbing.TreeObject + + objs := []observerObject{ + {"e8d3ffab552895c19b9fcf7aa264d277cde33881", commit, 254, 12, 0xaa07ba4b}, + {"6ecf0ef2c2dffb796033e5a02219af86ec6584e5", commit, 93, 186, 0xf706df58}, + {"918c48b83bd081e863dbe1b80f8998f058cd8294", commit, 242, 286, 0x12438846}, + {"af2d6a6954d532f8ffb47615169c8fdf9d383a1a", commit, 242, 449, 0x2905a38c}, + {"1669dce138d9b841a518c64b10914d88f5e488ea", commit, 333, 615, 0xd9429436}, + {"a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", commit, 332, 838, 0xbecfde4e}, + {"35e85108805c84807bc66a02d91535e1e24b38b9", commit, 244, 1063, 0x780e4b3e}, + {"b8e471f58bcbca63b07bda20e428190409c2db47", commit, 243, 1230, 0xdc18344f}, + {"b029517f6300c2da0f4b651b8642506cd6aaf45d", commit, 187, 1392, 0xcf4e4280}, + {"32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", blob, 189, 1524, 0x1f08118a}, + {"d3ff53e0564a9f87d8e84b6e28e5060e517008aa", blob, 18, 1685, 0xafded7b8}, + {"c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", blob, 1072, 1713, 0xcc1428ed}, + {"d5c0f4ab811897cadf03aec358ae60d21f91c50d", blob, 76110, 2351, 0x1631d22f}, + {"880cd14280f4b9b6ed3986d6671f907d7cc2a198", blob, 2780, 78050, 0xbfff5850}, + {"49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", blob, 217848, 78882, 0xd108e1d8}, + {"c8f1d8c61f9da76f4cb49fd86322b6e685dba956", blob, 706, 80725, 0x8e97ba25}, + {"9a48f23120e880dfbe41f7c9b7b708e9ee62a492", blob, 11488, 80998, 0x7316ff70}, + {"9dea2395f5403188298c1dabe8bdafe562c491e3", blob, 78, 84032, 0xdb4fce56}, + {"dbd3641b371024f44d0e469a9c8f5457b0660de1", tree, 272, 84115, 0x901cce2c}, + {"a8d315b2b1c615d43042c3a62402b8a54288cf5c", tree, 43, 84375, 0xec4552b0}, + {"a39771a7651f97faf5c72e08224d857fc35133db", tree, 38, 84430, 0x847905bf}, + {"5a877e6a906a2743ad6e45d99c1793642aaf8eda", tree, 75, 84479, 0x3689459a}, + {"586af567d0bb5e771e49bdd9434f5e0fb76d25fa", tree, 38, 84559, 0xe67af94a}, + {"cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", tree, 34, 84608, 0xc2314a2e}, + {"7e59600739c96546163833214c36459e324bad0a", blob, 9, 84653, 0xcd987848}, + {"fb72698cab7617ac416264415f13224dfd7a165e", tree, 6, 84671, 0x8a853a6d}, + {"4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", tree, 9, 84688, 0x70c6518}, + {"eba74343e2f15d62adedfd8c883ee0262b5c8021", tree, 6, 84708, 0x4f4108e2}, + {"c2d30fa8ef288618f65f6eed6e168e0d514886f4", tree, 5, 84725, 0xd6fe09e9}, + {"8dcef98b1d52143e1e2dbc458ffe38f925786bf2", tree, 8, 84741, 0xf07a2804}, + {"aa9b383c260e1d05fbbf6b30a02914555e20c725", tree, 4, 84760, 0x1d75d6be}, + } + + c.Assert(obs.objects, DeepEquals, objs) +} + +type observerObject struct { + hash string + otype plumbing.ObjectType + size int64 + offset int64 + crc uint32 +} + +type testObserver struct { + count uint32 + checksum string + objects []observerObject + pos map[int64]int +} + +func (t *testObserver) OnHeader(count uint32) error { + t.count = count + t.pos = make(map[int64]int, count) + return nil +} + +func (t *testObserver) 
OnInflatedObjectHeader(otype plumbing.ObjectType, objSize int64, pos int64) error { + o := t.get(pos) + o.otype = otype + o.size = objSize + o.offset = pos + + t.put(pos, o) + + return nil +} + +func (t *testObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { + o := t.get(pos) + o.hash = h.String() + o.crc = crc + + t.put(pos, o) + + return nil +} + +func (t *testObserver) OnFooter(h plumbing.Hash) error { + t.checksum = h.String() + return nil +} + +func (t *testObserver) get(pos int64) observerObject { + i, ok := t.pos[pos] + if ok { + return t.objects[i] + } + + return observerObject{} +} + +func (t *testObserver) put(pos int64, o observerObject) { + i, ok := t.pos[pos] + if ok { + t.objects[i] = o + return + } + + t.pos[pos] = len(t.objects) + t.objects = append(t.objects, o) +} -- cgit From ce91d71f96097ede2bb77d2af444aee6fff73183 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 19 Jul 2018 23:25:14 +0200 Subject: plumbing/packfile: disable lookup by offset In one case it disables the cache and the other disables lookup when the scanner is not seekable. Could be added back later. Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 765401f..9bfd69b 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -403,12 +403,13 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i return 0, err } - e, ok := d.idx.LookupOffset(uint64(offset)) - var base plumbing.EncodedObject - if ok { - base, ok = d.cacheGet(e.Hash) - } + // e, ok := d.idx.LookupOffset(uint64(offset)) + // if ok { + // base, ok = d.cacheGet(e.Hash) + // } + var base plumbing.EncodedObject + ok := false if !ok { base, err = d.recallByOffset(offset) if err != nil { @@ -446,9 +447,9 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { return d.DecodeObjectAt(o) } - if e, ok := d.idx.LookupOffset(uint64(o)); ok { - return d.recallByHashNonSeekable(e.Hash) - } + // if e, ok := d.idx.LookupOffset(uint64(o)); ok { + // return d.recallByHashNonSeekable(e.Hash) + // } return nil, plumbing.ErrObjectNotFound } -- cgit From a716126aa7f9b77030d2e697db24d206d944f05d Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 24 Jul 2018 17:36:21 +0200 Subject: plumbing/packfile: preallocate memory in PatchDelta Signed-off-by: Javi Fontan --- plumbing/format/packfile/patch_delta.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go index c604851..a972f1c 100644 --- a/plumbing/format/packfile/patch_delta.go +++ b/plumbing/format/packfile/patch_delta.go @@ -63,8 +63,8 @@ func PatchDelta(src, delta []byte) ([]byte, error) { targetSz, delta := decodeLEB128(delta) remainingTargetSz := targetSz - var dest []byte var cmd byte + dest := make([]byte, 0, targetSz) for { if len(delta) == 0 { return nil, ErrInvalidDelta -- cgit From 79f249465b24104b73c9dc220d9098cecdab4d77 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Thu, 26 Jul 2018 13:42:51 +0200 Subject: plumbing, storage: integrate new index Now dotgit.PackWriter uses the new packfile.Parser and index. 
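For context, a minimal sketch of how a consumer can hook an Observer
into the parser to build such an index mapping. This is illustrative
only: offsetObserver and buildOffsets are hypothetical names, but the
Observer methods and the NewScanner/NewParser/Parse calls match the API
introduced in parser.go above.

    package sketch

    import (
        "io"

        "gopkg.in/src-d/go-git.v4/plumbing"
        "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
    )

    // offsetObserver records the offset at which each object was found.
    type offsetObserver struct {
        offsets map[plumbing.Hash]int64
    }

    func (o *offsetObserver) OnHeader(count uint32) error {
        o.offsets = make(map[plumbing.Hash]int64, count)
        return nil
    }

    func (o *offsetObserver) OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error {
        return nil // type and size are not needed for this table
    }

    func (o *offsetObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error {
        o.offsets[h] = pos // the hash is only known once content is inflated
        return nil
    }

    func (o *offsetObserver) OnFooter(h plumbing.Hash) error { return nil }

    // buildOffsets runs a full parse; the observer fills in as a side effect.
    func buildOffsets(pack io.Reader) (map[plumbing.Hash]int64, error) {
        obs := &offsetObserver{}
        parser := packfile.NewParser(packfile.NewScanner(pack), obs)
        if _, err := parser.Parse(); err != nil {
            return nil, err
        }
        return obs.offsets, nil
    }
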
Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 9 +++++---- plumbing/format/packfile/parser.go | 11 ++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 9bfd69b..69aef2d 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -447,11 +447,12 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { return d.DecodeObjectAt(o) } - // if e, ok := d.idx.LookupOffset(uint64(o)); ok { - // return d.recallByHashNonSeekable(e.Hash) - // } + hash, err := d.idx.FindHash(o) + if err != nil { + return nil, err + } - return nil, plumbing.ErrObjectNotFound + return d.recallByHashNonSeekable(hash) } func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 460fc3f..696f5ba 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -311,11 +311,12 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { type objectInfo struct { plumbing.Hasher - Offset int64 - Length int64 - PackSize int64 - Type plumbing.ObjectType - DiskType plumbing.ObjectType + Offset int64 + Length int64 + HeaderLength int64 + PackSize int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType Crc32 uint32 -- cgit From ffdfb7dbabb78090b27ca29b762b803969c89fd7 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Fri, 20 Jul 2018 15:51:15 +0200 Subject: plumbing: packfile, new Packfile representation Signed-off-by: Miguel Molina --- plumbing/format/packfile/decoder.go | 57 ++++--- plumbing/format/packfile/decoder_test.go | 12 +- plumbing/format/packfile/index_test.go | 133 ---------------- plumbing/format/packfile/packfile.go | 249 ++++++++++++++++++++++++++++++ plumbing/format/packfile/packfile_test.go | 121 +++++++++++++++ 5 files changed, 418 insertions(+), 154 deletions(-) delete mode 100644 plumbing/format/packfile/index_test.go create mode 100644 plumbing/format/packfile/packfile.go create mode 100644 plumbing/format/packfile/packfile_test.go (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 69aef2d..b1a0a26 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -2,6 +2,7 @@ package packfile import ( "bytes" + "io" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" @@ -68,6 +69,7 @@ type Decoder struct { offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType + offsetToHash map[int64]plumbing.Hash } // NewDecoder returns a new Decoder that decodes a Packfile using the given @@ -120,6 +122,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, idx: idxfile.NewMemoryIndex(), offsetToType: make(map[int64]plumbing.ObjectType), + offsetToHash: make(map[int64]plumbing.Hash), decoderType: t, }, nil } @@ -144,6 +147,27 @@ func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { return d.s.Checksum() } +func (d *Decoder) fillOffsetsToHashes() error { + entries, err := d.idx.Entries() + if err != nil { + return err + } + + for { + e, err := entries.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + + d.offsetToHash[int64(e.Offset)] = e.Hash + } + + return entries.Close() +} + func (d *Decoder) doDecode() error { _, count, err := d.s.Header() 
if err != nil { @@ -156,6 +180,12 @@ func (d *Decoder) doDecode() error { } defer func() { d.hasBuiltIndex = true }() + if d.hasBuiltIndex && !d.s.IsSeekable { + if err := d.fillOffsetsToHashes(); err != nil { + return err + } + } + _, isTxStorer := d.o.(storer.Transactioner) switch { case d.o == nil: @@ -299,15 +329,14 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error obj.SetSize(h.Length) obj.SetType(h.Type) - var crc uint32 var err error switch h.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: - crc, err = d.fillRegularObjectContent(obj) + _, err = d.fillRegularObjectContent(obj) case plumbing.REFDeltaObject: - crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) + _, err = d.fillREFDeltaObjectContent(obj, h.Reference) case plumbing.OFSDeltaObject: - crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) + _, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) default: err = ErrInvalidObject.AddDetails("type %q", h.Type) } @@ -316,14 +345,7 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error return obj, err } - // TODO: remove this - _ = crc - - /* Add is no longer available - if !d.hasBuiltIndex { - d.idx.Add(obj.Hash(), uint64(h.Offset), crc) - } - */ + d.offsetToHash[h.Offset] = obj.Hash() return obj, nil } @@ -403,13 +425,12 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i return 0, err } - // e, ok := d.idx.LookupOffset(uint64(offset)) - // if ok { - // base, ok = d.cacheGet(e.Hash) - // } - + h, ok := d.offsetToHash[offset] var base plumbing.EncodedObject - ok := false + if ok { + base, ok = d.cacheGet(h) + } + if !ok { base, err = d.recallByOffset(offset) if err != nil { diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go index b5bc7b7..4fe9b5e 100644 --- a/plumbing/format/packfile/decoder_test.go +++ b/plumbing/format/packfile/decoder_test.go @@ -5,7 +5,6 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -47,6 +46,7 @@ func (s *ReaderSuite) TestDecode(c *C) { }) } +/* func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) { f := fixtures.Basic().ByTag("ref-delta").One() @@ -101,7 +101,9 @@ func (s *ReaderSuite) TestDecodeByTypeRefDeltaError(c *C) { }) } +*/ +/* func (s *ReaderSuite) TestDecodeByType(c *C) { ts := []plumbing.ObjectType{ plumbing.CommitObject, @@ -140,6 +142,8 @@ func (s *ReaderSuite) TestDecodeByType(c *C) { } }) } +*/ + func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) { f := fixtures.Basic().ByTag("packfile").One() storage := memory.NewStorage() @@ -280,6 +284,7 @@ var expectedHashes = []string{ "7e59600739c96546163833214c36459e324bad0a", } +/* func (s *ReaderSuite) TestDecodeCRCs(c *C) { f := fixtures.Basic().ByTag("ofs-delta").One() @@ -366,7 +371,7 @@ func (s *ReaderSuite) TestSetIndex(c *C) { idxf := d.Index().ToIdxFile() c.Assert(idxf.Entries, HasLen, 1) c.Assert(idxf.Entries[0].Offset, Equals, uint64(42)) -} +}*/ func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { @@ -385,6 +390,7 @@ func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { } } +/* func getIndexFromIdxFile(r io.Reader) *packfile.Index { idxf := idxfile.NewIdxfile() d := idxfile.NewDecoder(r) @@ -393,4 
+399,4 @@ func getIndexFromIdxFile(r io.Reader) *packfile.Index { } return packfile.NewIndexFromIdxFile(idxf) -} +}*/ diff --git a/plumbing/format/packfile/index_test.go b/plumbing/format/packfile/index_test.go deleted file mode 100644 index 8de886d..0000000 --- a/plumbing/format/packfile/index_test.go +++ /dev/null @@ -1,133 +0,0 @@ -package packfile - -import ( - "strconv" - "strings" - "testing" - - "gopkg.in/src-d/go-git.v4/plumbing" - - . "gopkg.in/check.v1" -) - -type IndexSuite struct{} - -var _ = Suite(&IndexSuite{}) - -func (s *IndexSuite) TestLookupOffset(c *C) { - idx := NewIndex(0) - - for o1 := 0; o1 < 10000; o1 += 100 { - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 >= o1 { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - - h1 := toHash(o1) - idx.Add(h1, uint64(o1), 0) - - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 > o1 { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - } -} - -func (s *IndexSuite) TestLookupHash(c *C) { - idx := NewIndex(0) - - for o1 := 0; o1 < 10000; o1 += 100 { - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 >= o1 { - e, ok := idx.LookupHash(toHash(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupHash(toHash(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - - h1 := toHash(o1) - idx.Add(h1, uint64(o1), 0) - - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 > o1 { - e, ok := idx.LookupHash(toHash(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupHash(toHash(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - } -} - -func (s *IndexSuite) TestSize(c *C) { - idx := NewIndex(0) - - for o1 := 0; o1 < 1000; o1++ { - c.Assert(idx.Size(), Equals, o1) - h1 := toHash(o1) - idx.Add(h1, uint64(o1), 0) - } -} - -func (s *IndexSuite) TestIdxFileEmpty(c *C) { - idx := NewIndex(0) - idxf := idx.ToIdxFile() - idx2 := NewIndexFromIdxFile(idxf) - c.Assert(idx, DeepEquals, idx2) -} - -func (s *IndexSuite) TestIdxFile(c *C) { - idx := NewIndex(0) - for o1 := 0; o1 < 1000; o1++ { - h1 := toHash(o1) - idx.Add(h1, uint64(o1), 0) - } - - idx2 := NewIndexFromIdxFile(idx.ToIdxFile()) - c.Assert(idx, DeepEquals, idx2) -} - -func toHash(i int) plumbing.Hash { - is := strconv.Itoa(i) - padding := strings.Repeat("a", 40-len(is)) - return plumbing.NewHash(padding + is) -} - -func BenchmarkIndexConstruction(b *testing.B) { - b.ReportAllocs() - - idx := NewIndex(0) - for o := 0; o < 1e6*b.N; o += 100 { - h1 := toHash(o) - idx.Add(h1, uint64(o), 0) - } -} diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go new file mode 100644 index 0000000..cee6031 --- /dev/null +++ b/plumbing/format/packfile/packfile.go @@ -0,0 +1,249 @@ +package packfile + +import ( + "bytes" + "io" + + billy "gopkg.in/src-d/go-billy.v4" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + 
"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// Packfile allows retrieving information from inside a packfile. +type Packfile struct { + idxfile.Index + billy.File + s *Scanner + deltaBaseCache cache.Object + offsetToHash map[int64]plumbing.Hash +} + +// NewPackfile returns a packfile representation for the given packfile file +// and packfile idx. +func NewPackfile(index idxfile.Index, file billy.File) *Packfile { + s := NewScanner(file) + + return &Packfile{ + index, + file, + s, + cache.NewObjectLRUDefault(), + make(map[int64]plumbing.Hash), + } +} + +// Get retrieves the encoded object in the packfile with the given hash. +func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { + offset, err := p.FindOffset(h) + if err != nil { + return nil, err + } + + return p.GetByOffset(offset) +} + +// GetByOffset retrieves the encoded object from the packfile with the given +// offset. +func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { + if h, ok := p.offsetToHash[o]; ok { + if obj, ok := p.deltaBaseCache.Get(h); ok { + return obj, nil + } + } + + if _, err := p.s.SeekFromStart(o); err != nil { + return nil, err + } + + return p.nextObject() +} + +func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { + h, err := p.s.NextObjectHeader() + if err != nil { + return nil, err + } + + obj := new(plumbing.MemoryObject) + obj.SetSize(h.Length) + obj.SetType(h.Type) + + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + err = p.fillRegularObjectContent(obj) + case plumbing.REFDeltaObject: + err = p.fillREFDeltaObjectContent(obj, h.Reference) + case plumbing.OFSDeltaObject: + err = p.fillOFSDeltaObjectContent(obj, h.OffsetReference) + default: + err = ErrInvalidObject.AddDetails("type %q", h.Type) + } + + if err != nil { + return obj, err + } + + p.offsetToHash[h.Offset] = obj.Hash() + + return obj, nil +} + +func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error { + w, err := obj.Writer() + if err != nil { + return err + } + + _, _, err = p.s.NextObject(w) + return err +} + +func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) error { + buf := bufPool.Get().(*bytes.Buffer) + buf.Reset() + _, _, err := p.s.NextObject(buf) + if err != nil { + return err + } + + base, ok := p.cacheGet(ref) + if !ok { + base, err = p.Get(ref) + if err != nil { + return err + } + } + + obj.SetType(base.Type()) + err = ApplyDelta(obj, base, buf.Bytes()) + p.cachePut(obj) + bufPool.Put(buf) + + return err +} + +func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error { + buf := bytes.NewBuffer(nil) + _, _, err := p.s.NextObject(buf) + if err != nil { + return err + } + + var base plumbing.EncodedObject + h, ok := p.offsetToHash[offset] + if ok { + base, ok = p.cacheGet(h) + } + + if !ok { + base, err = p.GetByOffset(offset) + if err != nil { + return err + } + + p.cachePut(base) + } + + obj.SetType(base.Type()) + err = ApplyDelta(obj, base, buf.Bytes()) + p.cachePut(obj) + + return err +} + +func (p *Packfile) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { + if p.deltaBaseCache == nil { + return nil, false + } + + return p.deltaBaseCache.Get(h) +} + +func (p *Packfile) cachePut(obj plumbing.EncodedObject) { + if p.deltaBaseCache == nil { + return + } + + p.deltaBaseCache.Put(obj) +} + +// GetAll returns an iterator with all encoded objects in the 
packfile. +// The iterator returned is not thread-safe, it should be used in the same +// thread as the Packfile instance. +func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) { + s := NewScanner(p.File) + + _, count, err := s.Header() + if err != nil { + return nil, err + } + + return &objectIter{ + // Easiest way to provide an object decoder is just to pass a Packfile + // instance. To not mess with the seeks, it's a new instance with a + // different scanner but the same cache and offset to hash map for + // reusing as much cache as possible. + d: &Packfile{p.Index, nil, s, p.deltaBaseCache, p.offsetToHash}, + count: int(count), + }, nil +} + +// ID returns the ID of the packfile, which is the checksum at the end of it. +func (p *Packfile) ID() (plumbing.Hash, error) { + if _, err := p.File.Seek(-20, io.SeekEnd); err != nil { + return plumbing.ZeroHash, err + } + + var hash plumbing.Hash + if _, err := io.ReadFull(p.File, hash[:]); err != nil { + return plumbing.ZeroHash, err + } + + return hash, nil +} + +// Close the packfile and its resources. +func (p *Packfile) Close() error { + return p.File.Close() +} + +type objectDecoder interface { + nextObject() (plumbing.EncodedObject, error) +} + +type objectIter struct { + d objectDecoder + count int + pos int +} + +func (i *objectIter) Next() (plumbing.EncodedObject, error) { + if i.pos >= i.count { + return nil, io.EOF + } + + i.pos++ + return i.d.nextObject() +} + +func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error { + for { + o, err := i.Next() + if err != nil { + if err == io.EOF { + return nil + } + return err + } + + if err := f(o); err != nil { + return err + } + } +} + +func (i *objectIter) Close() { + i.pos = i.count +} diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go new file mode 100644 index 0000000..10e4080 --- /dev/null +++ b/plumbing/format/packfile/packfile_test.go @@ -0,0 +1,121 @@ +package packfile + +import ( + "io" + "math" + + . 
"gopkg.in/check.v1" + "gopkg.in/src-d/go-billy.v4/osfs" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" +) + +type PackfileSuite struct { + fixtures.Suite + p *Packfile + idx *idxfile.MemoryIndex + f *fixtures.Fixture +} + +var _ = Suite(&PackfileSuite{}) + +func (s *PackfileSuite) TestGet(c *C) { + for h := range expectedEntries { + obj, err := s.p.Get(h) + c.Assert(err, IsNil) + c.Assert(obj, Not(IsNil)) + c.Assert(obj.Hash(), Equals, h) + } + + _, err := s.p.Get(plumbing.ZeroHash) + c.Assert(err, Equals, plumbing.ErrObjectNotFound) +} + +func (s *PackfileSuite) TestGetByOffset(c *C) { + for h, o := range expectedEntries { + obj, err := s.p.GetByOffset(o) + c.Assert(err, IsNil) + c.Assert(obj, Not(IsNil)) + c.Assert(obj.Hash(), Equals, h) + } + + _, err := s.p.GetByOffset(math.MaxInt64) + c.Assert(err, Equals, io.EOF) +} + +func (s *PackfileSuite) TestID(c *C) { + id, err := s.p.ID() + c.Assert(err, IsNil) + c.Assert(id, Equals, s.f.PackfileHash) +} + +func (s *PackfileSuite) TestGetAll(c *C) { + iter, err := s.p.GetAll() + c.Assert(err, IsNil) + + var objects int + for { + o, err := iter.Next() + if err == io.EOF { + break + } + c.Assert(err, IsNil) + + objects++ + _, ok := expectedEntries[o.Hash()] + c.Assert(ok, Equals, true) + } + + c.Assert(objects, Equals, len(expectedEntries)) +} + +var expectedEntries = map[plumbing.Hash]int64{ + plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea"): 615, + plumbing.NewHash("32858aad3c383ed1ff0a0f9bdf231d54a00c9e88"): 1524, + plumbing.NewHash("35e85108805c84807bc66a02d91535e1e24b38b9"): 1063, + plumbing.NewHash("49c6bb89b17060d7b4deacb7b338fcc6ea2352a9"): 78882, + plumbing.NewHash("4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd"): 84688, + plumbing.NewHash("586af567d0bb5e771e49bdd9434f5e0fb76d25fa"): 84559, + plumbing.NewHash("5a877e6a906a2743ad6e45d99c1793642aaf8eda"): 84479, + plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5"): 186, + plumbing.NewHash("7e59600739c96546163833214c36459e324bad0a"): 84653, + plumbing.NewHash("880cd14280f4b9b6ed3986d6671f907d7cc2a198"): 78050, + plumbing.NewHash("8dcef98b1d52143e1e2dbc458ffe38f925786bf2"): 84741, + plumbing.NewHash("918c48b83bd081e863dbe1b80f8998f058cd8294"): 286, + plumbing.NewHash("9a48f23120e880dfbe41f7c9b7b708e9ee62a492"): 80998, + plumbing.NewHash("9dea2395f5403188298c1dabe8bdafe562c491e3"): 84032, + plumbing.NewHash("a39771a7651f97faf5c72e08224d857fc35133db"): 84430, + plumbing.NewHash("a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69"): 838, + plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c"): 84375, + plumbing.NewHash("aa9b383c260e1d05fbbf6b30a02914555e20c725"): 84760, + plumbing.NewHash("af2d6a6954d532f8ffb47615169c8fdf9d383a1a"): 449, + plumbing.NewHash("b029517f6300c2da0f4b651b8642506cd6aaf45d"): 1392, + plumbing.NewHash("b8e471f58bcbca63b07bda20e428190409c2db47"): 1230, + plumbing.NewHash("c192bd6a24ea1ab01d78686e417c8bdc7c3d197f"): 1713, + plumbing.NewHash("c2d30fa8ef288618f65f6eed6e168e0d514886f4"): 84725, + plumbing.NewHash("c8f1d8c61f9da76f4cb49fd86322b6e685dba956"): 80725, + plumbing.NewHash("cf4aa3b38974fb7d81f367c0830f7d78d65ab86b"): 84608, + plumbing.NewHash("d3ff53e0564a9f87d8e84b6e28e5060e517008aa"): 1685, + plumbing.NewHash("d5c0f4ab811897cadf03aec358ae60d21f91c50d"): 2351, + plumbing.NewHash("dbd3641b371024f44d0e469a9c8f5457b0660de1"): 84115, + plumbing.NewHash("e8d3ffab552895c19b9fcf7aa264d277cde33881"): 12, + plumbing.NewHash("eba74343e2f15d62adedfd8c883ee0262b5c8021"): 
84708, + plumbing.NewHash("fb72698cab7617ac416264415f13224dfd7a165e"): 84671, +} + +func (s *PackfileSuite) SetUpTest(c *C) { + s.f = fixtures.Basic().One() + + f, err := osfs.New("/").Open(s.f.Packfile().Name()) + c.Assert(err, IsNil) + + s.idx = idxfile.NewMemoryIndex() + c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil) + + s.p = NewPackfile(s.idx, f) +} + +func (s *PackfileSuite) TearDownTest(c *C) { + c.Assert(s.p.Close(), IsNil) +} -- cgit From ccd0fa0bc17f0680038529b00f5c5a44f8e77b41 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Fri, 27 Jul 2018 15:07:25 +0200 Subject: plumbing: packfile, lazy object reads with DiskObjects Signed-off-by: Miguel Molina --- plumbing/format/packfile/decoder.go | 2 +- plumbing/format/packfile/disk_object.go | 64 +++++++++ plumbing/format/packfile/packfile.go | 208 ++++++++++++++++++++++++++---- plumbing/format/packfile/packfile_test.go | 46 +++++++ 4 files changed, 293 insertions(+), 27 deletions(-) create mode 100644 plumbing/format/packfile/disk_object.go (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index b1a0a26..edf386b 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -478,7 +478,7 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { if d.s.IsSeekable { - if offset, err := d.idx.FindOffset(h); err != nil { + if offset, err := d.idx.FindOffset(h); err == nil { return d.DecodeObjectAt(offset) } } diff --git a/plumbing/format/packfile/disk_object.go b/plumbing/format/packfile/disk_object.go new file mode 100644 index 0000000..d3e8520 --- /dev/null +++ b/plumbing/format/packfile/disk_object.go @@ -0,0 +1,64 @@ +package packfile + +import ( + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// DiskObject is an object from the packfile on disk. +type DiskObject struct { + hash plumbing.Hash + h *ObjectHeader + offset int64 + size int64 + typ plumbing.ObjectType + packfile *Packfile +} + +// NewDiskObject creates a new disk object. +func NewDiskObject( + hash plumbing.Hash, + finalType plumbing.ObjectType, + offset int64, + contentSize int64, + packfile *Packfile, +) *DiskObject { + return &DiskObject{ + hash: hash, + offset: offset, + size: contentSize, + typ: finalType, + packfile: packfile, + } +} + +// Reader implements the plumbing.EncodedObject interface. +func (o *DiskObject) Reader() (io.ReadCloser, error) { + return o.packfile.getObjectContent(o.offset) +} + +// SetSize implements the plumbing.EncodedObject interface. This method +// is a noop. +func (o *DiskObject) SetSize(int64) {} + +// SetType implements the plumbing.EncodedObject interface. This method is +// a noop. +func (o *DiskObject) SetType(plumbing.ObjectType) {} + +// Hash implements the plumbing.EncodedObject interface. +func (o *DiskObject) Hash() plumbing.Hash { return o.hash } + +// Size implements the plumbing.EncodedObject interface. +func (o *DiskObject) Size() int64 { return o.size } + +// Type implements the plumbing.EncodedObject interface. +func (o *DiskObject) Type() plumbing.ObjectType { + return o.typ +} + +// Writer implements the plumbing.EncodedObject interface. This method always +// returns a nil writer. 
+func (o *DiskObject) Writer() (io.WriteCloser, error) { + return nil, nil +} diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index cee6031..00014f6 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -17,7 +17,7 @@ type Packfile struct { billy.File s *Scanner deltaBaseCache cache.Object - offsetToHash map[int64]plumbing.Hash + offsetToType map[int64]plumbing.ObjectType } // NewPackfile returns a packfile representation for the given packfile file @@ -30,7 +30,7 @@ func NewPackfile(index idxfile.Index, file billy.File) *Packfile { file, s, cache.NewObjectLRUDefault(), - make(map[int64]plumbing.Hash), + make(map[int64]plumbing.ObjectType), } } @@ -47,8 +47,9 @@ func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { // GetByOffset retrieves the encoded object from the packfile with the given // offset. func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { - if h, ok := p.offsetToHash[o]; ok { - if obj, ok := p.deltaBaseCache.Get(h); ok { + hash, err := p.FindHash(o) + if err == nil { + if obj, ok := p.deltaBaseCache.Get(hash); ok { return obj, nil } } @@ -60,13 +61,166 @@ func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) { return p.nextObject() } -func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { +func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) { h, err := p.s.NextObjectHeader() + p.s.pendingObject = nil + return h, err +} + +func (p *Packfile) getObjectData( + h *ObjectHeader, +) (typ plumbing.ObjectType, size int64, err error) { + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + typ = h.Type + size = h.Length + case plumbing.REFDeltaObject, plumbing.OFSDeltaObject: + buf := bufPool.Get().(*bytes.Buffer) + buf.Reset() + defer bufPool.Put(buf) + + _, _, err = p.s.NextObject(buf) + if err != nil { + return + } + + delta := buf.Bytes() + _, delta = decodeLEB128(delta) // skip src size + sz, _ := decodeLEB128(delta) + size = int64(sz) + + var offset int64 + if h.Type == plumbing.REFDeltaObject { + offset, err = p.FindOffset(h.Reference) + if err != nil { + return + } + } else { + offset = h.OffsetReference + } + + if baseType, ok := p.offsetToType[offset]; ok { + typ = baseType + } else { + if _, err = p.s.SeekFromStart(offset); err != nil { + return + } + + h, err = p.nextObjectHeader() + if err != nil { + return + } + + typ, _, err = p.getObjectData(h) + if err != nil { + return + } + } + default: + err = ErrInvalidObject.AddDetails("type %q", h.Type) + } + + return +} + +func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) { + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + return h.Length, nil + case plumbing.REFDeltaObject, plumbing.OFSDeltaObject: + buf := bufPool.Get().(*bytes.Buffer) + buf.Reset() + defer bufPool.Put(buf) + + if _, _, err := p.s.NextObject(buf); err != nil { + return 0, err + } + + delta := buf.Bytes() + _, delta = decodeLEB128(delta) // skip src size + sz, _ := decodeLEB128(delta) + return int64(sz), nil + default: + return 0, ErrInvalidObject.AddDetails("type %q", h.Type) + } +} + +func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err error) { + switch h.Type { + case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: + return h.Type, nil + case plumbing.REFDeltaObject, plumbing.OFSDeltaObject: + var 
offset int64 + if h.Type == plumbing.REFDeltaObject { + offset, err = p.FindOffset(h.Reference) + if err != nil { + return + } + } else { + offset = h.OffsetReference + } + + if baseType, ok := p.offsetToType[offset]; ok { + typ = baseType + } else { + if _, err = p.s.SeekFromStart(offset); err != nil { + return + } + + h, err = p.nextObjectHeader() + if err != nil { + return + } + + typ, err = p.getObjectType(h) + if err != nil { + return + } + } + default: + err = ErrInvalidObject.AddDetails("type %q", h.Type) + } + + return +} + +func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { + h, err := p.nextObjectHeader() + if err != nil { + return nil, err + } + + hash, err := p.FindHash(h.Offset) + if err != nil { + return nil, err + } + + size, err := p.getObjectSize(h) if err != nil { return nil, err } - obj := new(plumbing.MemoryObject) + typ, err := p.getObjectType(h) + if err != nil { + return nil, err + } + + p.offsetToType[h.Offset] = typ + + return NewDiskObject(hash, typ, h.Offset, size, p), nil +} + +func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { + if _, err := p.s.SeekFromStart(offset); err != nil { + return nil, err + } + + h, err := p.nextObjectHeader() + if err != nil { + return nil, err + } + + var obj = new(plumbing.MemoryObject) obj.SetSize(h.Length) obj.SetType(h.Type) @@ -82,12 +236,10 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { } if err != nil { - return obj, err + return nil, err } - p.offsetToHash[h.Offset] = obj.Hash() - - return obj, nil + return obj.Reader() } func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error { @@ -132,9 +284,10 @@ func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset } var base plumbing.EncodedObject - h, ok := p.offsetToHash[offset] - if ok { - base, ok = p.cacheGet(h) + var ok bool + hash, err := p.FindHash(offset) + if err == nil { + base, ok = p.cacheGet(hash) } if !ok { @@ -173,9 +326,7 @@ func (p *Packfile) cachePut(obj plumbing.EncodedObject) { // The iterator returned is not thread-safe, it should be used in the same // thread as the Packfile instance. func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) { - s := NewScanner(p.File) - - _, count, err := s.Header() + entries, err := p.Entries() if err != nil { return nil, err } @@ -185,8 +336,14 @@ func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) { // instance. To not mess with the seeks, it's a new instance with a // different scanner but the same cache and offset to hash map for // reusing as much cache as possible. 
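+	//
+	// (Illustrative sketch, not part of the original patch.) Consuming
+	// every object in the pack through this iterator looks like:
+	//
+	//	iter, err := p.GetAll()
+	//	if err == nil {
+	//		err = iter.ForEach(func(obj plumbing.EncodedObject) error {
+	//			return nil // obj is lazy; nothing is inflated yet
+	//		})
+	//	}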
- d: &Packfile{p.Index, nil, s, p.deltaBaseCache, p.offsetToHash}, - count: int(count), + p: &Packfile{ + p.Index, + p.File, + NewScanner(p.File), + p.deltaBaseCache, + p.offsetToType, + }, + iter: entries, }, nil } @@ -214,18 +371,17 @@ type objectDecoder interface { } type objectIter struct { - d objectDecoder - count int - pos int + p *Packfile + iter idxfile.EntryIter } func (i *objectIter) Next() (plumbing.EncodedObject, error) { - if i.pos >= i.count { - return nil, io.EOF + e, err := i.iter.Next() + if err != nil { + return nil, err } - i.pos++ - return i.d.nextObject() + return i.p.GetByOffset(int64(e.Offset)) } func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error { @@ -245,5 +401,5 @@ func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error { } func (i *objectIter) Close() { - i.pos = i.count + i.iter.Close() } diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go index 10e4080..0d7a806 100644 --- a/plumbing/format/packfile/packfile_test.go +++ b/plumbing/format/packfile/packfile_test.go @@ -1,14 +1,18 @@ package packfile import ( + "bytes" "io" "math" + "io/ioutil" + . "gopkg.in/check.v1" "gopkg.in/src-d/go-billy.v4/osfs" fixtures "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + "gopkg.in/src-d/go-git.v4/storage/memory" ) type PackfileSuite struct { @@ -104,6 +108,48 @@ var expectedEntries = map[plumbing.Hash]int64{ plumbing.NewHash("fb72698cab7617ac416264415f13224dfd7a165e"): 84671, } +func (s *PackfileSuite) TestContent(c *C) { + storer := memory.NewObjectStorage() + decoder, err := NewDecoder(NewScanner(s.f.Packfile()), storer) + c.Assert(err, IsNil) + + _, err = decoder.Decode() + c.Assert(err, IsNil) + + iter, err := s.p.GetAll() + c.Assert(err, IsNil) + + for { + o, err := iter.Next() + if err == io.EOF { + break + } + c.Assert(err, IsNil) + + o2, err := storer.EncodedObject(plumbing.AnyObject, o.Hash()) + c.Assert(err, IsNil) + + c.Assert(o.Type(), Equals, o2.Type()) + c.Assert(o.Size(), Equals, o2.Size()) + + r, err := o.Reader() + c.Assert(err, IsNil) + + c1, err := ioutil.ReadAll(r) + c.Assert(err, IsNil) + c.Assert(r.Close(), IsNil) + + r, err = o2.Reader() + c.Assert(err, IsNil) + + c2, err := ioutil.ReadAll(r) + c.Assert(err, IsNil) + c.Assert(r.Close(), IsNil) + + c.Assert(bytes.Compare(c1, c2), Equals, 0) + } +} + func (s *PackfileSuite) SetUpTest(c *C) { s.f = fixtures.Basic().One() -- cgit From b4cd0899e24e0e8c7910bcdc33c96dc463dcb1e4 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Fri, 27 Jul 2018 18:31:40 +0200 Subject: plumbing/packfile: add index generation to decoder Signed-off-by: Javi Fontan --- plumbing/format/packfile/decoder.go | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index edf386b..d6bc0ef 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -66,6 +66,7 @@ type Decoder struct { // will be built incrementally while decoding. 
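+	//
+	// (Illustrative sketch, not part of the original patch.) The
+	// incremental build feeds an idxfile.Writer, using only calls that
+	// appear in this patch:
+	//
+	//	w := new(idxfile.Writer)
+	//	w.OnHeader(count)         // object count from the pack header
+	//	w.Add(hash, offset, crc)  // once per decoded object
+	//	w.OnFooter(packChecksum)  // pack checksum completes the index
+	//	idx, err := w.Index()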
hasBuiltIndex bool idx idxfile.Index + writer *idxfile.Writer offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType @@ -144,7 +145,17 @@ func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { return plumbing.ZeroHash, err } - return d.s.Checksum() + checksum, err = d.s.Checksum() + if err != nil { + return plumbing.ZeroHash, err + } + + if !d.hasBuiltIndex { + d.writer.OnFooter(checksum) + d.idx = d.Index() + } + + return checksum, err } func (d *Decoder) fillOffsetsToHashes() error { @@ -177,6 +188,8 @@ func (d *Decoder) doDecode() error { if !d.hasBuiltIndex { // TODO: MemoryIndex is not writable, change to something else d.idx = idxfile.NewMemoryIndex() + d.writer = new(idxfile.Writer) + d.writer.OnHeader(count) } defer func() { d.hasBuiltIndex = true }() @@ -329,14 +342,15 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error obj.SetSize(h.Length) obj.SetType(h.Type) + var crc uint32 var err error switch h.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: - _, err = d.fillRegularObjectContent(obj) + crc, err = d.fillRegularObjectContent(obj) case plumbing.REFDeltaObject: - _, err = d.fillREFDeltaObjectContent(obj, h.Reference) + crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) case plumbing.OFSDeltaObject: - _, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) + crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) default: err = ErrInvalidObject.AddDetails("type %q", h.Type) } @@ -345,6 +359,10 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error return obj, err } + if !d.hasBuiltIndex { + d.writer.Add(obj.Hash(), uint64(h.Offset), crc) + } + d.offsetToHash[h.Offset] = obj.Hash() return obj, nil @@ -468,9 +486,9 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { return d.DecodeObjectAt(o) } - hash, err := d.idx.FindHash(o) - if err != nil { - return nil, err + hash, ok := d.offsetToHash[o] + if !ok { + return nil, plumbing.ErrObjectNotFound } return d.recallByHashNonSeekable(hash) -- cgit From 6f7fc05543861ee074aa17f75e1d1b5c1b948d48 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Mon, 30 Jul 2018 17:11:01 +0200 Subject: plumbing: packfile, fix package tests Signed-off-by: Miguel Molina --- plumbing/format/packfile/decoder.go | 28 ++++++++---- plumbing/format/packfile/decoder_test.go | 72 +++++++++++++++++++++---------- plumbing/format/packfile/packfile_test.go | 2 +- 3 files changed, 70 insertions(+), 32 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index d6bc0ef..6bb0677 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -122,6 +122,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, deltaBaseCache: cacheObject, idx: idxfile.NewMemoryIndex(), + writer: new(idxfile.Writer), offsetToType: make(map[int64]plumbing.ObjectType), offsetToHash: make(map[int64]plumbing.Hash), decoderType: t, @@ -152,7 +153,12 @@ func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { if !d.hasBuiltIndex { d.writer.OnFooter(checksum) - d.idx = d.Index() + + idx, err := d.writer.Index() + if err != nil { + return plumbing.ZeroHash, err + } + d.SetIndex(idx) } return checksum, err @@ -186,12 +192,8 @@ func (d *Decoder) doDecode() error { } if !d.hasBuiltIndex { - // TODO: MemoryIndex is not writable, change to something else - d.idx = idxfile.NewMemoryIndex() 
- d.writer = new(idxfile.Writer) d.writer.OnHeader(count) } - defer func() { d.hasBuiltIndex = true }() if d.hasBuiltIndex && !d.s.IsSeekable { if err := d.fillOffsetsToHashes(); err != nil { @@ -202,12 +204,18 @@ func (d *Decoder) doDecode() error { _, isTxStorer := d.o.(storer.Transactioner) switch { case d.o == nil: - return d.decodeObjects(int(count)) + err = d.decodeObjects(int(count)) case isTxStorer: - return d.decodeObjectsWithObjectStorerTx(int(count)) + err = d.decodeObjectsWithObjectStorerTx(int(count)) default: - return d.decodeObjectsWithObjectStorer(int(count)) + err = d.decodeObjectsWithObjectStorer(int(count)) + } + + if err != nil { + return err } + + return nil } func (d *Decoder) decodeObjects(count int) error { @@ -509,8 +517,10 @@ func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.EncodedObject, err error) { if d.tx != nil { obj, err = d.tx.EncodedObject(plumbing.AnyObject, h) - } else { + } else if d.o != nil { obj, err = d.o.EncodedObject(plumbing.AnyObject, h) + } else { + return nil, plumbing.ErrObjectNotFound } if err != plumbing.ErrObjectNotFound { diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go index 4fe9b5e..d4f7145 100644 --- a/plumbing/format/packfile/decoder_test.go +++ b/plumbing/format/packfile/decoder_test.go @@ -5,6 +5,7 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" @@ -46,7 +47,6 @@ func (s *ReaderSuite) TestDecode(c *C) { }) } -/* func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) { f := fixtures.Basic().ByTag("ref-delta").One() @@ -101,9 +101,7 @@ func (s *ReaderSuite) TestDecodeByTypeRefDeltaError(c *C) { }) } -*/ -/* func (s *ReaderSuite) TestDecodeByType(c *C) { ts := []plumbing.ObjectType{ plumbing.CommitObject, @@ -142,7 +140,6 @@ func (s *ReaderSuite) TestDecodeByType(c *C) { } }) } -*/ func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) { f := fixtures.Basic().ByTag("packfile").One() @@ -184,7 +181,7 @@ func (s *ReaderSuite) TestDecodeMultipleTimes(c *C) { func (s *ReaderSuite) TestDecodeInMemory(c *C) { fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) + d, err := packfile.NewDecoder(scanner, memory.NewStorage()) c.Assert(err, IsNil) ch, err := d.Decode() @@ -284,7 +281,6 @@ var expectedHashes = []string{ "7e59600739c96546163833214c36459e324bad0a", } -/* func (s *ReaderSuite) TestDecodeCRCs(c *C) { f := fixtures.Basic().ByTag("ofs-delta").One() @@ -297,8 +293,16 @@ func (s *ReaderSuite) TestDecodeCRCs(c *C) { c.Assert(err, IsNil) var sum uint64 - idx := d.Index().ToIdxFile() - for _, e := range idx.Entries { + iter, err := d.Index().Entries() + c.Assert(err, IsNil) + + for { + e, err := iter.Next() + if err == io.EOF { + break + } + + c.Assert(err, IsNil) sum += uint64(e.CRC32) } @@ -349,12 +353,30 @@ func (s *ReaderSuite) TestIndex(c *C) { d, err := packfile.NewDecoder(scanner, nil) c.Assert(err, IsNil) - c.Assert(d.Index().ToIdxFile().Entries, HasLen, 0) + c.Assert(indexEntries(c, d), Equals, 0) _, err = d.Decode() c.Assert(err, IsNil) - c.Assert(len(d.Index().ToIdxFile().Entries), Equals, 31) + c.Assert(indexEntries(c, d), Equals, 31) +} + 
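+
+// (Illustrative sketch, not part of the original patch.) indexEntries
+// below drains the idxfile.EntryIter; the same pattern works for any
+// consumer of an index:
+//
+//	entries, _ := idx.Entries()
+//	for {
+//		e, err := entries.Next()
+//		if err == io.EOF {
+//			break
+//		}
+//		_ = e // e.Hash, e.Offset, e.CRC32
+//	}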
+func indexEntries(c *C, d *packfile.Decoder) int { + var count int + entries, err := d.Index().Entries() + c.Assert(err, IsNil) + + for { + _, err := entries.Next() + if err == io.EOF { + break + } + + c.Assert(err, IsNil) + count++ + } + + return count } func (s *ReaderSuite) TestSetIndex(c *C) { @@ -363,18 +385,25 @@ func (s *ReaderSuite) TestSetIndex(c *C) { d, err := packfile.NewDecoder(scanner, nil) c.Assert(err, IsNil) - idx := packfile.NewIndex(1) + w := new(idxfile.Writer) h := plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5") - idx.Add(h, uint64(42), 0) + w.Add(h, uint64(42), 0) + w.OnFooter(plumbing.ZeroHash) + + var idx idxfile.Index + idx, err = w.Index() + c.Assert(err, IsNil) d.SetIndex(idx) - idxf := d.Index().ToIdxFile() - c.Assert(idxf.Entries, HasLen, 1) - c.Assert(idxf.Entries[0].Offset, Equals, uint64(42)) -}*/ + idx = d.Index() + c.Assert(indexEntries(c, d), Equals, 1) -func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { + offset, err := idx.FindOffset(h) + c.Assert(err, IsNil) + c.Assert(offset, Equals, int64(42)) +} +func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { i, err := s.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) @@ -390,13 +419,12 @@ func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { } } -/* -func getIndexFromIdxFile(r io.Reader) *packfile.Index { - idxf := idxfile.NewIdxfile() +func getIndexFromIdxFile(r io.Reader) idxfile.Index { + idxf := idxfile.NewMemoryIndex() d := idxfile.NewDecoder(r) if err := d.Decode(idxf); err != nil { panic(err) } - return packfile.NewIndexFromIdxFile(idxf) -}*/ + return idxf +} diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go index 0d7a806..a17a483 100644 --- a/plumbing/format/packfile/packfile_test.go +++ b/plumbing/format/packfile/packfile_test.go @@ -109,7 +109,7 @@ var expectedEntries = map[plumbing.Hash]int64{ } func (s *PackfileSuite) TestContent(c *C) { - storer := memory.NewObjectStorage() + storer := memory.NewStorage() decoder, err := NewDecoder(NewScanner(s.f.Packfile()), storer) c.Assert(err, IsNil) -- cgit From 6a24b4c1f0cb9e5daf30fa7979f2643a967af1ad Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Tue, 7 Aug 2018 18:41:19 +0200 Subject: *: use parser to populate non writable storages and bug fixes Signed-off-by: Miguel Molina --- plumbing/format/packfile/common.go | 79 +++- plumbing/format/packfile/decoder.go | 553 ---------------------- plumbing/format/packfile/decoder_test.go | 430 ----------------- plumbing/format/packfile/encoder_advanced_test.go | 37 +- plumbing/format/packfile/encoder_test.go | 110 +++-- plumbing/format/packfile/packfile.go | 135 ++++-- plumbing/format/packfile/packfile_test.go | 169 +++++-- plumbing/format/packfile/parser.go | 130 +++-- plumbing/format/packfile/parser_test.go | 2 +- 9 files changed, 489 insertions(+), 1156 deletions(-) delete mode 100644 plumbing/format/packfile/decoder.go delete mode 100644 plumbing/format/packfile/decoder_test.go (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/common.go b/plumbing/format/packfile/common.go index beb015d..76254f0 100644 --- a/plumbing/format/packfile/common.go +++ b/plumbing/format/packfile/common.go @@ -2,9 +2,11 @@ package packfile import ( "bytes" + "errors" "io" "sync" + "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/utils/ioutil" ) @@ -23,24 +25,24 @@ const ( maskType = uint8(112) // 0111 0000 ) 
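+
+// (Illustrative note, not part of the original patch.) maskType selects
+// the three type bits of the first byte of a packfile object header
+// (0111 0000), so a decoder recovers the type with:
+//
+//	typ := plumbing.ObjectType((firstByte & maskType) >> 4)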
-// UpdateObjectStorage updates the given storer.EncodedObjectStorer with the contents of the +// UpdateObjectStorage updates the storer with the objects in the given // packfile. -func UpdateObjectStorage(s storer.EncodedObjectStorer, packfile io.Reader) error { - if sw, ok := s.(storer.PackfileWriter); ok { - return writePackfileToObjectStorage(sw, packfile) +func UpdateObjectStorage(s storer.Storer, packfile io.Reader) error { + if pw, ok := s.(storer.PackfileWriter); ok { + return WritePackfileToObjectStorage(pw, packfile) } - stream := NewScanner(packfile) - d, err := NewDecoder(stream, s) - if err != nil { - return err - } - - _, err = d.Decode() + updater := newPackfileStorageUpdater(s) + _, err := NewParser(NewScanner(packfile), updater).Parse() return err } -func writePackfileToObjectStorage(sw storer.PackfileWriter, packfile io.Reader) (err error) { +// WritePackfileToObjectStorage writes all the packfile objects into the given +// object storage. +func WritePackfileToObjectStorage( + sw storer.PackfileWriter, + packfile io.Reader, +) (err error) { w, err := sw.PackfileWriter() if err != nil { return err @@ -56,3 +58,56 @@ var bufPool = sync.Pool{ return bytes.NewBuffer(nil) }, } + +var errMissingObjectContent = errors.New("missing object content") + +type packfileStorageUpdater struct { + storer.Storer + lastSize int64 + lastType plumbing.ObjectType +} + +func newPackfileStorageUpdater(s storer.Storer) *packfileStorageUpdater { + return &packfileStorageUpdater{Storer: s} +} + +func (p *packfileStorageUpdater) OnHeader(count uint32) error { + return nil +} + +func (p *packfileStorageUpdater) OnInflatedObjectHeader( + t plumbing.ObjectType, + objSize int64, + pos int64, +) error { + if p.lastSize > 0 || p.lastType != plumbing.InvalidObject { + return errMissingObjectContent + } + + p.lastType = t + p.lastSize = objSize + return nil +} + +func (p *packfileStorageUpdater) OnInflatedObjectContent( + h plumbing.Hash, + pos int64, + crc uint32, + content []byte, +) error { + obj := new(plumbing.MemoryObject) + obj.SetSize(p.lastSize) + obj.SetType(p.lastType) + if _, err := obj.Write(content); err != nil { + return err + } + + _, err := p.SetEncodedObject(obj) + p.lastSize = 0 + p.lastType = plumbing.InvalidObject + return err +} + +func (p *packfileStorageUpdater) OnFooter(h plumbing.Hash) error { + return nil +} diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go deleted file mode 100644 index 6bb0677..0000000 --- a/plumbing/format/packfile/decoder.go +++ /dev/null @@ -1,553 +0,0 @@ -package packfile - -import ( - "bytes" - "io" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/cache" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" - "gopkg.in/src-d/go-git.v4/plumbing/storer" -) - -// Format specifies if the packfile uses ref-deltas or ofs-deltas. -type Format int - -// Possible values of the Format type. -const ( - UnknownFormat Format = iota - OFSDeltaFormat - REFDeltaFormat -) - -var ( - // ErrMaxObjectsLimitReached is returned by Decode when the number - // of objects in the packfile is higher than - // Decoder.MaxObjectsLimit. - ErrMaxObjectsLimitReached = NewError("max. objects limit reached") - // ErrInvalidObject is returned by Decode when an invalid object is - // found in the packfile. - ErrInvalidObject = NewError("invalid git object") - // ErrPackEntryNotFound is returned by Decode when a reference in - // the packfile references and unknown object. 
- ErrPackEntryNotFound = NewError("can't find a pack entry") - // ErrZLib is returned by Decode when there was an error unzipping - // the packfile contents. - ErrZLib = NewError("zlib reading error") - // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object - // to recall cannot be returned. - ErrCannotRecall = NewError("cannot recall object") - // ErrResolveDeltasNotSupported is returned if a NewDecoder is used with a - // non-seekable scanner and without a plumbing.ObjectStorage - ErrResolveDeltasNotSupported = NewError("resolve delta is not supported") - // ErrNonSeekable is returned if a ReadObjectAt method is called without a - // seekable scanner - ErrNonSeekable = NewError("non-seekable scanner") - // ErrRollback error making Rollback over a transaction after an error - ErrRollback = NewError("rollback error, during set error") - // ErrAlreadyDecoded is returned if NewDecoder is called for a second time - ErrAlreadyDecoded = NewError("packfile was already decoded") -) - -// Decoder reads and decodes packfiles from an input Scanner, if an ObjectStorer -// was provided the decoded objects are store there. If not the decode object -// is destroyed. The Offsets and CRCs are calculated whether an -// ObjectStorer was provided or not. -type Decoder struct { - deltaBaseCache cache.Object - - s *Scanner - o storer.EncodedObjectStorer - tx storer.Transaction - - isDecoded bool - - // hasBuiltIndex indicates if the index is fully built or not. If it is not, - // will be built incrementally while decoding. - hasBuiltIndex bool - idx idxfile.Index - writer *idxfile.Writer - - offsetToType map[int64]plumbing.ObjectType - decoderType plumbing.ObjectType - offsetToHash map[int64]plumbing.Hash -} - -// NewDecoder returns a new Decoder that decodes a Packfile using the given -// Scanner and stores the objects in the provided EncodedObjectStorer. ObjectStorer can be nil, in this -// If the passed EncodedObjectStorer is nil, objects are not stored, but -// offsets on the Packfile and CRCs are calculated. -// -// If EncodedObjectStorer is nil and the Scanner is not Seekable, ErrNonSeekable is -// returned. -// -// If the ObjectStorer implements storer.Transactioner, a transaction is created -// during the Decode execution. If anything fails, Rollback is called -func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) { - return NewDecoderForType(s, o, plumbing.AnyObject, - cache.NewObjectLRUDefault()) -} - -// NewDecoderWithCache is a version of NewDecoder where cache can be specified. -func NewDecoderWithCache(s *Scanner, o storer.EncodedObjectStorer, - cacheObject cache.Object) (*Decoder, error) { - - return NewDecoderForType(s, o, plumbing.AnyObject, cacheObject) -} - -// NewDecoderForType returns a new Decoder but in this case for a specific object type. -// When an object is read using this Decoder instance and it is not of the same type of -// the specified one, nil will be returned. This is intended to avoid the content -// deserialization of all the objects. -// -// cacheObject is a cache.Object implementation that is used to speed up the -// process. If cache is not needed you can pass nil. To create an LRU cache -// object with the default size you can use the helper cache.ObjectLRUDefault(). 
-func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, - t plumbing.ObjectType, cacheObject cache.Object) (*Decoder, error) { - - if t == plumbing.OFSDeltaObject || - t == plumbing.REFDeltaObject || - t == plumbing.InvalidObject { - return nil, plumbing.ErrInvalidType - } - - if !canResolveDeltas(s, o) { - return nil, ErrResolveDeltasNotSupported - } - - return &Decoder{ - s: s, - o: o, - deltaBaseCache: cacheObject, - - idx: idxfile.NewMemoryIndex(), - writer: new(idxfile.Writer), - offsetToType: make(map[int64]plumbing.ObjectType), - offsetToHash: make(map[int64]plumbing.Hash), - decoderType: t, - }, nil -} - -func canResolveDeltas(s *Scanner, o storer.EncodedObjectStorer) bool { - return s.IsSeekable || o != nil -} - -// Decode reads a packfile and stores it in the value pointed to by s. The -// offsets and the CRCs are calculated by this method -func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { - defer func() { d.isDecoded = true }() - - if d.isDecoded { - return plumbing.ZeroHash, ErrAlreadyDecoded - } - - if err := d.doDecode(); err != nil { - return plumbing.ZeroHash, err - } - - checksum, err = d.s.Checksum() - if err != nil { - return plumbing.ZeroHash, err - } - - if !d.hasBuiltIndex { - d.writer.OnFooter(checksum) - - idx, err := d.writer.Index() - if err != nil { - return plumbing.ZeroHash, err - } - d.SetIndex(idx) - } - - return checksum, err -} - -func (d *Decoder) fillOffsetsToHashes() error { - entries, err := d.idx.Entries() - if err != nil { - return err - } - - for { - e, err := entries.Next() - if err != nil { - if err == io.EOF { - break - } - return err - } - - d.offsetToHash[int64(e.Offset)] = e.Hash - } - - return entries.Close() -} - -func (d *Decoder) doDecode() error { - _, count, err := d.s.Header() - if err != nil { - return err - } - - if !d.hasBuiltIndex { - d.writer.OnHeader(count) - } - - if d.hasBuiltIndex && !d.s.IsSeekable { - if err := d.fillOffsetsToHashes(); err != nil { - return err - } - } - - _, isTxStorer := d.o.(storer.Transactioner) - switch { - case d.o == nil: - err = d.decodeObjects(int(count)) - case isTxStorer: - err = d.decodeObjectsWithObjectStorerTx(int(count)) - default: - err = d.decodeObjectsWithObjectStorer(int(count)) - } - - if err != nil { - return err - } - - return nil -} - -func (d *Decoder) decodeObjects(count int) error { - for i := 0; i < count; i++ { - if _, err := d.DecodeObject(); err != nil { - return err - } - } - - return nil -} - -func (d *Decoder) decodeObjectsWithObjectStorer(count int) error { - for i := 0; i < count; i++ { - obj, err := d.DecodeObject() - if err != nil { - return err - } - - if _, err := d.o.SetEncodedObject(obj); err != nil { - return err - } - } - - return nil -} - -func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error { - d.tx = d.o.(storer.Transactioner).Begin() - - for i := 0; i < count; i++ { - obj, err := d.DecodeObject() - if err != nil { - return err - } - - if _, err := d.tx.SetEncodedObject(obj); err != nil { - if rerr := d.tx.Rollback(); rerr != nil { - return ErrRollback.AddDetails( - "error: %s, during tx.Set error: %s", rerr, err, - ) - } - - return err - } - - } - - return d.tx.Commit() -} - -// DecodeObject reads the next object from the scanner and returns it. This -// method can be used in replacement of the Decode method, to work in a -// interactive way. 
If you created a new decoder instance using NewDecoderForType -// constructor, if the object decoded is not equals to the specified one, nil will -// be returned -func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) { - return d.doDecodeObject(d.decoderType) -} - -func (d *Decoder) doDecodeObject(t plumbing.ObjectType) (plumbing.EncodedObject, error) { - h, err := d.s.NextObjectHeader() - if err != nil { - return nil, err - } - - if t == plumbing.AnyObject { - return d.decodeByHeader(h) - } - - return d.decodeIfSpecificType(h) -} - -func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) { - var ( - obj plumbing.EncodedObject - realType plumbing.ObjectType - err error - ) - switch h.Type { - case plumbing.OFSDeltaObject: - realType, err = d.ofsDeltaType(h.OffsetReference) - case plumbing.REFDeltaObject: - realType, err = d.refDeltaType(h.Reference) - if err == plumbing.ErrObjectNotFound { - obj, err = d.decodeByHeader(h) - if err != nil { - realType = obj.Type() - } - } - default: - realType = h.Type - } - - if err != nil { - return nil, err - } - - d.offsetToType[h.Offset] = realType - - if d.decoderType == realType { - if obj != nil { - return obj, nil - } - - return d.decodeByHeader(h) - } - - return nil, nil -} - -func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) { - t, ok := d.offsetToType[offset] - if !ok { - return plumbing.InvalidObject, plumbing.ErrObjectNotFound - } - - return t, nil -} - -func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) { - offset, err := d.idx.FindOffset(ref) - if err != nil { - return plumbing.InvalidObject, plumbing.ErrObjectNotFound - } - - return d.ofsDeltaType(offset) -} - -func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) { - obj := d.newObject() - obj.SetSize(h.Length) - obj.SetType(h.Type) - - var crc uint32 - var err error - switch h.Type { - case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: - crc, err = d.fillRegularObjectContent(obj) - case plumbing.REFDeltaObject: - crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) - case plumbing.OFSDeltaObject: - crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) - default: - err = ErrInvalidObject.AddDetails("type %q", h.Type) - } - - if err != nil { - return obj, err - } - - if !d.hasBuiltIndex { - d.writer.Add(obj.Hash(), uint64(h.Offset), crc) - } - - d.offsetToHash[h.Offset] = obj.Hash() - - return obj, nil -} - -func (d *Decoder) newObject() plumbing.EncodedObject { - if d.o == nil { - return &plumbing.MemoryObject{} - } - - return d.o.NewEncodedObject() -} - -// DecodeObjectAt reads an object at the given location. Every EncodedObject -// returned is added into a internal index. This is intended to be able to regenerate -// objects from deltas (offset deltas or reference deltas) without an package index -// (.idx file). If Decode wasn't called previously objects offset should provided -// using the SetOffsets method. It decodes the object regardless of the Decoder -// type. 
-func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) { - if !d.s.IsSeekable { - return nil, ErrNonSeekable - } - - beforeJump, err := d.s.SeekFromStart(offset) - if err != nil { - return nil, err - } - - defer func() { - _, seekErr := d.s.SeekFromStart(beforeJump) - if err == nil { - err = seekErr - } - }() - - return d.doDecodeObject(plumbing.AnyObject) -} - -func (d *Decoder) fillRegularObjectContent(obj plumbing.EncodedObject) (uint32, error) { - w, err := obj.Writer() - if err != nil { - return 0, err - } - - _, crc, err := d.s.NextObject(w) - return crc, err -} - -func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) (uint32, error) { - buf := bufPool.Get().(*bytes.Buffer) - buf.Reset() - _, crc, err := d.s.NextObject(buf) - if err != nil { - return 0, err - } - - base, ok := d.cacheGet(ref) - if !ok { - base, err = d.recallByHash(ref) - if err != nil { - return 0, err - } - } - - obj.SetType(base.Type()) - err = ApplyDelta(obj, base, buf.Bytes()) - d.cachePut(obj) - bufPool.Put(buf) - - return crc, err -} - -func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) { - buf := bytes.NewBuffer(nil) - _, crc, err := d.s.NextObject(buf) - if err != nil { - return 0, err - } - - h, ok := d.offsetToHash[offset] - var base plumbing.EncodedObject - if ok { - base, ok = d.cacheGet(h) - } - - if !ok { - base, err = d.recallByOffset(offset) - if err != nil { - return 0, err - } - - d.cachePut(base) - } - - obj.SetType(base.Type()) - err = ApplyDelta(obj, base, buf.Bytes()) - d.cachePut(obj) - - return crc, err -} - -func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { - if d.deltaBaseCache == nil { - return nil, false - } - - return d.deltaBaseCache.Get(h) -} - -func (d *Decoder) cachePut(obj plumbing.EncodedObject) { - if d.deltaBaseCache == nil { - return - } - - d.deltaBaseCache.Put(obj) -} - -func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { - if d.s.IsSeekable { - return d.DecodeObjectAt(o) - } - - hash, ok := d.offsetToHash[o] - if !ok { - return nil, plumbing.ErrObjectNotFound - } - - return d.recallByHashNonSeekable(hash) -} - -func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { - if d.s.IsSeekable { - if offset, err := d.idx.FindOffset(h); err == nil { - return d.DecodeObjectAt(offset) - } - } - - return d.recallByHashNonSeekable(h) -} - -// recallByHashNonSeekable if we are in a transaction the objects are read from -// the transaction, if not are directly read from the ObjectStorer -func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.EncodedObject, err error) { - if d.tx != nil { - obj, err = d.tx.EncodedObject(plumbing.AnyObject, h) - } else if d.o != nil { - obj, err = d.o.EncodedObject(plumbing.AnyObject, h) - } else { - return nil, plumbing.ErrObjectNotFound - } - - if err != plumbing.ErrObjectNotFound { - return obj, err - } - - return nil, plumbing.ErrObjectNotFound -} - -// SetIndex sets an index for the packfile. It is recommended to set this. -// The index might be read from a file or reused from a previous Decoder usage -// (see Index function). -func (d *Decoder) SetIndex(idx idxfile.Index) { - d.hasBuiltIndex = true - d.idx = idx -} - -// Index returns the index for the packfile. If index was set with SetIndex, -// Index will return it. Otherwise, it will return an index that is built while -// decoding. 
If neither SetIndex was called with a full index or Decode called -// for the whole packfile, then the returned index will be incomplete. -func (d *Decoder) Index() idxfile.Index { - return d.idx -} - -// Close closes the Scanner. usually this mean that the whole reader is read and -// discarded -func (d *Decoder) Close() error { - return d.s.Close() -} diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go deleted file mode 100644 index d4f7145..0000000 --- a/plumbing/format/packfile/decoder_test.go +++ /dev/null @@ -1,430 +0,0 @@ -package packfile_test - -import ( - "io" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/cache" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" - "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" - "gopkg.in/src-d/go-git.v4/plumbing/storer" - "gopkg.in/src-d/go-git.v4/storage/filesystem" - "gopkg.in/src-d/go-git.v4/storage/memory" - - . "gopkg.in/check.v1" - "gopkg.in/src-d/go-billy.v4/memfs" - "gopkg.in/src-d/go-git-fixtures.v3" -) - -type ReaderSuite struct { - fixtures.Suite -} - -var _ = Suite(&ReaderSuite{}) - -func (s *ReaderSuite) TestNewDecodeNonSeekable(c *C) { - scanner := packfile.NewScanner(nil) - d, err := packfile.NewDecoder(scanner, nil) - - c.Assert(d, IsNil) - c.Assert(err, NotNil) -} - -func (s *ReaderSuite) TestDecode(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) { - f := fixtures.Basic().ByTag("ref-delta").One() - - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, plumbing.CommitObject, - cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - // Index required to decode by ref-delta. 
- d.SetIndex(getIndexFromIdxFile(f.Idx())) - - defer d.Close() - - _, count, err := scanner.Header() - c.Assert(err, IsNil) - - var i uint32 - for i = 0; i < count; i++ { - obj, err := d.DecodeObject() - c.Assert(err, IsNil) - - if obj != nil { - c.Assert(obj.Type(), Equals, plumbing.CommitObject) - } - } -} - -func (s *ReaderSuite) TestDecodeByTypeRefDeltaError(c *C) { - fixtures.Basic().ByTag("ref-delta").Test(c, func(f *fixtures.Fixture) { - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, - plumbing.CommitObject, cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - defer d.Close() - - _, count, err := scanner.Header() - c.Assert(err, IsNil) - - isError := false - var i uint32 - for i = 0; i < count; i++ { - _, err := d.DecodeObject() - if err != nil { - isError = true - break - } - } - c.Assert(isError, Equals, true) - }) - -} - -func (s *ReaderSuite) TestDecodeByType(c *C) { - ts := []plumbing.ObjectType{ - plumbing.CommitObject, - plumbing.TagObject, - plumbing.TreeObject, - plumbing.BlobObject, - } - - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - for _, t := range ts { - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, t, - cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - d.SetIndex(getIndexFromIdxFile(f.Idx())) - } - - defer d.Close() - - _, count, err := scanner.Header() - c.Assert(err, IsNil) - - var i uint32 - for i = 0; i < count; i++ { - obj, err := d.DecodeObject() - c.Assert(err, IsNil) - - if obj != nil { - c.Assert(obj.Type(), Equals, t) - } - } - } - }) -} - -func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) { - f := fixtures.Basic().ByTag("packfile").One() - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - - _, err := packfile.NewDecoderForType(scanner, storage, - plumbing.OFSDeltaObject, cache.NewObjectLRUDefault()) - c.Assert(err, Equals, plumbing.ErrInvalidType) - - _, err = packfile.NewDecoderForType(scanner, storage, - plumbing.REFDeltaObject, cache.NewObjectLRUDefault()) - - c.Assert(err, Equals, plumbing.ErrInvalidType) - - _, err = packfile.NewDecoderForType(scanner, storage, plumbing.InvalidObject, - cache.NewObjectLRUDefault()) - c.Assert(err, Equals, plumbing.ErrInvalidType) -} - -func (s *ReaderSuite) TestDecodeMultipleTimes(c *C) { - f := fixtures.Basic().ByTag("packfile").One() - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - ch, err = d.Decode() - c.Assert(err, Equals, packfile.ErrAlreadyDecoded) - c.Assert(ch, Equals, plumbing.ZeroHash) -} - -func (s *ReaderSuite) TestDecodeInMemory(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, memory.NewStorage()) - c.Assert(err, IsNil) - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - }) -} - -type nonSeekableReader struct { - r io.Reader -} - -func (nsr nonSeekableReader) Read(b []byte) (int, error) { - return nsr.r.Read(b) -} - -func (s *ReaderSuite) TestDecodeNoSeekableWithTxStorer(c *C) { - 
fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - reader := nonSeekableReader{ - r: f.Packfile(), - } - - scanner := packfile.NewScanner(reader) - - var storage storer.EncodedObjectStorer = memory.NewStorage() - _, isTxStorer := storage.(storer.Transactioner) - c.Assert(isTxStorer, Equals, true) - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -func (s *ReaderSuite) TestDecodeNoSeekableWithoutTxStorer(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - reader := nonSeekableReader{ - r: f.Packfile(), - } - - scanner := packfile.NewScanner(reader) - - var storage storer.EncodedObjectStorer - storage, _ = filesystem.NewStorage(memfs.New()) - _, isTxStorer := storage.(storer.Transactioner) - c.Assert(isTxStorer, Equals, false) - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -var expectedHashes = []string{ - "918c48b83bd081e863dbe1b80f8998f058cd8294", - "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", - "1669dce138d9b841a518c64b10914d88f5e488ea", - "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", - "b8e471f58bcbca63b07bda20e428190409c2db47", - "35e85108805c84807bc66a02d91535e1e24b38b9", - "b029517f6300c2da0f4b651b8642506cd6aaf45d", - "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", - "d3ff53e0564a9f87d8e84b6e28e5060e517008aa", - "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", - "d5c0f4ab811897cadf03aec358ae60d21f91c50d", - "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", - "cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", - "9dea2395f5403188298c1dabe8bdafe562c491e3", - "586af567d0bb5e771e49bdd9434f5e0fb76d25fa", - "9a48f23120e880dfbe41f7c9b7b708e9ee62a492", - "5a877e6a906a2743ad6e45d99c1793642aaf8eda", - "c8f1d8c61f9da76f4cb49fd86322b6e685dba956", - "a8d315b2b1c615d43042c3a62402b8a54288cf5c", - "a39771a7651f97faf5c72e08224d857fc35133db", - "880cd14280f4b9b6ed3986d6671f907d7cc2a198", - "fb72698cab7617ac416264415f13224dfd7a165e", - "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", - "eba74343e2f15d62adedfd8c883ee0262b5c8021", - "c2d30fa8ef288618f65f6eed6e168e0d514886f4", - "8dcef98b1d52143e1e2dbc458ffe38f925786bf2", - "aa9b383c260e1d05fbbf6b30a02914555e20c725", - "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", - "dbd3641b371024f44d0e469a9c8f5457b0660de1", - "e8d3ffab552895c19b9fcf7aa264d277cde33881", - "7e59600739c96546163833214c36459e324bad0a", -} - -func (s *ReaderSuite) TestDecodeCRCs(c *C) { - f := fixtures.Basic().ByTag("ofs-delta").One() - - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - _, err = d.Decode() - c.Assert(err, IsNil) - - var sum uint64 - iter, err := d.Index().Entries() - c.Assert(err, IsNil) - - for { - e, err := iter.Next() - if err == io.EOF { - break - } - - c.Assert(err, IsNil) - sum += uint64(e.CRC32) - } - - c.Assert(int(sum), Equals, 78022211966) -} - -func (s *ReaderSuite) TestDecodeObjectAt(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - d.SetIndex(getIndexFromIdxFile(f.Idx())) - 
} - - // the objects at reference 186, is a delta, so should be recall, - // without being read before. - obj, err := d.DecodeObjectAt(186) - c.Assert(err, IsNil) - c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5") -} - -func (s *ReaderSuite) TestDecodeObjectAtForType(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, nil, plumbing.TreeObject, - cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - d.SetIndex(getIndexFromIdxFile(f.Idx())) - } - - // the objects at reference 186, is a delta, so should be recall, - // without being read before. - obj, err := d.DecodeObjectAt(186) - c.Assert(err, IsNil) - c.Assert(obj.Type(), Equals, plumbing.CommitObject) - c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5") -} - -func (s *ReaderSuite) TestIndex(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - c.Assert(indexEntries(c, d), Equals, 0) - - _, err = d.Decode() - c.Assert(err, IsNil) - - c.Assert(indexEntries(c, d), Equals, 31) -} - -func indexEntries(c *C, d *packfile.Decoder) int { - var count int - entries, err := d.Index().Entries() - c.Assert(err, IsNil) - - for { - _, err := entries.Next() - if err == io.EOF { - break - } - - c.Assert(err, IsNil) - count++ - } - - return count -} - -func (s *ReaderSuite) TestSetIndex(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - w := new(idxfile.Writer) - h := plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5") - w.Add(h, uint64(42), 0) - w.OnFooter(plumbing.ZeroHash) - - var idx idxfile.Index - idx, err = w.Index() - c.Assert(err, IsNil) - d.SetIndex(idx) - - idx = d.Index() - c.Assert(indexEntries(c, d), Equals, 1) - - offset, err := idx.FindOffset(h) - c.Assert(err, IsNil) - c.Assert(offset, Equals, int64(42)) -} - -func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { - i, err := s.IterEncodedObjects(plumbing.AnyObject) - c.Assert(err, IsNil) - - var count int - err = i.ForEach(func(plumbing.EncodedObject) error { count++; return nil }) - c.Assert(err, IsNil) - c.Assert(count, Equals, len(expects)) - - for _, exp := range expects { - obt, err := s.EncodedObject(plumbing.AnyObject, plumbing.NewHash(exp)) - c.Assert(err, IsNil) - c.Assert(obt.Hash().String(), Equals, exp) - } -} - -func getIndexFromIdxFile(r io.Reader) idxfile.Index { - idxf := idxfile.NewMemoryIndex() - d := idxfile.NewDecoder(r) - if err := d.Decode(idxf); err != nil { - panic(err) - } - - return idxf -} diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index 8cc7180..6ffebc2 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -2,14 +2,16 @@ package packfile_test import ( "bytes" + "io" "math/rand" "testing" + "gopkg.in/src-d/go-billy.v3/memfs" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" . "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" - "gopkg.in/src-d/go-git.v4/storage/memory" . 
"gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" @@ -34,7 +36,6 @@ func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) { c.Assert(err, IsNil) s.testEncodeDecode(c, storage, 10) }) - } func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) { @@ -52,8 +53,11 @@ func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) { }) } -func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, packWindow uint) { - +func (s *EncoderAdvancedSuite) testEncodeDecode( + c *C, + storage storer.Storer, + packWindow uint, +) { objIter, err := storage.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) @@ -80,16 +84,31 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, pac encodeHash, err := enc.Encode(hashes, packWindow) c.Assert(err, IsNil) - scanner := NewScanner(buf) - storage = memory.NewStorage() - d, err := NewDecoder(scanner, storage) + f, err := memfs.New().Create("packfile") + c.Assert(err, IsNil) + + _, err = f.Write(buf.Bytes()) + c.Assert(err, IsNil) + + _, err = f.Seek(0, io.SeekStart) c.Assert(err, IsNil) - decodeHash, err := d.Decode() + + w := new(idxfile.Writer) + _, err = NewParser(NewScanner(f), w).Parse() + c.Assert(err, IsNil) + index, err := w.Index() + c.Assert(err, IsNil) + + _, err = f.Seek(0, io.SeekStart) c.Assert(err, IsNil) + p := NewPackfile(index, f) + + decodeHash, err := p.ID() + c.Assert(err, IsNil) c.Assert(encodeHash, Equals, decodeHash) - objIter, err = storage.IterEncodedObjects(plumbing.AnyObject) + objIter, err = p.GetAll() c.Assert(err, IsNil) obtainedObjects := map[plumbing.Hash]bool{} err = objIter.ForEach(func(o plumbing.EncodedObject) error { diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index 84d03fb..7b6dde2 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -2,8 +2,12 @@ package packfile import ( "bytes" + "io" + stdioutil "io/ioutil" + "gopkg.in/src-d/go-billy.v3/memfs" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/storage/memory" . 
"gopkg.in/check.v1" @@ -130,24 +134,20 @@ func (s *EncoderSuite) simpleDeltaTest(c *C) { }) c.Assert(err, IsNil) - scanner := NewScanner(s.buf) - - storage := memory.NewStorage() - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - - decHash, err := d.Decode() + p, cleanup := packfileFromReader(c, s.buf) + defer cleanup() + decHash, err := p.ID() c.Assert(err, IsNil) c.Assert(encHash, Equals, decHash) - decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash()) + decSrc, err := p.Get(srcObject.Hash()) c.Assert(err, IsNil) - c.Assert(decSrc, DeepEquals, srcObject) + objectsEqual(c, decSrc, srcObject) - decTarget, err := storage.EncodedObject(targetObject.Type(), targetObject.Hash()) + decTarget, err := p.Get(targetObject.Hash()) c.Assert(err, IsNil) - c.Assert(decTarget, DeepEquals, targetObject) + objectsEqual(c, decTarget, targetObject) } func (s *EncoderSuite) deltaOverDeltaTest(c *C) { @@ -173,27 +173,24 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) { }) c.Assert(err, IsNil) - scanner := NewScanner(s.buf) - storage := memory.NewStorage() - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - - decHash, err := d.Decode() + p, cleanup := packfileFromReader(c, s.buf) + defer cleanup() + decHash, err := p.ID() c.Assert(err, IsNil) c.Assert(encHash, Equals, decHash) - decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash()) + decSrc, err := p.Get(srcObject.Hash()) c.Assert(err, IsNil) - c.Assert(decSrc, DeepEquals, srcObject) + objectsEqual(c, decSrc, srcObject) - decTarget, err := storage.EncodedObject(targetObject.Type(), targetObject.Hash()) + decTarget, err := p.Get(targetObject.Hash()) c.Assert(err, IsNil) - c.Assert(decTarget, DeepEquals, targetObject) + objectsEqual(c, decTarget, targetObject) - decOtherTarget, err := storage.EncodedObject(otherTargetObject.Type(), otherTargetObject.Hash()) + decOtherTarget, err := p.Get(otherTargetObject.Hash()) c.Assert(err, IsNil) - c.Assert(decOtherTarget, DeepEquals, otherTargetObject) + objectsEqual(c, decOtherTarget, otherTargetObject) } func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) { @@ -248,29 +245,70 @@ func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) { }) c.Assert(err, IsNil) - scanner := NewScanner(s.buf) - storage := memory.NewStorage() - d, err := NewDecoder(scanner, storage) + p, cleanup := packfileFromReader(c, s.buf) + defer cleanup() + decHash, err := p.ID() c.Assert(err, IsNil) - decHash, err := d.Decode() + c.Assert(encHash, Equals, decHash) + + decSrc, err := p.Get(o1.Hash()) c.Assert(err, IsNil) + objectsEqual(c, decSrc, o1) - c.Assert(encHash, Equals, decHash) + decTarget, err := p.Get(o2.Hash()) + c.Assert(err, IsNil) + objectsEqual(c, decTarget, o2) + + decOtherTarget, err := p.Get(o3.Hash()) + c.Assert(err, IsNil) + objectsEqual(c, decOtherTarget, o3) + + decAnotherTarget, err := p.Get(o4.Hash()) + c.Assert(err, IsNil) + objectsEqual(c, decAnotherTarget, o4) +} + +func objectsEqual(c *C, o1, o2 plumbing.EncodedObject) { + c.Assert(o1.Type(), Equals, o2.Type()) + c.Assert(o1.Hash(), Equals, o2.Hash()) + c.Assert(o1.Size(), Equals, o2.Size()) - decSrc, err := storage.EncodedObject(o1.Type(), o1.Hash()) + r1, err := o1.Reader() c.Assert(err, IsNil) - c.Assert(decSrc, DeepEquals, o1) - decTarget, err := storage.EncodedObject(o2.Type(), o2.Hash()) + b1, err := stdioutil.ReadAll(r1) c.Assert(err, IsNil) - c.Assert(decTarget, DeepEquals, o2) - decOtherTarget, err := storage.EncodedObject(o3.Type(), o3.Hash()) + r2, err := o2.Reader() c.Assert(err, IsNil) - 
c.Assert(decOtherTarget, DeepEquals, o3) - decAnotherTarget, err := storage.EncodedObject(o4.Type(), o4.Hash()) + b2, err := stdioutil.ReadAll(r2) c.Assert(err, IsNil) - c.Assert(decAnotherTarget, DeepEquals, o4) + + c.Assert(bytes.Compare(b1, b2), Equals, 0) +} + +func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) { + file, err := memfs.New().Create("packfile") + c.Assert(err, IsNil) + + _, err = file.Write(buf.Bytes()) + c.Assert(err, IsNil) + + _, err = file.Seek(0, io.SeekStart) + c.Assert(err, IsNil) + + scanner := NewScanner(file) + + w := new(idxfile.Writer) + _, err = NewParser(scanner, w).Parse() + c.Assert(err, IsNil) + + index, err := w.Index() + c.Assert(err, IsNil) + + return NewPackfile(index, file), func() { + c.Assert(file.Close(), IsNil) + } } diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 2e831f2..37743ba 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -3,38 +3,55 @@ package packfile import ( "bytes" "io" + stdioutil "io/ioutil" "os" - billy "gopkg.in/src-d/go-billy.v4" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) +var ( + // ErrInvalidObject is returned by Decode when an invalid object is + // found in the packfile. + ErrInvalidObject = NewError("invalid git object") + // ErrZLib is returned by Decode when there was an error unzipping + // the packfile contents. + ErrZLib = NewError("zlib reading error") +) + // Packfile allows retrieving information from inside a packfile. type Packfile struct { idxfile.Index - billy.File + file io.ReadSeeker s *Scanner deltaBaseCache cache.Object offsetToType map[int64]plumbing.ObjectType } -// NewPackfile returns a packfile representation for the given packfile file -// and packfile idx. -func NewPackfile(index idxfile.Index, file billy.File) *Packfile { +// NewPackfileWithCache creates a new Packfile with the given object cache. +func NewPackfileWithCache( + index idxfile.Index, + file io.ReadSeeker, + cache cache.Object, +) *Packfile { s := NewScanner(file) - return &Packfile{ index, file, s, - cache.NewObjectLRUDefault(), + cache, make(map[int64]plumbing.ObjectType), } } +// NewPackfile returns a packfile representation for the given packfile file +// and packfile idx. +func NewPackfile(index idxfile.Index, file io.ReadSeeker) *Packfile { + return NewPackfileWithCache(index, file, cache.NewObjectLRUDefault()) +} + // Get retrieves the encoded object in the packfile with the given hash. func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { offset, err := p.FindOffset(h) @@ -334,35 +351,49 @@ func (p *Packfile) cachePut(obj plumbing.EncodedObject) { // The iterator returned is not thread-safe, it should be used in the same // thread as the Packfile instance. func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) { - entries, err := p.Entries() - if err != nil { - return nil, err - } + return p.GetByType(plumbing.AnyObject) +} - return &objectIter{ - // Easiest way to provide an object decoder is just to pass a Packfile - // instance. To not mess with the seeks, it's a new instance with a - // different scanner but the same cache and offset to hash map for - // reusing as much cache as possible. 
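+			// (Illustrative sketch, not part of the original patch.)
+			// Filtering by type then becomes:
+			//
+			//	iter, err := p.GetByType(plumbing.CommitObject)
+			//	if err == nil {
+			//		err = iter.ForEach(func(obj plumbing.EncodedObject) error {
+			//			return nil // only commits reach this callback
+			//		})
+			//	}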
- p: &Packfile{ - p.Index, - p.File, - NewScanner(p.File), - p.deltaBaseCache, - p.offsetToType, - }, - iter: entries, - }, nil +// GetByType returns all the objects of the given type. +func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter, error) { + switch typ { + case plumbing.AnyObject, + plumbing.BlobObject, + plumbing.TreeObject, + plumbing.CommitObject, + plumbing.TagObject: + entries, err := p.Entries() + if err != nil { + return nil, err + } + + return &objectIter{ + // Easiest way to provide an object decoder is just to pass a Packfile + // instance. To not mess with the seeks, it's a new instance with a + // different scanner but the same cache and offset to hash map for + // reusing as much cache as possible. + p: p, + iter: entries, + typ: typ, + }, nil + default: + return nil, plumbing.ErrInvalidType + } } // ID returns the ID of the packfile, which is the checksum at the end of it. func (p *Packfile) ID() (plumbing.Hash, error) { - if _, err := p.File.Seek(-20, io.SeekEnd); err != nil { + prev, err := p.file.Seek(-20, io.SeekEnd) + if err != nil { return plumbing.ZeroHash, err } var hash plumbing.Hash - if _, err := io.ReadFull(p.File, hash[:]); err != nil { + if _, err := io.ReadFull(p.file, hash[:]); err != nil { + return plumbing.ZeroHash, err + } + + if _, err := p.file.Seek(prev, io.SeekStart); err != nil { return plumbing.ZeroHash, err } @@ -371,25 +402,59 @@ func (p *Packfile) ID() (plumbing.Hash, error) { // Close the packfile and its resources. func (p *Packfile) Close() error { - return p.File.Close() + closer, ok := p.file.(io.Closer) + if !ok { + return nil + } + + return closer.Close() } -type objectDecoder interface { - nextObject() (plumbing.EncodedObject, error) +// MemoryObjectFromDisk converts a DiskObject to a MemoryObject. +func MemoryObjectFromDisk(obj plumbing.EncodedObject) (plumbing.EncodedObject, error) { + o2 := new(plumbing.MemoryObject) + o2.SetType(obj.Type()) + o2.SetSize(obj.Size()) + + r, err := obj.Reader() + if err != nil { + return nil, err + } + + data, err := stdioutil.ReadAll(r) + if err != nil { + return nil, err + } + + if _, err := o2.Write(data); err != nil { + return nil, err + } + + return o2, nil } type objectIter struct { p *Packfile + typ plumbing.ObjectType iter idxfile.EntryIter } func (i *objectIter) Next() (plumbing.EncodedObject, error) { - e, err := i.iter.Next() - if err != nil { - return nil, err - } + for { + e, err := i.iter.Next() + if err != nil { + return nil, err + } - return i.p.GetByOffset(int64(e.Offset)) + obj, err := i.p.GetByOffset(int64(e.Offset)) + if err != nil { + return nil, err + } + + if i.typ == plumbing.AnyObject || obj.Type() == i.typ { + return obj, nil + } + } } func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error { diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go index e234794..3193bed 100644 --- a/plumbing/format/packfile/packfile_test.go +++ b/plumbing/format/packfile/packfile_test.go @@ -1,23 +1,21 @@ -package packfile +package packfile_test import ( - "bytes" "io" "math" - "io/ioutil" - . 
"gopkg.in/check.v1" "gopkg.in/src-d/go-billy.v4/osfs" fixtures "gopkg.in/src-d/go-git-fixtures.v3" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" - "gopkg.in/src-d/go-git.v4/storage/memory" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + "gopkg.in/src-d/go-git.v4/plumbing/storer" ) type PackfileSuite struct { fixtures.Suite - p *Packfile + p *packfile.Packfile idx *idxfile.MemoryIndex f *fixtures.Fixture } @@ -108,60 +106,157 @@ var expectedEntries = map[plumbing.Hash]int64{ plumbing.NewHash("fb72698cab7617ac416264415f13224dfd7a165e"): 84671, } -func (s *PackfileSuite) TestContent(c *C) { - storer := memory.NewStorage() - decoder, err := NewDecoder(NewScanner(s.f.Packfile()), storer) - c.Assert(err, IsNil) +func (s *PackfileSuite) SetUpTest(c *C) { + s.f = fixtures.Basic().One() - _, err = decoder.Decode() + f, err := osfs.New("").Open(s.f.Packfile().Name()) c.Assert(err, IsNil) - iter, err := s.p.GetAll() + s.idx = idxfile.NewMemoryIndex() + c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil) + + s.p = packfile.NewPackfile(s.idx, f) +} + +func (s *PackfileSuite) TearDownTest(c *C) { + c.Assert(s.p.Close(), IsNil) +} + +func (s *PackfileSuite) TestDecode(c *C) { + fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { + index := getIndexFromIdxFile(f.Idx()) + p := packfile.NewPackfile(index, f.Packfile()) + defer p.Close() + + for _, h := range expectedHashes { + obj, err := p.Get(plumbing.NewHash(h)) + c.Assert(err, IsNil) + c.Assert(obj.Hash().String(), Equals, h) + } + }) +} + +func (s *PackfileSuite) TestDecodeByTypeRefDelta(c *C) { + f := fixtures.Basic().ByTag("ref-delta").One() + + index := getIndexFromIdxFile(f.Idx()) + packfile := packfile.NewPackfile(index, f.Packfile()) + defer packfile.Close() + + iter, err := packfile.GetByType(plumbing.CommitObject) c.Assert(err, IsNil) + var count int for { - o, err := iter.Next() + obj, err := iter.Next() if err == io.EOF { break } + count++ c.Assert(err, IsNil) + c.Assert(obj.Type(), Equals, plumbing.CommitObject) + } - o2, err := storer.EncodedObject(plumbing.AnyObject, o.Hash()) - c.Assert(err, IsNil) + c.Assert(count > 0, Equals, true) +} - c.Assert(o.Type(), Equals, o2.Type()) - c.Assert(o.Size(), Equals, o2.Size()) +func (s *PackfileSuite) TestDecodeByType(c *C) { + ts := []plumbing.ObjectType{ + plumbing.CommitObject, + plumbing.TagObject, + plumbing.TreeObject, + plumbing.BlobObject, + } - r, err := o.Reader() - c.Assert(err, IsNil) + fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { + for _, t := range ts { + index := getIndexFromIdxFile(f.Idx()) + packfile := packfile.NewPackfile(index, f.Packfile()) + defer packfile.Close() - c1, err := ioutil.ReadAll(r) - c.Assert(err, IsNil) - c.Assert(r.Close(), IsNil) + iter, err := packfile.GetByType(t) + c.Assert(err, IsNil) - r, err = o2.Reader() - c.Assert(err, IsNil) + c.Assert(iter.ForEach(func(obj plumbing.EncodedObject) error { + c.Assert(obj.Type(), Equals, t) + return nil + }), IsNil) + } + }) +} - c2, err := ioutil.ReadAll(r) - c.Assert(err, IsNil) - c.Assert(r.Close(), IsNil) +func (s *PackfileSuite) TestDecodeByTypeConstructor(c *C) { + f := fixtures.Basic().ByTag("packfile").One() + index := getIndexFromIdxFile(f.Idx()) + packfile := packfile.NewPackfile(index, f.Packfile()) + defer packfile.Close() - c.Assert(bytes.Compare(c1, c2), Equals, 0) - } + _, err := packfile.GetByType(plumbing.OFSDeltaObject) + c.Assert(err, Equals, plumbing.ErrInvalidType) + + _, err = 
packfile.GetByType(plumbing.REFDeltaObject) + c.Assert(err, Equals, plumbing.ErrInvalidType) + + _, err = packfile.GetByType(plumbing.InvalidObject) + c.Assert(err, Equals, plumbing.ErrInvalidType) } -func (s *PackfileSuite) SetUpTest(c *C) { - s.f = fixtures.Basic().One() +var expectedHashes = []string{ + "918c48b83bd081e863dbe1b80f8998f058cd8294", + "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", + "1669dce138d9b841a518c64b10914d88f5e488ea", + "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", + "b8e471f58bcbca63b07bda20e428190409c2db47", + "35e85108805c84807bc66a02d91535e1e24b38b9", + "b029517f6300c2da0f4b651b8642506cd6aaf45d", + "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", + "d3ff53e0564a9f87d8e84b6e28e5060e517008aa", + "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", + "d5c0f4ab811897cadf03aec358ae60d21f91c50d", + "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", + "cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", + "9dea2395f5403188298c1dabe8bdafe562c491e3", + "586af567d0bb5e771e49bdd9434f5e0fb76d25fa", + "9a48f23120e880dfbe41f7c9b7b708e9ee62a492", + "5a877e6a906a2743ad6e45d99c1793642aaf8eda", + "c8f1d8c61f9da76f4cb49fd86322b6e685dba956", + "a8d315b2b1c615d43042c3a62402b8a54288cf5c", + "a39771a7651f97faf5c72e08224d857fc35133db", + "880cd14280f4b9b6ed3986d6671f907d7cc2a198", + "fb72698cab7617ac416264415f13224dfd7a165e", + "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", + "eba74343e2f15d62adedfd8c883ee0262b5c8021", + "c2d30fa8ef288618f65f6eed6e168e0d514886f4", + "8dcef98b1d52143e1e2dbc458ffe38f925786bf2", + "aa9b383c260e1d05fbbf6b30a02914555e20c725", + "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", + "dbd3641b371024f44d0e469a9c8f5457b0660de1", + "e8d3ffab552895c19b9fcf7aa264d277cde33881", + "7e59600739c96546163833214c36459e324bad0a", +} - f, err := osfs.New("").Open(s.f.Packfile().Name()) +func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { + i, err := s.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) - s.idx = idxfile.NewMemoryIndex() - c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil) + var count int + err = i.ForEach(func(plumbing.EncodedObject) error { count++; return nil }) + c.Assert(err, IsNil) + c.Assert(count, Equals, len(expects)) - s.p = NewPackfile(s.idx, f) + for _, exp := range expects { + obt, err := s.EncodedObject(plumbing.AnyObject, plumbing.NewHash(exp)) + c.Assert(err, IsNil) + c.Assert(obt.Hash().String(), Equals, exp) + } } -func (s *PackfileSuite) TearDownTest(c *C) { - c.Assert(s.p.Close(), IsNil) +func getIndexFromIdxFile(r io.Reader) idxfile.Index { + idxf := idxfile.NewMemoryIndex() + d := idxfile.NewDecoder(r) + if err := d.Decode(idxf); err != nil { + panic(err) + } + + return idxf } diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 696f5ba..f0a7674 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -9,6 +9,16 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing/cache" ) +var ( + // ErrObjectContentAlreadyRead is returned when the content of the object + // was already read, since the content can only be read once. + ErrObjectContentAlreadyRead = errors.New("object content was already read") + + // ErrReferenceDeltaNotFound is returned when the reference delta is not + // found. + ErrReferenceDeltaNotFound = errors.New("reference delta not found") +) + // Observer interface is implemented by index encoders. type Observer interface { // OnHeader is called when a new packfile is opened. 
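As a side note, the Observer contract being reshaped here is small enough to sketch. Below is a hypothetical in-package observer, not part of the patch, that only tallies what the parser reports, using the content-carrying OnInflatedObjectContent signature this commit introduces; idxfile.Writer is the real in-tree implementer.

type countingObserver struct {
	objects int
	bytes   int64
}

func (c *countingObserver) OnHeader(count uint32) error { return nil }

func (c *countingObserver) OnInflatedObjectHeader(t plumbing.ObjectType, objSize, pos int64) error {
	c.objects++
	c.bytes += objSize
	return nil
}

func (c *countingObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, _ []byte) error {
	return nil // the inflated content is available here for encoders that need it
}

func (c *countingObserver) OnFooter(h plumbing.Hash) error { return nil }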
@@ -16,7 +26,7 @@ type Observer interface { // OnInflatedObjectHeader is called for each object header read. OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error // OnInflatedObjectContent is called for each decoded object. - OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error + OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error // OnFooter is called when decoding is done. OnFooter(h plumbing.Hash) error } @@ -32,41 +42,44 @@ type Parser struct { hashOffset map[plumbing.Hash]int64 checksum plumbing.Hash - cache *cache.ObjectLRU + cache *cache.ObjectLRU + contentCache map[int64][]byte ob []Observer } // NewParser creates a new Parser struct. func NewParser(scanner *Scanner, ob ...Observer) *Parser { + var contentCache map[int64][]byte + if !scanner.IsSeekable { + contentCache = make(map[int64][]byte) + } + return &Parser{ - scanner: scanner, - ob: ob, - count: 0, - cache: cache.NewObjectLRUDefault(), + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + contentCache: contentCache, } } // Parse start decoding phase of the packfile. func (p *Parser) Parse() (plumbing.Hash, error) { - err := p.init() - if err != nil { + if err := p.init(); err != nil { return plumbing.ZeroHash, err } - err = p.firstPass() - if err != nil { + if err := p.firstPass(); err != nil { return plumbing.ZeroHash, err } - err = p.resolveDeltas() - if err != nil { + if err := p.resolveDeltas(); err != nil { return plumbing.ZeroHash, err } for _, o := range p.ob { - err := o.OnFooter(p.checksum) - if err != nil { + if err := o.OnFooter(p.checksum); err != nil { return plumbing.ZeroHash, err } } @@ -81,8 +94,7 @@ func (p *Parser) init() error { } for _, o := range p.ob { - err := o.OnHeader(c) - if err != nil { + if err := o.OnHeader(c); err != nil { return err } } @@ -99,7 +111,7 @@ func (p *Parser) firstPass() error { buf := new(bytes.Buffer) for i := uint32(0); i < p.count; i++ { - buf.Truncate(0) + buf.Reset() oh, err := p.scanner.NextObjectHeader() if err != nil { @@ -122,8 +134,7 @@ func (p *Parser) firstPass() error { } if !ok { - // TODO improve error - return errors.New("Reference delta not found") + return ErrReferenceDeltaNotFound } ota = newDeltaObject(oh.Offset, oh.Length, t, parent) @@ -143,35 +154,41 @@ func (p *Parser) firstPass() error { ota.Length = oh.Length if !delta { - ota.Write(buf.Bytes()) + if _, err := ota.Write(buf.Bytes()); err != nil { + return err + } ota.SHA1 = ota.Sum() + p.oiByHash[ota.SHA1] = ota } p.oiByOffset[oh.Offset] = ota - p.oiByHash[oh.Reference] = ota p.oi[i] = ota } - checksum, err := p.scanner.Checksum() - p.checksum = checksum - - if err == io.EOF { - return nil + var err error + p.checksum, err = p.scanner.Checksum() + if err != nil && err != io.EOF { + return err } - return err + return nil } func (p *Parser) resolveDeltas() error { for _, obj := range p.oi { + content, err := obj.Content() + if err != nil { + return err + } + for _, o := range p.ob { err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) if err != nil { return err } - err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32) + err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content) if err != nil { return err } @@ -185,8 +202,7 @@ func (p *Parser) resolveDeltas() error { } for _, child := range obj.Children { - _, err = p.resolveObject(child, base) - if err != nil { + if _, err := p.resolveObject(child, base); err != nil { return err } } @@ -205,8 +221,7 @@ func (p 
*Parser) get(o *objectInfo) ([]byte, error) { } buf := make([]byte, e.Size()) - _, err = r.Read(buf) - if err != nil { + if _, err = r.Read(buf); err != nil { return nil, err } @@ -254,8 +269,8 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { func (p *Parser) resolveObject( o *objectInfo, - base []byte) ([]byte, error) { - + base []byte, +) ([]byte, error) { if !o.DiskType.IsDelta() { return nil, nil } @@ -278,16 +293,17 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { // TODO: skip header. Header size can be calculated with the offset of the // next offset in the first pass. - p.scanner.SeekFromStart(o.Offset) - _, err := p.scanner.NextObjectHeader() - if err != nil { + if _, err := p.scanner.SeekFromStart(o.Offset); err != nil { return nil, err } - buf.Truncate(0) + if _, err := p.scanner.NextObjectHeader(); err != nil { + return nil, err + } - _, _, err = p.scanner.NextObject(buf) - if err != nil { + buf.Reset() + + if _, _, err := p.scanner.NextObject(buf); err != nil { return nil, err } @@ -301,9 +317,11 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { } ota.Type = ota.Parent.Type - hash := plumbing.ComputeHash(ota.Type, patched) - - ota.SHA1 = hash + ota.Hasher = plumbing.NewHasher(ota.Type, int64(len(patched))) + if _, err := ota.Write(patched); err != nil { + return nil, err + } + ota.SHA1 = ota.Sum() return patched, nil } @@ -323,6 +341,8 @@ type objectInfo struct { Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash + + content *bytes.Buffer } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -351,6 +371,30 @@ func newDeltaObject( return obj } +func (o *objectInfo) Write(bs []byte) (int, error) { + n, err := o.Hasher.Write(bs) + if err != nil { + return 0, err + } + + o.content = bytes.NewBuffer(nil) + + _, _ = o.content.Write(bs) + return n, nil +} + +// Content returns the content of the object. This operation can only be done +// once. 
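// Hedged in-package sketch, not part of the patch, of the read-once
// contract documented above; newBaseObject, Write and Content all exist
// elsewhere in this file. The second call fails because the buffer is
// handed out and released on the first read.
func exampleReadOnce() error {
	oi := newBaseObject(0, 5, plumbing.BlobObject)
	if _, err := oi.Write([]byte("hello")); err != nil {
		return err
	}

	if _, err := oi.Content(); err != nil { // first read: returns the bytes
		return err
	}

	_, err := oi.Content() // second read: ErrObjectContentAlreadyRead
	return err
}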
+func (o *objectInfo) Content() ([]byte, error) { + if o.content == nil { + return nil, ErrObjectContentAlreadyRead + } + + r := o.content + o.content = nil + return r.Bytes(), nil +} + func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go index 87a8804..b18f20f 100644 --- a/plumbing/format/packfile/parser_test.go +++ b/plumbing/format/packfile/parser_test.go @@ -103,7 +103,7 @@ func (t *testObserver) OnInflatedObjectHeader(otype plumbing.ObjectType, objSize return nil } -func (t *testObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32) error { +func (t *testObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, _ []byte) error { o := t.get(pos) o.hash = h.String() o.crc = crc -- cgit From 5889a3b669f0f515ff445aa040afc1e7eeb2bbd1 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Wed, 8 Aug 2018 16:56:20 +0200 Subject: plumbing: packfile, allow non-seekable sources on Parser Signed-off-by: Miguel Molina --- plumbing/format/packfile/common.go | 63 +---- plumbing/format/packfile/encoder_advanced_test.go | 5 +- plumbing/format/packfile/encoder_test.go | 5 +- plumbing/format/packfile/parser.go | 311 ++++++++++++++-------- plumbing/format/packfile/parser_test.go | 19 +- 5 files changed, 226 insertions(+), 177 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/common.go b/plumbing/format/packfile/common.go index 76254f0..2b4aceb 100644 --- a/plumbing/format/packfile/common.go +++ b/plumbing/format/packfile/common.go @@ -2,11 +2,9 @@ package packfile import ( "bytes" - "errors" "io" "sync" - "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/utils/ioutil" ) @@ -32,8 +30,12 @@ func UpdateObjectStorage(s storer.Storer, packfile io.Reader) error { return WritePackfileToObjectStorage(pw, packfile) } - updater := newPackfileStorageUpdater(s) - _, err := NewParser(NewScanner(packfile), updater).Parse() + p, err := NewParserWithStorage(NewScanner(packfile), s) + if err != nil { + return err + } + + _, err = p.Parse() return err } @@ -58,56 +60,3 @@ var bufPool = sync.Pool{ return bytes.NewBuffer(nil) }, } - -var errMissingObjectContent = errors.New("missing object content") - -type packfileStorageUpdater struct { - storer.Storer - lastSize int64 - lastType plumbing.ObjectType -} - -func newPackfileStorageUpdater(s storer.Storer) *packfileStorageUpdater { - return &packfileStorageUpdater{Storer: s} -} - -func (p *packfileStorageUpdater) OnHeader(count uint32) error { - return nil -} - -func (p *packfileStorageUpdater) OnInflatedObjectHeader( - t plumbing.ObjectType, - objSize int64, - pos int64, -) error { - if p.lastSize > 0 || p.lastType != plumbing.InvalidObject { - return errMissingObjectContent - } - - p.lastType = t - p.lastSize = objSize - return nil -} - -func (p *packfileStorageUpdater) OnInflatedObjectContent( - h plumbing.Hash, - pos int64, - crc uint32, - content []byte, -) error { - obj := new(plumbing.MemoryObject) - obj.SetSize(p.lastSize) - obj.SetType(p.lastType) - if _, err := obj.Write(content); err != nil { - return err - } - - _, err := p.SetEncodedObject(obj) - p.lastSize = 0 - p.lastType = plumbing.InvalidObject - return err -} - -func (p *packfileStorageUpdater) OnFooter(h plumbing.Hash) error { - return nil -} diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index 6ffebc2..78ddc45 
100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -94,7 +94,10 @@ func (s *EncoderAdvancedSuite) testEncodeDecode( c.Assert(err, IsNil) w := new(idxfile.Writer) - _, err = NewParser(NewScanner(f), w).Parse() + parser, err := NewParser(NewScanner(f), w) + c.Assert(err, IsNil) + + _, err = parser.Parse() c.Assert(err, IsNil) index, err := w.Index() c.Assert(err, IsNil) diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index 7b6dde2..24e2082 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -302,7 +302,10 @@ func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) { scanner := NewScanner(file) w := new(idxfile.Writer) - _, err = NewParser(scanner, w).Parse() + p, err := NewParser(scanner, w) + c.Assert(err, IsNil) + + _, err = p.Parse() c.Assert(err, IsNil) index, err := w.Index() diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index f0a7674..beb3e27 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -7,16 +7,20 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/storer" ) var ( - // ErrObjectContentAlreadyRead is returned when the content of the object - // was already read, since the content can only be read once. - ErrObjectContentAlreadyRead = errors.New("object content was already read") - // ErrReferenceDeltaNotFound is returned when the reference delta is not // found. ErrReferenceDeltaNotFound = errors.New("reference delta not found") + + // ErrNotSeekableSource is returned when the source for the parser is not + // seekable and a storage was not provided, so it can't be parsed. + ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided") + + // ErrDeltaNotCached is returned when the delta could not be found in cache. + ErrDeltaNotCached = errors.New("delta could not be found in cache") ) // Observer interface is implemented by index encoders. @@ -34,34 +38,96 @@ type Observer interface { // Parser decodes a packfile and calls any observer associated to it. Is used // to generate indexes. type Parser struct { - scanner *Scanner - count uint32 - oi []*objectInfo - oiByHash map[plumbing.Hash]*objectInfo - oiByOffset map[int64]*objectInfo - hashOffset map[plumbing.Hash]int64 - checksum plumbing.Hash - - cache *cache.ObjectLRU - contentCache map[int64][]byte + storage storer.EncodedObjectStorer + scanner *Scanner + count uint32 + oi []*objectInfo + oiByHash map[plumbing.Hash]*objectInfo + oiByOffset map[int64]*objectInfo + hashOffset map[plumbing.Hash]int64 + pendingRefDeltas map[plumbing.Hash][]*objectInfo + checksum plumbing.Hash + + cache *cache.ObjectLRU + // delta content by offset, only used if source is not seekable + deltas map[int64][]byte ob []Observer } -// NewParser creates a new Parser struct. -func NewParser(scanner *Scanner, ob ...Observer) *Parser { - var contentCache map[int64][]byte +// NewParser creates a new Parser. The Scanner source must be seekable. +// If it's not, NewParserWithStorage should be used instead. +func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) { + return NewParserWithStorage(scanner, nil, ob...) +} + +// NewParserWithStorage creates a new Parser. The scanner source must either +// be seekable or a storage must be provided. 
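// Hedged usage sketch, not part of the patch: with a non-seekable source,
// such as a network stream, a storage must hold the inflated objects,
// since the parser cannot seek back to re-read them. memory.NewStorage is
// assumed from gopkg.in/src-d/go-git.v4/storage/memory, mirroring the call
// shape used by UpdateObjectStorage in common.go.
func parseFromStream(r io.Reader) (plumbing.Hash, error) {
	p, err := NewParserWithStorage(NewScanner(r), memory.NewStorage())
	if err != nil {
		return plumbing.ZeroHash, err
	}

	return p.Parse() // returns the packfile checksum
}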
+func NewParserWithStorage( + scanner *Scanner, + storage storer.EncodedObjectStorer, + ob ...Observer, +) (*Parser, error) { + if !scanner.IsSeekable && storage == nil { + return nil, ErrNotSeekableSource + } + + var deltas map[int64][]byte if !scanner.IsSeekable { - contentCache = make(map[int64][]byte) + deltas = make(map[int64][]byte) } return &Parser{ - scanner: scanner, - ob: ob, - count: 0, - cache: cache.NewObjectLRUDefault(), - contentCache: contentCache, + storage: storage, + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewObjectLRUDefault(), + pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo), + deltas: deltas, + }, nil +} + +func (p *Parser) forEachObserver(f func(o Observer) error) error { + for _, o := range p.ob { + if err := f(o); err != nil { + return err + } } + return nil +} + +func (p *Parser) onHeader(count uint32) error { + return p.forEachObserver(func(o Observer) error { + return o.OnHeader(count) + }) +} + +func (p *Parser) onInflatedObjectHeader( + t plumbing.ObjectType, + objSize int64, + pos int64, +) error { + return p.forEachObserver(func(o Observer) error { + return o.OnInflatedObjectHeader(t, objSize, pos) + }) +} + +func (p *Parser) onInflatedObjectContent( + h plumbing.Hash, + pos int64, + crc uint32, + content []byte, +) error { + return p.forEachObserver(func(o Observer) error { + return o.OnInflatedObjectContent(h, pos, crc, content) + }) +} + +func (p *Parser) onFooter(h plumbing.Hash) error { + return p.forEachObserver(func(o Observer) error { + return o.OnFooter(h) + }) } // Parse start decoding phase of the packfile. @@ -70,7 +136,13 @@ func (p *Parser) Parse() (plumbing.Hash, error) { return plumbing.ZeroHash, err } - if err := p.firstPass(); err != nil { + if err := p.indexObjects(); err != nil { + return plumbing.ZeroHash, err + } + + var err error + p.checksum, err = p.scanner.Checksum() + if err != nil && err != io.EOF { return plumbing.ZeroHash, err } @@ -78,10 +150,12 @@ func (p *Parser) Parse() (plumbing.Hash, error) { return plumbing.ZeroHash, err } - for _, o := range p.ob { - if err := o.OnFooter(p.checksum); err != nil { - return plumbing.ZeroHash, err - } + if len(p.pendingRefDeltas) > 0 { + return plumbing.ZeroHash, ErrReferenceDeltaNotFound + } + + if err := p.onFooter(p.checksum); err != nil { + return plumbing.ZeroHash, err } return p.checksum, nil @@ -93,10 +167,8 @@ func (p *Parser) init() error { return err } - for _, o := range p.ob { - if err := o.OnHeader(c); err != nil { - return err - } + if err := p.onHeader(c); err != nil { + return err } p.count = c @@ -107,7 +179,7 @@ func (p *Parser) init() error { return nil } -func (p *Parser) firstPass() error { +func (p *Parser) indexObjects() error { buf := new(bytes.Buffer) for i := uint32(0); i < p.count; i++ { @@ -121,25 +193,30 @@ func (p *Parser) firstPass() error { delta := false var ota *objectInfo switch t := oh.Type; t { - case plumbing.OFSDeltaObject, plumbing.REFDeltaObject: + case plumbing.OFSDeltaObject: delta = true - var parent *objectInfo - var ok bool - - if t == plumbing.OFSDeltaObject { - parent, ok = p.oiByOffset[oh.OffsetReference] - } else { - parent, ok = p.oiByHash[oh.Reference] - } - + parent, ok := p.oiByOffset[oh.OffsetReference] if !ok { - return ErrReferenceDeltaNotFound + return plumbing.ErrObjectNotFound } ota = newDeltaObject(oh.Offset, oh.Length, t, parent) - parent.Children = append(parent.Children, ota) + case plumbing.REFDeltaObject: + delta = true + + parent, ok := p.oiByHash[oh.Reference] + if ok { + ota = 
newDeltaObject(oh.Offset, oh.Length, t, parent) + parent.Children = append(parent.Children, ota) + } else { + ota = newBaseObject(oh.Offset, oh.Length, t) + p.pendingRefDeltas[oh.Reference] = append( + p.pendingRefDeltas[oh.Reference], + ota, + ) + } default: ota = newBaseObject(oh.Offset, oh.Length, t) } @@ -153,23 +230,35 @@ func (p *Parser) firstPass() error { ota.PackSize = size ota.Length = oh.Length + data := buf.Bytes() if !delta { - if _, err := ota.Write(buf.Bytes()); err != nil { + if _, err := ota.Write(data); err != nil { return err } ota.SHA1 = ota.Sum() p.oiByHash[ota.SHA1] = ota } - p.oiByOffset[oh.Offset] = ota + if p.storage != nil && !delta { + obj := new(plumbing.MemoryObject) + obj.SetSize(oh.Length) + obj.SetType(oh.Type) + if _, err := obj.Write(data); err != nil { + return err + } - p.oi[i] = ota - } + if _, err := p.storage.SetEncodedObject(obj); err != nil { + return err + } + } - var err error - p.checksum, err = p.scanner.Checksum() - if err != nil && err != io.EOF { - return err + if delta && !p.scanner.IsSeekable { + p.deltas[oh.Offset] = make([]byte, len(data)) + copy(p.deltas[oh.Offset], data) + } + + p.oiByOffset[oh.Offset] = ota + p.oi[i] = ota } return nil @@ -177,21 +266,17 @@ func (p *Parser) firstPass() error { func (p *Parser) resolveDeltas() error { for _, obj := range p.oi { - content, err := obj.Content() + content, err := p.get(obj) if err != nil { return err } - for _, o := range p.ob { - err := o.OnInflatedObjectHeader(obj.Type, obj.Length, obj.Offset) - if err != nil { - return err - } + if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil { + return err + } - err = o.OnInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content) - if err != nil { - return err - } + if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil { + return err } if !obj.IsDelta() && len(obj.Children) > 0 { @@ -206,6 +291,11 @@ func (p *Parser) resolveDeltas() error { return err } } + + // Remove the delta from the cache. + if obj.DiskType.IsDelta() && !p.scanner.IsSeekable { + delete(p.deltas, obj.Offset) + } } } @@ -214,7 +304,17 @@ func (p *Parser) resolveDeltas() error { func (p *Parser) get(o *objectInfo) ([]byte, error) { e, ok := p.cache.Get(o.SHA1) - if ok { + // If it's not on the cache and is not a delta we can try to find it in the + // storage, if there's one. 
+ if !ok && p.storage != nil && !o.Type.IsDelta() { + var err error + e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) + if err != nil { + return nil, err + } + } + + if e != nil { r, err := e.Reader() if err != nil { return nil, err @@ -228,32 +328,23 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { return buf, nil } - // Read from disk + var data []byte if o.DiskType.IsDelta() { base, err := p.get(o.Parent) if err != nil { return nil, err } - data, err := p.resolveObject(o, base) + data, err = p.resolveObject(o, base) if err != nil { return nil, err } - - if len(o.Children) > 0 { - m := &plumbing.MemoryObject{} - m.Write(data) - m.SetType(o.Type) - m.SetSize(o.Size()) - p.cache.Put(m) + } else { + var err error + data, err = p.readData(o) + if err != nil { + return nil, err } - - return data, nil - } - - data, err := p.readData(o) - if err != nil { - return nil, err } if len(o.Children) > 0 { @@ -285,11 +376,39 @@ func (p *Parser) resolveObject( return nil, err } + if pending, ok := p.pendingRefDeltas[o.SHA1]; ok { + for _, po := range pending { + po.Parent = o + o.Children = append(o.Children, po) + } + delete(p.pendingRefDeltas, o.SHA1) + } + + if p.storage != nil { + obj := new(plumbing.MemoryObject) + obj.SetSize(o.Size()) + obj.SetType(o.Type) + if _, err := obj.Write(data); err != nil { + return nil, err + } + + if _, err := p.storage.SetEncodedObject(obj); err != nil { + return nil, err + } + } + return data, nil } func (p *Parser) readData(o *objectInfo) ([]byte, error) { - buf := new(bytes.Buffer) + if !p.scanner.IsSeekable && o.DiskType.IsDelta() { + data, ok := p.deltas[o.Offset] + if !ok { + return nil, ErrDeltaNotCached + } + + return data, nil + } // TODO: skip header. Header size can be calculated with the offset of the // next offset in the first pass. @@ -301,8 +420,7 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { return nil, err } - buf.Reset() - + buf := new(bytes.Buffer) if _, _, err := p.scanner.NextObject(buf); err != nil { return nil, err } @@ -322,6 +440,7 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { return nil, err } ota.SHA1 = ota.Sum() + ota.Length = int64(len(patched)) return patched, nil } @@ -341,8 +460,6 @@ type objectInfo struct { Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash - - content *bytes.Buffer } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -371,30 +488,6 @@ func newDeltaObject( return obj } -func (o *objectInfo) Write(bs []byte) (int, error) { - n, err := o.Hasher.Write(bs) - if err != nil { - return 0, err - } - - o.content = bytes.NewBuffer(nil) - - _, _ = o.content.Write(bs) - return n, nil -} - -// Content returns the content of the object. This operation can only be done -// once. 
-func (o *objectInfo) Content() ([]byte, error) { - if o.content == nil { - return nil, ErrObjectContentAlreadyRead - } - - r := o.content - o.content = nil - return r.Bytes(), nil -} - func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go index b18f20f..7bce737 100644 --- a/plumbing/format/packfile/parser_test.go +++ b/plumbing/format/packfile/parser_test.go @@ -19,7 +19,8 @@ func (s *ParserSuite) TestParserHashes(c *C) { scanner := packfile.NewScanner(f.Packfile()) obs := new(testObserver) - parser := packfile.NewParser(scanner, obs) + parser, err := packfile.NewParser(scanner, obs) + c.Assert(err, IsNil) ch, err := parser.Parse() c.Assert(err, IsNil) @@ -36,7 +37,7 @@ func (s *ParserSuite) TestParserHashes(c *C) { objs := []observerObject{ {"e8d3ffab552895c19b9fcf7aa264d277cde33881", commit, 254, 12, 0xaa07ba4b}, - {"6ecf0ef2c2dffb796033e5a02219af86ec6584e5", commit, 93, 186, 0xf706df58}, + {"6ecf0ef2c2dffb796033e5a02219af86ec6584e5", commit, 245, 186, 0xf706df58}, {"918c48b83bd081e863dbe1b80f8998f058cd8294", commit, 242, 286, 0x12438846}, {"af2d6a6954d532f8ffb47615169c8fdf9d383a1a", commit, 242, 449, 0x2905a38c}, {"1669dce138d9b841a518c64b10914d88f5e488ea", commit, 333, 615, 0xd9429436}, @@ -54,18 +55,18 @@ func (s *ParserSuite) TestParserHashes(c *C) { {"9a48f23120e880dfbe41f7c9b7b708e9ee62a492", blob, 11488, 80998, 0x7316ff70}, {"9dea2395f5403188298c1dabe8bdafe562c491e3", blob, 78, 84032, 0xdb4fce56}, {"dbd3641b371024f44d0e469a9c8f5457b0660de1", tree, 272, 84115, 0x901cce2c}, - {"a8d315b2b1c615d43042c3a62402b8a54288cf5c", tree, 43, 84375, 0xec4552b0}, + {"a8d315b2b1c615d43042c3a62402b8a54288cf5c", tree, 271, 84375, 0xec4552b0}, {"a39771a7651f97faf5c72e08224d857fc35133db", tree, 38, 84430, 0x847905bf}, {"5a877e6a906a2743ad6e45d99c1793642aaf8eda", tree, 75, 84479, 0x3689459a}, {"586af567d0bb5e771e49bdd9434f5e0fb76d25fa", tree, 38, 84559, 0xe67af94a}, {"cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", tree, 34, 84608, 0xc2314a2e}, {"7e59600739c96546163833214c36459e324bad0a", blob, 9, 84653, 0xcd987848}, - {"fb72698cab7617ac416264415f13224dfd7a165e", tree, 6, 84671, 0x8a853a6d}, - {"4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", tree, 9, 84688, 0x70c6518}, - {"eba74343e2f15d62adedfd8c883ee0262b5c8021", tree, 6, 84708, 0x4f4108e2}, - {"c2d30fa8ef288618f65f6eed6e168e0d514886f4", tree, 5, 84725, 0xd6fe09e9}, - {"8dcef98b1d52143e1e2dbc458ffe38f925786bf2", tree, 8, 84741, 0xf07a2804}, - {"aa9b383c260e1d05fbbf6b30a02914555e20c725", tree, 4, 84760, 0x1d75d6be}, + {"fb72698cab7617ac416264415f13224dfd7a165e", tree, 238, 84671, 0x8a853a6d}, + {"4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", tree, 179, 84688, 0x70c6518}, + {"eba74343e2f15d62adedfd8c883ee0262b5c8021", tree, 148, 84708, 0x4f4108e2}, + {"c2d30fa8ef288618f65f6eed6e168e0d514886f4", tree, 110, 84725, 0xd6fe09e9}, + {"8dcef98b1d52143e1e2dbc458ffe38f925786bf2", tree, 111, 84741, 0xf07a2804}, + {"aa9b383c260e1d05fbbf6b30a02914555e20c725", tree, 73, 84760, 0x1d75d6be}, } c.Assert(obs.objects, DeepEquals, objs) -- cgit From b3d995f5ca6b544ed8a48fced85ffa94600af302 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 09:23:44 +0200 Subject: plumbing: packfile, add Parse benchmark Signed-off-by: Miguel Molina --- plumbing/format/packfile/parser_test.go | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/parser_test.go 
b/plumbing/format/packfile/parser_test.go index 7bce737..b5d482e 100644 --- a/plumbing/format/packfile/parser_test.go +++ b/plumbing/format/packfile/parser_test.go @@ -1,6 +1,8 @@ package packfile_test import ( + "testing" + "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" @@ -138,3 +140,31 @@ func (t *testObserver) put(pos int64, o observerObject) { t.pos[pos] = len(t.objects) t.objects = append(t.objects, o) } + +func BenchmarkParse(b *testing.B) { + if err := fixtures.Init(); err != nil { + b.Fatal(err) + } + + defer func() { + if err := fixtures.Clean(); err != nil { + b.Fatal(err) + } + }() + + for _, f := range fixtures.ByTag("packfile") { + b.Run(f.URL, func(b *testing.B) { + for i := 0; i < b.N; i++ { + parser, err := packfile.NewParser(packfile.NewScanner(f.Packfile())) + if err != nil { + b.Fatal(err) + } + + _, err = parser.Parse() + if err != nil { + b.Fatal(err) + } + } + }) + } +} -- cgit From 71a3c9161d4d8d2baf16440a86a02e8f5678aef2 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 10:55:51 +0200 Subject: plumbing: packfile, read object content only once Signed-off-by: Miguel Molina --- plumbing/format/packfile/parser.go | 22 +++++++++++++++------- plumbing/format/packfile/parser_test.go | 25 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index beb3e27..581c334 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -280,14 +280,8 @@ func (p *Parser) resolveDeltas() error { } if !obj.IsDelta() && len(obj.Children) > 0 { - var err error - base, err := p.get(obj) - if err != nil { - return err - } - for _, child := range obj.Children { - if _, err := p.resolveObject(child, base); err != nil { + if _, err := p.resolveObject(child, content); err != nil { return err } } @@ -297,12 +291,18 @@ func (p *Parser) resolveDeltas() error { delete(p.deltas, obj.Offset) } } + + obj.Content = nil } return nil } func (p *Parser) get(o *objectInfo) ([]byte, error) { + if len(o.Content) > 0 { + return o.Content, nil + } + e, ok := p.cache.Get(o.SHA1) // If it's not on the cache and is not a delta we can try to find it in the // storage, if there's one. 
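The shape of this optimization, isolated into a hedged sketch with hypothetical names: inflate the bytes once, then hand the same slice to every later caller instead of re-reading and re-inflating.

type memoized struct {
	content []byte
	load    func() ([]byte, error)
}

func (m *memoized) get() ([]byte, error) {
	if len(m.content) > 0 {
		return m.content, nil
	}

	c, err := m.load()
	if err != nil {
		return nil, err
	}

	m.content = c
	return c, nil
}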
@@ -460,6 +460,8 @@ type objectInfo struct { Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash + + Content []byte } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -488,6 +490,12 @@ func newDeltaObject( return obj } +func (o *objectInfo) Write(b []byte) (int, error) { + o.Content = make([]byte, len(b)) + copy(o.Content, b) + return o.Hasher.Write(b) +} + func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go index b5d482e..012a140 100644 --- a/plumbing/format/packfile/parser_test.go +++ b/plumbing/format/packfile/parser_test.go @@ -168,3 +168,28 @@ func BenchmarkParse(b *testing.B) { }) } } + +func BenchmarkParseBasic(b *testing.B) { + if err := fixtures.Init(); err != nil { + b.Fatal(err) + } + + defer func() { + if err := fixtures.Clean(); err != nil { + b.Fatal(err) + } + }() + + f := fixtures.Basic().One() + for i := 0; i < b.N; i++ { + parser, err := packfile.NewParser(packfile.NewScanner(f.Packfile())) + if err != nil { + b.Fatal(err) + } + + _, err = parser.Parse() + if err != nil { + b.Fatal(err) + } + } +} -- cgit From 65dc4f9f192cc013e4765fb1162ce6ebda16573d Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 12:18:49 +0200 Subject: plumbing: packfile, rename DiskObject to FSObject Signed-off-by: Miguel Molina --- plumbing/format/packfile/disk_object.go | 64 --------------------------------- plumbing/format/packfile/fsobject.go | 64 +++++++++++++++++++++++++++++++++ plumbing/format/packfile/packfile.go | 4 +-- 3 files changed, 66 insertions(+), 66 deletions(-) delete mode 100644 plumbing/format/packfile/disk_object.go create mode 100644 plumbing/format/packfile/fsobject.go (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/disk_object.go b/plumbing/format/packfile/disk_object.go deleted file mode 100644 index d3e8520..0000000 --- a/plumbing/format/packfile/disk_object.go +++ /dev/null @@ -1,64 +0,0 @@ -package packfile - -import ( - "io" - - "gopkg.in/src-d/go-git.v4/plumbing" -) - -// DiskObject is an object from the packfile on disk. -type DiskObject struct { - hash plumbing.Hash - h *ObjectHeader - offset int64 - size int64 - typ plumbing.ObjectType - packfile *Packfile -} - -// NewDiskObject creates a new disk object. -func NewDiskObject( - hash plumbing.Hash, - finalType plumbing.ObjectType, - offset int64, - contentSize int64, - packfile *Packfile, -) *DiskObject { - return &DiskObject{ - hash: hash, - offset: offset, - size: contentSize, - typ: finalType, - packfile: packfile, - } -} - -// Reader implements the plumbing.EncodedObject interface. -func (o *DiskObject) Reader() (io.ReadCloser, error) { - return o.packfile.getObjectContent(o.offset) -} - -// SetSize implements the plumbing.EncodedObject interface. This method -// is a noop. -func (o *DiskObject) SetSize(int64) {} - -// SetType implements the plumbing.EncodedObject interface. This method is -// a noop. -func (o *DiskObject) SetType(plumbing.ObjectType) {} - -// Hash implements the plumbing.EncodedObject interface. -func (o *DiskObject) Hash() plumbing.Hash { return o.hash } - -// Size implements the plumbing.EncodedObject interface. -func (o *DiskObject) Size() int64 { return o.size } - -// Type implements the plumbing.EncodedObject interface. -func (o *DiskObject) Type() plumbing.ObjectType { - return o.typ -} - -// Writer implements the plumbing.EncodedObject interface. This method always -// returns a nil writer. 
-func (o *DiskObject) Writer() (io.WriteCloser, error) { - return nil, nil -} diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go new file mode 100644 index 0000000..d63127e --- /dev/null +++ b/plumbing/format/packfile/fsobject.go @@ -0,0 +1,64 @@ +package packfile + +import ( + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// FSObject is an object from the packfile on the filesystem. +type FSObject struct { + hash plumbing.Hash + h *ObjectHeader + offset int64 + size int64 + typ plumbing.ObjectType + packfile *Packfile +} + +// NewFSObject creates a new filesystem object. +func NewFSObject( + hash plumbing.Hash, + finalType plumbing.ObjectType, + offset int64, + contentSize int64, + packfile *Packfile, +) *FSObject { + return &FSObject{ + hash: hash, + offset: offset, + size: contentSize, + typ: finalType, + packfile: packfile, + } +} + +// Reader implements the plumbing.EncodedObject interface. +func (o *FSObject) Reader() (io.ReadCloser, error) { + return o.packfile.getObjectContent(o.offset) +} + +// SetSize implements the plumbing.EncodedObject interface. This method +// is a noop. +func (o *FSObject) SetSize(int64) {} + +// SetType implements the plumbing.EncodedObject interface. This method is +// a noop. +func (o *FSObject) SetType(plumbing.ObjectType) {} + +// Hash implements the plumbing.EncodedObject interface. +func (o *FSObject) Hash() plumbing.Hash { return o.hash } + +// Size implements the plumbing.EncodedObject interface. +func (o *FSObject) Size() int64 { return o.size } + +// Type implements the plumbing.EncodedObject interface. +func (o *FSObject) Type() plumbing.ObjectType { + return o.typ +} + +// Writer implements the plumbing.EncodedObject interface. This method always +// returns a nil writer. +func (o *FSObject) Writer() (io.WriteCloser, error) { + return nil, nil +} diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index 37743ba..df8a3d4 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -232,7 +232,7 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { p.offsetToType[h.Offset] = typ - return NewDiskObject(hash, typ, h.Offset, size, p), nil + return NewFSObject(hash, typ, h.Offset, size, p), nil } func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { @@ -410,7 +410,7 @@ func (p *Packfile) Close() error { return closer.Close() } -// MemoryObjectFromDisk converts a DiskObject to a MemoryObject. +// MemoryObjectFromDisk converts a FSObject to a MemoryObject. 
func MemoryObjectFromDisk(obj plumbing.EncodedObject) (plumbing.EncodedObject, error) { o2 := new(plumbing.MemoryObject) o2.SetType(obj.Type()) -- cgit From 56c5e91b158bc4569b38bfd5d27d4b4be5e06a27 Mon Sep 17 00:00:00 2001 From: Miguel Molina Date: Thu, 9 Aug 2018 16:53:00 +0200 Subject: plumbing: packfile, open and close packfile on FSObject reads Signed-off-by: Miguel Molina --- plumbing/format/packfile/encoder_advanced_test.go | 7 ++- plumbing/format/packfile/encoder_test.go | 7 ++- plumbing/format/packfile/fsobject.go | 68 +++++++++++++++++----- plumbing/format/packfile/packfile.go | 69 +++++++++++++---------- plumbing/format/packfile/packfile_test.go | 31 +++++++--- 5 files changed, 126 insertions(+), 56 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index 78ddc45..fc1419e 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -6,7 +6,7 @@ import ( "math/rand" "testing" - "gopkg.in/src-d/go-billy.v3/memfs" + "gopkg.in/src-d/go-billy.v4/memfs" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" . "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" @@ -84,7 +84,8 @@ func (s *EncoderAdvancedSuite) testEncodeDecode( encodeHash, err := enc.Encode(hashes, packWindow) c.Assert(err, IsNil) - f, err := memfs.New().Create("packfile") + fs := memfs.New() + f, err := fs.Create("packfile") c.Assert(err, IsNil) _, err = f.Write(buf.Bytes()) @@ -105,7 +106,7 @@ func (s *EncoderAdvancedSuite) testEncodeDecode( _, err = f.Seek(0, io.SeekStart) c.Assert(err, IsNil) - p := NewPackfile(index, f) + p := NewPackfile(index, fs, f) decodeHash, err := p.ID() c.Assert(err, IsNil) diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index 24e2082..80b916d 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -5,7 +5,7 @@ import ( "io" stdioutil "io/ioutil" - "gopkg.in/src-d/go-billy.v3/memfs" + "gopkg.in/src-d/go-billy.v4/memfs" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/storage/memory" @@ -290,7 +290,8 @@ func objectsEqual(c *C, o1, o2 plumbing.EncodedObject) { } func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) { - file, err := memfs.New().Create("packfile") + fs := memfs.New() + file, err := fs.Create("packfile") c.Assert(err, IsNil) _, err = file.Write(buf.Bytes()) @@ -311,7 +312,7 @@ func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) { index, err := w.Index() c.Assert(err, IsNil) - return NewPackfile(index, file), func() { + return NewPackfile(index, fs, file), func() { c.Assert(file.Close(), IsNil) } } diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go index d63127e..6fd3ca5 100644 --- a/plumbing/format/packfile/fsobject.go +++ b/plumbing/format/packfile/fsobject.go @@ -3,17 +3,23 @@ package packfile import ( "io" + billy "gopkg.in/src-d/go-billy.v4" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" ) // FSObject is an object from the packfile on the filesystem. 
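// An editorial note on the design, hedged from the code below: Reader
// re-opens the packfile at o.path on every call, materializes the object
// content, and closes the file before returning, so an FSObject can
// outlive the Packfile that created it at the cost of one open per read.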
type FSObject struct { - hash plumbing.Hash - h *ObjectHeader - offset int64 - size int64 - typ plumbing.ObjectType - packfile *Packfile + hash plumbing.Hash + h *ObjectHeader + offset int64 + size int64 + typ plumbing.ObjectType + index idxfile.Index + fs billy.Filesystem + path string + cache cache.Object } // NewFSObject creates a new filesystem object. @@ -22,20 +28,42 @@ func NewFSObject( finalType plumbing.ObjectType, offset int64, contentSize int64, - packfile *Packfile, + index idxfile.Index, + fs billy.Filesystem, + path string, + cache cache.Object, ) *FSObject { return &FSObject{ - hash: hash, - offset: offset, - size: contentSize, - typ: finalType, - packfile: packfile, + hash: hash, + offset: offset, + size: contentSize, + typ: finalType, + index: index, + fs: fs, + path: path, + cache: cache, } } // Reader implements the plumbing.EncodedObject interface. func (o *FSObject) Reader() (io.ReadCloser, error) { - return o.packfile.getObjectContent(o.offset) + f, err := o.fs.Open(o.path) + if err != nil { + return nil, err + } + + p := NewPackfileWithCache(o.index, nil, f, o.cache) + r, err := p.getObjectContent(o.offset) + if err != nil { + _ = f.Close() + return nil, err + } + + if err := f.Close(); err != nil { + return nil, err + } + + return r, nil } // SetSize implements the plumbing.EncodedObject interface. This method @@ -62,3 +90,17 @@ func (o *FSObject) Type() plumbing.ObjectType { func (o *FSObject) Writer() (io.WriteCloser, error) { return nil, nil } + +type objectReader struct { + io.ReadCloser + f billy.File +} + +func (r *objectReader) Close() error { + if err := r.ReadCloser.Close(); err != nil { + _ = r.f.Close() + return err + } + + return r.f.Close() +} diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go index df8a3d4..5feb781 100644 --- a/plumbing/format/packfile/packfile.go +++ b/plumbing/format/packfile/packfile.go @@ -3,9 +3,9 @@ package packfile import ( "bytes" "io" - stdioutil "io/ioutil" "os" + billy "gopkg.in/src-d/go-billy.v4" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" @@ -24,21 +24,26 @@ var ( // Packfile allows retrieving information from inside a packfile. type Packfile struct { idxfile.Index - file io.ReadSeeker + fs billy.Filesystem + file billy.File s *Scanner deltaBaseCache cache.Object offsetToType map[int64]plumbing.ObjectType } // NewPackfileWithCache creates a new Packfile with the given object cache. +// If the filesystem is provided, the packfile will return FSObjects, otherwise +// it will return MemoryObjects. func NewPackfileWithCache( index idxfile.Index, - file io.ReadSeeker, + fs billy.Filesystem, + file billy.File, cache cache.Object, ) *Packfile { s := NewScanner(file) return &Packfile{ index, + fs, file, s, cache, @@ -48,8 +53,10 @@ func NewPackfileWithCache( // NewPackfile returns a packfile representation for the given packfile file // and packfile idx. -func NewPackfile(index idxfile.Index, file io.ReadSeeker) *Packfile { - return NewPackfileWithCache(index, file, cache.NewObjectLRUDefault()) +// If the filesystem is provided, the packfile will return FSObjects, otherwise +// it will return MemoryObjects. +func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File) *Packfile { + return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault()) } // Get retrieves the encoded object in the packfile with the given hash. 
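A hedged usage sketch, not part of the patch: opening an on-disk packfile so that lookups return lazily loaded FSObjects. osfs is gopkg.in/src-d/go-billy.v4/osfs, and idx is a previously decoded idxfile.Index, as in the tests below.

func openPackfile(idx idxfile.Index, path string) (*Packfile, error) {
	fs := osfs.New("")
	f, err := fs.Open(path)
	if err != nil {
		return nil, err
	}

	// Close on the returned Packfile also closes f.
	return NewPackfile(idx, fs, f), nil
}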
@@ -215,6 +222,12 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { return nil, err } + // If we have no filesystem, we will return a MemoryObject instead + // of an FSObject. + if p.fs == nil { + return p.getNextObject(h) + } + hash, err := p.FindHash(h.Offset) if err != nil { return nil, err @@ -232,7 +245,16 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) { p.offsetToType[h.Offset] = typ - return NewFSObject(hash, typ, h.Offset, size, p), nil + return NewFSObject( + hash, + typ, + h.Offset, + size, + p.Index, + p.fs, + p.file.Name(), + p.deltaBaseCache, + ), nil } func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { @@ -245,10 +267,20 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return nil, err } + obj, err := p.getNextObject(h) + if err != nil { + return nil, err + } + + return obj.Reader() +} + +func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error) { var obj = new(plumbing.MemoryObject) obj.SetSize(h.Length) obj.SetType(h.Type) + var err error switch h.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: err = p.fillRegularObjectContent(obj) @@ -264,7 +296,7 @@ func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) { return nil, err } - return obj.Reader() + return obj, nil } func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error { @@ -410,29 +442,6 @@ func (p *Packfile) Close() error { return closer.Close() } -// MemoryObjectFromDisk converts a FSObject to a MemoryObject. -func MemoryObjectFromDisk(obj plumbing.EncodedObject) (plumbing.EncodedObject, error) { - o2 := new(plumbing.MemoryObject) - o2.SetType(obj.Type()) - o2.SetSize(obj.Size()) - - r, err := obj.Reader() - if err != nil { - return nil, err - } - - data, err := stdioutil.ReadAll(r) - if err != nil { - return nil, err - } - - if _, err := o2.Write(data); err != nil { - return nil, err - } - - return o2, nil -} - type objectIter struct { p *Packfile typ plumbing.ObjectType diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go index 3193bed..05dc8a7 100644 --- a/plumbing/format/packfile/packfile_test.go +++ b/plumbing/format/packfile/packfile_test.go @@ -109,13 +109,14 @@ var expectedEntries = map[plumbing.Hash]int64{ func (s *PackfileSuite) SetUpTest(c *C) { s.f = fixtures.Basic().One() - f, err := osfs.New("").Open(s.f.Packfile().Name()) + fs := osfs.New("") + f, err := fs.Open(s.f.Packfile().Name()) c.Assert(err, IsNil) s.idx = idxfile.NewMemoryIndex() c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil) - s.p = packfile.NewPackfile(s.idx, f) + s.p = packfile.NewPackfile(s.idx, fs, f) } func (s *PackfileSuite) TearDownTest(c *C) { @@ -125,7 +126,11 @@ func (s *PackfileSuite) TearDownTest(c *C) { func (s *PackfileSuite) TestDecode(c *C) { fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { index := getIndexFromIdxFile(f.Idx()) - p := packfile.NewPackfile(index, f.Packfile()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + p := packfile.NewPackfile(index, fs, pf) defer p.Close() for _, h := range expectedHashes { @@ -140,7 +145,11 @@ func (s *PackfileSuite) TestDecodeByTypeRefDelta(c *C) { f := fixtures.Basic().ByTag("ref-delta").One() index := getIndexFromIdxFile(f.Idx()) - packfile := packfile.NewPackfile(index, f.Packfile()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, 
IsNil) + + packfile := packfile.NewPackfile(index, fs, pf) defer packfile.Close() iter, err := packfile.GetByType(plumbing.CommitObject) @@ -171,7 +180,11 @@ func (s *PackfileSuite) TestDecodeByType(c *C) { fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { for _, t := range ts { index := getIndexFromIdxFile(f.Idx()) - packfile := packfile.NewPackfile(index, f.Packfile()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + packfile := packfile.NewPackfile(index, fs, pf) defer packfile.Close() iter, err := packfile.GetByType(t) @@ -188,10 +201,14 @@ func (s *PackfileSuite) TestDecodeByType(c *C) { func (s *PackfileSuite) TestDecodeByTypeConstructor(c *C) { f := fixtures.Basic().ByTag("packfile").One() index := getIndexFromIdxFile(f.Idx()) - packfile := packfile.NewPackfile(index, f.Packfile()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + packfile := packfile.NewPackfile(index, fs, pf) defer packfile.Close() - _, err := packfile.GetByType(plumbing.OFSDeltaObject) + _, err = packfile.GetByType(plumbing.OFSDeltaObject) c.Assert(err, Equals, plumbing.ErrInvalidType) _, err = packfile.GetByType(plumbing.REFDeltaObject) -- cgit From a8c4426d204f42e683e902dcb277494004d5e59d Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 14 Aug 2018 11:59:11 +0200 Subject: plumbing: add buffer cache and use it in packfile parser It uses less memory and is faster as slices don't have to be converted from/to MemoryObject and they are indexed by offset. Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 581c334..88f33dc 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -48,7 +48,7 @@ type Parser struct { pendingRefDeltas map[plumbing.Hash][]*objectInfo checksum plumbing.Hash - cache *cache.ObjectLRU + cache *cache.BufferLRU // delta content by offset, only used if source is not seekable deltas map[int64][]byte @@ -82,7 +82,7 @@ func NewParserWithStorage( scanner: scanner, ob: ob, count: 0, - cache: cache.NewObjectLRUDefault(), + cache: cache.NewBufferLRUDefault(), pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo), deltas: deltas, }, nil @@ -303,29 +303,29 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { return o.Content, nil } - e, ok := p.cache.Get(o.SHA1) + b, ok := p.cache.Get(o.Offset) // If it's not on the cache and is not a delta we can try to find it in the // storage, if there's one. 
if !ok && p.storage != nil && !o.Type.IsDelta() { var err error - e, err = p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) + e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) if err != nil { return nil, err } - } - if e != nil { r, err := e.Reader() if err != nil { return nil, err } - buf := make([]byte, e.Size()) - if _, err = r.Read(buf); err != nil { + b = make([]byte, e.Size()) + if _, err = r.Read(b); err != nil { return nil, err } + } - return buf, nil + if b != nil { + return b, nil } var data []byte @@ -348,11 +348,7 @@ func (p *Parser) get(o *objectInfo) ([]byte, error) { } if len(o.Children) > 0 { - m := &plumbing.MemoryObject{} - m.Write(data) - m.SetType(o.Type) - m.SetSize(o.Size()) - p.cache.Put(m) + p.cache.Put(o.Offset, data) } return data, nil -- cgit From 555a6ca02e88279cef421df88a108c2955fcde77 Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 14 Aug 2018 12:21:12 +0200 Subject: plumbing/pacfile: tidy up objectInfo struct * a new hasher is created when needed * delete unused fields * base content is no longer kept in memory Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 58 +++++++++++++++----------------------- 1 file changed, 22 insertions(+), 36 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 88f33dc..3a9c4d7 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -221,21 +221,22 @@ func (p *Parser) indexObjects() error { ota = newBaseObject(oh.Offset, oh.Length, t) } - size, crc, err := p.scanner.NextObject(buf) + _, crc, err := p.scanner.NextObject(buf) if err != nil { return err } ota.Crc32 = crc - ota.PackSize = size ota.Length = oh.Length data := buf.Bytes() if !delta { - if _, err := ota.Write(data); err != nil { + sha1, err := getSHA1(ota.Type, data) + if err != nil { return err } - ota.SHA1 = ota.Sum() + + ota.SHA1 = sha1 p.oiByHash[ota.SHA1] = ota } @@ -291,18 +292,12 @@ func (p *Parser) resolveDeltas() error { delete(p.deltas, obj.Offset) } } - - obj.Content = nil } return nil } func (p *Parser) get(o *objectInfo) ([]byte, error) { - if len(o.Content) > 0 { - return o.Content, nil - } - b, ok := p.cache.Get(o.Offset) // If it's not on the cache and is not a delta we can try to find it in the // storage, if there's one. @@ -406,8 +401,6 @@ func (p *Parser) readData(o *objectInfo) ([]byte, error) { return data, nil } - // TODO: skip header. Header size can be calculated with the offset of the - // next offset in the first pass. 
if _, err := p.scanner.SeekFromStart(o.Offset); err != nil { return nil, err } @@ -431,33 +424,37 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { } ota.Type = ota.Parent.Type - ota.Hasher = plumbing.NewHasher(ota.Type, int64(len(patched))) - if _, err := ota.Write(patched); err != nil { + sha1, err := getSHA1(ota.Type, patched) + if err != nil { return nil, err } - ota.SHA1 = ota.Sum() + + ota.SHA1 = sha1 ota.Length = int64(len(patched)) return patched, nil } -type objectInfo struct { - plumbing.Hasher +func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) { + hasher := plumbing.NewHasher(t, int64(len(data))) + if _, err := hasher.Write(data); err != nil { + return plumbing.ZeroHash, err + } + + return hasher.Sum(), nil +} - Offset int64 - Length int64 - HeaderLength int64 - PackSize int64 - Type plumbing.ObjectType - DiskType plumbing.ObjectType +type objectInfo struct { + Offset int64 + Length int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType Crc32 uint32 Parent *objectInfo Children []*objectInfo SHA1 plumbing.Hash - - Content []byte } func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { @@ -469,29 +466,18 @@ func newDeltaObject( t plumbing.ObjectType, parent *objectInfo, ) *objectInfo { - children := make([]*objectInfo, 0) - obj := &objectInfo{ - Hasher: plumbing.NewHasher(t, length), Offset: offset, Length: length, - PackSize: 0, Type: t, DiskType: t, Crc32: 0, Parent: parent, - Children: children, } return obj } -func (o *objectInfo) Write(b []byte) (int, error) { - o.Content = make([]byte, len(b)) - copy(o.Content, b) - return o.Hasher.Write(b) -} - func (o *objectInfo) IsDelta() bool { return o.Type.IsDelta() } -- cgit From eb2aa9b2c3bf7af93fd261228be1b96e61c52bcf Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 14 Aug 2018 16:56:29 +0200 Subject: plumbing/packfile: do not compute sha1 for already undeltified objects Signed-off-by: Javi Fontan --- plumbing/format/packfile/parser.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'plumbing/format/packfile') diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go index 3a9c4d7..28582b5 100644 --- a/plumbing/format/packfile/parser.go +++ b/plumbing/format/packfile/parser.go @@ -423,14 +423,16 @@ func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { return nil, err } - ota.Type = ota.Parent.Type - sha1, err := getSHA1(ota.Type, patched) - if err != nil { - return nil, err - } + if ota.SHA1 == plumbing.ZeroHash { + ota.Type = ota.Parent.Type + sha1, err := getSHA1(ota.Type, patched) + if err != nil { + return nil, err + } - ota.SHA1 = sha1 - ota.Length = int64(len(patched)) + ota.SHA1 = sha1 + ota.Length = int64(len(patched)) + } return patched, nil } -- cgit From f84c6b194f2eced0c068b9cdc9264d30e6d2021b Mon Sep 17 00:00:00 2001 From: Javi Fontan Date: Tue, 21 Aug 2018 17:35:52 +0200 Subject: plumbing/idxfile: object iterators returns entries in offset order In the latest change the order was changed from offset order in packfiles to hash order. This makes reading all the objects not as efficient as before. It also created problems when the previous order was expected. Also added EntriesByOffset to indexes. 
From f84c6b194f2eced0c068b9cdc9264d30e6d2021b Mon Sep 17 00:00:00 2001
From: Javi Fontan
Date: Tue, 21 Aug 2018 17:35:52 +0200
Subject: plumbing/idxfile: object iterators return entries in offset order

The latest change switched the iteration order from packfile offset
order to hash order. This makes reading all the objects less efficient
than before and broke code that expected the previous order.

Also added EntriesByOffset to indexes.

Signed-off-by: Javi Fontan
---
 plumbing/format/packfile/packfile.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'plumbing/format/packfile')

diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go
index 5feb781..18fcca7 100644
--- a/plumbing/format/packfile/packfile.go
+++ b/plumbing/format/packfile/packfile.go
@@ -394,7 +394,7 @@ func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter
 		plumbing.TreeObject,
 		plumbing.CommitObject,
 		plumbing.TagObject:
-		entries, err := p.Entries()
+		entries, err := p.EntriesByOffset()
 		if err != nil {
 			return nil, err
 		}
-- cgit
From 790191ef92ec6382ce65cc30286c901863b3b7a3 Mon Sep 17 00:00:00 2001
From: Javi Fontan
Date: Wed, 22 Aug 2018 16:46:50 +0200
Subject: plumbing, storage: add bases to the common cache

After a clone, only resolved deltas were added to the cache. This
caused slowdowns in small repositories, where most objects can be held
in the cache.

This change also makes packfiles reuse the delta cache from the store.
Previously a new delta cache was created each time a packfile object
was created, which slowed down object access a bit and increased
memory consumption once bases were added to the cache.

Signed-off-by: Javi Fontan
---
 plumbing/format/packfile/fsobject.go | 10 ++++++++++
 plumbing/format/packfile/packfile.go | 15 +++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'plumbing/format/packfile')

diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go
index 6fd3ca5..330cb73 100644
--- a/plumbing/format/packfile/fsobject.go
+++ b/plumbing/format/packfile/fsobject.go
@@ -47,6 +47,16 @@ func NewFSObject(

 // Reader implements the plumbing.EncodedObject interface.
 func (o *FSObject) Reader() (io.ReadCloser, error) {
+	obj, ok := o.cache.Get(o.hash)
+	if ok {
+		reader, err := obj.Reader()
+		if err != nil {
+			return nil, err
+		}
+
+		return reader, nil
+	}
+
 	f, err := o.fs.Open(o.path)
 	if err != nil {
 		return nil, err
diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go
index 18fcca7..852a834 100644
--- a/plumbing/format/packfile/packfile.go
+++ b/plumbing/format/packfile/packfile.go
@@ -258,6 +258,19 @@ func (p *Packfile) nextObject() (plumbing.EncodedObject, error) {
 }

 func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
+	ref, err := p.FindHash(offset)
+	if err == nil {
+		obj, ok := p.cacheGet(ref)
+		if ok {
+			reader, err := obj.Reader()
+			if err != nil {
+				return nil, err
+			}
+
+			return reader, nil
+		}
+	}
+
 	if _, err := p.s.SeekFromStart(offset); err != nil {
 		return nil, err
 	}
@@ -306,6 +319,8 @@ func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error {
 	}

 	_, _, err = p.s.NextObject(w)
+	p.cachePut(obj)
+
 	return err
 }
-- cgit
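The fsobject.go and packfile.go hunks above share one pattern: look the object up in the hash-keyed cache before seeking into the packfile. A simplified model of that read path follows, with a plain map standing in for cache.Object and a callback standing in for the disk branch; both are illustrative, not the real types.

package main

import (
	"bytes"
	"fmt"
	"io"
)

// readerFor mimics the shape of FSObject.Reader after this change:
// serve the object from the shared cache when present, and only
// otherwise open the packfile on disk and inflate the object.
func readerFor(hash string, cache map[string][]byte, openDisk func() (io.Reader, error)) (io.Reader, error) {
	if b, ok := cache[hash]; ok {
		return bytes.NewReader(b), nil // hit: no disk access, no inflate
	}
	return openDisk() // miss: fall back to the packfile
}

func main() {
	cache := map[string][]byte{"deadbeef": []byte("from cache")}

	r, err := readerFor("deadbeef", cache, func() (io.Reader, error) {
		return bytes.NewReader([]byte("from disk")), nil
	})
	if err != nil {
		panic(err)
	}

	b, _ := io.ReadAll(r)
	fmt.Println(string(b)) // prints "from cache"
}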
From 8f6b3127c1ff7661113fff2662416c328971a285 Mon Sep 17 00:00:00 2001
From: kuba--
Date: Fri, 7 Sep 2018 09:27:35 +0200
Subject: Expose Storage cache.

Signed-off-by: kuba--
---
 plumbing/format/packfile/encoder_advanced_test.go | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'plumbing/format/packfile')

diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go
index fc1419e..e15126e 100644
--- a/plumbing/format/packfile/encoder_advanced_test.go
+++ b/plumbing/format/packfile/encoder_advanced_test.go
@@ -8,6 +8,7 @@ import (

 	"gopkg.in/src-d/go-billy.v4/memfs"
 	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/cache"
 	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
 	. "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
 	"gopkg.in/src-d/go-git.v4/plumbing/storer"
@@ -32,8 +33,7 @@ func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) {
 	fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
 		ByTag("packfile").ByTag(".git").One())
 	fixs.Test(c, func(f *fixtures.Fixture) {
-		storage, err := filesystem.NewStorage(f.DotGit())
-		c.Assert(err, IsNil)
+		storage := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault())
 		s.testEncodeDecode(c, storage, 10)
 	})
 }
@@ -47,8 +47,7 @@ func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) {
 	fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
 		ByTag("packfile").ByTag(".git").One())
 	fixs.Test(c, func(f *fixtures.Fixture) {
-		storage, err := filesystem.NewStorage(f.DotGit())
-		c.Assert(err, IsNil)
+		storage := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault())
 		s.testEncodeDecode(c, storage, 0)
 	})
 }
-- cgit
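After this change the object cache is supplied by the caller rather than created inside the storage, so one cache can back several storages and its size is under the caller's control. A minimal usage sketch mirroring the test change above; the repository path and the osfs filesystem are placeholders for whatever the caller already has.

package main

import (
	"gopkg.in/src-d/go-billy.v4/osfs"
	"gopkg.in/src-d/go-git.v4/plumbing/cache"
	"gopkg.in/src-d/go-git.v4/storage/filesystem"
)

func main() {
	// The cache is injected instead of being created internally, so
	// callers control its size and can share it between storages.
	fs := osfs.New("/path/to/repo/.git") // placeholder path
	storage := filesystem.NewStorage(fs, cache.NewObjectLRUDefault())
	_ = storage
}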