diff options
Diffstat (limited to 'plumbing/format/packfile')
20 files changed, 1930 insertions, 1177 deletions
diff --git a/plumbing/format/packfile/common.go b/plumbing/format/packfile/common.go index 7dad1f6..2b4aceb 100644 --- a/plumbing/format/packfile/common.go +++ b/plumbing/format/packfile/common.go @@ -23,25 +23,28 @@ const ( maskType = uint8(112) // 0111 0000 ) -// UpdateObjectStorage updates the given storer.EncodedObjectStorer with the contents of the +// UpdateObjectStorage updates the storer with the objects in the given // packfile. -func UpdateObjectStorage(s storer.EncodedObjectStorer, packfile io.Reader) error { - if sw, ok := s.(storer.PackfileWriter); ok { - return writePackfileToObjectStorage(sw, packfile) +func UpdateObjectStorage(s storer.Storer, packfile io.Reader) error { + if pw, ok := s.(storer.PackfileWriter); ok { + return WritePackfileToObjectStorage(pw, packfile) } - stream := NewScanner(packfile) - d, err := NewDecoder(stream, s) + p, err := NewParserWithStorage(NewScanner(packfile), s) if err != nil { return err } - _, err = d.Decode() + _, err = p.Parse() return err } -func writePackfileToObjectStorage(sw storer.PackfileWriter, packfile io.Reader) error { - var err error +// WritePackfileToObjectStorage writes all the packfile objects into the given +// object storage. +func WritePackfileToObjectStorage( + sw storer.PackfileWriter, + packfile io.Reader, +) (err error) { w, err := sw.PackfileWriter() if err != nil { return err diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go deleted file mode 100644 index f706e5d..0000000 --- a/plumbing/format/packfile/decoder.go +++ /dev/null @@ -1,495 +0,0 @@ -package packfile - -import ( - "bytes" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/cache" - "gopkg.in/src-d/go-git.v4/plumbing/storer" -) - -// Format specifies if the packfile uses ref-deltas or ofs-deltas. -type Format int - -// Possible values of the Format type. 
-const ( - UnknownFormat Format = iota - OFSDeltaFormat - REFDeltaFormat -) - -var ( - // ErrMaxObjectsLimitReached is returned by Decode when the number - // of objects in the packfile is higher than - // Decoder.MaxObjectsLimit. - ErrMaxObjectsLimitReached = NewError("max. objects limit reached") - // ErrInvalidObject is returned by Decode when an invalid object is - // found in the packfile. - ErrInvalidObject = NewError("invalid git object") - // ErrPackEntryNotFound is returned by Decode when a reference in - // the packfile references and unknown object. - ErrPackEntryNotFound = NewError("can't find a pack entry") - // ErrZLib is returned by Decode when there was an error unzipping - // the packfile contents. - ErrZLib = NewError("zlib reading error") - // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object - // to recall cannot be returned. - ErrCannotRecall = NewError("cannot recall object") - // ErrResolveDeltasNotSupported is returned if a NewDecoder is used with a - // non-seekable scanner and without a plumbing.ObjectStorage - ErrResolveDeltasNotSupported = NewError("resolve delta is not supported") - // ErrNonSeekable is returned if a ReadObjectAt method is called without a - // seekable scanner - ErrNonSeekable = NewError("non-seekable scanner") - // ErrRollback error making Rollback over a transaction after an error - ErrRollback = NewError("rollback error, during set error") - // ErrAlreadyDecoded is returned if NewDecoder is called for a second time - ErrAlreadyDecoded = NewError("packfile was already decoded") -) - -// Decoder reads and decodes packfiles from an input Scanner, if an ObjectStorer -// was provided the decoded objects are store there. If not the decode object -// is destroyed. The Offsets and CRCs are calculated whether an -// ObjectStorer was provided or not. 
-type Decoder struct { - deltaBaseCache cache.Object - - s *Scanner - o storer.EncodedObjectStorer - tx storer.Transaction - - isDecoded bool - - // hasBuiltIndex indicates if the index is fully built or not. If it is not, - // will be built incrementally while decoding. - hasBuiltIndex bool - idx *Index - - offsetToType map[int64]plumbing.ObjectType - decoderType plumbing.ObjectType -} - -// NewDecoder returns a new Decoder that decodes a Packfile using the given -// Scanner and stores the objects in the provided EncodedObjectStorer. ObjectStorer can be nil, in this -// If the passed EncodedObjectStorer is nil, objects are not stored, but -// offsets on the Packfile and CRCs are calculated. -// -// If EncodedObjectStorer is nil and the Scanner is not Seekable, ErrNonSeekable is -// returned. -// -// If the ObjectStorer implements storer.Transactioner, a transaction is created -// during the Decode execution. If anything fails, Rollback is called -func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) { - return NewDecoderForType(s, o, plumbing.AnyObject, - cache.NewObjectLRUDefault()) -} - -// NewDecoderWithCache is a version of NewDecoder where cache can be specified. -func NewDecoderWithCache(s *Scanner, o storer.EncodedObjectStorer, - cacheObject cache.Object) (*Decoder, error) { - - return NewDecoderForType(s, o, plumbing.AnyObject, cacheObject) -} - -// NewDecoderForType returns a new Decoder but in this case for a specific object type. -// When an object is read using this Decoder instance and it is not of the same type of -// the specified one, nil will be returned. This is intended to avoid the content -// deserialization of all the objects. -// -// cacheObject is a cache.Object implementation that is used to speed up the -// process. If cache is not needed you can pass nil. To create an LRU cache -// object with the default size you can use the helper cache.ObjectLRUDefault(). 
-func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, - t plumbing.ObjectType, cacheObject cache.Object) (*Decoder, error) { - - if t == plumbing.OFSDeltaObject || - t == plumbing.REFDeltaObject || - t == plumbing.InvalidObject { - return nil, plumbing.ErrInvalidType - } - - if !canResolveDeltas(s, o) { - return nil, ErrResolveDeltasNotSupported - } - - return &Decoder{ - s: s, - o: o, - deltaBaseCache: cacheObject, - - idx: NewIndex(0), - offsetToType: make(map[int64]plumbing.ObjectType), - decoderType: t, - }, nil -} - -func canResolveDeltas(s *Scanner, o storer.EncodedObjectStorer) bool { - return s.IsSeekable || o != nil -} - -// Decode reads a packfile and stores it in the value pointed to by s. The -// offsets and the CRCs are calculated by this method -func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { - defer func() { d.isDecoded = true }() - - if d.isDecoded { - return plumbing.ZeroHash, ErrAlreadyDecoded - } - - if err := d.doDecode(); err != nil { - return plumbing.ZeroHash, err - } - - return d.s.Checksum() -} - -func (d *Decoder) doDecode() error { - _, count, err := d.s.Header() - if err != nil { - return err - } - - if !d.hasBuiltIndex { - d.idx = NewIndex(int(count)) - } - defer func() { d.hasBuiltIndex = true }() - - _, isTxStorer := d.o.(storer.Transactioner) - switch { - case d.o == nil: - return d.decodeObjects(int(count)) - case isTxStorer: - return d.decodeObjectsWithObjectStorerTx(int(count)) - default: - return d.decodeObjectsWithObjectStorer(int(count)) - } -} - -func (d *Decoder) decodeObjects(count int) error { - for i := 0; i < count; i++ { - if _, err := d.DecodeObject(); err != nil { - return err - } - } - - return nil -} - -func (d *Decoder) decodeObjectsWithObjectStorer(count int) error { - for i := 0; i < count; i++ { - obj, err := d.DecodeObject() - if err != nil { - return err - } - - if _, err := d.o.SetEncodedObject(obj); err != nil { - return err - } - } - - return nil -} - -func (d *Decoder) 
decodeObjectsWithObjectStorerTx(count int) error { - d.tx = d.o.(storer.Transactioner).Begin() - - for i := 0; i < count; i++ { - obj, err := d.DecodeObject() - if err != nil { - return err - } - - if _, err := d.tx.SetEncodedObject(obj); err != nil { - if rerr := d.tx.Rollback(); rerr != nil { - return ErrRollback.AddDetails( - "error: %s, during tx.Set error: %s", rerr, err, - ) - } - - return err - } - - } - - return d.tx.Commit() -} - -// DecodeObject reads the next object from the scanner and returns it. This -// method can be used in replacement of the Decode method, to work in a -// interactive way. If you created a new decoder instance using NewDecoderForType -// constructor, if the object decoded is not equals to the specified one, nil will -// be returned -func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) { - return d.doDecodeObject(d.decoderType) -} - -func (d *Decoder) doDecodeObject(t plumbing.ObjectType) (plumbing.EncodedObject, error) { - h, err := d.s.NextObjectHeader() - if err != nil { - return nil, err - } - - if t == plumbing.AnyObject { - return d.decodeByHeader(h) - } - - return d.decodeIfSpecificType(h) -} - -func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) { - var ( - obj plumbing.EncodedObject - realType plumbing.ObjectType - err error - ) - switch h.Type { - case plumbing.OFSDeltaObject: - realType, err = d.ofsDeltaType(h.OffsetReference) - case plumbing.REFDeltaObject: - realType, err = d.refDeltaType(h.Reference) - if err == plumbing.ErrObjectNotFound { - obj, err = d.decodeByHeader(h) - if err != nil { - realType = obj.Type() - } - } - default: - realType = h.Type - } - - if err != nil { - return nil, err - } - - d.offsetToType[h.Offset] = realType - - if d.decoderType == realType { - if obj != nil { - return obj, nil - } - - return d.decodeByHeader(h) - } - - return nil, nil -} - -func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) { - t, ok := 
d.offsetToType[offset] - if !ok { - return plumbing.InvalidObject, plumbing.ErrObjectNotFound - } - - return t, nil -} - -func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) { - e, ok := d.idx.LookupHash(ref) - if !ok { - return plumbing.InvalidObject, plumbing.ErrObjectNotFound - } - - return d.ofsDeltaType(int64(e.Offset)) -} - -func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) { - obj := d.newObject() - obj.SetSize(h.Length) - obj.SetType(h.Type) - - var crc uint32 - var err error - switch h.Type { - case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: - crc, err = d.fillRegularObjectContent(obj) - case plumbing.REFDeltaObject: - crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) - case plumbing.OFSDeltaObject: - crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) - default: - err = ErrInvalidObject.AddDetails("type %q", h.Type) - } - - if err != nil { - return obj, err - } - - if !d.hasBuiltIndex { - d.idx.Add(obj.Hash(), uint64(h.Offset), crc) - } - - return obj, nil -} - -func (d *Decoder) newObject() plumbing.EncodedObject { - if d.o == nil { - return &plumbing.MemoryObject{} - } - - return d.o.NewEncodedObject() -} - -// DecodeObjectAt reads an object at the given location. Every EncodedObject -// returned is added into a internal index. This is intended to be able to regenerate -// objects from deltas (offset deltas or reference deltas) without an package index -// (.idx file). If Decode wasn't called previously objects offset should provided -// using the SetOffsets method. It decodes the object regardless of the Decoder -// type. 
-func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) { - if !d.s.IsSeekable { - return nil, ErrNonSeekable - } - - beforeJump, err := d.s.SeekFromStart(offset) - if err != nil { - return nil, err - } - - defer func() { - _, seekErr := d.s.SeekFromStart(beforeJump) - if err == nil { - err = seekErr - } - }() - - return d.doDecodeObject(plumbing.AnyObject) -} - -func (d *Decoder) fillRegularObjectContent(obj plumbing.EncodedObject) (uint32, error) { - w, err := obj.Writer() - if err != nil { - return 0, err - } - - _, crc, err := d.s.NextObject(w) - return crc, err -} - -func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) (uint32, error) { - buf := bufPool.Get().(*bytes.Buffer) - buf.Reset() - _, crc, err := d.s.NextObject(buf) - if err != nil { - return 0, err - } - - base, ok := d.cacheGet(ref) - if !ok { - base, err = d.recallByHash(ref) - if err != nil { - return 0, err - } - } - - obj.SetType(base.Type()) - err = ApplyDelta(obj, base, buf.Bytes()) - d.cachePut(obj) - bufPool.Put(buf) - - return crc, err -} - -func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) { - buf := bytes.NewBuffer(nil) - _, crc, err := d.s.NextObject(buf) - if err != nil { - return 0, err - } - - e, ok := d.idx.LookupOffset(uint64(offset)) - var base plumbing.EncodedObject - if ok { - base, ok = d.cacheGet(e.Hash) - } - - if !ok { - base, err = d.recallByOffset(offset) - if err != nil { - return 0, err - } - - d.cachePut(base) - } - - obj.SetType(base.Type()) - err = ApplyDelta(obj, base, buf.Bytes()) - d.cachePut(obj) - - return crc, err -} - -func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { - if d.deltaBaseCache == nil { - return nil, false - } - - return d.deltaBaseCache.Get(h) -} - -func (d *Decoder) cachePut(obj plumbing.EncodedObject) { - if d.deltaBaseCache == nil { - return - } - - d.deltaBaseCache.Put(obj) -} - -func (d *Decoder) 
recallByOffset(o int64) (plumbing.EncodedObject, error) { - if d.s.IsSeekable { - return d.DecodeObjectAt(o) - } - - if e, ok := d.idx.LookupOffset(uint64(o)); ok { - return d.recallByHashNonSeekable(e.Hash) - } - - return nil, plumbing.ErrObjectNotFound -} - -func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { - if d.s.IsSeekable { - if e, ok := d.idx.LookupHash(h); ok { - return d.DecodeObjectAt(int64(e.Offset)) - } - } - - return d.recallByHashNonSeekable(h) -} - -// recallByHashNonSeekable if we are in a transaction the objects are read from -// the transaction, if not are directly read from the ObjectStorer -func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.EncodedObject, err error) { - if d.tx != nil { - obj, err = d.tx.EncodedObject(plumbing.AnyObject, h) - } else { - obj, err = d.o.EncodedObject(plumbing.AnyObject, h) - } - - if err != plumbing.ErrObjectNotFound { - return obj, err - } - - return nil, plumbing.ErrObjectNotFound -} - -// SetIndex sets an index for the packfile. It is recommended to set this. -// The index might be read from a file or reused from a previous Decoder usage -// (see Index function). -func (d *Decoder) SetIndex(idx *Index) { - d.hasBuiltIndex = true - d.idx = idx -} - -// Index returns the index for the packfile. If index was set with SetIndex, -// Index will return it. Otherwise, it will return an index that is built while -// decoding. If neither SetIndex was called with a full index or Decode called -// for the whole packfile, then the returned index will be incomplete. -func (d *Decoder) Index() *Index { - return d.idx -} - -// Close closes the Scanner. 
usually this mean that the whole reader is read and -// discarded -func (d *Decoder) Close() error { - return d.s.Close() -} diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go deleted file mode 100644 index b5bc7b7..0000000 --- a/plumbing/format/packfile/decoder_test.go +++ /dev/null @@ -1,396 +0,0 @@ -package packfile_test - -import ( - "io" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/cache" - "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" - "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" - "gopkg.in/src-d/go-git.v4/plumbing/storer" - "gopkg.in/src-d/go-git.v4/storage/filesystem" - "gopkg.in/src-d/go-git.v4/storage/memory" - - . "gopkg.in/check.v1" - "gopkg.in/src-d/go-billy.v4/memfs" - "gopkg.in/src-d/go-git-fixtures.v3" -) - -type ReaderSuite struct { - fixtures.Suite -} - -var _ = Suite(&ReaderSuite{}) - -func (s *ReaderSuite) TestNewDecodeNonSeekable(c *C) { - scanner := packfile.NewScanner(nil) - d, err := packfile.NewDecoder(scanner, nil) - - c.Assert(d, IsNil) - c.Assert(err, NotNil) -} - -func (s *ReaderSuite) TestDecode(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -func (s *ReaderSuite) TestDecodeByTypeRefDelta(c *C) { - f := fixtures.Basic().ByTag("ref-delta").One() - - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, plumbing.CommitObject, - cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - // Index required to decode by ref-delta. 
- d.SetIndex(getIndexFromIdxFile(f.Idx())) - - defer d.Close() - - _, count, err := scanner.Header() - c.Assert(err, IsNil) - - var i uint32 - for i = 0; i < count; i++ { - obj, err := d.DecodeObject() - c.Assert(err, IsNil) - - if obj != nil { - c.Assert(obj.Type(), Equals, plumbing.CommitObject) - } - } -} - -func (s *ReaderSuite) TestDecodeByTypeRefDeltaError(c *C) { - fixtures.Basic().ByTag("ref-delta").Test(c, func(f *fixtures.Fixture) { - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, - plumbing.CommitObject, cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - defer d.Close() - - _, count, err := scanner.Header() - c.Assert(err, IsNil) - - isError := false - var i uint32 - for i = 0; i < count; i++ { - _, err := d.DecodeObject() - if err != nil { - isError = true - break - } - } - c.Assert(isError, Equals, true) - }) - -} - -func (s *ReaderSuite) TestDecodeByType(c *C) { - ts := []plumbing.ObjectType{ - plumbing.CommitObject, - plumbing.TagObject, - plumbing.TreeObject, - plumbing.BlobObject, - } - - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - for _, t := range ts { - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, storage, t, - cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - d.SetIndex(getIndexFromIdxFile(f.Idx())) - } - - defer d.Close() - - _, count, err := scanner.Header() - c.Assert(err, IsNil) - - var i uint32 - for i = 0; i < count; i++ { - obj, err := d.DecodeObject() - c.Assert(err, IsNil) - - if obj != nil { - c.Assert(obj.Type(), Equals, t) - } - } - } - }) -} -func (s *ReaderSuite) TestDecodeByTypeConstructor(c *C) { - f := fixtures.Basic().ByTag("packfile").One() - storage := memory.NewStorage() - scanner := packfile.NewScanner(f.Packfile()) - - _, err 
:= packfile.NewDecoderForType(scanner, storage, - plumbing.OFSDeltaObject, cache.NewObjectLRUDefault()) - c.Assert(err, Equals, plumbing.ErrInvalidType) - - _, err = packfile.NewDecoderForType(scanner, storage, - plumbing.REFDeltaObject, cache.NewObjectLRUDefault()) - - c.Assert(err, Equals, plumbing.ErrInvalidType) - - _, err = packfile.NewDecoderForType(scanner, storage, plumbing.InvalidObject, - cache.NewObjectLRUDefault()) - c.Assert(err, Equals, plumbing.ErrInvalidType) -} - -func (s *ReaderSuite) TestDecodeMultipleTimes(c *C) { - f := fixtures.Basic().ByTag("packfile").One() - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - ch, err = d.Decode() - c.Assert(err, Equals, packfile.ErrAlreadyDecoded) - c.Assert(ch, Equals, plumbing.ZeroHash) -} - -func (s *ReaderSuite) TestDecodeInMemory(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - }) -} - -type nonSeekableReader struct { - r io.Reader -} - -func (nsr nonSeekableReader) Read(b []byte) (int, error) { - return nsr.r.Read(b) -} - -func (s *ReaderSuite) TestDecodeNoSeekableWithTxStorer(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - reader := nonSeekableReader{ - r: f.Packfile(), - } - - scanner := packfile.NewScanner(reader) - - var storage storer.EncodedObjectStorer = memory.NewStorage() - _, isTxStorer := storage.(storer.Transactioner) - c.Assert(isTxStorer, Equals, true) - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, 
Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -func (s *ReaderSuite) TestDecodeNoSeekableWithoutTxStorer(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - reader := nonSeekableReader{ - r: f.Packfile(), - } - - scanner := packfile.NewScanner(reader) - - var storage storer.EncodedObjectStorer - storage, _ = filesystem.NewStorage(memfs.New()) - _, isTxStorer := storage.(storer.Transactioner) - c.Assert(isTxStorer, Equals, false) - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - defer d.Close() - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - assertObjects(c, storage, expectedHashes) - }) -} - -var expectedHashes = []string{ - "918c48b83bd081e863dbe1b80f8998f058cd8294", - "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", - "1669dce138d9b841a518c64b10914d88f5e488ea", - "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", - "b8e471f58bcbca63b07bda20e428190409c2db47", - "35e85108805c84807bc66a02d91535e1e24b38b9", - "b029517f6300c2da0f4b651b8642506cd6aaf45d", - "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", - "d3ff53e0564a9f87d8e84b6e28e5060e517008aa", - "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", - "d5c0f4ab811897cadf03aec358ae60d21f91c50d", - "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", - "cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", - "9dea2395f5403188298c1dabe8bdafe562c491e3", - "586af567d0bb5e771e49bdd9434f5e0fb76d25fa", - "9a48f23120e880dfbe41f7c9b7b708e9ee62a492", - "5a877e6a906a2743ad6e45d99c1793642aaf8eda", - "c8f1d8c61f9da76f4cb49fd86322b6e685dba956", - "a8d315b2b1c615d43042c3a62402b8a54288cf5c", - "a39771a7651f97faf5c72e08224d857fc35133db", - "880cd14280f4b9b6ed3986d6671f907d7cc2a198", - "fb72698cab7617ac416264415f13224dfd7a165e", - "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", - "eba74343e2f15d62adedfd8c883ee0262b5c8021", - "c2d30fa8ef288618f65f6eed6e168e0d514886f4", - "8dcef98b1d52143e1e2dbc458ffe38f925786bf2", - 
"aa9b383c260e1d05fbbf6b30a02914555e20c725", - "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", - "dbd3641b371024f44d0e469a9c8f5457b0660de1", - "e8d3ffab552895c19b9fcf7aa264d277cde33881", - "7e59600739c96546163833214c36459e324bad0a", -} - -func (s *ReaderSuite) TestDecodeCRCs(c *C) { - f := fixtures.Basic().ByTag("ofs-delta").One() - - scanner := packfile.NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := packfile.NewDecoder(scanner, storage) - c.Assert(err, IsNil) - _, err = d.Decode() - c.Assert(err, IsNil) - - var sum uint64 - idx := d.Index().ToIdxFile() - for _, e := range idx.Entries { - sum += uint64(e.CRC32) - } - - c.Assert(int(sum), Equals, 78022211966) -} - -func (s *ReaderSuite) TestDecodeObjectAt(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - d.SetIndex(getIndexFromIdxFile(f.Idx())) - } - - // the objects at reference 186, is a delta, so should be recall, - // without being read before. - obj, err := d.DecodeObjectAt(186) - c.Assert(err, IsNil) - c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5") -} - -func (s *ReaderSuite) TestDecodeObjectAtForType(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoderForType(scanner, nil, plumbing.TreeObject, - cache.NewObjectLRUDefault()) - c.Assert(err, IsNil) - - // when the packfile is ref-delta based, the offsets are required - if f.Is("ref-delta") { - d.SetIndex(getIndexFromIdxFile(f.Idx())) - } - - // the objects at reference 186, is a delta, so should be recall, - // without being read before. 
- obj, err := d.DecodeObjectAt(186) - c.Assert(err, IsNil) - c.Assert(obj.Type(), Equals, plumbing.CommitObject) - c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5") -} - -func (s *ReaderSuite) TestIndex(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - c.Assert(d.Index().ToIdxFile().Entries, HasLen, 0) - - _, err = d.Decode() - c.Assert(err, IsNil) - - c.Assert(len(d.Index().ToIdxFile().Entries), Equals, 31) -} - -func (s *ReaderSuite) TestSetIndex(c *C) { - f := fixtures.Basic().One() - scanner := packfile.NewScanner(f.Packfile()) - d, err := packfile.NewDecoder(scanner, nil) - c.Assert(err, IsNil) - - idx := packfile.NewIndex(1) - h := plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5") - idx.Add(h, uint64(42), 0) - d.SetIndex(idx) - - idxf := d.Index().ToIdxFile() - c.Assert(idxf.Entries, HasLen, 1) - c.Assert(idxf.Entries[0].Offset, Equals, uint64(42)) -} - -func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { - - i, err := s.IterEncodedObjects(plumbing.AnyObject) - c.Assert(err, IsNil) - - var count int - err = i.ForEach(func(plumbing.EncodedObject) error { count++; return nil }) - c.Assert(err, IsNil) - c.Assert(count, Equals, len(expects)) - - for _, exp := range expects { - obt, err := s.EncodedObject(plumbing.AnyObject, plumbing.NewHash(exp)) - c.Assert(err, IsNil) - c.Assert(obt.Hash().String(), Equals, exp) - } -} - -func getIndexFromIdxFile(r io.Reader) *packfile.Index { - idxf := idxfile.NewIdxfile() - d := idxfile.NewDecoder(r) - if err := d.Decode(idxf); err != nil { - panic(err) - } - - return packfile.NewIndexFromIdxFile(idxf) -} diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go index cd38c16..6710085 100644 --- a/plumbing/format/packfile/delta_selector.go +++ b/plumbing/format/packfile/delta_selector.go @@ -103,7 +103,7 @@ func 
(dw *deltaSelector) objectsToPack( otp := newObjectToPack(o) if _, ok := o.(plumbing.DeltaObject); ok { - otp.Original = nil + otp.CleanOriginal() } objectsToPack = append(objectsToPack, otp) @@ -196,7 +196,8 @@ func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error { return err } - otp.Original = obj + otp.SetOriginal(obj) + return nil } @@ -230,7 +231,8 @@ func (dw *deltaSelector) walk( delete(indexMap, obj.Hash()) if obj.IsDelta() { - obj.Original = nil + obj.SaveOriginalMetadata() + obj.CleanOriginal() } } diff --git a/plumbing/format/packfile/delta_test.go b/plumbing/format/packfile/delta_test.go index 42b777a..98f53f6 100644 --- a/plumbing/format/packfile/delta_test.go +++ b/plumbing/format/packfile/delta_test.go @@ -62,7 +62,7 @@ func (s *DeltaSuite) SetUpSuite(c *C) { target: []piece{{"1", 30}, {"2", 20}, {"7", 40}, {"4", 400}, {"5", 10}}, }, { - description: "A copy operation bigger tan 64kb", + description: "A copy operation bigger than 64kb", base: []piece{{bigRandStr, 1}, {"1", 200}}, target: []piece{{bigRandStr, 1}}, }} @@ -72,12 +72,16 @@ var bigRandStr = randStringBytes(100 * 1024) const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" -func randStringBytes(n int) string { +func randBytes(n int) []byte { b := make([]byte, n) for i := range b { b[i] = letterBytes[rand.Intn(len(letterBytes))] } - return string(b) + return b +} + +func randStringBytes(n int) string { + return string(randBytes(n)) } func (s *DeltaSuite) TestAddDelta(c *C) { @@ -110,3 +114,14 @@ func (s *DeltaSuite) TestIncompleteDelta(c *C) { c.Assert(err, NotNil) c.Assert(result, IsNil) } + +func (s *DeltaSuite) TestMaxCopySizeDelta(c *C) { + baseBuf := randBytes(maxCopySize) + targetBuf := baseBuf[0:] + targetBuf = append(targetBuf, byte(1)) + + delta := DiffDelta(baseBuf, targetBuf) + result, err := PatchDelta(baseBuf, delta) + c.Assert(err, IsNil) + c.Assert(result, DeepEquals, targetBuf) +} diff --git a/plumbing/format/packfile/diff_delta.go 
b/plumbing/format/packfile/diff_delta.go index 4d56dc1..d35e78a 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -111,7 +111,7 @@ func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte { rl := l aOffset := offset - for { + for rl > 0 { if rl < maxCopySize { buf.Write(encodeCopyOperation(aOffset, rl)) break diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go index 6686dd5..b077918 100644 --- a/plumbing/format/packfile/encoder.go +++ b/plumbing/format/packfile/encoder.go @@ -87,6 +87,7 @@ func (e *Encoder) entry(o *ObjectToPack) error { // (for example due to a concurrent repack) and a different base // was chosen, forcing a cycle. Select something other than a // delta, and write this object. + e.selector.restoreOriginal(o) o.BackToOriginal() } diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index 1075875..e15126e 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -2,13 +2,17 @@ package packfile_test import ( "bytes" + "io" "math/rand" + "testing" + "gopkg.in/src-d/go-billy.v4/memfs" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" . "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/filesystem" - "gopkg.in/src-d/go-git.v4/storage/memory" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git-fixtures.v3" @@ -21,30 +25,38 @@ type EncoderAdvancedSuite struct { var _ = Suite(&EncoderAdvancedSuite{}) func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) { + if testing.Short() { + c.Skip("skipping test in short mode.") + } + fixs := fixtures.Basic().ByTag("packfile").ByTag(".git") fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git"). 
ByTag("packfile").ByTag(".git").One()) fixs.Test(c, func(f *fixtures.Fixture) { - storage, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + storage := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) s.testEncodeDecode(c, storage, 10) }) - } func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) { + if testing.Short() { + c.Skip("skipping test in short mode.") + } + fixs := fixtures.Basic().ByTag("packfile").ByTag(".git") fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git"). ByTag("packfile").ByTag(".git").One()) fixs.Test(c, func(f *fixtures.Fixture) { - storage, err := filesystem.NewStorage(f.DotGit()) - c.Assert(err, IsNil) + storage := filesystem.NewStorage(f.DotGit(), cache.NewObjectLRUDefault()) s.testEncodeDecode(c, storage, 0) }) } -func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, packWindow uint) { - +func (s *EncoderAdvancedSuite) testEncodeDecode( + c *C, + storage storer.Storer, + packWindow uint, +) { objIter, err := storage.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) @@ -71,16 +83,35 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, pac encodeHash, err := enc.Encode(hashes, packWindow) c.Assert(err, IsNil) - scanner := NewScanner(buf) - storage = memory.NewStorage() - d, err := NewDecoder(scanner, storage) + fs := memfs.New() + f, err := fs.Create("packfile") + c.Assert(err, IsNil) + + _, err = f.Write(buf.Bytes()) + c.Assert(err, IsNil) + + _, err = f.Seek(0, io.SeekStart) + c.Assert(err, IsNil) + + w := new(idxfile.Writer) + parser, err := NewParser(NewScanner(f), w) + c.Assert(err, IsNil) + + _, err = parser.Parse() c.Assert(err, IsNil) - decodeHash, err := d.Decode() + index, err := w.Index() c.Assert(err, IsNil) + _, err = f.Seek(0, io.SeekStart) + c.Assert(err, IsNil) + + p := NewPackfile(index, fs, f) + + decodeHash, err := p.ID() + c.Assert(err, IsNil) c.Assert(encodeHash, Equals, decodeHash) - objIter, 
err = storage.IterEncodedObjects(plumbing.AnyObject) + objIter, err = p.GetAll() c.Assert(err, IsNil) obtainedObjects := map[plumbing.Hash]bool{} err = objIter.ForEach(func(o plumbing.EncodedObject) error { diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index 320036b..80b916d 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -2,8 +2,12 @@ package packfile import ( "bytes" + "io" + stdioutil "io/ioutil" + "gopkg.in/src-d/go-billy.v4/memfs" "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/storage/memory" . "gopkg.in/check.v1" @@ -130,24 +134,20 @@ func (s *EncoderSuite) simpleDeltaTest(c *C) { }) c.Assert(err, IsNil) - scanner := NewScanner(s.buf) - - storage := memory.NewStorage() - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - - decHash, err := d.Decode() + p, cleanup := packfileFromReader(c, s.buf) + defer cleanup() + decHash, err := p.ID() c.Assert(err, IsNil) c.Assert(encHash, Equals, decHash) - decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash()) + decSrc, err := p.Get(srcObject.Hash()) c.Assert(err, IsNil) - c.Assert(decSrc, DeepEquals, srcObject) + objectsEqual(c, decSrc, srcObject) - decTarget, err := storage.EncodedObject(targetObject.Type(), targetObject.Hash()) + decTarget, err := p.Get(targetObject.Hash()) c.Assert(err, IsNil) - c.Assert(decTarget, DeepEquals, targetObject) + objectsEqual(c, decTarget, targetObject) } func (s *EncoderSuite) deltaOverDeltaTest(c *C) { @@ -173,27 +173,24 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) { }) c.Assert(err, IsNil) - scanner := NewScanner(s.buf) - storage := memory.NewStorage() - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - - decHash, err := d.Decode() + p, cleanup := packfileFromReader(c, s.buf) + defer cleanup() + decHash, err := p.ID() c.Assert(err, IsNil) c.Assert(encHash, Equals, 
decHash) - decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash()) + decSrc, err := p.Get(srcObject.Hash()) c.Assert(err, IsNil) - c.Assert(decSrc, DeepEquals, srcObject) + objectsEqual(c, decSrc, srcObject) - decTarget, err := storage.EncodedObject(targetObject.Type(), targetObject.Hash()) + decTarget, err := p.Get(targetObject.Hash()) c.Assert(err, IsNil) - c.Assert(decTarget, DeepEquals, targetObject) + objectsEqual(c, decTarget, targetObject) - decOtherTarget, err := storage.EncodedObject(otherTargetObject.Type(), otherTargetObject.Hash()) + decOtherTarget, err := p.Get(otherTargetObject.Hash()) c.Assert(err, IsNil) - c.Assert(decOtherTarget, DeepEquals, otherTargetObject) + objectsEqual(c, decOtherTarget, otherTargetObject) } func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) { @@ -202,6 +199,15 @@ func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) { o3 := newObject(plumbing.BlobObject, []byte("011111")) o4 := newObject(plumbing.BlobObject, []byte("01111100000")) + _, err := s.store.SetEncodedObject(o1) + c.Assert(err, IsNil) + _, err = s.store.SetEncodedObject(o2) + c.Assert(err, IsNil) + _, err = s.store.SetEncodedObject(o3) + c.Assert(err, IsNil) + _, err = s.store.SetEncodedObject(o4) + c.Assert(err, IsNil) + d2, err := GetDelta(o1, o2) c.Assert(err, IsNil) @@ -219,6 +225,18 @@ func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) { pd3.SetDelta(pd4, d3) pd4.SetDelta(pd3, d4) + // SetOriginal is used by delta selector when generating ObjectToPack. + // It also fills type, hash and size values to be used when Original + // is nil. 
+ po1.SetOriginal(po1.Original) + pd2.SetOriginal(pd2.Original) + pd2.CleanOriginal() + + pd3.SetOriginal(pd3.Original) + pd3.CleanOriginal() + + pd4.SetOriginal(pd4.Original) + encHash, err := s.enc.encode([]*ObjectToPack{ po1, pd2, @@ -227,29 +245,74 @@ func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) { }) c.Assert(err, IsNil) - scanner := NewScanner(s.buf) - storage := memory.NewStorage() - d, err := NewDecoder(scanner, storage) + p, cleanup := packfileFromReader(c, s.buf) + defer cleanup() + decHash, err := p.ID() c.Assert(err, IsNil) - decHash, err := d.Decode() + c.Assert(encHash, Equals, decHash) + + decSrc, err := p.Get(o1.Hash()) c.Assert(err, IsNil) + objectsEqual(c, decSrc, o1) - c.Assert(encHash, Equals, decHash) + decTarget, err := p.Get(o2.Hash()) + c.Assert(err, IsNil) + objectsEqual(c, decTarget, o2) - decSrc, err := storage.EncodedObject(o1.Type(), o1.Hash()) + decOtherTarget, err := p.Get(o3.Hash()) c.Assert(err, IsNil) - c.Assert(decSrc, DeepEquals, o1) + objectsEqual(c, decOtherTarget, o3) - decTarget, err := storage.EncodedObject(o2.Type(), o2.Hash()) + decAnotherTarget, err := p.Get(o4.Hash()) c.Assert(err, IsNil) - c.Assert(decTarget, DeepEquals, o2) + objectsEqual(c, decAnotherTarget, o4) +} - decOtherTarget, err := storage.EncodedObject(o3.Type(), o3.Hash()) +func objectsEqual(c *C, o1, o2 plumbing.EncodedObject) { + c.Assert(o1.Type(), Equals, o2.Type()) + c.Assert(o1.Hash(), Equals, o2.Hash()) + c.Assert(o1.Size(), Equals, o2.Size()) + + r1, err := o1.Reader() c.Assert(err, IsNil) - c.Assert(decOtherTarget, DeepEquals, o3) - decAnotherTarget, err := storage.EncodedObject(o4.Type(), o4.Hash()) + b1, err := stdioutil.ReadAll(r1) c.Assert(err, IsNil) - c.Assert(decAnotherTarget, DeepEquals, o4) + + r2, err := o2.Reader() + c.Assert(err, IsNil) + + b2, err := stdioutil.ReadAll(r2) + c.Assert(err, IsNil) + + c.Assert(bytes.Compare(b1, b2), Equals, 0) +} + +func packfileFromReader(c *C, buf *bytes.Buffer) (*Packfile, func()) { + fs := 
memfs.New() + file, err := fs.Create("packfile") + c.Assert(err, IsNil) + + _, err = file.Write(buf.Bytes()) + c.Assert(err, IsNil) + + _, err = file.Seek(0, io.SeekStart) + c.Assert(err, IsNil) + + scanner := NewScanner(file) + + w := new(idxfile.Writer) + p, err := NewParser(scanner, w) + c.Assert(err, IsNil) + + _, err = p.Parse() + c.Assert(err, IsNil) + + index, err := w.Index() + c.Assert(err, IsNil) + + return NewPackfile(index, fs, file), func() { + c.Assert(file.Close(), IsNil) + } } diff --git a/plumbing/format/packfile/fsobject.go b/plumbing/format/packfile/fsobject.go new file mode 100644 index 0000000..330cb73 --- /dev/null +++ b/plumbing/format/packfile/fsobject.go @@ -0,0 +1,116 @@ +package packfile + +import ( + "io" + + billy "gopkg.in/src-d/go-billy.v4" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" +) + +// FSObject is an object from the packfile on the filesystem. +type FSObject struct { + hash plumbing.Hash + h *ObjectHeader + offset int64 + size int64 + typ plumbing.ObjectType + index idxfile.Index + fs billy.Filesystem + path string + cache cache.Object +} + +// NewFSObject creates a new filesystem object. +func NewFSObject( + hash plumbing.Hash, + finalType plumbing.ObjectType, + offset int64, + contentSize int64, + index idxfile.Index, + fs billy.Filesystem, + path string, + cache cache.Object, +) *FSObject { + return &FSObject{ + hash: hash, + offset: offset, + size: contentSize, + typ: finalType, + index: index, + fs: fs, + path: path, + cache: cache, + } +} + +// Reader implements the plumbing.EncodedObject interface. 
+func (o *FSObject) Reader() (io.ReadCloser, error) {
+	// A nil cache is a supported configuration elsewhere in this package
+	// (Packfile.cacheGet and Packfile.cachePut both guard against it), so
+	// guard here as well instead of calling Get on a nil interface.
+	if o.cache != nil {
+		if obj, ok := o.cache.Get(o.hash); ok {
+			reader, err := obj.Reader()
+			if err != nil {
+				return nil, err
+			}
+
+			return reader, nil
+		}
+	}
+
+	// Cache miss: reopen the packfile and decode the object at its offset.
+	f, err := o.fs.Open(o.path)
+	if err != nil {
+		return nil, err
+	}
+
+	// No filesystem is handed to the temporary Packfile; the cache is
+	// shared with this object.
+	p := NewPackfileWithCache(o.index, nil, f, o.cache)
+	r, err := p.getObjectContent(o.offset)
+	if err != nil {
+		// The decoding error is the one worth reporting; the close error
+		// is deliberately dropped.
+		_ = f.Close()
+		return nil, err
+	}
+
+	// NOTE(review): f is closed before r is handed out, so r must not read
+	// from f lazily. getObjectContent appears to return readers over
+	// in-memory objects only; confirm before changing it.
+	if err := f.Close(); err != nil {
+		return nil, err
+	}
+
+	return r, nil
+}
+
+// SetSize implements the plumbing.EncodedObject interface. This method
+// is a noop.
+func (o *FSObject) SetSize(int64) {}
+
+// SetType implements the plumbing.EncodedObject interface. This method is
+// a noop.
+func (o *FSObject) SetType(plumbing.ObjectType) {}
+
+// Hash implements the plumbing.EncodedObject interface. It returns the hash
+// the object was created with.
+func (o *FSObject) Hash() plumbing.Hash { return o.hash }
+
+// Size implements the plumbing.EncodedObject interface. It returns the
+// content size the object was created with.
+func (o *FSObject) Size() int64 { return o.size }
+
+// Type implements the plumbing.EncodedObject interface. It returns the
+// final type the object was created with.
+func (o *FSObject) Type() plumbing.ObjectType {
+	return o.typ
+}
+
+// Writer implements the plumbing.EncodedObject interface. This method always
+// returns a nil writer and a nil error, so callers must not write to the
+// result.
+func (o *FSObject) Writer() (io.WriteCloser, error) {
+	return nil, nil
+}
+
+// objectReader couples a content reader with the file it is backed by, so
+// that closing the reader also closes the file.
+// NOTE(review): nothing visible in this file constructs an objectReader;
+// confirm it is still needed.
+type objectReader struct {
+	io.ReadCloser
+	f billy.File
+}
+
+// Close closes the wrapped reader and then the file. The file is closed
+// even when closing the reader fails; in that case the reader's error is
+// the one returned.
+func (r *objectReader) Close() error {
+	if err := r.ReadCloser.Close(); err != nil {
+		_ = r.f.Close()
+		return err
+	}
+
+	return r.f.Close()
+}
diff --git a/plumbing/format/packfile/index.go b/plumbing/format/packfile/index.go
deleted file mode 100644
index 2c5f98f..0000000
--- a/plumbing/format/packfile/index.go
+++ /dev/null
@@ -1,82 +0,0 @@
-package packfile
-
-import (
-	"gopkg.in/src-d/go-git.v4/plumbing"
-	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
-)
-
-// Index is an in-memory representation of a packfile index.
-// This uses idxfile.Idxfile under the hood to obtain indexes from .idx files -// or to store them. -type Index struct { - byHash map[plumbing.Hash]*idxfile.Entry - byOffset map[uint64]*idxfile.Entry -} - -// NewIndex creates a new empty index with the given size. Size is a hint and -// can be 0. It is recommended to set it to the number of objects to be indexed -// if it is known beforehand (e.g. reading from a packfile). -func NewIndex(size int) *Index { - return &Index{ - byHash: make(map[plumbing.Hash]*idxfile.Entry, size), - byOffset: make(map[uint64]*idxfile.Entry, size), - } -} - -// NewIndexFromIdxFile creates a new Index from an idxfile.IdxFile. -func NewIndexFromIdxFile(idxf *idxfile.Idxfile) *Index { - idx := &Index{ - byHash: make(map[plumbing.Hash]*idxfile.Entry, idxf.ObjectCount), - byOffset: make(map[uint64]*idxfile.Entry, idxf.ObjectCount), - } - for _, e := range idxf.Entries { - idx.add(e) - } - - return idx -} - -// Add adds a new Entry with the given values to the index. -func (idx *Index) Add(h plumbing.Hash, offset uint64, crc32 uint32) { - e := idxfile.Entry{ - Hash: h, - Offset: offset, - CRC32: crc32, - } - idx.add(&e) -} - -func (idx *Index) add(e *idxfile.Entry) { - idx.byHash[e.Hash] = e - idx.byOffset[e.Offset] = e -} - -// LookupHash looks an entry up by its hash. An idxfile.Entry is returned and -// a bool, which is true if it was found or false if it wasn't. -func (idx *Index) LookupHash(h plumbing.Hash) (*idxfile.Entry, bool) { - e, ok := idx.byHash[h] - return e, ok -} - -// LookupHash looks an entry up by its offset in the packfile. An idxfile.Entry -// is returned and a bool, which is true if it was found or false if it wasn't. -func (idx *Index) LookupOffset(offset uint64) (*idxfile.Entry, bool) { - e, ok := idx.byOffset[offset] - return e, ok -} - -// Size returns the number of entries in the index. 
-func (idx *Index) Size() int { - return len(idx.byHash) -} - -// ToIdxFile converts the index to an idxfile.Idxfile, which can then be used -// to serialize. -func (idx *Index) ToIdxFile() *idxfile.Idxfile { - idxf := idxfile.NewIdxfile() - for _, e := range idx.byHash { - idxf.Entries = append(idxf.Entries, e) - } - - return idxf -} diff --git a/plumbing/format/packfile/index_test.go b/plumbing/format/packfile/index_test.go deleted file mode 100644 index 6714704..0000000 --- a/plumbing/format/packfile/index_test.go +++ /dev/null @@ -1,122 +0,0 @@ -package packfile - -import ( - "strconv" - "strings" - - "gopkg.in/src-d/go-git.v4/plumbing" - - . "gopkg.in/check.v1" -) - -type IndexSuite struct{} - -var _ = Suite(&IndexSuite{}) - -func (s *IndexSuite) TestLookupOffset(c *C) { - idx := NewIndex(0) - - for o1 := 0; o1 < 10000; o1 += 100 { - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 >= o1 { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - - h1 := s.toHash(o1) - idx.Add(h1, uint64(o1), 0) - - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 > o1 { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupOffset(uint64(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - } -} - -func (s *IndexSuite) TestLookupHash(c *C) { - idx := NewIndex(0) - - for o1 := 0; o1 < 10000; o1 += 100 { - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 >= o1 { - e, ok := idx.LookupHash(s.toHash(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupHash(s.toHash(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) - 
c.Assert(e.Offset, Equals, uint64(o2)) - } - } - - h1 := s.toHash(o1) - idx.Add(h1, uint64(o1), 0) - - for o2 := 0; o2 < 10000; o2 += 100 { - if o2 > o1 { - e, ok := idx.LookupHash(s.toHash(o2)) - c.Assert(ok, Equals, false) - c.Assert(e, IsNil) - } else { - e, ok := idx.LookupHash(s.toHash(o2)) - c.Assert(ok, Equals, true) - c.Assert(e, NotNil) - c.Assert(e.Hash, Equals, s.toHash(o2)) - c.Assert(e.Offset, Equals, uint64(o2)) - } - } - } -} - -func (s *IndexSuite) TestSize(c *C) { - idx := NewIndex(0) - - for o1 := 0; o1 < 1000; o1++ { - c.Assert(idx.Size(), Equals, o1) - h1 := s.toHash(o1) - idx.Add(h1, uint64(o1), 0) - } -} - -func (s *IndexSuite) TestIdxFileEmpty(c *C) { - idx := NewIndex(0) - idxf := idx.ToIdxFile() - idx2 := NewIndexFromIdxFile(idxf) - c.Assert(idx, DeepEquals, idx2) -} - -func (s *IndexSuite) TestIdxFile(c *C) { - idx := NewIndex(0) - for o1 := 0; o1 < 1000; o1++ { - h1 := s.toHash(o1) - idx.Add(h1, uint64(o1), 0) - } - - idx2 := NewIndexFromIdxFile(idx.ToIdxFile()) - c.Assert(idx, DeepEquals, idx2) -} - -func (s *IndexSuite) toHash(i int) plumbing.Hash { - is := strconv.Itoa(i) - padding := strings.Repeat("a", 40-len(is)) - return plumbing.NewHash(padding + is) -} diff --git a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go index 1563517..dfea571 100644 --- a/plumbing/format/packfile/object_pack.go +++ b/plumbing/format/packfile/object_pack.go @@ -23,6 +23,12 @@ type ObjectToPack struct { // offset in pack when object has been already written, or 0 if it // has not been written yet Offset int64 + + // Information from the original object + resolvedOriginal bool + originalType plumbing.ObjectType + originalSize int64 + originalHash plumbing.Hash } // newObjectToPack creates a correct ObjectToPack based on a non-delta object @@ -47,7 +53,7 @@ func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedOb // BackToOriginal converts that ObjectToPack to a non-deltified object if it was one func 
(o *ObjectToPack) BackToOriginal() { - if o.IsDelta() { + if o.IsDelta() && o.Original != nil { o.Object = o.Original o.Base = nil o.Depth = 0 @@ -71,11 +77,37 @@ func (o *ObjectToPack) WantWrite() bool { return o.Offset == 1 } +// SetOriginal sets both Original and saves size, type and hash. If object +// is nil Original is set but previous resolved values are kept +func (o *ObjectToPack) SetOriginal(obj plumbing.EncodedObject) { + o.Original = obj + o.SaveOriginalMetadata() +} + +// SaveOriginalMetadata saves size, type and hash of Original object +func (o *ObjectToPack) SaveOriginalMetadata() { + if o.Original != nil { + o.originalSize = o.Original.Size() + o.originalType = o.Original.Type() + o.originalHash = o.Original.Hash() + o.resolvedOriginal = true + } +} + +// CleanOriginal sets Original to nil +func (o *ObjectToPack) CleanOriginal() { + o.Original = nil +} + func (o *ObjectToPack) Type() plumbing.ObjectType { if o.Original != nil { return o.Original.Type() } + if o.resolvedOriginal { + return o.originalType + } + if o.Base != nil { return o.Base.Type() } @@ -92,6 +124,10 @@ func (o *ObjectToPack) Hash() plumbing.Hash { return o.Original.Hash() } + if o.resolvedOriginal { + return o.originalHash + } + do, ok := o.Object.(plumbing.DeltaObject) if ok { return do.ActualHash() @@ -105,6 +141,10 @@ func (o *ObjectToPack) Size() int64 { return o.Original.Size() } + if o.resolvedOriginal { + return o.originalSize + } + do, ok := o.Object.(plumbing.DeltaObject) if ok { return do.ActualSize() diff --git a/plumbing/format/packfile/packfile.go b/plumbing/format/packfile/packfile.go new file mode 100644 index 0000000..852a834 --- /dev/null +++ b/plumbing/format/packfile/packfile.go @@ -0,0 +1,520 @@ +package packfile + +import ( + "bytes" + "io" + "os" + + billy "gopkg.in/src-d/go-billy.v4" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + 
"gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +var ( + // ErrInvalidObject is returned by Decode when an invalid object is + // found in the packfile. + ErrInvalidObject = NewError("invalid git object") + // ErrZLib is returned by Decode when there was an error unzipping + // the packfile contents. + ErrZLib = NewError("zlib reading error") +) + +// Packfile allows retrieving information from inside a packfile. +type Packfile struct { + idxfile.Index + fs billy.Filesystem + file billy.File + s *Scanner + deltaBaseCache cache.Object + offsetToType map[int64]plumbing.ObjectType +} + +// NewPackfileWithCache creates a new Packfile with the given object cache. +// If the filesystem is provided, the packfile will return FSObjects, otherwise +// it will return MemoryObjects. +func NewPackfileWithCache( + index idxfile.Index, + fs billy.Filesystem, + file billy.File, + cache cache.Object, +) *Packfile { + s := NewScanner(file) + return &Packfile{ + index, + fs, + file, + s, + cache, + make(map[int64]plumbing.ObjectType), + } +} + +// NewPackfile returns a packfile representation for the given packfile file +// and packfile idx. +// If the filesystem is provided, the packfile will return FSObjects, otherwise +// it will return MemoryObjects. +func NewPackfile(index idxfile.Index, fs billy.Filesystem, file billy.File) *Packfile { + return NewPackfileWithCache(index, fs, file, cache.NewObjectLRUDefault()) +} + +// Get retrieves the encoded object in the packfile with the given hash. +func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { + offset, err := p.FindOffset(h) + if err != nil { + return nil, err + } + + return p.GetByOffset(offset) +} + +// GetByOffset retrieves the encoded object from the packfile with the given +// offset. 
+func (p *Packfile) GetByOffset(o int64) (plumbing.EncodedObject, error) {
+	// Serve the object from the cache when the offset maps to a known hash.
+	// cacheGet is used instead of touching deltaBaseCache directly because
+	// the cache may be nil.
+	if hash, err := p.FindHash(o); err == nil {
+		if obj, ok := p.cacheGet(hash); ok {
+			return obj, nil
+		}
+	}
+
+	if _, err := p.s.SeekFromStart(o); err != nil {
+		// An EOF or invalid-argument seek error is reported as a missing
+		// object rather than as an I/O failure.
+		if err == io.EOF || isInvalid(err) {
+			return nil, plumbing.ErrObjectNotFound
+		}
+
+		return nil, err
+	}
+
+	return p.nextObject()
+}
+
+// nextObjectHeader reads the next object header from the scanner and then
+// clears the scanner's pending object.
+// NOTE(review): presumably the reset keeps a later header read, done after
+// a seek, from trying to skip over this object's content; confirm against
+// Scanner.
+func (p *Packfile) nextObjectHeader() (*ObjectHeader, error) {
+	h, err := p.s.NextObjectHeader()
+	p.s.pendingObject = nil
+	return h, err
+}
+
+// getObjectData returns the final type and the size of the object described
+// by h. For regular objects both come straight from the header. For deltas
+// the size is the second LEB128 value of the delta (the first one, the
+// source size, is skipped) and the type is the type of the base object,
+// resolved recursively unless it is already known in offsetToType.
+// NOTE(review): no caller of this method is visible in this file; nextObject
+// uses getObjectSize and getObjectType instead.
+func (p *Packfile) getObjectData(
+	h *ObjectHeader,
+) (typ plumbing.ObjectType, size int64, err error) {
+	switch h.Type {
+	case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
+		typ = h.Type
+		size = h.Length
+	case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
+		buf := bufPool.Get().(*bytes.Buffer)
+		buf.Reset()
+		defer bufPool.Put(buf)
+
+		_, _, err = p.s.NextObject(buf)
+		if err != nil {
+			return
+		}
+
+		delta := buf.Bytes()
+		_, delta = decodeLEB128(delta) // skip src size
+		sz, _ := decodeLEB128(delta)
+		size = int64(sz)
+
+		// Locate the base object: by hash for ref-deltas, by the offset
+		// stored in the header for ofs-deltas.
+		var offset int64
+		if h.Type == plumbing.REFDeltaObject {
+			offset, err = p.FindOffset(h.Reference)
+			if err != nil {
+				return
+			}
+		} else {
+			offset = h.OffsetReference
+		}
+
+		if baseType, ok := p.offsetToType[offset]; ok {
+			typ = baseType
+		} else {
+			if _, err = p.s.SeekFromStart(offset); err != nil {
+				return
+			}
+
+			h, err = p.nextObjectHeader()
+			if err != nil {
+				return
+			}
+
+			typ, _, err = p.getObjectData(h)
+			if err != nil {
+				return
+			}
+		}
+	default:
+		err = ErrInvalidObject.AddDetails("type %q", h.Type)
+	}
+
+	return
+}
+
+// getObjectSize returns the size of the object described by h. Regular
+// objects report the header length; for deltas the delta is read from the
+// scanner and the second LEB128 value of its header is returned (the first
+// one, the source size, is skipped). Unknown types yield ErrInvalidObject.
+func (p *Packfile) getObjectSize(h *ObjectHeader) (int64, error) {
+	switch h.Type {
+	case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
+		return h.Length, nil
+	case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
+		buf := bufPool.Get().(*bytes.Buffer)
+		buf.Reset()
+		defer bufPool.Put(buf)
+
+		if _, _, err := p.s.NextObject(buf); err != nil {
+			return 0, err
+		}
+
+		delta := buf.Bytes()
+		_, delta = decodeLEB128(delta) // skip src size
+		sz, _ := decodeLEB128(delta)
+		return int64(sz), nil
+	default:
+		return 0, ErrInvalidObject.AddDetails("type %q", h.Type)
+	}
+}
+
+// getObjectType returns the final (non-delta) type of the object described
+// by h. For deltas it follows the chain of base objects, seeking the scanner
+// to each base, until a regular object or an offset already recorded in
+// offsetToType is found. Unknown types yield ErrInvalidObject.
+func (p *Packfile) getObjectType(h *ObjectHeader) (typ plumbing.ObjectType, err error) {
+	switch h.Type {
+	case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
+		return h.Type, nil
+	case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
+		var offset int64
+		if h.Type == plumbing.REFDeltaObject {
+			offset, err = p.FindOffset(h.Reference)
+			if err != nil {
+				return
+			}
+		} else {
+			offset = h.OffsetReference
+		}
+
+		if baseType, ok := p.offsetToType[offset]; ok {
+			typ = baseType
+		} else {
+			if _, err = p.s.SeekFromStart(offset); err != nil {
+				return
+			}
+
+			h, err = p.nextObjectHeader()
+			if err != nil {
+				return
+			}
+
+			typ, err = p.getObjectType(h)
+			if err != nil {
+				return
+			}
+		}
+	default:
+		err = ErrInvalidObject.AddDetails("type %q", h.Type)
+	}
+
+	return
+}
+
+// nextObject decodes the object at the scanner's current position. Without
+// a filesystem the object is fully decoded by getNextObject; with one, only
+// its hash, size and type are resolved and an FSObject carrying them is
+// returned. An EOF or invalid-argument error while reading the header is
+// reported as plumbing.ErrObjectNotFound.
+func (p *Packfile) nextObject() (plumbing.EncodedObject, error) {
+	h, err := p.nextObjectHeader()
+	if err != nil {
+		if err == io.EOF || isInvalid(err) {
+			return nil, plumbing.ErrObjectNotFound
+		}
+		return nil, err
+	}
+
+	// If we have no filesystem, we will return a MemoryObject instead
+	// of an FSObject.
+	if p.fs == nil {
+		return p.getNextObject(h)
+	}
+
+	hash, err := p.FindHash(h.Offset)
+	if err != nil {
+		return nil, err
+	}
+
+	size, err := p.getObjectSize(h)
+	if err != nil {
+		return nil, err
+	}
+
+	typ, err := p.getObjectType(h)
+	if err != nil {
+		return nil, err
+	}
+
+	// Remember the resolved type so deltas based on this object do not have
+	// to walk the chain again.
+	p.offsetToType[h.Offset] = typ
+
+	return NewFSObject(
+		hash,
+		typ,
+		h.Offset,
+		size,
+		p.Index,
+		p.fs,
+		p.file.Name(),
+		p.deltaBaseCache,
+	), nil
+}
+
+// getObjectContent returns a reader over the content of the object stored
+// at the given offset. The cache is consulted first; on a miss the object is
+// decoded from the packfile with getNextObject.
+func (p *Packfile) getObjectContent(offset int64) (io.ReadCloser, error) {
+	ref, err := p.FindHash(offset)
+	if err == nil {
+		obj, ok := p.cacheGet(ref)
+		if ok {
+			reader, err := obj.Reader()
+			if err != nil {
+				return nil, err
+			}
+
+			return reader, nil
+		}
+	}
+
+	if _, err := p.s.SeekFromStart(offset); err != nil {
+		return nil, err
+	}
+
+	h, err := p.nextObjectHeader()
+	if err != nil {
+		return nil, err
+	}
+
+	obj, err := p.getNextObject(h)
+	if err != nil {
+		return nil, err
+	}
+
+	return obj.Reader()
+}
+
+// getNextObject decodes the object described by h, whose content is next in
+// the scanner, into a new plumbing.MemoryObject. Deltas are applied on top
+// of their base object. Unknown types yield ErrInvalidObject.
+func (p *Packfile) getNextObject(h *ObjectHeader) (plumbing.EncodedObject, error) {
+	var obj = new(plumbing.MemoryObject)
+	obj.SetSize(h.Length)
+	obj.SetType(h.Type)
+
+	var err error
+	switch h.Type {
+	case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
+		err = p.fillRegularObjectContent(obj)
+	case plumbing.REFDeltaObject:
+		err = p.fillREFDeltaObjectContent(obj, h.Reference)
+	case plumbing.OFSDeltaObject:
+		err = p.fillOFSDeltaObjectContent(obj, h.OffsetReference)
+	default:
+		err = ErrInvalidObject.AddDetails("type %q", h.Type)
+	}
+
+	if err != nil {
+		return nil, err
+	}
+
+	return obj, nil
+}
+
+// fillRegularObjectContent copies the content of the next object in the
+// scanner into obj and, on success, stores obj in the cache.
+func (p *Packfile) fillRegularObjectContent(obj plumbing.EncodedObject) error {
+	w, err := obj.Writer()
+	if err != nil {
+		return err
+	}
+
+	// Only cache objects whose content was read completely.
+	if _, _, err = p.s.NextObject(w); err != nil {
+		return err
+	}
+
+	p.cachePut(obj)
+
+	return nil
+}
+
+// fillREFDeltaObjectContent reads the next object in the scanner as a delta
+// against the object with hash ref, applies it and writes the result into
+// obj. The base is taken from the cache when present and fetched with Get
+// otherwise. On success obj is stored in the cache.
+func (p *Packfile) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) error {
+	buf := bufPool.Get().(*bytes.Buffer)
+	buf.Reset()
+	// Deferred so the buffer goes back to the pool on the error paths too.
+	defer bufPool.Put(buf)
+
+	_, _, err := p.s.NextObject(buf)
+	if err != nil {
+		return err
+	}
+
+	base, ok := p.cacheGet(ref)
+	if !ok {
+		base, err = p.Get(ref)
+		if err != nil {
+			return err
+		}
+	}
+
+	obj.SetType(base.Type())
+	// Only cache objects whose delta was applied successfully.
+	if err = ApplyDelta(obj, base, buf.Bytes()); err != nil {
+		return err
+	}
+
+	p.cachePut(obj)
+
+	return nil
+}
+
+// fillOFSDeltaObjectContent reads the next object in the scanner as a delta
+// against the object stored at the given offset, applies it and writes the
+// result into obj. The base is taken from the cache when present; otherwise
+// it is fetched with GetByOffset and cached. On success obj is stored in the
+// cache.
+func (p *Packfile) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) error {
+	// Same pooled scratch buffer as fillREFDeltaObjectContent.
+	buf := bufPool.Get().(*bytes.Buffer)
+	buf.Reset()
+	defer bufPool.Put(buf)
+
+	_, _, err := p.s.NextObject(buf)
+	if err != nil {
+		return err
+	}
+
+	var base plumbing.EncodedObject
+	var ok bool
+	hash, err := p.FindHash(offset)
+	if err == nil {
+		base, ok = p.cacheGet(hash)
+	}
+
+	if !ok {
+		base, err = p.GetByOffset(offset)
+		if err != nil {
+			return err
+		}
+
+		p.cachePut(base)
+	}
+
+	obj.SetType(base.Type())
+	// Only cache objects whose delta was applied successfully.
+	if err = ApplyDelta(obj, base, buf.Bytes()); err != nil {
+		return err
+	}
+
+	p.cachePut(obj)
+
+	return nil
+}
+
+// cacheGet looks h up in the object cache. It reports a miss when the
+// packfile has no cache.
+func (p *Packfile) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) {
+	if p.deltaBaseCache == nil {
+		return nil, false
+	}
+
+	return p.deltaBaseCache.Get(h)
+}
+
+// cachePut stores obj in the object cache. It does nothing when the
+// packfile has no cache.
+func (p *Packfile) cachePut(obj plumbing.EncodedObject) {
+	if p.deltaBaseCache == nil {
+		return
+	}
+
+	p.deltaBaseCache.Put(obj)
+}
+
+// GetAll returns an iterator with all encoded objects in the packfile.
+// The iterator returned is not thread-safe, it should be used in the same
+// thread as the Packfile instance.
+func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) {
+	return p.GetByType(plumbing.AnyObject)
+}
+
+// GetByType returns all the objects of the given type. Only
+// plumbing.AnyObject and the four regular object types are accepted; any
+// other type yields plumbing.ErrInvalidType.
+func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter, error) {
+	switch typ {
+	case plumbing.AnyObject,
+		plumbing.BlobObject,
+		plumbing.TreeObject,
+		plumbing.CommitObject,
+		plumbing.TagObject:
+		entries, err := p.EntriesByOffset()
+		if err != nil {
+			return nil, err
+		}
+
+		return &objectIter{
+			// The iterator decodes objects through this same Packfile
+			// instance, so it shares its scanner, its cache and its
+			// offset-to-type map. Iterating therefore moves the scanner
+			// of p.
+			p:    p,
+			iter: entries,
+			typ:  typ,
+		}, nil
+	default:
+		return nil, plumbing.ErrInvalidType
+	}
+}
+
+// ID returns the ID of the packfile, which is the checksum at the end of it.
+// The position of the underlying file is restored before returning
+// successfully.
+func (p *Packfile) ID() (plumbing.Hash, error) {
+	// Seek returns the new offset, so the current position has to be
+	// captured before jumping to the checksum, not taken from that jump.
+	prev, err := p.file.Seek(0, io.SeekCurrent)
+	if err != nil {
+		return plumbing.ZeroHash, err
+	}
+
+	// The checksum is read from the last 20 bytes of the file.
+	if _, err := p.file.Seek(-20, io.SeekEnd); err != nil {
+		return plumbing.ZeroHash, err
+	}
+
+	var hash plumbing.Hash
+	if _, err := io.ReadFull(p.file, hash[:]); err != nil {
+		return plumbing.ZeroHash, err
+	}
+
+	if _, err := p.file.Seek(prev, io.SeekStart); err != nil {
+		return plumbing.ZeroHash, err
+	}
+
+	return hash, nil
+}
+
+// Close the packfile and its resources.
+func (p *Packfile) Close() error {
+	closer, ok := p.file.(io.Closer)
+	if !ok {
+		return nil
+	}
+
+	return closer.Close()
+}
+
+// objectIter iterates over the objects of a Packfile in the order given by
+// an index entry iterator, optionally keeping only objects of one type.
+type objectIter struct {
+	p    *Packfile
+	typ  plumbing.ObjectType
+	iter idxfile.EntryIter
+}
+
+// Next returns the next object whose type matches the iterator's type, or
+// the next object of any type when the iterator was created with
+// plumbing.AnyObject. Errors from the entry iterator or from decoding an
+// object are returned as they are.
+func (i *objectIter) Next() (plumbing.EncodedObject, error) {
+	for {
+		e, err := i.iter.Next()
+		if err != nil {
+			return nil, err
+		}
+
+		obj, err := i.p.GetByOffset(int64(e.Offset))
+		if err != nil {
+			return nil, err
+		}
+
+		if i.typ == plumbing.AnyObject || obj.Type() == i.typ {
+			return obj, nil
+		}
+	}
+}
+
+// ForEach calls f for every remaining object. It stops with a nil error when
+// Next reports io.EOF, and returns the first other error produced by Next or
+// by f.
+func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error {
+	for {
+		o, err := i.Next()
+		if err != nil {
+			if err == io.EOF {
+				return nil
+			}
+			return err
+		}
+
+		if err := f(o); err != nil {
+			return err
+		}
+	}
+}
+
+// Close closes the underlying entry iterator.
+func (i *objectIter) Close() {
+	i.iter.Close()
+}
+
+// isInvalid checks whether an error is an os.PathError with an os.ErrInvalid
+// error inside. It also checks for the windows error, which is different from
+// os.ErrInvalid.
+func isInvalid(err error) bool { + pe, ok := err.(*os.PathError) + if !ok { + return false + } + + errstr := pe.Err.Error() + return errstr == errInvalidUnix || errstr == errInvalidWindows +} + +// errInvalidWindows is the Windows equivalent to os.ErrInvalid +const errInvalidWindows = "The parameter is incorrect." + +var errInvalidUnix = os.ErrInvalid.Error() diff --git a/plumbing/format/packfile/packfile_test.go b/plumbing/format/packfile/packfile_test.go new file mode 100644 index 0000000..05dc8a7 --- /dev/null +++ b/plumbing/format/packfile/packfile_test.go @@ -0,0 +1,279 @@ +package packfile_test + +import ( + "io" + "math" + + . "gopkg.in/check.v1" + "gopkg.in/src-d/go-billy.v4/osfs" + fixtures "gopkg.in/src-d/go-git-fixtures.v3" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +type PackfileSuite struct { + fixtures.Suite + p *packfile.Packfile + idx *idxfile.MemoryIndex + f *fixtures.Fixture +} + +var _ = Suite(&PackfileSuite{}) + +func (s *PackfileSuite) TestGet(c *C) { + for h := range expectedEntries { + obj, err := s.p.Get(h) + c.Assert(err, IsNil) + c.Assert(obj, Not(IsNil)) + c.Assert(obj.Hash(), Equals, h) + } + + _, err := s.p.Get(plumbing.ZeroHash) + c.Assert(err, Equals, plumbing.ErrObjectNotFound) +} + +func (s *PackfileSuite) TestGetByOffset(c *C) { + for h, o := range expectedEntries { + obj, err := s.p.GetByOffset(o) + c.Assert(err, IsNil) + c.Assert(obj, Not(IsNil)) + c.Assert(obj.Hash(), Equals, h) + } + + _, err := s.p.GetByOffset(math.MaxInt64) + c.Assert(err, Equals, plumbing.ErrObjectNotFound) +} + +func (s *PackfileSuite) TestID(c *C) { + id, err := s.p.ID() + c.Assert(err, IsNil) + c.Assert(id, Equals, s.f.PackfileHash) +} + +func (s *PackfileSuite) TestGetAll(c *C) { + iter, err := s.p.GetAll() + c.Assert(err, IsNil) + + var objects int + for { + o, err := iter.Next() + if err == io.EOF { + 
break + } + c.Assert(err, IsNil) + + objects++ + _, ok := expectedEntries[o.Hash()] + c.Assert(ok, Equals, true) + } + + c.Assert(objects, Equals, len(expectedEntries)) +} + +var expectedEntries = map[plumbing.Hash]int64{ + plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea"): 615, + plumbing.NewHash("32858aad3c383ed1ff0a0f9bdf231d54a00c9e88"): 1524, + plumbing.NewHash("35e85108805c84807bc66a02d91535e1e24b38b9"): 1063, + plumbing.NewHash("49c6bb89b17060d7b4deacb7b338fcc6ea2352a9"): 78882, + plumbing.NewHash("4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd"): 84688, + plumbing.NewHash("586af567d0bb5e771e49bdd9434f5e0fb76d25fa"): 84559, + plumbing.NewHash("5a877e6a906a2743ad6e45d99c1793642aaf8eda"): 84479, + plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5"): 186, + plumbing.NewHash("7e59600739c96546163833214c36459e324bad0a"): 84653, + plumbing.NewHash("880cd14280f4b9b6ed3986d6671f907d7cc2a198"): 78050, + plumbing.NewHash("8dcef98b1d52143e1e2dbc458ffe38f925786bf2"): 84741, + plumbing.NewHash("918c48b83bd081e863dbe1b80f8998f058cd8294"): 286, + plumbing.NewHash("9a48f23120e880dfbe41f7c9b7b708e9ee62a492"): 80998, + plumbing.NewHash("9dea2395f5403188298c1dabe8bdafe562c491e3"): 84032, + plumbing.NewHash("a39771a7651f97faf5c72e08224d857fc35133db"): 84430, + plumbing.NewHash("a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69"): 838, + plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c"): 84375, + plumbing.NewHash("aa9b383c260e1d05fbbf6b30a02914555e20c725"): 84760, + plumbing.NewHash("af2d6a6954d532f8ffb47615169c8fdf9d383a1a"): 449, + plumbing.NewHash("b029517f6300c2da0f4b651b8642506cd6aaf45d"): 1392, + plumbing.NewHash("b8e471f58bcbca63b07bda20e428190409c2db47"): 1230, + plumbing.NewHash("c192bd6a24ea1ab01d78686e417c8bdc7c3d197f"): 1713, + plumbing.NewHash("c2d30fa8ef288618f65f6eed6e168e0d514886f4"): 84725, + plumbing.NewHash("c8f1d8c61f9da76f4cb49fd86322b6e685dba956"): 80725, + plumbing.NewHash("cf4aa3b38974fb7d81f367c0830f7d78d65ab86b"): 84608, + 
plumbing.NewHash("d3ff53e0564a9f87d8e84b6e28e5060e517008aa"): 1685, + plumbing.NewHash("d5c0f4ab811897cadf03aec358ae60d21f91c50d"): 2351, + plumbing.NewHash("dbd3641b371024f44d0e469a9c8f5457b0660de1"): 84115, + plumbing.NewHash("e8d3ffab552895c19b9fcf7aa264d277cde33881"): 12, + plumbing.NewHash("eba74343e2f15d62adedfd8c883ee0262b5c8021"): 84708, + plumbing.NewHash("fb72698cab7617ac416264415f13224dfd7a165e"): 84671, +} + +func (s *PackfileSuite) SetUpTest(c *C) { + s.f = fixtures.Basic().One() + + fs := osfs.New("") + f, err := fs.Open(s.f.Packfile().Name()) + c.Assert(err, IsNil) + + s.idx = idxfile.NewMemoryIndex() + c.Assert(idxfile.NewDecoder(s.f.Idx()).Decode(s.idx), IsNil) + + s.p = packfile.NewPackfile(s.idx, fs, f) +} + +func (s *PackfileSuite) TearDownTest(c *C) { + c.Assert(s.p.Close(), IsNil) +} + +func (s *PackfileSuite) TestDecode(c *C) { + fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { + index := getIndexFromIdxFile(f.Idx()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + p := packfile.NewPackfile(index, fs, pf) + defer p.Close() + + for _, h := range expectedHashes { + obj, err := p.Get(plumbing.NewHash(h)) + c.Assert(err, IsNil) + c.Assert(obj.Hash().String(), Equals, h) + } + }) +} + +func (s *PackfileSuite) TestDecodeByTypeRefDelta(c *C) { + f := fixtures.Basic().ByTag("ref-delta").One() + + index := getIndexFromIdxFile(f.Idx()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + packfile := packfile.NewPackfile(index, fs, pf) + defer packfile.Close() + + iter, err := packfile.GetByType(plumbing.CommitObject) + c.Assert(err, IsNil) + + var count int + for { + obj, err := iter.Next() + if err == io.EOF { + break + } + count++ + c.Assert(err, IsNil) + c.Assert(obj.Type(), Equals, plumbing.CommitObject) + } + + c.Assert(count > 0, Equals, true) +} + +func (s *PackfileSuite) TestDecodeByType(c *C) { + ts := []plumbing.ObjectType{ + 
plumbing.CommitObject, + plumbing.TagObject, + plumbing.TreeObject, + plumbing.BlobObject, + } + + fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { + for _, t := range ts { + index := getIndexFromIdxFile(f.Idx()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + packfile := packfile.NewPackfile(index, fs, pf) + defer packfile.Close() + + iter, err := packfile.GetByType(t) + c.Assert(err, IsNil) + + c.Assert(iter.ForEach(func(obj plumbing.EncodedObject) error { + c.Assert(obj.Type(), Equals, t) + return nil + }), IsNil) + } + }) +} + +func (s *PackfileSuite) TestDecodeByTypeConstructor(c *C) { + f := fixtures.Basic().ByTag("packfile").One() + index := getIndexFromIdxFile(f.Idx()) + fs := osfs.New("") + pf, err := fs.Open(f.Packfile().Name()) + c.Assert(err, IsNil) + + packfile := packfile.NewPackfile(index, fs, pf) + defer packfile.Close() + + _, err = packfile.GetByType(plumbing.OFSDeltaObject) + c.Assert(err, Equals, plumbing.ErrInvalidType) + + _, err = packfile.GetByType(plumbing.REFDeltaObject) + c.Assert(err, Equals, plumbing.ErrInvalidType) + + _, err = packfile.GetByType(plumbing.InvalidObject) + c.Assert(err, Equals, plumbing.ErrInvalidType) +} + +var expectedHashes = []string{ + "918c48b83bd081e863dbe1b80f8998f058cd8294", + "af2d6a6954d532f8ffb47615169c8fdf9d383a1a", + "1669dce138d9b841a518c64b10914d88f5e488ea", + "a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", + "b8e471f58bcbca63b07bda20e428190409c2db47", + "35e85108805c84807bc66a02d91535e1e24b38b9", + "b029517f6300c2da0f4b651b8642506cd6aaf45d", + "32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", + "d3ff53e0564a9f87d8e84b6e28e5060e517008aa", + "c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", + "d5c0f4ab811897cadf03aec358ae60d21f91c50d", + "49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", + "cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", + "9dea2395f5403188298c1dabe8bdafe562c491e3", + "586af567d0bb5e771e49bdd9434f5e0fb76d25fa", + 
"9a48f23120e880dfbe41f7c9b7b708e9ee62a492", + "5a877e6a906a2743ad6e45d99c1793642aaf8eda", + "c8f1d8c61f9da76f4cb49fd86322b6e685dba956", + "a8d315b2b1c615d43042c3a62402b8a54288cf5c", + "a39771a7651f97faf5c72e08224d857fc35133db", + "880cd14280f4b9b6ed3986d6671f907d7cc2a198", + "fb72698cab7617ac416264415f13224dfd7a165e", + "4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", + "eba74343e2f15d62adedfd8c883ee0262b5c8021", + "c2d30fa8ef288618f65f6eed6e168e0d514886f4", + "8dcef98b1d52143e1e2dbc458ffe38f925786bf2", + "aa9b383c260e1d05fbbf6b30a02914555e20c725", + "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", + "dbd3641b371024f44d0e469a9c8f5457b0660de1", + "e8d3ffab552895c19b9fcf7aa264d277cde33881", + "7e59600739c96546163833214c36459e324bad0a", +} + +func assertObjects(c *C, s storer.EncodedObjectStorer, expects []string) { + i, err := s.IterEncodedObjects(plumbing.AnyObject) + c.Assert(err, IsNil) + + var count int + err = i.ForEach(func(plumbing.EncodedObject) error { count++; return nil }) + c.Assert(err, IsNil) + c.Assert(count, Equals, len(expects)) + + for _, exp := range expects { + obt, err := s.EncodedObject(plumbing.AnyObject, plumbing.NewHash(exp)) + c.Assert(err, IsNil) + c.Assert(obt.Hash().String(), Equals, exp) + } +} + +func getIndexFromIdxFile(r io.Reader) idxfile.Index { + idxf := idxfile.NewMemoryIndex() + d := idxfile.NewDecoder(r) + if err := d.Decode(idxf); err != nil { + panic(err) + } + + return idxf +} diff --git a/plumbing/format/packfile/parser.go b/plumbing/format/packfile/parser.go new file mode 100644 index 0000000..28582b5 --- /dev/null +++ b/plumbing/format/packfile/parser.go @@ -0,0 +1,489 @@ +package packfile + +import ( + "bytes" + "errors" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +var ( + // ErrReferenceDeltaNotFound is returned when the reference delta is not + // found. 
+ ErrReferenceDeltaNotFound = errors.New("reference delta not found") + + // ErrNotSeekableSource is returned when the source for the parser is not + // seekable and a storage was not provided, so it can't be parsed. + ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided") + + // ErrDeltaNotCached is returned when the delta could not be found in cache. + ErrDeltaNotCached = errors.New("delta could not be found in cache") +) + +// Observer interface is implemented by index encoders. +type Observer interface { + // OnHeader is called when a new packfile is opened. + OnHeader(count uint32) error + // OnInflatedObjectHeader is called for each object header read. + OnInflatedObjectHeader(t plumbing.ObjectType, objSize int64, pos int64) error + // OnInflatedObjectContent is called for each decoded object. + OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, content []byte) error + // OnFooter is called when decoding is done. + OnFooter(h plumbing.Hash) error +} + +// Parser decodes a packfile and calls any observer associated with it. It is used +// to generate indexes. +type Parser struct { + storage storer.EncodedObjectStorer + scanner *Scanner + count uint32 + oi []*objectInfo + oiByHash map[plumbing.Hash]*objectInfo + oiByOffset map[int64]*objectInfo + hashOffset map[plumbing.Hash]int64 + pendingRefDeltas map[plumbing.Hash][]*objectInfo + checksum plumbing.Hash + + cache *cache.BufferLRU + // delta content by offset, only used if source is not seekable + deltas map[int64][]byte + + ob []Observer +} + +// NewParser creates a new Parser. The Scanner source must be seekable. +// If it's not, NewParserWithStorage should be used instead. +func NewParser(scanner *Scanner, ob ...Observer) (*Parser, error) { + return NewParserWithStorage(scanner, nil, ob...) +} + +// NewParserWithStorage creates a new Parser. The scanner source must either +// be seekable or a storage must be provided. 
+func NewParserWithStorage( + scanner *Scanner, + storage storer.EncodedObjectStorer, + ob ...Observer, +) (*Parser, error) { + if !scanner.IsSeekable && storage == nil { + return nil, ErrNotSeekableSource + } + + var deltas map[int64][]byte + if !scanner.IsSeekable { + deltas = make(map[int64][]byte) + } + + return &Parser{ + storage: storage, + scanner: scanner, + ob: ob, + count: 0, + cache: cache.NewBufferLRUDefault(), + pendingRefDeltas: make(map[plumbing.Hash][]*objectInfo), + deltas: deltas, + }, nil +} + +func (p *Parser) forEachObserver(f func(o Observer) error) error { + for _, o := range p.ob { + if err := f(o); err != nil { + return err + } + } + return nil +} + +func (p *Parser) onHeader(count uint32) error { + return p.forEachObserver(func(o Observer) error { + return o.OnHeader(count) + }) +} + +func (p *Parser) onInflatedObjectHeader( + t plumbing.ObjectType, + objSize int64, + pos int64, +) error { + return p.forEachObserver(func(o Observer) error { + return o.OnInflatedObjectHeader(t, objSize, pos) + }) +} + +func (p *Parser) onInflatedObjectContent( + h plumbing.Hash, + pos int64, + crc uint32, + content []byte, +) error { + return p.forEachObserver(func(o Observer) error { + return o.OnInflatedObjectContent(h, pos, crc, content) + }) +} + +func (p *Parser) onFooter(h plumbing.Hash) error { + return p.forEachObserver(func(o Observer) error { + return o.OnFooter(h) + }) +} + +// Parse starts the decoding phase of the packfile. 
+func (p *Parser) Parse() (plumbing.Hash, error) { + if err := p.init(); err != nil { + return plumbing.ZeroHash, err + } + + if err := p.indexObjects(); err != nil { + return plumbing.ZeroHash, err + } + + var err error + p.checksum, err = p.scanner.Checksum() + if err != nil && err != io.EOF { + return plumbing.ZeroHash, err + } + + if err := p.resolveDeltas(); err != nil { + return plumbing.ZeroHash, err + } + + if len(p.pendingRefDeltas) > 0 { + return plumbing.ZeroHash, ErrReferenceDeltaNotFound + } + + if err := p.onFooter(p.checksum); err != nil { + return plumbing.ZeroHash, err + } + + return p.checksum, nil +} + +func (p *Parser) init() error { + _, c, err := p.scanner.Header() + if err != nil { + return err + } + + if err := p.onHeader(c); err != nil { + return err + } + + p.count = c + p.oiByHash = make(map[plumbing.Hash]*objectInfo, p.count) + p.oiByOffset = make(map[int64]*objectInfo, p.count) + p.oi = make([]*objectInfo, p.count) + + return nil +} + +func (p *Parser) indexObjects() error { + buf := new(bytes.Buffer) + + for i := uint32(0); i < p.count; i++ { + buf.Reset() + + oh, err := p.scanner.NextObjectHeader() + if err != nil { + return err + } + + delta := false + var ota *objectInfo + switch t := oh.Type; t { + case plumbing.OFSDeltaObject: + delta = true + + parent, ok := p.oiByOffset[oh.OffsetReference] + if !ok { + return plumbing.ErrObjectNotFound + } + + ota = newDeltaObject(oh.Offset, oh.Length, t, parent) + parent.Children = append(parent.Children, ota) + case plumbing.REFDeltaObject: + delta = true + + parent, ok := p.oiByHash[oh.Reference] + if ok { + ota = newDeltaObject(oh.Offset, oh.Length, t, parent) + parent.Children = append(parent.Children, ota) + } else { + ota = newBaseObject(oh.Offset, oh.Length, t) + p.pendingRefDeltas[oh.Reference] = append( + p.pendingRefDeltas[oh.Reference], + ota, + ) + } + default: + ota = newBaseObject(oh.Offset, oh.Length, t) + } + + _, crc, err := p.scanner.NextObject(buf) + if err != nil { + return 
err + } + + ota.Crc32 = crc + ota.Length = oh.Length + + data := buf.Bytes() + if !delta { + sha1, err := getSHA1(ota.Type, data) + if err != nil { + return err + } + + ota.SHA1 = sha1 + p.oiByHash[ota.SHA1] = ota + } + + if p.storage != nil && !delta { + obj := new(plumbing.MemoryObject) + obj.SetSize(oh.Length) + obj.SetType(oh.Type) + if _, err := obj.Write(data); err != nil { + return err + } + + if _, err := p.storage.SetEncodedObject(obj); err != nil { + return err + } + } + + if delta && !p.scanner.IsSeekable { + p.deltas[oh.Offset] = make([]byte, len(data)) + copy(p.deltas[oh.Offset], data) + } + + p.oiByOffset[oh.Offset] = ota + p.oi[i] = ota + } + + return nil +} + +func (p *Parser) resolveDeltas() error { + for _, obj := range p.oi { + content, err := p.get(obj) + if err != nil { + return err + } + + if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil { + return err + } + + if err := p.onInflatedObjectContent(obj.SHA1, obj.Offset, obj.Crc32, content); err != nil { + return err + } + + if !obj.IsDelta() && len(obj.Children) > 0 { + for _, child := range obj.Children { + if _, err := p.resolveObject(child, content); err != nil { + return err + } + } + + // Remove the delta from the cache. + if obj.DiskType.IsDelta() && !p.scanner.IsSeekable { + delete(p.deltas, obj.Offset) + } + } + } + + return nil +} + +func (p *Parser) get(o *objectInfo) ([]byte, error) { + b, ok := p.cache.Get(o.Offset) + // If it's not on the cache and is not a delta we can try to find it in the + // storage, if there's one. 
+ if !ok && p.storage != nil && !o.Type.IsDelta() { + var err error + e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1) + if err != nil { + return nil, err + } + + r, err := e.Reader() + if err != nil { + return nil, err + } + + b = make([]byte, e.Size()) + if _, err = r.Read(b); err != nil { + return nil, err + } + } + + if b != nil { + return b, nil + } + + var data []byte + if o.DiskType.IsDelta() { + base, err := p.get(o.Parent) + if err != nil { + return nil, err + } + + data, err = p.resolveObject(o, base) + if err != nil { + return nil, err + } + } else { + var err error + data, err = p.readData(o) + if err != nil { + return nil, err + } + } + + if len(o.Children) > 0 { + p.cache.Put(o.Offset, data) + } + + return data, nil +} + +func (p *Parser) resolveObject( + o *objectInfo, + base []byte, +) ([]byte, error) { + if !o.DiskType.IsDelta() { + return nil, nil + } + + data, err := p.readData(o) + if err != nil { + return nil, err + } + + data, err = applyPatchBase(o, data, base) + if err != nil { + return nil, err + } + + if pending, ok := p.pendingRefDeltas[o.SHA1]; ok { + for _, po := range pending { + po.Parent = o + o.Children = append(o.Children, po) + } + delete(p.pendingRefDeltas, o.SHA1) + } + + if p.storage != nil { + obj := new(plumbing.MemoryObject) + obj.SetSize(o.Size()) + obj.SetType(o.Type) + if _, err := obj.Write(data); err != nil { + return nil, err + } + + if _, err := p.storage.SetEncodedObject(obj); err != nil { + return nil, err + } + } + + return data, nil +} + +func (p *Parser) readData(o *objectInfo) ([]byte, error) { + if !p.scanner.IsSeekable && o.DiskType.IsDelta() { + data, ok := p.deltas[o.Offset] + if !ok { + return nil, ErrDeltaNotCached + } + + return data, nil + } + + if _, err := p.scanner.SeekFromStart(o.Offset); err != nil { + return nil, err + } + + if _, err := p.scanner.NextObjectHeader(); err != nil { + return nil, err + } + + buf := new(bytes.Buffer) + if _, _, err := p.scanner.NextObject(buf); err != nil 
{ + return nil, err + } + + return buf.Bytes(), nil +} + +func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) { + patched, err := PatchDelta(base, data) + if err != nil { + return nil, err + } + + if ota.SHA1 == plumbing.ZeroHash { + ota.Type = ota.Parent.Type + sha1, err := getSHA1(ota.Type, patched) + if err != nil { + return nil, err + } + + ota.SHA1 = sha1 + ota.Length = int64(len(patched)) + } + + return patched, nil +} + +func getSHA1(t plumbing.ObjectType, data []byte) (plumbing.Hash, error) { + hasher := plumbing.NewHasher(t, int64(len(data))) + if _, err := hasher.Write(data); err != nil { + return plumbing.ZeroHash, err + } + + return hasher.Sum(), nil +} + +type objectInfo struct { + Offset int64 + Length int64 + Type plumbing.ObjectType + DiskType plumbing.ObjectType + + Crc32 uint32 + + Parent *objectInfo + Children []*objectInfo + SHA1 plumbing.Hash +} + +func newBaseObject(offset, length int64, t plumbing.ObjectType) *objectInfo { + return newDeltaObject(offset, length, t, nil) +} + +func newDeltaObject( + offset, length int64, + t plumbing.ObjectType, + parent *objectInfo, +) *objectInfo { + obj := &objectInfo{ + Offset: offset, + Length: length, + Type: t, + DiskType: t, + Crc32: 0, + Parent: parent, + } + + return obj +} + +func (o *objectInfo) IsDelta() bool { + return o.Type.IsDelta() +} + +func (o *objectInfo) Size() int64 { + return o.Length +} diff --git a/plumbing/format/packfile/parser_test.go b/plumbing/format/packfile/parser_test.go new file mode 100644 index 0000000..012a140 --- /dev/null +++ b/plumbing/format/packfile/parser_test.go @@ -0,0 +1,195 @@ +package packfile_test + +import ( + "testing" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + + . 
"gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" +) + +type ParserSuite struct { + fixtures.Suite +} + +var _ = Suite(&ParserSuite{}) + +func (s *ParserSuite) TestParserHashes(c *C) { + f := fixtures.Basic().One() + scanner := packfile.NewScanner(f.Packfile()) + + obs := new(testObserver) + parser, err := packfile.NewParser(scanner, obs) + c.Assert(err, IsNil) + + ch, err := parser.Parse() + c.Assert(err, IsNil) + + checksum := "a3fed42da1e8189a077c0e6846c040dcf73fc9dd" + c.Assert(ch.String(), Equals, checksum) + + c.Assert(obs.checksum, Equals, checksum) + c.Assert(int(obs.count), Equals, int(31)) + + commit := plumbing.CommitObject + blob := plumbing.BlobObject + tree := plumbing.TreeObject + + objs := []observerObject{ + {"e8d3ffab552895c19b9fcf7aa264d277cde33881", commit, 254, 12, 0xaa07ba4b}, + {"6ecf0ef2c2dffb796033e5a02219af86ec6584e5", commit, 245, 186, 0xf706df58}, + {"918c48b83bd081e863dbe1b80f8998f058cd8294", commit, 242, 286, 0x12438846}, + {"af2d6a6954d532f8ffb47615169c8fdf9d383a1a", commit, 242, 449, 0x2905a38c}, + {"1669dce138d9b841a518c64b10914d88f5e488ea", commit, 333, 615, 0xd9429436}, + {"a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69", commit, 332, 838, 0xbecfde4e}, + {"35e85108805c84807bc66a02d91535e1e24b38b9", commit, 244, 1063, 0x780e4b3e}, + {"b8e471f58bcbca63b07bda20e428190409c2db47", commit, 243, 1230, 0xdc18344f}, + {"b029517f6300c2da0f4b651b8642506cd6aaf45d", commit, 187, 1392, 0xcf4e4280}, + {"32858aad3c383ed1ff0a0f9bdf231d54a00c9e88", blob, 189, 1524, 0x1f08118a}, + {"d3ff53e0564a9f87d8e84b6e28e5060e517008aa", blob, 18, 1685, 0xafded7b8}, + {"c192bd6a24ea1ab01d78686e417c8bdc7c3d197f", blob, 1072, 1713, 0xcc1428ed}, + {"d5c0f4ab811897cadf03aec358ae60d21f91c50d", blob, 76110, 2351, 0x1631d22f}, + {"880cd14280f4b9b6ed3986d6671f907d7cc2a198", blob, 2780, 78050, 0xbfff5850}, + {"49c6bb89b17060d7b4deacb7b338fcc6ea2352a9", blob, 217848, 78882, 0xd108e1d8}, + {"c8f1d8c61f9da76f4cb49fd86322b6e685dba956", blob, 706, 80725, 0x8e97ba25}, + 
{"9a48f23120e880dfbe41f7c9b7b708e9ee62a492", blob, 11488, 80998, 0x7316ff70}, + {"9dea2395f5403188298c1dabe8bdafe562c491e3", blob, 78, 84032, 0xdb4fce56}, + {"dbd3641b371024f44d0e469a9c8f5457b0660de1", tree, 272, 84115, 0x901cce2c}, + {"a8d315b2b1c615d43042c3a62402b8a54288cf5c", tree, 271, 84375, 0xec4552b0}, + {"a39771a7651f97faf5c72e08224d857fc35133db", tree, 38, 84430, 0x847905bf}, + {"5a877e6a906a2743ad6e45d99c1793642aaf8eda", tree, 75, 84479, 0x3689459a}, + {"586af567d0bb5e771e49bdd9434f5e0fb76d25fa", tree, 38, 84559, 0xe67af94a}, + {"cf4aa3b38974fb7d81f367c0830f7d78d65ab86b", tree, 34, 84608, 0xc2314a2e}, + {"7e59600739c96546163833214c36459e324bad0a", blob, 9, 84653, 0xcd987848}, + {"fb72698cab7617ac416264415f13224dfd7a165e", tree, 238, 84671, 0x8a853a6d}, + {"4d081c50e250fa32ea8b1313cf8bb7c2ad7627fd", tree, 179, 84688, 0x70c6518}, + {"eba74343e2f15d62adedfd8c883ee0262b5c8021", tree, 148, 84708, 0x4f4108e2}, + {"c2d30fa8ef288618f65f6eed6e168e0d514886f4", tree, 110, 84725, 0xd6fe09e9}, + {"8dcef98b1d52143e1e2dbc458ffe38f925786bf2", tree, 111, 84741, 0xf07a2804}, + {"aa9b383c260e1d05fbbf6b30a02914555e20c725", tree, 73, 84760, 0x1d75d6be}, + } + + c.Assert(obs.objects, DeepEquals, objs) +} + +type observerObject struct { + hash string + otype plumbing.ObjectType + size int64 + offset int64 + crc uint32 +} + +type testObserver struct { + count uint32 + checksum string + objects []observerObject + pos map[int64]int +} + +func (t *testObserver) OnHeader(count uint32) error { + t.count = count + t.pos = make(map[int64]int, count) + return nil +} + +func (t *testObserver) OnInflatedObjectHeader(otype plumbing.ObjectType, objSize int64, pos int64) error { + o := t.get(pos) + o.otype = otype + o.size = objSize + o.offset = pos + + t.put(pos, o) + + return nil +} + +func (t *testObserver) OnInflatedObjectContent(h plumbing.Hash, pos int64, crc uint32, _ []byte) error { + o := t.get(pos) + o.hash = h.String() + o.crc = crc + + t.put(pos, o) + + return nil +} + +func (t 
*testObserver) OnFooter(h plumbing.Hash) error { + t.checksum = h.String() + return nil +} + +func (t *testObserver) get(pos int64) observerObject { + i, ok := t.pos[pos] + if ok { + return t.objects[i] + } + + return observerObject{} +} + +func (t *testObserver) put(pos int64, o observerObject) { + i, ok := t.pos[pos] + if ok { + t.objects[i] = o + return + } + + t.pos[pos] = len(t.objects) + t.objects = append(t.objects, o) +} + +func BenchmarkParse(b *testing.B) { + if err := fixtures.Init(); err != nil { + b.Fatal(err) + } + + defer func() { + if err := fixtures.Clean(); err != nil { + b.Fatal(err) + } + }() + + for _, f := range fixtures.ByTag("packfile") { + b.Run(f.URL, func(b *testing.B) { + for i := 0; i < b.N; i++ { + parser, err := packfile.NewParser(packfile.NewScanner(f.Packfile())) + if err != nil { + b.Fatal(err) + } + + _, err = parser.Parse() + if err != nil { + b.Fatal(err) + } + } + }) + } +} + +func BenchmarkParseBasic(b *testing.B) { + if err := fixtures.Init(); err != nil { + b.Fatal(err) + } + + defer func() { + if err := fixtures.Clean(); err != nil { + b.Fatal(err) + } + }() + + f := fixtures.Basic().One() + for i := 0; i < b.N; i++ { + parser, err := packfile.NewParser(packfile.NewScanner(f.Packfile())) + if err != nil { + b.Fatal(err) + } + + _, err = parser.Parse() + if err != nil { + b.Fatal(err) + } + } +} diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go index c604851..a972f1c 100644 --- a/plumbing/format/packfile/patch_delta.go +++ b/plumbing/format/packfile/patch_delta.go @@ -63,8 +63,8 @@ func PatchDelta(src, delta []byte) ([]byte, error) { targetSz, delta := decodeLEB128(delta) remainingTargetSz := targetSz - var dest []byte var cmd byte + dest := make([]byte, 0, targetSz) for { if len(delta) == 0 { return nil, ErrInvalidDelta diff --git a/plumbing/format/packfile/scanner.go b/plumbing/format/packfile/scanner.go index d2d776f..6fc183b 100644 --- a/plumbing/format/packfile/scanner.go +++ 
b/plumbing/format/packfile/scanner.go @@ -63,10 +63,7 @@ func NewScanner(r io.Reader) *Scanner { crc := crc32.NewIEEE() return &Scanner{ - r: &teeReader{ - newByteReadSeeker(seeker), - crc, - }, + r: newTeeReader(newByteReadSeeker(seeker), crc), crc: crc, IsSeekable: ok, } @@ -143,6 +140,8 @@ func (s *Scanner) readCount() (uint32, error) { // NextObjectHeader returns the ObjectHeader for the next object in the reader func (s *Scanner) NextObjectHeader() (*ObjectHeader, error) { + defer s.Flush() + if err := s.doPending(); err != nil { return nil, err } @@ -271,6 +270,7 @@ func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err erro s.pendingObject = nil written, err = s.copyObject(w) + s.Flush() crc32 = s.crc.Sum32() return } @@ -279,14 +279,15 @@ func (s *Scanner) NextObject(w io.Writer) (written int64, crc32 uint32, err erro // from it zlib stream in an object entry in the packfile. func (s *Scanner) copyObject(w io.Writer) (n int64, err error) { if s.zr == nil { - zr, err := zlib.NewReader(s.r) + var zr io.ReadCloser + zr, err = zlib.NewReader(s.r) if err != nil { return 0, fmt.Errorf("zlib initialization error: %s", err) } s.zr = zr.(readerResetter) } else { - if err := s.zr.Reset(s.r, nil); err != nil { + if err = s.zr.Reset(s.r, nil); err != nil { return 0, fmt.Errorf("zlib reset error: %s", err) } } @@ -339,6 +340,16 @@ func (s *Scanner) Close() error { return err } +// Flush finishes writing the buffer to crc hasher in case we are using +// a teeReader. Otherwise it is a no-op. 
+func (s *Scanner) Flush() error { + tee, ok := s.r.(*teeReader) + if ok { + return tee.Flush() + } + return nil +} + type trackableReader struct { count int64 io.Reader @@ -400,10 +411,21 @@ type reader interface { type teeReader struct { reader - w hash.Hash32 + w hash.Hash32 + bufWriter *bufio.Writer +} + +func newTeeReader(r reader, h hash.Hash32) *teeReader { + return &teeReader{ + reader: r, + w: h, + bufWriter: bufio.NewWriter(h), + } } func (r *teeReader) Read(p []byte) (n int, err error) { + r.Flush() + n, err = r.reader.Read(p) if n > 0 { if n, err := r.w.Write(p[:n]); err != nil { @@ -416,11 +438,12 @@ func (r *teeReader) Read(p []byte) (n int, err error) { func (r *teeReader) ReadByte() (b byte, err error) { b, err = r.reader.ReadByte() if err == nil { - _, err := r.w.Write([]byte{b}) - if err != nil { - return 0, err - } + return b, r.bufWriter.WriteByte(b) } return } + +func (r *teeReader) Flush() (err error) { + return r.bufWriter.Flush() +} diff --git a/plumbing/format/packfile/scanner_test.go b/plumbing/format/packfile/scanner_test.go index 000be7f..644d0eb 100644 --- a/plumbing/format/packfile/scanner_test.go +++ b/plumbing/format/packfile/scanner_test.go @@ -41,14 +41,16 @@ func (s *ScannerSuite) TestNextObjectHeaderWithoutHeader(c *C) { } func (s *ScannerSuite) TestNextObjectHeaderREFDelta(c *C) { - s.testNextObjectHeader(c, "ref-delta", expectedHeadersREF) + s.testNextObjectHeader(c, "ref-delta", expectedHeadersREF, expectedCRCREF) } func (s *ScannerSuite) TestNextObjectHeaderOFSDelta(c *C) { - s.testNextObjectHeader(c, "ofs-delta", expectedHeadersOFS) + s.testNextObjectHeader(c, "ofs-delta", expectedHeadersOFS, expectedCRCOFS) } -func (s *ScannerSuite) testNextObjectHeader(c *C, tag string, expected []ObjectHeader) { +func (s *ScannerSuite) testNextObjectHeader(c *C, tag string, + expected []ObjectHeader, expectedCRC []uint32) { + r := fixtures.Basic().ByTag(tag).One().Packfile() p := NewScanner(r) @@ -61,9 +63,10 @@ func (s *ScannerSuite) 
testNextObjectHeader(c *C, tag string, expected []ObjectH c.Assert(*h, DeepEquals, expected[i]) buf := bytes.NewBuffer(nil) - n, _, err := p.NextObject(buf) + n, crcFromScanner, err := p.NextObject(buf) c.Assert(err, IsNil) c.Assert(n, Equals, h.Length) + c.Assert(crcFromScanner, Equals, expectedCRC[i]) } n, err := p.Checksum() @@ -149,6 +152,40 @@ var expectedHeadersOFS = []ObjectHeader{ {Type: plumbing.OFSDeltaObject, Offset: 84760, Length: 4, OffsetReference: 84741}, } +var expectedCRCOFS = []uint32{ + 0xaa07ba4b, + 0xf706df58, + 0x12438846, + 0x2905a38c, + 0xd9429436, + 0xbecfde4e, + 0x780e4b3e, + 0xdc18344f, + 0xcf4e4280, + 0x1f08118a, + 0xafded7b8, + 0xcc1428ed, + 0x1631d22f, + 0xbfff5850, + 0xd108e1d8, + 0x8e97ba25, + 0x7316ff70, + 0xdb4fce56, + 0x901cce2c, + 0xec4552b0, + 0x847905bf, + 0x3689459a, + 0xe67af94a, + 0xc2314a2e, + 0xcd987848, + 0x8a853a6d, + 0x70c6518, + 0x4f4108e2, + 0xd6fe09e9, + 0xf07a2804, + 0x1d75d6be, +} + var expectedHeadersREF = []ObjectHeader{ {Type: plumbing.CommitObject, Offset: 12, Length: 254}, {Type: plumbing.REFDeltaObject, Offset: 186, Length: 93, @@ -188,3 +225,37 @@ var expectedHeadersREF = []ObjectHeader{ Reference: plumbing.NewHash("eba74343e2f15d62adedfd8c883ee0262b5c8021")}, {Type: plumbing.TreeObject, Offset: 85485, Length: 73}, } + +var expectedCRCREF = []uint32{ + 0xaa07ba4b, + 0xfb4725a4, + 0x12438846, + 0x2905a38c, + 0xd9429436, + 0xbecfde4e, + 0xdc18344f, + 0x780e4b3e, + 0xcf4e4280, + 0x1f08118a, + 0xafded7b8, + 0xcc1428ed, + 0x1631d22f, + 0x847905bf, + 0x3e20f31d, + 0x3689459a, + 0xd108e1d8, + 0x71143d4a, + 0xe67af94a, + 0x739fb89f, + 0xc2314a2e, + 0x87864926, + 0x415d752f, + 0xf72fb182, + 0x3ffa37d4, + 0xcd987848, + 0x2f20ac8f, + 0xf2f0575, + 0x7d8726e1, + 0x740bf39, + 0x26af4735, +} |