package packfile import ( "bytes" "io" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/cache" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) // Format specifies if the packfile uses ref-deltas or ofs-deltas. type Format int // Possible values of the Format type. const ( UnknownFormat Format = iota OFSDeltaFormat REFDeltaFormat ) var ( // ErrMaxObjectsLimitReached is returned by Decode when the number // of objects in the packfile is higher than // Decoder.MaxObjectsLimit. ErrMaxObjectsLimitReached = NewError("max. objects limit reached") // ErrInvalidObject is returned by Decode when an invalid object is // found in the packfile. ErrInvalidObject = NewError("invalid git object") // ErrPackEntryNotFound is returned by Decode when a reference in // the packfile references and unknown object. ErrPackEntryNotFound = NewError("can't find a pack entry") // ErrZLib is returned by Decode when there was an error unzipping // the packfile contents. ErrZLib = NewError("zlib reading error") // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object // to recall cannot be returned. ErrCannotRecall = NewError("cannot recall object") // ErrResolveDeltasNotSupported is returned if a NewDecoder is used with a // non-seekable scanner and without a plumbing.ObjectStorage ErrResolveDeltasNotSupported = NewError("resolve delta is not supported") // ErrNonSeekable is returned if a ReadObjectAt method is called without a // seekable scanner ErrNonSeekable = NewError("non-seekable scanner") // ErrRollback error making Rollback over a transaction after an error ErrRollback = NewError("rollback error, during set error") // ErrAlreadyDecoded is returned if NewDecoder is called for a second time ErrAlreadyDecoded = NewError("packfile was already decoded") ) // Decoder reads and decodes packfiles from an input Scanner, if an ObjectStorer // was provided the decoded objects are store there. If not the decode object // is destroyed. The Offsets and CRCs are calculated whether an // ObjectStorer was provided or not. type Decoder struct { deltaBaseCache cache.Object s *Scanner o storer.EncodedObjectStorer tx storer.Transaction isDecoded bool // hasBuiltIndex indicates if the index is fully built or not. If it is not, // will be built incrementally while decoding. hasBuiltIndex bool idx idxfile.Index offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType offsetToHash map[int64]plumbing.Hash } // NewDecoder returns a new Decoder that decodes a Packfile using the given // Scanner and stores the objects in the provided EncodedObjectStorer. ObjectStorer can be nil, in this // If the passed EncodedObjectStorer is nil, objects are not stored, but // offsets on the Packfile and CRCs are calculated. // // If EncodedObjectStorer is nil and the Scanner is not Seekable, ErrNonSeekable is // returned. // // If the ObjectStorer implements storer.Transactioner, a transaction is created // during the Decode execution. If anything fails, Rollback is called func NewDecoder(s *Scanner, o storer.EncodedObjectStorer) (*Decoder, error) { return NewDecoderForType(s, o, plumbing.AnyObject, cache.NewObjectLRUDefault()) } // NewDecoderWithCache is a version of NewDecoder where cache can be specified. func NewDecoderWithCache(s *Scanner, o storer.EncodedObjectStorer, cacheObject cache.Object) (*Decoder, error) { return NewDecoderForType(s, o, plumbing.AnyObject, cacheObject) } // NewDecoderForType returns a new Decoder but in this case for a specific object type. // When an object is read using this Decoder instance and it is not of the same type of // the specified one, nil will be returned. This is intended to avoid the content // deserialization of all the objects. // // cacheObject is a cache.Object implementation that is used to speed up the // process. If cache is not needed you can pass nil. To create an LRU cache // object with the default size you can use the helper cache.ObjectLRUDefault(). func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, t plumbing.ObjectType, cacheObject cache.Object) (*Decoder, error) { if t == plumbing.OFSDeltaObject || t == plumbing.REFDeltaObject || t == plumbing.InvalidObject { return nil, plumbing.ErrInvalidType } if !canResolveDeltas(s, o) { return nil, ErrResolveDeltasNotSupported } return &Decoder{ s: s, o: o, deltaBaseCache: cacheObject, idx: idxfile.NewMemoryIndex(), offsetToType: make(map[int64]plumbing.ObjectType), offsetToHash: make(map[int64]plumbing.Hash), decoderType: t, }, nil } func canResolveDeltas(s *Scanner, o storer.EncodedObjectStorer) bool { return s.IsSeekable || o != nil } // Decode reads a packfile and stores it in the value pointed to by s. The // offsets and the CRCs are calculated by this method func (d *Decoder) Decode() (checksum plumbing.Hash, err error) { defer func() { d.isDecoded = true }() if d.isDecoded { return plumbing.ZeroHash, ErrAlreadyDecoded } if err := d.doDecode(); err != nil { return plumbing.ZeroHash, err } return d.s.Checksum() } func (d *Decoder) fillOffsetsToHashes() error { entries, err := d.idx.Entries() if err != nil { return err } for { e, err := entries.Next() if err != nil { if err == io.EOF { break } return err } d.offsetToHash[int64(e.Offset)] = e.Hash } return entries.Close() } func (d *Decoder) doDecode() error { _, count, err := d.s.Header() if err != nil { return err } if !d.hasBuiltIndex { // TODO: MemoryIndex is not writable, change to something else d.idx = idxfile.NewMemoryIndex() } defer func() { d.hasBuiltIndex = true }() if d.hasBuiltIndex && !d.s.IsSeekable { if err := d.fillOffsetsToHashes(); err != nil { return err } } _, isTxStorer := d.o.(storer.Transactioner) switch { case d.o == nil: return d.decodeObjects(int(count)) case isTxStorer: return d.decodeObjectsWithObjectStorerTx(int(count)) default: return d.decodeObjectsWithObjectStorer(int(count)) } } func (d *Decoder) decodeObjects(count int) error { for i := 0; i < count; i++ { if _, err := d.DecodeObject(); err != nil { return err } } return nil } func (d *Decoder) decodeObjectsWithObjectStorer(count int) error { for i := 0; i < count; i++ { obj, err := d.DecodeObject() if err != nil { return err } if _, err := d.o.SetEncodedObject(obj); err != nil { return err } } return nil } func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error { d.tx = d.o.(storer.Transactioner).Begin() for i := 0; i < count; i++ { obj, err := d.DecodeObject() if err != nil { return err } if _, err := d.tx.SetEncodedObject(obj); err != nil { if rerr := d.tx.Rollback(); rerr != nil { return ErrRollback.AddDetails( "error: %s, during tx.Set error: %s", rerr, err, ) } return err } } return d.tx.Commit() } // DecodeObject reads the next object from the scanner and returns it. This // method can be used in replacement of the Decode method, to work in a // interactive way. If you created a new decoder instance using NewDecoderForType // constructor, if the object decoded is not equals to the specified one, nil will // be returned func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) { return d.doDecodeObject(d.decoderType) } func (d *Decoder) doDecodeObject(t plumbing.ObjectType) (plumbing.EncodedObject, error) { h, err := d.s.NextObjectHeader() if err != nil { return nil, err } if t == plumbing.AnyObject { return d.decodeByHeader(h) } return d.decodeIfSpecificType(h) } func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) { var ( obj plumbing.EncodedObject realType plumbing.ObjectType err error ) switch h.Type { case plumbing.OFSDeltaObject: realType, err = d.ofsDeltaType(h.OffsetReference) case plumbing.REFDeltaObject: realType, err = d.refDeltaType(h.Reference) if err == plumbing.ErrObjectNotFound { obj, err = d.decodeByHeader(h) if err != nil { realType = obj.Type() } } default: realType = h.Type } if err != nil { return nil, err } d.offsetToType[h.Offset] = realType if d.decoderType == realType { if obj != nil { return obj, nil } return d.decodeByHeader(h) } return nil, nil } func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) { t, ok := d.offsetToType[offset] if !ok { return plumbing.InvalidObject, plumbing.ErrObjectNotFound } return t, nil } func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) { offset, err := d.idx.FindOffset(ref) if err != nil { return plumbing.InvalidObject, plumbing.ErrObjectNotFound } return d.ofsDeltaType(offset) } func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) { obj := d.newObject() obj.SetSize(h.Length) obj.SetType(h.Type) var err error switch h.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: _, err = d.fillRegularObjectContent(obj) case plumbing.REFDeltaObject: _, err = d.fillREFDeltaObjectContent(obj, h.Reference) case plumbing.OFSDeltaObject: _, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) default: err = ErrInvalidObject.AddDetails("type %q", h.Type) } if err != nil { return obj, err } d.offsetToHash[h.Offset] = obj.Hash() return obj, nil } func (d *Decoder) newObject() plumbing.EncodedObject { if d.o == nil { return &plumbing.MemoryObject{} } return d.o.NewEncodedObject() } // DecodeObjectAt reads an object at the given location. Every EncodedObject // returned is added into a internal index. This is intended to be able to regenerate // objects from deltas (offset deltas or reference deltas) without an package index // (.idx file). If Decode wasn't called previously objects offset should provided // using the SetOffsets method. It decodes the object regardless of the Decoder // type. func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) { if !d.s.IsSeekable { return nil, ErrNonSeekable } beforeJump, err := d.s.SeekFromStart(offset) if err != nil { return nil, err } defer func() { _, seekErr := d.s.SeekFromStart(beforeJump) if err == nil { err = seekErr } }() return d.doDecodeObject(plumbing.AnyObject) } func (d *Decoder) fillRegularObjectContent(obj plumbing.EncodedObject) (uint32, error) { w, err := obj.Writer() if err != nil { return 0, err } _, crc, err := d.s.NextObject(w) return crc, err } func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) (uint32, error) { buf := bufPool.Get().(*bytes.Buffer) buf.Reset() _, crc, err := d.s.NextObject(buf) if err != nil { return 0, err } base, ok := d.cacheGet(ref) if !ok { base, err = d.recallByHash(ref) if err != nil { return 0, err } } obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) d.cachePut(obj) bufPool.Put(buf) return crc, err } func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) { buf := bytes.NewBuffer(nil) _, crc, err := d.s.NextObject(buf) if err != nil { return 0, err } h, ok := d.offsetToHash[offset] var base plumbing.EncodedObject if ok { base, ok = d.cacheGet(h) } if !ok { base, err = d.recallByOffset(offset) if err != nil { return 0, err } d.cachePut(base) } obj.SetType(base.Type()) err = ApplyDelta(obj, base, buf.Bytes()) d.cachePut(obj) return crc, err } func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) { if d.deltaBaseCache == nil { return nil, false } return d.deltaBaseCache.Get(h) } func (d *Decoder) cachePut(obj plumbing.EncodedObject) { if d.deltaBaseCache == nil { return } d.deltaBaseCache.Put(obj) } func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { if d.s.IsSeekable { return d.DecodeObjectAt(o) } hash, err := d.idx.FindHash(o) if err != nil { return nil, err } return d.recallByHashNonSeekable(hash) } func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { if d.s.IsSeekable { if offset, err := d.idx.FindOffset(h); err == nil { return d.DecodeObjectAt(offset) } } return d.recallByHashNonSeekable(h) } // recallByHashNonSeekable if we are in a transaction the objects are read from // the transaction, if not are directly read from the ObjectStorer func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.EncodedObject, err error) { if d.tx != nil { obj, err = d.tx.EncodedObject(plumbing.AnyObject, h) } else { obj, err = d.o.EncodedObject(plumbing.AnyObject, h) } if err != plumbing.ErrObjectNotFound { return obj, err } return nil, plumbing.ErrObjectNotFound } // SetIndex sets an index for the packfile. It is recommended to set this. // The index might be read from a file or reused from a previous Decoder usage // (see Index function). func (d *Decoder) SetIndex(idx idxfile.Index) { d.hasBuiltIndex = true d.idx = idx } // Index returns the index for the packfile. If index was set with SetIndex, // Index will return it. Otherwise, it will return an index that is built while // decoding. If neither SetIndex was called with a full index or Decode called // for the whole packfile, then the returned index will be incomplete. func (d *Decoder) Index() idxfile.Index { return d.idx } // Close closes the Scanner. usually this mean that the whole reader is read and // discarded func (d *Decoder) Close() error { return d.s.Close() }