package packfile import ( "bytes" "io" "os" "gopkg.in/src-d/go-git.v4/core" ) // Format specifies if the packfile uses ref-deltas or ofs-deltas. type Format int // Possible values of the Format type. const ( UnknownFormat Format = iota OFSDeltaFormat REFDeltaFormat ) var ( // ErrMaxObjectsLimitReached is returned by Decode when the number // of objects in the packfile is higher than // Decoder.MaxObjectsLimit. ErrMaxObjectsLimitReached = NewError("max. objects limit reached") // ErrInvalidObject is returned by Decode when an invalid object is // found in the packfile. ErrInvalidObject = NewError("invalid git object") // ErrPackEntryNotFound is returned by Decode when a reference in // the packfile references and unknown object. ErrPackEntryNotFound = NewError("can't find a pack entry") // ErrZLib is returned by Decode when there was an error unzipping // the packfile contents. ErrZLib = NewError("zlib reading error") // ErrDuplicatedObject is returned by Remember if an object appears several // times in a packfile. ErrDuplicatedObject = NewError("duplicated object") // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object // to recall cannot be returned. ErrCannotRecall = NewError("cannot recall object") ) // Decoder reads and decodes packfiles from an input stream. type Decoder struct { p *Parser s core.ObjectStorage seeker io.Seeker offsetToObject map[int64]core.Object hashToOffset map[core.Hash]int64 } // NewDecoder returns a new Decoder that reads from r. func NewDecoder(s core.ObjectStorage, p *Parser, seeker io.Seeker) *Decoder { return &Decoder{ p: p, s: s, seeker: seeker, offsetToObject: make(map[int64]core.Object, 0), hashToOffset: make(map[core.Hash]int64, 0), } } // Decode reads a packfile and stores it in the value pointed to by s. func (d *Decoder) Decode() error { _, count, err := d.p.Header() if err != nil { return err } tx := d.s.Begin() if err := d.readObjects(tx, count); err != nil { if err := tx.Rollback(); err != nil { return nil } return err } return tx.Commit() } func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error { // This code has 50-80 µs of overhead per object not counting zlib inflation. // Together with zlib inflation, it's 400-410 µs for small objects. // That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB, // of which 12-20 % is _not_ zlib inflation (ie. is our code). for i := 0; i < int(count); i++ { obj, err := d.readObject() if err != nil { return err } _, err = tx.Set(obj) if err == io.EOF { break } } return nil } func (d *Decoder) readObject() (core.Object, error) { h, err := d.p.NextObjectHeader() if err != nil { return nil, err } obj := d.s.NewObject() obj.SetSize(h.Length) obj.SetType(h.Type) switch h.Type { case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject: err = d.fillRegularObjectContent(obj) case core.REFDeltaObject: err = d.fillREFDeltaObjectContent(obj, h.Reference) case core.OFSDeltaObject: err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) default: err = ErrInvalidObject.AddDetails("type %q", h.Type) } return obj, d.remember(h.Offset, obj) } func (d *Decoder) fillRegularObjectContent(obj core.Object) error { w, err := obj.Writer() if err != nil { return err } _, err = d.p.NextObject(w) return err } func (d *Decoder) fillREFDeltaObjectContent(obj core.Object, ref core.Hash) error { base, err := d.recallByHash(ref) if err != nil { return err } obj.SetType(base.Type()) if err := d.readAndApplyDelta(obj, base); err != nil { return err } return nil } func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) error { base, err := d.recallByOffset(offset) if err != nil { return err } obj.SetType(base.Type()) if err := d.readAndApplyDelta(obj, base); err != nil { return err } return nil } // ReadAndApplyDelta reads and apply the base patched with the contents // of a zlib compressed diff data in the delta portion of an object // entry in the packfile. func (d *Decoder) readAndApplyDelta(target, base core.Object) error { buf := bytes.NewBuffer(nil) if _, err := d.p.NextObject(buf); err != nil { return err } return ApplyDelta(target, base, buf.Bytes()) } // Remember stores the offset of the object and its hash, but not the // object itself. This implementation does not check for already stored // offsets, as it is too expensive to build this information from an // index every time a get operation is performed on the SeekableReadRecaller. func (r *Decoder) remember(o int64, obj core.Object) error { h := obj.Hash() r.hashToOffset[h] = o r.offsetToObject[o] = obj return nil } // RecallByHash returns the object for a given hash by looking for it again in // the io.ReadeSeerker. func (r *Decoder) recallByHash(h core.Hash) (core.Object, error) { o, ok := r.hashToOffset[h] if !ok { return nil, ErrCannotRecall.AddDetails("hash not found: %s", h) } return r.recallByOffset(o) } // RecallByOffset returns the object for a given offset by looking for it again in // the io.ReadeSeerker. For efficiency reasons, this method always find objects by // offset, even if they have not been remembered or if they have been forgetted. func (r *Decoder) recallByOffset(o int64) (obj core.Object, err error) { obj, ok := r.offsetToObject[o] if ok { return obj, nil } if !ok && r.seeker == nil { return nil, ErrCannotRecall.AddDetails("no object found at offset %d", o) } // remember current offset beforeJump, err := r.seeker.Seek(0, os.SEEK_CUR) if err != nil { return nil, err } defer func() { // jump back _, seekErr := r.seeker.Seek(beforeJump, os.SEEK_SET) if err == nil { err = seekErr } }() // jump to requested offset _, err = r.seeker.Seek(o, os.SEEK_SET) if err != nil { return nil, err } return r.readObject() }