diff options
Diffstat (limited to 'formats/packfile/decoder.go')
-rw-r--r-- | formats/packfile/decoder.go | 180 |
1 files changed, 104 insertions, 76 deletions
diff --git a/formats/packfile/decoder.go b/formats/packfile/decoder.go index 3da927d..18ec6b9 100644 --- a/formats/packfile/decoder.go +++ b/formats/packfile/decoder.go @@ -2,8 +2,6 @@ package packfile import ( "bytes" - "io" - "os" "gopkg.in/src-d/go-git.v4/core" ) @@ -32,9 +30,8 @@ var ( // ErrZLib is returned by Decode when there was an error unzipping // the packfile contents. ErrZLib = NewError("zlib reading error") - // ErrDuplicatedObject is returned by Remember if an object appears several - // times in a packfile. - ErrDuplicatedObject = NewError("duplicated object") + // ErrNotSeeker not seeker supported + ErrNotSeeker = NewError("no seeker capable decode") // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object // to recall cannot be returned. ErrCannotRecall = NewError("cannot recall object") @@ -42,33 +39,43 @@ var ( // Decoder reads and decodes packfiles from an input stream. type Decoder struct { - p *Parser - s core.ObjectStorage - seeker io.Seeker + scanner *Scanner + storage core.ObjectStorage offsetToObject map[int64]core.Object hashToOffset map[core.Hash]int64 } // NewDecoder returns a new Decoder that reads from r. -func NewDecoder(s core.ObjectStorage, p *Parser, seeker io.Seeker) *Decoder { +func NewDecoder(p *Scanner, s core.ObjectStorage) *Decoder { return &Decoder{ - p: p, - s: s, - seeker: seeker, + scanner: p, + storage: s, offsetToObject: make(map[int64]core.Object, 0), hashToOffset: make(map[core.Hash]int64, 0), } } // Decode reads a packfile and stores it in the value pointed to by s. -func (d *Decoder) Decode() error { - _, count, err := d.p.Header() +func (d *Decoder) Decode() (checksum core.Hash, err error) { + if err := d.doDecode(); err != nil { + return core.ZeroHash, err + } + + return d.scanner.Checksum() +} + +func (d *Decoder) doDecode() error { + _, count, err := d.scanner.Header() if err != nil { return err } - tx := d.s.Begin() - if err := d.readObjects(tx, count); err != nil { + if d.storage == nil { + return d.readObjects(count, nil) + } + + tx := d.storage.Begin() + if err := d.readObjects(count, tx); err != nil { if err := tx.Rollback(); err != nil { return nil } @@ -76,23 +83,27 @@ func (d *Decoder) Decode() error { return err } - return tx.Commit() + if err := tx.Commit(); err != nil { + return err + } + + return nil } -func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error { - // This code has 50-80 µs of overhead per object not counting zlib inflation. - // Together with zlib inflation, it's 400-410 µs for small objects. - // That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB, - // of which 12-20 % is _not_ zlib inflation (ie. is our code). +func (d *Decoder) readObjects(count uint32, tx core.TxObjectStorage) error { for i := 0; i < int(count); i++ { obj, err := d.readObject() if err != nil { return err } + if tx == nil { + continue + } + _, err = tx.Set(obj) - if err == io.EOF { - break + if err != nil { + return err } } @@ -100,12 +111,12 @@ func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error { } func (d *Decoder) readObject() (core.Object, error) { - h, err := d.p.NextObjectHeader() + h, err := d.scanner.NextObjectHeader() if err != nil { return nil, err } - obj := d.s.NewObject() + obj := d.newObject() obj.SetSize(h.Length) obj.SetType(h.Type) @@ -120,7 +131,20 @@ func (d *Decoder) readObject() (core.Object, error) { err = ErrInvalidObject.AddDetails("type %q", h.Type) } - return obj, d.remember(h.Offset, obj) + if err != nil { + return obj, err + } + + d.remember(h.Offset, obj) + return obj, nil +} + +func (d *Decoder) newObject() core.Object { + if d.storage == nil { + return &core.MemoryObject{} + } + + return d.storage.NewObject() } func (d *Decoder) fillRegularObjectContent(obj core.Object) error { @@ -129,103 +153,107 @@ func (d *Decoder) fillRegularObjectContent(obj core.Object) error { return err } - _, err = d.p.NextObject(w) + _, err = d.scanner.NextObject(w) return err } func (d *Decoder) fillREFDeltaObjectContent(obj core.Object, ref core.Hash) error { - base, err := d.recallByHash(ref) - if err != nil { - return err - } - obj.SetType(base.Type()) - if err := d.readAndApplyDelta(obj, base); err != nil { + buf := bytes.NewBuffer(nil) + if _, err := d.scanner.NextObject(buf); err != nil { return err } - return nil -} - -func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) error { - base, err := d.recallByOffset(offset) + base, err := d.recallByHash(ref) if err != nil { return err } obj.SetType(base.Type()) - if err := d.readAndApplyDelta(obj, base); err != nil { - return err - } - - return nil + return ApplyDelta(obj, base, buf.Bytes()) } -// ReadAndApplyDelta reads and apply the base patched with the contents -// of a zlib compressed diff data in the delta portion of an object -// entry in the packfile. -func (d *Decoder) readAndApplyDelta(target, base core.Object) error { +func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) error { buf := bytes.NewBuffer(nil) - if _, err := d.p.NextObject(buf); err != nil { + if _, err := d.scanner.NextObject(buf); err != nil { + return err + } + + base, err := d.recallByOffset(offset) + if err != nil { return err } - return ApplyDelta(target, base, buf.Bytes()) + obj.SetType(base.Type()) + return ApplyDelta(obj, base, buf.Bytes()) } -// Remember stores the offset of the object and its hash, but not the -// object itself. This implementation does not check for already stored -// offsets, as it is too expensive to build this information from an -// index every time a get operation is performed on the SeekableReadRecaller. -func (r *Decoder) remember(o int64, obj core.Object) error { +// remember stores the offset of the object and its hash and the object itself. +// If a seeker was not provided to the decoder, the objects are stored in memory +func (d *Decoder) remember(o int64, obj core.Object) { h := obj.Hash() - r.hashToOffset[h] = o - r.offsetToObject[o] = obj - return nil + + d.hashToOffset[h] = o + if !d.scanner.IsSeekable() { + d.offsetToObject[o] = obj + } } -// RecallByHash returns the object for a given hash by looking for it again in +// recallByHash returns the object for a given hash by looking for it again in // the io.ReadeSeerker. -func (r *Decoder) recallByHash(h core.Hash) (core.Object, error) { - o, ok := r.hashToOffset[h] +func (d *Decoder) recallByHash(h core.Hash) (core.Object, error) { + o, ok := d.hashToOffset[h] if !ok { return nil, ErrCannotRecall.AddDetails("hash not found: %s", h) } - return r.recallByOffset(o) + return d.recallByOffset(o) } -// RecallByOffset returns the object for a given offset by looking for it again in +// recallByOffset returns the object for a given offset by looking for it again in // the io.ReadeSeerker. For efficiency reasons, this method always find objects by // offset, even if they have not been remembered or if they have been forgetted. -func (r *Decoder) recallByOffset(o int64) (obj core.Object, err error) { - obj, ok := r.offsetToObject[o] +func (d *Decoder) recallByOffset(o int64) (core.Object, error) { + obj, ok := d.offsetToObject[o] if ok { return obj, nil } - if !ok && r.seeker == nil { + if !ok && !d.scanner.IsSeekable() { return nil, ErrCannotRecall.AddDetails("no object found at offset %d", o) } - // remember current offset - beforeJump, err := r.seeker.Seek(0, os.SEEK_CUR) + return d.ReadObjectAt(o) +} + +// ReadObjectAt reads an object at the given location +func (d *Decoder) ReadObjectAt(offset int64) (core.Object, error) { + if !d.scanner.IsSeekable() { + return nil, ErrNotSeeker + } + + beforeJump, err := d.scanner.Seek(offset) if err != nil { return nil, err } defer func() { - // jump back - _, seekErr := r.seeker.Seek(beforeJump, os.SEEK_SET) + _, seekErr := d.scanner.Seek(beforeJump) if err == nil { err = seekErr } }() - // jump to requested offset - _, err = r.seeker.Seek(o, os.SEEK_SET) - if err != nil { - return nil, err - } + return d.readObject() +} + +// Index returns an index of the objects read by hash and the position where +// was read +func (d *Decoder) Index() map[core.Hash]int64 { + return d.hashToOffset +} - return r.readObject() +// Close close the Scanner, usually this mean that the whole reader is read and +// discarded +func (d *Decoder) Close() error { + return d.scanner.Close() } |