aboutsummaryrefslogtreecommitdiffstats
path: root/formats/packfile/decoder.go
diff options
context:
space:
mode:
Diffstat (limited to 'formats/packfile/decoder.go')
-rw-r--r--formats/packfile/decoder.go180
1 files changed, 104 insertions, 76 deletions
diff --git a/formats/packfile/decoder.go b/formats/packfile/decoder.go
index 3da927d..18ec6b9 100644
--- a/formats/packfile/decoder.go
+++ b/formats/packfile/decoder.go
@@ -2,8 +2,6 @@ package packfile
import (
"bytes"
- "io"
- "os"
"gopkg.in/src-d/go-git.v4/core"
)
@@ -32,9 +30,8 @@ var (
// ErrZLib is returned by Decode when there was an error unzipping
// the packfile contents.
ErrZLib = NewError("zlib reading error")
- // ErrDuplicatedObject is returned by Remember if an object appears several
- // times in a packfile.
- ErrDuplicatedObject = NewError("duplicated object")
+ // ErrNotSeeker not seeker supported
+ ErrNotSeeker = NewError("no seeker capable decode")
// ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object
// to recall cannot be returned.
ErrCannotRecall = NewError("cannot recall object")
@@ -42,33 +39,43 @@ var (
// Decoder reads and decodes packfiles from an input stream.
type Decoder struct {
- p *Parser
- s core.ObjectStorage
- seeker io.Seeker
+ scanner *Scanner
+ storage core.ObjectStorage
offsetToObject map[int64]core.Object
hashToOffset map[core.Hash]int64
}
// NewDecoder returns a new Decoder that reads from r.
-func NewDecoder(s core.ObjectStorage, p *Parser, seeker io.Seeker) *Decoder {
+func NewDecoder(p *Scanner, s core.ObjectStorage) *Decoder {
return &Decoder{
- p: p,
- s: s,
- seeker: seeker,
+ scanner: p,
+ storage: s,
offsetToObject: make(map[int64]core.Object, 0),
hashToOffset: make(map[core.Hash]int64, 0),
}
}
// Decode reads a packfile and stores it in the value pointed to by s.
-func (d *Decoder) Decode() error {
- _, count, err := d.p.Header()
+func (d *Decoder) Decode() (checksum core.Hash, err error) {
+ if err := d.doDecode(); err != nil {
+ return core.ZeroHash, err
+ }
+
+ return d.scanner.Checksum()
+}
+
+func (d *Decoder) doDecode() error {
+ _, count, err := d.scanner.Header()
if err != nil {
return err
}
- tx := d.s.Begin()
- if err := d.readObjects(tx, count); err != nil {
+ if d.storage == nil {
+ return d.readObjects(count, nil)
+ }
+
+ tx := d.storage.Begin()
+ if err := d.readObjects(count, tx); err != nil {
if err := tx.Rollback(); err != nil {
return nil
}
@@ -76,23 +83,27 @@ func (d *Decoder) Decode() error {
return err
}
- return tx.Commit()
+ if err := tx.Commit(); err != nil {
+ return err
+ }
+
+ return nil
}
-func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error {
- // This code has 50-80 µs of overhead per object not counting zlib inflation.
- // Together with zlib inflation, it's 400-410 µs for small objects.
- // That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
- // of which 12-20 % is _not_ zlib inflation (ie. is our code).
+func (d *Decoder) readObjects(count uint32, tx core.TxObjectStorage) error {
for i := 0; i < int(count); i++ {
obj, err := d.readObject()
if err != nil {
return err
}
+ if tx == nil {
+ continue
+ }
+
_, err = tx.Set(obj)
- if err == io.EOF {
- break
+ if err != nil {
+ return err
}
}
@@ -100,12 +111,12 @@ func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error {
}
func (d *Decoder) readObject() (core.Object, error) {
- h, err := d.p.NextObjectHeader()
+ h, err := d.scanner.NextObjectHeader()
if err != nil {
return nil, err
}
- obj := d.s.NewObject()
+ obj := d.newObject()
obj.SetSize(h.Length)
obj.SetType(h.Type)
@@ -120,7 +131,20 @@ func (d *Decoder) readObject() (core.Object, error) {
err = ErrInvalidObject.AddDetails("type %q", h.Type)
}
- return obj, d.remember(h.Offset, obj)
+ if err != nil {
+ return obj, err
+ }
+
+ d.remember(h.Offset, obj)
+ return obj, nil
+}
+
+func (d *Decoder) newObject() core.Object {
+ if d.storage == nil {
+ return &core.MemoryObject{}
+ }
+
+ return d.storage.NewObject()
}
func (d *Decoder) fillRegularObjectContent(obj core.Object) error {
@@ -129,103 +153,107 @@ func (d *Decoder) fillRegularObjectContent(obj core.Object) error {
return err
}
- _, err = d.p.NextObject(w)
+ _, err = d.scanner.NextObject(w)
return err
}
func (d *Decoder) fillREFDeltaObjectContent(obj core.Object, ref core.Hash) error {
- base, err := d.recallByHash(ref)
- if err != nil {
- return err
- }
- obj.SetType(base.Type())
- if err := d.readAndApplyDelta(obj, base); err != nil {
+ buf := bytes.NewBuffer(nil)
+ if _, err := d.scanner.NextObject(buf); err != nil {
return err
}
- return nil
-}
-
-func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) error {
- base, err := d.recallByOffset(offset)
+ base, err := d.recallByHash(ref)
if err != nil {
return err
}
obj.SetType(base.Type())
- if err := d.readAndApplyDelta(obj, base); err != nil {
- return err
- }
-
- return nil
+ return ApplyDelta(obj, base, buf.Bytes())
}
-// ReadAndApplyDelta reads and apply the base patched with the contents
-// of a zlib compressed diff data in the delta portion of an object
-// entry in the packfile.
-func (d *Decoder) readAndApplyDelta(target, base core.Object) error {
+func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) error {
buf := bytes.NewBuffer(nil)
- if _, err := d.p.NextObject(buf); err != nil {
+ if _, err := d.scanner.NextObject(buf); err != nil {
+ return err
+ }
+
+ base, err := d.recallByOffset(offset)
+ if err != nil {
return err
}
- return ApplyDelta(target, base, buf.Bytes())
+ obj.SetType(base.Type())
+ return ApplyDelta(obj, base, buf.Bytes())
}
-// Remember stores the offset of the object and its hash, but not the
-// object itself. This implementation does not check for already stored
-// offsets, as it is too expensive to build this information from an
-// index every time a get operation is performed on the SeekableReadRecaller.
-func (r *Decoder) remember(o int64, obj core.Object) error {
+// remember stores the offset of the object and its hash and the object itself.
+// If a seeker was not provided to the decoder, the objects are stored in memory
+func (d *Decoder) remember(o int64, obj core.Object) {
h := obj.Hash()
- r.hashToOffset[h] = o
- r.offsetToObject[o] = obj
- return nil
+
+ d.hashToOffset[h] = o
+ if !d.scanner.IsSeekable() {
+ d.offsetToObject[o] = obj
+ }
}
-// RecallByHash returns the object for a given hash by looking for it again in
+// recallByHash returns the object for a given hash by looking for it again in
// the io.ReadeSeerker.
-func (r *Decoder) recallByHash(h core.Hash) (core.Object, error) {
- o, ok := r.hashToOffset[h]
+func (d *Decoder) recallByHash(h core.Hash) (core.Object, error) {
+ o, ok := d.hashToOffset[h]
if !ok {
return nil, ErrCannotRecall.AddDetails("hash not found: %s", h)
}
- return r.recallByOffset(o)
+ return d.recallByOffset(o)
}
-// RecallByOffset returns the object for a given offset by looking for it again in
+// recallByOffset returns the object for a given offset by looking for it again in
// the io.ReadeSeerker. For efficiency reasons, this method always find objects by
// offset, even if they have not been remembered or if they have been forgetted.
-func (r *Decoder) recallByOffset(o int64) (obj core.Object, err error) {
- obj, ok := r.offsetToObject[o]
+func (d *Decoder) recallByOffset(o int64) (core.Object, error) {
+ obj, ok := d.offsetToObject[o]
if ok {
return obj, nil
}
- if !ok && r.seeker == nil {
+ if !ok && !d.scanner.IsSeekable() {
return nil, ErrCannotRecall.AddDetails("no object found at offset %d", o)
}
- // remember current offset
- beforeJump, err := r.seeker.Seek(0, os.SEEK_CUR)
+ return d.ReadObjectAt(o)
+}
+
+// ReadObjectAt reads an object at the given location
+func (d *Decoder) ReadObjectAt(offset int64) (core.Object, error) {
+ if !d.scanner.IsSeekable() {
+ return nil, ErrNotSeeker
+ }
+
+ beforeJump, err := d.scanner.Seek(offset)
if err != nil {
return nil, err
}
defer func() {
- // jump back
- _, seekErr := r.seeker.Seek(beforeJump, os.SEEK_SET)
+ _, seekErr := d.scanner.Seek(beforeJump)
if err == nil {
err = seekErr
}
}()
- // jump to requested offset
- _, err = r.seeker.Seek(o, os.SEEK_SET)
- if err != nil {
- return nil, err
- }
+ return d.readObject()
+}
+
+// Index returns an index of the objects read by hash and the position where
+// was read
+func (d *Decoder) Index() map[core.Hash]int64 {
+ return d.hashToOffset
+}
- return r.readObject()
+// Close close the Scanner, usually this mean that the whole reader is read and
+// discarded
+func (d *Decoder) Close() error {
+ return d.scanner.Close()
}