aboutsummaryrefslogtreecommitdiffstats
path: root/formats/packfile/decoder.go
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2016-09-06 19:59:44 +0200
committerMáximo Cuadros <mcuadros@gmail.com>2016-09-06 19:59:44 +0200
commit98a22e72a808aa0d5dd62339817404fd9e1c4db6 (patch)
treed5544dce0176b55172f2fd25564618e8c0f18558 /formats/packfile/decoder.go
parentae2b10d50da5455b382ab9d543be4fe859afe9e0 (diff)
downloadgo-git-98a22e72a808aa0d5dd62339817404fd9e1c4db6.tar.gz
format: packfile new interface (wip)
Diffstat (limited to 'formats/packfile/decoder.go')
-rw-r--r--formats/packfile/decoder.go176
1 files changed, 152 insertions, 24 deletions
diff --git a/formats/packfile/decoder.go b/formats/packfile/decoder.go
index 5b5763c..3da927d 100644
--- a/formats/packfile/decoder.go
+++ b/formats/packfile/decoder.go
@@ -1,7 +1,9 @@
package packfile
import (
+ "bytes"
"io"
+ "os"
"gopkg.in/src-d/go-git.v4/core"
)
@@ -21,37 +23,46 @@ var (
// of objects in the packfile is higher than
// Decoder.MaxObjectsLimit.
ErrMaxObjectsLimitReached = NewError("max. objects limit reached")
-
// ErrInvalidObject is returned by Decode when an invalid object is
// found in the packfile.
ErrInvalidObject = NewError("invalid git object")
-
// ErrPackEntryNotFound is returned by Decode when a reference in
// the packfile references an unknown object.
ErrPackEntryNotFound = NewError("can't find a pack entry")
-
// ErrZLib is returned by Decode when there was an error unzipping
// the packfile contents.
ErrZLib = NewError("zlib reading error")
+ // ErrDuplicatedObject is returned by Remember if an object appears several
+ // times in a packfile.
+ ErrDuplicatedObject = NewError("duplicated object")
+ // ErrCannotRecall is returned by RecallByOffset or RecallByHash if the object
+ // to recall cannot be returned.
+ ErrCannotRecall = NewError("cannot recall object")
)
// Decoder reads and decodes packfiles from an input stream.
type Decoder struct {
- p *Parser
- s core.ObjectStorage
+ p *Parser
+ s core.ObjectStorage
+ seeker io.Seeker
+ offsetToObject map[int64]core.Object
+ hashToOffset map[core.Hash]int64
}
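// NewDecoder returns a new Decoder that reads from the parser p and creates
// objects through s. The seeker may be nil; when set, it is used to recall
// objects by offset.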
-func NewDecoder(r ReadRecaller, s core.ObjectStorage) *Decoder {
+func NewDecoder(s core.ObjectStorage, p *Parser, seeker io.Seeker) *Decoder {
return &Decoder{
- p: NewParser(r),
- s: s,
+ p: p,
+ s: s,
+ seeker: seeker,
+ offsetToObject: make(map[int64]core.Object, 0),
+ hashToOffset: make(map[core.Hash]int64, 0),
}
}
// Decode reads a packfile and stores it in the value pointed to by s.
func (d *Decoder) Decode() error {
- count, err := d.p.ReadHeader()
+ _, count, err := d.p.Header()
if err != nil {
return err
}
@@ -74,21 +85,7 @@ func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error {
// That's 1 sec for ~2450 objects, ~4.20 MB, or ~250 ms per MB,
// of which 12-20 % is _not_ zlib inflation (ie. is our code).
for i := 0; i < int(count); i++ {
- start, err := d.p.Offset()
- if err != nil {
- return err
- }
-
- obj := d.s.NewObject()
- if err := d.p.FillObject(obj); err != nil {
- if err == io.EOF {
- break
- }
-
- return err
- }
-
- err = d.p.Remember(start, obj)
+ obj, err := d.readObject()
if err != nil {
return err
}
@@ -101,3 +98,134 @@ func (d *Decoder) readObjects(tx core.TxObjectStorage, count uint32) error {
return nil
}
+
+// readObject reads the next object entry from the parser, creates a new
+// object through the storage, fills it according to the entry type and
+// remembers it under the offset reported by the entry header.
+//
+// Regular objects (commit, tree, blob, tag) are filled directly from the
+// parser; REF and OFS delta objects are resolved against their base. Any
+// other type yields ErrInvalidObject. On error the returned object is nil.
+func (d *Decoder) readObject() (core.Object, error) {
+	h, err := d.p.NextObjectHeader()
+	if err != nil {
+		return nil, err
+	}
+
+	obj := d.s.NewObject()
+	obj.SetSize(h.Length)
+	obj.SetType(h.Type)
+
+	switch h.Type {
+	case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject:
+		err = d.fillRegularObjectContent(obj)
+	case core.REFDeltaObject:
+		err = d.fillREFDeltaObjectContent(obj, h.Reference)
+	case core.OFSDeltaObject:
+		err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference)
+	default:
+		err = ErrInvalidObject.AddDetails("type %q", h.Type)
+	}
+
+	// The fill error must be reported: previously it was dropped and an
+	// incomplete or invalid object was remembered and handed to the caller.
+	if err != nil {
+		return nil, err
+	}
+
+	if err := d.remember(h.Offset, obj); err != nil {
+		return nil, err
+	}
+
+	return obj, nil
+}
+
+// fillRegularObjectContent obtains the writer of obj and lets the parser
+// write the content of the next object into it. It returns the error of
+// whichever step fails first.
+func (d *Decoder) fillRegularObjectContent(obj core.Object) error {
+	dst, err := obj.Writer()
+	if err == nil {
+		_, err = d.p.NextObject(dst)
+	}
+
+	return err
+}
+
+// fillREFDeltaObjectContent recalls the delta base identified by the hash
+// ref, makes obj adopt the type of that base and then fills obj by reading
+// the delta from the parser and applying it to the base.
+func (d *Decoder) fillREFDeltaObjectContent(obj core.Object, ref core.Hash) error {
+	base, err := d.recallByHash(ref)
+	if err != nil {
+		return err
+	}
+
+	obj.SetType(base.Type())
+
+	return d.readAndApplyDelta(obj, base)
+}
+
+// fillOFSDeltaObjectContent recalls the delta base located at the given
+// offset, makes obj adopt the type of that base and then fills obj by
+// reading the delta from the parser and applying it to the base.
+func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) error {
+	base, err := d.recallByOffset(offset)
+	if err != nil {
+		return err
+	}
+
+	obj.SetType(base.Type())
+
+	return d.readAndApplyDelta(obj, base)
+}
+
+// ReadAndApplyDelta reads and apply the base patched with the contents
+// of a zlib compressed diff data in the delta portion of an object
+// entry in the packfile.
+func (d *Decoder) readAndApplyDelta(target, base core.Object) error {
+ buf := bytes.NewBuffer(nil)
+ if _, err := d.p.NextObject(buf); err != nil {
+ return err
+ }
+
+ return ApplyDelta(target, base, buf.Bytes())
+}
+
+// remember records obj so it can later be recalled either by its hash or
+// by its offset o: the hash is mapped to the offset and the offset is
+// mapped to the object itself, which is therefore kept in memory.
+//
+// Existing entries are not checked; a previous entry for the same hash or
+// offset is overwritten. The returned error is always nil.
+func (d *Decoder) remember(o int64, obj core.Object) error {
+	d.hashToOffset[obj.Hash()] = o
+	d.offsetToObject[o] = obj
+
+	return nil
+}
+
+// recallByHash returns the object with the given hash. The hash is first
+// translated to the offset it was remembered at and the object is then
+// recalled by that offset. ErrCannotRecall is returned if the hash has not
+// been remembered.
+func (d *Decoder) recallByHash(h core.Hash) (core.Object, error) {
+	o, ok := d.hashToOffset[h]
+	if !ok {
+		return nil, ErrCannotRecall.AddDetails("hash not found: %s", h)
+	}
+
+	return d.recallByOffset(o)
+}
+
+// recallByOffset returns the object located at offset o.
+//
+// Objects that have already been remembered are returned from memory.
+// Otherwise, if the decoder has a seeker, it jumps to o, reads the object
+// found there and finally seeks back to the position it was at before the
+// call. Without a seeker, ErrCannotRecall is returned.
+func (d *Decoder) recallByOffset(o int64) (obj core.Object, err error) {
+	if cached, ok := d.offsetToObject[o]; ok {
+		return cached, nil
+	}
+
+	if d.seeker == nil {
+		return nil, ErrCannotRecall.AddDetails("no object found at offset %d", o)
+	}
+
+	// remember current offset
+	beforeJump, err := d.seeker.Seek(0, os.SEEK_CUR)
+	if err != nil {
+		return nil, err
+	}
+
+	defer func() {
+		// jump back; a failure here is only reported when nothing else
+		// went wrong, so that the original error is never masked.
+		_, seekErr := d.seeker.Seek(beforeJump, os.SEEK_SET)
+		if err == nil && seekErr != nil {
+			obj, err = nil, seekErr
+		}
+	}()
+
+	// jump to requested offset
+	if _, err = d.seeker.Seek(o, os.SEEK_SET); err != nil {
+		return nil, err
+	}
+
+	return d.readObject()
+}