From 3b1baea2dd9353f42b3a9d93f6bc92ecbe9f4f01 Mon Sep 17 00:00:00 2001 From: Máximo Cuadros Date: Thu, 8 Sep 2016 23:58:41 +0200 Subject: format: packfile based on ObjectStorage and CRC32 calculation --- formats/packfile/decoder.go | 161 +++++++++++++++++++------------------------- 1 file changed, 68 insertions(+), 93 deletions(-) (limited to 'formats/packfile/decoder.go') diff --git a/formats/packfile/decoder.go b/formats/packfile/decoder.go index 18ec6b9..92c42af 100644 --- a/formats/packfile/decoder.go +++ b/formats/packfile/decoder.go @@ -39,19 +39,23 @@ var ( // Decoder reads and decodes packfiles from an input stream. type Decoder struct { - scanner *Scanner - storage core.ObjectStorage - offsetToObject map[int64]core.Object - hashToOffset map[core.Hash]int64 + s *Scanner + o core.ObjectStorage + tx core.TxObjectStorage + + offsets map[int64]core.Hash + crcs map[core.Hash]uint32 } // NewDecoder returns a new Decoder that reads from r. -func NewDecoder(p *Scanner, s core.ObjectStorage) *Decoder { +func NewDecoder(s *Scanner, o core.ObjectStorage) *Decoder { return &Decoder{ - scanner: p, - storage: s, - offsetToObject: make(map[int64]core.Object, 0), - hashToOffset: make(map[core.Hash]int64, 0), + s: s, + o: o, + tx: o.Begin(), + + offsets: make(map[int64]core.Hash, 0), + crcs: make(map[core.Hash]uint32, 0), } } @@ -61,48 +65,38 @@ func (d *Decoder) Decode() (checksum core.Hash, err error) { return core.ZeroHash, err } - return d.scanner.Checksum() + return d.s.Checksum() } func (d *Decoder) doDecode() error { - _, count, err := d.scanner.Header() + _, count, err := d.s.Header() if err != nil { return err } - if d.storage == nil { - return d.readObjects(count, nil) - } - - tx := d.storage.Begin() - if err := d.readObjects(count, tx); err != nil { - if err := tx.Rollback(); err != nil { + if err := d.readObjects(count); err != nil { + if err := d.tx.Rollback(); err != nil { return nil } return err } - if err := tx.Commit(); err != nil { + if err := d.tx.Commit(); err != nil { return err } return nil } -func (d *Decoder) readObjects(count uint32, tx core.TxObjectStorage) error { +func (d *Decoder) readObjects(count uint32) error { for i := 0; i < int(count); i++ { obj, err := d.readObject() if err != nil { return err } - if tx == nil { - continue - } - - _, err = tx.Set(obj) - if err != nil { + if _, err := d.tx.Set(obj); err != nil { return err } } @@ -111,22 +105,22 @@ func (d *Decoder) readObjects(count uint32, tx core.TxObjectStorage) error { } func (d *Decoder) readObject() (core.Object, error) { - h, err := d.scanner.NextObjectHeader() + h, err := d.s.NextObjectHeader() if err != nil { return nil, err } - obj := d.newObject() + obj := d.o.NewObject() obj.SetSize(h.Length) obj.SetType(h.Type) - + var crc uint32 switch h.Type { case core.CommitObject, core.TreeObject, core.BlobObject, core.TagObject: - err = d.fillRegularObjectContent(obj) + crc, err = d.fillRegularObjectContent(obj) case core.REFDeltaObject: - err = d.fillREFDeltaObjectContent(obj, h.Reference) + crc, err = d.fillREFDeltaObjectContent(obj, h.Reference) case core.OFSDeltaObject: - err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) + crc, err = d.fillOFSDeltaObjectContent(obj, h.OffsetReference) default: err = ErrInvalidObject.AddDetails("type %q", h.Type) } @@ -135,109 +129,81 @@ func (d *Decoder) readObject() (core.Object, error) { return obj, err } - d.remember(h.Offset, obj) + d.remember(obj, h.Offset, crc) return obj, nil } -func (d *Decoder) newObject() core.Object { - if d.storage == nil { - return &core.MemoryObject{} - } - - return d.storage.NewObject() -} - -func (d *Decoder) fillRegularObjectContent(obj core.Object) error { +func (d *Decoder) fillRegularObjectContent(obj core.Object) (uint32, error) { w, err := obj.Writer() if err != nil { - return err + return 0, err } - _, err = d.scanner.NextObject(w) - return err + _, crc, err := d.s.NextObject(w) + return crc, err } -func (d *Decoder) fillREFDeltaObjectContent(obj core.Object, ref core.Hash) error { +func (d *Decoder) fillREFDeltaObjectContent(obj core.Object, ref core.Hash) (uint32, error) { buf := bytes.NewBuffer(nil) - if _, err := d.scanner.NextObject(buf); err != nil { - return err + _, crc, err := d.s.NextObject(buf) + if err != nil { + return 0, err } base, err := d.recallByHash(ref) if err != nil { - return err + return 0, err } obj.SetType(base.Type()) - return ApplyDelta(obj, base, buf.Bytes()) + return crc, ApplyDelta(obj, base, buf.Bytes()) } -func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) error { +func (d *Decoder) fillOFSDeltaObjectContent(obj core.Object, offset int64) (uint32, error) { buf := bytes.NewBuffer(nil) - if _, err := d.scanner.NextObject(buf); err != nil { - return err + _, crc, err := d.s.NextObject(buf) + if err != nil { + return 0, err } base, err := d.recallByOffset(offset) if err != nil { - return err + return 0, err } obj.SetType(base.Type()) - return ApplyDelta(obj, base, buf.Bytes()) + return crc, ApplyDelta(obj, base, buf.Bytes()) } -// remember stores the offset of the object and its hash and the object itself. -// If a seeker was not provided to the decoder, the objects are stored in memory -func (d *Decoder) remember(o int64, obj core.Object) { +func (d *Decoder) remember(obj core.Object, offset int64, crc uint32) { h := obj.Hash() - d.hashToOffset[h] = o - if !d.scanner.IsSeekable() { - d.offsetToObject[o] = obj - } + d.offsets[offset] = h + d.crcs[h] = crc } -// recallByHash returns the object for a given hash by looking for it again in -// the io.ReadeSeerker. -func (d *Decoder) recallByHash(h core.Hash) (core.Object, error) { - o, ok := d.hashToOffset[h] - if !ok { - return nil, ErrCannotRecall.AddDetails("hash not found: %s", h) - } - - return d.recallByOffset(o) -} - -// recallByOffset returns the object for a given offset by looking for it again in -// the io.ReadeSeerker. For efficiency reasons, this method always find objects by -// offset, even if they have not been remembered or if they have been forgetted. func (d *Decoder) recallByOffset(o int64) (core.Object, error) { - obj, ok := d.offsetToObject[o] + h, ok := d.offsets[o] if ok { - return obj, nil + return d.recallByHash(h) } - if !ok && !d.scanner.IsSeekable() { - return nil, ErrCannotRecall.AddDetails("no object found at offset %d", o) - } + return nil, ErrCannotRecall.AddDetails("no object found at offset %d", o) +} - return d.ReadObjectAt(o) +func (d *Decoder) recallByHash(h core.Hash) (core.Object, error) { + return d.tx.Get(core.AnyObject, h) } // ReadObjectAt reads an object at the given location func (d *Decoder) ReadObjectAt(offset int64) (core.Object, error) { - if !d.scanner.IsSeekable() { - return nil, ErrNotSeeker - } - - beforeJump, err := d.scanner.Seek(offset) + beforeJump, err := d.s.Seek(offset) if err != nil { return nil, err } defer func() { - _, seekErr := d.scanner.Seek(beforeJump) + _, seekErr := d.s.Seek(beforeJump) if err == nil { err = seekErr } @@ -246,14 +212,23 @@ func (d *Decoder) ReadObjectAt(offset int64) (core.Object, error) { return d.readObject() } -// Index returns an index of the objects read by hash and the position where -// was read -func (d *Decoder) Index() map[core.Hash]int64 { - return d.hashToOffset +// Offsets returns the objects read offset +func (d *Decoder) Offsets() map[core.Hash]int64 { + i := make(map[core.Hash]int64, len(d.offsets)) + for o, h := range d.offsets { + i[h] = o + } + + return i +} + +// CRCs returns the CRC-32 for each objected read +func (d *Decoder) CRCs() map[core.Hash]uint32 { + return d.crcs } // Close close the Scanner, usually this mean that the whole reader is read and // discarded func (d *Decoder) Close() error { - return d.scanner.Close() + return d.s.Close() } -- cgit