diff options
author | Santiago M. Mola <santi@mola.io> | 2017-07-25 10:08:36 +0200 |
---|---|---|
committer | Santiago M. Mola <santi@mola.io> | 2017-07-26 10:56:59 +0200 |
commit | c64eb817d5e5cbaec10dea1342e1ec95721e886b (patch) | |
tree | 952839005580551bf9042ded89e2dcda56d779fc /plumbing/format/packfile/decoder.go | |
parent | fbf2a4ab4588c78e3d9d0265dba774ae6b388b5f (diff) | |
download | go-git-c64eb817d5e5cbaec10dea1342e1ec95721e886b.tar.gz |
packfile: create packfile.Index and reuse it
There was an internal type (i.e. storage/filesystem.idx) to
use as in-memory index for packfiles. This was not convenient
to reuse in the packfile.
This commit creates a new representation (format/packfile.Index)
that can be converted to and from idxfile.Idxfile.
A packfile.Index now contains the functionality that was scattered
on storage/filesystem.idx and packfile.Decoder's internals.
storage/filesystem now reuses packfile.Index instances and this
also results in higher cache hit ratios when resolving deltas.
Diffstat (limited to 'plumbing/format/packfile/decoder.go')
-rw-r--r-- | plumbing/format/packfile/decoder.go | 104 |
1 files changed, 53 insertions, 51 deletions
diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 21ddbbf..39680a3 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -56,10 +56,12 @@ type Decoder struct { o storer.EncodedObjectStorer tx storer.Transaction - isDecoded bool - offsetToHash map[int64]plumbing.Hash - hashToOffset map[plumbing.Hash]int64 - crcs map[plumbing.Hash]uint32 + isDecoded bool + + // hasBuiltIndex indicates if the index is fully built or not. If it is not, + // will be built incrementally while decoding. + hasBuiltIndex bool + idx *Index offsetToType map[int64]plumbing.ObjectType decoderType plumbing.ObjectType @@ -102,10 +104,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, s: s, o: o, - offsetToHash: make(map[int64]plumbing.Hash, 0), - hashToOffset: make(map[plumbing.Hash]int64, 0), - crcs: make(map[plumbing.Hash]uint32, 0), - + idx: NewIndex(0), offsetToType: make(map[int64]plumbing.ObjectType, 0), decoderType: t, @@ -139,6 +138,11 @@ func (d *Decoder) doDecode() error { return err } + if !d.hasBuiltIndex { + d.idx = NewIndex(int(count)) + } + defer func() { d.hasBuiltIndex = true }() + _, isTxStorer := d.o.(storer.Transactioner) switch { case d.o == nil: @@ -218,13 +222,22 @@ func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) { } func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, error) { - var realType plumbing.ObjectType - var err error + var ( + obj plumbing.EncodedObject + realType plumbing.ObjectType + err error + ) switch h.Type { case plumbing.OFSDeltaObject: realType, err = d.ofsDeltaType(h.OffsetReference) case plumbing.REFDeltaObject: realType, err = d.refDeltaType(h.Reference) + if err == plumbing.ErrObjectNotFound { + obj, err = d.decodeByHeader(h) + if err != nil { + realType = obj.Type() + } + } default: realType = h.Type } @@ -236,6 +249,10 @@ func (d *Decoder) decodeIfSpecificType(h *ObjectHeader) (plumbing.EncodedObject, d.offsetToType[h.Offset] = realType if d.decoderType == realType { + if obj != nil { + return obj, nil + } + return d.decodeByHeader(h) } @@ -252,16 +269,12 @@ func (d *Decoder) ofsDeltaType(offset int64) (plumbing.ObjectType, error) { } func (d *Decoder) refDeltaType(ref plumbing.Hash) (plumbing.ObjectType, error) { - if o, ok := d.hashToOffset[ref]; ok { - return d.ofsDeltaType(o) - } - - obj, err := d.o.EncodedObject(plumbing.AnyObject, ref) - if err != nil { - return plumbing.InvalidObject, err + e, ok := d.idx.LookupHash(ref) + if !ok { + return plumbing.InvalidObject, plumbing.ErrObjectNotFound } - return obj.Type(), nil + return d.ofsDeltaType(int64(e.Offset)) } func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error) { @@ -285,9 +298,9 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error return obj, err } - hash := obj.Hash() - d.setOffset(hash, h.Offset) - d.setCRC(hash, crc) + if !d.hasBuiltIndex { + d.idx.Add(obj.Hash(), uint64(h.Offset), crc) + } return obj, nil } @@ -365,10 +378,10 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i return 0, err } - h := d.offsetToHash[offset] + e, ok := d.idx.LookupOffset(uint64(offset)) var base plumbing.EncodedObject - if h != plumbing.ZeroHash { - base = d.cache.Get(h) + if ok { + base = d.cache.Get(e.Hash) } if base == nil { @@ -385,22 +398,13 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i return crc, err } -func (d *Decoder) setOffset(h plumbing.Hash, offset int64) { - d.offsetToHash[offset] = h - d.hashToOffset[h] = offset -} - -func (d *Decoder) setCRC(h plumbing.Hash, crc uint32) { - d.crcs[h] = crc -} - func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { if d.s.IsSeekable { return d.DecodeObjectAt(o) } - if h, ok := d.offsetToHash[o]; ok { - return d.recallByHashNonSeekable(h) + if e, ok := d.idx.LookupOffset(uint64(o)); ok { + return d.recallByHashNonSeekable(e.Hash) } return nil, plumbing.ErrObjectNotFound @@ -408,8 +412,8 @@ func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) { func (d *Decoder) recallByHash(h plumbing.Hash) (plumbing.EncodedObject, error) { if d.s.IsSeekable { - if o, ok := d.hashToOffset[h]; ok { - return d.DecodeObjectAt(o) + if e, ok := d.idx.LookupHash(h); ok { + return d.DecodeObjectAt(int64(e.Offset)) } } @@ -432,22 +436,20 @@ func (d *Decoder) recallByHashNonSeekable(h plumbing.Hash) (obj plumbing.Encoded return nil, plumbing.ErrObjectNotFound } -// SetOffsets sets the offsets, required when using the method DecodeObjectAt, -// without decoding the full packfile -func (d *Decoder) SetOffsets(offsets map[plumbing.Hash]int64) { - d.hashToOffset = offsets -} - -// Offsets returns the objects read offset, Decode method should be called -// before to calculate the Offsets -func (d *Decoder) Offsets() map[plumbing.Hash]int64 { - return d.hashToOffset +// SetIndex sets an index for the packfile. It is recommended to set this. +// The index might be read from a file or reused from a previous Decoder usage +// (see Index function). +func (d *Decoder) SetIndex(idx *Index) { + d.hasBuiltIndex = true + d.idx = idx } -// CRCs returns the CRC-32 for each read object. Decode method should be called -// before to calculate the CRCs -func (d *Decoder) CRCs() map[plumbing.Hash]uint32 { - return d.crcs +// Index returns the index for the packfile. If index was set with SetIndex, +// Index will return it. Otherwise, it will return an index that is built while +// decoding. If neither SetIndex was called with a full index or Decode called +// for the whole packfile, then the returned index will be incomplete. +func (d *Decoder) Index() *Index { + return d.idx } // Close closes the Scanner. usually this mean that the whole reader is read and |