From c932bd462fed2ff470d9ff40f15102237a3287f8 Mon Sep 17 00:00:00 2001 From: Antonio Jesus Navarro Perez Date: Mon, 18 Dec 2017 18:11:38 +0100 Subject: Improve delta reutilization - Remove wrong 'if' on delta selector that causes poor delta reutilizations - packfile.Encoder now can write deltas and objects in a non specific order - ObjectToPack now saves the Offset on the packfile to be able to obtain base offset in a recursive manner and write them before the delta itself - Added encoder test to check cyclic delta chains - Check the output packfile hash in all encoder tests Signed-off-by: Antonio Jesus Navarro Perez --- plumbing/format/packfile/delta_selector.go | 5 -- plumbing/format/packfile/encoder.go | 57 +++++++++++----- plumbing/format/packfile/encoder_advanced_test.go | 6 +- plumbing/format/packfile/encoder_test.go | 81 +++++++++++++++++++++-- plumbing/format/packfile/object_pack.go | 32 ++++++++- 5 files changed, 152 insertions(+), 29 deletions(-) (limited to 'plumbing/format') diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go index 51adcdf..8792574 100644 --- a/plumbing/format/packfile/delta_selector.go +++ b/plumbing/format/packfile/delta_selector.go @@ -174,11 +174,6 @@ func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*O return dw.undeltify(otp) } - if base.Size() <= otp.Size() { - // Bases should be bigger - return dw.undeltify(otp) - } - if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil { return err } diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go index 7ee6546..6686dd5 100644 --- a/plumbing/format/packfile/encoder.go +++ b/plumbing/format/packfile/encoder.go @@ -18,10 +18,7 @@ type Encoder struct { w *offsetWriter zw *zlib.Writer hasher plumbing.Hasher - // offsets is a map of object hashes to corresponding offsets in the packfile. - // It is used to determine offset of the base of a delta when a OFS_DELTA is - // used. - offsets map[plumbing.Hash]int64 + useRefDeltas bool } @@ -40,7 +37,6 @@ func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *E w: ow, zw: zw, hasher: h, - offsets: make(map[plumbing.Hash]int64), useRefDeltas: useRefDeltas, } } @@ -85,11 +81,34 @@ func (e *Encoder) head(numEntries int) error { } func (e *Encoder) entry(o *ObjectToPack) error { - offset := e.w.Offset() - e.offsets[o.Hash()] = offset + if o.WantWrite() { + // A cycle exists in this delta chain. This should only occur if a + // selected object representation disappeared during writing + // (for example due to a concurrent repack) and a different base + // was chosen, forcing a cycle. Select something other than a + // delta, and write this object. + o.BackToOriginal() + } + + if o.IsWritten() { + return nil + } + + o.MarkWantWrite() + + if err := e.writeBaseIfDelta(o); err != nil { + return err + } + + // We need to check if we already write that object due a cyclic delta chain + if o.IsWritten() { + return nil + } + + o.Offset = e.w.Offset() if o.IsDelta() { - if err := e.writeDeltaHeader(o, offset); err != nil { + if err := e.writeDeltaHeader(o); err != nil { return err } } else { @@ -112,7 +131,16 @@ func (e *Encoder) entry(o *ObjectToPack) error { return e.zw.Close() } -func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error { +func (e *Encoder) writeBaseIfDelta(o *ObjectToPack) error { + if o.IsDelta() && !o.Base.IsWritten() { + // We must write base first + return e.entry(o.Base) + } + + return nil +} + +func (e *Encoder) writeDeltaHeader(o *ObjectToPack) error { // Write offset deltas by default t := plumbing.OFSDeltaObject if e.useRefDeltas { @@ -126,7 +154,7 @@ func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error { if e.useRefDeltas { return e.writeRefDeltaHeader(o.Base.Hash()) } else { - return e.writeOfsDeltaHeader(offset, o.Base.Hash()) + return e.writeOfsDeltaHeader(o) } } @@ -134,15 +162,10 @@ func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error { return binary.Write(e.w, base) } -func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) error { - baseOffset, ok := e.offsets[base] - if !ok { - return fmt.Errorf("base for delta not found, base hash: %v", base) - } - +func (e *Encoder) writeOfsDeltaHeader(o *ObjectToPack) error { // for OFS_DELTA, offset of the base is interpreted as negative offset // relative to the type-byte of the header of the ofs-delta entry. - relativeOffset := deltaOffset - baseOffset + relativeOffset := o.Offset - o.Base.Offset if relativeOffset <= 0 { return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset) } diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index 69d6b39..1075875 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -68,16 +68,18 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, pac buf := bytes.NewBuffer(nil) enc := NewEncoder(buf, storage, false) - _, err = enc.Encode(hashes, packWindow) + encodeHash, err := enc.Encode(hashes, packWindow) c.Assert(err, IsNil) scanner := NewScanner(buf) storage = memory.NewStorage() d, err := NewDecoder(scanner, storage) c.Assert(err, IsNil) - _, err = d.Decode() + decodeHash, err := d.Decode() c.Assert(err, IsNil) + c.Assert(encodeHash, Equals, decodeHash) + objIter, err = storage.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) obtainedObjects := map[plumbing.Hash]bool{} diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index f40517d..320036b 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -106,6 +106,16 @@ func (s *EncoderSuite) TestDecodeEncodeWithDeltasDecodeOFS(c *C) { s.deltaOverDeltaTest(c) } +func (s *EncoderSuite) TestDecodeEncodeWithCycleREF(c *C) { + s.enc = NewEncoder(s.buf, s.store, true) + s.deltaOverDeltaCyclicTest(c) +} + +func (s *EncoderSuite) TestDecodeEncodeWithCycleOFS(c *C) { + s.enc = NewEncoder(s.buf, s.store, false) + s.deltaOverDeltaCyclicTest(c) +} + func (s *EncoderSuite) simpleDeltaTest(c *C) { srcObject := newObject(plumbing.BlobObject, []byte("0")) targetObject := newObject(plumbing.BlobObject, []byte("01")) @@ -114,7 +124,7 @@ func (s *EncoderSuite) simpleDeltaTest(c *C) { c.Assert(err, IsNil) srcToPack := newObjectToPack(srcObject) - _, err = s.enc.encode([]*ObjectToPack{ + encHash, err := s.enc.encode([]*ObjectToPack{ srcToPack, newDeltaObjectToPack(srcToPack, targetObject, deltaObject), }) @@ -126,9 +136,11 @@ func (s *EncoderSuite) simpleDeltaTest(c *C) { d, err := NewDecoder(scanner, storage) c.Assert(err, IsNil) - _, err = d.Decode() + decHash, err := d.Decode() c.Assert(err, IsNil) + c.Assert(encHash, Equals, decHash) + decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash()) c.Assert(err, IsNil) c.Assert(decSrc, DeepEquals, srcObject) @@ -153,7 +165,8 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) { srcToPack := newObjectToPack(srcObject) targetToPack := newObjectToPack(targetObject) - _, err = s.enc.encode([]*ObjectToPack{ + encHash, err := s.enc.encode([]*ObjectToPack{ + targetToPack, srcToPack, newDeltaObjectToPack(srcToPack, targetObject, deltaObject), newDeltaObjectToPack(targetToPack, otherTargetObject, otherDeltaObject), @@ -165,9 +178,11 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) { d, err := NewDecoder(scanner, storage) c.Assert(err, IsNil) - _, err = d.Decode() + decHash, err := d.Decode() c.Assert(err, IsNil) + c.Assert(encHash, Equals, decHash) + decSrc, err := storage.EncodedObject(srcObject.Type(), srcObject.Hash()) c.Assert(err, IsNil) c.Assert(decSrc, DeepEquals, srcObject) @@ -180,3 +195,61 @@ func (s *EncoderSuite) deltaOverDeltaTest(c *C) { c.Assert(err, IsNil) c.Assert(decOtherTarget, DeepEquals, otherTargetObject) } + +func (s *EncoderSuite) deltaOverDeltaCyclicTest(c *C) { + o1 := newObject(plumbing.BlobObject, []byte("0")) + o2 := newObject(plumbing.BlobObject, []byte("01")) + o3 := newObject(plumbing.BlobObject, []byte("011111")) + o4 := newObject(plumbing.BlobObject, []byte("01111100000")) + + d2, err := GetDelta(o1, o2) + c.Assert(err, IsNil) + + d3, err := GetDelta(o4, o3) + c.Assert(err, IsNil) + + d4, err := GetDelta(o3, o4) + c.Assert(err, IsNil) + + po1 := newObjectToPack(o1) + pd2 := newDeltaObjectToPack(po1, o2, d2) + pd3 := newObjectToPack(o3) + pd4 := newObjectToPack(o4) + + pd3.SetDelta(pd4, d3) + pd4.SetDelta(pd3, d4) + + encHash, err := s.enc.encode([]*ObjectToPack{ + po1, + pd2, + pd3, + pd4, + }) + c.Assert(err, IsNil) + + scanner := NewScanner(s.buf) + storage := memory.NewStorage() + d, err := NewDecoder(scanner, storage) + c.Assert(err, IsNil) + + decHash, err := d.Decode() + c.Assert(err, IsNil) + + c.Assert(encHash, Equals, decHash) + + decSrc, err := storage.EncodedObject(o1.Type(), o1.Hash()) + c.Assert(err, IsNil) + c.Assert(decSrc, DeepEquals, o1) + + decTarget, err := storage.EncodedObject(o2.Type(), o2.Hash()) + c.Assert(err, IsNil) + c.Assert(decTarget, DeepEquals, o2) + + decOtherTarget, err := storage.EncodedObject(o3.Type(), o3.Hash()) + c.Assert(err, IsNil) + c.Assert(decOtherTarget, DeepEquals, o3) + + decAnotherTarget, err := storage.EncodedObject(o4.Type(), o4.Hash()) + c.Assert(err, IsNil) + c.Assert(decAnotherTarget, DeepEquals, o4) +} diff --git a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go index e22e783..1563517 100644 --- a/plumbing/format/packfile/object_pack.go +++ b/plumbing/format/packfile/object_pack.go @@ -13,12 +13,16 @@ type ObjectToPack struct { // If the main object is not a delta, Base will be null Base *ObjectToPack // Original is the object that we can generate applying the delta to - // Base, or the same object as EncodedObject in the case of a non-delta + // Base, or the same object as Object in the case of a non-delta // object. Original plumbing.EncodedObject // Depth is the amount of deltas needed to resolve to obtain Original // (delta based on delta based on ...) Depth int + + // offset in pack when object has been already written, or 0 if it + // has not been written yet + Offset int64 } // newObjectToPack creates a correct ObjectToPack based on a non-delta object @@ -41,6 +45,32 @@ func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedOb } } +// BackToOriginal converts that ObjectToPack to a non-deltified object if it was one +func (o *ObjectToPack) BackToOriginal() { + if o.IsDelta() { + o.Object = o.Original + o.Base = nil + o.Depth = 0 + } +} + +// IsWritten returns if that ObjectToPack was +// already written into the packfile or not +func (o *ObjectToPack) IsWritten() bool { + return o.Offset > 1 +} + +// MarkWantWrite marks this ObjectToPack as WantWrite +// to avoid delta chain loops +func (o *ObjectToPack) MarkWantWrite() { + o.Offset = 1 +} + +// WantWrite checks if this ObjectToPack was marked as WantWrite before +func (o *ObjectToPack) WantWrite() bool { + return o.Offset == 1 +} + func (o *ObjectToPack) Type() plumbing.ObjectType { if o.Original != nil { return o.Original.Type() -- cgit