diff options
author | Antonio Navarro Perez <antnavper@gmail.com> | 2016-12-14 10:20:00 +0100 |
---|---|---|
committer | Máximo Cuadros <mcuadros@gmail.com> | 2016-12-14 10:20:00 +0100 |
commit | 500b1e1e183c73e3087710fca2f96acfd2e2d5cb (patch) | |
tree | b2777dedd22f7279f2df7da8eb3b433d560c5701 /plumbing/format | |
parent | 40875ee0df345468f36cb00d54820d622b37cbc5 (diff) | |
download | go-git-500b1e1e183c73e3087710fca2f96acfd2e2d5cb.tar.gz |
format/packfile: implement delta encoding (#172)
* format/packfile: implement delta encoding
- Added all the logic to the encoder to be able to encode ref-delta and offset-delta objects
- Created plumbing.ObjectToPack to handle deltas and standard objects when we are writing them into a packfile
- Added specific encoder delta tests: one with a standard object and one delta, and one with a standard object and two deltas
* Requested changes.
* Requested changes
Diffstat (limited to 'plumbing/format')
-rw-r--r-- | plumbing/format/packfile/diff_delta.go | 40 | ||||
-rw-r--r-- | plumbing/format/packfile/encoder.go | 92 | ||||
-rw-r--r-- | plumbing/format/packfile/encoder_test.go | 128 | ||||
-rw-r--r-- | plumbing/format/packfile/object_pack.go | 47 | ||||
-rw-r--r-- | plumbing/format/packfile/object_pack_test.go | 40 |
5 files changed, 322 insertions, 25 deletions
diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go index eaed377..7a32d5d 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -1,6 +1,7 @@ package packfile import ( + "fmt" "io/ioutil" "gopkg.in/src-d/go-git.v4/plumbing" @@ -14,28 +15,53 @@ const ( maxCopyLen = 0xffff ) -// GetDelta returns the way of how to transform base object to target object -func GetDelta(base, target plumbing.Object) ([]byte, error) { - baseReader, err := base.Reader() +// GetOFSDelta returns an offset delta that knows the way of how to transform +// base object to target object +func GetOFSDelta(base, target plumbing.Object) (plumbing.Object, error) { + return getDelta(base, target, plumbing.OFSDeltaObject) +} + +// GetRefDelta returns a reference delta that knows the way of how to transform +// base object to target object +func GetRefDelta(base, target plumbing.Object) (plumbing.Object, error) { + return getDelta(base, target, plumbing.REFDeltaObject) +} + +func getDelta(base, target plumbing.Object, t plumbing.ObjectType) (plumbing.Object, error) { + if t != plumbing.OFSDeltaObject && t != plumbing.REFDeltaObject { + return nil, fmt.Errorf("Type not supported: %v", t) + } + + br, err := base.Reader() if err != nil { return nil, err } - targetReader, err := target.Reader() + tr, err := target.Reader() if err != nil { return nil, err } - baseBuf, err := ioutil.ReadAll(baseReader) + bb, err := ioutil.ReadAll(br) if err != nil { return nil, err } - targetBuf, err := ioutil.ReadAll(targetReader) + tb, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - return DiffDelta(baseBuf, targetBuf), nil + db := DiffDelta(bb, tb) + delta := &plumbing.MemoryObject{} + _, err = delta.Write(db) + if err != nil { + return nil, err + } + + delta.SetSize(int64(len(db))) + delta.SetType(t) + + return delta, nil } // DiffDelta returns the way of how to transform baseBuf to targetBuf diff --git 
a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go index 1404dbe..eb1c532 100644 --- a/plumbing/format/packfile/encoder.go +++ b/plumbing/format/packfile/encoder.go @@ -15,9 +15,10 @@ import ( // format type Encoder struct { storage storer.ObjectStorer - w io.Writer + w *offsetWriter zw *zlib.Writer hasher plumbing.Hasher + offsets map[plumbing.Hash]int64 } // NewEncoder creates a new packfile encoder using a specific Writer and @@ -27,28 +28,38 @@ func NewEncoder(w io.Writer, s storer.ObjectStorer) *Encoder { Hash: sha1.New(), } mw := io.MultiWriter(w, h) + ow := newOffsetWriter(mw) zw := zlib.NewWriter(mw) return &Encoder{ storage: s, - w: mw, + w: ow, zw: zw, hasher: h, + offsets: make(map[plumbing.Hash]int64), } } // Encode creates a packfile containing all the objects referenced in hashes // and writes it to the writer in the Encoder. func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) { - if err := e.head(len(hashes)); err != nil { - return plumbing.ZeroHash, err - } - + var objects []*ObjectToPack for _, h := range hashes { o, err := e.storage.Object(plumbing.AnyObject, h) if err != nil { return plumbing.ZeroHash, err } + // TODO delta selection logic + objects = append(objects, newObjectToPack(o)) + } + + return e.encode(objects) +} +func (e *Encoder) encode(objects []*ObjectToPack) (plumbing.Hash, error) { + if err := e.head(len(objects)); err != nil { + return plumbing.ZeroHash, err + } + for _, o := range objects { if err := e.entry(o); err != nil { return plumbing.ZeroHash, err } @@ -56,7 +67,6 @@ func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) { return e.footer() } - func (e *Encoder) head(numEntries int) error { return binary.Write( e.w, @@ -66,19 +76,22 @@ func (e *Encoder) head(numEntries int) error { ) } -func (e *Encoder) entry(o plumbing.Object) error { - t := o.Type() - if t == plumbing.OFSDeltaObject || t == plumbing.REFDeltaObject { - // TODO implements delta objects - return 
fmt.Errorf("delta object not supported: %v", t) +func (e *Encoder) entry(o *ObjectToPack) error { + offset := e.w.Offset() + + if err := e.entryHead(o.Object.Type(), o.Object.Size()); err != nil { + return err } - if err := e.entryHead(t, o.Size()); err != nil { + // Save the position using the original hash, maybe a delta will need it + e.offsets[o.Original.Hash()] = offset + + if err := e.writeDeltaHeaderIfAny(o, offset); err != nil { return err } e.zw.Reset(e.w) - or, err := o.Reader() + or, err := o.Object.Reader() if err != nil { return err } @@ -90,6 +103,38 @@ func (e *Encoder) entry(o plumbing.Object) error { return e.zw.Close() } +func (e *Encoder) writeDeltaHeaderIfAny(o *ObjectToPack, offset int64) error { + if o.IsDelta() { + switch o.Object.Type() { + case plumbing.OFSDeltaObject: + if err := e.writeOfsDeltaHeader(offset, o.Base.Original.Hash()); err != nil { + return err + } + case plumbing.REFDeltaObject: + if err := e.writeRefDeltaHeader(o.Base.Original.Hash()); err != nil { + return err + } + } + } + + return nil +} + +func (e *Encoder) writeRefDeltaHeader(source plumbing.Hash) error { + return binary.Write(e.w, source) +} + +func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, source plumbing.Hash) error { + // because it is an offset delta, we need the source + // object position + offset, ok := e.offsets[source] + if !ok { + return fmt.Errorf("delta source not found. 
Hash: %v", source) + } + + return binary.WriteVariableWidthInt(e.w, deltaOffset-offset) +} + func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error { t := int64(typeNum) header := []byte{} @@ -114,3 +159,22 @@ func (e *Encoder) footer() (plumbing.Hash, error) { h := e.hasher.Sum() return h, binary.Write(e.w, h) } + +type offsetWriter struct { + w io.Writer + offset int64 +} + +func newOffsetWriter(w io.Writer) *offsetWriter { + return &offsetWriter{w: w} +} + +func (ow *offsetWriter) Write(p []byte) (n int, err error) { + n, err = ow.w.Write(p) + ow.offset += int64(n) + return n, err +} + +func (ow *offsetWriter) Offset() int64 { + return ow.offset +} diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index 729843d..b07e2f4 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -79,6 +79,13 @@ func (s *EncoderSuite) TestMaxObjectSize(c *C) { c.Assert(hash.IsZero(), Not(Equals), true) } +func (s *EncoderSuite) TestHashNotFound(c *C) { + h, err := s.enc.Encode([]plumbing.Hash{plumbing.NewHash("BAD")}) + c.Assert(h, Equals, plumbing.ZeroHash) + c.Assert(err, NotNil) + c.Assert(err, Equals, plumbing.ErrObjectNotFound) +} + func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) { fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { scanner := NewScanner(f.Packfile()) @@ -91,14 +98,16 @@ func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) { c.Assert(err, IsNil) c.Assert(ch, Equals, f.PackfileHash) - commitIter, err := d.o.IterObjects(plumbing.AnyObject) + objIter, err := d.o.IterObjects(plumbing.AnyObject) c.Assert(err, IsNil) objects := []plumbing.Object{} hashes := []plumbing.Hash{} - err = commitIter.ForEach(func(o plumbing.Object) error { + err = objIter.ForEach(func(o plumbing.Object) error { objects = append(objects, o) hash, err := s.store.SetObject(o) + c.Assert(err, IsNil) + hashes = append(hashes, hash) return err @@ -115,10 +124,10 
@@ func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) { _, err = d.Decode() c.Assert(err, IsNil) - commitIter, err = d.o.IterObjects(plumbing.AnyObject) + objIter, err = d.o.IterObjects(plumbing.AnyObject) c.Assert(err, IsNil) obtainedObjects := []plumbing.Object{} - err = commitIter.ForEach(func(o plumbing.Object) error { + err = objIter.ForEach(func(o plumbing.Object) error { obtainedObjects = append(obtainedObjects, o) return nil @@ -138,3 +147,114 @@ func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) { c.Assert(len(obtainedObjects), Equals, equals) }) } + +func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeREF(c *C) { + s.simpleDeltaTest(c, plumbing.REFDeltaObject) +} + +func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeOFS(c *C) { + s.simpleDeltaTest(c, plumbing.OFSDeltaObject) +} + +func (s *EncoderSuite) TestDecodeEncodeWithDeltasDecodeREF(c *C) { + s.deltaOverDeltaTest(c, plumbing.REFDeltaObject) +} + +func (s *EncoderSuite) TestDecodeEncodeWithDeltasDecodeOFS(c *C) { + s.deltaOverDeltaTest(c, plumbing.OFSDeltaObject) +} + +func (s *EncoderSuite) simpleDeltaTest(c *C, t plumbing.ObjectType) { + srcObject := newObject(plumbing.BlobObject, []byte("0")) + targetObject := newObject(plumbing.BlobObject, []byte("01")) + + deltaObject, err := delta(srcObject, targetObject, t) + c.Assert(err, IsNil) + + srcToPack := newObjectToPack(srcObject) + _, err = s.enc.encode([]*ObjectToPack{ + srcToPack, + newDeltaObjectToPack(srcToPack, targetObject, deltaObject), + }) + c.Assert(err, IsNil) + + scanner := NewScanner(s.buf) + + storage := memory.NewStorage() + d, err := NewDecoder(scanner, storage) + c.Assert(err, IsNil) + + _, err = d.Decode() + c.Assert(err, IsNil) + + decSrc, err := storage.Object(srcObject.Type(), srcObject.Hash()) + c.Assert(err, IsNil) + c.Assert(decSrc, DeepEquals, srcObject) + + decTarget, err := storage.Object(targetObject.Type(), targetObject.Hash()) + c.Assert(err, IsNil) + c.Assert(decTarget, DeepEquals, targetObject) +} + +func (s 
*EncoderSuite) deltaOverDeltaTest(c *C, t plumbing.ObjectType) { + srcObject := newObject(plumbing.BlobObject, []byte("0")) + targetObject := newObject(plumbing.BlobObject, []byte("01")) + otherTargetObject := newObject(plumbing.BlobObject, []byte("011111")) + + deltaObject, err := delta(srcObject, targetObject, t) + c.Assert(err, IsNil) + c.Assert(deltaObject.Hash(), Not(Equals), plumbing.ZeroHash) + + otherDeltaObject, err := delta(targetObject, otherTargetObject, t) + c.Assert(err, IsNil) + c.Assert(otherDeltaObject.Hash(), Not(Equals), plumbing.ZeroHash) + + srcToPack := newObjectToPack(srcObject) + targetToPack := newObjectToPack(targetObject) + _, err = s.enc.encode([]*ObjectToPack{ + srcToPack, + newDeltaObjectToPack(srcToPack, targetObject, deltaObject), + newDeltaObjectToPack(targetToPack, otherTargetObject, otherDeltaObject), + }) + c.Assert(err, IsNil) + + scanner := NewScanner(s.buf) + storage := memory.NewStorage() + d, err := NewDecoder(scanner, storage) + c.Assert(err, IsNil) + + _, err = d.Decode() + c.Assert(err, IsNil) + + decSrc, err := storage.Object(srcObject.Type(), srcObject.Hash()) + c.Assert(err, IsNil) + c.Assert(decSrc, DeepEquals, srcObject) + + decTarget, err := storage.Object(targetObject.Type(), targetObject.Hash()) + c.Assert(err, IsNil) + c.Assert(decTarget, DeepEquals, targetObject) + + decOtherTarget, err := storage.Object(otherTargetObject.Type(), otherTargetObject.Hash()) + c.Assert(err, IsNil) + c.Assert(decOtherTarget, DeepEquals, otherTargetObject) +} + +func delta(base, target plumbing.Object, t plumbing.ObjectType) (plumbing.Object, error) { + switch t { + case plumbing.OFSDeltaObject: + return GetOFSDelta(base, target) + case plumbing.REFDeltaObject: + return GetRefDelta(base, target) + default: + panic("delta type not found") + } +} + +func newObject(t plumbing.ObjectType, cont []byte) plumbing.Object { + o := plumbing.MemoryObject{} + o.SetType(t) + o.SetSize(int64(len(cont))) + o.Write(cont) + + return &o +} diff --git 
a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go new file mode 100644 index 0000000..dc5a3c7 --- /dev/null +++ b/plumbing/format/packfile/object_pack.go @@ -0,0 +1,47 @@ +package packfile + +import "gopkg.in/src-d/go-git.v4/plumbing" + +// ObjectToPack is a representation of an object that is going to be into a +// pack file. +type ObjectToPack struct { + // The main object to pack, it could be any object, including deltas + Object plumbing.Object + // Base is the object that a delta is based on (it could be also another delta). + // If the main object is not a delta, Base will be null + Base *ObjectToPack + // Original is the object that we can generate applying the delta to + // Base, or the same object as Object in the case of a non-delta object. + Original plumbing.Object + // Depth is the amount of deltas needed to resolve to obtain Original + // (delta based on delta based on ...) + Depth int +} + +// newObjectToPack creates a correct ObjectToPack based on a non-delta object +func newObjectToPack(o plumbing.Object) *ObjectToPack { + return &ObjectToPack{ + Object: o, + Original: o, + } +} + +// newDeltaObjectToPack creates a correct ObjectToPack for a delta object, based on +// his base (could be another delta), the delta target (in this case called original), +// and the delta Object itself +func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.Object) *ObjectToPack { + return &ObjectToPack{ + Object: delta, + Base: base, + Original: original, + Depth: base.Depth + 1, + } +} + +func (o *ObjectToPack) IsDelta() bool { + if o.Base != nil { + return true + } + + return false +} diff --git a/plumbing/format/packfile/object_pack_test.go b/plumbing/format/packfile/object_pack_test.go new file mode 100644 index 0000000..ddc7ab5 --- /dev/null +++ b/plumbing/format/packfile/object_pack_test.go @@ -0,0 +1,40 @@ +package packfile + +import ( + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + + . 
"gopkg.in/check.v1" +) + +type ObjectToPackSuite struct{} + +var _ = Suite(&ObjectToPackSuite{}) + +func (s *ObjectToPackSuite) TestObjectToPack(c *C) { + obj := &dummyObject{} + otp := newObjectToPack(obj) + c.Assert(obj, Equals, otp.Object) + c.Assert(obj, Equals, otp.Original) + c.Assert(otp.Base, IsNil) + c.Assert(otp.IsDelta(), Equals, false) + + original := &dummyObject{} + delta := &dummyObject{} + deltaToPack := newDeltaObjectToPack(otp, original, delta) + c.Assert(obj, Equals, deltaToPack.Object) + c.Assert(original, Equals, deltaToPack.Original) + c.Assert(otp, Equals, deltaToPack.Base) + c.Assert(deltaToPack.IsDelta(), Equals, true) +} + +type dummyObject struct{} + +func (*dummyObject) Hash() plumbing.Hash { return plumbing.ZeroHash } +func (*dummyObject) Type() plumbing.ObjectType { return plumbing.InvalidObject } +func (*dummyObject) SetType(plumbing.ObjectType) {} +func (*dummyObject) Size() int64 { return 0 } +func (*dummyObject) SetSize(s int64) {} +func (*dummyObject) Reader() (io.ReadCloser, error) { return nil, nil } +func (*dummyObject) Writer() (io.WriteCloser, error) { return nil, nil } |