aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntonio Navarro Perez <antnavper@gmail.com>2016-12-14 10:20:00 +0100
committerMáximo Cuadros <mcuadros@gmail.com>2016-12-14 10:20:00 +0100
commit500b1e1e183c73e3087710fca2f96acfd2e2d5cb (patch)
treeb2777dedd22f7279f2df7da8eb3b433d560c5701
parent40875ee0df345468f36cb00d54820d622b37cbc5 (diff)
downloadgo-git-500b1e1e183c73e3087710fca2f96acfd2e2d5cb.tar.gz
format/packfile: implement delta encoding (#172)
* format/packfile: implement delta encoding - Added all the logic to the encoder to be able to encode ref-delta and offset-delta objects - Created plumbing.ObjectToPack to handle deltas and standard objects when we are writing them into a packfile - Added specific encoder delta tests: one standard object and one delta, and one standard object and two deltas * Requested changes. * Requested changes
-rw-r--r--plumbing/format/packfile/diff_delta.go40
-rw-r--r--plumbing/format/packfile/encoder.go92
-rw-r--r--plumbing/format/packfile/encoder_test.go128
-rw-r--r--plumbing/format/packfile/object_pack.go47
-rw-r--r--plumbing/format/packfile/object_pack_test.go40
-rw-r--r--utils/binary/write.go14
-rw-r--r--utils/binary/writer_test.go16
7 files changed, 352 insertions, 25 deletions
diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go
index eaed377..7a32d5d 100644
--- a/plumbing/format/packfile/diff_delta.go
+++ b/plumbing/format/packfile/diff_delta.go
@@ -1,6 +1,7 @@
package packfile
import (
+ "fmt"
"io/ioutil"
"gopkg.in/src-d/go-git.v4/plumbing"
@@ -14,28 +15,53 @@ const (
maxCopyLen = 0xffff
)
-// GetDelta returns the way of how to transform base object to target object
-func GetDelta(base, target plumbing.Object) ([]byte, error) {
- baseReader, err := base.Reader()
+// GetOFSDelta returns a delta object of type plumbing.OFSDeltaObject whose
+// content describes how to transform the base object into the target object.
+func GetOFSDelta(base, target plumbing.Object) (plumbing.Object, error) {
+ return getDelta(base, target, plumbing.OFSDeltaObject)
+}
+
+// GetRefDelta returns a delta object of type plumbing.REFDeltaObject whose
+// content describes how to transform the base object into the target object.
+func GetRefDelta(base, target plumbing.Object) (plumbing.Object, error) {
+ return getDelta(base, target, plumbing.REFDeltaObject)
+}
+
+// getDelta reads the whole content of base and target, computes with
+// DiffDelta the instructions that turn the former into the latter, and
+// returns them wrapped in a plumbing.MemoryObject of type t. Only
+// plumbing.OFSDeltaObject and plumbing.REFDeltaObject are accepted as t;
+// any other type yields an error.
+func getDelta(base, target plumbing.Object, t plumbing.ObjectType) (plumbing.Object, error) {
+ if t != plumbing.OFSDeltaObject && t != plumbing.REFDeltaObject {
+ return nil, fmt.Errorf("Type not supported: %v", t)
+ }
+
+ br, err := base.Reader()
if err != nil {
return nil, err
}
+ // Reader hands out an io.ReadCloser; release it once it has been drained.
+ defer br.Close()
- targetReader, err := target.Reader()
+ tr, err := target.Reader()
if err != nil {
return nil, err
}
+ defer tr.Close()
- baseBuf, err := ioutil.ReadAll(baseReader)
+ bb, err := ioutil.ReadAll(br)
if err != nil {
return nil, err
}
- targetBuf, err := ioutil.ReadAll(targetReader)
+ tb, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
- return DiffDelta(baseBuf, targetBuf), nil
+ db := DiffDelta(bb, tb)
+ delta := &plumbing.MemoryObject{}
+ _, err = delta.Write(db)
+ if err != nil {
+ return nil, err
+ }
+
+ delta.SetSize(int64(len(db)))
+ delta.SetType(t)
+
+ return delta, nil
}
// DiffDelta returns the way of how to transform baseBuf to targetBuf
diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go
index 1404dbe..eb1c532 100644
--- a/plumbing/format/packfile/encoder.go
+++ b/plumbing/format/packfile/encoder.go
@@ -15,9 +15,10 @@ import (
// format
type Encoder struct {
storage storer.ObjectStorer
- w io.Writer
+ w *offsetWriter
zw *zlib.Writer
hasher plumbing.Hasher
+ offsets map[plumbing.Hash]int64
}
// NewEncoder creates a new packfile encoder using a specific Writer and
@@ -27,28 +28,38 @@ func NewEncoder(w io.Writer, s storer.ObjectStorer) *Encoder {
Hash: sha1.New(),
}
mw := io.MultiWriter(w, h)
+ ow := newOffsetWriter(mw)
zw := zlib.NewWriter(mw)
return &Encoder{
storage: s,
- w: mw,
+ w: ow,
zw: zw,
hasher: h,
+ offsets: make(map[plumbing.Hash]int64),
}
}
// Encode creates a packfile containing all the objects referenced in hashes
// and writes it to the writer in the Encoder.
func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) {
- if err := e.head(len(hashes)); err != nil {
- return plumbing.ZeroHash, err
- }
-
+ // Exactly one entry is produced per requested hash, so size the slice up
+ // front instead of letting append grow it.
+ objects := make([]*ObjectToPack, 0, len(hashes))
for _, h := range hashes {
o, err := e.storage.Object(plumbing.AnyObject, h)
if err != nil {
return plumbing.ZeroHash, err
}
+ // TODO delta selection logic
+ objects = append(objects, newObjectToPack(o))
+ }
+
+ return e.encode(objects)
+}
+func (e *Encoder) encode(objects []*ObjectToPack) (plumbing.Hash, error) {
+ if err := e.head(len(objects)); err != nil {
+ return plumbing.ZeroHash, err
+ }
+ for _, o := range objects {
if err := e.entry(o); err != nil {
return plumbing.ZeroHash, err
}
@@ -56,7 +67,6 @@ func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) {
return e.footer()
}
-
func (e *Encoder) head(numEntries int) error {
return binary.Write(
e.w,
@@ -66,19 +76,22 @@ func (e *Encoder) head(numEntries int) error {
)
}
-func (e *Encoder) entry(o plumbing.Object) error {
- t := o.Type()
- if t == plumbing.OFSDeltaObject || t == plumbing.REFDeltaObject {
- // TODO implements delta objects
- return fmt.Errorf("delta object not supported: %v", t)
+func (e *Encoder) entry(o *ObjectToPack) error {
+ offset := e.w.Offset()
+
+ if err := e.entryHead(o.Object.Type(), o.Object.Size()); err != nil {
+ return err
}
- if err := e.entryHead(t, o.Size()); err != nil {
+ // Save the position using the original hash, maybe a delta will need it
+ e.offsets[o.Original.Hash()] = offset
+
+ if err := e.writeDeltaHeaderIfAny(o, offset); err != nil {
return err
}
e.zw.Reset(e.w)
- or, err := o.Reader()
+ or, err := o.Object.Reader()
if err != nil {
return err
}
@@ -90,6 +103,38 @@ func (e *Encoder) entry(o plumbing.Object) error {
return e.zw.Close()
}
+// writeDeltaHeaderIfAny emits the extra header that a delta entry carries in
+// addition to the common entry header: the distance back to the base entry
+// for offset deltas, or the hash of the base for reference deltas. Entries
+// without a base, and deltas of any other type, write nothing.
+func (e *Encoder) writeDeltaHeaderIfAny(o *ObjectToPack, offset int64) error {
+	if !o.IsDelta() {
+		return nil
+	}
+
+	switch o.Object.Type() {
+	case plumbing.OFSDeltaObject:
+		return e.writeOfsDeltaHeader(offset, o.Base.Original.Hash())
+	case plumbing.REFDeltaObject:
+		return e.writeRefDeltaHeader(o.Base.Original.Hash())
+	}
+
+	return nil
+}
+
+// writeRefDeltaHeader writes source, the hash of the delta's base object, to
+// the encoder's writer.
+func (e *Encoder) writeRefDeltaHeader(source plumbing.Hash) error {
+ return binary.Write(e.w, source)
+}
+
+// writeOfsDeltaHeader writes, as a variable-width integer, the distance
+// between deltaOffset (where the delta entry starts) and the recorded
+// position of the entry identified by source. It fails if no position has
+// been recorded for source.
+func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, source plumbing.Hash) error {
+	// An offset delta refers to its base by position, so the base must
+	// already be present in the offsets table.
+	if baseOffset, found := e.offsets[source]; found {
+		return binary.WriteVariableWidthInt(e.w, deltaOffset-baseOffset)
+	}
+
+	return fmt.Errorf("delta source not found. Hash: %v", source)
+}
+
func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
t := int64(typeNum)
header := []byte{}
@@ -114,3 +159,22 @@ func (e *Encoder) footer() (plumbing.Hash, error) {
h := e.hasher.Sum()
return h, binary.Write(e.w, h)
}
+
+// offsetWriter wraps an io.Writer and keeps a running count of the bytes
+// that have been written through it.
+type offsetWriter struct {
+	w      io.Writer
+	offset int64
+}
+
+// newOffsetWriter returns an offsetWriter over w with its count at zero.
+func newOffsetWriter(w io.Writer) *offsetWriter {
+	return &offsetWriter{w: w}
+}
+
+// Write forwards p to the wrapped writer and advances the count by the
+// number of bytes that writer reports, even when it also reports an error.
+func (ow *offsetWriter) Write(p []byte) (int, error) {
+	written, err := ow.w.Write(p)
+	ow.offset += int64(written)
+
+	return written, err
+}
+
+// Offset returns the number of bytes written so far.
+func (ow *offsetWriter) Offset() int64 {
+	return ow.offset
+}
diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go
index 729843d..b07e2f4 100644
--- a/plumbing/format/packfile/encoder_test.go
+++ b/plumbing/format/packfile/encoder_test.go
@@ -79,6 +79,13 @@ func (s *EncoderSuite) TestMaxObjectSize(c *C) {
c.Assert(hash.IsZero(), Not(Equals), true)
}
+// TestHashNotFound checks that Encode, given a hash the suite's storage is
+// not expected to contain, returns the zero hash together with
+// plumbing.ErrObjectNotFound.
+func (s *EncoderSuite) TestHashNotFound(c *C) {
+ h, err := s.enc.Encode([]plumbing.Hash{plumbing.NewHash("BAD")})
+ c.Assert(h, Equals, plumbing.ZeroHash)
+ c.Assert(err, NotNil)
+ c.Assert(err, Equals, plumbing.ErrObjectNotFound)
+}
+
func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
scanner := NewScanner(f.Packfile())
@@ -91,14 +98,16 @@ func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
c.Assert(err, IsNil)
c.Assert(ch, Equals, f.PackfileHash)
- commitIter, err := d.o.IterObjects(plumbing.AnyObject)
+ objIter, err := d.o.IterObjects(plumbing.AnyObject)
c.Assert(err, IsNil)
objects := []plumbing.Object{}
hashes := []plumbing.Hash{}
- err = commitIter.ForEach(func(o plumbing.Object) error {
+ err = objIter.ForEach(func(o plumbing.Object) error {
objects = append(objects, o)
hash, err := s.store.SetObject(o)
+ c.Assert(err, IsNil)
+
hashes = append(hashes, hash)
return err
@@ -115,10 +124,10 @@ func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
_, err = d.Decode()
c.Assert(err, IsNil)
- commitIter, err = d.o.IterObjects(plumbing.AnyObject)
+ objIter, err = d.o.IterObjects(plumbing.AnyObject)
c.Assert(err, IsNil)
obtainedObjects := []plumbing.Object{}
- err = commitIter.ForEach(func(o plumbing.Object) error {
+ err = objIter.ForEach(func(o plumbing.Object) error {
obtainedObjects = append(obtainedObjects, o)
return nil
@@ -138,3 +147,114 @@ func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
c.Assert(len(obtainedObjects), Equals, equals)
})
}
+
+// TestDecodeEncodeWithDeltaDecodeREF runs the single-delta round trip with a
+// reference delta.
+func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeREF(c *C) {
+ s.simpleDeltaTest(c, plumbing.REFDeltaObject)
+}
+
+// TestDecodeEncodeWithDeltaDecodeOFS runs the single-delta round trip with an
+// offset delta.
+func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeOFS(c *C) {
+ s.simpleDeltaTest(c, plumbing.OFSDeltaObject)
+}
+
+// TestDecodeEncodeWithDeltasDecodeREF runs the two-delta round trip with
+// reference deltas.
+func (s *EncoderSuite) TestDecodeEncodeWithDeltasDecodeREF(c *C) {
+ s.deltaOverDeltaTest(c, plumbing.REFDeltaObject)
+}
+
+// TestDecodeEncodeWithDeltasDecodeOFS runs the two-delta round trip with
+// offset deltas.
+func (s *EncoderSuite) TestDecodeEncodeWithDeltasDecodeOFS(c *C) {
+ s.deltaOverDeltaTest(c, plumbing.OFSDeltaObject)
+}
+
+// simpleDeltaTest encodes a blob together with a delta of type t built
+// against it, decodes the resulting packfile into a fresh in-memory storage,
+// and checks that both the source blob and the delta's target blob come back
+// equal to the originals.
+func (s *EncoderSuite) simpleDeltaTest(c *C, t plumbing.ObjectType) {
+ srcObject := newObject(plumbing.BlobObject, []byte("0"))
+ targetObject := newObject(plumbing.BlobObject, []byte("01"))
+
+ deltaObject, err := delta(srcObject, targetObject, t)
+ c.Assert(err, IsNil)
+
+ // The source is written first, followed by the delta that is based on it.
+ srcToPack := newObjectToPack(srcObject)
+ _, err = s.enc.encode([]*ObjectToPack{
+ srcToPack,
+ newDeltaObjectToPack(srcToPack, targetObject, deltaObject),
+ })
+ c.Assert(err, IsNil)
+
+ scanner := NewScanner(s.buf)
+
+ storage := memory.NewStorage()
+ d, err := NewDecoder(scanner, storage)
+ c.Assert(err, IsNil)
+
+ _, err = d.Decode()
+ c.Assert(err, IsNil)
+
+ decSrc, err := storage.Object(srcObject.Type(), srcObject.Hash())
+ c.Assert(err, IsNil)
+ c.Assert(decSrc, DeepEquals, srcObject)
+
+ // The target is looked up by its own hash, not by the delta's.
+ decTarget, err := storage.Object(targetObject.Type(), targetObject.Hash())
+ c.Assert(err, IsNil)
+ c.Assert(decTarget, DeepEquals, targetObject)
+}
+
+// deltaOverDeltaTest encodes a blob, a delta of type t against it, and a
+// second delta of type t whose base is the first delta's target. It then
+// decodes the packfile into a fresh in-memory storage and checks that the
+// three original blobs come back equal to the originals.
+func (s *EncoderSuite) deltaOverDeltaTest(c *C, t plumbing.ObjectType) {
+	srcObject := newObject(plumbing.BlobObject, []byte("0"))
+	targetObject := newObject(plumbing.BlobObject, []byte("01"))
+	otherTargetObject := newObject(plumbing.BlobObject, []byte("011111"))
+
+	deltaObject, err := delta(srcObject, targetObject, t)
+	c.Assert(err, IsNil)
+	c.Assert(deltaObject.Hash(), Not(Equals), plumbing.ZeroHash)
+
+	otherDeltaObject, err := delta(targetObject, otherTargetObject, t)
+	c.Assert(err, IsNil)
+	c.Assert(otherDeltaObject.Hash(), Not(Equals), plumbing.ZeroHash)
+
+	srcToPack := newObjectToPack(srcObject)
+	// Chain the second delta on the entry that is actually written for
+	// targetObject (itself a delta), so that its Base and Depth describe a
+	// delta built on top of another delta.
+	targetToPack := newDeltaObjectToPack(srcToPack, targetObject, deltaObject)
+	_, err = s.enc.encode([]*ObjectToPack{
+		srcToPack,
+		targetToPack,
+		newDeltaObjectToPack(targetToPack, otherTargetObject, otherDeltaObject),
+	})
+	c.Assert(err, IsNil)
+
+	scanner := NewScanner(s.buf)
+	storage := memory.NewStorage()
+	d, err := NewDecoder(scanner, storage)
+	c.Assert(err, IsNil)
+
+	_, err = d.Decode()
+	c.Assert(err, IsNil)
+
+	decSrc, err := storage.Object(srcObject.Type(), srcObject.Hash())
+	c.Assert(err, IsNil)
+	c.Assert(decSrc, DeepEquals, srcObject)
+
+	decTarget, err := storage.Object(targetObject.Type(), targetObject.Hash())
+	c.Assert(err, IsNil)
+	c.Assert(decTarget, DeepEquals, targetObject)
+
+	decOtherTarget, err := storage.Object(otherTargetObject.Type(), otherTargetObject.Hash())
+	c.Assert(err, IsNil)
+	c.Assert(decOtherTarget, DeepEquals, otherTargetObject)
+}
+
+// delta builds the delta that turns base into target using GetOFSDelta or
+// GetRefDelta, depending on t. It panics when t is neither of the two delta
+// object types.
+func delta(base, target plumbing.Object, t plumbing.ObjectType) (plumbing.Object, error) {
+ switch t {
+ case plumbing.OFSDeltaObject:
+ return GetOFSDelta(base, target)
+ case plumbing.REFDeltaObject:
+ return GetRefDelta(base, target)
+ default:
+ panic("delta type not found")
+ }
+}
+
+// newObject builds an in-memory object of type t whose content is cont. It
+// panics if the content cannot be written, so that a broken fixture fails
+// loudly instead of producing a half-initialised object.
+func newObject(t plumbing.ObjectType, cont []byte) plumbing.Object {
+	o := &plumbing.MemoryObject{}
+	o.SetType(t)
+	o.SetSize(int64(len(cont)))
+	if _, err := o.Write(cont); err != nil {
+		panic(err)
+	}
+
+	return o
+}
diff --git a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go
new file mode 100644
index 0000000..dc5a3c7
--- /dev/null
+++ b/plumbing/format/packfile/object_pack.go
@@ -0,0 +1,47 @@
+package packfile
+
+import "gopkg.in/src-d/go-git.v4/plumbing"
+
+// ObjectToPack is a representation of an object that is going to be written
+// into a pack file.
+type ObjectToPack struct {
+ // Object is the main object to pack; it can be any object, including a delta.
+ Object plumbing.Object
+ // Base is the object that a delta is based on (it could also be another
+ // delta). If the main object is not a delta, Base is nil.
+ Base *ObjectToPack
+ // Original is the object that is obtained by applying the delta to
+ // Base, or the same object as Object in the case of a non-delta object.
+ Original plumbing.Object
+ // Depth is the number of deltas that must be resolved to obtain Original
+ // (delta based on delta based on ...).
+ Depth int
+}
+
+// newObjectToPack creates an ObjectToPack for a non-delta object: o is used
+// as both Object and Original, while Base and Depth keep their zero values.
+func newObjectToPack(o plumbing.Object) *ObjectToPack {
+ return &ObjectToPack{
+ Object: o,
+ Original: o,
+ }
+}
+
+// newDeltaObjectToPack creates an ObjectToPack for a delta object from its
+// base (which may itself be a delta), the object the delta resolves to
+// (original), and the delta object itself. Depth is set to one more than the
+// base's Depth. base is dereferenced, so it must not be nil.
+func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.Object) *ObjectToPack {
+ return &ObjectToPack{
+ Object: delta,
+ Base: base,
+ Original: original,
+ Depth: base.Depth + 1,
+ }
+}
+
+// IsDelta reports whether the object is going to be packed as a delta, that
+// is, whether it has a Base.
+func (o *ObjectToPack) IsDelta() bool {
+	return o.Base != nil
+}
diff --git a/plumbing/format/packfile/object_pack_test.go b/plumbing/format/packfile/object_pack_test.go
new file mode 100644
index 0000000..ddc7ab5
--- /dev/null
+++ b/plumbing/format/packfile/object_pack_test.go
@@ -0,0 +1,40 @@
+package packfile
+
+import (
+ "io"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+
+ . "gopkg.in/check.v1"
+)
+
+type ObjectToPackSuite struct{}
+
+var _ = Suite(&ObjectToPackSuite{})
+
+// TestObjectToPack checks the fields populated by newObjectToPack and
+// newDeltaObjectToPack, and what IsDelta reports for each of them.
+func (s *ObjectToPackSuite) TestObjectToPack(c *C) {
+	obj := &dummyObject{}
+	otp := newObjectToPack(obj)
+	c.Assert(obj, Equals, otp.Object)
+	c.Assert(obj, Equals, otp.Original)
+	c.Assert(otp.Base, IsNil)
+	c.Assert(otp.Depth, Equals, 0)
+	c.Assert(otp.IsDelta(), Equals, false)
+
+	original := &dummyObject{}
+	delta := &dummyObject{}
+	deltaToPack := newDeltaObjectToPack(otp, original, delta)
+	// Object must be the delta that was passed in, not the base's object.
+	c.Assert(delta, Equals, deltaToPack.Object)
+	c.Assert(original, Equals, deltaToPack.Original)
+	c.Assert(otp, Equals, deltaToPack.Base)
+	c.Assert(deltaToPack.Depth, Equals, 1)
+	c.Assert(deltaToPack.IsDelta(), Equals, true)
+}
+
+// dummyObject is a stateless stub whose methods all return zero values: the
+// zero hash, plumbing.InvalidObject, size 0, and nil readers and writers.
+type dummyObject struct{}
+
+func (*dummyObject) Hash() plumbing.Hash { return plumbing.ZeroHash }
+func (*dummyObject) Type() plumbing.ObjectType { return plumbing.InvalidObject }
+func (*dummyObject) SetType(plumbing.ObjectType) {}
+func (*dummyObject) Size() int64 { return 0 }
+func (*dummyObject) SetSize(s int64) {}
+func (*dummyObject) Reader() (io.ReadCloser, error) { return nil, nil }
+func (*dummyObject) Writer() (io.WriteCloser, error) { return nil, nil }
diff --git a/utils/binary/write.go b/utils/binary/write.go
index 3ea1d91..2ec3581 100644
--- a/utils/binary/write.go
+++ b/utils/binary/write.go
@@ -17,6 +17,20 @@ func Write(w io.Writer, data ...interface{}) error {
return nil
}
+// WriteVariableWidthInt writes n to w as a sequence of 7-bit groups, most
+// significant group first. Every byte except the last has its high bit set,
+// and each group above the lowest is stored minus one. For example, 19 is
+// written as {19} and 366 as {129, 110}.
+//
+// n is expected to be non-negative. A negative n is encoded from its
+// two's-complement bit pattern, so that the call always terminates.
+func WriteVariableWidthInt(w io.Writer, n int64) error {
+	// 64 bits split into 7-bit groups never need more than 10 bytes.
+	var buf [10]byte
+
+	// Work on the unsigned value: shifting a negative int64 right never
+	// reaches zero, which would keep the loop below running forever.
+	u := uint64(n)
+
+	// Fill the buffer from the end so that nothing has to be prepended.
+	i := len(buf) - 1
+	buf[i] = byte(u & 0x7f)
+	u >>= 7
+	for u != 0 {
+		u--
+		i--
+		buf[i] = 0x80 | byte(u&0x7f)
+		u >>= 7
+	}
+
+	_, err := w.Write(buf[i:])
+
+	return err
+}
+
// WriteUint32 writes the binary representation of a uint32 into w, in BigEndian
// order
func WriteUint32(w io.Writer, value uint32) error {
diff --git a/utils/binary/writer_test.go b/utils/binary/writer_test.go
index 88140a1..1380280 100644
--- a/utils/binary/writer_test.go
+++ b/utils/binary/writer_test.go
@@ -41,3 +41,19 @@ func (s *BinarySuite) TestWriteUint16(c *C) {
c.Assert(err, IsNil)
c.Assert(buf, DeepEquals, expected)
}
+
+// TestWriteVariableWidthInt checks a value that needs two bytes: 366 must be
+// written as {129, 110}.
+func (s *BinarySuite) TestWriteVariableWidthInt(c *C) {
+ buf := bytes.NewBuffer(nil)
+
+ err := WriteVariableWidthInt(buf, 366)
+ c.Assert(err, IsNil)
+ c.Assert(buf.Bytes(), DeepEquals, []byte{129, 110})
+}
+
+// TestWriteVariableWidthIntShort checks a value that fits in a single byte:
+// 19 must be written as {19}.
+func (s *BinarySuite) TestWriteVariableWidthIntShort(c *C) {
+ buf := bytes.NewBuffer(nil)
+
+ err := WriteVariableWidthInt(buf, 19)
+ c.Assert(err, IsNil)
+ c.Assert(buf.Bytes(), DeepEquals, []byte{19})
+}