aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2017-07-27 18:04:00 +0200
committerGitHub <noreply@github.com>2017-07-27 18:04:00 +0200
commit86f33ed017b55898758bf8900a085f355b2793d0 (patch)
tree1e741bed0672a7afb62cda37ca0b1d86fad52889 /plumbing/format
parent7b08a3005480a50f0f4290aff8f3702085d5e30d (diff)
parent16b24f84e9342234ad90da27a6532887b05d1965 (diff)
downloadgo-git-86f33ed017b55898758bf8900a085f355b2793d0.tar.gz
Merge pull request #515 from smola/reuse-packed-objects
storage: reuse deltas from packfiles
Diffstat (limited to 'plumbing/format')
-rw-r--r--plumbing/format/packfile/delta_selector.go145
-rw-r--r--plumbing/format/packfile/encoder.go29
-rw-r--r--plumbing/format/packfile/encoder_advanced_test.go91
-rw-r--r--plumbing/format/packfile/encoder_test.go62
-rw-r--r--plumbing/format/packfile/object_pack.go46
5 files changed, 289 insertions, 84 deletions
diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go
index 20c8cea..efcbd53 100644
--- a/plumbing/format/packfile/delta_selector.go
+++ b/plumbing/format/packfile/delta_selector.go
@@ -47,17 +47,123 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
var objectsToPack []*ObjectToPack
for _, h := range hashes {
- o, err := dw.storer.EncodedObject(plumbing.AnyObject, h)
+ o, err := dw.encodedDeltaObject(h)
if err != nil {
return nil, err
}
- objectsToPack = append(objectsToPack, newObjectToPack(o))
+ otp := newObjectToPack(o)
+ if _, ok := o.(plumbing.DeltaObject); ok {
+ otp.Original = nil
+ }
+
+ objectsToPack = append(objectsToPack, otp)
+ }
+
+ if err := dw.fixAndBreakChains(objectsToPack); err != nil {
+ return nil, err
}
return objectsToPack, nil
}
+func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
+ edos, ok := dw.storer.(storer.DeltaObjectStorer)
+ if !ok {
+ return dw.encodedObject(h)
+ }
+
+ return edos.DeltaObject(plumbing.AnyObject, h)
+}
+
+func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
+ return dw.storer.EncodedObject(plumbing.AnyObject, h)
+}
+
+func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error {
+ m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack))
+ for _, otp := range objectsToPack {
+ m[otp.Hash()] = otp
+ }
+
+ for _, otp := range objectsToPack {
+ if err := dw.fixAndBreakChainsOne(m, otp); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error {
+ if !otp.Object.Type().IsDelta() {
+ return nil
+ }
+
+ // Initial ObjectToPack instances might have a delta assigned to Object
+ // but no actual base initially. Once Base is assigned to a delta, it means
+ // we already fixed it.
+ if otp.Base != nil {
+ return nil
+ }
+
+ do, ok := otp.Object.(plumbing.DeltaObject)
+ if !ok {
+ // if this is not a DeltaObject, then we cannot retrieve its base,
+ // so we have to break the delta chain here.
+ return dw.undeltify(otp)
+ }
+
+ base, ok := objectsToPack[do.BaseHash()]
+ if !ok {
+ // The base of the delta is not in our list of objects to pack, so
+ // we break the chain.
+ return dw.undeltify(otp)
+ }
+
+ if base.Size() <= otp.Size() {
+ // Bases should be bigger
+ return dw.undeltify(otp)
+ }
+
+ if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
+ return err
+ }
+
+ otp.SetDelta(base, otp.Object)
+ return nil
+}
+
+func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error {
+ if otp.Original != nil {
+ return nil
+ }
+
+ if !otp.Object.Type().IsDelta() {
+ return nil
+ }
+
+ obj, err := dw.encodedObject(otp.Hash())
+ if err != nil {
+ return err
+ }
+
+ otp.Original = obj
+ return nil
+}
+
+// undeltify undeltifies an *ObjectToPack by retrieving the original object from
+// the storer and resetting it.
+func (dw *deltaSelector) undeltify(otp *ObjectToPack) error {
+ if err := dw.restoreOriginal(otp); err != nil {
+ return err
+ }
+
+ otp.Object = otp.Original
+ otp.Depth = 0
+ return nil
+}
+
func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
sort.Sort(byTypeAndSize(objectsToPack))
}
@@ -66,15 +172,24 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
for i := 0; i < len(objectsToPack); i++ {
target := objectsToPack[i]
- // We only want to create deltas from specific types
- if !applyDelta[target.Original.Type()] {
+ // If we already have a delta, we don't try to find a new one for this
+ // object. This happens when a delta is set to be reused from an existing
+ // packfile.
+ if target.IsDelta() {
+ continue
+ }
+
+ // We only want to create deltas from specific types.
+ if !applyDelta[target.Type()] {
continue
}
for j := i - 1; j >= 0; j-- {
base := objectsToPack[j]
// Objects must use only the same type as their delta base.
- if base.Original.Type() != target.Original.Type() {
+ // Since objectsToPack is sorted by type and size, once we find
+ // a different type, we know we won't find more of them.
+ if base.Type() != target.Type() {
break
}
@@ -89,7 +204,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
// If the sizes are radically different, this is a bad pairing.
- if target.Original.Size() < base.Original.Size()>>4 {
+ if target.Size() < base.Size()>>4 {
return nil
}
@@ -106,10 +221,20 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
}
// If we have to insert a lot to make this work, find another.
- if base.Original.Size()-target.Object.Size() > msz {
+ if base.Size()-target.Size() > msz {
return nil
}
+ // Original object might not be present if we're reusing a delta, so we
+ // ensure it is restored.
+ if err := dw.restoreOriginal(target); err != nil {
+ return err
+ }
+
+ if err := dw.restoreOriginal(base); err != nil {
+ return err
+ }
+
// Now we can generate the delta using originals
delta, err := GetDelta(base.Original, target.Original)
if err != nil {
@@ -162,13 +287,13 @@ func (a byTypeAndSize) Len() int { return len(a) }
func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byTypeAndSize) Less(i, j int) bool {
- if a[i].Object.Type() < a[j].Object.Type() {
+ if a[i].Type() < a[j].Type() {
return false
}
- if a[i].Object.Type() > a[j].Object.Type() {
+ if a[i].Type() > a[j].Type() {
return true
}
- return a[i].Object.Size() > a[j].Object.Size()
+ return a[i].Size() > a[j].Size()
}
diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go
index ae83752..1426559 100644
--- a/plumbing/format/packfile/encoder.go
+++ b/plumbing/format/packfile/encoder.go
@@ -18,6 +18,9 @@ type Encoder struct {
w *offsetWriter
zw *zlib.Writer
hasher plumbing.Hasher
+ // offsets is a map of object hashes to corresponding offsets in the packfile.
+ // It is used to determine offset of the base of a delta when a OFS_DELTA is
+ // used.
offsets map[plumbing.Hash]int64
useRefDeltas bool
}
@@ -78,25 +81,24 @@ func (e *Encoder) head(numEntries int) error {
func (e *Encoder) entry(o *ObjectToPack) error {
offset := e.w.Offset()
+ e.offsets[o.Hash()] = offset
if o.IsDelta() {
if err := e.writeDeltaHeader(o, offset); err != nil {
return err
}
} else {
- if err := e.entryHead(o.Object.Type(), o.Object.Size()); err != nil {
+ if err := e.entryHead(o.Type(), o.Size()); err != nil {
return err
}
}
- // Save the position using the original hash, maybe a delta will need it
- e.offsets[o.Original.Hash()] = offset
-
e.zw.Reset(e.w)
or, err := o.Object.Reader()
if err != nil {
return err
}
+
_, err = io.Copy(e.zw, or)
if err != nil {
return err
@@ -117,9 +119,9 @@ func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error {
}
if e.useRefDeltas {
- return e.writeRefDeltaHeader(o.Base.Original.Hash())
+ return e.writeRefDeltaHeader(o.Base.Hash())
} else {
- return e.writeOfsDeltaHeader(offset, o.Base.Original.Hash())
+ return e.writeOfsDeltaHeader(offset, o.Base.Hash())
}
}
@@ -128,14 +130,19 @@ func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
}
func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) error {
- // because it is an offset delta, we need the base
- // object position
- offset, ok := e.offsets[base]
+ baseOffset, ok := e.offsets[base]
if !ok {
- return fmt.Errorf("delta base not found. Hash: %v", base)
+ return fmt.Errorf("base for delta not found, base hash: %v", base)
+ }
+
+ // for OFS_DELTA, offset of the base is interpreted as negative offset
+ // relative to the type-byte of the header of the ofs-delta entry.
+ relativeOffset := deltaOffset-baseOffset
+ if relativeOffset <= 0 {
+ return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
}
- return binary.WriteVariableWidthInt(e.w, deltaOffset-offset)
+ return binary.WriteVariableWidthInt(e.w, relativeOffset)
}
func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go
new file mode 100644
index 0000000..d92e2c4
--- /dev/null
+++ b/plumbing/format/packfile/encoder_advanced_test.go
@@ -0,0 +1,91 @@
+package packfile_test
+
+import (
+ "bytes"
+ "math/rand"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ . "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
+ "gopkg.in/src-d/go-git.v4/plumbing/storer"
+ "gopkg.in/src-d/go-git.v4/storage/filesystem"
+ "gopkg.in/src-d/go-git.v4/storage/memory"
+
+ "github.com/src-d/go-git-fixtures"
+ . "gopkg.in/check.v1"
+)
+
+type EncoderAdvancedSuite struct {
+ fixtures.Suite
+}
+
+var _ = Suite(&EncoderAdvancedSuite{})
+
+func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) {
+ fixs := fixtures.Basic().ByTag("packfile").ByTag(".git")
+ fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
+ ByTag("packfile").ByTag(".git").One())
+ fixs.Test(c, func(f *fixtures.Fixture) {
+ storage, err := filesystem.NewStorage(f.DotGit())
+ c.Assert(err, IsNil)
+ s.testEncodeDecode(c, storage)
+ })
+
+}
+
+func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {
+
+ objIter, err := storage.IterEncodedObjects(plumbing.AnyObject)
+ c.Assert(err, IsNil)
+
+ expectedObjects := map[plumbing.Hash]bool{}
+ var hashes []plumbing.Hash
+ err = objIter.ForEach(func(o plumbing.EncodedObject) error {
+ expectedObjects[o.Hash()] = true
+ hashes = append(hashes, o.Hash())
+ return err
+
+ })
+ c.Assert(err, IsNil)
+
+ // Shuffle hashes to avoid delta selector getting order right just because
+ // the initial order is correct.
+ auxHashes := make([]plumbing.Hash, len(hashes))
+ for i, j := range rand.Perm(len(hashes)) {
+ auxHashes[j] = hashes[i]
+ }
+ hashes = auxHashes
+
+ buf := bytes.NewBuffer(nil)
+ enc := NewEncoder(buf, storage, false)
+ _, err = enc.Encode(hashes)
+ c.Assert(err, IsNil)
+
+ scanner := NewScanner(buf)
+ storage = memory.NewStorage()
+ d, err := NewDecoder(scanner, storage)
+ c.Assert(err, IsNil)
+ _, err = d.Decode()
+ c.Assert(err, IsNil)
+
+ objIter, err = storage.IterEncodedObjects(plumbing.AnyObject)
+ c.Assert(err, IsNil)
+ obtainedObjects := map[plumbing.Hash]bool{}
+ err = objIter.ForEach(func(o plumbing.EncodedObject) error {
+ obtainedObjects[o.Hash()] = true
+ return nil
+ })
+ c.Assert(err, IsNil)
+ c.Assert(obtainedObjects, DeepEquals, expectedObjects)
+
+ for h := range obtainedObjects {
+ if !expectedObjects[h] {
+ c.Errorf("obtained unexpected object: %s", h)
+ }
+ }
+
+ for h := range expectedObjects {
+ if !obtainedObjects[h] {
+ c.Errorf("missing object: %s", h)
+ }
+ }
+}
diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go
index fa01ed0..b5b0c42 100644
--- a/plumbing/format/packfile/encoder_test.go
+++ b/plumbing/format/packfile/encoder_test.go
@@ -86,68 +86,6 @@ func (s *EncoderSuite) TestHashNotFound(c *C) {
c.Assert(err, Equals, plumbing.ErrObjectNotFound)
}
-func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
- fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
- scanner := NewScanner(f.Packfile())
- storage := memory.NewStorage()
-
- d, err := NewDecoder(scanner, storage)
- c.Assert(err, IsNil)
-
- ch, err := d.Decode()
- c.Assert(err, IsNil)
- c.Assert(ch, Equals, f.PackfileHash)
-
- objIter, err := d.o.IterEncodedObjects(plumbing.AnyObject)
- c.Assert(err, IsNil)
-
- objects := []plumbing.EncodedObject{}
- hashes := []plumbing.Hash{}
- err = objIter.ForEach(func(o plumbing.EncodedObject) error {
- objects = append(objects, o)
- hash, err := s.store.SetEncodedObject(o)
- c.Assert(err, IsNil)
-
- hashes = append(hashes, hash)
-
- return err
-
- })
- c.Assert(err, IsNil)
- _, err = s.enc.Encode(hashes)
- c.Assert(err, IsNil)
-
- scanner = NewScanner(s.buf)
- storage = memory.NewStorage()
- d, err = NewDecoder(scanner, storage)
- c.Assert(err, IsNil)
- _, err = d.Decode()
- c.Assert(err, IsNil)
-
- objIter, err = d.o.IterEncodedObjects(plumbing.AnyObject)
- c.Assert(err, IsNil)
- obtainedObjects := []plumbing.EncodedObject{}
- err = objIter.ForEach(func(o plumbing.EncodedObject) error {
- obtainedObjects = append(obtainedObjects, o)
-
- return nil
- })
- c.Assert(err, IsNil)
- c.Assert(len(obtainedObjects), Equals, len(objects))
-
- equals := 0
- for _, oo := range obtainedObjects {
- for _, o := range objects {
- if o.Hash() == oo.Hash() {
- equals++
- }
- }
- }
-
- c.Assert(len(obtainedObjects), Equals, equals)
- })
-}
-
func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeREF(c *C) {
s.enc = NewEncoder(s.buf, s.store, true)
s.simpleDeltaTest(c)
diff --git a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go
index a3e99c0..14337d1 100644
--- a/plumbing/format/packfile/object_pack.go
+++ b/plumbing/format/packfile/object_pack.go
@@ -1,6 +1,8 @@
package packfile
-import "gopkg.in/src-d/go-git.v4/plumbing"
+import (
+ "gopkg.in/src-d/go-git.v4/plumbing"
+)
// ObjectToPack is a representation of an object that is going to be into a
// pack file.
@@ -39,6 +41,48 @@ func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedOb
}
}
+func (o *ObjectToPack) Type() plumbing.ObjectType {
+ if o.Original != nil {
+ return o.Original.Type()
+ }
+
+ if o.Base != nil {
+ return o.Base.Type()
+ }
+
+ if o.Object != nil {
+ return o.Object.Type()
+ }
+
+ panic("cannot get type")
+}
+
+func (o *ObjectToPack) Hash() plumbing.Hash {
+ if o.Original != nil {
+ return o.Original.Hash()
+ }
+
+ do, ok := o.Object.(plumbing.DeltaObject)
+ if ok {
+ return do.ActualHash()
+ }
+
+ panic("cannot get hash")
+}
+
+func (o *ObjectToPack) Size() int64 {
+ if o.Original != nil {
+ return o.Original.Size()
+ }
+
+ do, ok := o.Object.(plumbing.DeltaObject)
+ if ok {
+ return do.ActualSize()
+ }
+
+ panic("cannot get ObjectToPack size")
+}
+
func (o *ObjectToPack) IsDelta() bool {
if o.Base != nil {
return true