about summary refs log tree commit diff stats
path: root/plumbing/format/packfile
diff options
context:
space:
mode:
authorSantiago M. Mola <santi@mola.io>2017-07-25 15:00:01 +0200
committerSantiago M. Mola <santi@mola.io>2017-07-27 15:33:14 +0200
commit87413ced43b02a41359ce7a1a07ab41aec6ee313 (patch)
tree07975422ab63bfbb13aefc1a2d53d757c7342848 /plumbing/format/packfile
parent3834038893d5cacb49e5f2786ad955d26f666546 (diff)
downloadgo-git-87413ced43b02a41359ce7a1a07ab41aec6ee313.tar.gz
storage: reuse deltas from packfiles
* plumbing: add DeltaObject interface for EncodedObjects that are deltas and hold additional information about them, such as the hash of the base object. * plumbing/storer: add DeltaObjectStorer interface for object storers that can return DeltaObject. Note that calls to EncodedObject will never return instances of DeltaObject. That requires explicit calls to DeltaObject. * storage/filesystem: implement DeltaObjectStorer interface. * plumbing/packfile: packfile encoder now supports reusing deltas that are already computed (e.g. from an existing packfile) if the storage implements DeltaObjectStorer. Reusing deltas boosts performance of packfile generation (e.g. on push).
Diffstat (limited to 'plumbing/format/packfile')
-rw-r--r--plumbing/format/packfile/delta_selector.go149
-rw-r--r--plumbing/format/packfile/encoder.go29
-rw-r--r--plumbing/format/packfile/encoder_advanced_test.go91
-rw-r--r--plumbing/format/packfile/encoder_test.go76
-rw-r--r--plumbing/format/packfile/object_pack.go46
5 files changed, 293 insertions, 98 deletions
diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go
index 20c8cea..4bee6d3 100644
--- a/plumbing/format/packfile/delta_selector.go
+++ b/plumbing/format/packfile/delta_selector.go
@@ -47,17 +47,127 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
var objectsToPack []*ObjectToPack
for _, h := range hashes {
- o, err := dw.storer.EncodedObject(plumbing.AnyObject, h)
+ o, err := dw.encodedDeltaObject(h)
if err != nil {
return nil, err
}
- objectsToPack = append(objectsToPack, newObjectToPack(o))
+ otp := newObjectToPack(o)
+ if _, ok := o.(plumbing.DeltaObject); ok {
+ otp.Original = nil
+ }
+
+ objectsToPack = append(objectsToPack, otp)
+ }
+
+ if err := dw.fixAndBreakChains(objectsToPack); err != nil {
+ return nil, err
}
return objectsToPack, nil
}
+func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
+ edos, ok := dw.storer.(storer.DeltaObjectStorer)
+ if !ok {
+ return dw.encodedObject(h)
+ }
+
+ return edos.DeltaObject(plumbing.AnyObject, h)
+}
+
+func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
+ return dw.storer.EncodedObject(plumbing.AnyObject, h)
+}
+
+func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error {
+ m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack))
+ for _, otp := range objectsToPack {
+ m[otp.Hash()] = otp
+ }
+
+ for _, otp := range objectsToPack {
+ if err := dw.fixAndBreakChainsOne(m, otp); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error {
+ isDelta := otp.Object.Type() == plumbing.OFSDeltaObject ||
+ otp.Object.Type() == plumbing.REFDeltaObject
+ if !isDelta {
+ return nil
+ }
+
+ // Initial ObjectToPack instances might have a delta assigned to Object
+ // but no actual base initially. Once Base is assigned to a delta, it means
+ // we already fixed it.
+ if otp.Base != nil {
+ return nil
+ }
+
+ do, ok := otp.Object.(plumbing.DeltaObject)
+ if !ok {
+ // if this is not a DeltaObject, then we cannot retrieve its base,
+ // so we have to break the delta chain here.
+ return dw.undeltify(otp)
+ }
+
+ base, ok := objectsToPack[do.BaseHash()]
+ if !ok {
+ // The base of the delta is not in our list of objects to pack, so
+ // we break the chain.
+ return dw.undeltify(otp)
+ }
+
+ if base.Size() <= otp.Size() {
+ // Bases should be bigger
+ return dw.undeltify(otp)
+ }
+
+ if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
+ return err
+ }
+
+ otp.SetDelta(base, otp.Object)
+ return nil
+}
+
+func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error {
+ if otp.Original != nil {
+ return nil
+ }
+
+ isDelta := otp.Object.Type() == plumbing.OFSDeltaObject ||
+ otp.Object.Type() == plumbing.REFDeltaObject
+ if !isDelta {
+ return nil
+ }
+
+ obj, err := dw.encodedObject(otp.Hash())
+ if err != nil {
+ return err
+ }
+
+ otp.Original = obj
+ return nil
+}
+
+// undeltify undeltifies an *ObjectToPack by retrieving the original object from
+// the storer and resetting it.
+func (dw *deltaSelector) undeltify(otp *ObjectToPack) error {
+ if err := dw.restoreOriginal(otp); err != nil {
+ return err
+ }
+
+ otp.Object = otp.Original
+ otp.Depth = 0
+ return nil
+}
+
func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
sort.Sort(byTypeAndSize(objectsToPack))
}
@@ -66,15 +176,24 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
for i := 0; i < len(objectsToPack); i++ {
target := objectsToPack[i]
- // We only want to create deltas from specific types
- if !applyDelta[target.Original.Type()] {
+ // If we already have a delta, we don't try to find a new one for this
+ // object. This happens when a delta is set to be reused from an existing
+ // packfile.
+ if target.IsDelta() {
+ continue
+ }
+
+ // We only want to create deltas from specific types.
+ if !applyDelta[target.Type()] {
continue
}
for j := i - 1; j >= 0; j-- {
base := objectsToPack[j]
// Objects must use only the same type as their delta base.
- if base.Original.Type() != target.Original.Type() {
+ // Since objectsToPack is sorted by type and size, once we find
+ // a different type, we know we won't find more of them.
+ if base.Type() != target.Type() {
break
}
@@ -89,7 +208,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
// If the sizes are radically different, this is a bad pairing.
- if target.Original.Size() < base.Original.Size()>>4 {
+ if target.Size() < base.Size()>>4 {
return nil
}
@@ -106,10 +225,20 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
}
// If we have to insert a lot to make this work, find another.
- if base.Original.Size()-target.Object.Size() > msz {
+ if base.Size()-target.Size() > msz {
return nil
}
+ // Original object might not be present if we're reusing a delta, so we
+ // ensure it is restored.
+ if err := dw.restoreOriginal(target); err != nil {
+ return err
+ }
+
+ if err := dw.restoreOriginal(base); err != nil {
+ return err
+ }
+
// Now we can generate the delta using originals
delta, err := GetDelta(base.Original, target.Original)
if err != nil {
@@ -162,13 +291,13 @@ func (a byTypeAndSize) Len() int { return len(a) }
func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byTypeAndSize) Less(i, j int) bool {
- if a[i].Object.Type() < a[j].Object.Type() {
+ if a[i].Type() < a[j].Type() {
return false
}
- if a[i].Object.Type() > a[j].Object.Type() {
+ if a[i].Type() > a[j].Type() {
return true
}
- return a[i].Object.Size() > a[j].Object.Size()
+ return a[i].Size() > a[j].Size()
}
diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go
index ae83752..1426559 100644
--- a/plumbing/format/packfile/encoder.go
+++ b/plumbing/format/packfile/encoder.go
@@ -18,6 +18,9 @@ type Encoder struct {
w *offsetWriter
zw *zlib.Writer
hasher plumbing.Hasher
+ // offsets is a map of object hashes to corresponding offsets in the packfile.
+ // It is used to determine offset of the base of a delta when a OFS_DELTA is
+ // used.
offsets map[plumbing.Hash]int64
useRefDeltas bool
}
@@ -78,25 +81,24 @@ func (e *Encoder) head(numEntries int) error {
func (e *Encoder) entry(o *ObjectToPack) error {
offset := e.w.Offset()
+ e.offsets[o.Hash()] = offset
if o.IsDelta() {
if err := e.writeDeltaHeader(o, offset); err != nil {
return err
}
} else {
- if err := e.entryHead(o.Object.Type(), o.Object.Size()); err != nil {
+ if err := e.entryHead(o.Type(), o.Size()); err != nil {
return err
}
}
- // Save the position using the original hash, maybe a delta will need it
- e.offsets[o.Original.Hash()] = offset
-
e.zw.Reset(e.w)
or, err := o.Object.Reader()
if err != nil {
return err
}
+
_, err = io.Copy(e.zw, or)
if err != nil {
return err
@@ -117,9 +119,9 @@ func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error {
}
if e.useRefDeltas {
- return e.writeRefDeltaHeader(o.Base.Original.Hash())
+ return e.writeRefDeltaHeader(o.Base.Hash())
} else {
- return e.writeOfsDeltaHeader(offset, o.Base.Original.Hash())
+ return e.writeOfsDeltaHeader(offset, o.Base.Hash())
}
}
@@ -128,14 +130,19 @@ func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
}
func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) error {
- // because it is an offset delta, we need the base
- // object position
- offset, ok := e.offsets[base]
+ baseOffset, ok := e.offsets[base]
if !ok {
- return fmt.Errorf("delta base not found. Hash: %v", base)
+ return fmt.Errorf("base for delta not found, base hash: %v", base)
+ }
+
+ // for OFS_DELTA, offset of the base is interpreted as negative offset
+ // relative to the type-byte of the header of the ofs-delta entry.
+ relativeOffset := deltaOffset - baseOffset
+ if relativeOffset <= 0 {
+ return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
}
- return binary.WriteVariableWidthInt(e.w, deltaOffset-offset)
+ return binary.WriteVariableWidthInt(e.w, relativeOffset)
}
func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go
new file mode 100644
index 0000000..d92e2c4
--- /dev/null
+++ b/plumbing/format/packfile/encoder_advanced_test.go
@@ -0,0 +1,91 @@
+package packfile_test
+
+import (
+ "bytes"
+ "math/rand"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ . "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
+ "gopkg.in/src-d/go-git.v4/plumbing/storer"
+ "gopkg.in/src-d/go-git.v4/storage/filesystem"
+ "gopkg.in/src-d/go-git.v4/storage/memory"
+
+ "github.com/src-d/go-git-fixtures"
+ . "gopkg.in/check.v1"
+)
+
+type EncoderAdvancedSuite struct {
+ fixtures.Suite
+}
+
+var _ = Suite(&EncoderAdvancedSuite{})
+
+func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) {
+ fixs := fixtures.Basic().ByTag("packfile").ByTag(".git")
+ fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
+ ByTag("packfile").ByTag(".git").One())
+ fixs.Test(c, func(f *fixtures.Fixture) {
+ storage, err := filesystem.NewStorage(f.DotGit())
+ c.Assert(err, IsNil)
+ s.testEncodeDecode(c, storage)
+ })
+
+}
+
+func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {
+
+ objIter, err := storage.IterEncodedObjects(plumbing.AnyObject)
+ c.Assert(err, IsNil)
+
+ expectedObjects := map[plumbing.Hash]bool{}
+ var hashes []plumbing.Hash
+ err = objIter.ForEach(func(o plumbing.EncodedObject) error {
+ expectedObjects[o.Hash()] = true
+ hashes = append(hashes, o.Hash())
+ return err
+
+ })
+ c.Assert(err, IsNil)
+
+ // Shuffle hashes to avoid delta selector getting order right just because
+ // the initial order is correct.
+ auxHashes := make([]plumbing.Hash, len(hashes))
+ for i, j := range rand.Perm(len(hashes)) {
+ auxHashes[j] = hashes[i]
+ }
+ hashes = auxHashes
+
+ buf := bytes.NewBuffer(nil)
+ enc := NewEncoder(buf, storage, false)
+ _, err = enc.Encode(hashes)
+ c.Assert(err, IsNil)
+
+ scanner := NewScanner(buf)
+ storage = memory.NewStorage()
+ d, err := NewDecoder(scanner, storage)
+ c.Assert(err, IsNil)
+ _, err = d.Decode()
+ c.Assert(err, IsNil)
+
+ objIter, err = storage.IterEncodedObjects(plumbing.AnyObject)
+ c.Assert(err, IsNil)
+ obtainedObjects := map[plumbing.Hash]bool{}
+ err = objIter.ForEach(func(o plumbing.EncodedObject) error {
+ obtainedObjects[o.Hash()] = true
+ return nil
+ })
+ c.Assert(err, IsNil)
+ c.Assert(obtainedObjects, DeepEquals, expectedObjects)
+
+ for h := range obtainedObjects {
+ if !expectedObjects[h] {
+ c.Errorf("obtained unexpected object: %s", h)
+ }
+ }
+
+ for h := range expectedObjects {
+ if !obtainedObjects[h] {
+ c.Errorf("missing object: %s", h)
+ }
+ }
+}
diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go
index 551d7ec..b5b0c42 100644
--- a/plumbing/format/packfile/encoder_test.go
+++ b/plumbing/format/packfile/encoder_test.go
@@ -2,11 +2,9 @@ package packfile
import (
"bytes"
- "io"
"github.com/src-d/go-git-fixtures"
"gopkg.in/src-d/go-git.v4/plumbing"
- "gopkg.in/src-d/go-git.v4/plumbing/storer"
"gopkg.in/src-d/go-git.v4/storage/memory"
. "gopkg.in/check.v1"
@@ -88,80 +86,6 @@ func (s *EncoderSuite) TestHashNotFound(c *C) {
c.Assert(err, Equals, plumbing.ErrObjectNotFound)
}
-func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) {
- fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) {
- pf := f.Packfile()
- ph := f.PackfileHash
- storage := memory.NewStorage()
- s.testDecodeEncodeDecode(c, pf, ph, storage)
- })
-}
-
-func (s *EncoderSuite) testDecodeEncodeDecode(c *C,
- pf io.ReadCloser,
- ph plumbing.Hash,
- storage storer.Storer) {
-
- defer func() {
- c.Assert(pf.Close(), IsNil)
- }()
-
- scanner := NewScanner(pf)
-
- d, err := NewDecoder(scanner, storage)
- c.Assert(err, IsNil)
-
- ch, err := d.Decode()
- c.Assert(err, IsNil)
- c.Assert(ch, Equals, ph)
-
- objIter, err := storage.IterEncodedObjects(plumbing.AnyObject)
- c.Assert(err, IsNil)
-
- expectedObjects := map[plumbing.Hash]bool{}
- var hashes []plumbing.Hash
- err = objIter.ForEach(func(o plumbing.EncodedObject) error {
- expectedObjects[o.Hash()] = true
- hashes = append(hashes, o.Hash())
- return err
-
- })
- c.Assert(err, IsNil)
-
- enc := NewEncoder(s.buf, storage, false)
- _, err = enc.Encode(hashes)
- c.Assert(err, IsNil)
-
- scanner = NewScanner(s.buf)
- storage = memory.NewStorage()
- d, err = NewDecoder(scanner, storage)
- c.Assert(err, IsNil)
- _, err = d.Decode()
- c.Assert(err, IsNil)
-
- objIter, err = storage.IterEncodedObjects(plumbing.AnyObject)
- c.Assert(err, IsNil)
- obtainedObjects := map[plumbing.Hash]bool{}
- err = objIter.ForEach(func(o plumbing.EncodedObject) error {
- obtainedObjects[o.Hash()] = true
- return nil
- })
- c.Assert(err, IsNil)
- c.Assert(obtainedObjects, DeepEquals, expectedObjects)
-
- for h := range obtainedObjects {
- if !expectedObjects[h] {
- c.Errorf("obtained unexpected object: %s", h)
- }
- }
-
- for h := range expectedObjects {
- if !obtainedObjects[h] {
- c.Errorf("missing object: %s", h)
- }
- }
-}
-
func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeREF(c *C) {
s.enc = NewEncoder(s.buf, s.store, true)
s.simpleDeltaTest(c)
diff --git a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go
index a3e99c0..14337d1 100644
--- a/plumbing/format/packfile/object_pack.go
+++ b/plumbing/format/packfile/object_pack.go
@@ -1,6 +1,8 @@
package packfile
-import "gopkg.in/src-d/go-git.v4/plumbing"
+import (
+ "gopkg.in/src-d/go-git.v4/plumbing"
+)
// ObjectToPack is a representation of an object that is going to be into a
// pack file.
@@ -39,6 +41,48 @@ func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedOb
}
}
+func (o *ObjectToPack) Type() plumbing.ObjectType {
+ if o.Original != nil {
+ return o.Original.Type()
+ }
+
+ if o.Base != nil {
+ return o.Base.Type()
+ }
+
+ if o.Object != nil {
+ return o.Object.Type()
+ }
+
+ panic("cannot get type")
+}
+
+func (o *ObjectToPack) Hash() plumbing.Hash {
+ if o.Original != nil {
+ return o.Original.Hash()
+ }
+
+ do, ok := o.Object.(plumbing.DeltaObject)
+ if ok {
+ return do.ActualHash()
+ }
+
+ panic("cannot get hash")
+}
+
+func (o *ObjectToPack) Size() int64 {
+ if o.Original != nil {
+ return o.Original.Size()
+ }
+
+ do, ok := o.Object.(plumbing.DeltaObject)
+ if ok {
+ return do.ActualSize()
+ }
+
+ panic("cannot get ObjectToPack size")
+}
+
func (o *ObjectToPack) IsDelta() bool {
if o.Base != nil {
return true