author | Santiago M. Mola <santi@mola.io> | 2017-07-25 15:00:01 +0200
committer | Santiago M. Mola <santi@mola.io> | 2017-07-27 15:33:14 +0200
commit | 87413ced43b02a41359ce7a1a07ab41aec6ee313 (patch)
tree | 07975422ab63bfbb13aefc1a2d53d757c7342848 /plumbing/format
parent | 3834038893d5cacb49e5f2786ad955d26f666546 (diff)
download | go-git-87413ced43b02a41359ce7a1a07ab41aec6ee313.tar.gz
storage: reuse deltas from packfiles
* plumbing: add DeltaObject interface for EncodedObjects that
are deltas and hold additional information about them, such
as the hash of the base object.
* plumbing/storer: add DeltaObjectStorer interface for object
storers that can return DeltaObject. Note that EncodedObject
never returns DeltaObject instances; deltas must be requested
explicitly through DeltaObject.
* storage/filesystem: implement DeltaObjectStorer interface.
* plumbing/packfile: the packfile encoder now supports reusing
deltas that are already computed (e.g. from an existing
packfile) when the storage implements DeltaObjectStorer.
Reusing deltas boosts the performance of packfile generation
(e.g. on push); a sketch of the new interfaces follows below.
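For orientation, here is a minimal sketch of the two interfaces described above. The method names (BaseHash, ActualHash, ActualSize, DeltaObject) are taken from how they are used in the diff below; the embedded interface, doc comments and exact method sets are assumptions, not the authoritative go-git definitions.

```go
package sketch

import "gopkg.in/src-d/go-git.v4/plumbing"

// DeltaObject approximates plumbing.DeltaObject: an EncodedObject that is
// stored as a delta and exposes information about the delta itself.
type DeltaObject interface {
	plumbing.EncodedObject

	// BaseHash is the hash of the object this delta is based on.
	BaseHash() plumbing.Hash
	// ActualHash is the hash of the resolved (undeltified) object.
	ActualHash() plumbing.Hash
	// ActualSize is the size of the resolved object.
	ActualSize() int64
}

// DeltaObjectStorer approximates storer.DeltaObjectStorer: an object storer
// that can hand out deltas directly. Only the DeltaObject method is visible
// in the diff; any other methods of the real interface are omitted here.
type DeltaObjectStorer interface {
	// DeltaObject is like EncodedObject, but may return a DeltaObject.
	DeltaObject(plumbing.ObjectType, plumbing.Hash) (plumbing.EncodedObject, error)
}
```

The delta selector only type-asserts its storer against DeltaObjectStorer and falls back to plain EncodedObject lookups when the assertion fails, so storages that do not implement the new interface keep working unchanged.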
Diffstat (limited to 'plumbing/format')
-rw-r--r-- | plumbing/format/packfile/delta_selector.go | 149
-rw-r--r-- | plumbing/format/packfile/encoder.go | 29
-rw-r--r-- | plumbing/format/packfile/encoder_advanced_test.go | 91
-rw-r--r-- | plumbing/format/packfile/encoder_test.go | 76
-rw-r--r-- | plumbing/format/packfile/object_pack.go | 46
5 files changed, 293 insertions, 98 deletions
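Delta reuse is transparent to callers of the packfile encoder: they keep passing a storage and the hashes to pack, and the encoder decides internally whether existing deltas can be reused. A sketch of that call pattern, based on the new encoder_advanced_test.go below; the parameter type storer.EncodedObjectStorer, the plumbing.Hash return value of Encode, and the helper name packObjects are assumptions.

```go
package sketch

import (
	"io"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
	"gopkg.in/src-d/go-git.v4/plumbing/storer"
)

// packObjects writes the given objects to w as a packfile. If s also
// implements storer.DeltaObjectStorer (e.g. the filesystem storage after this
// change), already-computed deltas are reused instead of being recomputed.
func packObjects(w io.Writer, s storer.EncodedObjectStorer, hashes []plumbing.Hash) (plumbing.Hash, error) {
	// false selects OFS_DELTA entries; pass true for REF_DELTA.
	enc := packfile.NewEncoder(w, s, false)
	return enc.Encode(hashes)
}
```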
diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go
index 20c8cea..4bee6d3 100644
--- a/plumbing/format/packfile/delta_selector.go
+++ b/plumbing/format/packfile/delta_selector.go
@@ -47,17 +47,127 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
 func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
 	var objectsToPack []*ObjectToPack
 	for _, h := range hashes {
-		o, err := dw.storer.EncodedObject(plumbing.AnyObject, h)
+		o, err := dw.encodedDeltaObject(h)
 		if err != nil {
 			return nil, err
 		}
-		objectsToPack = append(objectsToPack, newObjectToPack(o))
+		otp := newObjectToPack(o)
+		if _, ok := o.(plumbing.DeltaObject); ok {
+			otp.Original = nil
+		}
+
+		objectsToPack = append(objectsToPack, otp)
+	}
+
+	if err := dw.fixAndBreakChains(objectsToPack); err != nil {
+		return nil, err
 	}
 
 	return objectsToPack, nil
 }
 
+func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
+	edos, ok := dw.storer.(storer.DeltaObjectStorer)
+	if !ok {
+		return dw.encodedObject(h)
+	}
+
+	return edos.DeltaObject(plumbing.AnyObject, h)
+}
+
+func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
+	return dw.storer.EncodedObject(plumbing.AnyObject, h)
+}
+
+func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error {
+	m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack))
+	for _, otp := range objectsToPack {
+		m[otp.Hash()] = otp
+	}
+
+	for _, otp := range objectsToPack {
+		if err := dw.fixAndBreakChainsOne(m, otp); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error {
+	isDelta := otp.Object.Type() == plumbing.OFSDeltaObject ||
+		otp.Object.Type() == plumbing.REFDeltaObject
+	if !isDelta {
+		return nil
+	}
+
+	// Initial ObjectToPack instances might have a delta assigned to Object
+	// but no actual base initially. Once Base is assigned to a delta, it means
+	// we already fixed it.
+	if otp.Base != nil {
+		return nil
+	}
+
+	do, ok := otp.Object.(plumbing.DeltaObject)
+	if !ok {
+		// if this is not a DeltaObject, then we cannot retrieve its base,
+		// so we have to break the delta chain here.
+		return dw.undeltify(otp)
+	}
+
+	base, ok := objectsToPack[do.BaseHash()]
+	if !ok {
+		// The base of the delta is not in our list of objects to pack, so
+		// we break the chain.
+		return dw.undeltify(otp)
+	}
+
+	if base.Size() <= otp.Size() {
+		// Bases should be bigger
+		return dw.undeltify(otp)
+	}
+
+	if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
+		return err
+	}
+
+	otp.SetDelta(base, otp.Object)
+	return nil
+}
+
+func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error {
+	if otp.Original != nil {
+		return nil
+	}
+
+	isDelta := otp.Object.Type() == plumbing.OFSDeltaObject ||
+		otp.Object.Type() == plumbing.REFDeltaObject
+	if !isDelta {
+		return nil
+	}
+
+	obj, err := dw.encodedObject(otp.Hash())
+	if err != nil {
+		return err
+	}
+
+	otp.Original = obj
+	return nil
+}
+
+// undeltify undeltifies an *ObjectToPack by retrieving the original object from
+// the storer and resetting it.
+func (dw *deltaSelector) undeltify(otp *ObjectToPack) error {
+	if err := dw.restoreOriginal(otp); err != nil {
+		return err
+	}
+
+	otp.Object = otp.Original
+	otp.Depth = 0
+	return nil
+}
+
 func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
 	sort.Sort(byTypeAndSize(objectsToPack))
 }
@@ -66,15 +176,24 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
 	for i := 0; i < len(objectsToPack); i++ {
 		target := objectsToPack[i]
 
-		// We only want to create deltas from specific types
-		if !applyDelta[target.Original.Type()] {
+		// If we already have a delta, we don't try to find a new one for this
+		// object. This happens when a delta is set to be reused from an existing
+		// packfile.
+		if target.IsDelta() {
+			continue
+		}
+
+		// We only want to create deltas from specific types.
+		if !applyDelta[target.Type()] {
 			continue
 		}
 
 		for j := i - 1; j >= 0; j-- {
 			base := objectsToPack[j]
 			// Objects must use only the same type as their delta base.
-			if base.Original.Type() != target.Original.Type() {
+			// Since objectsToPack is sorted by type and size, once we find
+			// a different type, we know we won't find more of them.
+			if base.Type() != target.Type() {
 				break
 			}
 
@@ -89,7 +208,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
 
 func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
 	// If the sizes are radically different, this is a bad pairing.
-	if target.Original.Size() < base.Original.Size()>>4 {
+	if target.Size() < base.Size()>>4 {
 		return nil
 	}
 
@@ -106,10 +225,20 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
 	}
 
 	// If we have to insert a lot to make this work, find another.
-	if base.Original.Size()-target.Object.Size() > msz {
+	if base.Size()-target.Size() > msz {
 		return nil
 	}
 
+	// Original object might not be present if we're reusing a delta, so we
+	// ensure it is restored.
+	if err := dw.restoreOriginal(target); err != nil {
+		return err
+	}
+
+	if err := dw.restoreOriginal(base); err != nil {
+		return err
+	}
+
 	// Now we can generate the delta using originals
 	delta, err := GetDelta(base.Original, target.Original)
 	if err != nil {
@@ -162,13 +291,13 @@ func (a byTypeAndSize) Len() int { return len(a) }
 
 func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
 
 func (a byTypeAndSize) Less(i, j int) bool {
-	if a[i].Object.Type() < a[j].Object.Type() {
+	if a[i].Type() < a[j].Type() {
 		return false
 	}
-	if a[i].Object.Type() > a[j].Object.Type() {
+	if a[i].Type() > a[j].Type() {
 		return true
 	}
-	return a[i].Object.Size() > a[j].Object.Size()
+	return a[i].Size() > a[j].Size()
 }
diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go
index ae83752..1426559 100644
--- a/plumbing/format/packfile/encoder.go
+++ b/plumbing/format/packfile/encoder.go
@@ -18,6 +18,9 @@ type Encoder struct {
 	w      *offsetWriter
 	zw     *zlib.Writer
 	hasher plumbing.Hasher
+	// offsets is a map of object hashes to corresponding offsets in the packfile.
+	// It is used to determine offset of the base of a delta when a OFS_DELTA is
+	// used.
 	offsets      map[plumbing.Hash]int64
 	useRefDeltas bool
 }
@@ -78,25 +81,24 @@ func (e *Encoder) head(numEntries int) error {
 
 func (e *Encoder) entry(o *ObjectToPack) error {
 	offset := e.w.Offset()
+	e.offsets[o.Hash()] = offset
 
 	if o.IsDelta() {
 		if err := e.writeDeltaHeader(o, offset); err != nil {
 			return err
 		}
 	} else {
-		if err := e.entryHead(o.Object.Type(), o.Object.Size()); err != nil {
+		if err := e.entryHead(o.Type(), o.Size()); err != nil {
 			return err
 		}
 	}
 
-	// Save the position using the original hash, maybe a delta will need it
-	e.offsets[o.Original.Hash()] = offset
-
 	e.zw.Reset(e.w)
 	or, err := o.Object.Reader()
 	if err != nil {
 		return err
 	}
+
 	_, err = io.Copy(e.zw, or)
 	if err != nil {
 		return err
@@ -117,9 +119,9 @@ func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error {
 	}
 
 	if e.useRefDeltas {
-		return e.writeRefDeltaHeader(o.Base.Original.Hash())
+		return e.writeRefDeltaHeader(o.Base.Hash())
 	} else {
-		return e.writeOfsDeltaHeader(offset, o.Base.Original.Hash())
+		return e.writeOfsDeltaHeader(offset, o.Base.Hash())
 	}
 }
 
@@ -128,14 +130,19 @@ func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
 }
 
 func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) error {
-	// because it is an offset delta, we need the base
-	// object position
-	offset, ok := e.offsets[base]
+	baseOffset, ok := e.offsets[base]
 	if !ok {
-		return fmt.Errorf("delta base not found. Hash: %v", base)
+		return fmt.Errorf("base for delta not found, base hash: %v", base)
+	}
+
+	// for OFS_DELTA, offset of the base is interpreted as negative offset
+	// relative to the type-byte of the header of the ofs-delta entry.
+	relativeOffset := deltaOffset-baseOffset
+	if relativeOffset <= 0 {
+		return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
 	}
 
-	return binary.WriteVariableWidthInt(e.w, deltaOffset-offset)
+	return binary.WriteVariableWidthInt(e.w, relativeOffset)
 }
 
 func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go
new file mode 100644
index 0000000..d92e2c4
--- /dev/null
+++ b/plumbing/format/packfile/encoder_advanced_test.go
@@ -0,0 +1,91 @@
+package packfile_test
+
+import (
+	"bytes"
+	"math/rand"
+
+	"gopkg.in/src-d/go-git.v4/plumbing"
+	. "gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
+	"gopkg.in/src-d/go-git.v4/plumbing/storer"
+	"gopkg.in/src-d/go-git.v4/storage/filesystem"
+	"gopkg.in/src-d/go-git.v4/storage/memory"
+
+	"github.com/src-d/go-git-fixtures"
+	. "gopkg.in/check.v1"
+)
+
+type EncoderAdvancedSuite struct {
+	fixtures.Suite
+}
+
+var _ = Suite(&EncoderAdvancedSuite{})
+
+func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) {
+	fixs := fixtures.Basic().ByTag("packfile").ByTag(".git")
+	fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
+ ByTag("packfile").ByTag(".git").One()) + fixs.Test(c, func(f *fixtures.Fixture) { + storage, err := filesystem.NewStorage(f.DotGit()) + c.Assert(err, IsNil) + s.testEncodeDecode(c, storage) + }) + +} + +func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) { + + objIter, err := storage.IterEncodedObjects(plumbing.AnyObject) + c.Assert(err, IsNil) + + expectedObjects := map[plumbing.Hash]bool{} + var hashes []plumbing.Hash + err = objIter.ForEach(func(o plumbing.EncodedObject) error { + expectedObjects[o.Hash()] = true + hashes = append(hashes, o.Hash()) + return err + + }) + c.Assert(err, IsNil) + + // Shuffle hashes to avoid delta selector getting order right just because + // the initial order is correct. + auxHashes := make([]plumbing.Hash, len(hashes)) + for i, j := range rand.Perm(len(hashes)) { + auxHashes[j] = hashes[i] + } + hashes = auxHashes + + buf := bytes.NewBuffer(nil) + enc := NewEncoder(buf, storage, false) + _, err = enc.Encode(hashes) + c.Assert(err, IsNil) + + scanner := NewScanner(buf) + storage = memory.NewStorage() + d, err := NewDecoder(scanner, storage) + c.Assert(err, IsNil) + _, err = d.Decode() + c.Assert(err, IsNil) + + objIter, err = storage.IterEncodedObjects(plumbing.AnyObject) + c.Assert(err, IsNil) + obtainedObjects := map[plumbing.Hash]bool{} + err = objIter.ForEach(func(o plumbing.EncodedObject) error { + obtainedObjects[o.Hash()] = true + return nil + }) + c.Assert(err, IsNil) + c.Assert(obtainedObjects, DeepEquals, expectedObjects) + + for h := range obtainedObjects { + if !expectedObjects[h] { + c.Errorf("obtained unexpected object: %s", h) + } + } + + for h := range expectedObjects { + if !obtainedObjects[h] { + c.Errorf("missing object: %s", h) + } + } +} diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index 551d7ec..b5b0c42 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -2,11 +2,9 @@ package packfile import ( "bytes" - "io" "github.com/src-d/go-git-fixtures" "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/storer" "gopkg.in/src-d/go-git.v4/storage/memory" . 
"gopkg.in/check.v1" @@ -88,80 +86,6 @@ func (s *EncoderSuite) TestHashNotFound(c *C) { c.Assert(err, Equals, plumbing.ErrObjectNotFound) } -func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - pf := f.Packfile() - ph := f.PackfileHash - storage := memory.NewStorage() - s.testDecodeEncodeDecode(c, pf, ph, storage) - }) -} - -func (s *EncoderSuite) testDecodeEncodeDecode(c *C, - pf io.ReadCloser, - ph plumbing.Hash, - storage storer.Storer) { - - defer func() { - c.Assert(pf.Close(), IsNil) - }() - - scanner := NewScanner(pf) - - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, ph) - - objIter, err := storage.IterEncodedObjects(plumbing.AnyObject) - c.Assert(err, IsNil) - - expectedObjects := map[plumbing.Hash]bool{} - var hashes []plumbing.Hash - err = objIter.ForEach(func(o plumbing.EncodedObject) error { - expectedObjects[o.Hash()] = true - hashes = append(hashes, o.Hash()) - return err - - }) - c.Assert(err, IsNil) - - enc := NewEncoder(s.buf, storage, false) - _, err = enc.Encode(hashes) - c.Assert(err, IsNil) - - scanner = NewScanner(s.buf) - storage = memory.NewStorage() - d, err = NewDecoder(scanner, storage) - c.Assert(err, IsNil) - _, err = d.Decode() - c.Assert(err, IsNil) - - objIter, err = storage.IterEncodedObjects(plumbing.AnyObject) - c.Assert(err, IsNil) - obtainedObjects := map[plumbing.Hash]bool{} - err = objIter.ForEach(func(o plumbing.EncodedObject) error { - obtainedObjects[o.Hash()] = true - return nil - }) - c.Assert(err, IsNil) - c.Assert(obtainedObjects, DeepEquals, expectedObjects) - - for h := range obtainedObjects { - if !expectedObjects[h] { - c.Errorf("obtained unexpected object: %s", h) - } - } - - for h := range expectedObjects { - if !obtainedObjects[h] { - c.Errorf("missing object: %s", h) - } - } -} - func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeREF(c *C) { s.enc = NewEncoder(s.buf, s.store, true) s.simpleDeltaTest(c) diff --git a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go index a3e99c0..14337d1 100644 --- a/plumbing/format/packfile/object_pack.go +++ b/plumbing/format/packfile/object_pack.go @@ -1,6 +1,8 @@ package packfile -import "gopkg.in/src-d/go-git.v4/plumbing" +import ( + "gopkg.in/src-d/go-git.v4/plumbing" +) // ObjectToPack is a representation of an object that is going to be into a // pack file. @@ -39,6 +41,48 @@ func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedOb } } +func (o *ObjectToPack) Type() plumbing.ObjectType { + if o.Original != nil { + return o.Original.Type() + } + + if o.Base != nil { + return o.Base.Type() + } + + if o.Object != nil { + return o.Object.Type() + } + + panic("cannot get type") +} + +func (o *ObjectToPack) Hash() plumbing.Hash { + if o.Original != nil { + return o.Original.Hash() + } + + do, ok := o.Object.(plumbing.DeltaObject) + if ok { + return do.ActualHash() + } + + panic("cannot get hash") +} + +func (o *ObjectToPack) Size() int64 { + if o.Original != nil { + return o.Original.Size() + } + + do, ok := o.Object.(plumbing.DeltaObject) + if ok { + return do.ActualSize() + } + + panic("cannot get ObjectToPack size") +} + func (o *ObjectToPack) IsDelta() bool { if o.Base != nil { return true |