diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2017-07-27 18:04:00 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-07-27 18:04:00 +0200 |
commit | 86f33ed017b55898758bf8900a085f355b2793d0 (patch) | |
tree | 1e741bed0672a7afb62cda37ca0b1d86fad52889 /plumbing/format | |
parent | 7b08a3005480a50f0f4290aff8f3702085d5e30d (diff) | |
parent | 16b24f84e9342234ad90da27a6532887b05d1965 (diff) | |
download | go-git-86f33ed017b55898758bf8900a085f355b2793d0.tar.gz |
Merge pull request #515 from smola/reuse-packed-objects
storage: reuse deltas from packfiles
Diffstat (limited to 'plumbing/format')
-rw-r--r-- | plumbing/format/packfile/delta_selector.go | 145 | ||||
-rw-r--r-- | plumbing/format/packfile/encoder.go | 29 | ||||
-rw-r--r-- | plumbing/format/packfile/encoder_advanced_test.go | 91 | ||||
-rw-r--r-- | plumbing/format/packfile/encoder_test.go | 62 | ||||
-rw-r--r-- | plumbing/format/packfile/object_pack.go | 46 |
5 files changed, 289 insertions, 84 deletions
diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go index 20c8cea..efcbd53 100644 --- a/plumbing/format/packfile/delta_selector.go +++ b/plumbing/format/packfile/delta_selector.go @@ -47,17 +47,123 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) { var objectsToPack []*ObjectToPack for _, h := range hashes { - o, err := dw.storer.EncodedObject(plumbing.AnyObject, h) + o, err := dw.encodedDeltaObject(h) if err != nil { return nil, err } - objectsToPack = append(objectsToPack, newObjectToPack(o)) + otp := newObjectToPack(o) + if _, ok := o.(plumbing.DeltaObject); ok { + otp.Original = nil + } + + objectsToPack = append(objectsToPack, otp) + } + + if err := dw.fixAndBreakChains(objectsToPack); err != nil { + return nil, err } return objectsToPack, nil } +func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) { + edos, ok := dw.storer.(storer.DeltaObjectStorer) + if !ok { + return dw.encodedObject(h) + } + + return edos.DeltaObject(plumbing.AnyObject, h) +} + +func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) { + return dw.storer.EncodedObject(plumbing.AnyObject, h) +} + +func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error { + m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack)) + for _, otp := range objectsToPack { + m[otp.Hash()] = otp + } + + for _, otp := range objectsToPack { + if err := dw.fixAndBreakChainsOne(m, otp); err != nil { + return err + } + } + + return nil +} + +func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error { + if !otp.Object.Type().IsDelta() { + return nil + } + + // Initial ObjectToPack instances might have a delta assigned to Object + // but no actual base initially. Once Base is assigned to a delta, it means + // we already fixed it. + if otp.Base != nil { + return nil + } + + do, ok := otp.Object.(plumbing.DeltaObject) + if !ok { + // if this is not a DeltaObject, then we cannot retrieve its base, + // so we have to break the delta chain here. + return dw.undeltify(otp) + } + + base, ok := objectsToPack[do.BaseHash()] + if !ok { + // The base of the delta is not in our list of objects to pack, so + // we break the chain. + return dw.undeltify(otp) + } + + if base.Size() <= otp.Size() { + // Bases should be bigger + return dw.undeltify(otp) + } + + if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil { + return err + } + + otp.SetDelta(base, otp.Object) + return nil +} + +func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error { + if otp.Original != nil { + return nil + } + + if !otp.Object.Type().IsDelta() { + return nil + } + + obj, err := dw.encodedObject(otp.Hash()) + if err != nil { + return err + } + + otp.Original = obj + return nil +} + +// undeltify undeltifies an *ObjectToPack by retrieving the original object from +// the storer and resetting it. +func (dw *deltaSelector) undeltify(otp *ObjectToPack) error { + if err := dw.restoreOriginal(otp); err != nil { + return err + } + + otp.Object = otp.Original + otp.Depth = 0 + return nil +} + func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) { sort.Sort(byTypeAndSize(objectsToPack)) } @@ -66,15 +172,24 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { for i := 0; i < len(objectsToPack); i++ { target := objectsToPack[i] - // We only want to create deltas from specific types - if !applyDelta[target.Original.Type()] { + // If we already have a delta, we don't try to find a new one for this + // object. This happens when a delta is set to be reused from an existing + // packfile. + if target.IsDelta() { + continue + } + + // We only want to create deltas from specific types. + if !applyDelta[target.Type()] { continue } for j := i - 1; j >= 0; j-- { base := objectsToPack[j] // Objects must use only the same type as their delta base. - if base.Original.Type() != target.Original.Type() { + // Since objectsToPack is sorted by type and size, once we find + // a different type, we know we won't find more of them. + if base.Type() != target.Type() { break } @@ -89,7 +204,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error { // If the sizes are radically different, this is a bad pairing. - if target.Original.Size() < base.Original.Size()>>4 { + if target.Size() < base.Size()>>4 { return nil } @@ -106,10 +221,20 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error { } // If we have to insert a lot to make this work, find another. - if base.Original.Size()-target.Object.Size() > msz { + if base.Size()-target.Size() > msz { return nil } + // Original object might not be present if we're reusing a delta, so we + // ensure it is restored. + if err := dw.restoreOriginal(target); err != nil { + return err + } + + if err := dw.restoreOriginal(base); err != nil { + return err + } + // Now we can generate the delta using originals delta, err := GetDelta(base.Original, target.Original) if err != nil { @@ -162,13 +287,13 @@ func (a byTypeAndSize) Len() int { return len(a) } func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byTypeAndSize) Less(i, j int) bool { - if a[i].Object.Type() < a[j].Object.Type() { + if a[i].Type() < a[j].Type() { return false } - if a[i].Object.Type() > a[j].Object.Type() { + if a[i].Type() > a[j].Type() { return true } - return a[i].Object.Size() > a[j].Object.Size() + return a[i].Size() > a[j].Size() } diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go index ae83752..1426559 100644 --- a/plumbing/format/packfile/encoder.go +++ b/plumbing/format/packfile/encoder.go @@ -18,6 +18,9 @@ type Encoder struct { w *offsetWriter zw *zlib.Writer hasher plumbing.Hasher + // offsets is a map of object hashes to corresponding offsets in the packfile. + // It is used to determine offset of the base of a delta when a OFS_DELTA is + // used. offsets map[plumbing.Hash]int64 useRefDeltas bool } @@ -78,25 +81,24 @@ func (e *Encoder) head(numEntries int) error { func (e *Encoder) entry(o *ObjectToPack) error { offset := e.w.Offset() + e.offsets[o.Hash()] = offset if o.IsDelta() { if err := e.writeDeltaHeader(o, offset); err != nil { return err } } else { - if err := e.entryHead(o.Object.Type(), o.Object.Size()); err != nil { + if err := e.entryHead(o.Type(), o.Size()); err != nil { return err } } - // Save the position using the original hash, maybe a delta will need it - e.offsets[o.Original.Hash()] = offset - e.zw.Reset(e.w) or, err := o.Object.Reader() if err != nil { return err } + _, err = io.Copy(e.zw, or) if err != nil { return err @@ -117,9 +119,9 @@ func (e *Encoder) writeDeltaHeader(o *ObjectToPack, offset int64) error { } if e.useRefDeltas { - return e.writeRefDeltaHeader(o.Base.Original.Hash()) + return e.writeRefDeltaHeader(o.Base.Hash()) } else { - return e.writeOfsDeltaHeader(offset, o.Base.Original.Hash()) + return e.writeOfsDeltaHeader(offset, o.Base.Hash()) } } @@ -128,14 +130,19 @@ func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error { } func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) error { - // because it is an offset delta, we need the base - // object position - offset, ok := e.offsets[base] + baseOffset, ok := e.offsets[base] if !ok { - return fmt.Errorf("delta base not found. Hash: %v", base) + return fmt.Errorf("base for delta not found, base hash: %v", base) + } + + // for OFS_DELTA, offset of the base is interpreted as negative offset + // relative to the type-byte of the header of the ofs-delta entry. + relativeOffset := deltaOffset-baseOffset + if relativeOffset <= 0 { + return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset) } - return binary.WriteVariableWidthInt(e.w, deltaOffset-offset) + return binary.WriteVariableWidthInt(e.w, relativeOffset) } func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error { diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go new file mode 100644 index 0000000..d92e2c4 --- /dev/null +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -0,0 +1,91 @@ +package packfile_test + +import ( + "bytes" + "math/rand" + + "gopkg.in/src-d/go-git.v4/plumbing" + . "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" + "gopkg.in/src-d/go-git.v4/plumbing/storer" + "gopkg.in/src-d/go-git.v4/storage/filesystem" + "gopkg.in/src-d/go-git.v4/storage/memory" + + "github.com/src-d/go-git-fixtures" + . "gopkg.in/check.v1" +) + +type EncoderAdvancedSuite struct { + fixtures.Suite +} + +var _ = Suite(&EncoderAdvancedSuite{}) + +func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) { + fixs := fixtures.Basic().ByTag("packfile").ByTag(".git") + fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git"). + ByTag("packfile").ByTag(".git").One()) + fixs.Test(c, func(f *fixtures.Fixture) { + storage, err := filesystem.NewStorage(f.DotGit()) + c.Assert(err, IsNil) + s.testEncodeDecode(c, storage) + }) + +} + +func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) { + + objIter, err := storage.IterEncodedObjects(plumbing.AnyObject) + c.Assert(err, IsNil) + + expectedObjects := map[plumbing.Hash]bool{} + var hashes []plumbing.Hash + err = objIter.ForEach(func(o plumbing.EncodedObject) error { + expectedObjects[o.Hash()] = true + hashes = append(hashes, o.Hash()) + return err + + }) + c.Assert(err, IsNil) + + // Shuffle hashes to avoid delta selector getting order right just because + // the initial order is correct. + auxHashes := make([]plumbing.Hash, len(hashes)) + for i, j := range rand.Perm(len(hashes)) { + auxHashes[j] = hashes[i] + } + hashes = auxHashes + + buf := bytes.NewBuffer(nil) + enc := NewEncoder(buf, storage, false) + _, err = enc.Encode(hashes) + c.Assert(err, IsNil) + + scanner := NewScanner(buf) + storage = memory.NewStorage() + d, err := NewDecoder(scanner, storage) + c.Assert(err, IsNil) + _, err = d.Decode() + c.Assert(err, IsNil) + + objIter, err = storage.IterEncodedObjects(plumbing.AnyObject) + c.Assert(err, IsNil) + obtainedObjects := map[plumbing.Hash]bool{} + err = objIter.ForEach(func(o plumbing.EncodedObject) error { + obtainedObjects[o.Hash()] = true + return nil + }) + c.Assert(err, IsNil) + c.Assert(obtainedObjects, DeepEquals, expectedObjects) + + for h := range obtainedObjects { + if !expectedObjects[h] { + c.Errorf("obtained unexpected object: %s", h) + } + } + + for h := range expectedObjects { + if !obtainedObjects[h] { + c.Errorf("missing object: %s", h) + } + } +} diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index fa01ed0..b5b0c42 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -86,68 +86,6 @@ func (s *EncoderSuite) TestHashNotFound(c *C) { c.Assert(err, Equals, plumbing.ErrObjectNotFound) } -func (s *EncoderSuite) TestDecodeEncodeDecode(c *C) { - fixtures.Basic().ByTag("packfile").Test(c, func(f *fixtures.Fixture) { - scanner := NewScanner(f.Packfile()) - storage := memory.NewStorage() - - d, err := NewDecoder(scanner, storage) - c.Assert(err, IsNil) - - ch, err := d.Decode() - c.Assert(err, IsNil) - c.Assert(ch, Equals, f.PackfileHash) - - objIter, err := d.o.IterEncodedObjects(plumbing.AnyObject) - c.Assert(err, IsNil) - - objects := []plumbing.EncodedObject{} - hashes := []plumbing.Hash{} - err = objIter.ForEach(func(o plumbing.EncodedObject) error { - objects = append(objects, o) - hash, err := s.store.SetEncodedObject(o) - c.Assert(err, IsNil) - - hashes = append(hashes, hash) - - return err - - }) - c.Assert(err, IsNil) - _, err = s.enc.Encode(hashes) - c.Assert(err, IsNil) - - scanner = NewScanner(s.buf) - storage = memory.NewStorage() - d, err = NewDecoder(scanner, storage) - c.Assert(err, IsNil) - _, err = d.Decode() - c.Assert(err, IsNil) - - objIter, err = d.o.IterEncodedObjects(plumbing.AnyObject) - c.Assert(err, IsNil) - obtainedObjects := []plumbing.EncodedObject{} - err = objIter.ForEach(func(o plumbing.EncodedObject) error { - obtainedObjects = append(obtainedObjects, o) - - return nil - }) - c.Assert(err, IsNil) - c.Assert(len(obtainedObjects), Equals, len(objects)) - - equals := 0 - for _, oo := range obtainedObjects { - for _, o := range objects { - if o.Hash() == oo.Hash() { - equals++ - } - } - } - - c.Assert(len(obtainedObjects), Equals, equals) - }) -} - func (s *EncoderSuite) TestDecodeEncodeWithDeltaDecodeREF(c *C) { s.enc = NewEncoder(s.buf, s.store, true) s.simpleDeltaTest(c) diff --git a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go index a3e99c0..14337d1 100644 --- a/plumbing/format/packfile/object_pack.go +++ b/plumbing/format/packfile/object_pack.go @@ -1,6 +1,8 @@ package packfile -import "gopkg.in/src-d/go-git.v4/plumbing" +import ( + "gopkg.in/src-d/go-git.v4/plumbing" +) // ObjectToPack is a representation of an object that is going to be into a // pack file. @@ -39,6 +41,48 @@ func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedOb } } +func (o *ObjectToPack) Type() plumbing.ObjectType { + if o.Original != nil { + return o.Original.Type() + } + + if o.Base != nil { + return o.Base.Type() + } + + if o.Object != nil { + return o.Object.Type() + } + + panic("cannot get type") +} + +func (o *ObjectToPack) Hash() plumbing.Hash { + if o.Original != nil { + return o.Original.Hash() + } + + do, ok := o.Object.(plumbing.DeltaObject) + if ok { + return do.ActualHash() + } + + panic("cannot get hash") +} + +func (o *ObjectToPack) Size() int64 { + if o.Original != nil { + return o.Original.Size() + } + + do, ok := o.Object.(plumbing.DeltaObject) + if ok { + return do.ActualSize() + } + + panic("cannot get ObjectToPack size") +} + func (o *ObjectToPack) IsDelta() bool { if o.Base != nil { return true |