From 87413ced43b02a41359ce7a1a07ab41aec6ee313 Mon Sep 17 00:00:00 2001 From: "Santiago M. Mola" Date: Tue, 25 Jul 2017 15:00:01 +0200 Subject: storage: reuse deltas from packfiles * plumbing: add DeltaObject interface for EncodedObjects that are deltas and hold additional information about them, such as the hash of the base object. * plumbing/storer: add DeltaObjectStorer interface for object storers that can return DeltaObject. Note that calls to EncodedObject will never return instances of DeltaObject. That requires explicit calls to DeltaObject. * storage/filesystem: implement DeltaObjectStorer interface. * plumbing/packfile: packfile encoder now supports reusing deltas that are already computed (e.g. from an existing packfile) if the storage implements DeltaObjectStorer. Reusing deltas boosts performance of packfile generation (e.g. on push). --- plumbing/format/packfile/delta_selector.go | 149 +++++++++++++++++++++++++++-- 1 file changed, 139 insertions(+), 10 deletions(-) (limited to 'plumbing/format/packfile/delta_selector.go') diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go index 20c8cea..4bee6d3 100644 --- a/plumbing/format/packfile/delta_selector.go +++ b/plumbing/format/packfile/delta_selector.go @@ -47,17 +47,127 @@ func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) { var objectsToPack []*ObjectToPack for _, h := range hashes { - o, err := dw.storer.EncodedObject(plumbing.AnyObject, h) + o, err := dw.encodedDeltaObject(h) if err != nil { return nil, err } - objectsToPack = append(objectsToPack, newObjectToPack(o)) + otp := newObjectToPack(o) + if _, ok := o.(plumbing.DeltaObject); ok { + otp.Original = nil + } + + objectsToPack = append(objectsToPack, otp) + } + + if err := dw.fixAndBreakChains(objectsToPack); err != nil { + return nil, err } return objectsToPack, nil } +func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) { + edos, ok := dw.storer.(storer.DeltaObjectStorer) + if !ok { + return dw.encodedObject(h) + } + + return edos.DeltaObject(plumbing.AnyObject, h) +} + +func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) { + return dw.storer.EncodedObject(plumbing.AnyObject, h) +} + +func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error { + m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack)) + for _, otp := range objectsToPack { + m[otp.Hash()] = otp + } + + for _, otp := range objectsToPack { + if err := dw.fixAndBreakChainsOne(m, otp); err != nil { + return err + } + } + + return nil +} + +func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error { + isDelta := otp.Object.Type() == plumbing.OFSDeltaObject || + otp.Object.Type() == plumbing.REFDeltaObject + if !isDelta { + return nil + } + + // Initial ObjectToPack instances might have a delta assigned to Object + // but no actual base initially. Once Base is assigned to a delta, it means + // we already fixed it. + if otp.Base != nil { + return nil + } + + do, ok := otp.Object.(plumbing.DeltaObject) + if !ok { + // if this is not a DeltaObject, then we cannot retrieve its base, + // so we have to break the delta chain here. + return dw.undeltify(otp) + } + + base, ok := objectsToPack[do.BaseHash()] + if !ok { + // The base of the delta is not in our list of objects to pack, so + // we break the chain. + return dw.undeltify(otp) + } + + if base.Size() <= otp.Size() { + // Bases should be bigger + return dw.undeltify(otp) + } + + if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil { + return err + } + + otp.SetDelta(base, otp.Object) + return nil +} + +func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error { + if otp.Original != nil { + return nil + } + + isDelta := otp.Object.Type() == plumbing.OFSDeltaObject || + otp.Object.Type() == plumbing.REFDeltaObject + if !isDelta { + return nil + } + + obj, err := dw.encodedObject(otp.Hash()) + if err != nil { + return err + } + + otp.Original = obj + return nil +} + +// undeltify undeltifies an *ObjectToPack by retrieving the original object from +// the storer and resetting it. +func (dw *deltaSelector) undeltify(otp *ObjectToPack) error { + if err := dw.restoreOriginal(otp); err != nil { + return err + } + + otp.Object = otp.Original + otp.Depth = 0 + return nil +} + func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) { sort.Sort(byTypeAndSize(objectsToPack)) } @@ -66,15 +176,24 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { for i := 0; i < len(objectsToPack); i++ { target := objectsToPack[i] - // We only want to create deltas from specific types - if !applyDelta[target.Original.Type()] { + // If we already have a delta, we don't try to find a new one for this + // object. This happens when a delta is set to be reused from an existing + // packfile. + if target.IsDelta() { + continue + } + + // We only want to create deltas from specific types. + if !applyDelta[target.Type()] { continue } for j := i - 1; j >= 0; j-- { base := objectsToPack[j] // Objects must use only the same type as their delta base. - if base.Original.Type() != target.Original.Type() { + // Since objectsToPack is sorted by type and size, once we find + // a different type, we know we won't find more of them. + if base.Type() != target.Type() { break } @@ -89,7 +208,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error { // If the sizes are radically different, this is a bad pairing. - if target.Original.Size() < base.Original.Size()>>4 { + if target.Size() < base.Size()>>4 { return nil } @@ -106,10 +225,20 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error { } // If we have to insert a lot to make this work, find another. - if base.Original.Size()-target.Object.Size() > msz { + if base.Size()-target.Size() > msz { return nil } + // Original object might not be present if we're reusing a delta, so we + // ensure it is restored. + if err := dw.restoreOriginal(target); err != nil { + return err + } + + if err := dw.restoreOriginal(base); err != nil { + return err + } + // Now we can generate the delta using originals delta, err := GetDelta(base.Original, target.Original) if err != nil { @@ -162,13 +291,13 @@ func (a byTypeAndSize) Len() int { return len(a) } func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byTypeAndSize) Less(i, j int) bool { - if a[i].Object.Type() < a[j].Object.Type() { + if a[i].Type() < a[j].Type() { return false } - if a[i].Object.Type() > a[j].Object.Type() { + if a[i].Type() > a[j].Type() { return true } - return a[i].Object.Size() > a[j].Object.Size() + return a[i].Size() > a[j].Size() } -- cgit