diff options
Diffstat (limited to 'plumbing/format/packfile/delta_selector.go')
-rw-r--r-- | plumbing/format/packfile/delta_selector.go | 169 |
1 files changed, 169 insertions, 0 deletions
diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go new file mode 100644 index 0000000..a73a209 --- /dev/null +++ b/plumbing/format/packfile/delta_selector.go @@ -0,0 +1,169 @@ +package packfile + +import ( + "sort" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +const ( + // deltas based on deltas, how many steps we can do. + // 50 is the default value used in JGit + maxDepth = int64(50) +) + +// applyDelta is the set of object types that we should apply deltas +var applyDelta = map[plumbing.ObjectType]bool{ + plumbing.BlobObject: true, + plumbing.TreeObject: true, +} + +type deltaSelector struct { + storer storer.EncodedObjectStorer +} + +func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector { + return &deltaSelector{s} +} + +// ObjectsToPack creates a list of ObjectToPack from the hashes provided, +// creating deltas if it's suitable, using an specific internal logic +func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) { + otp, err := dw.objectsToPack(hashes) + if err != nil { + return nil, err + } + + dw.sort(otp) + + if err := dw.walk(otp); err != nil { + return nil, err + } + + return otp, nil +} + +func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) { + var objectsToPack []*ObjectToPack + for _, h := range hashes { + o, err := dw.storer.EncodedObject(plumbing.AnyObject, h) + if err != nil { + return nil, err + } + + objectsToPack = append(objectsToPack, newObjectToPack(o)) + } + + return objectsToPack, nil +} + +func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) { + sort.Sort(byTypeAndSize(objectsToPack)) +} + +func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { + for i := 0; i < len(objectsToPack); i++ { + target := objectsToPack[i] + + // We only want to create deltas from specific types + if !applyDelta[target.Original.Type()] { + continue + } + + for j := i - 1; j >= 0; j-- { + base := objectsToPack[j] + // Objects must use only the same type as their delta base. + if base.Original.Type() != target.Original.Type() { + break + } + + if err := dw.tryToDeltify(base, target); err != nil { + return err + } + } + } + + return nil +} + +func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error { + // If the sizes are radically different, this is a bad pairing. + if target.Original.Size() < base.Original.Size()>>4 { + return nil + } + + msz := dw.deltaSizeLimit( + target.Object.Size(), + base.Depth, + target.Depth, + target.IsDelta(), + ) + + // Nearly impossible to fit useful delta. + if msz <= 8 { + return nil + } + + // If we have to insert a lot to make this work, find another. + if base.Original.Size()-target.Object.Size() > msz { + return nil + } + + // Now we can generate the delta using originals + delta, err := GetDelta(base.Original, target.Original) + if err != nil { + return err + } + + // if delta better than target + if delta.Size() < msz { + target.SetDelta(base, delta) + } + + return nil +} + +func (dw *deltaSelector) deltaSizeLimit(targetSize int64, baseDepth int, + targetDepth int, targetDelta bool) int64 { + if !targetDelta { + // Any delta should be no more than 50% of the original size + // (for text files deflate of whole form should shrink 50%). + n := targetSize >> 1 + + // Evenly distribute delta size limits over allowed depth. + // If src is non-delta (depth = 0), delta <= 50% of original. + // If src is almost at limit (9/10), delta <= 10% of original. + return n * (maxDepth - int64(baseDepth)) / maxDepth + } + + // With a delta base chosen any new delta must be "better". + // Retain the distribution described above. + d := int64(targetDepth) + n := targetSize + + // If src is whole (depth=0) and base is near limit (depth=9/10) + // any delta using src can be 10x larger and still be better. + // + // If src is near limit (depth=9/10) and base is whole (depth=0) + // a new delta dependent on src must be 1/10th the size. + return n * (maxDepth - int64(baseDepth)) / (maxDepth - d) +} + +type byTypeAndSize []*ObjectToPack + +func (a byTypeAndSize) Len() int { return len(a) } + +func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +func (a byTypeAndSize) Less(i, j int) bool { + if a[i].Object.Type() < a[j].Object.Type() { + return false + } + + if a[i].Object.Type() > a[j].Object.Type() { + return true + } + + return a[i].Object.Size() > a[j].Object.Size() +} |