aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format/packfile/diff_delta.go
diff options
context:
space:
mode:
authorAntonio Jesus Navarro Perez <antnavper@gmail.com>2017-05-23 14:39:42 +0200
committerAntonio Jesus Navarro Perez <antnavper@gmail.com>2017-05-24 10:55:43 +0200
commitf369a7820ddc6224d5318d562de49e992942ad1f (patch)
treef3d79534783d1dbe438f076edb00e0962122feca /plumbing/format/packfile/diff_delta.go
parentf663a9384619965ed8df7a7224e6f15ad18ed4af (diff)
downloadgo-git-f369a7820ddc6224d5318d562de49e992942ad1f.tar.gz
format/packfile: improve binary delta algorithm
Implemented algorithm described in "File System Support for Delta Compression" paper, from "Joshua P. MacDonald".
Diffstat (limited to 'plumbing/format/packfile/diff_delta.go')
-rw-r--r--plumbing/format/packfile/diff_delta.go138
1 files changed, 94 insertions, 44 deletions
diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go
index 40d450f..e3438aa 100644
--- a/plumbing/format/packfile/diff_delta.go
+++ b/plumbing/format/packfile/diff_delta.go
@@ -1,6 +1,8 @@
package packfile
import (
+ "bytes"
+ "hash/adler32"
"io/ioutil"
"gopkg.in/src-d/go-git.v4/plumbing"
@@ -11,7 +13,8 @@ import (
// for more info
const (
- maxCopyLen = 0xffff
+ // Standard chunk size used to generate fingerprints
+ s = 16
)
// GetDelta returns an EncodedObject of type OFSDeltaObject. Base and Target object,
@@ -51,52 +54,99 @@ func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, erro
return delta, nil
}
-// DiffDelta returns the delta that transforms baseBuf into targetBuf.
-func DiffDelta(baseBuf []byte, targetBuf []byte) []byte {
- var outBuff []byte
-
- outBuff = append(outBuff, deltaEncodeSize(len(baseBuf))...)
- outBuff = append(outBuff, deltaEncodeSize(len(targetBuf))...)
-
- sm := newMatcher(baseBuf, targetBuf)
- for _, op := range sm.GetOpCodes() {
- switch {
- case op.Tag == tagEqual:
- copyStart := op.I1
- copyLen := op.I2 - op.I1
- for {
- if copyLen <= 0 {
- break
- }
- var toCopy int
- if copyLen < maxCopyLen {
- toCopy = copyLen
- } else {
- toCopy = maxCopyLen
- }
-
- outBuff = append(outBuff, encodeCopyOperation(copyStart, toCopy)...)
- copyStart += toCopy
- copyLen -= toCopy
- }
- case op.Tag == tagReplace || op.Tag == tagInsert:
- s := op.J2 - op.J1
- o := op.J1
- for {
- if s <= 127 {
- break
- }
- outBuff = append(outBuff, byte(127))
- outBuff = append(outBuff, targetBuf[o:o+127]...)
- s -= 127
- o += 127
- }
- outBuff = append(outBuff, byte(s))
- outBuff = append(outBuff, targetBuf[o:o+s]...)
+// DiffDelta returns the delta that transforms src into tgt.
+func DiffDelta(src []byte, tgt []byte) []byte {
+ buf := bytes.NewBuffer(nil)
+ buf.Write(deltaEncodeSize(len(src)))
+ buf.Write(deltaEncodeSize(len(tgt)))
+
+ sindex := initMatch(src)
+
+ ibuf := bytes.NewBuffer(nil)
+ for i := 0; i < len(tgt); i++ {
+ offset, l := findMatch(src, tgt, sindex, i)
+
+ if l < s {
+ ibuf.WriteByte(tgt[i])
+ } else {
+ encodeInsertOperation(ibuf, buf)
+ buf.Write(encodeCopyOperation(offset, l))
+ i += l - 1
+ }
+ }
+
+ encodeInsertOperation(ibuf, buf)
+
+ return buf.Bytes()
+}
+
+func encodeInsertOperation(ibuf, buf *bytes.Buffer) {
+ if ibuf.Len() == 0 {
+ return
+ }
+
+ b := ibuf.Bytes()
+ s := ibuf.Len()
+ o := 0
+ for {
+ if s <= 127 {
+ break
}
+ buf.WriteByte(byte(127))
+ buf.Write(b[o : o+127])
+ s -= 127
+ o += 127
+ }
+ buf.WriteByte(byte(s))
+ buf.Write(b[o : o+s])
+
+ ibuf.Reset()
+}
+
+func initMatch(src []byte) map[uint32]int {
+ i := 0
+ index := make(map[uint32]int)
+ for {
+ if i+s > len(src) {
+ break
+ }
+
+ ch := adler32.Checksum(src[i : i+s])
+ index[ch] = i
+ i += s
+ }
+
+ return index
+}
+
+func findMatch(src, tgt []byte, sindex map[uint32]int, tgtOffset int) (srcOffset, l int) {
+ if len(tgt) >= tgtOffset+s {
+ ch := adler32.Checksum(tgt[tgtOffset : tgtOffset+s])
+ var ok bool
+ srcOffset, ok = sindex[ch]
+ if !ok {
+ return
+ }
+
+ l = matchLength(src, tgt, tgtOffset, srcOffset)
+ }
+
+ return
+}
+
+func matchLength(src, tgt []byte, otgt, osrc int) int {
+ l := 0
+ for {
+ if (osrc >= len(src) || otgt >= len(tgt)) || src[osrc] != tgt[otgt] {
+ break
+ }
+
+ l++
+ osrc++
+ otgt++
}
- return outBuff
+ return l
}
func deltaEncodeSize(size int) []byte {