diff options
author | Antonio Jesus Navarro Perez <antnavper@gmail.com> | 2017-05-23 14:39:42 +0200 |
---|---|---|
committer | Antonio Jesus Navarro Perez <antnavper@gmail.com> | 2017-05-24 10:55:43 +0200 |
commit | f369a7820ddc6224d5318d562de49e992942ad1f (patch) | |
tree | f3d79534783d1dbe438f076edb00e0962122feca /plumbing/format/packfile/diff_delta.go | |
parent | f663a9384619965ed8df7a7224e6f15ad18ed4af (diff) | |
download | go-git-f369a7820ddc6224d5318d562de49e992942ad1f.tar.gz |
format/packfile: improve binary delta algorithm
Implemented algorithm described in "File System Support for Delta Compression" paper, from "Joshua P. MacDonald".
Diffstat (limited to 'plumbing/format/packfile/diff_delta.go')
-rw-r--r-- | plumbing/format/packfile/diff_delta.go | 138 |
1 files changed, 94 insertions, 44 deletions
diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go index 40d450f..e3438aa 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -1,6 +1,8 @@ package packfile import ( + "bytes" + "hash/adler32" "io/ioutil" "gopkg.in/src-d/go-git.v4/plumbing" @@ -11,7 +13,8 @@ import ( // for more info const ( - maxCopyLen = 0xffff + // Standard chunk size used to generate fingerprints + s = 16 ) // GetDelta returns an EncodedObject of type OFSDeltaObject. Base and Target object, @@ -51,52 +54,99 @@ func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, erro return delta, nil } -// DiffDelta returns the delta that transforms baseBuf into targetBuf. -func DiffDelta(baseBuf []byte, targetBuf []byte) []byte { - var outBuff []byte - - outBuff = append(outBuff, deltaEncodeSize(len(baseBuf))...) - outBuff = append(outBuff, deltaEncodeSize(len(targetBuf))...) - - sm := newMatcher(baseBuf, targetBuf) - for _, op := range sm.GetOpCodes() { - switch { - case op.Tag == tagEqual: - copyStart := op.I1 - copyLen := op.I2 - op.I1 - for { - if copyLen <= 0 { - break - } - var toCopy int - if copyLen < maxCopyLen { - toCopy = copyLen - } else { - toCopy = maxCopyLen - } - - outBuff = append(outBuff, encodeCopyOperation(copyStart, toCopy)...) - copyStart += toCopy - copyLen -= toCopy - } - case op.Tag == tagReplace || op.Tag == tagInsert: - s := op.J2 - op.J1 - o := op.J1 - for { - if s <= 127 { - break - } - outBuff = append(outBuff, byte(127)) - outBuff = append(outBuff, targetBuf[o:o+127]...) - s -= 127 - o += 127 - } - outBuff = append(outBuff, byte(s)) - outBuff = append(outBuff, targetBuf[o:o+s]...) +// DiffDelta returns the delta that transforms src into tgt. +func DiffDelta(src []byte, tgt []byte) []byte { + buf := bytes.NewBuffer(nil) + buf.Write(deltaEncodeSize(len(src))) + buf.Write(deltaEncodeSize(len(tgt))) + + sindex := initMatch(src) + + ibuf := bytes.NewBuffer(nil) + for i := 0; i < len(tgt); i++ { + offset, l := findMatch(src, tgt, sindex, i) + + if l < s { + ibuf.WriteByte(tgt[i]) + } else { + encodeInsertOperation(ibuf, buf) + buf.Write(encodeCopyOperation(offset, l)) + i += l - 1 + } + } + + encodeInsertOperation(ibuf, buf) + + return buf.Bytes() +} + +func encodeInsertOperation(ibuf, buf *bytes.Buffer) { + if ibuf.Len() == 0 { + return + } + + b := ibuf.Bytes() + s := ibuf.Len() + o := 0 + for { + if s <= 127 { + break } + buf.WriteByte(byte(127)) + buf.Write(b[o : o+127]) + s -= 127 + o += 127 + } + buf.WriteByte(byte(s)) + buf.Write(b[o : o+s]) + + ibuf.Reset() +} + +func initMatch(src []byte) map[uint32]int { + i := 0 + index := make(map[uint32]int) + for { + if i+s > len(src) { + break + } + + ch := adler32.Checksum(src[i : i+s]) + index[ch] = i + i += s + } + + return index +} + +func findMatch(src, tgt []byte, sindex map[uint32]int, tgtOffset int) (srcOffset, l int) { + if len(tgt) >= tgtOffset+s { + ch := adler32.Checksum(tgt[tgtOffset : tgtOffset+s]) + var ok bool + srcOffset, ok = sindex[ch] + if !ok { + return + } + + l = matchLength(src, tgt, tgtOffset, srcOffset) + } + + return +} + +func matchLength(src, tgt []byte, otgt, osrc int) int { + l := 0 + for { + if (osrc >= len(src) || otgt >= len(tgt)) || src[osrc] != tgt[otgt] { + break + } + + l++ + osrc++ + otgt++ } - return outBuff + return l } func deltaEncodeSize(size int) []byte { |