diff options
Diffstat (limited to 'formats/packfile/delta.go')
-rw-r--r-- | formats/packfile/delta.go | 195 |
1 files changed, 113 insertions, 82 deletions
// formats/packfile/delta.go (package packfile), as it stands after this change.
//
// See https://github.com/git/git/blob/49fa3dc76179e04b0833542fa52d0f287a4955ac/delta.h
// https://github.com/git/git/blob/c2c5f6b1e479f2c38e0e01345350620944e3527f/patch-delta.c,
// and https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js
// for details about the delta format.

// deltaSizeMin is the length of the shortest delta PatchDelta accepts;
// anything shorter is rejected before any decoding takes place.
const deltaSizeMin = 4

const (
	payload      = 0x7f // 0111 1111: the 7 value bits of a LEB128 byte
	continuation = 0x80 // 1000 0000: set when another LEB128 byte follows
)

// PatchDelta returns the result of applying the modification deltas in
// delta to src.
//
// The delta starts with two unsigned LEB128 numbers (the expected size of
// src and the size of the result), followed by a sequence of commands that
// either copy a range of src or insert literal bytes carried in the delta.
//
// It returns nil when the delta is malformed: shorter than deltaSizeMin,
// a source size that does not match len(src), a reserved (zero) command,
// a command whose arguments or literal bytes are cut short, a copy that
// reaches outside src, or a command that would produce more bytes than
// the announced target size. Bytes left in delta once the target is
// complete are ignored. A well-formed delta always yields a non-nil
// slice, even when the target is empty.
func PatchDelta(src, delta []byte) []byte {
	if len(delta) < deltaSizeMin {
		return nil
	}

	srcSz, delta := decodeLEB128(delta)
	if srcSz != uint(len(src)) {
		return nil
	}
	if len(delta) == 0 {
		// The source size header swallowed the whole delta, so there is
		// no target size to read.
		return nil
	}

	targetSz, delta := decodeLEB128(delta)
	remainingTargetSz := targetSz

	// Deliberately non-nil so that an empty target is distinguishable
	// from the nil returned for malformed input. The capacity is not
	// pre-sized from targetSz because that value is untrusted.
	dest := []byte{}
	for remainingTargetSz > 0 {
		if len(delta) == 0 {
			// Ran out of commands before the target was complete.
			return nil
		}
		cmd := delta[0]
		delta = delta[1:]

		switch {
		case isCopyFromSrc(cmd):
			if len(delta) < copyArgsLen(cmd) {
				return nil
			}
			var offset, sz uint
			offset, delta = decodeOffset(cmd, delta)
			sz, delta = decodeSize(cmd, delta)
			// The size is checked against what is still missing from
			// the target, so the unsigned subtraction below cannot wrap.
			if invalidSize(sz, remainingTargetSz) ||
				invalidOffsetSize(offset, sz, srcSz) {
				return nil
			}
			dest = append(dest, src[offset:offset+sz]...)
			remainingTargetSz -= sz

		case isCopyFromDelta(cmd):
			sz := uint(cmd) // cmd is the size itself
			if invalidSize(sz, remainingTargetSz) || sz > uint(len(delta)) {
				return nil
			}
			dest = append(dest, delta[:sz]...)
			remainingTargetSz -= sz
			delta = delta[sz:]

		default:
			// cmd == 0 is reserved.
			return nil
		}
	}

	return dest
}

// decodeLEB128 decodes a number encoded as an unsigned LEB128 at the start
// of some binary data and returns the decoded number and the rest of the
// stream.
//
// This must be called twice on the delta data buffer, first to get the
// expected source buffer size, and again to get the target buffer size.
//
// If input ends before a byte without the continuation bit is seen, the
// bits gathered so far are returned together with an empty remainder; an
// empty input therefore decodes to 0.
func decodeLEB128(input []byte) (uint, []byte) {
	var num uint
	for i, b := range input {
		num |= (uint(b) & payload) << (uint(i) * 7) // concatenates 7-bit chunks
		if b&continuation == 0 {
			return num, input[i+1:]
		}
	}

	return num, input[len(input):]
}

// isCopyFromSrc reports whether cmd is a "copy a range of the source"
// command, i.e. whether its most significant bit is set.
func isCopyFromSrc(cmd byte) bool {
	return (cmd & 0x80) != 0
}

// isCopyFromDelta reports whether cmd is an "insert the next cmd bytes of
// the delta" command: most significant bit clear and a non-zero value.
func isCopyFromDelta(cmd byte) bool {
	return (cmd&0x80) == 0 && cmd != 0
}

// copyArgsLen returns how many argument bytes follow a copy-from-source
// command: one for every bit set among the low seven bits of cmd.
func copyArgsLen(cmd byte) int {
	n := 0
	for args := cmd & 0x7f; args != 0; args &= args - 1 {
		n++
	}
	return n
}

// decodeOffset reads the source offset of a copy command. Bits 0x01, 0x02,
// 0x04 and 0x08 of cmd say which of the four little-endian offset bytes
// are present in delta; absent bytes count as zero. It returns the offset
// and the delta with the consumed bytes removed.
//
// delta must hold at least as many bytes as cmd announces.
func decodeOffset(cmd byte, delta []byte) (uint, []byte) {
	var offset uint
	for i := uint(0); i < 4; i++ {
		if cmd&(byte(0x01)<<i) != 0 {
			offset |= uint(delta[0]) << (8 * i)
			delta = delta[1:]
		}
	}

	return offset, delta
}

// decodeSize reads the length of a copy command. Bits 0x10, 0x20 and 0x40
// of cmd say which of the three little-endian size bytes are present in
// delta; absent bytes count as zero. A decoded size of 0 stands for
// 0x10000. It returns the size and the delta with the consumed bytes
// removed.
//
// delta must hold at least as many bytes as cmd announces.
func decodeSize(cmd byte, delta []byte) (uint, []byte) {
	var sz uint
	for i := uint(0); i < 3; i++ {
		if cmd&(byte(0x10)<<i) != 0 {
			sz |= uint(delta[0]) << (8 * i)
			delta = delta[1:]
		}
	}
	if sz == 0 {
		sz = 0x10000
	}

	return sz, delta
}

// invalidSize reports whether a command producing sz bytes would exceed
// remainingSz, the number of bytes still allowed in the target.
func invalidSize(sz, remainingSz uint) bool {
	return sz > remainingSz
}

// invalidOffsetSize reports whether the range [offset, offset+sz) falls
// outside a source of srcSz bytes, including the case where offset+sz
// wraps around.
func invalidOffsetSize(offset, sz, srcSz uint) bool {
	return sumOverflows(offset, sz) ||
		offset+sz > srcSz
}

// sumOverflows reports whether a+b wraps around the range of uint.
func sumOverflows(a, b uint) bool {
	return a+b < a
}