aboutsummaryrefslogtreecommitdiffstats
path: root/formats/packfile/delta.go
diff options
context:
space:
mode:
authorAlberto Cortés <alcortesm@gmail.com>2016-07-04 17:09:22 +0200
committerMáximo Cuadros <mcuadros@gmail.com>2016-07-04 17:09:22 +0200
commit5e73f01cb2e027a8f02801635b79d3a9bc866914 (patch)
treec0e7eb355c9b8633d99bab9295cb72b6c3a9c0e1 /formats/packfile/delta.go
parent808076af869550a200a3a544c9ee2fa22a8b6a85 (diff)
downloadgo-git-5e73f01cb2e027a8f02801635b79d3a9bc866914.tar.gz
Adds support to open local repositories and to use file-based object storage (#55)v3.1.0
* remove some comments * idx writer/reader * Shut up ssh tests, they are annoying * Add file scheme test to clients * Add dummy file client * Add test fot file client * Make tests use fixture endpoint * add parser for packed-refs format * add parser for packed-refs format * WIP adding dir.Refs() tests * Add test for fixture refs * refs parser for the refs directory * Documentation * Add Capabilities to file client * tgz.Exatract now accpets a path instead of a Reader * fix bug in idxfile fanout calculation * remove dead code * packfile documentation * clean packfile parser code * add core.Object.Content() and returns errors for core.ObjectStorage.Iter() * add seekable storage * add dir repos to NewRepository * clean prints * Add dir client documentation to README * Organize the README * README * Clean tgz package * Clean temp dirs after tgz tests * Gometalinter on gitdir * Clean pattern function * metalinter tgz * metalinter gitdir * gitdir coverage and remove seekable packfile filedescriptor leak * gitdir Idxfile tests and remove file descriptor leak * gitdir Idxfile tests when no idx is found * clean storage/seekable/internal/index and some formats/idxfile API issues * clean storage/seekable * clean formats/idx * turn packfile/doc.go into packfile/doc.txt * move formats/packfile/reader to decoder * fix packfile decoder error names * improve documentation * comment packfile decoder errors * comment public API (format/packfile) * remve duplicated code in packfile decoder test * move tracking_reader into an internal package and clean it * use iota for packfile format * rename packfile parse.go to packfile object_at.go * clean packfile deltas * fix delta header size bug * improve delta documentation * clean packfile deltas * clean packfiles deltas * clean repository.go * Remove go 1.5 from Travis CI Because go 1.5 does not suport internal packages. * change local repo scheme to local:// * change "local://" to "file://" as the local scheme * fix broken indentation * shortens names of variables in short scopes * more shortening of variable names * more shortening of variable names * Rename git dir client to "file", as the scheme used for it * Fix file format ctor name, now that the package name has change * Sortcut local repo constructor to not use remotes The object storage is build directly in the repository ctor, instead of creating a remote and waiting for the user to pull it. * update README and fix some errors in it * remove file scheme client * Local respositories has now a new ctor This is, they are no longer identified by the scheme of the URL, but are created different from inception. * remove unused URL field form Repository * move all git dir logic to seekable sotrage ctor * fix documentation * Make formats/file/dir an internal package to storage/seekable * change package storage/seekable to storage/fs * clean storage/fs * overall storage/fs clean * more cleaning * some metalinter fixes * upgrade cshared to last changes * remove dead code * fix test error info * remove file scheme check from clients * fix test error message * fix test error message * fix error messages * style changes * fix comments everywhere * style changes * style changes * scaffolding and tests for local packfiles without ifx files * outsource index building from packfile to the packfile decoder * refactor packfile header reading into a new function * move code to generate index from packfile back to index package * add header parsing * fix documentation errata * add undeltified and OFS delta support for index building from the packfile * add tests for packfile with ref-deltas * support for packfiles with ref-deltas and no idx * refactor packfile format parser to reuse code * refactor packfile format parser to reuse code * refactor packfile format parser to reuse code * refactor packfile format parser to reuse code * refactor packfile format parser to reuse code * WIP refactor packfile format parser to reuse code * refactor packfile format parser to reuse code * remove prints from tests * remove prints from tests * refactor packfile.core into packfile.parser * rename packfile reader to something that shows it is a recaller * rename cannot recall error * rename packfile.Reader to packfile.ReadRecaller and document * speed up test by using StreamReader instead of SeekableReader when possible * clean packfile StreamReader * stream_reader tests * refactor packfile.StreamReader into packfile.StreamReadRecaller * refactor packfile.SeekableReader into packfile.SeekableReadRecaller and document it * generalize packfile.StreamReadRecaller test to all packfile.ReadRecaller implementations * speed up storage/fs tests * speed up tests in . by loading packfiles in memory * speed up repository tests by using and smaller fixture * restore doc.go files * rename packfile.ReadRecaller implementations to shorter names * update comments to type changes * packfile.Parser test (WIP) * packfile.Parser tests and add ForgetAll() to packfile.ReadRecaller * add test for packfile.ReadRecaller.ForgetAll() * clarify seekable being able to recallByOffset forgetted objects * use better names for internal maps * metalinter packfile package * speed up some tests * documentation fixes * change storage.fs package name to storage.proxy to avoid confusion with new filesystem support * New fs package and os transparent implementation Now NewRepositoryFromFS receives a fs and a path and tests are modified accordingly, but it is still not using for anything. * add fs to gitdir and proxy.store * reduce fs interface for easier implementation * remove garbage dirs from tgz tests * change file name gitdir/dir.go to gitdir/gitdir.go * fs.OS tests * metalinter utils/fs * add NewRepositoryFromFS documentation to README * Readability fixes to README * move tgz to an external dependency * move filesystem impl. example to example dir * rename proxy/store.go to proxy/storage.go for coherence with memory/storage.go * rename proxy package to seekable
Diffstat (limited to 'formats/packfile/delta.go')
-rw-r--r--formats/packfile/delta.go195
1 files changed, 113 insertions, 82 deletions
diff --git a/formats/packfile/delta.go b/formats/packfile/delta.go
index 571ccf8..e0bbb65 100644
--- a/formats/packfile/delta.go
+++ b/formats/packfile/delta.go
@@ -1,117 +1,148 @@
package packfile
-import "io"
+// See https://github.com/git/git/blob/49fa3dc76179e04b0833542fa52d0f287a4955ac/delta.h
+// https://github.com/git/git/blob/c2c5f6b1e479f2c38e0e01345350620944e3527f/patch-delta.c,
+// and https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js
+// for details about the delta format.
const deltaSizeMin = 4
-func deltaHeaderSize(b []byte) (uint, []byte) {
- var size, j uint
- var cmd byte
- for {
- cmd = b[j]
- size |= (uint(cmd) & 0x7f) << (j * 7)
- j++
- if uint(cmd)&0xb80 == 0 || j == uint(len(b)) {
- break
- }
- }
- return size, b[j:]
-}
-
-func patchDelta(src, delta []byte) []byte {
+// PatchDelta returns the result of applying the modification deltas in delta to src.
+func PatchDelta(src, delta []byte) []byte {
if len(delta) < deltaSizeMin {
return nil
}
- size, delta := deltaHeaderSize(delta)
- if size != uint(len(src)) {
+
+ srcSz, delta := decodeLEB128(delta)
+ if srcSz != uint(len(src)) {
return nil
}
- size, delta = deltaHeaderSize(delta)
- origSize := size
- dest := make([]byte, 0)
+ targetSz, delta := decodeLEB128(delta)
+ remainingTargetSz := targetSz
- // var offset uint
+ var dest []byte
var cmd byte
for {
cmd = delta[0]
delta = delta[1:]
- if (cmd & 0x80) != 0 {
- var cp_off, cp_size uint
- if (cmd & 0x01) != 0 {
- cp_off = uint(delta[0])
- delta = delta[1:]
- }
- if (cmd & 0x02) != 0 {
- cp_off |= uint(delta[0]) << 8
- delta = delta[1:]
- }
- if (cmd & 0x04) != 0 {
- cp_off |= uint(delta[0]) << 16
- delta = delta[1:]
- }
- if (cmd & 0x08) != 0 {
- cp_off |= uint(delta[0]) << 24
- delta = delta[1:]
- }
-
- if (cmd & 0x10) != 0 {
- cp_size = uint(delta[0])
- delta = delta[1:]
- }
- if (cmd & 0x20) != 0 {
- cp_size |= uint(delta[0]) << 8
- delta = delta[1:]
- }
- if (cmd & 0x40) != 0 {
- cp_size |= uint(delta[0]) << 16
- delta = delta[1:]
- }
- if cp_size == 0 {
- cp_size = 0x10000
- }
- if cp_off+cp_size < cp_off ||
- cp_off+cp_size > uint(len(src)) ||
- cp_size > origSize {
+ if isCopyFromSrc(cmd) {
+ var offset, sz uint
+ offset, delta = decodeOffset(cmd, delta)
+ sz, delta = decodeSize(cmd, delta)
+ if invalidSize(sz, targetSz) ||
+ invalidOffsetSize(offset, sz, srcSz) {
break
}
- dest = append(dest, src[cp_off:cp_off+cp_size]...)
- size -= cp_size
- } else if cmd != 0 {
- if uint(cmd) > origSize {
+ dest = append(dest, src[offset:offset+sz]...)
+ remainingTargetSz -= sz
+ } else if isCopyFromDelta(cmd) {
+ sz := uint(cmd) // cmd is the size itself
+ if invalidSize(sz, targetSz) {
break
}
- dest = append(dest, delta[0:uint(cmd)]...)
- size -= uint(cmd)
- delta = delta[uint(cmd):]
+ dest = append(dest, delta[0:sz]...)
+ remainingTargetSz -= sz
+ delta = delta[sz:]
} else {
return nil
}
- if size <= 0 {
+
+ if remainingTargetSz <= 0 {
break
}
}
+
return dest
}
-func decodeOffset(src io.ByteReader, steps int64) (int64, error) {
- b, err := src.ReadByte()
- if err != nil {
- return 0, err
- }
+// Decodes a number encoded as an unsigned LEB128 at the start of some
+// binary data and returns the decoded number and the rest of the
+// stream.
+//
+// This must be called twice on the delta data buffer, first to get the
+// expected source buffer size, and again to get the target buffer size.
+func decodeLEB128(input []byte) (uint, []byte) {
+ var num, sz uint
+ var b byte
+ for {
+ b = input[sz]
+ num |= (uint(b) & payload) << (sz * 7) // concats 7 bits chunks
+ sz++
- var offset = int64(b & 0x7f)
- for (b & 0x80) != 0 {
- offset++ // WHY?
- b, err = src.ReadByte()
- if err != nil {
- return 0, err
+ if uint(b)&continuation == 0 || sz == uint(len(input)) {
+ break
}
+ }
+
+ return num, input[sz:]
+}
+
+const (
+ payload = 0x7f // 0111 1111
+ continuation = 0x80 // 1000 0000
+)
- offset = (offset << 7) + int64(b&0x7f)
+func isCopyFromSrc(cmd byte) bool {
+ return (cmd & 0x80) != 0
+}
+
+func isCopyFromDelta(cmd byte) bool {
+ return (cmd&0x80) == 0 && cmd != 0
+}
+
+func decodeOffset(cmd byte, delta []byte) (uint, []byte) {
+ var offset uint
+ if (cmd & 0x01) != 0 {
+ offset = uint(delta[0])
+ delta = delta[1:]
+ }
+ if (cmd & 0x02) != 0 {
+ offset |= uint(delta[0]) << 8
+ delta = delta[1:]
+ }
+ if (cmd & 0x04) != 0 {
+ offset |= uint(delta[0]) << 16
+ delta = delta[1:]
+ }
+ if (cmd & 0x08) != 0 {
+ offset |= uint(delta[0]) << 24
+ delta = delta[1:]
+ }
+
+ return offset, delta
+}
+
+func decodeSize(cmd byte, delta []byte) (uint, []byte) {
+ var sz uint
+ if (cmd & 0x10) != 0 {
+ sz = uint(delta[0])
+ delta = delta[1:]
+ }
+ if (cmd & 0x20) != 0 {
+ sz |= uint(delta[0]) << 8
+ delta = delta[1:]
+ }
+ if (cmd & 0x40) != 0 {
+ sz |= uint(delta[0]) << 16
+ delta = delta[1:]
+ }
+ if sz == 0 {
+ sz = 0x10000
}
- // offset needs to be aware of the bytes we read for `o.typ` and `o.size`
- offset += steps
- return -offset, nil
+ return sz, delta
+}
+
+func invalidSize(sz, targetSz uint) bool {
+ return sz > targetSz
+}
+
+func invalidOffsetSize(offset, sz, srcSz uint) bool {
+ return sumOverflows(offset, sz) ||
+ offset+sz > srcSz
+}
+
+func sumOverflows(a, b uint) bool {
+ return a+b < a
}