aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/format/packfile
diff options
context:
space:
mode:
Diffstat (limited to 'plumbing/format/packfile')
-rw-r--r--plumbing/format/packfile/decoder.go14
-rw-r--r--plumbing/format/packfile/decoder_test.go26
-rw-r--r--plumbing/format/packfile/delta_index.go297
-rw-r--r--plumbing/format/packfile/delta_selector.go94
-rw-r--r--plumbing/format/packfile/delta_selector_test.go31
-rw-r--r--plumbing/format/packfile/diff_delta.go96
-rw-r--r--plumbing/format/packfile/encoder.go23
-rw-r--r--plumbing/format/packfile/encoder_advanced_test.go19
-rw-r--r--plumbing/format/packfile/encoder_test.go10
-rw-r--r--plumbing/format/packfile/object_pack.go6
-rw-r--r--plumbing/format/packfile/patch_delta.go7
-rw-r--r--plumbing/format/packfile/scanner_test.go2
12 files changed, 509 insertions, 116 deletions
diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go
index 3d475b2..ad72ea0 100644
--- a/plumbing/format/packfile/decoder.go
+++ b/plumbing/format/packfile/decoder.go
@@ -105,7 +105,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
o: o,
idx: NewIndex(0),
- offsetToType: make(map[int64]plumbing.ObjectType, 0),
+ offsetToType: make(map[int64]plumbing.ObjectType),
decoderType: t,
}, nil
}
@@ -207,12 +207,16 @@ func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error {
// constructor, if the object decoded is not equals to the specified one, nil will
// be returned
func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) {
+ return d.doDecodeObject(d.decoderType)
+}
+
+func (d *Decoder) doDecodeObject(t plumbing.ObjectType) (plumbing.EncodedObject, error) {
h, err := d.s.NextObjectHeader()
if err != nil {
return nil, err
}
- if d.decoderType == plumbing.AnyObject {
+ if t == plumbing.AnyObject {
return d.decodeByHeader(h)
}
@@ -279,6 +283,7 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error
obj := d.newObject()
obj.SetSize(h.Length)
obj.SetType(h.Type)
+
var crc uint32
var err error
switch h.Type {
@@ -315,7 +320,8 @@ func (d *Decoder) newObject() plumbing.EncodedObject {
// returned is added into a internal index. This is intended to be able to regenerate
// objects from deltas (offset deltas or reference deltas) without an package index
// (.idx file). If Decode wasn't called previously objects offset should provided
-// using the SetOffsets method.
+// using the SetOffsets method. It decodes the object regardless of the Decoder
+// type.
func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) {
if !d.s.IsSeekable {
return nil, ErrNonSeekable
@@ -333,7 +339,7 @@ func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) {
}
}()
- return d.DecodeObject()
+ return d.doDecodeObject(plumbing.AnyObject)
}
func (d *Decoder) fillRegularObjectContent(obj plumbing.EncodedObject) (uint32, error) {
diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go
index ecf7c81..1a1a74a 100644
--- a/plumbing/format/packfile/decoder_test.go
+++ b/plumbing/format/packfile/decoder_test.go
@@ -3,9 +3,6 @@ package packfile_test
import (
"io"
- "gopkg.in/src-d/go-billy.v3/memfs"
-
- "github.com/src-d/go-git-fixtures"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
@@ -14,6 +11,8 @@ import (
"gopkg.in/src-d/go-git.v4/storage/memory"
. "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-billy.v4/memfs"
+ "gopkg.in/src-d/go-git-fixtures.v3"
)
type ReaderSuite struct {
@@ -293,7 +292,7 @@ func (s *ReaderSuite) TestDecodeCRCs(c *C) {
c.Assert(int(sum), Equals, 78022211966)
}
-func (s *ReaderSuite) TestReadObjectAt(c *C) {
+func (s *ReaderSuite) TestDecodeObjectAt(c *C) {
f := fixtures.Basic().One()
scanner := packfile.NewScanner(f.Packfile())
d, err := packfile.NewDecoder(scanner, nil)
@@ -311,6 +310,25 @@ func (s *ReaderSuite) TestReadObjectAt(c *C) {
c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
}
+func (s *ReaderSuite) TestDecodeObjectAtForType(c *C) {
+ f := fixtures.Basic().One()
+ scanner := packfile.NewScanner(f.Packfile())
+ d, err := packfile.NewDecoderForType(scanner, nil, plumbing.TreeObject)
+ c.Assert(err, IsNil)
+
+ // when the packfile is ref-delta based, the offsets are required
+ if f.Is("ref-delta") {
+ d.SetIndex(getIndexFromIdxFile(f.Idx()))
+ }
+
+ // the objects at reference 186, is a delta, so should be recall,
+ // without being read before.
+ obj, err := d.DecodeObjectAt(186)
+ c.Assert(err, IsNil)
+ c.Assert(obj.Type(), Equals, plumbing.CommitObject)
+ c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5")
+}
+
func (s *ReaderSuite) TestIndex(c *C) {
f := fixtures.Basic().One()
scanner := packfile.NewScanner(f.Packfile())
diff --git a/plumbing/format/packfile/delta_index.go b/plumbing/format/packfile/delta_index.go
new file mode 100644
index 0000000..07a6112
--- /dev/null
+++ b/plumbing/format/packfile/delta_index.go
@@ -0,0 +1,297 @@
+package packfile
+
+const blksz = 16
+const maxChainLength = 64
+
+// deltaIndex is a modified version of JGit's DeltaIndex adapted to our current
+// design.
+type deltaIndex struct {
+ table []int
+ entries []int
+ mask int
+}
+
+func (idx *deltaIndex) init(buf []byte) {
+ scanner := newDeltaIndexScanner(buf, len(buf))
+ idx.mask = scanner.mask
+ idx.table = scanner.table
+ idx.entries = make([]int, countEntries(scanner)+1)
+ idx.copyEntries(scanner)
+}
+
+// findMatch returns the offset of src where the block starting at tgtOffset
+// is and the length of the match. A length of 0 means there was no match. A
+// length of -1 means the src length is lower than the blksz and whatever
+// other positive length is the length of the match in bytes.
+func (idx *deltaIndex) findMatch(src, tgt []byte, tgtOffset int) (srcOffset, l int) {
+ if len(tgt) < tgtOffset+s {
+ return 0, len(tgt) - tgtOffset
+ }
+
+ if len(src) < blksz {
+ return 0, -1
+ }
+
+ if len(tgt) >= tgtOffset+s && len(src) >= blksz {
+ h := hashBlock(tgt, tgtOffset)
+ tIdx := h & idx.mask
+ eIdx := idx.table[tIdx]
+ if eIdx != 0 {
+ srcOffset = idx.entries[eIdx]
+ } else {
+ return
+ }
+
+ l = matchLength(src, tgt, tgtOffset, srcOffset)
+ }
+
+ return
+}
+
+func matchLength(src, tgt []byte, otgt, osrc int) (l int) {
+ lensrc := len(src)
+ lentgt := len(tgt)
+ for (osrc < lensrc && otgt < lentgt) && src[osrc] == tgt[otgt] {
+ l++
+ osrc++
+ otgt++
+ }
+ return
+}
+
+func countEntries(scan *deltaIndexScanner) (cnt int) {
+ // Figure out exactly how many entries we need. As we do the
+ // enumeration truncate any delta chains longer than what we
+ // are willing to scan during encode. This keeps the encode
+ // logic linear in the size of the input rather than quadratic.
+ for i := 0; i < len(scan.table); i++ {
+ h := scan.table[i]
+ if h == 0 {
+ continue
+ }
+
+ size := 0
+ for {
+ size++
+ if size == maxChainLength {
+ scan.next[h] = 0
+ break
+ }
+ h = scan.next[h]
+
+ if h == 0 {
+ break
+ }
+ }
+ cnt += size
+ }
+
+ return
+}
+
+func (idx *deltaIndex) copyEntries(scanner *deltaIndexScanner) {
+ // Rebuild the entries list from the scanner, positioning all
+ // blocks in the same hash chain next to each other. We can
+ // then later discard the next list, along with the scanner.
+ //
+ next := 1
+ for i := 0; i < len(idx.table); i++ {
+ h := idx.table[i]
+ if h == 0 {
+ continue
+ }
+
+ idx.table[i] = next
+ for {
+ idx.entries[next] = scanner.entries[h]
+ next++
+ h = scanner.next[h]
+
+ if h == 0 {
+ break
+ }
+ }
+ }
+}
+
+type deltaIndexScanner struct {
+ table []int
+ entries []int
+ next []int
+ mask int
+ count int
+}
+
+func newDeltaIndexScanner(buf []byte, size int) *deltaIndexScanner {
+ size -= size % blksz
+ worstCaseBlockCnt := size / blksz
+ if worstCaseBlockCnt < 1 {
+ return new(deltaIndexScanner)
+ }
+
+ tableSize := tableSize(worstCaseBlockCnt)
+ scanner := &deltaIndexScanner{
+ table: make([]int, tableSize),
+ mask: tableSize - 1,
+ entries: make([]int, worstCaseBlockCnt+1),
+ next: make([]int, worstCaseBlockCnt+1),
+ }
+
+ scanner.scan(buf, size)
+ return scanner
+}
+
+// slightly modified version of JGit's DeltaIndexScanner. We store the offset on the entries
+// instead of the entries and the key, so we avoid operations to retrieve the offset later, as
+// we don't use the key.
+// See: https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaIndexScanner.java
+func (s *deltaIndexScanner) scan(buf []byte, end int) {
+ lastHash := 0
+ ptr := end - blksz
+
+ for {
+ key := hashBlock(buf, ptr)
+ tIdx := key & s.mask
+ head := s.table[tIdx]
+ if head != 0 && lastHash == key {
+ s.entries[head] = ptr
+ } else {
+ s.count++
+ eIdx := s.count
+ s.entries[eIdx] = ptr
+ s.next[eIdx] = head
+ s.table[tIdx] = eIdx
+ }
+
+ lastHash = key
+ ptr -= blksz
+
+ if 0 > ptr {
+ break
+ }
+ }
+}
+
+func tableSize(worstCaseBlockCnt int) int {
+ shift := 32 - leadingZeros(uint32(worstCaseBlockCnt))
+ sz := 1 << uint(shift-1)
+ if sz < worstCaseBlockCnt {
+ sz <<= 1
+ }
+ return sz
+}
+
+// use https://golang.org/pkg/math/bits/#LeadingZeros32 in the future
+func leadingZeros(x uint32) (n int) {
+ if x >= 1<<16 {
+ x >>= 16
+ n = 16
+ }
+ if x >= 1<<8 {
+ x >>= 8
+ n += 8
+ }
+ n += int(len8tab[x])
+ return 32 - n
+}
+
+var len8tab = [256]uint8{
+ 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+}
+
+func hashBlock(raw []byte, ptr int) int {
+ // The first 4 steps collapse out into a 4 byte big-endian decode,
+ // with a larger right shift as we combined shift lefts together.
+ //
+ hash := ((uint32(raw[ptr]) & 0xff) << 24) |
+ ((uint32(raw[ptr+1]) & 0xff) << 16) |
+ ((uint32(raw[ptr+2]) & 0xff) << 8) |
+ (uint32(raw[ptr+3]) & 0xff)
+ hash ^= T[hash>>31]
+
+ hash = ((hash << 8) | (uint32(raw[ptr+4]) & 0xff)) ^ T[hash>>23]
+ hash = ((hash << 8) | (uint32(raw[ptr+5]) & 0xff)) ^ T[hash>>23]
+ hash = ((hash << 8) | (uint32(raw[ptr+6]) & 0xff)) ^ T[hash>>23]
+ hash = ((hash << 8) | (uint32(raw[ptr+7]) & 0xff)) ^ T[hash>>23]
+
+ hash = ((hash << 8) | (uint32(raw[ptr+8]) & 0xff)) ^ T[hash>>23]
+ hash = ((hash << 8) | (uint32(raw[ptr+9]) & 0xff)) ^ T[hash>>23]
+ hash = ((hash << 8) | (uint32(raw[ptr+10]) & 0xff)) ^ T[hash>>23]
+ hash = ((hash << 8) | (uint32(raw[ptr+11]) & 0xff)) ^ T[hash>>23]
+
+ hash = ((hash << 8) | (uint32(raw[ptr+12]) & 0xff)) ^ T[hash>>23]
+ hash = ((hash << 8) | (uint32(raw[ptr+13]) & 0xff)) ^ T[hash>>23]
+ hash = ((hash << 8) | (uint32(raw[ptr+14]) & 0xff)) ^ T[hash>>23]
+ hash = ((hash << 8) | (uint32(raw[ptr+15]) & 0xff)) ^ T[hash>>23]
+
+ return int(hash)
+}
+
+var T = []uint32{0x00000000, 0xd4c6b32d, 0x7d4bd577,
+ 0xa98d665a, 0x2e5119c3, 0xfa97aaee, 0x531accb4, 0x87dc7f99,
+ 0x5ca23386, 0x886480ab, 0x21e9e6f1, 0xf52f55dc, 0x72f32a45,
+ 0xa6359968, 0x0fb8ff32, 0xdb7e4c1f, 0x6d82d421, 0xb944670c,
+ 0x10c90156, 0xc40fb27b, 0x43d3cde2, 0x97157ecf, 0x3e981895,
+ 0xea5eabb8, 0x3120e7a7, 0xe5e6548a, 0x4c6b32d0, 0x98ad81fd,
+ 0x1f71fe64, 0xcbb74d49, 0x623a2b13, 0xb6fc983e, 0x0fc31b6f,
+ 0xdb05a842, 0x7288ce18, 0xa64e7d35, 0x219202ac, 0xf554b181,
+ 0x5cd9d7db, 0x881f64f6, 0x536128e9, 0x87a79bc4, 0x2e2afd9e,
+ 0xfaec4eb3, 0x7d30312a, 0xa9f68207, 0x007be45d, 0xd4bd5770,
+ 0x6241cf4e, 0xb6877c63, 0x1f0a1a39, 0xcbcca914, 0x4c10d68d,
+ 0x98d665a0, 0x315b03fa, 0xe59db0d7, 0x3ee3fcc8, 0xea254fe5,
+ 0x43a829bf, 0x976e9a92, 0x10b2e50b, 0xc4745626, 0x6df9307c,
+ 0xb93f8351, 0x1f8636de, 0xcb4085f3, 0x62cde3a9, 0xb60b5084,
+ 0x31d72f1d, 0xe5119c30, 0x4c9cfa6a, 0x985a4947, 0x43240558,
+ 0x97e2b675, 0x3e6fd02f, 0xeaa96302, 0x6d751c9b, 0xb9b3afb6,
+ 0x103ec9ec, 0xc4f87ac1, 0x7204e2ff, 0xa6c251d2, 0x0f4f3788,
+ 0xdb8984a5, 0x5c55fb3c, 0x88934811, 0x211e2e4b, 0xf5d89d66,
+ 0x2ea6d179, 0xfa606254, 0x53ed040e, 0x872bb723, 0x00f7c8ba,
+ 0xd4317b97, 0x7dbc1dcd, 0xa97aaee0, 0x10452db1, 0xc4839e9c,
+ 0x6d0ef8c6, 0xb9c84beb, 0x3e143472, 0xead2875f, 0x435fe105,
+ 0x97995228, 0x4ce71e37, 0x9821ad1a, 0x31accb40, 0xe56a786d,
+ 0x62b607f4, 0xb670b4d9, 0x1ffdd283, 0xcb3b61ae, 0x7dc7f990,
+ 0xa9014abd, 0x008c2ce7, 0xd44a9fca, 0x5396e053, 0x8750537e,
+ 0x2edd3524, 0xfa1b8609, 0x2165ca16, 0xf5a3793b, 0x5c2e1f61,
+ 0x88e8ac4c, 0x0f34d3d5, 0xdbf260f8, 0x727f06a2, 0xa6b9b58f,
+ 0x3f0c6dbc, 0xebcade91, 0x4247b8cb, 0x96810be6, 0x115d747f,
+ 0xc59bc752, 0x6c16a108, 0xb8d01225, 0x63ae5e3a, 0xb768ed17,
+ 0x1ee58b4d, 0xca233860, 0x4dff47f9, 0x9939f4d4, 0x30b4928e,
+ 0xe47221a3, 0x528eb99d, 0x86480ab0, 0x2fc56cea, 0xfb03dfc7,
+ 0x7cdfa05e, 0xa8191373, 0x01947529, 0xd552c604, 0x0e2c8a1b,
+ 0xdaea3936, 0x73675f6c, 0xa7a1ec41, 0x207d93d8, 0xf4bb20f5,
+ 0x5d3646af, 0x89f0f582, 0x30cf76d3, 0xe409c5fe, 0x4d84a3a4,
+ 0x99421089, 0x1e9e6f10, 0xca58dc3d, 0x63d5ba67, 0xb713094a,
+ 0x6c6d4555, 0xb8abf678, 0x11269022, 0xc5e0230f, 0x423c5c96,
+ 0x96faefbb, 0x3f7789e1, 0xebb13acc, 0x5d4da2f2, 0x898b11df,
+ 0x20067785, 0xf4c0c4a8, 0x731cbb31, 0xa7da081c, 0x0e576e46,
+ 0xda91dd6b, 0x01ef9174, 0xd5292259, 0x7ca44403, 0xa862f72e,
+ 0x2fbe88b7, 0xfb783b9a, 0x52f55dc0, 0x8633eeed, 0x208a5b62,
+ 0xf44ce84f, 0x5dc18e15, 0x89073d38, 0x0edb42a1, 0xda1df18c,
+ 0x739097d6, 0xa75624fb, 0x7c2868e4, 0xa8eedbc9, 0x0163bd93,
+ 0xd5a50ebe, 0x52797127, 0x86bfc20a, 0x2f32a450, 0xfbf4177d,
+ 0x4d088f43, 0x99ce3c6e, 0x30435a34, 0xe485e919, 0x63599680,
+ 0xb79f25ad, 0x1e1243f7, 0xcad4f0da, 0x11aabcc5, 0xc56c0fe8,
+ 0x6ce169b2, 0xb827da9f, 0x3ffba506, 0xeb3d162b, 0x42b07071,
+ 0x9676c35c, 0x2f49400d, 0xfb8ff320, 0x5202957a, 0x86c42657,
+ 0x011859ce, 0xd5deeae3, 0x7c538cb9, 0xa8953f94, 0x73eb738b,
+ 0xa72dc0a6, 0x0ea0a6fc, 0xda6615d1, 0x5dba6a48, 0x897cd965,
+ 0x20f1bf3f, 0xf4370c12, 0x42cb942c, 0x960d2701, 0x3f80415b,
+ 0xeb46f276, 0x6c9a8def, 0xb85c3ec2, 0x11d15898, 0xc517ebb5,
+ 0x1e69a7aa, 0xcaaf1487, 0x632272dd, 0xb7e4c1f0, 0x3038be69,
+ 0xe4fe0d44, 0x4d736b1e, 0x99b5d833,
+}
diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go
index cc0ae0f..51adcdf 100644
--- a/plumbing/format/packfile/delta_selector.go
+++ b/plumbing/format/packfile/delta_selector.go
@@ -2,15 +2,13 @@ package packfile
import (
"sort"
+ "sync"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
const (
- // How far back in the sorted list to search for deltas. 10 is
- // the default in command line git.
- deltaWindowSize = 10
// deltas based on deltas, how many steps we can do.
// 50 is the default value used in JGit
maxDepth = int64(50)
@@ -30,27 +28,75 @@ func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector {
return &deltaSelector{s}
}
-// ObjectsToPack creates a list of ObjectToPack from the hashes provided,
-// creating deltas if it's suitable, using an specific internal logic
-func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
- otp, err := dw.objectsToPack(hashes)
+// ObjectsToPack creates a list of ObjectToPack from the hashes
+// provided, creating deltas if it's suitable, using an specific
+// internal logic. `packWindow` specifies the size of the sliding
+// window used to compare objects for delta compression; 0 turns off
+// delta compression entirely.
+func (dw *deltaSelector) ObjectsToPack(
+ hashes []plumbing.Hash,
+ packWindow uint,
+) ([]*ObjectToPack, error) {
+ otp, err := dw.objectsToPack(hashes, packWindow)
if err != nil {
return nil, err
}
+ if packWindow == 0 {
+ return otp, nil
+ }
+
dw.sort(otp)
- if err := dw.walk(otp); err != nil {
+ var objectGroups [][]*ObjectToPack
+ var prev *ObjectToPack
+ i := -1
+ for _, obj := range otp {
+ if prev == nil || prev.Type() != obj.Type() {
+ objectGroups = append(objectGroups, []*ObjectToPack{obj})
+ i++
+ prev = obj
+ } else {
+ objectGroups[i] = append(objectGroups[i], obj)
+ }
+ }
+
+ var wg sync.WaitGroup
+ var once sync.Once
+ for _, objs := range objectGroups {
+ objs := objs
+ wg.Add(1)
+ go func() {
+ if walkErr := dw.walk(objs, packWindow); walkErr != nil {
+ once.Do(func() {
+ err = walkErr
+ })
+ }
+ wg.Done()
+ }()
+ }
+ wg.Wait()
+
+ if err != nil {
return nil, err
}
return otp, nil
}
-func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) {
+func (dw *deltaSelector) objectsToPack(
+ hashes []plumbing.Hash,
+ packWindow uint,
+) ([]*ObjectToPack, error) {
var objectsToPack []*ObjectToPack
for _, h := range hashes {
- o, err := dw.encodedDeltaObject(h)
+ var o plumbing.EncodedObject
+ var err error
+ if packWindow == 0 {
+ o, err = dw.encodedObject(h)
+ } else {
+ o, err = dw.encodedDeltaObject(h)
+ }
if err != nil {
return nil, err
}
@@ -63,6 +109,10 @@ func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack,
objectsToPack = append(objectsToPack, otp)
}
+ if packWindow == 0 {
+ return objectsToPack, nil
+ }
+
if err := dw.fixAndBreakChains(objectsToPack); err != nil {
return nil, err
}
@@ -171,8 +221,18 @@ func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
sort.Sort(byTypeAndSize(objectsToPack))
}
-func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
+func (dw *deltaSelector) walk(
+ objectsToPack []*ObjectToPack,
+ packWindow uint,
+) error {
+ indexMap := make(map[plumbing.Hash]*deltaIndex)
for i := 0; i < len(objectsToPack); i++ {
+ // Clean up the index map for anything outside our pack
+ // window, to save memory.
+ if i > int(packWindow) {
+ delete(indexMap, objectsToPack[i-int(packWindow)].Hash())
+ }
+
target := objectsToPack[i]
// If we already have a delta, we don't try to find a new one for this
@@ -187,7 +247,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
continue
}
- for j := i - 1; j >= 0 && i-j < deltaWindowSize; j-- {
+ for j := i - 1; j >= 0 && i-j < int(packWindow); j-- {
base := objectsToPack[j]
// Objects must use only the same type as their delta base.
// Since objectsToPack is sorted by type and size, once we find
@@ -196,7 +256,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
break
}
- if err := dw.tryToDeltify(base, target); err != nil {
+ if err := dw.tryToDeltify(indexMap, base, target); err != nil {
return err
}
}
@@ -205,7 +265,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error {
return nil
}
-func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
+func (dw *deltaSelector) tryToDeltify(indexMap map[plumbing.Hash]*deltaIndex, base, target *ObjectToPack) error {
// If the sizes are radically different, this is a bad pairing.
if target.Size() < base.Size()>>4 {
return nil
@@ -238,8 +298,12 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error {
return err
}
+ if _, ok := indexMap[base.Hash()]; !ok {
+ indexMap[base.Hash()] = new(deltaIndex)
+ }
+
// Now we can generate the delta using originals
- delta, err := GetDelta(base.Original, target.Original)
+ delta, err := getDelta(indexMap[base.Hash()], base.Original, target.Original)
if err != nil {
return err
}
diff --git a/plumbing/format/packfile/delta_selector_test.go b/plumbing/format/packfile/delta_selector_test.go
index ca4a96b..7d7fd0c 100644
--- a/plumbing/format/packfile/delta_selector_test.go
+++ b/plumbing/format/packfile/delta_selector_test.go
@@ -146,7 +146,8 @@ func (s *DeltaSelectorSuite) createTestObjects() {
func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
// Different type
hashes := []plumbing.Hash{s.hashes["base"], s.hashes["treeType"]}
- otp, err := s.ds.ObjectsToPack(hashes)
+ deltaWindowSize := uint(10)
+ otp, err := s.ds.ObjectsToPack(hashes, deltaWindowSize)
c.Assert(err, IsNil)
c.Assert(len(otp), Equals, 2)
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]])
@@ -154,7 +155,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
// Size radically different
hashes = []plumbing.Hash{s.hashes["bigBase"], s.hashes["target"]}
- otp, err = s.ds.ObjectsToPack(hashes)
+ otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
c.Assert(err, IsNil)
c.Assert(len(otp), Equals, 2)
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["bigBase"]])
@@ -162,7 +163,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
// Delta Size Limit with no best delta yet
hashes = []plumbing.Hash{s.hashes["smallBase"], s.hashes["smallTarget"]}
- otp, err = s.ds.ObjectsToPack(hashes)
+ otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
c.Assert(err, IsNil)
c.Assert(len(otp), Equals, 2)
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["smallBase"]])
@@ -170,7 +171,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
// It will create the delta
hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]}
- otp, err = s.ds.ObjectsToPack(hashes)
+ otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
c.Assert(err, IsNil)
c.Assert(len(otp), Equals, 2)
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["target"]])
@@ -185,7 +186,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
s.hashes["o2"],
s.hashes["o3"],
}
- otp, err = s.ds.ObjectsToPack(hashes)
+ otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize)
c.Assert(err, IsNil)
c.Assert(len(otp), Equals, 3)
c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["o1"]])
@@ -201,20 +202,32 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) {
// a delta.
hashes = make([]plumbing.Hash, 0, deltaWindowSize+2)
hashes = append(hashes, s.hashes["base"])
- for i := 0; i < deltaWindowSize; i++ {
+ for i := uint(0); i < deltaWindowSize; i++ {
hashes = append(hashes, s.hashes["smallTarget"])
}
hashes = append(hashes, s.hashes["target"])
// Don't sort so we can easily check the sliding window without
// creating a bunch of new objects.
- otp, err = s.ds.objectsToPack(hashes)
+ otp, err = s.ds.objectsToPack(hashes, deltaWindowSize)
c.Assert(err, IsNil)
- err = s.ds.walk(otp)
+ err = s.ds.walk(otp, deltaWindowSize)
c.Assert(err, IsNil)
- c.Assert(len(otp), Equals, deltaWindowSize+2)
+ c.Assert(len(otp), Equals, int(deltaWindowSize)+2)
targetIdx := len(otp) - 1
c.Assert(otp[targetIdx].IsDelta(), Equals, false)
+
+ // Check that no deltas are created, and the objects are unsorted,
+ // if compression is off.
+ hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]}
+ otp, err = s.ds.ObjectsToPack(hashes, 0)
+ c.Assert(err, IsNil)
+ c.Assert(len(otp), Equals, 2)
+ c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]])
+ c.Assert(otp[0].IsDelta(), Equals, false)
+ c.Assert(otp[1].Original, Equals, s.store.Objects[s.hashes["target"]])
+ c.Assert(otp[1].IsDelta(), Equals, false)
+ c.Assert(otp[1].Depth, Equals, 0)
}
func (s *DeltaSelectorSuite) TestMaxDepth(c *C) {
diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go
index 7e9f822..4d56dc1 100644
--- a/plumbing/format/packfile/diff_delta.go
+++ b/plumbing/format/packfile/diff_delta.go
@@ -2,8 +2,6 @@ package packfile
import (
"bytes"
- "hash/adler32"
- "io/ioutil"
"gopkg.in/src-d/go-git.v4/plumbing"
)
@@ -26,26 +24,40 @@ const (
// To generate target again, you will need the obtained object and "base" one.
// Error will be returned if base or target object cannot be read.
func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) {
+ return getDelta(new(deltaIndex), base, target)
+}
+
+func getDelta(index *deltaIndex, base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) {
br, err := base.Reader()
if err != nil {
return nil, err
}
+ defer br.Close()
tr, err := target.Reader()
if err != nil {
return nil, err
}
+ defer tr.Close()
- bb, err := ioutil.ReadAll(br)
+ bb := bufPool.Get().(*bytes.Buffer)
+ bb.Reset()
+ defer bufPool.Put(bb)
+
+ _, err = bb.ReadFrom(br)
if err != nil {
return nil, err
}
- tb, err := ioutil.ReadAll(tr)
+ tb := bufPool.Get().(*bytes.Buffer)
+ tb.Reset()
+ defer bufPool.Put(tb)
+
+ _, err = tb.ReadFrom(tr)
if err != nil {
return nil, err
}
- db := DiffDelta(bb, tb)
+ db := diffDelta(index, bb.Bytes(), tb.Bytes())
delta := &plumbing.MemoryObject{}
_, err = delta.Write(db)
if err != nil {
@@ -59,21 +71,41 @@ func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, erro
}
// DiffDelta returns the delta that transforms src into tgt.
-func DiffDelta(src []byte, tgt []byte) []byte {
+func DiffDelta(src, tgt []byte) []byte {
+ return diffDelta(new(deltaIndex), src, tgt)
+}
+
+func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte {
buf := bufPool.Get().(*bytes.Buffer)
buf.Reset()
buf.Write(deltaEncodeSize(len(src)))
buf.Write(deltaEncodeSize(len(tgt)))
- sindex := initMatch(src)
+ if len(index.entries) == 0 {
+ index.init(src)
+ }
ibuf := bufPool.Get().(*bytes.Buffer)
ibuf.Reset()
for i := 0; i < len(tgt); i++ {
- offset, l := findMatch(src, tgt, sindex, i)
+ offset, l := index.findMatch(src, tgt, i)
- if l < s {
+ if l == 0 {
+ // couldn't find a match, just write the current byte and continue
ibuf.WriteByte(tgt[i])
+ } else if l < 0 {
+ // src is less than blksz, copy the rest of the target to avoid
+ // calls to findMatch
+ for ; i < len(tgt); i++ {
+ ibuf.WriteByte(tgt[i])
+ }
+ } else if l < s {
+ // remaining target is less than blksz, copy what's left of it
+ // and avoid calls to findMatch
+ for j := i; j < i+l; j++ {
+ ibuf.WriteByte(tgt[j])
+ }
+ i += l - 1
} else {
encodeInsertOperation(ibuf, buf)
@@ -126,52 +158,6 @@ func encodeInsertOperation(ibuf, buf *bytes.Buffer) {
ibuf.Reset()
}
-func initMatch(src []byte) map[uint32]int {
- i := 0
- index := make(map[uint32]int)
- for {
- if i+s > len(src) {
- break
- }
-
- ch := adler32.Checksum(src[i : i+s])
- index[ch] = i
- i += s
- }
-
- return index
-}
-
-func findMatch(src, tgt []byte, sindex map[uint32]int, tgtOffset int) (srcOffset, l int) {
- if len(tgt) >= tgtOffset+s {
- ch := adler32.Checksum(tgt[tgtOffset : tgtOffset+s])
- var ok bool
- srcOffset, ok = sindex[ch]
- if !ok {
- return
- }
-
- l = matchLength(src, tgt, tgtOffset, srcOffset)
- }
-
- return
-}
-
-func matchLength(src, tgt []byte, otgt, osrc int) int {
- l := 0
- for {
- if (osrc >= len(src) || otgt >= len(tgt)) || src[osrc] != tgt[otgt] {
- break
- }
-
- l++
- osrc++
- otgt++
- }
-
- return l
-}
-
func deltaEncodeSize(size int) []byte {
var ret []byte
c := size & 0x7f
diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go
index 1426559..7ee6546 100644
--- a/plumbing/format/packfile/encoder.go
+++ b/plumbing/format/packfile/encoder.go
@@ -14,10 +14,10 @@ import (
// Encoder gets the data from the storage and write it into the writer in PACK
// format
type Encoder struct {
- selector *deltaSelector
- w *offsetWriter
- zw *zlib.Writer
- hasher plumbing.Hasher
+ selector *deltaSelector
+ w *offsetWriter
+ zw *zlib.Writer
+ hasher plumbing.Hasher
// offsets is a map of object hashes to corresponding offsets in the packfile.
// It is used to determine offset of the base of a delta when a OFS_DELTA is
// used.
@@ -45,10 +45,15 @@ func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *E
}
}
-// Encode creates a packfile containing all the objects referenced in hashes
-// and writes it to the writer in the Encoder.
-func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) {
- objects, err := e.selector.ObjectsToPack(hashes)
+// Encode creates a packfile containing all the objects referenced in
+// hashes and writes it to the writer in the Encoder. `packWindow`
+// specifies the size of the sliding window used to compare objects
+// for delta compression; 0 turns off delta compression entirely.
+func (e *Encoder) Encode(
+ hashes []plumbing.Hash,
+ packWindow uint,
+) (plumbing.Hash, error) {
+ objects, err := e.selector.ObjectsToPack(hashes, packWindow)
if err != nil {
return plumbing.ZeroHash, err
}
@@ -137,7 +142,7 @@ func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) err
// for OFS_DELTA, offset of the base is interpreted as negative offset
// relative to the type-byte of the header of the ofs-delta entry.
- relativeOffset := deltaOffset-baseOffset
+ relativeOffset := deltaOffset - baseOffset
if relativeOffset <= 0 {
return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
}
diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go
index d92e2c4..8011596 100644
--- a/plumbing/format/packfile/encoder_advanced_test.go
+++ b/plumbing/format/packfile/encoder_advanced_test.go
@@ -10,7 +10,7 @@ import (
"gopkg.in/src-d/go-git.v4/storage/filesystem"
"gopkg.in/src-d/go-git.v4/storage/memory"
- "github.com/src-d/go-git-fixtures"
+ "gopkg.in/src-d/go-git-fixtures.v3"
. "gopkg.in/check.v1"
)
@@ -27,12 +27,23 @@ func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) {
fixs.Test(c, func(f *fixtures.Fixture) {
storage, err := filesystem.NewStorage(f.DotGit())
c.Assert(err, IsNil)
- s.testEncodeDecode(c, storage)
+ s.testEncodeDecode(c, storage, 10)
})
}
-func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {
+func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) {
+ fixs := fixtures.Basic().ByTag("packfile").ByTag(".git")
+ fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git").
+ ByTag("packfile").ByTag(".git").One())
+ fixs.Test(c, func(f *fixtures.Fixture) {
+ storage, err := filesystem.NewStorage(f.DotGit())
+ c.Assert(err, IsNil)
+ s.testEncodeDecode(c, storage, 0)
+ })
+}
+
+func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, packWindow uint) {
objIter, err := storage.IterEncodedObjects(plumbing.AnyObject)
c.Assert(err, IsNil)
@@ -57,7 +68,7 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) {
buf := bytes.NewBuffer(nil)
enc := NewEncoder(buf, storage, false)
- _, err = enc.Encode(hashes)
+ _, err = enc.Encode(hashes, packWindow)
c.Assert(err, IsNil)
scanner := NewScanner(buf)
diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go
index b5b0c42..f40517d 100644
--- a/plumbing/format/packfile/encoder_test.go
+++ b/plumbing/format/packfile/encoder_test.go
@@ -3,11 +3,11 @@ package packfile
import (
"bytes"
- "github.com/src-d/go-git-fixtures"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/storage/memory"
. "gopkg.in/check.v1"
+ "gopkg.in/src-d/go-git-fixtures.v3"
)
type EncoderSuite struct {
@@ -26,7 +26,7 @@ func (s *EncoderSuite) SetUpTest(c *C) {
}
func (s *EncoderSuite) TestCorrectPackHeader(c *C) {
- hash, err := s.enc.Encode([]plumbing.Hash{})
+ hash, err := s.enc.Encode([]plumbing.Hash{}, 10)
c.Assert(err, IsNil)
hb := [20]byte(hash)
@@ -47,7 +47,7 @@ func (s *EncoderSuite) TestCorrectPackWithOneEmptyObject(c *C) {
_, err := s.store.SetEncodedObject(o)
c.Assert(err, IsNil)
- hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()})
+ hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}, 10)
c.Assert(err, IsNil)
// PACK + VERSION(2) + OBJECT NUMBER(1)
@@ -74,13 +74,13 @@ func (s *EncoderSuite) TestMaxObjectSize(c *C) {
o.SetType(plumbing.CommitObject)
_, err := s.store.SetEncodedObject(o)
c.Assert(err, IsNil)
- hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()})
+ hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}, 10)
c.Assert(err, IsNil)
c.Assert(hash.IsZero(), Not(Equals), true)
}
func (s *EncoderSuite) TestHashNotFound(c *C) {
- h, err := s.enc.Encode([]plumbing.Hash{plumbing.NewHash("BAD")})
+ h, err := s.enc.Encode([]plumbing.Hash{plumbing.NewHash("BAD")}, 10)
c.Assert(h, Equals, plumbing.ZeroHash)
c.Assert(err, NotNil)
c.Assert(err, Equals, plumbing.ErrObjectNotFound)
diff --git a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go
index 14337d1..e22e783 100644
--- a/plumbing/format/packfile/object_pack.go
+++ b/plumbing/format/packfile/object_pack.go
@@ -84,11 +84,7 @@ func (o *ObjectToPack) Size() int64 {
}
func (o *ObjectToPack) IsDelta() bool {
- if o.Base != nil {
- return true
- }
-
- return false
+ return o.Base != nil
}
func (o *ObjectToPack) SetDelta(base *ObjectToPack, delta plumbing.EncodedObject) {
diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go
index 976cabc..c604851 100644
--- a/plumbing/format/packfile/patch_delta.go
+++ b/plumbing/format/packfile/patch_delta.go
@@ -38,11 +38,8 @@ func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) error {
target.SetSize(int64(len(dst)))
- if _, err := w.Write(dst); err != nil {
- return err
- }
-
- return nil
+ _, err = w.Write(dst)
+ return err
}
var (
diff --git a/plumbing/format/packfile/scanner_test.go b/plumbing/format/packfile/scanner_test.go
index 1ca8b6e..ab87642 100644
--- a/plumbing/format/packfile/scanner_test.go
+++ b/plumbing/format/packfile/scanner_test.go
@@ -6,7 +6,7 @@ import (
"gopkg.in/src-d/go-git.v4/plumbing"
- "github.com/src-d/go-git-fixtures"
+ "gopkg.in/src-d/go-git-fixtures.v3"
. "gopkg.in/check.v1"
)