Diffstat (limited to 'plumbing/format/packfile')
-rw-r--r--  plumbing/format/packfile/decoder.go                |  14
-rw-r--r--  plumbing/format/packfile/decoder_test.go           |  26
-rw-r--r--  plumbing/format/packfile/delta_index.go            | 297
-rw-r--r--  plumbing/format/packfile/delta_selector.go         |  94
-rw-r--r--  plumbing/format/packfile/delta_selector_test.go    |  31
-rw-r--r--  plumbing/format/packfile/diff_delta.go             |  96
-rw-r--r--  plumbing/format/packfile/encoder.go                |  23
-rw-r--r--  plumbing/format/packfile/encoder_advanced_test.go  |  19
-rw-r--r--  plumbing/format/packfile/encoder_test.go           |  10
-rw-r--r--  plumbing/format/packfile/object_pack.go            |   6
-rw-r--r--  plumbing/format/packfile/patch_delta.go            |   7
-rw-r--r--  plumbing/format/packfile/scanner_test.go           |   2
12 files changed, 509 insertions, 116 deletions
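
The headline change in this diff is that delta compression becomes configurable and cheaper: Encoder.Encode and deltaSelector.ObjectsToPack now take a packWindow argument (the size of the sliding window used to compare candidate bases; 0 disables deltification), the adler32-based matcher in diff_delta.go is replaced by a JGit-style deltaIndex cached per base object, and the selector walks each object-type group in its own goroutine. Below is a minimal usage sketch of the new Encode signature; it assumes the go-git v4 API at this revision, and the in-memory storage and empty hash list are illustrative only.

package main

import (
	"bytes"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
	"gopkg.in/src-d/go-git.v4/storage/memory"
)

func main() {
	storage := memory.NewStorage()

	// Hashes of objects already present in storage; left empty for brevity,
	// which still produces a valid (empty) packfile.
	var hashes []plumbing.Hash

	var buf bytes.Buffer
	enc := packfile.NewEncoder(&buf, storage, false)

	// packWindow = 10 mirrors the command-line git default window;
	// packWindow = 0 turns delta compression off and writes full objects.
	if _, err := enc.Encode(hashes, 10); err != nil {
		panic(err)
	}
}

The exported delta helpers keep their signatures while the internals change: GetDelta now reuses pooled buffers and a per-base deltaIndex instead of rebuilding an adler32 map for every pair. A round-trip sketch under the same API assumption follows; the blob contents and the memBlob helper are made up for illustration.

package main

import (
	"fmt"
	"io/ioutil"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
)

// memBlob is a throwaway helper (not part of go-git) for in-memory blobs.
func memBlob(data string) *plumbing.MemoryObject {
	o := &plumbing.MemoryObject{}
	o.SetType(plumbing.BlobObject)
	o.Write([]byte(data))
	return o
}

func main() {
	base := memBlob("the quick brown fox jumps over the lazy dog\n")
	target := memBlob("the quick brown fox jumps over the lazy cat\n")

	// Compute a delta that rewrites base into target.
	delta, err := packfile.GetDelta(base, target)
	if err != nil {
		panic(err)
	}
	dr, _ := delta.Reader()
	raw, _ := ioutil.ReadAll(dr)

	// Apply the delta on top of base to reconstruct target.
	restored := &plumbing.MemoryObject{}
	restored.SetType(plumbing.BlobObject)
	if err := packfile.ApplyDelta(restored, base, raw); err != nil {
		panic(err)
	}

	r, _ := restored.Reader()
	out, _ := ioutil.ReadAll(r)
	fmt.Print(string(out)) // prints the reconstructed target contents
}
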
diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go index 3d475b2..ad72ea0 100644 --- a/plumbing/format/packfile/decoder.go +++ b/plumbing/format/packfile/decoder.go @@ -105,7 +105,7 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer, o: o, idx: NewIndex(0), - offsetToType: make(map[int64]plumbing.ObjectType, 0), + offsetToType: make(map[int64]plumbing.ObjectType), decoderType: t, }, nil } @@ -207,12 +207,16 @@ func (d *Decoder) decodeObjectsWithObjectStorerTx(count int) error { // constructor, if the object decoded is not equals to the specified one, nil will // be returned func (d *Decoder) DecodeObject() (plumbing.EncodedObject, error) { + return d.doDecodeObject(d.decoderType) +} + +func (d *Decoder) doDecodeObject(t plumbing.ObjectType) (plumbing.EncodedObject, error) { h, err := d.s.NextObjectHeader() if err != nil { return nil, err } - if d.decoderType == plumbing.AnyObject { + if t == plumbing.AnyObject { return d.decodeByHeader(h) } @@ -279,6 +283,7 @@ func (d *Decoder) decodeByHeader(h *ObjectHeader) (plumbing.EncodedObject, error obj := d.newObject() obj.SetSize(h.Length) obj.SetType(h.Type) + var crc uint32 var err error switch h.Type { @@ -315,7 +320,8 @@ func (d *Decoder) newObject() plumbing.EncodedObject { // returned is added into a internal index. This is intended to be able to regenerate // objects from deltas (offset deltas or reference deltas) without an package index // (.idx file). If Decode wasn't called previously objects offset should provided -// using the SetOffsets method. +// using the SetOffsets method. It decodes the object regardless of the Decoder +// type. func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) { if !d.s.IsSeekable { return nil, ErrNonSeekable @@ -333,7 +339,7 @@ func (d *Decoder) DecodeObjectAt(offset int64) (plumbing.EncodedObject, error) { } }() - return d.DecodeObject() + return d.doDecodeObject(plumbing.AnyObject) } func (d *Decoder) fillRegularObjectContent(obj plumbing.EncodedObject) (uint32, error) { diff --git a/plumbing/format/packfile/decoder_test.go b/plumbing/format/packfile/decoder_test.go index ecf7c81..1a1a74a 100644 --- a/plumbing/format/packfile/decoder_test.go +++ b/plumbing/format/packfile/decoder_test.go @@ -3,9 +3,6 @@ package packfile_test import ( "io" - "gopkg.in/src-d/go-billy.v3/memfs" - - "github.com/src-d/go-git-fixtures" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/idxfile" "gopkg.in/src-d/go-git.v4/plumbing/format/packfile" @@ -14,6 +11,8 @@ import ( "gopkg.in/src-d/go-git.v4/storage/memory" . 
"gopkg.in/check.v1" + "gopkg.in/src-d/go-billy.v4/memfs" + "gopkg.in/src-d/go-git-fixtures.v3" ) type ReaderSuite struct { @@ -293,7 +292,7 @@ func (s *ReaderSuite) TestDecodeCRCs(c *C) { c.Assert(int(sum), Equals, 78022211966) } -func (s *ReaderSuite) TestReadObjectAt(c *C) { +func (s *ReaderSuite) TestDecodeObjectAt(c *C) { f := fixtures.Basic().One() scanner := packfile.NewScanner(f.Packfile()) d, err := packfile.NewDecoder(scanner, nil) @@ -311,6 +310,25 @@ func (s *ReaderSuite) TestReadObjectAt(c *C) { c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5") } +func (s *ReaderSuite) TestDecodeObjectAtForType(c *C) { + f := fixtures.Basic().One() + scanner := packfile.NewScanner(f.Packfile()) + d, err := packfile.NewDecoderForType(scanner, nil, plumbing.TreeObject) + c.Assert(err, IsNil) + + // when the packfile is ref-delta based, the offsets are required + if f.Is("ref-delta") { + d.SetIndex(getIndexFromIdxFile(f.Idx())) + } + + // the objects at reference 186, is a delta, so should be recall, + // without being read before. + obj, err := d.DecodeObjectAt(186) + c.Assert(err, IsNil) + c.Assert(obj.Type(), Equals, plumbing.CommitObject) + c.Assert(obj.Hash().String(), Equals, "6ecf0ef2c2dffb796033e5a02219af86ec6584e5") +} + func (s *ReaderSuite) TestIndex(c *C) { f := fixtures.Basic().One() scanner := packfile.NewScanner(f.Packfile()) diff --git a/plumbing/format/packfile/delta_index.go b/plumbing/format/packfile/delta_index.go new file mode 100644 index 0000000..07a6112 --- /dev/null +++ b/plumbing/format/packfile/delta_index.go @@ -0,0 +1,297 @@ +package packfile + +const blksz = 16 +const maxChainLength = 64 + +// deltaIndex is a modified version of JGit's DeltaIndex adapted to our current +// design. +type deltaIndex struct { + table []int + entries []int + mask int +} + +func (idx *deltaIndex) init(buf []byte) { + scanner := newDeltaIndexScanner(buf, len(buf)) + idx.mask = scanner.mask + idx.table = scanner.table + idx.entries = make([]int, countEntries(scanner)+1) + idx.copyEntries(scanner) +} + +// findMatch returns the offset of src where the block starting at tgtOffset +// is and the length of the match. A length of 0 means there was no match. A +// length of -1 means the src length is lower than the blksz and whatever +// other positive length is the length of the match in bytes. +func (idx *deltaIndex) findMatch(src, tgt []byte, tgtOffset int) (srcOffset, l int) { + if len(tgt) < tgtOffset+s { + return 0, len(tgt) - tgtOffset + } + + if len(src) < blksz { + return 0, -1 + } + + if len(tgt) >= tgtOffset+s && len(src) >= blksz { + h := hashBlock(tgt, tgtOffset) + tIdx := h & idx.mask + eIdx := idx.table[tIdx] + if eIdx != 0 { + srcOffset = idx.entries[eIdx] + } else { + return + } + + l = matchLength(src, tgt, tgtOffset, srcOffset) + } + + return +} + +func matchLength(src, tgt []byte, otgt, osrc int) (l int) { + lensrc := len(src) + lentgt := len(tgt) + for (osrc < lensrc && otgt < lentgt) && src[osrc] == tgt[otgt] { + l++ + osrc++ + otgt++ + } + return +} + +func countEntries(scan *deltaIndexScanner) (cnt int) { + // Figure out exactly how many entries we need. As we do the + // enumeration truncate any delta chains longer than what we + // are willing to scan during encode. This keeps the encode + // logic linear in the size of the input rather than quadratic. 
+ for i := 0; i < len(scan.table); i++ { + h := scan.table[i] + if h == 0 { + continue + } + + size := 0 + for { + size++ + if size == maxChainLength { + scan.next[h] = 0 + break + } + h = scan.next[h] + + if h == 0 { + break + } + } + cnt += size + } + + return +} + +func (idx *deltaIndex) copyEntries(scanner *deltaIndexScanner) { + // Rebuild the entries list from the scanner, positioning all + // blocks in the same hash chain next to each other. We can + // then later discard the next list, along with the scanner. + // + next := 1 + for i := 0; i < len(idx.table); i++ { + h := idx.table[i] + if h == 0 { + continue + } + + idx.table[i] = next + for { + idx.entries[next] = scanner.entries[h] + next++ + h = scanner.next[h] + + if h == 0 { + break + } + } + } +} + +type deltaIndexScanner struct { + table []int + entries []int + next []int + mask int + count int +} + +func newDeltaIndexScanner(buf []byte, size int) *deltaIndexScanner { + size -= size % blksz + worstCaseBlockCnt := size / blksz + if worstCaseBlockCnt < 1 { + return new(deltaIndexScanner) + } + + tableSize := tableSize(worstCaseBlockCnt) + scanner := &deltaIndexScanner{ + table: make([]int, tableSize), + mask: tableSize - 1, + entries: make([]int, worstCaseBlockCnt+1), + next: make([]int, worstCaseBlockCnt+1), + } + + scanner.scan(buf, size) + return scanner +} + +// slightly modified version of JGit's DeltaIndexScanner. We store the offset on the entries +// instead of the entries and the key, so we avoid operations to retrieve the offset later, as +// we don't use the key. +// See: https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaIndexScanner.java +func (s *deltaIndexScanner) scan(buf []byte, end int) { + lastHash := 0 + ptr := end - blksz + + for { + key := hashBlock(buf, ptr) + tIdx := key & s.mask + head := s.table[tIdx] + if head != 0 && lastHash == key { + s.entries[head] = ptr + } else { + s.count++ + eIdx := s.count + s.entries[eIdx] = ptr + s.next[eIdx] = head + s.table[tIdx] = eIdx + } + + lastHash = key + ptr -= blksz + + if 0 > ptr { + break + } + } +} + +func tableSize(worstCaseBlockCnt int) int { + shift := 32 - leadingZeros(uint32(worstCaseBlockCnt)) + sz := 1 << uint(shift-1) + if sz < worstCaseBlockCnt { + sz <<= 1 + } + return sz +} + +// use https://golang.org/pkg/math/bits/#LeadingZeros32 in the future +func leadingZeros(x uint32) (n int) { + if x >= 1<<16 { + x >>= 16 + n = 16 + } + if x >= 1<<8 { + x >>= 8 + n += 8 + } + n += int(len8tab[x]) + return 32 - n +} + +var len8tab = [256]uint8{ + 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, +} + +func hashBlock(raw []byte, ptr int) int { + // The first 4 steps collapse out into a 4 byte big-endian decode, + // with a larger right shift as we combined shift lefts together. + // + hash := ((uint32(raw[ptr]) & 0xff) << 24) | + ((uint32(raw[ptr+1]) & 0xff) << 16) | + ((uint32(raw[ptr+2]) & 0xff) << 8) | + (uint32(raw[ptr+3]) & 0xff) + hash ^= T[hash>>31] + + hash = ((hash << 8) | (uint32(raw[ptr+4]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+5]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+6]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+7]) & 0xff)) ^ T[hash>>23] + + hash = ((hash << 8) | (uint32(raw[ptr+8]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+9]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+10]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+11]) & 0xff)) ^ T[hash>>23] + + hash = ((hash << 8) | (uint32(raw[ptr+12]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+13]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+14]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+15]) & 0xff)) ^ T[hash>>23] + + return int(hash) +} + +var T = []uint32{0x00000000, 0xd4c6b32d, 0x7d4bd577, + 0xa98d665a, 0x2e5119c3, 0xfa97aaee, 0x531accb4, 0x87dc7f99, + 0x5ca23386, 0x886480ab, 0x21e9e6f1, 0xf52f55dc, 0x72f32a45, + 0xa6359968, 0x0fb8ff32, 0xdb7e4c1f, 0x6d82d421, 0xb944670c, + 0x10c90156, 0xc40fb27b, 0x43d3cde2, 0x97157ecf, 0x3e981895, + 0xea5eabb8, 0x3120e7a7, 0xe5e6548a, 0x4c6b32d0, 0x98ad81fd, + 0x1f71fe64, 0xcbb74d49, 0x623a2b13, 0xb6fc983e, 0x0fc31b6f, + 0xdb05a842, 0x7288ce18, 0xa64e7d35, 0x219202ac, 0xf554b181, + 0x5cd9d7db, 0x881f64f6, 0x536128e9, 0x87a79bc4, 0x2e2afd9e, + 0xfaec4eb3, 0x7d30312a, 0xa9f68207, 0x007be45d, 0xd4bd5770, + 0x6241cf4e, 0xb6877c63, 0x1f0a1a39, 0xcbcca914, 0x4c10d68d, + 0x98d665a0, 0x315b03fa, 0xe59db0d7, 0x3ee3fcc8, 0xea254fe5, + 0x43a829bf, 0x976e9a92, 0x10b2e50b, 0xc4745626, 0x6df9307c, + 0xb93f8351, 0x1f8636de, 0xcb4085f3, 0x62cde3a9, 0xb60b5084, + 0x31d72f1d, 0xe5119c30, 0x4c9cfa6a, 0x985a4947, 0x43240558, + 0x97e2b675, 0x3e6fd02f, 0xeaa96302, 0x6d751c9b, 0xb9b3afb6, + 0x103ec9ec, 0xc4f87ac1, 0x7204e2ff, 0xa6c251d2, 0x0f4f3788, + 0xdb8984a5, 0x5c55fb3c, 0x88934811, 0x211e2e4b, 0xf5d89d66, + 0x2ea6d179, 0xfa606254, 0x53ed040e, 0x872bb723, 0x00f7c8ba, + 0xd4317b97, 0x7dbc1dcd, 0xa97aaee0, 0x10452db1, 0xc4839e9c, + 0x6d0ef8c6, 0xb9c84beb, 0x3e143472, 0xead2875f, 0x435fe105, + 0x97995228, 0x4ce71e37, 0x9821ad1a, 0x31accb40, 0xe56a786d, + 0x62b607f4, 0xb670b4d9, 0x1ffdd283, 0xcb3b61ae, 0x7dc7f990, + 0xa9014abd, 0x008c2ce7, 0xd44a9fca, 0x5396e053, 0x8750537e, + 0x2edd3524, 0xfa1b8609, 0x2165ca16, 0xf5a3793b, 0x5c2e1f61, + 0x88e8ac4c, 0x0f34d3d5, 0xdbf260f8, 0x727f06a2, 0xa6b9b58f, + 0x3f0c6dbc, 0xebcade91, 0x4247b8cb, 0x96810be6, 0x115d747f, + 0xc59bc752, 0x6c16a108, 0xb8d01225, 0x63ae5e3a, 
0xb768ed17, + 0x1ee58b4d, 0xca233860, 0x4dff47f9, 0x9939f4d4, 0x30b4928e, + 0xe47221a3, 0x528eb99d, 0x86480ab0, 0x2fc56cea, 0xfb03dfc7, + 0x7cdfa05e, 0xa8191373, 0x01947529, 0xd552c604, 0x0e2c8a1b, + 0xdaea3936, 0x73675f6c, 0xa7a1ec41, 0x207d93d8, 0xf4bb20f5, + 0x5d3646af, 0x89f0f582, 0x30cf76d3, 0xe409c5fe, 0x4d84a3a4, + 0x99421089, 0x1e9e6f10, 0xca58dc3d, 0x63d5ba67, 0xb713094a, + 0x6c6d4555, 0xb8abf678, 0x11269022, 0xc5e0230f, 0x423c5c96, + 0x96faefbb, 0x3f7789e1, 0xebb13acc, 0x5d4da2f2, 0x898b11df, + 0x20067785, 0xf4c0c4a8, 0x731cbb31, 0xa7da081c, 0x0e576e46, + 0xda91dd6b, 0x01ef9174, 0xd5292259, 0x7ca44403, 0xa862f72e, + 0x2fbe88b7, 0xfb783b9a, 0x52f55dc0, 0x8633eeed, 0x208a5b62, + 0xf44ce84f, 0x5dc18e15, 0x89073d38, 0x0edb42a1, 0xda1df18c, + 0x739097d6, 0xa75624fb, 0x7c2868e4, 0xa8eedbc9, 0x0163bd93, + 0xd5a50ebe, 0x52797127, 0x86bfc20a, 0x2f32a450, 0xfbf4177d, + 0x4d088f43, 0x99ce3c6e, 0x30435a34, 0xe485e919, 0x63599680, + 0xb79f25ad, 0x1e1243f7, 0xcad4f0da, 0x11aabcc5, 0xc56c0fe8, + 0x6ce169b2, 0xb827da9f, 0x3ffba506, 0xeb3d162b, 0x42b07071, + 0x9676c35c, 0x2f49400d, 0xfb8ff320, 0x5202957a, 0x86c42657, + 0x011859ce, 0xd5deeae3, 0x7c538cb9, 0xa8953f94, 0x73eb738b, + 0xa72dc0a6, 0x0ea0a6fc, 0xda6615d1, 0x5dba6a48, 0x897cd965, + 0x20f1bf3f, 0xf4370c12, 0x42cb942c, 0x960d2701, 0x3f80415b, + 0xeb46f276, 0x6c9a8def, 0xb85c3ec2, 0x11d15898, 0xc517ebb5, + 0x1e69a7aa, 0xcaaf1487, 0x632272dd, 0xb7e4c1f0, 0x3038be69, + 0xe4fe0d44, 0x4d736b1e, 0x99b5d833, +} diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go index cc0ae0f..51adcdf 100644 --- a/plumbing/format/packfile/delta_selector.go +++ b/plumbing/format/packfile/delta_selector.go @@ -2,15 +2,13 @@ package packfile import ( "sort" + "sync" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) const ( - // How far back in the sorted list to search for deltas. 10 is - // the default in command line git. - deltaWindowSize = 10 // deltas based on deltas, how many steps we can do. // 50 is the default value used in JGit maxDepth = int64(50) @@ -30,27 +28,75 @@ func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector { return &deltaSelector{s} } -// ObjectsToPack creates a list of ObjectToPack from the hashes provided, -// creating deltas if it's suitable, using an specific internal logic -func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) { - otp, err := dw.objectsToPack(hashes) +// ObjectsToPack creates a list of ObjectToPack from the hashes +// provided, creating deltas if it's suitable, using an specific +// internal logic. `packWindow` specifies the size of the sliding +// window used to compare objects for delta compression; 0 turns off +// delta compression entirely. 
+func (dw *deltaSelector) ObjectsToPack( + hashes []plumbing.Hash, + packWindow uint, +) ([]*ObjectToPack, error) { + otp, err := dw.objectsToPack(hashes, packWindow) if err != nil { return nil, err } + if packWindow == 0 { + return otp, nil + } + dw.sort(otp) - if err := dw.walk(otp); err != nil { + var objectGroups [][]*ObjectToPack + var prev *ObjectToPack + i := -1 + for _, obj := range otp { + if prev == nil || prev.Type() != obj.Type() { + objectGroups = append(objectGroups, []*ObjectToPack{obj}) + i++ + prev = obj + } else { + objectGroups[i] = append(objectGroups[i], obj) + } + } + + var wg sync.WaitGroup + var once sync.Once + for _, objs := range objectGroups { + objs := objs + wg.Add(1) + go func() { + if walkErr := dw.walk(objs, packWindow); walkErr != nil { + once.Do(func() { + err = walkErr + }) + } + wg.Done() + }() + } + wg.Wait() + + if err != nil { return nil, err } return otp, nil } -func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) { +func (dw *deltaSelector) objectsToPack( + hashes []plumbing.Hash, + packWindow uint, +) ([]*ObjectToPack, error) { var objectsToPack []*ObjectToPack for _, h := range hashes { - o, err := dw.encodedDeltaObject(h) + var o plumbing.EncodedObject + var err error + if packWindow == 0 { + o, err = dw.encodedObject(h) + } else { + o, err = dw.encodedDeltaObject(h) + } if err != nil { return nil, err } @@ -63,6 +109,10 @@ func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, objectsToPack = append(objectsToPack, otp) } + if packWindow == 0 { + return objectsToPack, nil + } + if err := dw.fixAndBreakChains(objectsToPack); err != nil { return nil, err } @@ -171,8 +221,18 @@ func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) { sort.Sort(byTypeAndSize(objectsToPack)) } -func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { +func (dw *deltaSelector) walk( + objectsToPack []*ObjectToPack, + packWindow uint, +) error { + indexMap := make(map[plumbing.Hash]*deltaIndex) for i := 0; i < len(objectsToPack); i++ { + // Clean up the index map for anything outside our pack + // window, to save memory. + if i > int(packWindow) { + delete(indexMap, objectsToPack[i-int(packWindow)].Hash()) + } + target := objectsToPack[i] // If we already have a delta, we don't try to find a new one for this @@ -187,7 +247,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { continue } - for j := i - 1; j >= 0 && i-j < deltaWindowSize; j-- { + for j := i - 1; j >= 0 && i-j < int(packWindow); j-- { base := objectsToPack[j] // Objects must use only the same type as their delta base. // Since objectsToPack is sorted by type and size, once we find @@ -196,7 +256,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { break } - if err := dw.tryToDeltify(base, target); err != nil { + if err := dw.tryToDeltify(indexMap, base, target); err != nil { return err } } @@ -205,7 +265,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { return nil } -func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error { +func (dw *deltaSelector) tryToDeltify(indexMap map[plumbing.Hash]*deltaIndex, base, target *ObjectToPack) error { // If the sizes are radically different, this is a bad pairing. 
if target.Size() < base.Size()>>4 { return nil @@ -238,8 +298,12 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error { return err } + if _, ok := indexMap[base.Hash()]; !ok { + indexMap[base.Hash()] = new(deltaIndex) + } + // Now we can generate the delta using originals - delta, err := GetDelta(base.Original, target.Original) + delta, err := getDelta(indexMap[base.Hash()], base.Original, target.Original) if err != nil { return err } diff --git a/plumbing/format/packfile/delta_selector_test.go b/plumbing/format/packfile/delta_selector_test.go index ca4a96b..7d7fd0c 100644 --- a/plumbing/format/packfile/delta_selector_test.go +++ b/plumbing/format/packfile/delta_selector_test.go @@ -146,7 +146,8 @@ func (s *DeltaSelectorSuite) createTestObjects() { func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // Different type hashes := []plumbing.Hash{s.hashes["base"], s.hashes["treeType"]} - otp, err := s.ds.ObjectsToPack(hashes) + deltaWindowSize := uint(10) + otp, err := s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 2) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]]) @@ -154,7 +155,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // Size radically different hashes = []plumbing.Hash{s.hashes["bigBase"], s.hashes["target"]} - otp, err = s.ds.ObjectsToPack(hashes) + otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 2) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["bigBase"]]) @@ -162,7 +163,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // Delta Size Limit with no best delta yet hashes = []plumbing.Hash{s.hashes["smallBase"], s.hashes["smallTarget"]} - otp, err = s.ds.ObjectsToPack(hashes) + otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 2) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["smallBase"]]) @@ -170,7 +171,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // It will create the delta hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]} - otp, err = s.ds.ObjectsToPack(hashes) + otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 2) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["target"]]) @@ -185,7 +186,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { s.hashes["o2"], s.hashes["o3"], } - otp, err = s.ds.ObjectsToPack(hashes) + otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 3) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["o1"]]) @@ -201,20 +202,32 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // a delta. hashes = make([]plumbing.Hash, 0, deltaWindowSize+2) hashes = append(hashes, s.hashes["base"]) - for i := 0; i < deltaWindowSize; i++ { + for i := uint(0); i < deltaWindowSize; i++ { hashes = append(hashes, s.hashes["smallTarget"]) } hashes = append(hashes, s.hashes["target"]) // Don't sort so we can easily check the sliding window without // creating a bunch of new objects. 
- otp, err = s.ds.objectsToPack(hashes) + otp, err = s.ds.objectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) - err = s.ds.walk(otp) + err = s.ds.walk(otp, deltaWindowSize) c.Assert(err, IsNil) - c.Assert(len(otp), Equals, deltaWindowSize+2) + c.Assert(len(otp), Equals, int(deltaWindowSize)+2) targetIdx := len(otp) - 1 c.Assert(otp[targetIdx].IsDelta(), Equals, false) + + // Check that no deltas are created, and the objects are unsorted, + // if compression is off. + hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]} + otp, err = s.ds.ObjectsToPack(hashes, 0) + c.Assert(err, IsNil) + c.Assert(len(otp), Equals, 2) + c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]]) + c.Assert(otp[0].IsDelta(), Equals, false) + c.Assert(otp[1].Original, Equals, s.store.Objects[s.hashes["target"]]) + c.Assert(otp[1].IsDelta(), Equals, false) + c.Assert(otp[1].Depth, Equals, 0) } func (s *DeltaSelectorSuite) TestMaxDepth(c *C) { diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go index 7e9f822..4d56dc1 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -2,8 +2,6 @@ package packfile import ( "bytes" - "hash/adler32" - "io/ioutil" "gopkg.in/src-d/go-git.v4/plumbing" ) @@ -26,26 +24,40 @@ const ( // To generate target again, you will need the obtained object and "base" one. // Error will be returned if base or target object cannot be read. func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) { + return getDelta(new(deltaIndex), base, target) +} + +func getDelta(index *deltaIndex, base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) { br, err := base.Reader() if err != nil { return nil, err } + defer br.Close() tr, err := target.Reader() if err != nil { return nil, err } + defer tr.Close() - bb, err := ioutil.ReadAll(br) + bb := bufPool.Get().(*bytes.Buffer) + bb.Reset() + defer bufPool.Put(bb) + + _, err = bb.ReadFrom(br) if err != nil { return nil, err } - tb, err := ioutil.ReadAll(tr) + tb := bufPool.Get().(*bytes.Buffer) + tb.Reset() + defer bufPool.Put(tb) + + _, err = tb.ReadFrom(tr) if err != nil { return nil, err } - db := DiffDelta(bb, tb) + db := diffDelta(index, bb.Bytes(), tb.Bytes()) delta := &plumbing.MemoryObject{} _, err = delta.Write(db) if err != nil { @@ -59,21 +71,41 @@ func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, erro } // DiffDelta returns the delta that transforms src into tgt. 
-func DiffDelta(src []byte, tgt []byte) []byte { +func DiffDelta(src, tgt []byte) []byte { + return diffDelta(new(deltaIndex), src, tgt) +} + +func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte { buf := bufPool.Get().(*bytes.Buffer) buf.Reset() buf.Write(deltaEncodeSize(len(src))) buf.Write(deltaEncodeSize(len(tgt))) - sindex := initMatch(src) + if len(index.entries) == 0 { + index.init(src) + } ibuf := bufPool.Get().(*bytes.Buffer) ibuf.Reset() for i := 0; i < len(tgt); i++ { - offset, l := findMatch(src, tgt, sindex, i) + offset, l := index.findMatch(src, tgt, i) - if l < s { + if l == 0 { + // couldn't find a match, just write the current byte and continue ibuf.WriteByte(tgt[i]) + } else if l < 0 { + // src is less than blksz, copy the rest of the target to avoid + // calls to findMatch + for ; i < len(tgt); i++ { + ibuf.WriteByte(tgt[i]) + } + } else if l < s { + // remaining target is less than blksz, copy what's left of it + // and avoid calls to findMatch + for j := i; j < i+l; j++ { + ibuf.WriteByte(tgt[j]) + } + i += l - 1 } else { encodeInsertOperation(ibuf, buf) @@ -126,52 +158,6 @@ func encodeInsertOperation(ibuf, buf *bytes.Buffer) { ibuf.Reset() } -func initMatch(src []byte) map[uint32]int { - i := 0 - index := make(map[uint32]int) - for { - if i+s > len(src) { - break - } - - ch := adler32.Checksum(src[i : i+s]) - index[ch] = i - i += s - } - - return index -} - -func findMatch(src, tgt []byte, sindex map[uint32]int, tgtOffset int) (srcOffset, l int) { - if len(tgt) >= tgtOffset+s { - ch := adler32.Checksum(tgt[tgtOffset : tgtOffset+s]) - var ok bool - srcOffset, ok = sindex[ch] - if !ok { - return - } - - l = matchLength(src, tgt, tgtOffset, srcOffset) - } - - return -} - -func matchLength(src, tgt []byte, otgt, osrc int) int { - l := 0 - for { - if (osrc >= len(src) || otgt >= len(tgt)) || src[osrc] != tgt[otgt] { - break - } - - l++ - osrc++ - otgt++ - } - - return l -} - func deltaEncodeSize(size int) []byte { var ret []byte c := size & 0x7f diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go index 1426559..7ee6546 100644 --- a/plumbing/format/packfile/encoder.go +++ b/plumbing/format/packfile/encoder.go @@ -14,10 +14,10 @@ import ( // Encoder gets the data from the storage and write it into the writer in PACK // format type Encoder struct { - selector *deltaSelector - w *offsetWriter - zw *zlib.Writer - hasher plumbing.Hasher + selector *deltaSelector + w *offsetWriter + zw *zlib.Writer + hasher plumbing.Hasher // offsets is a map of object hashes to corresponding offsets in the packfile. // It is used to determine offset of the base of a delta when a OFS_DELTA is // used. @@ -45,10 +45,15 @@ func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *E } } -// Encode creates a packfile containing all the objects referenced in hashes -// and writes it to the writer in the Encoder. -func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) { - objects, err := e.selector.ObjectsToPack(hashes) +// Encode creates a packfile containing all the objects referenced in +// hashes and writes it to the writer in the Encoder. `packWindow` +// specifies the size of the sliding window used to compare objects +// for delta compression; 0 turns off delta compression entirely. 
+func (e *Encoder) Encode( + hashes []plumbing.Hash, + packWindow uint, +) (plumbing.Hash, error) { + objects, err := e.selector.ObjectsToPack(hashes, packWindow) if err != nil { return plumbing.ZeroHash, err } @@ -137,7 +142,7 @@ func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) err // for OFS_DELTA, offset of the base is interpreted as negative offset // relative to the type-byte of the header of the ofs-delta entry. - relativeOffset := deltaOffset-baseOffset + relativeOffset := deltaOffset - baseOffset if relativeOffset <= 0 { return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset) } diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index d92e2c4..8011596 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -10,7 +10,7 @@ import ( "gopkg.in/src-d/go-git.v4/storage/filesystem" "gopkg.in/src-d/go-git.v4/storage/memory" - "github.com/src-d/go-git-fixtures" + "gopkg.in/src-d/go-git-fixtures.v3" . "gopkg.in/check.v1" ) @@ -27,12 +27,23 @@ func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) { fixs.Test(c, func(f *fixtures.Fixture) { storage, err := filesystem.NewStorage(f.DotGit()) c.Assert(err, IsNil) - s.testEncodeDecode(c, storage) + s.testEncodeDecode(c, storage, 10) }) } -func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) { +func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) { + fixs := fixtures.Basic().ByTag("packfile").ByTag(".git") + fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git"). + ByTag("packfile").ByTag(".git").One()) + fixs.Test(c, func(f *fixtures.Fixture) { + storage, err := filesystem.NewStorage(f.DotGit()) + c.Assert(err, IsNil) + s.testEncodeDecode(c, storage, 0) + }) +} + +func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, packWindow uint) { objIter, err := storage.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) @@ -57,7 +68,7 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) { buf := bytes.NewBuffer(nil) enc := NewEncoder(buf, storage, false) - _, err = enc.Encode(hashes) + _, err = enc.Encode(hashes, packWindow) c.Assert(err, IsNil) scanner := NewScanner(buf) diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index b5b0c42..f40517d 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -3,11 +3,11 @@ package packfile import ( "bytes" - "github.com/src-d/go-git-fixtures" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/storage/memory" . 
"gopkg.in/check.v1" + "gopkg.in/src-d/go-git-fixtures.v3" ) type EncoderSuite struct { @@ -26,7 +26,7 @@ func (s *EncoderSuite) SetUpTest(c *C) { } func (s *EncoderSuite) TestCorrectPackHeader(c *C) { - hash, err := s.enc.Encode([]plumbing.Hash{}) + hash, err := s.enc.Encode([]plumbing.Hash{}, 10) c.Assert(err, IsNil) hb := [20]byte(hash) @@ -47,7 +47,7 @@ func (s *EncoderSuite) TestCorrectPackWithOneEmptyObject(c *C) { _, err := s.store.SetEncodedObject(o) c.Assert(err, IsNil) - hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}) + hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}, 10) c.Assert(err, IsNil) // PACK + VERSION(2) + OBJECT NUMBER(1) @@ -74,13 +74,13 @@ func (s *EncoderSuite) TestMaxObjectSize(c *C) { o.SetType(plumbing.CommitObject) _, err := s.store.SetEncodedObject(o) c.Assert(err, IsNil) - hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}) + hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}, 10) c.Assert(err, IsNil) c.Assert(hash.IsZero(), Not(Equals), true) } func (s *EncoderSuite) TestHashNotFound(c *C) { - h, err := s.enc.Encode([]plumbing.Hash{plumbing.NewHash("BAD")}) + h, err := s.enc.Encode([]plumbing.Hash{plumbing.NewHash("BAD")}, 10) c.Assert(h, Equals, plumbing.ZeroHash) c.Assert(err, NotNil) c.Assert(err, Equals, plumbing.ErrObjectNotFound) diff --git a/plumbing/format/packfile/object_pack.go b/plumbing/format/packfile/object_pack.go index 14337d1..e22e783 100644 --- a/plumbing/format/packfile/object_pack.go +++ b/plumbing/format/packfile/object_pack.go @@ -84,11 +84,7 @@ func (o *ObjectToPack) Size() int64 { } func (o *ObjectToPack) IsDelta() bool { - if o.Base != nil { - return true - } - - return false + return o.Base != nil } func (o *ObjectToPack) SetDelta(base *ObjectToPack, delta plumbing.EncodedObject) { diff --git a/plumbing/format/packfile/patch_delta.go b/plumbing/format/packfile/patch_delta.go index 976cabc..c604851 100644 --- a/plumbing/format/packfile/patch_delta.go +++ b/plumbing/format/packfile/patch_delta.go @@ -38,11 +38,8 @@ func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) error { target.SetSize(int64(len(dst))) - if _, err := w.Write(dst); err != nil { - return err - } - - return nil + _, err = w.Write(dst) + return err } var ( diff --git a/plumbing/format/packfile/scanner_test.go b/plumbing/format/packfile/scanner_test.go index 1ca8b6e..ab87642 100644 --- a/plumbing/format/packfile/scanner_test.go +++ b/plumbing/format/packfile/scanner_test.go @@ -6,7 +6,7 @@ import ( "gopkg.in/src-d/go-git.v4/plumbing" - "github.com/src-d/go-git-fixtures" + "gopkg.in/src-d/go-git-fixtures.v3" . "gopkg.in/check.v1" ) |