diff options
Diffstat (limited to 'plumbing')
22 files changed, 919 insertions, 134 deletions
diff --git a/plumbing/format/index/encoder_test.go b/plumbing/format/index/encoder_test.go index bc5df0f..78cbbba 100644 --- a/plumbing/format/index/encoder_test.go +++ b/plumbing/format/index/encoder_test.go @@ -5,6 +5,7 @@ import ( "strings" "time" + "github.com/google/go-cmp/cmp" . "gopkg.in/check.v1" "gopkg.in/src-d/go-git.v4/plumbing" ) @@ -46,7 +47,7 @@ func (s *IndexSuite) TestEncode(c *C) { err = d.Decode(output) c.Assert(err, IsNil) - c.Assert(idx, DeepEquals, output) + c.Assert(cmp.Equal(idx, output), Equals, true) c.Assert(output.Entries[0].Name, Equals, strings.Repeat(" ", 20)) c.Assert(output.Entries[1].Name, Equals, "bar") diff --git a/plumbing/format/packfile/delta_index.go b/plumbing/format/packfile/delta_index.go new file mode 100644 index 0000000..349bedf --- /dev/null +++ b/plumbing/format/packfile/delta_index.go @@ -0,0 +1,299 @@ +package packfile + +const blksz = 16 +const maxChainLength = 64 + +// deltaIndex is a modified version of JGit's DeltaIndex adapted to our current +// design. +type deltaIndex struct { + table []int + entries []int + mask int +} + +func (idx *deltaIndex) init(buf []byte) { + scanner := newDeltaIndexScanner(buf, len(buf)) + idx.mask = scanner.mask + idx.table = scanner.table + idx.entries = make([]int, countEntries(scanner)+1) + idx.copyEntries(scanner) +} + +// findMatch returns the offset of src where the block starting at tgtOffset +// is and the length of the match. A length of 0 means there was no match. A +// length of -1 means the src length is lower than the blksz and whatever +// other positive length is the length of the match in bytes. +func (idx *deltaIndex) findMatch(src, tgt []byte, tgtOffset int) (srcOffset, l int) { + if len(tgt) < tgtOffset+s { + return 0, len(tgt) - tgtOffset + } + + if len(src) < blksz { + return 0, -1 + } + + if len(tgt) >= tgtOffset+s && len(src) >= blksz { + h := hashBlock(tgt, tgtOffset) + tIdx := h & idx.mask + eIdx := idx.table[tIdx] + if eIdx != 0 { + srcOffset = idx.entries[eIdx] + } else { + return + } + + l = matchLength(src, tgt, tgtOffset, srcOffset) + } + + return +} + +func matchLength(src, tgt []byte, otgt, osrc int) (l int) { + lensrc := len(src) + lentgt := len(tgt) + for (osrc < lensrc && otgt < lentgt) && src[osrc] == tgt[otgt] { + l++ + osrc++ + otgt++ + } + return +} + +func countEntries(scan *deltaIndexScanner) (cnt int) { + // Figure out exactly how many entries we need. As we do the + // enumeration truncate any delta chains longer than what we + // are willing to scan during encode. This keeps the encode + // logic linear in the size of the input rather than quadratic. + for i := 0; i < len(scan.table); i++ { + h := scan.table[i] + if h == 0 { + continue + } + + size := 0 + for { + size++ + if size == maxChainLength { + scan.next[h] = 0 + break + } + h = scan.next[h] + + if h == 0 { + break + } + } + cnt += size + } + + return +} + +func (idx *deltaIndex) copyEntries(scanner *deltaIndexScanner) { + // Rebuild the entries list from the scanner, positioning all + // blocks in the same hash chain next to each other. We can + // then later discard the next list, along with the scanner. + // + next := 1 + for i := 0; i < len(idx.table); i++ { + h := idx.table[i] + if h == 0 { + continue + } + + idx.table[i] = next + for { + idx.entries[next] = scanner.entries[h] + next++ + h = scanner.next[h] + + if h == 0 { + break + } + } + } +} + +type deltaIndexScanner struct { + table []int + entries []int + next []int + mask int + count int +} + +func newDeltaIndexScanner(buf []byte, size int) *deltaIndexScanner { + size -= size % blksz + worstCaseBlockCnt := size / blksz + if worstCaseBlockCnt < 1 { + return new(deltaIndexScanner) + } + + tableSize := tableSize(worstCaseBlockCnt) + scanner := &deltaIndexScanner{ + table: make([]int, tableSize), + mask: tableSize - 1, + entries: make([]int, worstCaseBlockCnt+1), + next: make([]int, worstCaseBlockCnt+1), + } + + scanner.scan(buf, size) + return scanner +} + +// slightly modified version of JGit's DeltaIndexScanner. We store the offset on the entries +// instead of the entries and the key, so we avoid operations to retrieve the offset later, as +// we don't use the key. +// See: https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaIndexScanner.java +func (s *deltaIndexScanner) scan(buf []byte, end int) { + lastHash := 0 + ptr := end - blksz + + for { + key := hashBlock(buf, ptr) + tIdx := key & s.mask + head := s.table[tIdx] + if head != 0 && lastHash == key { + s.entries[head] = ptr + } else { + s.count++ + eIdx := s.count + s.entries[eIdx] = ptr + s.next[eIdx] = head + s.table[tIdx] = eIdx + } + + lastHash = key + ptr -= blksz + + if 0 > ptr { + break + } + } +} + +func tableSize(worstCaseBlockCnt int) int { + shift := 32 - leadingZeros(uint32(worstCaseBlockCnt)) + sz := 1 << uint(shift-1) + if sz < worstCaseBlockCnt { + sz <<= 1 + } + return sz +} + +// use https://golang.org/pkg/math/bits/#LeadingZeros32 in the future +func leadingZeros(x uint32) (n int) { + if x >= 1<<16 { + x >>= 16 + n = 16 + } + if x >= 1<<8 { + x >>= 8 + n += 8 + } + n += int(len8tab[x]) + return 32 - n +} + +var len8tab = [256]uint8{ + 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, +} + +func hashBlock(raw []byte, ptr int) int { + var hash uint32 + + // The first 4 steps collapse out into a 4 byte big-endian decode, + // with a larger right shift as we combined shift lefts together. + // + hash = ((uint32(raw[ptr]) & 0xff) << 24) | + ((uint32(raw[ptr+1]) & 0xff) << 16) | + ((uint32(raw[ptr+2]) & 0xff) << 8) | + (uint32(raw[ptr+3]) & 0xff) + hash ^= T[hash>>31] + + hash = ((hash << 8) | (uint32(raw[ptr+4]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+5]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+6]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+7]) & 0xff)) ^ T[hash>>23] + + hash = ((hash << 8) | (uint32(raw[ptr+8]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+9]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+10]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+11]) & 0xff)) ^ T[hash>>23] + + hash = ((hash << 8) | (uint32(raw[ptr+12]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+13]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+14]) & 0xff)) ^ T[hash>>23] + hash = ((hash << 8) | (uint32(raw[ptr+15]) & 0xff)) ^ T[hash>>23] + + return int(hash) +} + +var T = []uint32{0x00000000, 0xd4c6b32d, 0x7d4bd577, + 0xa98d665a, 0x2e5119c3, 0xfa97aaee, 0x531accb4, 0x87dc7f99, + 0x5ca23386, 0x886480ab, 0x21e9e6f1, 0xf52f55dc, 0x72f32a45, + 0xa6359968, 0x0fb8ff32, 0xdb7e4c1f, 0x6d82d421, 0xb944670c, + 0x10c90156, 0xc40fb27b, 0x43d3cde2, 0x97157ecf, 0x3e981895, + 0xea5eabb8, 0x3120e7a7, 0xe5e6548a, 0x4c6b32d0, 0x98ad81fd, + 0x1f71fe64, 0xcbb74d49, 0x623a2b13, 0xb6fc983e, 0x0fc31b6f, + 0xdb05a842, 0x7288ce18, 0xa64e7d35, 0x219202ac, 0xf554b181, + 0x5cd9d7db, 0x881f64f6, 0x536128e9, 0x87a79bc4, 0x2e2afd9e, + 0xfaec4eb3, 0x7d30312a, 0xa9f68207, 0x007be45d, 0xd4bd5770, + 0x6241cf4e, 0xb6877c63, 0x1f0a1a39, 0xcbcca914, 0x4c10d68d, + 0x98d665a0, 0x315b03fa, 0xe59db0d7, 0x3ee3fcc8, 0xea254fe5, + 0x43a829bf, 0x976e9a92, 0x10b2e50b, 0xc4745626, 0x6df9307c, + 0xb93f8351, 0x1f8636de, 0xcb4085f3, 0x62cde3a9, 0xb60b5084, + 0x31d72f1d, 0xe5119c30, 0x4c9cfa6a, 0x985a4947, 0x43240558, + 0x97e2b675, 0x3e6fd02f, 0xeaa96302, 0x6d751c9b, 0xb9b3afb6, + 0x103ec9ec, 0xc4f87ac1, 0x7204e2ff, 0xa6c251d2, 0x0f4f3788, + 0xdb8984a5, 0x5c55fb3c, 0x88934811, 0x211e2e4b, 0xf5d89d66, + 0x2ea6d179, 0xfa606254, 0x53ed040e, 0x872bb723, 0x00f7c8ba, + 0xd4317b97, 0x7dbc1dcd, 0xa97aaee0, 0x10452db1, 0xc4839e9c, + 0x6d0ef8c6, 0xb9c84beb, 0x3e143472, 0xead2875f, 0x435fe105, + 0x97995228, 0x4ce71e37, 0x9821ad1a, 0x31accb40, 0xe56a786d, + 0x62b607f4, 0xb670b4d9, 0x1ffdd283, 0xcb3b61ae, 0x7dc7f990, + 0xa9014abd, 0x008c2ce7, 0xd44a9fca, 0x5396e053, 0x8750537e, + 0x2edd3524, 0xfa1b8609, 0x2165ca16, 0xf5a3793b, 0x5c2e1f61, + 0x88e8ac4c, 0x0f34d3d5, 0xdbf260f8, 0x727f06a2, 0xa6b9b58f, + 0x3f0c6dbc, 0xebcade91, 0x4247b8cb, 0x96810be6, 0x115d747f, + 0xc59bc752, 0x6c16a108, 0xb8d01225, 0x63ae5e3a, 0xb768ed17, + 0x1ee58b4d, 0xca233860, 0x4dff47f9, 0x9939f4d4, 0x30b4928e, + 0xe47221a3, 0x528eb99d, 0x86480ab0, 0x2fc56cea, 0xfb03dfc7, + 0x7cdfa05e, 0xa8191373, 0x01947529, 0xd552c604, 0x0e2c8a1b, + 0xdaea3936, 0x73675f6c, 0xa7a1ec41, 0x207d93d8, 0xf4bb20f5, + 0x5d3646af, 0x89f0f582, 0x30cf76d3, 0xe409c5fe, 0x4d84a3a4, + 0x99421089, 0x1e9e6f10, 0xca58dc3d, 0x63d5ba67, 0xb713094a, + 0x6c6d4555, 0xb8abf678, 0x11269022, 0xc5e0230f, 0x423c5c96, + 0x96faefbb, 0x3f7789e1, 0xebb13acc, 0x5d4da2f2, 0x898b11df, + 0x20067785, 0xf4c0c4a8, 0x731cbb31, 0xa7da081c, 0x0e576e46, + 0xda91dd6b, 0x01ef9174, 0xd5292259, 0x7ca44403, 0xa862f72e, + 0x2fbe88b7, 0xfb783b9a, 0x52f55dc0, 0x8633eeed, 0x208a5b62, + 0xf44ce84f, 0x5dc18e15, 0x89073d38, 0x0edb42a1, 0xda1df18c, + 0x739097d6, 0xa75624fb, 0x7c2868e4, 0xa8eedbc9, 0x0163bd93, + 0xd5a50ebe, 0x52797127, 0x86bfc20a, 0x2f32a450, 0xfbf4177d, + 0x4d088f43, 0x99ce3c6e, 0x30435a34, 0xe485e919, 0x63599680, + 0xb79f25ad, 0x1e1243f7, 0xcad4f0da, 0x11aabcc5, 0xc56c0fe8, + 0x6ce169b2, 0xb827da9f, 0x3ffba506, 0xeb3d162b, 0x42b07071, + 0x9676c35c, 0x2f49400d, 0xfb8ff320, 0x5202957a, 0x86c42657, + 0x011859ce, 0xd5deeae3, 0x7c538cb9, 0xa8953f94, 0x73eb738b, + 0xa72dc0a6, 0x0ea0a6fc, 0xda6615d1, 0x5dba6a48, 0x897cd965, + 0x20f1bf3f, 0xf4370c12, 0x42cb942c, 0x960d2701, 0x3f80415b, + 0xeb46f276, 0x6c9a8def, 0xb85c3ec2, 0x11d15898, 0xc517ebb5, + 0x1e69a7aa, 0xcaaf1487, 0x632272dd, 0xb7e4c1f0, 0x3038be69, + 0xe4fe0d44, 0x4d736b1e, 0x99b5d833, +} diff --git a/plumbing/format/packfile/delta_selector.go b/plumbing/format/packfile/delta_selector.go index cc0ae0f..51adcdf 100644 --- a/plumbing/format/packfile/delta_selector.go +++ b/plumbing/format/packfile/delta_selector.go @@ -2,15 +2,13 @@ package packfile import ( "sort" + "sync" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/storer" ) const ( - // How far back in the sorted list to search for deltas. 10 is - // the default in command line git. - deltaWindowSize = 10 // deltas based on deltas, how many steps we can do. // 50 is the default value used in JGit maxDepth = int64(50) @@ -30,27 +28,75 @@ func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector { return &deltaSelector{s} } -// ObjectsToPack creates a list of ObjectToPack from the hashes provided, -// creating deltas if it's suitable, using an specific internal logic -func (dw *deltaSelector) ObjectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) { - otp, err := dw.objectsToPack(hashes) +// ObjectsToPack creates a list of ObjectToPack from the hashes +// provided, creating deltas if it's suitable, using an specific +// internal logic. `packWindow` specifies the size of the sliding +// window used to compare objects for delta compression; 0 turns off +// delta compression entirely. +func (dw *deltaSelector) ObjectsToPack( + hashes []plumbing.Hash, + packWindow uint, +) ([]*ObjectToPack, error) { + otp, err := dw.objectsToPack(hashes, packWindow) if err != nil { return nil, err } + if packWindow == 0 { + return otp, nil + } + dw.sort(otp) - if err := dw.walk(otp); err != nil { + var objectGroups [][]*ObjectToPack + var prev *ObjectToPack + i := -1 + for _, obj := range otp { + if prev == nil || prev.Type() != obj.Type() { + objectGroups = append(objectGroups, []*ObjectToPack{obj}) + i++ + prev = obj + } else { + objectGroups[i] = append(objectGroups[i], obj) + } + } + + var wg sync.WaitGroup + var once sync.Once + for _, objs := range objectGroups { + objs := objs + wg.Add(1) + go func() { + if walkErr := dw.walk(objs, packWindow); walkErr != nil { + once.Do(func() { + err = walkErr + }) + } + wg.Done() + }() + } + wg.Wait() + + if err != nil { return nil, err } return otp, nil } -func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, error) { +func (dw *deltaSelector) objectsToPack( + hashes []plumbing.Hash, + packWindow uint, +) ([]*ObjectToPack, error) { var objectsToPack []*ObjectToPack for _, h := range hashes { - o, err := dw.encodedDeltaObject(h) + var o plumbing.EncodedObject + var err error + if packWindow == 0 { + o, err = dw.encodedObject(h) + } else { + o, err = dw.encodedDeltaObject(h) + } if err != nil { return nil, err } @@ -63,6 +109,10 @@ func (dw *deltaSelector) objectsToPack(hashes []plumbing.Hash) ([]*ObjectToPack, objectsToPack = append(objectsToPack, otp) } + if packWindow == 0 { + return objectsToPack, nil + } + if err := dw.fixAndBreakChains(objectsToPack); err != nil { return nil, err } @@ -171,8 +221,18 @@ func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) { sort.Sort(byTypeAndSize(objectsToPack)) } -func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { +func (dw *deltaSelector) walk( + objectsToPack []*ObjectToPack, + packWindow uint, +) error { + indexMap := make(map[plumbing.Hash]*deltaIndex) for i := 0; i < len(objectsToPack); i++ { + // Clean up the index map for anything outside our pack + // window, to save memory. + if i > int(packWindow) { + delete(indexMap, objectsToPack[i-int(packWindow)].Hash()) + } + target := objectsToPack[i] // If we already have a delta, we don't try to find a new one for this @@ -187,7 +247,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { continue } - for j := i - 1; j >= 0 && i-j < deltaWindowSize; j-- { + for j := i - 1; j >= 0 && i-j < int(packWindow); j-- { base := objectsToPack[j] // Objects must use only the same type as their delta base. // Since objectsToPack is sorted by type and size, once we find @@ -196,7 +256,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { break } - if err := dw.tryToDeltify(base, target); err != nil { + if err := dw.tryToDeltify(indexMap, base, target); err != nil { return err } } @@ -205,7 +265,7 @@ func (dw *deltaSelector) walk(objectsToPack []*ObjectToPack) error { return nil } -func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error { +func (dw *deltaSelector) tryToDeltify(indexMap map[plumbing.Hash]*deltaIndex, base, target *ObjectToPack) error { // If the sizes are radically different, this is a bad pairing. if target.Size() < base.Size()>>4 { return nil @@ -238,8 +298,12 @@ func (dw *deltaSelector) tryToDeltify(base, target *ObjectToPack) error { return err } + if _, ok := indexMap[base.Hash()]; !ok { + indexMap[base.Hash()] = new(deltaIndex) + } + // Now we can generate the delta using originals - delta, err := GetDelta(base.Original, target.Original) + delta, err := getDelta(indexMap[base.Hash()], base.Original, target.Original) if err != nil { return err } diff --git a/plumbing/format/packfile/delta_selector_test.go b/plumbing/format/packfile/delta_selector_test.go index ca4a96b..7d7fd0c 100644 --- a/plumbing/format/packfile/delta_selector_test.go +++ b/plumbing/format/packfile/delta_selector_test.go @@ -146,7 +146,8 @@ func (s *DeltaSelectorSuite) createTestObjects() { func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // Different type hashes := []plumbing.Hash{s.hashes["base"], s.hashes["treeType"]} - otp, err := s.ds.ObjectsToPack(hashes) + deltaWindowSize := uint(10) + otp, err := s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 2) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]]) @@ -154,7 +155,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // Size radically different hashes = []plumbing.Hash{s.hashes["bigBase"], s.hashes["target"]} - otp, err = s.ds.ObjectsToPack(hashes) + otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 2) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["bigBase"]]) @@ -162,7 +163,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // Delta Size Limit with no best delta yet hashes = []plumbing.Hash{s.hashes["smallBase"], s.hashes["smallTarget"]} - otp, err = s.ds.ObjectsToPack(hashes) + otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 2) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["smallBase"]]) @@ -170,7 +171,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // It will create the delta hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]} - otp, err = s.ds.ObjectsToPack(hashes) + otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 2) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["target"]]) @@ -185,7 +186,7 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { s.hashes["o2"], s.hashes["o3"], } - otp, err = s.ds.ObjectsToPack(hashes) + otp, err = s.ds.ObjectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) c.Assert(len(otp), Equals, 3) c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["o1"]]) @@ -201,20 +202,32 @@ func (s *DeltaSelectorSuite) TestObjectsToPack(c *C) { // a delta. hashes = make([]plumbing.Hash, 0, deltaWindowSize+2) hashes = append(hashes, s.hashes["base"]) - for i := 0; i < deltaWindowSize; i++ { + for i := uint(0); i < deltaWindowSize; i++ { hashes = append(hashes, s.hashes["smallTarget"]) } hashes = append(hashes, s.hashes["target"]) // Don't sort so we can easily check the sliding window without // creating a bunch of new objects. - otp, err = s.ds.objectsToPack(hashes) + otp, err = s.ds.objectsToPack(hashes, deltaWindowSize) c.Assert(err, IsNil) - err = s.ds.walk(otp) + err = s.ds.walk(otp, deltaWindowSize) c.Assert(err, IsNil) - c.Assert(len(otp), Equals, deltaWindowSize+2) + c.Assert(len(otp), Equals, int(deltaWindowSize)+2) targetIdx := len(otp) - 1 c.Assert(otp[targetIdx].IsDelta(), Equals, false) + + // Check that no deltas are created, and the objects are unsorted, + // if compression is off. + hashes = []plumbing.Hash{s.hashes["base"], s.hashes["target"]} + otp, err = s.ds.ObjectsToPack(hashes, 0) + c.Assert(err, IsNil) + c.Assert(len(otp), Equals, 2) + c.Assert(otp[0].Object, Equals, s.store.Objects[s.hashes["base"]]) + c.Assert(otp[0].IsDelta(), Equals, false) + c.Assert(otp[1].Original, Equals, s.store.Objects[s.hashes["target"]]) + c.Assert(otp[1].IsDelta(), Equals, false) + c.Assert(otp[1].Depth, Equals, 0) } func (s *DeltaSelectorSuite) TestMaxDepth(c *C) { diff --git a/plumbing/format/packfile/diff_delta.go b/plumbing/format/packfile/diff_delta.go index 7e9f822..4d56dc1 100644 --- a/plumbing/format/packfile/diff_delta.go +++ b/plumbing/format/packfile/diff_delta.go @@ -2,8 +2,6 @@ package packfile import ( "bytes" - "hash/adler32" - "io/ioutil" "gopkg.in/src-d/go-git.v4/plumbing" ) @@ -26,26 +24,40 @@ const ( // To generate target again, you will need the obtained object and "base" one. // Error will be returned if base or target object cannot be read. func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) { + return getDelta(new(deltaIndex), base, target) +} + +func getDelta(index *deltaIndex, base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) { br, err := base.Reader() if err != nil { return nil, err } + defer br.Close() tr, err := target.Reader() if err != nil { return nil, err } + defer tr.Close() - bb, err := ioutil.ReadAll(br) + bb := bufPool.Get().(*bytes.Buffer) + bb.Reset() + defer bufPool.Put(bb) + + _, err = bb.ReadFrom(br) if err != nil { return nil, err } - tb, err := ioutil.ReadAll(tr) + tb := bufPool.Get().(*bytes.Buffer) + tb.Reset() + defer bufPool.Put(tb) + + _, err = tb.ReadFrom(tr) if err != nil { return nil, err } - db := DiffDelta(bb, tb) + db := diffDelta(index, bb.Bytes(), tb.Bytes()) delta := &plumbing.MemoryObject{} _, err = delta.Write(db) if err != nil { @@ -59,21 +71,41 @@ func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, erro } // DiffDelta returns the delta that transforms src into tgt. -func DiffDelta(src []byte, tgt []byte) []byte { +func DiffDelta(src, tgt []byte) []byte { + return diffDelta(new(deltaIndex), src, tgt) +} + +func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte { buf := bufPool.Get().(*bytes.Buffer) buf.Reset() buf.Write(deltaEncodeSize(len(src))) buf.Write(deltaEncodeSize(len(tgt))) - sindex := initMatch(src) + if len(index.entries) == 0 { + index.init(src) + } ibuf := bufPool.Get().(*bytes.Buffer) ibuf.Reset() for i := 0; i < len(tgt); i++ { - offset, l := findMatch(src, tgt, sindex, i) + offset, l := index.findMatch(src, tgt, i) - if l < s { + if l == 0 { + // couldn't find a match, just write the current byte and continue ibuf.WriteByte(tgt[i]) + } else if l < 0 { + // src is less than blksz, copy the rest of the target to avoid + // calls to findMatch + for ; i < len(tgt); i++ { + ibuf.WriteByte(tgt[i]) + } + } else if l < s { + // remaining target is less than blksz, copy what's left of it + // and avoid calls to findMatch + for j := i; j < i+l; j++ { + ibuf.WriteByte(tgt[j]) + } + i += l - 1 } else { encodeInsertOperation(ibuf, buf) @@ -126,52 +158,6 @@ func encodeInsertOperation(ibuf, buf *bytes.Buffer) { ibuf.Reset() } -func initMatch(src []byte) map[uint32]int { - i := 0 - index := make(map[uint32]int) - for { - if i+s > len(src) { - break - } - - ch := adler32.Checksum(src[i : i+s]) - index[ch] = i - i += s - } - - return index -} - -func findMatch(src, tgt []byte, sindex map[uint32]int, tgtOffset int) (srcOffset, l int) { - if len(tgt) >= tgtOffset+s { - ch := adler32.Checksum(tgt[tgtOffset : tgtOffset+s]) - var ok bool - srcOffset, ok = sindex[ch] - if !ok { - return - } - - l = matchLength(src, tgt, tgtOffset, srcOffset) - } - - return -} - -func matchLength(src, tgt []byte, otgt, osrc int) int { - l := 0 - for { - if (osrc >= len(src) || otgt >= len(tgt)) || src[osrc] != tgt[otgt] { - break - } - - l++ - osrc++ - otgt++ - } - - return l -} - func deltaEncodeSize(size int) []byte { var ret []byte c := size & 0x7f diff --git a/plumbing/format/packfile/encoder.go b/plumbing/format/packfile/encoder.go index 1426559..7ee6546 100644 --- a/plumbing/format/packfile/encoder.go +++ b/plumbing/format/packfile/encoder.go @@ -14,10 +14,10 @@ import ( // Encoder gets the data from the storage and write it into the writer in PACK // format type Encoder struct { - selector *deltaSelector - w *offsetWriter - zw *zlib.Writer - hasher plumbing.Hasher + selector *deltaSelector + w *offsetWriter + zw *zlib.Writer + hasher plumbing.Hasher // offsets is a map of object hashes to corresponding offsets in the packfile. // It is used to determine offset of the base of a delta when a OFS_DELTA is // used. @@ -45,10 +45,15 @@ func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *E } } -// Encode creates a packfile containing all the objects referenced in hashes -// and writes it to the writer in the Encoder. -func (e *Encoder) Encode(hashes []plumbing.Hash) (plumbing.Hash, error) { - objects, err := e.selector.ObjectsToPack(hashes) +// Encode creates a packfile containing all the objects referenced in +// hashes and writes it to the writer in the Encoder. `packWindow` +// specifies the size of the sliding window used to compare objects +// for delta compression; 0 turns off delta compression entirely. +func (e *Encoder) Encode( + hashes []plumbing.Hash, + packWindow uint, +) (plumbing.Hash, error) { + objects, err := e.selector.ObjectsToPack(hashes, packWindow) if err != nil { return plumbing.ZeroHash, err } @@ -137,7 +142,7 @@ func (e *Encoder) writeOfsDeltaHeader(deltaOffset int64, base plumbing.Hash) err // for OFS_DELTA, offset of the base is interpreted as negative offset // relative to the type-byte of the header of the ofs-delta entry. - relativeOffset := deltaOffset-baseOffset + relativeOffset := deltaOffset - baseOffset if relativeOffset <= 0 { return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset) } diff --git a/plumbing/format/packfile/encoder_advanced_test.go b/plumbing/format/packfile/encoder_advanced_test.go index d92e2c4..39c0700 100644 --- a/plumbing/format/packfile/encoder_advanced_test.go +++ b/plumbing/format/packfile/encoder_advanced_test.go @@ -27,12 +27,23 @@ func (s *EncoderAdvancedSuite) TestEncodeDecode(c *C) { fixs.Test(c, func(f *fixtures.Fixture) { storage, err := filesystem.NewStorage(f.DotGit()) c.Assert(err, IsNil) - s.testEncodeDecode(c, storage) + s.testEncodeDecode(c, storage, 10) }) } -func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) { +func (s *EncoderAdvancedSuite) TestEncodeDecodeNoDeltaCompression(c *C) { + fixs := fixtures.Basic().ByTag("packfile").ByTag(".git") + fixs = append(fixs, fixtures.ByURL("https://github.com/src-d/go-git.git"). + ByTag("packfile").ByTag(".git").One()) + fixs.Test(c, func(f *fixtures.Fixture) { + storage, err := filesystem.NewStorage(f.DotGit()) + c.Assert(err, IsNil) + s.testEncodeDecode(c, storage, 0) + }) +} + +func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer, packWindow uint) { objIter, err := storage.IterEncodedObjects(plumbing.AnyObject) c.Assert(err, IsNil) @@ -57,7 +68,7 @@ func (s *EncoderAdvancedSuite) testEncodeDecode(c *C, storage storer.Storer) { buf := bytes.NewBuffer(nil) enc := NewEncoder(buf, storage, false) - _, err = enc.Encode(hashes) + _, err = enc.Encode(hashes, packWindow) c.Assert(err, IsNil) scanner := NewScanner(buf) diff --git a/plumbing/format/packfile/encoder_test.go b/plumbing/format/packfile/encoder_test.go index b5b0c42..2cb9094 100644 --- a/plumbing/format/packfile/encoder_test.go +++ b/plumbing/format/packfile/encoder_test.go @@ -26,7 +26,7 @@ func (s *EncoderSuite) SetUpTest(c *C) { } func (s *EncoderSuite) TestCorrectPackHeader(c *C) { - hash, err := s.enc.Encode([]plumbing.Hash{}) + hash, err := s.enc.Encode([]plumbing.Hash{}, 10) c.Assert(err, IsNil) hb := [20]byte(hash) @@ -47,7 +47,7 @@ func (s *EncoderSuite) TestCorrectPackWithOneEmptyObject(c *C) { _, err := s.store.SetEncodedObject(o) c.Assert(err, IsNil) - hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}) + hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}, 10) c.Assert(err, IsNil) // PACK + VERSION(2) + OBJECT NUMBER(1) @@ -74,13 +74,13 @@ func (s *EncoderSuite) TestMaxObjectSize(c *C) { o.SetType(plumbing.CommitObject) _, err := s.store.SetEncodedObject(o) c.Assert(err, IsNil) - hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}) + hash, err := s.enc.Encode([]plumbing.Hash{o.Hash()}, 10) c.Assert(err, IsNil) c.Assert(hash.IsZero(), Not(Equals), true) } func (s *EncoderSuite) TestHashNotFound(c *C) { - h, err := s.enc.Encode([]plumbing.Hash{plumbing.NewHash("BAD")}) + h, err := s.enc.Encode([]plumbing.Hash{plumbing.NewHash("BAD")}, 10) c.Assert(h, Equals, plumbing.ZeroHash) c.Assert(err, NotNil) c.Assert(err, Equals, plumbing.ErrObjectNotFound) diff --git a/plumbing/object/commit.go b/plumbing/object/commit.go index eee015b..fad3dac 100644 --- a/plumbing/object/commit.go +++ b/plumbing/object/commit.go @@ -12,6 +12,11 @@ import ( "gopkg.in/src-d/go-git.v4/utils/ioutil" ) +const ( + beginpgp string = "-----BEGIN PGP SIGNATURE-----" + endpgp string = "-----END PGP SIGNATURE-----" +) + // Hash represents the hash of an object type Hash plumbing.Hash @@ -19,7 +24,7 @@ type Hash plumbing.Hash // at a certain point in time. It contains meta-information about that point // in time, such as a timestamp, the author of the changes since the last // commit, a pointer to the previous commit(s), etc. -// http://schacon.github.io/gitbook/1_the_git_object_model.html +// http://shafiulazam.com/gitbook/1_the_git_object_model.html type Commit struct { // Hash of the commit object. Hash plumbing.Hash @@ -28,6 +33,8 @@ type Commit struct { // Committer is the one performing the commit, might be different from // Author. Committer Signature + // PGPSignature is the PGP signature of the commit. + PGPSignature string // Message is the commit message, contains arbitrary text. Message string // TreeHash is the hash of the root tree of the commit. @@ -145,12 +152,33 @@ func (c *Commit) Decode(o plumbing.EncodedObject) (err error) { r := bufio.NewReader(reader) var message bool + var pgpsig bool for { line, err := r.ReadBytes('\n') if err != nil && err != io.EOF { return err } + if pgpsig { + // Check if it's the end of a PGP signature. + if bytes.Contains(line, []byte(endpgp)) { + c.PGPSignature += endpgp + "\n" + pgpsig = false + } else { + // Trim the left padding. + line = bytes.TrimLeft(line, " ") + c.PGPSignature += string(line) + } + continue + } + + // Check if it's the beginning of a PGP signature. + if bytes.Contains(line, []byte(beginpgp)) { + c.PGPSignature += beginpgp + "\n" + pgpsig = true + continue + } + if !message { line = bytes.TrimSpace(line) if len(line) == 0 { @@ -215,6 +243,21 @@ func (b *Commit) Encode(o plumbing.EncodedObject) error { return err } + if b.PGPSignature != "" { + if _, err = fmt.Fprint(w, "pgpsig"); err != nil { + return err + } + + // Split all the signature lines and write with a left padding and + // newline at the end. + lines := strings.Split(b.PGPSignature, "\n") + for _, line := range lines { + if _, err = fmt.Fprintf(w, " %s\n", line); err != nil { + return err + } + } + } + if _, err = fmt.Fprintf(w, "\n\n%s", b.Message); err != nil { return err } @@ -222,6 +265,32 @@ func (b *Commit) Encode(o plumbing.EncodedObject) error { return err } +// Stats shows the status of commit. +func (c *Commit) Stats() (FileStats, error) { + // Get the previous commit. + ci := c.Parents() + parentCommit, err := ci.Next() + if err != nil { + if err == io.EOF { + emptyNoder := treeNoder{} + parentCommit = &Commit{ + Hash: emptyNoder.hash, + // TreeHash: emptyNoder.parent.Hash, + s: c.s, + } + } else { + return nil, err + } + } + + patch, err := parentCommit.Patch(c) + if err != nil { + return nil, err + } + + return getFileStatsFromFilePatches(patch.FilePatches()), nil +} + func (c *Commit) String() string { return fmt.Sprintf( "%s %s\nAuthor: %s\nDate: %s\n\n%s\n", diff --git a/plumbing/object/commit_test.go b/plumbing/object/commit_test.go index e89302d..e84160b 100644 --- a/plumbing/object/commit_test.go +++ b/plumbing/object/commit_test.go @@ -232,3 +232,55 @@ func (s *SuiteCommit) TestLongCommitMessageSerialization(c *C) { c.Assert(err, IsNil) c.Assert(decoded.Message, Equals, longMessage) } + +func (s *SuiteCommit) TestPGPSignatureSerialization(c *C) { + encoded := &plumbing.MemoryObject{} + decoded := &Commit{} + commit := *s.Commit + + pgpsignature := `-----BEGIN PGP SIGNATURE----- + +iQEcBAABAgAGBQJTZbQlAAoJEF0+sviABDDrZbQH/09PfE51KPVPlanr6q1v4/Ut +LQxfojUWiLQdg2ESJItkcuweYg+kc3HCyFejeDIBw9dpXt00rY26p05qrpnG+85b +hM1/PswpPLuBSr+oCIDj5GMC2r2iEKsfv2fJbNW8iWAXVLoWZRF8B0MfqX/YTMbm +ecorc4iXzQu7tupRihslbNkfvfciMnSDeSvzCpWAHl7h8Wj6hhqePmLm9lAYqnKp +8S5B/1SSQuEAjRZgI4IexpZoeKGVDptPHxLLS38fozsyi0QyDyzEgJxcJQVMXxVi +RUysgqjcpT8+iQM1PblGfHR4XAhuOqN5Fx06PSaFZhqvWFezJ28/CLyX5q+oIVk= +=EFTF +-----END PGP SIGNATURE----- +` + commit.PGPSignature = pgpsignature + + err := commit.Encode(encoded) + c.Assert(err, IsNil) + + err = decoded.Decode(encoded) + c.Assert(err, IsNil) + c.Assert(decoded.PGPSignature, Equals, pgpsignature) +} + +func (s *SuiteCommit) TestStat(c *C) { + aCommit := s.commit(c, plumbing.NewHash("6ecf0ef2c2dffb796033e5a02219af86ec6584e5")) + fileStats, err := aCommit.Stats() + c.Assert(err, IsNil) + + c.Assert(fileStats[0].Name, Equals, "vendor/foo.go") + c.Assert(fileStats[0].Addition, Equals, 7) + c.Assert(fileStats[0].Deletion, Equals, 0) + c.Assert(fileStats[0].String(), Equals, " vendor/foo.go | 7 +++++++\n") + + // Stats for another commit. + aCommit = s.commit(c, plumbing.NewHash("918c48b83bd081e863dbe1b80f8998f058cd8294")) + fileStats, err = aCommit.Stats() + c.Assert(err, IsNil) + + c.Assert(fileStats[0].Name, Equals, "go/example.go") + c.Assert(fileStats[0].Addition, Equals, 142) + c.Assert(fileStats[0].Deletion, Equals, 0) + c.Assert(fileStats[0].String(), Equals, " go/example.go | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++\n") + + c.Assert(fileStats[1].Name, Equals, "php/crappy.php") + c.Assert(fileStats[1].Addition, Equals, 259) + c.Assert(fileStats[1].Deletion, Equals, 0) + c.Assert(fileStats[1].String(), Equals, " php/crappy.php | 259 ++++++++++++++++++++++++++++++++++++++++++++++++++++\n") +} diff --git a/plumbing/object/commit_walker.go b/plumbing/object/commit_walker.go index 797c17a..40ad258 100644 --- a/plumbing/object/commit_walker.go +++ b/plumbing/object/commit_walker.go @@ -8,9 +8,10 @@ import ( ) type commitPreIterator struct { - seen map[plumbing.Hash]bool - stack []CommitIter - start *Commit + seenExternal map[plumbing.Hash]bool + seen map[plumbing.Hash]bool + stack []CommitIter + start *Commit } // NewCommitPreorderIter returns a CommitIter that walks the commit history, @@ -20,16 +21,21 @@ type commitPreIterator struct { // and will return the error. Other errors might be returned if the history // cannot be traversed (e.g. missing objects). Ignore allows to skip some // commits from being iterated. -func NewCommitPreorderIter(c *Commit, ignore []plumbing.Hash) CommitIter { +func NewCommitPreorderIter( + c *Commit, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } return &commitPreIterator{ - seen: seen, - stack: make([]CommitIter, 0), - start: c, + seenExternal: seenExternal, + seen: seen, + stack: make([]CommitIter, 0), + start: c, } } @@ -57,7 +63,7 @@ func (w *commitPreIterator) Next() (*Commit, error) { } } - if w.seen[c.Hash] { + if w.seen[c.Hash] || w.seenExternal[c.Hash] { continue } diff --git a/plumbing/object/commit_walker_test.go b/plumbing/object/commit_walker_test.go index 48b504d..a27104e 100644 --- a/plumbing/object/commit_walker_test.go +++ b/plumbing/object/commit_walker_test.go @@ -16,7 +16,7 @@ func (s *CommitWalkerSuite) TestCommitPreIterator(c *C) { commit := s.commit(c, s.Fixture.Head) var commits []*Commit - NewCommitPreorderIter(commit, nil).ForEach(func(c *Commit) error { + NewCommitPreorderIter(commit, nil, nil).ForEach(func(c *Commit) error { commits = append(commits, c) return nil }) @@ -42,7 +42,7 @@ func (s *CommitWalkerSuite) TestCommitPreIteratorWithIgnore(c *C) { commit := s.commit(c, s.Fixture.Head) var commits []*Commit - NewCommitPreorderIter(commit, []plumbing.Hash{ + NewCommitPreorderIter(commit, nil, []plumbing.Hash{ plumbing.NewHash("af2d6a6954d532f8ffb47615169c8fdf9d383a1a"), }).ForEach(func(c *Commit) error { commits = append(commits, c) @@ -60,6 +60,30 @@ func (s *CommitWalkerSuite) TestCommitPreIteratorWithIgnore(c *C) { } } +func (s *CommitWalkerSuite) TestCommitPreIteratorWithSeenExternal(c *C) { + commit := s.commit(c, s.Fixture.Head) + + var commits []*Commit + seenExternal := map[plumbing.Hash]bool{ + plumbing.NewHash("af2d6a6954d532f8ffb47615169c8fdf9d383a1a"): true, + } + NewCommitPreorderIter(commit, seenExternal, nil). + ForEach(func(c *Commit) error { + commits = append(commits, c) + return nil + }) + + c.Assert(commits, HasLen, 2) + + expected := []string{ + "6ecf0ef2c2dffb796033e5a02219af86ec6584e5", + "918c48b83bd081e863dbe1b80f8998f058cd8294", + } + for i, commit := range commits { + c.Assert(commit.Hash.String(), Equals, expected[i]) + } +} + func (s *CommitWalkerSuite) TestCommitPostIterator(c *C) { commit := s.commit(c, s.Fixture.Head) diff --git a/plumbing/object/patch.go b/plumbing/object/patch.go index d413114..a920631 100644 --- a/plumbing/object/patch.go +++ b/plumbing/object/patch.go @@ -4,6 +4,8 @@ import ( "bytes" "fmt" "io" + "math" + "strings" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/filemode" @@ -105,6 +107,10 @@ func (p *Patch) Encode(w io.Writer) error { return ue.Encode(p) } +func (p *Patch) Stats() FileStats { + return getFileStatsFromFilePatches(p.FilePatches()) +} + func (p *Patch) String() string { buf := bytes.NewBuffer(nil) err := p.Encode(buf) @@ -185,3 +191,112 @@ func (t *textChunk) Content() string { func (t *textChunk) Type() fdiff.Operation { return t.op } + +// FileStat stores the status of changes in content of a file. +type FileStat struct { + Name string + Addition int + Deletion int +} + +func (fs FileStat) String() string { + return printStat([]FileStat{fs}) +} + +// FileStats is a collection of FileStat. +type FileStats []FileStat + +func (fileStats FileStats) String() string { + return printStat(fileStats) +} + +func printStat(fileStats []FileStat) string { + padLength := float64(len(" ")) + newlineLength := float64(len("\n")) + separatorLength := float64(len("|")) + // Soft line length limit. The text length calculation below excludes + // length of the change number. Adding that would take it closer to 80, + // but probably not more than 80, until it's a huge number. + lineLength := 72.0 + + // Get the longest filename and longest total change. + var longestLength float64 + var longestTotalChange float64 + for _, fs := range fileStats { + if int(longestLength) < len(fs.Name) { + longestLength = float64(len(fs.Name)) + } + totalChange := fs.Addition + fs.Deletion + if int(longestTotalChange) < totalChange { + longestTotalChange = float64(totalChange) + } + } + + // Parts of the output: + // <pad><filename><pad>|<pad><changeNumber><pad><+++/---><newline> + // example: " main.go | 10 +++++++--- " + + // <pad><filename><pad> + leftTextLength := padLength + longestLength + padLength + + // <pad><number><pad><+++++/-----><newline> + // Excluding number length here. + rightTextLength := padLength + padLength + newlineLength + + totalTextArea := leftTextLength + separatorLength + rightTextLength + heightOfHistogram := lineLength - totalTextArea + + // Scale the histogram. + var scaleFactor float64 + if longestTotalChange > heightOfHistogram { + // Scale down to heightOfHistogram. + scaleFactor = float64(longestTotalChange / heightOfHistogram) + } else { + scaleFactor = 1.0 + } + + finalOutput := "" + for _, fs := range fileStats { + addn := float64(fs.Addition) + deln := float64(fs.Deletion) + adds := strings.Repeat("+", int(math.Floor(addn/scaleFactor))) + dels := strings.Repeat("-", int(math.Floor(deln/scaleFactor))) + finalOutput += fmt.Sprintf(" %s | %d %s%s\n", fs.Name, (fs.Addition + fs.Deletion), adds, dels) + } + + return finalOutput +} + +func getFileStatsFromFilePatches(filePatches []fdiff.FilePatch) FileStats { + var fileStats FileStats + + for _, fp := range filePatches { + cs := FileStat{} + from, to := fp.Files() + if from == nil { + // New File is created. + cs.Name = to.Path() + } else if to == nil { + // File is deleted. + cs.Name = from.Path() + } else if from.Path() != to.Path() { + // File is renamed. Not supported. + // cs.Name = fmt.Sprintf("%s => %s", from.Path(), to.Path()) + } else { + cs.Name = from.Path() + } + + for _, chunk := range fp.Chunks() { + switch chunk.Type() { + case fdiff.Add: + cs.Addition += strings.Count(chunk.Content(), "\n") + case fdiff.Delete: + cs.Deletion += strings.Count(chunk.Content(), "\n") + } + } + + fileStats = append(fileStats, cs) + } + + return fileStats +} diff --git a/plumbing/protocol/packp/capability/capability.go b/plumbing/protocol/packp/capability/capability.go index 96d93f6..a129781 100644 --- a/plumbing/protocol/packp/capability/capability.go +++ b/plumbing/protocol/packp/capability/capability.go @@ -234,7 +234,7 @@ const ( const DefaultAgent = "go-git/4.x" -var valid = map[Capability]bool{ +var known = map[Capability]bool{ MultiACK: true, MultiACKDetailed: true, NoDone: true, ThinPack: true, Sideband: true, Sideband64k: true, OFSDelta: true, Agent: true, Shallow: true, DeepenSince: true, DeepenNot: true, DeepenRelative: true, diff --git a/plumbing/protocol/packp/capability/list.go b/plumbing/protocol/packp/capability/list.go index 3904a4e..26a79b6 100644 --- a/plumbing/protocol/packp/capability/list.go +++ b/plumbing/protocol/packp/capability/list.go @@ -108,7 +108,7 @@ func (l *List) Add(c Capability, values ...string) error { return nil } - if !multipleArgument[c] && len(l.m[c].Values) > 0 { + if known[c] && !multipleArgument[c] && len(l.m[c].Values) > 0 { return ErrMultipleArguments } @@ -116,7 +116,19 @@ func (l *List) Add(c Capability, values ...string) error { return nil } +func (l *List) validateNoEmptyArgs(values []string) error { + for _, v := range values { + if v == "" { + return ErrEmtpyArgument + } + } + return nil +} + func (l *List) validate(c Capability, values []string) error { + if !known[c] { + return l.validateNoEmptyArgs(values) + } if requiresArgument[c] && len(values) == 0 { return ErrArgumentsRequired } @@ -128,14 +140,7 @@ func (l *List) validate(c Capability, values []string) error { if !multipleArgument[c] && len(values) > 1 { return ErrMultipleArguments } - - for _, v := range values { - if v == "" { - return ErrEmtpyArgument - } - } - - return nil + return l.validateNoEmptyArgs(values) } // Supports returns true if capability is present diff --git a/plumbing/protocol/packp/capability/list_test.go b/plumbing/protocol/packp/capability/list_test.go index 9665e89..82dd63f 100644 --- a/plumbing/protocol/packp/capability/list_test.go +++ b/plumbing/protocol/packp/capability/list_test.go @@ -65,6 +65,26 @@ func (s *SuiteCapabilities) TestDecodeWithUnknownCapability(c *check.C) { c.Assert(cap.Supports(Capability("foo")), check.Equals, true) } +func (s *SuiteCapabilities) TestDecodeWithUnknownCapabilityWithArgument(c *check.C) { + cap := NewList() + err := cap.Decode([]byte("oldref=HEAD:refs/heads/v2 thin-pack")) + c.Assert(err, check.IsNil) + + c.Assert(cap.m, check.HasLen, 2) + c.Assert(cap.Get("oldref"), check.DeepEquals, []string{"HEAD:refs/heads/v2"}) + c.Assert(cap.Get(ThinPack), check.IsNil) +} + +func (s *SuiteCapabilities) TestDecodeWithUnknownCapabilityWithMultipleArgument(c *check.C) { + cap := NewList() + err := cap.Decode([]byte("foo=HEAD:refs/heads/v2 foo=HEAD:refs/heads/v1 thin-pack")) + c.Assert(err, check.IsNil) + + c.Assert(cap.m, check.HasLen, 2) + c.Assert(cap.Get("foo"), check.DeepEquals, []string{"HEAD:refs/heads/v2", "HEAD:refs/heads/v1"}) + c.Assert(cap.Get(ThinPack), check.IsNil) +} + func (s *SuiteCapabilities) TestString(c *check.C) { cap := NewList() cap.Set(Agent, "bar") @@ -153,7 +173,7 @@ func (s *SuiteCapabilities) TestAddErrArgumentsNotAllowed(c *check.C) { c.Assert(err, check.Equals, ErrArguments) } -func (s *SuiteCapabilities) TestAddErrArgumendts(c *check.C) { +func (s *SuiteCapabilities) TestAddErrArguments(c *check.C) { cap := NewList() err := cap.Add(SymRef, "") c.Assert(err, check.Equals, ErrEmtpyArgument) diff --git a/plumbing/revlist/revlist.go b/plumbing/revlist/revlist.go index 5b2ff99..0a9d1e8 100644 --- a/plumbing/revlist/revlist.go +++ b/plumbing/revlist/revlist.go @@ -35,9 +35,9 @@ func objects( ignore []plumbing.Hash, allowMissingObjects bool, ) ([]plumbing.Hash, error) { - seen := hashListToSet(ignore) result := make(map[plumbing.Hash]bool) + visited := make(map[plumbing.Hash]bool) walkerFunc := func(h plumbing.Hash) { if !seen[h] { @@ -47,7 +47,7 @@ func objects( } for _, h := range objects { - if err := processObject(s, h, seen, ignore, walkerFunc); err != nil { + if err := processObject(s, h, seen, visited, ignore, walkerFunc); err != nil { if allowMissingObjects && err == plumbing.ErrObjectNotFound { continue } @@ -64,6 +64,7 @@ func processObject( s storer.EncodedObjectStorer, h plumbing.Hash, seen map[plumbing.Hash]bool, + visited map[plumbing.Hash]bool, ignore []plumbing.Hash, walkerFunc func(h plumbing.Hash), ) error { @@ -83,12 +84,12 @@ func processObject( switch do := do.(type) { case *object.Commit: - return reachableObjects(do, seen, ignore, walkerFunc) + return reachableObjects(do, seen, visited, ignore, walkerFunc) case *object.Tree: return iterateCommitTrees(seen, do, walkerFunc) case *object.Tag: walkerFunc(do.Hash) - return processObject(s, do.Target, seen, ignore, walkerFunc) + return processObject(s, do.Target, seen, visited, ignore, walkerFunc) case *object.Blob: walkerFunc(do.Hash) default: @@ -106,13 +107,36 @@ func processObject( func reachableObjects( commit *object.Commit, seen map[plumbing.Hash]bool, + visited map[plumbing.Hash]bool, ignore []plumbing.Hash, - cb func(h plumbing.Hash)) error { + cb func(h plumbing.Hash), +) error { + i := object.NewCommitPreorderIter(commit, seen, ignore) + pending := make(map[plumbing.Hash]bool) + addPendingParents(pending, visited, commit) + + for { + commit, err := i.Next() + if err == io.EOF { + break + } + + if err != nil { + return err + } + + if pending[commit.Hash] { + delete(pending, commit.Hash) + } + + addPendingParents(pending, visited, commit) + + if visited[commit.Hash] && len(pending) == 0 { + break + } - i := object.NewCommitPreorderIter(commit, ignore) - return i.ForEach(func(commit *object.Commit) error { if seen[commit.Hash] { - return nil + continue } cb(commit.Hash) @@ -122,15 +146,28 @@ func reachableObjects( return err } - return iterateCommitTrees(seen, tree, cb) - }) + if err := iterateCommitTrees(seen, tree, cb); err != nil { + return err + } + } + + return nil +} + +func addPendingParents(pending, visited map[plumbing.Hash]bool, commit *object.Commit) { + for _, p := range commit.ParentHashes { + if !visited[p] { + pending[p] = true + } + } } // iterateCommitTrees iterate all reachable trees from the given commit func iterateCommitTrees( seen map[plumbing.Hash]bool, tree *object.Tree, - cb func(h plumbing.Hash)) error { + cb func(h plumbing.Hash), +) error { if seen[tree.Hash] { return nil } diff --git a/plumbing/revlist/revlist_test.go b/plumbing/revlist/revlist_test.go index dd1e8c1..643e3eb 100644 --- a/plumbing/revlist/revlist_test.go +++ b/plumbing/revlist/revlist_test.go @@ -217,3 +217,60 @@ func (s *RevListSuite) TestRevListObjectsNewBranch(c *C) { } c.Assert(len(remoteHist), Equals, len(revList)) } + +// This tests will ensure that a5b8b09 and b8e471f will be visited even if +// 35e8510 has already been visited and will not stop iterating until they +// have been as well. +// +// * af2d6a6 some json +// * 1669dce Merge branch 'master' +// |\ +// | * a5b8b09 Merge pull request #1 +// | |\ +// | | * b8e471f Creating changelog +// | |/ +// * | 35e8510 binary file +// |/ +// * b029517 Initial commit +func (s *RevListSuite) TestReachableObjectsNoRevisit(c *C) { + obj, err := s.Storer.EncodedObject(plumbing.CommitObject, plumbing.NewHash("af2d6a6954d532f8ffb47615169c8fdf9d383a1a")) + c.Assert(err, IsNil) + + do, err := object.DecodeObject(s.Storer, obj) + c.Assert(err, IsNil) + + commit, ok := do.(*object.Commit) + c.Assert(ok, Equals, true) + + var visited []plumbing.Hash + err = reachableObjects( + commit, + map[plumbing.Hash]bool{ + plumbing.NewHash("35e85108805c84807bc66a02d91535e1e24b38b9"): true, + }, + map[plumbing.Hash]bool{ + plumbing.NewHash("35e85108805c84807bc66a02d91535e1e24b38b9"): true, + }, + nil, + func(h plumbing.Hash) { + obj, err := s.Storer.EncodedObject(plumbing.AnyObject, h) + c.Assert(err, IsNil) + + do, err := object.DecodeObject(s.Storer, obj) + c.Assert(err, IsNil) + + if _, ok := do.(*object.Commit); ok { + visited = append(visited, h) + } + }, + ) + c.Assert(err, IsNil) + + c.Assert(visited, DeepEquals, []plumbing.Hash{ + plumbing.NewHash("af2d6a6954d532f8ffb47615169c8fdf9d383a1a"), + plumbing.NewHash("1669dce138d9b841a518c64b10914d88f5e488ea"), + plumbing.NewHash("a5b8b09e2f8fcb0bb99d3ccb0958157b40890d69"), + plumbing.NewHash("b029517f6300c2da0f4b651b8642506cd6aaf45d"), + plumbing.NewHash("b8e471f58bcbca63b07bda20e428190409c2db47"), + }) +} diff --git a/plumbing/transport/common.go b/plumbing/transport/common.go index 2088500..ac71bb3 100644 --- a/plumbing/transport/common.go +++ b/plumbing/transport/common.go @@ -187,6 +187,7 @@ func (e urlEndpoint) Path() string { type scpEndpoint struct { user string host string + port string path string } @@ -194,8 +195,14 @@ func (e *scpEndpoint) Protocol() string { return "ssh" } func (e *scpEndpoint) User() string { return e.user } func (e *scpEndpoint) Password() string { return "" } func (e *scpEndpoint) Host() string { return e.host } -func (e *scpEndpoint) Port() int { return 22 } func (e *scpEndpoint) Path() string { return e.path } +func (e *scpEndpoint) Port() int { + i, err := strconv.Atoi(e.port) + if err != nil { + return 22 + } + return i +} func (e *scpEndpoint) String() string { var user string @@ -220,7 +227,7 @@ func (e *fileEndpoint) String() string { return e.path } var ( isSchemeRegExp = regexp.MustCompile(`^[^:]+://`) - scpLikeUrlRegExp = regexp.MustCompile(`^(?:(?P<user>[^@]+)@)?(?P<host>[^:\s]+):(?P<path>[^\\].*)$`) + scpLikeUrlRegExp = regexp.MustCompile(`^(?:(?P<user>[^@]+)@)?(?P<host>[^:\s]+):(?:(?P<port>[0-9]{1,5})/)?(?P<path>[^\\].*)$`) ) func parseSCPLike(endpoint string) (Endpoint, bool) { @@ -232,7 +239,8 @@ func parseSCPLike(endpoint string) (Endpoint, bool) { return &scpEndpoint{ user: m[1], host: m[2], - path: m[3], + port: m[3], + path: m[4], }, true } diff --git a/plumbing/transport/common_test.go b/plumbing/transport/common_test.go index ec617bd..52759e6 100644 --- a/plumbing/transport/common_test.go +++ b/plumbing/transport/common_test.go @@ -74,6 +74,18 @@ func (s *SuiteCommon) TestNewEndpointSCPLike(c *C) { c.Assert(e.String(), Equals, "git@github.com:user/repository.git") } +func (s *SuiteCommon) TestNewEndpointSCPLikeWithPort(c *C) { + e, err := NewEndpoint("git@github.com:9999/user/repository.git") + c.Assert(err, IsNil) + c.Assert(e.Protocol(), Equals, "ssh") + c.Assert(e.User(), Equals, "git") + c.Assert(e.Password(), Equals, "") + c.Assert(e.Host(), Equals, "github.com") + c.Assert(e.Port(), Equals, 9999) + c.Assert(e.Path(), Equals, "user/repository.git") + c.Assert(e.String(), Equals, "git@github.com:user/repository.git") +} + func (s *SuiteCommon) TestNewEndpointFileAbs(c *C) { e, err := NewEndpoint("/foo.git") c.Assert(err, IsNil) diff --git a/plumbing/transport/server/server.go b/plumbing/transport/server/server.go index be36de5..f896f7a 100644 --- a/plumbing/transport/server/server.go +++ b/plumbing/transport/server/server.go @@ -165,7 +165,8 @@ func (s *upSession) UploadPack(ctx context.Context, req *packp.UploadPackRequest pr, pw := io.Pipe() e := packfile.NewEncoder(pw, s.storer, false) go func() { - _, err := e.Encode(objs) + // TODO: plumb through a pack window. + _, err := e.Encode(objs, 10) pw.CloseWithError(err) }() diff --git a/plumbing/transport/test/receive_pack.go b/plumbing/transport/test/receive_pack.go index d29d9ca..ed0f517 100644 --- a/plumbing/transport/test/receive_pack.go +++ b/plumbing/transport/test/receive_pack.go @@ -348,7 +348,7 @@ func (s *ReceivePackSuite) testSendPackDeleteReference(c *C) { func (s *ReceivePackSuite) emptyPackfile() io.ReadCloser { var buf bytes.Buffer e := packfile.NewEncoder(&buf, memory.NewStorage(), false) - _, err := e.Encode(nil) + _, err := e.Encode(nil, 10) if err != nil { panic(err) } |