diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2020-04-23 18:36:50 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-04-23 18:36:50 +0200 |
commit | 218a744b6995a89f5c322aa58e79138d65392ea6 (patch) | |
tree | 385575e9f68787661fcd04840f6061b40658ab40 /plumbing/object/rename_test.go | |
parent | 53f87846a196c856e00fe825bc5f29551b2ea524 (diff) | |
parent | 90696f07f3a64243c6f33963f5267ab98622db9f (diff) | |
download | go-git-218a744b6995a89f5c322aa58e79138d65392ea6.tar.gz |
Merge pull request #38 from go-git/feature/difftree-renames
plumbing: detect renames by hash and similar content in diff tree
Diffstat (limited to 'plumbing/object/rename_test.go')
-rw-r--r-- | plumbing/object/rename_test.go | 490 |
1 files changed, 490 insertions, 0 deletions
diff --git a/plumbing/object/rename_test.go b/plumbing/object/rename_test.go new file mode 100644 index 0000000..bf5044d --- /dev/null +++ b/plumbing/object/rename_test.go @@ -0,0 +1,490 @@ +package object + +import ( + "path/filepath" + "strings" + + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/storage/memory" + . "gopkg.in/check.v1" +) + +type RenameSuite struct { + BaseObjectsSuite +} + +var _ = Suite(&RenameSuite{}) + +func (s *RenameSuite) TestNameSimilarityScore(c *C) { + testCases := []struct { + a, b string + score int + }{ + {"foo/bar.c", "foo/baz.c", 70}, + {"src/utils/Foo.java", "tests/utils/Foo.java", 64}, + {"foo/bar/baz.py", "README.md", 0}, + {"src/utils/something/foo.py", "src/utils/something/other/foo.py", 69}, + {"src/utils/something/foo.py", "src/utils/yada/foo.py", 63}, + {"src/utils/something/foo.py", "src/utils/something/other/bar.py", 44}, + {"src/utils/something/foo.py", "src/utils/something/foo.py", 100}, + } + + for _, tt := range testCases { + c.Assert(nameSimilarityScore(tt.a, tt.b), Equals, tt.score) + } +} + +const ( + pathA = "src/A" + pathB = "src/B" + pathH = "src/H" + pathQ = "src/Q" +) + +func (s *RenameSuite) TestExactRename_OneRename(c *C) { + a := makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo")) + b := makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo")) + + result := detectRenames(c, Changes{a, b}, nil, 1) + assertRename(c, b, a, result[0]) +} + +func (s *RenameSuite) TestExactRename_DifferentObjects(c *C) { + a := makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo")) + h := makeAdd(c, makeFile(c, pathH, filemode.Regular, "foo")) + q := makeDelete(c, makeFile(c, pathQ, filemode.Regular, "bar")) + + result := detectRenames(c, Changes{a, h, q}, nil, 3) + c.Assert(result[0], DeepEquals, a) + c.Assert(result[1], DeepEquals, h) + c.Assert(result[2], DeepEquals, q) +} + +func (s *RenameSuite) TestExactRename_OneRenameOneModify(c *C) { + c1 := makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo")) + c2 := makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo")) + c3 := makeChange(c, + makeFile(c, pathH, filemode.Regular, "bar"), + makeFile(c, pathH, filemode.Regular, "bar"), + ) + + result := detectRenames(c, Changes{c1, c2, c3}, nil, 2) + c.Assert(result[0], DeepEquals, c3) + assertRename(c, c2, c1, result[1]) +} + +func (s *RenameSuite) TestExactRename_ManyRenames(c *C) { + c1 := makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo")) + c2 := makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo")) + c3 := makeAdd(c, makeFile(c, pathH, filemode.Regular, "bar")) + c4 := makeDelete(c, makeFile(c, pathB, filemode.Regular, "bar")) + + result := detectRenames(c, Changes{c1, c2, c3, c4}, nil, 2) + assertRename(c, c4, c3, result[0]) + assertRename(c, c2, c1, result[1]) +} + +func (s *RenameSuite) TestExactRename_MultipleIdenticalDeletes(c *C) { + changes := Changes{ + makeDelete(c, makeFile(c, pathA, filemode.Regular, "foo")), + makeDelete(c, makeFile(c, pathB, filemode.Regular, "foo")), + makeDelete(c, makeFile(c, pathH, filemode.Regular, "foo")), + makeAdd(c, makeFile(c, pathQ, filemode.Regular, "foo")), + } + + result := detectRenames(c, changes, nil, 3) + assertRename(c, changes[0], changes[3], result[0]) + c.Assert(result[1], DeepEquals, changes[1]) + c.Assert(result[2], DeepEquals, changes[2]) +} + +func (s *RenameSuite) TestRenameExact_PathBreaksTie(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, "src/com/foo/a.java", filemode.Regular, "foo")), + makeDelete(c, makeFile(c, "src/com/foo/b.java", filemode.Regular, "foo")), + makeAdd(c, makeFile(c, "c.txt", filemode.Regular, "foo")), + makeDelete(c, makeFile(c, "d.txt", filemode.Regular, "foo")), + makeAdd(c, makeFile(c, "the_e_file.txt", filemode.Regular, "foo")), + } + + // Add out of order to avoid first-match succeeding + result := detectRenames(c, Changes{ + changes[0], + changes[3], + changes[4], + changes[1], + changes[2], + }, nil, 3) + assertRename(c, changes[3], changes[2], result[0]) + assertRename(c, changes[1], changes[0], result[1]) + c.Assert(result[2], DeepEquals, changes[4]) +} + +func (s *RenameSuite) TestExactRename_OneDeleteManyAdds(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, "src/com/foo/a.java", filemode.Regular, "foo")), + makeAdd(c, makeFile(c, "src/com/foo/b.java", filemode.Regular, "foo")), + makeAdd(c, makeFile(c, "c.txt", filemode.Regular, "foo")), + makeDelete(c, makeFile(c, "d.txt", filemode.Regular, "foo")), + } + + result := detectRenames(c, changes, nil, 3) + assertRename(c, changes[3], changes[2], result[0]) + c.Assert(result[1], DeepEquals, changes[0]) + c.Assert(result[2], DeepEquals, changes[1]) +} + +func (s *RenameSuite) TestExactRename_UnstagedFile(c *C) { + changes := Changes{ + makeDelete(c, makeFile(c, pathA, filemode.Regular, "foo")), + makeAdd(c, makeFile(c, pathB, filemode.Regular, "foo")), + } + result := detectRenames(c, changes, nil, 1) + assertRename(c, changes[0], changes[1], result[0]) +} + +func (s *RenameSuite) TestContentRename_OnePair(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\nbaz\nblarg\n")), + makeDelete(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\nbaz\nblah\n")), + } + + result := detectRenames(c, changes, nil, 1) + assertRename(c, changes[1], changes[0], result[0]) +} + +func (s *RenameSuite) TestContentRename_OneRenameTwoUnrelatedFiles(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\nbaz\nblarg\n")), + makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo\nbar\nbaz\nblah\n")), + makeAdd(c, makeFile(c, pathB, filemode.Regular, "some\nsort\nof\ntext\n")), + makeDelete(c, makeFile(c, pathH, filemode.Regular, "completely\nunrelated\ntext\n")), + } + + result := detectRenames(c, changes, nil, 3) + c.Assert(result[0], DeepEquals, changes[2]) + c.Assert(result[1], DeepEquals, changes[3]) + assertRename(c, changes[1], changes[0], result[2]) +} + +func (s *RenameSuite) TestContentRename_LastByteDifferent(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\na")), + makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo\nbar\nb")), + } + + result := detectRenames(c, changes, nil, 1) + assertRename(c, changes[1], changes[0], result[0]) +} + +func (s *RenameSuite) TestContentRename_NewlinesOnly(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, strings.Repeat("\n", 3))), + makeDelete(c, makeFile(c, pathQ, filemode.Regular, strings.Repeat("\n", 4))), + } + + result := detectRenames(c, changes, nil, 1) + assertRename(c, changes[1], changes[0], result[0]) +} + +func (s *RenameSuite) TestContentRename_SameContentMultipleTimes(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "a\na\na\na\n")), + makeDelete(c, makeFile(c, pathQ, filemode.Regular, "a\na\na\n")), + } + + result := detectRenames(c, changes, nil, 1) + assertRename(c, changes[1], changes[0], result[0]) +} + +func (s *RenameSuite) TestContentRename_OnePairRenameScore50(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "ab\nab\nab\nac\nad\nae\n")), + makeDelete(c, makeFile(c, pathQ, filemode.Regular, "ac\nab\nab\nab\naa\na0\na1\n")), + } + + result := detectRenames(c, changes, &DiffTreeOptions{RenameScore: 50}, 1) + assertRename(c, changes[1], changes[0], result[0]) +} + +func (s *RenameSuite) TestNoRenames_SingleByteFiles(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "a")), + makeAdd(c, makeFile(c, pathQ, filemode.Regular, "b")), + } + + result := detectRenames(c, changes, nil, 2) + c.Assert(result[0], DeepEquals, changes[0]) + c.Assert(result[1], DeepEquals, changes[1]) +} + +func (s *RenameSuite) TestNoRenames_EmptyFile(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "")), + } + result := detectRenames(c, changes, nil, 1) + c.Assert(result[0], DeepEquals, changes[0]) +} + +func (s *RenameSuite) TestNoRenames_EmptyFile2(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "")), + makeDelete(c, makeFile(c, pathQ, filemode.Regular, "blah")), + } + result := detectRenames(c, changes, nil, 2) + c.Assert(result[0], DeepEquals, changes[0]) + c.Assert(result[1], DeepEquals, changes[1]) +} + +func (s *RenameSuite) TestNoRenames_SymlinkAndFile(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "src/dest")), + makeDelete(c, makeFile(c, pathQ, filemode.Symlink, "src/dest")), + } + result := detectRenames(c, changes, nil, 2) + c.Assert(result[0], DeepEquals, changes[0]) + c.Assert(result[1], DeepEquals, changes[1]) +} + +func (s *RenameSuite) TestNoRenames_SymlinkAndFileSamePath(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "src/dest")), + makeDelete(c, makeFile(c, pathA, filemode.Symlink, "src/dest")), + } + result := detectRenames(c, changes, nil, 2) + c.Assert(result[0], DeepEquals, changes[0]) + c.Assert(result[1], DeepEquals, changes[1]) +} + +func (s *RenameSuite) TestRenameLimit(c *C) { + changes := Changes{ + makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\nbaz\nblarg\n")), + makeDelete(c, makeFile(c, pathB, filemode.Regular, "foo\nbar\nbaz\nblah\n")), + makeAdd(c, makeFile(c, pathH, filemode.Regular, "a\nb\nc\nd\n")), + makeDelete(c, makeFile(c, pathQ, filemode.Regular, "a\nb\nc\n")), + } + + result := detectRenames(c, changes, &DiffTreeOptions{RenameLimit: 1}, 4) + for i, res := range result { + c.Assert(res, DeepEquals, changes[i]) + } +} + +func detectRenames(c *C, changes Changes, opts *DiffTreeOptions, expectedResults int) Changes { + result, err := DetectRenames(changes, opts) + c.Assert(err, IsNil) + c.Assert(result, HasLen, expectedResults) + return result +} + +func assertRename(c *C, from, to *Change, rename *Change) { + c.Assert(&Change{From: from.From, To: to.To}, DeepEquals, rename) +} + +type SimilarityIndexSuite struct { + BaseObjectsSuite +} + +var _ = Suite(&SimilarityIndexSuite{}) + +func (s *SimilarityIndexSuite) TestScoreFiles(c *C) { + tree := s.tree(c, plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")) + binary, err := tree.File("binary.jpg") + c.Assert(err, IsNil) + binIndex, err := fileSimilarityIndex(binary) + c.Assert(err, IsNil) + + long, err := tree.File("json/long.json") + c.Assert(err, IsNil) + longIndex, err := fileSimilarityIndex(long) + c.Assert(err, IsNil) + + short, err := tree.File("json/short.json") + c.Assert(err, IsNil) + shortIndex, err := fileSimilarityIndex(short) + c.Assert(err, IsNil) + + php, err := tree.File("php/crappy.php") + c.Assert(err, IsNil) + phpIndex, err := fileSimilarityIndex(php) + c.Assert(err, IsNil) + + testCases := []struct { + src, dst *similarityIndex + expectedScore int + }{ + {binIndex, binIndex, 10000}, // same file + {shortIndex, longIndex, 32}, // slightly similar files + {longIndex, shortIndex, 32}, // same as previous, diff order + {shortIndex, phpIndex, 1}, // different files + {longIndex, binIndex, 0}, // code vs binary file + } + + for _, tt := range testCases { + score := tt.src.score(tt.dst, 10000) + c.Assert(score, Equals, tt.expectedScore) + } +} + +func (s *SimilarityIndexSuite) TestHashContent(c *C) { + idx := textIndex(c, "A\n"+ + "B\n"+ + "D\n"+ + "B\n") + + keyA := keyFor(c, "A\n") + keyB := keyFor(c, "B\n") + keyD := keyFor(c, "D\n") + + c.Assert(keyA, Not(Equals), keyB) + c.Assert(keyA, Not(Equals), keyD) + c.Assert(keyD, Not(Equals), keyB) + + c.Assert(idx.numHashes, Equals, 3) + c.Assert(idx.hashes[findIndex(idx, keyA)].count(), Equals, uint64(2)) + c.Assert(idx.hashes[findIndex(idx, keyB)].count(), Equals, uint64(4)) + c.Assert(idx.hashes[findIndex(idx, keyD)].count(), Equals, uint64(2)) +} + +func (s *SimilarityIndexSuite) TestCommonSameFiles(c *C) { + content := "A\n" + + "B\n" + + "D\n" + + "B\n" + + src := textIndex(c, content) + dst := textIndex(c, content) + + c.Assert(src.common(dst), Equals, uint64(8)) + c.Assert(dst.common(src), Equals, uint64(8)) + + c.Assert(src.score(dst, 100), Equals, 100) + c.Assert(dst.score(src, 100), Equals, 100) +} + +func (s *SimilarityIndexSuite) TestCommonSameFilesCR(c *C) { + content := "A\r\n" + + "B\r\n" + + "D\r\n" + + "B\r\n" + + src := textIndex(c, content) + dst := textIndex(c, strings.ReplaceAll(content, "\r", "")) + + c.Assert(src.common(dst), Equals, uint64(8)) + c.Assert(dst.common(src), Equals, uint64(8)) + + c.Assert(src.score(dst, 100), Equals, 100) + c.Assert(dst.score(src, 100), Equals, 100) +} + +func (s *SimilarityIndexSuite) TestCommonEmptyFiles(c *C) { + src := textIndex(c, "") + dst := textIndex(c, "") + + c.Assert(src.common(dst), Equals, uint64(0)) + c.Assert(dst.common(src), Equals, uint64(0)) +} + +func (s *SimilarityIndexSuite) TestCommonTotallyDifferentFiles(c *C) { + src := textIndex(c, "A\n") + dst := textIndex(c, "D\n") + + c.Assert(src.common(dst), Equals, uint64(0)) + c.Assert(dst.common(src), Equals, uint64(0)) +} + +func (s *SimilarityIndexSuite) TestSimilarity75(c *C) { + src := textIndex(c, "A\nB\nC\nD\n") + dst := textIndex(c, "A\nB\nC\nQ\n") + + c.Assert(src.common(dst), Equals, uint64(6)) + c.Assert(dst.common(src), Equals, uint64(6)) + + c.Assert(src.score(dst, 100), Equals, 75) + c.Assert(dst.score(src, 100), Equals, 75) +} + +func keyFor(c *C, line string) int { + idx := newSimilarityIndex() + err := idx.hashContent(strings.NewReader(line), int64(len(line)), false) + c.Assert(err, IsNil) + + c.Assert(idx.numHashes, Equals, 1) + for _, h := range idx.hashes { + if h != 0 { + return h.key() + } + } + + return -1 +} + +func textIndex(c *C, content string) *similarityIndex { + idx := newSimilarityIndex() + err := idx.hashContent(strings.NewReader(content), int64(len(content)), false) + c.Assert(err, IsNil) + return idx +} + +func findIndex(idx *similarityIndex, key int) int { + for i, h := range idx.hashes { + if h.key() == key { + return i + } + } + return -1 +} + +func makeFile(c *C, name string, mode filemode.FileMode, content string) *File { + obj := new(plumbing.MemoryObject) + obj.SetType(plumbing.BlobObject) + _, err := obj.Write([]byte(content)) + c.Assert(err, IsNil) + return &File{ + Name: name, + Mode: mode, + Blob: Blob{Hash: obj.Hash(), Size: obj.Size(), obj: obj}, + } +} + +func makeChangeEntry(f *File) ChangeEntry { + sto := memory.NewStorage() + sto.SetEncodedObject(f.obj) + tree := &Tree{s: sto} + + return ChangeEntry{ + Name: f.Name, + Tree: tree, + TreeEntry: TreeEntry{ + Name: filepath.Base(f.Name), + Mode: f.Mode, + Hash: f.Hash, + }, + } +} + +func makeAdd(c *C, f *File) *Change { + return makeChange(c, nil, f) +} + +func makeDelete(c *C, f *File) *Change { + return makeChange(c, f, nil) +} + +func makeChange(c *C, from *File, to *File) *Change { + if from == nil { + return &Change{To: makeChangeEntry(to)} + } + + if to == nil { + return &Change{From: makeChangeEntry(from)} + } + + if from == nil && to == nil { + c.Error("cannot make change without from or to") + } + + return &Change{From: makeChangeEntry(from), To: makeChangeEntry(to)} +} |