package object import ( "path/filepath" "strings" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/filemode" "github.com/go-git/go-git/v5/storage/memory" . "gopkg.in/check.v1" ) type RenameSuite struct { BaseObjectsSuite } var _ = Suite(&RenameSuite{}) func (s *RenameSuite) TestNameSimilarityScore(c *C) { testCases := []struct { a, b string score int }{ {"foo/bar.c", "foo/baz.c", 70}, {"src/utils/Foo.java", "tests/utils/Foo.java", 64}, {"foo/bar/baz.py", "README.md", 0}, {"src/utils/something/foo.py", "src/utils/something/other/foo.py", 69}, {"src/utils/something/foo.py", "src/utils/yada/foo.py", 63}, {"src/utils/something/foo.py", "src/utils/something/other/bar.py", 44}, {"src/utils/something/foo.py", "src/utils/something/foo.py", 100}, } for _, tt := range testCases { c.Assert(nameSimilarityScore(tt.a, tt.b), Equals, tt.score) } } const ( pathA = "src/A" pathB = "src/B" pathH = "src/H" pathQ = "src/Q" ) func (s *RenameSuite) TestExactRename_OneRename(c *C) { a := makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo")) b := makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo")) result := detectRenames(c, Changes{a, b}, nil, 1) assertRename(c, b, a, result[0]) } func (s *RenameSuite) TestExactRename_DifferentObjects(c *C) { a := makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo")) h := makeAdd(c, makeFile(c, pathH, filemode.Regular, "foo")) q := makeDelete(c, makeFile(c, pathQ, filemode.Regular, "bar")) result := detectRenames(c, Changes{a, h, q}, nil, 3) c.Assert(result[0], DeepEquals, a) c.Assert(result[1], DeepEquals, h) c.Assert(result[2], DeepEquals, q) } func (s *RenameSuite) TestExactRename_OneRenameOneModify(c *C) { c1 := makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo")) c2 := makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo")) c3 := makeChange(c, makeFile(c, pathH, filemode.Regular, "bar"), makeFile(c, pathH, filemode.Regular, "bar"), ) result := detectRenames(c, Changes{c1, c2, c3}, nil, 2) c.Assert(result[0], DeepEquals, c3) assertRename(c, c2, c1, result[1]) } func (s *RenameSuite) TestExactRename_ManyRenames(c *C) { c1 := makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo")) c2 := makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo")) c3 := makeAdd(c, makeFile(c, pathH, filemode.Regular, "bar")) c4 := makeDelete(c, makeFile(c, pathB, filemode.Regular, "bar")) result := detectRenames(c, Changes{c1, c2, c3, c4}, nil, 2) assertRename(c, c4, c3, result[0]) assertRename(c, c2, c1, result[1]) } func (s *RenameSuite) TestExactRename_MultipleIdenticalDeletes(c *C) { changes := Changes{ makeDelete(c, makeFile(c, pathA, filemode.Regular, "foo")), makeDelete(c, makeFile(c, pathB, filemode.Regular, "foo")), makeDelete(c, makeFile(c, pathH, filemode.Regular, "foo")), makeAdd(c, makeFile(c, pathQ, filemode.Regular, "foo")), } result := detectRenames(c, changes, nil, 3) assertRename(c, changes[0], changes[3], result[0]) c.Assert(result[1], DeepEquals, changes[1]) c.Assert(result[2], DeepEquals, changes[2]) } func (s *RenameSuite) TestRenameExact_PathBreaksTie(c *C) { changes := Changes{ makeAdd(c, makeFile(c, "src/com/foo/a.java", filemode.Regular, "foo")), makeDelete(c, makeFile(c, "src/com/foo/b.java", filemode.Regular, "foo")), makeAdd(c, makeFile(c, "c.txt", filemode.Regular, "foo")), makeDelete(c, makeFile(c, "d.txt", filemode.Regular, "foo")), makeAdd(c, makeFile(c, "the_e_file.txt", filemode.Regular, "foo")), } // Add out of order to avoid first-match succeeding result := detectRenames(c, Changes{ changes[0], changes[3], changes[4], changes[1], changes[2], }, nil, 3) assertRename(c, changes[3], changes[2], result[0]) assertRename(c, changes[1], changes[0], result[1]) c.Assert(result[2], DeepEquals, changes[4]) } func (s *RenameSuite) TestExactRename_OneDeleteManyAdds(c *C) { changes := Changes{ makeAdd(c, makeFile(c, "src/com/foo/a.java", filemode.Regular, "foo")), makeAdd(c, makeFile(c, "src/com/foo/b.java", filemode.Regular, "foo")), makeAdd(c, makeFile(c, "c.txt", filemode.Regular, "foo")), makeDelete(c, makeFile(c, "d.txt", filemode.Regular, "foo")), } result := detectRenames(c, changes, nil, 3) assertRename(c, changes[3], changes[2], result[0]) c.Assert(result[1], DeepEquals, changes[0]) c.Assert(result[2], DeepEquals, changes[1]) } func (s *RenameSuite) TestExactRename_UnstagedFile(c *C) { changes := Changes{ makeDelete(c, makeFile(c, pathA, filemode.Regular, "foo")), makeAdd(c, makeFile(c, pathB, filemode.Regular, "foo")), } result := detectRenames(c, changes, nil, 1) assertRename(c, changes[0], changes[1], result[0]) } func (s *RenameSuite) TestContentRename_OnePair(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\nbaz\nblarg\n")), makeDelete(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\nbaz\nblah\n")), } result := detectRenames(c, changes, nil, 1) assertRename(c, changes[1], changes[0], result[0]) } func (s *RenameSuite) TestContentRename_OneRenameTwoUnrelatedFiles(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\nbaz\nblarg\n")), makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo\nbar\nbaz\nblah\n")), makeAdd(c, makeFile(c, pathB, filemode.Regular, "some\nsort\nof\ntext\n")), makeDelete(c, makeFile(c, pathH, filemode.Regular, "completely\nunrelated\ntext\n")), } result := detectRenames(c, changes, nil, 3) c.Assert(result[0], DeepEquals, changes[2]) c.Assert(result[1], DeepEquals, changes[3]) assertRename(c, changes[1], changes[0], result[2]) } func (s *RenameSuite) TestContentRename_LastByteDifferent(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\na")), makeDelete(c, makeFile(c, pathQ, filemode.Regular, "foo\nbar\nb")), } result := detectRenames(c, changes, nil, 1) assertRename(c, changes[1], changes[0], result[0]) } func (s *RenameSuite) TestContentRename_NewlinesOnly(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, strings.Repeat("\n", 3))), makeDelete(c, makeFile(c, pathQ, filemode.Regular, strings.Repeat("\n", 4))), } result := detectRenames(c, changes, nil, 1) assertRename(c, changes[1], changes[0], result[0]) } func (s *RenameSuite) TestContentRename_SameContentMultipleTimes(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "a\na\na\na\n")), makeDelete(c, makeFile(c, pathQ, filemode.Regular, "a\na\na\n")), } result := detectRenames(c, changes, nil, 1) assertRename(c, changes[1], changes[0], result[0]) } func (s *RenameSuite) TestContentRename_OnePairRenameScore50(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "ab\nab\nab\nac\nad\nae\n")), makeDelete(c, makeFile(c, pathQ, filemode.Regular, "ac\nab\nab\nab\naa\na0\na1\n")), } result := detectRenames(c, changes, &DiffTreeOptions{RenameScore: 50}, 1) assertRename(c, changes[1], changes[0], result[0]) } func (s *RenameSuite) TestNoRenames_SingleByteFiles(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "a")), makeAdd(c, makeFile(c, pathQ, filemode.Regular, "b")), } result := detectRenames(c, changes, nil, 2) c.Assert(result[0], DeepEquals, changes[0]) c.Assert(result[1], DeepEquals, changes[1]) } func (s *RenameSuite) TestNoRenames_EmptyFile(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "")), } result := detectRenames(c, changes, nil, 1) c.Assert(result[0], DeepEquals, changes[0]) } func (s *RenameSuite) TestNoRenames_EmptyFile2(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "")), makeDelete(c, makeFile(c, pathQ, filemode.Regular, "blah")), } result := detectRenames(c, changes, nil, 2) c.Assert(result[0], DeepEquals, changes[0]) c.Assert(result[1], DeepEquals, changes[1]) } func (s *RenameSuite) TestNoRenames_SymlinkAndFile(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "src/dest")), makeDelete(c, makeFile(c, pathQ, filemode.Symlink, "src/dest")), } result := detectRenames(c, changes, nil, 2) c.Assert(result[0], DeepEquals, changes[0]) c.Assert(result[1], DeepEquals, changes[1]) } func (s *RenameSuite) TestNoRenames_SymlinkAndFileSamePath(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "src/dest")), makeDelete(c, makeFile(c, pathA, filemode.Symlink, "src/dest")), } result := detectRenames(c, changes, nil, 2) c.Assert(result[0], DeepEquals, changes[0]) c.Assert(result[1], DeepEquals, changes[1]) } func (s *RenameSuite) TestRenameLimit(c *C) { changes := Changes{ makeAdd(c, makeFile(c, pathA, filemode.Regular, "foo\nbar\nbaz\nblarg\n")), makeDelete(c, makeFile(c, pathB, filemode.Regular, "foo\nbar\nbaz\nblah\n")), makeAdd(c, makeFile(c, pathH, filemode.Regular, "a\nb\nc\nd\n")), makeDelete(c, makeFile(c, pathQ, filemode.Regular, "a\nb\nc\n")), } result := detectRenames(c, changes, &DiffTreeOptions{RenameLimit: 1}, 4) for i, res := range result { c.Assert(res, DeepEquals, changes[i]) } } func detectRenames(c *C, changes Changes, opts *DiffTreeOptions, expectedResults int) Changes { result, err := DetectRenames(changes, opts) c.Assert(err, IsNil) c.Assert(result, HasLen, expectedResults) return result } func assertRename(c *C, from, to *Change, rename *Change) { c.Assert(&Change{From: from.From, To: to.To}, DeepEquals, rename) } type SimilarityIndexSuite struct { BaseObjectsSuite } var _ = Suite(&SimilarityIndexSuite{}) func (s *SimilarityIndexSuite) TestScoreFiles(c *C) { tree := s.tree(c, plumbing.NewHash("a8d315b2b1c615d43042c3a62402b8a54288cf5c")) binary, err := tree.File("binary.jpg") c.Assert(err, IsNil) binIndex, err := fileSimilarityIndex(binary) c.Assert(err, IsNil) long, err := tree.File("json/long.json") c.Assert(err, IsNil) longIndex, err := fileSimilarityIndex(long) c.Assert(err, IsNil) short, err := tree.File("json/short.json") c.Assert(err, IsNil) shortIndex, err := fileSimilarityIndex(short) c.Assert(err, IsNil) php, err := tree.File("php/crappy.php") c.Assert(err, IsNil) phpIndex, err := fileSimilarityIndex(php) c.Assert(err, IsNil) testCases := []struct { src, dst *similarityIndex expectedScore int }{ {binIndex, binIndex, 10000}, // same file {shortIndex, longIndex, 32}, // slightly similar files {longIndex, shortIndex, 32}, // same as previous, diff order {shortIndex, phpIndex, 1}, // different files {longIndex, binIndex, 0}, // code vs binary file } for _, tt := range testCases { score := tt.src.score(tt.dst, 10000) c.Assert(score, Equals, tt.expectedScore) } } func (s *SimilarityIndexSuite) TestHashContent(c *C) { idx := textIndex(c, "A\n"+ "B\n"+ "D\n"+ "B\n") keyA := keyFor(c, "A\n") keyB := keyFor(c, "B\n") keyD := keyFor(c, "D\n") c.Assert(keyA, Not(Equals), keyB) c.Assert(keyA, Not(Equals), keyD) c.Assert(keyD, Not(Equals), keyB) c.Assert(idx.numHashes, Equals, 3) c.Assert(idx.hashes[findIndex(idx, keyA)].count(), Equals, uint64(2)) c.Assert(idx.hashes[findIndex(idx, keyB)].count(), Equals, uint64(4)) c.Assert(idx.hashes[findIndex(idx, keyD)].count(), Equals, uint64(2)) } func (s *SimilarityIndexSuite) TestCommonSameFiles(c *C) { content := "A\n" + "B\n" + "D\n" + "B\n" src := textIndex(c, content) dst := textIndex(c, content) c.Assert(src.common(dst), Equals, uint64(8)) c.Assert(dst.common(src), Equals, uint64(8)) c.Assert(src.score(dst, 100), Equals, 100) c.Assert(dst.score(src, 100), Equals, 100) } func (s *SimilarityIndexSuite) TestCommonSameFilesCR(c *C) { content := "A\r\n" + "B\r\n" + "D\r\n" + "B\r\n" src := textIndex(c, content) dst := textIndex(c, strings.ReplaceAll(content, "\r", "")) c.Assert(src.common(dst), Equals, uint64(8)) c.Assert(dst.common(src), Equals, uint64(8)) c.Assert(src.score(dst, 100), Equals, 100) c.Assert(dst.score(src, 100), Equals, 100) } func (s *SimilarityIndexSuite) TestCommonEmptyFiles(c *C) { src := textIndex(c, "") dst := textIndex(c, "") c.Assert(src.common(dst), Equals, uint64(0)) c.Assert(dst.common(src), Equals, uint64(0)) } func (s *SimilarityIndexSuite) TestCommonTotallyDifferentFiles(c *C) { src := textIndex(c, "A\n") dst := textIndex(c, "D\n") c.Assert(src.common(dst), Equals, uint64(0)) c.Assert(dst.common(src), Equals, uint64(0)) } func (s *SimilarityIndexSuite) TestSimilarity75(c *C) { src := textIndex(c, "A\nB\nC\nD\n") dst := textIndex(c, "A\nB\nC\nQ\n") c.Assert(src.common(dst), Equals, uint64(6)) c.Assert(dst.common(src), Equals, uint64(6)) c.Assert(src.score(dst, 100), Equals, 75) c.Assert(dst.score(src, 100), Equals, 75) } func keyFor(c *C, line string) int { idx := newSimilarityIndex() err := idx.hashContent(strings.NewReader(line), int64(len(line)), false) c.Assert(err, IsNil) c.Assert(idx.numHashes, Equals, 1) for _, h := range idx.hashes { if h != 0 { return h.key() } } return -1 } func textIndex(c *C, content string) *similarityIndex { idx := newSimilarityIndex() err := idx.hashContent(strings.NewReader(content), int64(len(content)), false) c.Assert(err, IsNil) return idx } func findIndex(idx *similarityIndex, key int) int { for i, h := range idx.hashes { if h.key() == key { return i } } return -1 } func makeFile(c *C, name string, mode filemode.FileMode, content string) *File { obj := new(plumbing.MemoryObject) obj.SetType(plumbing.BlobObject) _, err := obj.Write([]byte(content)) c.Assert(err, IsNil) return &File{ Name: name, Mode: mode, Blob: Blob{Hash: obj.Hash(), Size: obj.Size(), obj: obj}, } } func makeChangeEntry(f *File) ChangeEntry { sto := memory.NewStorage() sto.SetEncodedObject(f.obj) tree := &Tree{s: sto} return ChangeEntry{ Name: f.Name, Tree: tree, TreeEntry: TreeEntry{ Name: filepath.Base(f.Name), Mode: f.Mode, Hash: f.Hash, }, } } func makeAdd(c *C, f *File) *Change { return makeChange(c, nil, f) } func makeDelete(c *C, f *File) *Change { return makeChange(c, f, nil) } func makeChange(c *C, from *File, to *File) *Change { if from == nil { return &Change{To: makeChangeEntry(to)} } if to == nil { return &Change{From: makeChangeEntry(from)} } if from == nil && to == nil { c.Error("cannot make change without from or to") } return &Change{From: makeChangeEntry(from), To: makeChangeEntry(to)} }