diff options
-rw-r--r-- | git_deps/blame.py | 61 | ||||
-rwxr-xr-x | git_deps/cli.py | 2 | ||||
-rw-r--r-- | git_deps/detector.py | 46 | ||||
-rwxr-xr-x | tests/self_test.sh | 9 | ||||
-rw-r--r-- | tests/test_blame.py | 24 |
5 files changed, 122 insertions, 20 deletions
diff --git a/git_deps/blame.py b/git_deps/blame.py new file mode 100644 index 0000000..41d79fe --- /dev/null +++ b/git_deps/blame.py @@ -0,0 +1,61 @@ +import subprocess +import re +from dataclasses import dataclass + +# The following classes are introduced to imitate their counterparts in pygit2, +# so that the output of 'blame_via_subprocess' can be swapped with pygit2's own +# blame output. + +@dataclass +class GitRef: + """ + A reference to a commit + """ + hex: str + +@dataclass +class BlameHunk: + """ + A chunk of a blame output which has the same commit information + for a consecutive set of lines + """ + orig_commit_id: GitRef + orig_start_line_number: int + final_start_line_number: int + lines_in_hunk: int = 1 + + +def blame_via_subprocess(path, commit, start_line, num_lines): + """ + Generate a list of blame hunks by calling 'git blame' as a separate process. + This is a workaround for the slowness of pygit2's own blame algorithm. + See https://github.com/aspiers/git-deps/issues/1 + """ + cmd = [ + 'git', 'blame', + '--porcelain', + '-L', "%d,+%d" % (start_line, num_lines), + commit, '--', path + ] + output = subprocess.check_output(cmd, universal_newlines=True) + + current_hunk = None + for line in output.split('\n'): + m = re.match(r'^([0-9a-f]{40}) (\d+) (\d+) (\d+)$', line) + + if m: # starting a new hunk + if current_hunk: + yield current_hunk + dependency_sha1, orig_line_num, line_num, length = m.group(1, 2, 3, 4) + orig_line_num = int(orig_line_num) + line_num = int(line_num) + length = int(length) + current_hunk = BlameHunk( + orig_commit_id=GitRef(dependency_sha1), + orig_start_line_number = orig_line_num, + final_start_line_number = line_num, + lines_in_hunk = length + ) + + if current_hunk: + yield current_hunk diff --git a/git_deps/cli.py b/git_deps/cli.py index 34a45e1..701b004 100755 --- a/git_deps/cli.py +++ b/git_deps/cli.py @@ -75,6 +75,8 @@ def parse_args(): '[%(default)s]') parser.add_argument('-d', '--debug', dest='debug', action='store_true', help='Show debugging') + parser.add_argument('--pygit2-blame', dest='pygit2_blame', action='store_true', + help="Use pygit2's blame algorithm (slower than git's)") options, args = parser.parse_known_args() diff --git a/git_deps/detector.py b/git_deps/detector.py index 1836a56..82996a6 100644 --- a/git_deps/detector.py +++ b/git_deps/detector.py @@ -7,6 +7,7 @@ from git_deps.utils import abort, standard_logger from git_deps.gitutils import GitUtils from git_deps.listener.base import DependencyListener from git_deps.errors import InvalidCommitish +from git_deps.blame import blame_via_subprocess class DependencyDetector(object): @@ -172,24 +173,26 @@ class DependencyDetector(object): line_to_culprit = {} - for line in blame.split('\n'): - self.process_hunk_line(dependent, dependent_sha1, parent, - path, line, line_to_culprit) + for blame_hunk in blame: + self.process_blame_hunk(dependent, dependent_sha1, parent, + path, blame_hunk, line_to_culprit) self.debug_hunk(line_range_before, line_range_after, hunk, line_to_culprit) - def process_hunk_line(self, dependent, dependent_sha1, parent, - path, line, line_to_culprit): - self.logger.debug(" ! " + line.rstrip()) - m = re.match(r'^([0-9a-f]{40}) (\d+) (\d+)( \d+)?$', line) - if not m: - return + def process_blame_hunk(self, dependent, dependent_sha1, parent, + path, blame_hunk, line_to_culprit): + + orig_line_num = blame_hunk.orig_start_line_number + line_num = blame_hunk.final_start_line_number + dependency_sha1 = blame_hunk.orig_commit_id.hex + line_representation = f"{dependency_sha1} {orig_line_num} {line_num}" + + self.logger.debug(f" ! {line_representation}") - dependency_sha1, orig_line_num, line_num = m.group(1, 2, 3) - line_num = int(line_num) dependency = self.get_commit(dependency_sha1) - line_to_culprit[line_num] = dependency.hex + for i in range(blame_hunk.lines_in_hunk): + line_to_culprit[line_num + i] = dependency.hex if self.is_excluded(dependency): self.logger.debug( @@ -206,7 +209,7 @@ class DependencyDetector(object): self.record_dependency_source(parent, dependent, dependent_sha1, dependency, dependency_sha1, - path, line_num, line) + path, line_num, line_representation) def debug_hunk(self, line_range_before, line_range_after, hunk, line_to_culprit): @@ -234,13 +237,16 @@ class DependencyDetector(object): self.notify_listeners("new_dependent", dependent) def run_blame(self, hunk, parent, path): - cmd = [ - 'git', 'blame', - '--porcelain', - '-L', "%d,+%d" % (hunk.old_start, hunk.old_lines), - parent.hex, '--', path - ] - return subprocess.check_output(cmd, universal_newlines=True) + if self.options.pygit2_blame: + return self.repo.blame(path, + newest_commit=parent.hex, + min_line=hunk.old_start, + max_line=hunk.old_start + hunk.old_lines - 1) + else: + return blame_via_subprocess(path, + parent.hex, + hunk.old_start, + hunk.old_lines) def is_excluded(self, commit): if self.options.exclude_commits is not None: diff --git a/tests/self_test.sh b/tests/self_test.sh index a0b2272..52e1a28 100755 --- a/tests/self_test.sh +++ b/tests/self_test.sh @@ -13,12 +13,21 @@ echo "Running test suite" echo "* Dependencies of 4f27a1e, a regular commit" git-deps 4f27a1e^! | sort | diff tests/expected_outputs/deps_4f27a1e - +echo "* Same, but via pygit2's blame algorithm" +git-deps --pygit2-blame 4f27a1e^! | sort | diff tests/expected_outputs/deps_4f27a1e - + echo "* Dependencies of 1ba7ad5, a merge commit" git-deps 1ba7ad5^! | sort | diff tests/expected_outputs/deps_1ba7ad5 - +echo "* Same, but via pygit2's blame algorithm" +git-deps --pygit2-blame 1ba7ad5^! | sort | diff tests/expected_outputs/deps_1ba7ad5 - + echo "* Dependencies of the root commit" git-deps b196757^! | sort | diff tests/expected_outputs/deps_b196757 - +echo "* Same, but via pygit2's blame algorithm" +git-deps --pygit2-blame b196757^! | sort | diff tests/expected_outputs/deps_b196757 - + echo "* Recursive dependencies of a4f27a1e, a regular commit" git-deps -r 4f27a1e^! | sort | diff tests/expected_outputs/recursive_deps_4f27a1e - diff --git a/tests/test_blame.py b/tests/test_blame.py new file mode 100644 index 0000000..410e7de --- /dev/null +++ b/tests/test_blame.py @@ -0,0 +1,24 @@ + +from git_deps.blame import blame_via_subprocess, BlameHunk, GitRef + +def test_blame_via_subprocess(): + hunks = list(blame_via_subprocess( + 'INSTALL.md', + '04f5c095d4eccf5808db6dbf90c31a535f7f371c', + 12, 4)) + + expected_hunks = [ + BlameHunk( + GitRef('6e23a48f888a355ad7e101c797ce1b66c4b7b86a'), + orig_start_line_number=12, + final_start_line_number=12, + lines_in_hunk=2), + BlameHunk( + GitRef('2c9d23b0291157eb1096384ff76e0122747b9bdf'), + orig_start_line_number=10, + final_start_line_number=14, + lines_in_hunk=2) + ] + + assert hunks == expected_hunks + |