aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Adam Spiers <git@adamspiers.org> 2023-10-10 01:42:53 +0100
committer GitHub <noreply@github.com> 2023-10-10 01:42:53 +0100
commit 89d51e87a3fe4bdcb1efabbe7a4923958998a61c (patch)
tree 64f4a27d45553fc977a621b4f356134f585d17ba
parent caca4f6936ed1226ef20792542af5189cc110078 (diff)
parent b0d5a8dcbc033bab0a97383b2082e09ccf698dd7 (diff)
download git-deps-master.tar.gz
Merge pull request #127 from wetneb/1-blame-via-pygitmaster
Blame via pygit2 instead of subprocess
-rw-r--r-- git_deps/blame.py 61
-rwxr-xr-x git_deps/cli.py 2
-rw-r--r-- git_deps/detector.py 46
-rwxr-xr-x tests/self_test.sh 9
-rw-r--r-- tests/test_blame.py 24
5 files changed, 122 insertions, 20 deletions
diff --git a/git_deps/blame.py b/git_deps/blame.py
new file mode 100644
index 0000000..41d79fe
--- /dev/null
+++ b/git_deps/blame.py
@@ -0,0 +1,61 @@
+import subprocess
+import re
+from dataclasses import dataclass
+
+# The following classes are introduced to imitate their counterparts in pygit2,
+# so that the output of 'blame_via_subprocess' can be swapped with pygit2's own
+# blame output.
+
+@dataclass
+class GitRef:
+ """
+ A reference to a commit.
+
+ Minimal stand-in for the object pygit2 exposes as a blame hunk's
+ 'orig_commit_id'; the only attribute provided is 'hex'.
+ """
+ # Commit id as a hexadecimal string (40 hex digits when the instance is
+ # built from 'git blame --porcelain' output).
+ hex: str
+
+@dataclass
+class BlameHunk:
+ """
+ A chunk of a blame output which has the same commit information
+ for a consecutive set of lines.
+
+ The attribute names follow those of pygit2's blame hunks so that either
+ kind of hunk can be handed to the same consumer.
+ """
+ # Commit the lines of this hunk are attributed to.
+ orig_commit_id: GitRef
+ # Line number of the hunk's first line in the original file (second
+ # field of a 'git blame --porcelain' group header).
+ orig_start_line_number: int
+ # Line number of the hunk's first line in the final, blamed file (third
+ # field of the group header).
+ final_start_line_number: int
+ # Number of consecutive lines covered by the hunk (fourth field of the
+ # group header).
+ lines_in_hunk: int = 1
+
+
+def blame_via_subprocess(path, commit, start_line, num_lines):
+    """
+    Generate blame hunks by calling 'git blame' as a separate process.
+
+    This is a workaround for the slowness of pygit2's own blame algorithm.
+    See https://github.com/aspiers/git-deps/issues/1
+
+    :param path: file to blame, passed to git after '--'
+    :param commit: revision to blame at, passed to git verbatim
+    :param start_line: first line of the blamed range
+    :param num_lines: number of lines in the blamed range
+    :return: generator yielding one BlameHunk per group of lines in git's
+        porcelain output, in output order; nothing is yielded when the
+        range is empty
+    :raises subprocess.CalledProcessError: if 'git blame' exits non-zero
+    """
+    if num_lines < 1:
+        # git rejects '-L <start>,+0' ("-L invalid empty range"); an empty
+        # range has no lines to attribute, so there are no hunks to report.
+        return
+
+    cmd = [
+        'git', 'blame',
+        '--porcelain',
+        '-L', "%d,+%d" % (start_line, num_lines),
+        commit, '--', path
+    ]
+    output = subprocess.check_output(cmd, universal_newlines=True)
+
+    # Only the header which starts a group carries all four fields (sha,
+    # original line, final line, group length), so every match is exactly
+    # one hunk. All other porcelain lines are skipped.
+    group_header = re.compile(r'^([0-9a-f]{40}) (\d+) (\d+) (\d+)$')
+
+    for line in output.split('\n'):
+        m = group_header.match(line)
+        if not m:
+            continue
+        dependency_sha1, orig_line_num, line_num, length = m.groups()
+        yield BlameHunk(
+            orig_commit_id=GitRef(dependency_sha1),
+            orig_start_line_number=int(orig_line_num),
+            final_start_line_number=int(line_num),
+            lines_in_hunk=int(length),
+        )
diff --git a/git_deps/cli.py b/git_deps/cli.py
index 34a45e1..701b004 100755
--- a/git_deps/cli.py
+++ b/git_deps/cli.py
@@ -75,6 +75,8 @@ def parse_args():
'[%(default)s]')
parser.add_argument('-d', '--debug', dest='debug', action='store_true',
help='Show debugging')
+ parser.add_argument('--pygit2-blame', dest='pygit2_blame', action='store_true',
+ help="Use pygit2's blame algorithm (slower than git's)")
options, args = parser.parse_known_args()
diff --git a/git_deps/detector.py b/git_deps/detector.py
index 1836a56..82996a6 100644
--- a/git_deps/detector.py
+++ b/git_deps/detector.py
@@ -7,6 +7,7 @@ from git_deps.utils import abort, standard_logger
from git_deps.gitutils import GitUtils
from git_deps.listener.base import DependencyListener
from git_deps.errors import InvalidCommitish
+from git_deps.blame import blame_via_subprocess
class DependencyDetector(object):
@@ -172,24 +173,26 @@ class DependencyDetector(object):
line_to_culprit = {}
- for line in blame.split('\n'):
- self.process_hunk_line(dependent, dependent_sha1, parent,
- path, line, line_to_culprit)
+ for blame_hunk in blame:
+ self.process_blame_hunk(dependent, dependent_sha1, parent,
+ path, blame_hunk, line_to_culprit)
self.debug_hunk(line_range_before, line_range_after, hunk,
line_to_culprit)
- def process_hunk_line(self, dependent, dependent_sha1, parent,
- path, line, line_to_culprit):
- self.logger.debug(" ! " + line.rstrip())
- m = re.match(r'^([0-9a-f]{40}) (\d+) (\d+)( \d+)?$', line)
- if not m:
- return
+ def process_blame_hunk(self, dependent, dependent_sha1, parent,
+ path, blame_hunk, line_to_culprit):
+
+ orig_line_num = blame_hunk.orig_start_line_number
+ line_num = blame_hunk.final_start_line_number
+ dependency_sha1 = blame_hunk.orig_commit_id.hex
+ line_representation = f"{dependency_sha1} {orig_line_num} {line_num}"
+
+ self.logger.debug(f" ! {line_representation}")
- dependency_sha1, orig_line_num, line_num = m.group(1, 2, 3)
- line_num = int(line_num)
dependency = self.get_commit(dependency_sha1)
- line_to_culprit[line_num] = dependency.hex
+ for i in range(blame_hunk.lines_in_hunk):
+ line_to_culprit[line_num + i] = dependency.hex
if self.is_excluded(dependency):
self.logger.debug(
@@ -206,7 +209,7 @@ class DependencyDetector(object):
self.record_dependency_source(parent,
dependent, dependent_sha1,
dependency, dependency_sha1,
- path, line_num, line)
+ path, line_num, line_representation)
def debug_hunk(self, line_range_before, line_range_after, hunk,
line_to_culprit):
@@ -234,13 +237,16 @@ class DependencyDetector(object):
self.notify_listeners("new_dependent", dependent)
def run_blame(self, hunk, parent, path):
- cmd = [
- 'git', 'blame',
- '--porcelain',
- '-L', "%d,+%d" % (hunk.old_start, hunk.old_lines),
- parent.hex, '--', path
- ]
- return subprocess.check_output(cmd, universal_newlines=True)
+ if self.options.pygit2_blame:
+ return self.repo.blame(path,
+ newest_commit=parent.hex,
+ min_line=hunk.old_start,
+ max_line=hunk.old_start + hunk.old_lines - 1)
+ else:
+ return blame_via_subprocess(path,
+ parent.hex,
+ hunk.old_start,
+ hunk.old_lines)
def is_excluded(self, commit):
if self.options.exclude_commits is not None:
diff --git a/tests/self_test.sh b/tests/self_test.sh
index a0b2272..52e1a28 100755
--- a/tests/self_test.sh
+++ b/tests/self_test.sh
@@ -13,12 +13,21 @@ echo "Running test suite"
echo "* Dependencies of 4f27a1e, a regular commit"
git-deps 4f27a1e^! | sort | diff tests/expected_outputs/deps_4f27a1e -
+echo "* Same, but via pygit2's blame algorithm"
+git-deps --pygit2-blame 4f27a1e^! | sort | diff tests/expected_outputs/deps_4f27a1e -
+
echo "* Dependencies of 1ba7ad5, a merge commit"
git-deps 1ba7ad5^! | sort | diff tests/expected_outputs/deps_1ba7ad5 -
+echo "* Same, but via pygit2's blame algorithm"
+git-deps --pygit2-blame 1ba7ad5^! | sort | diff tests/expected_outputs/deps_1ba7ad5 -
+
echo "* Dependencies of the root commit"
git-deps b196757^! | sort | diff tests/expected_outputs/deps_b196757 -
+echo "* Same, but via pygit2's blame algorithm"
+git-deps --pygit2-blame b196757^! | sort | diff tests/expected_outputs/deps_b196757 -
+
echo "* Recursive dependencies of a4f27a1e, a regular commit"
git-deps -r 4f27a1e^! | sort | diff tests/expected_outputs/recursive_deps_4f27a1e -
diff --git a/tests/test_blame.py b/tests/test_blame.py
new file mode 100644
index 0000000..410e7de
--- /dev/null
+++ b/tests/test_blame.py
@@ -0,0 +1,24 @@
+
+from git_deps.blame import blame_via_subprocess, BlameHunk, GitRef
+
+def test_blame_via_subprocess():
+    """
+    Blaming four lines of INSTALL.md starting at line 12, at a fixed
+    commit, must produce two two-line hunks from two known commits.
+    """
+    first_pair = BlameHunk(
+        orig_commit_id=GitRef('6e23a48f888a355ad7e101c797ce1b66c4b7b86a'),
+        orig_start_line_number=12,
+        final_start_line_number=12,
+        lines_in_hunk=2)
+    second_pair = BlameHunk(
+        orig_commit_id=GitRef('2c9d23b0291157eb1096384ff76e0122747b9bdf'),
+        orig_start_line_number=10,
+        final_start_line_number=14,
+        lines_in_hunk=2)
+    expected_hunks = [first_pair, second_pair]
+
+    blame = blame_via_subprocess(
+        'INSTALL.md',
+        '04f5c095d4eccf5808db6dbf90c31a535f7f371c',
+        12, 4)
+    hunks = list(blame)
+
+    assert hunks == expected_hunks
+