diff options
-rw-r--r-- | git_deps/detector.py | 235 | ||||
-rw-r--r-- | git_deps/listener/cli.py | 3 |
2 files changed, 137 insertions, 101 deletions
diff --git a/git_deps/detector.py b/git_deps/detector.py index df5b001..8e977cc 100644 --- a/git_deps/detector.py +++ b/git_deps/detector.py @@ -10,11 +10,11 @@ from git_deps.errors import InvalidCommitish class DependencyDetector(object): - """Class for automatically detecting dependencies between git commits. - A dependency is inferred by diffing the commit with each of its - parents, and for each resulting hunk, performing a blame to see - which commit was responsible for introducing the lines to which - the hunk was applied. + """Class for automatically detecting dependencies between git + commits. A dependency is inferred by diffing the commit with each + of its parents, and for each resulting hunk, performing a blame to + see which commit was responsible for introducing the lines to + which the hunk was applied. Dependencies can be traversed recursively, building a dependency tree represented (conceptually) by a list of edges. @@ -101,18 +101,18 @@ class DependencyDetector(object): while self.todo: sha1s = [commit.hex[:8] for commit in self.todo] if first_time: - self.logger.debug("Initial TODO list: %s" % " ".join(sha1s)) + self.logger.info("Initial TODO list: %s" % " ".join(sha1s)) first_time = False else: - self.logger.debug(" TODO list now: %s" % " ".join(sha1s)) + self.logger.info(" TODO list now: %s" % " ".join(sha1s)) dependent = self.todo.pop(0) dependent_sha1 = dependent.hex del self.todo_d[dependent_sha1] - self.logger.debug(" Processing %s from TODO list" % + self.logger.info(" Processing %s from TODO list" % dependent_sha1[:8]) if dependent_sha1 in self.done_d: - self.logger.debug(" %s already done previously" % + self.logger.info(" %s already done previously" % dependent_sha1) continue @@ -122,40 +122,40 @@ class DependencyDetector(object): self.find_dependencies_with_parent(dependent, parent) self.done.append(dependent_sha1) self.done_d[dependent_sha1] = True - self.logger.debug(" Found all dependencies for %s" % + self.logger.info(" Found all dependencies for %s" % dependent_sha1[:8]) # A commit won't have any dependencies if it only added new files dependencies = self.dependencies.get(dependent_sha1, {}) self.notify_listeners('dependent_done', dependent, dependencies) - self.logger.debug("Finished processing TODO list") + self.logger.info("Finished processing TODO list") self.notify_listeners('all_done') def find_dependencies_with_parent(self, dependent, parent): - """Find all dependencies of the given revision caused by the given - parent commit. This will be called multiple times for merge - commits which have multiple parents. + """Find all dependencies of the given revision caused by the + given parent commit. This will be called multiple times for + merge commits which have multiple parents. """ - self.logger.debug(" Finding dependencies of %s via parent %s" % + self.logger.info(" Finding dependencies of %s via parent %s" % (dependent.hex[:8], parent.hex[:8])) diff = self.repo.diff(parent, dependent, context_lines=self.options.context_lines) for patch in diff: path = patch.delta.old_file.path - self.logger.debug(" Examining hunks in %s" % path) + self.logger.info(" Examining hunks in %s" % path) for hunk in patch.hunks: - self.blame_hunk(dependent, parent, path, hunk) + self.blame_diff_hunk(dependent, parent, path, hunk) - def blame_hunk(self, dependent, parent, path, hunk): - """Run git blame on the parts of the hunk which exist in the older - commit in the diff. The commits generated by git blame are - the commits which the newer commit in the diff depends on, + def blame_diff_hunk(self, dependent, parent, path, hunk): + """Run git blame on the parts of the hunk which exist in the + older commit in the diff. The commits generated by git blame + are the commits which the newer commit in the diff depends on, because without the lines from those commits, the hunk would not apply correctly. """ line_range_before = "-%d,%d" % (hunk.old_start, hunk.old_lines) line_range_after = "+%d,%d" % (hunk.new_start, hunk.new_lines) - self.logger.debug(" Blaming hunk %s @ %s" % + self.logger.info(" Blaming hunk %s @ %s (listed below)" % (line_range_before, parent.hex[:8])) if not self.tree_lookup(path, parent): @@ -163,93 +163,52 @@ class DependencyDetector(object): # which was not previously in the parent. return - cmd = [ - 'git', 'blame', - '--porcelain', - '-L', "%d,+%d" % (hunk.old_start, hunk.old_lines), - parent.hex, '--', path - ] - blame = subprocess.check_output(cmd, universal_newlines=True) + blame = self.run_blame(hunk, parent, path) dependent_sha1 = dependent.hex - if dependent_sha1 not in self.dependencies: - self.logger.debug(" New dependent: %s" % - GitUtils.commit_summary(dependent)) - self.dependencies[dependent_sha1] = {} - self.notify_listeners("new_dependent", dependent) + self.register_new_dependent(dependent, dependent_sha1) line_to_culprit = {} for line in blame.split('\n'): - self.logger.debug(" !" + line.rstrip()) - m = re.match('^([0-9a-f]{40}) (\d+) (\d+)( \d+)?$', line) - if not m: - continue - dependency_sha1, orig_line_num, line_num = m.group(1, 2, 3) - line_num = int(line_num) - dependency = self.get_commit(dependency_sha1) - line_to_culprit[line_num] = dependency.hex - - if self.is_excluded(dependency): - self.logger.debug( - " Excluding dependency %s from line %s (%s)" % - (dependency_sha1[:8], line_num, - GitUtils.oneline(dependency))) - continue + self.process_hunk_line(dependent, dependent_sha1, parent, + path, line, line_to_culprit) - if dependency_sha1 not in self.dependencies[dependent_sha1]: - if not self.seen_commit(dependency): - self.notify_listeners("new_commit", dependency) - self.dependencies[dependent_sha1][dependency_sha1] = {} - - self.notify_listeners("new_dependency", - dependent, dependency, path, line_num) - - self.logger.debug( - " New dependency %s -> %s via line %s (%s)" % - (dependent_sha1[:8], dependency_sha1[:8], line_num, - GitUtils.oneline(dependency))) - - if dependency_sha1 in self.todo_d: - self.logger.debug( - " Dependency on %s via line %s already in TODO" - % (dependency_sha1[:8], line_num,)) - continue - - if dependency_sha1 in self.done_d: - self.logger.debug( - " Dependency on %s via line %s already done" % - (dependency_sha1[:8], line_num,)) - continue - - if dependency_sha1 not in self.dependencies: - if self.options.recurse: - self.todo.append(dependency) - self.todo_d[dependency.hex] = True - self.logger.debug(" + Added %s to TODO" % - dependency.hex[:8]) - - dep_sources = self.dependencies[dependent_sha1][dependency_sha1] - - if path not in dep_sources: - dep_sources[path] = {} - self.notify_listeners('new_path', - dependent, dependency, path, line_num) - - if line_num in dep_sources[path]: - abort("line %d already found when blaming %s:%s\n" - "old:\n %s\n" - "new:\n %s" % - (line_num, parent.hex[:8], path, - dep_sources[path][line_num], line)) - - dep_sources[path][line_num] = line - self.logger.debug(" New line for %s -> %s: %s" % - (dependent_sha1[:8], dependency_sha1[:8], line)) - self.notify_listeners('new_line', - dependent, dependency, path, line_num) + self.debug_hunk(line_range_before, line_range_after, hunk, + line_to_culprit) + + def process_hunk_line(self, dependent, dependent_sha1, parent, + path, line, line_to_culprit): + self.logger.debug(" ! " + line.rstrip()) + m = re.match('^([0-9a-f]{40}) (\d+) (\d+)( \d+)?$', line) + if not m: + return + + dependency_sha1, orig_line_num, line_num = m.group(1, 2, 3) + line_num = int(line_num) + dependency = self.get_commit(dependency_sha1) + line_to_culprit[line_num] = dependency.hex + + if self.is_excluded(dependency): + self.logger.debug( + " Excluding dependency %s from line %s (%s)" % + (dependency_sha1[:8], line_num, + GitUtils.oneline(dependency))) + return + + if dependency_sha1 not in self.dependencies[dependent_sha1]: + self.process_new_dependency(dependent, dependent_sha1, + dependency, dependency_sha1, + path, line_num) + + self.record_dependency_source(parent, + dependent, dependent_sha1, + dependency, dependency_sha1, + path, line_num, line) - diff_format = ' |%8.8s %5s %s%s' + def debug_hunk(self, line_range_before, line_range_after, hunk, + line_to_culprit): + diff_format = ' | %8.8s %5s %s%s' hunk_header = '@@ %s %s @@' % (line_range_before, line_range_after) self.logger.debug(diff_format % ('--------', '-----', '', hunk_header)) line_num = hunk.old_start @@ -265,6 +224,22 @@ class DependencyDetector(object): self.logger.debug(diff_format % (rev, ln, line.origin, line.content.rstrip())) + def register_new_dependent(self, dependent, dependent_sha1): + if dependent_sha1 not in self.dependencies: + self.logger.info(" New dependent: %s" % + GitUtils.commit_summary(dependent)) + self.dependencies[dependent_sha1] = {} + self.notify_listeners("new_dependent", dependent) + + def run_blame(self, hunk, parent, path): + cmd = [ + 'git', 'blame', + '--porcelain', + '-L', "%d,+%d" % (hunk.old_start, hunk.old_lines), + parent.hex, '--', path + ] + return subprocess.check_output(cmd, universal_newlines=True) + def is_excluded(self, commit): if self.options.exclude_commits is not None: for exclude in self.options.exclude_commits: @@ -272,6 +247,64 @@ class DependencyDetector(object): return True return False + def process_new_dependency(self,dependent, dependent_sha1, + dependency, dependency_sha1, + path, line_num): + if not self.seen_commit(dependency): + self.notify_listeners("new_commit", dependency) + self.dependencies[dependent_sha1][dependency_sha1] = {} + + self.notify_listeners("new_dependency", + dependent, dependency, path, line_num) + + self.logger.info( + " New dependency %s -> %s via line %s (%s)" % + (dependent_sha1[:8], dependency_sha1[:8], line_num, + GitUtils.oneline(dependency))) + + if dependency_sha1 in self.todo_d: + self.logger.info( + " Dependency on %s via line %s already in TODO" + % (dependency_sha1[:8], line_num,)) + return + + if dependency_sha1 in self.done_d: + self.logger.info( + " Dependency on %s via line %s already done" % + (dependency_sha1[:8], line_num,)) + return + + if dependency_sha1 not in self.dependencies: + if self.options.recurse: + self.todo.append(dependency) + self.todo_d[dependency.hex] = True + self.logger.info(" + Added %s to TODO" % + dependency.hex[:8]) + + def record_dependency_source(self, parent, + dependent, dependent_sha1, + dependency, dependency_sha1, + path, line_num, line): + dep_sources = self.dependencies[dependent_sha1][dependency_sha1] + + if path not in dep_sources: + dep_sources[path] = {} + self.notify_listeners('new_path', + dependent, dependency, path, line_num) + + if line_num in dep_sources[path]: + abort("line %d already found when blaming %s:%s\n" + "old:\n %s\n" + "new:\n %s" % + (line_num, parent.hex[:8], path, + dep_sources[path][line_num], line)) + + dep_sources[path][line_num] = line + self.logger.debug(" New line for %s -> %s: %s" % + (dependent_sha1[:8], dependency_sha1[:8], line)) + self.notify_listeners('new_line', + dependent, dependency, path, line_num) + def branch_contains(self, commit, branch): sha1 = commit.hex branch_commit = self.get_commit(branch) diff --git a/git_deps/listener/cli.py b/git_deps/listener/cli.py index 01bf4df..b5f8bb9 100644 --- a/git_deps/listener/cli.py +++ b/git_deps/listener/cli.py @@ -32,6 +32,9 @@ class CLIDependencyListener(DependencyListener): if self.options.multi: if self.options.log: print("%s depends on:" % dependent_sha1) + if self._revs[dependency_sha1] > 1: + print("commit %s (already shown above)\n" + % dependency_sha1) else: print("%s %s" % (dependent_sha1, dependency_sha1)) else: |