aboutsummaryrefslogtreecommitdiffstats
path: root/revlist/revlist.go
diff options
context:
space:
mode:
authorAlberto Cortés <alberto@sourced.tech>2015-11-16 03:20:01 +0100
committerAlberto Cortés <alberto@sourced.tech>2015-11-25 11:09:51 +0100
commitd643cea1e8a6d618b2eddfdbed086c7bdf208658 (patch)
treeb862c72ccb674910d24eac2ab424a7fb5ea3f0fb /revlist/revlist.go
parentcaab43e7f4ee10a15b2af826485b688473b34356 (diff)
downloadgo-git-d643cea1e8a6d618b2eddfdbed086c7bdf208658.tar.gz
Blame support for files
This also includes a diff package and revlist package (needed by blame) Some extra packfiles (<1MB) are also included, to be used as fixtures in the tests.
Diffstat (limited to 'revlist/revlist.go')
-rw-r--r--revlist/revlist.go250
1 files changed, 250 insertions, 0 deletions
diff --git a/revlist/revlist.go b/revlist/revlist.go
new file mode 100644
index 0000000..f34ddb5
--- /dev/null
+++ b/revlist/revlist.go
@@ -0,0 +1,250 @@
+// Package revlist allows to create the revision history of a file, this
+// is, the list of commits in the past that affect the file.
+//
+// The general idea is to traverse the git commit graph backward,
+// flattening the graph into a linear history, and skipping commits that
+// are irrelevant for the particular file.
+//
+// There is no single answer for this operation. The git command
+// "git-revlist" returns different histories depending on its arguments
+// and some internal heuristics.
+//
+// The current implementation tries to get something similar to what you
+// whould get using git-revlist. See the failing tests for some
+// insight about how the current implementation and git-revlist differs.
+package revlist
+
+import (
+ "bytes"
+ "errors"
+ "io"
+ "sort"
+
+ "github.com/sergi/go-diff/diffmatchpatch"
+
+ "gopkg.in/src-d/go-git.v2"
+ "gopkg.in/src-d/go-git.v2/core"
+ "gopkg.in/src-d/go-git.v2/diff"
+)
+
+// New errors defined by the package.
+var ErrFileNotFound = errors.New("file not found")
+
+// A Revs is a list of revisions for a file (basically a list of commits).
+// It implements sort.Interface.
+type Revs []*git.Commit
+
+func (l Revs) Len() int {
+ return len(l)
+}
+
+// sorts from older to newer commit.
+func (l Revs) Less(i, j int) bool {
+ return l[i].Committer.When.Before(l[j].Committer.When)
+}
+
+func (l Revs) Swap(i, j int) {
+ l[i], l[j] = l[j], l[i]
+}
+
+// for debugging
+func (l Revs) String() string {
+ var buf bytes.Buffer
+ for _, c := range l {
+ buf.WriteString(c.Hash.String()[:8])
+ buf.WriteString("\n")
+ }
+ return buf.String()
+}
+
+// New returns a Revs pointer for the
+// file at "path", from commit "commit" backwards in time.
+// The commits are stored in arbitrary order.
+// It stops searching a branch for a file upon reaching the commit
+// were it was created.
+// Moves and copies are not currently supported.
+// Cherry-picks are not detected and therefore are added to the list
+// (see git path-id for hints on how to fix this).
+// This function implements is equivalent to running go-rev-Revs.
+func New(repo *git.Repository, commit *git.Commit, path string) (Revs, error) {
+ result := make(Revs, 0)
+ seen := make(map[core.Hash]struct{}, 0)
+ err := walkGraph(&result, &seen, repo, commit, path)
+ if err != nil {
+ return nil, err
+ }
+ sort.Sort(result)
+ result = removeComp(path, result, equivalent) // for merges of identical cherry-picks
+ if err != nil {
+ return nil, err
+ }
+ return result, nil
+}
+
+// Recursive traversal of the commit graph, generating a linear history
+// of the path.
+func walkGraph(result *Revs, seen *map[core.Hash]struct{}, repo *git.Repository, current *git.Commit, path string) error {
+ // check and update seen
+ if _, ok := (*seen)[current.Hash]; ok {
+ return nil
+ }
+ (*seen)[current.Hash] = struct{}{}
+
+ // if the path is not in the current commit, stop searching.
+ if _, found := git.FindFile(path, current); !found {
+ return nil
+ }
+
+ parents := parentsContainingPath(path, current)
+
+ switch len(parents) {
+ // if the path is not found in any of its parents, the path was
+ // created by this commit; we must add it to the revisions list and
+ // stop searching. This includes the case when current is the
+ // initial commit.
+ case 0:
+ //fmt.Println(current.Hash.String(), ": case 0")
+ *result = append(*result, current)
+ return nil
+ case 1: // only one parent contains the path
+ // if the file contents has change, add the current commit
+ different, err := differentContents(path, current, parents)
+ if err != nil {
+ return err
+ }
+ if len(different) == 1 {
+ //fmt.Println(current.Hash.String(), ": case 1")
+ *result = append(*result, current)
+ }
+ // in any case, walk the parent
+ return walkGraph(result, seen, repo, parents[0], path)
+ default: // more than one parent contains the path
+ // TODO: detect merges that had a conflict, because they must be
+ // included in the result here.
+ for _, p := range parents {
+ err := walkGraph(result, seen, repo, p, path)
+ if err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+// TODO: benchmark this making git.Commit.parent public instead of using
+// an iterator
+func parentsContainingPath(path string, c *git.Commit) []*git.Commit {
+ var result []*git.Commit
+ iter := c.Parents()
+ for {
+ parent, err := iter.Next()
+ if err != nil {
+ if err == io.EOF {
+ return result
+ }
+ panic("unreachable")
+ }
+ if _, found := git.FindFile(path, parent); found {
+ result = append(result, parent)
+ }
+ }
+}
+
+// Returns an slice of the commits in "cs" that has the file "path", but with different
+// contents than what can be found in "c".
+func differentContents(path string, c *git.Commit, cs []*git.Commit) ([]*git.Commit, error) {
+ result := make([]*git.Commit, 0, len(cs))
+ h, found := blobHash(path, c)
+ if !found {
+ return nil, ErrFileNotFound
+ }
+ for _, cx := range cs {
+ if hx, found := blobHash(path, cx); found && h != hx {
+ result = append(result, cx)
+ }
+ }
+ return result, nil
+}
+
+// blobHash returns the hash of a path in a commit
+func blobHash(path string, commit *git.Commit) (hash core.Hash, found bool) {
+ file, found := git.FindFile(path, commit)
+ if !found {
+ var empty core.Hash
+ return empty, found
+ }
+ return file.Hash, true
+}
+
+// Returns a new slice of commits, with duplicates removed. Expects a
+// sorted commit list. Duplication is defined according to "comp". It
+// will always keep the first commit of a series of duplicated commits.
+func removeComp(path string, cs []*git.Commit, comp func(string, *git.Commit, *git.Commit) bool) []*git.Commit {
+ result := make([]*git.Commit, 0, len(cs))
+ if len(cs) == 0 {
+ return result
+ }
+ result = append(result, cs[0])
+ for i := 1; i < len(cs); i++ {
+ if !comp(path, cs[i], cs[i-1]) {
+ result = append(result, cs[i])
+ }
+ }
+ return result
+}
+
+// Equivalent commits are commits whos patch is the same.
+func equivalent(path string, a, b *git.Commit) bool {
+ numParentsA := a.NumParents()
+ numParentsB := b.NumParents()
+
+ // the first commit is not equivalent to anyone
+ // and "I think" merges can not be equivalent to anything
+ if numParentsA != 1 || numParentsB != 1 {
+ return false
+ }
+
+ iterA := a.Parents()
+ parentA, _ := iterA.Next()
+ iterB := b.Parents()
+ parentB, _ := iterB.Next()
+
+ dataA, _ := git.Data(path, a)
+ dataParentA, _ := git.Data(path, parentA)
+ dataB, _ := git.Data(path, b)
+ dataParentB, _ := git.Data(path, parentB)
+
+ diffsA := diff.Do(dataParentA, dataA)
+ diffsB := diff.Do(dataParentB, dataB)
+
+ if sameDiffs(diffsA, diffsB) {
+ return true
+ }
+ return false
+}
+
+func sameDiffs(a, b []diffmatchpatch.Diff) bool {
+ if len(a) != len(b) {
+ return false
+ }
+ for i := range a {
+ if !sameDiff(a[i], b[i]) {
+ return false
+ }
+ }
+ return true
+}
+
+func sameDiff(a, b diffmatchpatch.Diff) bool {
+ if a.Type != b.Type {
+ return false
+ }
+ switch a.Type {
+ case 0:
+ return git.CountLines(a.Text) == git.CountLines(b.Text)
+ case 1, -1:
+ return a.Text == b.Text
+ default:
+ panic("unreachable")
+ }
+}