// Package revlist allows to create the revision history of a file, this // is, the list of commits in the past that affect the file. // // The general idea is to traverse the git commit graph backward, // flattening the graph into a linear history, and skipping commits that // are irrelevant for the particular file. // // There is no single answer for this operation. The git command // "git-revlist" returns different histories depending on its arguments // and some internal heuristics. // // The current implementation tries to get something similar to what you // whould get using git-revlist. See the failing tests for some // insight about how the current implementation and git-revlist differs. // // Another way to get the revision history for a file is: // git log --follow -p -- file package revlist import ( "bytes" "io" "sort" "gopkg.in/src-d/go-git.v2" "gopkg.in/src-d/go-git.v2/core" "gopkg.in/src-d/go-git.v2/diff" "github.com/sergi/go-diff/diffmatchpatch" ) // A Revs is a list of revisions for a file (basically a list of commits). // It implements sort.Interface using the commit time. type Revs []*git.Commit func (l Revs) Len() int { return len(l) } // sorts from older to newer commit. func (l Revs) Less(i, j int) bool { return l[i].Committer.When.Before(l[j].Committer.When) } func (l Revs) Swap(i, j int) { l[i], l[j] = l[j], l[i] } // for debugging func (l Revs) GoString() string { var buf bytes.Buffer for _, c := range l { buf.WriteString(c.Hash.String()[:8]) buf.WriteString("\n") } return buf.String() } // NewRevs returns a Revs pointer for the // file at "path", from commit "commit". // The commits are sorted in commit order. // It stops searching a branch for a file upon reaching the commit // were the file was created. // Moves and copies are not currently supported. // Cherry-picks are not detected unless there are no commits between // them and therefore can appear repeated in the list. // (see git path-id for hints on how to fix this). func NewRevs(repo *git.Repository, commit *git.Commit, path string) (Revs, error) { result := make(Revs, 0) seen := make(map[core.Hash]struct{}, 0) err := walkGraph(&result, &seen, repo, commit, path) if err != nil { return nil, err } sort.Sort(result) result, err = removeComp(path, result, equivalent) // for merges of identical cherry-picks if err != nil { return nil, err } return result, nil } // Recursive traversal of the commit graph, generating a linear history // of the path. func walkGraph(result *Revs, seen *map[core.Hash]struct{}, repo *git.Repository, current *git.Commit, path string) error { // check and update seen if _, ok := (*seen)[current.Hash]; ok { return nil } (*seen)[current.Hash] = struct{}{} // if the path is not in the current commit, stop searching. if _, err := current.File(path); err != nil { return nil } // optimization: don't traverse branches that does not // contain the path. parents := parentsContainingPath(path, current) switch len(parents) { // if the path is not found in any of its parents, the path was // created by this commit; we must add it to the revisions list and // stop searching. This includes the case when current is the // initial commit. case 0: *result = append(*result, current) return nil case 1: // only one parent contains the path // if the file contents has change, add the current commit different, err := differentContents(path, current, parents) if err != nil { return err } if len(different) == 1 { *result = append(*result, current) } // in any case, walk the parent return walkGraph(result, seen, repo, parents[0], path) default: // more than one parent contains the path // TODO: detect merges that had a conflict, because they must be // included in the result here. for _, p := range parents { err := walkGraph(result, seen, repo, p, path) if err != nil { return err } } } return nil } // TODO: benchmark this making git.Commit.parent public instead of using // an iterator func parentsContainingPath(path string, c *git.Commit) []*git.Commit { var result []*git.Commit iter := c.Parents() for { parent, err := iter.Next() if err != nil { if err == io.EOF { return result } panic("unreachable") } if _, err := parent.File(path); err == nil { result = append(result, parent) } } } // Returns an slice of the commits in "cs" that has the file "path", but with different // contents than what can be found in "c". func differentContents(path string, c *git.Commit, cs []*git.Commit) ([]*git.Commit, error) { result := make([]*git.Commit, 0, len(cs)) h, found := blobHash(path, c) if !found { return nil, git.ErrFileNotFound } for _, cx := range cs { if hx, found := blobHash(path, cx); found && h != hx { result = append(result, cx) } } return result, nil } // blobHash returns the hash of a path in a commit func blobHash(path string, commit *git.Commit) (hash core.Hash, found bool) { file, err := commit.File(path) if err != nil { var empty core.Hash return empty, found } return file.Hash, true } type contentsComparatorFn func(path string, a, b *git.Commit) (bool, error) // Returns a new slice of commits, with duplicates removed. Expects a // sorted commit list. Duplication is defined according to "comp". It // will always keep the first commit of a series of duplicated commits. func removeComp(path string, cs []*git.Commit, comp contentsComparatorFn) ([]*git.Commit, error) { result := make([]*git.Commit, 0, len(cs)) if len(cs) == 0 { return result, nil } result = append(result, cs[0]) for i := 1; i < len(cs); i++ { equals, err := comp(path, cs[i], cs[i-1]) if err != nil { return nil, err } if !equals { result = append(result, cs[i]) } } return result, nil } // Equivalent commits are commits whose patch is the same. func equivalent(path string, a, b *git.Commit) (bool, error) { numParentsA := a.NumParents() numParentsB := b.NumParents() // the first commit is not equivalent to anyone // and "I think" merges can not be equivalent to anything if numParentsA != 1 || numParentsB != 1 { return false, nil } diffsA, err := patch(a, path) if err != nil { return false, err } diffsB, err := patch(b, path) if err != nil { return false, err } return sameDiffs(diffsA, diffsB), nil } func patch(c *git.Commit, path string) ([]diffmatchpatch.Diff, error) { // get contents of the file in the commit file, err := c.File(path) if err != nil { return nil, err } content := file.Contents() // get contents of the file in the first parent of the commit var contentParent string iter := c.Parents() parent, err := iter.Next() if err != nil { return nil, err } file, err = parent.File(path) if err != nil { contentParent = "" } else { contentParent = file.Contents() } // compare the contents of parent and child return diff.Do(content, contentParent), nil } func sameDiffs(a, b []diffmatchpatch.Diff) bool { if len(a) != len(b) { return false } for i := range a { if !sameDiff(a[i], b[i]) { return false } } return true } func sameDiff(a, b diffmatchpatch.Diff) bool { if a.Type != b.Type { return false } switch a.Type { case 0: return git.CountLines(a.Text) == git.CountLines(b.Text) case 1, -1: return a.Text == b.Text default: panic("unreachable") } }