aboutsummaryrefslogtreecommitdiffstats
path: root/revlist/revlist.go
diff options
context:
space:
mode:
Diffstat (limited to 'revlist/revlist.go')
-rw-r--r--revlist/revlist.go274
1 files changed, 0 insertions, 274 deletions
diff --git a/revlist/revlist.go b/revlist/revlist.go
deleted file mode 100644
index bbc7e1f..0000000
--- a/revlist/revlist.go
+++ /dev/null
@@ -1,274 +0,0 @@
-// Package revlist allows to create the revision history of a file, this
-// is, the list of commits in the past that affect the file.
-//
-// The general idea is to traverse the git commit graph backward,
-// flattening the graph into a linear history, and skipping commits that
-// are irrelevant for the particular file.
-//
-// There is no single answer for this operation. The git command
-// "git-revlist" returns different histories depending on its arguments
-// and some internal heuristics.
-//
-// The current implementation tries to get something similar to what you
-// whould get using git-revlist. See the failing tests for some
-// insight about how the current implementation and git-revlist differs.
-//
-// Another way to get the revision history for a file is:
-// git log --follow -p -- file
-package revlist
-
-import (
- "bytes"
- "io"
- "sort"
-
- "gopkg.in/src-d/go-git.v2"
- "gopkg.in/src-d/go-git.v2/core"
- "gopkg.in/src-d/go-git.v2/diff"
-
- "github.com/sergi/go-diff/diffmatchpatch"
-)
-
-// A Revs is a list of revisions for a file (basically a list of commits).
-// It implements sort.Interface using the commit time.
-type Revs []*git.Commit
-
-func (l Revs) Len() int {
- return len(l)
-}
-
-// sorts from older to newer commit.
-func (l Revs) Less(i, j int) bool {
- return l[i].Committer.When.Before(l[j].Committer.When)
-}
-
-func (l Revs) Swap(i, j int) {
- l[i], l[j] = l[j], l[i]
-}
-
-// for debugging
-func (l Revs) GoString() string {
- var buf bytes.Buffer
- for _, c := range l {
- buf.WriteString(c.Hash.String()[:8])
- buf.WriteString("\n")
- }
- return buf.String()
-}
-
-// NewRevs returns a Revs pointer for the
-// file at "path", from commit "commit".
-// The commits are sorted in commit order.
-// It stops searching a branch for a file upon reaching the commit
-// were the file was created.
-// Moves and copies are not currently supported.
-// Cherry-picks are not detected unless there are no commits between
-// them and therefore can appear repeated in the list.
-// (see git path-id for hints on how to fix this).
-func NewRevs(repo *git.Repository, commit *git.Commit, path string) (Revs, error) {
- result := make(Revs, 0)
- seen := make(map[core.Hash]struct{}, 0)
- err := walkGraph(&result, &seen, repo, commit, path)
- if err != nil {
- return nil, err
- }
- sort.Sort(result)
- result, err = removeComp(path, result, equivalent) // for merges of identical cherry-picks
- if err != nil {
- return nil, err
- }
- return result, nil
-}
-
-// Recursive traversal of the commit graph, generating a linear history
-// of the path.
-func walkGraph(result *Revs, seen *map[core.Hash]struct{}, repo *git.Repository, current *git.Commit, path string) error {
- // check and update seen
- if _, ok := (*seen)[current.Hash]; ok {
- return nil
- }
- (*seen)[current.Hash] = struct{}{}
-
- // if the path is not in the current commit, stop searching.
- if _, err := current.File(path); err != nil {
- return nil
- }
-
- // optimization: don't traverse branches that does not
- // contain the path.
- parents := parentsContainingPath(path, current)
-
- switch len(parents) {
- // if the path is not found in any of its parents, the path was
- // created by this commit; we must add it to the revisions list and
- // stop searching. This includes the case when current is the
- // initial commit.
- case 0:
- *result = append(*result, current)
- return nil
- case 1: // only one parent contains the path
- // if the file contents has change, add the current commit
- different, err := differentContents(path, current, parents)
- if err != nil {
- return err
- }
- if len(different) == 1 {
- *result = append(*result, current)
- }
- // in any case, walk the parent
- return walkGraph(result, seen, repo, parents[0], path)
- default: // more than one parent contains the path
- // TODO: detect merges that had a conflict, because they must be
- // included in the result here.
- for _, p := range parents {
- err := walkGraph(result, seen, repo, p, path)
- if err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-// TODO: benchmark this making git.Commit.parent public instead of using
-// an iterator
-func parentsContainingPath(path string, c *git.Commit) []*git.Commit {
- var result []*git.Commit
- iter := c.Parents()
- for {
- parent, err := iter.Next()
- if err != nil {
- if err == io.EOF {
- return result
- }
- panic("unreachable")
- }
- if _, err := parent.File(path); err == nil {
- result = append(result, parent)
- }
- }
-}
-
-// Returns an slice of the commits in "cs" that has the file "path", but with different
-// contents than what can be found in "c".
-func differentContents(path string, c *git.Commit, cs []*git.Commit) ([]*git.Commit, error) {
- result := make([]*git.Commit, 0, len(cs))
- h, found := blobHash(path, c)
- if !found {
- return nil, git.ErrFileNotFound
- }
- for _, cx := range cs {
- if hx, found := blobHash(path, cx); found && h != hx {
- result = append(result, cx)
- }
- }
- return result, nil
-}
-
-// blobHash returns the hash of a path in a commit
-func blobHash(path string, commit *git.Commit) (hash core.Hash, found bool) {
- file, err := commit.File(path)
- if err != nil {
- var empty core.Hash
- return empty, found
- }
- return file.Hash, true
-}
-
-type contentsComparatorFn func(path string, a, b *git.Commit) (bool, error)
-
-// Returns a new slice of commits, with duplicates removed. Expects a
-// sorted commit list. Duplication is defined according to "comp". It
-// will always keep the first commit of a series of duplicated commits.
-func removeComp(path string, cs []*git.Commit, comp contentsComparatorFn) ([]*git.Commit, error) {
- result := make([]*git.Commit, 0, len(cs))
- if len(cs) == 0 {
- return result, nil
- }
- result = append(result, cs[0])
- for i := 1; i < len(cs); i++ {
- equals, err := comp(path, cs[i], cs[i-1])
- if err != nil {
- return nil, err
- }
- if !equals {
- result = append(result, cs[i])
- }
- }
- return result, nil
-}
-
-// Equivalent commits are commits whose patch is the same.
-func equivalent(path string, a, b *git.Commit) (bool, error) {
- numParentsA := a.NumParents()
- numParentsB := b.NumParents()
-
- // the first commit is not equivalent to anyone
- // and "I think" merges can not be equivalent to anything
- if numParentsA != 1 || numParentsB != 1 {
- return false, nil
- }
-
- diffsA, err := patch(a, path)
- if err != nil {
- return false, err
- }
- diffsB, err := patch(b, path)
- if err != nil {
- return false, err
- }
-
- return sameDiffs(diffsA, diffsB), nil
-}
-
-func patch(c *git.Commit, path string) ([]diffmatchpatch.Diff, error) {
- // get contents of the file in the commit
- file, err := c.File(path)
- if err != nil {
- return nil, err
- }
- content := file.Contents()
-
- // get contents of the file in the first parent of the commit
- var contentParent string
- iter := c.Parents()
- parent, err := iter.Next()
- if err != nil {
- return nil, err
- }
- file, err = parent.File(path)
- if err != nil {
- contentParent = ""
- } else {
- contentParent = file.Contents()
- }
-
- // compare the contents of parent and child
- return diff.Do(content, contentParent), nil
-}
-
-func sameDiffs(a, b []diffmatchpatch.Diff) bool {
- if len(a) != len(b) {
- return false
- }
- for i := range a {
- if !sameDiff(a[i], b[i]) {
- return false
- }
- }
- return true
-}
-
-func sameDiff(a, b diffmatchpatch.Diff) bool {
- if a.Type != b.Type {
- return false
- }
- switch a.Type {
- case 0:
- return git.CountLines(a.Text) == git.CountLines(b.Text)
- case 1, -1:
- return a.Text == b.Text
- default:
- panic("unreachable")
- }
-}