aboutsummaryrefslogblamecommitdiffstats
path: root/revlist/revlist.go
blob: f34ddb5ea7fee53ac344be875b8419d4cad2fb14 (plain) (tree)

























































































































































































































































                                                                                                                          
// Package revlist allows to create the revision history of a file, this
// is, the list of commits in the past that affect the file.
//
// The general idea is to traverse the git commit graph backward,
// flattening the graph into a linear history, and skipping commits that
// are irrelevant for the particular file.
//
// There is no single answer for this operation. The git command
// "git-revlist" returns different histories depending on its arguments
// and some internal heuristics.
//
// The current implementation tries to get something similar to what you
// whould get using git-revlist. See the failing tests for some
// insight about how the current implementation and git-revlist differs.
package revlist

import (
	"bytes"
	"errors"
	"io"
	"sort"

	"github.com/sergi/go-diff/diffmatchpatch"

	"gopkg.in/src-d/go-git.v2"
	"gopkg.in/src-d/go-git.v2/core"
	"gopkg.in/src-d/go-git.v2/diff"
)

// New errors defined by the package.
var ErrFileNotFound = errors.New("file not found")

// A Revs is a list of revisions for a file (basically a list of commits).
// It implements sort.Interface.
type Revs []*git.Commit

func (l Revs) Len() int {
	return len(l)
}

// sorts from older to newer commit.
func (l Revs) Less(i, j int) bool {
	return l[i].Committer.When.Before(l[j].Committer.When)
}

func (l Revs) Swap(i, j int) {
	l[i], l[j] = l[j], l[i]
}

// for debugging
func (l Revs) String() string {
	var buf bytes.Buffer
	for _, c := range l {
		buf.WriteString(c.Hash.String()[:8])
		buf.WriteString("\n")
	}
	return buf.String()
}

// New returns a Revs pointer for the
// file at "path", from commit "commit" backwards in time.
// The commits are stored in arbitrary order.
// It stops searching a branch for a file upon reaching the commit
// were it was created.
// Moves and copies are not currently supported.
// Cherry-picks are not detected and therefore are added to the list
// (see git path-id for hints on how to fix this).
// This function implements is equivalent to running go-rev-Revs.
func New(repo *git.Repository, commit *git.Commit, path string) (Revs, error) {
	result := make(Revs, 0)
	seen := make(map[core.Hash]struct{}, 0)
	err := walkGraph(&result, &seen, repo, commit, path)
	if err != nil {
		return nil, err
	}
	sort.Sort(result)
	result = removeComp(path, result, equivalent) // for merges of identical cherry-picks
	if err != nil {
		return nil, err
	}
	return result, nil
}

// Recursive traversal of the commit graph, generating a linear history
// of the path.
func walkGraph(result *Revs, seen *map[core.Hash]struct{}, repo *git.Repository, current *git.Commit, path string) error {
	// check and update seen
	if _, ok := (*seen)[current.Hash]; ok {
		return nil
	}
	(*seen)[current.Hash] = struct{}{}

	// if the path is not in the current commit, stop searching.
	if _, found := git.FindFile(path, current); !found {
		return nil
	}

	parents := parentsContainingPath(path, current)

	switch len(parents) {
	// if the path is not found in any of its parents, the path was
	// created by this commit; we must add it to the revisions list and
	// stop searching. This includes the case when current is the
	// initial commit.
	case 0:
		//fmt.Println(current.Hash.String(), ": case 0")
		*result = append(*result, current)
		return nil
	case 1: // only one parent contains the path
		// if the file contents has change, add the current commit
		different, err := differentContents(path, current, parents)
		if err != nil {
			return err
		}
		if len(different) == 1 {
			//fmt.Println(current.Hash.String(), ": case 1")
			*result = append(*result, current)
		}
		// in any case, walk the parent
		return walkGraph(result, seen, repo, parents[0], path)
	default: // more than one parent contains the path
		// TODO: detect merges that had a conflict, because they must be
		// included in the result here.
		for _, p := range parents {
			err := walkGraph(result, seen, repo, p, path)
			if err != nil {
				return err
			}
		}
	}
	return nil
}

// TODO: benchmark this making git.Commit.parent public instead of using
// an iterator
func parentsContainingPath(path string, c *git.Commit) []*git.Commit {
	var result []*git.Commit
	iter := c.Parents()
	for {
		parent, err := iter.Next()
		if err != nil {
			if err == io.EOF {
				return result
			}
			panic("unreachable")
		}
		if _, found := git.FindFile(path, parent); found {
			result = append(result, parent)
		}
	}
}

// Returns an slice of the commits in "cs" that has the file "path", but with different
// contents than what can be found in "c".
func differentContents(path string, c *git.Commit, cs []*git.Commit) ([]*git.Commit, error) {
	result := make([]*git.Commit, 0, len(cs))
	h, found := blobHash(path, c)
	if !found {
		return nil, ErrFileNotFound
	}
	for _, cx := range cs {
		if hx, found := blobHash(path, cx); found && h != hx {
			result = append(result, cx)
		}
	}
	return result, nil
}

// blobHash returns the hash of a path in a commit
func blobHash(path string, commit *git.Commit) (hash core.Hash, found bool) {
	file, found := git.FindFile(path, commit)
	if !found {
		var empty core.Hash
		return empty, found
	}
	return file.Hash, true
}

// Returns a new slice of commits, with duplicates removed.  Expects a
// sorted commit list.  Duplication is defined according to "comp".  It
// will always keep the first commit of a series of duplicated commits.
func removeComp(path string, cs []*git.Commit, comp func(string, *git.Commit, *git.Commit) bool) []*git.Commit {
	result := make([]*git.Commit, 0, len(cs))
	if len(cs) == 0 {
		return result
	}
	result = append(result, cs[0])
	for i := 1; i < len(cs); i++ {
		if !comp(path, cs[i], cs[i-1]) {
			result = append(result, cs[i])
		}
	}
	return result
}

// Equivalent commits are commits whos patch is the same.
func equivalent(path string, a, b *git.Commit) bool {
	numParentsA := a.NumParents()
	numParentsB := b.NumParents()

	// the first commit is not equivalent to anyone
	// and "I think" merges can not be equivalent to anything
	if numParentsA != 1 || numParentsB != 1 {
		return false
	}

	iterA := a.Parents()
	parentA, _ := iterA.Next()
	iterB := b.Parents()
	parentB, _ := iterB.Next()

	dataA, _ := git.Data(path, a)
	dataParentA, _ := git.Data(path, parentA)
	dataB, _ := git.Data(path, b)
	dataParentB, _ := git.Data(path, parentB)

	diffsA := diff.Do(dataParentA, dataA)
	diffsB := diff.Do(dataParentB, dataB)

	if sameDiffs(diffsA, diffsB) {
		return true
	}
	return false
}

func sameDiffs(a, b []diffmatchpatch.Diff) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if !sameDiff(a[i], b[i]) {
			return false
		}
	}
	return true
}

func sameDiff(a, b diffmatchpatch.Diff) bool {
	if a.Type != b.Type {
		return false
	}
	switch a.Type {
	case 0:
		return git.CountLines(a.Text) == git.CountLines(b.Text)
	case 1, -1:
		return a.Text == b.Text
	default:
		panic("unreachable")
	}
}