aboutsummaryrefslogtreecommitdiffstats
path: root/blame/blame.go
diff options
context:
space:
mode:
authorMáximo Cuadros <mcuadros@gmail.com>2015-12-12 03:22:52 +0100
committerMáximo Cuadros <mcuadros@gmail.com>2015-12-12 03:22:52 +0100
commitb543e4e02793a58db25148214ef149cb571792f5 (patch)
treed68b7e33aba06f67d3c3e301b4a68f09de0ded6a /blame/blame.go
parentc22c181f70e0afb294513315e9975b9f3f4c1d39 (diff)
downloadgo-git-b543e4e02793a58db25148214ef149cb571792f5.tar.gz
blame code reorganization, and mutting the test
Diffstat (limited to 'blame/blame.go')
-rw-r--r--blame/blame.go283
1 files changed, 0 insertions, 283 deletions
diff --git a/blame/blame.go b/blame/blame.go
deleted file mode 100644
index 7256a7b..0000000
--- a/blame/blame.go
+++ /dev/null
@@ -1,283 +0,0 @@
-// Package blame contains blaming functionality for files in the repo.
-//
-// Blaming a file is finding what commit was the last to modify each of
-// the lines in the file, therefore the output of a blaming operation is
-// usualy a slice of commits, one commit per line in the file.
-//
-// This package also provides a pretty print function to output the
-// results of a blame in a similar format to the git-blame command.
-package blame
-
-import (
- "bytes"
- "errors"
- "fmt"
- "strconv"
- "strings"
- "unicode/utf8"
-
- "gopkg.in/src-d/go-git.v2"
- "gopkg.in/src-d/go-git.v2/core"
- "gopkg.in/src-d/go-git.v2/diff"
- "gopkg.in/src-d/go-git.v2/revlist"
-)
-
-// Blame returns the last commit that modified each line of a file in
-// a repository.
-//
-// The file to blame is identified by the input arguments: repo, commit and path.
-// The output is a slice of commits, one for each line in the file.
-//
-// Blaming a file is a two step process:
-//
-// 1. Create a linear history of the commits affecting a file. We use
-// revlist.New for that.
-//
-// 2. Then build a graph with a node for every line in every file in
-// the history of the file.
-//
-// Each node (line) holds the commit where it was introduced or
-// last modified. To achieve that we use the FORWARD algorithm
-// described in Zimmermann, et al. "Mining Version Archives for
-// Co-changed Lines", in proceedings of the Mining Software
-// Repositories workshop, Shanghai, May 22-23, 2006.
-//
-// Each node is asigned a commit: Start by the nodes in the first
-// commit. Assign that commit as the creator of all its lines.
-//
-// Then jump to the nodes in the next commit, and calculate the diff
-// between the two files. Newly created lines get
-// assigned the new commit as its origin. Modified lines also get
-// this new commit. Untouched lines retain the old commit.
-//
-// All this work is done in the assignOrigin function which holds all
-// the internal relevant data in a "blame" struct, that is not
-// exported.
-//
-// TODO: ways to improve the efficiency of this function:
-//
-// 1. Improve revlist
-//
-// 2. Improve how to traverse the history (example a backward
-// traversal will be much more efficient)
-//
-// TODO: ways to improve the function in general:
-//
-// 1. Add memoization between revlist and assign.
-//
-// 2. It is using much more memory than needed, see the TODOs below.
-
-type Blame struct {
- Repo string
- Path string
- Rev string
- Lines []*line
-}
-
-func New(repo *git.Repository, path string, commit *git.Commit) (*Blame, error) {
- // init the internal blame struct
- b := new(blame)
- b.repo = repo
- b.fRev = commit
- b.path = path
-
- // get all the file revisions
- if err := b.fillRevs(); err != nil {
- return nil, err
- }
-
- // calculate the line tracking graph and fill in
- // file contents in data.
- if err := b.fillGraphAndData(); err != nil {
- return nil, err
- }
-
- file, err := b.fRev.File(b.path)
- if err != nil {
- return nil, err
- }
- finalLines := file.Lines()
-
- lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1))
- if err != nil {
- return nil, err
- }
-
- return &Blame{
- Repo: repo.URL,
- Path: path,
- Rev: commit.Hash.String(),
- Lines: lines,
- }, nil
-}
-
-type line struct {
- author string
- text string
-}
-
-func newLine(author, text string) *line {
- return &line{
- author: author,
- text: text,
- }
-}
-
-func newLines(contents []string, commits []*git.Commit) ([]*line, error) {
- if len(contents) != len(commits) {
- return nil, errors.New("contents and commits have different length")
- }
- result := make([]*line, 0, len(contents))
- for i := range contents {
- l := newLine(commits[i].Author.Email, contents[i])
- result = append(result, l)
- }
- return result, nil
-}
-
-// this struct is internally used by the blame function to hold its
-// inputs, outputs and state.
-type blame struct {
- repo *git.Repository // the repo holding the history of the file to blame
- path string // the path of the file to blame
- fRev *git.Commit // the commit of the final revision of the file to blame
- revs revlist.Revs // the chain of revisions affecting the the file to blame
- data []string // the contents of the file across all its revisions
- graph [][]*git.Commit // the graph of the lines in the file across all the revisions TODO: not all commits are needed, only the current rev and the prev
-}
-
-// calculte the history of a file "path", starting from commit "from", sorted by commit date.
-func (b *blame) fillRevs() error {
- var err error
- b.revs, err = revlist.NewRevs(b.repo, b.fRev, b.path)
- if err != nil {
- return err
- }
- return nil
-}
-
-// build graph of a file from its revision history
-func (b *blame) fillGraphAndData() error {
- b.graph = make([][]*git.Commit, len(b.revs))
- b.data = make([]string, len(b.revs)) // file contents in all the revisions
- // for every revision of the file, starting with the first
- // one...
- for i, rev := range b.revs {
- // get the contents of the file
- file, err := rev.File(b.path)
- if err != nil {
- return nil
- }
- b.data[i] = file.Contents()
- nLines := git.CountLines(b.data[i])
- // create a node for each line
- b.graph[i] = make([]*git.Commit, nLines)
- // assign a commit to each node
- // if this is the first revision, then the node is assigned to
- // this first commit.
- if i == 0 {
- for j := 0; j < nLines; j++ {
- b.graph[i][j] = (*git.Commit)(b.revs[i])
- }
- } else {
- // if this is not the first commit, then assign to the old
- // commit or to the new one, depending on what the diff
- // says.
- b.assignOrigin(i, i-1)
- }
- }
- return nil
-}
-
-// sliceGraph returns a slice of commits (one per line) for a particular
-// revision of a file (0=first revision).
-func (b *blame) sliceGraph(i int) []*git.Commit {
- fVs := b.graph[i]
- result := make([]*git.Commit, 0, len(fVs))
- for _, v := range fVs {
- c := git.Commit(*v)
- result = append(result, &c)
- }
- return result
-}
-
-// Assigns origin to vertexes in current (c) rev from data in its previous (p)
-// revision
-func (b *blame) assignOrigin(c, p int) {
- // assign origin based on diff info
- hunks := diff.Do(b.data[p], b.data[c])
- sl := -1 // source line
- dl := -1 // destination line
- for h := range hunks {
- hLines := git.CountLines(hunks[h].Text)
- for hl := 0; hl < hLines; hl++ {
- switch {
- case hunks[h].Type == 0:
- sl++
- dl++
- b.graph[c][dl] = b.graph[p][sl]
- case hunks[h].Type == 1:
- dl++
- b.graph[c][dl] = (*git.Commit)(b.revs[c])
- case hunks[h].Type == -1:
- sl++
- default:
- panic("unreachable")
- }
- }
- }
-}
-
-// GoString prints the results of a Blame using git-blame's style.
-func (b *blame) GoString() string {
- var buf bytes.Buffer
-
- file, err := b.fRev.File(b.path)
- if err != nil {
- panic("PrettyPrint: internal error in repo.Data")
- }
- contents := file.Contents()
-
- lines := strings.Split(contents, "\n")
- // max line number length
- mlnl := len(fmt.Sprintf("%s", strconv.Itoa(len(lines))))
- // max author length
- mal := b.maxAuthorLength()
- format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n",
- mal, mlnl)
-
- fVs := b.graph[len(b.graph)-1]
- for ln, v := range fVs {
- fmt.Fprintf(&buf, format, v.Hash.String()[:8],
- prettyPrintAuthor(fVs[ln]), ln+1, lines[ln])
- }
- return buf.String()
-}
-
-// utility function to pretty print the author.
-func prettyPrintAuthor(c *git.Commit) string {
- return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02"))
-}
-
-// utility function to calculate the number of runes needed
-// to print the longest author name in the blame of a file.
-func (b *blame) maxAuthorLength() int {
- memo := make(map[core.Hash]struct{}, len(b.graph)-1)
- fVs := b.graph[len(b.graph)-1]
- m := 0
- for ln := range fVs {
- if _, ok := memo[fVs[ln].Hash]; ok {
- continue
- }
- memo[fVs[ln].Hash] = struct{}{}
- m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln])))
- }
- return m
-}
-
-func max(a, b int) int {
- if a > b {
- return a
- }
- return b
-}