diff options
author | Máximo Cuadros <mcuadros@gmail.com> | 2015-12-12 03:22:52 +0100 |
---|---|---|
committer | Máximo Cuadros <mcuadros@gmail.com> | 2015-12-12 03:22:52 +0100 |
commit | b543e4e02793a58db25148214ef149cb571792f5 (patch) | |
tree | d68b7e33aba06f67d3c3e301b4a68f09de0ded6a /blame/blame.go | |
parent | c22c181f70e0afb294513315e9975b9f3f4c1d39 (diff) | |
download | go-git-b543e4e02793a58db25148214ef149cb571792f5.tar.gz |
blame code reorganization, and mutting the test
Diffstat (limited to 'blame/blame.go')
-rw-r--r-- | blame/blame.go | 283 |
1 files changed, 0 insertions, 283 deletions
diff --git a/blame/blame.go b/blame/blame.go deleted file mode 100644 index 7256a7b..0000000 --- a/blame/blame.go +++ /dev/null @@ -1,283 +0,0 @@ -// Package blame contains blaming functionality for files in the repo. -// -// Blaming a file is finding what commit was the last to modify each of -// the lines in the file, therefore the output of a blaming operation is -// usualy a slice of commits, one commit per line in the file. -// -// This package also provides a pretty print function to output the -// results of a blame in a similar format to the git-blame command. -package blame - -import ( - "bytes" - "errors" - "fmt" - "strconv" - "strings" - "unicode/utf8" - - "gopkg.in/src-d/go-git.v2" - "gopkg.in/src-d/go-git.v2/core" - "gopkg.in/src-d/go-git.v2/diff" - "gopkg.in/src-d/go-git.v2/revlist" -) - -// Blame returns the last commit that modified each line of a file in -// a repository. -// -// The file to blame is identified by the input arguments: repo, commit and path. -// The output is a slice of commits, one for each line in the file. -// -// Blaming a file is a two step process: -// -// 1. Create a linear history of the commits affecting a file. We use -// revlist.New for that. -// -// 2. Then build a graph with a node for every line in every file in -// the history of the file. -// -// Each node (line) holds the commit where it was introduced or -// last modified. To achieve that we use the FORWARD algorithm -// described in Zimmermann, et al. "Mining Version Archives for -// Co-changed Lines", in proceedings of the Mining Software -// Repositories workshop, Shanghai, May 22-23, 2006. -// -// Each node is asigned a commit: Start by the nodes in the first -// commit. Assign that commit as the creator of all its lines. -// -// Then jump to the nodes in the next commit, and calculate the diff -// between the two files. Newly created lines get -// assigned the new commit as its origin. Modified lines also get -// this new commit. Untouched lines retain the old commit. -// -// All this work is done in the assignOrigin function which holds all -// the internal relevant data in a "blame" struct, that is not -// exported. -// -// TODO: ways to improve the efficiency of this function: -// -// 1. Improve revlist -// -// 2. Improve how to traverse the history (example a backward -// traversal will be much more efficient) -// -// TODO: ways to improve the function in general: -// -// 1. Add memoization between revlist and assign. -// -// 2. It is using much more memory than needed, see the TODOs below. - -type Blame struct { - Repo string - Path string - Rev string - Lines []*line -} - -func New(repo *git.Repository, path string, commit *git.Commit) (*Blame, error) { - // init the internal blame struct - b := new(blame) - b.repo = repo - b.fRev = commit - b.path = path - - // get all the file revisions - if err := b.fillRevs(); err != nil { - return nil, err - } - - // calculate the line tracking graph and fill in - // file contents in data. - if err := b.fillGraphAndData(); err != nil { - return nil, err - } - - file, err := b.fRev.File(b.path) - if err != nil { - return nil, err - } - finalLines := file.Lines() - - lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1)) - if err != nil { - return nil, err - } - - return &Blame{ - Repo: repo.URL, - Path: path, - Rev: commit.Hash.String(), - Lines: lines, - }, nil -} - -type line struct { - author string - text string -} - -func newLine(author, text string) *line { - return &line{ - author: author, - text: text, - } -} - -func newLines(contents []string, commits []*git.Commit) ([]*line, error) { - if len(contents) != len(commits) { - return nil, errors.New("contents and commits have different length") - } - result := make([]*line, 0, len(contents)) - for i := range contents { - l := newLine(commits[i].Author.Email, contents[i]) - result = append(result, l) - } - return result, nil -} - -// this struct is internally used by the blame function to hold its -// inputs, outputs and state. -type blame struct { - repo *git.Repository // the repo holding the history of the file to blame - path string // the path of the file to blame - fRev *git.Commit // the commit of the final revision of the file to blame - revs revlist.Revs // the chain of revisions affecting the the file to blame - data []string // the contents of the file across all its revisions - graph [][]*git.Commit // the graph of the lines in the file across all the revisions TODO: not all commits are needed, only the current rev and the prev -} - -// calculte the history of a file "path", starting from commit "from", sorted by commit date. -func (b *blame) fillRevs() error { - var err error - b.revs, err = revlist.NewRevs(b.repo, b.fRev, b.path) - if err != nil { - return err - } - return nil -} - -// build graph of a file from its revision history -func (b *blame) fillGraphAndData() error { - b.graph = make([][]*git.Commit, len(b.revs)) - b.data = make([]string, len(b.revs)) // file contents in all the revisions - // for every revision of the file, starting with the first - // one... - for i, rev := range b.revs { - // get the contents of the file - file, err := rev.File(b.path) - if err != nil { - return nil - } - b.data[i] = file.Contents() - nLines := git.CountLines(b.data[i]) - // create a node for each line - b.graph[i] = make([]*git.Commit, nLines) - // assign a commit to each node - // if this is the first revision, then the node is assigned to - // this first commit. - if i == 0 { - for j := 0; j < nLines; j++ { - b.graph[i][j] = (*git.Commit)(b.revs[i]) - } - } else { - // if this is not the first commit, then assign to the old - // commit or to the new one, depending on what the diff - // says. - b.assignOrigin(i, i-1) - } - } - return nil -} - -// sliceGraph returns a slice of commits (one per line) for a particular -// revision of a file (0=first revision). -func (b *blame) sliceGraph(i int) []*git.Commit { - fVs := b.graph[i] - result := make([]*git.Commit, 0, len(fVs)) - for _, v := range fVs { - c := git.Commit(*v) - result = append(result, &c) - } - return result -} - -// Assigns origin to vertexes in current (c) rev from data in its previous (p) -// revision -func (b *blame) assignOrigin(c, p int) { - // assign origin based on diff info - hunks := diff.Do(b.data[p], b.data[c]) - sl := -1 // source line - dl := -1 // destination line - for h := range hunks { - hLines := git.CountLines(hunks[h].Text) - for hl := 0; hl < hLines; hl++ { - switch { - case hunks[h].Type == 0: - sl++ - dl++ - b.graph[c][dl] = b.graph[p][sl] - case hunks[h].Type == 1: - dl++ - b.graph[c][dl] = (*git.Commit)(b.revs[c]) - case hunks[h].Type == -1: - sl++ - default: - panic("unreachable") - } - } - } -} - -// GoString prints the results of a Blame using git-blame's style. -func (b *blame) GoString() string { - var buf bytes.Buffer - - file, err := b.fRev.File(b.path) - if err != nil { - panic("PrettyPrint: internal error in repo.Data") - } - contents := file.Contents() - - lines := strings.Split(contents, "\n") - // max line number length - mlnl := len(fmt.Sprintf("%s", strconv.Itoa(len(lines)))) - // max author length - mal := b.maxAuthorLength() - format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n", - mal, mlnl) - - fVs := b.graph[len(b.graph)-1] - for ln, v := range fVs { - fmt.Fprintf(&buf, format, v.Hash.String()[:8], - prettyPrintAuthor(fVs[ln]), ln+1, lines[ln]) - } - return buf.String() -} - -// utility function to pretty print the author. -func prettyPrintAuthor(c *git.Commit) string { - return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02")) -} - -// utility function to calculate the number of runes needed -// to print the longest author name in the blame of a file. -func (b *blame) maxAuthorLength() int { - memo := make(map[core.Hash]struct{}, len(b.graph)-1) - fVs := b.graph[len(b.graph)-1] - m := 0 - for ln := range fVs { - if _, ok := memo[fVs[ln].Hash]; ok { - continue - } - memo[fVs[ln].Hash] = struct{}{} - m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln]))) - } - return m -} - -func max(a, b int) int { - if a > b { - return a - } - return b -} |