aboutsummaryrefslogblamecommitdiffstats
path: root/blame.go
blob: f6108519ad5fdddb520626a6d2b6c75060c3448e (plain) (tree)
1
2
3
4
5
6
7
8
9
           


               
                
             

                 
              

                      


                                                  

 
                                                          
                         
                                                           
                   
                                                                                           
                         
                                                         
                     

 

                                                                            
                                                                 























                                                                                      








                                                                                    
                       
                  

                     











                                                        


                               



                                       
 




                                                                       



                                                                        
 
                            
                            
                              



                             

                                                                                  
                                                                                 


                                                 

                                                                    

                                                                    

 
                                                                             


                               
                             
                             

         
 
                                                                             








                                                                                            
         

                                             
                                 



                                                                
         
 





                                                                   









                                                                      

 
                                                                                              

                                  
 
                                                
                  



                                                  
                                                                             
                                                       

                                                                                  
                 

                                               


                                             
                 



                                                
                                               
                                              
                                                           




                                                                              
                                                         







                                                                                  

                  
 

                                                                        
                                                    
                         
                                                     
                               
                       

                                           
                     

 







                                                                              
                                                   
                                                






                                                               
                                                          








                                                    

                                                                  

                            

                                        

                                                                 



                                                                 


                                              
                                             













                                                                    
                                                 





                                                                                      
                                                                

















                                                                              
package git

import (
	"bytes"
	"errors"
	"fmt"
	"strconv"
	"strings"
	"time"
	"unicode/utf8"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/go-git.v4/utils/diff"
)

// BlameResult represents the result of a Blame operation.
type BlameResult struct {
	// Path is the path of the File that we're blaming.
	Path string
	// Rev (Revision) is the hash of the specified Commit used to generate this result.
	Rev plumbing.Hash
	// Lines contains every line with its authorship.
	Lines []*Line
}

// Blame returns a BlameResult with the information about the last author of
// each line from file `path` at commit `c`.
func Blame(c *object.Commit, path string) (*BlameResult, error) {
	// The file to blame is identified by the input arguments:
	// commit and path. commit is a Commit object obtained from a Repository. Path
	// represents a path to a specific file contained into the repository.
	//
	// Blaming a file is a two step process:
	//
	// 1. Create a linear history of the commits affecting a file. We use
	// revlist.New for that.
	//
	// 2. Then build a graph with a node for every line in every file in
	// the history of the file.
	//
	// Each node is assigned a commit: Start by the nodes in the first
	// commit. Assign that commit as the creator of all its lines.
	//
	// Then jump to the nodes in the next commit, and calculate the diff
	// between the two files. Newly created lines get
	// assigned the new commit as its origin. Modified lines also get
	// this new commit. Untouched lines retain the old commit.
	//
	// All this work is done in the assignOrigin function which holds all
	// the internal relevant data in a "blame" struct, that is not
	// exported.
	//
	// TODO: ways to improve the efficiency of this function:
	// 1. Improve revlist
	// 2. Improve how to traverse the history (example a backward traversal will
	// be much more efficient)
	//
	// TODO: ways to improve the function in general:
	// 1. Add memoization between revlist and assign.
	// 2. It is using much more memory than needed, see the TODOs below.

	b := new(blame)
	b.fRev = c
	b.path = path

	// get all the file revisions
	if err := b.fillRevs(); err != nil {
		return nil, err
	}

	// calculate the line tracking graph and fill in
	// file contents in data.
	if err := b.fillGraphAndData(); err != nil {
		return nil, err
	}

	file, err := b.fRev.File(b.path)
	if err != nil {
		return nil, err
	}
	finalLines, err := file.Lines()
	if err != nil {
		return nil, err
	}

	// Each node (line) holds the commit where it was introduced or
	// last modified. To achieve that we use the FORWARD algorithm
	// described in Zimmermann, et al. "Mining Version Archives for
	// Co-changed Lines", in proceedings of the Mining Software
	// Repositories workshop, Shanghai, May 22-23, 2006.
	lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1))
	if err != nil {
		return nil, err
	}

	return &BlameResult{
		Path:  path,
		Rev:   c.Hash,
		Lines: lines,
	}, nil
}

// Line values represent the contents and author of a line in BlamedResult values.
type Line struct {
	// Author is the email address of the last author that modified the line.
	Author string
	// Text is the original text of the line.
	Text string
	// Date is when the original text of the line was introduced
	Date time.Time
	// Hash is the commit hash that introduced the original line
	Hash plumbing.Hash
}

func newLine(author, text string, date time.Time, hash plumbing.Hash) *Line {
	return &Line{
		Author: author,
		Text:   text,
		Hash:   hash,
		Date:   date,
	}
}

func newLines(contents []string, commits []*object.Commit) ([]*Line, error) {
	lcontents := len(contents)
	lcommits := len(commits)

	if lcontents != lcommits {
		if lcontents == lcommits-1 && contents[lcontents-1] != "\n" {
			contents = append(contents, "\n")
		} else {
			return nil, errors.New("contents and commits have different length")
		}
	}

	result := make([]*Line, 0, lcontents)
	for i := range contents {
		result = append(result, newLine(
			commits[i].Author.Email, contents[i],
			commits[i].Author.When, commits[i].Hash,
		))
	}

	return result, nil
}

// this struct is internally used by the blame function to hold its
// inputs, outputs and state.
type blame struct {
	// the path of the file to blame
	path string
	// the commit of the final revision of the file to blame
	fRev *object.Commit
	// the chain of revisions affecting the the file to blame
	revs []*object.Commit
	// the contents of the file across all its revisions
	data []string
	// the graph of the lines in the file across all the revisions
	graph [][]*object.Commit
}

// calculate the history of a file "path", starting from commit "from", sorted by commit date.
func (b *blame) fillRevs() error {
	var err error

	b.revs, err = references(b.fRev, b.path)
	return err
}

// build graph of a file from its revision history
func (b *blame) fillGraphAndData() error {
	//TODO: not all commits are needed, only the current rev and the prev
	b.graph = make([][]*object.Commit, len(b.revs))
	b.data = make([]string, len(b.revs)) // file contents in all the revisions
	// for every revision of the file, starting with the first
	// one...
	for i, rev := range b.revs {
		// get the contents of the file
		file, err := rev.File(b.path)
		if err != nil {
			return nil
		}
		b.data[i], err = file.Contents()
		if err != nil {
			return err
		}
		nLines := countLines(b.data[i])
		// create a node for each line
		b.graph[i] = make([]*object.Commit, nLines)
		// assign a commit to each node
		// if this is the first revision, then the node is assigned to
		// this first commit.
		if i == 0 {
			for j := 0; j < nLines; j++ {
				b.graph[i][j] = b.revs[i]
			}
		} else {
			// if this is not the first commit, then assign to the old
			// commit or to the new one, depending on what the diff
			// says.
			b.assignOrigin(i, i-1)
		}
	}
	return nil
}

// sliceGraph returns a slice of commits (one per line) for a particular
// revision of a file (0=first revision).
func (b *blame) sliceGraph(i int) []*object.Commit {
	fVs := b.graph[i]
	result := make([]*object.Commit, 0, len(fVs))
	for _, v := range fVs {
		c := *v
		result = append(result, &c)
	}
	return result
}

// Assigns origin to vertexes in current (c) rev from data in its previous (p)
// revision
func (b *blame) assignOrigin(c, p int) {
	// assign origin based on diff info
	hunks := diff.Do(b.data[p], b.data[c])
	sl := -1 // source line
	dl := -1 // destination line
	for h := range hunks {
		hLines := countLines(hunks[h].Text)
		for hl := 0; hl < hLines; hl++ {
			switch {
			case hunks[h].Type == 0:
				sl++
				dl++
				b.graph[c][dl] = b.graph[p][sl]
			case hunks[h].Type == 1:
				dl++
				b.graph[c][dl] = b.revs[c]
			case hunks[h].Type == -1:
				sl++
			default:
				panic("unreachable")
			}
		}
	}
}

// GoString prints the results of a Blame using git-blame's style.
func (b *blame) GoString() string {
	var buf bytes.Buffer

	file, err := b.fRev.File(b.path)
	if err != nil {
		panic("PrettyPrint: internal error in repo.Data")
	}
	contents, err := file.Contents()
	if err != nil {
		panic("PrettyPrint: internal error in repo.Data")
	}

	lines := strings.Split(contents, "\n")
	// max line number length
	mlnl := len(strconv.Itoa(len(lines)))
	// max author length
	mal := b.maxAuthorLength()
	format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n",
		mal, mlnl)

	fVs := b.graph[len(b.graph)-1]
	for ln, v := range fVs {
		fmt.Fprintf(&buf, format, v.Hash.String()[:8],
			prettyPrintAuthor(fVs[ln]), ln+1, lines[ln])
	}
	return buf.String()
}

// utility function to pretty print the author.
func prettyPrintAuthor(c *object.Commit) string {
	return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02"))
}

// utility function to calculate the number of runes needed
// to print the longest author name in the blame of a file.
func (b *blame) maxAuthorLength() int {
	memo := make(map[plumbing.Hash]struct{}, len(b.graph)-1)
	fVs := b.graph[len(b.graph)-1]
	m := 0
	for ln := range fVs {
		if _, ok := memo[fVs[ln].Hash]; ok {
			continue
		}
		memo[fVs[ln].Hash] = struct{}{}
		m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln])))
	}
	return m
}

func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}