package git import ( "bytes" "container/heap" "errors" "fmt" "io" "strconv" "time" "unicode/utf8" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/object" "github.com/go-git/go-git/v5/utils/diff" "github.com/sergi/go-diff/diffmatchpatch" ) // BlameResult represents the result of a Blame operation. type BlameResult struct { // Path is the path of the File that we're blaming. Path string // Rev (Revision) is the hash of the specified Commit used to generate this result. Rev plumbing.Hash // Lines contains every line with its authorship. Lines []*Line } // Blame returns a BlameResult with the information about the last author of // each line from file `path` at commit `c`. func Blame(c *object.Commit, path string) (*BlameResult, error) { // The file to blame is identified by the input arguments: // commit and path. commit is a Commit object obtained from a Repository. Path // represents a path to a specific file contained in the repository. // // Blaming a file is done by walking the tree in reverse order trying to find where each line was last modified. // // When a diff is found it cannot immediately assume it came from that commit, as it may have come from 1 of its // parents, so it will first try to resolve those diffs from its parents, if it couldn't find the change in its // parents then it will assign the change to itself. // // When encountering 2 parents that have made the same change to a file it will choose the parent that was merged // into the current branch first (this is determined by the order of the parents inside the commit). // // This currently works on a line by line basis, if performance becomes an issue it could be changed to work with // hunks rather than lines. Then when encountering diff hunks it would need to split them where necessary. b := new(blame) b.fRev = c b.path = path b.q = new(priorityQueue) file, err := b.fRev.File(path) if err != nil { return nil, err } finalLines, err := file.Lines() if err != nil { return nil, err } finalLength := len(finalLines) needsMap := make([]lineMap, finalLength) for i := range needsMap { needsMap[i] = lineMap{i, i, nil, -1} } contents, err := file.Contents() if err != nil { return nil, err } b.q.Push(&queueItem{ nil, nil, c, path, contents, needsMap, 0, false, 0, }) items := make([]*queueItem, 0) for { items = items[:0] for { if b.q.Len() == 0 { return nil, errors.New("invalid state: no items left on the blame queue") } item := b.q.Pop() items = append(items, item) next := b.q.Peek() if next == nil || next.Hash != item.Commit.Hash { break } } finished, err := b.addBlames(items) if err != nil { return nil, err } if finished == true { break } } if err != nil { return nil, err } b.lineToCommit = make([]*object.Commit, finalLength) for i := range needsMap { b.lineToCommit[i] = needsMap[i].Commit } lines, err := newLines(finalLines, b.lineToCommit) if err != nil { return nil, err } return &BlameResult{ Path: path, Rev: c.Hash, Lines: lines, }, nil } // Line values represent the contents and author of a line in BlamedResult values. type Line struct { // Author is the email address of the last author that modified the line. Author string // AuthorName is the name of the last author that modified the line. AuthorName string // Text is the original text of the line. Text string // Date is when the original text of the line was introduced Date time.Time // Hash is the commit hash that introduced the original line Hash plumbing.Hash } func newLine(author, authorName, text string, date time.Time, hash plumbing.Hash) *Line { return &Line{ Author: author, AuthorName: authorName, Text: text, Hash: hash, Date: date, } } func newLines(contents []string, commits []*object.Commit) ([]*Line, error) { result := make([]*Line, 0, len(contents)) for i := range contents { result = append(result, newLine( commits[i].Author.Email, commits[i].Author.Name, contents[i], commits[i].Author.When, commits[i].Hash, )) } return result, nil } // this struct is internally used by the blame function to hold its // inputs, outputs and state. type blame struct { // the path of the file to blame path string // the commit of the final revision of the file to blame fRev *object.Commit // resolved lines lineToCommit []*object.Commit // queue of commits that need resolving q *priorityQueue } type lineMap struct { Orig, Cur int Commit *object.Commit FromParentNo int } func (b *blame) addBlames(curItems []*queueItem) (bool, error) { curItem := curItems[0] // Simple optimisation to merge paths, there is potential to go a bit further here and check for any duplicates // not only if they are all the same. if len(curItems) == 1 { curItems = nil } else if curItem.IdenticalToChild { allSame := true lenCurItems := len(curItems) lowestParentNo := curItem.ParentNo for i := 1; i < lenCurItems; i++ { if !curItems[i].IdenticalToChild || curItem.Child != curItems[i].Child { allSame = false break } lowestParentNo = min(lowestParentNo, curItems[i].ParentNo) } if allSame { curItem.Child.numParentsNeedResolving = curItem.Child.numParentsNeedResolving - lenCurItems + 1 curItems = nil // free the memory curItem.ParentNo = lowestParentNo // Now check if we can remove the parent completely for curItem.Child.IdenticalToChild && curItem.Child.MergedChildren == nil && curItem.Child.numParentsNeedResolving == 1 { oldChild := curItem.Child curItem.Child = oldChild.Child curItem.ParentNo = oldChild.ParentNo } } } // if we have more than 1 item for this commit, create a single needsMap if len(curItems) > 1 { curItem.MergedChildren = make([]childToNeedsMap, len(curItems)) for i, c := range curItems { curItem.MergedChildren[i] = childToNeedsMap{c.Child, c.NeedsMap, c.IdenticalToChild, c.ParentNo} } newNeedsMap := make([]lineMap, 0, len(curItem.NeedsMap)) newNeedsMap = append(newNeedsMap, curItems[0].NeedsMap...) for i := 1; i < len(curItems); i++ { cur := curItems[i].NeedsMap n := 0 // position in newNeedsMap c := 0 // position in current list for c < len(cur) { if n == len(newNeedsMap) { newNeedsMap = append(newNeedsMap, cur[c:]...) break } else if newNeedsMap[n].Cur == cur[c].Cur { n++ c++ } else if newNeedsMap[n].Cur < cur[c].Cur { n++ } else { newNeedsMap = append(newNeedsMap, cur[c]) newPos := len(newNeedsMap) - 1 for newPos > n { newNeedsMap[newPos-1], newNeedsMap[newPos] = newNeedsMap[newPos], newNeedsMap[newPos-1] newPos-- } } } } curItem.NeedsMap = newNeedsMap curItem.IdenticalToChild = false curItem.Child = nil curItems = nil // free the memory } parents, err := parentsContainingPath(curItem.path, curItem.Commit) if err != nil { return false, err } anyPushed := false for parnetNo, prev := range parents { currentHash, err := blobHash(curItem.path, curItem.Commit) if err != nil { return false, err } prevHash, err := blobHash(prev.Path, prev.Commit) if err != nil { return false, err } if currentHash == prevHash { if len(parents) == 1 && curItem.MergedChildren == nil && curItem.IdenticalToChild { // commit that has 1 parent and 1 child and is the same as both, bypass it completely b.q.Push(&queueItem{ Child: curItem.Child, Commit: prev.Commit, path: prev.Path, Contents: curItem.Contents, NeedsMap: curItem.NeedsMap, // reuse the NeedsMap as we are throwing away this item IdenticalToChild: true, ParentNo: curItem.ParentNo, }) } else { b.q.Push(&queueItem{ Child: curItem, Commit: prev.Commit, path: prev.Path, Contents: curItem.Contents, NeedsMap: append([]lineMap(nil), curItem.NeedsMap...), // create new slice and copy IdenticalToChild: true, ParentNo: parnetNo, }) curItem.numParentsNeedResolving++ } anyPushed = true continue } // get the contents of the file file, err := prev.Commit.File(prev.Path) if err != nil { return false, err } prevContents, err := file.Contents() if err != nil { return false, err } hunks := diff.Do(prevContents, curItem.Contents) prevl := -1 curl := -1 need := 0 getFromParent := make([]lineMap, 0) out: for h := range hunks { hLines := countLines(hunks[h].Text) for hl := 0; hl < hLines; hl++ { switch { case hunks[h].Type == diffmatchpatch.DiffEqual: prevl++ curl++ if curl == curItem.NeedsMap[need].Cur { // add to needs getFromParent = append(getFromParent, lineMap{curl, prevl, nil, -1}) // move to next need need++ if need >= len(curItem.NeedsMap) { break out } } case hunks[h].Type == diffmatchpatch.DiffInsert: curl++ if curl == curItem.NeedsMap[need].Cur { // the line we want is added, it may have been added here (or by another parent), skip it for now need++ if need >= len(curItem.NeedsMap) { break out } } case hunks[h].Type == diffmatchpatch.DiffDelete: prevl += hLines continue out default: return false, errors.New("invalid state: invalid hunk Type") } } } if len(getFromParent) > 0 { b.q.Push(&queueItem{ curItem, nil, prev.Commit, prev.Path, prevContents, getFromParent, 0, false, parnetNo, }) curItem.numParentsNeedResolving++ anyPushed = true } } curItem.Contents = "" // no longer need, free the memory if !anyPushed { return finishNeeds(curItem) } return false, nil } func finishNeeds(curItem *queueItem) (bool, error) { // any needs left in the needsMap must have come from this revision for i := range curItem.NeedsMap { if curItem.NeedsMap[i].Commit == nil { curItem.NeedsMap[i].Commit = curItem.Commit curItem.NeedsMap[i].FromParentNo = -1 } } if curItem.Child == nil && curItem.MergedChildren == nil { return true, nil } if curItem.MergedChildren == nil { return applyNeeds(curItem.Child, curItem.NeedsMap, curItem.IdenticalToChild, curItem.ParentNo) } for _, ctn := range curItem.MergedChildren { m := 0 // position in merged needs map p := 0 // position in parent needs map for p < len(ctn.NeedsMap) { if ctn.NeedsMap[p].Cur == curItem.NeedsMap[m].Cur { ctn.NeedsMap[p].Commit = curItem.NeedsMap[m].Commit m++ p++ } else if ctn.NeedsMap[p].Cur < curItem.NeedsMap[m].Cur { p++ } else { m++ } } finished, err := applyNeeds(ctn.Child, ctn.NeedsMap, ctn.IdenticalToChild, ctn.ParentNo) if finished || err != nil { return finished, err } } return false, nil } func applyNeeds(child *queueItem, needsMap []lineMap, identicalToChild bool, parentNo int) (bool, error) { if identicalToChild { for i := range child.NeedsMap { l := &child.NeedsMap[i] if l.Cur != needsMap[i].Cur || l.Orig != needsMap[i].Orig { return false, errors.New("needsMap isn't the same? Why not??") } if l.Commit == nil || parentNo < l.FromParentNo { l.Commit = needsMap[i].Commit l.FromParentNo = parentNo } } } else { i := 0 out: for j := range child.NeedsMap { l := &child.NeedsMap[j] for needsMap[i].Orig < l.Cur { i++ if i == len(needsMap) { break out } } if l.Cur == needsMap[i].Orig { if l.Commit == nil || parentNo < l.FromParentNo { l.Commit = needsMap[i].Commit l.FromParentNo = parentNo } } } } child.numParentsNeedResolving-- if child.numParentsNeedResolving == 0 { finished, err := finishNeeds(child) if finished || err != nil { return finished, err } } return false, nil } // String prints the results of a Blame using git-blame's style. func (b BlameResult) String() string { var buf bytes.Buffer // max line number length mlnl := len(strconv.Itoa(len(b.Lines))) // max author length mal := b.maxAuthorLength() format := fmt.Sprintf("%%s (%%-%ds %%s %%%dd) %%s\n", mal, mlnl) for ln := range b.Lines { _, _ = fmt.Fprintf(&buf, format, b.Lines[ln].Hash.String()[:8], b.Lines[ln].AuthorName, b.Lines[ln].Date.Format("2006-01-02 15:04:05 -0700"), ln+1, b.Lines[ln].Text) } return buf.String() } // utility function to calculate the number of runes needed // to print the longest author name in the blame of a file. func (b BlameResult) maxAuthorLength() int { m := 0 for ln := range b.Lines { m = max(m, utf8.RuneCountInString(b.Lines[ln].AuthorName)) } return m } func min(a, b int) int { if a < b { return a } return b } func max(a, b int) int { if a > b { return a } return b } type childToNeedsMap struct { Child *queueItem NeedsMap []lineMap IdenticalToChild bool ParentNo int } type queueItem struct { Child *queueItem MergedChildren []childToNeedsMap Commit *object.Commit path string Contents string NeedsMap []lineMap numParentsNeedResolving int IdenticalToChild bool ParentNo int } type priorityQueueImp []*queueItem func (pq *priorityQueueImp) Len() int { return len(*pq) } func (pq *priorityQueueImp) Less(i, j int) bool { return !(*pq)[i].Commit.Less((*pq)[j].Commit) } func (pq *priorityQueueImp) Swap(i, j int) { (*pq)[i], (*pq)[j] = (*pq)[j], (*pq)[i] } func (pq *priorityQueueImp) Push(x any) { *pq = append(*pq, x.(*queueItem)) } func (pq *priorityQueueImp) Pop() any { n := len(*pq) ret := (*pq)[n-1] (*pq)[n-1] = nil // ovoid memory leak *pq = (*pq)[0 : n-1] return ret } func (pq *priorityQueueImp) Peek() *object.Commit { if len(*pq) == 0 { return nil } return (*pq)[0].Commit } type priorityQueue priorityQueueImp func (pq *priorityQueue) Init() { heap.Init((*priorityQueueImp)(pq)) } func (pq *priorityQueue) Len() int { return (*priorityQueueImp)(pq).Len() } func (pq *priorityQueue) Push(c *queueItem) { heap.Push((*priorityQueueImp)(pq), c) } func (pq *priorityQueue) Pop() *queueItem { return heap.Pop((*priorityQueueImp)(pq)).(*queueItem) } func (pq *priorityQueue) Peek() *object.Commit { return (*priorityQueueImp)(pq).Peek() } type parentCommit struct { Commit *object.Commit Path string } func parentsContainingPath(path string, c *object.Commit) ([]parentCommit, error) { // TODO: benchmark this method making git.object.Commit.parent public instead of using // an iterator var result []parentCommit iter := c.Parents() for { parent, err := iter.Next() if err == io.EOF { return result, nil } if err != nil { return nil, err } if _, err := parent.File(path); err == nil { result = append(result, parentCommit{parent, path}) } else { // look for renames patch, err := parent.Patch(c) if err != nil { return nil, err } else if patch != nil { for _, fp := range patch.FilePatches() { from, to := fp.Files() if from != nil && to != nil && to.Path() == path { result = append(result, parentCommit{parent, from.Path()}) break } } } } } } func blobHash(path string, commit *object.Commit) (plumbing.Hash, error) { file, err := commit.File(path) if err != nil { return plumbing.ZeroHash, err } return file.Hash, nil }