aboutsummaryrefslogtreecommitdiffstats
path: root/plumbing/object/difftree.go
diff options
context:
space:
mode:
author Miguel Molina <miguel@erizocosmi.co> 2020-04-23 16:51:56 +0200
committer Miguel Molina <miguel@erizocosmi.co> 2020-04-23 17:22:39 +0200
commit 90696f07f3a64243c6f33963f5267ab98622db9f (patch)
tree 29124a96a4a34927fc48038e2961c4461129c8db /plumbing/object/difftree.go
parent bb3a1bfcc62224001d577b933355661259b80854 (diff)
download go-git-90696f07f3a64243c6f33963f5267ab98622db9f.tar.gz
plumbing: detect renames by hash and similar content in diff tree
This commit implements the rename detection algorithms used in the JGit implementation. Given a list of changes, additions and deletions are extracted and matched in two ways: - By exact hash content: all additions and deletions are grouped by the content hash and paired with the best match based on the file mode and file path. All the files that cannot be paired are kept as regular deletions and additions. - By similar content: a matrix of addition and deletion pairs with all possible combinations is created and scored by how similar the content is between both files as well as how similar the file path is. The pairs with the best score and whose score is equal or greater than a threshold are paired and turned into a rename. All the files that cannot be paired are kept as regular deletions and additions. DiffTree and DiffTreeContext will not return the changes with renames detected for compatibility reasons, although this will change in v6 so that detecting renames is the default behaviour. A new function DiffTreeWithOptions has been added to configure the parameters for the rename detection to control the score threshold, the limit of renames and whether to use similar content detection in the detection. More information: - https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java - https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java - https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
Diffstat (limited to 'plumbing/object/difftree.go')
-rw-r--r-- plumbing/object/difftree.go 67
1 files changed, 64 insertions, 3 deletions
diff --git a/plumbing/object/difftree.go b/plumbing/object/difftree.go
index 72411a5..7c22227 100644
--- a/plumbing/object/difftree.go
+++ b/plumbing/object/difftree.go
@@ -10,14 +10,62 @@ import (
// DiffTree compares the content and mode of the blobs found via two
// tree objects.
+// DiffTree does not perform rename detection, use DiffTreeWithOptions
+// instead to detect renames.
func DiffTree(a, b *Tree) (Changes, error) {
return DiffTreeContext(context.Background(), a, b)
}
-// DiffTree compares the content and mode of the blobs found via two
+// DiffTreeContext compares the content and mode of the blobs found via two
// tree objects. Provided context must be non-nil.
-// An error will be return if context expires
+// An error will be returned if context expires.
func DiffTreeContext(ctx context.Context, a, b *Tree) (Changes, error) {
+ return DiffTreeWithOptions(ctx, a, b, nil)
+}
+
+// DiffTreeOptions are the configurable options when performing a diff tree.
+type DiffTreeOptions struct {
+ // DetectRenames is whether the diff tree will use rename detection.
+ DetectRenames bool
+ // RenameScore is the threshold of similarity between files to consider
+ // that a pair of delete and insert is a rename. The number must be
+ // between 0 and 100.
+ RenameScore uint
+ // RenameLimit is the maximum amount of files that can be compared when
+ // detecting renames. The number of comparisons that have to be performed
+ // is equal to the number of deleted files * the number of added files.
+ // That means, that if 100 files were deleted and 50 files were added, 5000
+ // file comparisons may be needed. So, if the rename limit is 50, the number
+ // of both deleted and added files needs to be less than or equal to 50.
+ // A value of 0 means no limit.
+ RenameLimit uint
+ // OnlyExactRenames performs only detection of exact renames and will not perform
+ // any detection of renames based on file similarity.
+ OnlyExactRenames bool
+}
+
+// DefaultDiffTreeOptions are the default and recommended options for the
+// diff tree.
+var DefaultDiffTreeOptions = &DiffTreeOptions{
+ DetectRenames: true,
+ RenameScore: 60,
+ RenameLimit: 0,
+ OnlyExactRenames: false,
+}
+
+// DiffTreeWithOptions compares the content and mode of the blobs found
+// via two tree objects with the given options. The provided context
+// must be non-nil.
+// If no options are passed, no rename detection will be performed. The
+// recommended options are DefaultDiffTreeOptions.
+// An error will be returned if the context expires.
+// This function will be deprecated and removed in v6 so the default
+// behaviour of DiffTree is to detect renames.
+func DiffTreeWithOptions(
+ ctx context.Context,
+ a, b *Tree,
+ opts *DiffTreeOptions,
+) (Changes, error) {
from := NewTreeRootNode(a)
to := NewTreeRootNode(b)
@@ -33,5 +81,18 @@ func DiffTreeContext(ctx context.Context, a, b *Tree) (Changes, error) {
return nil, err
}
- return newChanges(merkletrieChanges)
+ changes, err := newChanges(merkletrieChanges)
+ if err != nil {
+ return nil, err
+ }
+
+ if opts == nil {
+ opts = new(DiffTreeOptions)
+ }
+
+ if opts.DetectRenames {
+ return DetectRenames(changes, opts)
+ }
+
+ return changes, nil
}