From 27b67a493d3b0a4f15252b77b546115c00ad2226 Mon Sep 17 00:00:00 2001 From: Josh Betz Date: Mon, 31 Jul 2017 11:46:31 -0500 Subject: Normalize filenames before comparing. Some multibyte characters can have multiple representations (for example, a precomposed character versus a base character followed by a combining mark). Before comparing strings, we need to normalize them. In this case we're normalizing to Unicode Normalization Form C (NFC), but the choice of form shouldn't matter as long as both strings are normalized to the same form. Fixes https://github.com/src-d/go-git/issues/495 --- utils/merkletrie/noder/path.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'utils/merkletrie/noder/path.go') diff --git a/utils/merkletrie/noder/path.go b/utils/merkletrie/noder/path.go index 85742db..d2e2932 100644 --- a/utils/merkletrie/noder/path.go +++ b/utils/merkletrie/noder/path.go @@ -3,6 +3,8 @@ package noder import ( "bytes" "strings" + + "golang.org/x/text/unicode/norm" ) // Path values represent a noder and its ancestors. The root goes first @@ -78,7 +80,11 @@ func (p Path) Compare(other Path) int { case i == len(p): return -1 default: - cmp := strings.Compare(p[i].Name(), other[i].Name()) + form := norm.Form(norm.NFC) + this := form.String(p[i].Name()) + that := form.String(other[i].Name()) + + cmp := strings.Compare(this, that) if cmp != 0 { return cmp } -- cgit