diff options
author | Amine Hilaly <hilalyamine@gmail.com> | 2019-05-05 17:48:49 +0200 |
---|---|---|
committer | Amine Hilaly <hilalyamine@gmail.com> | 2019-05-05 18:16:10 +0200 |
commit | 2e17f371758ad25a3674d65ef0e8e32a4660e6d4 (patch) | |
tree | 7c4bfd33ae24f272df045583c4ace761c8dd4242 /util | |
parent | 537eddb97843a3f520fdedcd35f77b08880a4829 (diff) | |
download | git-bug-2e17f371758ad25a3674d65ef0e8e32a4660e6d4.tar.gz |
Add unicode control characters test case
Move `cleanupText` to util/text/transform.go
`text.Cleanup`: removing unicode control characters except for those allowed by `text.Safe`
Add golang.org/x/text dependencies
fix text.Cleanup
Fix import panic
Diffstat (limited to 'util')
-rw-r--r-- | util/text/transform.go | 31 |
1 file changed, 31 insertions, 0 deletions
diff --git a/util/text/transform.go b/util/text/transform.go new file mode 100644 index 00000000..59dc4e03 --- /dev/null +++ b/util/text/transform.go @@ -0,0 +1,31 @@ +package text + +import ( + "strings" + "unicode" + + "golang.org/x/text/runes" + "golang.org/x/text/transform" +) + +func Cleanup(text string) (string, error) { + // windows new line, Github, really ? + text = strings.Replace(text, "\r\n", "\n", -1) + + // remove all unicode control characters except + // '\n', '\r' and '\t' + t := runes.Remove(runes.Predicate(func(r rune) bool { + switch r { + case '\r', '\n', '\t': + return false + } + return unicode.IsControl(r) + })) + sanitized, _, err := transform.String(t, text) + if err != nil { + return "", err + } + + // trim extra new line not displayed in the github UI but still present in the data + return strings.TrimSpace(sanitized), nil +} |