aboutsummaryrefslogtreecommitdiffstats
path: root/util/text/transform.go
diff options
context:
space:
mode:
authorAmine Hilaly <hilalyamine@gmail.com>2019-05-05 17:48:49 +0200
committerAmine Hilaly <hilalyamine@gmail.com>2019-05-05 18:16:10 +0200
commit2e17f371758ad25a3674d65ef0e8e32a4660e6d4 (patch)
tree7c4bfd33ae24f272df045583c4ace761c8dd4242 /util/text/transform.go
parent537eddb97843a3f520fdedcd35f77b08880a4829 (diff)
downloadgit-bug-2e17f371758ad25a3674d65ef0e8e32a4660e6d4.tar.gz
Add unicode control characters test case
Move `cleanupText` to util/text/transform.go as `text.Cleanup`: removing unicode control characters except for those allowed by `text.Safe`. Add golang.org/x/text dependencies. Fix text.Cleanup. Fix import panic.
Diffstat (limited to 'util/text/transform.go')
-rw-r--r--util/text/transform.go31
1 files changed, 31 insertions, 0 deletions
diff --git a/util/text/transform.go b/util/text/transform.go
new file mode 100644
index 00000000..59dc4e03
--- /dev/null
+++ b/util/text/transform.go
@@ -0,0 +1,31 @@
+package text
+
+import (
+ "strings"
+ "unicode"
+
+ "golang.org/x/text/runes"
+ "golang.org/x/text/transform"
+)
+
+// Cleanup removes the unicode control characters from text (keeping only '\n',
+// '\r' and '\t'), turns windows new lines into '\n' and trims the surrounding
+// white space. If the transformation fails, it returns "" and the error.
+func Cleanup(text string) (string, error) {
+	t := runes.Remove(runes.Predicate(func(r rune) bool {
+		switch r {
+		case '\r', '\n', '\t': // the only control characters allowed to stay
+			return false
+		}
+		return unicode.IsControl(r)
+	}))
+	sanitized, _, err := transform.String(t, text)
+	if err != nil {
+		return "", err
+	}
+	// windows new line: replaced only after the removal above, because dropping
+	// a control character that sat between '\r' and '\n' creates a new "\r\n"
+	sanitized = strings.Replace(sanitized, "\r\n", "\n", -1)
+	// trim extra new line not displayed in the github UI but still present in the data
+	return strings.TrimSpace(sanitized), nil
+}