From 2e17f371758ad25a3674d65ef0e8e32a4660e6d4 Mon Sep 17 00:00:00 2001 From: Amine Hilaly Date: Sun, 5 May 2019 17:48:49 +0200 Subject: Add unicode control characters test case Move `cleanupText` to utils/text/transform.go `text.Cleanup`: removing unicode control characters except for those allowed by `text.Safe` Add golang.org/x/text dependencies fix text.Cleanup Fix import panic --- util/text/transform.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 util/text/transform.go (limited to 'util/text') diff --git a/util/text/transform.go b/util/text/transform.go new file mode 100644 index 00000000..59dc4e03 --- /dev/null +++ b/util/text/transform.go @@ -0,0 +1,31 @@ +package text + +import ( + "strings" + "unicode" + + "golang.org/x/text/runes" + "golang.org/x/text/transform" +) + +func Cleanup(text string) (string, error) { + // windows new line, Github, really ? + text = strings.Replace(text, "\r\n", "\n", -1) + + // remove all unicode control characters except + // '\n', '\r' and '\t' + t := runes.Remove(runes.Predicate(func(r rune) bool { + switch r { + case '\r', '\n', '\t': + return false + } + return unicode.IsControl(r) + })) + sanitized, _, err := transform.String(t, text) + if err != nil { + return "", err + } + + // trim extra new line not displayed in the github UI but still present in the data + return strings.TrimSpace(sanitized), nil +} -- cgit