Implement almost full CJK support.

Display of CJK content is supported. Adding CJK tags is problematic.
author: Yang Zhang <yang_zhang@iapcm.ac.cn> 2018-12-26 23:05:58 +0800
committer: Yang Zhang <yang_zhang@iapcm.ac.cn> 2018-12-26 23:05:58 +0800
commit: d31891504d201423f512d897d44dd71b06dad93d (patch)
tree: 0d5d5ba5597cb0fcb14a1adcc23ca6eb79c95fe6 /util/text
parent: 32b3e263fc8443f6089b9de8fbce833369461982 (diff)
parent: 3fa2d15fb899c937900083fd7de696599371ce47 (diff)
download: git-bug-d31891504d201423f512d897d44dd71b06dad93d.tar.gz
3 files changed, 89 insertions, 207 deletions
diff --git a/util/text/left_padded.go b/util/text/left_padded.go
index 729834db..3b8e13c6 100644
--- a/util/text/left_padded.go
+++ b/util/text/left_padded.go
@@ -3,25 +3,26 @@ package text
 import (
 	"bytes"
 	"fmt"
+	"github.com/mattn/go-runewidth"
 	"strings"
 )
 
-// LeftPadMaxLine pads a string on the left by a specified amount and pads the string on the right to fill the maxLength
+// LeftPadMaxLine pads a string on the left by a specified amount and pads the
+// string on the right to fill the maxLength
 func LeftPadMaxLine(text string, length, leftPad int) string {
-	runes := []rune(text)
+	rightPart := text
 
+	scrWidth := runewidth.StringWidth(text)
 	// truncate and ellipse if needed
-	if len(runes)+leftPad > length {
-		runes = append(runes[:(length-leftPad-1)], '…')
-	}
-
-	if len(runes)+leftPad < length {
-		runes = append(runes, []rune(strings.Repeat(" ", length-len(runes)-leftPad))...)
+	if scrWidth+leftPad > length {
+		rightPart = runewidth.Truncate(text, length-leftPad, "…")
+	} else if scrWidth+leftPad < length {
+		rightPart = runewidth.FillRight(text, length-leftPad)
 	}
 
 	return fmt.Sprintf("%s%s",
 		strings.Repeat(" ", leftPad),
-		string(runes),
+		rightPart,
 	)
 }
 
diff --git a/util/text/text.go b/util/text/text.go
index cffb4ee2..ee9d278c 100644
--- a/util/text/text.go
+++ b/util/text/text.go
@@ -2,9 +2,8 @@ package text
 
 import (
 	"bytes"
-	"strings"
-
 	"github.com/mattn/go-runewidth"
+	"strings"
 )
 
 // Wrap a text for an exact line size
@@ -17,97 +16,99 @@ func Wrap(text string, lineWidth int) (string, int) {
 // Handle properly terminal color escape code
 func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) {
 	var textBuffer bytes.Buffer
-	var lineBuffer bytes.Buffer
-	nbLine := 1
-	firstLine := true
+	nbLine := 0
 	pad := strings.Repeat(" ", leftPad)
 
 	// tabs are formatted as 4 spaces
 	text = strings.Replace(text, "\t", "    ", 4)
+	wrapped := wrapText(text, lineWidth-leftPad)
+	for _, line := range strings.Split(wrapped, "\n") {
+		textBuffer.WriteString(pad + line)
+		textBuffer.WriteString("\n")
+		nbLine++
+	}
+	return textBuffer.String(), nbLine
+}
 
-	for _, line := range strings.Split(text, "\n") {
-		spaceLeft := lineWidth - leftPad
-
-		if !firstLine {
-			textBuffer.WriteString("\n")
-			nbLine++
-		}
-
-		firstWord := true
-
-		for _, word := range strings.Split(line, " ") {
-			wordLength := wordLen(word)
-
-			if !firstWord {
-				lineBuffer.WriteString(" ")
-				spaceLeft -= 1
-
-				if spaceLeft <= 0 {
-					textBuffer.WriteString(pad + strings.TrimRight(lineBuffer.String(), " "))
-					textBuffer.WriteString("\n")
-					lineBuffer.Reset()
-					spaceLeft = lineWidth - leftPad
-					nbLine++
-					firstLine = false
-				}
+// Wrap text so that each line fills at most w cells. Lines break at word
+// boundaries or at multibyte chars.
+//
+// Wrapping algorithm: treat the text as a sequence of words, where each word
+// is either an alphanumeric word or a single multibyte char. We scan through
+// the text, building up the current word, and flush it into the output once it
+// is complete. A word is complete when a boundary is detected: a boundary char
+// ('\n', '\t' or ' ') is encountered; a multibyte char is found; or the text
+// switches from multibyte to single-byte chars. '\n' is handled specially: it
+// flushes the current word and then starts a new output line.
+func wrapText(s string, w int) string {
+	word := ""
+	out := ""
+
+	width := 0
+	firstWord := true
+	isMultibyteWord := false
+
+	flushWord := func() {
+		wl := wordLen(word)
+		if isMultibyteWord {
+			if width+wl > w {
+				out += "\n" + word
+				width = wl
+			} else {
+				out += word
+				width += wl
 			}
-
-			// Word fit in the current line
-			if spaceLeft >= wordLength {
-				lineBuffer.WriteString(word)
-				spaceLeft -= wordLength
-				firstWord = false
+		} else {
+			if width == 0 {
+				out += word
+				width += wl
+			} else if width+wl+1 > w {
+				out += "\n" + word
+				width = wl
 			} else {
-				// Break a word longer than a line
-				if wordLength > lineWidth {
-					for wordLength > 0 && wordLen(word) > 0 {
-						l := minInt(spaceLeft, wordLength)
-						part, leftover := splitWord(word, l)
-						word = leftover
-						wordLength = wordLen(word)
-
-						lineBuffer.WriteString(part)
-						textBuffer.WriteString(pad)
-						textBuffer.Write(lineBuffer.Bytes())
-						lineBuffer.Reset()
-
-						spaceLeft -= l
-
-						if spaceLeft <= 0 {
-							textBuffer.WriteString("\n")
-							nbLine++
-							spaceLeft = lineWidth - leftPad
-						}
-
-						if wordLength <= 0 {
-							break
-						}
-					}
-				} else {
-					// Normal break
-					textBuffer.WriteString(pad + strings.TrimRight(lineBuffer.String(), " "))
-					textBuffer.WriteString("\n")
-					lineBuffer.Reset()
-					lineBuffer.WriteString(word)
-					firstWord = false
-					spaceLeft = lineWidth - leftPad - wordLength
-					nbLine++
-				}
+				out += " " + word
+				width += wl + 1
 			}
 		}
+		word = ""
+	}
 
-		if lineBuffer.Len() > 0 {
-			textBuffer.WriteString(pad + strings.TrimRight(lineBuffer.String(), " "))
-			lineBuffer.Reset()
+	for _, r := range []rune(s) {
+		cw := runewidth.RuneWidth(r)
+		if firstWord {
+			word = string(r)
+			isMultibyteWord = cw > 1
+			firstWord = false
+			continue
+		}
+		if r == '\n' {
+			flushWord()
+			out += "\n"
+			width = 0
+		} else if r == ' ' || r == '\t' {
+			flushWord()
+		} else if cw > 1 {
+			flushWord()
+			word = string(r)
+			isMultibyteWord = true
+			word = string(r)
+		} else if cw == 1 && isMultibyteWord {
+			flushWord()
+			word = string(r)
+			isMultibyteWord = false
+		} else {
+			word += string(r)
 		}
-
-		firstLine = false
 	}
+	// The text may end without newlines, ensure flushing it or we can lose the
+	// last word.
+	flushWord()
 
-	return textBuffer.String(), nbLine
+	return out
 }
 
-// wordLen return the length of a word, while ignoring the terminal escape sequences
+// wordLen returns the length of a word, while ignoring the terminal escape
+// sequences
 func wordLen(word string) int {
 	length := 0
 	escape := false
@@ -116,11 +117,9 @@ func wordLen(word string) int {
 		if char == '\x1b' {
 			escape = true
 		}
-
 		if !escape {
 			length += runewidth.RuneWidth(rune(char))
 		}
-
 		if char == 'm' {
 			escape = false
 		}
@@ -128,51 +127,3 @@ func wordLen(word string) int {
 
 	return length
 }
-
-// splitWord split a word at the given length, while ignoring the terminal escape sequences
-func splitWord(word string, length int) (string, string) {
-	runes := []rune(word)
-	var result []rune
-	added := 0
-	escape := false
-
-	if length == 0 {
-		return "", word
-	}
-
-	for _, r := range runes {
-		if r == '\x1b' {
-			escape = true
-		}
-
-		width := runewidth.RuneWidth(r)
-		if width+added > length {
-			// wide character made the length overflow
-			break
-		}
-
-		result = append(result, r)
-
-		if !escape {
-			added += width
-			if added >= length {
-				break
-			}
-		}
-
-		if r == 'm' {
-			escape = false
-		}
-	}
-
-	leftover := runes[len(result):]
-
-	return string(result), string(leftover)
-}
-
-func minInt(a, b int) int {
-	if a > b {
-		return b
-	}
-	return a
-}
diff --git a/util/text/text_test.go b/util/text/text_test.go
index f5b15a43..c70d2ccd 100644
--- a/util/text/text_test.go
+++ b/util/text/text_test.go
@@ -203,73 +203,3 @@ func TestWordLen(t *testing.T) {
 		}
 	}
 }
-
-func TestSplitWord(t *testing.T) {
-	cases := []struct {
-		Input            string
-		Length           int
-		Result, Leftover string
-	}{
-		// A simple word passes through.
-		{
-			"foo",
-			4,
-			"foo", "",
-		},
-		// Cut at the right place
-		{
-			"foobarHoy",
-			4,
-			"foob", "arHoy",
-		},
-		// A simple word passes through with colors
-		{
-			"\x1b[31mbar\x1b[0m",
-			4,
-			"\x1b[31mbar\x1b[0m", "",
-		},
-		// Cut at the right place with colors
-		{
-			"\x1b[31mfoobarHoy\x1b[0m",
-			4,
-			"\x1b[31mfoob", "arHoy\x1b[0m",
-		},
-		// Handle prefix and suffix properly
-		{
-			"foo\x1b[31mfoobarHoy\x1b[0mbaaar",
-			4,
-			"foo\x1b[31mf", "oobarHoy\x1b[0mbaaar",
-		},
-		// Cut properly with length = 0
-		{
-			"foo",
-			0,
-			"", "foo",
-		},
-		// Handle chinese
-		{
-			"快檢什麼望對",
-			4,
-			"快檢", "什麼望對",
-		},
-		{
-			"快檢什麼望對",
-			5,
-			"快檢", "什麼望對",
-		},
-		// Handle chinese with colors
-		{
-			"快\x1b[31m檢什麼\x1b[0m望對",
-			4,
-			"快\x1b[31m檢", "什麼\x1b[0m望對",
-		},
-	}
-
-	for i, tc := range cases {
-		result, leftover := splitWord(tc.Input, tc.Length)
-		if result != tc.Result || leftover != tc.Leftover {
-			t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s` - `%s`\n\nActual Output:\n\n`%s` - `%s`",
-				i, tc.Input, tc.Result, tc.Leftover, result, leftover)
-		}
-	}
-}
author	Yang Zhang <yang_zhang@iapcm.ac.cn>	2018-12-26 23:05:58 +0800
committer	Yang Zhang <yang_zhang@iapcm.ac.cn>	2018-12-26 23:05:58 +0800
commit	d31891504d201423f512d897d44dd71b06dad93d (patch)
tree	0d5d5ba5597cb0fcb14a1adcc23ca6eb79c95fe6 /util/text
parent	32b3e263fc8443f6089b9de8fbce833369461982 (diff)
parent	3fa2d15fb899c937900083fd7de696599371ce47 (diff)
download	git-bug-d31891504d201423f512d897d44dd71b06dad93d.tar.gz