aboutsummaryrefslogtreecommitdiffstats
path: root/util/text
diff options
context:
space:
mode:
Diffstat (limited to 'util/text')
-rw-r--r--util/text/left_padded.go19
-rw-r--r--util/text/text.go306
-rw-r--r--util/text/text_test.go109
3 files changed, 342 insertions, 92 deletions
diff --git a/util/text/left_padded.go b/util/text/left_padded.go
index 729834db..eae65d34 100644
--- a/util/text/left_padded.go
+++ b/util/text/left_padded.go
@@ -3,25 +3,26 @@ package text
import (
"bytes"
"fmt"
+ "github.com/mattn/go-runewidth"
"strings"
)
-// LeftPadMaxLine pads a string on the left by a specified amount and pads the string on the right to fill the maxLength
+// LeftPadMaxLine pads a string on the left by a specified amount and pads the
+// string on the right to fill the maxLength
func LeftPadMaxLine(text string, length, leftPad int) string {
- runes := []rune(text)
+ var rightPart string = text
+ scrWidth := runewidth.StringWidth(text)
// truncate and ellipse if needed
- if len(runes)+leftPad > length {
- runes = append(runes[:(length-leftPad-1)], '…')
- }
-
- if len(runes)+leftPad < length {
- runes = append(runes, []rune(strings.Repeat(" ", length-len(runes)-leftPad))...)
+ if scrWidth+leftPad > length {
+ rightPart = runewidth.Truncate(text, length-leftPad, "…")
+ } else if scrWidth+leftPad < length {
+ rightPart = runewidth.FillRight(text, length-leftPad)
}
return fmt.Sprintf("%s%s",
strings.Repeat(" ", leftPad),
- string(runes),
+ rightPart,
)
}
diff --git a/util/text/text.go b/util/text/text.go
index cffb4ee2..81cc870b 100644
--- a/util/text/text.go
+++ b/util/text/text.go
@@ -1,12 +1,18 @@
package text
import (
- "bytes"
- "strings"
-
"github.com/mattn/go-runewidth"
+ "strings"
+ "unicode/utf8"
)
+// Force runewidth not to treat ambiguous runes as wide chars, so that things
+// like unicode ellipsis/up/down/left/right glyphs can have correct runewidth
+// and can be displayed correctly in terminals.
+func init() {
+ runewidth.DefaultCondition.EastAsianWidth = false
+}
+
// Wrap a text for an exact line size
// Handle properly terminal color escape code
func Wrap(text string, lineWidth int) (string, int) {
@@ -16,98 +22,248 @@ func Wrap(text string, lineWidth int) (string, int) {
// Wrap a text for an exact line size with a left padding
// Handle properly terminal color escape code
func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) {
- var textBuffer bytes.Buffer
- var lineBuffer bytes.Buffer
- nbLine := 1
- firstLine := true
+ var lines []string
+ nbLine := 0
pad := strings.Repeat(" ", leftPad)
// tabs are formatted as 4 spaces
- text = strings.Replace(text, "\t", " ", 4)
-
+ text = strings.Replace(text, "\t", " ", -1)
+ // NOTE: text is first split into lines because softwrapLine requires input without "\n".
for _, line := range strings.Split(text, "\n") {
- spaceLeft := lineWidth - leftPad
-
- if !firstLine {
- textBuffer.WriteString("\n")
+ if line == "" || strings.TrimSpace(line) == "" {
+ lines = append(lines, "")
nbLine++
+ } else {
+ wrapped := softwrapLine(line, lineWidth-leftPad)
+ firstLine := true
+ for _, seg := range strings.Split(wrapped, "\n") {
+ if firstLine {
+ lines = append(lines, pad+strings.TrimRight(seg, " "))
+ firstLine = false
+ } else {
+ lines = append(lines, pad+strings.TrimSpace(seg))
+ }
+ nbLine++
+ }
}
+ }
+ return strings.Join(lines, "\n"), nbLine
+}
- firstWord := true
+// Break a line into several lines so that each line consumes at most
+// 'textWidth' cells. Lines break at groups of white spaces and multibyte
+// chars. Nothing is removed from the original text so that it behaves like a
+// softwrap.
+//
+// Required: The line shall not contain '\n'
+//
+// WRAPPING ALGORITHM: The line is broken into non-breakable chunks, then line
+// breaks ("\n") are inserted between these groups so that the total length
+// between breaks does not exceed the required width. Words that are longer than
+// the textWidth are broken into pieces no longer than textWidth.
+//
+func softwrapLine(line string, textWidth int) string {
+ // NOTE: terminal escapes are stripped out of the line so the algorithm is
+ // simpler. Do not try to mix them in the wrapping algorithm, as it can get
+ // complicated quickly.
+ line1, termEscapes := extractTermEscapes(line)
+
+ chunks := segmentLine(line1)
+ // Reverse the chunk array so we can use it as a stack.
+ for i, j := 0, len(chunks)-1; i < j; i, j = i+1, j-1 {
+ chunks[i], chunks[j] = chunks[j], chunks[i]
+ }
+ var line2 string = ""
+ var width int = 0
+ for len(chunks) > 0 {
+ thisWord := chunks[len(chunks)-1]
+ wl := wordLen(thisWord)
+ if width+wl <= textWidth {
+ line2 += chunks[len(chunks)-1]
+ chunks = chunks[:len(chunks)-1]
+ width += wl
+ if width == textWidth && len(chunks) > 0 {
+ // NOTE: new line begins when current line is full and there are more
+ // chunks to come.
+ line2 += "\n"
+ width = 0
+ }
+ } else if wl > textWidth {
+ // NOTE: By default, long words are split to fill the remaining space.
+ // But if the long word is the first non-space word in the middle of the
+ // line, preceding spaces shall not be counted in word splitting.
+ splitWidth := textWidth - width
+ if strings.HasSuffix(line2, "\n"+strings.Repeat(" ", width)) {
+ splitWidth += width
+ }
+ left, right := splitWord(chunks[len(chunks)-1], splitWidth)
+ chunks[len(chunks)-1] = right
+ line2 += left + "\n"
+ width = 0
+ } else {
+ line2 += "\n"
+ width = 0
+ }
+ }
- for _, word := range strings.Split(line, " ") {
- wordLength := wordLen(word)
+ line3 := applyTermEscapes(line2, termEscapes)
+ return line3
+}
- if !firstWord {
- lineBuffer.WriteString(" ")
- spaceLeft -= 1
+// escapeItem: Storage of terminal escapes in a line. 'item' is the actual
+// escape command, and 'pos' is the index in the rune array where the 'item'
+// shall be inserted back. For example, the escape item in "F\x1b33mox" is
+// {"\x1b33m", 1}.
+type escapeItem struct {
+ item string
+ pos int
+}
- if spaceLeft <= 0 {
- textBuffer.WriteString(pad + strings.TrimRight(lineBuffer.String(), " "))
- textBuffer.WriteString("\n")
- lineBuffer.Reset()
- spaceLeft = lineWidth - leftPad
- nbLine++
- firstLine = false
- }
+// Extract terminal escapes out of a line, returns a new line without terminal
+// escapes and a slice of escape items. The terminal escapes can be inserted
+// back into the new line at rune index 'item.pos' to recover the original line.
+//
+// Required: The line shall not contain "\n"
+//
+func extractTermEscapes(line string) (string, []escapeItem) {
+ var termEscapes []escapeItem
+ var line1 string
+
+ pos := 0
+ item := ""
+ occupiedRuneCount := 0
+ inEscape := false
+ for i, r := range []rune(line) {
+ if r == '\x1b' {
+ pos = i
+ item = string(r)
+ inEscape = true
+ continue
+ }
+ if inEscape {
+ item += string(r)
+ if r == 'm' {
+ termEscapes = append(termEscapes, escapeItem{item, pos - occupiedRuneCount})
+ occupiedRuneCount += utf8.RuneCountInString(item)
+ inEscape = false
}
+ continue
+ }
+ line1 += string(r)
+ }
+
+ return line1, termEscapes
+}
- // Word fit in the current line
- if spaceLeft >= wordLength {
- lineBuffer.WriteString(word)
- spaceLeft -= wordLength
- firstWord = false
+// Apply the extracted terminal escapes to the edited line. The only edit
+// allowed is to insert "\n" like that in softwrapLine. Callers shall ensure
+// this since this function is not able to check it.
+func applyTermEscapes(line string, escapes []escapeItem) string {
+ if len(escapes) == 0 {
+ return line
+ }
+
+ var out string = ""
+
+ currPos := 0
+ currItem := 0
+ for _, r := range line {
+ if currItem < len(escapes) && currPos == escapes[currItem].pos {
+ // NOTE: We avoid terminal escapes at the end of a line by moving them one
+ // past the end of line, so that algorithms that trim right spaces are
+ // happy. But algorithms that trim left spaces are still unhappy.
+ if r == '\n' {
+ out += "\n" + escapes[currItem].item
} else {
- // Break a word longer than a line
- if wordLength > lineWidth {
- for wordLength > 0 && wordLen(word) > 0 {
- l := minInt(spaceLeft, wordLength)
- part, leftover := splitWord(word, l)
- word = leftover
- wordLength = wordLen(word)
-
- lineBuffer.WriteString(part)
- textBuffer.WriteString(pad)
- textBuffer.Write(lineBuffer.Bytes())
- lineBuffer.Reset()
-
- spaceLeft -= l
-
- if spaceLeft <= 0 {
- textBuffer.WriteString("\n")
- nbLine++
- spaceLeft = lineWidth - leftPad
- }
-
- if wordLength <= 0 {
- break
- }
- }
- } else {
- // Normal break
- textBuffer.WriteString(pad + strings.TrimRight(lineBuffer.String(), " "))
- textBuffer.WriteString("\n")
- lineBuffer.Reset()
- lineBuffer.WriteString(word)
- firstWord = false
- spaceLeft = lineWidth - leftPad - wordLength
- nbLine++
- }
+ out += escapes[currItem].item + string(r)
+ currPos++
}
+ currItem++
+ } else {
+ if r != '\n' {
+ currPos++
+ }
+ out += string(r)
}
+ }
- if lineBuffer.Len() > 0 {
- textBuffer.WriteString(pad + strings.TrimRight(lineBuffer.String(), " "))
- lineBuffer.Reset()
- }
+ return out
+}
- firstLine = false
+// Segment a line into chunks, where each chunk consists of chars with the same
+// type and is not breakable.
+func segmentLine(s string) []string {
+ var chunks []string
+
+ var word string
+ wordType := none
+ flushWord := func() {
+ chunks = append(chunks, word)
+ word = ""
+ wordType = none
}
- return textBuffer.String(), nbLine
+ for _, r := range s {
+ // A WIDE_CHAR itself constitutes a chunk.
+ thisType := runeType(r)
+ if thisType == wideChar {
+ if wordType != none {
+ flushWord()
+ }
+ chunks = append(chunks, string(r))
+ continue
+ }
+ // Other types of chunks start with a char of that type, and end at a
+ // char with a different type or the end of the string.
+ if thisType != wordType {
+ if wordType != none {
+ flushWord()
+ }
+ word = string(r)
+ wordType = thisType
+ } else {
+ word += string(r)
+ }
+ }
+ if word != "" {
+ flushWord()
+ }
+
+ return chunks
+}
+
+// Rune categories
+//
+// These categories are so defined that each category forms a non-breakable
+// chunk. It IS NOT the same as unicode code point categories.
+//
+const (
+ none int = iota
+ wideChar
+ invisible
+ shortUnicode
+ space
+ visibleAscii
+)
+
+// Determine the category of a rune.
+func runeType(r rune) int {
+ rw := runewidth.RuneWidth(r)
+ if rw > 1 {
+ return wideChar
+ } else if rw == 0 {
+ return invisible
+ } else if r > 127 {
+ return shortUnicode
+ } else if r == ' ' {
+ return space
+ } else {
+ return visibleAscii
+ }
}
-// wordLen return the length of a word, while ignoring the terminal escape sequences
+// wordLen returns the length of a word, while ignoring the terminal escape
+// sequences
func wordLen(word string) int {
length := 0
escape := false
@@ -116,11 +272,9 @@ func wordLen(word string) int {
if char == '\x1b' {
escape = true
}
-
if !escape {
length += runewidth.RuneWidth(rune(char))
}
-
if char == 'm' {
escape = false
}
diff --git a/util/text/text_test.go b/util/text/text_test.go
index f5b15a43..9bf21164 100644
--- a/util/text/text_test.go
+++ b/util/text/text_test.go
@@ -1,6 +1,7 @@
package text
import (
+ "reflect"
"strings"
"testing"
)
@@ -43,7 +44,7 @@ func TestWrap(t *testing.T) {
// A tab counts as 4 characters.
{
"foo\nb\t r\n baz",
- "foo\nb\n r\n baz",
+ "foo\nb\nr\n baz",
4,
},
// Trailing whitespace is removed after used for wrapping.
@@ -86,19 +87,31 @@ func TestWrap(t *testing.T) {
// Complete example:
{
" This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* baz \nBAM ",
- " This\nis a\nlist:\n\n\n *\nfoo\n *\nbar\n\n\n *\nbaz\nBAM\n",
+ " This\nis a\nlist:\n\n *\nfoo\n *\nbar\n\n\n *\nbaz\nBAM\n",
6,
},
// Handle chinese (wide characters)
{
- "婞一枳郲逴靲屮蜧曀殳,掫乇峔掮傎溒兀緉冘仜。",
- "婞一枳郲逴靲\n屮蜧曀殳,掫\n乇峔掮傎溒兀\n緉冘仜。",
+ "一只敏捷的狐狸跳过了一只懒狗。",
+ "一只敏捷的狐\n狸跳过了一只\n懒狗。",
12,
},
// Handle chinese with colors
{
- "婞一枳郲逴\x1b[31m靲屮蜧曀殳,掫乇峔掮傎溒\x1b[0m兀緉冘仜。",
- "婞一枳郲逴\x1b[31m靲\n屮蜧曀殳,掫\n乇峔掮傎溒\x1b[0m兀\n緉冘仜。",
+ "一只敏捷的\x1b[31m狐狸跳过\x1b[0m了一只懒狗。",
+ "一只敏捷的\x1b[31m狐\n狸跳过\x1b[0m了一只\n懒狗。",
+ 12,
+ },
+ // Handle mixed wide and short characters
+ {
+ "敏捷 A quick 的狐狸 fox 跳过 jumps over a lazy 了一只懒狗 dog。",
+ "敏捷 A quick\n的狐狸 fox\n跳过 jumps\nover a lazy\n了一只懒狗\ndog。",
+ 12,
+ },
+ // Handle mixed wide and short characters with color
+ {
+ "敏捷 A \x1b31mquick 的狐狸 fox 跳\x1b0m过 jumps over a lazy 了一只懒狗 dog。",
+ "敏捷 A \x1b31mquick\n的狐狸 fox\n跳\x1b0m过 jumps\nover a lazy\n了一只懒狗\ndog。",
12,
},
}
@@ -106,7 +119,7 @@ func TestWrap(t *testing.T) {
for i, tc := range cases {
actual, lines := Wrap(tc.Input, tc.Lim)
if actual != tc.Output {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n`\n%s`",
+ t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n\n`%s`",
i, tc.Input, tc.Output, actual)
}
@@ -144,6 +157,14 @@ func TestWrapLeftPadded(t *testing.T) {
蚗佶庂咺丌,輀鈁乇彽洢溦洰氶乇构碨洐巿阹。`,
59, 4,
},
+ // Handle long unbreakable words in a full sentence
+ {
+ "OT: there are alternatives to maintainer-/user-set priority, e.g. \"[user pain](http://www.lostgarden.com/2008/05/improving-bug-triage-with-user-pain.html)\".",
+ ` OT: there are alternatives to maintainer-/user-set
+ priority, e.g. "[user pain](http://www.lostgarden.com/
+ 2008/05/improving-bug-triage-with-user-pain.html)".`,
+ 58, 4,
+ },
}
for i, tc := range cases {
@@ -273,3 +294,77 @@ func TestSplitWord(t *testing.T) {
}
}
}
+
+func TestExtractApplyTermEscapes(t *testing.T) {
+ cases := []struct {
+ Input string
+ Output string
+ TermEscapes []escapeItem
+ }{
+ // A plain ascii line with escapes.
+ {
+ "This \x1b[31mis an\x1b[0m example.",
+ "This is an example.",
+ []escapeItem{{"\x1b[31m", 5}, {"\x1b[0m", 10}},
+ },
+ // A plain wide line with escapes.
+ {
+ "一只敏捷\x1b[31m的狐狸\x1b[0m跳过了一只懒狗。",
+ "一只敏捷的狐狸跳过了一只懒狗。",
+ []escapeItem{{"\x1b[31m", 4}, {"\x1b[0m", 7}},
+ },
+ // A normal-wide mixed line with escapes.
+ {
+ "一只 A Quick 敏捷\x1b[31m的狐 Fox 狸\x1b[0m跳过了Dog一只懒狗。",
+ "一只 A Quick 敏捷的狐 Fox 狸跳过了Dog一只懒狗。",
+ []escapeItem{{"\x1b[31m", 13}, {"\x1b[0m", 21}},
+ },
+ }
+
+ for i, tc := range cases {
+ line2, escapes := extractTermEscapes(tc.Input)
+ if line2 != tc.Output || !reflect.DeepEqual(escapes, tc.TermEscapes) {
+ t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\nLine: `%s`\nEscapes: `%+v`\n\nActual Output:\n\nLine: `%s`\nEscapes: `%+v`\n\n",
+ i, tc.Input, tc.Output, tc.TermEscapes, line2, escapes)
+ }
+ line3 := applyTermEscapes(line2, escapes)
+ if line3 != tc.Input {
+ t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Result:\n\n`%s`\n\nActual Result:\n\n`%s`\n\n",
+ i, tc.Input, tc.Input, line3)
+ }
+ }
+}
+
+func TestSegmentLines(t *testing.T) {
+ cases := []struct {
+ Input string
+ Output []string
+ }{
+ // A plain ascii line.
+ {
+ "This is an example.",
+ []string{"This", " ", "is", " ", "an", " ", "example."},
+ },
+ // A plain wide line.
+ {
+ "一只敏捷的狐狸跳过了一只懒狗。",
+ []string{"一", "只", "敏", "捷", "的", "狐", "狸", "跳", "过",
+ "了", "一", "只", "懒", "狗", "。"},
+ },
+ // A complex sentence.
+ {
+ "This is a 'complex' example, where 一只 and English 混合了。",
+ []string{"This", " ", "is", " ", "a", " ", "'complex'", " ", "example,",
+ " ", "where", " ", "一", "只", " ", "and", " ", "English", " ", "混",
+ "合", "了", "。"},
+ },
+ }
+
+ for i, tc := range cases {
+ chunks := segmentLine(tc.Input)
+ if !reflect.DeepEqual(chunks, tc.Output) {
+ t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`[%s]`\n\nActual Output:\n\n`[%s]`\n\n",
+ i, tc.Input, strings.Join(tc.Output, ", "), strings.Join(chunks, ", "))
+ }
+ }
+}