aboutsummaryrefslogtreecommitdiffstats
path: root/util/text
diff options
context:
space:
mode:
author Yang Zhang <yang_zhang@iapcm.ac.cn> 2018-12-31 01:57:10 +0800
committer Yang Zhang <yang_zhang@iapcm.ac.cn> 2018-12-31 01:57:10 +0800
commit f22f9b7083ff65fb7abe00ea2fb7343a1b68c59d (patch)
tree 0a4688e329c450bd8859d72955e9123b279c178b /util/text
parent 467ab5b68ef3b0b9694f8987778d0ec0859a0201 (diff)
download git-bug-f22f9b7083ff65fb7abe00ea2fb7343a1b68c59d.tar.gz
Implement a new wrap algorithm and pass all text tests
Diffstat (limited to 'util/text')
-rw-r--r-- util/text/text.go 239
-rw-r--r-- util/text/text_test.go 15
2 files changed, 177 insertions, 77 deletions
diff --git a/util/text/text.go b/util/text/text.go
index ad920bd8..0447bde1 100644
--- a/util/text/text.go
+++ b/util/text/text.go
@@ -1,9 +1,9 @@
package text
import (
- "bytes"
"github.com/mattn/go-runewidth"
"strings"
+ "unicode/utf8"
)
// Wrap a text for an exact line size
@@ -15,96 +15,195 @@ func Wrap(text string, lineWidth int) (string, int) {
// Wrap a text for an exact line size with a left padding
// Handle properly terminal color escape code
func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) {
- var textBuffer bytes.Buffer
- nbLine := 0
pad := strings.Repeat(" ", leftPad)
+ var lines []string
+ nbLine := 0
// tabs are formatted as 4 spaces
- text = strings.Replace(text, "\t", " ", 4)
- wrapped := wrapText(text, lineWidth-leftPad)
- for _, line := range strings.Split(wrapped, "\n") {
- textBuffer.WriteString(pad + line)
- textBuffer.WriteString("\n")
- nbLine++
+ text = strings.Replace(text, "\t", " ", -1)
+ for _, line := range strings.Split(text, "\n") {
+ if line == "" || strings.TrimSpace(line) == "" {
+ lines = append(lines, "")
+ nbLine++
+ } else {
+ wrapped := softwrapLine(line, lineWidth-leftPad)
+ firstLine := true
+ for _, seg := range strings.Split(wrapped, "\n") {
+ if firstLine {
+ lines = append(lines, pad+strings.TrimRight(seg, " "))
+ firstLine = false
+ } else {
+ lines = append(lines, pad+strings.TrimSpace(seg))
+ }
+ nbLine++
+ }
+ }
}
- return textBuffer.String(), nbLine
+ return strings.Join(lines, "\n"), nbLine
}
-// Wrap text so that each line fills at most w cells. Lines break at word
-// boundary or multibyte chars.
-//
-// Wrapping Algorithm: Treat the text as a sequence of words, with each word be
-// an alphanumeric word, or a multibyte char. We scan through the text and
-// construct the word, and flush the word into the paragraph once a word is
-// ready. A word is ready when a word boundary is detected: a boundary char such
-// as '\n', '\t', and ' ' is encountered; a multibyte char is found; or a
-// multibyte to single-byte switch is encountered. '\n' is handled in a special
-// manner.
-func wrapText(s string, w int) string {
- word := ""
- out := ""
-
- width := 0
- firstWord := true
- isMultibyteWord := false
-
- flushWord := func() {
- wl := wordLen(word)
- if isMultibyteWord {
- if width+wl > w {
- out += "\n" + word
- width = wl
+type EscapeItem struct {
+ item string
+ pos int
+}
+
+func recordTermEscape(s string) (string, []EscapeItem) {
+ var result []EscapeItem
+ var newStr string
+
+ pos := 0
+ item := ""
+ occupiedRuneCount := 0
+ inEscape := false
+ for i, r := range []rune(s) {
+ if r == '\x1b' {
+ pos = i
+ item = string(r)
+ inEscape = true
+ continue
+ }
+ if inEscape {
+ item += string(r)
+ if r == 'm' {
+ result = append(result, EscapeItem{item: item, pos: pos - occupiedRuneCount})
+ occupiedRuneCount += utf8.RuneCountInString(item)
+ inEscape = false
+ }
+ continue
+ }
+ newStr += string(r)
+ }
+
+ return newStr, result
+}
+
+func replayTermEscape(s string, sequence []EscapeItem) string {
+ if len(sequence) == 0 {
+ return string(s)
+ }
+ // Assume the original string contains no newline and that wrapping only
+ // inserts newlines, so that we can recover the positions at which the term
+ // escapes must be re-inserted.
+ var out string = ""
+
+ currPos := 0
+ currItem := 0
+ for _, r := range []rune(s) {
+ if currItem < len(sequence) && currPos == sequence[currItem].pos {
+ if r == '\n' {
+ out += "\n" + sequence[currItem].item
} else {
- out += word
- width += wl
+ out += sequence[currItem].item + string(r)
+ currPos++
}
+ currItem++
} else {
- if width == 0 {
- out += word
- width += wl
- } else if width+wl+1 > w {
- out += "\n" + word
- width = wl
- } else {
- out += " " + word
- width += wl + 1
+ if r != '\n' {
+ currPos++
}
+ out += string(r)
}
- word = ""
}
- for _, r := range []rune(s) {
- cw := runewidth.RuneWidth(r)
- if firstWord {
- word = string(r)
- isMultibyteWord = cw > 1
- firstWord = false
+ return out
+}
+
+// Break a line into several lines so that each line consumes at most 'w' cells.
+// Lines break at groups of white space and at multibyte chars. Nothing is
+// removed from the line, so it behaves like a softwrap.
+//
+// Required: The line shall not contain '\n' (so it is a single line).
+//
+// WRAPPING ALGORITHM: The line is broken into non-breakable groups, then line
+// breaks ("\n") are inserted between these groups so that the total length
+// between breaks does not exceed the required width. Words that are longer than
+// the width are broken into several words as `M+M+...+N`.
+func softwrapLine(s string, w int) string {
+ newStr, termSeqs := recordTermEscape(s)
+
+ const (
+ WIDE_CHAR = iota
+ INVISIBLE = iota
+ SHORT_UNICODE = iota
+ SPACE = iota
+ VISIBLE_ASCII = iota
+ NONE = iota
+ )
+
+ // In order to simplify the terminal color sequence handling, we first strip
+ // them out of the text and record their position, then do the wrap. After
+ // that, we insert back these sequences.
+ runeType := func(r rune) int {
+ rw := runewidth.RuneWidth(r)
+ if rw > 1 {
+ return WIDE_CHAR
+ } else if rw == 0 {
+ return INVISIBLE
+ } else if r > 127 {
+ return SHORT_UNICODE
+ } else if r == ' ' {
+ return SPACE
+ } else {
+ return VISIBLE_ASCII
+ }
+ }
+
+ var chunks []string
+ var word string
+ wordType := NONE
+ for _, r := range []rune(newStr) {
+ // A WIDE_CHAR itself constitutes a group.
+ thisType := runeType(r)
+ if thisType == WIDE_CHAR {
+ chunks = append(chunks, string(r))
continue
}
- if r == '\n' {
- flushWord()
- out += "\n"
- width = 0
- } else if r == ' ' || r == '\t' {
- flushWord()
- } else if cw > 1 {
- flushWord()
- word = string(r)
- isMultibyteWord = true
- word = string(r)
- } else if cw == 1 && isMultibyteWord {
- flushWord()
+ // Other types of groups start with a char of that type, and end at a char
+ // of a different type or at the end of the string.
+ if thisType != wordType {
+ if wordType != NONE {
+ chunks = append(chunks, word)
+ }
word = string(r)
- isMultibyteWord = false
+ wordType = thisType
} else {
word += string(r)
}
}
- // The text may end without newlines, ensure flushing it or we can lose the
- // last word.
- flushWord()
+ if word != "" {
+ chunks = append(chunks, word)
+ }
- return out
+ var line string = ""
+ var width int = 0
+ // Reverse the chunk array so we can use it as a stack.
+ for i, j := 0, len(chunks)-1; i < j; i, j = i+1, j-1 {
+ chunks[i], chunks[j] = chunks[j], chunks[i]
+ }
+ for len(chunks) > 0 {
+ thisWord := chunks[len(chunks)-1]
+ wl := wordLen(thisWord)
+ if width+wl <= w {
+ line += chunks[len(chunks)-1]
+ chunks = chunks[:len(chunks)-1]
+ width += wl
+ if width == w && len(chunks) > 0{
+ line += "\n"
+ width = 0
+ }
+ } else if wl > w {
+ left, right := splitWord(chunks[len(chunks)-1], w)
+ line += left + "\n"
+ chunks[len(chunks)-1] = right
+ width = 0
+ } else {
+ line += "\n"
+ width = 0
+ }
+ }
+
+ line = replayTermEscape(line, termSeqs)
+ return line
}
// wordLen return the length of a word, while ignoring the terminal escape
diff --git a/util/text/text_test.go b/util/text/text_test.go
index f5b15a43..480b1f1f 100644
--- a/util/text/text_test.go
+++ b/util/text/text_test.go
@@ -5,6 +5,7 @@ import (
"testing"
)
+
func TestWrap(t *testing.T) {
cases := []struct {
Input, Output string
@@ -43,7 +44,7 @@ func TestWrap(t *testing.T) {
// A tab counts as 4 characters.
{
"foo\nb\t r\n baz",
- "foo\nb\n r\n baz",
+ "foo\nb\nr\n baz",
4,
},
// Trailing whitespace is removed after used for wrapping.
@@ -86,19 +87,19 @@ func TestWrap(t *testing.T) {
// Complete example:
{
" This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* baz \nBAM ",
- " This\nis a\nlist:\n\n\n *\nfoo\n *\nbar\n\n\n *\nbaz\nBAM\n",
+ " This\nis a\nlist:\n\n *\nfoo\n *\nbar\n\n\n *\nbaz\nBAM\n",
6,
},
// Handle chinese (wide characters)
{
- "婞一枳郲逴靲屮蜧曀殳,掫乇峔掮傎溒兀緉冘仜。",
- "婞一枳郲逴靲\n屮蜧曀殳,掫\n乇峔掮傎溒兀\n緉冘仜。",
+ "一只敏捷的狐狸跳过了一只懒狗。",
+ "一只敏捷的狐\n狸跳过了一只\n懒狗。",
12,
},
// Handle chinese with colors
{
- "婞一枳郲逴\x1b[31m靲屮蜧曀殳,掫乇峔掮傎溒\x1b[0m兀緉冘仜。",
- "婞一枳郲逴\x1b[31m靲\n屮蜧曀殳,掫\n乇峔掮傎溒\x1b[0m兀\n緉冘仜。",
+ "一只敏捷的\x1b[31m狐狸跳过\x1b[0m了一只懒狗。",
+ "一只敏捷的\x1b[31m狐\n狸跳过\x1b[0m了一只\n懒狗。",
12,
},
}
@@ -106,7 +107,7 @@ func TestWrap(t *testing.T) {
for i, tc := range cases {
actual, lines := Wrap(tc.Input, tc.Lim)
if actual != tc.Output {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n`\n%s`",
+ t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n\n`%s`",
i, tc.Input, tc.Output, actual)
}