diff options
author | Yang Zhang <yang_zhang@iapcm.ac.cn> | 2018-12-31 01:57:10 +0800 |
---|---|---|
committer | Yang Zhang <yang_zhang@iapcm.ac.cn> | 2018-12-31 01:57:10 +0800 |
commit | f22f9b7083ff65fb7abe00ea2fb7343a1b68c59d (patch) | |
tree | 0a4688e329c450bd8859d72955e9123b279c178b /util/text | |
parent | 467ab5b68ef3b0b9694f8987778d0ec0859a0201 (diff) | |
download | git-bug-f22f9b7083ff65fb7abe00ea2fb7343a1b68c59d.tar.gz |
Implement a new wrap algorithm and pass all text tests
Diffstat (limited to 'util/text')
-rw-r--r-- | util/text/text.go | 239 | ||||
-rw-r--r-- | util/text/text_test.go | 15 |
2 files changed, 177 insertions, 77 deletions
diff --git a/util/text/text.go b/util/text/text.go index ad920bd8..0447bde1 100644 --- a/util/text/text.go +++ b/util/text/text.go @@ -1,9 +1,9 @@ package text import ( - "bytes" "github.com/mattn/go-runewidth" "strings" + "unicode/utf8" ) // Wrap a text for an exact line size @@ -15,96 +15,195 @@ func Wrap(text string, lineWidth int) (string, int) { // Wrap a text for an exact line size with a left padding // Handle properly terminal color escape code func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) { - var textBuffer bytes.Buffer - nbLine := 0 pad := strings.Repeat(" ", leftPad) + var lines []string + nbLine := 0 // tabs are formatted as 4 spaces - text = strings.Replace(text, "\t", " ", 4) - wrapped := wrapText(text, lineWidth-leftPad) - for _, line := range strings.Split(wrapped, "\n") { - textBuffer.WriteString(pad + line) - textBuffer.WriteString("\n") - nbLine++ + text = strings.Replace(text, "\t", " ", -1) + for _, line := range strings.Split(text, "\n") { + if line == "" || strings.TrimSpace(line) == "" { + lines = append(lines, "") + nbLine++ + } else { + wrapped := softwrapLine(line, lineWidth-leftPad) + firstLine := true + for _, seg := range strings.Split(wrapped, "\n") { + if firstLine { + lines = append(lines, pad+strings.TrimRight(seg, " ")) + firstLine = false + } else { + lines = append(lines, pad+strings.TrimSpace(seg)) + } + nbLine++ + } + } } - return textBuffer.String(), nbLine + return strings.Join(lines, "\n"), nbLine } -// Wrap text so that each line fills at most w cells. Lines break at word -// boundary or multibyte chars. -// -// Wrapping Algorithm: Treat the text as a sequence of words, with each word be -// an alphanumeric word, or a multibyte char. We scan through the text and -// construct the word, and flush the word into the paragraph once a word is -// ready. A word is ready when a word boundary is detected: a boundary char such -// as '\n', '\t', and ' ' is encountered; a multibyte char is found; or a -// multibyte to single-byte switch is encountered. '\n' is handled in a special -// manner. -func wrapText(s string, w int) string { - word := "" - out := "" - - width := 0 - firstWord := true - isMultibyteWord := false - - flushWord := func() { - wl := wordLen(word) - if isMultibyteWord { - if width+wl > w { - out += "\n" + word - width = wl +type EscapeItem struct { + item string + pos int +} + +func recordTermEscape(s string) (string, []EscapeItem) { + var result []EscapeItem + var newStr string + + pos := 0 + item := "" + occupiedRuneCount := 0 + inEscape := false + for i, r := range []rune(s) { + if r == '\x1b' { + pos = i + item = string(r) + inEscape = true + continue + } + if inEscape { + item += string(r) + if r == 'm' { + result = append(result, EscapeItem{item: item, pos: pos - occupiedRuneCount}) + occupiedRuneCount += utf8.RuneCountInString(item) + inEscape = false + } + continue + } + newStr += string(r) + } + + return newStr, result +} + +func replayTermEscape(s string, sequence []EscapeItem) string { + if len(sequence) == 0 { + return string(s) + } + // Assume the original string contains no new line and the wrapped only insert + // new lines. So that we can recover the position where we insert the term + // escapes. + var out string = "" + + currPos := 0 + currItem := 0 + for _, r := range []rune(s) { + if currItem < len(sequence) && currPos == sequence[currItem].pos { + if r == '\n' { + out += "\n" + sequence[currItem].item } else { - out += word - width += wl + out += sequence[currItem].item + string(r) + currPos++ } + currItem++ } else { - if width == 0 { - out += word - width += wl - } else if width+wl+1 > w { - out += "\n" + word - width = wl - } else { - out += " " + word - width += wl + 1 + if r != '\n' { + currPos++ } + out += string(r) } - word = "" } - for _, r := range []rune(s) { - cw := runewidth.RuneWidth(r) - if firstWord { - word = string(r) - isMultibyteWord = cw > 1 - firstWord = false + return out +} + +// Break a line into several lines so that each line consumes at most 'w' cells. +// Lines break at group of white spaces and multibyte chars. Nothing is removed +// from the line so that it behaves like a softwrap. +// +// Required: The line shall not contain '\n' (so it is a single line). +// +// WRAPPING ALGORITHM: The line is broken into non-breakable groups, then line +// breaks ("\n") is inserted between these groups so that the total length +// between breaks does not exceed the required width. Words that are longer than +// the width is broken into several words as `M+M+...+N`. +func softwrapLine(s string, w int) string { + newStr, termSeqs := recordTermEscape(s) + + const ( + WIDE_CHAR = iota + INVISIBLE = iota + SHORT_UNICODE = iota + SPACE = iota + VISIBLE_ASCII = iota + NONE = iota + ) + + // In order to simplify the terminal color sequence handling, we first strip + // them out of the text and record their position, then do the wrap. After + // that, we insert back these sequences. + runeType := func(r rune) int { + rw := runewidth.RuneWidth(r) + if rw > 1 { + return WIDE_CHAR + } else if rw == 0 { + return INVISIBLE + } else if r > 127 { + return SHORT_UNICODE + } else if r == ' ' { + return SPACE + } else { + return VISIBLE_ASCII + } + } + + var chunks []string + var word string + wordType := NONE + for _, r := range []rune(newStr) { + // A WIDE_CHAR itself constitutes a group. + thisType := runeType(r) + if thisType == WIDE_CHAR { + chunks = append(chunks, string(r)) continue } - if r == '\n' { - flushWord() - out += "\n" - width = 0 - } else if r == ' ' || r == '\t' { - flushWord() - } else if cw > 1 { - flushWord() - word = string(r) - isMultibyteWord = true - word = string(r) - } else if cw == 1 && isMultibyteWord { - flushWord() + // Other type of groups starts with a char of that type, and ends with a + // char with different type or end of string. + if thisType != wordType { + if wordType != NONE { + chunks = append(chunks, word) + } word = string(r) - isMultibyteWord = false + wordType = thisType } else { word += string(r) } } - // The text may end without newlines, ensure flushing it or we can lose the - // last word. - flushWord() + if word != "" { + chunks = append(chunks, word) + } - return out + var line string = "" + var width int = 0 + // Reverse the chunk array so we can use it as a stack. + for i, j := 0, len(chunks)-1; i < j; i, j = i+1, j-1 { + chunks[i], chunks[j] = chunks[j], chunks[i] + } + for len(chunks) > 0 { + thisWord := chunks[len(chunks)-1] + wl := wordLen(thisWord) + if width+wl <= w { + line += chunks[len(chunks)-1] + chunks = chunks[:len(chunks)-1] + width += wl + if width == w && len(chunks) > 0{ + line += "\n" + width = 0 + } + } else if wl > w { + left, right := splitWord(chunks[len(chunks)-1], w) + line += left + "\n" + chunks[len(chunks)-1] = right + width = 0 + } else { + line += "\n" + width = 0 + } + } + + line = replayTermEscape(line, termSeqs) + return line } // wordLen return the length of a word, while ignoring the terminal escape diff --git a/util/text/text_test.go b/util/text/text_test.go index f5b15a43..480b1f1f 100644 --- a/util/text/text_test.go +++ b/util/text/text_test.go @@ -5,6 +5,7 @@ import ( "testing" ) + func TestWrap(t *testing.T) { cases := []struct { Input, Output string @@ -43,7 +44,7 @@ func TestWrap(t *testing.T) { // A tab counts as 4 characters. { "foo\nb\t r\n baz", - "foo\nb\n r\n baz", + "foo\nb\nr\n baz", 4, }, // Trailing whitespace is removed after used for wrapping. @@ -86,19 +87,19 @@ func TestWrap(t *testing.T) { // Complete example: { " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* baz \nBAM ", - " This\nis a\nlist:\n\n\n *\nfoo\n *\nbar\n\n\n *\nbaz\nBAM\n", + " This\nis a\nlist:\n\n *\nfoo\n *\nbar\n\n\n *\nbaz\nBAM\n", 6, }, // Handle chinese (wide characters) { - "婞一枳郲逴靲屮蜧曀殳,掫乇峔掮傎溒兀緉冘仜。", - "婞一枳郲逴靲\n屮蜧曀殳,掫\n乇峔掮傎溒兀\n緉冘仜。", + "一只敏捷的狐狸跳过了一只懒狗。", + "一只敏捷的狐\n狸跳过了一只\n懒狗。", 12, }, // Handle chinese with colors { - "婞一枳郲逴\x1b[31m靲屮蜧曀殳,掫乇峔掮傎溒\x1b[0m兀緉冘仜。", - "婞一枳郲逴\x1b[31m靲\n屮蜧曀殳,掫\n乇峔掮傎溒\x1b[0m兀\n緉冘仜。", + "一只敏捷的\x1b[31m狐狸跳过\x1b[0m了一只懒狗。", + "一只敏捷的\x1b[31m狐\n狸跳过\x1b[0m了一只\n懒狗。", 12, }, } @@ -106,7 +107,7 @@ func TestWrap(t *testing.T) { for i, tc := range cases { actual, lines := Wrap(tc.Input, tc.Lim) if actual != tc.Output { - t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n`\n%s`", + t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n\n`%s`", i, tc.Input, tc.Output, actual) } |