about summary refs log tree commit diff stats
path: root/util/text
diff options
context:
space:
mode:
Diffstat (limited to 'util/text')
-rw-r--r-- util/text/left_padded.go 42
-rw-r--r-- util/text/left_padded_test.go 56
-rw-r--r-- util/text/text.go 330
-rw-r--r-- util/text/text_test.go 376
4 files changed, 0 insertions, 804 deletions
diff --git a/util/text/left_padded.go b/util/text/left_padded.go
deleted file mode 100644
index eae65d34..00000000
--- a/util/text/left_padded.go
+++ /dev/null
@@ -1,42 +0,0 @@
-package text
-
-import (
- "bytes"
- "fmt"
- "github.com/mattn/go-runewidth"
- "strings"
-)
-
-// LeftPadMaxLine pads a string on the left by a specified amount and pads the
-// string on the right to fill the maxLength
-func LeftPadMaxLine(text string, length, leftPad int) string {
- var rightPart string = text
-
- scrWidth := runewidth.StringWidth(text)
- // truncate and ellipse if needed
- if scrWidth+leftPad > length {
- rightPart = runewidth.Truncate(text, length-leftPad, "…")
- } else if scrWidth+leftPad < length {
- rightPart = runewidth.FillRight(text, length-leftPad)
- }
-
- return fmt.Sprintf("%s%s",
- strings.Repeat(" ", leftPad),
- rightPart,
- )
-}
-
-// LeftPad left pad each line of the given text
-func LeftPad(text string, leftPad int) string {
- var result bytes.Buffer
-
- pad := strings.Repeat(" ", leftPad)
-
- for _, line := range strings.Split(text, "\n") {
- result.WriteString(pad)
- result.WriteString(line)
- result.WriteString("\n")
- }
-
- return result.String()
-}
diff --git a/util/text/left_padded_test.go b/util/text/left_padded_test.go
deleted file mode 100644
index 0be79e32..00000000
--- a/util/text/left_padded_test.go
+++ /dev/null
@@ -1,56 +0,0 @@
-package text
-
-import "testing"
-
-func TestLeftPadMaxLine(t *testing.T) {
- cases := []struct {
- input, output string
- maxValueLength int
- leftPad int
- }{
- {
- "foo",
- "foo ",
- 4,
- 0,
- },
- {
- "foofoofoo",
- "foo…",
- 4,
- 0,
- },
- {
- "foo",
- "foo ",
- 10,
- 0,
- },
- {
- "foo",
- " f…",
- 4,
- 2,
- },
- {
- "foofoofoo",
- " foo…",
- 6,
- 2,
- },
- {
- "foo",
- " foo ",
- 10,
- 2,
- },
- }
-
- for i, tc := range cases {
- result := LeftPadMaxLine(tc.input, tc.maxValueLength, tc.leftPad)
- if result != tc.output {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n\n`%s`",
- i, tc.input, tc.output, result)
- }
- }
-}
diff --git a/util/text/text.go b/util/text/text.go
deleted file mode 100644
index 39584d5d..00000000
--- a/util/text/text.go
+++ /dev/null
@@ -1,330 +0,0 @@
-package text
-
-import (
- "github.com/mattn/go-runewidth"
- "strings"
- "unicode/utf8"
-)
-
-// Force runewidth not to treat ambiguous runes as wide chars, so that things
-// like unicode ellipsis/up/down/left/right glyphs can have correct runewidth
-// and can be displayed correctly in terminals.
-func init() {
- runewidth.DefaultCondition.EastAsianWidth = false
-}
-
-// Wrap a text for an exact line size
-// Handle properly terminal color escape code
-func Wrap(text string, lineWidth int) (string, int) {
- return WrapLeftPadded(text, lineWidth, 0)
-}
-
-// Wrap a text for an exact line size with a left padding
-// Handle properly terminal color escape code
-func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) {
- var lines []string
- nbLine := 0
- pad := strings.Repeat(" ", leftPad)
-
- // tabs are formatted as 4 spaces
- text = strings.Replace(text, "\t", " ", -1)
- // NOTE: text is first segmented into lines so that softwrapLine can handle.
- for _, line := range strings.Split(text, "\n") {
- if line == "" || strings.TrimSpace(line) == "" {
- lines = append(lines, "")
- nbLine++
- } else {
- wrapped := softwrapLine(line, lineWidth-leftPad)
- firstLine := true
- for _, seg := range strings.Split(wrapped, "\n") {
- if firstLine {
- lines = append(lines, pad+strings.TrimRight(seg, " "))
- firstLine = false
- } else {
- lines = append(lines, pad+strings.TrimSpace(seg))
- }
- nbLine++
- }
- }
- }
- return strings.Join(lines, "\n"), nbLine
-}
-
-// Break a line into several lines so that each line consumes at most
-// 'textWidth' cells. Lines break at groups of white spaces and multibyte
-// chars. Nothing is removed from the original text so that it behaves like a
-// softwrap.
-//
-// Required: The line shall not contain '\n'
-//
-// WRAPPING ALGORITHM: The line is broken into non-breakable chunks, then line
-// breaks ("\n") are inserted between these groups so that the total length
-// between breaks does not exceed the required width. Words that are longer than
-// the textWidth are broken into pieces no longer than textWidth.
-//
-func softwrapLine(line string, textWidth int) string {
- // NOTE: terminal escapes are stripped out of the line so the algorithm is
- // simpler. Do not try to mix them in the wrapping algorithm, as it can get
- // complicated quickly.
- line1, termEscapes := extractTermEscapes(line)
-
- chunks := segmentLine(line1)
- // Reverse the chunk array so we can use it as a stack.
- for i, j := 0, len(chunks)-1; i < j; i, j = i+1, j-1 {
- chunks[i], chunks[j] = chunks[j], chunks[i]
- }
- var line2 string = ""
- var width int = 0
- for len(chunks) > 0 {
- thisWord := chunks[len(chunks)-1]
- wl := wordLen(thisWord)
- if width+wl <= textWidth {
- line2 += chunks[len(chunks)-1]
- chunks = chunks[:len(chunks)-1]
- width += wl
- if width == textWidth && len(chunks) > 0 {
- // NOTE: new line begins when current line is full and there are more
- // chunks to come.
- line2 += "\n"
- width = 0
- }
- } else if wl > textWidth {
- // NOTE: By default, long words are splited to fill the remaining space.
- // But if the long words is the first non-space word in the middle of the
- // line, preceeding spaces shall not be counted in word spliting.
- splitWidth := textWidth - width
- if strings.HasSuffix(line2, "\n"+strings.Repeat(" ", width)) {
- splitWidth += width
- }
- left, right := splitWord(chunks[len(chunks)-1], splitWidth)
- chunks[len(chunks)-1] = right
- line2 += left + "\n"
- width = 0
- } else {
- line2 += "\n"
- width = 0
- }
- }
-
- line3 := applyTermEscapes(line2, termEscapes)
- return line3
-}
-
-// EscapeItem: Storage of terminal escapes in a line. 'item' is the actural
-// escape command, and 'pos' is the index in the rune array where the 'item'
-// shall be inserted back. For example, the escape item in "F\x1b33mox" is
-// {"\x1b33m", 1}.
-type escapeItem struct {
- item string
- pos int
-}
-
-// Extract terminal escapes out of a line, returns a new line without terminal
-// escapes and a slice of escape items. The terminal escapes can be inserted
-// back into the new line at rune index 'item.pos' to recover the original line.
-//
-// Required: The line shall not contain "\n"
-//
-func extractTermEscapes(line string) (string, []escapeItem) {
- var termEscapes []escapeItem
- var line1 string
-
- pos := 0
- item := ""
- occupiedRuneCount := 0
- inEscape := false
- for i, r := range []rune(line) {
- if r == '\x1b' {
- pos = i
- item = string(r)
- inEscape = true
- continue
- }
- if inEscape {
- item += string(r)
- if r == 'm' {
- termEscapes = append(termEscapes, escapeItem{item, pos - occupiedRuneCount})
- occupiedRuneCount += utf8.RuneCountInString(item)
- inEscape = false
- }
- continue
- }
- line1 += string(r)
- }
-
- return line1, termEscapes
-}
-
-// Apply the extracted terminal escapes to the edited line. The only edit
-// allowed is to insert "\n" like that in softwrapLine. Callers shall ensure
-// this since this function is not able to check it.
-func applyTermEscapes(line string, escapes []escapeItem) string {
- if len(escapes) == 0 {
- return line
- }
-
- var out string = ""
-
- currPos := 0
- currItem := 0
- for _, r := range line {
- if currItem < len(escapes) && currPos == escapes[currItem].pos {
- // NOTE: We avoid terminal escapes at the end of a line by move them one
- // pass the end of line, so that algorithms who trim right spaces are
- // happy. But algorithms who trim left spaces are still unhappy.
- if r == '\n' {
- out += "\n" + escapes[currItem].item
- } else {
- out += escapes[currItem].item + string(r)
- currPos++
- }
- currItem++
- } else {
- if r != '\n' {
- currPos++
- }
- out += string(r)
- }
- }
-
- // Don't forget the trailing escape, if any.
- if currItem == len(escapes)-1 && currPos == escapes[currItem].pos {
- out += escapes[currItem].item
- }
-
- return out
-}
-
-// Segment a line into chunks, where each chunk consists of chars with the same
-// type and is not breakable.
-func segmentLine(s string) []string {
- var chunks []string
-
- var word string
- wordType := none
- flushWord := func() {
- chunks = append(chunks, word)
- word = ""
- wordType = none
- }
-
- for _, r := range s {
- // A WIDE_CHAR itself constitutes a chunk.
- thisType := runeType(r)
- if thisType == wideChar {
- if wordType != none {
- flushWord()
- }
- chunks = append(chunks, string(r))
- continue
- }
- // Other type of chunks starts with a char of that type, and ends with a
- // char with different type or end of string.
- if thisType != wordType {
- if wordType != none {
- flushWord()
- }
- word = string(r)
- wordType = thisType
- } else {
- word += string(r)
- }
- }
- if word != "" {
- flushWord()
- }
-
- return chunks
-}
-
-// Rune categories
-//
-// These categories are so defined that each category forms a non-breakable
-// chunk. It IS NOT the same as unicode code point categories.
-//
-const (
- none int = iota
- wideChar
- invisible
- shortUnicode
- space
- visibleAscii
-)
-
-// Determine the category of a rune.
-func runeType(r rune) int {
- rw := runewidth.RuneWidth(r)
- if rw > 1 {
- return wideChar
- } else if rw == 0 {
- return invisible
- } else if r > 127 {
- return shortUnicode
- } else if r == ' ' {
- return space
- } else {
- return visibleAscii
- }
-}
-
-// wordLen return the length of a word, while ignoring the terminal escape
-// sequences
-func wordLen(word string) int {
- length := 0
- escape := false
-
- for _, char := range word {
- if char == '\x1b' {
- escape = true
- }
- if !escape {
- length += runewidth.RuneWidth(rune(char))
- }
- if char == 'm' {
- escape = false
- }
- }
-
- return length
-}
-
-// splitWord split a word at the given length, while ignoring the terminal escape sequences
-func splitWord(word string, length int) (string, string) {
- runes := []rune(word)
- var result []rune
- added := 0
- escape := false
-
- if length == 0 {
- return "", word
- }
-
- for _, r := range runes {
- if r == '\x1b' {
- escape = true
- }
-
- width := runewidth.RuneWidth(r)
- if width+added > length {
- // wide character made the length overflow
- break
- }
-
- result = append(result, r)
-
- if !escape {
- added += width
- if added >= length {
- break
- }
- }
-
- if r == 'm' {
- escape = false
- }
- }
-
- leftover := runes[len(result):]
-
- return string(result), string(leftover)
-}
diff --git a/util/text/text_test.go b/util/text/text_test.go
deleted file mode 100644
index 5be25409..00000000
--- a/util/text/text_test.go
+++ /dev/null
@@ -1,376 +0,0 @@
-package text
-
-import (
- "reflect"
- "strings"
- "testing"
-)
-
-func TestWrap(t *testing.T) {
- cases := []struct {
- Input, Output string
- Lim int
- }{
- // A simple word passes through.
- {
- "foo",
- "foo",
- 4,
- },
- // Word breaking
- {
- "foobarbaz",
- "foob\narba\nz",
- 4,
- },
- // Lines are broken at whitespace.
- {
- "foo bar baz",
- "foo\nbar\nbaz",
- 4,
- },
- // Word breaking
- {
- "foo bars bazzes",
- "foo\nbars\nbazz\nes",
- 4,
- },
- // A word that would run beyond the width is wrapped.
- {
- "fo sop",
- "fo\nsop",
- 4,
- },
- // A tab counts as 4 characters.
- {
- "foo\nb\t r\n baz",
- "foo\nb\nr\n baz",
- 4,
- },
- // Trailing whitespace is removed after used for wrapping.
- // Runs of whitespace on which a line is broken are removed.
- {
- "foo \nb ar ",
- "foo\n\nb\nar\n",
- 4,
- },
- // An explicit line break at the end of the input is preserved.
- {
- "foo bar baz\n",
- "foo\nbar\nbaz\n",
- 4,
- },
- // Explicit break are always preserved.
- {
- "\nfoo bar\n\n\nbaz\n",
- "\nfoo\nbar\n\n\nbaz\n",
- 4,
- },
- // Ignore complete words with terminal color sequence
- {
- "foo \x1b[31mbar\x1b[0m baz",
- "foo\n\x1b[31mbar\x1b[0m\nbaz",
- 4,
- },
- // Handle words with colors sequence inside the word
- {
- "foo b\x1b[31mbar\x1b[0mr baz",
- "foo\nb\x1b[31mbar\n\x1b[0mr\nbaz",
- 4,
- },
- // Break words with colors sequence inside the word
- {
- "foo bb\x1b[31mbar\x1b[0mr baz",
- "foo\nbb\x1b[31mba\nr\x1b[0mr\nbaz",
- 4,
- },
- // Complete example:
- {
- " This is a list: \n\n\t* foo\n\t* bar\n\n\n\t* baz \nBAM ",
- " This\nis a\nlist:\n\n *\nfoo\n *\nbar\n\n\n *\nbaz\nBAM\n",
- 6,
- },
- // Handle chinese (wide characters)
- {
- "一只敏捷的狐狸跳过了一只懒狗。",
- "一只敏捷的狐\n狸跳过了一只\n懒狗。",
- 12,
- },
- // Handle chinese with colors
- {
- "一只敏捷的\x1b[31m狐狸跳过\x1b[0m了一只懒狗。",
- "一只敏捷的\x1b[31m狐\n狸跳过\x1b[0m了一只\n懒狗。",
- 12,
- },
- // Handle mixed wide and short characters
- {
- "敏捷 A quick 的狐狸 fox 跳过 jumps over a lazy 了一只懒狗 dog。",
- "敏捷 A quick\n的狐狸 fox\n跳过 jumps\nover a lazy\n了一只懒狗\ndog。",
- 12,
- },
- // Handle mixed wide and short characters with color
- {
- "敏捷 A \x1b31mquick 的狐狸 fox 跳\x1b0m过 jumps over a lazy 了一只懒狗 dog。",
- "敏捷 A \x1b31mquick\n的狐狸 fox\n跳\x1b0m过 jumps\nover a lazy\n了一只懒狗\ndog。",
- 12,
- },
- }
-
- for i, tc := range cases {
- actual, lines := Wrap(tc.Input, tc.Lim)
- if actual != tc.Output {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s`\n\nActual Output:\n\n`%s`",
- i, tc.Input, tc.Output, actual)
- }
-
- expected := len(strings.Split(tc.Output, "\n"))
- if expected != lines {
- t.Fatalf("Case %d Nb lines mismatch\nExpected:%d\nActual:%d",
- i, expected, lines)
- }
- }
-}
-
-func TestWrapLeftPadded(t *testing.T) {
- cases := []struct {
- input, output string
- lim, pad int
- }{
- {
- "The Lorem ipsum text is typically composed of pseudo-Latin words. It is commonly used as placeholder text to examine or demonstrate the visual effects of various graphic design.",
- ` The Lorem ipsum text is typically composed of
- pseudo-Latin words. It is commonly used as placeholder
- text to examine or demonstrate the visual effects of
- various graphic design.`,
- 59, 4,
- },
- // Handle Chinese
- {
- "婞一枳郲逴靲屮蜧曀殳,掫乇峔掮傎溒兀緉冘仜。郼牪艽螗媷錵朸一詅掜豗怙刉笀丌,楀棶乇矹迡搦囷圣亍昄漚粁仈祂。覂一洳袶揙楱亍滻瘯毌,掗屮柅軡菵腩乜榵毌夯。勼哻怌婇怤灟葠雺奷朾恦扰衪岨坋誁乇芚誙腞。冇笉妺悆浂鱦賌廌灱灱觓坋佫呬耴跣兀枔蓔輈。嵅咍犴膰痭瘰机一靬涽捊矷尒玶乇,煚塈丌岰陊鉖怞戉兀甿跾觓夬侄。棩岧汌橩僁螗玎一逭舴圂衪扐衲兀,嵲媕亍衩衿溽昃夯丌侄蒰扂丱呤。毰侘妅錣廇螉仴一暀淖蚗佶庂咺丌,輀鈁乇彽洢溦洰氶乇构碨洐巿阹。",
- ` 婞一枳郲逴靲屮蜧曀殳,掫乇峔掮傎溒兀緉冘仜。郼牪艽螗媷
- 錵朸一詅掜豗怙刉笀丌,楀棶乇矹迡搦囷圣亍昄漚粁仈祂。覂
- 一洳袶揙楱亍滻瘯毌,掗屮柅軡菵腩乜榵毌夯。勼哻怌婇怤灟
- 葠雺奷朾恦扰衪岨坋誁乇芚誙腞。冇笉妺悆浂鱦賌廌灱灱觓坋
- 佫呬耴跣兀枔蓔輈。嵅咍犴膰痭瘰机一靬涽捊矷尒玶乇,煚塈
- 丌岰陊鉖怞戉兀甿跾觓夬侄。棩岧汌橩僁螗玎一逭舴圂衪扐衲
- 兀,嵲媕亍衩衿溽昃夯丌侄蒰扂丱呤。毰侘妅錣廇螉仴一暀淖
- 蚗佶庂咺丌,輀鈁乇彽洢溦洰氶乇构碨洐巿阹。`,
- 59, 4,
- },
- // Handle long unbreakable words in a full stentence
- {
- "OT: there are alternatives to maintainer-/user-set priority, e.g. \"[user pain](http://www.lostgarden.com/2008/05/improving-bug-triage-with-user-pain.html)\".",
- ` OT: there are alternatives to maintainer-/user-set
- priority, e.g. "[user pain](http://www.lostgarden.com/
- 2008/05/improving-bug-triage-with-user-pain.html)".`,
- 58, 4,
- },
- }
-
- for i, tc := range cases {
- actual, lines := WrapLeftPadded(tc.input, tc.lim, tc.pad)
- if actual != tc.output {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n`\n%s`\n\nActual Output:\n`\n%s\n%s`",
- i, tc.input, tc.output,
- "|"+strings.Repeat("-", tc.lim-2)+"|",
- actual)
- }
-
- expected := len(strings.Split(tc.output, "\n"))
- if expected != lines {
- t.Fatalf("Case %d Nb lines mismatch\nExpected:%d\nActual:%d",
- i, expected, lines)
- }
- }
-}
-
-func TestWordLen(t *testing.T) {
- cases := []struct {
- Input string
- Length int
- }{
- // A simple word
- {
- "foo",
- 3,
- },
- // A simple word with colors
- {
- "\x1b[31mbar\x1b[0m",
- 3,
- },
- // Handle prefix and suffix properly
- {
- "foo\x1b[31mfoobarHoy\x1b[0mbaaar",
- 17,
- },
- // Handle chinese
- {
- "快檢什麼望對",
- 12,
- },
- // Handle chinese with colors
- {
- "快\x1b[31m檢什麼\x1b[0m望對",
- 12,
- },
- }
-
- for i, tc := range cases {
- l := wordLen(tc.Input)
- if l != tc.Length {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%d`\n\nActual Output:\n\n`%d`",
- i, tc.Input, tc.Length, l)
- }
- }
-}
-
-func TestSplitWord(t *testing.T) {
- cases := []struct {
- Input string
- Length int
- Result, Leftover string
- }{
- // A simple word passes through.
- {
- "foo",
- 4,
- "foo", "",
- },
- // Cut at the right place
- {
- "foobarHoy",
- 4,
- "foob", "arHoy",
- },
- // A simple word passes through with colors
- {
- "\x1b[31mbar\x1b[0m",
- 4,
- "\x1b[31mbar\x1b[0m", "",
- },
- // Cut at the right place with colors
- {
- "\x1b[31mfoobarHoy\x1b[0m",
- 4,
- "\x1b[31mfoob", "arHoy\x1b[0m",
- },
- // Handle prefix and suffix properly
- {
- "foo\x1b[31mfoobarHoy\x1b[0mbaaar",
- 4,
- "foo\x1b[31mf", "oobarHoy\x1b[0mbaaar",
- },
- // Cut properly with length = 0
- {
- "foo",
- 0,
- "", "foo",
- },
- // Handle chinese
- {
- "快檢什麼望對",
- 4,
- "快檢", "什麼望對",
- },
- {
- "快檢什麼望對",
- 5,
- "快檢", "什麼望對",
- },
- // Handle chinese with colors
- {
- "快\x1b[31m檢什麼\x1b[0m望對",
- 4,
- "快\x1b[31m檢", "什麼\x1b[0m望對",
- },
- }
-
- for i, tc := range cases {
- result, leftover := splitWord(tc.Input, tc.Length)
- if result != tc.Result || leftover != tc.Leftover {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`%s` - `%s`\n\nActual Output:\n\n`%s` - `%s`",
- i, tc.Input, tc.Result, tc.Leftover, result, leftover)
- }
- }
-}
-
-func TestExtractApplyTermEscapes(t *testing.T) {
- cases := []struct {
- Input string
- Output string
- TermEscapes []escapeItem
- }{
- // A plain ascii line with escapes.
- {
- "This \x1b[31mis an\x1b[0m example.",
- "This is an example.",
- []escapeItem{{"\x1b[31m", 5}, {"\x1b[0m", 10}},
- },
- // Escape at the end
- {
- "This \x1b[31mis an example.\x1b[0m",
- "This is an example.",
- []escapeItem{{"\x1b[31m", 5}, {"\x1b[0m", 19}},
- },
- // A plain wide line with escapes.
- {
- "一只敏捷\x1b[31m的狐狸\x1b[0m跳过了一只懒狗。",
- "一只敏捷的狐狸跳过了一只懒狗。",
- []escapeItem{{"\x1b[31m", 4}, {"\x1b[0m", 7}},
- },
- // A normal-wide mixed line with escapes.
- {
- "一只 A Quick 敏捷\x1b[31m的狐 Fox 狸\x1b[0m跳过了Dog一只懒狗。",
- "一只 A Quick 敏捷的狐 Fox 狸跳过了Dog一只懒狗。",
- []escapeItem{{"\x1b[31m", 13}, {"\x1b[0m", 21}},
- },
- }
-
- for i, tc := range cases {
- line2, escapes := extractTermEscapes(tc.Input)
- if line2 != tc.Output || !reflect.DeepEqual(escapes, tc.TermEscapes) {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\nLine: `%s`\nEscapes: `%+v`\n\nActual Output:\n\nLine: `%s`\nEscapes: `%+v`\n\n",
- i, tc.Input, tc.Output, tc.TermEscapes, line2, escapes)
- }
- line3 := applyTermEscapes(line2, escapes)
- if line3 != tc.Input {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Result:\n\n`%s`\n\nActual Result:\n\n`%s`\n\n",
- i, tc.Input, tc.Input, line3)
- }
- }
-}
-
-func TestSegmentLines(t *testing.T) {
- cases := []struct {
- Input string
- Output []string
- }{
- // A plain ascii line with escapes.
- {
- "This is an example.",
- []string{"This", " ", "is", " ", "an", " ", "example."},
- },
- // A plain wide line with escapes.
- {
- "一只敏捷的狐狸跳过了一只懒狗。",
- []string{"一", "只", "敏", "捷", "的", "狐", "狸", "跳", "过",
- "了", "一", "只", "懒", "狗", "。"},
- },
- // A complex stentence.
- {
- "This is a 'complex' example, where 一只 and English 混合了。",
- []string{"This", " ", "is", " ", "a", " ", "'complex'", " ", "example,",
- " ", "where", " ", "一", "只", " ", "and", " ", "English", " ", "混",
- "合", "了", "。"},
- },
- }
-
- for i, tc := range cases {
- chunks := segmentLine(tc.Input)
- if !reflect.DeepEqual(chunks, tc.Output) {
- t.Fatalf("Case %d Input:\n\n`%s`\n\nExpected Output:\n\n`[%s]`\n\nActual Output:\n\n`[%s]`\n\n",
- i, tc.Input, strings.Join(tc.Output, ", "), strings.Join(chunks, ", "))
- }
- }
-}