path: root/util/text/text.go

            

package text

import (
	"github.com/mattn/go-runewidth"
	"strings"
	"unicode/utf8"
)

// init adjusts the go-runewidth defaults used by the width computations in
// this package.
//
// Force runewidth not to treat ambiguous runes as wide chars, so that things
// like unicode ellipsis/up/down/left/right glyphs can have correct runewidth
// and can be displayed correctly in terminals.
func init() {
	// NOTE: DefaultCondition is a package-level variable of go-runewidth, so
	// this setting is not local to this package.
	runewidth.DefaultCondition.EastAsianWidth = false
}

// Wrap wraps a text so that every line fits in lineWidth cells, without any
// left padding. Terminal color escape codes are handled properly.
//
// It returns the wrapped text and the number of lines it contains.
func Wrap(text string, lineWidth int) (string, int) {
	const noLeftPad = 0
	wrapped, nbLine := WrapLeftPadded(text, lineWidth, noLeftPad)
	return wrapped, nbLine
}

// WrapLeftPadded wraps a text so that every line fits in lineWidth cells,
// where each non-blank line starts with leftPad spaces. Terminal color escape
// codes are handled properly.
//
// Tabs are expanded to 4 spaces before wrapping. Lines that are empty or
// contain only whitespace come out as empty lines, without padding.
//
// It returns the wrapped text and the number of lines it contains.
func WrapLeftPadded(text string, lineWidth int, leftPad int) (string, int) {
	indent := strings.Repeat(" ", leftPad)
	usableWidth := lineWidth - leftPad

	// Tabs are formatted as 4 spaces.
	expanded := strings.Replace(text, "\t", "    ", -1)

	var out []string
	// NOTE: softwrapLine requires input without "\n", so the text is fed to it
	// one line at a time.
	for _, raw := range strings.Split(expanded, "\n") {
		if strings.TrimSpace(raw) == "" {
			out = append(out, "")
			continue
		}

		for i, seg := range strings.Split(softwrapLine(raw, usableWidth), "\n") {
			if i == 0 {
				// The first segment keeps its leading spaces (the original
				// indentation of the line); only trailing spaces are dropped.
				out = append(out, indent+strings.TrimRight(seg, " "))
			} else {
				// Continuation segments are trimmed on both sides.
				out = append(out, indent+strings.TrimSpace(seg))
			}
		}
	}

	return strings.Join(out, "\n"), len(out)
}

// softwrapLine breaks a line into several lines so that each line consumes at
// most 'textWidth' cells. Lines break at groups of white spaces and multibyte
// chars. Nothing is removed from the original text so that it behaves like a
// softwrap.
//
// Required: The line shall not contain '\n'
//
// WRAPPING ALGORITHM: The line is broken into non-breakable chunks, then line
// breaks ("\n") are inserted between these groups so that the total length
// between breaks does not exceed the required width. Words that are longer than
// the textWidth are broken into pieces: the first piece fills what is left of
// the current line, the following ones are no longer than textWidth.
//
// Two exceptions to the width limit:
//   - On a continuation line that so far holds only spaces, a long word is
//     split at the full textWidth, ignoring those spaces. This relies on the
//     caller trimming leading spaces of continuation lines, as WrapLeftPadded
//     does.
//   - When not even a single rune fits (textWidth <= 0, or a wide rune with
//     textWidth == 1), one rune is emitted per line anyway so that the
//     function always terminates.
func softwrapLine(line string, textWidth int) string {
	// NOTE: terminal escapes are stripped out of the line so the algorithm is
	// simpler. Do not try to mix them in the wrapping algorithm, as it can get
	// complicated quickly.
	line1, termEscapes := extractTermEscapes(line)

	chunks := segmentLine(line1)

	var wrapped strings.Builder
	width := 0         // cells already used on the current output line
	firstLine := true  // no line break has been emitted yet
	onlySpaces := true // the current output line holds nothing but spaces

	breakLine := func() {
		wrapped.WriteByte('\n')
		width = 0
		firstLine = false
		onlySpaces = true
	}

	for i := 0; i < len(chunks); {
		word := chunks[i]
		wl := wordLen(word)

		switch {
		case width+wl <= textWidth:
			wrapped.WriteString(word)
			width += wl
			if strings.Trim(word, " ") != "" {
				onlySpaces = false
			}
			i++
			if width == textWidth && i < len(chunks) {
				// NOTE: new line begins when current line is full and there are
				// more chunks to come.
				breakLine()
			}

		case wl > textWidth:
			// The word can never fit on a single line: split it. Only the room
			// left on the current line is used, so the line does not overflow.
			room := textWidth - width
			if !firstLine && onlySpaces {
				// Leading spaces of a continuation line are trimmed by the
				// caller, so they shall not reduce the room for the word.
				room = textWidth
			}
			left, right := splitWord(word, room)
			if left == "" && width == 0 {
				// Nothing fits even on an empty line. Emit one rune anyway,
				// otherwise the chunk would never shrink and the loop would
				// never end.
				_, size := utf8.DecodeRuneInString(word)
				left, right = word[:size], word[size:]
			}
			wrapped.WriteString(left)
			if right == "" {
				// The whole chunk has been consumed (forced-rune case).
				i++
				if i < len(chunks) {
					breakLine()
				}
				continue
			}
			chunks[i] = right
			breakLine()

		default:
			// The word fits on a line of its own, but not on what is left of
			// the current one.
			breakLine()
		}
	}

	return applyTermEscapes(wrapped.String(), termEscapes)
}

// EscapeItem stores a terminal escape taken out of a line. 'item' is the
// actual escape command, and 'pos' is the index in the rune array of the
// escape-free line where the 'item' shall be inserted back. For example, the
// escape item in "F\x1b33mox" is {"\x1b33m", 1}.
type EscapeItem struct {
	item string
	pos  int
}

// extractTermEscapes extracts terminal escapes out of a line. It returns a new
// line without terminal escapes and a slice of escape items, ordered by
// position. The terminal escapes can be inserted back into the new line at rune
// index 'item.pos' to recover the original line.
//
// An escape starts at '\x1b' and ends at the next 'm'. An escape that is never
// terminated (end of line, or another '\x1b' showing up first) is recorded as
// an item as well, so that no part of the original line is lost.
//
// Required: The line shall not contain "\n"
func extractTermEscapes(line string) (string, []EscapeItem) {
	var termEscapes []EscapeItem
	var line1 strings.Builder

	var item strings.Builder // escape currently being collected
	pos := 0                 // rune index in 'line' where that escape starts
	occupiedRuneCount := 0   // runes of 'line' taken by the escapes recorded so far
	inEscape := false

	// record stores the collected escape, positioned relative to the
	// escape-free line.
	record := func() {
		seq := item.String()
		termEscapes = append(termEscapes, EscapeItem{seq, pos - occupiedRuneCount})
		occupiedRuneCount += utf8.RuneCountInString(seq)
		item.Reset()
		inEscape = false
	}

	i := 0 // rune index in 'line'
	for _, r := range line {
		switch {
		case r == '\x1b':
			if inEscape {
				// A new escape begins before the previous one ended: keep the
				// partial one instead of dropping it.
				record()
			}
			pos = i
			item.WriteRune(r)
			inEscape = true
		case inEscape:
			item.WriteRune(r)
			if r == 'm' {
				record()
			}
		default:
			line1.WriteRune(r)
		}
		i++
	}
	if inEscape {
		// Unterminated escape at the end of the line.
		record()
	}

	return line1.String(), termEscapes
}

// applyTermEscapes applies the extracted terminal escapes to the edited line.
// The only edit allowed is to insert "\n" like that in softwrapLine. Callers
// shall ensure this since this function is not able to check it.
//
// Each escape is written right before the rune whose index (line breaks not
// counted) equals its 'pos'. Several escapes may share the same position.
// Escapes positioned past the last rune are appended at the end of the line.
func applyTermEscapes(line string, escapes []EscapeItem) string {
	if len(escapes) == 0 {
		return line
	}

	var out strings.Builder

	currPos := 0
	currItem := 0
	for _, r := range line {
		if r == '\n' {
			// NOTE: We avoid terminal escapes at the end of a line by moving
			// them one past the end of line, so that algorithms who trim right
			// spaces are happy. But algorithms who trim left spaces are still
			// unhappy. Inserted line breaks do not count as a position.
			out.WriteRune(r)
			continue
		}
		for currItem < len(escapes) && escapes[currItem].pos <= currPos {
			out.WriteString(escapes[currItem].item)
			currItem++
		}
		out.WriteRune(r)
		currPos++
	}

	// Do not forget the trailing escapes (e.g. a final color reset).
	for ; currItem < len(escapes); currItem++ {
		out.WriteString(escapes[currItem].item)
	}

	return out.String()
}

// segmentLine cuts a line into chunks, where each chunk consists of chars of
// the same category (see runeType) and is not breakable. Concatenating the
// chunks gives back the line.
func segmentLine(s string) []string {
	var (
		chunks  []string
		pending strings.Builder // chunk being accumulated
		current = NONE          // category of the pending chunk, NONE if there is none
	)

	for _, r := range s {
		kind := runeType(r)

		// A rune of the same category as the pending chunk extends it, except
		// for wide chars which never group together.
		if kind != WIDE_CHAR && kind == current {
			pending.WriteRune(r)
			continue
		}

		// Category boundary: the pending chunk, if any, is complete.
		if current != NONE {
			chunks = append(chunks, pending.String())
			pending.Reset()
			current = NONE
		}

		// A WIDE_CHAR itself constitutes a chunk.
		if kind == WIDE_CHAR {
			chunks = append(chunks, string(r))
			continue
		}

		// Any other rune opens a new chunk of its own category.
		pending.WriteRune(r)
		current = kind
	}

	// The last chunk ends with the string.
	if pending.Len() > 0 {
		chunks = append(chunks, pending.String())
	}

	return chunks
}

// Rune categories.
//
// The categories are defined so that a run of runes of the same category forms
// a non-breakable chunk. They ARE NOT the same as unicode code point
// categories.
const (
	// NONE means "no category"; it is used while no chunk is being built.
	NONE = -1

	// The categories below take the values 1 to 5: iota is already 1 on the
	// WIDE_CHAR line since NONE is the first entry of this block.
	WIDE_CHAR = iota
	INVISIBLE
	SHORT_UNICODE
	SPACE
	VISIBLE_ASCII
)

// runeType determines the category of a rune. The display width is looked at
// first (wide, then zero-width); runes one cell wide are then told apart as
// non-ASCII, space, or any other ASCII char.
func runeType(r rune) int {
	switch rw := runewidth.RuneWidth(r); {
	case rw > 1:
		return WIDE_CHAR
	case rw == 0:
		return INVISIBLE
	case r > 127:
		return SHORT_UNICODE
	case r == ' ':
		return SPACE
	default:
		return VISIBLE_ASCII
	}
}

// wordLen returns the display width of a word, in cells, while ignoring the
// terminal escape sequences. An escape sequence starts at '\x1b' and runs up
// to and including the next 'm'.
func wordLen(word string) int {
	cells := 0
	inEscape := false

	for _, r := range word {
		switch {
		case r == '\x1b':
			inEscape = true
		case !inEscape:
			cells += runewidth.RuneWidth(r)
		}
		// 'm' closes the escape sequence; it has no effect outside of one.
		if r == 'm' {
			inEscape = false
		}
	}

	return cells
}

// splitWord splits a word at the given length (in cells), while ignoring the
// terminal escape sequences. It returns the part that fits in 'length' cells
// and the leftover. With a length of 0, the whole word is the leftover.
func splitWord(word string, length int) (string, string) {
	if length == 0 {
		return "", word
	}

	runes := []rune(word)
	cut := 0   // number of leading runes that go to the left part
	cells := 0 // cells taken by the left part so far
	inEscape := false

	for _, r := range runes {
		if r == '\x1b' {
			inEscape = true
		}

		rw := runewidth.RuneWidth(r)
		if cells+rw > length {
			// A wide character would make the left part overflow.
			break
		}

		cut++

		if !inEscape {
			cells += rw
			if cells >= length {
				break
			}
		}

		if r == 'm' {
			inEscape = false
		}
	}

	return string(runes[:cut]), string(runes[cut:])
}

// minInt returns the smaller of a and b.
func minInt(a, b int) int {
	if b < a {
		return b
	}
	return a
}