diff options
Diffstat (limited to 'filters')
-rw-r--r-- | filters/wrap.go | 267 | ||||
-rw-r--r-- | filters/wrap_test.go | 212 |
2 files changed, 479 insertions, 0 deletions
diff --git a/filters/wrap.go b/filters/wrap.go new file mode 100644 index 00000000..f1b335cc --- /dev/null +++ b/filters/wrap.go @@ -0,0 +1,267 @@ +package main + +import ( + "bufio" + "errors" + "flag" + "fmt" + "io" + "os" + "regexp" + "strings" + + "github.com/mattn/go-runewidth" +) + +type paragraph struct { + // email quote prefix, if any + quotes string + // list item indent, if any + leader string + // actual text of this paragraph + text string + // percentage of letters in text + proseRatio int + // text ends with a space + flowed bool + // paragraph is a list item + listItem bool +} + +func main() { + var err error + var width int + var reflow bool + var file string + var proseRatio int + var input *os.File + + fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError) + fs.IntVar(&width, "w", 80, "preferred wrap margin") + fs.BoolVar(&reflow, "r", false, + "reflow all paragraphs even if no trailing space") + fs.IntVar(&proseRatio, "l", 50, + "minimum percentage of letters in a line to be considered a paragaph") + fs.StringVar(&file, "f", "", "read from file instead of stdin") + _ = fs.Parse(os.Args[1:]) + + if file != "" { + input, err = os.OpenFile(file, os.O_RDONLY, 0o644) + if err != nil { + goto end + } + } else { + input = os.Stdin + } + + err = wrap(input, os.Stdout, width, reflow, proseRatio) + +end: + if err != nil && !errors.Is(err, io.EOF) { + fmt.Fprintf(os.Stderr, "error: %s\n", err) + os.Exit(1) + } +} + +func wrap( + in io.Reader, out io.Writer, width int, reflow bool, proseRatio int, +) error { + var para *paragraph = nil + var line string + var err error + + if patchSubjectRe.MatchString(os.Getenv("AERC_SUBJECT")) { + // never reflow patches + _, err = io.Copy(out, in) + } else { + reader := bufio.NewReader(in) + line, err = reader.ReadString('\n') + for ; err == nil; line, err = reader.ReadString('\n') { + next := parse(line) + switch { + case para == nil: + para = next + case para.isContinuation(next, reflow, proseRatio): + para.join(next) + default: + para.write(out, width, proseRatio) + para = next + } + } + if para != nil { + para.write(out, width, proseRatio) + } + } + + return err +} + +// Parse a line of text into a paragraph structure +func parse(line string) *paragraph { + p := new(paragraph) + q := 0 + t := 0 + line = strings.TrimRight(line, "\r\n") + // tabs cause a whole lot of troubles, replace them with 8 spaces + line = strings.ReplaceAll(line, "\t", " ") + + // Use integer offsets to find relevant positions in the line + // + // > > > 2) blah blah blah blah + // ^--------+-----^ + // q | t + // end of quotes | start of text + // | + // list item leader + + // detect the end of quotes prefix if any + for q < len(line) && line[q] == '>' { + q += 1 + if q < len(line) && line[q] == ' ' { + q += 1 + } + } + + // detect list item leader + loc := listItemRe.FindStringIndex(line[q:]) + if loc != nil { + // start of list item + p.listItem = true + } else { + // maybe list item continuation + loc = leadingSpaceRe.FindStringIndex(line[q:]) + } + if loc != nil { + t = q + loc[1] + } else { + // no list at all + t = q + } + + // check if there is trailing whitespace, indicating format=flowed + loc = trailingSpaceRe.FindStringIndex(line[t:]) + if loc != nil { + p.flowed = true + // trim whitespace + line = line[:t+loc[0]] + } + + p.quotes = line[:q] + p.leader = strings.Repeat(" ", runewidth.StringWidth(line[q:t])) + p.text = line[q:] + + // compute the ratio of letters in the actual text + onlyLetters := strings.TrimLeft(line[q:], " ") + totalLen := runewidth.StringWidth(onlyLetters) + if totalLen == 0 { + // to avoid division by zero + totalLen = 1 + } + onlyLetters = notLetterRe.ReplaceAllLiteralString(onlyLetters, "") + p.proseRatio = 100 * runewidth.StringWidth(onlyLetters) / totalLen + + return p +} + +// Return true if a paragraph is a continuation of the current one. +func (p *paragraph) isContinuation( + next *paragraph, reflow bool, proseRatio int, +) bool { + switch { + case next.listItem: + // new list items always start a new paragraph + return false + case next.proseRatio < proseRatio || p.proseRatio < proseRatio: + // does not look like prose, maybe ascii art + return false + case next.quotes != p.quotes || next.leader != p.leader: + // quote level and/or list item leader have changed + return false + case len(strings.Trim(next.text, " ")) == 0: + // empty line + return false + case p.flowed: + // current paragraph has trailing space, indicating + // format=flowed + return true + case reflow: + // user forced paragraph reflow on the command line + return true + default: + return false + } +} + +// Join next paragraph into current one. +func (p *paragraph) join(next *paragraph) { + if p.text == "" { + p.text = next.text + } else { + p.text = p.text + " " + strings.Trim(next.text, " ") + } + p.proseRatio = (p.proseRatio + next.proseRatio) / 2 + p.flowed = next.flowed +} + +// Write a paragraph, wrapping at words boundaries. +// +// Only try to do word wrapping on things that look like prose. When the text +// contains too many non-letter characters, print it as-is. +func (p *paragraph) write(out io.Writer, margin int, proseRatio int) { + leader := "" + more := true + quotesWidth := runewidth.StringWidth(p.quotes) + for more { + var line string + width := quotesWidth + runewidth.StringWidth(leader) + remain := runewidth.StringWidth(p.text) + if width+remain <= margin || p.proseRatio < proseRatio { + // whole paragraph fits on a single line + line = p.text + p.text = "" + more = false + } else { + // find split point, preferably before margin + split := -1 + w := 0 + for i, r := range p.text { + w += runewidth.RuneWidth(r) + if width+w > margin && split != -1 { + break + } + if r == ' ' { + split = i + } + } + if split == -1 { + // no space found to split, print a long line + line = p.text + p.text = "" + more = false + } else { + line = p.text[:split] + // find start of next word + for split < len(p.text) && p.text[split] == ' ' { + split++ + } + if split < len(p.text) { + p.text = p.text[split:] + } else { + // only trailing whitespace, we're done + p.text = "" + more = false + } + } + } + fmt.Fprintf(out, "%s%s%s\n", p.quotes, leader, line) + leader = p.leader + } +} + +var ( + patchSubjectRe = regexp.MustCompile(`\bPATCH\b`) + listItemRe = regexp.MustCompile(`^\s*([\-\*\.]|\d{1,2}[\)\]\.])\s+`) + leadingSpaceRe = regexp.MustCompile(`^\s+`) + trailingSpaceRe = regexp.MustCompile(`\s+$`) + notLetterRe = regexp.MustCompile(`[^\pL]`) +) diff --git a/filters/wrap_test.go b/filters/wrap_test.go new file mode 100644 index 00000000..f8d82cbe --- /dev/null +++ b/filters/wrap_test.go @@ -0,0 +1,212 @@ +package main + +import ( + "bytes" + "errors" + "io" + "testing" +) + +type vector struct { + name string + in string + out string + width int + reflow bool + ratio int +} + +var vectors = []vector{ + { + name: "simple", + in: `long line that exceeds margin by many words +`, + width: 30, + reflow: false, + ratio: 50, + out: `long line that exceeds margin +by many words +`, + }, + { + name: "two-paragraphs", + in: `this is one long paragraph +this is another long one +`, + width: 20, + reflow: false, + ratio: 50, + out: `this is one long +paragraph +this is another +long one +`, + }, + { + name: "reflow", + in: `this is one long paragraph +this is another long one +`, + width: 20, + reflow: true, + ratio: 50, + out: `this is one long +paragraph this is +another long one +`, + }, + { + name: "quotes", + in: `Let's play with quotes: + +>> Hi there how are you doing? +> Great thanks + +How rude. + +>> Fantastic. Let's go wrap some words. +`, + width: 20, + reflow: false, + ratio: 50, + out: `Let's play with +quotes: + +>> Hi there how are +>> you doing? +> Great thanks + +How rude. + +>> Fantastic. Let's +>> go wrap some +>> words. +`, + }, + { + name: "ascii-art", + in: `This is a nice drawing, isn't it? + ++-------------------+ +| foobaz | ++-------------------+ + | + | ++-------------------+ +| foobar | ++-------------------+ +`, + width: 15, + ratio: 50, + reflow: true, + out: `This is a nice +drawing, isn't +it? + ++-------------------+ +| foobaz | ++-------------------+ + | + | ++-------------------+ +| foobar | ++-------------------+ +`, + }, + { + name: "list-items", + in: `Shopping list: + + - milk + - chocolate + - cookies (please, with nuts) +`, + width: 20, + reflow: false, + ratio: 50, + out: `Shopping list: + + - milk + - chocolate + - cookies + (please, with + nuts) +`, + }, + { + name: "list-items-reflow", + in: `Shopping list: + + * milk + * chocolate + * cookies + (please, + with nuts) +`, + width: 100, + reflow: true, + ratio: 30, + out: `Shopping list: + + * milk + * chocolate + * cookies (please, with nuts) +`, + }, + { + name: "long-url", + in: `Please follow this ugly link: +http://foobaz.org/xapapzolmkdmldfk-fldskjflsk-cisjoij/onoes.jsp?xxx=2&yyy=3 +`, + width: 20, + reflow: true, + ratio: 50, + out: `Please follow this +ugly link: +http://foobaz.org/xapapzolmkdmldfk-fldskjflsk-cisjoij/onoes.jsp?xxx=2&yyy=3 +`, + }, + { + name: "format=flowed", + in: "Oh, \nI'm \nso \nhip \nI \nuse \nformat=flowed.\n", + width: 30, + reflow: false, + ratio: 50, + out: "Oh, I'm so hip I use\nformat=flowed.\n", + }, + { + name: "non-ascii", + in: `Lorem ççççç ççççç ççç ççççç çç ççç ççççç çççççççç ççç çç ççççç ççççççççççç ççççç + +Lorem жжжжж жжжжж жжж жжжжж жж жжж жжжжж жжжжжжжж жжж жж жжжжж жжжжжжжжжжж жжжжж жжжжжжжж +`, + width: 40, + reflow: false, + ratio: 50, + out: `Lorem ççççç ççççç ççç ççççç çç ççç +ççççç çççççççç ççç çç ççççç ççççççççççç +ççççç + +Lorem жжжжж жжжжж жжж жжжжж жж жжж +жжжжж жжжжжжжж жжж жж жжжжж жжжжжжжжжжж +жжжжж жжжжжжжж +`, + }, +} + +func TestWrap(t *testing.T) { + for _, vec := range vectors { + t.Run(vec.name, func(t *testing.T) { + r := bytes.NewReader([]byte(vec.in)) + var buf bytes.Buffer + err := wrap(r, &buf, vec.width, vec.reflow, vec.ratio) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("[%s]: %v", vec.name, err) + } + if buf.String() != vec.out { + t.Errorf("[%s] invalid format:\n%q\nexpected\n%q", + vec.name, buf.String(), vec.out) + } + }) + } +} |