about summary refs log tree commit diff stats
path: root/filters
diff options
context:
space:
mode:
Diffstat (limited to 'filters')
-rw-r--r-- filters/wrap.go 267
-rw-r--r-- filters/wrap_test.go 212
2 files changed, 479 insertions, 0 deletions
diff --git a/filters/wrap.go b/filters/wrap.go
new file mode 100644
index 00000000..f1b335cc
--- /dev/null
+++ b/filters/wrap.go
@@ -0,0 +1,267 @@
+package main
+
+import (
+ "bufio"
+ "errors"
+ "flag"
+ "fmt"
+ "io"
+ "os"
+ "regexp"
+ "strings"
+
+ "github.com/mattn/go-runewidth"
+)
+
+// paragraph holds the pieces of one logical block of text: the quote
+// prefix, the list indentation and the text itself, plus the hints used to
+// decide whether following lines may be merged into it.
+type paragraph struct {
+	// quotes is the leading email quote prefix; empty when the text is
+	// not quoted
+	quotes string
+	// leader is the list item indentation; empty outside of lists
+	leader string
+	// text is the content of the paragraph
+	text string
+	// proseRatio is the share of letters in text, as a percentage
+	proseRatio int
+	// flowed is set when the text ended with a space
+	flowed bool
+	// listItem is set when the paragraph is a list item
+	listItem bool
+}
+
+// main is the entry point of the wrap filter. It parses the command line
+// flags, picks the input (the file given with -f, standard input
+// otherwise) and hands it to wrap, which writes to standard output.
+//
+// Any error other than io.EOF is reported on standard error and makes the
+// process exit with status 1.
+func main() {
+	var (
+		width      int
+		reflow     bool
+		file       string
+		proseRatio int
+		err        error
+	)
+
+	fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
+	fs.IntVar(&width, "w", 80, "preferred wrap margin")
+	fs.BoolVar(&reflow, "r", false,
+		"reflow all paragraphs even if no trailing space")
+	fs.IntVar(&proseRatio, "l", 50,
+		"minimum percentage of letters in a line to be considered a paragraph")
+	fs.StringVar(&file, "f", "", "read from file instead of stdin")
+	// The flag set uses flag.ExitOnError, so Parse terminates the process
+	// by itself on invalid arguments; its return value can be ignored.
+	_ = fs.Parse(os.Args[1:])
+
+	input := os.Stdin
+	if file != "" {
+		input, err = os.Open(file)
+	}
+	if err == nil {
+		err = wrap(input, os.Stdout, width, reflow, proseRatio)
+		if file != "" {
+			// Closed by hand: os.Exit below would skip a deferred
+			// Close. The file was only read, so a Close error is
+			// of no consequence.
+			_ = input.Close()
+		}
+	}
+
+	if err != nil && !errors.Is(err, io.EOF) {
+		fmt.Fprintf(os.Stderr, "error: %s\n", err)
+		os.Exit(1)
+	}
+}
+
+// wrap reads text from in line by line and writes it to out with its
+// paragraphs re-wrapped at width columns.
+//
+// Each line is parsed into a paragraph. A line is merged into the pending
+// paragraph when isContinuation accepts it (reflow and proseRatio are
+// passed through to that check); otherwise the pending paragraph is written
+// and the line starts a new one. When the AERC_SUBJECT environment variable
+// matches patchSubjectRe, the input is copied to out verbatim instead.
+//
+// A final line lacking a terminating newline is processed like any other.
+// wrap returns nil once the input has been read to its end, or the read or
+// copy error that interrupted it. Failures to write a wrapped paragraph are
+// not reported because paragraph.write returns nothing.
+func wrap(
+	in io.Reader, out io.Writer, width int, reflow bool, proseRatio int,
+) error {
+	if patchSubjectRe.MatchString(os.Getenv("AERC_SUBJECT")) {
+		// never reflow patches
+		_, err := io.Copy(out, in)
+		return err
+	}
+
+	var para *paragraph
+	reader := bufio.NewReader(in)
+	for {
+		line, err := reader.ReadString('\n')
+		// ReadString hands back whatever it read along with the
+		// error, so the last line of an input that does not end with
+		// a newline arrives together with io.EOF and must not be lost.
+		if line != "" {
+			next := parse(line)
+			switch {
+			case para == nil:
+				para = next
+			case para.isContinuation(next, reflow, proseRatio):
+				para.join(next)
+			default:
+				para.write(out, width, proseRatio)
+				para = next
+			}
+		}
+		if err == nil {
+			continue
+		}
+		// flush what has been accumulated, even on a read failure
+		if para != nil {
+			para.write(out, width, proseRatio)
+		}
+		if errors.Is(err, io.EOF) {
+			// reaching the end of the input is the normal outcome
+			return nil
+		}
+		return err
+	}
+}
+
+// parse breaks one input line down into a paragraph structure.
+//
+// The line terminator is removed and tabs are expanded to spaces first. The
+// line is then split into the email quote prefix, an optional list item
+// leader and the text. Whitespace at the end of the text is cut off and
+// recorded in the flowed field. The returned text starts right after the
+// quote prefix, so it still holds the list marker or indentation, if any.
+func parse(line string) *paragraph {
+	// number of spaces a tab is expanded to
+	const tabWidth = 8
+
+	p := new(paragraph)
+	q := 0
+	t := 0
+	line = strings.TrimRight(line, "\r\n")
+	// tabs cause a whole lot of troubles, replace them with spaces
+	line = strings.ReplaceAll(line, "\t", strings.Repeat(" ", tabWidth))
+
+	// Use integer offsets to find relevant positions in the line
+	//
+	//   > > >    2)   blah blah blah blah
+	//         ^---+---^
+	//         q   |   t
+	//             |
+	//     list item leader
+	//
+	// q is the end of the quotes, t is the start of the text.
+
+	// detect the end of quotes prefix if any
+	for q < len(line) && line[q] == '>' {
+		q++
+		if q < len(line) && line[q] == ' ' {
+			q++
+		}
+	}
+
+	// detect list item leader
+	loc := listItemRe.FindStringIndex(line[q:])
+	if loc != nil {
+		// start of list item
+		p.listItem = true
+	} else {
+		// maybe list item continuation
+		loc = leadingSpaceRe.FindStringIndex(line[q:])
+	}
+	if loc != nil {
+		t = q + loc[1]
+	} else {
+		// no list at all
+		t = q
+	}
+
+	// check if there is trailing whitespace, indicating format=flowed
+	loc = trailingSpaceRe.FindStringIndex(line[t:])
+	if loc != nil {
+		p.flowed = true
+		// trim whitespace
+		line = line[:t+loc[0]]
+	}
+
+	p.quotes = line[:q]
+	// continuation lines are indented by as many columns as the leader
+	// occupies
+	p.leader = strings.Repeat(" ", runewidth.StringWidth(line[q:t]))
+	p.text = line[q:]
+
+	// compute the ratio of letters in the actual text
+	trimmed := strings.TrimLeft(line[q:], " ")
+	totalLen := runewidth.StringWidth(trimmed)
+	if totalLen == 0 {
+		// to avoid division by zero
+		totalLen = 1
+	}
+	letters := notLetterRe.ReplaceAllLiteralString(trimmed, "")
+	p.proseRatio = 100 * runewidth.StringWidth(letters) / totalLen
+
+	return p
+}
+
+// isContinuation reports whether next should be merged into p rather than
+// start a paragraph of its own.
+//
+// reflow forces merging of lines that look like prose even when p does not
+// end with a space. proseRatio is the minimum percentage of letters both
+// paragraphs must have to be treated as prose.
+func (p *paragraph) isContinuation(
+	next *paragraph, reflow bool, proseRatio int,
+) bool {
+	// a new list item always opens a new paragraph
+	if next.listItem {
+		return false
+	}
+	// either side does not look like prose, maybe ascii art
+	if p.proseRatio < proseRatio || next.proseRatio < proseRatio {
+		return false
+	}
+	// the quote level and/or the list item leader differ
+	if p.quotes != next.quotes || p.leader != next.leader {
+		return false
+	}
+	// an empty line separates paragraphs
+	if strings.Trim(next.text, " ") == "" {
+		return false
+	}
+	// merge when the current paragraph ends with a space (format=flowed)
+	// or when the user asked for reflow on the command line
+	return p.flowed || reflow
+}
+
+// join appends the text of next to p, separated by a single space.
+//
+// The prose ratio of p becomes the mean of both ratios and its flowed flag
+// is taken from next.
+func (p *paragraph) join(next *paragraph) {
+	merged := next.text
+	if p.text != "" {
+		merged = p.text + " " + strings.Trim(next.text, " ")
+	}
+	p.text = merged
+	p.flowed = next.flowed
+	p.proseRatio = (p.proseRatio + next.proseRatio) / 2
+}
+
+// write prints the paragraph to out, breaking it at spaces so that lines
+// stay within margin columns where possible.
+//
+// Every output line starts with the quote prefix. The first line gets no
+// added indentation; the following ones are indented with the list item
+// leader. A paragraph whose prose ratio is below proseRatio is printed
+// as-is on a single line. Text is only ever broken at a space, so a word
+// wider than the margin ends up on a long line. The paragraph text is
+// consumed in the process.
+func (p *paragraph) write(out io.Writer, margin int, proseRatio int) {
+	prefixWidth := runewidth.StringWidth(p.quotes)
+	indent := ""
+	for {
+		used := prefixWidth + runewidth.StringWidth(indent)
+
+		// byte offset of the space to break at, -1 to print everything
+		cut := -1
+		if p.proseRatio >= proseRatio &&
+			used+runewidth.StringWidth(p.text) > margin {
+			// keep the last space met before running past the
+			// margin, or the first one after it if there was none
+			seen := 0
+			for i, r := range p.text {
+				seen += runewidth.RuneWidth(r)
+				if cut != -1 && used+seen > margin {
+					break
+				}
+				if r == ' ' {
+					cut = i
+				}
+			}
+		}
+
+		line, rest := p.text, ""
+		if cut != -1 {
+			line = p.text[:cut]
+			// skip the spaces up to the start of the next word
+			rest = strings.TrimLeft(p.text[cut:], " ")
+		}
+		p.text = rest
+
+		fmt.Fprintf(out, "%s%s%s\n", p.quotes, indent, line)
+		if rest == "" {
+			// nothing left, we're done
+			return
+		}
+		indent = p.leader
+	}
+}
+
+// Regular expressions shared by the wrapping code.
+var (
+	// the word PATCH, delimited by word boundaries
+	patchSubjectRe = regexp.MustCompile(`\bPATCH\b`)
+	// optional indentation, then a bullet (- * .) or a one or two digit
+	// number closed by ) ] or ., then at least one whitespace
+	listItemRe = regexp.MustCompile(`^\s*([\-\*\.]|\d{1,2}[\)\]\.])\s+`)
+	// whitespace at the very start
+	leadingSpaceRe = regexp.MustCompile(`^\s+`)
+	// whitespace at the very end
+	trailingSpaceRe = regexp.MustCompile(`\s+$`)
+	// any single character that is not a Unicode letter
+	notLetterRe = regexp.MustCompile(`[^\pL]`)
+)
diff --git a/filters/wrap_test.go b/filters/wrap_test.go
new file mode 100644
index 00000000..f8d82cbe
--- /dev/null
+++ b/filters/wrap_test.go
@@ -0,0 +1,212 @@
+package main
+
+import (
+ "bytes"
+ "errors"
+ "io"
+ "testing"
+)
+
+// vector is one test case for wrap: an input, the settings to wrap it with
+// and the output expected for them.
+type vector struct {
+	// name of the sub-test
+	name string
+	// text given to wrap and text it is expected to produce
+	in, out string
+	// wrap margin and minimum prose ratio passed to wrap
+	width, ratio int
+	// whether wrap is asked to reflow paragraphs
+	reflow bool
+}
+
+// vectors lists the test cases run by TestWrap: each input is passed to
+// wrap with the given width, reflow setting and prose ratio, and the result
+// must be equal to out.
+var vectors = []vector{
+ {
+ // a single line longer than the margin
+ name: "simple",
+ in: `long line that exceeds margin by many words
+`,
+ width: 30,
+ reflow: false,
+ ratio: 50,
+ out: `long line that exceeds margin
+by many words
+`,
+ },
+ {
+ // without reflow, consecutive lines are wrapped separately
+ name: "two-paragraphs",
+ in: `this is one long paragraph
+this is another long one
+`,
+ width: 20,
+ reflow: false,
+ ratio: 50,
+ out: `this is one long
+paragraph
+this is another
+long one
+`,
+ },
+ {
+ // same input as above, joined into one paragraph by reflow
+ name: "reflow",
+ in: `this is one long paragraph
+this is another long one
+`,
+ width: 20,
+ reflow: true,
+ ratio: 50,
+ out: `this is one long
+paragraph this is
+another long one
+`,
+ },
+ {
+ // quote prefixes are repeated on every wrapped line
+ name: "quotes",
+ in: `Let's play with quotes:
+
+>> Hi there how are you doing?
+> Great thanks
+
+How rude.
+
+>> Fantastic. Let's go wrap some words.
+`,
+ width: 20,
+ reflow: false,
+ ratio: 50,
+ out: `Let's play with
+quotes:
+
+>> Hi there how are
+>> you doing?
+> Great thanks
+
+How rude.
+
+>> Fantastic. Let's
+>> go wrap some
+>> words.
+`,
+ },
+ {
+ // the drawing is expected to come out unchanged, even with reflow
+ name: "ascii-art",
+ in: `This is a nice drawing, isn't it?
+
++-------------------+
+| foobaz |
++-------------------+
+ |
+ |
++-------------------+
+| foobar |
++-------------------+
+`,
+ width: 15,
+ ratio: 50,
+ reflow: true,
+ out: `This is a nice
+drawing, isn't
+it?
+
++-------------------+
+| foobaz |
++-------------------+
+ |
+ |
++-------------------+
+| foobar |
++-------------------+
+`,
+ },
+ {
+ // a list item longer than the margin
+ name: "list-items",
+ in: `Shopping list:
+
+ - milk
+ - chocolate
+ - cookies (please, with nuts)
+`,
+ width: 20,
+ reflow: false,
+ ratio: 50,
+ out: `Shopping list:
+
+ - milk
+ - chocolate
+ - cookies
+ (please, with
+ nuts)
+`,
+ },
+ {
+ // a list item spread over several lines, joined back by reflow
+ name: "list-items-reflow",
+ in: `Shopping list:
+
+ * milk
+ * chocolate
+ * cookies
+ (please,
+ with nuts)
+`,
+ width: 100,
+ reflow: true,
+ ratio: 30,
+ out: `Shopping list:
+
+ * milk
+ * chocolate
+ * cookies (please, with nuts)
+`,
+ },
+ {
+ // a word wider than the margin is kept whole on its own line
+ name: "long-url",
+ in: `Please follow this ugly link:
+http://foobaz.org/xapapzolmkdmldfk-fldskjflsk-cisjoij/onoes.jsp?xxx=2&yyy=3
+`,
+ width: 20,
+ reflow: true,
+ ratio: 50,
+ out: `Please follow this
+ugly link:
+http://foobaz.org/xapapzolmkdmldfk-fldskjflsk-cisjoij/onoes.jsp?xxx=2&yyy=3
+`,
+ },
+ {
+ // lines ending with a space are joined although reflow is off
+ name: "format=flowed",
+ in: "Oh, \nI'm \nso \nhip \nI \nuse \nformat=flowed.\n",
+ width: 30,
+ reflow: false,
+ ratio: 50,
+ out: "Oh, I'm so hip I use\nformat=flowed.\n",
+ },
+ {
+ // text made of multi-byte characters
+ name: "non-ascii",
+ in: `Lorem ççççç ççççç ççç ççççç çç ççç ççççç çççççççç ççç çç ççççç ççççççççççç ççççç
+
+Lorem жжжжж жжжжж жжж жжжжж жж жжж жжжжж жжжжжжжж жжж жж жжжжж жжжжжжжжжжж жжжжж жжжжжжжж
+`,
+ width: 40,
+ reflow: false,
+ ratio: 50,
+ out: `Lorem ççççç ççççç ççç ççççç çç ççç
+ççççç çççççççç ççç çç ççççç ççççççççççç
+ççççç
+
+Lorem жжжжж жжжжж жжж жжжжж жж жжж
+жжжжж жжжжжжжж жжж жж жжжжж жжжжжжжжжжж
+жжжжж жжжжжжжж
+`,
+ },
+}
+
+// TestWrap runs wrap over every entry of vectors, each in its own sub-test,
+// and compares what was written with the expected output. An io.EOF
+// returned by wrap is not treated as a failure.
+func TestWrap(t *testing.T) {
+	for i := range vectors {
+		tc := vectors[i]
+		t.Run(tc.name, func(t *testing.T) {
+			var got bytes.Buffer
+			src := bytes.NewBufferString(tc.in)
+			err := wrap(src, &got, tc.width, tc.reflow, tc.ratio)
+			if err != nil && !errors.Is(err, io.EOF) {
+				t.Fatalf("[%s]: %v", tc.name, err)
+			}
+			if res := got.String(); res != tc.out {
+				t.Errorf("[%s] invalid format:\n%q\nexpected\n%q",
+					tc.name, res, tc.out)
+			}
+		})
+	}
+}