diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rw-r--r-- | Makefile | 11 | ||||
-rw-r--r-- | config/aerc.conf | 2 | ||||
-rw-r--r-- | doc/aerc-config.5.scd | 13 | ||||
-rw-r--r-- | filters/wrap.go | 267 | ||||
-rw-r--r-- | filters/wrap_test.go | 212 |
7 files changed, 495 insertions, 12 deletions
@@ -2,6 +2,7 @@ /aerc2 /aerc /aerc.debug +/wrap /.aerc.d race.log.* raw.log diff --git a/CHANGELOG.md b/CHANGELOG.md index 6931391c..5e29ea5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). `subject-re-pattern`. - Search/filter by absolute and relative date ranges with the `-d` flag. - LIST-STATUS support for imap +- built-in `wrap` filter that does not mess up nested quotes and lists. ### Fixed @@ -19,7 +19,7 @@ GO_LDFLAGS+=-X main.Flags=$(flags) GO_LDFLAGS+=-X git.sr.ht/~rjarry/aerc/config.shareDir=$(SHAREDIR) GO_LDFLAGS+=$(GO_EXTRA_LDFLAGS) -GOSRC!=find * -name '*.go' +GOSRC!=find * -name '*.go' | grep -v filters/wrap.go GOSRC+=go.mod go.sum DOCS := \ @@ -37,7 +37,7 @@ DOCS := \ aerc-templates.7 \ aerc-stylesets.7 -all: aerc $(DOCS) +all: aerc wrap $(DOCS) build_cmd:=$(GO) build $(BUILD_OPTS) $(GOFLAGS) -ldflags "$(GO_LDFLAGS)" -o aerc @@ -52,6 +52,10 @@ _!=grep -sqFx '$(build_cmd)' .aerc.d || rm -f .aerc.d aerc: $(GOSRC) .aerc.d $(build_cmd) +wrap: filters/wrap.go .aerc.d + $(GO) build $(BUILD_OPTS) $(GOFLAGS) -ldflags "$(GO_EXTRA_LDFLAGS)" \ + -o wrap filters/wrap.go + .PHONY: dev dev: $(MAKE) aerc BUILD_OPTS="-trimpath -race" @@ -102,7 +106,7 @@ RM?=rm -f clean: $(RM) $(DOCS) aerc -install: $(DOCS) aerc +install: $(DOCS) aerc wrap mkdir -m755 -p $(DESTDIR)$(BINDIR) $(DESTDIR)$(MANDIR)/man1 $(DESTDIR)$(MANDIR)/man5 $(DESTDIR)$(MANDIR)/man7 \ $(DESTDIR)$(SHAREDIR) $(DESTDIR)$(SHAREDIR)/filters $(DESTDIR)$(SHAREDIR)/templates $(DESTDIR)$(SHAREDIR)/stylesets \ $(DESTDIR)$(PREFIX)/share/applications @@ -130,6 +134,7 @@ install: $(DOCS) aerc install -m755 filters/html-unsafe $(DESTDIR)$(SHAREDIR)/filters/html-unsafe install -m755 filters/plaintext $(DESTDIR)$(SHAREDIR)/filters/plaintext install -m755 filters/show-ics-details.py $(DESTDIR)$(SHAREDIR)/filters/show-ics-details.py + install -m755 wrap $(DESTDIR)$(SHAREDIR)/filters/wrap install -m644 templates/new_message $(DESTDIR)$(SHAREDIR)/templates/new_message install -m644 templates/quoted_reply $(DESTDIR)$(SHAREDIR)/templates/quoted_reply install -m644 templates/forward_as_body $(DESTDIR)$(SHAREDIR)/templates/forward_as_body diff --git a/config/aerc.conf b/config/aerc.conf index ebd361ad..862b3be8 100644 --- a/config/aerc.conf +++ b/config/aerc.conf @@ -396,7 +396,7 @@ message/rfc822=colorize #application/x-sh=bat -fP -l sh #image/*=catimg -w $(tput cols) - #subject,~Git(hub|lab)=lolcat -f -#from,thatguywhodoesnothardwraphismessages=fmt -w 72 | colorize +#from,thatguywhodoesnothardwraphismessages=wrap -w 100 | colorize [openers] # diff --git a/doc/aerc-config.5.scd b/doc/aerc-config.5.scd index ac9a3a73..17a96f07 100644 --- a/doc/aerc-config.5.scd +++ b/doc/aerc-config.5.scd @@ -636,22 +636,19 @@ _text/plain_ ``` Wrap long lines at 100 characters, while not messing up nested quotes. - Not perfect, but works for most emails: + Handles format=flowed emails properly: ``` - text/plain=fmt -s -p ">>" -w 100 | fmt -s -p ">" -w 100 | fmt -s -w 100 | colorize + text/plain=wrap -w 100 | colorize ``` _from,<sender>_ Another example of hard wrapping lines of emails sent by a specific - person but using neovim which handles nested quotes without issues: + person. Explicitly reflow all paragraphs instead of only wrapping long + lines. This may break manual formatting in some messages: ``` - from,thatguywhoneverhardwrapshismessages=case "$AERC_SUBJECT" in \\ - \*PATCH\*) cat;; \\ - \*) nvim - -u NONE -es '+set ft=mail fo=tcqwn1j tw=80' \\ - '+:norm! gggqG' '+%print' '+:q!';; \\ - esac | colorize + from,thatguywhoneverhardwrapshismessages=wrap -r -w 72 | colorize ``` _subject,~<regexp>_ diff --git a/filters/wrap.go b/filters/wrap.go new file mode 100644 index 00000000..f1b335cc --- /dev/null +++ b/filters/wrap.go @@ -0,0 +1,267 @@ +package main + +import ( + "bufio" + "errors" + "flag" + "fmt" + "io" + "os" + "regexp" + "strings" + + "github.com/mattn/go-runewidth" +) + +type paragraph struct { + // email quote prefix, if any + quotes string + // list item indent, if any + leader string + // actual text of this paragraph + text string + // percentage of letters in text + proseRatio int + // text ends with a space + flowed bool + // paragraph is a list item + listItem bool +} + +func main() { + var err error + var width int + var reflow bool + var file string + var proseRatio int + var input *os.File + + fs := flag.NewFlagSet(os.Args[0], flag.ExitOnError) + fs.IntVar(&width, "w", 80, "preferred wrap margin") + fs.BoolVar(&reflow, "r", false, + "reflow all paragraphs even if no trailing space") + fs.IntVar(&proseRatio, "l", 50, + "minimum percentage of letters in a line to be considered a paragaph") + fs.StringVar(&file, "f", "", "read from file instead of stdin") + _ = fs.Parse(os.Args[1:]) + + if file != "" { + input, err = os.OpenFile(file, os.O_RDONLY, 0o644) + if err != nil { + goto end + } + } else { + input = os.Stdin + } + + err = wrap(input, os.Stdout, width, reflow, proseRatio) + +end: + if err != nil && !errors.Is(err, io.EOF) { + fmt.Fprintf(os.Stderr, "error: %s\n", err) + os.Exit(1) + } +} + +func wrap( + in io.Reader, out io.Writer, width int, reflow bool, proseRatio int, +) error { + var para *paragraph = nil + var line string + var err error + + if patchSubjectRe.MatchString(os.Getenv("AERC_SUBJECT")) { + // never reflow patches + _, err = io.Copy(out, in) + } else { + reader := bufio.NewReader(in) + line, err = reader.ReadString('\n') + for ; err == nil; line, err = reader.ReadString('\n') { + next := parse(line) + switch { + case para == nil: + para = next + case para.isContinuation(next, reflow, proseRatio): + para.join(next) + default: + para.write(out, width, proseRatio) + para = next + } + } + if para != nil { + para.write(out, width, proseRatio) + } + } + + return err +} + +// Parse a line of text into a paragraph structure +func parse(line string) *paragraph { + p := new(paragraph) + q := 0 + t := 0 + line = strings.TrimRight(line, "\r\n") + // tabs cause a whole lot of troubles, replace them with 8 spaces + line = strings.ReplaceAll(line, "\t", " ") + + // Use integer offsets to find relevant positions in the line + // + // > > > 2) blah blah blah blah + // ^--------+-----^ + // q | t + // end of quotes | start of text + // | + // list item leader + + // detect the end of quotes prefix if any + for q < len(line) && line[q] == '>' { + q += 1 + if q < len(line) && line[q] == ' ' { + q += 1 + } + } + + // detect list item leader + loc := listItemRe.FindStringIndex(line[q:]) + if loc != nil { + // start of list item + p.listItem = true + } else { + // maybe list item continuation + loc = leadingSpaceRe.FindStringIndex(line[q:]) + } + if loc != nil { + t = q + loc[1] + } else { + // no list at all + t = q + } + + // check if there is trailing whitespace, indicating format=flowed + loc = trailingSpaceRe.FindStringIndex(line[t:]) + if loc != nil { + p.flowed = true + // trim whitespace + line = line[:t+loc[0]] + } + + p.quotes = line[:q] + p.leader = strings.Repeat(" ", runewidth.StringWidth(line[q:t])) + p.text = line[q:] + + // compute the ratio of letters in the actual text + onlyLetters := strings.TrimLeft(line[q:], " ") + totalLen := runewidth.StringWidth(onlyLetters) + if totalLen == 0 { + // to avoid division by zero + totalLen = 1 + } + onlyLetters = notLetterRe.ReplaceAllLiteralString(onlyLetters, "") + p.proseRatio = 100 * runewidth.StringWidth(onlyLetters) / totalLen + + return p +} + +// Return true if a paragraph is a continuation of the current one. +func (p *paragraph) isContinuation( + next *paragraph, reflow bool, proseRatio int, +) bool { + switch { + case next.listItem: + // new list items always start a new paragraph + return false + case next.proseRatio < proseRatio || p.proseRatio < proseRatio: + // does not look like prose, maybe ascii art + return false + case next.quotes != p.quotes || next.leader != p.leader: + // quote level and/or list item leader have changed + return false + case len(strings.Trim(next.text, " ")) == 0: + // empty line + return false + case p.flowed: + // current paragraph has trailing space, indicating + // format=flowed + return true + case reflow: + // user forced paragraph reflow on the command line + return true + default: + return false + } +} + +// Join next paragraph into current one. +func (p *paragraph) join(next *paragraph) { + if p.text == "" { + p.text = next.text + } else { + p.text = p.text + " " + strings.Trim(next.text, " ") + } + p.proseRatio = (p.proseRatio + next.proseRatio) / 2 + p.flowed = next.flowed +} + +// Write a paragraph, wrapping at words boundaries. +// +// Only try to do word wrapping on things that look like prose. When the text +// contains too many non-letter characters, print it as-is. +func (p *paragraph) write(out io.Writer, margin int, proseRatio int) { + leader := "" + more := true + quotesWidth := runewidth.StringWidth(p.quotes) + for more { + var line string + width := quotesWidth + runewidth.StringWidth(leader) + remain := runewidth.StringWidth(p.text) + if width+remain <= margin || p.proseRatio < proseRatio { + // whole paragraph fits on a single line + line = p.text + p.text = "" + more = false + } else { + // find split point, preferably before margin + split := -1 + w := 0 + for i, r := range p.text { + w += runewidth.RuneWidth(r) + if width+w > margin && split != -1 { + break + } + if r == ' ' { + split = i + } + } + if split == -1 { + // no space found to split, print a long line + line = p.text + p.text = "" + more = false + } else { + line = p.text[:split] + // find start of next word + for split < len(p.text) && p.text[split] == ' ' { + split++ + } + if split < len(p.text) { + p.text = p.text[split:] + } else { + // only trailing whitespace, we're done + p.text = "" + more = false + } + } + } + fmt.Fprintf(out, "%s%s%s\n", p.quotes, leader, line) + leader = p.leader + } +} + +var ( + patchSubjectRe = regexp.MustCompile(`\bPATCH\b`) + listItemRe = regexp.MustCompile(`^\s*([\-\*\.]|\d{1,2}[\)\]\.])\s+`) + leadingSpaceRe = regexp.MustCompile(`^\s+`) + trailingSpaceRe = regexp.MustCompile(`\s+$`) + notLetterRe = regexp.MustCompile(`[^\pL]`) +) diff --git a/filters/wrap_test.go b/filters/wrap_test.go new file mode 100644 index 00000000..f8d82cbe --- /dev/null +++ b/filters/wrap_test.go @@ -0,0 +1,212 @@ +package main + +import ( + "bytes" + "errors" + "io" + "testing" +) + +type vector struct { + name string + in string + out string + width int + reflow bool + ratio int +} + +var vectors = []vector{ + { + name: "simple", + in: `long line that exceeds margin by many words +`, + width: 30, + reflow: false, + ratio: 50, + out: `long line that exceeds margin +by many words +`, + }, + { + name: "two-paragraphs", + in: `this is one long paragraph +this is another long one +`, + width: 20, + reflow: false, + ratio: 50, + out: `this is one long +paragraph +this is another +long one +`, + }, + { + name: "reflow", + in: `this is one long paragraph +this is another long one +`, + width: 20, + reflow: true, + ratio: 50, + out: `this is one long +paragraph this is +another long one +`, + }, + { + name: "quotes", + in: `Let's play with quotes: + +>> Hi there how are you doing? +> Great thanks + +How rude. + +>> Fantastic. Let's go wrap some words. +`, + width: 20, + reflow: false, + ratio: 50, + out: `Let's play with +quotes: + +>> Hi there how are +>> you doing? +> Great thanks + +How rude. + +>> Fantastic. Let's +>> go wrap some +>> words. +`, + }, + { + name: "ascii-art", + in: `This is a nice drawing, isn't it? + ++-------------------+ +| foobaz | ++-------------------+ + | + | ++-------------------+ +| foobar | ++-------------------+ +`, + width: 15, + ratio: 50, + reflow: true, + out: `This is a nice +drawing, isn't +it? + ++-------------------+ +| foobaz | ++-------------------+ + | + | ++-------------------+ +| foobar | ++-------------------+ +`, + }, + { + name: "list-items", + in: `Shopping list: + + - milk + - chocolate + - cookies (please, with nuts) +`, + width: 20, + reflow: false, + ratio: 50, + out: `Shopping list: + + - milk + - chocolate + - cookies + (please, with + nuts) +`, + }, + { + name: "list-items-reflow", + in: `Shopping list: + + * milk + * chocolate + * cookies + (please, + with nuts) +`, + width: 100, + reflow: true, + ratio: 30, + out: `Shopping list: + + * milk + * chocolate + * cookies (please, with nuts) +`, + }, + { + name: "long-url", + in: `Please follow this ugly link: +http://foobaz.org/xapapzolmkdmldfk-fldskjflsk-cisjoij/onoes.jsp?xxx=2&yyy=3 +`, + width: 20, + reflow: true, + ratio: 50, + out: `Please follow this +ugly link: +http://foobaz.org/xapapzolmkdmldfk-fldskjflsk-cisjoij/onoes.jsp?xxx=2&yyy=3 +`, + }, + { + name: "format=flowed", + in: "Oh, \nI'm \nso \nhip \nI \nuse \nformat=flowed.\n", + width: 30, + reflow: false, + ratio: 50, + out: "Oh, I'm so hip I use\nformat=flowed.\n", + }, + { + name: "non-ascii", + in: `Lorem ççççç ççççç ççç ççççç çç ççç ççççç çççççççç ççç çç ççççç ççççççççççç ççççç + +Lorem жжжжж жжжжж жжж жжжжж жж жжж жжжжж жжжжжжжж жжж жж жжжжж жжжжжжжжжжж жжжжж жжжжжжжж +`, + width: 40, + reflow: false, + ratio: 50, + out: `Lorem ççççç ççççç ççç ççççç çç ççç +ççççç çççççççç ççç çç ççççç ççççççççççç +ççççç + +Lorem жжжжж жжжжж жжж жжжжж жж жжж +жжжжж жжжжжжжж жжж жж жжжжж жжжжжжжжжжж +жжжжж жжжжжжжж +`, + }, +} + +func TestWrap(t *testing.T) { + for _, vec := range vectors { + t.Run(vec.name, func(t *testing.T) { + r := bytes.NewReader([]byte(vec.in)) + var buf bytes.Buffer + err := wrap(r, &buf, vec.width, vec.reflow, vec.ratio) + if err != nil && !errors.Is(err, io.EOF) { + t.Fatalf("[%s]: %v", vec.name, err) + } + if buf.String() != vec.out { + t.Errorf("[%s] invalid format:\n%q\nexpected\n%q", + vec.name, buf.String(), vec.out) + } + }) + } +} |