aboutsummaryrefslogtreecommitdiffstats
path: root/lib/rfc822
diff options
context:
space:
mode:
authorRobin Jarry <robin@jarry.cc>2023-10-17 14:40:08 +0200
committerRobin Jarry <robin@jarry.cc>2023-10-28 19:24:55 +0200
commit57088312fdd8e602a084bd5736a0e22a34be9ec0 (patch)
tree8c5544262cf8c1772ec661748cfa4d5491ff4c77 /lib/rfc822
parent591659b52867cb118d1f82d41693a02123935e0c (diff)
downloadaerc-57088312fdd8e602a084bd5736a0e22a34be9ec0.tar.gz
worker: move shared code to lib
Avoid importing code from worker/lib into lib. It should only be the other way around. Move the message parsing code used by maildir, notmuch, mbox and the eml viewer into a lib/rfc822 package. Adapt imports accordingly. Signed-off-by: Robin Jarry <robin@jarry.cc> Reviewed-by: Koni Marti <koni.marti@gmail.com> Tested-by: Moritz Poldrack <moritz@poldrack.dev> Tested-by: Inwit <inwit@sindominio.net>
Diffstat (limited to 'lib/rfc822')
-rw-r--r--lib/rfc822/message.go383
-rw-r--r--lib/rfc822/message_test.go84
-rw-r--r--lib/rfc822/testdata/message/invalid/hexa26
-rw-r--r--lib/rfc822/testdata/message/valid/quoted-mime-type45
4 files changed, 538 insertions, 0 deletions
diff --git a/lib/rfc822/message.go b/lib/rfc822/message.go
new file mode 100644
index 00000000..979d4595
--- /dev/null
+++ b/lib/rfc822/message.go
@@ -0,0 +1,383 @@
+package rfc822
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "regexp"
+ "strings"
+ "time"
+
+ "git.sr.ht/~rjarry/aerc/lib/parse"
+ "git.sr.ht/~rjarry/aerc/log"
+ "git.sr.ht/~rjarry/aerc/models"
+ "github.com/emersion/go-message"
+ _ "github.com/emersion/go-message/charset"
+ "github.com/emersion/go-message/mail"
+)
+
+// dateRe locates an RFC 1123Z-like timestamp inside free-form header text.
+// It is deliberately more tolerant than time.RFC1123Z: the comma after the
+// weekday and the seconds are optional, and the day may have one or two
+// digits. The zone must be a sign followed by four digits.
+var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` +
+	`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` +
+	// The sign class used to be [\+|\-], which also matched a literal '|'.
+	`([0-9]{4})\s([0-9]{2}):([0-9]{2})(:([0-9]{2}))?\s([+-][0-9]{4})`)
+
+// FetchEntityPartReader returns a buffered reader over the body of the part
+// of e addressed by index. Each element of index is the 1-based position of a
+// part at the corresponding nesting level; an empty index selects the body of
+// e itself. Errors from the multipart reader (including io.EOF when the
+// requested part does not exist) are returned unchanged.
+func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
+	if len(index) == 0 {
+		// non multipart, simply return everything
+		return bufReader(e)
+	}
+	mpr := e.MultipartReader()
+	if mpr == nil {
+		// an index was given but the entity has no parts to descend into
+		return nil, fmt.Errorf("FetchEntityPartReader: unexpected code reached")
+	}
+	wanted, deeper := index[0], index[1:]
+	for pos := 1; ; pos++ {
+		part, err := mpr.NextPart()
+		if err != nil {
+			return nil, err
+		}
+		if pos != wanted {
+			continue
+		}
+		if len(deeper) == 0 {
+			return bufReader(part)
+		}
+		return FetchEntityPartReader(part, deeper)
+	}
+}
+
+// TODO: the UI doesn't seem to like readers which aren't buffers
+func bufReader(e *message.Entity) (io.Reader, error) {
+ var buf bytes.Buffer
+ if _, err := io.Copy(&buf, e.Body); err != nil {
+ return nil, err
+ }
+ return &buf, nil
+}
+
+// splitMIME breaks a MIME type such as "text/plain" into its major and minor
+// parts. The minor part is only reported when m contains exactly one "/";
+// otherwise it is empty and the major part is everything before the first "/".
+func splitMIME(m string) (string, string) {
+	fields := strings.Split(m, "/")
+	minor := ""
+	if len(fields) == 2 {
+		minor = fields[1]
+	}
+	return fields[0], minor
+}
+
+// fixContentType is the fallback used when the Content-Type header cannot be
+// parsed as is. If the media type (the text before the first ";") contains
+// double quotes, they are stripped, the header is rewritten and parsing is
+// retried. In every other case "text/plain" without parameters is returned so
+// that at least plain text can be displayed.
+func fixContentType(h message.Header) (string, map[string]string) {
+	raw := h.Get("Content-Type")
+	mediaType, params := raw, ""
+	if semi := strings.Index(raw, ";"); semi > 0 {
+		mediaType, params = raw[:semi], raw[semi:]
+	}
+
+	// only quotes around the content type are repaired here
+	if !strings.Contains(mediaType, "\"") {
+		return "text/plain", nil
+	}
+
+	h.Set("Content-Type", strings.ReplaceAll(mediaType, "\"", "")+params)
+	contenttype, parsed, err := h.ContentType()
+	if err != nil {
+		// if all else fails, return text/plain
+		return "text/plain", nil
+	}
+	return contenttype, parsed
+}
+
+func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
+ var body models.BodyStructure
+ contentType, ctParams, err := e.Header.ContentType()
+ if err != nil {
+ // try to fix the error; if all measures fail, then return a
+ // text/plain content type to display at least plaintext
+ contentType, ctParams = fixContentType(e.Header)
+ }
+
+ mimeType, mimeSubType := splitMIME(contentType)
+ body.MIMEType = mimeType
+ body.MIMESubType = mimeSubType
+ body.Params = ctParams
+ body.Description = e.Header.Get("content-description")
+ body.Encoding = e.Header.Get("content-transfer-encoding")
+ if cd := e.Header.Get("content-disposition"); cd != "" {
+ contentDisposition, cdParams, err := e.Header.ContentDisposition()
+ if err != nil {
+ return nil, fmt.Errorf("could not parse content disposition: %w", err)
+ }
+ body.Disposition = contentDisposition
+ body.DispositionParams = cdParams
+ }
+ body.Parts = []*models.BodyStructure{}
+ if mpr := e.MultipartReader(); mpr != nil {
+ for {
+ part, err := mpr.NextPart()
+ if errors.Is(err, io.EOF) {
+ return &body, nil
+ } else if err != nil {
+ return nil, err
+ }
+ ps, err := ParseEntityStructure(part)
+ if err != nil {
+ return nil, fmt.Errorf("could not parse child entity structure: %w", err)
+ }
+ body.Parts = append(body.Parts, ps)
+ }
+ }
+ return &body, nil
+}
+
+// DateParseError is the sentinel wrapped by parseEnvelope when the date is the
+// only part of the envelope that could not be parsed.
+var DateParseError = errors.New("date parsing failed")
+
+// parseEnvelope extracts the envelope fields from h. Address, subject and
+// message-id failures abort with a nil envelope. A date failure still yields
+// a fully populated envelope (with whatever date parseDate returned) together
+// with an error wrapping DateParseError.
+func parseEnvelope(h *mail.Header) (*models.Envelope, error) {
+	var (
+		env models.Envelope
+		err error
+	)
+	if env.From, err = parseAddressList(h, "from"); err != nil {
+		return nil, fmt.Errorf("could not read from address: %w", err)
+	}
+	if env.To, err = parseAddressList(h, "to"); err != nil {
+		return nil, fmt.Errorf("could not read to address: %w", err)
+	}
+	if env.Cc, err = parseAddressList(h, "cc"); err != nil {
+		return nil, fmt.Errorf("could not read cc address: %w", err)
+	}
+	if env.Bcc, err = parseAddressList(h, "bcc"); err != nil {
+		return nil, fmt.Errorf("could not read bcc address: %w", err)
+	}
+	if env.ReplyTo, err = parseAddressList(h, "reply-to"); err != nil {
+		return nil, fmt.Errorf("could not read reply-to address: %w", err)
+	}
+	if env.Subject, err = h.Subject(); err != nil {
+		return nil, fmt.Errorf("could not read subject: %w", err)
+	}
+	if env.MessageId, err = h.MessageID(); err != nil {
+		// proper parsing failed, so fall back to whatever is there
+		if env.MessageId, err = h.Text("message-id"); err != nil {
+			return nil, err
+		}
+	}
+	// only the first In-Reply-To id is kept
+	if ids := parse.MsgIDList(h, "in-reply-to"); len(ids) > 0 {
+		env.InReplyTo = ids[0]
+	}
+	if env.Date, err = parseDate(h); err != nil {
+		// still return a valid struct plus a sentinel date parsing error
+		// if only the date parsing failed
+		err = fmt.Errorf("%w: %v", DateParseError, err) //nolint:errorlint // can only use %w once
+	}
+	return &env, err
+}
+
+// parseDate determines the date of a message. It first relies on the regular
+// Date header parsing, then on a list of known non-standard layouts, and uses
+// the date found in the Received header when the Date header is unreadable,
+// empty or in an unknown format.
+func parseDate(h *mail.Header) (time.Time, error) {
+	if t, err := h.Date(); err == nil {
+		return t, nil
+	}
+
+	text, err := h.Text("date")
+	// sometimes, no error occurs but the date is empty.
+	// In this case, guess time from received header field
+	if err != nil || text == "" {
+		if t, err := parseReceivedHeader(h); err == nil {
+			return t, nil
+		}
+	}
+
+	nonStandard := [...]string{
+		// X-Mailer: EarthLink Zoo Mail 1.0
+		"Mon, _2 Jan 2006 15:04:05 -0700 (GMT-07:00)",
+	}
+	for i := range nonStandard {
+		if t, err := time.Parse(nonStandard[i], text); err == nil {
+			return t, nil
+		}
+	}
+
+	// still no success, try the received header as a last resort
+	if t, err := parseReceivedHeader(h); err == nil {
+		return t, nil
+	}
+	return time.Time{}, fmt.Errorf("unrecognized date format: %s", text)
+}
+
+// parseReceivedHeader extracts the timestamp embedded in the Received header
+// returned by h.Text("received"). The candidate is located with dateRe and
+// parsed as RFC 1123Z first. Because dateRe also accepts a missing comma
+// after the weekday, a one-digit day and missing seconds (all of which
+// time.RFC1123Z rejects), a few lenient layouts are tried before giving up.
+// On failure the zero time and the RFC 1123Z parsing error are returned.
+func parseReceivedHeader(h *mail.Header) (time.Time, error) {
+	guess, err := h.Text("received")
+	if err != nil {
+		return time.Time{}, fmt.Errorf("received header not parseable: %w",
+			err)
+	}
+	stamp := dateRe.FindString(guess)
+	t, err := time.Parse(time.RFC1123Z, stamp)
+	if err == nil {
+		return t, nil
+	}
+	// "2" in a layout accepts both one and two digit days
+	lenient := []string{
+		"Mon, 2 Jan 2006 15:04:05 -0700",
+		"Mon 2 Jan 2006 15:04:05 -0700",
+		"Mon, 2 Jan 2006 15:04 -0700",
+		"Mon 2 Jan 2006 15:04 -0700",
+	}
+	for _, layout := range lenient {
+		if alt, altErr := time.Parse(layout, stamp); altErr == nil {
+			return alt, nil
+		}
+	}
+	return time.Time{}, err
+}
+
+// parseAddressList reads the address list stored in the header field key.
+// An unknown charset error from h.Text is tolerated; any other error is
+// returned. An empty field yields a nil list. When the field content is not a
+// valid address list, a single address whose Name is the raw field text is
+// returned instead of an error.
+func parseAddressList(h *mail.Header, key string) ([]*mail.Address, error) {
+	raw, err := h.Text(key)
+	switch {
+	case err != nil && !message.IsUnknownCharset(err):
+		return nil, err
+	case raw == "":
+		return nil, nil
+	}
+	if addrs, err := mail.ParseAddressList(raw); err == nil {
+		return addrs, nil
+	}
+	// keep the unparseable text visible rather than failing the whole envelope
+	return []*mail.Address{{Name: raw}}, nil
+}
+
+// RawMessage is the source of a raw message as consumed by MessageInfo and MessageHeaders.
+type RawMessage interface {
+ NewReader() (io.ReadCloser, error) // opens the raw message; MessageInfo and MessageHeaders close it when done
+ ModelFlags() (models.Flags, error) // flags copied into models.MessageInfo.Flags
+ Labels() ([]string, error) // labels copied into models.MessageInfo.Labels
+ UID() uint32 // identifier copied into models.MessageInfo.Uid
+}
+
+// MessageInfo builds a complete models.MessageInfo (body structure, envelope,
+// flags, labels, headers) from the reader returned by raw.NewReader. An
+// unknown transfer encoding found while parsing the body structure does not
+// fail the call; it is reported through the Error field of the result.
+func MessageInfo(raw RawMessage) (*models.MessageInfo, error) {
+	r, err := raw.NewReader()
+	if err != nil {
+		return nil, err
+	}
+	defer r.Close()
+
+	msg, err := ReadMessage(r)
+	if err != nil {
+		return nil, fmt.Errorf("could not read message: %w", err)
+	}
+
+	var parseErr error
+	bs, err := ParseEntityStructure(msg)
+	switch {
+	case errors.As(err, new(message.UnknownEncodingError)):
+		parseErr = err
+	case err != nil:
+		return nil, fmt.Errorf("could not get structure: %w", err)
+	}
+
+	h := &mail.Header{Header: msg.Header}
+	// If only the date parsing failed we still get the rest of the
+	// envelope structure in a valid state. Date parsing errors are fairly
+	// common and it's better to be slightly off than to not be able to
+	// read the mails at all, hence DateParseError is not fatal here.
+	env, err := parseEnvelope(h)
+	if err != nil && !errors.Is(err, DateParseError) {
+		return nil, fmt.Errorf("could not parse envelope: %w", err)
+	}
+
+	internalDate, _ := parseReceivedHeader(h)
+	if internalDate.IsZero() {
+		// better than nothing, if incorrect
+		internalDate = env.Date
+	}
+
+	flags, err := raw.ModelFlags()
+	if err != nil {
+		return nil, err
+	}
+	labels, err := raw.Labels()
+	if err != nil {
+		return nil, err
+	}
+
+	info := &models.MessageInfo{
+		BodyStructure: bs,
+		Envelope:      env,
+		Flags:         flags,
+		Labels:        labels,
+		InternalDate:  internalDate,
+		RFC822Headers: h,
+		Size:          0,
+		Uid:           raw.UID(),
+		Error:         parseErr,
+	}
+	return info, nil
+}
+
+// MessageHeaders populates a models.MessageInfo struct for the message.
+// based on the reader returned by NewReader. Minimal information is included.
+// There is no body structure or RFC822Headers set
+func MessageHeaders(raw RawMessage) (*models.MessageInfo, error) {
+ var parseErr error
+ r, err := raw.NewReader()
+ if err != nil {
+ return nil, err
+ }
+ defer r.Close()
+ msg, err := ReadMessage(r)
+ if err != nil {
+ return nil, fmt.Errorf("could not read message: %w", err)
+ }
+ h := &mail.Header{Header: msg.Header}
+ env, err := parseEnvelope(h)
+ if err != nil && !errors.Is(err, DateParseError) {
+ return nil, fmt.Errorf("could not parse envelope: %w", err)
+ // if only the date parsing failed we still get the rest of the
+ // envelop structure in a valid state.
+ // Date parsing errors are fairly common and it's better to be
+ // slightly off than to not be able to read the mails at all
+ // hence we continue here
+ }
+ recDate, _ := parseReceivedHeader(h)
+ if recDate.IsZero() {
+ // better than nothing, if incorrect
+ recDate = env.Date
+ }
+ flags, err := raw.ModelFlags()
+ if err != nil {
+ return nil, err
+ }
+ labels, err := raw.Labels()
+ if err != nil {
+ return nil, err
+ }
+ return &models.MessageInfo{
+ Envelope: env,
+ Flags: flags,
+ Labels: labels,
+ InternalDate: recDate,
+ Refs: parse.MsgIDList(h, "references"),
+ Size: 0,
+ Uid: raw.UID(),
+ Error: parseErr,
+ }, nil
+}
+
+// NewCRLFReader reads r to the end and returns an in-memory reader over the
+// same content where every line is terminated by CRLF. Existing "\n" and
+// "\r\n" terminators are normalized, and a final line without terminator gets
+// one appended. Empty input yields an empty reader.
+//
+// Lines are read with bufio.Reader instead of bufio.Scanner: a Scanner stops
+// at the first line longer than bufio.MaxScanTokenSize (64 KiB), which used to
+// silently drop the rest of the message. Since no error can be returned, a
+// read error simply ends the conversion after the data read so far.
+func NewCRLFReader(r io.Reader) io.Reader {
+	var buf bytes.Buffer
+	br := bufio.NewReader(r)
+	for {
+		line, err := br.ReadBytes('\n')
+		if len(line) > 0 {
+			line = bytes.TrimSuffix(line, []byte("\n"))
+			line = bytes.TrimSuffix(line, []byte("\r"))
+			buf.Write(line)
+			buf.WriteString("\r\n")
+		}
+		if err != nil {
+			// io.EOF or a read error: either way there is nothing more to convert
+			break
+		}
+	}
+	return &buf
+}
+
+// ReadMessage wraps message.Read to read a message from r. The message's
+// encoding and charset are automatically decoded to UTF-8. An unknown charset
+// is only logged as a warning and a nil error is returned, since the entity
+// object can still be read. Every other error is wrapped and returned with a
+// nil entity.
+func ReadMessage(r io.Reader) (*message.Entity, error) {
+	entity, err := message.Read(r)
+	switch {
+	case message.IsUnknownCharset(err):
+		log.Warnf("unknown charset encountered")
+	case err != nil:
+		return nil, fmt.Errorf("could not read message: %w", err)
+	}
+	return entity, nil
+}
diff --git a/lib/rfc822/message_test.go b/lib/rfc822/message_test.go
new file mode 100644
index 00000000..8730afe2
--- /dev/null
+++ b/lib/rfc822/message_test.go
@@ -0,0 +1,84 @@
+package rfc822_test
+
+import (
+ "io"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "git.sr.ht/~rjarry/aerc/lib/rfc822"
+ "git.sr.ht/~rjarry/aerc/models"
+)
+
+func TestMessageInfoParser(t *testing.T) {
+ rootDir := "testdata/message/valid"
+ msgFiles, err := os.ReadDir(rootDir)
+ die(err)
+
+ for _, fi := range msgFiles {
+ if fi.IsDir() {
+ continue
+ }
+
+ p := fi.Name()
+ t.Run(p, func(t *testing.T) {
+ m := newMockRawMessageFromPath(filepath.Join(rootDir, p))
+ mi, err := rfc822.MessageInfo(m)
+ if err != nil {
+ t.Fatal("Failed to create MessageInfo with:", err)
+ }
+
+ if perr := mi.Error; perr != nil {
+ t.Fatal("Expected no parsing error, but got:", mi.Error)
+ }
+ })
+ }
+}
+
+func TestMessageInfoHandledError(t *testing.T) {
+ rootDir := "testdata/message/invalid"
+ msgFiles, err := os.ReadDir(rootDir)
+ die(err)
+
+ for _, fi := range msgFiles {
+ if fi.IsDir() {
+ continue
+ }
+
+ p := fi.Name()
+ t.Run(p, func(t *testing.T) {
+ m := newMockRawMessageFromPath(filepath.Join(rootDir, p))
+ mi, err := rfc822.MessageInfo(m)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if perr := mi.Error; perr == nil {
+ t.Fatal("Expected MessageInfo.Error, got none")
+ }
+ })
+ }
+}
+
+// mockRawMessage is a file backed raw message used by the tests. Apart from
+// NewReader, every method returns a zero value.
+type mockRawMessage struct {
+	path string
+}
+
+// newMockRawMessageFromPath returns a mock message reading from the file at p.
+func newMockRawMessageFromPath(p string) *mockRawMessage {
+	m := new(mockRawMessage)
+	m.path = p
+	return m
+}
+
+// NewReader opens the backing file.
+func (m *mockRawMessage) NewReader() (io.ReadCloser, error) {
+	return os.Open(m.path)
+}
+
+// ModelFlags reports no flags.
+func (m *mockRawMessage) ModelFlags() (models.Flags, error) {
+	return 0, nil
+}
+
+// Labels reports no labels.
+func (m *mockRawMessage) Labels() ([]string, error) {
+	return nil, nil
+}
+
+// UID always reports 0.
+func (m *mockRawMessage) UID() uint32 {
+	return 0
+}
+
+// die panics with err unless err is nil. It is used for test setup failures.
+func die(err error) {
+	if err == nil {
+		return
+	}
+	panic(err)
+}
diff --git a/lib/rfc822/testdata/message/invalid/hexa b/lib/rfc822/testdata/message/invalid/hexa
new file mode 100644
index 00000000..56b352ff
--- /dev/null
+++ b/lib/rfc822/testdata/message/invalid/hexa
@@ -0,0 +1,26 @@
+Subject: Confirmation Needed gUdVJQBhsd
+Content-Type: multipart/mixed; boundary="Nextpart_1Q2YJhd197991794467076Pgfa"
+To: <BORK@example.com>
+From: ""REGISTRAR"" <zdglopi-1Q2YJhd-noReply@example.com>
+
+--Nextpart_1Q2YJhd197991794467076Pgfa
+Content-Type: multipart/parallel; boundary="sg54sd54g54sdg54"
+
+--sg54sd54g54sdg54
+Content-Type: multipart/alternative; boundary="54qgf54q546f46qsf46qsf"
+
+--54qgf54q546f46qsf46qsf
+Content-Type: text/plain; charset=utf-8
+Content-Transfer-Encoding: Hexa
+
+
+
+--54qgf54q546f46qsf46qsf
+Content-Type: text/html; charset=utf-8
+
+
+<CeNteR><a hRef="https://example.com-ap-southeast-example.com.com/example.com#qs=r-acacaeehdiebadgdhgghcaegckhabababaggacihaccajfbacccgaehhbkacb"><b><h2>Congratulations Netflix Customer!</h2></b></a><br>
+<HeaD>
+<ObJECT>
+
+--Nextpart_1Q2YJhd197991794467076Pgfa--
diff --git a/lib/rfc822/testdata/message/valid/quoted-mime-type b/lib/rfc822/testdata/message/valid/quoted-mime-type
new file mode 100644
index 00000000..d9af28a2
--- /dev/null
+++ b/lib/rfc822/testdata/message/valid/quoted-mime-type
@@ -0,0 +1,45 @@
+Subject: Your ECOLINES tickets
+X-PHP-Originating-Script: 33:functions.inc.php
+From: ECOLINES <ecolines@ecolines.lv>
+Content-Type: multipart/mixed;
+ boundary="PHP-mixed-ba319678ca12656cfb8cd46e736ce09d"
+Message-Id: <E1nvIQS-0004tm-Bc@legacy.ecolines.net>
+Date: Sun, 29 May 2022 15:53:44 +0300
+
+--PHP-mixed-ba319678ca12656cfb8cd46e736ce09d
+Content-Type: multipart/alternative; boundary="PHP-alt-ba319678ca12656cfb8cd46e736ce09d"
+
+--PHP-alt-ba319678ca12656cfb8cd46e736ce09d
+Content-Type: text/plain; charset="UTF-8"
+Content-Transfer-Encoding: 7bit
+
+Your tickets are attached to this message. Also You can print out Your tickets from our website www.ecolines.net<b
+r />
+…
+
+--PHP-alt-ba319678ca12656cfb8cd46e736ce09d
+Content-Type: text/html; charset="UTF-8"
+Content-Transfer-Encoding: 7bit
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+…
+
+--PHP-alt-ba319678ca12656cfb8cd46e736ce09d--
+
+--PHP-mixed-ba319678ca12656cfb8cd46e736ce09d
+Content-Type: "application/pdf"; name="17634428.pdf"
+Content-Disposition: attachment; filename="17634428.pdf"
+Content-Transfer-Encoding: base64
+
+JVBERi0xLjQKMSAwIG9iago8PAovVGl0bGUgKP7/AFkAbwB1AHIAIAB0AGkAYwBrAGUAdCkKL0Ny
+…
+
+--PHP-mixed-ba319678ca12656cfb8cd46e736ce09d
+Content-Type: "application/pdf"; name="invoice-6385490.pdf"
+Content-Disposition: attachment; filename="invoice-6385490.pdf"
+Content-Transfer-Encoding: base64
+
+JVBERi0xLjQKMSAwIG9iago8PAovVGl0bGUgKP7/AEkAbgB2AG8AaQBjAGUpCi9DcmVhdG9yICj+
+…
+
+--PHP-mixed-ba319678ca12656cfb8cd46e736ce09d--