aboutsummaryrefslogtreecommitdiffstats
path: root/worker/lib/parse.go
diff options
context:
space:
mode:
Diffstat (limited to 'worker/lib/parse.go')
-rw-r--r-- worker/lib/parse.go 415
1 files changed, 0 insertions, 415 deletions
diff --git a/worker/lib/parse.go b/worker/lib/parse.go
deleted file mode 100644
index 8e20e904..00000000
--- a/worker/lib/parse.go
+++ /dev/null
@@ -1,415 +0,0 @@
-package lib
-
-import (
- "bufio"
- "bytes"
- "errors"
- "fmt"
- "io"
- "os"
- "regexp"
- "strings"
- "time"
-
- "git.sr.ht/~rjarry/aerc/lib/parse"
- "git.sr.ht/~rjarry/aerc/log"
- "git.sr.ht/~rjarry/aerc/models"
- "github.com/emersion/go-message"
- _ "github.com/emersion/go-message/charset"
- "github.com/emersion/go-message/mail"
-)
-
// dateRe locates an RFC 1123Z-like timestamp (for example
// "Mon, 02 Jan 2006 15:04:05 -0700") inside a larger string such as a
// Received header. The comma after the weekday and the seconds field are
// optional in the pattern. The timezone offset must start with '+' or '-';
// the sign class is written as [+-] so that a literal '|' is not accepted.
var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` +
	`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` +
	`([0-9]{4})\s([0-9]{2}):([0-9]{2})(:([0-9]{2}))?\s([+-][0-9]{4})`)
-
-func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
- if len(index) == 0 {
- // non multipart, simply return everything
- return bufReader(e)
- }
- if mpr := e.MultipartReader(); mpr != nil {
- idx := 0
- for {
- idx++
- part, err := mpr.NextPart()
- if err != nil {
- return nil, err
- }
- if idx == index[0] {
- rest := index[1:]
- if len(rest) < 1 {
- return bufReader(part)
- }
- return FetchEntityPartReader(part, index[1:])
- }
- }
- }
- return nil, fmt.Errorf("FetchEntityPartReader: unexpected code reached")
-}
-
-// TODO: the UI doesn't seem to like readers which aren't buffers
-func bufReader(e *message.Entity) (io.Reader, error) {
- var buf bytes.Buffer
- if _, err := io.Copy(&buf, e.Body); err != nil {
- return nil, err
- }
- return &buf, nil
-}
-
// splitMIME splits a MIME type into its major and minor parts. When m does
// not contain exactly one "/", the minor part is empty and the major part is
// everything before the first "/" (or all of m if there is none).
func splitMIME(m string) (string, string) {
	slash := strings.Index(m, "/")
	switch {
	case slash < 0:
		return m, ""
	case strings.Contains(m[slash+1:], "/"):
		return m[:slash], ""
	default:
		return m[:slash], m[slash+1:]
	}
}
-
-func fixContentType(h message.Header) (string, map[string]string) {
- ct, rest := h.Get("Content-Type"), ""
- if i := strings.Index(ct, ";"); i > 0 {
- ct, rest = ct[:i], ct[i:]
- }
-
- // check if there are quotes around the content type
- if strings.Contains(ct, "\"") {
- header := strings.ReplaceAll(ct, "\"", "")
- if rest != "" {
- header += rest
- }
- h.Set("Content-Type", header)
- if contenttype, params, err := h.ContentType(); err == nil {
- return contenttype, params
- }
- }
-
- // if all else fails, return text/plain
- return "text/plain", nil
-}
-
-func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
- var body models.BodyStructure
- contentType, ctParams, err := e.Header.ContentType()
- if err != nil {
- // try to fix the error; if all measures fail, then return a
- // text/plain content type to display at least plaintext
- contentType, ctParams = fixContentType(e.Header)
- }
-
- mimeType, mimeSubType := splitMIME(contentType)
- body.MIMEType = mimeType
- body.MIMESubType = mimeSubType
- body.Params = ctParams
- body.Description = e.Header.Get("content-description")
- body.Encoding = e.Header.Get("content-transfer-encoding")
- if cd := e.Header.Get("content-disposition"); cd != "" {
- contentDisposition, cdParams, err := e.Header.ContentDisposition()
- if err != nil {
- return nil, fmt.Errorf("could not parse content disposition: %w", err)
- }
- body.Disposition = contentDisposition
- body.DispositionParams = cdParams
- }
- body.Parts = []*models.BodyStructure{}
- if mpr := e.MultipartReader(); mpr != nil {
- for {
- part, err := mpr.NextPart()
- if errors.Is(err, io.EOF) {
- return &body, nil
- } else if err != nil {
- return nil, err
- }
- ps, err := ParseEntityStructure(part)
- if err != nil {
- return nil, fmt.Errorf("could not parse child entity structure: %w", err)
- }
- body.Parts = append(body.Parts, ps)
- }
- }
- return &body, nil
-}
-
-var DateParseError = errors.New("date parsing failed")
-
-func parseEnvelope(h *mail.Header) (*models.Envelope, error) {
- from, err := parseAddressList(h, "from")
- if err != nil {
- return nil, fmt.Errorf("could not read from address: %w", err)
- }
- to, err := parseAddressList(h, "to")
- if err != nil {
- return nil, fmt.Errorf("could not read to address: %w", err)
- }
- cc, err := parseAddressList(h, "cc")
- if err != nil {
- return nil, fmt.Errorf("could not read cc address: %w", err)
- }
- bcc, err := parseAddressList(h, "bcc")
- if err != nil {
- return nil, fmt.Errorf("could not read bcc address: %w", err)
- }
- replyTo, err := parseAddressList(h, "reply-to")
- if err != nil {
- return nil, fmt.Errorf("could not read reply-to address: %w", err)
- }
- subj, err := h.Subject()
- if err != nil {
- return nil, fmt.Errorf("could not read subject: %w", err)
- }
- msgID, err := h.MessageID()
- if err != nil {
- // proper parsing failed, so fall back to whatever is there
- msgID, err = h.Text("message-id")
- if err != nil {
- return nil, err
- }
- }
- var irt string
- irtList := parse.MsgIDList(h, "in-reply-to")
- if len(irtList) > 0 {
- irt = irtList[0]
- }
- date, err := parseDate(h)
- if err != nil {
- // still return a valid struct plus a sentinel date parsing error
- // if only the date parsing failed
- err = fmt.Errorf("%w: %v", DateParseError, err) //nolint:errorlint // can only use %w once
- }
- return &models.Envelope{
- Date: date,
- Subject: subj,
- MessageId: msgID,
- From: from,
- ReplyTo: replyTo,
- To: to,
- Cc: cc,
- Bcc: bcc,
- InReplyTo: irt,
- }, err
-}
-
-// parseDate tries to parse the date from the Date header with non std formats
-// if this fails it tries to parse the received header as well
-func parseDate(h *mail.Header) (time.Time, error) {
- t, err := h.Date()
- if err == nil {
- return t, nil
- }
- text, err := h.Text("date")
- // sometimes, no error occurs but the date is empty.
- // In this case, guess time from received header field
- if err != nil || text == "" {
- t, err := parseReceivedHeader(h)
- if err == nil {
- return t, nil
- }
- }
- layouts := []string{
- // X-Mailer: EarthLink Zoo Mail 1.0
- "Mon, _2 Jan 2006 15:04:05 -0700 (GMT-07:00)",
- }
- for _, layout := range layouts {
- if t, err := time.Parse(layout, text); err == nil {
- return t, nil
- }
- }
- // still no success, try the received header as a last resort
- t, err = parseReceivedHeader(h)
- if err != nil {
- return time.Time{}, fmt.Errorf("unrecognized date format: %s", text)
- }
- return t, nil
-}
-
-func parseReceivedHeader(h *mail.Header) (time.Time, error) {
- guess, err := h.Text("received")
- if err != nil {
- return time.Time{}, fmt.Errorf("received header not parseable: %w",
- err)
- }
- return time.Parse(time.RFC1123Z, dateRe.FindString(guess))
-}
-
-func parseAddressList(h *mail.Header, key string) ([]*mail.Address, error) {
- hdr, err := h.Text(key)
- if err != nil && !message.IsUnknownCharset(err) {
- return nil, err
- }
- if hdr == "" {
- return nil, nil
- }
- add, err := mail.ParseAddressList(hdr)
- if err != nil {
- return []*mail.Address{{Name: hdr}}, nil
- }
- return add, err
-}
-
-// RawMessage is an interface that describes a raw message
-type RawMessage interface {
- NewReader() (io.ReadCloser, error)
- ModelFlags() (models.Flags, error)
- Labels() ([]string, error)
- UID() uint32
-}
-
-// MessageInfo populates a models.MessageInfo struct for the message.
-// based on the reader returned by NewReader
-func MessageInfo(raw RawMessage) (*models.MessageInfo, error) {
- var parseErr error
- r, err := raw.NewReader()
- if err != nil {
- return nil, err
- }
- defer r.Close()
- msg, err := ReadMessage(r)
- if err != nil {
- return nil, fmt.Errorf("could not read message: %w", err)
- }
- bs, err := ParseEntityStructure(msg)
- if errors.As(err, new(message.UnknownEncodingError)) {
- parseErr = err
- } else if err != nil {
- return nil, fmt.Errorf("could not get structure: %w", err)
- }
- h := &mail.Header{Header: msg.Header}
- env, err := parseEnvelope(h)
- if err != nil && !errors.Is(err, DateParseError) {
- return nil, fmt.Errorf("could not parse envelope: %w", err)
- // if only the date parsing failed we still get the rest of the
- // envelop structure in a valid state.
- // Date parsing errors are fairly common and it's better to be
- // slightly off than to not be able to read the mails at all
- // hence we continue here
- }
- recDate, _ := parseReceivedHeader(h)
- if recDate.IsZero() {
- // better than nothing, if incorrect
- recDate = env.Date
- }
- flags, err := raw.ModelFlags()
- if err != nil {
- return nil, err
- }
- labels, err := raw.Labels()
- if err != nil {
- return nil, err
- }
- return &models.MessageInfo{
- BodyStructure: bs,
- Envelope: env,
- Flags: flags,
- Labels: labels,
- InternalDate: recDate,
- RFC822Headers: h,
- Size: 0,
- Uid: raw.UID(),
- Error: parseErr,
- }, nil
-}
-
-// LimitHeaders returns a new Header with the specified headers included or
-// excluded
-func LimitHeaders(hdr *mail.Header, fields []string, exclude bool) *mail.Header {
- fieldMap := make(map[string]struct{}, len(fields))
- for _, f := range fields {
- fieldMap[strings.ToLower(f)] = struct{}{}
- }
- nh := &mail.Header{}
- curFields := hdr.Fields()
- for curFields.Next() {
- key := strings.ToLower(curFields.Key())
- _, present := fieldMap[key]
- // XOR exclude and present. When they are equal, it means we
- // should not add the header to the new header struct
- if exclude == present {
- continue
- }
- nh.Add(key, curFields.Value())
- }
- return nh
-}
-
-// MessageHeaders populates a models.MessageInfo struct for the message.
-// based on the reader returned by NewReader. Minimal information is included.
-// There is no body structure or RFC822Headers set
-func MessageHeaders(raw RawMessage) (*models.MessageInfo, error) {
- var parseErr error
- r, err := raw.NewReader()
- if err != nil {
- return nil, err
- }
- defer r.Close()
- msg, err := ReadMessage(r)
- if err != nil {
- return nil, fmt.Errorf("could not read message: %w", err)
- }
- h := &mail.Header{Header: msg.Header}
- env, err := parseEnvelope(h)
- if err != nil && !errors.Is(err, DateParseError) {
- return nil, fmt.Errorf("could not parse envelope: %w", err)
- // if only the date parsing failed we still get the rest of the
- // envelop structure in a valid state.
- // Date parsing errors are fairly common and it's better to be
- // slightly off than to not be able to read the mails at all
- // hence we continue here
- }
- recDate, _ := parseReceivedHeader(h)
- if recDate.IsZero() {
- // better than nothing, if incorrect
- recDate = env.Date
- }
- flags, err := raw.ModelFlags()
- if err != nil {
- return nil, err
- }
- labels, err := raw.Labels()
- if err != nil {
- return nil, err
- }
- return &models.MessageInfo{
- Envelope: env,
- Flags: flags,
- Labels: labels,
- InternalDate: recDate,
- Refs: parse.MsgIDList(h, "references"),
- Size: 0,
- Uid: raw.UID(),
- Error: parseErr,
- }, nil
-}
-
// NewCRLFReader reads all of r into memory and returns a reader over the
// same content with every line terminated by CRLF. Existing "\n" and "\r\n"
// line endings are both normalized, and a final line without a terminator
// gets one appended. Empty input yields an empty reader.
//
// Lines of any length are supported. A bufio.Scanner is deliberately not
// used here: it stops at lines longer than bufio.MaxScanTokenSize, which
// would silently truncate the output.
//
// The signature has no error result, so a read error from r simply ends the
// conversion; everything read up to that point is still returned.
func NewCRLFReader(r io.Reader) io.Reader {
	var buf bytes.Buffer
	br := bufio.NewReader(r)
	for {
		line, err := br.ReadBytes('\n')
		if len(line) > 0 {
			// strip the existing terminator ("\n" or "\r\n") ...
			line = bytes.TrimSuffix(line, []byte("\n"))
			line = bytes.TrimSuffix(line, []byte("\r"))
			// ... and write the line back with a CRLF one
			buf.Write(line)
			buf.WriteString("\r\n")
		}
		if err != nil {
			// io.EOF or a read error: nothing more to convert
			break
		}
	}
	return &buf
}
-
-// ReadMessage is a wrapper for the message.Read function to read a message
-// from r. The message's encoding and charset are automatically decoded to
-// UTF-8. If an unknown charset is encountered, the error is logged but a nil
-// error is returned since the entity object can still be read.
-func ReadMessage(r io.Reader) (*message.Entity, error) {
- entity, err := message.Read(r)
- if message.IsUnknownCharset(err) {
- log.Warnf("unknown charset encountered")
- } else if err != nil {
- return nil, fmt.Errorf("could not read message: %w", err)
- }
- return entity, nil
-}
-
// FileSize returns the size in bytes of the file specified by name.
//
// The result is a uint32, so it cannot represent sizes of 4 GiB or more.
// Such sizes are reported as the largest uint32 value instead of silently
// wrapping around to a small number. A negative size is reported as 0.
// An error is returned when the file cannot be stat'ed.
func FileSize(name string) (uint32, error) {
	fileInfo, err := os.Stat(name)
	if err != nil {
		return 0, fmt.Errorf("failed to obtain fileinfo: %w", err)
	}
	const maxSize = int64(^uint32(0))
	size := fileInfo.Size()
	switch {
	case size > maxSize:
		// saturate rather than overflow
		return uint32(maxSize), nil
	case size < 0:
		return 0, nil
	}
	return uint32(size), nil
}