From 57088312fdd8e602a084bd5736a0e22a34be9ec0 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Tue, 17 Oct 2023 14:40:08 +0200 Subject: worker: move shared code to lib Avoid importing code from worker/lib into lib. It should only be the other way around. Move the message parsing code used by maildir, notmuch, mbox and the eml viewer into a lib/rfc822 package. Adapt imports accordingly. Signed-off-by: Robin Jarry Reviewed-by: Koni Marti Tested-by: Moritz Poldrack Tested-by: Inwit --- worker/lib/daterange.go | 471 --------------------- worker/lib/daterange_test.go | 97 ----- worker/lib/headers.go | 29 ++ worker/lib/parse.go | 415 ------------------ worker/lib/parse_test.go | 83 ---- worker/lib/search.go | 10 +- worker/lib/size.go | 15 + worker/lib/testdata/message/invalid/hexa | 26 -- worker/lib/testdata/message/valid/quoted-mime-type | 45 -- 9 files changed, 50 insertions(+), 1141 deletions(-) delete mode 100644 worker/lib/daterange.go delete mode 100644 worker/lib/daterange_test.go create mode 100644 worker/lib/headers.go delete mode 100644 worker/lib/parse.go delete mode 100644 worker/lib/parse_test.go create mode 100644 worker/lib/size.go delete mode 100644 worker/lib/testdata/message/invalid/hexa delete mode 100644 worker/lib/testdata/message/valid/quoted-mime-type (limited to 'worker/lib') diff --git a/worker/lib/daterange.go b/worker/lib/daterange.go deleted file mode 100644 index b08bf177..00000000 --- a/worker/lib/daterange.go +++ /dev/null @@ -1,471 +0,0 @@ -package lib - -import ( - "fmt" - "strings" - "time" - - "git.sr.ht/~rjarry/aerc/log" -) - -const dateFmt = "2006-01-02" - -// ParseDateRange parses a date range into a start and end date. Dates are -// expected to be in the YYYY-MM-DD format. -// -// Start and end dates are connected by the range operator ".." where end date -// is not included in the date range. -// -// ParseDateRange can also parse open-ended ranges, i.e. start.. or ..end are -// allowed. -// -// Relative date terms (such as "1 week 1 day" or "1w 1d") can be used, too. -func ParseDateRange(s string) (start, end time.Time, err error) { - s = cleanInput(s) - s = ensureRangeOp(s) - i := strings.Index(s, "..") - switch { - case i < 0: - // single date - start, err = translate(s) - if err != nil { - err = fmt.Errorf("failed to parse date: %w", err) - return - } - end = start.AddDate(0, 0, 1) - - case i == 0: - // end date only - if len(s) < 2 { - err = fmt.Errorf("no date found") - return - } - end, err = translate(s[2:]) - if err != nil { - err = fmt.Errorf("failed to parse date: %w", err) - return - } - - case i > 0: - // start date first - start, err = translate(s[:i]) - if err != nil { - err = fmt.Errorf("failed to parse date: %w", err) - return - } - if len(s[i:]) <= 2 { - return - } - // and end dates if available - end, err = translate(s[(i + 2):]) - if err != nil { - err = fmt.Errorf("failed to parse date: %w", err) - return - } - } - - return -} - -type dictFunc = func(bool) time.Time - -// dict is a dictionary to translate words to dates. Map key must be at least 3 -// characters for matching purposes. -var dict map[string]dictFunc = map[string]dictFunc{ - "today": func(_ bool) time.Time { - return time.Now() - }, - "yesterday": func(_ bool) time.Time { - return time.Now().AddDate(0, 0, -1) - }, - "week": func(this bool) time.Time { - diff := 0 - if !this { - diff = -7 - } - return time.Now().AddDate(0, 0, - daydiff(time.Monday)+diff) - }, - "month": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(0, diff, -t.Day()+1) - }, - "year": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, 0, -t.YearDay()+1) - }, - "monday": func(this bool) time.Time { - diff := 0 - if !this { - diff = -7 - } - return time.Now().AddDate(0, 0, - daydiff(time.Monday)+diff) - }, - "tuesday": func(this bool) time.Time { - diff := 0 - if !this { - diff = -7 - } - return time.Now().AddDate(0, 0, - daydiff(time.Tuesday)+diff) - }, - "wednesday": func(this bool) time.Time { - diff := 0 - if !this { - diff = -7 - } - return time.Now().AddDate(0, 0, - daydiff(time.Wednesday)+diff) - }, - "thursday": func(this bool) time.Time { - diff := 0 - if !this { - diff = -7 - } - return time.Now().AddDate(0, 0, - daydiff(time.Thursday)+diff) - }, - "friday": func(this bool) time.Time { - diff := 0 - if !this { - diff = -7 - } - return time.Now().AddDate(0, 0, - daydiff(time.Friday)+diff) - }, - "saturday": func(this bool) time.Time { - diff := 0 - if !this { - diff = -7 - } - return time.Now().AddDate(0, 0, - daydiff(time.Saturday)+diff) - }, - "sunday": func(this bool) time.Time { - diff := 0 - if !this { - diff = -7 - } - return time.Now().AddDate(0, 0, - daydiff(time.Sunday)+diff) - }, - "january": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.January), -t.Day()+1) - }, - "february": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.February), -t.Day()+1) - }, - "march": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.March), -t.Day()+1) - }, - "april": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.April), -t.Day()+1) - }, - "may": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.May), -t.Day()+1) - }, - "june": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.June), -t.Day()+1) - }, - "july": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.July), -t.Day()+1) - }, - "august": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.August), -t.Day()+1) - }, - "september": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.September), -t.Day()+1) - }, - "october": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.October), -t.Day()+1) - }, - "november": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.November), -t.Day()+1) - }, - "december": func(this bool) time.Time { - diff := 0 - if !this { - diff = -1 - } - t := time.Now() - return t.AddDate(diff, - monthdiff(time.December), -t.Day()+1) - }, -} - -func daydiff(d time.Weekday) int { - daydiff := d - time.Now().Weekday() - if daydiff > 0 { - return int(daydiff) - 7 - } - return int(daydiff) -} - -func monthdiff(d time.Month) int { - monthdiff := d - time.Now().Month() - if monthdiff > 0 { - return int(monthdiff) - 12 - } - return int(monthdiff) -} - -// translate translates regular time words into date strings -func translate(s string) (time.Time, error) { - if s == "" { - return time.Now(), fmt.Errorf("empty string") - } - log.Tracef("input: %s", s) - s0 := s - - // if next characters is integer, then parse a relative date - if '0' <= s[0] && s[0] <= '9' && hasUnit(s) { - relDate, err := ParseRelativeDate(s) - if err != nil { - log.Errorf("could not parse relative date from '%s': %v", - s0, err) - } else { - log.Tracef("relative date: translated to %v from %s", - relDate, s0) - return bod(relDate.Apply(time.Now())), nil - } - } - - // consult dictionary for terms translation - s, this, hasPrefix := handlePrefix(s) - for term, dateFn := range dict { - if term == "month" && !hasPrefix { - continue - } - if strings.Contains(term, s) { - log.Tracef("dictionary: translated to %s from %s", - term, s0) - return bod(dateFn(this)), nil - } - } - - // this is a regular date, parse it in the normal format - log.Infof("parse: translates %s to regular format", s0) - return time.Parse(dateFmt, s) -} - -// bod returns the begin of the day -func bod(t time.Time) time.Time { - y, m, d := t.Date() - return time.Date(y, m, d, 0, 0, 0, 0, t.Location()) -} - -func handlePrefix(s string) (string, bool, bool) { - var hasPrefix bool - this := true - if strings.HasPrefix(s, "this") { - hasPrefix = true - s = strings.TrimPrefix(s, "this") - } - if strings.HasPrefix(s, "last") { - hasPrefix = true - this = false - s = strings.TrimPrefix(s, "last") - } - return s, this, hasPrefix -} - -func cleanInput(s string) string { - s = strings.ToLower(s) - s = strings.ReplaceAll(s, " ", "") - s = strings.ReplaceAll(s, "_", "") - return s -} - -// RelDate is the relative date in the past, e.g. yesterday would be -// represented as RelDate{0,0,1}. -type RelDate struct { - Year uint - Month uint - Day uint -} - -func (d RelDate) Apply(t time.Time) time.Time { - return t.AddDate(-int(d.Year), -int(d.Month), -int(d.Day)) -} - -// ParseRelativeDate parses a string of relative terms into a DateAdd. -// -// Syntax: N (year|month|week|day) .. -// -// The following are valid inputs: -// 5weeks1day -// 5w1d -// -// Adapted from the Go stdlib in src/time/format.go -func ParseRelativeDate(s string) (RelDate, error) { - s0 := s - s = cleanInput(s) - var da RelDate - for s != "" { - var n uint - - var err error - - // expect an integer - if !('0' <= s[0] && s[0] <= '9') { - return da, fmt.Errorf("not a valid relative term: %s", - s0) - } - - // consume integer - n, s, err = leadingInt(s) - if err != nil { - return da, fmt.Errorf("cannot read integer in %s", - s0) - } - - // consume the units - i := 0 - for ; i < len(s); i++ { - c := s[i] - if '0' <= c && c <= '9' { - break - } - } - if i == 0 { - return da, fmt.Errorf("missing unit in %s", s0) - } - - u := s[:i] - s = s[i:] - switch u[0] { - case 'y': - da.Year += n - case 'm': - da.Month += n - case 'w': - da.Day += 7 * n - case 'd': - da.Day += n - default: - return da, fmt.Errorf("unknown unit %s in %s", u, s0) - } - - } - - return da, nil -} - -func hasUnit(s string) (has bool) { - for _, u := range "ymwd" { - if strings.Contains(s, string(u)) { - return true - } - } - return false -} - -// leadingInt parses and returns the leading integer in s. -// -// Adapted from the Go stdlib in src/time/format.go -func leadingInt(s string) (x uint, rem string, err error) { - i := 0 - for ; i < len(s); i++ { - c := s[i] - if c < '0' || c > '9' { - break - } - x = x*10 + uint(c) - '0' - } - return x, s[i:], nil -} - -func ensureRangeOp(s string) string { - if strings.Contains(s, "..") { - return s - } - s0 := s - for _, m := range []string{"this", "last"} { - for _, u := range []string{"year", "month", "week"} { - term := m + u - if strings.Contains(s, term) { - if m == "last" { - return s0 + "..this" + u - } else { - return s0 + ".." - } - } - } - } - return s0 -} diff --git a/worker/lib/daterange_test.go b/worker/lib/daterange_test.go deleted file mode 100644 index 807e7ac7..00000000 --- a/worker/lib/daterange_test.go +++ /dev/null @@ -1,97 +0,0 @@ -package lib_test - -import ( - "reflect" - "testing" - "time" - - "git.sr.ht/~rjarry/aerc/worker/lib" -) - -func TestParseDateRange(t *testing.T) { - dateFmt := "2006-01-02" - parse := func(s string) time.Time { d, _ := time.Parse(dateFmt, s); return d } - tests := []struct { - s string - start time.Time - end time.Time - }{ - { - s: "2022-11-01", - start: parse("2022-11-01"), - end: parse("2022-11-02"), - }, - { - s: "2022-11-01..", - start: parse("2022-11-01"), - }, - { - s: "..2022-11-05", - end: parse("2022-11-05"), - }, - { - s: "2022-11-01..2022-11-05", - start: parse("2022-11-01"), - end: parse("2022-11-05"), - }, - } - - for _, test := range tests { - start, end, err := lib.ParseDateRange(test.s) - if err != nil { - t.Errorf("ParseDateRange return error for %s: %v", - test.s, err) - } - - if !start.Equal(test.start) { - t.Errorf("wrong start date; expected %v, got %v", - test.start, start) - } - - if !end.Equal(test.end) { - t.Errorf("wrong end date; expected %v, got %v", - test.end, end) - } - } -} - -func TestParseRelativeDate(t *testing.T) { - tests := []struct { - s string - want lib.RelDate - }{ - { - s: "5 weeks 1 day", - want: lib.RelDate{Year: 0, Month: 0, Day: 5*7 + 1}, - }, - { - s: "5_weeks 1_day", - want: lib.RelDate{Year: 0, Month: 0, Day: 5*7 + 1}, - }, - { - s: "5weeks1day", - want: lib.RelDate{Year: 0, Month: 0, Day: 5*7 + 1}, - }, - { - s: "5w1d", - want: lib.RelDate{Year: 0, Month: 0, Day: 5*7 + 1}, - }, - { - s: "5y4m3w1d", - want: lib.RelDate{Year: 5, Month: 4, Day: 3*7 + 1}, - }, - } - - for _, test := range tests { - da, err := lib.ParseRelativeDate(test.s) - if err != nil { - t.Errorf("ParseRelativeDate return error for %s: %v", - test.s, err) - } - - if !reflect.DeepEqual(da, test.want) { - t.Errorf("results don't match. expected %v, got %v", - test.want, da) - } - } -} diff --git a/worker/lib/headers.go b/worker/lib/headers.go new file mode 100644 index 00000000..391d1d2a --- /dev/null +++ b/worker/lib/headers.go @@ -0,0 +1,29 @@ +package lib + +import ( + "strings" + + "github.com/emersion/go-message/mail" +) + +// LimitHeaders returns a new Header with the specified headers included or +// excluded +func LimitHeaders(hdr *mail.Header, fields []string, exclude bool) *mail.Header { + fieldMap := make(map[string]struct{}, len(fields)) + for _, f := range fields { + fieldMap[strings.ToLower(f)] = struct{}{} + } + nh := &mail.Header{} + curFields := hdr.Fields() + for curFields.Next() { + key := strings.ToLower(curFields.Key()) + _, present := fieldMap[key] + // XOR exclude and present. When they are equal, it means we + // should not add the header to the new header struct + if exclude == present { + continue + } + nh.Add(key, curFields.Value()) + } + return nh +} diff --git a/worker/lib/parse.go b/worker/lib/parse.go deleted file mode 100644 index 8e20e904..00000000 --- a/worker/lib/parse.go +++ /dev/null @@ -1,415 +0,0 @@ -package lib - -import ( - "bufio" - "bytes" - "errors" - "fmt" - "io" - "os" - "regexp" - "strings" - "time" - - "git.sr.ht/~rjarry/aerc/lib/parse" - "git.sr.ht/~rjarry/aerc/log" - "git.sr.ht/~rjarry/aerc/models" - "github.com/emersion/go-message" - _ "github.com/emersion/go-message/charset" - "github.com/emersion/go-message/mail" -) - -// RFC 1123Z regexp -var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` + - `(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` + - `([0-9]{4})\s([0-9]{2}):([0-9]{2})(:([0-9]{2}))?\s([\+|\-][0-9]{4})`) - -func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) { - if len(index) == 0 { - // non multipart, simply return everything - return bufReader(e) - } - if mpr := e.MultipartReader(); mpr != nil { - idx := 0 - for { - idx++ - part, err := mpr.NextPart() - if err != nil { - return nil, err - } - if idx == index[0] { - rest := index[1:] - if len(rest) < 1 { - return bufReader(part) - } - return FetchEntityPartReader(part, index[1:]) - } - } - } - return nil, fmt.Errorf("FetchEntityPartReader: unexpected code reached") -} - -// TODO: the UI doesn't seem to like readers which aren't buffers -func bufReader(e *message.Entity) (io.Reader, error) { - var buf bytes.Buffer - if _, err := io.Copy(&buf, e.Body); err != nil { - return nil, err - } - return &buf, nil -} - -// split a MIME type into its major and minor parts -func splitMIME(m string) (string, string) { - parts := strings.Split(m, "/") - if len(parts) != 2 { - return parts[0], "" - } - return parts[0], parts[1] -} - -func fixContentType(h message.Header) (string, map[string]string) { - ct, rest := h.Get("Content-Type"), "" - if i := strings.Index(ct, ";"); i > 0 { - ct, rest = ct[:i], ct[i:] - } - - // check if there are quotes around the content type - if strings.Contains(ct, "\"") { - header := strings.ReplaceAll(ct, "\"", "") - if rest != "" { - header += rest - } - h.Set("Content-Type", header) - if contenttype, params, err := h.ContentType(); err == nil { - return contenttype, params - } - } - - // if all else fails, return text/plain - return "text/plain", nil -} - -func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) { - var body models.BodyStructure - contentType, ctParams, err := e.Header.ContentType() - if err != nil { - // try to fix the error; if all measures fail, then return a - // text/plain content type to display at least plaintext - contentType, ctParams = fixContentType(e.Header) - } - - mimeType, mimeSubType := splitMIME(contentType) - body.MIMEType = mimeType - body.MIMESubType = mimeSubType - body.Params = ctParams - body.Description = e.Header.Get("content-description") - body.Encoding = e.Header.Get("content-transfer-encoding") - if cd := e.Header.Get("content-disposition"); cd != "" { - contentDisposition, cdParams, err := e.Header.ContentDisposition() - if err != nil { - return nil, fmt.Errorf("could not parse content disposition: %w", err) - } - body.Disposition = contentDisposition - body.DispositionParams = cdParams - } - body.Parts = []*models.BodyStructure{} - if mpr := e.MultipartReader(); mpr != nil { - for { - part, err := mpr.NextPart() - if errors.Is(err, io.EOF) { - return &body, nil - } else if err != nil { - return nil, err - } - ps, err := ParseEntityStructure(part) - if err != nil { - return nil, fmt.Errorf("could not parse child entity structure: %w", err) - } - body.Parts = append(body.Parts, ps) - } - } - return &body, nil -} - -var DateParseError = errors.New("date parsing failed") - -func parseEnvelope(h *mail.Header) (*models.Envelope, error) { - from, err := parseAddressList(h, "from") - if err != nil { - return nil, fmt.Errorf("could not read from address: %w", err) - } - to, err := parseAddressList(h, "to") - if err != nil { - return nil, fmt.Errorf("could not read to address: %w", err) - } - cc, err := parseAddressList(h, "cc") - if err != nil { - return nil, fmt.Errorf("could not read cc address: %w", err) - } - bcc, err := parseAddressList(h, "bcc") - if err != nil { - return nil, fmt.Errorf("could not read bcc address: %w", err) - } - replyTo, err := parseAddressList(h, "reply-to") - if err != nil { - return nil, fmt.Errorf("could not read reply-to address: %w", err) - } - subj, err := h.Subject() - if err != nil { - return nil, fmt.Errorf("could not read subject: %w", err) - } - msgID, err := h.MessageID() - if err != nil { - // proper parsing failed, so fall back to whatever is there - msgID, err = h.Text("message-id") - if err != nil { - return nil, err - } - } - var irt string - irtList := parse.MsgIDList(h, "in-reply-to") - if len(irtList) > 0 { - irt = irtList[0] - } - date, err := parseDate(h) - if err != nil { - // still return a valid struct plus a sentinel date parsing error - // if only the date parsing failed - err = fmt.Errorf("%w: %v", DateParseError, err) //nolint:errorlint // can only use %w once - } - return &models.Envelope{ - Date: date, - Subject: subj, - MessageId: msgID, - From: from, - ReplyTo: replyTo, - To: to, - Cc: cc, - Bcc: bcc, - InReplyTo: irt, - }, err -} - -// parseDate tries to parse the date from the Date header with non std formats -// if this fails it tries to parse the received header as well -func parseDate(h *mail.Header) (time.Time, error) { - t, err := h.Date() - if err == nil { - return t, nil - } - text, err := h.Text("date") - // sometimes, no error occurs but the date is empty. - // In this case, guess time from received header field - if err != nil || text == "" { - t, err := parseReceivedHeader(h) - if err == nil { - return t, nil - } - } - layouts := []string{ - // X-Mailer: EarthLink Zoo Mail 1.0 - "Mon, _2 Jan 2006 15:04:05 -0700 (GMT-07:00)", - } - for _, layout := range layouts { - if t, err := time.Parse(layout, text); err == nil { - return t, nil - } - } - // still no success, try the received header as a last resort - t, err = parseReceivedHeader(h) - if err != nil { - return time.Time{}, fmt.Errorf("unrecognized date format: %s", text) - } - return t, nil -} - -func parseReceivedHeader(h *mail.Header) (time.Time, error) { - guess, err := h.Text("received") - if err != nil { - return time.Time{}, fmt.Errorf("received header not parseable: %w", - err) - } - return time.Parse(time.RFC1123Z, dateRe.FindString(guess)) -} - -func parseAddressList(h *mail.Header, key string) ([]*mail.Address, error) { - hdr, err := h.Text(key) - if err != nil && !message.IsUnknownCharset(err) { - return nil, err - } - if hdr == "" { - return nil, nil - } - add, err := mail.ParseAddressList(hdr) - if err != nil { - return []*mail.Address{{Name: hdr}}, nil - } - return add, err -} - -// RawMessage is an interface that describes a raw message -type RawMessage interface { - NewReader() (io.ReadCloser, error) - ModelFlags() (models.Flags, error) - Labels() ([]string, error) - UID() uint32 -} - -// MessageInfo populates a models.MessageInfo struct for the message. -// based on the reader returned by NewReader -func MessageInfo(raw RawMessage) (*models.MessageInfo, error) { - var parseErr error - r, err := raw.NewReader() - if err != nil { - return nil, err - } - defer r.Close() - msg, err := ReadMessage(r) - if err != nil { - return nil, fmt.Errorf("could not read message: %w", err) - } - bs, err := ParseEntityStructure(msg) - if errors.As(err, new(message.UnknownEncodingError)) { - parseErr = err - } else if err != nil { - return nil, fmt.Errorf("could not get structure: %w", err) - } - h := &mail.Header{Header: msg.Header} - env, err := parseEnvelope(h) - if err != nil && !errors.Is(err, DateParseError) { - return nil, fmt.Errorf("could not parse envelope: %w", err) - // if only the date parsing failed we still get the rest of the - // envelop structure in a valid state. - // Date parsing errors are fairly common and it's better to be - // slightly off than to not be able to read the mails at all - // hence we continue here - } - recDate, _ := parseReceivedHeader(h) - if recDate.IsZero() { - // better than nothing, if incorrect - recDate = env.Date - } - flags, err := raw.ModelFlags() - if err != nil { - return nil, err - } - labels, err := raw.Labels() - if err != nil { - return nil, err - } - return &models.MessageInfo{ - BodyStructure: bs, - Envelope: env, - Flags: flags, - Labels: labels, - InternalDate: recDate, - RFC822Headers: h, - Size: 0, - Uid: raw.UID(), - Error: parseErr, - }, nil -} - -// LimitHeaders returns a new Header with the specified headers included or -// excluded -func LimitHeaders(hdr *mail.Header, fields []string, exclude bool) *mail.Header { - fieldMap := make(map[string]struct{}, len(fields)) - for _, f := range fields { - fieldMap[strings.ToLower(f)] = struct{}{} - } - nh := &mail.Header{} - curFields := hdr.Fields() - for curFields.Next() { - key := strings.ToLower(curFields.Key()) - _, present := fieldMap[key] - // XOR exclude and present. When they are equal, it means we - // should not add the header to the new header struct - if exclude == present { - continue - } - nh.Add(key, curFields.Value()) - } - return nh -} - -// MessageHeaders populates a models.MessageInfo struct for the message. -// based on the reader returned by NewReader. Minimal information is included. -// There is no body structure or RFC822Headers set -func MessageHeaders(raw RawMessage) (*models.MessageInfo, error) { - var parseErr error - r, err := raw.NewReader() - if err != nil { - return nil, err - } - defer r.Close() - msg, err := ReadMessage(r) - if err != nil { - return nil, fmt.Errorf("could not read message: %w", err) - } - h := &mail.Header{Header: msg.Header} - env, err := parseEnvelope(h) - if err != nil && !errors.Is(err, DateParseError) { - return nil, fmt.Errorf("could not parse envelope: %w", err) - // if only the date parsing failed we still get the rest of the - // envelop structure in a valid state. - // Date parsing errors are fairly common and it's better to be - // slightly off than to not be able to read the mails at all - // hence we continue here - } - recDate, _ := parseReceivedHeader(h) - if recDate.IsZero() { - // better than nothing, if incorrect - recDate = env.Date - } - flags, err := raw.ModelFlags() - if err != nil { - return nil, err - } - labels, err := raw.Labels() - if err != nil { - return nil, err - } - return &models.MessageInfo{ - Envelope: env, - Flags: flags, - Labels: labels, - InternalDate: recDate, - Refs: parse.MsgIDList(h, "references"), - Size: 0, - Uid: raw.UID(), - Error: parseErr, - }, nil -} - -// NewCRLFReader returns a reader with CRLF line endings -func NewCRLFReader(r io.Reader) io.Reader { - var buf bytes.Buffer - scanner := bufio.NewScanner(r) - for scanner.Scan() { - buf.WriteString(scanner.Text() + "\r\n") - } - return &buf -} - -// ReadMessage is a wrapper for the message.Read function to read a message -// from r. The message's encoding and charset are automatically decoded to -// UTF-8. If an unknown charset is encountered, the error is logged but a nil -// error is returned since the entity object can still be read. -func ReadMessage(r io.Reader) (*message.Entity, error) { - entity, err := message.Read(r) - if message.IsUnknownCharset(err) { - log.Warnf("unknown charset encountered") - } else if err != nil { - return nil, fmt.Errorf("could not read message: %w", err) - } - return entity, nil -} - -// FileSize returns the size of the file specified by name -func FileSize(name string) (uint32, error) { - fileInfo, err := os.Stat(name) - if err != nil { - return 0, fmt.Errorf("failed to obtain fileinfo: %w", err) - } - return uint32(fileInfo.Size()), nil -} diff --git a/worker/lib/parse_test.go b/worker/lib/parse_test.go deleted file mode 100644 index 4bf95431..00000000 --- a/worker/lib/parse_test.go +++ /dev/null @@ -1,83 +0,0 @@ -package lib - -import ( - "io" - "os" - "path/filepath" - "testing" - - "git.sr.ht/~rjarry/aerc/models" -) - -func TestMessageInfoParser(t *testing.T) { - rootDir := "testdata/message/valid" - msgFiles, err := os.ReadDir(rootDir) - die(err) - - for _, fi := range msgFiles { - if fi.IsDir() { - continue - } - - p := fi.Name() - t.Run(p, func(t *testing.T) { - m := newMockRawMessageFromPath(filepath.Join(rootDir, p)) - mi, err := MessageInfo(m) - if err != nil { - t.Fatal("Failed to create MessageInfo with:", err) - } - - if perr := mi.Error; perr != nil { - t.Fatal("Expected no parsing error, but got:", mi.Error) - } - }) - } -} - -func TestMessageInfoHandledError(t *testing.T) { - rootDir := "testdata/message/invalid" - msgFiles, err := os.ReadDir(rootDir) - die(err) - - for _, fi := range msgFiles { - if fi.IsDir() { - continue - } - - p := fi.Name() - t.Run(p, func(t *testing.T) { - m := newMockRawMessageFromPath(filepath.Join(rootDir, p)) - mi, err := MessageInfo(m) - if err != nil { - t.Fatal(err) - } - - if perr := mi.Error; perr == nil { - t.Fatal("Expected MessageInfo.Error, got none") - } - }) - } -} - -type mockRawMessage struct { - path string -} - -func newMockRawMessageFromPath(p string) *mockRawMessage { - return &mockRawMessage{ - path: p, - } -} - -func (m *mockRawMessage) NewReader() (io.ReadCloser, error) { - return os.Open(m.path) -} -func (m *mockRawMessage) ModelFlags() (models.Flags, error) { return 0, nil } -func (m *mockRawMessage) Labels() ([]string, error) { return nil, nil } -func (m *mockRawMessage) UID() uint32 { return 0 } - -func die(err error) { - if err != nil { - panic(err) - } -} diff --git a/worker/lib/search.go b/worker/lib/search.go index 11fc1b11..cd372aae 100644 --- a/worker/lib/search.go +++ b/worker/lib/search.go @@ -9,6 +9,8 @@ import ( "git.sr.ht/~sircmpwn/getopt" + "git.sr.ht/~rjarry/aerc/lib/parse" + "git.sr.ht/~rjarry/aerc/lib/rfc822" "git.sr.ht/~rjarry/aerc/log" "git.sr.ht/~rjarry/aerc/models" ) @@ -60,7 +62,7 @@ func GetSearchCriteria(args []string) (*searchCriteria, error) { case 'b': body = true case 'd': - start, end, err := ParseDateRange(opt.Value) + start, end, err := parse.DateRange(opt.Value) if err != nil { log.Errorf("failed to parse start date: %v", err) continue @@ -99,7 +101,7 @@ func getParsedFlag(name string) models.Flags { return f } -func Search(messages []RawMessage, criteria *searchCriteria) ([]uint32, error) { +func Search(messages []rfc822.RawMessage, criteria *searchCriteria) ([]uint32, error) { requiredParts := getRequiredParts(criteria) matchedUids := []uint32{} @@ -117,7 +119,7 @@ func Search(messages []RawMessage, criteria *searchCriteria) ([]uint32, error) { // searchMessage executes the search criteria for the given RawMessage, // returns true if search succeeded -func searchMessage(message RawMessage, criteria *searchCriteria, +func searchMessage(message rfc822.RawMessage, criteria *searchCriteria, parts MsgParts, ) (bool, error) { // setup parts of the message to use in the search @@ -137,7 +139,7 @@ func searchMessage(message RawMessage, criteria *searchCriteria, } } if parts&HEADER > 0 || parts&DATE > 0 { - header, err = MessageInfo(message) + header, err = rfc822.MessageInfo(message) if err != nil { return false, err } diff --git a/worker/lib/size.go b/worker/lib/size.go new file mode 100644 index 00000000..f00437c3 --- /dev/null +++ b/worker/lib/size.go @@ -0,0 +1,15 @@ +package lib + +import ( + "fmt" + "os" +) + +// FileSize returns the size of the file specified by name +func FileSize(name string) (uint32, error) { + fileInfo, err := os.Stat(name) + if err != nil { + return 0, fmt.Errorf("failed to obtain fileinfo: %w", err) + } + return uint32(fileInfo.Size()), nil +} diff --git a/worker/lib/testdata/message/invalid/hexa b/worker/lib/testdata/message/invalid/hexa deleted file mode 100644 index 56b352ff..00000000 --- a/worker/lib/testdata/message/invalid/hexa +++ /dev/null @@ -1,26 +0,0 @@ -Subject: Confirmation Needed gUdVJQBhsd -Content-Type: multipart/mixed; boundary="Nextpart_1Q2YJhd197991794467076Pgfa" -To: -From: ""REGISTRAR"" - ---Nextpart_1Q2YJhd197991794467076Pgfa -Content-Type: multipart/parallel; boundary="sg54sd54g54sdg54" - ---sg54sd54g54sdg54 -Content-Type: multipart/alternative; boundary="54qgf54q546f46qsf46qsf" - ---54qgf54q546f46qsf46qsf -Content-Type: text/plain; charset=utf-8 -Content-Transfer-Encoding: Hexa - - - ---54qgf54q546f46qsf46qsf -Content-Type: text/html; charset=utf-8 - - -

Congratulations Netflix Customer!


- - - ---Nextpart_1Q2YJhd197991794467076Pgfa-- diff --git a/worker/lib/testdata/message/valid/quoted-mime-type b/worker/lib/testdata/message/valid/quoted-mime-type deleted file mode 100644 index d9af28a2..00000000 --- a/worker/lib/testdata/message/valid/quoted-mime-type +++ /dev/null @@ -1,45 +0,0 @@ -Subject: Your ECOLINES tickets -X-PHP-Originating-Script: 33:functions.inc.php -From: ECOLINES -Content-Type: multipart/mixed; - boundary="PHP-mixed-ba319678ca12656cfb8cd46e736ce09d" -Message-Id: -Date: Sun, 29 May 2022 15:53:44 +0300 - ---PHP-mixed-ba319678ca12656cfb8cd46e736ce09d -Content-Type: multipart/alternative; boundary="PHP-alt-ba319678ca12656cfb8cd46e736ce09d" - ---PHP-alt-ba319678ca12656cfb8cd46e736ce09d -Content-Type: text/plain; charset="UTF-8" -Content-Transfer-Encoding: 7bit - -Your tickets are attached to this message. Also You can print out Your tickets from our website www.ecolines.net -… - ---PHP-alt-ba319678ca12656cfb8cd46e736ce09d -Content-Type: text/html; charset="UTF-8" -Content-Transfer-Encoding: 7bit - - -… - ---PHP-alt-ba319678ca12656cfb8cd46e736ce09d-- - ---PHP-mixed-ba319678ca12656cfb8cd46e736ce09d -Content-Type: "application/pdf"; name="17634428.pdf" -Content-Disposition: attachment; filename="17634428.pdf" -Content-Transfer-Encoding: base64 - -JVBERi0xLjQKMSAwIG9iago8PAovVGl0bGUgKP7/AFkAbwB1AHIAIAB0AGkAYwBrAGUAdCkKL0Ny -… - ---PHP-mixed-ba319678ca12656cfb8cd46e736ce09d -Content-Type: "application/pdf"; name="invoice-6385490.pdf" -Content-Disposition: attachment; filename="invoice-6385490.pdf" -Content-Transfer-Encoding: base64 - -JVBERi0xLjQKMSAwIG9iago8PAovVGl0bGUgKP7/AEkAbgB2AG8AaQBjAGUpCi9DcmVhdG9yICj+ -… - ---PHP-mixed-ba319678ca12656cfb8cd46e736ce09d-- -- cgit