about | summary | refs | log | tree | commit | diff | stats
path: root/lib
diff options
context:
space:
mode:
author: Vitaly Ovchinnikov <v@ovch.ru> 2023-12-30 09:43:52 +0000
committer: Robin Jarry <robin@jarry.cc> 2023-12-30 15:42:09 +0100
commit: 49a8cf10935f2792e1ce825e4fc31d1878b3d08c (patch)
tree: 15b7d03675d81fcc442fd21c0e917cc6eae13d60 /lib
parent: f92935c9c395ce4ee38fdc734c7ba93133a369e8 (diff)
download: aerc-49a8cf10935f2792e1ce825e4fc31d1878b3d08c.tar.gz
rfc822: improve dates parsing
Provide better parsing of email dates that carry a timezone name but no numeric offset. Add unit tests to check the new behaviour.

Signed-off-by: Vitaly Ovchinnikov <v@ovch.ru>
Acked-by: Robin Jarry <robin@jarry.cc>
Diffstat (limited to 'lib')
-rw-r--r-- lib/rfc822/message.go 57
-rw-r--r-- lib/rfc822/message_test.go 44
2 files changed, 91 insertions, 10 deletions
diff --git a/lib/rfc822/message.go b/lib/rfc822/message.go
index 979d4595..7b0d0552 100644
--- a/lib/rfc822/message.go
+++ b/lib/rfc822/message.go
@@ -189,14 +189,45 @@ func parseEnvelope(h *mail.Header) (*models.Envelope, error) {
}, err
}
+// If the date is formatted like "... -0500 (EST)", the parser may take the EST
+// part and ignore the numeric offset. It can then fail to resolve what EST means
+// unless the proper locale is loaded, leaving a named zone with a zero offset.
+// This function detects such values so callers can treat them as untrusted.
+// https://stackoverflow.com/questions/49084316/why-doesnt-gos-time-parse-parse-the-timezone-identifier
+func isDateOK(t time.Time) bool {
+ name, offset := t.Zone()
+
+ // non-zero offsets are fine
+ if offset != 0 {
+ return true
+ }
+
+ // zero offset is ok if that's UTC or GMT
+ if name == "UTC" || name == "GMT" || name == "" {
+ return true
+ }
+
+ // otherwise this date should not be trusted
+ return false
+}
+
// parseDate tries to parse the date from the Date header with non std formats
// if this fails it tries to parse the received header as well
func parseDate(h *mail.Header) (time.Time, error) {
+ // here we store the best parsed time we have so far
+ // if we find no "correct" time, we'll use that
+ bestDate := time.Time{}
+
+ // trying the easy way
t, err := h.Date()
if err == nil {
- return t, nil
+ if isDateOK(t) {
+ return t, nil
+ }
+ bestDate = t
}
text, err := h.Text("date")
+
// sometimes, no error occurs but the date is empty.
// In this case, guess time from received header field
if err != nil || text == "" {
@@ -211,15 +242,29 @@ func parseDate(h *mail.Header) (time.Time, error) {
}
for _, layout := range layouts {
if t, err := time.Parse(layout, text); err == nil {
- return t, nil
+ if isDateOK(t) {
+ return t, nil
+ }
+ bestDate = t
}
}
- // still no success, try the received header as a last resort
+
+ // still no success, try the received header
t, err = parseReceivedHeader(h)
- if err != nil {
- return time.Time{}, fmt.Errorf("unrecognized date format: %s", text)
+ if err == nil {
+ if isDateOK(t) {
+ return t, nil
+ }
+ bestDate = t
}
- return t, nil
+
+ // do we have at least something?
+ if !bestDate.IsZero() {
+ return bestDate, nil
+ }
+
+ // sad...
+ return time.Time{}, fmt.Errorf("unrecognized date format: %s", text)
}
func parseReceivedHeader(h *mail.Header) (time.Time, error) {
diff --git a/lib/rfc822/message_test.go b/lib/rfc822/message_test.go
index 8730afe2..fd2e8265 100644
--- a/lib/rfc822/message_test.go
+++ b/lib/rfc822/message_test.go
@@ -1,13 +1,15 @@
-package rfc822_test
+package rfc822
import (
"io"
"os"
"path/filepath"
"testing"
+ "time"
- "git.sr.ht/~rjarry/aerc/lib/rfc822"
"git.sr.ht/~rjarry/aerc/models"
+ "github.com/emersion/go-message/mail"
+ "github.com/stretchr/testify/require"
)
func TestMessageInfoParser(t *testing.T) {
@@ -23,7 +25,7 @@ func TestMessageInfoParser(t *testing.T) {
p := fi.Name()
t.Run(p, func(t *testing.T) {
m := newMockRawMessageFromPath(filepath.Join(rootDir, p))
- mi, err := rfc822.MessageInfo(m)
+ mi, err := MessageInfo(m)
if err != nil {
t.Fatal("Failed to create MessageInfo with:", err)
}
@@ -48,7 +50,7 @@ func TestMessageInfoHandledError(t *testing.T) {
p := fi.Name()
t.Run(p, func(t *testing.T) {
m := newMockRawMessageFromPath(filepath.Join(rootDir, p))
- mi, err := rfc822.MessageInfo(m)
+ mi, err := MessageInfo(m)
if err != nil {
t.Fatal(err)
}
@@ -60,6 +62,40 @@ func TestMessageInfoHandledError(t *testing.T) {
}
}
+func TestParseMessageDate(t *testing.T) {
+ // we use different times for "Date" and "Received" fields to make sure the right one is parsed
+ tests := []struct {
+ date string
+ received string
+ utc time.Time
+ }{
+ {
+ date: "Fri, 22 Dec 2023 11:19:01 +0000",
+ received: "from aaa.bbb.com for <user@host.com>; Fri, 22 Dec 2023 06:19:02 -0500 (EST)",
+ utc: time.Date(2023, time.December, 22, 11, 19, 1, 0, time.UTC),
+ },
+ {
+ date: "Fri, 29 Dec 2023 14:06:37 +0100",
+ received: "from somewhere.com for a@b.c; Fri, 30 Dec 2023 4:06:43 +1300",
+ utc: time.Date(2023, time.December, 29, 13, 6, 37, 0, time.UTC),
+ },
+ {
+ date: "Fri, 29 Dec 2023 00:51:00 EST",
+ received: "by hostname.com; Fri, 29 Dec 2023 00:51:33 -0500 (EST)",
+ utc: time.Date(2023, time.December, 29, 5, 51, 33, 0, time.UTC),
+ },
+ }
+
+ for _, test := range tests {
+ h := mail.Header{}
+ h.SetText("Date", test.date)
+ h.SetText("Received", test.received)
+ res, err := parseDate(&h)
+ require.Nil(t, err)
+ require.Equal(t, test.utc, res.UTC())
+ }
+}
+
type mockRawMessage struct {
path string
}