diff options
author | Vitaly Ovchinnikov <v@ovch.ru> | 2023-12-30 09:43:52 +0000 |
---|---|---|
committer | Robin Jarry <robin@jarry.cc> | 2023-12-30 15:42:09 +0100 |
commit | 49a8cf10935f2792e1ce825e4fc31d1878b3d08c (patch) | |
tree | 15b7d03675d81fcc442fd21c0e917cc6eae13d60 /lib | |
parent | f92935c9c395ce4ee38fdc734c7ba93133a369e8 (diff) | |
download | aerc-49a8cf10935f2792e1ce825e4fc31d1878b3d08c.tar.gz |
rfc822: improve dates parsing
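Improve parsing of email dates that carry a timezone name (e.g. EST) but
no numeric offset. Such a date is no longer trusted right away: the other
layouts and the Received header are tried first, and it is only used as a
fallback when nothing better is found. Add unit tests to check the new
behaviour.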
Signed-off-by: Vitaly Ovchinnikov <v@ovch.ru>
Acked-by: Robin Jarry <robin@jarry.cc>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/rfc822/message.go | 57 | ||||
-rw-r--r-- | lib/rfc822/message_test.go | 44 |
2 files changed, 91 insertions, 10 deletions
diff --git a/lib/rfc822/message.go b/lib/rfc822/message.go index 979d4595..7b0d0552 100644 --- a/lib/rfc822/message.go +++ b/lib/rfc822/message.go @@ -189,14 +189,45 @@ func parseEnvelope(h *mail.Header) (*models.Envelope, error) { }, err } +// If the date is formatted like ...... -0500 (EST), parser takes the EST part +// and ignores the numeric offset. Then it might easily fail to guess what EST +// means unless the proper locale is loaded. This function checks that, so such +// time values can be safely ignored +// https://stackoverflow.com/questions/49084316/why-doesnt-gos-time-parse-parse-the-timezone-identifier +func isDateOK(t time.Time) bool { + name, offset := t.Zone() + + // non-zero offsets are fine + if offset != 0 { + return true + } + + // zero offset is ok if that's UTC or GMT + if name == "UTC" || name == "GMT" || name == "" { + return true + } + + // otherwise this date should not be trusted + return false +} + // parseDate tries to parse the date from the Date header with non std formats // if this fails it tries to parse the received header as well func parseDate(h *mail.Header) (time.Time, error) { + // here we store the best parsed time we have so far + // if we find no "correct" time, we'll use that + bestDate := time.Time{} + + // trying the easy way t, err := h.Date() if err == nil { - return t, nil + if isDateOK(t) { + return t, nil + } + bestDate = t } text, err := h.Text("date") + // sometimes, no error occurs but the date is empty. 
// In this case, guess time from received header field if err != nil || text == "" { @@ -211,15 +242,29 @@ func parseDate(h *mail.Header) (time.Time, error) { } for _, layout := range layouts { if t, err := time.Parse(layout, text); err == nil { - return t, nil + if isDateOK(t) { + return t, nil + } + bestDate = t } } - // still no success, try the received header as a last resort + + // still no success, try the received header t, err = parseReceivedHeader(h) - if err != nil { - return time.Time{}, fmt.Errorf("unrecognized date format: %s", text) + if err == nil { + if isDateOK(t) { + return t, nil + } + bestDate = t } - return t, nil + + // do we have at least something? + if !bestDate.IsZero() { + return bestDate, nil + } + + // sad... + return time.Time{}, fmt.Errorf("unrecognized date format: %s", text) } func parseReceivedHeader(h *mail.Header) (time.Time, error) { diff --git a/lib/rfc822/message_test.go b/lib/rfc822/message_test.go index 8730afe2..fd2e8265 100644 --- a/lib/rfc822/message_test.go +++ b/lib/rfc822/message_test.go @@ -1,13 +1,15 @@ -package rfc822_test +package rfc822 import ( "io" "os" "path/filepath" "testing" + "time" - "git.sr.ht/~rjarry/aerc/lib/rfc822" "git.sr.ht/~rjarry/aerc/models" + "github.com/emersion/go-message/mail" + "github.com/stretchr/testify/require" ) func TestMessageInfoParser(t *testing.T) { @@ -23,7 +25,7 @@ func TestMessageInfoParser(t *testing.T) { p := fi.Name() t.Run(p, func(t *testing.T) { m := newMockRawMessageFromPath(filepath.Join(rootDir, p)) - mi, err := rfc822.MessageInfo(m) + mi, err := MessageInfo(m) if err != nil { t.Fatal("Failed to create MessageInfo with:", err) } @@ -48,7 +50,7 @@ func TestMessageInfoHandledError(t *testing.T) { p := fi.Name() t.Run(p, func(t *testing.T) { m := newMockRawMessageFromPath(filepath.Join(rootDir, p)) - mi, err := rfc822.MessageInfo(m) + mi, err := MessageInfo(m) if err != nil { t.Fatal(err) } @@ -60,6 +62,40 @@ func TestMessageInfoHandledError(t *testing.T) { } } +func 
TestParseMessageDate(t *testing.T) { + // we use different times for "Date" and "Received" fields to make sure the right one is parsed + tests := []struct { + date string + received string + utc time.Time + }{ + { + date: "Fri, 22 Dec 2023 11:19:01 +0000", + received: "from aaa.bbb.com for <user@host.com>; Fri, 22 Dec 2023 06:19:02 -0500 (EST)", + utc: time.Date(2023, time.December, 22, 11, 19, 1, 0, time.UTC), + }, + { + date: "Fri, 29 Dec 2023 14:06:37 +0100", + received: "from somewhere.com for a@b.c; Fri, 30 Dec 2023 4:06:43 +1300", + utc: time.Date(2023, time.December, 29, 13, 6, 37, 0, time.UTC), + }, + { + date: "Fri, 29 Dec 2023 00:51:00 EST", + received: "by hostname.com; Fri, 29 Dec 2023 00:51:33 -0500 (EST)", + utc: time.Date(2023, time.December, 29, 5, 51, 33, 0, time.UTC), + }, + } + + for _, test := range tests { + h := mail.Header{} + h.SetText("Date", test.date) + h.SetText("Received", test.received) + res, err := parseDate(&h) + require.Nil(t, err) + require.Equal(t, test.utc, res.UTC()) + } +} + type mockRawMessage struct { path string } |