aboutsummaryrefslogtreecommitdiffstats
path: root/worker/lib/parse.go
diff options
context:
space:
mode:
author: Tim Culverhouse <tim@timculverhouse.com> 2022-10-27 15:35:43 -0500
committer: Robin Jarry <robin@jarry.cc> 2022-11-06 23:18:01 +0100
commit: 59510c41c839004f037a20464f368c445a7a91d1 (patch)
tree: e5048ea3c367881f01d9928c4936b1d95088e111 /worker/lib/parse.go
parent: 29205fdd07c09c21c2d5244c6c7e4fae7b6c824f (diff)
download: aerc-59510c41c839004f037a20464f368c445a7a91d1.tar.gz
maildir: keep less data in memory for sorting
Sorting opens and reads portions of every file within a directory in order to gather the data needed. Specifically, RFC822Headers and BodyStructure are not needed for sorting. The RFC822Headers field stores a lot of information, and the BodyStructure field requires parsing the body of the email. Don't set these two values when parsing.

Note: in my testing, this dropped sorting a 52k archive from 2.2 GB of RAM usage to < 500 MB.

Signed-off-by: Tim Culverhouse <tim@timculverhouse.com>
Acked-by: Robin Jarry <robin@jarry.cc>
Diffstat (limited to 'worker/lib/parse.go')
-rw-r--r-- worker/lib/parse.go 48
1 files changed, 48 insertions, 0 deletions
diff --git a/worker/lib/parse.go b/worker/lib/parse.go
index 616784c6..f57a56ac 100644
--- a/worker/lib/parse.go
+++ b/worker/lib/parse.go
@@ -302,6 +302,54 @@ func MessageInfo(raw RawMessage) (*models.MessageInfo, error) {
}, nil
}
+// MessageHeaders populates a models.MessageInfo struct for the message.
+// based on the reader returned by NewReader. Minimal information is included.
+// There is no body structure or RFC822Headers set
+func MessageHeaders(raw RawMessage) (*models.MessageInfo, error) {
+ var parseErr error
+ r, err := raw.NewReader()
+ if err != nil {
+ return nil, err
+ }
+ defer r.Close()
+ msg, err := ReadMessage(r)
+ if err != nil {
+ return nil, fmt.Errorf("could not read message: %w", err)
+ }
+ h := &mail.Header{Header: msg.Header}
+ env, err := parseEnvelope(h)
+ if err != nil && !errors.Is(err, DateParseError) {
+ return nil, fmt.Errorf("could not parse envelope: %w", err)
+ // if only the date parsing failed we still get the rest of the
+ // envelop structure in a valid state.
+ // Date parsing errors are fairly common and it's better to be
+ // slightly off than to not be able to read the mails at all
+ // hence we continue here
+ }
+ recDate, _ := parseReceivedHeader(h)
+ if recDate.IsZero() {
+ // better than nothing, if incorrect
+ recDate = env.Date
+ }
+ flags, err := raw.ModelFlags()
+ if err != nil {
+ return nil, err
+ }
+ labels, err := raw.Labels()
+ if err != nil {
+ return nil, err
+ }
+ return &models.MessageInfo{
+ Envelope: env,
+ Flags: flags,
+ Labels: labels,
+ InternalDate: recDate,
+ Size: 0,
+ Uid: raw.UID(),
+ Error: parseErr,
+ }, nil
+}
+
// NewCRLFReader returns a reader with CRLF line endings
func NewCRLFReader(r io.Reader) io.Reader {
var buf bytes.Buffer