From 59510c41c839004f037a20464f368c445a7a91d1 Mon Sep 17 00:00:00 2001 From: Tim Culverhouse Date: Thu, 27 Oct 2022 15:35:43 -0500 Subject: maildir: keep less data in memory for sorting Sorting opens and reads portions of every file within a directory in order to gather the data needed. Specifically, RFC822Headers and BodyStructure are not needed. The RFC822Headers field stores a lot of information, and the BodyStructure field requires parsing the body of the email. Don't set these two values when parsing. Note: in my testing, this dropped sorting a 52k archive from 2.2gb of ram usage, to < 500mb Signed-off-by: Tim Culverhouse Acked-by: Robin Jarry --- worker/lib/parse.go | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'worker/lib/parse.go') diff --git a/worker/lib/parse.go b/worker/lib/parse.go index 616784c6..f57a56ac 100644 --- a/worker/lib/parse.go +++ b/worker/lib/parse.go @@ -302,6 +302,54 @@ func MessageInfo(raw RawMessage) (*models.MessageInfo, error) { }, nil } +// MessageHeaders populates a models.MessageInfo struct for the message. +// based on the reader returned by NewReader. Minimal information is included. +// There is no body structure or RFC822Headers set +func MessageHeaders(raw RawMessage) (*models.MessageInfo, error) { + var parseErr error + r, err := raw.NewReader() + if err != nil { + return nil, err + } + defer r.Close() + msg, err := ReadMessage(r) + if err != nil { + return nil, fmt.Errorf("could not read message: %w", err) + } + h := &mail.Header{Header: msg.Header} + env, err := parseEnvelope(h) + if err != nil && !errors.Is(err, DateParseError) { + return nil, fmt.Errorf("could not parse envelope: %w", err) + // if only the date parsing failed we still get the rest of the + // envelop structure in a valid state. + // Date parsing errors are fairly common and it's better to be + // slightly off than to not be able to read the mails at all + // hence we continue here + } + recDate, _ := parseReceivedHeader(h) + if recDate.IsZero() { + // better than nothing, if incorrect + recDate = env.Date + } + flags, err := raw.ModelFlags() + if err != nil { + return nil, err + } + labels, err := raw.Labels() + if err != nil { + return nil, err + } + return &models.MessageInfo{ + Envelope: env, + Flags: flags, + Labels: labels, + InternalDate: recDate, + Size: 0, + Uid: raw.UID(), + Error: parseErr, + }, nil +} + // NewCRLFReader returns a reader with CRLF line endings func NewCRLFReader(r io.Reader) io.Reader { var buf bytes.Buffer -- cgit