aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--parse_received.py490
-rw-r--r--test/samples/testing-email-01.eml129
-rw-r--r--test/test_parse_received.py157
3 files changed, 776 insertions, 0 deletions
diff --git a/parse_received.py b/parse_received.py
new file mode 100644
index 0000000..f9d106b
--- /dev/null
+++ b/parse_received.py
@@ -0,0 +1,490 @@
+# -*- coding: utf-8 -*-
+import dateutil.parser
+import email
+import email.utils
+from pprint import pformat
+import logging
+logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
+ level=logging.DEBUG)
+
+import rply
+
+
+#logging.basicConfig(level=logging.DEBUG)
+
+# from http://search.cpan.org/~simon/Email-Received-1.00/lib/Email/Received.pm
+# possible keys are
+# ip rdns helo ident envfrom auth by id
+
+# from RFC2821
+"""
+4.4 Trace Information
+
+
+ When an SMTP server receives a message for delivery or further
+ processing, it MUST insert trace ("time stamp" or "Received")
+ information at the beginning of the message content, as discussed in
+ section 4.1.1.4.
+
+ This line MUST be structured as follows:
+
+ - The FROM field, which MUST be supplied in an SMTP environment,
+ SHOULD contain both (1) the name of the source host as presented
+ in the EHLO command and (2) an address literal containing the IP
+ address of the source, determined from the TCP connection.
+
+ - The ID field MAY contain an "@" as suggested in RFC 822, but this
+ is not required.
+
+ - The FOR field MAY contain a list of <path> entries when multiple
+ RCPT commands have been given. This may raise some security
+ issues and is usually not desirable; see section 7.2.
+
+ An Internet mail program MUST NOT change a Received: line that was
+ previously added to the message header. SMTP servers MUST prepend
+ Received lines to messages; they MUST NOT change the order of
+ existing lines or insert Received lines in any other location.
+
+ As the Internet grows, comparability of Received fields is important
+ for detecting problems, especially slow relays. SMTP servers that
+ create Received fields SHOULD use explicit offsets in the dates
+ (e.g., -0800), rather than time zone names of any type. Local time
+ (with an offset) is preferred to UT when feasible. This formulation
+ allows slightly more information about local circumstances to be
+ specified. If UT is needed, the receiver need merely do some simple
+ arithmetic to convert the values. Use of UT loses information about
+ the time zone-location of the server. If it is desired to supply a
+ time zone name, it SHOULD be included in a comment.
+
+ When the delivery SMTP server makes the "final delivery" of a
+ message, it inserts a return-path line at the beginning of the mail
+ data. This use of return-path is required; mail systems MUST support
+ it. The return-path line preserves the information in the <reverse-
+ path> from the MAIL command. Here, final delivery means the message
+ has left the SMTP environment. Normally, this would mean it had been
+ delivered to the destination user or an associated mail drop, but in
+ some cases it may be further processed and transmitted by another
+ mail system.
+
+ It is possible for the mailbox in the return path to be different
+ from the actual sender's mailbox, for example, if error responses are
+ to be delivered to a special error handling mailbox rather than to
+ the message sender. When mailing lists are involved, this
+ arrangement is common and useful as a means of directing errors to
+ the list maintainer rather than the message originator.
+
+ The text above implies that the final mail data will begin with a
+ return path line, followed by one or more time stamp lines. These
+ lines will be followed by the mail data headers and body [32].
+
+ It is sometimes difficult for an SMTP server to determine whether or
+ not it is making final delivery since forwarding or other operations
+ may occur after the message is accepted for delivery. Consequently,
+ any further (forwarding, gateway, or relay) systems MAY remove the
+ return path and rebuild the MAIL command as needed to ensure that
+ exactly one such line appears in a delivered message.
+
+ A message-originating SMTP system SHOULD NOT send a message that
+ already contains a Return-path header. SMTP servers performing a
+ relay function MUST NOT inspect the message data, and especially not
+ to the extent needed to determine if Return-path headers are present.
+ SMTP servers making final delivery MAY remove Return-path headers
+ before adding their own.
+
+ The primary purpose of the Return-path is to designate the address to
+ which messages indicating non-delivery or other mail system failures
+ are to be sent. For this to be unambiguous, exactly one return path
+ SHOULD be present when the message is delivered. Systems using RFC
+ 822 syntax with non-SMTP transports SHOULD designate an unambiguous
+ address, associated with the transport envelope, to which error
+ reports (e.g., non-delivery messages) should be sent.
+
+ Historical note: Text in RFC 822 that appears to contradict the use
+ of the Return-path header (or the envelope reverse path address from
+ the MAIL command) as the destination for error messages is not
+ applicable on the Internet. The reverse path address (as copied into
+ the Return-path) MUST be used as the target of any mail containing
+ delivery error messages.
+
+ In particular:
+
+ - a gateway from SMTP->elsewhere SHOULD insert a return-path header,
+ unless it is known that the "elsewhere" transport also uses
+ Internet domain addresses and maintains the envelope sender
+ address separately.
+
+ - a gateway from elsewhere->SMTP SHOULD delete any return-path
+ header present in the message, and either copy that information to
+ the SMTP envelope or combine it with information present in the
+ envelope of the other transport system to construct the reverse
+ path argument to the MAIL command in the SMTP envelope.
+
+ The server must give special treatment to cases in which the
+ processing following the end of mail data indication is only
+ partially successful. This could happen if, after accepting several
+ recipients and the mail data, the SMTP server finds that the mail
+ data could be successfully delivered to some, but not all, of the
+ recipients. In such cases, the response to the DATA command MUST be
+ an OK reply. However, the SMTP server MUST compose and send an
+ "undeliverable mail" notification message to the originator of the
+ message.
+
+ A single notification listing all of the failed recipients or
+ separate notification messages MUST be sent for each failed
+ recipient. For economy of processing by the sender, the former is
+ preferred when possible. All undeliverable mail notification
+ messages are sent using the MAIL command (even if they result from
+ processing the obsolete SEND, SOML, or SAML commands) and use a null
+ return path as discussed in section 3.7.
+
+ The time stamp line and the return path line are formally defined as
+ follows:
+"""
+
+# from RFC5536
+
+"""
+3.1.5. Path
+
+
+ The Path header field indicates the route taken by an article since
+ its injection into the Netnews system. Each agent that processes an
+ article is required to prepend at least one <path-identity> to this
+ header field body. This is primarily so that news servers are able
+ to avoid sending articles to sites already known to have them, in
+ particular the site they came from. Additionally, it permits
+ gathering statistics and tracing the route articles take in moving
+ over the network.
+
+ path = "Path:" SP *WSP path-list tail-entry *WSP CRLF
+
+ path-list = *( path-identity [FWS] [path-diagnostic] "!" )
+
+ path-diagnostic = diag-match / diag-other / diag-deprecated
+
+ diag-match = "!" ; another "!"
+
+ diag-other = "!." diag-keyword [ "." diag-identity ] [FWS]
+
+ diag-deprecated = "!" IPv4address [FWS]
+
+ diag-keyword = 1*ALPHA ; see [RFC5537]
+
+ diag-identity = path-identity / IPv4address / IPv6address
+
+ tail-entry = path-nodot
+ ; may be the string "not-for-mail"
+
+ path-identity = ( 1*( label "." ) toplabel ) / path-nodot
+
+ path-nodot = 1*( alphanum / "-" / "_" ) ; legacy names
+
+ label = alphanum [ *( alphanum / "-" ) alphanum ]
+
+ toplabel = ( [ label *( "-" ) ] ALPHA *( "-" ) label ) /
+ ( label *( "-" ) ALPHA [ *( "-" ) label ] ) /
+ ( label 1*( "-" ) label )
+
+ alphanum = ALPHA / DIGIT ; compare [RFC3696]
+
+ A <path-identity> is a name identifying a site. It takes the form of
+ a domain name having two or more components separated by dots, or a
+ single name with no dots (<path-nodot>).
+
+ Each <path-identity> in the <path-list> (which does not include the
+ <tail-entry>) indicates, from right to left, the successive agents
+ through which the article has passed. The use of the <diag-match>,
+ which appears as "!!", indicates that the agent to its left verified
+ the identity of the agent to its right before accepting the article
+ (whereas the <path-delimiter> "!" implies no such claim).
+
+ NOTE: Historically, the <tail-entry> indicated the name of the
+ sender. If not used for this purpose, the string "not-for-mail"
+ is often used instead (since at one time the whole path could be
+ used as a mail address for the sender).
+
+ NOTE: Although case-insensitive, it is intended that the
+ <diag-keyword>s should be in uppercase, to distinguish them from
+ the <path-identity>s, which are traditionally in lowercase.
+
+ A <path-diagnostic> is an item inserted into the Path header field
+ for purposes other than to indicate the name of a site. The use of
+ these is described in [RFC5537].
+
+ NOTE: One usage of a <path-diagnostic> is to record an IP address.
+ The fact that <IPv6address>es are allowed means that the colon (:)
+ is permitted; note that this may cause interoperability problems
+ at older sites that regard ":" as a <path-delimiter> and have
+ neighbors whose names have 4 or fewer characters, and where all
+ the characters are valid HEX digits.
+
+ NOTE: Although <IPv4address>es have occasionally been used in the
+ past (usually with a diagnostic intent), their continued use is
+ deprecated (though it is still acceptable in the form of the
+ <diag-deprecated>).
+"""
+
+
+__lg = rply.LexerGenerator()
+# Add takes a rule name, and a regular expression that defines the rule.
+__lg.ignore(r"\s+")
+__lg.ignore(r'[\n;=\)\(]+')
+__lg.add('FROMSEP', r"(?i)\bfrom\b")
+__lg.add('BYSEP', r"(?i)\bby\b")
+__lg.add('VIASEP', r"(?i)\bvia\b")
+__lg.add('WITHSEP', r"(?i)\bwith\b")
+__lg.add('FORSEP', r"(?i)\bfor\b")
+__lg.add('IDSEP', r"(?i)\bid\b")
+__lg.add('TCPSEP', r"(?i)\btcp\b")
+__lg.add('SMTPSEP', r"(?i)\b[e]?smtp\b")
+__lg.add('IPV4ADDRESS', r"\[?\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\]?")
+__lg.add('SMTPVERSION', r"\d{1,2}\.\d{1,2}\.\d{1,2}/\d{1,2}\.\d{1,2}\.\d{1,2}")
+__lg.add('DOMAIN', r"(?i)[<(]?(([\w][\w\-\.]*)\.)?([\w][\w\-]+)*" +
+ r"(\.([a-z][a-z]*))[>)]?")
+__weekday = r"(Mon|Tue|Wed|Thu|Fri|Sat|Sun)"
+__month_name = r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"
+__lg.add('DATETIME', (r"%s\s*,\s+\d{1,2}\s+%s\s+\d{4} " +
+ r"\d{1,2}:\d{2}:\d{2}\s+[-+]\d{4}(\s*\([A-Z]{3}\))?") %
+ (__weekday, __month_name))
+__lg.add("EMAILADDR", r'<[a-zA-Z0-9_.+-]+@[a-zA-Z0-9._-]+>')
+__lg.add('IGNORABLESTR', r"may be forged")
+# __lg.add('ParentStr', r"\([^)]+\)")
+__lg.add('STRING', r"\S+")
+
+lexer = __lg.build()
+
+__pg = rply.ParserGenerator([rule.name for rule in lexer.rules],
+ cache_id='received_parser')
+
+"""
+Result of:
+
+from server.mymailhost.com
+ (mail.mymailhost.com [126.43.75.123])
+ by pilot01.cl.msu.edu (8.10.2/8.10.2) with ESMTP id NAA23597;
+ Fri, 12 Jul 2002 16:11:20 -0400 (EDT)
+
+is
+
+[[Token('FROMSEP', 'from'),
+ [Token('DOMAIN', 'server.mymailhost.com'),
+ [[Token('DOMAIN', 'mail.mymailhost.com'),
+ Token('IPV4ADDRESS', '[126.43.75.123]')]]]],
+ [Token('BYSEP', 'by'),
+ [Token('DOMAIN', 'pilot01.cl.msu.edu'),
+ [[Token('STRING', '8.10.2/8.10.2)')]]]],
+ [[[], [[Token('SMTPSEP', 'ESMTP')]], [Token('STRING', 'NAA23597;')], []]],
+ Token('DATETIME', 'Fri, 12 Jul 2002 16:11:20 -0400 (EDT)')]
+"""
+
+
+@__pg.production('main : stamp')
+def main(p):
+ return p[0]
+
+
+@__pg.production('stamp : from-domain by-domain optinfo DATETIME')
+def stamp(p):
+ # Top-level result: [from-domain, by-domain, optinfo, DATETIME token].
+ return [p[0], p[1], p[2], p[3]]
+
+
+@__pg.production('from-domain : FROMSEP extended-domain')
+def from_domain(p):
+ #logging.debug('from_domain p = %s', p)
+ return [p[0], p[1]]
+
+
+@__pg.production('by-domain : BYSEP extended-domain')
+def by_domain(p):
+ #logging.debug('by_domain p = %s', p)
+ return [p[0], p[1]]
+
+
+@__pg.production('extended-domain : DOMAIN')
+def extended_domain(p):
+ # Bare host: returns the DOMAIN token itself, not a [host, tcp-info] list.
+ return p[0]
+
+
+@__pg.production('extended-domain : DOMAIN tcp-info')
+def extended_domain_tcp(p):
+ #logging.debug('extended_domain_tcp p = %s', p)
+ return [p[0], p[1]]
+
+
+@__pg.production('extended-domain : IPV4ADDRESS tcp-info')
+def extended_domain_addr(p):
+ #logging.debug('extended_domain_addr p = %s', p)
+ return [p[0], p[1]]
+
+
+@__pg.production('extended-domain : extended-domain IGNORABLESTR')
+def extended_domain_ignorable(p):
+ # Drop the trailing IGNORABLESTR token; pass the extended-domain value through.
+ return p[0]
+
+
+@__pg.production('tcp-info : IPV4ADDRESS')
+def tcp_info(p):
+ #logging.debug('optinfo p = %s', p)
+ return p
+
+
+@__pg.production('tcp-info : DOMAIN IPV4ADDRESS')
+def tcp_info_addr(p):
+ #logging.debug('optinfo p = %s', p)
+ return p
+
+
+@__pg.production('tcp-info : SMTPVERSION')
+def tcp_info_string(p):
+ #logging.debug('optinfo p = %s', p)
+ return p
+
+
+@__pg.production('optinfo : via with id for')
+def optinfo(p):
+ # p is [via, with, id, for]; the result wraps that list in one more list.
+ return [p]
+
+
+@__pg.production('via : ')
+def via_empty(p):
+ #logging.debug('optinfo_via p = %s', p)
+ return p
+
+
+@__pg.production('via : VIASEP link')
+def via(p):
+    """Return the link value in a list; p is [VIASEP, link], so it is p[1]."""
+    return [p[1]]
+
+
+@__pg.production('with :')
+def with_empty(p):
+ #logging.debug('optinfo_with p = %s', p)
+ return p
+
+
+@__pg.production('with : WITHSEP protocol')
+def with_protocol(p):
+ # Keep only the protocol value; the WITHSEP keyword token is discarded.
+ return p[1]
+
+
+@__pg.production('id : ')
+def id_empty(p):
+ #logging.debug('optinfo_id p = %s', p)
+ return p
+
+
+@__pg.production('id : IDSEP STRING')
+def id(p):
+ # Returns [STRING token]. NOTE(review): this name shadows the builtin id().
+ return [p[1]]
+
+
+# Actually EMAILADDR is Message-Id
+@__pg.production('id : IDSEP EMAILADDR')
+def id_addr(p):
+ #logging.debug('id_addr p = %s', p)
+ return [p[1]]
+
+
+@__pg.production('for : ')
+def for_empty(p):
+ #logging.debug('for p = %s', p)
+ return p
+
+
+# For = "FOR" FWS 1*( Path / Mailbox ) CFWS
+@__pg.production('for : FORSEP EMAILADDR')
+def optinfo_for(p):
+ # NOTE(review): returns the FORSEP keyword; the EMAILADDR (p[1]) is dropped.
+ return [p[0]]
+
+
+@__pg.production('link : TCPSEP STRING')
+def link(p):
+    """Return the STRING token in a list; p is [TCPSEP, STRING], so it is p[1]."""
+    return [p[1]]
+
+
+@__pg.production('protocol : SMTPSEP')
+def protocol(p):
+ #logging.debug('protocol p = %s', p)
+ return p
+
+
+parser = __pg.build()
+
+
+# from http://stackoverflow.com/questions/27450365/
+def sort_key(received_header):
+    """Return the parsedate_tz() tuple for the date after the last ';'."""
+    return email.utils.parsedate_tz(received_header.rsplit(';', 1)[-1])
+
+# [[Token('FROMSEP', 'from'),
+# [Token('DOMAIN', 'server.mymailhost.com'),
+# [[Token('DOMAIN', 'mail.mymailhost.com'),
+# Token('IPV4ADDRESS', '[126.43.75.123]')]]]],
+# [Token('BYSEP', 'by'),
+# [Token('DOMAIN', 'pilot01.cl.msu.edu'),
+# Token('SMTPVERSION', '8.10.2/8.10.2')]],
+# [[[], [Token('SMTPSEP', 'ESMTP')], [Token('STRING', 'NAA23597;')], []]],
+# Token('DATETIME', 'Fri, 12 Jul 2002 16:11:20 -0400 (EDT)')]
+#
+# [[Token('FROMSEP', 'from'),
+# [Token('DOMAIN', 'd1080.master.cz'),
+# [[Token('DOMAIN', 'p-lab.cz'),
+# Token('IPV4ADDRESS', '[89.185.245.149]')]]]],
+# [Token('BYSEP', 'by'),
+# [Token('DOMAIN', 'mx1.redhat.com'),
+# Token('SMTPVERSION', '8.14.4/8.14.4')]],
+# [[[],
+# [Token('SMTPSEP', 'ESMTP')],
+# [Token('STRING', 't07GaC1j031854')],
+# [Token('FORSEP', 'for')]]],
+# Token('DATETIME', 'Wed, 7 Jan 2015 11:36:13 -0500')]
+
+
+def parse_header(in_str):
+    """Parse one Received: header value into a dict.
+
+    Returns {'from': {'halo', 'reveresed', 'ipaddr'}, 'by': {'server'},
+    'date': datetime}; lexing, parsing and date errors propagate.
+    """
+    # Lex once into a list so the tokens can be logged and then parsed
+    # without rewinding the lexer stream through its ``idx`` attribute.
+    tokens = list(lexer.lex(in_str))
+    logging.debug('\nstream:\n%s', tokens)
+    parsed = parser.parse(iter(tokens))
+    logging.debug('\nparsed:\n%s', pformat(parsed))
+
+    from_list = parsed[0][1]
+    by_list = parsed[1][1]
+    date_str = parsed[3].getstr()
+    logging.debug('\nfrom_list = %s', from_list[1])
+    logging.debug('by_list = %s', by_list)
+    logging.debug('with_list = %s', parsed[2])
+    logging.debug('date_str = %s', date_str)
+
+    return {
+        'from': {'halo': from_list[0].getstr(),
+                 'reveresed': from_list[1][0].getstr(),
+                 'ipaddr': from_list[1][1].getstr()},
+        'by': {'server': by_list[0].getstr()},
+        'date': dateutil.parser.parse(date_str)
+    }
+
+received_header_list = []
+received_header_list.sort(key=sort_key)
+
+if __name__ == '__main__':
+ instr = """from d1080.master.cz (p-lab.cz [89.185.245.149] (may be forged))
+ by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id t07GaC1j031854
+ for <mcepl@redhat.com>; Wed, 7 Jan 2015 11:36:13 -0500"""
+ print(instr)
+ for token in lexer.lex(instr):
+ print(token)
diff --git a/test/samples/testing-email-01.eml b/test/samples/testing-email-01.eml
new file mode 100644
index 0000000..9c00523
--- /dev/null
+++ b/test/samples/testing-email-01.eml
@@ -0,0 +1,129 @@
+From mcepl@redhat.com Wed Jan 7 17:36:15 2015
+Return-Path: mcepl@redhat.com
+Received: from zmta02.collab.prod.int.phx2.redhat.com (LHLO
+ zmta02.collab.prod.int.phx2.redhat.com) (10.5.81.9) by
+ zmail13.collab.prod.int.phx2.redhat.com with LMTP; Wed, 7 Jan 2015 11:36:14
+ -0500 (EST)
+Received: from int-mx14.intmail.prod.int.phx2.redhat.com (int-mx14.intmail.prod.int.phx2.redhat.com [10.5.11.27])
+ by zmta02.collab.prod.int.phx2.redhat.com (Postfix) with ESMTP id D959F121409
+ for <mcepl@mail.corp.redhat.com>; Wed, 7 Jan 2015 11:36:14 -0500 (EST)
+Received: from mx1.redhat.com (ext-mx12.extmail.prod.ext.phx2.redhat.com [10.5.110.17])
+ by int-mx14.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id t07GaEPV007184
+ for <mcepl@redhat.com>; Wed, 7 Jan 2015 11:36:14 -0500
+Received: from d1080.master.cz (p-lab.cz [89.185.245.149] (may be forged))
+ by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id t07GaC1j031854
+ for <mcepl@redhat.com>; Wed, 7 Jan 2015 11:36:13 -0500
+Received: by d1080.master.cz (Postfix)
+ id 1932D3218192; Wed, 7 Jan 2015 17:36:12 +0100 (CET)
+Delivered-To: virtuser_566@d1080.master.cz
+Received: by d1080.master.cz (Postfix, from userid 557)
+ id 113733218161; Wed, 7 Jan 2015 17:36:12 +0100 (CET)
+X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on p-lab.cz
+X-Spam-Level:
+X-Spam-Status: No, score=-105.0 required=2.0 tests=RCVD_IN_DNSWL_HI,
+ SPF_HELO_PASS,URIBL_BLOCKED,USER_IN_ALL_SPAM_TO autolearn=disabled
+ version=3.3.1
+Received: from mx1.redhat.com (mx1.redhat.com [209.132.183.28])
+ by d1080.master.cz (Postfix) with ESMTP id 80EB93218161
+ for <mcepl@cepl.eu>; Wed, 7 Jan 2015 17:36:10 +0100 (CET)
+Received: from int-mx11.intmail.prod.int.phx2.redhat.com (int-mx11.intmail.prod.int.phx2.redhat.com [10.5.11.24])
+ by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id t07Ga8gQ031700
+ (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-GCM-SHA384 bits=256 verify=FAIL);
+ Wed, 7 Jan 2015 11:36:09 -0500
+Received: from mitmanek.localdomain (ovpn-116-62.ams2.redhat.com [10.36.116.62])
+ by int-mx11.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id t07Ga7QL024180;
+ Wed, 7 Jan 2015 11:36:08 -0500
+Received: from mitmanek.redhat.com (mitmanek.ceplovi.cz [127.0.0.1])
+ by mitmanek.localdomain (Postfix) with ESMTP id 44EA710093A06;
+ Wed, 7 Jan 2015 17:36:06 +0100 (CET)
+From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= <mcepl@redhat.com>
+To: piglit@lists.freedesktop.org
+Cc: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= <mcepl@cepl.eu>
+Subject: [PATCH] Do we really need python 2.7?
+Date: Wed, 7 Jan 2015 17:36:00 +0100
+Message-Id: <1420648560-24144-1-git-send-email-mcepl@redhat.com>
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+X-Scanned-By: MIMEDefang 2.68 on 10.5.11.27
+X-Scanned-By: MIMEDefang 2.68 on 10.5.110.17
+X-Scanned-By: MIMEDefang 2.68 on 10.5.11.24
+Content-Transfer-Encoding: quoted-printable
+X-RedHat-Spam-Score: -1.106 (BAYES_00,RDNS_NONE,URIBL_BLOCKED) 89.185.245.149 [89.185.245.149] 89.185.245.149 [89.185.245.149] <mcepl@redhat.com>
+Status: RO
+Content-Length: 2410
+Lines: 73
+
+From: Mat=C4=9Bj Cepl <mcepl@cepl.eu>
+
+Hi,
+
+I am in the process of struggle to build piglit from the master
+(commit 4adb082) on RHEL-6 (we would like to continue to use it
+for testing).
+
+When I tried just naively build our RHEL-7/Fedora 20+ package on
+EL-6 configuration failed because it complained it is missing
+python 2.6. Which is a bit strange, because of course, RHEL-6
+does contain python 2.6. So, I went digging to find out where
+this message comes from and I have discovered this interesting
+piece of code:
+
+ # Check for presence of Python 2.6 or greater.
+ foreach(python_cmd python2 python)
+ execute_process(
+ COMMAND ${python_cmd} -c \
+ "import sys; assert '2.7' <=3D sys.version < '3'"
+ OUTPUT_QUIET
+ ERROR_QUIET
+ RESULT_VARIABLE python_version_check_error_code)
+ if(python_version_check_error_code EQUAL 0)
+ set(python ${python_cmd})
+ break()
+ endif(python_version_check_error_code EQUAL 0)
+ endforeach(python_cmd)
+
+ if(NOT DEFINED python)
+ message(FATAL_ERROR "python version 2.x (where x >=3D 6) requ=
+ired")
+ endif(NOT DEFINED python)
+
+First obviously this code lies. Either we really care about
+python 2.7 and we should declare our loyalities openly, or piglit
+can be working with python 2.6 (which I hope) and then that '2.7'
+is just a typo.
+
+Which one it is? How difficult it would be to switch piglit to
+python 2.6? I hope not that difficult (as there were not that
+many changes between 2.6 and 2.7). Does anybody know?
+
+And of course, when I apply just the attached patch I get to yet
+another problem: freeglut (RHEL-6 has freeglut-2.6.0). But let's
+walk this minefield one mine at the time.
+
+Best,
+
+Mat=C4=9Bj
+
+---
+ CMakeLists.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 6fb6c8a..d2f2f0e 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -190,7 +190,7 @@ ENDIF()
+ # Check for presence of Python 2.6 or greater.
+ foreach(python_cmd python2 python)
+ execute_process(
+- COMMAND ${python_cmd} -c "import sys; assert '2.7' <=3D sys.version < =
+'3'"
++ COMMAND ${python_cmd} -c "import sys; assert '2.6' <=3D sys.version < =
+'3'"
+ OUTPUT_QUIET
+ ERROR_QUIET
+ RESULT_VARIABLE python_version_check_error_code)
+--=20
+1.8.3.1
+
+
diff --git a/test/test_parse_received.py b/test/test_parse_received.py
new file mode 100644
index 0000000..8fa1945
--- /dev/null
+++ b/test/test_parse_received.py
@@ -0,0 +1,157 @@
+import datetime
+import email
+import unittest
+
+from dateutil.tz import tzoffset
+from rply import Token
+
+import parse_received
+
+
+#logging.basicConfig(level=logging.DEBUG)
+
+
+INPUT_01 = \
+ """from server.mymailhost.com
+ (mail.mymailhost.com [126.43.75.123])
+ by pilot01.cl.msu.edu (8.10.2/8.10.2) with ESMTP id NAA23597;
+ Fri, 12 Jul 2002 16:11:20 -0400 (EDT)"""
+INPUT_02 = \
+ """from d1080.master.cz (p-lab.cz [89.185.245.149] (may be forged))
+ by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id t07GaC1j031854
+ for <mcepl@redhat.com>; Wed, 7 Jan 2015 11:36:13 -0500"""
+
+
+class TestReceivedLexer(unittest.TestCase):
+ maxDiff = None
+
+ def test_simple_01(self):
+ expected = [
+ Token('FROMSEP', 'from'),
+ Token('DOMAIN', 'server.mymailhost.com'),
+ Token('DOMAIN', 'mail.mymailhost.com'),
+ Token('IPV4ADDRESS', '[126.43.75.123]'),
+ Token('BYSEP', 'by'),
+ Token('DOMAIN', 'pilot01.cl.msu.edu'),
+ Token('SMTPVERSION', '8.10.2/8.10.2'),
+ Token('WITHSEP', 'with'),
+ Token('SMTPSEP', 'ESMTP'),
+ Token('IDSEP', 'id'),
+ Token('STRING', 'NAA23597;'),
+ Token('DATETIME', 'Fri, 12 Jul 2002 16:11:20 -0400 (EDT)')
+ ]
+ parsed = list(parse_received.lexer.lex(INPUT_01))
+ self.assertEqual(parsed, expected)
+
+ def test_simple_02(self):
+ expected = [
+ Token('FROMSEP', 'from'),
+ Token('DOMAIN', 'd1080.master.cz'),
+ Token('DOMAIN', 'p-lab.cz'),
+ Token('IPV4ADDRESS', '[89.185.245.149]'),
+ Token('IGNORABLESTR', 'may be forged'),
+ Token('BYSEP', 'by'),
+ Token('DOMAIN', 'mx1.redhat.com'),
+ Token('SMTPVERSION', '8.14.4/8.14.4'),
+ Token('WITHSEP', 'with'),
+ Token('SMTPSEP', 'ESMTP'),
+ Token('IDSEP', 'id'),
+ Token('STRING', 't07GaC1j031854'),
+ Token('FORSEP', 'for'),
+ Token('EMAILADDR', '<mcepl@redhat.com>'),
+ Token('DATETIME', 'Wed, 7 Jan 2015 11:36:13 -0500')
+ ]
+ parsed = list(parse_received.lexer.lex(INPUT_02))
+ self.assertEqual(parsed, expected)
+
+
+class TestReceivedParser(unittest.TestCase):
+ maxDiff = None
+
+ def test_simple_01(self):
+ expected = [
+ [Token('FROMSEP', 'from'),
+ [Token('DOMAIN', 'server.mymailhost.com'),
+ [Token('DOMAIN', 'mail.mymailhost.com'),
+ Token('IPV4ADDRESS', '[126.43.75.123]')]]
+ ],
+ [Token('BYSEP', 'by'),
+ [Token('DOMAIN', 'pilot01.cl.msu.edu'),
+ [Token('SMTPVERSION', '8.10.2/8.10.2')]]
+ ],
+ [[[],
+ [Token('SMTPSEP', 'ESMTP')],
+ [Token('STRING', 'NAA23597;')], []]
+ ],
+ Token('DATETIME', 'Fri, 12 Jul 2002 16:11:20 -0400 (EDT)')
+ ]
+ stream = parse_received.lexer.lex(INPUT_01)
+ stream.idx = 0
+ parsed = parse_received.parser.parse(stream)
+ self.assertEqual(parsed, expected)
+
+ def test_simple_02(self):
+ expected = [
+ [Token('FROMSEP', 'from'),
+ [Token('DOMAIN', 'd1080.master.cz'),
+ [Token('DOMAIN', 'p-lab.cz'),
+ Token('IPV4ADDRESS', '[89.185.245.149]')]]
+ ],
+ [Token('BYSEP', 'by'),
+ [Token('DOMAIN', 'mx1.redhat.com'),
+ [Token('SMTPVERSION', '8.14.4/8.14.4')]]
+ ],
+ [[[],
+ [Token('SMTPSEP', 'ESMTP')],
+ [Token('STRING', 't07GaC1j031854')],
+ [Token('FORSEP', 'for')]]
+ ],
+ Token('DATETIME', 'Wed, 7 Jan 2015 11:36:13 -0500')
+ ]
+ stream = parse_received.lexer.lex(INPUT_02)
+ stream.idx = 0
+ parsed = parse_received.parser.parse(stream)
+ self.assertEqual(parsed, expected)
+
+ def test_parse_01(self):
+ expected = {
+ 'by': {'server': 'pilot01.cl.msu.edu'},
+ 'date': datetime.datetime(
+ 2002, 7, 12, 16, 11, 20, tzinfo=tzoffset(u'EDT', -14400)),
+ 'from': {
+ 'halo': 'server.mymailhost.com',
+ 'ipaddr': '[126.43.75.123]',
+ 'reveresed': 'mail.mymailhost.com'
+ }
+ }
+
+ observed = parse_received.parse_header(INPUT_01)
+ self.assertEqual(observed, expected)
+
+ def test_parse_02(self):
+ expected = {
+ 'by': {'server': 'mx1.redhat.com'},
+ 'date': datetime.datetime(
+ 2015, 1, 7, 11, 36, 13, tzinfo=tzoffset(None, -18000)),
+ 'from': {
+ 'halo': 'd1080.master.cz',
+ 'ipaddr': '[89.185.245.149]',
+ 'reveresed': 'p-lab.cz'
+ }
+ }
+
+ observed = parse_received.parse_header(INPUT_02)
+ self.assertEqual(observed, expected)
+
+ def test_email_01(self):
+ with open('examples/mail', 'r') as inmail:
+ msg = email.message_from_file(inmail)
+ received_hdrs = msg.get_all('Received')
+ expected = []
+ parsed_headers = []
+ for hdr in received_hdrs:
+ parsed_headers.append(parse_received.parse_header(hdr))
+ self.assertEqual(parsed_headers, expected)
+
+if __name__ == '__main__':
+ unittest.main()