about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@cepl.eu> 2015-01-04 18:49:10 +0100
committer Matěj Cepl <mcepl@cepl.eu> 2015-01-05 12:59:40 +0100
commit e48572bca289eba580b96ca6273c0a3fb4e7d47f (patch)
tree 06868c25913f9f55d2fe34ab226e93124f813757
parent fc73177f54a70ceacfc1ea381c769c6d5b8de12e (diff)
download pygn-e48572bca289eba580b96ca6273c0a3fb4e7d47f.tar.gz
Add lexer based on rply.
-rw-r--r-- .gitignore 4
-rw-r--r-- examples/PLY-reference-manual.maff bin 0 -> 31354 bytes
-rw-r--r-- setup.py 5
-rw-r--r-- test/__init__.py 13
-rwxr-xr-x test/test_pyg.py 85
-rwxr-xr-x test/test_wlp.py 143
-rw-r--r-- wlp_lex.py 12
-rw-r--r-- wlp_yacc.py 98
8 files changed, 266 insertions, 94 deletions
diff --git a/.gitignore b/.gitignore
index 2cf78ef..a3fc624 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,6 @@
build/
-wlp/commands.tab.*
-wlp/lex.yy.c
+wlp_c/commands.tab.*
+wlp_c/lex.yy.c
*.pyc
wlp.so
pygn.egg-info/
diff --git a/examples/PLY-reference-manual.maff b/examples/PLY-reference-manual.maff
new file mode 100644
index 0000000..af9f0a5
--- /dev/null
+++ b/examples/PLY-reference-manual.maff
Binary files differ
diff --git a/setup.py b/setup.py
index 9fdad76..e74d78a 100644
--- a/setup.py
+++ b/setup.py
@@ -17,7 +17,8 @@ class Build_WLP_ext(build_ext):
# that's how it should be.
# otherwise, subsequent calls down the stack unwind the list and
# check_call won't get it.
- ([['yacc', '-d', '-o', 'wlp/commands.tab.c', 'wlp/commands.y']]),
+ ([['yacc', '-d', '-o', 'wlp_c/commands.tab.c',
+ 'wlp_c/commands.y']]),
'Generating lexer')
self.make_file(
'wlp/commands.l', 'wlp/lex.yy.c', check_call,
@@ -53,7 +54,7 @@ setup(name='pygn',
},
# TODO package actually requires lex and yacc port, but not sure
# how to say it here
- requires=[],
+ requires=['rply'],
license="GPLv3",
keywords=["nntp", "email", "gateway"],
classifiers=[
diff --git a/test/__init__.py b/test/__init__.py
index e69de29..55f45b6 100644
--- a/test/__init__.py
+++ b/test/__init__.py
@@ -0,0 +1,13 @@
+import os.path
+import sys
+import sysconfig
+
+
+def distutils_dir_name(dname):
+    """Return ``<dname>.<platform>-<major>.<minor>`` for this interpreter."""
+    major, minor = sys.version_info[:2]
+    return "{0}.{1}-{2}.{3}".format(
+        dname, sysconfig.get_platform(), major, minor
+    )
+wlp_lib_path = os.path.join('build', distutils_dir_name('lib'))
+sys.path.insert(0, wlp_lib_path)  # index 0: searched before other entries
diff --git a/test/test_pyg.py b/test/test_pyg.py
new file mode 100755
index 0000000..240228b
--- /dev/null
+++ b/test/test_pyg.py
@@ -0,0 +1,85 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import mail2news
+import os
+import os.path
+import re
+import subprocess
+import sys
+import test
+import unittest
+
+
+class TestM2N(unittest.TestCase):
+    expected_output = """Newsgroups: pyg.test
+From: Pyg <pyg@localhost.com>
+To: User <user@localhost.com>
+Subject: test
+Date: Sun, 1 Feb 2002 16:40:40 +0200
+Message-Id: <20001001164040.Aa8326@localhost>
+Return-Path: <pyg@localhost>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=us-ascii
+User-Agent: Mutt/1.2.5i
+X-Multiline: this header probably broke RFC, but is frequent.
+X-Gateway: pyg %s %s
+
+one line test
+
+""" % (mail2news.VERSION, mail2news.DESC)
+
+    def test_m2n(self):  # pygm2n output for examples/mail must match verbatim
+        command = ['./pygm2n', '-Tv', '-n', 'pyg.test']
+        with open('examples/mail') as mail_file:
+            gateway = subprocess.Popen(command, stdin=subprocess.PIPE,
+                                       stdout=subprocess.PIPE)
+            produced, _ = gateway.communicate(mail_file.read())
+        self.assertEqual(produced, self.expected_output)
+
+
+class TestN2M(unittest.TestCase):
+    expected_output = """Received: from GATEWAY by mitmanek.ceplovi.cz with pyg
+ for <test@example.com> ; Mon Dec 15 17:13:30 2014 (CEST)
+From: kame@inwind.it (PYG)
+To: test@example.com
+Subject: pyg's article test
+Date: 10 Jun 2000 23:20:47 +0200
+Organization: Debian GNU/Linux
+Reply-To: pyg@localhost
+Content-Type: text/plain; charset=US-ASCII
+Mime-Version: 1.0
+Content-Transfer-Encoding: 7bit
+X-Trace: pyg.server.tld 960672047 927 192.168.1.2 (10 Jun 2000 21:20:47 GMT)
+X-Newsgroups: local.moderated
+X-Gateway: pyg %s %s
+X-NNTP-Posting-Host: pyg.server.tld
+Resent-From: sender@example.com
+Resent-Sender: sender@example.com
+""" % (mail2news.VERSION, mail2news.DESC)
+
+    def test_n2m(self):
+        """Run ./pygn2m on a sample article; it must exit 0 with output."""
+        env = os.environ.copy()  # don't mutate our own environment
+        env['PYTHONPATH'] = os.pathsep.join(sys.path)
+        with open('examples/articletest.accepted') as in_mail:
+            pid = subprocess.Popen(['./pygn2m', '-Tvt', 'test@example.com',
+                                    '-s', 'sender@example.com',
+                                    '-w', 'examples/whitelist.example'],
+                                   stdin=subprocess.PIPE,
+                                   stdout=subprocess.PIPE, env=env)
+            in_message = in_mail.read().replace('pyg@pyg.server.tld',
+                                                'kame@inwind.it')
+            out, _ = pid.communicate(in_message)
+        # MULTILINE lets ^/$ anchor at each line, not just the whole string
+        out = re.sub(r'^Message-Id:.*$', '', out, flags=re.MULTILINE)
+        # Not sure how to compare two email messages (with different
+        # times, etc.), so for now just make sure the script doesn’t
+        # blow up and gives some output; otherwise it would be
+        #     self.assertEqual(out, self.expected_output)
+        self.assertEqual(pid.returncode, 0)
+        self.assertGreater(len(out), 0)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/test/test_wlp.py b/test/test_wlp.py
index 4dd1516..db6085f 100755
--- a/test/test_wlp.py
+++ b/test/test_wlp.py
@@ -1,31 +1,65 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-
-import mail2news
-import os
-import os.path
-import re
-import subprocess
-import sys
-import sysconfig
+import logging
+logging.basicConfig(level=logging.DEBUG)
import unittest
import wlp
+import wlp_lex
+import wlp_yacc
-
-def distutils_dir_name(dname):
- """Returns the name of a distutils build directory"""
- f = "{dirname}.{platform}-{version[0]}.{version[1]}"
- return f.format(dirname=dname,
- platform=sysconfig.get_platform(),
- version=sys.version_info)
-wlp_lib_path = os.path.join('build', distutils_dir_name('lib'))
-sys.path.insert(0, wlp_lib_path)
-
-
+from rply import Token
class TestWLP(unittest.TestCase):
+ maxDiff = None
+ test_input = '''<kame@innocent.com> {
+ From: = 'ME' Sender: = "Cosimo" Reply-to = "me"
+ }
+
+ <alfarano@students.cs.unibo.it> {
+ From: = 'Cosimo Alfarano'
+ X-Firstname: = 'Cosimo'
+ }
+
+ <kame@innocent.com> {
+ From: = 'kame@inwind.it'
+ Reply-to: = "KA"
+ Sender: = "Kalfa"
+ }'''
+
+    def test_wlp_lexer(self):
+        expected_stream = [  # no tokens for '{', '}' or '=': the lexer skips them
+            Token('OWNER', '<kame@innocent.com>'),
+            Token('VAR', 'From:'),
+            Token('VAL', "'ME'"),  # VAL values keep their quote characters
+            Token('VAR', 'Sender:'),
+            Token('VAL', '"Cosimo"'),
+            Token('VAR', 'Reply-to'),  # test_input omits the colon here
+            Token('VAL', '"me"'),
+            Token('OWNER', '<alfarano@students.cs.unibo.it>'),
+            Token('VAR', 'From:'),
+            Token('VAL', "'Cosimo Alfarano'"),
+            Token('VAR', 'X-Firstname:'),
+            Token('VAL', "'Cosimo'"),
+            Token('OWNER', '<kame@innocent.com>'),
+            Token('VAR', 'From:'),
+            Token('VAL', "'kame@inwind.it'"),
+            Token('VAR', 'Reply-to:'),
+            Token('VAL', '"KA"'),
+            Token('VAR', 'Sender:'),
+            Token('VAL', '"Kalfa"')
+        ]
+        tokens = list(wlp_lex.lexer.lex(self.test_input))  # drain the stream
+        self.assertEqual(tokens, expected_stream)
+
def test_wlp_parser(self):
+ lex_stream = wlp_lex.lexer.lex(self.test_input)
+ tree = wlp_yacc.parser.parse(lex_stream)
+ logging.debug('tree = %s', tree)
+ logging.debug('tree = dir %s', dir(tree))
+ self.assertEqual(tree, [])
+
+ def test_wlp_C_parser(self):
wlp.setfilebyname('examples/whitelist.example')
wl_dict = wlp.mkdict()
expected_dict = {'alfarano@students.cs.unibo.it': {
@@ -40,76 +74,5 @@ class TestWLP(unittest.TestCase):
}
self.assertEqual(wl_dict, expected_dict)
-
-class TestM2N(unittest.TestCase):
- expected_output = """Newsgroups: pyg.test
-From: Pyg <pyg@localhost.com>
-To: User <user@localhost.com>
-Subject: test
-Date: Sun, 1 Feb 2002 16:40:40 +0200
-Message-Id: <20001001164040.Aa8326@localhost>
-Return-Path: <pyg@localhost>
-Mime-Version: 1.0
-Content-Type: text/plain; charset=us-ascii
-User-Agent: Mutt/1.2.5i
-X-Multiline: this header probably broke RFC, but is frequent.
-X-Gateway: pyg %s %s
-
-one line test
-
-""" % (mail2news.VERSION, mail2news.DESC)
-
- def test_m2n(self):
- with open('examples/mail') as in_mail:
- pid = subprocess.Popen(['./pygm2n', '-Tv', '-n', 'pyg.test'],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE)
- out, _ = pid.communicate(in_mail.read())
- self.assertEqual(out, self.expected_output)
-
-
-class TestN2M(unittest.TestCase):
- expected_output = """Received: from GATEWAY by mitmanek.ceplovi.cz with pyg
- for <test@example.com> ; Mon Dec 15 17:13:30 2014 (CEST)
-From: kame@inwind.it (PYG)
-To: test@example.com
-Subject: pyg's article test
-Date: 10 Jun 2000 23:20:47 +0200
-Organization: Debian GNU/Linux
-Reply-To: pyg@localhost
-Content-Type: text/plain; charset=US-ASCII
-Mime-Version: 1.0
-Content-Transfer-Encoding: 7bit
-X-Trace: pyg.server.tld 960672047 927 192.168.1.2 (10 Jun 2000 21:20:47 GMT)
-X-Newsgroups: local.moderated
-X-Gateway: pyg %s %s
-X-NNTP-Posting-Host: pyg.server.tld
-Resent-From: sender@example.com
-Resent-Sender: sender@example.com
-""" % (mail2news.VERSION, mail2news.DESC)
-
- def test_n2m(self):
- env = os.environ
- env['PYTHONPATH'] = wlp_lib_path
-
- with open('examples/articletest.accepted') as in_mail:
- pid = subprocess.Popen(['./pygn2m', '-Tvt', 'test@example.com',
- '-s', 'sender@example.com',
- '-w', 'examples/whitelist.example'],
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE, env=env)
- in_message = in_mail.read().replace('pyg@pyg.server.tld',
- 'kame@inwind.it')
- out, err = pid.communicate(in_message)
- out = re.sub(r'^Message-Id:.*$', '', out)
- # Not sure how to compare two email mesages (with different
- # times, etc.) so for now just to make sure the script doesn’t
- # blow up and gives some output
- # otherwise it would be
- # self.assertEqual(out, expected_output)
- self.assertEqual(pid.returncode, 0)
- self.assertGreater(len(out), 0)
-
-
if __name__ == "__main__":
unittest.main()
diff --git a/wlp_lex.py b/wlp_lex.py
new file mode 100644
index 0000000..2cd912b
--- /dev/null
+++ b/wlp_lex.py
@@ -0,0 +1,12 @@
+import rply
+
+lg = rply.LexerGenerator()
+# add(name, regex): name is the token type, regex is the text it matches.
+lg.add("OWNER", r'<[a-zA-Z0-9_.+-]+@[a-zA-Z0-9._-]+>')  # <local@domain>
+lg.add("VAL", r'[\'`"][a-zA-Z0-9@_+.<>() -]+[\'`"]')  # quotes included
+lg.add("VAR", r'[a-zA-Z0-9_<>-]+[:]?')  # name, trailing ':' optional
+
+lg.ignore(r"\s+")  # whitespace yields no token
+lg.ignore(r'[{}=]+')  # braces and '=' yield no token either
+
+lexer = lg.build()
diff --git a/wlp_yacc.py b/wlp_yacc.py
new file mode 100644
index 0000000..a16f530
--- /dev/null
+++ b/wlp_yacc.py
@@ -0,0 +1,98 @@
+import logging
+
+import rply
+
+logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
+ level=logging.DEBUG)
+
+pg = rply.ParserGenerator(['OWNER', 'VAL', 'VAR'],
+ cache_id='wlp_parser')
+
+"""
+ $accept ::= block $end
+
+ block ::= blockstatement
+ | block blockstatement
+
+ blockstatement ::= owner '{' commandline '}'
+
+ commandline ::= command
+ | commandline command
+
+ command ::= varpart '=' valpart
+
+ owner ::= OWNERID
+
+ varpart ::= VARID
+
+ valpart ::= VALID
+"""
+
+## Converts this source into the dictionary shown further below:
+##
+## <kame@innocent.com> {
+## From: = 'ME' Sender: = "Cosimo" Reply-to = "me"
+## }
+##
+## <alfarano@students.cs.unibo.it> {
+## From: = 'Cosimo Alfarano'
+## X-Firstname: = 'Cosimo'
+## }
+##
+## <kame@innocent.com> {
+## From: = 'kame@inwind.it'
+## Reply-to: = "KA"
+## Sender: = "Kalfa"
+## }
+##
+## this dictionary
+##
+## expected_dict = {'alfarano@students.cs.unibo.it': {
+## 'From:': 'Cosimo Alfarano',
+## 'X-Firstname:': 'Cosimo'
+## },
+## 'kame@innocent.com': {
+## 'From:': 'kame@inwind.it',
+## 'Reply-to': 'me',
+## 'Reply-to:': 'KA',
+## 'Sender:': 'Kalfa'}
+## }
+
+
+@pg.production('main : block')
+def main(p):
+    return p[0]  # hand back whatever value `block` produced, unchanged
+
+
+# block: blockstatement | block blockstatement
+# Returns the list of blockstatement values seen so far, in source order.
+@pg.production('block : blockstatement')
+@pg.production('block : block blockstatement')
+def block(p):
+    logging.debug('block p = %s', p)
+    return [p[0]] if len(p) == 1 else p[0] + [p[1]]
+
+
+# blockstatement: owner '{' commandline '}' -> (OWNER token, commandline value)
+@pg.production('blockstatement : OWNER commandline')
+def blockstatement(p):
+    logging.debug('blockstatement p = %s', p)
+    return (p[0], p[1])  # mirrors command(); braces are ignored by the lexer
+
+
+# commandline: command | commandline command -> list of command values
+@pg.production('commandline : command')
+@pg.production('commandline : commandline command')
+def commandline(p):
+    logging.debug('commandline p = %s', p)
+    return [p[0]] if len(p) == 1 else p[0] + [p[1]]  # keep source order
+
+
+# command:
+#     varpart '=' valpart     { found(left,right,owner); }
+@pg.production('command : VAR VAL')  # no '=' here: wlp_lex.py ignores it
+def command(p):
+    logging.debug('command p = %s', p)
+    return (p[0], p[1])  # (VAR, VAL) pair, exactly as received in p
+
+parser = pg.build()