Refactored spmd parser to use InputParagraph class.

author: Martin Vilcans <martin@librador.com> 2012-02-01 22:06:20 +0100
committer: Martin Vilcans <martin@librador.com> 2012-02-01 22:59:54 +0100
commit: 801d04db2238c8f93c5762bbb291ac9af40995b5 (patch)
tree: 981d588eec8c89c59247d2d5cee2f74b3cd67a54
parent: 77940d45a14a727250aa9117b85f716a249c5ad6 (diff)
download: screenplain-801d04db2238c8f93c5762bbb291ac9af40995b5.tar.gz
2 files changed, 110 insertions, 100 deletions
diff --git a/screenplain/parsers/spmd.py b/screenplain/parsers/spmd.py
index c2a757a..8826d9f 100644
--- a/screenplain/parsers/spmd.py
+++ b/screenplain/parsers/spmd.py
@@ -23,98 +23,121 @@ slug_prefixes = (
 
 TWOSPACE = ' ' * 2
 
-centered_re = re.compile(r'\s*>\s*(.*)\s*<\s*$')
-preprocess_re = re.compile(r'^([ \t]*)(.*?)([ \t]*)[\r\n]*$')
+centered_re = re.compile(r'\s*>\s*(.*?)\s*<\s*$')
 dual_dialog_re = re.compile(r'^(.+?)(\s*\^)$')
+slug_re = re.compile(r'(?:(\.)\s*)?(\S.*?)\s*$')
+transition_re = re.compile(r'(>?)\s*(.+?)(:?)$')
 
 
-def is_slug(blanks_before, line_list):
-    if len(line_list) != 1:
-        return False
-    upper = line_list[0].upper()
-    if upper.startswith('.') and len(upper) > 1:
-        return True
-    return any(upper.startswith(s) for s in slug_prefixes)
+def _to_rich(line_or_line_list):
+    """Converts a line list into a list of RichString
+    or a single string to a RichString.
 
+    """
+    if isinstance(line_or_line_list, basestring):
+        return parse_emphasis(line_or_line_list)
+    else:
+        return [parse_emphasis(line) for line in line_or_line_list]
+
+
+class InputParagraph(object):
+    def __init__(self, blanks_before, lines):
+        self.blanks_before = blanks_before  # blank lines preceding this paragraph (was hard-coded to 0)
+        self.lines = lines
+
+    def update_list(self, previous_paragraphs):
+        """Inserts this paragraph into a list.
+        Modifies the `previous_paragraphs` list.
+        """
+        previous_paragraphs.append(
+            self.as_slug() or
+            self.as_centered_action() or
+            self.as_dialog(previous_paragraphs) or
+            self.as_transition() or
+            self.as_action()
+        )
+
+    def as_slug(self):
+        if len(self.lines) != 1:
+            return None
+
+        match = slug_re.match(self.lines[0])
+        if not match:
+            return
+
+        period, text = match.groups()
+        if period:
+            return Slug(_to_rich(text.upper()))
+
+        upper = text.upper()
+        if not any(upper.startswith(s) for s in slug_prefixes):
+            return None
+
+        return Slug(_to_rich(upper))
+
+    def as_centered_action(self):
+        if not all(centered_re.match(line) for line in self.lines):
+            return None
+        return Action(_to_rich(
+            centered_re.match(line).group(1) for line in self.lines
+        ), centered=True)
 
-def _create_slug(line):
-    if line.startswith('.') and len(line) > 1:
-        line = line[1:]
-    return Slug(_to_rich([line])[0])
+    def _create_dialog(self, character):
+        return Dialog(
+            parse_emphasis(character),
+            _to_rich(line.strip() for line in self.lines[1:])
+        )
 
+    def as_dialog(self, previous_paragraphs):
+        if len(self.lines) < 2:
+            return None
 
-def _create_dialog(line_list, previous_paragraphs):
+        character = self.lines[0]
+        if not character.isupper() or character.endswith(TWOSPACE):
+            return None
 
-    if previous_paragraphs and isinstance(previous_paragraphs[-1], Dialog):
-        dual_match = dual_dialog_re.match(line_list[0])
-        if dual_match:
-            previous = previous_paragraphs.pop()
-            dialog = Dialog(
-                parse_emphasis(dual_match.group(1)),
-                _to_rich(line_list[1:])
-            )
-            return DualDialog(previous, dialog)
+        if previous_paragraphs and isinstance(previous_paragraphs[-1], Dialog):
+            dual_match = dual_dialog_re.match(character)
+            if dual_match:
+                previous = previous_paragraphs.pop()
+                dialog = self._create_dialog(dual_match.group(1))
+                return DualDialog(previous, dialog)
 
-    return Dialog(
-        parse_emphasis(line_list[0]),
-        _to_rich(line_list[1:])
-    )
+        return self._create_dialog(character)
 
+    def as_transition(self):
+        if len(self.lines) != 1:
+            return None
 
-def _to_rich(line_list):
-    """Converts a line list into a list of RichString."""
-    return [parse_emphasis(line.strip()) for line in line_list]
+        match = transition_re.match(self.lines[0])
+        if not match:
+            return None
+        greater_than, text, colon = match.groups()
 
+        if greater_than:
+            return Transition(_to_rich(text.upper() + colon))
 
-def create_paragraph(blanks_before, line_list, previous_paragraphs):
-    first_line = line_list[0]
-    if is_slug(blanks_before, line_list):
-        return _create_slug(line_list[0])
-    elif all(centered_re.match(line) for line in line_list):
-        return Action(_to_rich(
-            centered_re.match(line).group(1) for line in line_list
-        ), centered=True)
-    elif (
-        len(line_list) > 1 and
-        first_line.isupper() and
-        not first_line.endswith(TWOSPACE)
-    ):
-        return _create_dialog(line_list, previous_paragraphs)
-    elif (
-        len(line_list) == 1 and first_line.isupper()
-        and (first_line.endswith(':') or first_line.startswith('>'))
-    ):
-        if first_line.startswith('>'):
-            return Transition(_to_rich([first_line[1:]])[0])
-        else:
-            return Transition(_to_rich([first_line])[0])
-    else:
-        return Action(_to_rich(line_list))
+        if text.isupper() and colon:
+            return Transition(_to_rich(text + colon))
 
+        return None
 
-def _preprocess_line(raw_line):
-    """Splits a line into leading spaces, text content, and trailing spaces.
+    def as_action(self):
+        return Action(_to_rich(line.rstrip() for line in self.lines))
 
-    >>> _preprocess_line('  foo  ')
-    ('  ', 'foo', '  ')
 
-    For a blank line, the trailing spaces will be returned as trailing
-    whitespace:
+def _preprocess_line(raw_line):
+    r"""Replaces tabs with spaces and removes trailing end of line markers.
+
+    >>> _preprocess_line('foo \r\n\n')
+    'foo '
 
-    >>> _preprocess_line('   ')
-    ('', '', '   ')
     """
-    line = raw_line.expandtabs(4)
-    leading, text, trailing = preprocess_re.match(line).groups()
-    if not text:
-        trailing = leading
-        leading = ''
-    return leading, text, trailing
+    return raw_line.expandtabs(4).rstrip('\r\n')
 
 
-def _is_blank(preprocessed_line):
-    leading, text, trailing = preprocessed_line
-    return not text and not trailing
+def _is_blank(line):
+    return line == ''
 
 
 def parse(source):
@@ -122,18 +145,11 @@ def parse(source):
     blank_count = 0
     source = (_preprocess_line(line) for line in source)
     paragraphs = []
-    for blank, preprocessed_lines in itertools.groupby(source, _is_blank):
+    for blank, input_lines in itertools.groupby(source, _is_blank):
         if blank:
-            blank_count = sum(1 for line in preprocessed_lines)
+            blank_count = sum(1 for line in input_lines)
         else:
-            paragraph = create_paragraph(
-                blank_count,
-                [
-                    text + trailing
-                    for (leading, text, trailing) in preprocessed_lines
-                ],
-                paragraphs
-            )
-            paragraphs.append(paragraph)
+            paragraph = InputParagraph(blank_count, list(input_lines))
+            paragraph.update_list(paragraphs)
 
     return paragraphs
diff --git a/tests/spmd_test.py b/tests/spmd_test.py
index 264995b..9f03076 100644
--- a/tests/spmd_test.py
+++ b/tests/spmd_test.py
@@ -234,28 +234,22 @@ class ParseTests(unittest2.TestCase):
         self.assertEquals([Action, Transition], [type(p) for p in paras])
         self.assertEquals(plain('FADE OUT.'), paras[1].line)
 
-    def test_multiline_paragraph(self):
-        """Check that we don't join lines like Markdown does.
-        """
+    def test_action_preserves_leading_whitespace(self):
         paras = list(parse([
-            'They drink long and well from the beers.',
+            'hello',
             '',
-            "And then there's a long beat.",
-            "Longer than is funny. ",
-            "   Long enough to be depressing.",
-            '',
-            'The men look at each other.',
+            '  two spaces',
+            '   three spaces ',
         ]))
-        self.assertEquals([Action, Action, Action], [type(p) for p in paras])
+        self.assertEquals([Action, Action], [type(p) for p in paras])
         self.assertEquals(
             [
-                plain("And then there's a long beat."),
-                plain("Longer than is funny."),
-                plain("Long enough to be depressing."),
+                plain(u'  two spaces'),
+                plain(u'   three spaces'),
             ], paras[1].lines
         )
 
-    def test_multiline_dialog(self):
+    def test_leading_and_trailing_spaces_in_dialog(self):
         paras = list(parse([
             'JULIET',
             'O Romeo, Romeo! wherefore art thou Romeo?',
@@ -265,10 +259,10 @@ class ParseTests(unittest2.TestCase):
         ]))
         self.assertEquals([Dialog], [type(p) for p in paras])
         self.assertEquals([
-            (False, plain('O Romeo, Romeo! wherefore art thou Romeo?')),
-            (False, plain('Deny thy father and refuse thy name;')),
-            (False, plain('Or, if thou wilt not, be but sworn my love,')),
-            (False, plain("And I'll no longer be a Capulet.")),
+            (False, plain(u'O Romeo, Romeo! wherefore art thou Romeo?')),
+            (False, plain(u'Deny thy father and refuse thy name;')),
+            (False, plain(u'Or, if thou wilt not, be but sworn my love,')),
+            (False, plain(u"And I'll no longer be a Capulet.")),
         ], paras[0].blocks)
 
     def test_single_centered_line(self):
author	Martin Vilcans <martin@librador.com>	2012-02-01 22:06:20 +0100
committer	Martin Vilcans <martin@librador.com>	2012-02-01 22:59:54 +0100
commit	801d04db2238c8f93c5762bbb291ac9af40995b5 (patch)
tree	981d588eec8c89c59247d2d5cee2f74b3cd67a54
parent	77940d45a14a727250aa9117b85f716a249c5ad6 (diff)
download	screenplain-801d04db2238c8f93c5762bbb291ac9af40995b5.tar.gz