Parse the title page.

The title page data isn't used for anything yet, but when a title page is present in the source, its lines are at least no longer emitted as action lines in the output.
author: Martin Vilcans <martin@librador.com> 2012-02-16 23:18:53 +0100
committer: Martin Vilcans <martin@librador.com> 2012-02-16 23:18:53 +0100
commit: 03d27c86a728864d1ab59802f787aca893955fca (patch)
tree: 291e79c367a3680e8d778b397ef74d3334a5b428
parent: 0d824c62fd570d1f10f75efaffa3dd8d8d09e665 (diff)
download: screenplain-03d27c86a728864d1ab59802f787aca893955fca.tar.gz
2 files changed, 104 insertions, 5 deletions
diff --git a/screenplain/parsers/spmd.py b/screenplain/parsers/spmd.py
index 2940a8c..f9aa643 100644
--- a/screenplain/parsers/spmd.py
+++ b/screenplain/parsers/spmd.py
@@ -3,6 +3,7 @@
 # http://www.opensource.org/licenses/mit-license.php
 
 import itertools
+from itertools import takewhile
 import re
 
 from screenplain.types import (
@@ -18,6 +19,9 @@ slug_regexes = (
 
 TWOSPACE = ' ' * 2
 
+title_page_key_re = re.compile(r'([^:]+):\s*(.*)')
+title_page_value_re = re.compile(r'(?:\s{3,}|\t)(.+)')
+
 centered_re = re.compile(r'\s*>\s*(.*?)\s*<\s*$')
 dual_dialog_re = re.compile(r'^(.+?)(\s*\^)$')
 slug_re = re.compile(r'(?:(\.)\s*)?(\S.*?)\s*$')
@@ -153,6 +157,24 @@ def _is_blank(line):
 def parse(source):
     """Reads raw text input and generates paragraph objects."""
     source = (_preprocess_line(line) for line in source)
+
+    # Everything up to the first blank line is a candidate title page.
+    leading_lines = list(takewhile(lambda line: line != '', source))
+
+    if not parse_title_page(leading_lines):
+        # The first lines were not a title page, so they are part of the
+        # screenplay body. takewhile() consumed the blank line that ended
+        # them, so a blank line is put back before the rest of the source.
+        return parse_body(itertools.chain(leading_lines, [''], source))
+
+    # The first lines were a title page; only the rest is screenplay body.
+    # TODO: Create a title page from the parsed title page data
+    return parse_body(source)
+
+
+def parse_body(source):
+    """Reads lines of the main screenplay and generates paragraph objects."""
+
     paragraphs = []
     for blank, input_lines in itertools.groupby(source, _is_blank):
         if not blank:
@@ -160,3 +182,32 @@ def parse(source):
             paragraph.update_list(paragraphs)
 
     return paragraphs
+
+def parse_title_page(lines):
+    """Parse title page lines into a dict mapping keys to lists of values.
+
+    A key line has the form ``Key: value``. If the value is empty, the
+    values are read from the following lines that are indented by a tab
+    or by at least three whitespace characters. A key may occur several
+    times; all of its values are collected in order.
+
+    Returns None if a line cannot be parsed as a value line or a key line.
+    """
+    result = {}
+    key = None  # Key that accepts indented value lines, if any
+    for line in lines:
+        if key is not None:
+            value_match = title_page_value_re.match(line)
+            if value_match:
+                result.setdefault(key, []).append(value_match.group(1))
+                continue
+        key_match = title_page_key_re.match(line)
+        if not key_match:
+            # Neither an accepted value line nor a key line.
+            return None
+        key, value = key_match.groups()
+        if value:
+            # Single line key/value; no indented value lines belong to it.
+            result.setdefault(key, []).append(value)
+            key = None
+    return result
diff --git a/tests/spmd_test.py b/tests/spmd_test.py
index aac8df9..bb5f463 100644
--- a/tests/spmd_test.py
+++ b/tests/spmd_test.py
@@ -4,6 +4,7 @@
 
 import unittest2
 from screenplain.parsers.spmd import parse
+from screenplain.parsers import spmd
 from screenplain.types import (
     Slug, Action, Dialog, DualDialog, Transition, Section
 )
@@ -12,11 +13,6 @@ from screenplain.richstring import plain, italic, empty_string
 
 class ParseTests(unittest2.TestCase):
 
-    # A Scene Heading, or "slugline," is any line that has a blank
-    # line following it, and either begins with INT or EXT, or has
-    # two empty lines preceding it. A Scene Heading always has at
-    # least one blank line preceding it.
-    # NOTE: Actually the list used in Appendix 1
     def test_slug_with_prefix(self):
         paras = list(parse([
             'INT. SOMEWHERE - DAY',
@@ -337,5 +333,57 @@ class ParseTests(unittest2.TestCase):
         self.assertFalse(paras[0].centered)
         self.assertEquals([plain(line) for line in lines], paras[0].lines)
 
+class TitlePageTests(unittest2.TestCase):
+
+    def test_basic_title_page(self):
+        # Indented lines below a key with an empty value are collected
+        # as that key's values.
+        parsed = spmd.parse_title_page([
+            'Title:',
+            '    _**BRICK & STEEL**_',
+            '    _**FULL RETIRED**_',
+            'Author: Stu Maschwitz',
+        ])
+        expected = {
+            'Title': ['_**BRICK & STEEL**_', '_**FULL RETIRED**_'],
+            'Author': ['Stu Maschwitz'],
+        }
+        self.assertDictEqual(expected, parsed)
+
+    def test_multiple_values(self):
+        # A repeated key accumulates its values in source order, whether
+        # they are given on the key line or on indented lines below it.
+        parsed = spmd.parse_title_page([
+            'Title: Death',
+            'Title: - a love story',
+            'Title:',
+            '   (which happens to be true)',
+        ])
+        expected = {
+            'Title': [
+                'Death',
+                '- a love story',
+                '(which happens to be true)'
+            ]
+        }
+        self.assertDictEqual(expected, parsed)
+
+    def test_empty_value_ignored(self):
+        # A key that has neither a value on its own line nor indented
+        # value lines below it is left out of the result.
+        parsed = spmd.parse_title_page([
+            'Title:',
+            'Author: John August',
+        ])
+        expected = {'Author': ['John August']}
+        self.assertDictEqual(expected, parsed)
+
+    def test_unparsable_title_page_returns_none(self):
+        # A line without a colon after a single-line value is unparsable.
+        parsed = spmd.parse_title_page(
+            ['Title: Inception', '    additional line']
+        )
+        self.assertIsNone(parsed)
+
 if __name__ == '__main__':
     unittest2.main()
author	Martin Vilcans <martin@librador.com>	2012-02-16 23:18:53 +0100
committer	Martin Vilcans <martin@librador.com>	2012-02-16 23:18:53 +0100
commit	03d27c86a728864d1ab59802f787aca893955fca (patch)
tree	291e79c367a3680e8d778b397ef74d3334a5b428
parent	0d824c62fd570d1f10f75efaffa3dd8d8d09e665 (diff)
download	screenplain-03d27c86a728864d1ab59802f787aca893955fca.tar.gz