diff options
author | Martin Vilcans <martin@librador.com> | 2012-02-16 23:18:53 +0100 |
---|---|---|
committer | Martin Vilcans <martin@librador.com> | 2012-02-16 23:18:53 +0100 |
commit | 03d27c86a728864d1ab59802f787aca893955fca (patch) | |
tree | 291e79c367a3680e8d778b397ef74d3334a5b428 | |
parent | 0d824c62fd570d1f10f75efaffa3dd8d8d09e665 (diff) | |
download | screenplain-03d27c86a728864d1ab59802f787aca893955fca.tar.gz |
Parse the title page.
The title page isn't actually used for anything right now,
but if it is in the source, at least it doesn't appear
as action lines in the output.
-rw-r--r-- | screenplain/parsers/spmd.py | 51 | ||||
-rw-r--r-- | tests/spmd_test.py | 58 |
2 files changed, 104 insertions, 5 deletions
```diff
diff --git a/screenplain/parsers/spmd.py b/screenplain/parsers/spmd.py
index 2940a8c..f9aa643 100644
--- a/screenplain/parsers/spmd.py
+++ b/screenplain/parsers/spmd.py
@@ -3,6 +3,7 @@
 # http://www.opensource.org/licenses/mit-license.php
 
 import itertools
+from itertools import takewhile
 import re
 
 from screenplain.types import (
@@ -18,6 +19,9 @@ slug_regexes = (
 
 TWOSPACE = ' ' * 2
 
+title_page_key_re = re.compile(r'([^:]+):\s*(.*)')
+title_page_value_re = re.compile(r'(?:\s{3,}|\t)(.+)')
+
 centered_re = re.compile(r'\s*>\s*(.*?)\s*<\s*$')
 dual_dialog_re = re.compile(r'^(.+?)(\s*\^)$')
 slug_re = re.compile(r'(?:(\.)\s*)?(\S.*?)\s*$')
@@ -153,6 +157,24 @@ def _is_blank(line):
 def parse(source):
     """Reads raw text input and generates paragraph objects."""
     source = (_preprocess_line(line) for line in source)
+
+    title_page_lines = list(takewhile(lambda line: line != '', source))
+    title_page = parse_title_page(title_page_lines)
+
+    if title_page:
+        # The first lines were a title page.
+        # Parse the rest of the source as screenplay body.
+        # TODO: Create a title page from the data in title_page
+        return parse_body(source)
+    else:
+        # The first lines were not a title page.
+        # Parse them as part of the screenplay body.
+        return parse_body(itertools.chain(title_page_lines, [''], source))
+
+
+def parse_body(source):
+    """Reads lines of the main screenplay and generates paragraph objects."""
+
     paragraphs = []
     for blank, input_lines in itertools.groupby(source, _is_blank):
         if not blank:
@@ -160,3 +182,32 @@ def parse(source):
             paragraph.update_list(paragraphs)
 
     return paragraphs
+
+def parse_title_page(lines):
+
+    result = {}
+
+    it = iter(lines)
+    try:
+        line = it.next()
+        while True:
+            key_match = title_page_key_re.match(line)
+            if not key_match:
+                return None
+            key, value = key_match.groups()
+            if value:
+                # Single line key/value
+                result.setdefault(key, []).append(value)
+                line = it.next()
+            else:
+                for line in it:
+                    value_match = title_page_value_re.match(line)
+                    if not value_match:
+                        break
+                    result.setdefault(key, []).append(value_match.group(1))
+                else:
+                    # Last line has been processed
+                    break
+    except StopIteration:
+        pass
+    return result
diff --git a/tests/spmd_test.py b/tests/spmd_test.py
index aac8df9..bb5f463 100644
--- a/tests/spmd_test.py
+++ b/tests/spmd_test.py
@@ -4,6 +4,7 @@
 
 import unittest2
 from screenplain.parsers.spmd import parse
+from screenplain.parsers import spmd
 from screenplain.types import (
     Slug, Action, Dialog, DualDialog, Transition, Section
 )
@@ -12,11 +13,6 @@ from screenplain.richstring import plain, italic, empty_string
 
 class ParseTests(unittest2.TestCase):
 
-    # A Scene Heading, or "slugline," is any line that has a blank
-    # line following it, and either begins with INT or EXT, or has
-    # two empty lines preceding it. A Scene Heading always has at
-    # least one blank line preceding it.
-    # NOTE: Actually the list used in Appendix 1
     def test_slug_with_prefix(self):
         paras = list(parse([
             'INT. SOMEWHERE - DAY',
@@ -337,5 +333,57 @@ class ParseTests(unittest2.TestCase):
         self.assertFalse(paras[0].centered)
         self.assertEquals([plain(line) for line in lines], paras[0].lines)
 
+class TitlePageTests(unittest2.TestCase):
+
+    def test_basic_title_page(self):
+        lines = [
+            'Title:',
+            ' _**BRICK & STEEL**_',
+            ' _**FULL RETIRED**_',
+            'Author: Stu Maschwitz',
+        ]
+        self.assertDictEqual(
+            {
+                'Title': ['_**BRICK & STEEL**_', '_**FULL RETIRED**_'],
+                'Author': ['Stu Maschwitz'],
+            },
+            spmd.parse_title_page(lines)
+        )
+
+    def test_multiple_values(self):
+        lines = [
+            'Title: Death',
+            'Title: - a love story',
+            'Title:',
+            ' (which happens to be true)',
+        ]
+        self.assertDictEqual(
+            {
+                'Title': [
+                    'Death',
+                    '- a love story',
+                    '(which happens to be true)'
+                ]
+            },
+            spmd.parse_title_page(lines)
+        )
+
+    def test_empty_value_ignored(self):
+        lines = [
+            'Title:',
+            'Author: John August',
+        ]
+        self.assertDictEqual(
+            {'Author': ['John August']},
+            spmd.parse_title_page(lines)
+        )
+
+    def test_unparsable_title_page_returns_none(self):
+        lines = [
+            'Title: Inception',
+            ' additional line',
+        ]
+        self.assertIsNone(spmd.parse_title_page(lines))
+
 if __name__ == '__main__':
     unittest2.main()
```