diff options
author | Chris Little <chrislit@crosswire.org> | 2012-08-27 08:20:20 +0000 |
---|---|---|
committer | Chris Little <chrislit@crosswire.org> | 2012-08-27 08:20:20 +0000 |
commit | 0cddc3f873584bb979509daec3ec0b70a5111aa4 (patch) | |
tree | 8b659a44796f03e772e3a7c466d2a470d8717074 | |
parent | cc7f674fae7feff6206b2ce03c44562fedeec47c (diff) | |
download | sword-tools-0cddc3f873584bb979509daec3ec0b70a5111aa4.tar.gz |
converted multiline comments to single-line, started adding docstrings
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@401 07627401-56e2-0310-80f4-f8cd0041bdcd
-rwxr-xr-x | modules/python/usfm2osis.py | 156 |
1 files changed, 71 insertions, 85 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py index ab6e6a8..d020568 100755 --- a/modules/python/usfm2osis.py +++ b/modules/python/usfm2osis.py @@ -290,12 +290,8 @@ filename2osis = dict() verbose = bool() ucs4 = (sys.maxunicode > 0xFFFF) -""" -BEGIN PSF-licensed segment -""" -""" -keynat from http://code.activestate.com/recipes/285264-natural-string-sorting/ -""" +# BEGIN PSF-licensed segment +# keynat from http://code.activestate.com/recipes/285264-natural-string-sorting/ def keynat(string): r'''A natural sort helper function for sort() and sorted() without using regular expressions or exceptions. @@ -318,24 +314,34 @@ def keynat(string): else: r.append(c.lower()) return r -""" -END PSF-licened segment -""" +# END PSF-licened segment def keycanon(filename): - global filename2osis - return canonicalOrder.index(filename2osis[filename]) + """Sort helper function that orders according to canon position (defined in canonicalOrder list), returning canonical position or infinity if not in the list.""" + if filename in filename2osis: + return canonicalOrder.index(filename2osis[filename]) + return float('inf') def keyusfm(filename): - return usfmNumericOrder.index(filename2osis[filename]) + """Sort helper function that orders according to USFM book number (defined in usfmNumericOrder list), returning USFM book number or infinity if not in the list.""" + if filename in filename2osis: + return usfmNumericOrder.index(filename2osis[filename]) + return float('inf') def keysupplied(filename): + """Sort helper function that keeps the items in the order in which they were supplied (i.e. it doesn't sort at all), returning the number of times the function has been called.""" if not hasattr(keysupplied, "counter"): - keysupplied.counter = 0 # it doesn't exist yet, so initialize it + keysupplied.counter = 0 keysupplied.counter += 1 return keysupplied.counter def convertToOsis(sFile): + """Open a USFM file and return a string consisting of its OSIS equivalent. + + Keyword arguments: + sFile -- Path to the USFM file to be converted + + """ global encoding global relaxedConformance @@ -394,10 +400,8 @@ def convertToOsis(sFile): def cvtIdentification(osis, relaxedConformance, filename): - """ - Identification - supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3 - """ + ### Identification + ### supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3 # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.) osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n]*?)\n'+r'(.*)(?=\\id|$)', lambda m: '\uFDD0<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + '</div type="book">\uFDD0\n' , osis, flags=re.DOTALL) @@ -432,10 +436,9 @@ def convertToOsis(sFile): def cvtIntroductions(osis, relaxedConformance): - """ - Introductions - supported: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili#, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie - """ + ### Introductions + ### supported: \imt#, \is#, \ip, \ipi, \im, \imi, \ipq, \imq, \ipr, \iq#, \ib, \ili#, \iot, \io#, \ior...\ior*, \iex, \iqt...\iqt*, \imte, \ie + # \imt#_text... osis = re.sub(r'\\imt(\d?)\s+(.+)', lambda m: '<title ' + ('level="'+m.group(1)+'" ' if m.group(1) else '') + 'type="main" subType="x-introduction">' + m.group(2) + '</title>', osis) @@ -507,10 +510,9 @@ def convertToOsis(sFile): def cvtTitles(osis, relaxedConformance): - """ - Titles, Headings, and Labels - supported: \mt#, \mte#, \ms#, \mr, \s#, \sr, \r, \rq...\rq*, \d, \sp - """ + ### Titles, Headings, and Labels + ### supported: \mt#, \mte#, \ms#, \mr, \s#, \sr, \r, \rq...\rq*, \d, \sp + # \ms#_text... osis = re.sub(r'\\ms1?\s+(.+)', lambda m: '\uFDD5<div type="majorSection"><title>' + m.group(1) + '</title>', osis) osis = re.sub('(\uFDD5[^\uFDD5\uFDD0]+)', r'\1'+'</div>\uFDD5\n', osis, flags=re.DOTALL) @@ -563,10 +565,9 @@ def convertToOsis(sFile): def cvtChaptersAndVerses(osis, relaxedConformance): - """ - Chapters and Verses - supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp* - """ + ### Chapters and Verses + ### supported: \c, \ca...\ca*, \cl, \cp, \cd, \v, \va...\va*, \vp...\vp* + # \c_# osis = re.sub(r'\\c\s+([^\s]+)\b(.+?)(?=(\\c\s+|</div type="book"))', lambda m: '\uFDD1<chapter osisID="$BOOK$.' + m.group(1) + r'" sID="$BOOK$.' + m.group(1) + '"/>' + m.group(2) + '<chapter eID="$BOOK$.' + m.group(1) + '"/>\uFDD3\n', osis, flags=re.DOTALL) @@ -617,10 +618,9 @@ def convertToOsis(sFile): def cvtParagraphs(osis, relaxedConformance): - """ - Paragraphs - supported: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b - """ + ### Paragraphs + ### supported: \p, \m, \pmo, \pm, \pmc, \pmr, \pi#, \mi, \nb, \cls, \li#, \pc, \pr, \ph#, \b + # \p(_text...) osis = re.sub(r'\\p\s+(.*?)(?=(\\(i?m|i?p|nb|lit|cls|tr)|<chapter eID|</?div\b|<(p|closer)\b))', lambda m: '\uFDD3<p>\n' + m.group(1) + '\uFDD3</p>\n', osis, flags=re.DOTALL) @@ -663,10 +663,9 @@ def convertToOsis(sFile): def cvtPoetry(osis, relaxedConformance): - """ - Poetry - supported: \q#, \qr, \qc, \qs...\qs*, \qa, \qac...\qac*, \qm#, \b - """ + ### Poetry + ### supported: \q#, \qr, \qc, \qs...\qs*, \qa, \qac...\qac*, \qm#, \b + # \qs_(Selah)\qs* osis = re.sub(r'\\qs\b\s(.+?)\\qs\*', r'<l type="selah">\1</l>', osis, flags=re.DOTALL) @@ -696,10 +695,9 @@ def convertToOsis(sFile): def cvtTables(osis, relaxedConformance): - """ - Tables - supported: \tr, \th#, \thr#, \tc#, \tcr# - """ + ### Tables + ### supported: \tr, \th#, \thr#, \tc#, \tcr# + # \tr_ osis = re.sub(r'\\tr\b\s*(.*?)(?=(['+'\uFDD0\uFDD1\uFDD3\uFDD4'+r']|\\tr\s|<lb\b|<title\b))', r'<row>\1</row>', osis, flags=re.DOTALL) @@ -752,10 +750,9 @@ def convertToOsis(sFile): def cvtFootnotes(osis, relaxedConformance): - """ - Footnotes - supported:\f...\f*, \fe...\fe*, \fr, \fk, \fq, \fqa, \fl, \fp, \fv, \ft, \fdc...\fdc*, \fm...\fm* - """ + ### Footnotes + ### supported:\f...\f*, \fe...\fe*, \fr, \fk, \fq, \fqa, \fl, \fp, \fv, \ft, \fdc...\fdc*, \fm...\fm* + # \f_+_...\f* osis = re.sub(r'\\f\s+([^\s\\]+)?\s*(.+?)\s*\\f\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' placement="foot">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL) @@ -809,10 +806,9 @@ def convertToOsis(sFile): def cvtCrossReferences(osis, relaxedConformance): - """ - Cross References - supported: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc* - """ + ### Cross References + ### supported: \\x...\\x*, \\xo, \\xk, \\xq, \\xt, \\xot...\\xot*, \\xnt...\\xnt*, \\xdc...\\xdc* + # \x_+_...\x* osis = re.sub(r'\\x\s+([^\s]+?)\s+(.+?)\s*\\x\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="crossReference">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL) @@ -821,14 +817,11 @@ def convertToOsis(sFile): return osis - """ - Special Text and Character Styles - """ + ### Special Text and Character Styles def cvtSpecialText(osis, relaxedConformance): - """ - Special Text - supported: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \nd...\nd*, \ord...\ord*, \pn...\pn*, \qt...\qt*, \sig...\sig*, \sls...\sls*, \tl...\tl*, \wj...\wj* - """ + ### Special Text + ### supported: \add...\add*, \bk...\bk*, \dc...\dc*, \k...\k*, \lit, \nd...\nd*, \ord...\ord*, \pn...\pn*, \qt...\qt*, \sig...\sig*, \sls...\sls*, \tl...\tl*, \wj...\wj* + # \add_...\add* osis = re.sub(r'\\add\s+(.+?)\\add\*', r'<transChange type="added">\1</transChange>', osis, flags=re.DOTALL) @@ -882,10 +875,9 @@ def convertToOsis(sFile): def cvtCharacterStyling(osis, relaxedConformance): - """ - Character Styling - supported: \em...\em*, \bd...\bd*, \it...\it*, \bdit...\bdit*, \no...\no*, \sc...\sc* - """ + ### Character Styling + ### supported: \em...\em*, \bd...\bd*, \it...\it*, \bdit...\bdit*, \no...\no*, \sc...\sc* + # \em_...\em* osis = re.sub(r'\\em\s+(.+?)\\em\*', r'<hi type="emphasis">\1</hi>', osis, flags=re.DOTALL) @@ -908,10 +900,9 @@ def convertToOsis(sFile): def cvtSpacingAndBreaks(osis, relaxedConformance): - """ - Spacing and Breaks - supported: ~, //, \pb - """ + ### Spacing and Breaks + ### supported: ~, //, \pb + # ~ osis = osis.replace('~', '\u00A0') @@ -925,10 +916,9 @@ def convertToOsis(sFile): def cvtSpecialFeatures(osis, relaxedConformance): - """ - Special Features - supported: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh* - """ + ### Special Features + ### supported: \fig...\fig*, \ndx...\ndx*, \pro...\pro*, \w...\w*, \wg...\wg*, \wh...\wh* + # \fig DESC|FILE|SIZE|LOC|COPY|CAP|REF\fig* def makeFigure(matchObject): fig_desc,fig_file,fig_size,fig_loc,fig_copy,fig_cap,fig_ref = matchObject.groups() @@ -939,10 +929,9 @@ def convertToOsis(sFile): figure += ' size="' + fig_size + '"' if fig_copy: figure += ' rights="' + fig_copy + '"' - """ TODO: implement parsing in osisParse(Bible reference string) - if fig_ref: - figure += ' annotateRef="' + osisParse(fig_ref) + '"' - """ + # TODO: implement parsing in osisParse(Bible reference string) + # if fig_ref: + # figure += ' annotateRef="' + osisParse(fig_ref) + '"' figure += '>\n' if fig_cap: figure += '<caption>' + fig_cap + '</caption>\n' @@ -979,10 +968,9 @@ def convertToOsis(sFile): def cvtPeripherals(osis, relaxedConformance): - """ - Peripherals - supported: \periph - """ + ### Peripherals + ### supported: \periph + # \periph def tagPeriph(matchObject): periphType,contents = matchObject @@ -1002,10 +990,9 @@ def convertToOsis(sFile): def cvtStudyBibleContent(osis, relaxedConformance): - """ - Study Bible Content - supported: \ef...\ef*, \ex...\ex*, \esb...\esbe, \cat - """ + ### Study Bible Content + ### supported: \ef...\ef*, \ex...\ex*, \esb...\esbe, \cat + # \ef...\ef* osis = re.sub(r'\\ef\s+([^\s\\]+?)\s*(.+?)\s*\\ef\*', lambda m: '<note' + ((' n=""') if (m.group(1) == '-') else ('' if (m.group(1) == '+') else (' n="' + m.group(1) + '"'))) + ' type="study">' + m.group(2) + '\uFDDF</note>', osis, flags=re.DOTALL) osis = re.sub(r'(<note\b[^>]*?>.*?</note>)', lambda m: processNote(m.group(1)), osis, flags=re.DOTALL) @@ -1024,11 +1011,10 @@ def convertToOsis(sFile): def cvtPrivateUseExtensions(osis, relaxedConformance): - """ - \z namespace - supported: \z<Extension> - We can't really know what these mean, but will preserve them as <milestone/> elements. - """ + ### \z namespace + ### supported: \z<Extension> + ### We can't really know what these mean, but will preserve them as <milestone/> elements. + # publishing assistant markers # \zpa-xb...\zpa-xb* : \periph Book # \zpa-xc...\zpa-xc* : \periph Chapter |