diff options
author | Chris Little <chrislit@crosswire.org> | 2012-08-26 09:28:04 +0000 |
---|---|---|
committer | Chris Little <chrislit@crosswire.org> | 2012-08-26 09:28:04 +0000 |
commit | d7bfe310c10142745931dd93deedc4fe96e6ffad (patch) | |
tree | 89fc3cc416a9e468273edf61bfb4f7628be0019e /modules/python | |
parent | f1eab50654b8bb3d82eb5a931aa0529d551727f6 (diff) | |
download | sword-tools-d7bfe310c10142745931dd93deedc4fe96e6ffad.tar.gz |
implemented sorting key functions for canonical & usfm-numeric orders
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@396 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/python')
-rwxr-xr-x | modules/python/usfm2osis.py | 34 |
1 files changed, 23 insertions, 11 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py index dea21ac..3ca8325 100755 --- a/modules/python/usfm2osis.py +++ b/modules/python/usfm2osis.py @@ -209,7 +209,7 @@ canonicalOrder = [ 'INDEX', 'GAZETTEER', 'X-OTHER' ] -sfmNumericOrder = [ +usfmNumericOrder = [ # Front Matter 'FRONT', 'INTRODUCTION', @@ -282,6 +282,7 @@ introPeripherals = { osis2locBk = dict() loc2osisBk = dict() +filename2osis = dict() verbose = bool() ucs4 = (sys.maxunicode > 0xFFFF) @@ -317,6 +318,18 @@ def keynat(string): END PSF-licened segment """ +def keycanon(filename): + if filename2osis: + return canonicalOrder.index(filename2osis[filename]) + else: + return keynat(filename) + +def keyusfm(filename): + if filename2osis: + return usfmNumericOrder.index(filename2osis[filename]) + else: + return keynat(filename) + def convertToOSIS(sFile): global encoding global relaxedConformance @@ -375,18 +388,19 @@ def convertToOSIS(sFile): return osis - def cvtIdentification(osis, relaxedConformance): + def cvtIdentification(osis, relaxedConformance, filename): """ Identification supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3 """ - global loc2osisBk, osis2locBk + global loc2osisBk, osis2locBk, filename2osis # \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.) 
osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n'+']*?)'+'\n'+r'(.*)(?=\\id|$)', lambda m: u'<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + u'</div type="book">\n' , osis, flags=re.DOTALL) # keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS osisBook = re.search(r'\\id\s+([A-Z0-9]{3})', osis) if osisBook: osisBook = bookDict[osisBook.group(1)] + filename2osis[filename] = osisBook # \ide_<ENCODING> osis = re.sub(r'\\ide\b.*'+'\n', '', osis) # delete, since this was handled above @@ -1128,7 +1142,7 @@ def convertToOSIS(sFile): # call individual conversion processors in series osis = cvtPreprocess(osis, relaxedConformance) osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance) - osis = cvtIdentification(osis, relaxedConformance) + osis = cvtIdentification(osis, relaxedConformance, sFile) osis = cvtIntroductions(osis, relaxedConformance) osis = cvtTitles(osis, relaxedConformance) osis = cvtChaptersAndVerses(osis, relaxedConformance) @@ -1175,7 +1189,7 @@ def printUsage(): print(' -h, --help print this usage information') print(' -o FILENAME output filename (default is: <osisWork>.osis.xml)') print(' -r enable relaxed markup processing (for non-standard USFM)') - print(' -s mode set book sorting mode: natural (default), alpha, canonical, none') + print(' -s mode set book sorting mode: natural (default), alpha, canonical, usfm, none') print(' -v verbose feedback') print(' -x disable XML validation') print('') @@ -1280,24 +1294,22 @@ if __name__ == "__main__": printUsage() if sys.argv[i].startswith('a'): sortKey = None - sortCmp = None print('Sorting book files alphanumerically.') elif sys.argv[i].startswith('na'): sortKey = keynat - sortCmp = None print('Sorting book files naturally.') elif sys.argv[i].startswith('c'): - sortKey = keynat # TODO: write appropriate helpers - sortCmp = None + 
sortKey = keycanon print('Sorting book files canonically.') + elif sys.argv[i].startswith('u'): + sortKey = keyusfm + print('Sorting book files by USFM book number.') else: sortKey = None # TODO: write appropriate helpers - sortCmp = None print('Leaving book files unsorted.') inputFilesIdx += 2 # increment 2, reflecting 2 args for -s else: sortKey = keynat - sortCmp = None print('Sorting book files naturally.') |