summary | refs | log | tree | commit | diff | stats
path: root/modules/python
diff options
context:
space:
mode:
author: Chris Little <chrislit@crosswire.org> 2012-08-26 09:28:04 +0000
committer: Chris Little <chrislit@crosswire.org> 2012-08-26 09:28:04 +0000
commit: d7bfe310c10142745931dd93deedc4fe96e6ffad (patch)
tree: 89fc3cc416a9e468273edf61bfb4f7628be0019e /modules/python
parent: f1eab50654b8bb3d82eb5a931aa0529d551727f6 (diff)
download: sword-tools-d7bfe310c10142745931dd93deedc4fe96e6ffad.tar.gz
implemented sorting key functions for canonical & usfm-numeric orders
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@396 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/python')
-rwxr-xr-x modules/python/usfm2osis.py 34
1 files changed, 23 insertions, 11 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py
index dea21ac..3ca8325 100755
--- a/modules/python/usfm2osis.py
+++ b/modules/python/usfm2osis.py
@@ -209,7 +209,7 @@ canonicalOrder = [
'INDEX', 'GAZETTEER', 'X-OTHER'
]
-sfmNumericOrder = [
+usfmNumericOrder = [
# Front Matter
'FRONT', 'INTRODUCTION',
@@ -282,6 +282,7 @@ introPeripherals = {
osis2locBk = dict()
loc2osisBk = dict()
+filename2osis = dict()
verbose = bool()
ucs4 = (sys.maxunicode > 0xFFFF)
@@ -317,6 +318,18 @@ def keynat(string):
END PSF-licened segment
"""
+def keycanon(filename):
+ if filename2osis:
+ return canonicalOrder.index(filename2osis[filename])
+ else:
+ return keynat(filename)
+
+def keyusfm(filename):
+ if filename2osis:
+ return usfmNumericOrder.index(filename2osis[filename])
+ else:
+ return keynat(filename)
+
def convertToOSIS(sFile):
global encoding
global relaxedConformance
@@ -375,18 +388,19 @@ def convertToOSIS(sFile):
return osis
- def cvtIdentification(osis, relaxedConformance):
+ def cvtIdentification(osis, relaxedConformance, filename):
"""
Identification
supported: \id, \ide, \sts, \rem, \h, \toc1, \toc2, \toc3
"""
- global loc2osisBk, osis2locBk
+ global loc2osisBk, osis2locBk, filename2osis
# \id_<CODE>_(Name of file, Book name, Language, Last edited, Date etc.)
osis = re.sub(r'\\id\s+([A-Z0-9]{3})\b\s*([^\\'+'\n'+']*?)'+'\n'+r'(.*)(?=\\id|$)', lambda m: u'﷐<div type="book" osisID="' + bookDict[m.group(1)] + '">\n' + (('<!-- id comment - ' + m.group(2) + ' -->\n') if m.group(2) else '') + m.group(3) + u'</div type="book">﷐\n' , osis, flags=re.DOTALL)
# keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS
osisBook = re.search(r'\\id\s+([A-Z0-9]{3})', osis)
if osisBook:
osisBook = bookDict[osisBook.group(1)]
+ filename2osis[filename] = osisBook
# \ide_<ENCODING>
osis = re.sub(r'\\ide\b.*'+'\n', '', osis) # delete, since this was handled above
@@ -1128,7 +1142,7 @@ def convertToOSIS(sFile):
# call individual conversion processors in series
osis = cvtPreprocess(osis, relaxedConformance)
osis = cvtRelaxedConformanceRemaps(osis, relaxedConformance)
- osis = cvtIdentification(osis, relaxedConformance)
+ osis = cvtIdentification(osis, relaxedConformance, sFile)
osis = cvtIntroductions(osis, relaxedConformance)
osis = cvtTitles(osis, relaxedConformance)
osis = cvtChaptersAndVerses(osis, relaxedConformance)
@@ -1175,7 +1189,7 @@ def printUsage():
print(' -h, --help print this usage information')
print(' -o FILENAME output filename (default is: <osisWork>.osis.xml)')
print(' -r enable relaxed markup processing (for non-standard USFM)')
- print(' -s mode set book sorting mode: natural (default), alpha, canonical, none')
+ print(' -s mode set book sorting mode: natural (default), alpha, canonical, usfm, none')
print(' -v verbose feedback')
print(' -x disable XML validation')
print('')
@@ -1280,24 +1294,22 @@ if __name__ == "__main__":
printUsage()
if sys.argv[i].startswith('a'):
sortKey = None
- sortCmp = None
print('Sorting book files alphanumerically.')
elif sys.argv[i].startswith('na'):
sortKey = keynat
- sortCmp = None
print('Sorting book files naturally.')
elif sys.argv[i].startswith('c'):
- sortKey = keynat # TODO: write appropriate helpers
- sortCmp = None
+ sortKey = keycanon
print('Sorting book files canonically.')
+ elif sys.argv[i].startswith('u'):
+ sortKey = keyusfm
+ print('Sorting book files by USFM book number.')
else:
sortKey = None # TODO: write appropriate helpers
- sortCmp = None
print('Leaving book files unsorted.')
inputFilesIdx += 2 # increment 2, reflecting 2 args for -s
else:
sortKey = keynat
- sortCmp = None
print('Sorting book files naturally.')