diff options
author | Chris Little <chrislit@crosswire.org> | 2013-06-09 22:10:02 +0000 |
---|---|---|
committer | Chris Little <chrislit@crosswire.org> | 2013-06-09 22:10:02 +0000 |
commit | f360faae6b56685177450e083ae6e5f3750c86c8 (patch) | |
tree | 4df1d9e26137d0ac3cc6020900bee71153ab86d1 | |
parent | 8faed072c06785de7e65b3953968f309c3669227 (diff) | |
download | sword-tools-f360faae6b56685177450e083ae6e5f3750c86c8.tar.gz |
reorganized roadmap & bumped version to reflect current featureset
improved printed feedback
decreased threads to one less than the number of processors to improve UI response during the long USFM to OSIS conversion process
Note:
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@420 07627401-56e2-0310-80f4-f8cd0041bdcd
-rwxr-xr-x | modules/python/usfm2osis.py | 46 |
1 files changed, 25 insertions, 21 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py index e369900..49af24a 100755 --- a/modules/python/usfm2osis.py +++ b/modules/python/usfm2osis.py @@ -9,7 +9,7 @@ id = '$Id$' usfmVersion = '2.35' # http://ubs-icap.org/chm/usfm/2.35/index.html osisVersion = '2.1.1' # http://www.bibletechnologies.net/osisCore.2.1.1.xsd -scriptVersion = '0.5' +scriptVersion = '0.6' # usfm2osis.py # Copyright 2012 by the CrossWire Bible Society <http://www.crosswire.org/> @@ -40,9 +40,10 @@ scriptVersion = '0.5' ### Roadmap: # 0.5 initial commit, including full coverage of core USFM tags -# 0.6 file sorting options (natural/alphabetic/canonical/none); expand sub-verses with ! in osisIDs; Python3 compatability; add optional schema validator (lxml probably); docstrings; unittest; make fully OO; PyDev project? -# 0.7 test suite incorporating all USFM examples from UBS ICAP and other complex cases -# 0.8 more clean-up & re-ordering to correctly encapsulate milestones within appropriate containers; clear remaining TODO items, to the extent possible +# 0.6 file sorting options (natural/alphabetic/canonical/none); Python3 compatability; add optional schema validator (lxml probably); docstrings +# 0.7 expand sub-verses with ! in osisIDs; unittest; make fully OO; PyDev project? +# 0.8 test suite incorporating all USFM examples from UBS ICAP and other complex cases +# 0.9 more clean-up & re-ordering to correctly encapsulate milestones within appropriate containers; clear remaining TODO items, to the extent possible # 1.0 feature complete for release & production use # 1.x xreffix.pl-functionality (osisParse(ref)), requiring SWORD bindings; use toc3 for localization # 1.x SWORD-mode output? 
@@ -1313,7 +1314,7 @@ def convertToOsis(sFile): if encoding in aliases: osis = codecs.open(sFile, 'r', encoding).read().strip() + '\n' else: - print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + sFile + ' as UTF-8.')) + print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + sFile + ' as UTF-8')) encoding = 'utf-8' if sys.version_info[0] < 3: @@ -1380,7 +1381,7 @@ def readIdentifiersFromOsis(filename): if encoding in aliases: osis = codecs.open(filename, 'r', encoding).read().strip() + '\n' else: - #print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + filename + ' as UTF-8.')) + #print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + filename + ' as UTF-8')) encoding = 'utf-8' # keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS @@ -1458,7 +1459,7 @@ if __name__ == "__main__": global encoding global relaxedConformance - num_processes = multiprocessing.cpu_count() + num_processes = max(1,multiprocessing.cpu_count()-1) num_jobs = num_processes encoding = '' @@ -1519,26 +1520,26 @@ if __name__ == "__main__": printUsage() if sys.argv[i].startswith('a'): sortKey = None - print('Sorting book files alphanumerically.') + print('Sorting book files alphanumerically') elif sys.argv[i].startswith('na'): sortKey = keynat - print('Sorting book files naturally.') + print('Sorting book files naturally') elif sys.argv[i].startswith('c'): sortKey = keycanon - print('Sorting book files canonically.') + print('Sorting book files canonically') elif sys.argv[i].startswith('u'): sortKey = keyusfm - print('Sorting book files by USFM book number.') + print('Sorting book files by USFM book number') elif sys.argv[i].startswith('random'): # for testing only sortKey = lambda filename: int(random.random()*256) - print('Sorting book files randomly.') + print('Sorting book files randomly') else: sortKey = keysupplied - print('Leaving book files unsorted, in the 
order in which they were supplied.') + print('Leaving book files unsorted, in the order in which they were supplied') inputFilesIdx += 2 # increment 2, reflecting 2 args for -s else: sortKey = keynat - print('Sorting book files naturally.') + print('Sorting book files naturally') usfmDocList = sys.argv[inputFilesIdx:] @@ -1556,6 +1557,7 @@ if __name__ == "__main__": result_queue = multiprocessing.Queue() # spawn workers + print('Converting USFM documents to OSIS...') for i in range(num_processes): worker = Worker(work_queue, result_queue) worker.start() @@ -1566,7 +1568,7 @@ if __name__ == "__main__": k,v=result_queue.get() osisSegment[k]=v - verbosePrint('Assembling OSIS document...') + print('Assembling OSIS document') osisDoc = '<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.'+osisVersion+'.xsd">\n<osisText osisRefWork="Bible" xml:lang="und" osisIDWork="' + osisWork + '">\n<header>\n<work osisWork="' + osisWork + '"/>\n</header>\n' unhandledTags = set() @@ -1580,13 +1582,13 @@ if __name__ == "__main__": try: #import urllib from lxml import etree - verbosePrint('Validating XML...') + print('Validating XML...') osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(osisSchema))) #osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(urllib.urlopen('http://www.bibletechnologies.net/osisCore.' 
+ osisVersion + '.xsd').read()))) etree.fromstring(osisDoc, osisParser) - verbosePrint('XML Valid') + print('XML Valid') except ImportError: - verbosePrint('For schema validation, install lxml') + print('For schema validation, install lxml') except etree.XMLSyntaxError as eVal: print('XML Validation error: ' + str(eVal)) @@ -1594,9 +1596,11 @@ if __name__ == "__main__": osisFile.write('<?xml version="1.0" encoding="UTF-8"?>\n') osisFile.write(osisDoc) + print('Done!') + if unhandledTags: - if verbose: - print('') + print('') print(('Unhandled USFM tags: ' + ', '.join(sorted(unhandledTags)) + ' (' + str(len(unhandledTags)) + ' total)')) if not relaxedConformance: - print('Consider using the -r option for relaxed markup processing.') + print('Consider using the -r option for relaxed markup processing') + |