summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Chris Little <chrislit@crosswire.org> 2013-06-09 22:10:02 +0000
committer Chris Little <chrislit@crosswire.org> 2013-06-09 22:10:02 +0000
commit f360faae6b56685177450e083ae6e5f3750c86c8 (patch)
tree 4df1d9e26137d0ac3cc6020900bee71153ab86d1
parent 8faed072c06785de7e65b3953968f309c3669227 (diff)
download sword-tools-f360faae6b56685177450e083ae6e5f3750c86c8.tar.gz
reorganized roadmap & bumped version to reflect current featureset
improved printed feedback
decreased threads to one less than the number of processors to improve UI response during the long USFM to OSIS conversion process
Note: git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@420 07627401-56e2-0310-80f4-f8cd0041bdcd
-rwxr-xr-x modules/python/usfm2osis.py 46
1 files changed, 25 insertions, 21 deletions
diff --git a/modules/python/usfm2osis.py b/modules/python/usfm2osis.py
index e369900..49af24a 100755
--- a/modules/python/usfm2osis.py
+++ b/modules/python/usfm2osis.py
@@ -9,7 +9,7 @@ id = '$Id$'
usfmVersion = '2.35' # http://ubs-icap.org/chm/usfm/2.35/index.html
osisVersion = '2.1.1' # http://www.bibletechnologies.net/osisCore.2.1.1.xsd
-scriptVersion = '0.5'
+scriptVersion = '0.6'
# usfm2osis.py
# Copyright 2012 by the CrossWire Bible Society <http://www.crosswire.org/>
@@ -40,9 +40,10 @@ scriptVersion = '0.5'
### Roadmap:
# 0.5 initial commit, including full coverage of core USFM tags
-# 0.6 file sorting options (natural/alphabetic/canonical/none); expand sub-verses with ! in osisIDs; Python3 compatability; add optional schema validator (lxml probably); docstrings; unittest; make fully OO; PyDev project?
-# 0.7 test suite incorporating all USFM examples from UBS ICAP and other complex cases
-# 0.8 more clean-up & re-ordering to correctly encapsulate milestones within appropriate containers; clear remaining TODO items, to the extent possible
+# 0.6 file sorting options (natural/alphabetic/canonical/none); Python3 compatability; add optional schema validator (lxml probably); docstrings
+# 0.7 expand sub-verses with ! in osisIDs; unittest; make fully OO; PyDev project?
+# 0.8 test suite incorporating all USFM examples from UBS ICAP and other complex cases
+# 0.9 more clean-up & re-ordering to correctly encapsulate milestones within appropriate containers; clear remaining TODO items, to the extent possible
# 1.0 feature complete for release & production use
# 1.x xreffix.pl-functionality (osisParse(ref)), requiring SWORD bindings; use toc3 for localization
# 1.x SWORD-mode output?
@@ -1313,7 +1314,7 @@ def convertToOsis(sFile):
if encoding in aliases:
osis = codecs.open(sFile, 'r', encoding).read().strip() + '\n'
else:
- print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + sFile + ' as UTF-8.'))
+ print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + sFile + ' as UTF-8'))
encoding = 'utf-8'
if sys.version_info[0] < 3:
@@ -1380,7 +1381,7 @@ def readIdentifiersFromOsis(filename):
if encoding in aliases:
osis = codecs.open(filename, 'r', encoding).read().strip() + '\n'
else:
- #print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + filename + ' as UTF-8.'))
+ #print(('WARNING: Encoding "' + encoding + '" unknown, processing ' + filename + ' as UTF-8'))
encoding = 'utf-8'
# keep a copy of the OSIS book abbreviation for below (\toc3 processing) to store for mapping localized book names to/from OSIS
@@ -1458,7 +1459,7 @@ if __name__ == "__main__":
global encoding
global relaxedConformance
- num_processes = multiprocessing.cpu_count()
+ num_processes = max(1,multiprocessing.cpu_count()-1)
num_jobs = num_processes
encoding = ''
@@ -1519,26 +1520,26 @@ if __name__ == "__main__":
printUsage()
if sys.argv[i].startswith('a'):
sortKey = None
- print('Sorting book files alphanumerically.')
+ print('Sorting book files alphanumerically')
elif sys.argv[i].startswith('na'):
sortKey = keynat
- print('Sorting book files naturally.')
+ print('Sorting book files naturally')
elif sys.argv[i].startswith('c'):
sortKey = keycanon
- print('Sorting book files canonically.')
+ print('Sorting book files canonically')
elif sys.argv[i].startswith('u'):
sortKey = keyusfm
- print('Sorting book files by USFM book number.')
+ print('Sorting book files by USFM book number')
elif sys.argv[i].startswith('random'): # for testing only
sortKey = lambda filename: int(random.random()*256)
- print('Sorting book files randomly.')
+ print('Sorting book files randomly')
else:
sortKey = keysupplied
- print('Leaving book files unsorted, in the order in which they were supplied.')
+ print('Leaving book files unsorted, in the order in which they were supplied')
inputFilesIdx += 2 # increment 2, reflecting 2 args for -s
else:
sortKey = keynat
- print('Sorting book files naturally.')
+ print('Sorting book files naturally')
usfmDocList = sys.argv[inputFilesIdx:]
@@ -1556,6 +1557,7 @@ if __name__ == "__main__":
result_queue = multiprocessing.Queue()
# spawn workers
+ print('Converting USFM documents to OSIS...')
for i in range(num_processes):
worker = Worker(work_queue, result_queue)
worker.start()
@@ -1566,7 +1568,7 @@ if __name__ == "__main__":
k,v=result_queue.get()
osisSegment[k]=v
- verbosePrint('Assembling OSIS document...')
+ print('Assembling OSIS document')
osisDoc = '<osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.'+osisVersion+'.xsd">\n<osisText osisRefWork="Bible" xml:lang="und" osisIDWork="' + osisWork + '">\n<header>\n<work osisWork="' + osisWork + '"/>\n</header>\n'
unhandledTags = set()
@@ -1580,13 +1582,13 @@ if __name__ == "__main__":
try:
#import urllib
from lxml import etree
- verbosePrint('Validating XML...')
+ print('Validating XML...')
osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(osisSchema)))
#osisParser = etree.XMLParser(schema = etree.XMLSchema(etree.XML(urllib.urlopen('http://www.bibletechnologies.net/osisCore.' + osisVersion + '.xsd').read())))
etree.fromstring(osisDoc, osisParser)
- verbosePrint('XML Valid')
+ print('XML Valid')
except ImportError:
- verbosePrint('For schema validation, install lxml')
+ print('For schema validation, install lxml')
except etree.XMLSyntaxError as eVal:
print('XML Validation error: ' + str(eVal))
@@ -1594,9 +1596,11 @@ if __name__ == "__main__":
osisFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
osisFile.write(osisDoc)
+ print('Done!')
+
if unhandledTags:
- if verbose:
- print('')
+ print('')
print(('Unhandled USFM tags: ' + ', '.join(sorted(unhandledTags)) + ' (' + str(len(unhandledTags)) + ' total)'))
if not relaxedConformance:
- print('Consider using the -r option for relaxed markup processing.')
+ print('Consider using the -r option for relaxed markup processing')
+