diff options
Diffstat (limited to 'versification/av11n.py')
-rwxr-xr-x | versification/av11n.py | 163 |
1 files changed, 87 insertions, 76 deletions
diff --git a/versification/av11n.py b/versification/av11n.py index 136a382..5e01c9e 100755 --- a/versification/av11n.py +++ b/versification/av11n.py @@ -11,102 +11,113 @@ # more output, change the following line to be True instead of False verbose = False debug = True -import sys import re -verseid = re.compile('^.+\..+\..+$') +import sys +verseid = re.compile(r'^.+\..+\..+$') # Inform the user that we need the SWORD extension try: - import Sword -except: - print "You do not have the SWORD library installed. Please install it." - sys.exit(1) + import Sword +except ImportError: + print "You do not have the SWORD library installed. Please install it." + sys.exit(1) -# Inform the user that we need pyquery, as it makes parsing XML files that much easier +# Inform the user that we need pyquery, as it makes parsing XML files +# that much easier try: - from pyquery import PyQuery as pq -except: - print "You do not appear to have PyQuery installed. Please install it." - sys.exit(2) + from pyquery import PyQuery as pq # noqa +except ImportError: + print "You do not appear to have PyQuery installed. Please install it." + sys.exit(2) # Without the name of a file, we cannot proceed any further if len(sys.argv) < 2 or sys.argv[1] == '--help': - print "Usage: %s <OSISfile>" % (sys.argv[0],) + print "Usage: %s <OSISfile>" % (sys.argv[0],) # Open the file if debug: - print 'Opening %s' % (sys.argv[1],) + print 'Opening %s' % (sys.argv[1],) d = pq(filename=sys.argv[1]) # Get the list of versifications if debug: - print 'Fetching a list of versifications' + print 'Fetching a list of versifications' vmgr = Sword.VerseMgr.getSystemVerseMgr() av11ns = vmgr.getVersificationSystems() # Get the list of all osisIDs if debug: - print 'Fetching a list of OSIS IDs' + print 'Fetching a list of OSIS IDs' ids = d("*[osisID]") # Iterate each versification scheme for v11n in av11ns: - print 'Checking %s' % (v11n.c_str(),) - # Construct a list of the IDs in this versification - key = Sword.VerseKey() - key.setVersificationSystem(v11n.c_str()) - otkeyList = [] # Anything left in this afterwards is missing from the OSIS ot - ntkeyList = [] # Anything left in this afterwards is missing from the OSIS nt - otextraKeys = [] # Anything that gets placed in here is extraneous OT material (we think) - ntextraKeys = [] # Anything that gets placed in here is extraneous NT material (we think) - - inNT = False - while key.Error() == '\x00': - skey = key.getOSISRef() - if not inNT and re.match('^Matt', skey): # Assume we enter the NT when we hit Matthew - inNT = True - if inNT: - ntkeyList.append(skey) - else: - otkeyList.append(skey) - key.increment() - ntkeyList = set(ntkeyList) # The 'in' operator only works on a set - otkeyList = set(otkeyList) - - inNT = False - # Now iterate the ones we have in this file - for e in ids: - osisid = e.attrib.get('osisID') - #print 'Checking key %s' % (osisid,) - if osisid in otkeyList: - otkeyList.remove(osisid) - elif osisid in ntkeyList: - ntkeyList.remove(osisid) - inNT = True - elif verseid.match(osisid) and inNT: - ntextraKeys.append(osisid) - elif verseid.match(osisid) and not inNT: - otextraKeys.append(osisid) - # Ignore it if not verseid.match() - - # Now let's see what is left over - keyList = list(otkeyList.union(ntkeyList)) # Sets in Python cannot be ordered - keyList.sort() - if len(keyList) > 0: - if verbose: - print '\tThe following IDs do not appear in your file:' - for k in keyList: - print k - else: - print '\tThere are %d OT IDs and %d NT IDs in the versification which are not in your file.' % (len(otkeyList), len(ntkeyList)) - else: - print '\tYour file has all the references in this versification' - - # Now let's see if you had extra - if len(otextraKeys + ntextraKeys) > 0: - if verbose: - print '\tThe following IDs do not appear in the versification:' - for k in ntextraKeys + otextraKeys: - print k - else: - print '\tThere are %d OT IDs and %d NT IDs in your file which do not appear in the versification.' % (len(otextraKeys), len(ntextraKeys)) - else: - print '\tYour file has no extra references' + print 'Checking %s' % (v11n.c_str(),) + # Construct a list of the IDs in this versification + key = Sword.VerseKey() + key.setVersificationSystem(v11n.c_str()) + # Anything left in this afterwards is missing from the OSIS ot + otkeyList = [] + # Anything left in this afterwards is missing from the OSIS nt + ntkeyList = [] + # Anything that gets placed in here is extraneous OT material (we think) + otextraKeys = [] + # Anything that gets placed in here is extraneous NT material (we think) + ntextraKeys = [] + + inNT = False + while key.Error() == '\x00': + skey = key.getOSISRef() + # Assume we enter the NT when we hit Matthew + if not inNT and re.match('^Matt', skey): + inNT = True + if inNT: + ntkeyList.append(skey) + else: + otkeyList.append(skey) + key.increment() + ntkeyList = set(ntkeyList) # The 'in' operator only works on a set + otkeyList = set(otkeyList) + + inNT = False + # Now iterate the ones we have in this file + for e in ids: + osisid = e.attrib.get('osisID') + #print 'Checking key %s' % (osisid,) + if osisid in otkeyList: + otkeyList.remove(osisid) + elif osisid in ntkeyList: + ntkeyList.remove(osisid) + inNT = True + elif verseid.match(osisid) and inNT: + ntextraKeys.append(osisid) + elif verseid.match(osisid) and not inNT: + otextraKeys.append(osisid) + # Ignore it if not verseid.match() + + # Now let's see what is left over + # Sets in Python cannot be ordered + keyList = list(otkeyList.union(ntkeyList)) + keyList.sort() + if len(keyList) > 0: + if verbose: + print '\tThe following IDs do not appear in your file:' + for k in keyList: + print k + else: + print ('\tThere are %d OT IDs and %d NT IDs ' + + 'in the versification which are not in your file.') \ + % (len(otkeyList), len(ntkeyList)) + else: + print '\tYour file has all the references in this versification' + + # Now let's see if you had extra + if len(otextraKeys + ntextraKeys) > 0: + if verbose: + print '\tThe following IDs do not appear in the versification:' + for k in ntextraKeys + otextraKeys: + print k + else: + print ('\tThere are %d OT IDs and %d NT IDs ' + + 'in your file which do not appear in the versification.') \ + % (len(otextraKeys), len(ntextraKeys)) + else: + print '\tYour file has no extra references' |