#!/usr/bin/env python
# coding: utf-8
#
# This makes a very rough attempt to compare the osisIDs found in an
# XML file with each of the versifications that SWORD knows about, to help
# a user find the one which is most akin to the one they are using. It is
# limited in that it needs your file to be at least segregated into OT/NT
# in the proper order, although within each testament it requires nothing
# special as for ordering.
#
# Invoke simply by calling the program with the file name as its argument.

import logging
import re
import sys

# In the normal state the level should be logging.WARNING; logging.INFO and
# logging.DEBUG give additional information (INFO lists the differing IDs).
logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                    level=logging.WARNING)

# An osisID is treated as a verse-level reference when it contains at least
# two dots, each with at least one character on either side.
verseid = re.compile(r'^.+\..+\..+$')


def load_dependencies():
    """Import the third-party modules this script needs.

    The imports are done here rather than at module level so that the pure
    helpers below stay usable when SWORD or PyQuery is not installed.

    Returns:
        A ``(Sword, PyQuery)`` tuple: the SWORD bindings module and the
        PyQuery class.

    Raises:
        SystemExit: with status 1 when the SWORD bindings cannot be
            imported, or status 2 when PyQuery cannot be imported.
    """
    # Inform the user that we need the SWORD extension
    try:
        import Sword
    except ImportError:
        logging.exception(
            "You do not have the SWORD library installed. Please install it.")
        sys.exit(1)

    # Inform the user that we need pyquery, as it makes parsing XML files
    # that much easier
    try:
        from pyquery import PyQuery
    except ImportError:
        logging.exception(
            "You do not appear to have PyQuery installed. Please install it.")
        sys.exit(2)

    return Sword, PyQuery


def iter_versification_refs(sword, v11n_name):
    """Yield the OSIS reference of every position a VerseKey visits.

    Args:
        sword: the imported SWORD bindings module.
        v11n_name: name of the versification system to walk.

    Yields:
        The value of ``getOSISRef()`` for each position, until the key
        reports an error after being incremented.
    """
    key = sword.VerseKey()
    key.setVersificationSystem(v11n_name)
    # popError() returning NUL means "no error"; anything else ends the walk.
    while key.popError() == '\x00':
        yield key.getOSISRef()
        key.increment()


def split_testaments(refs):
    """Split an ordered sequence of OSIS references into OT and NT sets.

    Everything before the first reference starting with ``Matt`` is counted
    as Old Testament; that reference and everything after it is counted as
    New Testament.

    Args:
        refs: iterable of OSIS reference strings, OT first.

    Returns:
        An ``(ot_refs, nt_refs)`` tuple of sets.
    """
    ot_refs = set()
    nt_refs = set()
    in_nt = False
    for ref in refs:
        # Assume we enter the NT when we hit Matthew
        if not in_nt and ref.startswith('Matt'):
            in_nt = True
        if in_nt:
            nt_refs.add(ref)
        else:
            ot_refs.add(ref)
    return ot_refs, nt_refs


def compare_ids(ot_refs, nt_refs, osis_ids):
    """Compare the osisIDs of a file against one versification.

    The file is assumed to list its OT material before its NT material: an
    unknown verse-level ID is attributed to the NT only once at least one
    known NT ID has been seen. Because each matched reference is struck off
    as it is found, a second occurrence of the same ID is reported as extra.

    Args:
        ot_refs: set of OT references of the versification (not modified).
        nt_refs: set of NT references of the versification (not modified).
        osis_ids: iterable of osisID strings in file order.

    Returns:
        A ``(missing_ot, missing_nt, extra_ot, extra_nt)`` tuple. The first
        two are sets of versification references absent from the file; the
        last two are lists, in file order, of verse-level IDs that are not
        in the versification.
    """
    # Work on copies so the caller's sets survive the comparison.
    missing_ot = set(ot_refs)
    missing_nt = set(nt_refs)
    extra_ot = []
    extra_nt = []
    in_nt = False
    for osisid in osis_ids:
        if osisid in missing_ot:
            missing_ot.remove(osisid)
        elif osisid in missing_nt:
            missing_nt.remove(osisid)
            in_nt = True
        elif verseid.match(osisid):
            if in_nt:
                extra_nt.append(osisid)
            else:
                extra_ot.append(osisid)
        # Anything else (book or chapter level IDs, say) is ignored.
    return missing_ot, missing_nt, extra_ot, extra_nt


def report_differences(missing_ot, missing_nt, extra_ot, extra_nt):
    """Print the summary for one versification and log the ID lists.

    The counts go to standard output; the full ID lists are only logged at
    INFO level.

    Args:
        missing_ot: set of OT references missing from the file.
        missing_nt: set of NT references missing from the file.
        extra_ot: list of extra OT IDs found in the file.
        extra_nt: list of extra NT IDs found in the file.
    """
    # Sets are unordered, so sort for a stable, readable listing.
    missing = sorted(missing_ot | missing_nt)
    if missing:
        logging.info('\tThe following IDs don’t appear in your file:\n%s',
                     "\n".join(missing))
        print(('\tThere are %d OT IDs and %d NT IDs '
               'in v11n which aren’t in your file.')
              % (len(missing_ot), len(missing_nt)))
    else:
        print('\tYour file has all the references in this v11n')

    # Now let's see if the file had extra IDs; list those, not the missing
    # ones.
    extra = extra_ot + extra_nt
    if extra:
        logging.info('\tThe following IDs don’t appear in v11n:\n%s',
                     "\n".join(extra))
        print(('\tThere are %d OT IDs and %d NT IDs '
               'in your file which don’t appear in v11n.')
              % (len(extra_ot), len(extra_nt)))
    else:
        print('\tYour file has no extra references')


def main(argv=None):
    """Check the file named on the command line against every versification.

    Args:
        argv: argument vector; defaults to ``sys.argv``.

    Raises:
        SystemExit: with status 1 or 2 when a dependency is missing (see
            ``load_dependencies``), or status 1 when no file name is given
            or ``--help`` is requested.
    """
    if argv is None:
        argv = sys.argv

    sword, pq = load_dependencies()

    # Without the name of a file, we cannot proceed any further
    if len(argv) < 2 or argv[1] == '--help':
        sys.stderr.write("Usage: %s <OSIS file>\n" % argv[0])
        sys.exit(1)

    # Open the file
    logging.debug('Opening %s', argv[1])
    document = pq(filename=argv[1])

    # Get the list of versifications
    logging.debug('Fetching a list of v11ns')
    vmgr = sword.VersificationMgr.getSystemVersificationMgr()
    av11ns = vmgr.getVersificationSystems()

    # Get the list of all osisIDs; this does not depend on the versification,
    # so it is done once up front.
    logging.debug('Fetching a list of OSIS IDs')
    osis_ids = []
    for element in document("*[osisID]"):
        logging.debug('e = %s', element)
        osis_ids.append(element.attrib.get('osisID'))

    # Iterate each versification scheme
    for v11n in av11ns:
        v11n_name = v11n.c_str()
        print('Checking %s' % v11n_name)
        ot_refs, nt_refs = split_testaments(
            iter_versification_refs(sword, v11n_name))
        report_differences(*compare_ids(ot_refs, nt_refs, osis_ids))


if __name__ == '__main__':
    main()