diff options
-rwxr-xr-x | versification/av11n.py | 38 |
1 files changed, 19 insertions, 19 deletions
diff --git a/versification/av11n.py b/versification/av11n.py index 9ade84e..198eeb4 100755 --- a/versification/av11n.py +++ b/versification/av11n.py @@ -9,6 +9,7 @@ # special as for ordering. # # Invoke simply by calling the program and the file name. +import io import logging # in normal state level should be debug.WARNING, debug.INFO and debug.DEBUG # give additional information. @@ -16,7 +17,12 @@ logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s', level=logging.WARNING) import re import sys -verseid = re.compile(r'^.+\..+\..+$') +try: + import lxml.etree as ET +except ImportError: + import xml.etree.ElementTree as ET + +VERSEID_RE = re.compile(r'^.+\..+\..+$') # Inform the user that we need the SWORD extension try: @@ -26,15 +32,6 @@ except ImportError: "You do not have the SWORD library installed. Please install it.") sys.exit(1) -# Inform the user that we need pyquery, as it makes parsing XML files -# that much easier -try: - from pyquery import PyQuery as pq # noqa -except ImportError: - logging.exception( - "You do not appear to have PyQuery installed. Please install it.") - sys.exit(2) - # Without the name of a file, we cannot proceed any further if len(sys.argv) < 2 or sys.argv[1] == '--help': print >>sys.stderr, "Usage: %s <OSISfile>" % sys.argv[0] @@ -42,7 +39,8 @@ if len(sys.argv) < 2 or sys.argv[1] == '--help': # Open the file logging.debug('Opening %s' % (sys.argv[1],)) -d = pq(filename=sys.argv[1]) + +tree = ET.parse(io.open(sys.argv[1], encoding='utf8')).getroot() # Get the list of versifications logging.debug('Fetching a list of v11ns') vmgr = Sword.VersificationMgr.getSystemVersificationMgr() @@ -50,7 +48,11 @@ av11ns = vmgr.getVersificationSystems() # Get the list of all osisIDs logging.debug('Fetching a list of OSIS IDs') -ids = d("*[osisID]") +ids = set() +for item in tree.iter(): + if 'osisID' in item.attrib: + ids.add(item.attrib['osisID']) + # Iterate each versification scheme for v11n in av11ns: print('Checking %s' % v11n.c_str()) @@ -82,20 +84,18 @@ for v11n in av11ns: inNT = False # Now iterate the ones we have in this file - for e in ids: - logging.debug('e = %s', e) - osisid = e.attrib.get('osisID') - #print 'Checking key %s' % (osisid,) + for osisid in ids: + logging.debug('Checking key %s', osisid) if osisid in otkeyList: otkeyList.remove(osisid) elif osisid in ntkeyList: ntkeyList.remove(osisid) inNT = True - elif verseid.match(osisid) and inNT: + elif VERSEID_RE.match(osisid) and inNT: ntextraKeys.append(osisid) - elif verseid.match(osisid) and not inNT: + elif VERSEID_RE.match(osisid) and not inNT: otextraKeys.append(osisid) - # Ignore it if not verseid.match() + # Ignore it if not VERSEID_RE.match() # Now let's see what is left over # Sets in Python cannot be ordered |