summaryrefslogtreecommitdiffstats
path: root/versification/av11n.py
blob: 88f8402c6952aae65011e5d3284e03890c8686ae (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python
# coding: utf-8
#
# This makes a very rough attempt to compare the osisIDs found in an
# XML file with each of the versifications that SWORD knows about, to help
# a user find the one which is closest to the one they are using. Its one
# limitation is that your file must at least be segregated into OT/NT
# in the proper order; within each testament no particular ordering is
# required.
#
# Invoke simply by calling the program and the file name.
import argparse
import io
import logging
# Normally the level would be logging.WARNING; logging.INFO and
# logging.DEBUG give additional information.
logging.basicConfig(level=logging.INFO,
                    format='%(levelname)s:%(message)s')
log = logging.getLogger('versification')

import re
import sys
try:
    import lxml.etree as ET
except ImportError:
    import xml.etree.ElementTree as ET

# XML namespace URI used to qualify the OSIS <verse> tag when searching
# the parsed tree.
OSIS_NS = 'http://www.bibletechnologies.net/2003/OSIS/namespace'
# Group 1 captures an osisID up to and including a ".<digits>.<digits>" run;
# any text after that is dropped. IDs without that shape do not match.
VERSEID_RE = re.compile(r'^(.+\.\d+\.\d+).*$')

# Inform the user that we need the SWORD extension
try:
    import Sword
except ImportError:
    # log.exception records the message together with the ImportError
    # traceback; the script then stops with exit status 1 because nothing
    # below can work without the SWORD bindings.
    log.exception(
        "You do not have the SWORD library installed. Please install it.")
    sys.exit(1)

# Command line: one positional OSIS file name plus an optional, repeatable
# -v/--verbose flag that switches on debug logging.
arg_parser = argparse.ArgumentParser(
    description='Compare OSIS file with available v11ns.')

arg_parser.add_argument('--verbose', '-v', action='count')
arg_parser.add_argument('filename', nargs=1)


args = arg_parser.parse_args()

if args.verbose:
    # setLevel is a method and must be called; assigning to the attribute
    # would merely shadow the method and leave the log level unchanged.
    log.setLevel(logging.DEBUG)

log.debug('args = %s', args)

# Open the file
log.debug('Opening %s', args.filename[0])

# Parse inside a context manager so the file handle is closed as soon as
# the tree has been built instead of being left open for the whole run.
with io.open(args.filename[0], encoding='utf8') as osis_file:
    tree = ET.parse(osis_file).getroot()
# Get the list of versifications
log.debug('Fetching a list of v11ns')
vmgr = Sword.VersificationMgr.getSystemVersificationMgr()
av11ns = vmgr.getVersificationSystems()
log.debug('av11ns = %s', av11ns)

# Get the list of all osisIDs.
# The IDs are kept in document order (each one only once): the comparison
# further down decides whether an unknown ID belongs to the OT or the NT by
# whether an NT verse has already been seen, so the order must be preserved.
log.debug('Fetching a list of OSIS IDs')
ids = []
seen_ids = set()  # fast duplicate check alongside the ordered list
for item in tree.iter('{%s}verse' % OSIS_NS):
    osis_id = item.attrib.get('osisID')
    if osis_id is None:
        continue
    # Drop anything after '!' so only the part before it is compared
    osis_id = osis_id.split('!')[0]
    if osis_id not in seen_ids:
        seen_ids.add(osis_id)
        ids.append(osis_id)
log.debug('ids = len(%d)', len(ids))

# Iterate each versification scheme and report, for each one, which of its
# references are missing from the file and which IDs in the file it does
# not know about.
for v11n in av11ns:
    v11n_name = v11n.c_str()
    print('\nChecking %s:\n%s' %
          (v11n_name, (len(v11n_name) + 10) * '-'))
    # Construct the sets of IDs in this versification
    key = Sword.VerseKey()
    key.setVersificationSystem(v11n_name)
    # Anything left in this afterwards is missing from the OSIS ot
    otkeyList = set()
    # Anything left in this afterwards is missing from the OSIS nt
    ntkeyList = set()
    # Anything that gets placed in here is extraneous OT material (we think)
    otextraKeys = []
    # Anything that gets placed in here is extraneous NT material (we think)
    ntextraKeys = []

    # Step through every verse of the versification until the key reports
    # an error. Sets are used so the membership tests below are cheap.
    inNT = False
    while key.popError() == '\x00':
        skey = key.getOSISRef()
        # Assume we enter the NT when we hit Matthew
        if not inNT and skey.startswith('Matt'):
            inNT = True
        if inNT:
            ntkeyList.add(skey)
        else:
            otkeyList.add(skey)
        key.increment()

    inNT = False
    # Now iterate the ones we have in this file
    for osisid in ids:
        if osisid in otkeyList:
            otkeyList.remove(osisid)
        elif osisid in ntkeyList:
            ntkeyList.remove(osisid)
            # From the first recognised NT verse on, unknown IDs are
            # counted as NT extras.
            inNT = True
        else:
            verse_match = VERSEID_RE.match(osisid)
            # Ignore it if not VERSEID_RE.match()
            if verse_match is None:
                continue
            if inNT:
                ntextraKeys.append(verse_match.group(1))
            else:
                otextraKeys.append(verse_match.group(1))

    # Now let's see what is left over
    # Sets are unordered, so sort the leftovers into a list for display
    keyList = sorted(otkeyList | ntkeyList)
    if keyList:
        # It doesn't make sense to print out lists longer than 100
        # they cannot be read anyway
        if len(keyList) < 100:
            log.info('\tThe following IDs don’t appear in your file:\n%s',
                     ", ".join(keyList))
        print(('\tThere are %d OT IDs and %d NT IDs ' +
               'in v11n which aren’t in your file.')
              % (len(otkeyList), len(ntkeyList)))
    else:
        print('\tYour file has all the references in this v11n')

    # Now let's see if you had extra
    extraKeys = otextraKeys + ntextraKeys
    if extraKeys:
        # Report the extra IDs themselves (not the missing ones), again
        # only when the list is short enough to be readable.
        if len(extraKeys) < 100:
            log.info(
                '\tThe following IDs don’t appear in v11n:\n%s',
                ", ".join(extraKeys))
        print(('\tThere are %d OT IDs and %d NT IDs ' +
               'in your file which don’t appear in v11n.')
              % (len(otextraKeys), len(ntextraKeys)))
    else:
        print('\tYour file has no extra references')