1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
#! /usr/bin/env python
# Distributed under the "Here, have it" license
# Written by Greg Hellings, 2011, all rights reserved
def usage(name):
    '''
    Helpful hints for the user. Print what this script expects on its
    command line, then terminate the process.

    name -- program name shown in the usage line (callers pass
            sys.argv[0]).

    Never returns: always leaves through sys.exit(-1), i.e. raises
    SystemExit.
    '''
    # Imported locally: this function is also called from the module's
    # import-failure handler, so it must not depend on the module-level
    # "import sys" having already run.
    import sys

    # A parenthesised single argument prints the same text whether
    # "print" is the Python 2 statement or the Python 3 function.
    print("Usage: %s <original mod> <mod to check> <OT|NT>" % (name,))
    print("Requires the SWORD Python library to operate.")
    print("Also requires the Numpy library (fast array calculations)")
    sys.exit(-1)
def increment(v, *args):
    '''
    Advance the lead module and re-synchronise the others to it.

    v     -- the module that is stepped with its own increment().
    *args -- any number of further modules; each one receives, through
             setKey(), the key that v reports from getKey() after the
             step.
    '''
    v.increment()
    current_key = v.getKey()
    # The followers are positioned from the lead module's key instead of
    # being incremented on their own.
    for follower in args:
        follower.setKey(current_key)
def main(argv):
    '''
    Main loops and the like for the program.

    Walks two SWORD modules verse by verse, records the ratio of word
    counts (original / checked) for every verse of the selected
    testament where both have text, then prints the mean and standard
    deviation of those ratios and the OSIS references of the flagged
    verses whose ratio lies at least two standard deviations from the
    mean.

    argv -- command-line style list:
            [program name, original module, module to check, 'OT'|'NT'].

    Leaves through usage() when the arguments are malformed and through
    sys.exit(-1) when either module cannot be loaded.
    '''
    # usage() terminates the process, so one combined check is
    # equivalent to testing the two conditions one after the other.
    if len(argv) != 4 or argv[3] not in ('OT', 'NT'):
        usage(argv[0])

    # Which testament are we comparing against
    check_nt = argv[3] == 'NT'

    # Fetch the original source language module, then the one to check
    mgr = Sword.SWMgr()
    original = mgr.getModule(argv[1])
    if original is None:
        print("You might want to pick a translation that exists.")
        sys.exit(-1)
    trans = mgr.getModule(argv[2])
    if trans is None:
        print("You might want to pick a translation that exists.")
        sys.exit(-1)

    # One entry per comparable verse: (OSIS ref, word ratio, flag).
    counts = []
    in_nt = False

    # Iterate the whole selection
    print("\t**********************************\n"
          "******** Building tables ********\n"
          "**********************************")
    # Loop while both modules report '\x00' from Error() - presumably
    # the bindings' "no error" value.
    # NOTE(review): newer SWORD releases appear to expose this call as
    # popError() - confirm against the installed bindings.
    while original.Error() == '\x00' and trans.Error() == '\x00':
        o_words = _split_words(original.StripText())
        t_words = _split_words(trans.StripText())
        key = Sword.VerseKey(original.getKey())

        # Check if we've entered the NT
        if not in_nt and key.getOSISRef().startswith('Matt'):
            in_nt = True

        # Only check one testament
        if check_nt != in_nt:
            increment(original, trans)
            continue

        if not o_words:
            print('Unable to check verse %s - no content in source' % (key.getText(),))
        elif not t_words:
            print('Unable to check verse %s - no content in target' % (key.getText(),))
        else:
            # NOTE(review): this compares the verse number with
            # getChapterMax(); if the intent is "last verse of the
            # chapter", getVerseMax() may be the call that was meant -
            # confirm against the SWORD VerseKey API before changing.
            flagged = key.getVerse() == key.getChapterMax()
            ratio = len(o_words) / float(len(t_words))
            counts.append((key.getOSISRef(), ratio, flagged))
        increment(original, trans)

    # With nothing collected the statistics below would only produce
    # NaN values (and a numpy warning), so stop with a clear message.
    if not counts:
        print("No verses could be compared - nothing to report.")
        return

    # Now that we have all the values, let's see if there
    # are any that seem way out of whack
    ratios = [ratio for _ref, ratio, _flag in counts]
    mean = numpy.average(ratios)
    std = numpy.std(ratios)
    print("\t***********************************\n"
          "****** Beginning comparisons ******\n"
          "***********************************")
    print("Average ratio: %0.4f" % (mean,))
    print("Standard dev: %0.4f" % (std,))

    # Now iterate the target translation
    print("The following references fall outside of the target standard deviation")
    threshold = 2 * std
    for ref, ratio, flagged in counts:
        if flagged and abs(ratio - mean) >= threshold:
            print('%s' % (ref,))


def _split_words(text):
    '''
    Return the whitespace-separated words of text.

    Byte strings are decoded as UTF-8 first; values that are already
    text are split as they are.
    '''
    if isinstance(text, bytes):
        text = text.decode('utf-8')
    return text.split()
# sys is imported outside the guarded block so that the failure handler
# below can always reach sys.argv.
import sys

try:
    import Sword
    import numpy
except ImportError:
    # A required third-party library is missing: show the usage text,
    # which names both requirements, and exit.
    usage(sys.argv[0])

if __name__ == '__main__':
    main(sys.argv)
|