summaryrefslogtreecommitdiffstats
path: root/versification/v11n_check.py
blob: 6530f2e7724c1ba4eae1ceb28df60ad5c89692ea (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#! /usr/bin/env python
# Distributed under the "Here, have it" license
# Written by Greg Hellings, 2011, all rights reserved

def usage(name):
	'''
	Print command-line help to stdout and terminate the program.

	name -- program name to show in the usage line (callers in this
	        file pass argv[0]).

	Never returns normally: it always finishes with sys.exit(-1),
	which raises SystemExit.
	'''
	# Imported here rather than relying on a module-level name: this
	# function is also called when the module-level imports fail, and
	# at that point a global "sys" is not guaranteed to exist yet.
	import sys

	# Single-argument print(...) produces the same output on Python 2
	# (parenthesised expression) and Python 3 (function call).
	print("Usage: %s <original mod> <mod to check> <OT|NT>" % (name,))
	print("Requires the SWORD Python library to operate.")
	print("Also requires the Numpy library (fast array calculations)")
	sys.exit(-1)

def increment(v, *args):
	'''
	Step the leading module forward by one entry, then hand the
	key it now reports to each of the remaining modules so that
	they all point at the same place.

	v     -- the module that drives the iteration
	*args -- any number of modules that should follow it
	'''
	v.increment()
	shared_key = v.getKey()
	for follower in args:
		follower.setKey(shared_key)

def _verse_words(module):
	'''
	Return the whitespace-separated words of the text that
	module.StripText() yields for the module's current entry.
	'''
	text = module.StripText()
	# Byte strings are decoded as UTF-8 (the original code always
	# decoded); text that is not a byte string is used as it is, so
	# bindings that already return text do not fail on .decode().
	if isinstance(text, bytes):
		text = text.decode('utf-8')
	return text.split()

def main(argv):
	'''
	Walk two SWORD modules in step, compute a source/target word
	count ratio for every verse of the chosen testament, and print
	the references whose ratio lies two or more standard deviations
	away from the mean ratio.

	argv -- full argument vector:
	        [program name, original module, module to check, 'OT'|'NT']

	All output goes to stdout. Wrong arguments end in usage(); a
	module that cannot be loaded ends in sys.exit(-1).
	'''
	if len(argv) != 4 or argv[3] not in ('OT', 'NT'):
		usage(argv[0])

	# Which testament are we comparing against
	checkNT = argv[3] == 'NT'

	# Fetch the original source language module and the translation
	mgr = Sword.SWMgr()
	original = mgr.getModule(argv[1])
	if original is None:
		print("You might want to pick a translation that exists.")
		sys.exit(-1)
	trans = mgr.getModule(argv[2])
	if trans is None:
		print("You might want to pick a translation that exists.")
		sys.exit(-1)

	# Each entry: (OSIS reference, word count ratio, flag from the
	# verse/chapter-max comparison below)
	counts = []
	NT = False
	# Iterate the whole selection
	print("\t**********************************\n"
		"\t********  Building tables ********\n"
		"\t**********************************")
	# Keep going while both modules report '\x00' from Error()
	# (presumably "no error" -- confirm against the SWORD bindings).
	while original.Error() == '\x00' and trans.Error() == '\x00':
		oWords = _verse_words(original)
		tWords = _verse_words(trans)
		key = Sword.VerseKey(original.getKey())

		# Check if we've entered the NT; once set, the flag stays set
		if not NT and key.getOSISRef().startswith('Matt'):
			NT = True

		# Only check one testament
		if checkNT != NT:
			increment(original, trans)
			continue

		if len(oWords) == 0:
			print('Unable to check verse %s - no content in source' % (key.getText(),))
		elif len(tWords) == 0:
			print('Unable to check verse %s - no content in target' % (key.getText(),))
		else:
			# NOTE(review): this compares the verse number with
			# getChapterMax(); if the intent is "last verse of the
			# chapter", getVerseMax() may be the call that was meant.
			# Behaviour is left unchanged -- TODO confirm.
			flagged = key.getVerse() == key.getChapterMax()
			# float() forces true division on Python 2 as well
			ratio = len(oWords) / float(len(tWords))
			counts.append((key.getOSISRef(), ratio, flagged))

		increment(original, trans)

	# Without any comparable verse the statistics below would be
	# computed over an empty list (nan results plus numpy warnings).
	if not counts:
		print("No verses could be compared.")
		return

	# Now that we have all the values, let's see if there
	# are any that seem way out of whack
	vals = [ratio for _, ratio, _ in counts]
	mean = numpy.average(vals)
	std  = numpy.std(vals)

	print("\t***********************************\n"
		"\t****** Beginning comparisons ******\n"
		"\t***********************************")
	print("Average ratio: %0.4f" % (mean,))
	print("Standard dev:  %0.4f" % (std,))

	# Report only entries that are both far from the mean and flagged
	print("The following references fall outside of the target standard deviation")
	for ref, ratio, flagged in counts:
		if abs(ratio - mean) >= 2 * std and flagged:
			print('%s' % (ref,))

# sys is part of the standard library, so it is imported outside the
# guarded block: the failure handler below needs sys.argv.
import sys

try:
	import Sword
	import numpy
except ImportError:
	# A required third-party library is missing; explain what the
	# script needs and exit (usage() does not return).
	usage(sys.argv[0])

if __name__ == '__main__':
	main(sys.argv)