summaryrefslogtreecommitdiffstats
path: root/versification/validate_v11n.py
blob: 3a76bf88f37c5c5a3b20477877c943996d7d3a99 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python

import re, sys

# Patterns for the canon.h-style constructs this script understands. They are
# compiled once here rather than being looked up again for every input line.
_COMMENT_RE = re.compile(r'//.*')
_BOOKS_START_RE = re.compile(r'struct sbook [on]tbooks.*?\[\]')
_VM_START_RE = re.compile(r'int vm.*?\[\]')
_BOOK_ENTRY_RE = re.compile(r'\{".+?", ".+?", ".+?", (\d+)\},')
_VM_ENTRY_RE = re.compile(r'\d+,?')


def count_chapters(lines):
    """Count the chapters declared by the book tables and by the vm table.

    This is not robust. It assumes that [on]tbooks[] precedes vm[] and that
    all of the verse counts in vm[] are part of books listed in [on]tbooks[].
    In general, it assumes canon files that look like what we include in the
    library and generate from v11nsys.pl.

    lines -- an iterable of text lines from a canon.h-style file.

    Returns a (books_chapters, vm_chapters) tuple: the sum of the numeric
    fourth field of every entry found inside an [on]tbooks[] array, and the
    number of integers found inside a vm[] array.
    """
    books_chapters = 0
    vm_chapters = 0
    in_books = False
    in_vm = False

    for line in lines:
        # Drop '//' comments first so that digits inside them are not counted,
        # then drop the trailing whitespace (including the newline).
        line = _COMMENT_RE.sub('', line).rstrip()
        if not line:
            continue

        if _BOOKS_START_RE.search(line):
            in_books = True
        elif _VM_START_RE.search(line):
            in_vm = True
        elif (in_vm or in_books) and '};' in line:
            # End of whichever array is currently being read.
            in_books = False
            in_vm = False
        elif in_books:
            entry = _BOOK_ENTRY_RE.search(line)
            if entry:
                books_chapters += int(entry.group(1))
        elif in_vm:
            # Each integer in vm[] is counted as one chapter.
            vm_chapters += len(_VM_ENTRY_RE.findall(line))

    return books_chapters, vm_chapters


def validate_file(path):
    """Return the one-line report for the canon.h-style file at *path*.

    The file is reported as valid when the chapter total from its book tables
    equals the number of entries in its vm table. Errors raised by open() for
    a missing or unreadable file are not caught here.
    """
    # The with-block closes the file even if reading fails part-way through.
    with open(path) as handle:
        books_chapters, vm_chapters = count_chapters(handle)

    verdict = '' if books_chapters == vm_chapters else 'not '
    return (path + ' is ' + verdict + 'valid: ' +
            str(books_chapters) + ':' + str(vm_chapters))


def main(argv):
    """Print a report for every file named in argv[1:], or a usage message."""
    files = argv[1:]

    if not files:
        # A single parenthesised argument prints identically on Python 2 and 3.
        print('Usage: ' + argv[0] + ' <canon.h-style file(s)>')
        return

    for fn in files:
        print(validate_file(fn))


if __name__ == '__main__':
    main(sys.argv)