diff options
author | Peter von Kaehne <refdoc@gmx.net> | 2010-01-13 08:41:29 +0000 |
---|---|---|
committer | Peter von Kaehne <refdoc@gmx.net> | 2010-01-13 08:41:29 +0000 |
commit | 894e6809eb329f05723713833638d9c7bae00380 (patch) | |
tree | f737e012a1a8de4891b7f2a5db923808b3723a21 /modules/geneve | |
parent | e974ac8d14f3627d04f03461e6d1cb09bca95532 (diff) | |
download | sword-tools-894e6809eb329f05723713833638d9c7bae00380.tar.gz |
'paralist.pl' creates a list of USFM cross-references (xrefs) from a styled MS Word document that has been converted to an AbiWord (.abw) file.
'books' is a list of USFM book IDs; Jeremiah (JER) had been left out and is added here.
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@267 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/geneve')
-rw-r--r-- | modules/geneve/books | 1 | ||||
-rwxr-xr-x | modules/geneve/paralist.pl | 41 |
2 files changed, 42 insertions, 0 deletions
diff --git a/modules/geneve/books b/modules/geneve/books index b45de57..fdde065 100644 --- a/modules/geneve/books +++ b/modules/geneve/books @@ -21,6 +21,7 @@ PRO ECC
SNG
ISA
+JER
LAM
EZK
DAN
#!/usr/bin/perl
# modules/geneve/paralist.pl (new file, mode 100755)
#
# Creates a list of USFM cross-references from AbiWord (*.abw) files.
# For every *.abw file in the current directory, each line is run through
# the substitutions in convert_line() and the result is appended to
# "<file>.list".
#
# The data is deliberately handled as raw bytes (no "use utf8", no encoding
# layers): the non-ASCII literals in the patterns below are matched
# byte-wise against the file content, exactly as the bytes are stored in
# this source file.
use strict;
use warnings;

# Apply the AbiWord -> USFM substitutions to one chomped input line.
#
# Parameters: $text - one line of an .abw file without its line terminator.
# Returns:    the converted text. It may be empty, and it may contain
#             embedded newlines, which are what separate the notes in the
#             output file.
#
# The order of the substitutions matters: later ones rely on the markup
# that earlier ones removed or inserted.
sub convert_line {
    my ($text) = @_;

    # Remove the first "<m ...>...</m>" element (first occurrence only).
    $text =~ s/<m\ .*?\/m>//;

    # Delete chapter-heading paragraphs whose text starts with "K " or "P ".
    # NOTE(review): the class [K|P] also admits a literal "|"; it probably
    # was meant to be [KP]. Left unchanged to keep the matching identical.
    $text =~ s/<p\ style=\"Parallelen [Ü]*bers Kapitel\".*?><c.*?>\ *[K|P]\ (.*?)<\/c><\/p>//g;

    # Delete empty cross-reference paragraphs carrying exactly these props.
    $text =~ s/<p\ style=\"Parallelenverweise\" xid=\"[0-9]+\"\ props=\"text-align:left; line-height:1.0; dom-dir:ltr; orphans:0; widows:0\"><\/p>//g;

    # Delete cross-reference paragraphs whose xid is a single digit 1-7.
    $text =~ s/<p\ style=\"Parallelenverweise\" xid=\"[1-7]\".+?><c.+?>.*?<\/c><\/p>//g;

    # Every remaining cross-reference paragraph starts a new output line
    # and is terminated with the closing marker "\x*".
    $text =~ s/<p style=\"Parallelenverweise\".*?>(.*?)<\/p>/\n$1\ \\x\*/g;

    # Drop the verse-number signal spans completely.
    $text =~ s/<c style=\"Verszahl\ Parallelensignal\".*?>(.*?)<\/c>//g;

    # A single-space "Leerz" span becomes the opening marker "\x".
    $text =~ s/<c style=\"Parallelentext Leerz\".*?>\ <\/c>/\ \\x\ /g;

    # Italic spans become "\xk <text>".
    $text =~ s/<c style=\"Parallelentext kursiv\".*?>(.*?)<\/c>/\ \\xk\ $1\ /g;

    # 8pt Utopia / Times New Roman spans become "\xt <text>".
    $text =~ s/<c .*?props=\"lang:de-DE;\ font-size:8pt;\ font-family:(Utopia|Times\ New\ Roman)\".*?>(.*?)<\/c>/\ \\xt\ $2/g;

    # Blank out the text from the start up to the end of the first line
    # when it carries a "Normal..." or "En-tete..." style.
    $text =~ s/^.*?style=\"Normal.*?$//g;
    $text =~ s/^.*?style=\"En-tête.*?$//g;

    # Remove line-break elements and any left-over xid attributes.
    $text =~ s/<br\ *\/>//g;
    $text =~ s/xid\=\".*?\"//g;

    # Strip a leading " x*".
    # NOTE(review): possibly meant to strip " \x*" - confirm with the author.
    $text =~ s/^\ x\*//;

    # Remove every remaining tag.
    $text =~ s/<.*?>//g;

    # When a "\xk" follows a "\xt", close the note after the "\xt" part and
    # open a new note on the next line in front of the "\xk".
    $text =~ s/(\\xt.*?)(\\xk)/$1\ \\x\*\n\\x\ $2/g;

    # Whitespace clean-up: leading space, space after a newline, and runs
    # of spaces.
    $text =~ s/^\ //g;
    $text =~ s/\n\ /\n/g;
    $text =~ s/\ \ +/\ /g;

    return $text;
}

# Convert one .abw file and append the result to "<file>.list".
#
# Parameters: $abw_file - path of the AbiWord file to read.
# Dies with a message naming the file if reading or writing fails.
sub convert_file {
    my ($abw_file) = @_;
    my $list_file = "$abw_file.list";

    open my $in, '<', $abw_file
        or die "Cannot read '$abw_file': $!\n";
    chomp(my @lines = <$in>);
    close $in
        or die "Cannot close '$abw_file': $!\n";

    my @converted = map { convert_line($_) } @lines;

    # Append mode is kept from the original script: running the script
    # again adds to an existing list file instead of replacing it.
    open my $out, '>>', $list_file
        or die "Cannot append to '$list_file': $!\n";
    print {$out} @converted
        or die "Cannot write to '$list_file': $!\n";
    close $out
        or die "Cannot close '$list_file': $!\n";

    return;
}

# Process every *.abw file in the current directory.
sub main {
    # glob() replaces `ls -1 *.abw`: no shell is involved and file names
    # with spaces or shell metacharacters are handled correctly.
    for my $abw_file (glob '*.abw') {
        convert_file($abw_file);
    }
    return;
}

# Run only when executed as a script, so the subs can be loaded for testing.
main() unless caller;