summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTroy A. Griffitts <scribe@crosswire.org>2014-04-13 04:30:58 +0000
committerTroy A. Griffitts <scribe@crosswire.org>2014-04-13 04:30:58 +0000
commitbb0c93ca425cce7dabe9e466b22dd8c6c338c721 (patch)
tree22b70984678114b5fe571b830cc1a57dc0e1d290
parent45d37bb6862a92bc0999e4468bb78d8dae68c1cf (diff)
downloadsword-tools-bb0c93ca425cce7dabe9e466b22dd8c6c338c721.tar.gz
added simple conversion scripts for maurice's stuff
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@485 07627401-56e2-0310-80f4-f8cd0041bdcd
-rw-r--r--misc/symbgreektoutf16.c78
-rw-r--r--modules/bibles/grc/tr/Makefile30
-rw-r--r--modules/bibles/grc/whnu/Makefile30
-rwxr-xr-xmodules/bibles/grc/whnu/convert.sh28
4 files changed, 166 insertions, 0 deletions
diff --git a/misc/symbgreektoutf16.c b/misc/symbgreektoutf16.c
new file mode 100644
index 0000000..d09a14b
--- /dev/null
+++ b/misc/symbgreektoutf16.c
@@ -0,0 +1,78 @@
+#include <stdio.h>
+#include <string.h>
+unsigned short symbolGreekTranslation[] = { /* Translation table used by main():
+ * the index is one input byte (0-255) and the value is the 16-bit code unit
+ * that main() writes out for it, high byte first.  The comment at the start
+ * of each row gives the index of the row's first entry in decimal and hex;
+ * the comment after a value shows the input character for that slot.
+ * Entries written as plain character literals ('a', 'W', ...) produce the
+ * code of that ASCII character.
+ * NOTE(review): judging by the file name this maps a "Symbol"-style Greek
+ * font encoding to UTF-16 -- confirm against the source texts.
+ * NOTE(review): a few annotations disagree with their values, e.g. slot 0xdb
+ * holds 0x005c (backslash) but is annotated with a backquote, and slot 0xc1
+ * holds 0x00BA but is annotated "degree" (which is U+00B0) -- verify.
+ */
+ /* 000 (00) */ 0x0000 /*' '*/, 0x0001 /*''*/, 0x0002 /*''*/, 0x0003 /*''*/,
+ /* 004 (04) */ 0x0004 /*''*/, 0x0005 /*''*/, 0x0006 /*''*/, 0x0007 /*''*/,
+ /* 008 (08) */ ' ' /*'backspace? '*/, 0x0009 /*'\t'*/, 0x000a /*\n*/, 0x000b /*' '*/,
+ /* 012 (0c) */ 0x000c /*' '*/, 0x000d /*'\r'*/, 0x000e /*''*/, 0x000f /*''*/,
+ /* 016 (10) */ 0x0010 /*''*/, 0x0011 /*''*/, 0x0012 /*''*/, 0x0013 /*''*/,
+ /* 020 (14) */ 0x0014 /*''*/, 0x0015 /*''*/, 0x0016 /*''*/, 0x0017 /*''*/,
+ /* 024 (18) */ 0x0018 /*''*/, 0x0019 /*''*/, 0x001a /*''*/, 0x001b /*''*/,
+ /* 028 (1c) */ 0x001c /*''*/, 0x001d /*''*/, 0x001e /*''*/, 0x001f /*''*/,
+ /* 032 (20) */ 0x0020 /*' '*/, 0x0021 /*'!'*/, 0x03C2 /*'"'*/, 0x0023 /*'#'*/,
+ /* 036 (24) */ 0x0024 /*'$'*/, 0x0025 /*'%'*/, 0x0026 /*'&'*/, 0x0342 /* circumflex perispomeni'''*/,
+ /* 040 (28) */ 0x0028 /*'('*/, 0x0029 /*')'*/, 0x002a /*'*'*/, 0x002b /*'+'*/,
+ /* 044 (2c) */ 0x002c /*','*/, 0x002d /*'-'*/, 0x002e /*'.'*/, 0x002f /*'/'*/,
+ /* 048 (30) */ 0x0030 /*'0'*/, 0x0031 /*'1'*/, 0x0032 /*'2'*/, 0x0033 /*'3'*/,
+ /* 052 (34) */ 0x0034 /*'4'*/, 0x0035 /*'5'*/, 0x0036 /*'6'*/, 0x0037 /*'7'*/,
+ /* 056 (38) */ 0x0038 /*'8'*/, 0x0039 /*'9'*/, 0x00b7 /* middle dot ':'*/, ';' /* possibly a grave varia (see code below) */,
+ /* 060 (3c) */ 0x003c /*'<'*/, 0x003d /*'='*/, 0x0308 /*'>'*/, 0x003f /*'?'*/,
+ /* 064 (40) */ 0x0040 /*'@'*/, 0x0391 /*'A'*/, 0x0392 /*'B'*/, 0x03A7 /*'C'*/,
+ /* 068 (44) */ 0x0394 /*'D'*/, 0x0395 /*'E'*/, 0x03A6 /*'F'*/, 0x0393 /*'G'*/,
+ /* 072 (48) */ 0x0397 /*'H'*/, 0x0399 /*'I'*/, 0x0314 /*'J'*/, 0x039a /*'K' bug in .py */,
+ /* 076 (4c) */ 0x039b /*'L'*/, 0x039c /*'M'*/, 0x039d /*'N'*/, 0x039f /*'O'*/,
+ /* 080 (50) */ 0x03a0 /*'P'*/, 0x0398 /*'Q'*/, 0x03a1 /*'R'*/, 0x03a3 /*'S'*/,
+ /* 084 (54) */ 0x03a4 /*'T'*/, 0x03a5 /*'U'*/, 0x0384 /*'V'*/, 0x03a9 /*'W'*/,
+ /* 088 (58) */ 0x039e /*'X'*/, 0x03a8 /*'Y'*/, 0x005a /*'Z'*/, 0x005b /*'['*/,
+ /* 092 (5c) */ 0x005c /*'\'*/, 0x005d /*']'*/, 0x005e /*'^'*/, 0x005f /*'_'*/,
+ /* 096 (60) */ 0x0060 /*'`'*/, 0x03b1 /*'a'*/, 0x03b2 /*'b'*/, 0x03c7 /*'c'*/,
+ /* 100 (64) */ 0x03b4 /*'d'*/, 0x03b5 /*'e'*/, 0x03c6 /*'f'*/, 0x03b3 /*'g'*/,
+ /* 104 (68) */ 0x03b7 /*'h'*/, 0x03b9 /*'i'*/, 0x0313 /* smooth breathing 'j'*/, 0x03ba /*'k'*/,
+ /* 108 (6c) */ 0x03bb /*'l'*/, 0x03bc /*'m'*/, 0x03bd /*'n'*/, 0x03bf /*'o'*/,
+ /* 112 (70) */ 0x03c0 /*'p'*/, 0x03b8 /*'q'*/, 0x03c1 /*'r'*/, 0x03c3 /*'s'*/,
+ /* 116 (74) */ 0x03c4 /*'t'*/, 0x03c5 /*'u'*/, 0x0301 /* acute tonos 'v'*/, 0x03c9 /*'w'*/,
+ /* 120 (78) */ 0x03be /*'x'*/, 0x03c8 /*'y'*/, 0x03b6 /*'z'*/, 0x007b /*'{'*/,
+ /* 124 (7c) */ 0x007c /*'|'*/, 0x007d /*'}'*/, 0x03c2 /*'~'*/, 0x007f /*''*/,
+ /* 128 (80) */ ' ' /*'�'*/, 0x0081 /*'�'*/, 'C', 0x0083 /*'�'*/,
+ /* 132 (84) */ 0x0084 /*'�'*/, 0x003e /* blockend '�'*/, 0x1fbd /*elusionszeichen'�'*/, 0x0087 /*'�'*/,
+ /* 136 (88) */ 0x0088 /*'�'*/, 0x003b /*; '�'*/, ' ' /*'�'*/, 0x008b /*'�'*/,
+ /* 140 (8c) */ 'a', 'c', 0x008e /*'�'*/, 0x008f /*'�'*/,
+ /* 144 (90) */ 0x0090 /*'�'*/, 0x0091 /*'�'*/, 0x0092 /*'�'*/, 0x0093 /*'�'*/,
+ /* 148 (94) */ 0x0094 /*'�'*/, 0x0095 /*'�'*/, 0x0096 /*'�'*/, 0x0056 /*'�'*/,
+ /* 152 (98) */ 0x0098 /*'�'*/, 0x0099 /*'�'*/, 0x002f /* / '�'*/, 0x009b /*'�'*/,
+ /* 156 (9c) */ 0x009c /*'�'*/, 0x009d /*'�'*/, 0x009e /*'�'*/, 0x009f /*'�'*/,
+ /* 160 (a0) */ 't', 0x00a1 /*'�'*/, 0x00a2 /*'�'*/, 0x00a3 /*'�'*/,
+ /* 164 (a4) */ 0x00a4 /*'�'*/, 0x00a5 /*'�'*/, 0x00a6 /*'�'*/, 's',
+ /* 168 (a8) */ 'r', 'g', 0x00aa /*'�'*/, 'e',
+ /* 172 (ac) */ 'u', 0x0047 /* G '�'*/, 0x00ae /*'�'*/, '}' /*'�'*/,
+ /* 176 (b0) */ 0x00b0 /*'�'*/, 0x0306 /*'�'*/, 'j', 0x0323 /*unterpunkt'�'*/,
+ /* 180 (b4) */ 'y', 'm', 'd', 'w',
+ /* 184 (b8) */ 0x00b8 /*'�'*/, 'p', 'b' /*'�'*/, 0x005b /* [ '�'*/,
+ /* 188 (bc) */ 0x005d /*]'�'*/, 0x00bd /*'�'*/, 0x00be /*'�'*/, '{' /*'�'*/,
+ /* 192 (c0) */ 0x00c0 /*'�'*/, 0x00BA /* degree '�'*/, 'l'/* 0x006c */, 'v' /*'�'*/,
+ /* 196 (c4) */ 'f', 'x', 0x02bc/* apostrophe */, 0x00c7 /*'�'*/,
+ /* 200 (c8) */ 0x00c8 /*'�'*/, ';' /*'�'*/, ' ' /*'�'*/, 0x00cb /*'�'*/,
+ /* 204 (cc) */ 0x00cc /*'�'*/, 0x00cd /*'�'*/, 'Q', 'q',
+ /* 208 (d0) */ 0x0304, 0x0305 /* intf uses: 0x00af oberstrich'�'*/, 0x00d2 /*'�'*/, 0x00d3 /*'�'*/,
+ /* 212 (d4) */ 0x00d4 /*'�'*/, 0x00d5 /*'�'*/, 0x00d6 /*'�'*/, 'V',
+ /* 216 (d8) */ 0x00d8 /*'�'*/, 0x00d9 /*'�'*/, 0x007c /*|'�'*/, 0x005c /*`'�'*/,
+ /* 220 (dc) */ 0x00dc /*'�'*/, 0x00dd /*'�'*/, 0x00de /*'�'*/, 0x0302 /*'�'*/,
+ /* 224 (e0) */ 0x00e0 /*'�'*/, 0x00e1 /*'�'*/, ' ' /*'�'*/, 'W',
+ /* 228 (e4) */ 0x00e4 /*'�'*/, 'R', 0x00e6 /*'�'*/, 'Y',
+ /* 232 (e8) */ 'U', 'i', 'S', 'D',
+ /* 236 (ec) */ 'F', 'G', 0x00ee /*'�'*/, ' ' /*'�'*/,
+ /* 240 (f0) */ 0x00f0 /*'�'*/, 'L', ':' /*'�'*/, 0x00f3 /*'�'*/,
+ /* 244 (f4) */ 0x00f4 /*'�'*/, 0x03DB /*stigma*/, 'n', 0x00f7 /*'�'*/,
+ /* 248 (f8) */ 'J', 0x00f9 /*'�'*/, 'h', 'k',
+ /* 252 (fc) */ 0x00fc /*'�'*/, 0x00fd /*'�'*/, 0x00fe /*'�'*/, 0x00ff /*'?'*/
+};
+
+/*
+ * Filter: read bytes from stdin and, for each one, write the matching
+ * symbolGreekTranslation[] entry to stdout as two bytes, high byte first
+ * (no byte-order mark is written).
+ *
+ * With "-robinson" as the first argument, an input 'v' is looked up as '~'
+ * (which the table maps to 0x03c2) instead of as 'v'.
+ *
+ * Returns 0 on success, 1 if reading stdin or writing stdout failed.
+ */
+int main(int argc, char **argv) {
+	int robinson = (argc > 1 && !strcmp(argv[1], "-robinson"));
+	int in;
+
+	/* getchar() returns EOF for both end-of-input and read errors, so the
+	 * loop always terminates.  The previous `while (read(0, &c, 1))` took
+	 * read()'s -1 error return as "true" and would loop forever. */
+	while ((in = getchar()) != EOF) {
+		unsigned char c = (unsigned char)in;
+		unsigned short unit;
+
+		if (c == 'v' && robinson) c = '~';
+		unit = symbolGreekTranslation[c];
+		putchar((unsigned char)(unit >> 8));
+		putchar((unsigned char)(unit & 0x00FF));
+	}
+
+	/* Surface I/O failures through the exit status instead of hiding them. */
+	if (ferror(stdin) || fflush(stdout) == EOF || ferror(stdout)) return 1;
+	return 0;
+}
diff --git a/modules/bibles/grc/tr/Makefile b/modules/bibles/grc/tr/Makefile
new file mode 100644
index 0000000..8742dc1
--- /dev/null
+++ b/modules/bibles/grc/tr/Makefile
@@ -0,0 +1,30 @@
+# Builds the $(TARGET) module from the $(SOURCE) archive:
+#   download the ZIP -> unzip -> convert every *.UTR to *.imp
+#   -> concatenate into $(SOURCE).imp -> imp2vs.
+
+# Only explicit and pattern rules are used below; drop the built-in suffixes.
+.SUFFIXES:
+# Recipes write through "> $@"; delete the half-written file if they fail so
+# it is not mistaken for an up-to-date target on the next run.
+.DELETE_ON_ERROR:
+.PHONY: all clean
+
+SOURCE = TR-PRSD
+TARGET = tr
+CONVERT = ../whnu/convert.sh
+THIS_MAKEFILE := $(firstword $(MAKEFILE_LIST))
+# Evaluated once, when the makefile is parsed.  On a clean tree the archive
+# has not been unpacked yet, so this list (and IMPS) is empty at that point.
+SOURCES := $(shell find $(SOURCE)/ -name '*.UTR' 2>/dev/null)
+IMPS := $(SOURCES:%.UTR=%.imp)
+
+all: $(TARGET)/
+
+# The URL contains '?', so it is quoted to keep the shell from globbing it.
+$(SOURCE).ZIP:
+	wget -O $@ 'https://sites.google.com/a/wmail.fi/greeknt/home/greeknt/$(SOURCE).ZIP?attredirects=0'
+
+# -o: overwrite without prompting when the archive is unpacked again.
+$(SOURCE)/: $(SOURCE).ZIP
+	unzip -o $(SOURCE).ZIP -d $(SOURCE)/
+
+# convert.sh runs ./symbgreektoutf16, so both are prerequisites of every
+# converted file (this also keeps "make -j" from converting too early).
+%.imp: %.UTR $(CONVERT) symbgreektoutf16
+	$(CONVERT) $< > $@
+
+# If the file list was empty at parse time the archive has only just been
+# unpacked by the $(SOURCE)/ prerequisite: re-run make so the list is
+# recomputed.  The "test" guard stops the recursion when the archive really
+# contains no *.UTR files.
+$(SOURCE).imp: $(SOURCE)/ $(CONVERT) symbgreektoutf16 $(IMPS)
+ifeq ($(strip $(IMPS)),)
+	test -n "$$(find $(SOURCE)/ -name '*.UTR' | head -n 1)" || { echo "no *.UTR files found under $(SOURCE)/" >&2; exit 1; }
+	$(MAKE) -f $(THIS_MAKEFILE) $@
+else
+	cat $(IMPS) > $@
+endif
+
+$(TARGET)/: $(SOURCE).imp
+	mkdir -p $(TARGET)/
+	imp2vs $(SOURCE).imp -z -o $(TARGET)/
+
+clean:
+	rm -rf $(SOURCE).ZIP $(SOURCE) $(SOURCE).imp symbgreektoutf16 $(TARGET)
+
+# $(CC)/$(CFLAGS) instead of a hard-coded gcc so the compiler can be overridden.
+symbgreektoutf16: ../../../../misc/symbgreektoutf16.c
+	$(CC) $(CFLAGS) -o $@ $<
+
diff --git a/modules/bibles/grc/whnu/Makefile b/modules/bibles/grc/whnu/Makefile
new file mode 100644
index 0000000..27e9d8f
--- /dev/null
+++ b/modules/bibles/grc/whnu/Makefile
@@ -0,0 +1,30 @@
+# Builds the $(TARGET) module from the $(SOURCE) archive:
+#   download the ZIP -> unzip -> convert every *.UWH to *.imp
+#   -> concatenate into $(SOURCE).imp -> imp2vs.
+
+# Only explicit and pattern rules are used below; drop the built-in suffixes.
+.SUFFIXES:
+# Recipes write through "> $@"; delete the half-written file if they fail so
+# it is not mistaken for an up-to-date target on the next run.
+.DELETE_ON_ERROR:
+.PHONY: all clean
+
+SOURCE = WH27PRSD
+TARGET = whnu
+CONVERT = convert.sh
+THIS_MAKEFILE := $(firstword $(MAKEFILE_LIST))
+# Evaluated once, when the makefile is parsed.  On a clean tree the archive
+# has not been unpacked yet, so this list (and IMPS) is empty at that point.
+SOURCES := $(shell find $(SOURCE)/ -name '*.UWH' 2>/dev/null)
+IMPS := $(SOURCES:%.UWH=%.imp)
+
+all: $(TARGET)/
+
+# The URL contains '?', so it is quoted to keep the shell from globbing it.
+$(SOURCE).ZIP:
+	wget -O $@ 'https://sites.google.com/a/wmail.fi/greeknt/home/greeknt/$(SOURCE).ZIP?attredirects=0'
+
+# -o: overwrite without prompting when the archive is unpacked again.
+$(SOURCE)/: $(SOURCE).ZIP
+	unzip -o $(SOURCE).ZIP -d $(SOURCE)/
+
+# convert.sh runs ./symbgreektoutf16, so both are prerequisites of every
+# converted file (this also keeps "make -j" from converting too early).
+%.imp: %.UWH $(CONVERT) symbgreektoutf16
+	./$(CONVERT) $< > $@
+
+# If the file list was empty at parse time the archive has only just been
+# unpacked by the $(SOURCE)/ prerequisite: re-run make so the list is
+# recomputed.  The "test" guard stops the recursion when the archive really
+# contains no *.UWH files.
+$(SOURCE).imp: $(SOURCE)/ $(CONVERT) symbgreektoutf16 $(IMPS)
+ifeq ($(strip $(IMPS)),)
+	test -n "$$(find $(SOURCE)/ -name '*.UWH' | head -n 1)" || { echo "no *.UWH files found under $(SOURCE)/" >&2; exit 1; }
+	$(MAKE) -f $(THIS_MAKEFILE) $@
+else
+	cat $(IMPS) > $@
+endif
+
+$(TARGET)/: $(SOURCE).imp
+	mkdir -p $(TARGET)/
+	imp2vs $(SOURCE).imp -z -o $(TARGET)/
+
+clean:
+	rm -rf $(SOURCE).ZIP $(SOURCE) $(SOURCE).imp symbgreektoutf16 $(TARGET)
+
+# $(CC)/$(CFLAGS) instead of a hard-coded gcc so the compiler can be overridden.
+symbgreektoutf16: ../../../../misc/symbgreektoutf16.c
+	$(CC) $(CFLAGS) -o $@ $<
+
diff --git a/modules/bibles/grc/whnu/convert.sh b/modules/bibles/grc/whnu/convert.sh
new file mode 100755
index 0000000..d4c443a
--- /dev/null
+++ b/modules/bibles/grc/whnu/convert.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+FILENAME=$(basename $1)
+BOOK=${FILENAME%.*}
+vn="0"
+sed -e 's/ //g' $1| while read line; do
+ for i in $line; do
+ if [[ "$i" =~ [0-9]+:[0-9]+ ]]; then
+ echo; echo \$\$\$$BOOK $i
+ elif [[ "$i" =~ \{.*\} ]]; then
+ echo -n "<w lemma=\"strong:$lemma\" morph=\"robinson:${i:1:-1}"
+ if [[ "$sm" -ne "" ]]; then echo -n " strongsMorph:$sm"; fi
+ echo -n "\">$txt</w> "
+ sm=""
+ elif [[ "$i" =~ [0-9]+ ]]; then
+ if [[ "$i" > "5624" ]]; then sm=$i
+ else lemma=$i; fi
+ elif [[ "$i" = "|" ]]; then
+ ((vn = vn + 1))
+ if [[ "$vn" > "1" ]]; then echo -n "</seg>"; fi
+ if [[ "$vn" < "3" ]]; then echo -n "<seg type=\"x-variant\" subType=\"x-$vn\">"
+ else vn="0"; fi
+ elif [[ "$i" =~ [a-z]+ ]]; then
+ txt=$(echo $i |./symbgreektoutf16 -robinson| uconv --from-code UTF-16 --to-code UTF-8)
+ else
+ echo NOT HANDLED!: $i
+ fi
+ done
+done