From bb0c93ca425cce7dabe9e466b22dd8c6c338c721 Mon Sep 17 00:00:00 2001
From: "Troy A. Griffitts"
Date: Sun, 13 Apr 2014 04:30:58 +0000
Subject: added simple conversion scripts for maurice's stuff

git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@485 07627401-56e2-0310-80f4-f8cd0041bdcd
---
 misc/symbgreektoutf16.c            | 107 +++++++++++++++++++++++++++++++++++++
 modules/bibles/grc/tr/Makefile     |  30 +++++++++++++++
 modules/bibles/grc/whnu/Makefile   |  30 +++++++++++++++
 modules/bibles/grc/whnu/convert.sh |  28 ++++++++++++++
 4 files changed, 195 insertions(+)
 create mode 100644 misc/symbgreektoutf16.c
 create mode 100644 modules/bibles/grc/tr/Makefile
 create mode 100644 modules/bibles/grc/whnu/Makefile
 create mode 100755 modules/bibles/grc/whnu/convert.sh

diff --git a/misc/symbgreektoutf16.c b/misc/symbgreektoutf16.c
new file mode 100644
index 0000000..d09a14b
--- /dev/null
+++ b/misc/symbgreektoutf16.c
@@ -0,0 +1,107 @@
+/*
+ * symbgreektoutf16 - filter that maps every input byte through
+ * symbolGreekTranslation and writes the result as UTF-16 code units,
+ * high byte first, with no byte-order mark.
+ *
+ * Usage: symbgreektoutf16 [-robinson] < input > output
+ */
+#include <stdio.h>
+#include <string.h>
+
+/* Indexed by input byte value (0-255); each entry is the code unit emitted for that byte. */
+static const unsigned short symbolGreekTranslation[] = {
+	/* 000 (00) */ 0x0000, 0x0001, 0x0002, 0x0003,
+	/* 004 (04) */ 0x0004, 0x0005, 0x0006, 0x0007,
+	/* 008 (08) */ ' ' /* backspace? */, 0x0009 /*'\t'*/, 0x000a /*\n*/, 0x000b,
+	/* 012 (0c) */ 0x000c, 0x000d /*'\r'*/, 0x000e, 0x000f,
+	/* 016 (10) */ 0x0010, 0x0011, 0x0012, 0x0013,
+	/* 020 (14) */ 0x0014, 0x0015, 0x0016, 0x0017,
+	/* 024 (18) */ 0x0018, 0x0019, 0x001a, 0x001b,
+	/* 028 (1c) */ 0x001c, 0x001d, 0x001e, 0x001f,
+	/* 032 (20) */ 0x0020 /*' '*/, 0x0021 /*'!'*/, 0x03C2 /*'"'*/, 0x0023 /*'#'*/,
+	/* 036 (24) */ 0x0024 /*'$'*/, 0x0025 /*'%'*/, 0x0026 /*'&'*/, 0x0342 /* circumflex perispomeni'''*/,
+	/* 040 (28) */ 0x0028 /*'('*/, 0x0029 /*')'*/, 0x002a /*'*'*/, 0x002b /*'+'*/,
+	/* 044 (2c) */ 0x002c /*','*/, 0x002d /*'-'*/, 0x002e /*'.'*/, 0x002f /*'/'*/,
+	/* 048 (30) */ 0x0030 /*'0'*/, 0x0031 /*'1'*/, 0x0032 /*'2'*/, 0x0033 /*'3'*/,
+	/* 052 (34) */ 0x0034 /*'4'*/, 0x0035 /*'5'*/, 0x0036 /*'6'*/, 0x0037 /*'7'*/,
+	/* 056 (38) */ 0x0038 /*'8'*/, 0x0039 /*'9'*/, 0x00b7 /* middle dot ':'*/, ';' /* possibly a grave varia (see code below) */,
+	/* 060 (3c) */ 0x003c /*'<'*/, 0x003d /*'='*/, 0x0308 /*'>'*/, 0x003f /*'?'*/,
+	/* 064 (40) */ 0x0040 /*'@'*/, 0x0391 /*'A'*/, 0x0392 /*'B'*/, 0x03A7 /*'C'*/,
+	/* 068 (44) */ 0x0394 /*'D'*/, 0x0395 /*'E'*/, 0x03A6 /*'F'*/, 0x0393 /*'G'*/,
+	/* 072 (48) */ 0x0397 /*'H'*/, 0x0399 /*'I'*/, 0x0314 /*'J'*/, 0x039a /*'K' bug in .py */,
+	/* 076 (4c) */ 0x039b /*'L'*/, 0x039c /*'M'*/, 0x039d /*'N'*/, 0x039f /*'O'*/,
+	/* 080 (50) */ 0x03a0 /*'P'*/, 0x0398 /*'Q'*/, 0x03a1 /*'R'*/, 0x03a3 /*'S'*/,
+	/* 084 (54) */ 0x03a4 /*'T'*/, 0x03a5 /*'U'*/, 0x0384 /*'V'*/, 0x03a9 /*'W'*/,
+	/* 088 (58) */ 0x039e /*'X'*/, 0x03a8 /*'Y'*/, 0x005a /*'Z'*/, 0x005b /*'['*/,
+	/* 092 (5c) */ 0x005c /*'\'*/, 0x005d /*']'*/, 0x005e /*'^'*/, 0x005f /*'_'*/,
+	/* 096 (60) */ 0x0060 /*'`'*/, 0x03b1 /*'a'*/, 0x03b2 /*'b'*/, 0x03c7 /*'c'*/,
+	/* 100 (64) */ 0x03b4 /*'d'*/, 0x03b5 /*'e'*/, 0x03c6 /*'f'*/, 0x03b3 /*'g'*/,
+	/* 104 (68) */ 0x03b7 /*'h'*/, 0x03b9 /*'i'*/, 0x0313 /* smooth breathing 'j'*/, 0x03ba /*'k'*/,
+	/* 108 (6c) */ 0x03bb /*'l'*/, 0x03bc /*'m'*/, 0x03bd /*'n'*/, 0x03bf /*'o'*/,
+	/* 112 (70) */ 0x03c0 /*'p'*/, 0x03b8 /*'q'*/, 0x03c1 /*'r'*/, 0x03c3 /*'s'*/,
+	/* 116 (74) */ 0x03c4 /*'t'*/, 0x03c5 /*'u'*/, 0x0301 /* acute tonos 'v'*/, 0x03c9 /*'w'*/,
+	/* 120 (78) */ 0x03be /*'x'*/, 0x03c8 /*'y'*/, 0x03b6 /*'z'*/, 0x007b /*'{'*/,
+	/* 124 (7c) */ 0x007c /*'|'*/, 0x007d /*'}'*/, 0x03c2 /*'~'*/, 0x007f,
+	/* 128 (80) */ ' ', 0x0081, 'C', 0x0083,
+	/* 132 (84) */ 0x0084, 0x003e /* block end */, 0x1fbd /* elision mark */, 0x0087,
+	/* 136 (88) */ 0x0088, 0x003b /* ; */, ' ', 0x008b,
+	/* 140 (8c) */ 'a', 'c', 0x008e, 0x008f,
+	/* 144 (90) */ 0x0090, 0x0091, 0x0092, 0x0093,
+	/* 148 (94) */ 0x0094, 0x0095, 0x0096, 0x0056,
+	/* 152 (98) */ 0x0098, 0x0099, 0x002f /* / */, 0x009b,
+	/* 156 (9c) */ 0x009c, 0x009d, 0x009e, 0x009f,
+	/* 160 (a0) */ 't', 0x00a1, 0x00a2, 0x00a3,
+	/* 164 (a4) */ 0x00a4, 0x00a5, 0x00a6, 's',
+	/* 168 (a8) */ 'r', 'g', 0x00aa, 'e',
+	/* 172 (ac) */ 'u', 0x0047 /* G */, 0x00ae, '}',
+	/* 176 (b0) */ 0x00b0, 0x0306, 'j', 0x0323 /* dot below */,
+	/* 180 (b4) */ 'y', 'm', 'd', 'w',
+	/* 184 (b8) */ 0x00b8, 'p', 'b', 0x005b /* [ */,
+	/* 188 (bc) */ 0x005d /* ] */, 0x00bd, 0x00be, '{',
+	/* 192 (c0) */ 0x00c0, 0x00BA /* degree */, 'l' /* 0x006c */, 'v',
+	/* 196 (c4) */ 'f', 'x', 0x02bc /* apostrophe */, 0x00c7,
+	/* 200 (c8) */ 0x00c8, ';', ' ', 0x00cb,
+	/* 204 (cc) */ 0x00cc, 0x00cd, 'Q', 'q',
+	/* 208 (d0) */ 0x0304, 0x0305 /* overline; intf uses: 0x00af */, 0x00d2, 0x00d3,
+	/* 212 (d4) */ 0x00d4, 0x00d5, 0x00d6, 'V',
+	/* 216 (d8) */ 0x00d8, 0x00d9, 0x007c /* | */, 0x005c /* ` */,
+	/* 220 (dc) */ 0x00dc, 0x00dd, 0x00de, 0x0302,
+	/* 224 (e0) */ 0x00e0, 0x00e1, ' ', 'W',
+	/* 228 (e4) */ 0x00e4, 'R', 0x00e6, 'Y',
+	/* 232 (e8) */ 'U', 'i', 'S', 'D',
+	/* 236 (ec) */ 'F', 'G', 0x00ee, ' ',
+	/* 240 (f0) */ 0x00f0, 'L', ':', 0x00f3,
+	/* 244 (f4) */ 0x00f4, 0x03DB /* stigma */, 'n', 0x00f7,
+	/* 248 (f8) */ 'J', 0x00f9, 'h', 'k',
+	/* 252 (fc) */ 0x00fc, 0x00fd, 0x00fe, 0x00ff
+};
+
+/*
+ * Copies standard input to standard output, replacing each byte with its
+ * two-byte table entry.  When the first argument is "-robinson", an input
+ * 'v' is looked up as '~' (0x03c2) instead of its own entry (0x0301).
+ *
+ * Returns 0 at end of input, or 1 if reading or writing failed.
+ */
+int main(int argc, char **argv) {
+	int robinson = (argc > 1 && strcmp(argv[1], "-robinson") == 0);
+	int c;
+
+	/* getchar() yields 0-255 or EOF, so c is always a valid table index;
+	 * a read error ends the loop instead of re-emitting a stale byte. */
+	while ((c = getchar()) != EOF) {
+		unsigned short unit;
+
+		if (robinson && c == 'v') {
+			c = '~';
+		}
+		unit = symbolGreekTranslation[c];
+		if (putchar(unit >> 8) == EOF || putchar(unit & 0x00FF) == EOF) {
+			return 1;
+		}
+	}
+	if (ferror(stdin) || fflush(stdout) == EOF) {
+		return 1;
+	}
+	return 0;
+}
diff --git a/modules/bibles/grc/tr/Makefile b/modules/bibles/grc/tr/Makefile
new file mode 100644
index 0000000..8742dc1
--- /dev/null
+++ b/modules/bibles/grc/tr/Makefile
@@ -0,0 +1,30 @@
+.SUFFIXES: .UTR .imp
+SOURCE = TR-PRSD
+TARGET = tr
+SOURCES = $(shell find $(SOURCE)/ -name '*.UTR')
+IMPS = $(SOURCES:%.UTR=%.imp)
+
+all: $(TARGET)/
+
+$(SOURCE).ZIP:
+	wget -O $(SOURCE).ZIP https://sites.google.com/a/wmail.fi/greeknt/home/greeknt/$(SOURCE).ZIP?attredirects=0
+
+$(SOURCE)/: $(SOURCE).ZIP
+	unzip $(SOURCE).ZIP -d $(SOURCE)/
+
+.UTR.imp:
+	../whnu/convert.sh $< > $@
+
+$(SOURCE).imp: $(SOURCE)/ ../whnu/convert.sh symbgreektoutf16 $(IMPS)
+	cat $(IMPS) > $(SOURCE).imp
+
+$(TARGET)/: $(SOURCE).imp
+	mkdir -p $(TARGET)/
+	imp2vs $(SOURCE).imp -z -o $(TARGET)/
+
+clean:
+	rm -rf $(SOURCE).ZIP $(SOURCE) $(SOURCE).imp symbgreektoutf16 $(TARGET)
+
+symbgreektoutf16: ../../../../misc/symbgreektoutf16.c
+	gcc -o $@ $<
+
diff --git a/modules/bibles/grc/whnu/Makefile b/modules/bibles/grc/whnu/Makefile
new file mode 100644
index 0000000..27e9d8f
--- /dev/null
+++ b/modules/bibles/grc/whnu/Makefile
@@ -0,0 +1,30 @@
+.SUFFIXES: .UWH .imp
+SOURCE = WH27PRSD
+TARGET = whnu
+SOURCES = $(shell find $(SOURCE)/ -name '*.UWH')
+IMPS = $(SOURCES:%.UWH=%.imp)
+
+all: $(TARGET)/
+
+$(SOURCE).ZIP:
+	wget -O $(SOURCE).ZIP https://sites.google.com/a/wmail.fi/greeknt/home/greeknt/$(SOURCE).ZIP?attredirects=0
+
+$(SOURCE)/: $(SOURCE).ZIP
+	unzip $(SOURCE).ZIP -d $(SOURCE)/
+
+.UWH.imp:
+	./convert.sh $< > $@
+
+$(SOURCE).imp: $(SOURCE)/ convert.sh symbgreektoutf16 $(IMPS)
+	cat $(IMPS) > $(SOURCE).imp
+
+$(TARGET)/: $(SOURCE).imp
+	mkdir -p $(TARGET)/
+	imp2vs $(SOURCE).imp -z -o $(TARGET)/
+
+clean:
+	rm -rf $(SOURCE).ZIP $(SOURCE) $(SOURCE).imp symbgreektoutf16 $(TARGET)
+
+symbgreektoutf16: ../../../../misc/symbgreektoutf16.c
+	gcc -o $@ $<
+
diff --git a/modules/bibles/grc/whnu/convert.sh b/modules/bibles/grc/whnu/convert.sh
new file mode 100755
index 0000000..d4c443a
--- /dev/null
+++ b/modules/bibles/grc/whnu/convert.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+FILENAME=$(basename $1)
+BOOK=${FILENAME%.*}
+vn="0"
+sed -e 's/ //g' $1| while read line; do
+	for i in $line; do
+		if [[ "$i" =~ [0-9]+:[0-9]+ ]]; then
+			echo; echo \$\$\$$BOOK $i
+		elif [[ "$i" =~ \{.*\} ]]; then
+			echo -n "$txt "
+			sm=""
+		elif [[ "$i" =~ [0-9]+ ]]; then
+			if [[ "$i" > "5624" ]]; then sm=$i
+			else lemma=$i; fi
+		elif [[ "$i" = "|" ]]; then
+			((vn = vn + 1))
+			if [[ "$vn" > "1" ]]; then echo -n ""; fi
+			if [[ "$vn" < "3" ]]; then echo -n ""
+			else vn="0"; fi
+		elif [[ "$i" =~ [a-z]+ ]]; then
+			txt=$(echo $i |./symbgreektoutf16 -robinson| uconv --from-code UTF-16 --to-code UTF-8)
+		else
+			echo NOT HANDLED!: $i
+		fi
+	done
+done
-- 
cgit