diff options
author | Troy A. Griffitts <scribe@crosswire.org> | 2005-11-23 18:23:56 +0000 |
---|---|---|
committer | Troy A. Griffitts <scribe@crosswire.org> | 2005-11-23 18:23:56 +0000 |
commit | be8bc1114804330e32b991cdf13a1ce25a0f0067 (patch) | |
tree | 70b5f367b92d0c11cda78fc05369e8efa2d837cb | |
parent | 89e2be0015cae6a94c55c511ddc9863e16ae1314 (diff) | |
download | sword-tools-be8bc1114804330e32b991cdf13a1ce25a0f0067.tar.gz |
Added TLGU conversion stuff
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@53 07627401-56e2-0310-80f4-f8cd0041bdcd
-rw-r--r-- | modules/lxxm/lxxm.jar | bin | 26328 -> 1440 bytes | |||
-rw-r--r-- | modules/lxxm/lxxm.jpx.local | 21 | ||||
-rw-r--r-- | tlgu/Makefile | 5 | ||||
-rw-r--r-- | tlgu/README | 1 | ||||
-rwxr-xr-x | tlgu/convert.sh | 13 | ||||
-rw-r--r-- | tlgu/tlgcodes.h | 322 | ||||
-rw-r--r-- | tlgu/tlgu.1 | 192 | ||||
-rw-r--r-- | tlgu/tlgu.c | 1344 | ||||
-rw-r--r-- | tlgu/tlgu.h | 221 | ||||
-rw-r--r-- | tlgu/tlgu.html | 565 | ||||
-rw-r--r-- | tlgu/tlgu.pdf | bin | 0 -> 18051 bytes | |||
-rw-r--r-- | tlgu/tlgu.ps | 421 |
12 files changed, 3096 insertions, 9 deletions
diff --git a/modules/lxxm/lxxm.jar b/modules/lxxm/lxxm.jar Binary files differindex ebbad2b..4ac670b 100644 --- a/modules/lxxm/lxxm.jar +++ b/modules/lxxm/lxxm.jar diff --git a/modules/lxxm/lxxm.jpx.local b/modules/lxxm/lxxm.jpx.local index bbf4d02..96b1b1d 100644 --- a/modules/lxxm/lxxm.jpx.local +++ b/modules/lxxm/lxxm.jpx.local @@ -1,20 +1,23 @@ build.menu.1[0]=com.borland.jbuilder.build.ProjectBuilder$ProjectBuildAction;make build.menu.2[0]=com.borland.jbuilder.build.ProjectBuilder$ProjectBuildAction;rebuild -content.layout.xml.[0]=<project-layout><tab_container selected_index="0"><tab node_name="src/lxxm/LXXMConv.java"/><tab node_name="[/space/usr/local/jdk1.5.0_04/src.zip]/java/lang/String.java"/></tab_container></project-layout> +content.layout.xml.[0]=<project-layout><tab_container selected_index="0"><tab node_name="src/lxxm/LXXMConv.java"/></tab_container></project-layout> debug.NoTracingClasses.1[0]=16 java.lang.Object1 1 -1 debug.NoTracingClasses.2[0]=21 java.lang.ClassLoader1 1 -1 debug.NoTracingClasses.3[0]=16 java.lang.String1 1 -1 debug.SplitThreadsAndDataView[0]=0 -history.files.active[0]=src/lxxm/LXXMConv.java,F;19,2247 -history.files.open.1[0]=src/lxxm/LXXMConv.java,F;19,2247 -history.files.open.2[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/java/lang/String.java,F;1743,69911 +history.files.active[0]=src/lxxm/LXXMConv.java,F;14,289 +history.files.open.1[0]=src/lxxm/LXXMConv.java,F;14,289 import.optimize.legacyPropertiesRead[0]=1 packagebrowser.lastClassName[0]=lxxm.LXXMConv sys.Author[0]= sys.DefaultPackage[0]=lxxm -sys.ReopenHistory.1[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/java/io/BufferedReader.java -sys.ReopenHistory.2[0]=src/greekconverter/BetacodeToUnicode.java -sys.ReopenHistory.3[0]=classes/greekconverter/UC.class -sys.ReopenHistory.4[0]=src/lxxm/Untitled1.java -team.lastScanned[0]=1128944865968 +sys.ReopenHistory.1[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/javax/xml/xpath/XPath.java 
+sys.ReopenHistory.2[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/javax/xml/xpath/XPathFactory.java +sys.ReopenHistory.3[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/javax/xml/xpath/XPathExpression.java +sys.ReopenHistory.4[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/java/lang/String.java +sys.ReopenHistory.5[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/java/io/BufferedReader.java +sys.ReopenHistory.6[0]=src/greekconverter/BetacodeToUnicode.java +sys.ReopenHistory.7[0]=classes/greekconverter/UC.class +sys.ReopenHistory.8[0]=src/lxxm/Untitled1.java +team.lastScanned[0]=1132281572356 wizard.package.package_history.1[0]=lxxm diff --git a/tlgu/Makefile b/tlgu/Makefile new file mode 100644 index 0000000..f0cdbb4 --- /dev/null +++ b/tlgu/Makefile @@ -0,0 +1,5 @@ +tlgu: tlgcodes.h tlgu.c tlgu.h + $(CC) -o tlgu tlgu.c + +clean: + rm -rf *.o tlgu diff --git a/tlgu/README b/tlgu/README new file mode 100644 index 0000000..dbef3a1 --- /dev/null +++ b/tlgu/README @@ -0,0 +1 @@ +man -l tlgu.1
\ No newline at end of file diff --git a/tlgu/convert.sh b/tlgu/convert.sh new file mode 100755 index 0000000..053bf9d --- /dev/null +++ b/tlgu/convert.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# +# usage example: ./convert.sh /mnt/cdrom/PHI7/ddp +# +#./tlgu -r -Z Z:%Z/y:%y/z:%z/a%a/b%b/c%c/d%d/n%n/x%x/D%D/L%L/R%R/T%T/ $i tmp.out +# + +echo "" > all.out +for i in $1[0-8]*.txt +do +./tlgu -e 0 -Z \$\$\$/%d/%c/%Z/%y/%z\\n $i tmp.out +cat tmp.out >> all.out +done diff --git a/tlgu/tlgcodes.h b/tlgu/tlgcodes.h new file mode 100644 index 0000000..4586048 --- /dev/null +++ b/tlgu/tlgcodes.h @@ -0,0 +1,322 @@ +/* tlgcodes.h + * + * Copyright (C) 2004 Dimitri Marinakis + * + * Licensed under the terms of the GNU General Public License. + * ABSOLUTELY NO WARRANTY. + * See the file `COPYING' in this directory. + * + * Code arrays for escape sequences + * See: handle_escape_codes + * + */ + +/* Punctuation codes (%) 0 - 170 */ +/*FIXME: check or find symbols for + %29, %31, %47-%49, (%50 - %81), %98, %105, %110, <%128>, %138, %140, %144, + %145, %157, %171 +*/ +#define MAX_PUNCTUATION 180 +unsigned int punctuation[] = { + 0x2020, 0x003f, 0x002a, 0x002f, 0x0021, 0x007c, 0x003d, 0x002b, 0x0025, 0x0026, + 0x003a, 0x00b7, 0x203b, 0x2021, 0x00a7, 0x02c8, 0x00a6, 0x2016, 0x0027, 0x002d, + 0x0301, 0x0300, 0x0302, 0x0308, 0x0303, 0x0327, 0x0304, 0x0306, 0x0308, 0x0324, + 0x1fbd, 0x1fbf, 0x1ffd, 0x1fef, 0x1fc0, 0x1fce, 0x1fde, 0x1fdd, 0x1fdf, 0x00a8, + 0x02d8, 0x2013, 0x2696, 0x00d7, 0x2693, 0x2694, 0x2695, 0x003d, 0x0025, 0x0025, + 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, + 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, + 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, + 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, + 0x0025, 0x0485, 0x0486, 0x1dc0, 0x0307, 0x1dc1, 0x035c, 0x0308, 0x00bb, 0x0025, + 0x003b, 0x0023, 0x2018, 0x005c, 0x005e, 0x2016, 0x224c, 
0x007e, 0x00b1, 0x00b7, + 0x25cb, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, + 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x032f, 0x0302, 0x2020, + 0x0307, 0x0025, 0x1fee, 0x1fcd, 0x1fcf, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, + 0x2261, 0x2697, 0x2510, 0x0025, 0x0025, 0x0025, 0x00b7, 0x030a, 0x030c, 0x0328, + 0x007c, 0x002d, 0x2219, 0x002d, 0x2234, 0x2235, 0x0025, 0x0025, 0x2042, 0x00d7, + 0x002d, 0x00f7, 0x0338, 0x00b6, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, + 0x0359, 0x002f, 0x1fbd, 0x1ffe, 0x1ffd, 0x1fef, 0x1fc0, 0x0313, 0x0314, 0x0323, + }; + +/* Text Symbols (#) 0 - 1528 + * May be preceded by upper case (*) this table contains only upper case characters + */ +/* FIXME: #6, #8, #19, #20, #24, #25, #27, #30, #31, #53, #54, #56, #59, + #61, #62, #64, #66, #68, #74, #87, #101-#134, #136-#150, #152-#199 + #240-end */ +#define MAX_TEXT_SYMBOLS 1529 +unsigned int text_symbols[] = { + 0x0374, 0x03de, 0x03da, 0x03d8, 0x03d8, 0x03e0, 0x005f, 0x0023, 0x005f, 0x0301, + 0x03fd, 0x03ff, 0x2014, 0x203b, 0x2e16, 0x003e, 0x03f9, 0x002f, 0x003c, 0x0300, + 0x2220, 0x0053, 0x0375, 0x0039, 0x0023, 0x0282, 0x2e0f, 0x221a, 0x0023, 0x00b7, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0387, 0x003a, 0x22ee, 0x0023, 0x2059, 0x0023, 0x0023, 0x0023, 0x2283, + 0x0399, 0x0023, 0x0023, 0x0394, 0x0023, 0x0397, 0x0023, 0x03a7, 0x0023, 0x039c, + 0x002e, 0x0387, 0x02d9, 0x003a, 0x22ee, 0x002e, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0308, 0x0027, 0x1ffd, 0x1fef, 0x1fc0, 0x1ffe, 0x1fbd, 0x0023, 0x0023, 0x0023, + 0x2014, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x00f7, 0x22d6, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x2216, 0x2283, 0x0023, 0x223c, 0x0023, 0x0023, 0x2248, + 0x0023, 0x0023, 0x0023, 0x223d, 0x0023, 0x0023, 0x0023, 0x0023, 
0x03fc, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x02d9, 0x222b, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x221e, 0x2014, 0x0023, 0x0023, 0x0023, 0x0023, 0x2310, 0x0023, 0x0023, 0x0023, + 0x0043, 0x2282, 0x20de, 0x0375, 0x0023, 0x05d0, 0x0023, 0x0023, 0x0023, 0x2309, + 0x0023, 0x2229, 0x0023, 0x2282, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x2643, 0x25a1, 0x264f, 0x264d, 0x2640, 0x2650, 0x2644, 0x2609, 0x263f, 0x263e, + 0x2642, 0x2651, 0x264c, 0x2648, 0x264e, 0x264a, 0x264b, 0x2653, 0x2652, 0x2649, + 0x260d, 0x263d, 0x260c, 0x2605, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x260b, 0x2651, 0x0023, 0x264c, 0x264e, 0x2126, 0x2127, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x2321, 0x0023, 0x2e0e, 0x003e, 0x2e0e, 0x2e0e, 0x003d, 0x2e0e, 0x0023, 0x2e0e, + 0x2e0e, 0x2e0e, 0x2e0e, 0x2e0e, 0x2e0e, 0x2251, 0x0023, 0x0023, 0x0023, 0x2022, + 0x2629, 0x2629, 0x2627, 0x003e, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 
0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0283, 0x2e10, 0x2e11, 0x2e10, 0x2e11, 0x01a7, 0x2e0f, 0x2573, 0x00b7, + 0x2014, 0x007c, 0x2627, 0x0023, 0x0023, 0x2627, 0x2138, 0x2192, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0283, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x0023, 0x0023, 0x2609, 0x0023, 0x2e0e, 0x2e0f, 0x2e14, 0x2e15, 0x203b, 0x0023, + 0x0023, 0x0023, 0x03fd, 0x0023, 0x0023, 0x0023, 0x0023, 0x2241, 0x0023, 0x2191, + 0x2629, 0x0023, 0x0023, 0x2e13, 0x2297, 0x271b, 0x2190, 0x02c6, 0x0023, 0x0023, + 0x0023, 0x035c, 0x2e12, 0x03da, 0x0311, 0x0023, 0x0023, 0x0023, 0x01b7, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x2263, + 0x2237, 0x25cc, 0x05e6, 0x05d1, 0x22bb, 0x2208, 0x2629, 0x0023, 0x0023, 0x0023, + 0x0023, 0x2191, 0x0305, 0x2319, 0x2423, 0x0023, 0x0023, 0x03dc, 0x0023, 0x0023, + 0x03f9, 0x0023, 0x22a2, 0x0023, 0x0393, 0x2309, 0x0023, 0x03a6, 0x03a1, 0x039c, + 0x0399, 0x0398, 0x2228, 0x039d, 0x2127, 0x0396, 0x0023, 0x0395, 0x2210, 0x0023, + 0x0023, 0x22cf, 0x0023, 0x039b, 0x22b8, 0x0036, 0x0039, 0x230b, 0x0394, 0x2207, + + 0x2203, 0x0023, 0x0023, 0x03a0, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 
0x0023, 0x0023, 0x0023, 0x03a9, 0x0023, 0x03bb, + 0x0023, 0x22a3, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x2200, 0x039f, 0x039e, + 0x0394, 0x0399, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x2441, 0x03a9, 0x0397, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x03a7, 0x03a4, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0391, 0x0392, 0x03a5, 0x03a8, 0x2044, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x22cf, 0x22d4, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x22b1, 0x22b0, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x223b, + 0x0023, 0x2201, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669, + 0x0660, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x2733, 0x2282, 0x2283, 0x03a7, 0x002f, 0x22ba, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 
0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 
0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x223d, 0x0023, 0x0023, 0x0023, 0x0023, 0x223b, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x003c, 0x0023, 0x0023, + 0x2116, 0x0023, 0x0023, 
0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x003c, 0x0282, 0x00f7, 0x005c, 0x0023, 0x0023, 0x0023, 0x0023, + 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023 + }; + +/* Quotation Marks (") 0 - 60 */ +/* FIXME: check pairs, find symbols for "50-"69 */ +#define MAX_QUOTATION 61 +unsigned int quotation_open[MAX_QUOTATION]; +unsigned int quotation_open_symbol[] = { + 0x201c, 0x201e, 0x201e, 0x2018, 0x201a, 0x201a, 0x00ab, 0x3008, 0x201c, 0x0022, + 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022, 0x0022, 
0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022 + }; +unsigned int quotation_close_symbol[] = { + 0x201d, 0x201d, 0x201d, 0x2019, 0x2019, 0x2019, 0x00bb, 0x3009, 0x201e, 0x0022, + 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, + 0x0022 + }; + + +/* Brackets ([) 0 - 69 */ +/* FIXME: check pairs, find symbols for [9, [10, [13, [14, [15, [33-[69 */ +#define MAX_BRACKET 70 +unsigned int bracket_open[MAX_BRACKET]; +unsigned int bracket_open_symbol[] = { + 0x005b, 0x0028, 0x003c, 0x007b, 0x301a, 0x230a, 0x2308, 0x2308, 0x300c, 0x005b, + 0x005b, 0xfe59, 0x2192, 0x005b, 0x005b, 0x005b, 0x3016, 0x300e, 0x300a, 0x005b, + 0x23a7, 0x239f, 0x23a8, 0x23a9, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, + 0x239b, 0x239c, 0x239d, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, + 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, + 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, + 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, + }; +unsigned int bracket_close_symbol[] = { + 0x005d, 0x0029, 0x003e, 0x007d, 0x301b, 0x230b, 0x2309, 0x230b, 0x300d, 0x005d, + 0x005d, 0xfe5a, 0x2190, 0x005d, 0x005d, 0x005d, 0x3017, 0x300f, 0x300b, 0x005d, + 0x23ab, 0x23aa, 0x23ac, 0x23ad, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, + 0x239e, 0x239f, 0x23a0, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, + 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 
0x005d, 0x005d, 0x005d, 0x005d, 0x005d, + 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, + 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, + }; + +/* Quasi-Brackets (<) 0 - 100 */ +/* FIXME: decide on handling: for each letter, appropriate superscripts and subscripts should + * be combined with every letter + */ +#define MAX_QUASI_BRACKET 101 +unsigned int quasi_bracket_open[MAX_QUASI_BRACKET]; +unsigned int quasi_bracket_open_symbol[] = { + 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x25ba, 0x0028, 0x0028, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, + 0x003c, + }; +unsigned int quasi_bracket_close_symbol[] = { + 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x25c4, 0x0029, 0x0029, 0x003e, 
0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, + 0x003e, + }; + +/* Non-Text ({) 0 - 71 */ +/*FIXME: decide on representation */ +#define MAX_NON_TEXT 72 +unsigned int non_text_open[MAX_NON_TEXT]; +unsigned int non_text_open_symbol[] = { + 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, + 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, + 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, + 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, + 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, + 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, + 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, + 0x007b, 0x007b, + }; +unsigned int non_text_close_symbol[] = { + 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, + 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, + 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, + 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, + 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, + 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, + 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, + 0x007d, 0x007d, + }; diff --git a/tlgu/tlgu.1 b/tlgu/tlgu.1 new file mode 100644 index 0000000..d3cc149 --- /dev/null +++ b/tlgu/tlgu.1 @@ -0,0 +1,192 @@ +.TH tlgu 1 "Feb, 2005" "Version 1.1" "TLG to Unicode Converter" +.SH NAME + +tlgu \- convert TLG (D) CD-ROM txt files to Unicode + +.SH SYNOPSIS +.B tlgu +[ +.I options +] +.I input_file +.I output_file + +.SH DESCRIPTION +.B 
tlgu +will convert an \fIinput_file\fP from Thesaurus Linguae Graeca (TLG) representation +to a Unicode (UTF-8) \fIoutput_file\fP. The TLG representation consists of \fBbeta-code\fP +text and \fBcitation\fP information. + +.SH OPTIONS +.TP +.B \-b +inserts a form feed and citation information (levels a, b, c, d) on every "book" citation +change. By default the program will output line feeds only (see also \fB\-p\fP). +.TP +.B \-p +observes paging instructions. +By default the program will output line feeds only. +.TP +.B \-r +primarily Roman text. Some TLG texts, notably doccan1.txt and doccan2.txt are mainly +roman texts lacking explicit language change codes. Setting this option will force +a change to roman text after each citation block is encountered. +.TP +.B \-v +highest-level reference citation is included before each text line (v-level) +.TP +.B \-w +reference citation is included before each text line (w-level) +.TP +.B \-x +reference citation is included before each text line (x-level) +.TP +.B \-y +reference citation is included before each text line (y-level) +.TP +.B \-z +lowest-level reference citation is included before each text line (z-level). +.sp 1 +.TP +.B \-B +inserts blank space (a tab) before each and every line. +.TP +.B \-C +citation debug information is output. +.TP +.B \-S +special code debug information is output. +.TP +.B \-V +block processing information is output (verbose). +.TP +.B \-W +each work (book) is output as a separate file in the form output_file-xxx.txt + +.SH HISTORY AND INTENDED USE +The purpose of \fBtlgu\fP is to translate binary TLG-format files into readable and editable text. +It is based on an earlier program written in 80x86 assembly language (1996) outputting codes for +a home-made font which used the prevalent hellenic font encodings of that time complemented +by dead accent characters - not very attractive, but readable. 
+.sp 1 +Then came Unicode and a plethora of accented character glyphs; nice-looking but +with the well-known drawback that special processing is needed to do wild-card searches. +Nice polytonic fonts have now been made available (Cardo, Gentium, Athena, Athenian, +Porson) and, surely, these will be expanded as special-use code points are included +in the Unicode definition (musical symbols, other special symbols) and more fonts will be created. +.sp 1 +So, at this point in time, \fBtlgu\fP will crunch a file which has been formatted +according to the published TLG-D format and produce codes for most glyphs +generally available. No attempt has been made to introduce multi-character sequences +or formatting codes (font changes). If a code has not been defined, the program will output +the respective "code family" glyph. You may use the \fB\-S\fP option to check such codes +against the published beta code definition. +.sp 1 +You may not like the character output for a specific code. Check out the \fBtlgcodes.h\fP file +containing the special symbol and punctuation codes and select one to suit you better. It will +probably be a while before the beta to Unicode correspondence settles down. + + +.SH EXAMPLES +.B ./tlgu -r DOCCAN2.TXT doccanu.txt +Translate the TLG canon to a unicode text file. Note the use of the \fB-r\fP option (this file +expects Roman as the default font). +.TP +.B ./tlgu -x -y -z TLG1799.TXT tlg1799u.txt +Generate a continuous file with the texts of granpa Euclides. Available citations (-x -y -z) +are Book//demonstratio/line as shown in the respective "cit" field of doccan2.txt. +.TP +.B ./tlgu -b -B TLG1799.TXT tlg1799u.txt +Generate the same texts, this time with a page feed and book citation information on the first +page of each book and a tab before each line (use with OOo versions earlier than 1.1.4). +.TP +.B ./tlgu -C TLG1799.TXT tlg1799u.txt +See how the citation information changes within each TLG block. 
+.TP +.B ./tlgu -S TLG1799.TXT tlg1799u.txt | sort > symbols1799.txt +Check out the symbols used in a work. Book and x, y, z references are printed on a separate +line for each symbol. Sort / grep the output to locate specific symbols of interest; save in +a file for later use. +.TP +.B ./tlgu -W TLG0006.TXT tlg0006u +Will produce separate files for each work, named tlg006u-001.txt etc. + +.SH POST-PROCESSING EXAMPLES +I use the OpenOffice suite for most of my work. This example shows one of many possible +ways of using the search and replace facility to create a readable version of the Suda lexicon. +.TP +.B ./tlgu -B TLG4085.TXT tlg4085u.txt +A Unicode file with the text is created +.TP +.B Open the generated file with OOo: +File | Open | Filename: tlg4085u.txt, +File Type: Text Encoded \-\- Press Open +.sp 1 +The ASCII Filter Options window appears. Select the Unicode (UTF-8) character set and +a proper Unicode font installed in your machine (e.g. Cardo). Press OK. +.TP +.B Replace angle brackets with expanded text +Lexicon terms are enclosed in <angle brackets>. The actual beta codes indicate the use of +expanded text for emphasis. Select Edit | Find & Replace. The \fBFind & Replace\fP window appears. +.sp 1 +In the \fBSearch For\fP field, type the following expression: \fB<[^<>]*>\fP +This means "find any characters between angle brackets, not including angle brackets". +.sp 1 +In the \fBReplace With\fP window insert a single ampersand: \fB&\fP +This means that we need to \fBadd\fP formatting information (this case) or additional text to +the text found. Press \fBFormat...\fP and select the \fBPosition\fP tab; select Spacing +Expanded by 2.0 points. Press OK. +.sp 1 +Check the \fBRegular Expressions\fP box and press \fBReplace All\fP. +.sp 1 +You may now replace the angle brackets with nothings. +.sp 1 +Repeat the above procedure for titles enclosed in {braces}. Write a macro... 
+.TP +.B Other useful information +In the "Execute" tab of the "Properties" window of my KDesktop Link to Application +I have the following command (single line): +.br +\fBLC_CTYPE=el_GR.UTF-8 /whereitsat/OpenOffice.org1.1.x/soffice\fP +.br +The prefix, an environment variable, allows you to use the same program with different locales; +in this case, hellenic Unicode (UTF-8). +.sp 1 +I put my default locale and keyboard definitions in my \fB.profile\fP: +.br +.na +.B export LC_CTYPE=el_GR.UTF-8 +.br +.na +.B setxkbmap us+el polytonic -option grp:ctrl_shift_toggle +.br +.sp 1 +This way multi-lingual text can be entered; keyboard layout switching is done by pressing Ctrl/Shift. +.SH REFERENCES +There are several texts describing the internal representation of \fBPHI\fP and +\fBTLG\fP text, ID data, citation data and index files. The originator of this +format is the Packard Humanities Institute. The TLG is maintained by UCI \- see +\fBwww.tlg.uci.edu\fP \- where you may find the \fBTLG Beta Code Manual\fP and the +\fBTLG Beta Code Quick Reference Guide\fP. +.sp 1 +Unicode consortium publications pertaining to the codification +of characters used in Hellenic literature, scientific and musical texts. +.sp 1 +The OpenOffice suite (\fBwww.openoffice.org\fP) includes a word processor that you +can use to load, process and create new polytonic texts. + +.SH COPYRIGHT +Copyright (C) 2004, 2005 Dimitri Marinakis (dm ssa gr). + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License (version 2) as published by +the Free Software Foundation. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA diff --git a/tlgu/tlgu.c b/tlgu/tlgu.c new file mode 100644 index 0000000..29abe5c --- /dev/null +++ b/tlgu/tlgu.c @@ -0,0 +1,1344 @@ +/* tlgu: Translates TLG (D) text files to Unicode text + * + * Copyright (C) 2004, 2005 Dimitri Marinakis + * + * Licensed under the terms of the GNU General Public License. + * ABSOLUTELY NO WARRANTY. + * See the file `COPYING' in this directory. + * + * Usage: + * tlgu [options] infile outfile + * + * Options: + * -r -- primarily Roman text; default betastate = ROMAN, reset on every ID code + * -vwxyz -- reference citations are printed in the form xxx.xxx...xxx + * -(a)b(cd) -- description citations are printed + * -B -- output blank space (tab) after each new line (beginning of line) + * -p -- pagination is observed, otherwise book lines are printed continuously + * -C -- citation debug information is printed + * -S -- special code debug information is printed + * -T -- bracket debug information is printed + * -V -- processing debug information is printed + * -W -- multiple output files, one for each work + * + * Returns: exit code 1 if unsuccesful + * + * Make: gcc tlgu.c -o tlgu + * + * History: This is a re-write of a DOS program (tlgft.asm) written several + * years ago to translate Hellenic texts distributed on the TLG CD-ROM from + * "beta code" to something readable, editable and printable. + * + * Pointers / References: + * TLG Project - www.tlg.uci.edu + * PHI CD ROM Format Description, Packard Humanities Institute, 19 April 1992 + * Beta code reference - Text versions: tlgbeta.txt or tlgcode.txt + * a .pdf version is also available. 
+ * ID locator reference - Text version tlgcodes.txt + * + * dm: 14-Jun-2001 ELOT-928 + * 14-Jun-2004 Unicode + * 26-Jun-2004 Command-line options + * 26-Feb-2005 Output file separation (-W option) + * 06-Mar-2005 Latin accent characters added (without parentheses) + * 21-Nov-2005 Added -Z -e and imported into sword-tools SVN repository + */ + +#include <stdio.h> +#include <string.h> +#include "tlgu.h" +#include "tlgcodes.h" + +/****************** PROTOTYPES FROM THE TOP DOWN *******************/ + +int tlgu (char * input_file, char * output_file); +void output_utf(int ucode); +void output_string(char *outstr); +int process_beta (int input_count); +void beta_code(int input_count); +int id_code(int input_count); +void store_accents(unsigned char bufferchar); +const char *resolve_cite_format(const char *cformat); + +/****************** PROGRAM VERSION INFORMATION *******************/ +char *prog_version="1.2"; + +/****************** COMMAND LINE OPTIONS **************************/ +int opt_roman = 0; +int opt_page = 0; +int opt_blank = 0; +int opt_acit = 0; +int opt_bcit = 0; +int opt_ccit = 0; +int opt_dcit = 0; +int opt_cit_id = 0; /* combines a, b, c */ +int opt_vcit = 0; +int opt_wcit = 0; +int opt_xcit = 0; +int opt_ycit = 0; +int opt_cprefix = 0; +char cformat[253]; +int opt_ecit_blank = 0; +char ecite[253]; +int opt_zcit = 0; +int opt_verbose = 0; +int opt_debug_bracket = 0; +int opt_debug_cit = 0; +int opt_debug_special = 0; +int opt_multiple = 0; + +/****************** GLOBAL VARIABLES *******************************/ + +int iptr = 0; /* input buffer pointer, reset before every read */ +int optr = 0; /* output buffer pointer, reset after every write */ +unsigned char input_buffer[INRECSIZE]; +unsigned char output_buffer[OUTRECSIZE]; +#define MAXFILELEN 256 + +/************ GLOBAL BETA CODE PROCESSING VARIABLES **************/ + +unsigned int outcode; +int betastate; /* translation state machine */ +int previous_state; /* needed for symbol translations */ 
+int start_new_line = 0; /* needed for symbol translations */ +int book_change = 0; /* needed for symbol translations */ +int accents; /* holds accent combinations */ +char *accented_chars = "AEHIOUWR"; +char *accent_chars = ")(+/\\=|"; +char *latin_accent_chars = "+/\\=|"; +char *escape_codes = "$&%\"@#^[]<>{}"; +char *punctuation_codes = " .,:;_\"%{}$&"; /* used by which_sigma */ +char previous_bcit[52][32]; /* holds previous work (book) citation */ + +/****************** GLOBAL DESCRIPTOR VARIABLES *****************/ + +/* + Space is reserved for descriptive data as follows: + + citations, binary component -- z, y, x, w, v, n (1 to 16383) + citations, ascii component -- a-z (1 to 15 characters + null, only a-d, n, v-z are actually used) + descriptors, binary component -- a-z (1 to 16383) + descriptors, ascii component -- a-z (1 to 31 characters + null) + + Citations --- + a - author citation + b - work citation + c - preferred abbreviation for the work + d - preferred abbreviation for the author + + n - if present signifies a document within a work + when it changes, v-z are nulled but are then independent + if n is not present, a change in an upper level nulls out the rest + + v-z hierarchical citation levels, high to low + + v + w + x - (chapter) + y - (verse) (book) + z - line + + Descriptions --- + + z - comment sequence number within a work + + In the common data structures below, citations will hold the first 26 positions (0-25) + while descriptors will hold the next 26 positions. 
+*/
+int icitation[52];
+char citation[52][32];
+int id_level;    /* holds translated current id level as an index to ID arrays */
+int id_char;     /* holds the pointer for the ascii part of the ID arrays */
+int id_command;  /* holds the current instruction for ID handling */
+int id_process;  /* if non-zero, command must be processed */
+
+
+/****************** HANDLE ARGUMENTS AND SYNTAX *******************/
+
+/* usage_info:
+ *   Prints the program banner, licence notice, command syntax and the
+ *   list of supported options on standard output.
+ */
+void usage_info(void)
+{
+    printf("\ntlgu: TLG beta code file to Unicode translator ver. %s\n", prog_version);
+    printf("\ntlgu: Copyright (C) 2004, 2005 Dimitri Marinakis");
+    printf("\ntlgu: This program is free software; you are encouraged to redistribute it under");
+    printf("\ntlgu: the terms of the GNU General Public License.\n");
+    printf("\ntlgu: This program comes with ABSOLUTELY NO WARRANTY. See the GNU General Public");
+    printf("\ntlgu: License (e.g. in the file named `COPYING') for more details.\n");
+    /* Program name comes first, then the options, then the two files */
+    printf("\ntlgu: Syntax: tlgu [-options...] beta_code_file text_file\n\n");
+    printf("tlgu: -r -- primarily Roman text; default betastate = ROMAN, reset on every ID code\n");
+    printf("tlgu: -v -w -x -y -z -- work reference citations are printed in the form xxx.xxx...xxx\n");
+    printf("tlgu: -Z <custom_cite_format_prefix> -- use special codes %%v %%w %%x %%y %%z in string\n");
+    printf("tlgu: -e <custom_blank_cite_seg_string> -- e.g. \"[NONE]\" instead of default \"\"\n");
+    printf("tlgu: -b -- books are preceded by a page feed and description citations are printed\n");
+    printf("tlgu: -p -- pagination is observed, otherwise book lines are printed continuously\n");
+    printf("tlgu: -B -- output blank space (tab) at the beginning of each line\n");
+    printf("tlgu: -C -- citation debug information is printed\n");
+    printf("tlgu: -S -- special code debug information is printed\n");
+    printf("tlgu: -T -- bracket debug information is printed\n");
+    printf("tlgu: -V -- processing debug information is printed\n");
+    printf("tlgu: -W -- multiple output files, one for each work (book)\n\n");
+}
+
+/* copy_option_value:
+ *   Copies the argument of option -<option> into the fixed-size buffer
+ *   <dst> of <dstsize> bytes.  An argument that does not fit (including
+ *   its terminating null) is reported and the program exits with code 1
+ *   instead of overrunning the buffer.
+ */
+static void copy_option_value(char *dst, size_t dstsize, const char *src, char option)
+{
+    if (strlen(src) >= dstsize) {
+        printf("\ntlgu: argument of -%c is too long (maximum %lu characters)\n",
+               option, (unsigned long)(dstsize - 1));
+        exit(1);
+    }
+    strcpy(dst, src);
+}
+
+/* main:
+ *   Parses the leading "-x" options into the opt_* globals, then hands
+ *   the two remaining arguments (input file, output file) to tlgu().
+ *   Options -e and -Z consume the following argument as their value.
+ * Returns: the value returned by tlgu(); exits with 1 on a syntax error
+ */
+int main(int argc, char * argv[])
+{
+    unsigned char ucc;    /* test variable */
+
+    if (sizeof(ucc) != 1) {
+        printf("\ntlgu: I need 8-bit characters to work\n");
+        exit(1);
+    }
+
+    if (argc < 3) {
+        usage_info();
+        exit(1);
+    }
+
+    /* Skip the program name; from here on argv[0] is the next argument */
+    --argc ;
+    ++argv ;
+
+    /* The last two arguments are always taken as file names, hence "> 2" */
+    while(argc > 2 && argv[0][0] == '-') {
+        switch(argv[0][1]) {
+            case 'W':
+                opt_multiple =1;
+                break ;
+            case 'V':
+                opt_verbose =1;
+                break ;
+            case 'S':
+                opt_debug_special = 1;
+                break ;
+            case 'T':
+                opt_debug_bracket = 1;
+                break ;
+            case 'C':
+                opt_debug_cit = 1;
+                break ;
+            case 'B':
+                opt_blank = 1;
+                break ;
+            case 'p':
+                opt_page = 1;
+                break ;
+            case 'r':
+                opt_roman = 1;
+                break ;
+            case 'a':
+                opt_acit = 1;
+                opt_cit_id =1;
+                break ;
+            case 'b':
+                opt_bcit = 1;
+                opt_cit_id =1;
+                break ;
+            case 'c':
+                opt_ccit = 1;
+                opt_cit_id =1;
+                break ;
+            case 'd':
+                opt_dcit = 1;
+                opt_cit_id =1;
+                break ;
+            case 'v':
+                opt_vcit = 1;
+                break ;
+            case 'w':
+                opt_wcit = 1;
+                break ;
+            case 'x':
+                opt_xcit = 1;
+                break;
+            case 'y':
+                opt_ycit = 1;
+                break ;
+            case 'z':
+                opt_zcit = 1;
+                break;
+            case 'e':
+                /* value is the next argument; skip it as well */
+                opt_ecit_blank = 1;
+                copy_option_value(ecite, sizeof(ecite), argv[1], 'e');
+                argc-- ;
+                argv++ ;
+                break;
+            case 'Z':
+                /* value is the next argument; skip it as well */
+                opt_cprefix = 1;
+                copy_option_value(cformat, sizeof(cformat), argv[1], 'Z');
+                argc-- ;
+                argv++ ;
+                break;
+            default:
+                usage_info() ;
+                exit(0) ;
+        }
+        argc-- ;
+        argv++ ;
+    }
+
+    /* An option that consumed its value may have eaten a file name:
+     * make sure both the input and the output file are still there.
+     */
+    if (argc < 2) {
+        usage_info();
+        exit(1);
+    }
+
+    return tlgu(argv[0], argv[1]);
+}
+
+
+/****************** FILE READ-WRITE
LOOP **************************/
+
+/* tlgu:
+ *   Reads <input_file> in blocks of sizeof(input_buffer) bytes, converts
+ *   each block with process_beta() and writes the resulting bytes of
+ *   <output_buffer> to <output_file>.
+ *   When process_beta() returns -2 (book change request) the current
+ *   output file is closed, made readable (mode 644), removed if empty,
+ *   and a new file named "<output_file>-<previous_bcit[1]>.txt" is opened.
+ * Returns: 0 on success, 1 on any file error
+ * Changes: iptr, optr, icitation, citation, betastate
+ */
+int tlgu(char *input_file, char *output_file)
+{
+    /* rw-r--r--, used both when creating and when chmod-ing output files */
+    const mode_t out_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+
+    int i;                  /* counter */
+    int j;                  /* counter */
+    int infile;             /* input file descriptor */
+    int outfile;            /* output file descriptor */
+
+    int icnt;               /* input file bytes read in input buffer */
+    int ocnt;               /* output file bytes written */
+    int bytes_to_process;   /* bytes read minus bytes already processed */
+
+    int wehaveinput;        /* flag for while */
+    int beta_return;        /* process beta return code */
+    int failed = 0;         /* set when a read or write error occurred */
+    int namelen;            /* length of a generated file name */
+
+    char new_file[MAXFILELEN];
+    struct stat filestat;
+
+    /* Open input and output files
+     */
+    infile = open(input_file, O_RDONLY);
+    if (infile < 0) {
+        perror("tlgu input file open");
+        return(1);
+    }
+    if (strlen(output_file) >= MAXFILELEN-5) {
+        printf("\ntlgu output filename too long - exiting\n");
+        close(infile);
+        return(1);
+    }
+    strcpy(new_file, output_file);
+    /* O_CREAT requires the third (mode) argument */
+    outfile = open(new_file, O_WRONLY | O_CREAT | O_TRUNC, out_mode);
+    if (outfile < 0) {
+        perror("tlgu output file create");
+        close(infile);
+        return(1);
+    }
+
+    /* Initialize citation
+     * and descriptor indicators
+     */
+    for (i = 0; i < 52; i++) {
+        icitation[i] = 0;
+        for (j = 0; j < 32; j++) {
+            citation[i][j]=0;
+        }
+    }
+
+    /* Initialize beta processing defaults
+     * e.g. The TLG Canon needs ROMAN as default
+     * Hellenic should be reset at each ID CODE
+     */
+    if (opt_roman) betastate = ROMAN;
+    else betastate = HELLENIC;
+
+    /* Read, process and write file blocks,
+     * Optionally create one file per book (-W)
+     * Change file mode (equivalent to chmod 644 output_file),
+     * and return.
+     * Note: Local deblocking usually yields higher speeds
+     */
+    wehaveinput = 1;
+    while (wehaveinput) {
+        /* Read and process beta code in input_buffer */
+        icnt = read(infile, input_buffer, sizeof(input_buffer));
+        if (icnt < 0) {
+            /* a failed read never reaches end of file: stop here */
+            perror("tlgu input file read");
+            failed = 1;
+            break;
+        }
+        if (icnt == 0) wehaveinput = 0;
+
+        iptr = 0;
+        while ((icnt > 0) && (iptr < icnt)) {
+            bytes_to_process = icnt - iptr;
+            beta_return = process_beta(bytes_to_process);
+
+            /* Write processed data and reset output buffer pointer */
+            if (optr > 0) {
+                ocnt = write(outfile, output_buffer, optr);
+                optr = 0;
+                if (ocnt < 0) {
+                    perror("tlgu output file write");
+                    wehaveinput = 0;
+                    failed = 1;
+                }
+            } else if (beta_return != -2) { /* no more bytes to write, no book change request */
+                if (opt_verbose) printf("\ntlgu: no more bytes to write");
+                wehaveinput = 0; /* signal no more input */
+            }
+            if (beta_return == -2) {
+                /* book change request, close current file and open a new one */
+                if (opt_verbose) printf("\ntlgu: book change request: %s", previous_bcit[1]);
+                if (close(outfile)) {
+                    close(infile);
+                    return(1);
+                }
+                if (chmod(new_file, out_mode)) {
+                    close(infile);
+                    return(1);
+                }
+
+                /* request file information and delete zero-length files
+                 */
+                if (stat(new_file, &filestat) == 0 && filestat.st_size == 0)
+                    unlink(new_file);
+
+                /* output_file may be up to MAXFILELEN-6 characters long, so
+                 * the generated name must be length-checked as well
+                 */
+                namelen = snprintf(new_file, sizeof(new_file), "%s-%s.txt",
+                                   output_file, previous_bcit[1]);
+                if (namelen < 0 || namelen >= (int)sizeof(new_file)) {
+                    printf("\ntlgu output filename too long - exiting\n");
+                    close(infile);
+                    return(1);
+                }
+                outfile = open(new_file, O_WRONLY | O_CREAT | O_TRUNC, out_mode);
+                if (outfile < 0) {
+                    perror("tlgu: new_file create");
+                    close(infile);
+                    return(1);
+                }
+            }
+        }
+    }
+
+    /* Close input and output files,
+     * make output file readable
+     */
+    close(infile);
+
+    if (close(outfile)) {
+        perror("tlgu output file close");
+        return(1);
+    }
+    if (chmod(new_file, out_mode)) {
+        perror("tlgu output file chmod");
+        return(1);
+    }
+    if (failed) return(1);
+    if (opt_verbose) printf("\ntlgu: processing complete\n");
+    return(0);
+}
+
+/****************** PROCESSING *************************************/
+
+/* process_beta:
+ * Processes <input_count> bytes in <input_buffer>
+ * Returns: -1 for EOF, -2 for book
change
+ * Changes: iptr
+ *
+ * Walks <input_buffer> from <iptr> up to <iptr> + <input_count>:
+ *   - null bytes are skipped;
+ *   - bytes above 0x7F are handed to id_code(), after which a new output
+ *     line is scheduled (start_new_line);
+ *   - any other byte is handed to beta_code(); if a new line is pending,
+ *     the optional book header and the line prefix (tab, custom citation
+ *     format, or the selected v..z citation levels) are emitted first.
+ * The loop ends when the input is exhausted, when <optr> has reached
+ * OUTRECSIZE, or when id_code() returns -1 or -2.  The value returned is
+ * the last id_code() result (0 if id_code() was never called).
+ */
+int process_beta (int input_count)
+{
+    unsigned char inchar;
+    unsigned int outcode;
+    int processing;
+    int iptr_max;       /* holds the calculated maximum input pointer value */
+    int return_code;    /* id_code and beta_code bytes written; error if negative */
+    char outstring[511];
+    char nstring[253];
+
+    return_code = 0;
+    /* A beta code stream includes two kinds of data:
+     * ID data - always has the high bit set.
+     * Text data - always has the high bit reset.
+     */
+    processing = 1;
+    iptr_max = iptr + input_count;
+    if (opt_verbose) printf("\n\ntlgu: process_beta - %d bytes, iptr = %4.4x, iptr_max = %4.4x", input_count, iptr, iptr_max);
+    while (processing) {
+        if ((iptr < INRECSIZE) && (iptr < iptr_max)) {
+            inchar = input_buffer[iptr++];
+            if (optr < OUTRECSIZE) {
+                if (inchar == 0) {
+                    /* do nothing for null characters */
+                } else if (inchar > 0x7F) {
+                    /* ID data - decrement input pointer before processing */
+                    --iptr;
+
+                    /* Reset beta decoding state if roman option specified */
+                    if (opt_roman) betastate = ROMAN;
+
+                    /* Process ID code */
+                    return_code = id_code(input_count);
+                    if (return_code == -1) {
+                        if (opt_verbose) printf("\ntlgu: EOF while processing id code");
+                        processing = 0;
+                    } else if (return_code == -2) {
+                        if (opt_verbose) printf("\ntlgu: book change request");
+                        processing = 0;
+                    }
+                    /* text following an ID sequence starts on a new output line */
+                    start_new_line = 1;
+                } else {
+                    /* text data < 0x80 - decrement input pointer before processing */
+                    --iptr;
+                    if (start_new_line) {
+                        /* Write info on (book) citation change */
+                        if (book_change) {
+                            if (opt_cit_id) {
+                                /* form feed, then the a, b, c and d citation strings in brackets */
+                                sprintf(outstring, "\n\f[%s] ", citation[0]);
+                                output_string(outstring);
+                                sprintf(outstring, "[%s] ", citation[1]);
+                                output_string(outstring);
+                                sprintf(outstring, "[%s] ", citation[2]);
+                                output_string(outstring);
+                                sprintf(outstring, "[%s]\n", citation[3]);
+                                output_string(outstring);
+                            }
+                            book_change = 0;
+                        }
+                        /* Build the line prefix: newline plus exactly one of
+                         * tab (-B), custom format (-Z) or v..z citation levels
+                         */
+                        sprintf(outstring, "\n");
+                        if (opt_blank)
+                            strcat(outstring, "\t");
+                        else if (opt_cprefix) {
+                            strcat(outstring, resolve_cite_format(cformat));
+                        }
+                        else if (opt_vcit || opt_wcit || opt_xcit || opt_ycit || opt_zcit) {
+                            /* Indices 21..25 are the v, w, x, y, z levels; each is
+                             * printed as <number><string> with the number omitted
+                             * when it is zero.
+                             * NOTE(review): for v..y the "." suffix means nstring is
+                             * never empty, so the ecite substitution below cannot
+                             * trigger there -- confirm the intent.
+                             */
+                            if (opt_vcit) {
+                                if (icitation[21] == 0) sprintf(nstring, "%s.",citation[21]);
+                                else sprintf(nstring, "%d%s.", icitation[21], citation[21]);
+                                if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite);
+                                strcat(outstring, nstring);
+                            }
+                            if (opt_wcit) {
+                                if (icitation[22] == 0) sprintf(nstring, "%s.",citation[22]);
+                                else sprintf(nstring, "%d%s.", icitation[22], citation[22]);
+                                if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite);
+                                strcat(outstring, nstring);
+                            }
+                            if (opt_xcit) {
+                                if (icitation[23] == 0) sprintf(nstring, "%s.",citation[23]);
+                                else sprintf(nstring, "%d%s.", icitation[23], citation[23]);
+                                if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite);
+                                strcat(outstring, nstring);
+                            }
+                            if (opt_ycit) {
+                                if (icitation[24] == 0) sprintf(nstring, "%s.",citation[24]);
+                                else sprintf(nstring, "%d%s.", icitation[24], citation[24]);
+                                if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite);
+                                strcat(outstring, nstring);
+                            }
+                            /* NOTE(review): z gets a trailing "." only when its
+                             * number is zero -- looks unintended, confirm.
+                             */
+                            if (opt_zcit) {
+                                if (icitation[25] == 0) sprintf(nstring, "%s.",citation[25]);
+                                else sprintf(nstring, "%d%s", icitation[25], citation[25]);
+                                if ((opt_ecit_blank) && (!*nstring)) strcpy(nstring, ecite);
+                                strcat(outstring, nstring);
+                            }
+                            /* Separate text from citation using a tab character */
+                            strcat(outstring, "\t");
+                        }
+                        if (input_buffer[iptr] < 0x80) {
+                            /* Print only if not followed by another ID byte */
+                            output_string(outstring);
+                        }
+                        start_new_line = 0;
+                        /* every new line starts in the default alphabet */
+                        if (opt_roman) betastate = ROMAN;
+                        else betastate = HELLENIC;
+                    }
+                    beta_code(input_count);
+                }
+            } else {
+                /* Output size is greater than input -- intermediate write */
+                printf("\ntlgu: FIXME -- DATA LOSS: ERROR output size iptr - %x optr - %x", iptr, optr);
+                processing = 0;
+            }
+        } else {
+            /* Finished processing all input */
+            processing = 0;
+        }
+    } /* end while processing*/
+
+    if (opt_verbose) printf("\ntlgu: iptr - %4.4x, optr - %4.4x ", iptr, optr);
+    return return_code;
+}
+/****************** LIBRARY FUNCTIONS ******************************/
+/* get_accents:
+ * Clears <accents>, then consumes consecutive accent characters starting
+ * at <input_buffer>[<iptr>] and records each one with store_accents().
+ * In the ROMAN state only <latin_accent_chars> are accepted, otherwise
+ * <accent_chars>.  The first non-accent character is pushed back (iptr
+ * is restored to point at it); scanning also stops at INRECSIZE.
+ * Returns: number of accents found or zero
+ * Changes: accents, iptr
+ * NOTE(review): strchr() also matches the terminating null of the accent
+ * strings, so a 0 byte in the input is consumed and counted here.
+ */
+int get_accents(void)
+{
+    unsigned char bufferchar;
+    int processing = 1;
+    int number_of_accents = 0;
+
+    accents = 0;
+
+    while (processing) {
+        if (iptr < INRECSIZE) {
+            bufferchar = input_buffer[iptr++];
+            if (betastate == ROMAN) {
+                if (strchr(latin_accent_chars, bufferchar)) {
+                    store_accents(bufferchar);
+                    number_of_accents++;
+                } else {
+                    /* not an accent: push it back and stop */
+                    --iptr;
+                    processing = 0;
+                }
+            } else if (strchr(accent_chars, bufferchar)) {
+                store_accents(bufferchar);
+                number_of_accents++;
+            } else {
+                /* not an accent: push it back and stop */
+                --iptr;
+                processing = 0;
+            }
+        } else {
+            processing = 0;
+        }
+    }
+    return number_of_accents;
+}
+
+/* store_accents:
+ * Stores accent character passed as a parameter to <accents>
+ * 0 00 00 --- 0 00 00 no accent
+ * | | |
+ * | | ---- 01 psili, 10 dasia, 11 dialytika
+ * | ------- 01 varia, 10 oxia, 11 perispomeni
+ * ----------- 1 ypogegrammeni
+ * Changes: accents
+ * Caveat: currently only ORs new accent...
expects an all-zero accent variable
+ */
+void store_accents(unsigned char bufferchar)
+{
+    int bits = 0;   /* bit pattern contributed by this accent character */
+
+    if (bufferchar == ')')
+        bits = 1;           /* psili */
+    else if (bufferchar == '(')
+        bits = 2;           /* dasia */
+    else if (bufferchar == '+')
+        bits = 3;           /* dialytika */
+    else if (bufferchar == '\\')
+        bits = 4;           /* varia */
+    else if (bufferchar == '/')
+        bits = 8;           /* oxia */
+    else if (bufferchar == '=')
+        bits = 0xc;         /* perispomeni */
+    else if (bufferchar == '|')
+        bits = 0x10;        /* ypogegrammeni */
+
+    /* merge with what is already stored and keep the five defined bits */
+    accents = (accents | bits) & 0x1f;
+}
+
+/* output_accents:
+ * Writes the combining marks described by <accents> through output_utf():
+ * first the breathing / dialytika field (bits 0-1), then the tone field
+ * (bits 2-3), then ypogegrammeni (bit 4).
+ * 0 00 00 --- 0 00 00 no accent
+ * | | |
+ * | | ---- 01 psili, 10 dasia, 11 dialytika
+ * | ------- 01 varia, 10 oxia, 11 perispomeni (CARET in ROMAN state)
+ * ----------- 1 ypogegrammeni
+ * Changes: optr (output_utf)
+ */
+void output_accents(void)
+{
+    /* bits 0-1 */
+    switch (accents & 3) {
+        case 1:
+            output_utf(PSILI);
+            break;
+        case 2:
+            output_utf(DASIA);
+            break;
+        case 3:
+            output_utf(DIALYTIKA);
+            break;
+        default:
+            break;
+    }
+
+    /* bits 2-3 */
+    switch ((accents & 0xc) >> 2) {
+        case 1:
+            output_utf(VARIA);
+            break;
+        case 2:
+            output_utf(OXIA);
+            break;
+        case 3:
+            if (betastate == ROMAN) {
+                output_utf(CARET);
+            } else {
+                output_utf(PERISPOMENI);
+            }
+            break;
+        default:
+            break;
+    }
+
+    /* bit 4 */
+    if (accents & 0x10) {
+        output_utf(YPOGEGRAMMENI);
+    }
+}
+
+
+/* getnum:
+ * Collects a non-zero number from the current <input_buffer> position.
+ * Returns: an integer or zero if no number found, -1 on end of buffer
+ * Changes: iptr
+ *
+ * Decimal digits are consumed until the first non-digit, which is left
+ * in the buffer for the caller.  -1 is also returned when MAXNUMBERS
+ * digits have been read without reaching a non-digit.
+ */
+int getnum(void)
+{
+    #define MAXNUMBERS 32
+    unsigned char bufferchar;
+    char modnumber[MAXNUMBERS]; /* symbol or font modifier number string */
+    int imodnumber = 0;         /* index to modnumber */
+    int convnumber = 0;         /* converted modnumber string */
+    int processing = 1;
+
+    modnumber[0] = 0;
+
+    while (processing) {
+        if ( (iptr < INRECSIZE) && (imodnumber < MAXNUMBERS) ) {
+            bufferchar = input_buffer[iptr++];
+            if (isdigit(bufferchar)) {
+                modnumber[imodnumber++] = bufferchar;
+            } else {
+                /* push the terminator back and convert what was collected;
+                 * an empty string leaves convnumber at zero
+                 */
+                --iptr;
+                modnumber[imodnumber] = 0;
+                sscanf(modnumber, "%d", &convnumber);
+                processing = 0;
+            }
+        } else {
+            convnumber = -1;
+            processing = 0;
+        }
+    }
+    /* not an errno condition, so report it directly instead of via perror */
+    if (convnumber < 0) fprintf(stderr, "tlgu: did not complete number\n");
+    return convnumber;
+}
+
+/* output_utf:
+ * Converts the input code into a UTF-8 byte sequence in output_buffer
+ * Codes below 0x80 take one byte, below 0x800 two bytes, up to 0xffff
+ * three bytes; a zero code and codes above 0xffff produce no output.
+ * Nothing is written unless three bytes are still free in the buffer.
+ * Changes: optr, output_buffer
+ */
+void output_utf(int ucode)
+{
+    if ((optr+3) > OUTRECSIZE) {
+        /* not an errno condition, so report it directly instead of via perror */
+        fprintf(stderr, "tlgu: optr out of range\n");
+    } else if (ucode == 0){
+        /* do nothing */
+    } else if (ucode < 0x80) {
+        output_buffer[optr++] = ucode;
+    } else if (ucode < 0x800) {
+        output_buffer[optr++] = (ucode >> 6) | 0xc0;
+        output_buffer[optr++] = (ucode & 0x3f) | 0x80;
+    } else if (ucode <= 0xffff) {
+        output_buffer[optr++] = ((ucode & 0xf000) >> 12) | 0xe0;
+        output_buffer[optr++] = ((ucode & 0x0fc0) >> 6) | 0x80;
+        output_buffer[optr++] = (ucode & 0x3f) | 0x80;
+    } else {
+        /* higher unicodes are ignored */
+    }
+}
+
+/* output_string:
+ * Calls output_utf to write a string in <output_buffer>,
+ * one character at a time up to the terminating null.
+ * Returns: nothing
+ * Changes: optr, output_buffer
+ */
+void output_string(char *outstr)
+{
+    char *next;
+
+    for (next = outstr; *next; next++) {
+        output_utf(*next);
+    }
+}
+
+/* handle_escape_codes:
+ * Formatting and character output based on escape codes: $&%"@#^[]<>{}
+ * Input: escape code, optional number
+ *
Changes: optr, output_buffer
+ *
+ * <number> may be -1 (getnum() hit the end of the buffer); symbol tables
+ * are only consulted for numbers inside their range.
+ */
+
+/* escape_trace:
+ *   Prints one debug line: the escape character, its number, the work
+ *   citation string and the current x.y.z citation numbers.
+ */
+static void escape_trace(unsigned char beta, int number)
+{
+    printf("%c%d -- %s %d.%d.%d\n", beta, number, citation[1], icitation[23], icitation[24], icitation[25]);
+}
+
+/* escape_index_ok:
+ *   True when <number> is a valid index into a table of <limit> entries.
+ */
+static int escape_index_ok(int number, int limit)
+{
+    return (number >= 0) && (number < limit);
+}
+
+void handle_escape_codes(unsigned char beta, int number)
+{
+    int temp = 0;
+
+    switch (beta)
+    {
+        case '$':
+            betastate = HELLENIC;
+            accents = 0;
+            break;
+        case '&':
+            betastate = ROMAN;
+            accents = 0;
+            break;
+        case '%':
+            if (opt_debug_special) escape_trace(beta, number);
+            if (escape_index_ok(number, MAX_PUNCTUATION))
+                output_utf(punctuation[number]);
+            break;
+        case '\"':
+            if (opt_debug_special) escape_trace(beta, number);
+            if (escape_index_ok(number, MAX_QUOTATION)) {
+                /* the same code alternately opens and closes a quotation */
+                if (quotation_open[number]) {
+                    output_utf(quotation_close_symbol[number]);
+                    quotation_open[number] = 0;
+                } else {
+                    output_utf(quotation_open_symbol[number]);
+                    quotation_open[number] = 1;
+                }
+            }
+            break;
+        case '@':
+            /* FIXME: If citations are active, paging should be disabled */
+            if (opt_debug_special) escape_trace(beta, number);
+            /* Page formats -- FIXME: incomplete */
+            if (number == 0) {
+                output_utf(0x20);
+                output_utf(0x20);
+            } else if (number == 1) {
+                if (opt_page) output_utf(0xc);
+//FIXME: reinstate else output_utf(0xa);
+            } //fixme: reinstate else output_utf(0xa);
+            break;
+        case '#':
+            if (opt_debug_special) escape_trace(beta, number);
+            if (escape_index_ok(number, MAX_TEXT_SYMBOLS)) {
+                output_utf(text_symbols[number]);
+            }
+            break;
+        case '^':
+            /* quarter-spaces: will output at least one space */
+            if (number > 0) temp = number / 4;
+            while (temp >= 0) {
+                output_utf(0x20);
+                temp--;
+            }
+            break;
+        case '[':
+            if (opt_debug_bracket) escape_trace(beta, number);
+            if (escape_index_ok(number, MAX_BRACKET)) {
+                output_utf(bracket_open_symbol[number]);
+            }
+            break;
+        case ']':
+            if (opt_debug_bracket) escape_trace(beta, number);
+            if (escape_index_ok(number, MAX_BRACKET)) {
+                output_utf(bracket_close_symbol[number]);
+            }
+            break;
+        case '<':
+            if (opt_debug_bracket) escape_trace(beta, number);
+            if (escape_index_ok(number, MAX_QUASI_BRACKET)) {
+                output_utf(quasi_bracket_open_symbol[number]);
+            }
+            break;
+        case '>':
+            if (opt_debug_bracket) escape_trace(beta, number);
+            if (escape_index_ok(number, MAX_QUASI_BRACKET)) {
+                output_utf(quasi_bracket_close_symbol[number]);
+            }
+            break;
+        case '{':
+            if (opt_debug_bracket) escape_trace(beta, number);
+            if (escape_index_ok(number, MAX_NON_TEXT)) {
+                output_utf(non_text_open_symbol[number]);
+            }
+            break;
+        case '}':
+            /* the trace now shows the closing brace that was actually read */
+            if (opt_debug_bracket) escape_trace(beta, number);
+            if (escape_index_ok(number, MAX_NON_TEXT)) {
+                output_utf(non_text_close_symbol[number]);
+            }
+            break;
+        default:
+            break;
+    }
+}
+
+/* which_sigma:
+ * Tries to decide on which sigma form to use.
+ * Input: index of input_buffer (iptr) after the sigma
+ * Returns: output character code
+ */
+int which_sigma(int nextptr)
+{
+    int scanning;
+    int nextcode;
+    /* If the next character is a hyphen, it is a medial sigma
+     * Otherwise, a few characters are examined in the input buffer:
+     * if an alphabetic character is found before we hit a space, or
+     * other punctuation character, it is a medial sigma
+     * otherwise it is a final sigma (there is one exception in 4085 - POS(.))
+     */
+    if (input_buffer[iptr] == '-')
+        return(SIGMEDIAL);
+    else {
+        /* look at no more than ten characters */
+        scanning = 10;
+        while(scanning) {
+            nextcode = input_buffer[nextptr++];
+            if (isalpha(nextcode))
+                return(SIGMEDIAL);
+            if (nextcode > 0x7f)
+                return(SIGFINAL);
+            if (strchr(punctuation_codes, nextcode))
+                return(SIGFINAL);
+            scanning--;
+        }
+        return(SIGMEDIAL);
+    }
+}
+
+/* beta_code:
+ * Processes <input_count> characters in <input_buffer> and
+ * writes processed output to <output_buffer>
+ * Stops at the first byte above 0x7F (ID data), which is left in the
+ * buffer, or when the requested count / INRECSIZE is reached.
+ * Changes: iptr, optr, output_buffer, accents, betastate (through the
+ *          escape code handler)
+ */
+void beta_code(int input_count)
+{
+    int processing;
+    int input_pointer_max;
+    unsigned char betachar;
+    unsigned int outputchar;
+    int tmp;
+
+    input_pointer_max = iptr + input_count;
+    processing = 1;
+
+    while (processing) {
+        if ( (iptr < INRECSIZE) && (iptr < input_pointer_max) ) {
+            betachar = input_buffer[iptr++];
+            if ((betachar > 0x7F)) {
+                /* ID data found - restore pointer and stop processing*/
+                --iptr;
+                processing = 0;
+            } else {
+                outputchar = 0;
+                if (strchr(escape_codes, betachar)) {
+                    /* Handle escape codes */
+                    handle_escape_codes(betachar, getnum());
+                } else if (betastate == HELLENIC && betachar == '*') {
+                    /* Handle Hellenic uppercase character */
+                    get_accents();
+                    betachar = input_buffer[iptr++];
+                    if (accents == 0) get_accents(); //FIXME: handle suffix accents differently
+                    if (strchr(accented_chars, betachar)) {
+                        /* the accent bit pattern selects the precomposed form */
+                        switch (betachar) {
+                            case 'A':
+                                outputchar = Alpha[accents];
+                                break;
+                            case 'E':
+                                outputchar = Epsilon[accents];
+                                break;
+                            case 'H':
+                                outputchar = Eta[accents];
+                                break;
+                            case 'I':
+                                outputchar = Iota[accents];
+                                break;
+                            case 'O':
+                                outputchar = Omicron[accents];
+                                break;
+                            case 'U':
+                                outputchar = Ypsilon[accents];
+                                break;
+                            case 'W':
+                                outputchar = Omega[accents];
+                                break;
+                            case 'R':
+                                outputchar = Rho[accents];
+                                break;
+                            default:
+                                break;
+                        }
+                    } else if (betachar == 'S') {
+                        tmp = getnum();
+                        if (tmp == 3) outputchar = SIGLUNATEUPPER;
+                        else outputchar = SIGMEDIALUPPER;
+                    } else if (isalpha(betachar)) {
+                        /* not an accented character */
+                        outputchar = hellenic[betachar];
+                    } else {
+                        outputchar = hellenic[betachar - 0x20];
+                    }
+                    if (outputchar == 0) outputchar = hellenic[betachar]; /* error condition */
+                    output_utf(outputchar);
+                } else if (betastate == HELLENIC && isalpha(betachar)) {
+                    /* Handle hellenic lower case:
+                     * Get default character and then try to pin accents
+                     */
+                    if (strchr(accented_chars, betachar)) {
+                        get_accents();
+                        switch (betachar) {
+                            case 'A':
+                                outputchar = alpha[accents];
+                                break;
+                            case 'E':
+                                outputchar = epsilon[accents];
+                                break;
+                            case 'H':
+                                outputchar = eta[accents];
+                                break;
+                            case 'I':
+                                outputchar = iota[accents];
+                                break;
+                            case 'O':
+                                outputchar = omicron[accents];
+                                break;
+                            case 'U':
+                                outputchar = ypsilon[accents];
+                                break;
+                            case 'W':
+                                outputchar = omega[accents];
+                                break;
+                            case 'R':
+                                outputchar = rho[accents];
+                                break;
+                            default:
+                                break;
+                        }
+                    } else if (betachar == 'S') {
+                        /* S1, S2, S3 name the form explicitly; a bare S is
+                         * resolved from the characters that follow it
+                         */
+                        tmp = getnum();
+                        if (tmp == 1) outputchar = SIGMEDIAL;
+                        else if (tmp == 2)outputchar = SIGFINAL;
+                        else if (tmp == 3) outputchar = SIGLUNATE;
+                        if (outputchar == 0) {
+                            outputchar = which_sigma(iptr);
+                        }
+                    }
+
+                    if (outputchar == 0) outputchar = hellenic[betachar - 0x20];
+                    output_utf(outputchar);
+                } else if (betastate == ROMAN && isalpha(betachar)) {
+                    /* Handle Roman characters */
+                    //FIXME: need to process roman characters
+                    if (isalpha(betachar)) get_accents();
+                    outputchar = betachar;
+                    output_utf(outputchar);
+                    /* ROMAN uses combining accent forms */
+                    output_accents();
+                } else {
+                    //FIXME: placeholder
+                    if (betachar != '`') outputchar = betachar;
+                    output_utf(outputchar);
+                }
+            }
+        } else {
+            /* Requested number of characters have been processed
+             * or no more characters available in buffer
+             */
+            processing = 0;
+        }
+    }
+}
+
+
+/* cite_append:
+ *   Appends <src> to the null-terminated string in <dst> without writing
+ *   past <dstsize> bytes; text that does not fit is dropped.
+ */
+static void cite_append(char *dst, size_t dstsize, const char *src)
+{
+    size_t used = strlen(dst);
+
+    if (used + 1 < dstsize) {
+        snprintf(dst + used, dstsize - used, "%s", src);
+    }
+}
+
+/* cite_key_delimiter:
+ *   Replaces every '/' in <text> with ':' (SWORD key delimiter).
+ */
+static void cite_key_delimiter(char *text)
+{
+    for (; *text; text++) {
+        if (*text == '/') *text = ':';
+    }
+}
+
+/* resolve_cite_format:
+ *   Expands the custom citation format <cformat> (option -Z).
+ *     %a..%z   citation level 0..25; for the levels after u (v..z) the
+ *              binary number, when non-zero, is printed before the string
+ *     %A..%Z   descriptor level 26..51, printed the same way
+ *     %%       a literal percent sign
+ *     \t \n \r tab, newline, carriage return; any other character after
+ *              a backslash is copied as is
+ *   An empty citation is replaced by <ecite> when -e was given.  In the
+ *   citation text '/' is replaced by ':'.
+ *   A '%' or '\' that ends the format string ends the expansion.
+ * Returns: pointer to a static buffer, overwritten by the next call;
+ *          output longer than the buffer is truncated
+ */
+const char *resolve_cite_format(const char *cformat) {
+    static char outbuf[511];
+    char nstring[253];
+    const char *c;
+
+    *outbuf = 0;
+    for (c = cformat; *c; c++) {
+        if (*c == '%') {
+            const char c2 = *(c+1);
+            int cstart = -1;
+
+            if (c2 == 0) {
+                /* nothing follows the '%': do not step over the terminator */
+                fprintf(stderr, "unknown escape sequence: %%\n");
+                break;
+            }
+            if ((c2 >= 'a') && (c2 <= 'z')) {
+                cstart = c2 - 'a';
+            }
+            else if ((c2 >= 'A') && (c2 <= 'Z')) {
+                cstart = 26 + (c2 - 'A');
+            }
+            else if (c2 == '%') {
+                cite_append(outbuf, sizeof(outbuf), "%");
+            }
+            else {
+                fprintf(stderr, "unknown escape sequence: %%%c\n", c2);
+            }
+            c++; //skip both our '%' and following character (by loop inc);
+
+            if (cstart > 20) {
+                if (icitation[cstart] == 0) snprintf(nstring, sizeof(nstring), "%s", citation[cstart]);
+                else snprintf(nstring, sizeof(nstring), "%d%s", icitation[cstart], citation[cstart]);
+                if ((opt_ecit_blank) && (!*nstring)) snprintf(nstring, sizeof(nstring), "%s", ecite);
+// ADDED FOR SWORD KEY DELIMETER
+                cite_key_delimiter(nstring);
+// -----------------------------
+                cite_append(outbuf, sizeof(outbuf), nstring);
+            }
+            else if (cstart > -1) {
+                if (!citation[cstart][0]) {
+                    if (opt_ecit_blank) cite_append(outbuf, sizeof(outbuf), ecite);
+                }
+                else {
+                    /* work on a copy so the stored citation is not altered */
+                    snprintf(nstring, sizeof(nstring), "%s", citation[cstart]);
+// ADDED FOR SWORD KEY DELIMETER
+                    cite_key_delimiter(nstring);
+// -----------------------------
+                    cite_append(outbuf, sizeof(outbuf), nstring);
+                }
+            }
+        }
+        else if (*c == '\\') {
+            const char escaped = *(c+1);
+
+            if (escaped == 0) {
+                /* nothing follows the backslash: do not step over the terminator */
+                break;
+            }
+            switch (escaped) {
+                case 't': cite_append(outbuf, sizeof(outbuf), "\t"); break;
+                case 'n': cite_append(outbuf, sizeof(outbuf), "\n"); break;
+                case 'r': cite_append(outbuf, sizeof(outbuf), "\r"); break;
+                default: *nstring = escaped; nstring[1] = 0; cite_append(outbuf, sizeof(outbuf), nstring); break;
+            }
+            c++; //skip both our '\' and following character (by loop inc);
+        }
+        else {
+            *nstring = *c; nstring[1] = 0;
+            cite_append(outbuf, sizeof(outbuf), nstring);
+        }
+    }
+    return outbuf;
+}
+
+
+/* id_code:
+ * <iptr> points to the next character in the <input_buffer> to process;
+ * <optr> points to the next empty <output_buffer position.
+ * Returns: 0 or -1 for EOF
+ */
+int id_code(int input_count)
+{
+    int input_pointer_max;
+    int return_code;
+    int scratch;
+    int processing;
+    unsigned char idchar;
+    unsigned char outcode;
+
+
+    return_code = 0;
+    input_pointer_max = iptr + input_count;
+    processing = 1;
+    while (processing) {
+        if ((iptr < INRECSIZE) && (iptr < input_pointer_max)) {
+            outcode = 0;
+            idchar = input_buffer[iptr++];
+            if ((idchar < 0x80)) { /* text data - restore pointer and return*/
+                --iptr;
+                processing = 0;
+            } else { /* ID data - translate and write */
+                if (optr < OUTRECSIZE) {
+                    id_process = 0; /* we don't have a command yet */
+                    if (idchar >= 0xF0) {
+                        switch (idchar)
+                        {
+                            case 0xF0: /* EOF */
+                                return_code = -1; /* indicate EOF */
+                                processing = 0;
+                                break;
+                            case 0xFE: /* End of block -- block is padded with nulls */
+                                while (!input_buffer[iptr] && iptr<INRECSIZE) {
+                                    iptr++;
+                                }
+                                if (opt_debug_cit) printf("tlgu: EOB %x\n", iptr-1);
+                                break;
+                            case 0xFF: /* End of ASCII string */
+                                if (opt_debug_cit) printf("tlgu: %d %s\n", id_level, citation[id_level]);
+                                if (opt_debug_cit) printf("tlgu: EOS %x\n ", iptr-1);
+                                break;
+                            case 0xF8: /* Exception start */
+                                if (opt_debug_cit) printf("tlgu: Exc start %x\n", iptr-1);
+                                break;
+                            case 0xF9: /* Exception end */
+                                if (opt_debug_cit) printf("tlgu: Exc end %x\n", iptr-1);
+                                break;
+                            default:
+                                break;
+                        }
+                    } else if (idchar >= 0xE0) {
+                        /* The byte following an escape code is an ID byte
+                         * Citation IDs can only be 0=a, 1=b, 2=c and 4=d
+                         */
+                        if (opt_debug_cit) printf("tlgu: Escape %x", idchar);
+                        id_command = idchar & 0xF; /* get "command" nybble */
+                        idchar = input_buffer[iptr++] & 0x7F; /* get ID level byte */
+                        if (idchar >= 97) { /* descriptors hold the upper part of the array */
+                            id_level = idchar - 97 + 26; /* create
an index offset */ + if (id_level > 51) {id_level = 51;} /* default to z */ + } else { + id_level = idchar & 7; /* must be 0 - 4 */ + if (id_level == 4) {id_level = 3;} /* adjust d level */ + } + if (opt_debug_cit) printf(" ID level: %d\n", id_level); + id_process = 1; /* command must be processed */ + } else if ((idchar >= 0x80) && (id_process == 0)) { + id_command = idchar & 0xF; /* get command first */ + scratch = (idchar >> 4) & 0x7; /* try to create an offset */ + //printf(" %x %x ", idchar, scratch); + switch (scratch) + { + case 0: + id_level = 25; /* z */ + id_process = 1; /* command must be processed */ + break; + case 1: + id_level = 24; /* y */ + id_process = 1; /* command must be processed */ + break; + case 2: + id_level = 23; /* x */ + id_process = 1; /* command must be processed */ + break; + case 3: + id_level = 22; /* w */ + id_process = 1; /* command must be processed */ + break; + case 4: + id_level = 21; /* v */ + id_process = 1; /* command must be processed */ + break; + case 5: + id_level = 13; /* n */ + id_process = 1; /* command must be processed */ + break; + default: + break; + } + + } + if (id_process) { + switch (id_command) + { + case 0: + icitation[id_level]++; /* increment ID */ + break; + case 1: + icitation[id_level] = 1; /* literal value */ + break; + case 2: + icitation[id_level] = 2; /* literal value */ + break; + case 3: + icitation[id_level] = 3; /* literal value */ + break; + case 4: + icitation[id_level] = 4; /* literal value */ + break; + case 5: + icitation[id_level] = 5; /* literal value */ + break; + case 6: + icitation[id_level] = 6; /* literal value */ + break; + case 7: + icitation[id_level] = 7; /* literal value */ + break; + case 8: + idchar = input_buffer[iptr++]; /* 7 bit binary value */ + icitation[id_level] = idchar & 0x7F; + break; + case 9: + idchar = input_buffer[iptr++]; /* 7 bit binary value */ + icitation[id_level] = idchar & 0x7F; + idchar = input_buffer[iptr++]; /* single character */ + 
citation[id_level][0] = idchar & 0x7F; + citation[id_level][1] = 0; + break; + case 0xa: + idchar = input_buffer[iptr++]; /* 7 bit binary value */ + icitation[id_level] = idchar & 0x7F; + for (id_char=0; id_char < 31; id_char++) { + idchar = input_buffer[iptr++]; /* string */ + if (idchar == 0xFF) { + citation[id_level][id_char] = 0; /* end of string */ + break; + } else { + citation[id_level][id_char] = idchar & 0x7F; + } + } + break; + case 0xb: + idchar = input_buffer[iptr++]; /* 14 bit binary value */ + scratch = (idchar & 0x7F) << 7; /* shift upper */ + idchar = input_buffer[iptr++]; /* 14 bit binary value */ + idchar &= 0x7F; /* mask sign bit */ + scratch = scratch | idchar; /* combine */ + icitation[id_level] = scratch; + break; + case 0xc: + idchar = input_buffer[iptr++]; /* 14 bit binary value */ + scratch = (idchar & 0x7F) << 7; /* shift upper */ + idchar = input_buffer[iptr++]; /* 14 bit binary value */ + idchar &= 0x7F; /* mask sign bit */ + scratch = scratch | idchar; /* combine */ + icitation[id_level] = scratch; + idchar = input_buffer[iptr++]; /* single character */ + citation[id_level][0] = idchar & 0x7F; + citation[id_level][1] = 0; /* end of string */ + break; + case 0xd: + idchar = input_buffer[iptr++]; /* 14 bit binary value */ + scratch = (idchar & 0x7F) << 7; /* shift upper */ + idchar = input_buffer[iptr++]; /* 14 bit binary value */ + idchar &= 0x7F; /* mask sign bit */ + scratch = scratch | idchar; /* combine */ + icitation[id_level] = scratch; + for (id_char=0; id_char < 31; id_char++) { + idchar = input_buffer[iptr++]; /* string */ + if (idchar == 0xFF) { + citation[id_level][id_char] = 0; /* end of string */ + break; + } else { + citation[id_level][id_char] = idchar & 0x7F; + } + } + break; + case 0xe: + /* same binary value, single character */ + idchar = input_buffer[iptr++]; /* single character */ + citation[id_level][0] = idchar & 0x7F; + citation[id_level][1] = 0; /* end of string */ + break; + case 0xf: + icitation[id_level] = 0; 
/* no binary value */ + for (id_char=0; id_char < 31; id_char++) { + idchar = input_buffer[iptr++]; /* string */ + if (idchar == 0xFF) { + citation[id_level][id_char] = 0; /* end of string */ + break; + } else { + citation[id_level][id_char] = idchar & 0x7F; + } + } + + /* Keep tab of book changes, optionally split into books */ + if (id_level == 1) { + if (strncmp(citation[1], previous_bcit[1], 31)) { + if (opt_multiple) { + /* Signal outer loop to stop + * after processing citation change + */ + return_code = -2; + processing = 0; + if (opt_verbose) printf("\ntlgu: book citation: %s, previous: %s", citation[1], previous_bcit[1]); + } + strncpy(previous_bcit[1], citation[1], 31); + previous_bcit[1][31] = 0; + } + book_change = 1; + } + break; + default: + printf("tlgu: Unknown id_command: %x, iptr %x\n", id_command, iptr); + break; + } + if (opt_debug_cit) printf("tlgu: Command: %x ID level: %d, Binary: %d, ASCII: %s iptr++ %x\n",\ + id_command, id_level,icitation[id_level], citation[id_level], iptr); + + /* Adjust lower citation levels */ + switch (id_level) + { + case 21: + icitation[22] = 1; + case 22: + icitation[23] = 1; + case 23: + icitation[24] = 1; + case 24: + icitation[25] = 1; + case 25: + outcode = 0; + break; + default: + break; + } + + } /* id_process */ + + if (outcode) { + output_utf(outcode); + } + + } else { + --iptr; /* output buffer full - restore pointer and return */ + processing = 0; + } + } /* ID data processing */ + } else { /* Finished processing all input */ + processing = 0; + } + } /* while processing loop */ + return return_code; +} diff --git a/tlgu/tlgu.h b/tlgu/tlgu.h new file mode 100644 index 0000000..83aeb56 --- /dev/null +++ b/tlgu/tlgu.h @@ -0,0 +1,221 @@ +/* tlgu.h + * + * Copyright (C) 2004 Dimitri Marinakis + * + * Licensed under the terms of the GNU General Public License. + * ABSOLUTELY NO WARRANTY. + * See the file `COPYING' in this directory. 
+ * + * Hellenic character codes + * Relevant Unicode standard tables: + * Greek and Coptic: 0370 - 03FF + * Greek Extended: 1F00 - 1FFF + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <ctype.h> + + +#define INRECSIZE 0x2000 +#define OUTRECSIZE 0xFFFFF + +/* Beta code escapes and state processing codes */ +#define HELLENIC 1 +#define ROMAN 2 +#define PUNCTUATION 3 +#define QUOTATION 4 +#define PAGE 5 +#define BRACKET 6 +#define QUASIBRACKET 7 +#define NONTEXT 8 +#define SYMBOL 9 +#define HELLENIC_UPPER 0xa +#define HELLENIC_SELECT 0xb +#define HELLENIC_SIGMA 0xc +/* Accent is an existing code above 0x1f */ +#define ACCENT 0x2f +#define HELLENIC_SIGMA_UPPER 0x10 +#define TABHALF 0x11 +#define ROMAN_SELECT 0x16 +#define PUNCTUATION_SELECT 0x1f +#define QUOTATION_SELECT 0x29 +#define PAGE_SELECT 0x33 +#define BRACKET_SELECT 0x3d +#define QUASIBRACKET_SELECT 0x47 +#define NONTEXT_SELECT 0x51 +#define SYMBOL_SELECT 0x5b +#define TABHALF_SELECT 0x61 + +/* code defines */ +#define SIGMEDIAL 0x3c3 +#define SIGMEDIALUPPER 0x3a3 +#define SIGFINAL 0x3c2 +#define SIGFINALUPPER 0x3a3 +#define SIGLUNATE 0x3f2 +#define SIGLUNATEUPPER 0x3f9 + +/* accents */ +#define PSILI 0x313 +#define DASIA 0x314 +#define DIALYTIKA 0x308 +#define VARIA 0x300 +#define OXIA 0x301 +#define PERISPOMENI 0x342 +#define YPOGEGRAMMENI 0x345 +#define CARET 0x302 + +/* TLG stream translation table -- Unicode + A B G D E Z H Q I K L M N C O P R S T U F X Y W V; V is digamma + A value under 0x20 is a state change control code. + Zero means no character. + */ +unsigned int hellenic[] = { + /* sp ! " # $ % & ' */ + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + /* ( ) * + , - . / */ + ACCENT, ACCENT, HELLENIC_UPPER, ACCENT, 0x2c, 0x2d, 0x2e, ACCENT, + /* 0 1 2 3 4 5 6 7 */ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + /* 8 9 : ; < = > ? 
@ */ + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, + /* a b c d e f g h */ + 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3d5, 0x3b3, 0x3b7, + /* i j k l m n o p */ + 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf, 0x3c0, + /* q r s t u v w x */ + 0x3b8, 0x3c1, 0x3c2, 0x3c4, 0x3c5, 0x3dd, 0x3c9, 0x3c7, + /* y z [ \ ] ^ _ sep`*/ + 0x3c8, 0x3b6, 0x54, 0x55, 0x56, 0x57, 0x00, 0x00, + /* A B C D E F G H */ + 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393, 0x397, + /* I J K L M N O P */ + 0x399, 0x3A3, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f, 0x3a0, + /* Q R S T U V W X */ + 0x398, 0x3a1, 0x3a2, 0x3a4, 0x3a5, 0x3dc, 0x3a9, 0x3a7, + /* Y Z { | } ~ DEL */ + 0x3a8, 0x396, 0x7b, 0x7c, 0x7d, 0x00, 0x00}; + + +/* Accents can be described in three groups, all optional + * In the first group are - mutually exclusive - psili, daseia or dialytika + * In the second group are - mutually exclusive - oxia, varia or perispomeni + * In the third group are - mutually exclusive - ypogegrammeni, subscript dot or missing letter dot + * as the last two are not part of fully-formed characters, will be used as combining diacritical marks + * The simplified form is then: + * [ ) or ( or + ] [ / or \ or = ] [ | ] + * + * This can be described by 5 accent flag bits (reverse order) + * + * 0 00 00 --- 0 00 00 no accent + * | | | + * | | ---- 01 psili, 10 dasia, 11 dialytika + * | ------- 01 varia, 10 oxia, 11 perispomeni + * ----------- 1 ypogegrammeni + * + * The resulting table of accentable characters will have 32-character rows + * with the formed character codes in the appropriate positions, or zero: + * plain, psili, dasia, dialytika, varia, psili-varia, dasia-varia, dialytika-varia + * oxia, psili-oxia, dasia-oxia, dialytika-oxia, perispomeni, psili-perisp, dasia-perisp, dialytika-perisp + * ditto with ypogegrammeni + * + * If zero is returned, combining diacritical marks should be generated from the accent flags. 
+ */ +unsigned int alpha[] = { + 0x03b1, 0x1f00, 0x1f01, 0x0000, 0x1f70, 0x1f02, 0x1f03, 0x0000, + 0x1f71, 0x1f04, 0x1f05, 0x0000, 0x1fb6, 0x1f06, 0x1f07, 0x0000, + 0x1fb3, 0x1f80, 0x1f81, 0x0000, 0x1fb2, 0x1f82, 0x1f83, 0x0000, + 0x1fb4, 0x1f84, 0x1f85, 0x0000, 0x1fb7, 0x1f86, 0x1f87, 0x0000 + }; +unsigned int Alpha[] = { + 0x0391, 0x1f08, 0x1f09, 0x0000, 0x1fba, 0x1f0a, 0x1f0b, 0x0000, + 0x1fbb, 0x1f0c, 0x1f0d, 0x0000, 0x0000, 0x1f0e, 0x1f0f, 0x0000, + 0x1fbc, 0x1f88, 0x1f89, 0x0000, 0x0000, 0x1f8a, 0x1f8b, 0x0000, + 0x0000, 0x1f8c, 0x1f8d, 0x0000, 0x0000, 0x1f8e, 0x1f8f, 0x0000 + }; +unsigned int epsilon[] = { + 0x03b5, 0x1f10, 0x1f11, 0x0000, 0x1f72, 0x1f12, 0x1f13, 0x0000, + 0x1f73, 0x1f14, 0x1f15, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; + unsigned int Epsilon[] = { + 0x0395, 0x1f18, 0x1f19, 0x0000, 0x1fc8, 0x1f1a, 0x1f1b, 0x0000, + 0x1fc9, 0x1f1c, 0x1f1d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; +unsigned int eta[] = { + 0x03b7, 0x1f20, 0x1f21, 0x0000, 0x1f74, 0x1f22, 0x1f23, 0x0000, + 0x1f75, 0x1f24, 0x1f25, 0x0000, 0x1fc6, 0x1f26, 0x1f27, 0x0000, + 0x1fc3, 0x1f90, 0x1f91, 0x0000, 0x1fc2, 0x1f92, 0x1f93, 0x0000, + 0x1fc4, 0x1f94, 0x1f95, 0x0000, 0x1fc7, 0x1f96, 0x1f97, 0x0000 + }; +unsigned int Eta[] = { + 0x0397, 0x1f28, 0x1f29, 0x0000, 0x1fca, 0x1f2a, 0x1f2b, 0x0000, + 0x1fcb, 0x1f2c, 0x1f2d, 0x0000, 0x0000, 0x1f2e, 0x1f2f, 0x0000, + 0x1fcc, 0x1f98, 0x1f99, 0x0000, 0x0000, 0x1f9a, 0x1f9b, 0x0000, + 0x0000, 0x1f9c, 0x1f9d, 0x0000, 0x0000, 0x1f9e, 0x1f9f, 0x0000 + }; +unsigned int iota[] = { + 0x03b9, 0x1f30, 0x1f31, 0x03ca, 0x1f76, 0x1f32, 0x1f33, 0x1fd2, + 0x1f77, 0x1f34, 0x1f35, 0x1fd3, 0x1fd6, 0x1f36, 0x1f37, 0x1fd7, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; + unsigned int Iota[] = { + 0x0399, 0x1f38, 0x1f39, 0x03aa, 0x1fda, 0x1f3a, 0x1f3b, 0x0000, + 0x1fdb, 0x1f3c, 0x1f3d, 0x0000, 0x0000, 0x1f3e, 0x1f3f, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; +unsigned int omicron[] = { + 0x03bf, 0x1f40, 0x1f41, 0x0000, 0x1f78, 0x1f42, 0x1f43, 0x0000, + 0x1f79, 0x1f44, 0x1f45, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; + unsigned int Omicron[] = { + 0x039f, 0x1f48, 0x1f49, 0x0000, 0x1ff8, 0x1f4a, 0x1f4b, 0x0000, + 0x1ff9, 0x1f4c, 0x1f4d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; +unsigned int ypsilon[] = { + 0x03c5, 0x1f50, 0x1f51, 0x03cb, 0x1f7a, 0x1f52, 0x1f53, 0x1fe2, + 0x1f7b, 0x1f54, 0x1f55, 0x1fe3, 0x1fe6, 0x1f56, 0x1f57, 0x1fe7, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; + unsigned int Ypsilon[] = { + 0x03a5, 0x0000, 0x1f59, 0x03ab, 0x1fea, 0x0000, 0x1f5b, 0x0000, + 0x1feb, 0x0000, 0x1f5d, 0x0000, 0x0000, 0x0000, 0x1f5f, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; +unsigned int omega[] = { + 0x03c9, 0x1f60, 0x1f61, 0x0000, 0x1f7c, 0x1f62, 0x1f63, 0x0000, + 0x1f7d, 0x1f64, 0x1f65, 0x0000, 0x1ff6, 0x1f66, 0x1f67, 0x0000, + 0x1ff3, 0x1fa0, 0x1fa1, 0x0000, 0x1ff2, 0x1fa2, 0x1fa3, 0x0000, + 0x1ff4, 0x1fa4, 0x1fa5, 0x0000, 0x1ff7, 0x1fa6, 0x1fa7, 0x0000 + }; +unsigned int Omega[] = { + 0x03a9, 0x1f68, 0x1f69, 0x0000, 0x1ffa, 0x1f6a, 0x1f6b, 0x0000, + 0x1ffb, 0x1f6c, 0x1f6d, 0x0000, 0x03a9, 0x1f6e, 0x1f6f, 0x0000, 
+ 0x1ffc, 0x1fa8, 0x1fa9, 0x0000, 0x0000, 0x1faa, 0x1fab, 0x0000, + 0x0000, 0x1fac, 0x1fad, 0x0000, 0x0000, 0x1fae, 0x1faf, 0x0000 + }; +unsigned int rho[] = { + 0x03c1, 0x1fe4, 0x1fe5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; +unsigned int Rho[] = { + 0x03a1, 0x0000, 0x1fec, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }; diff --git a/tlgu/tlgu.html b/tlgu/tlgu.html new file mode 100644 index 0000000..30a0016 --- /dev/null +++ b/tlgu/tlgu.html @@ -0,0 +1,565 @@ +<html> +<head> +<meta name="generator" content="groff -Thtml, see www.gnu.org"> +<meta name="Content-Style" content="text/css"> +<title>tlgu</title> +</head> +<body> + +<h1 align=center>tlgu</h1> +<a href="#NAME">NAME</a><br> +<a href="#SYNOPSIS">SYNOPSIS</a><br> +<a href="#DESCRIPTION">DESCRIPTION</a><br> +<a href="#OPTIONS">OPTIONS</a><br> +<a href="#HISTORY AND INTENDED USE">HISTORY AND INTENDED USE</a><br> +<a href="#EXAMPLES">EXAMPLES</a><br> +<a href="#POST-PROCESSING EXAMPLES">POST-PROCESSING EXAMPLES</a><br> +<a href="#REFERENCES">REFERENCES</a><br> +<a href="#COPYRIGHT">COPYRIGHT</a><br> + +<hr> +<!-- Creator : groff version 1.17.2 --> +<!-- CreationDate: Sun Mar 6 13:42:46 2005 --> +<a name="NAME"></a> +<h2>NAME</h2> +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +tlgu - convert TLG (D) CD-ROM txt files to Unicode</td></table> +<a name="SYNOPSIS"></a> +<h2>SYNOPSIS</h2> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" 
align="left"> +<td width="10%"></td><td width="90%"> +<b>tlgu</b> [ <i>options</i> ] <i>input_file +output_file</i></td></table> +<a name="DESCRIPTION"></a> +<h2>DESCRIPTION</h2> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>tlgu</b> will convert an <i>input_file</i> from Thesaurus +Linguae Graecae (TLG) representation to a Unicode (UTF-8) +<i>output_file</i>. The TLG representation consists of +<b>beta-code</b> text and <b>citation</b> +information.</td></table> +<a name="OPTIONS"></a> +<h2>OPTIONS</h2> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-b</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +inserts a form feed and citation information (levels a, b, +c, d) on every "book" citation change. By default +the program will output line feeds only (see also +<b>-p</b>).</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-p</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +observes paging instructions. 
By default the program will +output line feeds only.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-r</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +primarily Roman text. Some TLG texts, notably doccan1.txt +and doccan2.txt are mainly roman texts lacking explicit +language change codes. Setting this option will force a +change to roman text after each citation block is +encountered.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-v</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +highest-level reference citation is included before each +text line (v-level)</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-w</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +reference citation is included before each text line +(w-level)</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-x</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +reference citation is included 
before each text line +(x-level)</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-y</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +reference citation is included before each text line +(y-level)</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-z</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +lowest-level reference citation is included before each text +line (z-level).</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-B</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +inserts blank space (a tab) before each and every +line.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-C</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +citation debug information is output.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> 
+<b>-S</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +special code debug information is output.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-V</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +block processing information is output +(verbose).</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>-W</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +each work (book) is output as a separate file in the form +output_file-xxx.txt</td></table> +<a name="HISTORY AND INTENDED USE"></a> +<h2>HISTORY AND INTENDED USE</h2> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +The purpose of <b>tlgu</b> is to translate binary TLG-format +files into readable and editable text. 
It is based on an +earlier program written in 80x86 assembly language (1996) +outputting codes for a home-made font which used the +prevalent hellenic font encodings of that time complemented +by dead accent characters - not very attractive, but +readable.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +Then came Unicode and a plethora of accented character +glyphs; nice-looking but with the well-known drawback that +special processing is needed to do wild-card searches. Nice +polytonic fonts have now been made available (Cardo, +Gentium, Athena, Athenian, Porson) and, surely, these will +be expanded as special-use code points are included in the +Unicode definition (musical symbols, other special symbols) +and more fonts will be created.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +So, at this point in time, <b>tlgu</b> will crunch a file +which has been formatted according to the published TLG-D +format and produce codes for most glyphs generally +available. No attempt has been made to introduce +multi-character sequences or formatting codes (font +changes). If a code has not been defined, the program will +output the respective "code family" glyph. You may +use the <b>-S</b> option to check such codes against the +published beta code definition.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +You may not like the character output for a specific code. +Check out the <b>tlgcodes.h</b> file containing the special +symbol and punctuation codes and select one to suit you +better. 
It will probably be a while before the beta to +Unicode correspondence settles down.</td></table> +<a name="EXAMPLES"></a> +<h2>EXAMPLES</h2> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>./tlgu -r DOCCAN2.TXT doccanu.txt</b> Translate the TLG +canon to a Unicode text file. Note the use of the <b>-r</b> +option (this file expects Roman as the default +font).</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>./tlgu -x -y -z TLG1799.TXT tlg1799u.txt</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +Generate a continuous file with the texts of grandpa +Euclides. Available citations (-x -y -z) are +Book//demonstratio/line as shown in the respective +"cit" field of doccan2.txt.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>./tlgu -b -B TLG1799.TXT tlg1799u.txt</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +Generate the same texts, this time with a page feed and book +citation information on the first page of each book and a +tab before each line (use with OOo versions earlier than +1.1.4).</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>./tlgu -C TLG1799.TXT tlg1799u.txt</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" 
+ cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +See how the citation information changes within each TLG +block.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>./tlgu -S TLG1799.TXT tlg1799u.txt | sort > +symbols1799.txt</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +Check out the symbols used in a work. Book and x, y, z +references are printed on a separate line for each symbol. +Sort / grep the output to locate specific symbols of +interest; save in a file for later use.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>./tlgu -W TLG0006.TXT tlg0006u</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +Will produce separate files for each work, named +tlg0006u-001.txt etc.</td></table> +<a name="POST-PROCESSING EXAMPLES"></a> +<h2>POST-PROCESSING EXAMPLES</h2> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +I use the OpenOffice suite for most of my work. 
This example +shows one of many possible ways of using the search and +replace facility to create a readable version of the Suda +lexicon.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>./tlgu -B TLG4085.TXT tlg4085u.txt</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +A Unicode file with the text is created</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>Open the generated file with OOo:</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +File | Open | Filename: tlg4085u.txt, File Type: Text +Encoded -- Press Open</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +The ASCII Filter Options window appears. Select the Unicode +(UTF-8) character set and a proper Unicode font installed in +your machine (e.g. Cardo). Press OK.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>Replace angle brackets with expanded +text</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +Lexicon terms are enclosed in <angle brackets>. The +actual beta codes indicate the use of expanded text for +emphasis. 
Select Edit | Find & Replace. The <b>Find +& Replace</b> window appears.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +In the <b>Search For</b> field, type the following +expression: <b><[^<>]*></b> This means +"find any characters between angle brackets, not +including angle brackets".</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +In the <b>Replace With</b> window insert a single ampersand: +<b>&</b> This means that we need to <b>add</b> +formatting information (in this case) or additional text to the +text found. Press <b>Format...</b> and select the +<b>Position</b> tab; select Spacing Expanded by 2.0 points. +Press OK.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +Check the <b>Regular Expressions</b> box and press +<b>Replace All</b>.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +You may now replace the angle brackets with +nothing.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +Repeat the above procedure for titles enclosed in {braces}. 
+Write a macro...</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +<b>Other useful information</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +In the "Execute" tab of the "Properties" +window of my KDesktop Link to Application I have the +following command (single line):<b><br> +LC_CTYPE=el_GR.UTF-8 +/whereitsat/OpenOffice.org1.1.x/soffice</b><br> +The prefix, an environment variable, allows you to use the +same program with different locales; in this case, hellenic +Unicode (UTF-8).</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +I put my default locale and keyboard definitions in my +<b>.profile</b>:<b><br> +export LC_CTYPE=el_GR.UTF-8<br> +setxkbmap us+el polytonic -option +grp:ctrl_shift_toggle</b></td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="21%"></td><td width="79%"> +This way multi-lingual text can be entered; keyboard layout +switching is done by pressing Ctrl/Shift.</td></table> +<a name="REFERENCES"></a> +<h2>REFERENCES</h2> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +There are several texts describing the internal +representation of <b>PHI</b> and <b>TLG</b> text, ID data, +citation data and index files. The originator of this format +is the Packard Humanities Institute. 
The TLG is maintained +by UCI - see <b>www.tlg.uci.edu</b> - where you may find the +<b>TLG Beta Code Manual</b> and the <b>TLG Beta Code Quick +Reference Guide</b>.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +Unicode consortium publications pertaining to the +codification of characters used in Hellenic literature, +scientific and musical texts.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +The OpenOffice suite (<b>www.openoffice.org</b>) includes a +word processor that you can use to load, process and create +new polytonic texts.</td></table> +<a name="COPYRIGHT"></a> +<h2>COPYRIGHT</h2> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +Copyright (C) 2004, 2005 Dimitri Marinakis (dm ssa +gr).</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +This program is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public +License (version 2) as published by the Free Software +Foundation.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +This program is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied +warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +PURPOSE. 
See the GNU General Public License for more +details.</td></table> + +<table width="100%" border=0 rules="none" frame="void" + cols="2" cellspacing="0" cellpadding="0"> +<tr valign="top" align="left"> +<td width="10%"></td><td width="90%"> +You should have received a copy of the GNU General Public +License along with this program; if not, write to the Free +Software Foundation, Inc., 59 Temple Place, Suite 330, +Boston, MA 02111-1307 USA</td></table> +<hr> +</body> +</html> diff --git a/tlgu/tlgu.pdf b/tlgu/tlgu.pdf Binary files differnew file mode 100644 index 0000000..d8d501e --- /dev/null +++ b/tlgu/tlgu.pdf diff --git a/tlgu/tlgu.ps b/tlgu/tlgu.ps new file mode 100644 index 0000000..2c97404 --- /dev/null +++ b/tlgu/tlgu.ps @@ -0,0 +1,421 @@ +%!PS-Adobe-3.0 +%%Creator: groff version 1.17.2 +%%CreationDate: Sun Mar 6 13:42:52 2005 +%%DocumentNeededResources: font Times-Roman +%%+ font Times-Bold +%%+ font Times-Italic +%%DocumentSuppliedResources: procset grops 1.17 2 +%%Pages: 3 +%%PageOrder: Ascend +%%Orientation: Portrait +%%EndComments +%%BeginProlog +%%BeginResource: procset grops 1.17 2 +/setpacking where{ +pop +currentpacking +true setpacking +}if +/grops 120 dict dup begin +/SC 32 def +/A/show load def +/B{0 SC 3 -1 roll widthshow}bind def +/C{0 exch ashow}bind def +/D{0 exch 0 SC 5 2 roll awidthshow}bind def +/E{0 rmoveto show}bind def +/F{0 rmoveto 0 SC 3 -1 roll widthshow}bind def +/G{0 rmoveto 0 exch ashow}bind def +/H{0 rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/I{0 exch rmoveto show}bind def +/J{0 exch rmoveto 0 SC 3 -1 roll widthshow}bind def +/K{0 exch rmoveto 0 exch ashow}bind def +/L{0 exch rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/M{rmoveto show}bind def +/N{rmoveto 0 SC 3 -1 roll widthshow}bind def +/O{rmoveto 0 exch ashow}bind def +/P{rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def +/Q{moveto show}bind def +/R{moveto 0 SC 3 -1 roll widthshow}bind def +/S{moveto 0 exch ashow}bind def +/T{moveto 0 exch 0 SC 5 2 roll 
awidthshow}bind def +/SF{ +findfont exch +[exch dup 0 exch 0 exch neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/MF{ +findfont +[5 2 roll +0 3 1 roll +neg 0 0]makefont +dup setfont +[exch/setfont cvx]cvx bind def +}bind def +/level0 0 def +/RES 0 def +/PL 0 def +/LS 0 def +/MANUAL{ +statusdict begin/manualfeed true store end +}bind def +/PLG{ +gsave newpath clippath pathbbox grestore +exch pop add exch pop +}bind def +/BP{ +/level0 save def +1 setlinecap +1 setlinejoin +72 RES div dup scale +LS{ +90 rotate +}{ +0 PL translate +}ifelse +1 -1 scale +}bind def +/EP{ +level0 restore +showpage +}bind def +/DA{ +newpath arcn stroke +}bind def +/SN{ +transform +.25 sub exch .25 sub exch +round .25 add exch round .25 add exch +itransform +}bind def +/DL{ +SN +moveto +SN +lineto stroke +}bind def +/DC{ +newpath 0 360 arc closepath +}bind def +/TM matrix def +/DE{ +TM currentmatrix pop +translate scale newpath 0 0 .5 0 360 arc closepath +TM setmatrix +}bind def +/RC/rcurveto load def +/RL/rlineto load def +/ST/stroke load def +/MT/moveto load def +/CL/closepath load def +/FL{ +currentgray exch setgray fill setgray +}bind def +/BL/fill load def +/LW/setlinewidth load def +/RE{ +findfont +dup maxlength 1 index/FontName known not{1 add}if dict begin +{ +1 index/FID ne{def}{pop pop}ifelse +}forall +/Encoding exch def +dup/FontName exch def +currentdict end definefont pop +}bind def +/DEFS 0 def +/EBEGIN{ +moveto +DEFS begin +}bind def +/EEND/end load def +/CNT 0 def +/level1 0 def +/PBEGIN{ +/level1 save def +translate +div 3 1 roll div exch scale +neg exch neg exch translate +0 setgray +0 setlinecap +1 setlinewidth +0 setlinejoin +10 setmiterlimit +[]0 setdash +/setstrokeadjust where{ +pop +false setstrokeadjust +}if +/setoverprint where{ +pop +false setoverprint +}if +newpath +/CNT countdictstack def +userdict begin +/showpage{}def +}bind def +/PEND{ +clear +countdictstack CNT sub{end}repeat +level1 restore +}bind def +end def +/setpacking where{ 
+pop +setpacking +}if +%%EndResource +%%IncludeResource: font Times-Roman +%%IncludeResource: font Times-Bold +%%IncludeResource: font Times-Italic +grops begin/DEFS 1 dict def DEFS begin/u{.001 mul}bind def end/RES 72 +def/PL PLG def/LS false def/ENC0[/asciicircum/asciitilde/Scaron/Zcaron +/scaron/zcaron/Ydieresis/trademark/quotesingle/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef +/.notdef/.notdef/space/exclam/quotedbl/numbersign/dollar/percent +/ampersand/quoteright/parenleft/parenright/asterisk/plus/comma/hyphen +/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon +/semicolon/less/equal/greater/question/at/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O +/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash/bracketright/circumflex +/underscore/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y +/z/braceleft/bar/braceright/tilde/.notdef/quotesinglbase/guillemotleft +/guillemotright/bullet/florin/fraction/perthousand/dagger/daggerdbl +/endash/emdash/ff/fi/fl/ffi/ffl/dotlessi/dotlessj/grave/hungarumlaut +/dotaccent/breve/caron/ring/ogonek/quotedblleft/quotedblright/oe/lslash +/quotedblbase/OE/Lslash/.notdef/exclamdown/cent/sterling/currency/yen +/brokenbar/section/dieresis/copyright/ordfeminine/guilsinglleft +/logicalnot/minus/registered/macron/degree/plusminus/twosuperior +/threesuperior/acute/mu/paragraph/periodcentered/cedilla/onesuperior +/ordmasculine/guilsinglright/onequarter/onehalf/threequarters +/questiondown/Agrave/Aacute/Acircumflex/Atilde/Adieresis/Aring/AE +/Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute/Icircumflex +/Idieresis/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis +/multiply/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn +/germandbls/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla +/egrave/eacute/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis 
+/eth/ntilde/ograve/oacute/ocircumflex/otilde/odieresis/divide/oslash +/ugrave/uacute/ucircumflex/udieresis/yacute/thorn/ydieresis]def +/Times-Italic@0 ENC0/Times-Italic RE/Times-Bold@0 ENC0/Times-Bold RE +/Times-Roman@0 ENC0/Times-Roman RE +%%EndProlog +%%Page: 1 1 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5 +E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F/F1 10.95/Times-Bold@0 SF +-.219(NA)72 84 S(ME).219 E F0(tlgu \255 con)108 96 Q -.15(ve)-.4 G +(rt TLG \(D\) CD-R).15 E(OM txt \214les to Unicode)-.4 E F1(SYNOPSIS)72 +124.8 Q/F2 10/Times-Bold@0 SF(tlgu)108 136.8 Q F0([)2.5 E/F3 10 +/Times-Italic@0 SF(options)2.5 E F0(])2.5 E F3 +(input_\214le output_\214le)2.5 E F1(DESCRIPTION)72 165.6 Q F2(tlgu)108 +177.6 Q F0 .269(will con)2.769 F -.15(ve)-.4 G .269(rt an).15 F F3 +(input_\214le)2.769 E F0 .268(from Thesaurus Linguae Graeca \(TLG\) rep\ +resentation to a Unicode \(UTF-8\))2.769 F F3(output_\214le)108 189.6 Q +F0 5(.T)C(he TLG representation consists of)-5 E F2(beta-code)2.5 E F0 +(te)2.5 E(xt and)-.15 E F2(citation)2.5 E F0(information.)2.5 E F1 +(OPTIONS)72 218.4 Q F2<ad62>108 230.4 Q F0 .218 +(inserts a form feed and citation information \(le)24.74 F -.15(ve)-.25 +G .218(ls a, b, c, d\) on e).15 F -.15(ve)-.25 G .219 +(ry "book" citation change.).15 F(By)5.219 E(def)144 242.4 Q +(ault the program will output line feeds only \(see also)-.1 E F2<ad70> +2.5 E F0(\).)A F2<ad70>108 259.2 Q F0(observ)24.74 E +(es paging instructions.)-.15 E(By def)5 E +(ault the program will output line feeds only)-.1 E(.)-.65 E F2<ad72>108 +276 Q F0 1.362(primarily Roman te)25.86 F 1.362(xt. 
Some TLG te)-.15 F +1.361(xts, notably doccan1.txt and doccan2.txt are mainly roman)-.15 F +(te)144 288 Q .192(xts lacking e)-.15 F .192 +(xplicit language change codes.)-.15 F .193 +(Setting this option will force a change to roman te)5.193 F(xt)-.15 E +(after each citation block is encountered.)144 300 Q F2<ad76>108 316.8 Q +F0(highest-le)25.3 E -.15(ve)-.25 G 2.5(lr).15 G +(eference citation is included before each te)-2.5 E(xt line \(v-le)-.15 +E -.15(ve)-.25 G(l\)).15 E F2<ad77>108 333.6 Q F0 +(reference citation is included before each te)23.08 E(xt line \(w-le) +-.15 E -.15(ve)-.25 G(l\)).15 E F2<ad78>108 350.4 Q F0 +(reference citation is included before each te)25.3 E(xt line \(x-le) +-.15 E -.15(ve)-.25 G(l\)).15 E F2<ad79>108 367.2 Q F0 +(reference citation is included before each te)25.3 E(xt line \(y-le) +-.15 E -.15(ve)-.25 G(l\)).15 E F2<ad7a>108 384 Q F0(lo)25.86 E(west-le) +-.25 E -.15(ve)-.25 G 2.5(lr).15 G +(eference citation is included before each te)-2.5 E(xt line \(z-le)-.15 +E -.15(ve)-.25 G(l\).).15 E F2<ad42>108 412.8 Q F0 +(inserts blank space \(a tab\) before each and e)23.63 E -.15(ve)-.25 G +(ry line.).15 E F2<ad43>108 429.6 Q F0(citation deb)23.08 E +(ug information is output.)-.2 E F2<ad53>108 446.4 Q F0 +(special code deb)24.74 E(ug information is output.)-.2 E F2<ad56>108 +463.2 Q F0(block processing information is output \(v)23.08 E(erbose\).) +-.15 E F2<ad57>108 480 Q F0(each w)20.3 E(ork \(book\) is output as a s\ +eparate \214le in the form output_\214le-xxx.txt)-.1 E F1(HIST)72 508.8 +Q(OR)-.197 E 2.738(YA)-.383 G(ND INTENDED USE)-2.738 E F0 .103 +(The purpose of)108 520.8 R F2(tlgu)2.602 E F0 .102(is to translate bin\ +ary TLG-format \214les into readable and editable te)2.602 F 2.602 +(xt. 
It)-.15 F .102(is based on an)2.602 F .624(earlier program written\ + in 80x86 assembly language \(1996\) outputting codes for a home-made f\ +ont which)108 532.8 R .485(used the pre)108 544.8 R -.25(va)-.25 G .484 +(lent hellenic font encodings of that time complemented by dead accent \ +characters - not v).25 F(ery)-.15 E(attracti)108 556.8 Q -.15(ve)-.25 G +2.5(,b).15 G(ut readable.)-2.7 E 1.412(Then came Unicode and a plethora\ + of accented character glyphs; nice-looking b)108 580.8 R 1.413 +(ut with the well-kno)-.2 F(wn)-.25 E(dra)108 592.8 Q .616 +(wback that special processing is needed to do wild-card searches.)-.15 +F .616(Nice polytonic fonts ha)5.616 F .916 -.15(ve n)-.2 H 1.116 -.25 +(ow b).15 H(een).25 E .277(made a)108 604.8 R -.25(va)-.2 G .278 +(ilable \(Cardo, Gentium, Athena, Athenian, Porson\) and, surely).25 F +2.778(,t)-.65 G .278(hese will be e)-2.778 F .278(xpanded as special-) +-.15 F .581(use code points are included in the Unicode de\214nition \(\ +musical symbols, other special symbols\) and more)108 616.8 R +(fonts will be created.)108 628.8 Q .034(So, at this point in time,)108 +652.8 R F2(tlgu)2.534 E F0 .034(will crunch a \214le which has been for\ +matted according to the published TLG-D)2.534 F 1.41 +(format and produce codes for most glyphs generally a)108 664.8 R -.25 +(va)-.2 G 3.909(ilable. 
No).25 F 1.409 +(attempt has been made to introduce)3.909 F .437 +(multi-character sequences or formatting codes \(font changes\).)108 +676.8 R .438(If a code has not been de\214ned, the program)5.437 F .238 +(will output the respecti)108 688.8 R .538 -.15(ve ")-.25 H .238(code f) +.15 F .238(amily" glyph.)-.1 F -1.1(Yo)5.238 G 2.738(um)1.1 G .238 +(ay use the)-2.738 F F2<ad53>2.738 E F0 .237 +(option to check such codes ag)2.738 F .237(ainst the)-.05 F +(published beta code de\214nition.)108 700.8 Q -1.1(Yo)108 724.8 S 3.868 +(um)1.1 G 1.368(ay not lik)-3.868 F 3.868(et)-.1 G 1.368 +(he character output for a speci\214c code.)-3.868 F 1.368 +(Check out the)6.368 F F2(tlgcodes.h)3.869 E F0 1.369 +(\214le containing the)3.869 F -1.11(Ve)72 768 S(rsion 1.1)1.11 E +(Feb, 2005)168.45 E(1)209 E EP +%%Page: 2 2 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5 +E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F .577 +(special symbol and punctuation codes and select one to suit you better) +108 84 R 5.577(.I)-.55 G 3.077(tw)-5.577 G .577 +(ill probably be a while before)-3.077 F +(the beta to Unicode correspondence settles do)108 96 Q(wn.)-.25 E/F1 +10.95/Times-Bold@0 SF(EXAMPLES)72 136.8 Q/F2 10/Times-Bold@0 SF .363 +(./tlgu -r DOCCAN2.TXT doccanu.txt)108 148.8 R F0 -.35(Tr)2.863 G .363 +(anslate the TLG canon to a unicode te).35 F .363 +(xt \214le. Note the use of the)-.15 F F2(-r)108 160.8 Q F0 +(option \(this \214le e)2.5 E(xpects Roman as the def)-.15 E +(ault font\).)-.1 E F2(./tlgu -x -y -z TLG1799.TXT tlg1799u.txt)108 +177.6 Q F0 1.667(Generate a continuous \214le with the te)144 189.6 R +1.667(xts of granpa Euclides. 
A)-.15 F -.25(va)-.74 G 1.666 +(ilable citations \(-x -y -z\) are).25 F(Book//demonstratio/line as sho) +144 201.6 Q(wn in the respecti)-.25 E .3 -.15(ve ")-.25 H +(cit" \214eld of doccan2.txt.).15 E F2 +(./tlgu -b -B TLG1799.TXT tlg1799u.txt)108 218.4 Q F0 .267 +(Generate the same te)144 230.4 R .267(xts, this time with a page feed \ +and book citation information on the \214rst page)-.15 F +(of each book and a tab before each line \(use with OOo v)144 242.4 Q +(ersions earlier than 1.1.4\).)-.15 E F2 +(./tlgu -C TLG1799.TXT tlg1799u.txt)108 259.2 Q F0(See ho)144 271.2 Q +2.5(wt)-.25 G(he citation information changes within each TLG block.) +-2.5 E F2(./tlgu -S TLG1799.TXT tlg1799u.txt | sort > symbols1799.txt) +108 288 Q F0 .521(Check out the symbols used in a w)144 300 R 3.021 +(ork. Book)-.1 F .521(and x, y)3.021 F 3.02(,zr)-.65 G .52 +(eferences are printed on a separate line)-3.02 F .19(for each symbol. \ +Sort / grep the output to locate speci\214c symbols of interest; sa)144 +312 R .491 -.15(ve i)-.2 H 2.691(na\214).15 G .191(le for later)-2.691 F +(use.)144 324 Q F2(./tlgu -W TLG0006.TXT tlg0006u)108 340.8 Q F0 -.4(Wi) +144 352.8 S(ll produce separate \214les for each w).4 E +(ork, named tlg006u-001.txt etc.)-.1 E F1(POST)72 381.6 Q(-PR)-1.007 E +(OCESSING EXAMPLES)-.329 E F0 2.939(Iu)108 393.6 S .439(se the OpenOf) +-2.939 F .439(\214ce suite for most of my w)-.25 F 2.938(ork. 
This)-.1 F +-.15(ex)2.938 G .438(ample sho).15 F .438(ws one of man)-.25 F 2.938(yp) +-.15 G .438(ossible w)-2.938 F .438(ays of using)-.1 F +(the search and replace f)108 405.6 Q(acility to create a readable v)-.1 +E(ersion of the Suda le)-.15 E(xicon.)-.15 E F2 +(./tlgu -B TLG4085.TXT tlg4085u.txt)108 422.4 Q F0 2.5(AU)144 434.4 S +(nicode \214le with the te)-2.5 E(xt is created)-.15 E F2 +(Open the generated \214le with OOo:)108 451.2 Q F0 +(File | Open | Filename: tlg4085u.txt, File T)144 463.2 Q(ype: T)-.8 E +-.15(ex)-.7 G 2.5(tE).15 G(ncoded \255\255 Press Open)-2.5 E .274 +(The ASCII Filter Options windo)144 487.2 R 2.774(wa)-.25 G .274 +(ppears. Select the Unicode \(UTF-8\) character set and a proper)-2.774 +F(Unicode font installed in your machine \(e.g. Cardo\).)144 499.2 Q +(Press OK.)5 E F2(Replace angle brack)108 516 Q(ets with expanded text) +-.1 E F0(Le)144 528 Q 2.545(xicon terms are enclosed in <angle brack) +-.15 F 5.044(ets>. The)-.1 F 2.544 +(actual beta codes indicate the use of)5.044 F -.15(ex)144 540 S .158 +(panded te).15 F .158(xt for emphasis.)-.15 F .159 +(Select Edit | Find & Replace.)5.158 F(The)5.159 E F2 .159 +(Find & Replace)2.659 F F0(windo)2.659 E 2.659(wa)-.25 G(ppears.)-2.659 +E .468(In the)144 564 R F2(Sear)2.968 E .468(ch F)-.18 F(or)-.25 E F0 +.468(\214eld, type the follo)2.968 F .468(wing e)-.25 F(xpression:)-.15 +E F2(<[^<>]*>)2.968 E F0 .468(This means "\214nd an)2.968 F 2.967(yc) +-.15 G(haracters)-2.967 E(between angle brack)144 576 Q +(ets, not including angle brack)-.1 E(ets".)-.1 E .768(In the)144 600 R +F2 .768(Replace W)3.268 F(ith)-.18 E F0(windo)3.269 E 3.269(wi)-.25 G +.769(nsert a single ampersand:)-3.269 F F2(&)3.269 E F0 .769 +(This means that we need to)3.269 F F2(add)3.269 E F0(for)3.269 E(-)-.2 +E .403(matting information \(this case\) or additional te)144 612 R .403 +(xt to the te)-.15 F .403(xt found.)-.15 F(Press)5.402 E F2 -.25(Fo) +2.902 G(rmat...).25 E F0 .402(and select the)2.902 F F2 -.2(Po)144 624 S 
+(sition).2 E F0(tab; select Spacing Expanded by 2.0 points.)2.5 E +(Press OK.)5 E(Check the)144 648 Q F2(Regular Expr)2.5 E(essions)-.18 E +F0(box and press)2.5 E F2(Replace All)2.5 E F0(.)A -1.1(Yo)144 672 S 2.5 +(um)1.1 G(ay no)-2.5 E 2.5(wr)-.25 G(eplace the angle brack)-2.5 E +(ets with nothings.)-.1 E(Repeat the abo)144 696 Q .3 -.15(ve p)-.15 H +(rocedure for titles enclosed in {braces}.).15 E(Write a macro...)5 E +-1.11(Ve)72 768 S(rsion 1.1)1.11 E(Feb, 2005)168.45 E(2)209 E EP +%%Page: 3 3 +%%BeginPageSetup +BP +%%EndPageSetup +/F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5 +E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F/F1 10/Times-Bold@0 SF +(Other useful inf)108 84 Q(ormation)-.25 E F0 .759(In the "Ex)144 96 R +.759(ecute" tab of the "Properties" windo)-.15 F 3.259(wo)-.25 G 3.259 +(fm)-3.259 G 3.259(yK)-3.259 G .76(Desktop Link to Application I ha) +-3.259 F 1.06 -.15(ve t)-.2 H(he).15 E(follo)144 108 Q +(wing command \(single line\):)-.25 E F1(LC_CTYPE=el_GR.UTF-8 /wher)144 +120 Q(eitsat/OpenOf\214ce.or)-.18 E(g1.1.x/sof\214ce)-.1 E F0 .278 +(The pre\214x, an en)144 132 R .278(vironment v)-.4 F .278 +(ariable, allo)-.25 F .277(ws you to use the same program with dif)-.25 +F .277(ferent locales; in)-.25 F(this case, hellenic Unicode \(UTF-8\).) 
+144 144 Q 2.5(Ip)144 168 S(ut my def)-2.5 E(ault locale and k)-.1 E -.15 +(ey)-.1 G(board de\214nitions in my).15 E F1(.pr)2.5 E(o\214le)-.18 E F0 +(:)A F1(export LC_CTYPE=el_GR.UTF-8)144 180 Q +(setxkbmap us+el polytonic -option gr)144 192 Q(p:ctrl_shift_toggle)-.1 +E F0(This w)144 216 Q(ay multi-lingual te)-.1 E(xt can be entered;)-.15 +E -.1(ke)5 G(yboard layout switching is done by pressing)-.05 E +(Ctrl/Shift.)144 228 Q/F2 10.95/Times-Bold@0 SF(REFERENCES)72 244.8 Q F0 +(There are se)108 256.8 Q -.15(ve)-.25 G(ral te).15 E +(xts describing the internal representation of)-.15 E F1(PHI)2.5 E F0 +(and)2.5 E F1(TLG)2.5 E F0(te)2.5 E(xt, ID data, citation data)-.15 E +(and inde)108 268.8 Q 2.5<788c>-.15 G 2.5(les. The)-2.5 F +(originator of this format is the P)2.5 E(ackard Humanities Institute.) +-.15 E(The TLG is maintained)5 E(by UCI \255 see)108 280.8 Q F1(www)2.5 +E(.tlg)-.7 E(.uci.edu)-.15 E F0 2.5<ad77>2.5 G(here you may \214nd the) +-2.5 E F1(TLG Beta Code Manual)2.5 E F0(and the)2.5 E F1(TLG Beta)2.5 E +(Code Quick Refer)108 292.8 Q(ence Guide)-.18 E F0(.)A(Unicode consorti\ +um publications pertaining to the codi\214cation of characters used in \ +Hellenic literature, sci-)108 316.8 Q(enti\214c and musical te)108 328.8 +Q(xts.)-.15 E(The OpenOf)108 352.8 Q(\214ce suite \()-.25 E F1(www)A +(.openof\214ce.or)-.7 E(g)-.1 E F0 2.5(\)i)C(ncludes a w)-2.5 E +(ord processor that you can use to load, process)-.1 E(and create ne)108 +364.8 Q 2.5(wp)-.25 G(olytonic te)-2.5 E(xts.)-.15 E F2(COPYRIGHT)72 +393.6 Q F0(Cop)108 405.6 Q +(yright \(C\) 2004, 2005 Dimitri Marinakis \(dm ssa gr\).)-.1 E +(This program is free softw)108 429.6 Q(are; you can redistrib)-.1 E +(ute it and/or modify it under the terms of the GNU General)-.2 E +(Public License \(v)108 441.6 Q +(ersion 2\) as published by the Free Softw)-.15 E(are F)-.1 E +(oundation.)-.15 E(This program is distrib)108 465.6 Q +(uted in the hope that it will be useful, b)-.2 E(ut WITHOUT ANY W)-.2 E +(ARRANTY)-1.2 
E 2.5(;w)-.92 G(ithout)-2.5 E -2.15 -.25(ev e)108 477.6 T +2.5(nt).25 G(he implied w)-2.5 E(arranty of MERCHANT)-.1 E +(ABILITY or FITNESS FOR A P)-.93 E(AR)-.92 E(TICULAR PURPOSE.)-.6 E(See) +5 E(the GNU General Public License for more details.)108 489.6 Q -1.1 +(Yo)108 513.6 S 2.5(us)1.1 G(hould ha)-2.5 E .3 -.15(ve r)-.2 H(ecei).15 +E -.15(ve)-.25 G 2.5(dac).15 G(op)-2.5 E 2.5(yo)-.1 G 2.5(ft)-2.5 G +(he GNU General Public License along with this program; if not, write) +-2.5 E(to the Free Softw)108 525.6 Q(are F)-.1 E(oundation, Inc., 59 T) +-.15 E(emple Place, Suite 330, Boston, MA)-.7 E 2.5(02111-1307 USA)5 F +-1.11(Ve)72 768 S(rsion 1.1)1.11 E(Feb, 2005)168.45 E(3)209 E EP +%%Trailer +end +%%EOF |