summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTroy A. Griffitts <scribe@crosswire.org>2005-11-23 18:23:56 +0000
committerTroy A. Griffitts <scribe@crosswire.org>2005-11-23 18:23:56 +0000
commitbe8bc1114804330e32b991cdf13a1ce25a0f0067 (patch)
tree70b5f367b92d0c11cda78fc05369e8efa2d837cb
parent89e2be0015cae6a94c55c511ddc9863e16ae1314 (diff)
downloadsword-tools-be8bc1114804330e32b991cdf13a1ce25a0f0067.tar.gz
Added TLGU conversion stuff
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@53 07627401-56e2-0310-80f4-f8cd0041bdcd
-rw-r--r--modules/lxxm/lxxm.jarbin26328 -> 1440 bytes
-rw-r--r--modules/lxxm/lxxm.jpx.local21
-rw-r--r--tlgu/Makefile5
-rw-r--r--tlgu/README1
-rwxr-xr-xtlgu/convert.sh13
-rw-r--r--tlgu/tlgcodes.h322
-rw-r--r--tlgu/tlgu.1192
-rw-r--r--tlgu/tlgu.c1344
-rw-r--r--tlgu/tlgu.h221
-rw-r--r--tlgu/tlgu.html565
-rw-r--r--tlgu/tlgu.pdfbin0 -> 18051 bytes
-rw-r--r--tlgu/tlgu.ps421
12 files changed, 3096 insertions, 9 deletions
diff --git a/modules/lxxm/lxxm.jar b/modules/lxxm/lxxm.jar
index ebbad2b..4ac670b 100644
--- a/modules/lxxm/lxxm.jar
+++ b/modules/lxxm/lxxm.jar
Binary files differ
diff --git a/modules/lxxm/lxxm.jpx.local b/modules/lxxm/lxxm.jpx.local
index bbf4d02..96b1b1d 100644
--- a/modules/lxxm/lxxm.jpx.local
+++ b/modules/lxxm/lxxm.jpx.local
@@ -1,20 +1,23 @@
build.menu.1[0]=com.borland.jbuilder.build.ProjectBuilder$ProjectBuildAction;make
build.menu.2[0]=com.borland.jbuilder.build.ProjectBuilder$ProjectBuildAction;rebuild
-content.layout.xml.[0]=<project-layout><tab_container selected_index="0"><tab node_name="src/lxxm/LXXMConv.java"/><tab node_name="[/space/usr/local/jdk1.5.0_04/src.zip]/java/lang/String.java"/></tab_container></project-layout>
+content.layout.xml.[0]=<project-layout><tab_container selected_index="0"><tab node_name="src/lxxm/LXXMConv.java"/></tab_container></project-layout>
debug.NoTracingClasses.1[0]=16 java.lang.Object1 1 -1
debug.NoTracingClasses.2[0]=21 java.lang.ClassLoader1 1 -1
debug.NoTracingClasses.3[0]=16 java.lang.String1 1 -1
debug.SplitThreadsAndDataView[0]=0
-history.files.active[0]=src/lxxm/LXXMConv.java,F;19,2247
-history.files.open.1[0]=src/lxxm/LXXMConv.java,F;19,2247
-history.files.open.2[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/java/lang/String.java,F;1743,69911
+history.files.active[0]=src/lxxm/LXXMConv.java,F;14,289
+history.files.open.1[0]=src/lxxm/LXXMConv.java,F;14,289
import.optimize.legacyPropertiesRead[0]=1
packagebrowser.lastClassName[0]=lxxm.LXXMConv
sys.Author[0]=
sys.DefaultPackage[0]=lxxm
-sys.ReopenHistory.1[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/java/io/BufferedReader.java
-sys.ReopenHistory.2[0]=src/greekconverter/BetacodeToUnicode.java
-sys.ReopenHistory.3[0]=classes/greekconverter/UC.class
-sys.ReopenHistory.4[0]=src/lxxm/Untitled1.java
-team.lastScanned[0]=1128944865968
+sys.ReopenHistory.1[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/javax/xml/xpath/XPath.java
+sys.ReopenHistory.2[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/javax/xml/xpath/XPathFactory.java
+sys.ReopenHistory.3[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/javax/xml/xpath/XPathExpression.java
+sys.ReopenHistory.4[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/java/lang/String.java
+sys.ReopenHistory.5[0]=[/space/usr/local/jdk1.5.0_04/src.zip]/java/io/BufferedReader.java
+sys.ReopenHistory.6[0]=src/greekconverter/BetacodeToUnicode.java
+sys.ReopenHistory.7[0]=classes/greekconverter/UC.class
+sys.ReopenHistory.8[0]=src/lxxm/Untitled1.java
+team.lastScanned[0]=1132281572356
wizard.package.package_history.1[0]=lxxm
diff --git a/tlgu/Makefile b/tlgu/Makefile
new file mode 100644
index 0000000..f0cdbb4
--- /dev/null
+++ b/tlgu/Makefile
@@ -0,0 +1,5 @@
+tlgu: tlgcodes.h tlgu.c tlgu.h
+ $(CC) -o tlgu tlgu.c
+
+clean:
+ rm -rf *.o tlgu
diff --git a/tlgu/README b/tlgu/README
new file mode 100644
index 0000000..dbef3a1
--- /dev/null
+++ b/tlgu/README
@@ -0,0 +1 @@
+man -l tlgu.1 \ No newline at end of file
diff --git a/tlgu/convert.sh b/tlgu/convert.sh
new file mode 100755
index 0000000..053bf9d
--- /dev/null
+++ b/tlgu/convert.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# Runs ./tlgu on every file matching "$1"[0-8]*.txt and concatenates the results into all.out.
+# usage example: ./convert.sh /mnt/cdrom/PHI7/ddp
+#
+#./tlgu -r -Z Z:%Z/y:%y/z:%z/a%a/b%b/c%c/d%d/n%n/x%x/D%D/L%L/R%R/T%T/ $i tmp.out
+# $1 and $i are quoted below so that paths containing whitespace are not word-split.
+
+echo "" > all.out
+for i in "$1"[0-8]*.txt
+do
+./tlgu -e 0 -Z \$\$\$/%d/%c/%Z/%y/%z\\n "$i" tmp.out
+cat tmp.out >> all.out
+done
diff --git a/tlgu/tlgcodes.h b/tlgu/tlgcodes.h
new file mode 100644
index 0000000..4586048
--- /dev/null
+++ b/tlgu/tlgcodes.h
@@ -0,0 +1,322 @@
+/* tlgcodes.h
+ *
+ * Copyright (C) 2004 Dimitri Marinakis
+ *
+ * Licensed under the terms of the GNU General Public License.
+ * ABSOLUTELY NO WARRANTY.
+ * See the file `COPYING' in this directory.
+ *
+ * Code arrays for escape sequences
+ * See: handle_escape_codes
+ *
+ */
+
+/* Punctuation codes (%) 0 - 179 */
+/*FIXME: check or find symbols for
+ %29, %31, %47-%49, (%50 - %81), %98, %105, %110, <%128>, %138, %140, %144,
+ %145, %157, %171
+*/
+#define MAX_PUNCTUATION 180
+unsigned int punctuation[] = {
+ 0x2020, 0x003f, 0x002a, 0x002f, 0x0021, 0x007c, 0x003d, 0x002b, 0x0025, 0x0026,
+ 0x003a, 0x00b7, 0x203b, 0x2021, 0x00a7, 0x02c8, 0x00a6, 0x2016, 0x0027, 0x002d,
+ 0x0301, 0x0300, 0x0302, 0x0308, 0x0303, 0x0327, 0x0304, 0x0306, 0x0308, 0x0324,
+ 0x1fbd, 0x1fbf, 0x1ffd, 0x1fef, 0x1fc0, 0x1fce, 0x1fde, 0x1fdd, 0x1fdf, 0x00a8,
+ 0x02d8, 0x2013, 0x2696, 0x00d7, 0x2693, 0x2694, 0x2695, 0x003d, 0x0025, 0x0025,
+ 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025,
+ 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025,
+ 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025,
+ 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025,
+ 0x0025, 0x0485, 0x0486, 0x1dc0, 0x0307, 0x1dc1, 0x035c, 0x0308, 0x00bb, 0x0025,
+ 0x003b, 0x0023, 0x2018, 0x005c, 0x005e, 0x2016, 0x224c, 0x007e, 0x00b1, 0x00b7,
+ 0x25cb, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025,
+ 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x032f, 0x0302, 0x2020,
+ 0x0307, 0x0025, 0x1fee, 0x1fcd, 0x1fcf, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025,
+ 0x2261, 0x2697, 0x2510, 0x0025, 0x0025, 0x0025, 0x00b7, 0x030a, 0x030c, 0x0328,
+ 0x007c, 0x002d, 0x2219, 0x002d, 0x2234, 0x2235, 0x0025, 0x0025, 0x2042, 0x00d7,
+ 0x002d, 0x00f7, 0x0338, 0x00b6, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025, 0x0025,
+ 0x0359, 0x002f, 0x1fbd, 0x1ffe, 0x1ffd, 0x1fef, 0x1fc0, 0x0313, 0x0314, 0x0323,
+ };
+
+/* Text Symbols (#) 0 - 1528
+ * May be preceded by upper case (*); this table contains only upper case characters
+ */
+/* FIXME: #6, #8, #19, #20, #24, #25, #27, #30, #31, #53, #54, #56, #59,
+ #61, #62, #64, #66, #68, #74, #87, #101-#134, #136-#150, #152-#199
+ #240-end */
+#define MAX_TEXT_SYMBOLS 1529
+unsigned int text_symbols[] = {
+ 0x0374, 0x03de, 0x03da, 0x03d8, 0x03d8, 0x03e0, 0x005f, 0x0023, 0x005f, 0x0301,
+ 0x03fd, 0x03ff, 0x2014, 0x203b, 0x2e16, 0x003e, 0x03f9, 0x002f, 0x003c, 0x0300,
+ 0x2220, 0x0053, 0x0375, 0x0039, 0x0023, 0x0282, 0x2e0f, 0x221a, 0x0023, 0x00b7,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0387, 0x003a, 0x22ee, 0x0023, 0x2059, 0x0023, 0x0023, 0x0023, 0x2283,
+ 0x0399, 0x0023, 0x0023, 0x0394, 0x0023, 0x0397, 0x0023, 0x03a7, 0x0023, 0x039c,
+ 0x002e, 0x0387, 0x02d9, 0x003a, 0x22ee, 0x002e, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0308, 0x0027, 0x1ffd, 0x1fef, 0x1fc0, 0x1ffe, 0x1fbd, 0x0023, 0x0023, 0x0023,
+ 0x2014, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x00f7, 0x22d6, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x2216, 0x2283, 0x0023, 0x223c, 0x0023, 0x0023, 0x2248,
+ 0x0023, 0x0023, 0x0023, 0x223d, 0x0023, 0x0023, 0x0023, 0x0023, 0x03fc, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x02d9, 0x222b, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x221e, 0x2014, 0x0023, 0x0023, 0x0023, 0x0023, 0x2310, 0x0023, 0x0023, 0x0023,
+ 0x0043, 0x2282, 0x20de, 0x0375, 0x0023, 0x05d0, 0x0023, 0x0023, 0x0023, 0x2309,
+ 0x0023, 0x2229, 0x0023, 0x2282, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x2643, 0x25a1, 0x264f, 0x264d, 0x2640, 0x2650, 0x2644, 0x2609, 0x263f, 0x263e,
+ 0x2642, 0x2651, 0x264c, 0x2648, 0x264e, 0x264a, 0x264b, 0x2653, 0x2652, 0x2649,
+ 0x260d, 0x263d, 0x260c, 0x2605, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x260b, 0x2651, 0x0023, 0x264c, 0x264e, 0x2126, 0x2127, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x2321, 0x0023, 0x2e0e, 0x003e, 0x2e0e, 0x2e0e, 0x003d, 0x2e0e, 0x0023, 0x2e0e,
+ 0x2e0e, 0x2e0e, 0x2e0e, 0x2e0e, 0x2e0e, 0x2251, 0x0023, 0x0023, 0x0023, 0x2022,
+ 0x2629, 0x2629, 0x2627, 0x003e, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0283, 0x2e10, 0x2e11, 0x2e10, 0x2e11, 0x01a7, 0x2e0f, 0x2573, 0x00b7,
+ 0x2014, 0x007c, 0x2627, 0x0023, 0x0023, 0x2627, 0x2138, 0x2192, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0283, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x2609, 0x0023, 0x2e0e, 0x2e0f, 0x2e14, 0x2e15, 0x203b, 0x0023,
+ 0x0023, 0x0023, 0x03fd, 0x0023, 0x0023, 0x0023, 0x0023, 0x2241, 0x0023, 0x2191,
+ 0x2629, 0x0023, 0x0023, 0x2e13, 0x2297, 0x271b, 0x2190, 0x02c6, 0x0023, 0x0023,
+ 0x0023, 0x035c, 0x2e12, 0x03da, 0x0311, 0x0023, 0x0023, 0x0023, 0x01b7, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x2263,
+ 0x2237, 0x25cc, 0x05e6, 0x05d1, 0x22bb, 0x2208, 0x2629, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x2191, 0x0305, 0x2319, 0x2423, 0x0023, 0x0023, 0x03dc, 0x0023, 0x0023,
+ 0x03f9, 0x0023, 0x22a2, 0x0023, 0x0393, 0x2309, 0x0023, 0x03a6, 0x03a1, 0x039c,
+ 0x0399, 0x0398, 0x2228, 0x039d, 0x2127, 0x0396, 0x0023, 0x0395, 0x2210, 0x0023,
+ 0x0023, 0x22cf, 0x0023, 0x039b, 0x22b8, 0x0036, 0x0039, 0x230b, 0x0394, 0x2207,
+
+ 0x2203, 0x0023, 0x0023, 0x03a0, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x03a9, 0x0023, 0x03bb,
+ 0x0023, 0x22a3, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x2200, 0x039f, 0x039e,
+ 0x0394, 0x0399, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x2441, 0x03a9, 0x0397, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x03a7, 0x03a4, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0391, 0x0392, 0x03a5, 0x03a8, 0x2044, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x22cf, 0x22d4, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x22b1, 0x22b0,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x223b,
+ 0x0023, 0x2201, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0661, 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669,
+ 0x0660, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x2733, 0x2282, 0x2283, 0x03a7, 0x002f, 0x22ba, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x223d, 0x0023, 0x0023, 0x0023, 0x0023, 0x223b, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x003c, 0x0023, 0x0023,
+ 0x2116, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x003c, 0x0282, 0x00f7, 0x005c, 0x0023, 0x0023, 0x0023, 0x0023,
+ 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023, 0x0023
+ };
+
+/* Quotation Marks (") 0 - 60 */
+/* FIXME: check pairs, find symbols for "50-"69 */
+#define MAX_QUOTATION 61
+unsigned int quotation_open[MAX_QUOTATION];
+unsigned int quotation_open_symbol[] = {
+ 0x201c, 0x201e, 0x201e, 0x2018, 0x201a, 0x201a, 0x00ab, 0x3008, 0x201c, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022
+ };
+unsigned int quotation_close_symbol[] = {
+ 0x201d, 0x201d, 0x201d, 0x2019, 0x2019, 0x2019, 0x00bb, 0x3009, 0x201e, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022, 0x0022,
+ 0x0022
+ };
+
+
+/* Brackets ([) 0 - 69 */
+/* FIXME: check pairs, find symbols for [9, [10, [13, [14, [15, [33-[69 */
+#define MAX_BRACKET 70
+unsigned int bracket_open[MAX_BRACKET];
+unsigned int bracket_open_symbol[] = { /* opening glyph per bracket code; 0x005b ('[') fills entries with no dedicated symbol */
+ 0x005b, 0x0028, 0x003c, 0x007b, 0x301a, 0x230a, 0x2308, 0x2308, 0x300c, 0x005b,
+ 0x005b, 0xfe59, 0x2192, 0x005b, 0x005b, 0x005b, 0x3016, 0x300e, 0x300a, 0x005b,
+ 0x23a7, 0x23aa, 0x23a8, 0x23a9, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, /* 20-23: left curly bracket pieces; 21 is U+23AA CURLY BRACKET EXTENSION (was U+239F, a right-parenthesis piece) */
+ 0x239b, 0x239c, 0x239d, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, /* 30-32: left parenthesis pieces */
+ 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b,
+ 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b,
+ 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b, 0x005b,
+ };
+unsigned int bracket_close_symbol[] = {
+ 0x005d, 0x0029, 0x003e, 0x007d, 0x301b, 0x230b, 0x2309, 0x230b, 0x300d, 0x005d,
+ 0x005d, 0xfe5a, 0x2190, 0x005d, 0x005d, 0x005d, 0x3017, 0x300f, 0x300b, 0x005d,
+ 0x23ab, 0x23aa, 0x23ac, 0x23ad, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d,
+ 0x239e, 0x239f, 0x23a0, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d,
+ 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d,
+ 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d,
+ 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d,
+ };
+
+/* Quasi-Brackets (<) 0 - 100 */
+/* FIXME: decide on handling: for each letter, appropriate superscripts and subscripts should
+ * be combined with every letter
+ */
+#define MAX_QUASI_BRACKET 101
+unsigned int quasi_bracket_open[MAX_QUASI_BRACKET];
+unsigned int quasi_bracket_open_symbol[] = {
+ 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x25ba, 0x0028, 0x0028, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c, 0x003c,
+ 0x003c,
+ };
+unsigned int quasi_bracket_close_symbol[] = {
+ 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x25c4, 0x0029, 0x0029, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e, 0x003e,
+ 0x003e,
+ };
+
+/* Non-Text ({) 0 - 71 */
+/*FIXME: decide on representation */
+#define MAX_NON_TEXT 72
+unsigned int non_text_open[MAX_NON_TEXT];
+unsigned int non_text_open_symbol[] = {
+ 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b,
+ 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b,
+ 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b,
+ 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b,
+ 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b,
+ 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b,
+ 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b,
+ 0x007b, 0x007b,
+ };
+unsigned int non_text_close_symbol[] = {
+ 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d,
+ 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d,
+ 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d,
+ 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d,
+ 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d,
+ 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d,
+ 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d, 0x007d,
+ 0x007d, 0x007d,
+ };
diff --git a/tlgu/tlgu.1 b/tlgu/tlgu.1
new file mode 100644
index 0000000..d3cc149
--- /dev/null
+++ b/tlgu/tlgu.1
@@ -0,0 +1,192 @@
+.TH tlgu 1 "Feb, 2005" "Version 1.1" "TLG to Unicode Converter"
+.SH NAME
+
+tlgu \- convert TLG (D) CD-ROM txt files to Unicode
+
+.SH SYNOPSIS
+.B tlgu
+[
+.I options
+]
+.I input_file
+.I output_file
+
+.SH DESCRIPTION
+.B tlgu
+will convert an \fIinput_file\fP from Thesaurus Linguae Graecae (TLG) representation
+to a Unicode (UTF-8) \fIoutput_file\fP. The TLG representation consists of \fBbeta-code\fP
+text and \fBcitation\fP information.
+
+.SH OPTIONS
+.TP
+.B \-b
+inserts a form feed and citation information (levels a, b, c, d) on every "book" citation
+change. By default the program will output line feeds only (see also \fB\-p\fP).
+.TP
+.B \-p
+observes paging instructions.
+By default the program will output line feeds only.
+.TP
+.B \-r
+primarily Roman text. Some TLG texts, notably doccan1.txt and doccan2.txt are mainly
+roman texts lacking explicit language change codes. Setting this option will force
+a change to roman text after each citation block is encountered.
+.TP
+.B \-v
+highest-level reference citation is included before each text line (v-level)
+.TP
+.B \-w
+reference citation is included before each text line (w-level)
+.TP
+.B \-x
+reference citation is included before each text line (x-level)
+.TP
+.B \-y
+reference citation is included before each text line (y-level)
+.TP
+.B \-z
+lowest-level reference citation is included before each text line (z-level).
+.sp 1
+.TP
+.B \-B
+inserts blank space (a tab) before each and every line.
+.TP
+.B \-C
+citation debug information is output.
+.TP
+.B \-S
+special code debug information is output.
+.TP
+.B \-V
+block processing information is output (verbose).
+.TP
+.B \-W
+each work (book) is output as a separate file in the form output_file-xxx.txt
+
+.SH HISTORY AND INTENDED USE
+The purpose of \fBtlgu\fP is to translate binary TLG-format files into readable and editable text.
+It is based on an earlier program written in 80x86 assembly language (1996) outputting codes for
+a home-made font which used the prevalent hellenic font encodings of that time complemented
+by dead accent characters - not very attractive, but readable.
+.sp 1
+Then came Unicode and a plethora of accented character glyphs; nice-looking but
+with the well-known drawback that special processing is needed to do wild-card searches.
+Nice polytonic fonts have now been made available (Cardo, Gentium, Athena, Athenian,
+Porson) and, surely, these will be expanded as special-use code points are included
+in the Unicode definition (musical symbols, other special symbols) and more fonts will be created.
+.sp 1
+So, at this point in time, \fBtlgu\fP will crunch a file which has been formatted
+according to the published TLG-D format and produce codes for most glyphs
+generally available. No attempt has been made to introduce multi-character sequences
+or formatting codes (font changes). If a code has not been defined, the program will output
+the respective "code family" glyph. You may use the \fB\-S\fP option to check such codes
+against the published beta code definition.
+.sp 1
+You may not like the character output for a specific code. Check out the \fBtlgcodes.h\fP file
+containing the special symbol and punctuation codes and select one to suit you better. It will
+probably be a while before the beta to Unicode correspondence settles down.
+
+
+.SH EXAMPLES
+.B ./tlgu -r DOCCAN2.TXT doccanu.txt
+Translate the TLG canon to a unicode text file. Note the use of the \fB-r\fP option (this file
+expects Roman as the default font).
+.TP
+.B ./tlgu -x -y -z TLG1799.TXT tlg1799u.txt
+Generate a continuous file with the texts of grandpa Euclides. Available citations (-x -y -z)
+are Book//demonstratio/line as shown in the respective "cit" field of doccan2.txt.
+.TP
+.B ./tlgu -b -B TLG1799.TXT tlg1799u.txt
+Generate the same texts, this time with a page feed and book citation information on the first
+page of each book and a tab before each line (use with OOo versions earlier than 1.1.4).
+.TP
+.B ./tlgu -C TLG1799.TXT tlg1799u.txt
+See how the citation information changes within each TLG block.
+.TP
+.B ./tlgu -S TLG1799.TXT tlg1799u.txt | sort > symbols1799.txt
+Check out the symbols used in a work. Book and x, y, z references are printed on a separate
+line for each symbol. Sort / grep the output to locate specific symbols of interest; save in
+a file for later use.
+.TP
+.B ./tlgu -W TLG0006.TXT tlg0006u
+Will produce separate files for each work, named tlg0006u-001.txt etc.
+
+.SH POST-PROCESSING EXAMPLES
+I use the OpenOffice suite for most of my work. This example shows one of many possible
+ways of using the search and replace facility to create a readable version of the Suda lexicon.
+.TP
+.B ./tlgu -B TLG4085.TXT tlg4085u.txt
+A Unicode file with the text is created
+.TP
+.B Open the generated file with OOo:
+File | Open | Filename: tlg4085u.txt,
+File Type: Text Encoded \-\- Press Open
+.sp 1
+The ASCII Filter Options window appears. Select the Unicode (UTF-8) character set and
+a proper Unicode font installed in your machine (e.g. Cardo). Press OK.
+.TP
+.B Replace angle brackets with expanded text
+Lexicon terms are enclosed in <angle brackets>. The actual beta codes indicate the use of
+expanded text for emphasis. Select Edit | Find & Replace. The \fBFind & Replace\fP window appears.
+.sp 1
+In the \fBSearch For\fP field, type the following expression: \fB<[^<>]*>\fP
+This means "find any characters between angle brackets, not including angle brackets".
+.sp 1
+In the \fBReplace With\fP window insert a single ampersand: \fB&\fP
+This means that we need to \fBadd\fP formatting information (this case) or additional text to
+the text found. Press \fBFormat...\fP and select the \fBPosition\fP tab; select Spacing
+Expanded by 2.0 points. Press OK.
+.sp 1
+Check the \fBRegular Expressions\fP box and press \fBReplace All\fP.
+.sp 1
+You may now replace the angle brackets with nothing.
+.sp 1
+Repeat the above procedure for titles enclosed in {braces}. Write a macro...
+.TP
+.B Other useful information
+In the "Execute" tab of the "Properties" window of my KDesktop Link to Application
+I have the following command (single line):
+.br
+\fBLC_CTYPE=el_GR.UTF-8 /whereitsat/OpenOffice.org1.1.x/soffice\fP
+.br
+The prefix, an environment variable, allows you to use the same program with different locales;
+in this case, hellenic Unicode (UTF-8).
+.sp 1
+I put my default locale and keyboard definitions in my \fB.profile\fP:
+.br
+.na
+.B export LC_CTYPE=el_GR.UTF-8
+.br
+.na
+.B setxkbmap us+el polytonic -option grp:ctrl_shift_toggle
+.br
+.sp 1
+This way multi-lingual text can be entered; keyboard layout switching is done by pressing Ctrl/Shift.
+.SH REFERENCES
+There are several texts describing the internal representation of \fBPHI\fP and
+\fBTLG\fP text, ID data, citation data and index files. The originator of this
+format is the Packard Humanities Institute. The TLG is maintained by UCI \- see
+\fBwww.tlg.uci.edu\fP \- where you may find the \fBTLG Beta Code Manual\fP and the
+\fBTLG Beta Code Quick Reference Guide\fP.
+.sp 1
+Unicode consortium publications pertaining to the codification
+of characters used in Hellenic literature, scientific and musical texts.
+.sp 1
+The OpenOffice suite (\fBwww.openoffice.org\fP) includes a word processor that you
+can use to load, process and create new polytonic texts.
+
+.SH COPYRIGHT
+Copyright (C) 2004, 2005 Dimitri Marinakis (dm ssa gr).
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License (version 2) as published by
+the Free Software Foundation.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
diff --git a/tlgu/tlgu.c b/tlgu/tlgu.c
new file mode 100644
index 0000000..29abe5c
--- /dev/null
+++ b/tlgu/tlgu.c
@@ -0,0 +1,1344 @@
+/* tlgu: Translates TLG (D) text files to Unicode text
+ *
+ * Copyright (C) 2004, 2005 Dimitri Marinakis
+ *
+ * Licensed under the terms of the GNU General Public License.
+ * ABSOLUTELY NO WARRANTY.
+ * See the file `COPYING' in this directory.
+ *
+ * Usage:
+ * tlgu [options] infile outfile
+ *
+ * Options:
+ * -r -- primarily Roman text; default betastate = ROMAN, reset on every ID code
+ * -vwxyz -- reference citations are printed in the form xxx.xxx...xxx
+ * -(a)b(cd) -- description citations are printed
+ * -B -- output blank space (tab) after each new line (beginning of line)
+ * -p -- pagination is observed, otherwise book lines are printed continuously
+ * -C -- citation debug information is printed
+ * -S -- special code debug information is printed
+ * -T -- bracket debug information is printed
+ * -V -- processing debug information is printed
+ * -W -- multiple output files, one for each work
+ *
+ * Returns: exit code 1 if unsuccessful
+ *
+ * Make: gcc tlgu.c -o tlgu
+ *
+ * History: This is a re-write of a DOS program (tlgft.asm) written several
+ * years ago to translate Hellenic texts distributed on the TLG CD-ROM from
+ * "beta code" to something readable, editable and printable.
+ *
+ * Pointers / References:
+ * TLG Project - www.tlg.uci.edu
+ * PHI CD ROM Format Description, Packard Humanities Institute, 19 April 1992
+ * Beta code reference - Text versions: tlgbeta.txt or tlgcode.txt
+ * a .pdf version is also available.
+ * ID locator reference - Text version tlgcodes.txt
+ *
+ * dm: 14-Jun-2001 ELOT-928
+ * 14-Jun-2004 Unicode
+ * 26-Jun-2004 Command-line options
+ * 26-Feb-2005 Output file separation (-W option)
+ * 06-Mar-2005 Latin accent characters added (without parentheses)
+ * 21-Nov-2005 Added -Z -e and imported into sword-tools SVN repository
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "tlgu.h"
+#include "tlgcodes.h"
+
+/****************** PROTOTYPES FROM THE TOP DOWN *******************/
+
+int tlgu (char * input_file, char * output_file);
+void output_utf(int ucode);
+void output_string(char *outstr);
+int process_beta (int input_count);
+void beta_code(int input_count);
+int id_code(int input_count);
+void store_accents(unsigned char bufferchar);
+const char *resolve_cite_format(const char *cformat);
+
+/****************** PROGRAM VERSION INFORMATION *******************/
+char *prog_version="1.2";
+
+/****************** COMMAND LINE OPTIONS **************************/
+/* All flags start at 0; main() sets a flag to 1 when its option letter is given. */
+int opt_roman = 0; /* -r: betastate defaults to ROMAN instead of HELLENIC */
+int opt_page = 0; /* -p */
+int opt_blank = 0; /* -B */
+int opt_acit = 0; /* -a */
+int opt_bcit = 0; /* -b */
+int opt_ccit = 0; /* -c */
+int opt_dcit = 0; /* -d */
+int opt_cit_id = 0; /* set by any of -a, -b, -c, -d */
+int opt_vcit = 0; /* -v */
+int opt_wcit = 0; /* -w */
+int opt_xcit = 0; /* -x */
+int opt_ycit = 0; /* -y */
+int opt_cprefix = 0; /* -Z: a custom citation format was supplied in cformat */
+char cformat[253]; /* argument of -Z */
+int opt_ecit_blank = 0; /* -e: a replacement for blank citation segments was supplied in ecite */
+char ecite[253]; /* argument of -e */
+int opt_zcit = 0; /* -z */
+int opt_verbose = 0; /* -V */
+int opt_debug_bracket = 0; /* -T */
+int opt_debug_cit = 0; /* -C */
+int opt_debug_special = 0; /* -S */
+int opt_multiple = 0; /* -W */
+
+/****************** GLOBAL VARIABLES *******************************/
+
+int iptr = 0; /* input buffer pointer, reset before every read */
+int optr = 0; /* output buffer pointer, reset after every write */
+unsigned char input_buffer[INRECSIZE];
+unsigned char output_buffer[OUTRECSIZE];
+#define MAXFILELEN 256
+
+/************ GLOBAL BETA CODE PROCESSING VARIABLES **************/
+
+unsigned int outcode;
+int betastate; /* translation state machine */
+int previous_state; /* needed for symbol translations */
+int start_new_line = 0; /* needed for symbol translations */
+int book_change = 0; /* needed for symbol translations */
+int accents; /* holds accent combinations */
+char *accented_chars = "AEHIOUWR";
+char *accent_chars = ")(+/\\=|";
+char *latin_accent_chars = "+/\\=|";
+char *escape_codes = "$&%\"@#^[]<>{}";
+char *punctuation_codes = " .,:;_\"%{}$&"; /* used by which_sigma */
+char previous_bcit[52][32]; /* holds previous work (book) citation */
+
+/****************** GLOBAL DESCRIPTOR VARIABLES *****************/
+
+/*
+ Space is reserved for descriptive data as follows:
+
+ citations, binary component -- z, y, x, w, v, n (1 to 16383)
+ citations, ascii component -- a-z (1 to 15 characters + null, only a-d, n, v-z are actually used)
+ descriptors, binary component -- a-z (1 to 16383)
+ descriptors, ascii component -- a-z (1 to 31 characters + null)
+
+ Citations ---
+ a - author citation
+ b - work citation
+ c - preferred abbreviation for the work
+ d - preferred abbreviation for the author
+
+ n - if present signifies a document within a work
+ when it changes, v-z are nulled but are then independent
+ if n is not present, a change in an upper level nulls out the rest
+
+ v-z hierarchical citation levels, high to low
+
+ v
+ w
+ x - (chapter)
+ y - (verse) (book)
+ z - line
+
+ Descriptions ---
+
+ z - comment sequence number within a work
+
+ In the common data structures below, citations will hold the first 26 positions (0-25)
+ while descriptors will hold the next 26 positions.
+*/
+int icitation[52];
+char citation[52][32];
+int id_level; /* holds translated current id level as an index to ID arrays */
+int id_char; /* holds the pointer for the ascii part of the ID arrays */
+int id_command; /* holds the current instruction for ID handling */
+int id_process; /* if non-zero, command must be processed */
+
+
+/****************** HANDLE ARGUMENTS AND SYNTAX *******************/
+
+/* usage_info:
+ *  Prints the program banner, the licence notice and a summary of the
+ *  command-line syntax and options to standard output.
+ */
+void usage_info(void)
+{
+    printf("\ntlgu: TLG beta code file to Unicode translator ver. %s\n", prog_version);
+    printf("\ntlgu: Copyright (C) 2004, 2005 Dimitri Marinakis");
+    printf("\ntlgu: This program is free software; you are encouraged to redistribute it under");
+    printf("\ntlgu: the terms of the GNU General Public License.\n");
+    printf("\ntlgu: This program comes with ABSOLUTELY NO WARRANTY. See the GNU General Public");
+    printf("\ntlgu: License (e.g. in the file named `COPYING') for more details.\n");
+    /* The program name comes first, then the options, then the two file names */
+    printf("\ntlgu: Syntax: tlgu [-options...] beta_code_file text_file\n\n");
+    printf("tlgu: -r -- primarily Roman text; default betastate = ROMAN, reset on every ID code\n");
+    printf("tlgu: -v -w -x -y -z -- work reference citations are printed in the form xxx.xxx...xxx\n");
+    printf("tlgu: -Z <custom_cite_format_prefix> -- use special codes %%v %%w %%x %%y %%z in string\n");
+    printf("tlgu: -e <custom_blank_cite_seg_string> -- e.g. \"[NONE]\" instead of default \"\"\n");
+    printf("tlgu: -a -b -c -d -- books are preceded by a page feed and description citations are printed\n");
+    printf("tlgu: -p -- pagination is observed, otherwise book lines are printed continuously\n");
+    printf("tlgu: -B -- output blank space (tab) at the beginning of each line\n");
+    printf("tlgu: -C -- citation debug information is printed\n");
+    printf("tlgu: -S -- special code debug information is printed\n");
+    printf("tlgu: -T -- bracket debug information is printed\n");
+    printf("tlgu: -V -- processing debug information is printed\n");
+    printf("tlgu: -W -- multiple output files, one for each work (book)\n\n");
+}
+
+/* set_option_string:
+ *  Copies the argument of a string-valued option (-e, -Z) into its
+ *  fixed-size buffer. Exits with code 1 if the argument does not fit,
+ *  instead of writing past the end of the buffer.
+ */
+static void set_option_string(char *dest, size_t dest_size, const char *value, char option)
+{
+    if (strlen(value) >= dest_size) {
+        printf("\ntlgu: argument of -%c is too long (maximum %d characters)\n",
+               option, (int)(dest_size - 1));
+        exit(1);
+    }
+    strcpy(dest, value);
+}
+
+/* main:
+ *  Parses the leading option arguments, then hands the two remaining
+ *  arguments (input file, output file) to tlgu().
+ *  Returns: the return value of tlgu(); exits with 1 when too few
+ *  arguments are given and with 0 after printing usage for an unknown option.
+ */
+int main(int argc, char * argv[])
+{
+    if (argc < 3) {
+        usage_info();
+        exit(1);
+    }
+
+    /* skip the program name */
+    --argc;
+    ++argv;
+
+    /* argc > 2 keeps two arguments in reserve for the file names, so
+     * argv[1] is always valid inside the loop
+     */
+    while (argc > 2 && argv[0][0] == '-') {
+        switch (argv[0][1]) {
+        case 'W':
+            opt_multiple = 1;
+            break;
+        case 'V':
+            opt_verbose = 1;
+            break;
+        case 'S':
+            opt_debug_special = 1;
+            break;
+        case 'T':
+            opt_debug_bracket = 1;
+            break;
+        case 'C':
+            opt_debug_cit = 1;
+            break;
+        case 'B':
+            opt_blank = 1;
+            break;
+        case 'p':
+            opt_page = 1;
+            break;
+        case 'r':
+            opt_roman = 1;
+            break;
+        case 'a':
+            opt_acit = 1;
+            opt_cit_id = 1;
+            break;
+        case 'b':
+            opt_bcit = 1;
+            opt_cit_id = 1;
+            break;
+        case 'c':
+            opt_ccit = 1;
+            opt_cit_id = 1;
+            break;
+        case 'd':
+            opt_dcit = 1;
+            opt_cit_id = 1;
+            break;
+        case 'v':
+            opt_vcit = 1;
+            break;
+        case 'w':
+            opt_wcit = 1;
+            break;
+        case 'x':
+            opt_xcit = 1;
+            break;
+        case 'y':
+            opt_ycit = 1;
+            break;
+        case 'z':
+            opt_zcit = 1;
+            break;
+        case 'e':
+            /* -e takes the following argument as its value */
+            opt_ecit_blank = 1;
+            set_option_string(ecite, sizeof(ecite), argv[1], 'e');
+            argc--;
+            argv++;
+            break;
+        case 'Z':
+            /* -Z takes the following argument as its value */
+            opt_cprefix = 1;
+            set_option_string(cformat, sizeof(cformat), argv[1], 'Z');
+            argc--;
+            argv++;
+            break;
+        default:
+            usage_info();
+            exit(0);
+        }
+        argc--;
+        argv++;
+    }
+
+    /* An option that consumed a value may have left fewer than two
+     * arguments; without this check argv[1] would be NULL below
+     */
+    if (argc < 2) {
+        usage_info();
+        exit(1);
+    }
+
+    return tlgu(argv[0], argv[1]);
+}
+
+
+/****************** FILE READ-WRITE LOOP **************************/
+
+/* tlgu:
+ *  Opens <input_file>, passes it block by block through process_beta and
+ *  writes the translated output to <output_file>. When process_beta
+ *  returns -2 (book change) the current output file is closed, removed if
+ *  it is empty, and a new file "<output_file>-<previous_bcit[1]>.txt" is
+ *  opened.
+ *  Returns: 0 on success, 1 on an open, read, close, chmod or file name error
+ *  Changes: iptr, optr, icitation, citation, betastate
+ */
+int tlgu(char *input_file, char *output_file)
+{
+    /* rw-r--r--; open() needs a mode argument whenever O_CREAT is given */
+    const mode_t output_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+
+    int infile;             /* input file descriptor */
+    int outfile;            /* output file descriptor */
+    int icnt;               /* input file bytes read in input buffer */
+    int ocnt;               /* output file bytes written */
+    int bytes_to_process;   /* bytes read minus bytes already processed */
+    int wehaveinput;        /* flag for while */
+    int beta_return;        /* process beta return code */
+    int name_len;           /* length of a generated output file name */
+    char new_file[MAXFILELEN];
+    struct stat filestat;
+
+    /* Open input and output files
+     */
+    infile = open(input_file, O_RDONLY);
+    if (infile < 0) {
+        perror("tlgu input file open");
+        return(1);
+    }
+    if (strlen(output_file) >= MAXFILELEN-5) {
+        printf("\ntlgu output filename too long - exiting\n");
+        close(infile);
+        return(1);
+    }
+    strcpy(new_file, output_file);
+    outfile = open(new_file, O_WRONLY | O_CREAT | O_TRUNC, output_mode);
+    if (outfile < 0) {
+        perror("tlgu output file create");
+        close(infile);
+        return(1);
+    }
+
+    /* Initialize citation
+     * and descriptor indicators
+     */
+    memset(icitation, 0, sizeof(icitation));
+    memset(citation, 0, sizeof(citation));
+
+    /* Initialize beta processing defaults
+     * e.g. The TLG Canon needs ROMAN as default
+     * Hellenic should be reset at each ID CODE
+     */
+    if (opt_roman) betastate = ROMAN;
+    else betastate = HELLENIC;
+
+    /* Read, process and write file blocks,
+     * Optionally create one file per book (-W)
+     * Change file mode (equivalent to chmod 644 output_file),
+     * and return.
+     * Note: Local deblocking usually yields higher speeds
+     */
+    wehaveinput = 1;
+    while (wehaveinput) {
+        /* Read and process beta code in input_buffer */
+        icnt = read(infile, input_buffer, sizeof(input_buffer));
+        if (icnt < 0) {
+            /* a read error would otherwise repeat forever */
+            perror("tlgu input file read");
+            close(infile);
+            close(outfile);
+            return(1);
+        }
+        if (icnt == 0) wehaveinput = 0;
+
+        iptr = 0;
+        while ((icnt > 0) && (iptr < icnt)) {
+            bytes_to_process = icnt - iptr;
+            beta_return = process_beta(bytes_to_process);
+
+            /* Write processed data and reset output buffer pointer */
+            if (optr > 0) {
+                ocnt = write(outfile, output_buffer, optr);
+                optr = 0;
+                if (ocnt < 0) {
+                    perror("tlgu output file write");
+                    wehaveinput = 0;
+                }
+            } else if (beta_return != -2) { /* no more bytes to write, no book change request */
+                if (opt_verbose) printf("\ntlgu: no more bytes to write");
+                wehaveinput = 0; /* signal no more input */
+            }
+
+            if (beta_return == -2) {
+                /* book change request, close current file and open a new one */
+                if (opt_verbose) printf("\ntlgu: book change request: %s", previous_bcit[1]);
+                if (close(outfile)) {
+                    close(infile);
+                    return(1);
+                }
+                if (chmod(new_file, output_mode)) {
+                    close(infile);
+                    return(1);
+                }
+
+                /* request file information and delete zero-length files;
+                 * filestat is only meaningful when stat succeeds
+                 */
+                if ((stat(new_file, &filestat) == 0) && (filestat.st_size == 0))
+                    unlink(new_file);
+
+                /* the book citation can push the name past the buffer size */
+                name_len = snprintf(new_file, sizeof(new_file), "%s-%s.txt",
+                                    output_file, previous_bcit[1]);
+                if ((name_len < 0) || (name_len >= (int)sizeof(new_file))) {
+                    printf("\ntlgu output filename too long - exiting\n");
+                    close(infile);
+                    return(1);
+                }
+                outfile = open(new_file, O_WRONLY | O_CREAT | O_TRUNC, output_mode);
+                if (outfile < 0) {
+                    perror("tlgu: new_file create");
+                    close(infile);
+                    return(1);
+                }
+            }
+        }
+    }
+
+    /* Close input and output files,
+     * make output file readable
+     */
+    close(infile);
+
+    if (close(outfile)) {
+        perror("tlgu output file close");
+        return(1);
+    }
+    if (chmod(new_file, output_mode)) {
+        perror("tlgu output file chmod");
+        return(1);
+    }
+    if (opt_verbose) printf("\ntlgu: processing complete\n");
+    return(0);
+}
+
+/****************** PROCESSING *************************************/
+
+/* append_citation_level:
+ *  Formats one of the v-z citation levels (binary value followed by the
+ *  ascii part) and appends it to <dest>, never writing more than
+ *  <dest_size> bytes. A level whose binary value is zero is printed as
+ *  the ascii part followed by a dot; otherwise the dot is added only
+ *  when <trailing_dot> is non-zero.
+ *  NOTE(review): the original code also tested for an empty segment in
+ *  order to substitute the -e string, but the segment always contains a
+ *  dot or a digit here, so that test could never succeed. The output is
+ *  unchanged; confirm whether -e was meant to apply to -v..-z as well.
+ */
+static void append_citation_level(char *dest, size_t dest_size, int level, int trailing_dot)
+{
+    char segment[253];
+
+    if (icitation[level] == 0)
+        snprintf(segment, sizeof(segment), "%s.", citation[level]);
+    else if (trailing_dot)
+        snprintf(segment, sizeof(segment), "%d%s.", icitation[level], citation[level]);
+    else
+        snprintf(segment, sizeof(segment), "%d%s", icitation[level], citation[level]);
+
+    strncat(dest, segment, dest_size - strlen(dest) - 1);
+}
+
+/* process_beta:
+ *  Processes <input_count> bytes in <input_buffer>, starting at iptr.
+ *  ID bytes (high bit set) are handed to id_code, text bytes to beta_code;
+ *  null bytes are skipped. The first text byte after ID data starts a new
+ *  output line, optionally prefixed with citation information.
+ *  Returns: the last id_code return code (-1 for EOF, -2 for book change),
+ *  or 0 if no ID data was met
+ *  Changes: iptr, optr (through the output functions), betastate,
+ *  start_new_line, book_change
+ */
+int process_beta (int input_count)
+{
+    unsigned char inchar;
+    int processing;
+    int iptr_max; /* holds the calculated maximum input pointer value */
+    int return_code; /* id_code return code */
+    char outstring[511];
+
+    return_code = 0;
+    /* A beta code stream includes two kinds of data:
+     * ID data - always has the high bit set.
+     * Text data - always has the high bit reset.
+     */
+    processing = 1;
+    iptr_max = iptr + input_count;
+    if (opt_verbose) printf("\n\ntlgu: process_beta - %d bytes, iptr = %4.4x, iptr_max = %4.4x", input_count, iptr, iptr_max);
+    while (processing) {
+        if ((iptr >= INRECSIZE) || (iptr >= iptr_max)) {
+            /* Finished processing all input */
+            processing = 0;
+            continue;
+        }
+
+        inchar = input_buffer[iptr++];
+        if (optr >= OUTRECSIZE) {
+            /* Output size is greater than input -- intermediate write */
+            printf("\ntlgu: FIXME -- DATA LOSS: ERROR output size iptr - %x optr - %x", iptr, optr);
+            processing = 0;
+            continue;
+        }
+
+        if (inchar == 0) {
+            /* do nothing for null characters */
+            continue;
+        }
+
+        /* Both handlers re-read the current byte: step back before calling them */
+        --iptr;
+
+        if (inchar > 0x7F) {
+            /* Reset beta decoding state if roman option specified */
+            if (opt_roman) betastate = ROMAN;
+
+            /* Process ID code; only the bytes still left in this block
+             * are offered, so a short final block is not overrun
+             */
+            return_code = id_code(iptr_max - iptr);
+            if (return_code == -1) {
+                if (opt_verbose) printf("\ntlgu: EOF while processing id code");
+                processing = 0;
+            } else if (return_code == -2) {
+                if (opt_verbose) printf("\ntlgu: book change request");
+                processing = 0;
+            }
+            start_new_line = 1;
+            continue;
+        }
+
+        /* text data < 0x80 */
+        if (start_new_line) {
+            /* Write info on (book) citation change */
+            if (book_change) {
+                if (opt_cit_id) {
+                    sprintf(outstring, "\n\f[%s] ", citation[0]);
+                    output_string(outstring);
+                    sprintf(outstring, "[%s] ", citation[1]);
+                    output_string(outstring);
+                    sprintf(outstring, "[%s] ", citation[2]);
+                    output_string(outstring);
+                    sprintf(outstring, "[%s]\n", citation[3]);
+                    output_string(outstring);
+                }
+                book_change = 0;
+            }
+
+            /* Build the line prefix: newline, then tab, custom format or v-z citation */
+            strcpy(outstring, "\n");
+            if (opt_blank) {
+                strcat(outstring, "\t");
+            } else if (opt_cprefix) {
+                /* the resolved format may be as long as outstring itself */
+                strncat(outstring, resolve_cite_format(cformat),
+                        sizeof(outstring) - strlen(outstring) - 1);
+            } else if (opt_vcit || opt_wcit || opt_xcit || opt_ycit || opt_zcit) {
+                if (opt_vcit) append_citation_level(outstring, sizeof(outstring), 21, 1);
+                if (opt_wcit) append_citation_level(outstring, sizeof(outstring), 22, 1);
+                if (opt_xcit) append_citation_level(outstring, sizeof(outstring), 23, 1);
+                if (opt_ycit) append_citation_level(outstring, sizeof(outstring), 24, 1);
+                /* the lowest level carries no dot after its number */
+                if (opt_zcit) append_citation_level(outstring, sizeof(outstring), 25, 0);
+                /* Separate text from citation using a tab character */
+                strncat(outstring, "\t", sizeof(outstring) - strlen(outstring) - 1);
+            }
+            if (input_buffer[iptr] < 0x80) {
+                /* Print only if not followed by another ID byte */
+                output_string(outstring);
+            }
+            start_new_line = 0;
+            if (opt_roman) betastate = ROMAN;
+            else betastate = HELLENIC;
+        }
+        beta_code(iptr_max - iptr);
+    } /* end while processing*/
+
+    if (opt_verbose) printf("\ntlgu: iptr - %4.4x, optr - %4.4x ", iptr, optr);
+    return return_code;
+}
+/****************** LIBRARY FUNCTIONS ******************************/
+/* get_accents:
+ *  Collects consecutive accent characters from the current
+ *  <input_buffer> position into <accents>. In the ROMAN state only the
+ *  characters in latin_accent_chars count as accents, otherwise those in
+ *  accent_chars. Scanning stops, without consuming it, at the first
+ *  character that is not an accent, or at the end of the buffer.
+ *  Returns: number of accents found or zero
+ *  Changes: accents, iptr
+ */
+int get_accents(void)
+{
+    const char *valid_accents;
+    unsigned char bufferchar;
+    int number_of_accents = 0;
+
+    accents = 0;
+    valid_accents = (betastate == ROMAN) ? latin_accent_chars : accent_chars;
+
+    while (iptr < INRECSIZE) {
+        bufferchar = input_buffer[iptr];
+        /* strchr also matches the string terminator, so a null byte
+         * must be rejected explicitly
+         */
+        if ((bufferchar == 0) || (strchr(valid_accents, bufferchar) == NULL))
+            break;
+        iptr++;
+        store_accents(bufferchar);
+        number_of_accents++;
+    }
+    return number_of_accents;
+}
+
+/* store_accents:
+ *  Merges the accent character passed as a parameter into <accents>.
+ *  Bit layout of <accents>:
+ *      bits 0-1: 01 psili, 10 dasia, 11 dialytika
+ *      bits 2-3: 01 varia, 10 oxia, 11 perispomeni
+ *      bit  4  :  1 ypogegrammeni
+ *  Characters that are not accents leave the value unchanged.
+ *  Changes: accents
+ *  Caveat: the new bits are only ORed in, so an all-zero accent variable
+ *  is expected when the first accent of a letter is stored
+ */
+void store_accents(unsigned char bufferchar)
+{
+    int accent_bits = 0;
+
+    if (bufferchar == ')')
+        accent_bits = 1;
+    else if (bufferchar == '(')
+        accent_bits = 2;
+    else if (bufferchar == '+')
+        accent_bits = 3;
+    else if (bufferchar == '\\')
+        accent_bits = 4;
+    else if (bufferchar == '/')
+        accent_bits = 8;
+    else if (bufferchar == '=')
+        accent_bits = 0xc;
+    else if (bufferchar == '|')
+        accent_bits = 0x10;
+
+    accents = (accents | accent_bits) & 0x1f;
+}
+
+/* output_accents:
+ *  Writes the accents held in <accents> as separate codes through
+ *  output_utf: first the breathing or dialytika (bits 0-1), then the
+ *  tone accent (bits 2-3), then the ypogegrammeni (bit 4).
+ *  In the ROMAN state the perispomeni is written as CARET.
+ *  Input: <accents>
+ *  Changes: optr (output_utf)
+ */
+void output_accents(void)
+{
+    switch (accents & 3) {
+    case 1:
+        output_utf(PSILI);
+        break;
+    case 2:
+        output_utf(DASIA);
+        break;
+    case 3:
+        output_utf(DIALYTIKA);
+        break;
+    default:
+        break;
+    }
+
+    switch ((accents & 0xc) >> 2) {
+    case 1:
+        output_utf(VARIA);
+        break;
+    case 2:
+        output_utf(OXIA);
+        break;
+    case 3:
+        output_utf((betastate == ROMAN) ? CARET : PERISPOMENI);
+        break;
+    default:
+        break;
+    }
+
+    if (accents & 0x10)
+        output_utf(YPOGEGRAMMENI);
+}
+
+
+/* getnum:
+ *  Collects a decimal number from the current <input_buffer> position.
+ *  The digits are consumed; the first non-digit character is left in place.
+ *  Returns: the number, zero if no digits are present, or -1 if the end of
+ *  the buffer or MAXNUMBERS digits are reached before a non-digit is seen.
+ *  Values too large for an int saturate at a large positive number
+ *  instead of overflowing.
+ *  Changes: iptr
+ */
+ int getnum(void)
+{
+    #define MAXNUMBERS 32
+    const int saturation_limit = 100000000; /* one more digit still fits in an int */
+    unsigned char bufferchar;
+    int ndigits = 0;    /* digits consumed so far */
+    int convnumber = 0; /* converted number */
+
+    for (;;) {
+        if ( (iptr >= INRECSIZE) || (ndigits >= MAXNUMBERS) ) {
+            convnumber = -1;
+            break;
+        }
+        bufferchar = input_buffer[iptr];
+        if (!isdigit(bufferchar))
+            break;
+        iptr++;
+        ndigits++;
+        if (convnumber < saturation_limit)
+            convnumber = convnumber * 10 + (bufferchar - '0');
+    }
+
+    /* not an errno condition, so perror would print a misleading system message */
+    if (convnumber < 0) fprintf(stderr, "tlgu: did not complete number\n");
+    return convnumber;
+}
+
+/* output_utf:
+ *  Appends the UTF-8 byte sequence for <ucode> to output_buffer.
+ *  A zero code and codes above 0xffff produce no output. Nothing is
+ *  written, and a message is printed with perror, when fewer than three
+ *  bytes are free in the buffer.
+ *  Changes: optr, output_buffer
+ */
+void output_utf(int ucode)
+{
+    if ((optr+3) > OUTRECSIZE) {
+        perror("optr out of range");
+        return;
+    }
+    if (ucode == 0) {
+        /* do nothing */
+        return;
+    }
+    if (ucode < 0x80) {
+        /* one byte */
+        output_buffer[optr++] = ucode;
+        return;
+    }
+    if (ucode < 0x800) {
+        /* two bytes: 110xxxxx 10xxxxxx */
+        output_buffer[optr++] = 0xc0 | (ucode >> 6);
+        output_buffer[optr++] = 0x80 | (ucode & 0x3f);
+        return;
+    }
+    if (ucode <= 0xffff) {
+        /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
+        output_buffer[optr++] = 0xe0 | (ucode >> 12);
+        output_buffer[optr++] = 0x80 | ((ucode >> 6) & 0x3f);
+        output_buffer[optr++] = 0x80 | (ucode & 0x3f);
+        return;
+    }
+    /* higher unicodes are ignored */
+}
+
+/* output_string:
+ *  Passes every character of the null-terminated string <outstr> to
+ *  output_utf, which writes it to <output_buffer>.
+ *  Returns: nothing
+ *  Changes: optr, output_buffer (output_utf)
+ */
+void output_string(char *outstr)
+{
+    const char *next;
+
+    /* walk the string once instead of calling strlen on every iteration */
+    for (next = outstr; *next != '\0'; next++) {
+        output_utf(*next);
+    }
+}
+
+/* debug_escape_code:
+ *  Prints one debug line for an escape code: the code character, its
+ *  number, the work citation and the x.y.z citation values.
+ */
+static void debug_escape_code(char code, int number)
+{
+    printf("%c%d -- %s %d.%d.%d\n", code, number, citation[1], icitation[23], icitation[24], icitation[25]);
+}
+
+/* handle_escape_codes:
+ *  Formatting and character output based on escape codes: $&%"@#^[]<>{}
+ *  Input: escape code, optional number. <number> may be -1 when getnum
+ *  could not complete a number; table lookups are skipped in that case
+ *  instead of indexing in front of the table.
+ *  Changes: betastate, accents, quotation_open, optr, output_buffer
+ */
+void handle_escape_codes(unsigned char beta, int number)
+{
+    int temp = 0;
+
+    switch (beta)
+    {
+    case '$':
+        betastate = HELLENIC;
+        accents = 0;
+        break;
+    case '&':
+        betastate = ROMAN;
+        accents = 0;
+        break;
+    case '%':
+        if (opt_debug_special) debug_escape_code('%', number);
+        if ((number >= 0) && (number < MAX_PUNCTUATION))
+            output_utf(punctuation[number]);
+        break;
+    case '\"':
+        if (opt_debug_special) debug_escape_code('\"', number);
+        if ((number >= 0) && (number < MAX_QUOTATION)) {
+            /* quotation marks alternate between the opening and closing symbol */
+            if (quotation_open[number]) {
+                output_utf(quotation_close_symbol[number]);
+                quotation_open[number] = 0;
+            } else {
+                output_utf(quotation_open_symbol[number]);
+                quotation_open[number] = 1;
+            }
+        }
+        break;
+    case '@':
+        /* FIXME: If citations are active, paging should be disabled */
+        if (opt_debug_special) debug_escape_code('@', number);
+        /* Page formats -- FIXME: incomplete */
+        if (number == 0) {
+            output_utf(0x20);
+            output_utf(0x20);
+        } else if (number == 1) {
+            if (opt_page) output_utf(0xc);
+            /* FIXME: reinstate else output_utf(0xa); */
+        } /* FIXME: reinstate else output_utf(0xa); */
+        break;
+    case '#':
+        if (opt_debug_special) debug_escape_code('#', number);
+        if ((number >= 0) && (number < MAX_TEXT_SYMBOLS)) {
+            output_utf(text_symbols[number]);
+        }
+        break;
+    case '^':
+        /* quarter-spaces: will output at least one space */
+        if (number > 0) temp = number / 4;
+        while (temp >= 0) {
+            output_utf(0x20);
+            temp--;
+        }
+        break;
+    case '[':
+        if (opt_debug_bracket) debug_escape_code('[', number);
+        if ((number >= 0) && (number < MAX_BRACKET)) {
+            output_utf(bracket_open_symbol[number]);
+        }
+        break;
+    case ']':
+        if (opt_debug_bracket) debug_escape_code(']', number);
+        if ((number >= 0) && (number < MAX_BRACKET)) {
+            output_utf(bracket_close_symbol[number]);
+        }
+        break;
+    case '<':
+        if (opt_debug_bracket) debug_escape_code('<', number);
+        if ((number >= 0) && (number < MAX_QUASI_BRACKET)) {
+            output_utf(quasi_bracket_open_symbol[number]);
+        }
+        break;
+    case '>':
+        if (opt_debug_bracket) debug_escape_code('>', number);
+        if ((number >= 0) && (number < MAX_QUASI_BRACKET)) {
+            output_utf(quasi_bracket_close_symbol[number]);
+        }
+        break;
+    case '{':
+        if (opt_debug_bracket) debug_escape_code('{', number);
+        if ((number >= 0) && (number < MAX_NON_TEXT)) {
+            output_utf(non_text_open_symbol[number]);
+        }
+        break;
+    case '}':
+        /* the debug line used to print '{' for the closing code as well */
+        if (opt_debug_bracket) debug_escape_code('}', number);
+        if ((number >= 0) && (number < MAX_NON_TEXT)) {
+            output_utf(non_text_close_symbol[number]);
+        }
+        break;
+    default:
+        break;
+    }
+}
+
+/* which_sigma:
+ *  Tries to decide on which sigma form to use.
+ *  Input: index of input_buffer (iptr) after the sigma
+ *  Returns: output character code (SIGMEDIAL or SIGFINAL)
+ *
+ *  If the next character is a hyphen, it is a medial sigma.
+ *  Otherwise up to ten characters are examined in the input buffer:
+ *  if an alphabetic character is found before we hit a space, an ID byte
+ *  or other punctuation character, it is a medial sigma,
+ *  otherwise it is a final sigma (there is one exception in 4085 - POS(.))
+ *  A null byte counts as punctuation. The scan never reads past the end
+ *  of the input buffer; when it runs out of characters the result is a
+ *  medial sigma, as it is when ten characters give no decision.
+ */
+int which_sigma(int nextptr)
+{
+    int scanning;
+    int nextcode;
+
+    /* the hyphen test uses the global iptr; the visible caller, beta_code,
+     * passes that same value as <nextptr>
+     */
+    if ((iptr < INRECSIZE) && (input_buffer[iptr] == '-'))
+        return(SIGMEDIAL);
+
+    for (scanning = 10; scanning > 0; scanning--) {
+        if ((nextptr < 0) || (nextptr >= INRECSIZE))
+            break; /* nothing left to examine */
+        nextcode = input_buffer[nextptr++];
+        if (isalpha(nextcode))
+            return(SIGMEDIAL);
+        if (nextcode > 0x7f)
+            return(SIGFINAL);
+        if (strchr(punctuation_codes, nextcode))
+            return(SIGFINAL);
+    }
+    return(SIGMEDIAL);
+}
+
+/* beta_code:
+ * Processes <input_count> characters in <input_buffer> and
+ * writes processed output to <output_buffer>
+ * Processing stops, without consuming it, at the first ID byte (> 0x7F),
+ * after <input_count> characters, or at the end of the input buffer.
+ * Changes: iptr, accents, betastate (through handle_escape_codes),
+ * optr, output_buffer
+ * NOTE(review): the character tables used below (Alpha[], alpha[],
+ * hellenic[], ...) are declared outside this part of the file; their
+ * sizes and contents are not visible here.
+ */
+void beta_code(int input_count)
+{
+ int processing;
+ int input_pointer_max;
+ unsigned char betachar;
+ unsigned int outputchar;
+ int tmp;
+
+ input_pointer_max = iptr + input_count;
+ processing = 1;
+
+ while (processing) {
+ if ( (iptr < INRECSIZE) && (iptr < input_pointer_max) ) {
+ betachar = input_buffer[iptr++];
+ if ((betachar > 0x7F)) {
+ /* ID data found - restore pointer and stop processing*/
+ --iptr;
+ processing = 0;
+ } else {
+ outputchar = 0;
+ /* NOTE(review): strchr also matches the string terminator, so a null
+ * byte takes this branch and reaches the default case of handle_escape_codes
+ */
+ if (strchr(escape_codes, betachar)) {
+ /* Handle escape codes */
+ handle_escape_codes(betachar, getnum());
+ } else if (betastate == HELLENIC && betachar == '*') {
+ /* Handle Hellenic uppercase character */
+ /* accents may precede the letter; the letter itself is read next */
+ /* NOTE(review): the read below is not checked against INRECSIZE or input_pointer_max */
+ get_accents();
+ betachar = input_buffer[iptr++];
+ if (accents == 0) get_accents(); //FIXME: handle suffix accents differently
+ if (strchr(accented_chars, betachar)) {
+ switch (betachar) {
+ case 'A':
+ outputchar = Alpha[accents];
+ break;
+ case 'E':
+ outputchar = Epsilon[accents];
+ break;
+ case 'H':
+ outputchar = Eta[accents];
+ break;
+ case 'I':
+ outputchar = Iota[accents];
+ break;
+ case 'O':
+ outputchar = Omicron[accents];
+ break;
+ case 'U':
+ outputchar = Ypsilon[accents];
+ break;
+ case 'W':
+ outputchar = Omega[accents];
+ break;
+ case 'R':
+ outputchar = Rho[accents];
+ break;
+ default:
+ break;
+ }
+ } else if (betachar == 'S') {
+ /* an optional number after the sigma selects its form */
+ tmp = getnum();
+ if (tmp == 3) outputchar = SIGLUNATEUPPER;
+ else outputchar = SIGMEDIALUPPER;
+ } else if (isalpha(betachar)) {
+ /* not an accented character */
+ outputchar = hellenic[betachar];
+ } else {
+ /* NOTE(review): betachar is not alphabetic here; for values below 0x20 this index is negative - confirm it cannot occur */
+ outputchar = hellenic[betachar - 0x20];
+ }
+ if (outputchar == 0) outputchar = hellenic[betachar]; /* error condition */
+ output_utf(outputchar);
+ } else if (betastate == HELLENIC && isalpha(betachar)) {
+ /* Handle hellenic lower case:
+ * Get default character and then try to pin accents
+ */
+ if (strchr(accented_chars, betachar)) {
+ get_accents();
+ switch (betachar) {
+ case 'A':
+ outputchar = alpha[accents];
+ break;
+ case 'E':
+ outputchar = epsilon[accents];
+ break;
+ case 'H':
+ outputchar = eta[accents];
+ break;
+ case 'I':
+ outputchar = iota[accents];
+ break;
+ case 'O':
+ outputchar = omicron[accents];
+ break;
+ case 'U':
+ outputchar = ypsilon[accents];
+ break;
+ case 'W':
+ outputchar = omega[accents];
+ break;
+ case 'R':
+ outputchar = rho[accents];
+ break;
+ default:
+ break;
+ }
+ } else if (betachar == 'S') {
+ /* an explicit number selects the sigma form; without one which_sigma decides */
+ tmp = getnum();
+ if (tmp == 1) outputchar = SIGMEDIAL;
+ else if (tmp == 2)outputchar = SIGFINAL;
+ else if (tmp == 3) outputchar = SIGLUNATE;
+ if (outputchar == 0) {
+ outputchar = which_sigma(iptr);
+ }
+ }
+
+ /* fall back to the plain letter table when nothing was selected above */
+ if (outputchar == 0) outputchar = hellenic[betachar - 0x20];
+ output_utf(outputchar);
+ } else if (betastate == ROMAN && isalpha(betachar)) {
+ /* Handle Roman characters */
+ //FIXME: need to process roman characters
+ if (isalpha(betachar)) get_accents();
+ outputchar = betachar;
+ output_utf(outputchar);
+ /* ROMAN uses combining accent forms */
+ output_accents();
+ } else {
+ //FIXME: placeholder
+ /* every other character is copied as is, except '`' which is dropped */
+ if (betachar != '`') outputchar = betachar;
+ output_utf(outputchar);
+ }
+ }
+ } else {
+ /* Requested number of characters have been processed
+ * or no more characters available in buffer
+ */
+ processing = 0;
+ }
+ }
+}
+
+
+/* cite_append:
+ *  Appends <src> to <dest> without writing more than <dest_size> bytes;
+ *  text that does not fit is dropped.
+ */
+static void cite_append(char *dest, size_t dest_size, const char *src)
+{
+    size_t used = strlen(dest);
+
+    if (used + 1 < dest_size)
+        strncat(dest, src, dest_size - used - 1);
+}
+
+/* cite_fix_delimiter:
+ *  Replaces every '/' in <text> with ':' (SWORD key delimiter).
+ */
+static void cite_fix_delimiter(char *text)
+{
+    for (; *text; text++) {
+        if (*text == '/') *text = ':';
+    }
+}
+
+/* resolve_cite_format:
+ *  Expands a custom citation format string (-Z option).
+ *      %a - %z   citation level a-z (index 0-25)
+ *      %A - %Z   descriptor level a-z (index 26-51)
+ *      %%        a literal percent sign
+ *      \t \n \r  tab, newline, carriage return; any other character
+ *                after a backslash is copied as is
+ *  Levels with an index above 20 are printed as the binary value (when
+ *  non-zero) followed by the ascii part; lower levels print the ascii
+ *  part only. An empty segment is replaced by the -e string when one was
+ *  given. '/' is replaced by ':' in every citation segment.
+ *  Returns: pointer to a static buffer that is overwritten by the next
+ *  call; output that does not fit in the buffer is truncated
+ */
+const char *resolve_cite_format(const char *cformat) {
+    static char outbuf[511];
+    char nstring[253];
+    const char *c;
+
+    *outbuf = 0;
+    for (c = cformat; *c; c++) {
+        if (*c == '%') {
+            const char c2 = *(c+1);
+            int cstart = -1;
+
+            if (c2 == 0) {
+                /* a '%' at the very end has no code letter; stop here
+                 * rather than step over the string terminator
+                 */
+                fprintf(stderr, "unknown escape sequence: %%\n");
+                break;
+            }
+            if ((c2 >= 'a') && (c2 <= 'z')) {
+                cstart = c2 - 'a';
+            }
+            else if ((c2 >= 'A') && (c2 <= 'Z')) {
+                cstart = 26 + (c2 - 'A');
+            }
+            else if (c2 == '%') {
+                cite_append(outbuf, sizeof(outbuf), "%");
+            }
+            else {
+                fprintf(stderr, "unknown escape sequence: %%%c\n", c2);
+            }
+            c++; /* skip both our '%' and following character (by loop inc) */
+
+            if (cstart > 20) {
+                if (icitation[cstart] == 0)
+                    snprintf(nstring, sizeof(nstring), "%s", citation[cstart]);
+                else
+                    snprintf(nstring, sizeof(nstring), "%d%s", icitation[cstart], citation[cstart]);
+                if ((opt_ecit_blank) && (!*nstring))
+                    snprintf(nstring, sizeof(nstring), "%s", ecite);
+                cite_fix_delimiter(nstring);
+                cite_append(outbuf, sizeof(outbuf), nstring);
+            }
+            else if (cstart > -1) {
+                if (!citation[cstart][0]) {
+                    if (opt_ecit_blank) cite_append(outbuf, sizeof(outbuf), ecite);
+                }
+                else {
+                    /* work on a copy: the delimiter fix used to run over
+                     * the stale contents of nstring and so never reached
+                     * the citation text
+                     */
+                    snprintf(nstring, sizeof(nstring), "%s", citation[cstart]);
+                    cite_fix_delimiter(nstring);
+                    cite_append(outbuf, sizeof(outbuf), nstring);
+                }
+            }
+        }
+        else if (*c == '\\') {
+            const char c2 = *(c+1);
+
+            if (c2 == 0) {
+                /* a backslash at the very end escapes nothing */
+                break;
+            }
+            switch (c2) {
+            case 't': cite_append(outbuf, sizeof(outbuf), "\t"); break;
+            case 'n': cite_append(outbuf, sizeof(outbuf), "\n"); break;
+            case 'r': cite_append(outbuf, sizeof(outbuf), "\r"); break;
+            default: *nstring = c2; nstring[1] = 0; cite_append(outbuf, sizeof(outbuf), nstring); break;
+            }
+            c++; /* skip both our '\' and following character (by loop inc) */
+        }
+        else {
+            *nstring = *c; nstring[1] = 0; cite_append(outbuf, sizeof(outbuf), nstring);
+        }
+    }
+    return outbuf;
+}
+
+
+/* id_code:
+ * <iptr> points to the next character in the <input_buffer> to process;
+ * <optr> points to the next empty <output_buffer position.
+ * Returns: 0 or -1 for EOF
+ */
+int id_code(int input_count)
+{
+ int input_pointer_max;
+ int return_code;
+ int scratch;
+ int processing;
+ unsigned char idchar;
+ unsigned char outcode;
+
+
+ return_code = 0;
+ input_pointer_max = iptr + input_count;
+ processing = 1;
+ while (processing) {
+ if ((iptr < INRECSIZE) && (iptr < input_pointer_max)) {
+ outcode = 0;
+ idchar = input_buffer[iptr++];
+ if ((idchar < 0x80)) { /* text data - restore pointer and return*/
+ --iptr;
+ processing = 0;
+ } else { /* ID data - translate and write */
+ if (optr < OUTRECSIZE) {
+ id_process = 0; /* we don't have a command yet */
+ if (idchar >= 0xF0) {
+ switch (idchar)
+ {
+ case 0xF0: /* EOF */
+ return_code = -1; /* indicate EOF */
+ processing = 0;
+ break;
+ case 0xFE: /* End of block -- block is padded with nulls */
+ while (!input_buffer[iptr] && iptr<INRECSIZE) {
+ iptr++;
+ }
+ if (opt_debug_cit) printf("tlgu: EOB %x\n", iptr-1);
+ break;
+ case 0xFF: /* End of ASCII string */
+ if (opt_debug_cit) printf("tlgu: %d %s\n", id_level, citation[id_level]);
+ if (opt_debug_cit) printf("tlgu: EOS %x\n ", iptr-1);
+ break;
+ case 0xF8: /* Exception start */
+ if (opt_debug_cit) printf("tlgu: Exc start %x\n", iptr-1);
+ break;
+ case 0xF9: /* Exception end */
+ if (opt_debug_cit) printf("tlgu: Exc end %x\n", iptr-1);
+ break;
+ default:
+ break;
+ }
+ } else if (idchar >= 0xE0) {
+ /* The byte following an escape code is an ID byte
+ * Citation IDs can only be 0=a, 1=b, 2=c and 4=d
+ */
+ if (opt_debug_cit) printf("tlgu: Escape %x", idchar);
+ id_command = idchar & 0xF; /* get "command" nybble */
+ idchar = input_buffer[iptr++] & 0x7F; /* get ID level byte */
+ if (idchar >= 97) { /* descriptors hold the upper part of the array */
+ id_level = idchar - 97 + 26; /* create an index offset */
+ if (id_level > 51) {id_level = 51;} /* default to z */
+ } else {
+ id_level = idchar & 7; /* must be 0 - 4 */
+ if (id_level == 4) {id_level = 3;} /* adjust d level */
+ }
+ if (opt_debug_cit) printf(" ID level: %d\n", id_level);
+ id_process = 1; /* command must be processed */
+ } else if ((idchar >= 0x80) && (id_process == 0)) {
+ id_command = idchar & 0xF; /* get command first */
+ scratch = (idchar >> 4) & 0x7; /* try to create an offset */
+ //printf(" %x %x ", idchar, scratch);
+ switch (scratch)
+ {
+ case 0:
+ id_level = 25; /* z */
+ id_process = 1; /* command must be processed */
+ break;
+ case 1:
+ id_level = 24; /* y */
+ id_process = 1; /* command must be processed */
+ break;
+ case 2:
+ id_level = 23; /* x */
+ id_process = 1; /* command must be processed */
+ break;
+ case 3:
+ id_level = 22; /* w */
+ id_process = 1; /* command must be processed */
+ break;
+ case 4:
+ id_level = 21; /* v */
+ id_process = 1; /* command must be processed */
+ break;
+ case 5:
+ id_level = 13; /* n */
+ id_process = 1; /* command must be processed */
+ break;
+ default:
+ break;
+ }
+
+ }
+ if (id_process) {
+ switch (id_command)
+ {
+ case 0:
+ icitation[id_level]++; /* increment ID */
+ break;
+ case 1:
+ icitation[id_level] = 1; /* literal value */
+ break;
+ case 2:
+ icitation[id_level] = 2; /* literal value */
+ break;
+ case 3:
+ icitation[id_level] = 3; /* literal value */
+ break;
+ case 4:
+ icitation[id_level] = 4; /* literal value */
+ break;
+ case 5:
+ icitation[id_level] = 5; /* literal value */
+ break;
+ case 6:
+ icitation[id_level] = 6; /* literal value */
+ break;
+ case 7:
+ icitation[id_level] = 7; /* literal value */
+ break;
+ case 8:
+ idchar = input_buffer[iptr++]; /* 7 bit binary value */
+ icitation[id_level] = idchar & 0x7F;
+ break;
+ case 9:
+ idchar = input_buffer[iptr++]; /* 7 bit binary value */
+ icitation[id_level] = idchar & 0x7F;
+ idchar = input_buffer[iptr++]; /* single character */
+ citation[id_level][0] = idchar & 0x7F;
+ citation[id_level][1] = 0;
+ break;
+ case 0xa:
+ idchar = input_buffer[iptr++]; /* 7 bit binary value */
+ icitation[id_level] = idchar & 0x7F;
+ for (id_char=0; id_char < 31; id_char++) {
+ idchar = input_buffer[iptr++]; /* string */
+ if (idchar == 0xFF) {
+ citation[id_level][id_char] = 0; /* end of string */
+ break;
+ } else {
+ citation[id_level][id_char] = idchar & 0x7F;
+ }
+ }
+ break;
+ case 0xb:
+ idchar = input_buffer[iptr++]; /* 14 bit binary value */
+ scratch = (idchar & 0x7F) << 7; /* shift upper */
+ idchar = input_buffer[iptr++]; /* 14 bit binary value */
+ idchar &= 0x7F; /* mask sign bit */
+ scratch = scratch | idchar; /* combine */
+ icitation[id_level] = scratch;
+ break;
+ case 0xc:
+ idchar = input_buffer[iptr++]; /* 14 bit binary value */
+ scratch = (idchar & 0x7F) << 7; /* shift upper */
+ idchar = input_buffer[iptr++]; /* 14 bit binary value */
+ idchar &= 0x7F; /* mask sign bit */
+ scratch = scratch | idchar; /* combine */
+ icitation[id_level] = scratch;
+ idchar = input_buffer[iptr++]; /* single character */
+ citation[id_level][0] = idchar & 0x7F;
+ citation[id_level][1] = 0; /* end of string */
+ break;
+ case 0xd:
+ idchar = input_buffer[iptr++]; /* 14 bit binary value */
+ scratch = (idchar & 0x7F) << 7; /* shift upper */
+ idchar = input_buffer[iptr++]; /* 14 bit binary value */
+ idchar &= 0x7F; /* mask sign bit */
+ scratch = scratch | idchar; /* combine */
+ icitation[id_level] = scratch;
+ for (id_char=0; id_char < 31; id_char++) {
+ idchar = input_buffer[iptr++]; /* string */
+ if (idchar == 0xFF) {
+ citation[id_level][id_char] = 0; /* end of string */
+ break;
+ } else {
+ citation[id_level][id_char] = idchar & 0x7F;
+ }
+ }
+ break;
+ case 0xe:
+ /* same binary value, single character */
+ idchar = input_buffer[iptr++]; /* single character */
+ citation[id_level][0] = idchar & 0x7F;
+ citation[id_level][1] = 0; /* end of string */
+ break;
+ case 0xf:
+ icitation[id_level] = 0; /* no binary value */
+ for (id_char=0; id_char < 31; id_char++) {
+ idchar = input_buffer[iptr++]; /* string */
+ if (idchar == 0xFF) {
+ citation[id_level][id_char] = 0; /* end of string */
+ break;
+ } else {
+ citation[id_level][id_char] = idchar & 0x7F;
+ }
+ }
+
+ /* Keep tab of book changes, optionally split into books */
+ if (id_level == 1) {
+ if (strncmp(citation[1], previous_bcit[1], 31)) {
+ if (opt_multiple) {
+ /* Signal outer loop to stop
+ * after processing citation change
+ */
+ return_code = -2;
+ processing = 0;
+ if (opt_verbose) printf("\ntlgu: book citation: %s, previous: %s", citation[1], previous_bcit[1]);
+ }
+ strncpy(previous_bcit[1], citation[1], 31);
+ previous_bcit[1][31] = 0;
+ }
+ book_change = 1;
+ }
+ break;
+ default:
+ printf("tlgu: Unknown id_command: %x, iptr %x\n", id_command, iptr);
+ break;
+ }
+ if (opt_debug_cit) printf("tlgu: Command: %x ID level: %d, Binary: %d, ASCII: %s iptr++ %x\n",\
+ id_command, id_level,icitation[id_level], citation[id_level], iptr);
+
+ /* Adjust lower citation levels */
+ switch (id_level)
+ {
+ case 21:
+ icitation[22] = 1;
+ case 22:
+ icitation[23] = 1;
+ case 23:
+ icitation[24] = 1;
+ case 24:
+ icitation[25] = 1;
+ case 25:
+ outcode = 0;
+ break;
+ default:
+ break;
+ }
+
+ } /* id_process */
+
+ if (outcode) {
+ output_utf(outcode);
+ }
+
+ } else {
+ --iptr; /* output buffer full - restore pointer and return */
+ processing = 0;
+ }
+ } /* ID data processing */
+ } else { /* Finished processing all input */
+ processing = 0;
+ }
+ } /* while processing loop */
+ return return_code;
+}
diff --git a/tlgu/tlgu.h b/tlgu/tlgu.h
new file mode 100644
index 0000000..83aeb56
--- /dev/null
+++ b/tlgu/tlgu.h
@@ -0,0 +1,221 @@
+/* tlgu.h
+ *
+ * Copyright (C) 2004 Dimitri Marinakis
+ *
+ * Licensed under the terms of the GNU General Public License.
+ * ABSOLUTELY NO WARRANTY.
+ * See the file `COPYING' in this directory.
+ *
+ * Hellenic character codes
+ * Relevant Unicode standard tables:
+ * Greek and Coptic: 0370 - 03FF
+ * Greek Extended: 1F00 - 1FFF
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <ctype.h>
+
+
+#define INRECSIZE 0x2000 /* 8192: tlgu.c keeps its input buffer index (iptr) below this limit */
+#define OUTRECSIZE 0xFFFFF /* 1048575: tlgu.c keeps its output index (optr) below this limit */
+
+/* Beta code escapes and state processing codes (per the hellenic[] table comment, values under 0x20 are state change control codes) */
+#define HELLENIC 1
+#define ROMAN 2
+#define PUNCTUATION 3
+#define QUOTATION 4
+#define PAGE 5
+#define BRACKET 6
+#define QUASIBRACKET 7
+#define NONTEXT 8
+#define SYMBOL 9
+#define HELLENIC_UPPER 0xa
+#define HELLENIC_SELECT 0xb
+#define HELLENIC_SIGMA 0xc
+/* ACCENT reuses an existing character code above 0x1f (0x2f is ASCII '/') */
+#define ACCENT 0x2f
+#define HELLENIC_SIGMA_UPPER 0x10
+#define TABHALF 0x11
+#define ROMAN_SELECT 0x16 /* NOTE(review): usage of the *_SELECT codes is not visible in this header -- confirm in tlgu.c */
+#define PUNCTUATION_SELECT 0x1f
+#define QUOTATION_SELECT 0x29
+#define PAGE_SELECT 0x33
+#define BRACKET_SELECT 0x3d
+#define QUASIBRACKET_SELECT 0x47
+#define NONTEXT_SELECT 0x51
+#define SYMBOL_SELECT 0x5b
+#define TABHALF_SELECT 0x61
+
+/* code defines: Unicode code points of the sigma variants */
+#define SIGMEDIAL 0x3c3 /* U+03C3 GREEK SMALL LETTER SIGMA */
+#define SIGMEDIALUPPER 0x3a3 /* U+03A3 GREEK CAPITAL LETTER SIGMA */
+#define SIGFINAL 0x3c2 /* U+03C2 GREEK SMALL LETTER FINAL SIGMA */
+#define SIGFINALUPPER 0x3a3 /* same value as SIGMEDIALUPPER (U+03A3) */
+#define SIGLUNATE 0x3f2 /* U+03F2 GREEK LUNATE SIGMA SYMBOL */
+#define SIGLUNATEUPPER 0x3f9 /* U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL */
+
+/* accents: Unicode combining diacritical marks */
+#define PSILI 0x313 /* U+0313 COMBINING COMMA ABOVE */
+#define DASIA 0x314 /* U+0314 COMBINING REVERSED COMMA ABOVE */
+#define DIALYTIKA 0x308 /* U+0308 COMBINING DIAERESIS */
+#define VARIA 0x300 /* U+0300 COMBINING GRAVE ACCENT */
+#define OXIA 0x301 /* U+0301 COMBINING ACUTE ACCENT */
+#define PERISPOMENI 0x342 /* U+0342 COMBINING GREEK PERISPOMENI */
+#define YPOGEGRAMMENI 0x345 /* U+0345 COMBINING GREEK YPOGEGRAMMENI */
+#define CARET 0x302 /* U+0302 COMBINING CIRCUMFLEX ACCENT */
+
+/* TLG stream translation table -- Unicode
+ A B G D E Z H Q I K L M N C O P R S T U F X Y W V; V is digamma
+ A value under 0x20 is a state change control code.
+ Zero means no character.
+ */
+unsigned int hellenic[] = { /* beta code to Unicode; per the row labels, entries run in ASCII order from space; rows labelled a-z hold small Greek letters, A-Z capitals */
+ /* sp ! " # $ % & ' */
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ /* ( ) * + , - . / */
+ ACCENT, ACCENT, HELLENIC_UPPER, ACCENT, 0x2c, 0x2d, 0x2e, ACCENT,
+ /* 0 1 2 3 4 5 6 7 */
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ /* 8 9 : ; < = > ? @ */
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40,
+ /* a b c d e f g h */
+ 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3d5, 0x3b3, 0x3b7,
+ /* i j k l m n o p */
+ 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf, 0x3c0,
+ /* q r s t u v w x */
+ 0x3b8, 0x3c1, 0x3c2, 0x3c4, 0x3c5, 0x3dd, 0x3c9, 0x3c7,
+ /* y z [ \ ] ^ _ sep`*/
+ 0x3c8, 0x3b6, 0x54, 0x55, 0x56, 0x57, 0x00, 0x00,
+ /* A B C D E F G H */
+ 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393, 0x397,
+ /* I J K L M N O P */
+ 0x399, 0x3A3, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f, 0x3a0,
+ /* Q R S T U V W X -- NOTE(review): S maps to 0x3a2, a reserved (unassigned) code point; capital sigma is 0x3a3 (cf. SIGMEDIALUPPER) -- confirm tlgu.c special-cases it */
+ 0x398, 0x3a1, 0x3a2, 0x3a4, 0x3a5, 0x3dc, 0x3a9, 0x3a7,
+ /* Y Z { | } ~ DEL */
+ 0x3a8, 0x396, 0x7b, 0x7c, 0x7d, 0x00, 0x00};
+
+
+/* Accents can be described in three groups, all optional
+ * In the first group are - mutually exclusive - psili, dasia or dialytika
+ * In the second group are - mutually exclusive - oxia, varia or perispomeni
+ * In the third group are - mutually exclusive - ypogegrammeni, subscript dot or missing letter dot
+ * as the last two are not part of fully-formed characters, will be used as combining diacritical marks
+ * The simplified form is then:
+ * [ ) or ( or + ] [ / or \ or = ] [ | ]
+ *
+ * This can be described by 5 accent flag bits (reverse order)
+ *
+ * 0 00 00 --- 0 00 00 no accent
+ * | | |
+ * | | ---- 01 psili, 10 dasia, 11 dialytika
+ * | ------- 01 varia, 10 oxia, 11 perispomeni
+ * ----------- 1 ypogegrammeni
+ *
+ * The resulting table of accentable characters will have 32-character rows
+ * with the formed character codes in the appropriate positions, or zero:
+ * plain, psili, dasia, dialytika, varia, psili-varia, dasia-varia, dialytika-varia
+ * oxia, psili-oxia, dasia-oxia, dialytika-oxia, perispomeni, psili-perisp, dasia-perisp, dialytika-perisp
+ * ditto with ypogegrammeni
+ *
+ * If zero is returned, combining diacritical marks should be generated from the accent flags.
+ */
+unsigned int alpha[] = { /* small alpha by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03b1, 0x1f00, 0x1f01, 0x0000, 0x1f70, 0x1f02, 0x1f03, 0x0000, /* none | varia */
+ 0x1f71, 0x1f04, 0x1f05, 0x0000, 0x1fb6, 0x1f06, 0x1f07, 0x0000, /* oxia | perispomeni */
+ 0x1fb3, 0x1f80, 0x1f81, 0x0000, 0x1fb2, 0x1f82, 0x1f83, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x1fb4, 0x1f84, 0x1f85, 0x0000, 0x1fb7, 0x1f86, 0x1f87, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int Alpha[] = { /* capital alpha by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x0391, 0x1f08, 0x1f09, 0x0000, 0x1fba, 0x1f0a, 0x1f0b, 0x0000, /* none | varia */
+ 0x1fbb, 0x1f0c, 0x1f0d, 0x0000, 0x0000, 0x1f0e, 0x1f0f, 0x0000, /* oxia | perispomeni */
+ 0x1fbc, 0x1f88, 0x1f89, 0x0000, 0x0000, 0x1f8a, 0x1f8b, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x1f8c, 0x1f8d, 0x0000, 0x0000, 0x1f8e, 0x1f8f, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int epsilon[] = { /* small epsilon by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03b5, 0x1f10, 0x1f11, 0x0000, 0x1f72, 0x1f12, 0x1f13, 0x0000, /* none | varia */
+ 0x1f73, 0x1f14, 0x1f15, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+ unsigned int Epsilon[] = { /* capital epsilon by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x0395, 0x1f18, 0x1f19, 0x0000, 0x1fc8, 0x1f1a, 0x1f1b, 0x0000, /* none | varia */
+ 0x1fc9, 0x1f1c, 0x1f1d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int eta[] = { /* small eta by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03b7, 0x1f20, 0x1f21, 0x0000, 0x1f74, 0x1f22, 0x1f23, 0x0000, /* none | varia */
+ 0x1f75, 0x1f24, 0x1f25, 0x0000, 0x1fc6, 0x1f26, 0x1f27, 0x0000, /* oxia | perispomeni */
+ 0x1fc3, 0x1f90, 0x1f91, 0x0000, 0x1fc2, 0x1f92, 0x1f93, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x1fc4, 0x1f94, 0x1f95, 0x0000, 0x1fc7, 0x1f96, 0x1f97, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int Eta[] = { /* capital eta by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x0397, 0x1f28, 0x1f29, 0x0000, 0x1fca, 0x1f2a, 0x1f2b, 0x0000, /* none | varia */
+ 0x1fcb, 0x1f2c, 0x1f2d, 0x0000, 0x0000, 0x1f2e, 0x1f2f, 0x0000, /* oxia | perispomeni */
+ 0x1fcc, 0x1f98, 0x1f99, 0x0000, 0x0000, 0x1f9a, 0x1f9b, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x1f9c, 0x1f9d, 0x0000, 0x0000, 0x1f9e, 0x1f9f, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int iota[] = { /* small iota by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03b9, 0x1f30, 0x1f31, 0x03ca, 0x1f76, 0x1f32, 0x1f33, 0x1fd2, /* none | varia */
+ 0x1f77, 0x1f34, 0x1f35, 0x1fd3, 0x1fd6, 0x1f36, 0x1f37, 0x1fd7, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+ unsigned int Iota[] = { /* capital iota by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x0399, 0x1f38, 0x1f39, 0x03aa, 0x1fda, 0x1f3a, 0x1f3b, 0x0000, /* none | varia */
+ 0x1fdb, 0x1f3c, 0x1f3d, 0x0000, 0x0000, 0x1f3e, 0x1f3f, 0x0000, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int omicron[] = { /* small omicron by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03bf, 0x1f40, 0x1f41, 0x0000, 0x1f78, 0x1f42, 0x1f43, 0x0000, /* none | varia */
+ 0x1f79, 0x1f44, 0x1f45, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+ unsigned int Omicron[] = { /* capital omicron by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x039f, 0x1f48, 0x1f49, 0x0000, 0x1ff8, 0x1f4a, 0x1f4b, 0x0000, /* none | varia */
+ 0x1ff9, 0x1f4c, 0x1f4d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int ypsilon[] = { /* small upsilon by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03c5, 0x1f50, 0x1f51, 0x03cb, 0x1f7a, 0x1f52, 0x1f53, 0x1fe2, /* none | varia */
+ 0x1f7b, 0x1f54, 0x1f55, 0x1fe3, 0x1fe6, 0x1f56, 0x1f57, 0x1fe7, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+ unsigned int Ypsilon[] = { /* capital upsilon by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03a5, 0x0000, 0x1f59, 0x03ab, 0x1fea, 0x0000, 0x1f5b, 0x0000, /* none | varia */
+ 0x1feb, 0x0000, 0x1f5d, 0x0000, 0x0000, 0x0000, 0x1f5f, 0x0000, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int omega[] = { /* small omega by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03c9, 0x1f60, 0x1f61, 0x0000, 0x1f7c, 0x1f62, 0x1f63, 0x0000, /* none | varia */
+ 0x1f7d, 0x1f64, 0x1f65, 0x0000, 0x1ff6, 0x1f66, 0x1f67, 0x0000, /* oxia | perispomeni */
+ 0x1ff3, 0x1fa0, 0x1fa1, 0x0000, 0x1ff2, 0x1fa2, 0x1fa3, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x1ff4, 0x1fa4, 0x1fa5, 0x0000, 0x1ff7, 0x1fa6, 0x1fa7, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int Omega[] = { /* capital omega by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03a9, 0x1f68, 0x1f69, 0x0000, 0x1ffa, 0x1f6a, 0x1f6b, 0x0000, /* none | varia */
+ 0x1ffb, 0x1f6c, 0x1f6d, 0x0000, 0x0000, 0x1f6e, 0x1f6f, 0x0000, /* oxia | perispomeni; [12] is 0 (no precomposed capital omega with perispomeni) so the accent is not dropped */
+ 0x1ffc, 0x1fa8, 0x1fa9, 0x0000, 0x0000, 0x1faa, 0x1fab, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x1fac, 0x1fad, 0x0000, 0x0000, 0x1fae, 0x1faf, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int rho[] = { /* small rho by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03c1, 0x1fe4, 0x1fe5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* none | varia */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
+unsigned int Rho[] = { /* capital rho by accent flags; groups of 4 = plain, psili, dasia, dialytika; 0 = no precomposed form */
+ 0x03a1, 0x0000, 0x1fec, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* none | varia */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* oxia | perispomeni */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* ypogegrammeni + (none | varia) */
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 /* ypogegrammeni + (oxia | perispomeni) */
+ };
diff --git a/tlgu/tlgu.html b/tlgu/tlgu.html
new file mode 100644
index 0000000..30a0016
--- /dev/null
+++ b/tlgu/tlgu.html
@@ -0,0 +1,565 @@
+<html>
+<head>
+<meta name="generator" content="groff -Thtml, see www.gnu.org">
+<meta name="Content-Style" content="text/css">
+<title>tlgu</title>
+</head>
+<body>
+
+<h1 align=center>tlgu</h1>
+<a href="#NAME">NAME</a><br>
+<a href="#SYNOPSIS">SYNOPSIS</a><br>
+<a href="#DESCRIPTION">DESCRIPTION</a><br>
+<a href="#OPTIONS">OPTIONS</a><br>
+<a href="#HISTORY AND INTENDED USE">HISTORY AND INTENDED USE</a><br>
+<a href="#EXAMPLES">EXAMPLES</a><br>
+<a href="#POST-PROCESSING EXAMPLES">POST-PROCESSING EXAMPLES</a><br>
+<a href="#REFERENCES">REFERENCES</a><br>
+<a href="#COPYRIGHT">COPYRIGHT</a><br>
+
+<hr>
+<!-- Creator : groff version 1.17.2 -->
+<!-- CreationDate: Sun Mar 6 13:42:46 2005 -->
+<a name="NAME"></a>
+<h2>NAME</h2>
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+tlgu - convert TLG (D) CD-ROM txt files to Unicode</td></table>
+<a name="SYNOPSIS"></a>
+<h2>SYNOPSIS</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>tlgu</b> [ <i>options</i> ] <i>input_file
+output_file</i></td></table>
+<a name="DESCRIPTION"></a>
+<h2>DESCRIPTION</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>tlgu</b> will convert an <i>input_file</i> from Thesaurus
+Linguae Graecae (TLG) representation to a Unicode (UTF-8)
+<i>output_file</i>. The TLG representation consists of
+<b>beta-code</b> text and <b>citation</b>
+information.</td></table>
+<a name="OPTIONS"></a>
+<h2>OPTIONS</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-b</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+inserts a form feed and citation information (levels a, b,
+c, d) on every &quot;book&quot; citation change. By default
+the program will output line feeds only (see also
+<b>-p</b>).</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-p</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+observes paging instructions. By default the program will
+output line feeds only.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-r</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+primarily Roman text. Some TLG texts, notably doccan1.txt
+and doccan2.txt are mainly roman texts lacking explicit
+language change codes. Setting this option will force a
+change to roman text after each citation block is
+encountered.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-v</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+highest-level reference citation is included before each
+text line (v-level)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-w</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+reference citation is included before each text line
+(w-level)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-x</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+reference citation is included before each text line
+(x-level)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-y</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+reference citation is included before each text line
+(y-level)</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-z</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+lowest-level reference citation is included before each text
+line (z-level).</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-B</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+inserts blank space (a tab) before each and every
+line.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-C</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+citation debug information is output.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-S</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+special code debug information is output.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-V</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+block processing information is output
+(verbose).</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>-W</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+each work (book) is output as a separate file in the form
+output_file-xxx.txt</td></table>
+<a name="HISTORY AND INTENDED USE"></a>
+<h2>HISTORY AND INTENDED USE</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+The purpose of <b>tlgu</b> is to translate binary TLG-format
+files into readable and editable text. It is based on an
+earlier program written in 80x86 assembly language (1996)
+outputting codes for a home-made font which used the
+prevalent hellenic font encodings of that time complemented
+by dead accent characters - not very attractive, but
+readable.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+Then came Unicode and a plethora of accented character
+glyphs; nice-looking but with the well-known drawback that
+special processing is needed to do wild-card searches. Nice
+polytonic fonts have now been made available (Cardo,
+Gentium, Athena, Athenian, Porson) and, surely, these will
+be expanded as special-use code points are included in the
+Unicode definition (musical symbols, other special symbols)
+and more fonts will be created.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+So, at this point in time, <b>tlgu</b> will crunch a file
+which has been formatted according to the published TLG-D
+format and produce codes for most glyphs generally
+available. No attempt has been made to introduce
+multi-character sequences or formatting codes (font
+changes). If a code has not been defined, the program will
+output the respective &quot;code family&quot; glyph. You may
+use the <b>-S</b> option to check such codes against the
+published beta code definition.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+You may not like the character output for a specific code.
+Check out the <b>tlgcodes.h</b> file containing the special
+symbol and punctuation codes and select one to suit you
+better. It will probably be a while before the beta to
+Unicode correspondence settles down.</td></table>
+<a name="EXAMPLES"></a>
+<h2>EXAMPLES</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>./tlgu -r DOCCAN2.TXT doccanu.txt</b> Translate the TLG
+canon to a unicode text file. Note the use of the <b>-r</b>
+option (this file expects Roman as the default
+font).</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>./tlgu -x -y -z TLG1799.TXT tlg1799u.txt</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Generate a continuous file with the texts of grandpa
+Euclides. Available citations (-x -y -z) are
+Book//demonstratio/line as shown in the respective
+&quot;cit&quot; field of doccan2.txt.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>./tlgu -b -B TLG1799.TXT tlg1799u.txt</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Generate the same texts, this time with a page feed and book
+citation information on the first page of each book and a
+tab before each line (use with OOo versions earlier than
+1.1.4).</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>./tlgu -C TLG1799.TXT tlg1799u.txt</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+See how the citation information changes within each TLG
+block.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>./tlgu -S TLG1799.TXT tlg1799u.txt | sort &gt;
+symbols1799.txt</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Check out the symbols used in a work. Book and x, y, z
+references are printed on a separate line for each symbol.
+Sort / grep the output to locate specific symbols of
+interest; save in a file for later use.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>./tlgu -W TLG0006.TXT tlg0006u</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Will produce separate files for each work, named
+tlg0006u-001.txt etc.</td></table>
+<a name="POST-PROCESSING EXAMPLES"></a>
+<h2>POST-PROCESSING EXAMPLES</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+I use the OpenOffice suite for most of my work. This example
+shows one of many possible ways of using the search and
+replace facility to create a readable version of the Suda
+lexicon.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>./tlgu -B TLG4085.TXT tlg4085u.txt</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+A Unicode file with the text is created</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>Open the generated file with OOo:</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+File | Open | Filename: tlg4085u.txt, File Type: Text
+Encoded -- Press Open</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+The ASCII Filter Options window appears. Select the Unicode
+(UTF-8) character set and a proper Unicode font installed in
+your machine (e.g. Cardo). Press OK.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>Replace angle brackets with expanded
+text</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Lexicon terms are enclosed in &lt;angle brackets&gt;. The
+actual beta codes indicate the use of expanded text for
+emphasis. Select Edit | Find &amp; Replace. The <b>Find
+&amp; Replace</b> window appears.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+In the <b>Search For</b> field, type the following
+expression: <b>&lt;[^&lt;&gt;]*&gt;</b> This means
+&quot;find any characters between angle brackets, not
+including angle brackets&quot;.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+In the <b>Replace With</b> window insert a single ampersand:
+<b>&amp;</b> This means that we need to <b>add</b>
+formatting information (this case) or additional text to the
+text found. Press <b>Format...</b> and select the
+<b>Position</b> tab; select Spacing Expanded by 2.0 points.
+Press OK.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Check the <b>Regular Expressions</b> box and press
+<b>Replace All</b>.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+You may now replace the angle brackets with
+nothing.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+Repeat the above procedure for titles enclosed in {braces}.
+Write a macro...</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+<b>Other useful information</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+In the &quot;Execute&quot; tab of the &quot;Properties&quot;
+window of my KDesktop Link to Application I have the
+following command (single line):<b><br>
+LC_CTYPE=el_GR.UTF-8
+/whereitsat/OpenOffice.org1.1.x/soffice</b><br>
+The prefix, an environment variable, allows you to use the
+same program with different locales; in this case, Hellenic
+Unicode (UTF-8).</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+I put my default locale and keyboard definitions in my
+<b>.profile</b>:<b><br>
+export LC_CTYPE=el_GR.UTF-8<br>
+setxkbmap us+el polytonic -option
+grp:ctrl_shift_toggle</b></td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="21%"></td><td width="79%">
+This way multi-lingual text can be entered; keyboard layout
+switching is done by pressing Ctrl/Shift.</td></table>
+<a name="REFERENCES"></a>
+<h2>REFERENCES</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+There are several texts describing the internal
+representation of <b>PHI</b> and <b>TLG</b> text, ID data,
+citation data and index files. The originator of this format
+is the Packard Humanities Institute. The TLG is maintained
+by UCI - see <b>www.tlg.uci.edu</b> - where you may find the
+<b>TLG Beta Code Manual</b> and the <b>TLG Beta Code Quick
+Reference Guide</b>.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+Unicode consortium publications pertaining to the
+codification of characters used in Hellenic literature,
+scientific and musical texts.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+The OpenOffice suite (<b>www.openoffice.org</b>) includes a
+word processor that you can use to load, process and create
+new polytonic texts.</td></table>
+<a name="COPYRIGHT"></a>
+<h2>COPYRIGHT</h2>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+Copyright (C) 2004, 2005 Dimitri Marinakis (dm ssa
+gr).</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+This program is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public
+License (version 2) as published by the Free Software
+Foundation.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+This program is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied
+warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+PURPOSE. See the GNU General Public License for more
+details.</td></table>
+
+<table width="100%" border=0 rules="none" frame="void"
+ cols="2" cellspacing="0" cellpadding="0">
+<tr valign="top" align="left">
+<td width="10%"></td><td width="90%">
+You should have received a copy of the GNU General Public
+License along with this program; if not, write to the Free
+Software Foundation, Inc., 59 Temple Place, Suite 330,
+Boston, MA 02111-1307 USA</td></table>
+<hr>
+</body>
+</html>
diff --git a/tlgu/tlgu.pdf b/tlgu/tlgu.pdf
new file mode 100644
index 0000000..d8d501e
--- /dev/null
+++ b/tlgu/tlgu.pdf
Binary files differ
diff --git a/tlgu/tlgu.ps b/tlgu/tlgu.ps
new file mode 100644
index 0000000..2c97404
--- /dev/null
+++ b/tlgu/tlgu.ps
@@ -0,0 +1,421 @@
+%!PS-Adobe-3.0
+%%Creator: groff version 1.17.2
+%%CreationDate: Sun Mar 6 13:42:52 2005
+%%DocumentNeededResources: font Times-Roman
+%%+ font Times-Bold
+%%+ font Times-Italic
+%%DocumentSuppliedResources: procset grops 1.17 2
+%%Pages: 3
+%%PageOrder: Ascend
+%%Orientation: Portrait
+%%EndComments
+%%BeginProlog
+%%BeginResource: procset grops 1.17 2
+/setpacking where{
+pop
+currentpacking
+true setpacking
+}if
+/grops 120 dict dup begin
+/SC 32 def
+/A/show load def
+/B{0 SC 3 -1 roll widthshow}bind def
+/C{0 exch ashow}bind def
+/D{0 exch 0 SC 5 2 roll awidthshow}bind def
+/E{0 rmoveto show}bind def
+/F{0 rmoveto 0 SC 3 -1 roll widthshow}bind def
+/G{0 rmoveto 0 exch ashow}bind def
+/H{0 rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def
+/I{0 exch rmoveto show}bind def
+/J{0 exch rmoveto 0 SC 3 -1 roll widthshow}bind def
+/K{0 exch rmoveto 0 exch ashow}bind def
+/L{0 exch rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def
+/M{rmoveto show}bind def
+/N{rmoveto 0 SC 3 -1 roll widthshow}bind def
+/O{rmoveto 0 exch ashow}bind def
+/P{rmoveto 0 exch 0 SC 5 2 roll awidthshow}bind def
+/Q{moveto show}bind def
+/R{moveto 0 SC 3 -1 roll widthshow}bind def
+/S{moveto 0 exch ashow}bind def
+/T{moveto 0 exch 0 SC 5 2 roll awidthshow}bind def
+/SF{
+findfont exch
+[exch dup 0 exch 0 exch neg 0 0]makefont
+dup setfont
+[exch/setfont cvx]cvx bind def
+}bind def
+/MF{
+findfont
+[5 2 roll
+0 3 1 roll
+neg 0 0]makefont
+dup setfont
+[exch/setfont cvx]cvx bind def
+}bind def
+/level0 0 def
+/RES 0 def
+/PL 0 def
+/LS 0 def
+/MANUAL{
+statusdict begin/manualfeed true store end
+}bind def
+/PLG{
+gsave newpath clippath pathbbox grestore
+exch pop add exch pop
+}bind def
+/BP{
+/level0 save def
+1 setlinecap
+1 setlinejoin
+72 RES div dup scale
+LS{
+90 rotate
+}{
+0 PL translate
+}ifelse
+1 -1 scale
+}bind def
+/EP{
+level0 restore
+showpage
+}bind def
+/DA{
+newpath arcn stroke
+}bind def
+/SN{
+transform
+.25 sub exch .25 sub exch
+round .25 add exch round .25 add exch
+itransform
+}bind def
+/DL{
+SN
+moveto
+SN
+lineto stroke
+}bind def
+/DC{
+newpath 0 360 arc closepath
+}bind def
+/TM matrix def
+/DE{
+TM currentmatrix pop
+translate scale newpath 0 0 .5 0 360 arc closepath
+TM setmatrix
+}bind def
+/RC/rcurveto load def
+/RL/rlineto load def
+/ST/stroke load def
+/MT/moveto load def
+/CL/closepath load def
+/FL{
+currentgray exch setgray fill setgray
+}bind def
+/BL/fill load def
+/LW/setlinewidth load def
+/RE{
+findfont
+dup maxlength 1 index/FontName known not{1 add}if dict begin
+{
+1 index/FID ne{def}{pop pop}ifelse
+}forall
+/Encoding exch def
+dup/FontName exch def
+currentdict end definefont pop
+}bind def
+/DEFS 0 def
+/EBEGIN{
+moveto
+DEFS begin
+}bind def
+/EEND/end load def
+/CNT 0 def
+/level1 0 def
+/PBEGIN{
+/level1 save def
+translate
+div 3 1 roll div exch scale
+neg exch neg exch translate
+0 setgray
+0 setlinecap
+1 setlinewidth
+0 setlinejoin
+10 setmiterlimit
+[]0 setdash
+/setstrokeadjust where{
+pop
+false setstrokeadjust
+}if
+/setoverprint where{
+pop
+false setoverprint
+}if
+newpath
+/CNT countdictstack def
+userdict begin
+/showpage{}def
+}bind def
+/PEND{
+clear
+countdictstack CNT sub{end}repeat
+level1 restore
+}bind def
+end def
+/setpacking where{
+pop
+setpacking
+}if
+%%EndResource
+%%IncludeResource: font Times-Roman
+%%IncludeResource: font Times-Bold
+%%IncludeResource: font Times-Italic
+grops begin/DEFS 1 dict def DEFS begin/u{.001 mul}bind def end/RES 72
+def/PL PLG def/LS false def/ENC0[/asciicircum/asciitilde/Scaron/Zcaron
+/scaron/zcaron/Ydieresis/trademark/quotesingle/.notdef/.notdef/.notdef
+/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef
+/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef/.notdef
+/.notdef/.notdef/space/exclam/quotedbl/numbersign/dollar/percent
+/ampersand/quoteright/parenleft/parenright/asterisk/plus/comma/hyphen
+/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon
+/semicolon/less/equal/greater/question/at/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O
+/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash/bracketright/circumflex
+/underscore/quoteleft/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y
+/z/braceleft/bar/braceright/tilde/.notdef/quotesinglbase/guillemotleft
+/guillemotright/bullet/florin/fraction/perthousand/dagger/daggerdbl
+/endash/emdash/ff/fi/fl/ffi/ffl/dotlessi/dotlessj/grave/hungarumlaut
+/dotaccent/breve/caron/ring/ogonek/quotedblleft/quotedblright/oe/lslash
+/quotedblbase/OE/Lslash/.notdef/exclamdown/cent/sterling/currency/yen
+/brokenbar/section/dieresis/copyright/ordfeminine/guilsinglleft
+/logicalnot/minus/registered/macron/degree/plusminus/twosuperior
+/threesuperior/acute/mu/paragraph/periodcentered/cedilla/onesuperior
+/ordmasculine/guilsinglright/onequarter/onehalf/threequarters
+/questiondown/Agrave/Aacute/Acircumflex/Atilde/Adieresis/Aring/AE
+/Ccedilla/Egrave/Eacute/Ecircumflex/Edieresis/Igrave/Iacute/Icircumflex
+/Idieresis/Eth/Ntilde/Ograve/Oacute/Ocircumflex/Otilde/Odieresis
+/multiply/Oslash/Ugrave/Uacute/Ucircumflex/Udieresis/Yacute/Thorn
+/germandbls/agrave/aacute/acircumflex/atilde/adieresis/aring/ae/ccedilla
+/egrave/eacute/ecircumflex/edieresis/igrave/iacute/icircumflex/idieresis
+/eth/ntilde/ograve/oacute/ocircumflex/otilde/odieresis/divide/oslash
+/ugrave/uacute/ucircumflex/udieresis/yacute/thorn/ydieresis]def
+/Times-Italic@0 ENC0/Times-Italic RE/Times-Bold@0 ENC0/Times-Bold RE
+/Times-Roman@0 ENC0/Times-Roman RE
+%%EndProlog
+%%Page: 1 1
+%%BeginPageSetup
+BP
+%%EndPageSetup
+/F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5
+E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F/F1 10.95/Times-Bold@0 SF
+-.219(NA)72 84 S(ME).219 E F0(tlgu \255 con)108 96 Q -.15(ve)-.4 G
+(rt TLG \(D\) CD-R).15 E(OM txt \214les to Unicode)-.4 E F1(SYNOPSIS)72
+124.8 Q/F2 10/Times-Bold@0 SF(tlgu)108 136.8 Q F0([)2.5 E/F3 10
+/Times-Italic@0 SF(options)2.5 E F0(])2.5 E F3
+(input_\214le output_\214le)2.5 E F1(DESCRIPTION)72 165.6 Q F2(tlgu)108
+177.6 Q F0 .269(will con)2.769 F -.15(ve)-.4 G .269(rt an).15 F F3
+(input_\214le)2.769 E F0 .268(from Thesaurus Linguae Graeca \(TLG\) rep\
+resentation to a Unicode \(UTF-8\))2.769 F F3(output_\214le)108 189.6 Q
+F0 5(.T)C(he TLG representation consists of)-5 E F2(beta-code)2.5 E F0
+(te)2.5 E(xt and)-.15 E F2(citation)2.5 E F0(information.)2.5 E F1
+(OPTIONS)72 218.4 Q F2<ad62>108 230.4 Q F0 .218
+(inserts a form feed and citation information \(le)24.74 F -.15(ve)-.25
+G .218(ls a, b, c, d\) on e).15 F -.15(ve)-.25 G .219
+(ry "book" citation change.).15 F(By)5.219 E(def)144 242.4 Q
+(ault the program will output line feeds only \(see also)-.1 E F2<ad70>
+2.5 E F0(\).)A F2<ad70>108 259.2 Q F0(observ)24.74 E
+(es paging instructions.)-.15 E(By def)5 E
+(ault the program will output line feeds only)-.1 E(.)-.65 E F2<ad72>108
+276 Q F0 1.362(primarily Roman te)25.86 F 1.362(xt. Some TLG te)-.15 F
+1.361(xts, notably doccan1.txt and doccan2.txt are mainly roman)-.15 F
+(te)144 288 Q .192(xts lacking e)-.15 F .192
+(xplicit language change codes.)-.15 F .193
+(Setting this option will force a change to roman te)5.193 F(xt)-.15 E
+(after each citation block is encountered.)144 300 Q F2<ad76>108 316.8 Q
+F0(highest-le)25.3 E -.15(ve)-.25 G 2.5(lr).15 G
+(eference citation is included before each te)-2.5 E(xt line \(v-le)-.15
+E -.15(ve)-.25 G(l\)).15 E F2<ad77>108 333.6 Q F0
+(reference citation is included before each te)23.08 E(xt line \(w-le)
+-.15 E -.15(ve)-.25 G(l\)).15 E F2<ad78>108 350.4 Q F0
+(reference citation is included before each te)25.3 E(xt line \(x-le)
+-.15 E -.15(ve)-.25 G(l\)).15 E F2<ad79>108 367.2 Q F0
+(reference citation is included before each te)25.3 E(xt line \(y-le)
+-.15 E -.15(ve)-.25 G(l\)).15 E F2<ad7a>108 384 Q F0(lo)25.86 E(west-le)
+-.25 E -.15(ve)-.25 G 2.5(lr).15 G
+(eference citation is included before each te)-2.5 E(xt line \(z-le)-.15
+E -.15(ve)-.25 G(l\).).15 E F2<ad42>108 412.8 Q F0
+(inserts blank space \(a tab\) before each and e)23.63 E -.15(ve)-.25 G
+(ry line.).15 E F2<ad43>108 429.6 Q F0(citation deb)23.08 E
+(ug information is output.)-.2 E F2<ad53>108 446.4 Q F0
+(special code deb)24.74 E(ug information is output.)-.2 E F2<ad56>108
+463.2 Q F0(block processing information is output \(v)23.08 E(erbose\).)
+-.15 E F2<ad57>108 480 Q F0(each w)20.3 E(ork \(book\) is output as a s\
+eparate \214le in the form output_\214le-xxx.txt)-.1 E F1(HIST)72 508.8
+Q(OR)-.197 E 2.738(YA)-.383 G(ND INTENDED USE)-2.738 E F0 .103
+(The purpose of)108 520.8 R F2(tlgu)2.602 E F0 .102(is to translate bin\
+ary TLG-format \214les into readable and editable te)2.602 F 2.602
+(xt. It)-.15 F .102(is based on an)2.602 F .624(earlier program written\
+ in 80x86 assembly language \(1996\) outputting codes for a home-made f\
+ont which)108 532.8 R .485(used the pre)108 544.8 R -.25(va)-.25 G .484
+(lent hellenic font encodings of that time complemented by dead accent \
+characters - not v).25 F(ery)-.15 E(attracti)108 556.8 Q -.15(ve)-.25 G
+2.5(,b).15 G(ut readable.)-2.7 E 1.412(Then came Unicode and a plethora\
+ of accented character glyphs; nice-looking b)108 580.8 R 1.413
+(ut with the well-kno)-.2 F(wn)-.25 E(dra)108 592.8 Q .616
+(wback that special processing is needed to do wild-card searches.)-.15
+F .616(Nice polytonic fonts ha)5.616 F .916 -.15(ve n)-.2 H 1.116 -.25
+(ow b).15 H(een).25 E .277(made a)108 604.8 R -.25(va)-.2 G .278
+(ilable \(Cardo, Gentium, Athena, Athenian, Porson\) and, surely).25 F
+2.778(,t)-.65 G .278(hese will be e)-2.778 F .278(xpanded as special-)
+-.15 F .581(use code points are included in the Unicode de\214nition \(\
+musical symbols, other special symbols\) and more)108 616.8 R
+(fonts will be created.)108 628.8 Q .034(So, at this point in time,)108
+652.8 R F2(tlgu)2.534 E F0 .034(will crunch a \214le which has been for\
+matted according to the published TLG-D)2.534 F 1.41
+(format and produce codes for most glyphs generally a)108 664.8 R -.25
+(va)-.2 G 3.909(ilable. No).25 F 1.409
+(attempt has been made to introduce)3.909 F .437
+(multi-character sequences or formatting codes \(font changes\).)108
+676.8 R .438(If a code has not been de\214ned, the program)5.437 F .238
+(will output the respecti)108 688.8 R .538 -.15(ve ")-.25 H .238(code f)
+.15 F .238(amily" glyph.)-.1 F -1.1(Yo)5.238 G 2.738(um)1.1 G .238
+(ay use the)-2.738 F F2<ad53>2.738 E F0 .237
+(option to check such codes ag)2.738 F .237(ainst the)-.05 F
+(published beta code de\214nition.)108 700.8 Q -1.1(Yo)108 724.8 S 3.868
+(um)1.1 G 1.368(ay not lik)-3.868 F 3.868(et)-.1 G 1.368
+(he character output for a speci\214c code.)-3.868 F 1.368
+(Check out the)6.368 F F2(tlgcodes.h)3.869 E F0 1.369
+(\214le containing the)3.869 F -1.11(Ve)72 768 S(rsion 1.1)1.11 E
+(Feb, 2005)168.45 E(1)209 E EP
+%%Page: 2 2
+%%BeginPageSetup
+BP
+%%EndPageSetup
+/F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5
+E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F .577
+(special symbol and punctuation codes and select one to suit you better)
+108 84 R 5.577(.I)-.55 G 3.077(tw)-5.577 G .577
+(ill probably be a while before)-3.077 F
+(the beta to Unicode correspondence settles do)108 96 Q(wn.)-.25 E/F1
+10.95/Times-Bold@0 SF(EXAMPLES)72 136.8 Q/F2 10/Times-Bold@0 SF .363
+(./tlgu -r DOCCAN2.TXT doccanu.txt)108 148.8 R F0 -.35(Tr)2.863 G .363
+(anslate the TLG canon to a unicode te).35 F .363
+(xt \214le. Note the use of the)-.15 F F2(-r)108 160.8 Q F0
+(option \(this \214le e)2.5 E(xpects Roman as the def)-.15 E
+(ault font\).)-.1 E F2(./tlgu -x -y -z TLG1799.TXT tlg1799u.txt)108
+177.6 Q F0 1.667(Generate a continuous \214le with the te)144 189.6 R
+1.667(xts of granpa Euclides. A)-.15 F -.25(va)-.74 G 1.666
+(ilable citations \(-x -y -z\) are).25 F(Book//demonstratio/line as sho)
+144 201.6 Q(wn in the respecti)-.25 E .3 -.15(ve ")-.25 H
+(cit" \214eld of doccan2.txt.).15 E F2
+(./tlgu -b -B TLG1799.TXT tlg1799u.txt)108 218.4 Q F0 .267
+(Generate the same te)144 230.4 R .267(xts, this time with a page feed \
+and book citation information on the \214rst page)-.15 F
+(of each book and a tab before each line \(use with OOo v)144 242.4 Q
+(ersions earlier than 1.1.4\).)-.15 E F2
+(./tlgu -C TLG1799.TXT tlg1799u.txt)108 259.2 Q F0(See ho)144 271.2 Q
+2.5(wt)-.25 G(he citation information changes within each TLG block.)
+-2.5 E F2(./tlgu -S TLG1799.TXT tlg1799u.txt | sort > symbols1799.txt)
+108 288 Q F0 .521(Check out the symbols used in a w)144 300 R 3.021
+(ork. Book)-.1 F .521(and x, y)3.021 F 3.02(,zr)-.65 G .52
+(eferences are printed on a separate line)-3.02 F .19(for each symbol. \
+Sort / grep the output to locate speci\214c symbols of interest; sa)144
+312 R .491 -.15(ve i)-.2 H 2.691(na\214).15 G .191(le for later)-2.691 F
+(use.)144 324 Q F2(./tlgu -W TLG0006.TXT tlg0006u)108 340.8 Q F0 -.4(Wi)
+144 352.8 S(ll produce separate \214les for each w).4 E
+(ork, named tlg006u-001.txt etc.)-.1 E F1(POST)72 381.6 Q(-PR)-1.007 E
+(OCESSING EXAMPLES)-.329 E F0 2.939(Iu)108 393.6 S .439(se the OpenOf)
+-2.939 F .439(\214ce suite for most of my w)-.25 F 2.938(ork. This)-.1 F
+-.15(ex)2.938 G .438(ample sho).15 F .438(ws one of man)-.25 F 2.938(yp)
+-.15 G .438(ossible w)-2.938 F .438(ays of using)-.1 F
+(the search and replace f)108 405.6 Q(acility to create a readable v)-.1
+E(ersion of the Suda le)-.15 E(xicon.)-.15 E F2
+(./tlgu -B TLG4085.TXT tlg4085u.txt)108 422.4 Q F0 2.5(AU)144 434.4 S
+(nicode \214le with the te)-2.5 E(xt is created)-.15 E F2
+(Open the generated \214le with OOo:)108 451.2 Q F0
+(File | Open | Filename: tlg4085u.txt, File T)144 463.2 Q(ype: T)-.8 E
+-.15(ex)-.7 G 2.5(tE).15 G(ncoded \255\255 Press Open)-2.5 E .274
+(The ASCII Filter Options windo)144 487.2 R 2.774(wa)-.25 G .274
+(ppears. Select the Unicode \(UTF-8\) character set and a proper)-2.774
+F(Unicode font installed in your machine \(e.g. Cardo\).)144 499.2 Q
+(Press OK.)5 E F2(Replace angle brack)108 516 Q(ets with expanded text)
+-.1 E F0(Le)144 528 Q 2.545(xicon terms are enclosed in <angle brack)
+-.15 F 5.044(ets>. The)-.1 F 2.544
+(actual beta codes indicate the use of)5.044 F -.15(ex)144 540 S .158
+(panded te).15 F .158(xt for emphasis.)-.15 F .159
+(Select Edit | Find & Replace.)5.158 F(The)5.159 E F2 .159
+(Find & Replace)2.659 F F0(windo)2.659 E 2.659(wa)-.25 G(ppears.)-2.659
+E .468(In the)144 564 R F2(Sear)2.968 E .468(ch F)-.18 F(or)-.25 E F0
+.468(\214eld, type the follo)2.968 F .468(wing e)-.25 F(xpression:)-.15
+E F2(<[^<>]*>)2.968 E F0 .468(This means "\214nd an)2.968 F 2.967(yc)
+-.15 G(haracters)-2.967 E(between angle brack)144 576 Q
+(ets, not including angle brack)-.1 E(ets".)-.1 E .768(In the)144 600 R
+F2 .768(Replace W)3.268 F(ith)-.18 E F0(windo)3.269 E 3.269(wi)-.25 G
+.769(nsert a single ampersand:)-3.269 F F2(&)3.269 E F0 .769
+(This means that we need to)3.269 F F2(add)3.269 E F0(for)3.269 E(-)-.2
+E .403(matting information \(this case\) or additional te)144 612 R .403
+(xt to the te)-.15 F .403(xt found.)-.15 F(Press)5.402 E F2 -.25(Fo)
+2.902 G(rmat...).25 E F0 .402(and select the)2.902 F F2 -.2(Po)144 624 S
+(sition).2 E F0(tab; select Spacing Expanded by 2.0 points.)2.5 E
+(Press OK.)5 E(Check the)144 648 Q F2(Regular Expr)2.5 E(essions)-.18 E
+F0(box and press)2.5 E F2(Replace All)2.5 E F0(.)A -1.1(Yo)144 672 S 2.5
+(um)1.1 G(ay no)-2.5 E 2.5(wr)-.25 G(eplace the angle brack)-2.5 E
+(ets with nothings.)-.1 E(Repeat the abo)144 696 Q .3 -.15(ve p)-.15 H
+(rocedure for titles enclosed in {braces}.).15 E(Write a macro...)5 E
+-1.11(Ve)72 768 S(rsion 1.1)1.11 E(Feb, 2005)168.45 E(2)209 E EP
+%%Page: 3 3
+%%BeginPageSetup
+BP
+%%EndPageSetup
+/F0 10/Times-Roman@0 SF 150.26(tlgu\(1\) TLG)72 48 R(to Unicode Con)2.5
+E -.15(ve)-.4 G 150.26(rter tlgu\(1\)).15 F/F1 10/Times-Bold@0 SF
+(Other useful inf)108 84 Q(ormation)-.25 E F0 .759(In the "Ex)144 96 R
+.759(ecute" tab of the "Properties" windo)-.15 F 3.259(wo)-.25 G 3.259
+(fm)-3.259 G 3.259(yK)-3.259 G .76(Desktop Link to Application I ha)
+-3.259 F 1.06 -.15(ve t)-.2 H(he).15 E(follo)144 108 Q
+(wing command \(single line\):)-.25 E F1(LC_CTYPE=el_GR.UTF-8 /wher)144
+120 Q(eitsat/OpenOf\214ce.or)-.18 E(g1.1.x/sof\214ce)-.1 E F0 .278
+(The pre\214x, an en)144 132 R .278(vironment v)-.4 F .278
+(ariable, allo)-.25 F .277(ws you to use the same program with dif)-.25
+F .277(ferent locales; in)-.25 F(this case, hellenic Unicode \(UTF-8\).)
+144 144 Q 2.5(Ip)144 168 S(ut my def)-2.5 E(ault locale and k)-.1 E -.15
+(ey)-.1 G(board de\214nitions in my).15 E F1(.pr)2.5 E(o\214le)-.18 E F0
+(:)A F1(export LC_CTYPE=el_GR.UTF-8)144 180 Q
+(setxkbmap us+el polytonic -option gr)144 192 Q(p:ctrl_shift_toggle)-.1
+E F0(This w)144 216 Q(ay multi-lingual te)-.1 E(xt can be entered;)-.15
+E -.1(ke)5 G(yboard layout switching is done by pressing)-.05 E
+(Ctrl/Shift.)144 228 Q/F2 10.95/Times-Bold@0 SF(REFERENCES)72 244.8 Q F0
+(There are se)108 256.8 Q -.15(ve)-.25 G(ral te).15 E
+(xts describing the internal representation of)-.15 E F1(PHI)2.5 E F0
+(and)2.5 E F1(TLG)2.5 E F0(te)2.5 E(xt, ID data, citation data)-.15 E
+(and inde)108 268.8 Q 2.5<788c>-.15 G 2.5(les. The)-2.5 F
+(originator of this format is the P)2.5 E(ackard Humanities Institute.)
+-.15 E(The TLG is maintained)5 E(by UCI \255 see)108 280.8 Q F1(www)2.5
+E(.tlg)-.7 E(.uci.edu)-.15 E F0 2.5<ad77>2.5 G(here you may \214nd the)
+-2.5 E F1(TLG Beta Code Manual)2.5 E F0(and the)2.5 E F1(TLG Beta)2.5 E
+(Code Quick Refer)108 292.8 Q(ence Guide)-.18 E F0(.)A(Unicode consorti\
+um publications pertaining to the codi\214cation of characters used in \
+Hellenic literature, sci-)108 316.8 Q(enti\214c and musical te)108 328.8
+Q(xts.)-.15 E(The OpenOf)108 352.8 Q(\214ce suite \()-.25 E F1(www)A
+(.openof\214ce.or)-.7 E(g)-.1 E F0 2.5(\)i)C(ncludes a w)-2.5 E
+(ord processor that you can use to load, process)-.1 E(and create ne)108
+364.8 Q 2.5(wp)-.25 G(olytonic te)-2.5 E(xts.)-.15 E F2(COPYRIGHT)72
+393.6 Q F0(Cop)108 405.6 Q
+(yright \(C\) 2004, 2005 Dimitri Marinakis \(dm ssa gr\).)-.1 E
+(This program is free softw)108 429.6 Q(are; you can redistrib)-.1 E
+(ute it and/or modify it under the terms of the GNU General)-.2 E
+(Public License \(v)108 441.6 Q
+(ersion 2\) as published by the Free Softw)-.15 E(are F)-.1 E
+(oundation.)-.15 E(This program is distrib)108 465.6 Q
+(uted in the hope that it will be useful, b)-.2 E(ut WITHOUT ANY W)-.2 E
+(ARRANTY)-1.2 E 2.5(;w)-.92 G(ithout)-2.5 E -2.15 -.25(ev e)108 477.6 T
+2.5(nt).25 G(he implied w)-2.5 E(arranty of MERCHANT)-.1 E
+(ABILITY or FITNESS FOR A P)-.93 E(AR)-.92 E(TICULAR PURPOSE.)-.6 E(See)
+5 E(the GNU General Public License for more details.)108 489.6 Q -1.1
+(Yo)108 513.6 S 2.5(us)1.1 G(hould ha)-2.5 E .3 -.15(ve r)-.2 H(ecei).15
+E -.15(ve)-.25 G 2.5(dac).15 G(op)-2.5 E 2.5(yo)-.1 G 2.5(ft)-2.5 G
+(he GNU General Public License along with this program; if not, write)
+-2.5 E(to the Free Softw)108 525.6 Q(are F)-.1 E(oundation, Inc., 59 T)
+-.15 E(emple Place, Suite 330, Boston, MA)-.7 E 2.5(02111-1307 USA)5 F
+-1.11(Ve)72 768 S(rsion 1.1)1.11 E(Feb, 2005)168.45 E(3)209 E EP
+%%Trailer
+end
+%%EOF