aboutsummaryrefslogtreecommitdiffstats
path: root/prg2lout.c
diff options
context:
space:
mode:
authorJeffrey H. Kingston <jeff@it.usyd.edu.au>2010-09-14 20:36:08 +0000
committerJeffrey H. Kingston <jeff@it.usyd.edu.au>2010-09-14 20:36:08 +0000
commite3fe9fa1d10264370c63e20756e5cbd2c1050f43 (patch)
tree57165bf2889337044bc3633854e5aa38f7d89e6b /prg2lout.c
parent73d840b9f14b65166b92e6b43f930fd0ef7b8267 (diff)
downloadlout-e3fe9fa1d10264370c63e20756e5cbd2c1050f43.tar.gz
Lout 3.20 tag.3.20
git-svn-id: http://svn.savannah.nongnu.org/svn/lout/tags/3.20@10 9365b830-b601-4143-9ba8-b4a8e2c3339c
Diffstat (limited to 'prg2lout.c')
-rw-r--r--prg2lout.c3598
1 files changed, 2856 insertions, 742 deletions
diff --git a/prg2lout.c b/prg2lout.c
index c8ba048..d592edb 100644
--- a/prg2lout.c
+++ b/prg2lout.c
@@ -1,6 +1,7 @@
+
/*****************************************************************************/
/* */
-/* PRG2LOUT: A PROGRAM TO CONVERT PROGRAM SOURCES INTO LOUT (VERSION 1.0) */
+/* PRG2LOUT: A PROGRAM TO CONVERT PROGRAM SOURCES INTO LOUT (VERSION 2.0) */
/* COPYRIGHT (C) 2000 Jeffrey H. Kingston */
/* */
/* Jeffrey H. Kingston (jeff@cs.su.oz.au) */
@@ -23,50 +24,123 @@
/* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* */
/*****************************************************************************/
-#include <stdio.h>
-#include <stdlib.h>
+
+
+/*****************************************************************************/
+/* */
+/* GENERAL INTRODUCTION TO PRG2LOUT */
+/* */
+/* The best way to see what the aim of prg2lout as currently conceived is, */
+/* is to look in file cprint at the setup file options. You will see that */
+/* the aim is to provide three basic styles: fixed (essentially mono font), */
+/* varying (essentially varying-width font with various faces for different */
+/* elements at the user's choice), and symbol (similar to varying). */
+/* */
+/* The elements currently aimed for are strings, identifiers, comments, */
+/* keywords, numbers, and operators, and the end user is able to choose, */
+/* for each of these kinds of elements, which font to set them in. */
+/* */
+/* This is achieved by a simple division of labour: prg2lout does the */
+/* classifying of the input into a sequence of these elements, and the Lout */
+/* end (cprint and cprintf, or their clones for other languages) does the */
+/* formatting. For example, the C text */
+/* */
+/* inc = inc + 1 */
+/* */
+/* would be classified by prg2lout into identifier, operator, identifier, */
+/* operator, number; and consequently prg2lout would emit */
+/* */
+/* @PI{"inc"} @PO{"="} @PI{"inc"} @PO{"+"} @PN{"1"} */
+/* */
+/* which is readable by Lout, thanks to having quotes around everything */
+/* potentially dangerous, and clearly tells Lout, by means of the commands */
+/* @PC, @PI, etc., how each part of the input has been classified. */
+/* */
+/* The actual classification is carried out by prg2lout as follows. Each */
+/* programming language is described to prg2lout as a collection of tokens; */
+/* you say what the token begins with, what's a legal character inside the */
+/* token, and how it ends. You also say which command (@PC, @PI etc.) to */
+/* emit when a token of that kind is found. Prg2lout does the rest. */
+/* */
+/* Prg2lout knows all about tricky problems such as multi-line tokens (it */
+/* breaks them up into single-line pieces) and backslash in Lout strings */
+/* (it replaces any \ within an output string by \\, " by \", etc.). It */
+/* also handles tab characters and formfeed characters properly, and it */
+/* produces intelligible error messages when unexpected things happen, */
+/* such as input terminating in the middle of a string. This attention to */
+/* detail is a strong reason for using prg2lout rather than something more */
+/* ad-hoc, such as @Verbatim or a quick script. */
+/* */
+/*****************************************************************************/
+
/*****************************************************************************/
/* */
-/* How to add another language to prg2lout's list */
+/* HOW TO ADD ANOTHER LANGUAGE TO PRG2LOUT */
+/* */
+/* Step 1. Construct clones of (say) eiffel and eiffelf (these are in */
+/* $LOUTLIB/include) with every occurrence of eiffel or Eiffel in them */
+/* changed to your language as appropriate. Install your files in the */
+/* Lout system include directory alongside eiffel and eiffelf. */
/* */
-/* Step 1. Have a browse through the token declarations below, and work */
+/* It is good to clone the files exactly because that way all program */
+/* formatting works the same way, and one chapter of the User's Guide */
+/* covers the lot. However if your language has some unique element, not */
+/* readily classifiable as a string, identifier, comment, keyword, */
+/* number, or operator, it is possible to emit a different command of */
+/* your choice for the new element; but then your clones of eiffel and */
+/* eiffelf have to be extended to handle that command. */
+/* */
+/* Step 2. Have a browse through the token declarations below, and work */
/* out which of them you need for your language. If you need a token that */
/* isn't there already, you'll have to define it; there are many examples */
-/* there to help you. */
+/* and documentation there to help you. The tokens for Perl are rather */
+/* complicated and don't make a good model for most languages, so look */
+/* more at the C and Eiffel ones. */
/* */
-/* Step 2. Browse through the language declarations, and declare your */
+/* Step 3. Browse through the language declarations, and declare your */
/* language following those examples: first you give a set of one or more */
-/* alternative names for your language, then the default printing style for */
-/* it (either "fixed", "varying", or "symbol"), then you list the tokens */
-/* of the language, then you list the keywords. */
+/* alternative names for your language, then some other things, including */
+/* the list of tokens of the language, and its keywords. */
/* */
-/* Step 3. Add your language variable to the list in the initializer of */
+/* Step 4. Add your language variable to the list in the initializer of */
/* variable languages, as you can see the others have been done. */
/* */
-/* Step 4. If any lists of initializers now contain more than */
+/* Step 5. If any lists of initializers now contain more than */
/* MAX_ARRAY_LENGTH-1 elements, increase MAX_ARRAY_LENGTH until they don't. */
+/* The gcc compiler will warn you if you forget to do this. */
/* */
-/* Step 5. Recompile and reinstall prg2lout, test "prg2lout -u" then */
+/* Step 6. Recompile and reinstall prg2lout, test "prg2lout -u" then */
/* "prg2lout -l <mylanguage> <myfile> | lout -s > out.ps". */
/* */
-/* Step 6. Send your tested and verified changes to jeff@cs.usyd.edu.au */
-/* for incorporation in the next Lout release. If you do this, please */
-/* try hard to ensure that your changes conform to the formal definition */
-/* of your language. */
+/* Step 7. Send your tested and tidied files to me for incorporation */
+/* in the next Lout release. If you do this, please try hard to ensure */
+/* that your new code conforms to the formal definition of your language. */
+/* Feel free to email me for advice as you go along. */
+/* */
+/* Jeff Kingston */
+/* jeff@cs.usyd.edu.au */
/* */
/*****************************************************************************/
-
+
+#include <stdio.h>
+#include <stdlib.h>
+#define FALSE 0
+#define TRUE 1
+#define BOOLEAN unsigned
+
/*****************************************************************************/
/* */
/* Character sets */
/* */
-/* Here are prg2lout's definitions of various commonly needed sets of */
+/* These are prg2lout's definitions of various commonly needed sets of */
/* characters. May need enhancement for Latin1 etc. */
/* */
/*****************************************************************************/
+#define AllCharacters NULL /* code will recognize NULL and do this */
+
char AllPrintable[] =
" !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ;
@@ -75,18 +149,14 @@ char AllPrintablePlusNL[] =
" !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n" ;
+char AllPrintablePlusTab[] =
+ " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\
+ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\t" ;
+
char AllPrintableTabNL[] =
" !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n\t" ;
-char NotDoubleQuote[] =
- " !#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\
-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ;
-
-char NotQuote[] =
- " !#$%&\"()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\
-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ;
-
char Letters[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ;
char Letter_Digit[] =
@@ -100,287 +170,1797 @@ char Letter_Digit[] =
#define SepDigits "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
+#define HexDigits "A", "a", "B", "b", "C", "c", "D", "d", "E", "e", "F", "f"
+
/*****************************************************************************/
/* */
/* TOKEN - put your token declarations in this section */
/* */
+/* The fields of token_rec have the following meanings: */
+/* */
+/* name */
+/* The name of this token, e.g. "string" or "identifier". This field */
+/* is used only by error messages generated by prg2lout; for example, */
+/* prg2lout might print the message "input ended within string". */
+/* */
+/* print_style */
+/* */
+/* print_style What gets printed */
+/* ------------------------------------------------------- */
+/* PRINT_WHOLE_QUOTED command{"token"} */
+/* PRINT_NODELIMS_QUOTED command{"token-minus-delims"} */
+/* PRINT_WHOLE_UNQUOTED command{token} */
+/* PRINT_NODELIMS_UNQUOTED command{token-minus-delims} */
+/* PRINT_NODELIMS_INNER command{inner} */
+/* PRINT_COMMAND_ONLY command */
+/* */
+/* If command (see next) is empty then the braces {} are not printed. */
+/* */
+/* PRINT_WHOLE_QUOTED. This command is the most frequently used one; */
+/* it prints the token, enclosed in braces and quotes, preceded by the */
+/* command. The quotes ensure that the result is legal Lout; any " or */
+/* \ in the token is printed with a preceding \ as required in Lout. */
+/* The usual arrangement for handling white space is that none of the */
+/* tokens contain it; when it is encountered prg2lout generates the */
+/* appropriate Lout without being told (a space for a space, a newline */
+/* for a newline, @NP for a formfeed, and something clever for tab */
+/* which does the required thing). However, you can define a token */
+/* that contains white space if you wish. If you do this, the */
+/* quotation marks will be temporarily closed off, the white space */
+/* handled as just described, then the quotes opened again. */
+/* */
+/* PRINT_NODELIMS_QUOTED. This is like PRINT_WHOLE_QUOTED except that */
+/* the opening and closing delimiters of the token are omitted from */
+/* the print. This is useful occasionally when these delimiters are */
+/* formatting markers, not intended to be printed. */
+/* */
+/* PRINT_WHOLE_UNQUOTED. This style prints the command and braces */
+/* as usual, but omits the quotes and prints the token absolutely */
+/* verbatim. In general this is not going to produce legal Lout, */
+/* but it is useful in two cases: when the token is a Lout escape, */
+/* so that it is the user's responsibility to ensure that its content */
+/* is legal Lout; and when the command is another filter command, so */
+/* that the token content will not go directly into Lout anyway, it */
+/* will go through the other filter first. */
+/* */
+/* PRINT_NODELIMS_UNQUOTED. This is like PRINT_WHOLE_UNQUOTED except */
+/* that the opening and closing delimiters of the token are omitted. */
+/* */
+/* PRINT_NODELIMS_INNER. Like PRINT_NODELIMS_UNQUOTED except that the */
+/* inner part (i.e. not delimiters) is formatted in the same language. */
+/* */
+/* PRINT_COMMAND_ONLY. This ignores the token and prints just the */
+/* command, presumably because the command says it all for that token. */
+/* When using PRINT_COMMAND_ONLY you will probably need to finish your */
+/* command with a space: since there are no following braces in this */
+/* print style, your command will run into the next one otherwise. */
+/* */
/*****************************************************************************/
-#define MAX_CHAR 256
-#define MAX_STRING_LENGTH 101
-#define MAX_ARRAY_LENGTH 101
+
+/*****************************************************************************/
+/* */
+/* Meaning of TOKEN fields (ctd.) */
+/* */
+/* command */
+/* The Lout command to print. This command could be an legal Lout; */
+/* programming language setup files offer the following Lout symbols */
+/* that make the most common commands: */
+/* */
+/* @PI for formatting identifiers */
+/* @PK for formatting keywords */
+/* @PO for formatting operators */
+/* @PN for formatting numbers */
+/* @PS for formatting strings */
+/* @PC for formatting comments */
+/* @PA for printing an asterisk (lower on the line than usual) */
+/* @PM for printing a minus sign (longer than a hyphen) */
+/* @PD for printing a dot (.), only larger than usual */
+/* */
+/* The last three require PRINT_COMMAND_ONLY (they take no parameter). */
+/* If command is NULL or "", then no command will be printed and */
+/* furthermore the token will not be enclosed in the usual braces. */
+/* */
+/* alternate_command */
+/* Every language has a list of keywords. Just before printing each */
+/* token, it is compared against the keywords. If it is one of them, */
+/* then alternate_command is used instead of command. For example, */
+/* identifiers usually have command @PI and alternate_command @PK. */
+/* */
+/* following_command */
+/* Print this Lout command (or commands) after the token. */
+/* */
+/* start_line_only */
+/* A Boolean field. If TRUE, this token is to be recognized only */
+/* if it occurs at the very start of a line. */
+/* */
+/* starts[] */
+/* This field holds an array of strings. If prg2lout discovers any */
+/* one of these strings while it is not reading some other token, */
+/* then it deems that this token has begun. The recognized string */
+/* is the token's "starting delimiter". */
+/* */
+/* starts2[], ends2[] */
+/* These fields each hold an array of strings, and the two arrays */
+/* must have equal length. If they have length zero, these fields */
+/* do not apply. Otherwise, they modify the meaning of starts[] and */
+/* end_delimiter below. Their effect is best seen by looking at some */
+/* examples from Perl, their only user at the time of writing: */
+/* */
+/* q/hello/ qq/hello/ qq?hello? qq{hello} */
+/* */
+/* These strings may begin with q, qq, qx, and several other things; */
+/* this is then followed by a single character which determines the */
+/* string terminator; e.g. / means "terminate with /", { means */
+/* "terminate with }", etc. This is implemented by putting these */
+/* pairs of strings in corresponding places in starts[] and ends[]. */
+/* */
+/* starts: { "q", "qq", "qx" } */
+/* starts2: { "/", "?", "{" } */
+/* ends2: { "/", "?", "}" } */
+/* */
+/* PerlQTypeToken is a larger example of this. */
+/* */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/* */
+/* Meaning of TOKEN fields (ctd.) */
+/* */
+/* legal */
+/* This string defines the set of legal characters inside this token. */
+/* For example, numbers might have "0123456789." for this field, since */
+/* these are the characters that are legal within numbers, usually. */
+/* */
+/* escape */
+/* This string defines a single character which is the escape */
+/* character for this token. That is, if we are reading this token */
+/* and come upon this character, the character following it is */
+/* treated differently. An empty string "" means no escape character. */
+/* */
+/* escape_legal */
+/* This string defines the set of characters which are legal after */
+/* the escape character just mentioned. If any one of these appears */
+/* immediately after the escape character, it is deemed to be part */
+/* of the token even if without the preceding escape it would not be. */
+/* */
+/* inner_escape */
+/* end_inner_escape */
+/* The inner_escape string should be either empty (in which case it */
+/* does not apply), or else it should contain a single character, the */
+/* "inner escape" character. An inner escape is a temporary suspension */
+/* of a token, reverting to the original language. It is used to set */
+/* program text within comments. For example, in Eiffel and Blue, */
+/* inner_escape is "`" and end_inner_escape is "'" and so we can write */
+/* */
+/* -- increment `balance' by `amount' */
+/* */
+/* to treat balance and amount as identifiers within a comment token. */
+/* The inner escape is not limited to one token, it may have any */
+/* number of tokens, and they may have inner escapes too; prg2lout */
+/* imposes no limit on the depth of nesting of inner escapes. */
+/* */
+/* end_delimiter */
+/* This string shows how the token ends; for example, a string would */
+/* have end_delimiter ". If empty, it means that the token ends */
+/* just before the first character encountered that is not legal (see */
+/* "legal" above). For example, identifiers and numbers would have */
+/* empty end_delimiter. If ends2[] is not empty then end_delimiter */
+/* is ignored, since ends2[] explains how the token ends. */
+/* */
+/* end_start_line_only */
+/* A BOOLEAN field. If true, the end delimiter is to be recognized */
+/* only if it occurs at the very start of a line. */
+/* */
+/* want_two_ends */
+/* A Boolean feature used only by Perl; TRUE means that end_delimiter */
+/* (or ends2[]) has to be encountered twice before the token ends, */
+/* rather than the usual once. Used by PerSTypeToken to recognise */
+/* */
+/* s/abc/ABC/ */
+/* */
+/* and similar things as single tokens. Note that this works without */
+/* modification on s{abc}{ABC} provided { is legal within strings: as */
+/* long as the token terminates at the second }, we are safe. */
+/* */
+/*****************************************************************************/
+
+
+#define MAX_CHAR 256
+#define MAX_ARRAY_LENGTH 401
+#define PRINT_WHOLE_QUOTED 1
+#define PRINT_NODELIMS_QUOTED 2
+#define PRINT_WHOLE_UNQUOTED 3
+#define PRINT_NODELIMS_UNQUOTED 4
+#define PRINT_NODELIMS_INNER 5
+#define PRINT_COMMAND_ONLY 6
typedef struct token_rec {
- char *name; /* the name of this token, e.g. "string" */
- char *command; /* the Lout command for it, e.g. "@D" */
- char *alternate_command; /* the alternate command for it, e.g. "@K" */
- char *starts[MAX_ARRAY_LENGTH]; /* strings that start this token */
- char *legal; /* legal characters in this token */
- char *escape; /* the escape character e.g. "\\", or "" */
- char *escape_legal; /* legal characters after escape character */
- char *inner_escape; /* inner escape, e.g. "`", or else "" */
- char *end_inner_escape; /* end inner escape, e.g. "'" */
- char *end_delimiter; /* the end delimiter, else "" */
+ char *name;
+ int print_style;
+ char *command;
+ char *alternate_command;
+ char *following_command;
+ BOOLEAN start_line_only;
+ char *starts[MAX_ARRAY_LENGTH];
+ char *starts2[MAX_ARRAY_LENGTH];
+ char *ends2[MAX_ARRAY_LENGTH];
+ char *legal;
+ char *escape;
+ char *escape_legal;
+ char *inner_escape;
+ char *end_inner_escape;
+ char *end_delimiter;
+ BOOLEAN end_start_line_only;
+ BOOLEAN want_two_ends;
/* The following options are initialized by the program, so don't you */
- char chtype[MAX_CHAR]; /* character types within token */
+ char chtype[MAX_CHAR]; /* character types within token */
char escape_chtype[MAX_CHAR]; /* character types after escape */
} TOKEN;
+
+/*****************************************************************************/
+/* */
+/* Tokens defining strings and literal characters in non-Perl languages. */
+/* */
+/*****************************************************************************/
TOKEN CStringToken = {
"string", /* used by error messages involving this token */
- "@S", /* Lout command for formatting strings */
- "", /* No alternate command */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PS", /* Lout command for formatting strings */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
{ "\"" }, /* strings begin with a " character */
- NotDoubleQuote, /* inside, any printable except " is OK */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllPrintable, /* inside, any printable is OK */
"\\", /* within strings, \\ is the escape character */
AllPrintablePlusNL, /* after escape char, any printable char or nl OK */
"", /* strings do not permit "inner escapes" */
"", /* and so there is no end innner escape either */
- "\"" /* strings end with a " character */
+ "\"", /* strings end with a " character */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
TOKEN CCharacterToken = {
"character", /* used by error messages involving this token */
- "@C", /* Lout command for formatting characters */
- "", /* No alternate command */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PC", /* Lout command for formatting characters */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
{ "'" }, /* characters begin with a ' character */
- NotQuote, /* inside, any printable except ' is OK */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllPrintable, /* inside, any printable character is OK */
"\\", /* within characters, \\ is the escape character */
AllPrintable, /* after escape char, any printable char is OK */
"", /* characters do not permit "inner escapes" */
"", /* and so there is no end innner escape either */
- "'" /* characters end with a ' character */
+ "'", /* characters end with a ' character */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
+
TOKEN EiffelStringToken = {
"string", /* used by error messages involving this token */
- "@S", /* Lout command for formatting strings */
- "", /* No alternate command */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PS", /* Lout command for formatting strings */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
{ "\"" }, /* strings begin with a " character */
- NotDoubleQuote, /* inside, any printable except " is OK */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllPrintable, /* inside, any printable except " is OK */
"%", /* within strings, % is the escape character */
AllPrintable, /* after escape char, any printable char is OK */
"", /* strings do not permit "inner escapes" */
"", /* and so there is no end innner escape either */
- "\"" /* strings end with a " character */
+ "\"", /* strings end with a " character */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
TOKEN EiffelCharacterToken = {
"character", /* used by error messages involving this token */
- "@C", /* Lout command for formatting characters */
- "", /* No alternate command */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PC", /* Lout command for formatting characters */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
{ "'" }, /* characters begin with a ' character */
- NotQuote, /* inside, any printable except ' is OK */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllPrintable, /* inside, any printable except ' is OK */
"%", /* within characters, % is the escape character */
AllPrintable, /* after escape char, any printable char is OK */
"", /* characters do not permit "inner escapes" */
"", /* and so there is no end innner escape either */
- "'" /* characters end with a ' character */
+ "'", /* characters end with a ' character */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
+
+
+/*****************************************************************************/
+/* */
+/* Identifiers, in the form common to most programming languages. */
+/* */
+/*****************************************************************************/
TOKEN IdentifierToken = {
"identifier", /* used by error messages involving this token */
- "@D", /* Lout command for formatting identifiers */
- "@K", /* Alternate command (for keywords) */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PI", /* Lout command for formatting identifiers */
+ "@PK", /* Alternate command (for keywords) */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
{ SepLetters, "_" }, /* identifiers begin with any letter or _ */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
Letter_Digit, /* inside, letters, underscores, digits are OK */
"", /* no escape character within identifiers */
"", /* so nothing legal after escape char either */
"", /* identifiers do not permit "inner escapes" */
"", /* and so there is no end innner escape either */
- "" /* identifiers do not end with a delimiter */
+ "", /* identifiers do not end with a delimiter */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
+
+/*****************************************************************************/
+/* */
+/* Numbers, in the form common to most programming languages. */
+/* */
+/*****************************************************************************/
+
TOKEN NumberToken = {
"number", /* used by error messages involving this token */
- "@N", /* Lout command for formatting numbers */
- "", /* No alternate command */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PN", /* Lout command for formatting numbers */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
{ SepDigits }, /* numbers must begin with a digit */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
"0123456789.eE", /* inside, digits, decimal point, exponent */
"", /* no escape character within numbers */
"", /* so nothing legal after escape char either */
"", /* numbers do not permit "inner escapes" */
"", /* and so there is no end innner escape either */
- "" /* numbers do not end with a delimiter */
+ "", /* numbers do not end with a delimiter */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
+
+
+/*****************************************************************************/
+/* */
+/* Tokens defining comments in various languages. */
+/* */
+/*****************************************************************************/
TOKEN CCommentToken = {
"comment", /* used by error messages involving this token */
- "@C", /* Lout command for formatting comments */
- "", /* No alternate command */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PC", /* Lout command for formatting comments */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
{ "/*" }, /* comments begin with this character pair */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
AllPrintableTabNL, /* inside, any printable char, tab, or nl is OK */
"", /* no escape character within comments */
"", /* so nothing legal after escape char either */
"", /* C comments do not permit "inner escapes" */
"", /* and so there is no end innner escape either */
- "*/" /* comments end with this character pair */
-};
-
-TOKEN CCommentEscapeToken = {
- "Lout escape", /* used by error messages involving this token */
- NULL, /* no Lout command since passed through unformatted */
- "", /* No alternate command */
- { "/*@" }, /* comments begin with this character pair */
- AllPrintableTabNL, /* inside, any printable char, tab, or nl is OK */
- "", /* no escape character within comments */
- "", /* so nothing legal after escape char either */
- "", /* no "inner escape" in escape comments */
- "", /* so no end of "inner escape" either */
- "*/" /* comments end with this character pair */
+ "*/", /* comments end with this character pair */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
TOKEN CPPCommentToken = {
"comment", /* used by error messages involving this token */
- "@C", /* Lout command for formatting comments */
- "", /* No alternate command */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PC", /* Lout command for formatting comments */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
{ "//" }, /* comments begin with this character pair */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
AllPrintable, /* inside, any printable char is OK (not NL) */
"", /* no escape character within comments */
"", /* so nothing legal after escape char either */
"", /* C comments do not permit "inner escapes" */
"", /* and so there is no end innner escape either */
- "" /* no end delimiter (end of line will end it) */
+ "", /* no end delimiter (end of line will end it) */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
+
-TOKEN CPPCommentEscapeToken = {
- "Lout escape", /* used by error messages involving this token */
- NULL, /* no Lout command since passed through unformatted */
- "", /* No alternate command */
- { "//@" }, /* comments begin with this character pair */
+TOKEN EiffelCommentToken = {
+ "comment", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PC", /* Lout command for formatting comments */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "--" }, /* comments begin with this character pair */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
AllPrintable, /* inside, any printable char is OK */
"", /* no escape character within comments */
"", /* so nothing legal after escape char either */
- "", /* no "inner escape" in escape comments */
- "", /* so no end of "inner escape" either */
- "" /* no end delimiter (end of line will end it) */
+ "`", /* start of "inner escape" in Eiffel comment */
+ "'", /* end of "inner escape" in Eiffel comment */
+ "", /* no ending delimiter; end of line will end it */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
-TOKEN EiffelCommentToken = {
+TOKEN BlueCommentToken = {
"comment", /* used by error messages involving this token */
- "@C", /* Lout command for formatting comments */
- "", /* No alternate command */
- { "--" }, /* comments begin with this character pair */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PC", /* Lout command for formatting comments */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "==", "--" }, /* comments begin with this character pair */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
AllPrintable, /* inside, any printable char is OK */
"", /* no escape character within comments */
"", /* so nothing legal after escape char either */
- "`", /* start of "inner escape" in Eiffel comment */
- "'", /* end of "inner escape" in Eiffel comment */
- "" /* no ending delimiter; end of line will end it */
+ "`", /* start of "inner escape" in Blue comment */
+ "'", /* end of "inner escape" in Blue comment */
+ "", /* no ending delimiter; end of line will end it */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
-TOKEN EiffelCommentEscapeToken = {
+
+/*****************************************************************************/
+/* */
+/* Tokens defining escape comments in various languages. */
+/* */
+/* See discussion of "inner escapes" above for more information. */
+/* */
+/*****************************************************************************/
+
+TOKEN CCommentEscapeToken = {
"Lout escape", /* used by error messages involving this token */
- NULL, /* no Lout command since passed through unformatted */
- "", /* No alternate command */
- { "--@" }, /* comments begin with this character pair */
+ PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */
+ "", /* no Lout command since we are printing raw */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "/*@" }, /* escape comments begin with this delimiter */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllPrintableTabNL, /* inside, any printable char, tab, or nl is OK */
+ "", /* no escape character within comments */
+ "", /* so nothing legal after escape char either */
+ "", /* no "inner escape" in escape comments */
+ "", /* so no end of "inner escape" either */
+ "*/", /* comments end with this character pair */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN CPPCommentEscapeToken = {
+ "Lout escape", /* used by error messages involving this token */
+ PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */
+ "", /* no Lout command since we are printing raw */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "//@" }, /* escape comments begin with this delimiter */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
AllPrintable, /* inside, any printable char is OK */
"", /* no escape character within comments */
"", /* so nothing legal after escape char either */
"", /* no "inner escape" in escape comments */
"", /* so no end of "inner escape" either */
- "" /* no ending delimiter; end of line will end it */
+ "", /* no end delimiter (end of line will end it) */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
+
-TOKEN BlueCommentToken = {
- "comment", /* used by error messages involving this token */
- "@C", /* Lout command for formatting comments */
- "", /* No alternate command */
- { "==", "--" }, /* comments begin with this character pair */
+TOKEN EiffelCommentEscapeToken = {
+ "Lout escape", /* used by error messages involving this token */
+ PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */
+ "", /* no Lout command since we are printing raw */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "--@" }, /* escape comments begin with this delimiter */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
AllPrintable, /* inside, any printable char is OK */
"", /* no escape character within comments */
"", /* so nothing legal after escape char either */
- "`", /* start of "inner escape" in Blue comment */
- "'", /* end of "inner escape" in Blue comment */
- "" /* no ending delimiter; end of line will end it */
+ "", /* no "inner escape" in escape comments */
+ "", /* so no end of "inner escape" either */
+ "", /* no ending delimiter; end of line will end it */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
TOKEN BlueCommentEscapeToken = {
"Lout escape", /* used by error messages involving this token */
- NULL, /* no Lout command since passed through unformatted */
- "", /* No alternate command */
- { "==@", "--@" }, /* comments begin with this character pair */
+ PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */
+ "", /* no Lout command since we are printing raw */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "==@", "--@" }, /* escape comments begin with these delimiters */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
AllPrintable, /* inside, any printable char is OK */
"", /* no escape character within comments */
"", /* so nothing legal after escape char either */
"", /* no "inner escape" in escape comments */
"", /* so no end of "inner escape" either */
- "" /* no ending delimiter; end of line will end it */
+ "", /* no ending delimiter; end of line will end it */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
};
+
+/*****************************************************************************/
+/* */
+/* Tokens which are fixed strings, hence simpler than the above. */
+/* */
+/*****************************************************************************/
#define FixedToken(str, command) /* define fixed-string token */ \
{ \
str, /* name used for debugging only */ \
+ PRINT_WHOLE_QUOTED, /* print this token as usual */ \
+ command, /* Lout command for formatting this */ \
+ "", /* no alternate command */ \
+ "", /* no following command */ \
+ FALSE, /* token not just start of line */ \
+ { str }, /* token begins (and ends!) with this */ \
+ { NULL }, /* no start2 needed */ \
+ { NULL }, /* so no end2 either */ \
+ "", /* nothing inside, since no inside */ \
+ "", "", /* no escape character */ \
+ "", "", /* no inner escape; no end inner esc */ \
+ "", /* no ending delimiter */ \
+ FALSE, /* end not have to be at line start */ \
+ FALSE, /* don't end delimiter twice to stop */ \
+}
+
+TOKEN HashToken = FixedToken("#", "@PO");
+TOKEN ExclamationToken = FixedToken("!", "@PO");
+TOKEN PercentToken = FixedToken("%", "@PO");
+TOKEN HatToken = FixedToken("^", "@PO");
+TOKEN AmpersandToken = FixedToken("&", "@PO");
+TOKEN SlashToken = FixedToken("/", "@PO");
+TOKEN ArrowToken = FixedToken("->", "arrowright @A @PO");
+TOKEN BackSlashToken = FixedToken("\\", "@PO");
+TOKEN LeftParenToken = FixedToken("(", "@PO");
+TOKEN RightParenToken = FixedToken(")", "@PO");
+TOKEN PlusToken = FixedToken("+", "plus @A @PO");
+TOKEN EqualToken = FixedToken("=", "equal @A @PO");
+TOKEN LeftBraceToken = FixedToken("{", "@PO");
+TOKEN RightBraceToken = FixedToken("}", "@PO");
+TOKEN BarToken = FixedToken("|", "@PO");
+TOKEN CircumToken = FixedToken("~", "@PO");
+TOKEN LeftBracketToken = FixedToken("[", "@PO");
+TOKEN RightBracketToken = FixedToken("]", "@PO");
+TOKEN SemicolonToken = FixedToken(";", "@PO");
+TOKEN ColonToken = FixedToken(":", "@PO");
+TOKEN LessToken = FixedToken("<", "less @A @PO");
+TOKEN GreaterToken = FixedToken(">", "greater @A @PO");
+TOKEN QuestionToken = FixedToken("?", "@PO");
+TOKEN CommaToken = FixedToken(",", "@PO");
+TOKEN DotToken = FixedToken(".", "@PO");
+TOKEN LessEqualToken = FixedToken("<=", "lessequal @A @PO");
+TOKEN GreaterEqualToken = FixedToken(">=", "greaterequal @A @PO");
+TOKEN CNotEqualToken = FixedToken("!=", "notequal @A @PO");
+TOKEN EiffelNotEqualToken = FixedToken("/=", "notequal @A @PO");
+TOKEN BlueNotEqualToken = FixedToken("<>", "notequal @A @PO");
+TOKEN AssignToken = FixedToken(":=", "@PO");
+TOKEN QuestionAssignToken = FixedToken("?=", "@PO");
+TOKEN DollarToken = FixedToken("$", "@PO");
+TOKEN ImpliesToken = FixedToken("=>", "implies @A @PO");
+
+
+/*****************************************************************************/
+/* */
+/* Fixed-string tokens that are to be printed COMMAND_ONLY (no parameter). */
+/* */
+/*****************************************************************************/
+
+#define NoParameterToken(str, command) /* define fixed-string token */ \
+{ \
+ str, /* name used for debugging only */ \
+ PRINT_COMMAND_ONLY, /* print only the command */ \
command, /* Lout command for formatting this */ \
- "", /* No alternate command */ \
+ "", /* no alternate command */ \
+ "", /* no following command */ \
+ FALSE, /* token not just start of line */ \
{ str }, /* token begins (and ends!) with this */ \
+ { NULL }, /* no start2 needed */ \
+ { NULL }, /* so no end2 either */ \
"", /* nothing inside, since no inside */ \
- "", /* no escape character */ \
- "", /* so nothing after escape character */ \
- "", /* no inner escape either */ \
- "", /* so no end inner escape */ \
- "" /* no ending delimiter */ \
+ "", "", /* no escape character */ \
+ "", "", /* no inner escape; no end inner esc */ \
+ "", /* no ending delimiter */ \
+ FALSE, /* end not have to be at line start */ \
+ FALSE, /* don't end delimiter twice to stop */ \
+}
+
+TOKEN StarToken = NoParameterToken("*", "{@PA}");
+TOKEN MinusToken = NoParameterToken("-", "{@PM}");
+TOKEN EiffelDotToken = NoParameterToken(".", "{@PD}");
+
+
+/*****************************************************************************/
+/* */
+/* Perl (quarantined from other languages since it's very different). */
+/* */
+/* Perl code co-authored by Jeffrey H. Kingston and Mark Summerfield */
+/* March 2000 */
+/* */
+/* In the comments below, WCS refers to "Programming Perl", Second */
+/* Edition (1996), by Wall, Christiansen, and Schwartz. However Perl */
+/* has changed since then and this code also reflects those changes. */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Perl's strings and regular expressions */
+/* */
+/* The table in WCS pp. 41 is a good summary of the possibilities: */
+/* */
+/* '' q// */
+/* "" qq// */
+/* `` qx// */
+/* () qw// */
+/* // m// */
+/* s/// s/// */
+/* y/// tr/// */
+/* */
+/* To this must be added the following quotation, which begins just */
+/* below the table: */
+/* */
+/* Any non-alphabetic, non-whitespace delimiter can be used in place */
+/* of /. If the opening delimiter is a parenthesis, bracket, brace, */
+/* or angle bracket, the closing delimiter will be the matching */
+/* construct. (Embedded occurrences of the delimiters must match in */
+/* pairs.) ... Finally, for two-string constructs like s/// and tr///, */
+/* if the first pair of quotes is a bracketing pair, then the second */
+/* part gets its own starting quote character, which needn't be the */
+/* same as the first pair. So you can write things like s{foo}(bar) */
+/* or tr[a-z][A-Z]. Whitespace is allowed between the two inner quote */
+/* characters, so you could even write that last one as */
+/* */
+/* tr [a-z] */
+/* [A-Z] */
+/* */
+/* At present the tokens below have three problems implementing this: */
+/* */
+/* (1) When / appears without anything in front, it will be recognized */
+/* as a division operator, hence the following regular expression */
+/* will be treated as ordinary Perl, not as a regular expression; */
+/* */
+/* (2) When the delimiters are a matching pair, embedded occurrences */
+/* are not recognised as such: the first closing delimiter will be */
+/* taken to close the pair; */
+/* */
+/* (3) The case s{foo}(bar) is not implemented; the second bracketing */
+/* pair must be the same as the first; in fact prg2lout will keep */
+/* going, looking for a second closing brace, in this example. */
+/* */
+/*****************************************************************************/
+
+
+TOKEN PerlSingleQuoteStringToken = {
+ "''-string", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PS", /* Lout command for formatting strings */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "'" }, /* strings begin with a ' character */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* inside, any character at all is OK */
+ "\\", /* within strings, \\ is the escape character */
+ "\\'", /* after escape, only \ and ' are OK (WCS p. 39) */
+ "", /* strings do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "\'", /* strings end with a ' character */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PerlDoubleQuoteStringToken = {
+ "\"\"-string", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PS", /* Lout command for formatting strings */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "\"" }, /* strings begin with a " character */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* inside, any character at all is OK */
+ "\\", /* within strings, \\ is the escape character */
+ AllCharacters, /* after escape char, any character at all is OK */
+ "", /* strings do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "\"", /* strings end with a " character */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PerlBackQuoteStringToken = {
+ "``-string", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PS", /* Lout command for formatting strings */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "`" }, /* strings begin with a ` character */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* inside, any character at all is OK */
+ "\\", /* within strings, \\ is the escape character */
+ AllCharacters, /* after escape char, any character at all is OK */
+ "", /* strings do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "`", /* strings end with a ` character */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+TOKEN PerlQTypeStringToken = {
+ "q-type-string", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PS", /* Lout command for formatting strings */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "q", "qq", "qx", "qw", "qr", "m" }, /* q-type strings begin with these */
+ { "/", "(", "[", "{", "<", "!", "%", "#", "|", ",", ":", ";" }, /*start2 */
+ { "/", ")", "]", "}", ">", "!", "%", "#", "|", ",", ":", ";" }, /* end2 */
+ AllCharacters, /* inside, any character at all is OK */
+ "\\", /* within strings, \\ is the escape character */
+ AllCharacters, /* after escape char, any character at all is OK */
+ "", /* strings do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "", /* will be using end2 for the end delimiter here */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PerlSTypeStringToken = {
+ "s-type-string", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PS", /* Lout command for formatting strings */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "s", "y", "tr" }, /* s-type strings begin with these */
+ { "/", "(", "[", "{", "<", "!", "%", "#", "|", ",", ":", ";" }, /*start2 */
+ { "/", ")", "]", "}", ">", "!", "%", "#", "|", ",", ":", ";" }, /* end2 */
+ AllCharacters, /* inside, any character at all is OK */
+ "\\", /* within strings, \\ is the escape character */
+ AllCharacters, /* after escape char, any character at all is OK */
+ "", /* strings do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "", /* will be using end2 for the end delimiter here */
+ FALSE, /* end delimiter does not have to be at line start */
+ TRUE, /* need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Perl "bare" regular expressions */
+/* */
+/* By a bare regular expression, we mean one that is not preceded */
+/* by q, qq, m etc. These are distinguished from division by being */
+/* preceded by one of =, =~, !~, split, if, not, unless, for, foreach, */
+/* or while, with up to two white space characters intervening. */
+/* */
+/*****************************************************************************/
+
+#define PerlREToken(start, com) \
+{ \
+ "regexp", /* used by error messages */ \
+ PRINT_NODELIMS_QUOTED,/* no delims since we supply them */ \
+ com, /* the command */ \
+ "", /* no alternate command */ \
+ "@PS{\"/\"}", /* following command (final /) */ \
+ FALSE, /* token allowed not just start of line */ \
+ { start }, /* preceding token in this case */ \
+ { "/", " /", "\t/", " /", " \t/", "\t /", "\t\t/" }, /* start2 */ \
+ { "/", "/", "/", "/", "/", "/", "/" }, /* end2 */ \
+ AllCharacters, /* any character OK inside */ \
+ "\\", /* \\ is the escape character */ \
+ AllCharacters, /* after escape char, any is OK */ \
+ "", /* no inner escapes */ \
+ "", /* no end innner escape either */ \
+ "", /* will be using end2 here */ \
+ FALSE, /* no need to end at line start */ \
+ FALSE, /* don't want end delimiter twice */ \
+}
+
+TOKEN PerlRegExpLPar = PerlREToken("(", "@PO{\"(\"} @PS{\"/\"}@PS");
+TOKEN PerlRegExpEq = PerlREToken("=", "@PO{\"=\"} @PS{\"/\"}@PS");
+TOKEN PerlRegExpMatch = PerlREToken("=~", "@PO{\"=~\"} @PS{\"/\"}@PS");
+TOKEN PerlRegExpNoMatch = PerlREToken("!~", "@PO{\"!~\"} @PS{\"/\"}@PS");
+TOKEN PerlRegExpSplit = PerlREToken("split", "@PK{split} @PS{\"/\"}@PS");
+TOKEN PerlRegExpIf = PerlREToken("if", "@PK{if} @PS{\"/\"}@PS");
+TOKEN PerlRegExpNot = PerlREToken("not", "@PK{not} @PS{\"/\"}@PS");
+TOKEN PerlRegExpUnless = PerlREToken("unless", "@PK{unless} @PS{\"/\"}@PS");
+TOKEN PerlRegExpFor = PerlREToken("for", "@PK{for} @PS{\"/\"}@PS");
+TOKEN PerlRegExpForEach = PerlREToken("foreach","@PK{foreach} @PS{\"/\"}@PS");
+TOKEN PerlRegExpWhile = PerlREToken("while", "@PK{while} @PS{\"/\"}@PS");
+
+TOKEN PerlRegExpStartLineToken =
+{
+ "regexp", /* used by error messages */
+ PRINT_WHOLE_QUOTED, /* we can print the whole thing this time */
+ "@PS", /* the command */
+ "", /* no alternate command */
+ "", /* no following command */
+ TRUE, /* token allowed only at start of line */
+ { "/" }, /* starting delimiter (so easy!) */
+ { NULL }, /* no start2 */
+ { NULL }, /* no end2 */
+ AllCharacters, /* any character OK inside */
+ "\\", /* \\ is the escape character */
+ AllCharacters, /* after escape char, any is OK */
+ "", /* no inner escapes */
+ "", /* no end innner escape either */
+ "/", /* ending delimiter */
+ FALSE, /* no need to end at line start */
+ FALSE, /* don't want end delimiter twice */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Perl's here-documents */
+/* */
+/* At present the only terminating strings recognized are EOT, EOF, END, */
+/* and the empty string. These may all be quoted in the usual ways. */
+/* */
+/*****************************************************************************/
+
+#define X(startstr, endstr, startcom, endcom) \
+{ \
+ "here-document", /* used by error messages */ \
+ PRINT_NODELIMS_QUOTED,/* no delims since we supply them */ \
+ startcom, /* the command */ \
+ "", /* no alternate command */ \
+ endcom, /* following command (final /) */ \
+ FALSE, /* token allowed not just start of line */ \
+ { startstr }, /* starting delimiter */ \
+ { NULL }, /* no start2 */ \
+ { NULL }, /* no end2 */ \
+ AllCharacters, /* any character OK inside */ \
+ "", "", /* no escape character */ \
+ "", "", /* no inner escapes */ \
+ endstr, /* token ends with this */ \
+ TRUE, /* must be found at line start */ \
+ FALSE, /* don't want end delimiter twice */ \
+}
+
+#define sEOT "\n@PS{\"EOT\"}\n"
+#define sEOF "\n@PS{\"EOF\"}\n"
+#define sEND "\n@PS{\"END\"}\n"
+#define sBLA "\n@PS{\"\"}\n"
+
+TOKEN HereEOTuq = X("<<EOT", "EOT\n", "@PO{<<}@PS{\"EOT\"}@PS", sEOT);
+TOKEN HereEOTdq = X("<<\"EOT\"","EOT\n", "@PO{<<}@PS{\"\\\"EOT\\\"\"}@PS",sEOT);
+TOKEN HereEOTfq = X("<<'EOT'", "EOT\n", "@PO{<<}@PS{\"'EOT'\"}@PS", sEOT);
+TOKEN HereEOTbq = X("<<`EOT`", "EOT\n", "@PO{<<}@PS{\"`EOT`\"}@PS", sEOT);
+
+TOKEN HereEOFuq = X("<<EOF", "EOF\n", "@PO{<<}@PS{\"EOF\"}@PS", sEOF);
+TOKEN HereEOFdq = X("<<\"EOF\"","EOF\n", "@PO{<<}@PS{\"\\\"EOF\\\"\"}@PS",sEOF);
+TOKEN HereEOFfq = X("<<'EOF'", "EOF\n", "@PO{<<}@PS{\"'EOF'\"}@PS", sEOF);
+TOKEN HereEOFbq = X("<<`EOF`", "EOF\n", "@PO{<<}@PS{\"`EOF`\"}@PS", sEOF);
+
+TOKEN HereENDuq = X("<<END", "END\n", "@PO{<<}@PS{\"END\"}@PS", sEND);
+TOKEN HereENDdq = X("<<\"END\"","END\n", "@PO{<<}@PS{\"\\\"END\\\"\"}@PS",sEND);
+TOKEN HereENDfq = X("<<'END'", "END\n", "@PO{<<}@PS{\"'END'\"}@PS", sEND);
+TOKEN HereENDbq = X("<<`END`", "END\n", "@PO{<<}@PS{\"`END`\"}@PS", sEND);
+
+TOKEN HereBLAuq = X("<< ", "\n", "@PO{<<} @PS", sBLA);
+TOKEN HereBLAdq = X("<<\"\"", "\n", "@PO{<<}@PS{\"\\\"\\\"\"}@PS", sBLA);
+TOKEN HereBLAfq = X("<<''", "\n", "@PO{<<}@PS{\"''\"}@PS", sBLA);
+TOKEN HereBLAbq = X("<<``", "\n", "@PO{<<}@PS{\"``\"}@PS", sBLA);
+
+
+/*****************************************************************************/
+/* */
+/* Perl's identifiers */
+/* */
+/* "Names that start with a letter or underscore may be of any */
+/* length ... and may contain letters, digits, and underscores. */
+/* Names that start with a digit may only contain more digits. */
+/* Names that start with anything else are limited to that one */
+/* character (like $? or $$) and generally have a predefined */
+/* significance to Perl." (WCS page 38) */
+/* */
+/* In addition we have to consider that variable names may be preceded */
+/* by $, @, %, &, or *. Whether these are part of the variable or not is */
+/* rather doubtful. We will treat $ and @ as part of the variable and the */
+/* others not (since they occur elsewhere in the token list anyway); plus */
+/* we have a separate token type for identifiers beginning with $ and */
+/* followed by one character, which we will list explicitly. */
+/* */
+/*****************************************************************************/
+
+TOKEN PerlIdentifierToken = {
+ "identifier", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PI", /* Lout command for formatting identifiers */
+ "@PK", /* Alternate command (for keywords) */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ {SepLetters,"_","$","@"}, /* identifiers begin with these */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ Letter_Digit, /* inside, letters, underscores, digits are OK */
+ "", /* no escape character within identifiers */
+ "", /* so nothing legal after escape char either */
+ "", /* identifiers do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "", /* identifiers do not end with a delimiter */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PerlSpecialIdentifierToken = {
+ "special identifier", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PI", /* Lout command for formatting identifiers */
+ "@PK", /* Alternate command (for keywords) */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "$_", "$.", "$/", "$,", "$\\", "$\"", "$;", "$^L", "$:",
+ "$^A", "$#", "$?", "$!", "$@", "$$", "$<", "$>", "$(",
+ "$)", "$0", "$[", "$]", "$^D", "$^E", "$^F", "$^H", "$^I",
+ "$^M", "$^O", "$^P", "$^T", "$^W", "$^X"
+ }, /* all possible values of special variables */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ "", /* nothing allowed inside, since ends after start */
+ "", /* no escape character within identifiers */
+ "", /* so nothing legal after escape char either */
+ "", /* identifiers do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "", /* identifiers do not end with a delimiter */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Perl's numeric literals */
+/* */
+/* These are defined in WCS page 39 basically by giving these examples: */
+/* */
+/* 12345 # integer */
+/* 12345.67 # floating point */
+/* 6.02E23 # scientific notation */
+/* 0xffff # hexadecimal */
+/* 0377 # octal */
+/* 4_294_967_296 # underline for legibility */
+/* */
+/* Implementation is straightforward; hexadecimal is a separate token. */
+/* */
+/*****************************************************************************/
+
+TOKEN PerlLiteralNumberToken = {
+ "number", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PN", /* Lout command for formatting numbers */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { SepDigits }, /* numbers must begin with a digit */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ "0123456789.eE_", /* inside, digits, point, exponent, underscore */
+ "", /* no escape character within numbers */
+ "", /* so nothing legal after escape char either */
+ "", /* numbers do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "", /* numbers do not end with a delimiter */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PerlHexNumberToken = {
+ "number", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PN", /* Lout command for formatting numbers */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "0x" }, /* hex numbers must begin with 0x */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ "0123456789AaBbCcDdEeFf", /* inside, hexadecimal digits */
+ "", /* no escape character within numbers */
+ "", /* so nothing legal after escape char either */
+ "", /* numbers do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "", /* numbers do not end with a delimiter */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Perl's comments */
+/* */
+/* "Comments are indicated by the # character and extend to the end of */
+/* the line." (WCS page 35). To this we have added the usual Lout escape */
+/* comment beginning with #@. */
+/* */
+/*****************************************************************************/
+
+TOKEN PerlCommentToken = {
+ "comment", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */
+ "@PC", /* Lout command for formatting comments */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "#" }, /* comments begin with this character */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllPrintable, /* inside, any printable char is OK (not NL) */
+ "", /* no escape character within comments */
+ "", /* so nothing legal after escape char either */
+ "", /* C comments do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "", /* no end delimiter (end of line will end it) */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PerlCommentEscapeToken = {
+ "Lout escape", /* used by error messages involving this token */
+ PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */
+ "", /* no Lout command since we are printing raw */
+ "", /* no alternate command */
+ "", /* no following command */
+ FALSE, /* token allowed anywhere, not just start of line */
+ { "#@" }, /* comments begin with this character pair */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllPrintable, /* inside, any printable char is OK */
+ "", /* no escape character within comments */
+ "", /* so nothing legal after escape char either */
+ "", /* no "inner escape" in escape comments */
+ "", /* so no end of "inner escape" either */
+ "", /* no end delimiter (end of line will end it) */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Perl's POD sub-language */
+/* */
+/* Pod is handled as a completely different language. However we need */
+/* one Perl token which recognizes an entire Pod interpolation and prints */
+/* it enclosed in @Pod { ... } so that Lout knows to call back later on it. */
+/* */
+/* "A line beginning with = is assumed to introduce some documentation, */
+/* which continues until another line is reached beginning with =cut" */
+/* (WCS page36). Strictly speaking this is only valid at points where */
+/* a statement would be legal, but that is beyond prg2lout to implement. */
+/* */
+/*****************************************************************************/
+
+TOKEN PerlPodToken = {
+ "perl-pod", /* used by error messages involving this token */
+ PRINT_NODELIMS_UNQUOTED, /* unquoted but with a command enclosing it */
+ "@DP @Pod", /* Lout command for formatting Pod */
+ "", /* no alternate command */
+ "@DP\n", /* following command */
+ TRUE, /* token allowed at start of line only */
+ { "=", "=pod" }, /* pod insert begins with either of these */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* inside, any character at all is OK */
+ "", /* no escape character within pod comments */
+ "", /* so nothing legal after escape char either */
+ "", /* pod comments do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "=cut", /* pod comments end with this string */
+ TRUE, /* end delimiter must be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Perl's operators */
+/* */
+/* Only those not already in the C/C++ list are given here. */
+/* */
+/*****************************************************************************/
+
+TOKEN PerlIncrementToken = FixedToken( "++", "@PO" ) ;
+TOKEN PerlDecrementToken = FixedToken( "--", "@PO" ) ;
+TOKEN PerlExponentiateToken = FixedToken( "**", "@PO" ) ;
+TOKEN PerlMatchToken = FixedToken( "=~", "@PO" ) ;
+TOKEN PerlNotMatchToken = FixedToken( "!~", "@PO" ) ;
+TOKEN PerlEqualToken = FixedToken( "==", "@PO" ) ;
+TOKEN PerlAssignToken = FixedToken( "=", "@PO" ) ;
+TOKEN PerlBitLeftShiftToken = FixedToken( "<<", "@PO" ) ;
+TOKEN PerlBitRightShiftToken = FixedToken( ">>", "@PO" ) ;
+TOKEN PerlSpaceshipToken = FixedToken( "<=>", "@PO" ) ;
+TOKEN PerlAndToken = FixedToken( "&&", "@PO" ) ;
+TOKEN PerlOrToken = FixedToken( "||", "@PO" ) ;
+TOKEN PerlRange2Token = FixedToken( "..", "@PO" ) ;
+TOKEN PerlRange3Token = FixedToken( "...", "@PO" ) ;
+TOKEN PerlFileTestrToken = FixedToken( "-r", "@PO" ) ;
+TOKEN PerlFileTestwToken = FixedToken( "-w", "@PO" ) ;
+TOKEN PerlFileTestxToken = FixedToken( "-x", "@PO" ) ;
+TOKEN PerlFileTestoToken = FixedToken( "-o", "@PO" ) ;
+TOKEN PerlFileTestRToken = FixedToken( "-R", "@PO" ) ;
+TOKEN PerlFileTestWToken = FixedToken( "-W", "@PO" ) ;
+TOKEN PerlFileTestXToken = FixedToken( "-X", "@PO" ) ;
+TOKEN PerlFileTestOToken = FixedToken( "-O", "@PO" ) ;
+TOKEN PerlFileTesteToken = FixedToken( "-e", "@PO" ) ;
+TOKEN PerlFileTestzToken = FixedToken( "-z", "@PO" ) ;
+TOKEN PerlFileTestsToken = FixedToken( "-s", "@PO" ) ;
+TOKEN PerlFileTestfToken = FixedToken( "-f", "@PO" ) ;
+TOKEN PerlFileTestdToken = FixedToken( "-d", "@PO" ) ;
+TOKEN PerlFileTestlToken = FixedToken( "-l", "@PO" ) ;
+TOKEN PerlFileTestpToken = FixedToken( "-p", "@PO" ) ;
+TOKEN PerlFileTestSToken = FixedToken( "-S", "@PO" ) ;
+TOKEN PerlFileTestbToken = FixedToken( "-b", "@PO" ) ;
+TOKEN PerlFileTestcToken = FixedToken( "-c", "@PO" ) ;
+TOKEN PerlFileTesttToken = FixedToken( "-t", "@PO" ) ;
+TOKEN PerlFileTestuToken = FixedToken( "-u", "@PO" ) ;
+TOKEN PerlFileTestgToken = FixedToken( "-g", "@PO" ) ;
+TOKEN PerlFileTestkToken = FixedToken( "-k", "@PO" ) ;
+TOKEN PerlFileTestTToken = FixedToken( "-T", "@PO" ) ;
+TOKEN PerlFileTestBToken = FixedToken( "-B", "@PO" ) ;
+TOKEN PerlFileTestMToken = FixedToken( "-M", "@PO" ) ;
+TOKEN PerlFileTestAToken = FixedToken( "-A", "@PO" ) ;
+TOKEN PerlFileTestCToken = FixedToken( "-C", "@PO" ) ;
+
+
+/*****************************************************************************/
+/* */
+/* Pod (Plain Old Documentation, used with Perl) tokens */
+/* */
+/* Pod is treated as a completely different language to Perl. It is */
+/* quite possible to use Pod alone without Perl; or, thanks to the */
+/* PerlPodToken, to embed Pod in Perl in the usual way. Quotations below */
+/* are from Larry Wall's documentation, communicated by Mark Summerfield. */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Pod Verbatim paragraphs */
+/* */
+/* "A verbatim paragraph [is] distinguished by being indented (that is, it */
+/* starts with a space or tab). It should be reproduced exactly, with */
+/* tabs assumed to be on 8-column boundaries. There are no special */
+/* formatting escapes." */
+/* */
+/* By a "paragraph" is meant a sequence of lines down to the next empty */
+/* line; but we will handle verbatim paragraphs one line at a time. */
+/* It also seems that an empty line in the input has to become an empty */
+/* line in the output; so we include a token for that. */
+/* */
+/*****************************************************************************/
+
+TOKEN PodVerbatimLineToken = {
+ "verbatim-para", /* used by error messages involving this token */
+ PRINT_WHOLE_QUOTED, /* printing the whole paragraph quoted */
+ "@PV ", /* Lout command for formatting verbatim line */
+ "", /* no alternate command */
+ "", /* no following command */
+ TRUE, /* token allowed at start of line only */
+ { "\t", " " }, /* command begins with this */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllPrintablePlusTab, /* inside, any printable char except newline is OK */
+ "", "", /* no escape character within verbatim lines */
+ "", "", /* no "inner escapes" within verbatim lines */
+ "", /* ends at end of line */
+ FALSE, /* don't need to be at start of line to end it */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PodEmptyLineToken = {
+ "pod-empty-line", /* used by error messages involving this token */
+ PRINT_COMMAND_ONLY, /* printing just the command */
+ "@PPG\n", /* Lout command for formatting Pod empty line */
+ "", /* no alternate command */
+ "", /* no following command */
+ TRUE, /* token allowed at start of line only */
+ { "\n" }, /* command begins with this */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ "", /* nothing inside */
+ "", "", /* no escape character */
+ "", "", /* no inner escape */
+ "", /* token will end with the end of the line */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Pod Command paragraphs */
+/* */
+/* "All command paragraphs start with =, followed by an identifier, */
+/* followed by arbitrary text that the command can use." */
+/* */
+/* "[A] command lasts up until the end of the paragraph, not the line. */
+/* Hence, ... you can see the empty lines after each command to end */
+/* its paragraph." */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Pod command paragraphs: =pod */
+/* */
+/* "The =pod directive does nothing beyond telling the compiler to lay off */
+/* parsing code through the next =cut." */
+/* */
+/*****************************************************************************/
+
+TOKEN PodIgnoreToken = {
+ "pod-cut", /* used by error messages involving this token */
+ PRINT_COMMAND_ONLY, /* printing just the command */
+ "", /* Lout command for formatting Pod cut (nothing) */
+ "", /* no alternate command */
+ "", /* no following command */
+ TRUE, /* token allowed at start of line only */
+ { "=pod", "=cut" }, /* command begins with this */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* anything at all can be inside */
+ "", /* no escape character */
+ "", /* so nothing legal after escape char either */
+ "", /* cut tokens do not permit "inner escapes" */
+ "", /* and so there is no end innner escape either */
+ "\n", /* token will end with the end of the line */
+ TRUE, /* end delimiter (\n) has to be at a line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Pod command paragraphs: =head1, =head2 (and =head3, folklore extension) */
+/* */
+/* "... produce first and second level heading" */
+/* */
+/*****************************************************************************/
+
+TOKEN PodHeading1Token = {
+ "=head1", /* used by error messages involving this token */
+ PRINT_NODELIMS_QUOTED, /* print without delimiters */
+ "@PHA", /* Lout command for formatting Pod heading */
+ "", /* no alternate command */
+ "", /* following command */
+ TRUE, /* token allowed at start of line only */
+ {"=head1","head1"}, /* command begins with this */
+ { " ", "\t" }, /* helps to skip following white space */
+ { "\n", "\n" }, /* token ends at end of line */
+ AllCharacters, /* anything in the heading */
+ "", "", /* no escape character; nothing legal after escape */
+ "", "", /* no inner escapes; no end inner escape */
+ "\n\n", /* token will end with the first blank line */
+ FALSE, /* end delimiter (\n) has to be at a line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PodHeading2Token = {
+ "=head2", /* used by error messages involving this token */
+ PRINT_NODELIMS_QUOTED, /* print without delimiters */
+ "@PHB", /* Lout command for formatting Pod heading */
+ "", /* no alternate command */
+ "", /* following command */
+ TRUE, /* token allowed at start of line only */
+ { "=head2" }, /* command begins with this */
+ { " ", "\t" }, /* helps to skip following white space */
+ { "\n", "\n" }, /* token ends at end of line */
+ AllCharacters, /* anything in the heading */
+ "", "", /* no escape character; nothing legal after escape */
+ "", "", /* no inner escapes; no end inner escape */
+ "\n\n", /* token will end with the first blank line */
+ FALSE, /* end delimiter (\n) has to be at a line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PodHeading3Token = {
+ "=head3", /* used by error messages involving this token */
+ PRINT_NODELIMS_QUOTED, /* print without delimiters */
+ "@PHC", /* Lout command for formatting Pod heading */
+ "", /* no alternate command */
+ "", /* following command */
+ TRUE, /* token allowed at start of line only */
+ { "=head3" }, /* command begins with this */
+ { " ", "\t" }, /* helps to skip following white space */
+ { "\n", "\n" }, /* token ends at end of line */
+ AllCharacters, /* anything in the heading */
+ "", "", /* no escape character; nothing legal after escape */
+ "", "", /* no inner escapes; no end inner escape */
+ "\n\n", /* token will end with the first blank line */
+ FALSE, /* end delimiter (\n) has to be at a line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Pod command paragraphs: =over, =item, and =back (for lists) */
+/* */
+/*****************************************************************************/
+
+TOKEN PodOverToken = {
+ "=over", /* used by error messages involving this token */
+ PRINT_NODELIMS_UNQUOTED, /* just a number after =over, so this is safe */
+ "@RawTaggedList gap{@PLG}indent{@PLI}rightindent{@PLRI}labelwidth{@PLLW ",
+ "", /* no alternate command */
+ "} // {", /* open brace to match } at first item */
+ TRUE, /* token allowed at start of line only */
+ { "=over" }, /* command begins with this */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* inside, any printable char is OK */
+ "", "", /* no escape character; nothing legal after escape */
+ "", "", /* no inner escapes; no end inner escape */
+ "\n", /* token will end with the end of the line */
+ TRUE, /* end delimiter (\n) has to be at a line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PodItemToken = {
+ "=item", /* used by error messages involving this token */
+ PRINT_NODELIMS_QUOTED,/* printing just what follows =item on the line */
+ "@Null //}\n@TI {@PLL", /* Lout command for formatting Pod item */
+ "", /* no alternate command */
+ "} {", /* open brace to enclose the item content */
+ TRUE, /* token allowed at start of line only */
+ { "=item" }, /* command begins with this */
+ { " ", "\t" }, /* helps to skip following white space */
+ { "\n\n", "\n\n" }, /* token will end at blank line */
+ AllPrintableTabNL, /* any printable inside */
+ "", "", /* no escape character; nothing legal after escape */
+ "", "", /* no inner escapes; no end inner escape */
+ "", /* see ends2[] */
+ FALSE, /* end delimiter (\n) must already be at start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PodBackToken = {
+ "=back", /* used by error messages involving this token */
+ PRINT_COMMAND_ONLY, /* printing just the command */
+ "@Null // }\n@EndList\n", /* Lout command for formatting Pod back */
+ "", /* no alternate command */
+ "", /* no following command */
+ TRUE, /* token allowed at start of line only */
+ { "=back" }, /* command begins with this */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* anything inside (in principle) */
+ "", "", /* no escape character; nothing legal after escape */
+ "", "", /* no inner escapes; no end inner escape */
+ "\n", /* token will end with the next blank line */
+ TRUE, /* end delimiter (\n) has to be at a line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Pod command paragraphs: =for, =begin, =end */
+/* */
+/* "passed directly to particular formatters. A formatter that can utilize */
+/* that format will use the section, otherwise it will be ignored." */
+/* */
+/* So I've put in a "=begin lout" token, also recognized as "=begin Lout". */
+/* */
+/*****************************************************************************/
+
+TOKEN PodForToken = {
+ "=for", /* used by error messages involving this token */
+ PRINT_COMMAND_ONLY, /* printing just the command */
+ "", /* Lout command for formatting Pod for (nothing) */
+ "", "", /* no alternate command; no following command */
+ TRUE, /* token allowed at start of line only */
+ { "=for" }, /* command begins with this */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* anything inside */
+ "", "", /* no escape character; nothing legal after escape */
+ "", "", /* no inner escapes; no end inner escape */
+ "\n", /* token will end with the end of the line */
+ TRUE, /* end delimiter (\n) has to be at a line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PodBeginToken = {
+ "=begin", /* used by error messages involving this token */
+ PRINT_COMMAND_ONLY, /* printing just the command */
+ "", /* Lout command for formatting Pod for (nothing) */
+ "", "", /* no alternate command; no following command */
+ TRUE, /* token allowed at start of line only */
+ { "=begin" }, /* command begins with this */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* anything inside */
+ "", "", /* no escape character; nothing legal after escape */
+ "", "", /* no inner escapes; no end inner escape */
+ "=end", /* token will end with =end character */
+ TRUE, /* end delimiter has to be at a line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+TOKEN PodBeginLoutToken = {
+ "=begin lout", /* used by error messages involving this token */
+ PRINT_NODELIMS_UNQUOTED,/* this is a Lout escape, no delims or quotes */
+ "", /* Lout command for formatting Pod for (nothing) */
+ "", "", /* no alternate command; no following command */
+ TRUE, /* token allowed at start of line only */
+ { "=begin lout", "=begin Lout" }, /* command begins with this */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ AllCharacters, /* anything inside */
+ "", "", /* no escape character; nothing legal after escape */
+ "", "", /* no inner escapes; no end inner escape */
+ "=end", /* token will end with =end character */
+ TRUE, /* end delimiter has to be at a line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+
+/*****************************************************************************/
+/* */
+/* Pod "Ordinary Block of Text" paragraphs */
+/* */
+/* "It will be filled, and maybe even justified" - I'm setting the whole */
+/* Pod in adjust @Break, and making sure that verbatim and command */
+/* paragraphs don't get adjusted. So no special requirements here, it */
+/* should all happen without any explicit tokens, given that I've set */
+/* the Pod language up to simply echo any characters (suitably quoted if */
+/* necessary in Lout) that don't match anything else. */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Pod interior sequences (recursive) */
+/* */
+/* I<text> Italicize text */
+/* B<text> Embolden text */
+/* S<text> Text containing non-break spaces */
+/* C<code> Code "render in typewriter font, or ..." */
+/* */
+/* Alternatively, instead of "<" .. ">" we may use "<< " .. " >>", or */
+/* "<<< " .. " >>>", etc. (Note the whitespace.) */
+/* */
+/*****************************************************************************/
+
+#define RecursiveToken(str, command) /* Pod recursive token */ \
+{ \
+ str, /* name used for debugging only */ \
+ PRINT_NODELIMS_INNER, /* recursively format the inside */ \
+ command, /* Lout command for formatting this */ \
+ "", "", /* no alternate command; no following */ \
+ FALSE, /* token not just start of line */ \
+ { str }, /* token begins with this */ \
+ { "<", "<< ", "<<< ", "<<<< " }, /* start2 */ \
+ { ">", " >>", " >>>", " >>>>" }, /* end2 */ \
+ AllCharacters, /* anything inside (in fact, not used)*/ \
+ "", "", /* no escape character */ \
+ "", "", /* no inner escape; no end inner esc */ \
+ "", /* will use end2 here */ \
+ FALSE, /* end not have to be at line start */ \
+ FALSE, /* don't end delimiter twice to stop */ \
}
-TOKEN HashToken = FixedToken("#", "@O");
-TOKEN ExclamationToken = FixedToken("!", "@O");
-TOKEN PercentToken = FixedToken("%", "@O");
-TOKEN HatToken = FixedToken("^", "@O");
-TOKEN AmpersandToken = FixedToken("&", "@O");
-TOKEN StarToken = FixedToken("*", "@ST");
-TOKEN SlashToken = FixedToken("/", "@O");
-TOKEN ArrowToken = FixedToken("->", "arrowright @A @O");
-TOKEN BackSlashToken = FixedToken("\\", "@O");
-TOKEN LeftParenToken = FixedToken("(", "@O");
-TOKEN RightParenToken = FixedToken(")", "@O");
-TOKEN MinusToken = FixedToken("-", "@M");
-TOKEN PlusToken = FixedToken("+", "plus @A @O");
-TOKEN EqualToken = FixedToken("=", "equal @A @O");
-TOKEN LeftBraceToken = FixedToken("{", "@O");
-TOKEN RightBraceToken = FixedToken("}", "@O");
-TOKEN BarToken = FixedToken("|", "@O");
-TOKEN CircumToken = FixedToken("~", "@O");
-TOKEN LeftBracketToken = FixedToken("[", "@O");
-TOKEN RightBracketToken = FixedToken("]", "@O");
-TOKEN SemicolonToken = FixedToken(";", "@O");
-TOKEN ColonToken = FixedToken(":", "@O");
-TOKEN LessToken = FixedToken("<", "less @A @O");
-TOKEN GreaterToken = FixedToken(">", "greater @A @O");
-TOKEN QuestionToken = FixedToken("?", "@O");
-TOKEN CommaToken = FixedToken(",", "@O");
-TOKEN DotToken = FixedToken(".", "@O");
-TOKEN EiffelDotToken = FixedToken(".", "@ED");
-TOKEN LessEqualToken = FixedToken("<=", "lessequal @A @O");
-TOKEN GreaterEqualToken = FixedToken(">=", "greaterequal @A @O");
-TOKEN CNotEqualToken = FixedToken("!=", "notequal @A @O");
-TOKEN EiffelNotEqualToken = FixedToken("/=", "notequal @A @O");
-TOKEN BlueNotEqualToken = FixedToken("<>", "notequal @A @O");
-TOKEN AssignToken = FixedToken(":=", "@O");
-TOKEN QuestionAssignToken = FixedToken("?=", "@O");
-TOKEN DollarToken = FixedToken("$", "@O");
-TOKEN ImpliesToken = FixedToken("=>", "implies @A @O");
+TOKEN PodItalicToken = RecursiveToken("I", "@PFI");
+TOKEN PodBoldToken = RecursiveToken("B", "@PFB");
+TOKEN PodNoBreakToken = RecursiveToken("S", "@OneCol");
+TOKEN PodCodeToken = RecursiveToken("C", "@PFC");
+
+/*****************************************************************************/
+/* */
+/* Pod interior sequences (non-recursive) */
+/* */
+/* L<name> A link; these have an internal format I've not looked at yet. */
+/* F<file> File name */
+/* X<index> Index */
+/* Z<> A zero-width space */
+/* */
+/* Alternatively, instead of "<" .. ">" we may use "<< " .. " >>", or */
+/* "<<< " .. " >>>", etc. (Note the whitespace.) */
+/* */
+/*****************************************************************************/
+
+#define InteriorToken(str, command, style) /* Pod delimited token */ \
+{ \
+ str, /* name used for debugging only */ \
+ style, /* print this token unquoted */ \
+ command, /* Lout command for formatting this */ \
+ "", "", /* no alternate command; no following */ \
+ FALSE, /* token not just start of line */ \
+ { str }, /* token begins with this */ \
+ { "<", "<< ", "<<< ", "<<<< " }, /* start2 */ \
+ { ">", " >>", " >>>", " >>>>" }, /* end2 */ \
+ AllCharacters, /* anything inside */ \
+ "", "", /* no escape character */ \
+ "", "", /* no inner escape; no end inner esc */ \
+ "", /* will use end2 here */ \
+ FALSE, /* end not have to be at line start */ \
+ FALSE, /* don't end delimiter twice to stop */ \
+}
+
+TOKEN PodFileToken = InteriorToken("F", "@PFF", PRINT_NODELIMS_QUOTED);
+TOKEN PodLinkToken = InteriorToken("L", "@PFL", PRINT_NODELIMS_QUOTED);
+TOKEN PodIndexToken = InteriorToken("X", "@PFX", PRINT_NODELIMS_QUOTED);
+TOKEN PodZeroToken = InteriorToken("Z", "", PRINT_COMMAND_ONLY);
+
+/*****************************************************************************/
+/* */
+/* Pod interior sequences (escape sequences) */
+/* */
+/* E<escape> A named character ("optional except in other interior */
+/* sequences and when preceded by a capital letter") */
+/* */
+/* E<lt> A literal < */
+/* E<gt> A literal > */
+/* E<sol> A literal / */
+/* E<verbar> A literal | */
+/* E<n> Character number n (probably in ASCII) */
+/* E<html> Some non-numeric HTML entity, such as E<Agrave> */
+/* */
+/* PodNumCharToken not tested. */
+/* */
+/*****************************************************************************/
+
+TOKEN PodNumCharToken = {
+ "E<>", /* used by error messages involving this token */
+ PRINT_NODELIMS_UNQUOTED,/* we're doing these manually, since they're funny*/
+ "\"\\", /* precede character number with \" */
+ "", /* no alternate command */
+ "\"", /* follow character number with " */
+ FALSE, /* token allowed at start of line only */
+ { "E<" }, /* command begins with this */
+ { NULL }, /* no start2 needed */
+ { NULL }, /* so no end2 either */
+ "0123456789", /* digits inside */
+ "", "", /* no escape character */
+ "", "", /* no "inner escapes" */
+ ">", /* token will end with > character */
+ FALSE, /* end delimiter does not have to be at line start */
+ FALSE, /* don't need to see end delimiter twice to stop */
+};
+
+#define PodEscapeToken(str, command) /* Pod delimited token */ \
+{ \
+ str, /* name used for debugging only */ \
+ PRINT_COMMAND_ONLY, /* print this token unquoted */ \
+ command, /* Lout command for formatting this */ \
+ "", /* no alternate command */ \
+ "", /* no following command */ \
+ FALSE, /* token not just start of line */ \
+ { str }, /* token begins with this */ \
+ { NULL }, /* start2 */ \
+ { NULL }, /* end2 */ \
+ "", /* nothing inside */ \
+ "", "", /* no escape character */ \
+ "", "", /* no inner escape either */ \
+ "", /* no ending delimiter except end2 */ \
+ FALSE, /* end not have to be at line start */ \
+ FALSE, /* don't end delimiter twice to stop */ \
+}
+
+TOKEN PodLessThanToken = PodEscapeToken("E<lt>", "<");
+TOKEN PodGreaterThanToken = PodEscapeToken("E<gt>", ">");
+TOKEN PodSlashToken = PodEscapeToken("E<sol>", "/");
+TOKEN PodVerbarToken = PodEscapeToken("E<verbar>", "|");
+
+
+/*****************************************************************************/
+/* */
+/* Mark Summerfield writes: */
+/* */
+/* The following table (and most of its comments) is copied from Gisle Aas */
+/* HTML::Entities.pm module with the plain text characters being replaced */
+/* by their Lout equivalents and the HTML entities with their pod */
+/* equivalents. */
+/* */
+/*****************************************************************************/
+
+/* Some normal chars that have special meaning in SGML context */
+TOKEN PE00 = PodEscapeToken("E<amp>", "&");
+/* already done above TOKEN PE01 = PodEscapeToken("E<gt>", ">"); */
+/* already done above TOKEN PE02 = PodEscapeToken("E<lt>", "<"); */
+TOKEN PE03 = PodEscapeToken("E<quot>", "\"\\\"\"");
+
+/* PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML */
+TOKEN PE04 = PodEscapeToken("E<AElig>", "{@Char AE}");
+TOKEN PE05 = PodEscapeToken("E<Aacute>", "{@Char Aacute}");
+TOKEN PE06 = PodEscapeToken("E<Acirc>", "{@Char Acircumflex}");
+TOKEN PE07 = PodEscapeToken("E<Agrave>", "{@Char Agrave}");
+TOKEN PE08 = PodEscapeToken("E<Aring>", "{@Char Aring}");
+TOKEN PE09 = PodEscapeToken("E<Atilde>", "{@Char Atilde}");
+TOKEN PE10 = PodEscapeToken("E<Auml>", "{@Char Adieresis}");
+TOKEN PE11 = PodEscapeToken("E<Ccedil>", "{@Char Ccedilla}");
+TOKEN PE12 = PodEscapeToken("E<ETH>", "{@Char Eth}");
+TOKEN PE13 = PodEscapeToken("E<Eacute>", "{@Char Eacute}");
+TOKEN PE14 = PodEscapeToken("E<Ecirc>", "{@Char Ecircumflex}");
+TOKEN PE15 = PodEscapeToken("E<Egrave>", "{@Char Egrave}");
+TOKEN PE16 = PodEscapeToken("E<Euml>", "{@Char Edieresis}");
+TOKEN PE17 = PodEscapeToken("E<Iacute>", "{@Char Iacute}");
+TOKEN PE18 = PodEscapeToken("E<Icirc>", "{@Char Icircumflex}");
+TOKEN PE19 = PodEscapeToken("E<Igrave>", "{@Char Igrave}");
+TOKEN PE20 = PodEscapeToken("E<Iuml>", "{@Char Idieresis}");
+TOKEN PE21 = PodEscapeToken("E<Ntilde>", "{@Char Ntilde}");
+TOKEN PE22 = PodEscapeToken("E<Oacute>", "{@Char Oacute}");
+TOKEN PE23 = PodEscapeToken("E<Ocirc>", "{@Char Ocircumflex}");
+TOKEN PE24 = PodEscapeToken("E<Ograve>", "{@Char Ograve}");
+TOKEN PE25 = PodEscapeToken("E<Oslash>", "{@Char Oslash}");
+TOKEN PE26 = PodEscapeToken("E<Otilde>", "{@Char Otilde}");
+TOKEN PE27 = PodEscapeToken("E<Ouml>", "{@Char Odieresis}");
+TOKEN PE28 = PodEscapeToken("E<THORN>", "{@Char Thorn}");
+TOKEN PE29 = PodEscapeToken("E<Uacute>", "{@Char Uacute}");
+TOKEN PE30 = PodEscapeToken("E<Ucirc>", "{@Char Ucircumflex}");
+TOKEN PE31 = PodEscapeToken("E<Ugrave>", "{@Char Ugrave}");
+TOKEN PE32 = PodEscapeToken("E<Uuml>", "{@Char Udieresis}");
+TOKEN PE33 = PodEscapeToken("E<Yacute>", "{@Char Yacute}");
+TOKEN PE34 = PodEscapeToken("E<aacute>", "{@Char aacute}");
+TOKEN PE35 = PodEscapeToken("E<acirc>", "{@Char acircumflex}");
+TOKEN PE36 = PodEscapeToken("E<aelig>", "{@Char ae}");
+TOKEN PE37 = PodEscapeToken("E<agrave>", "{@Char agrave}");
+TOKEN PE38 = PodEscapeToken("E<aring>", "{@Char aring}");
+TOKEN PE39 = PodEscapeToken("E<atilde>", "{@Char atilde}");
+TOKEN PE40 = PodEscapeToken("E<auml>", "{@Char adieresis}");
+TOKEN PE41 = PodEscapeToken("E<ccedil>", "{@Char ccedilla}");
+TOKEN PE42 = PodEscapeToken("E<eacute>", "{@Char eacute}");
+TOKEN PE43 = PodEscapeToken("E<ecirc>", "{@Char ecircumflex}");
+TOKEN PE44 = PodEscapeToken("E<egrave>", "{@Char egrave}");
+TOKEN PE45 = PodEscapeToken("E<eth>", "{@Char eth}");
+TOKEN PE46 = PodEscapeToken("E<euml>", "{@Char edieresis}");
+TOKEN PE47 = PodEscapeToken("E<iacute>", "{@Char iacute}");
+TOKEN PE48 = PodEscapeToken("E<icirc>", "{@Char icircumflex}");
+TOKEN PE49 = PodEscapeToken("E<igrave>", "{@Char igrave}");
+TOKEN PE50 = PodEscapeToken("E<iuml>", "{@Char idieresis}");
+TOKEN PE51 = PodEscapeToken("E<ntilde>", "{@Char ntilde}");
+TOKEN PE52 = PodEscapeToken("E<oacute>", "{@Char oacute}");
+TOKEN PE53 = PodEscapeToken("E<ocirc>", "{@Char ocircumflex}");
+TOKEN PE54 = PodEscapeToken("E<ograve>", "{@Char ograve}");
+TOKEN PE55 = PodEscapeToken("E<oslash>", "{@Char oslash}");
+TOKEN PE56 = PodEscapeToken("E<otilde>", "{@Char otilde}");
+TOKEN PE57 = PodEscapeToken("E<ouml>", "{@Char odieresis}");
+TOKEN PE58 = PodEscapeToken("E<szlig>", "{@Char germandbls}");
+TOKEN PE59 = PodEscapeToken("E<thorn>", "{@Char thorn}");
+TOKEN PE60 = PodEscapeToken("E<uacute>", "{@Char uacute}");
+TOKEN PE61 = PodEscapeToken("E<ucirc>", "{@Char ucircumflex}");
+TOKEN PE62 = PodEscapeToken("E<ugrave>", "{@Char ugrave}");
+TOKEN PE63 = PodEscapeToken("E<uuml>", "{@Char udieresis}");
+TOKEN PE64 = PodEscapeToken("E<yacute>", "{@Char yacute}");
+TOKEN PE65 = PodEscapeToken("E<yuml>", "{@Char ydieresis}");
+
+/* Some extra Latin 1 chars that are listed in the HTML3.2 draft 1996/05/21 */
+TOKEN PE66 = PodEscapeToken("E<copy>", "{@CopyRight}");
+TOKEN PE67 = PodEscapeToken("E<reg>", "{@Register}");
+TOKEN PE68 = PodEscapeToken("E<nbsp>", "~");
+
+/* Additional ISO-8859/1 entities listed in rfc1866 (section 14) */
+TOKEN PE69 = PodEscapeToken("E<iexcl>", "{@Char exclamdown}");
+TOKEN PE70 = PodEscapeToken("E<cent>", "{@Char cent}");
+TOKEN PE71 = PodEscapeToken("E<pound>", "{@Sterling}");
+TOKEN PE72 = PodEscapeToken("E<curren>", "{@Char currency}");
+TOKEN PE73 = PodEscapeToken("E<yen>", "{@Yen}");
+TOKEN PE74 = PodEscapeToken("E<brvbar>", "{@Char bar}");
+TOKEN PE75 = PodEscapeToken("E<sect>", "{@SectSym}");
+TOKEN PE76 = PodEscapeToken("E<uml>", "{@Char dieresis}");
+TOKEN PE77 = PodEscapeToken("E<ordf>", "{@Char ordfeminine}");
+TOKEN PE78 = PodEscapeToken("E<laquo>", "{@Char guillemotleft}");
+TOKEN PE79 = PodEscapeToken("E<not>", "{@Char logicalnot}");
+TOKEN PE80 = PodEscapeToken("E<shy>", "{@Char hyphen}");
+TOKEN PE81 = PodEscapeToken("E<macr>", "{@Char macron}");
+TOKEN PE82 = PodEscapeToken("E<deg>", "{@Char degree}");
+TOKEN PE83 = PodEscapeToken("E<plusmn>", "{@Char plusminus}");
+TOKEN PE84 = PodEscapeToken("E<sup1>", "{@Char onesuperior}");
+TOKEN PE85 = PodEscapeToken("E<sup2>", "{@Char twosuperior}");
+TOKEN PE86 = PodEscapeToken("E<sup3>", "{@Char threesuperior}");
+TOKEN PE87 = PodEscapeToken("E<acute>", "{@Char acute}");
+TOKEN PE88 = PodEscapeToken("E<micro>", "{@Char mu}");
+TOKEN PE89 = PodEscapeToken("E<para>", "{@ParSym}");
+TOKEN PE90 = PodEscapeToken("E<middot>", "{@Char periodcentered}");
+TOKEN PE91 = PodEscapeToken("E<cedil>", "{@Char cedilla}");
+TOKEN PE92 = PodEscapeToken("E<ordm>", "{@Char ordmasculine}");
+TOKEN PE93 = PodEscapeToken("E<raquo>", "{@Char guillemotright}");
+TOKEN PE94 = PodEscapeToken("E<frac14>", "{@Char onequarter}");
+TOKEN PE95 = PodEscapeToken("E<frac12>", "{@Char onehalf}");
+TOKEN PE96 = PodEscapeToken("E<frac34>", "{@Char threequarters}");
+TOKEN PE97 = PodEscapeToken("E<iquest>", "{@Char questiondown}");
+TOKEN PE98 = PodEscapeToken("E<times>", "{@Multiply}");
+TOKEN PE99 = PodEscapeToken("E<divide>", "{@Divide}");
+
+
/*****************************************************************************/
/* */
/* LANGUAGE - put your language declarations in this section. */
/* */
+/* The field names and their meanings are: */
+/* */
+/* names Set of alternative names for this languages */
+/* setup_file The default Lout setup file (e.g. "cprint", "eiffel") */
+/* lang_sym The symbol for the language (e.g. "@CP", "@Eiffel") */
+/* no_match What to do if something fails to match (see below) */
+/* tokens Set of all tokens of this language */
+/* keywords Set of all keywords for this language */
+/* */
+/* Acceptable values for no_match are: */
+/* */
+/* NO_MATCH_ERROR Generate an error message and skip the character. */
+/* */
+/* NO_MATCH_PRINT Print the character in a way that is Lout-safe; that */
+/* is, mostly raw but in quotes for "/", "@" etc., and */
+/* handling tabs and newlines appropriately. */
+/* */
/*****************************************************************************/
+#define NO_MATCH_ERROR 1
+#define NO_MATCH_PRINT 2
+#define NO_LANGUAGE ((LANGUAGE *) NULL)
typedef struct lang_rec {
char *names[MAX_ARRAY_LENGTH];
- char *default_style;
+ char *setup_file;
+ char *lang_sym;
+ int no_match;
TOKEN *tokens[MAX_ARRAY_LENGTH];
char *keywords[MAX_ARRAY_LENGTH];
} LANGUAGE;
+
LANGUAGE CLanguage = {
{ "C", "c", "C++", "c++" },
- "fixed",
+ "cprint", "@CP",
+ NO_MATCH_ERROR,
{
&CStringToken, &CCharacterToken, &IdentifierToken, &NumberToken,
&CCommentToken, &CCommentEscapeToken,
@@ -395,19 +1975,26 @@ LANGUAGE CLanguage = {
},
{ "asm", "auto", "break", "case", "catch", "char", "class", "const",
- "continue", "default", "delete", "do", "double", "else", "enum",
- "extern", "float", "for", "friend", "goto", "if", "inline", "int",
- "long", "new", "operator", "private", "protected", "public", "register",
- "return", "short", "signed", "sizeof", "static", "struct", "switch",
- "template", "this", "throw", "try", "typedef", "union", "unsigned",
- "virtual", "void", "volatile", "while",
+ "continue", "default", "delete", "do", "double", "else", "enum", "extern",
+ "float", "for", "friend", "goto", "if", "inline", "int", "long", "new",
+ "operator", "private", "protected", "public", "register", "return",
+ "short", "signed", "sizeof", "static", "struct", "switch", "template",
+ "this", "throw", "try", "typedef", "union", "unsigned", "virtual",
+ "void", "volatile", "while",
}
};
+
+/*****************************************************************************/
+/* */
+/* Eiffel and Blue */
+/* */
+/*****************************************************************************/
LANGUAGE EiffelLanguage = {
{ "Eiffel", "eiffel" },
- "varying",
+ "eiffel", "@Eiffel",
+ NO_MATCH_ERROR,
{
&EiffelStringToken, &EiffelCharacterToken, &IdentifierToken, &NumberToken,
&EiffelCommentToken, &EiffelCommentEscapeToken,
@@ -418,22 +2005,21 @@ LANGUAGE EiffelLanguage = {
&MinusToken, &DollarToken, &HatToken, &SlashToken, &BackSlashToken,
&LessToken, &GreaterToken, &LessEqualToken, &GreaterEqualToken
},
-
{ "alias", "all", "and", "as", "check", "class", "creation", "debug",
- "deferred", "do", "else", "elseif", "end", "ensure", "expanded",
- "export", "external", "false", "feature", "from", "frozen", "if",
- "implies", "indexing", "infix", "inherit", "inspect", "invariant",
- "is", "like", "local", "loop", "obsolete", "old", "once", "or",
- "prefix", "redefine", "rename", "require", "rescue", "retry", "select",
- "separate", "strip", "then", "true", "undefine", "unique", "until",
- "variant", "when", "xor", "not", "interface"
+ "deferred", "do", "else", "elseif", "end", "ensure", "expanded", "export",
+ "external", "false", "feature", "from", "frozen", "if", "implies",
+ "indexing", "infix", "inherit", "inspect", "invariant", "is", "like",
+ "local", "loop", "obsolete", "old", "once", "or", "prefix", "redefine",
+ "rename", "require", "rescue", "retry", "select", "separate", "strip",
+ "then", "true", "undefine", "unique", "until", "variant", "when", "xor",
+ "not", "interface"
}
};
-
LANGUAGE BlueLanguage = {
{ "Blue", "blue" },
- "varying",
+ "blue", "@Blue",
+ NO_MATCH_ERROR,
{
&CStringToken, &IdentifierToken, &NumberToken,
&BlueCommentToken, &BlueCommentEscapeToken,
@@ -443,30 +2029,246 @@ LANGUAGE BlueLanguage = {
&EqualToken, &BlueNotEqualToken, &LeftBraceToken, &RightBraceToken,
&PlusToken, &MinusToken, &StarToken, &SlashToken, &HatToken,
&LessEqualToken, &GreaterEqualToken
+ },
+ { "and", "assert", "builtin", "case", "class", "const", "create", "creation",
+ "deferred", "div", "do", "else", "elseif", "end", "Enumeration",
+ "enumeration", "exit", "if", "in", "interface", "internal", "invariant",
+ "is", "loop", "manifest", "mod", "not", "of", "old", "on", "or", "post",
+ "pre", "redefined", "return", "routines", "super", "then", "uses", "var"
+ }
+};
+
+/*****************************************************************************/
+/* */
+/* Perl and Pod */
+/* */
+/*****************************************************************************/
+
+LANGUAGE PerlLanguage = {
+ { "Perl", "perl", },
+ "perl", "@Perl",
+ NO_MATCH_ERROR,
+ {
+ &PerlSingleQuoteStringToken, &PerlDoubleQuoteStringToken,
+ &PerlBackQuoteStringToken, &PerlQTypeStringToken, &PerlSTypeStringToken,
+ &PerlRegExpLPar, &PerlRegExpEq, &PerlRegExpMatch, &PerlRegExpNoMatch,
+ &PerlRegExpSplit, &PerlRegExpIf, &PerlRegExpNot, &PerlRegExpUnless,
+ &PerlRegExpFor, &PerlRegExpForEach, &PerlRegExpWhile,
+ &PerlRegExpStartLineToken,
+ &HereEOTuq, &HereEOTdq, &HereEOTfq, &HereEOTbq,
+ &HereEOFuq, &HereEOFdq, &HereEOFfq, &HereEOFbq,
+ &HereENDuq, &HereENDdq, &HereENDfq, &HereENDbq,
+ &HereBLAuq, &HereBLAdq, &HereBLAfq, &HereBLAbq,
+ &PerlIdentifierToken, &PerlSpecialIdentifierToken,
+ &PerlLiteralNumberToken, &PerlHexNumberToken,
+ &PerlCommentToken, &PerlCommentEscapeToken, &PerlPodToken,
+ &ExclamationToken, &PercentToken, &HatToken, &AmpersandToken,
+ &StarToken, &SlashToken, &ArrowToken, &BackSlashToken,
+ &LeftParenToken, &RightParenToken, &MinusToken, &PlusToken,
+ &LeftBraceToken, &RightBraceToken, &BarToken, &CircumToken,
+ &LeftBracketToken, &RightBracketToken, &SemicolonToken, &ColonToken,
+ &LessToken, &GreaterToken, &QuestionToken, &CommaToken, &DotToken,
+ &LessEqualToken, &GreaterEqualToken, &CNotEqualToken,
+ &PerlIncrementToken, &PerlDecrementToken, &PerlExponentiateToken,
+ &PerlMatchToken, &PerlNotMatchToken,
+ &PerlEqualToken, &PerlAssignToken, &PerlBitLeftShiftToken,
+ &PerlBitRightShiftToken, &PerlSpaceshipToken,
+ &PerlAndToken, &PerlOrToken, &PerlRange2Token, &PerlRange3Token,
+ &PerlFileTestrToken, &PerlFileTestwToken, &PerlFileTestxToken,
+ &PerlFileTestoToken, &PerlFileTestRToken, &PerlFileTestWToken,
+ &PerlFileTestXToken, &PerlFileTestOToken, &PerlFileTesteToken,
+ &PerlFileTestzToken, &PerlFileTestsToken, &PerlFileTestfToken,
+ &PerlFileTestdToken, &PerlFileTestlToken, &PerlFileTestpToken,
+ &PerlFileTestSToken, &PerlFileTestbToken, &PerlFileTestcToken,
+ &PerlFileTesttToken, &PerlFileTestuToken, &PerlFileTestgToken,
+ &PerlFileTestkToken, &PerlFileTestTToken, &PerlFileTestBToken,
+ &PerlFileTestMToken, &PerlFileTestAToken, &PerlFileTestCToken,
},
- { "and", "assert", "builtin", "case", "class", "const", "create",
- "creation", "deferred", "div", "do", "else", "elseif", "end",
- "Enumeration", "enumeration", "exit", "if", "in", "interface",
- "internal", "invariant", "is", "loop", "manifest", "mod", "not",
- "of", "old", "on", "or", "post", "pre", "redefined", "return",
- "routines", "super", "then", "uses", "var"
+ {
+ /* Built-ins taken from Programming Perl 2nd Ed. */
+ "abs", "accept", "alarm", "atan2",
+ "bind", "binmode", "bless",
+ "caller", "chdir", "chmod", "chomp", "chop", "chown", "chr", "chroot",
+ "close", "closedir", "connect", "continue", "cos", "crypt",
+ "dbmclose", "dbmopen", "defined", "delete", "die", "do", "dump",
+ "each", "endgrent", "endhostent", "endnetent", "endprotoent",
+ "endpwent", "endservent", "eof", "eval", "exec", "exists", "exit",
+ "exp",
+ "fcntl", "fileno", "flock", "fork", "format", "formline",
+ "getc", "getgrent", "getgrgid", "getgrnam", "gethostbyaddr",
+ "gethostbyname", "gethostent", "getlogin", "getnetbyaddr",
+ "getnetbyname", "getnetent", "getpeername", "getpgrp", "getppid",
+ "getpriority", "getprotobyname", "getprotobynumber", "getprotoent",
+ "getpwent", "getpwnam", "getpwuid", "getservbyname", "getservbyport",
+ "getservent", "getsockname", "getsockopt", "glob", "gmtime", "goto",
+ "grep",
+ "hex",
+ "import", "index", "int", "ioctl",
+ "join",
+ "keys", "kill",
+ "last", "lc", "lcfirst", "length", "link", "listen", "local",
+ "localtime", "log", "lstat",
+ "map", "mkdir", "msgctl", "msgget", "msgrcv", "msgsnd", "my",
+ "next", "no",
+ "oct", "open", "opendir", "ord",
+ "pack", "package", "pipe", "pop", "pos", "print", "printf", "push",
+ "quotemeta",
+ "rand", "read", "readdir", "readlink", "recv", "redo", "ref",
+ "rename", "require", "reset", "return", "reverse", "rewinddir",
+ "rindex", "rmdir",
+ "scalar", "seek", "seekdir", "select", "semctl", "semget", "semop",
+ "send", "setgrent", "sethostent", "setnetent", "setpgrp",
+ "setpriority", "setprotoent", "setpwent", "setservent",
+ "setsockopt", "shift", "shmctl", "shmget", "shmread", "shmwrite",
+ "shutdown", "sin", "sleep", "socket", "socketpair", "sort",
+ "splice", "split", "sprintf", "sqrt", "srand", "stat", "study",
+ "sub", "substr", "symlink", "syscall", "sysopen", "sysread", "sysseek",
+ "system", "syswrite",
+ "tell", "telldir", "tie", "tied", "time", "times", "truncate",
+ "uc", "ucfirst", "umask", "undef", "unlink", "unpack", "unshift",
+ "untie", "use", "utime",
+ "values", "vec",
+ "wait", "waitpid", "wantarray", "warn", "write",
+
+ /* from later versions of Perl (excluding experimental thread stuff) */
+ "our", "prototype", "readline", "readpipe",
+
+ /* operators */
+ "lt", "gt", "eq", "ne", "cmp", "le", "ge",
+
+ /* Perl special variables */
+ /* NB special variables that begin $^, e.g. $^D can also be written as */
+ /* dollar control D (yes literal control D) -- but suggest we ignore */
+ "$_", "$ARG",
+ "$.", "$INPUT_LINE_SEPARATOR", "$NR",
+ "$/", "$INPUT_RECORD_SEPARATOR", "$RS",
+ "$,", "$OUTPUT_FIELD_SEPARATOR", "$OFS",
+ "$\\", "$OUTPUT_RECORD_SEPARATOR", "$ORS",
+ "$\"", "$LIST_SEPARATOR",
+ "$;", "$SUBSCRIPT_SEPARATOR", "$SUBSEP",
+ "$^L", "$FORMAT_FORMFEED",
+ "$:", "$FORMAT_LINE_BREAK_CHARACTERS",
+ "$^A", "$ACCUMULATOR",
+ "$#", "$OFMT",
+ "$?", "$CHILD_ERROR",
+ "$!", "$OS_ERROR", "$ERRNO",
+ "$@", "$EVAL_ERROR",
+ "$$", "$PROCESS_ID", "$PID",
+ "$<", "$REAL_USER_ID", "$UID",
+ "$>", "$EFFECTIVE_USER_ID", "$EUID",
+ "$(", "$REAL_GROUP_ID", "$GID",
+ "$)", "$EFFECTIVE_GROUP_ID", "$EGID",
+ "$0", /* dollar zero */ "$PROGRAM_NAME",
+ "$[", /* deprecated */
+ "$]", "$PERL_VERSION",
+ "$^D", "$DEBUGGING",
+ "$^E", "$EXTENDED_OS_ERROR",
+ "$^F", "$SYSTEM_FD_MAX",
+ "$^H",
+ "$^I", "$INPLACE_EDIT",
+ "$^M",
+ "$^O", "$OSNAME",
+ "$^P", "$PERLDB",
+ "$^T", "$BASETIME",
+ "$^W", "$WARNING",
+ "$^X", "$EXECUTABLE_NAME",
+ "$ARGV",
+ "@ARGV", "@INC", "@F", "%INC", "%ENV", "%SIG",
+ "ARGV", "STDERR", "STDIN", "STDOUT", "DATA"
+ "$&", "$MATCH",
+ "$`", "$PREMATCH",
+ "$'", "$POSTMATCH",
+ "$+", "$LAST_PAREN_MATCH",
+ "$|", "$OUTPUT_AUTOFLUSH",
+ "$%", "$FORMAT_PAGE_NUMBER",
+ "$=", "$FORMAT_LINES_PER_PAGE",
+ "$-", "$FORMAT_LINES_LEFT",
+ "$~", "$FORMAT_NAME",
+ "$^", "$FORMAT_TOP_NAME",
+
+ /* Perl special constants */
+ "__END__", "__FILE__", "__LINE__", "__PACKAGE__",
+
+ /* Perl regex variables - first 9 only */
+ "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9",
+
+ /* Perl pragmas */
+ "attrs", "autouse", "base", "blib", "constant", "diagnostics", "fields",
+ "integer", "less", "lib", "locale", "ops", "overload", "re", "sigtrap",
+ "strict", "subs", "vars",
+
+ /* Perl textual operators */
+ "and", "or", "xor", "not",
+
+ /* the x keyword */
+ "x",
+
+ /* Perl control structures */
+ "if", "elsif", /* yes one e */ "else", "unless",
+ "while", "for", "foreach", "continue", "until",
}
};
+LANGUAGE PodLanguage = {
+ { "Pod", "pod", "POD" },
+ "pod", "@Pod",
+ NO_MATCH_PRINT,
+ {
+ &PodVerbatimLineToken, &PodEmptyLineToken, &PodIgnoreToken,
+ &PodHeading1Token, &PodHeading2Token,
+ &PodOverToken, &PodItemToken, &PodBackToken,
+ &PodForToken, &PodBeginToken, &PodBeginLoutToken,
+ &PodItalicToken, &PodBoldToken, &PodCodeToken, &PodFileToken,
+ &PodNoBreakToken, &PodLinkToken, &PodIndexToken, &PodZeroToken,
+ &PodLessThanToken, &PodGreaterThanToken, &PodSlashToken,
+ &PodVerbarToken,
+ &PE00, /* &PE01, &PE02, */ &PE03, &PE04, &PE05, &PE06, &PE07, &PE08, &PE09,
+ &PE10, &PE11, &PE12, &PE13, &PE14, &PE15, &PE16, &PE17, &PE18, &PE19,
+ &PE20, &PE21, &PE22, &PE23, &PE24, &PE25, &PE26, &PE27, &PE28, &PE29,
+ &PE30, &PE31, &PE32, &PE33, &PE34, &PE35, &PE36, &PE37, &PE38, &PE39,
+ &PE40, &PE41, &PE42, &PE43, &PE44, &PE45, &PE46, &PE47, &PE48, &PE49,
+ &PE50, &PE51, &PE52, &PE53, &PE54, &PE55, &PE56, &PE57, &PE58, &PE59,
+ &PE60, &PE61, &PE62, &PE63, &PE64, &PE65, &PE66, &PE67, &PE68, &PE69,
+ &PE70, &PE71, &PE72, &PE73, &PE74, &PE75, &PE76, &PE77, &PE78, &PE79,
+ &PE80, &PE81, &PE82, &PE83, &PE84, &PE85, &PE86, &PE87, &PE88, &PE89,
+ &PE90, &PE91, &PE92, &PE93, &PE94, &PE95, &PE96, &PE97, &PE98, &PE99,
+ &PodNumCharToken,
+ },
+
+ { NULL },
+};
+
+
+/*****************************************************************************/
+/* */
+/* The "languages" variable - add your language to this list */
+/* */
+/*****************************************************************************/
+
LANGUAGE *languages[] = {
& CLanguage,
& EiffelLanguage,
& BlueLanguage,
+ & PerlLanguage,
+ & PodLanguage,
};
+
+
+
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************/
+/*** ***/
+/*** If you are adding a new language, you don't need to change anything ***/
+/*** below this point. Just repeating: don't change anything below here. ***/
+/*** ***/
+/*****************************************************************************/
/*****************************************************************************/
-/* */
-/* If you are adding a new language, you don't need to change anything */
-/* below this point. Just repeating: don't change anything below here. */
-/* */
/*****************************************************************************/
@@ -483,18 +2285,9 @@ LANGUAGE *languages[] = {
#define DEBUG_EMIT 0
#define DEBUG_MAIN 0
-#define PRG2LOUT_VERSION "prg2lout Version 1.0 (February 2000)"
-#define BOOLEAN unsigned
-#define FALSE 0
-#define TRUE 1
+#define PRG2LOUT_VERSION "prg2lout Version 2.0 (April 2000)"
#define MAX_LINE 1024
-/* print styles */
-#define NO_STYLE 0
-#define FIXED_STYLE 1
-#define VARYING_STYLE 2
-#define SYMBOL_STYLE 3
-
static char file_name[MAX_LINE]; /* current input file name */
static char curr_line[MAX_LINE]; /* current input line */
static int line_num; /* current input line number */
@@ -502,12 +2295,13 @@ static int line_pos; /* current input column number */
static BOOLEAN raw_seen; /* TRUE if -r (raw mode) */
static BOOLEAN headers_option; /* TRUE if no -n option (headers) */
-static int style_option; /* value of -p option, or NO_STYLE */
+static char *style_option; /* value of -p option, else null */
static char *font_option; /* value of -f option, else null */
static char *size_option; /* value of -s option, else null */
static char *line_option; /* value of -v option, else null */
static char *tabin_option; /* value of -t option, else null */
static char *tabout_option; /* value of -T option, else null */
+static char *setup_option; /* value of -S option, else null */
static char *language_option; /* value of -l option, else null */
static BOOLEAN tab_by_spacing; /* TRUE if using space chars to tab */
@@ -515,9 +2309,22 @@ static int tab_in; /* tab interval, value of -t option */
static float tab_out; /* tab interval width (-T option) */
static char tab_unit; /* unit of measurement for tab */
+static FILE *in_fp; /* where input comes from */
+static FILE *out_fp; /* where output goes */
+static FILE *err_fp; /* where error messages go */
+
+
+/*****************************************************************************/
+/* */
+/* char *ErrorHeader() */
+/* */
+/* Returns a string showing the current file, line, and column. */
+/* */
+/*****************************************************************************/
+
char *ErrorHeader()
{ static char buff[MAX_LINE];
- if( line_num == 0 )
+ if( line_num == 0 || line_pos == 0 )
sprintf(buff, "prg2lout");
else if( raw_seen )
sprintf(buff, "prg2lout %d,%d", line_num, line_pos);
@@ -525,6 +2332,16 @@ char *ErrorHeader()
sprintf(buff, "prg2lout %s %d,%d", file_name, line_num, line_pos);
return buff;
}
+
+
+/*****************************************************************************/
+/* */
+/* GetArg(arg, message, null_ok) */
+/* */
+/* Get the next command line argument's value into arg. If there isn't */
+/* one, print an error message and quit unless null_ok is true. */
+/* */
+/*****************************************************************************/
#define GetArg(arg, message, null_ok) \
{ if( strcmp(argv[arg_pos]+2, "") != 0 ) \
@@ -556,9 +2373,84 @@ char *EchoToken(TOKEN *t)
sprintf(buff, "%s", t->name);
return buff;
}
+
+/*****************************************************************************/
+/* */
+/* NextChar() */
+/* */
+/* Move to next character in the input file. This may involve changing */
+/* global variables curr_line, line_num, and line_pos; the new character */
+/* may be found in curr_line[line_pos]. */
+/* */
+/* NextChar does not skip any characters at all. When end of file is */
+/* reached, curr_line[line_pos] contains '\0'. */
+/* */
+/* It is possible for code to read ahead of curr_line[line_pos] up to and */
+/* including the newline character at the end of the line after the line */
+/* we are currently on (thus it is possible to recognize an empty line as */
+/* \n\n), but not beyond, using curr_line[line_pos + i] for i > 0. */
+/* */
+/*****************************************************************************/
+void NextChar()
+{
+ if( curr_line[line_pos] != '\n' )
+ {
+ /* we can carry on with the current line. This will yield '\0' as */
+ /* desired if EOF arrives before the end of the line */
+ line_pos++;
+ }
+ else if( curr_line[line_pos+1] != '\0' )
+ {
+ /* we've already read in the next line; it's at &curr_line[line_pos+1] */
+ strcpy(&curr_line[1], &curr_line[line_pos+1]);
+ line_num++;
+ line_pos = 1;
+ }
+ else
+ {
+ /* we need to read in the new line */
+ line_num++;
+ line_pos = 1;
+ if( fgets(&curr_line[1], MAX_LINE+2, in_fp) == (char *) NULL )
+ curr_line[1] = '\0';
+ }
+ if( DEBUG_NEXTCHAR )
+ fprintf(stderr, "after NextChar, line_num %d, line_pos %d, curr_line %s",
+ line_num, line_pos, &curr_line[1]);
+} /* end NextChar */
+
+/*****************************************************************************/
+/* */
+/* BOOLEAN InputMatches(char *pattern) */
+/* */
+/* Returns TRUE if input starting at curr_line[line_pos] matches pattern. */
+/* To check this we may have to read an extra line or more of input. */
+/* */
+/*****************************************************************************/
+
+BOOLEAN InputMatches(char *pattern)
+{ char *p, *q;
+ for(p = &curr_line[line_pos], q = pattern; *q != '\0'; p++, q++ )
+ {
+ if( *p == '\0' )
+ {
+ /* attempt to read another line of input, since we are off the end */
+ if( fgets(p, MAX_LINE+2-(p - curr_line), in_fp) == (char *) NULL )
+ *p = '\0';
+ }
+ if( *p != *q )
+ break;
+ }
+ if( DEBUG_PREFIXEQ )
+ fprintf(stderr, "InputMatches(%s, %s) returning %s\n",
+ &curr_line[line_pos], pattern, *q == '\0' ? "TRUE" : "FALSE");
+ return (*q == '\0');
+} /* end InputMatches */
+
+
/*****************************************************************************/
/* */
/* TRIE */
@@ -573,6 +2465,7 @@ typedef struct trie_node {
struct trie_node *sub[MAX_CHAR];
TOKEN *value[MAX_CHAR];
} *TRIE;
+
/*****************************************************************************/
/* */
@@ -591,16 +2484,14 @@ BOOLEAN TrieInsert(TRIE *T, char *str, TOKEN *val)
if( *str == '\0' )
res = FALSE;
else
- {
- if( *T == (TRIE) NULL )
+ { if( *T == (TRIE) NULL )
*T = (TRIE) calloc(1, sizeof(struct trie_node)); /* will set all to 0 */
if( *(str + 1) != '\0' )
res = TrieInsert(&((*T)->sub[(int) *str]), str + 1, val);
else if( (*T)->value[(int) *str] != (TOKEN *) NULL )
res = FALSE;
else
- {
- (*T)->value[(int) *str] = val;
+ { (*T)->value[(int) *str] = val;
res = TRUE;
}
}
@@ -627,12 +2518,10 @@ TOKEN *TrieRetrieve(TRIE T, char *str, int *len)
res = (TOKEN *) NULL;
*len = 0;
for( i = 0; T != (TRIE) NULL; T = T->sub[(int) str[i]], i++ )
- {
- if( DEBUG_TRIE )
+ { if( DEBUG_TRIE )
fprintf(stderr, " i = %d, res = %s\n", i, EchoToken(res));
if( T->value[(int) str[i]] != (TOKEN *) NULL )
- {
- res = T->value[(int) str[i]];
+ { res = T->value[(int) str[i]];
*len = i+1;
}
}
@@ -641,8 +2530,8 @@ TOKEN *TrieRetrieve(TRIE T, char *str, int *len)
*len, EchoToken(res));
return res;
}
-
+
/*****************************************************************************/
/* */
/* HASH_TABLE */
@@ -653,7 +2542,7 @@ TOKEN *TrieRetrieve(TRIE T, char *str, int *len)
/* NB MAX_SYM must be somewhat larger than the number of keywords. */
/* */
/*****************************************************************************/
-#define MAX_SYM 309
+#define MAX_SYM 609
static char *HashTable[MAX_SYM]; /* will initialze to NULL */
static int HashTableCount = 0; /* number of entries */
@@ -667,10 +2556,10 @@ static int hash(char *key)
return res % MAX_SYM;
} /* end hash */
-void HashTableInsert(char *str, FILE *err_fp)
+void HashInsert(char *str)
{ int i;
if( DEBUG_SETUP )
- fprintf(stderr, "[ HashTableInsert(%s)\n", str);
+ fprintf(stderr, "[ HashInsert(%s)\n", str);
if( HashTableCount >= MAX_SYM - 20 )
{
fprintf(err_fp, "%s internal error: full hash table (increase MAX_SYM)\n",
@@ -681,40 +2570,54 @@ void HashTableInsert(char *str, FILE *err_fp)
HashTable[i] = str;
HashTableCount++;
if( DEBUG_SETUP )
- fprintf(stderr, "] HashTableInsert(%s)\n", str);
+ fprintf(stderr, "] HashInsert(%s)\n", str);
}
-BOOLEAN HashTableRetrieve(char *str)
+BOOLEAN HashRetrieve(char *str)
{ int i;
for( i = hash(str); HashTable[i] != (char *) NULL; i = (i+1) % MAX_SYM )
if( strcmp(HashTable[i], str) == 0 )
return TRUE;
return FALSE;
}
-
+
/*****************************************************************************/
/* */
/* BACK END */
/* */
-/* This is the code that actually prints the output file. It accumulates */
-/* each token and only prints it at the end of the token, so that it can */
-/* check whether an alternative command ought to be printed (keywords). */
+/* This is the code that actually prints the output file. */
+/* To emit one token, the call sequence should be as follows: */
+/* */
+/* StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, l) */
+/* Emit(TOKEN *current_token, char ch) */
+/* ... */
+/* Emit(TOKEN *current_token, char ch) */
+/* EndEmit(TOKEN *current_token, char *end_delim) */
+/* */
+/* The back end will then take care of all print styles automatically, */
+/* including checking for keywords. When emitting white space each space */
+/* can be sent directly: */
+/* */
+/* EmitRaw(ch) */
/* */
/*****************************************************************************/
-static char save_token[MAX_LINE]; /* the token text */
-static int save_len; /* index of \0 in save_token */
+static char save_value[MAX_LINE]; /* the token text */
+static int save_len; /* index of \0 in save_value */
static BOOLEAN save_on = FALSE; /* TRUE when saving */
+static LANGUAGE *save_language; /* the current language */
+
/*****************************************************************************/
/* */
-/* EmitRaw(ch, out_fp, FILE *err_fp) */
+/* EmitRaw(ch) */
/* */
/* Emit this character immediately. This is only legal when not saving. */
-/* All characters printed on the output file should pass through here, */
-/* since EmitRaw keeps track of where we are on the output line, in order */
-/* to handle tab characters correctly. */
+/* All characters printed on the output file that represent actual text */
+/* of the program (i.e. not commands, {}, "", \ in strings etc.) should */
+/* pass through here, since EmitRaw keeps track of where we are on */
+/* the output line, in order to handle tab characters correctly. */
/* */
/* NB out_linepos is the column where the *next* character will go, and */
/* it counts the first column on the line as column zero. It understands */
@@ -723,277 +2626,546 @@ static BOOLEAN save_on = FALSE; /* TRUE when saving */
/* */
/*****************************************************************************/
-void EmitRaw(char ch, FILE *out_fp, FILE *err_fp)
-{
- static int out_linepos = 0; /* output line position */
+void EmitRaw(char ch)
+{ static int out_linepos = 0; /* output line position */
static BOOLEAN out_linestart = TRUE; /* TRUE if out line start */
if( DEBUG_EMIT )
fprintf(stderr, "EmitRaw(%c); out_linepos %d, out_linestart %s\n",
ch, out_linepos, out_linestart ? "TRUE" : "FALSE");
if( save_on )
- {
- fprintf(err_fp, "%s internal error (EmitRaw save_on)\n", ErrorHeader());
+ { fprintf(err_fp, "%s internal error (EmitRaw save_on)\n", ErrorHeader());
abort();
}
if( ch == '\t' )
- {
- if( tab_by_spacing )
+ { if( tab_by_spacing )
{ putc(' ', out_fp);
out_linepos++;
while( out_linepos % tab_in != 0 )
{ putc(' ', out_fp);
- out_linepos++;
+ out_linepos++;
}
}
else
- {
- out_linepos++;
- while( out_linepos % tab_in != 0 )
- { out_linepos++;
- }
+ { out_linepos++;
+ while( out_linepos % tab_in != 0 ) out_linepos++;
if( out_linestart )
- { fprintf(out_fp, "$>%.1f%c {}", tab_out, tab_unit);
+ { fprintf(out_fp, "$>\"%.1f%c\" {}", tab_out, tab_unit);
/* NB {} is required in case nothing follows on this line */
}
else
- { fprintf(out_fp, "$>%.1f%ct {}", (out_linepos/tab_in)*tab_out, tab_unit);
- }
+ fprintf(out_fp, "$>\"%.1f%ct\" {}", (out_linepos/tab_in)*tab_out, tab_unit);
}
}
else if( ch == '\n' )
- {
- fputc(ch, out_fp);
+ { fputc(ch, out_fp);
out_linepos = 0;
out_linestart = TRUE;
}
else
- {
- fputc(ch, out_fp);
+ { fputc(ch, out_fp);
out_linepos++;
- if( ch != ' ' )
- out_linestart = FALSE;
+ if( ch != ' ' ) out_linestart = FALSE;
}
if( DEBUG_EMIT )
fprintf(stderr, "EmitRaw(%c) returning; out_linepos %d, out_linestart %s\n",
ch, out_linepos, out_linestart ? "TRUE" : "FALSE");
} /* end EmitRaw */
-
+
/*****************************************************************************/
/* */
-/* StartEmit(FILE *err_fp) */
+/* StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, len) */
+/* */
+/* Start the emission of a token. If it is a PRINT_WHOLE_QUOTED, it has */
+/* to be saved since it might be a keyword. */
/* */
-/* Start the emission of a token. */
+/* The token began with the starting delimiter start_delim[0..len-1]. */
/* */
/*****************************************************************************/
-void StartEmit(FILE *err_fp)
-{
+void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len)
+{ int i;
if( save_on )
{
fprintf(err_fp, "%s internal error (StartEmit)\n", ErrorHeader());
abort();
}
- save_on = TRUE;
- save_len = 0;
-}
+ save_language = lang;
+ switch( current_token->print_style )
+ {
+ case PRINT_WHOLE_QUOTED:
+
+ /* start_delim is to be printed, but must be saved up */
+ save_on = TRUE;
+ for( i = 0; i < len; i++ )
+ save_value[i] = start_delim[i];
+ save_len = len;
+ save_value[save_len] = '\0';
+ break;
+
+
+ case PRINT_NODELIMS_QUOTED:
+
+ /* easiest to save this, treat like PRINT_WHOLE_QUOTED, but no delims */
+ save_on = TRUE;
+ save_len = 0;
+ save_value[save_len] = '\0';
+ break;
+
+ case PRINT_WHOLE_UNQUOTED:
+
+ /* print command */
+ if( current_token->command[0] != '\0' )
+ fprintf(out_fp, "%s{", current_token->command); /*}*/
+
+ /* print opening delimiter, verbatim */
+ for( i = 0; i < len; i++ )
+ putc(start_delim[i], out_fp);
+ break;
+
+
+ case PRINT_NODELIMS_UNQUOTED:
+
+ /* command is printed but not delimiter */
+ if( current_token->command[0] != '\0' )
+ fprintf(out_fp, "%s{", current_token->command); /*}*/
+ break;
+
+
+ case PRINT_NODELIMS_INNER:
+
+ /* command is printed but not delimiter; always print opening brace */
+ fprintf(out_fp, "%s{", current_token->command); /*}*/
+ break;
+
+
+ case PRINT_COMMAND_ONLY:
+
+ /* command is printed but nothing else */
+ fprintf(out_fp, "%s", current_token->command);
+ break;
+
+
+ default:
+
+ fprintf(err_fp, "%s internal error (print_style)\n", ErrorHeader());
+ abort();
+ break;
+ }
+} /* end StartEmit */
+
/*****************************************************************************/
/* */
-/* EndEmit(LANGUAGE *lang, TOKEN *current_token, FILE *out_fp, *err_fp) */
+/* EndEmit(TOKEN *current_token, char *end_delim) */
/* */
-/* End emitting a token of this type on file out_fp. */
+/* End emitting the current token. Its ending delimiter was end_delim. */
/* */
/*****************************************************************************/
+#define is_whitespace(ch) ((ch)==' ' || (ch)=='\t' || (ch)=='\n' || (ch)=='\f')
-void EndEmit(LANGUAGE *lang, TOKEN *current_token, FILE *out_fp, FILE *err_fp)
-{ BOOLEAN altern; int i;
- if( !save_on )
+void EndEmit(TOKEN *current_token, char *end_delim)
+{ char *com; int i; BOOLEAN quoted_now = FALSE;
+ switch( current_token->print_style )
{
- fprintf(err_fp, "%s internal error (EndEmit)\n", ErrorHeader());
- abort();
- }
- save_on = FALSE;
- if( save_len == 0 )
- {
- /* nothing in token, must be resumed after a newline or tab */
- /* so emit nothing in this case */
- }
- else if( current_token->command == (char *) NULL )
- {
- /* no command, which means to emit this token without its delimiters */
- for( i = 0; i < save_len; i++ )
- EmitRaw(save_token[i], out_fp, err_fp);
- }
- else
- {
- /* if there is an alternate command in this language, check whether the */
- /* completed token is a keyword, and if so apply the alternate command. */
- altern = (lang != (LANGUAGE *) NULL
- && current_token->alternate_command[0] != '\0'
- && HashTableRetrieve(save_token));
-
- putc('{', out_fp);
- fputs(altern ? current_token->alternate_command : current_token->command,
- out_fp);
- putc('"', out_fp);
- for( i = 0; i < save_len; i++ )
- {
- if( save_token[i] == '"' || save_token[i] == '\\' )
- putc('\\', out_fp);
- EmitRaw(save_token[i], out_fp, err_fp);
- }
- putc('"', out_fp);
- putc('}', out_fp);
+ case PRINT_WHOLE_QUOTED:
+
+ strcpy(&save_value[save_len], end_delim);
+ save_len += strlen(end_delim);
+ /* NB NO BREAK */
+
+
+ case PRINT_NODELIMS_QUOTED:
+
+ /* work out whether we are printing the command or its alternative */
+ com = (current_token->alternate_command[0]!='\0' && HashRetrieve(save_value)?
+ current_token->alternate_command : current_token->command);
+
+ /* print command, opening brace */
+ if( com[0] != '\0' ) fprintf(out_fp, "%s{", com); /* } */
+
+ /* omit trailing white space (will not be significant to Lout anyway) */
+ /* *** not doing this now
+ while( save_len>0 && is_whitespace(save_value[save_len-1]) ) save_len--;
+ *** */
+
+ /* print the token with appropriate escapes */
+ save_on = FALSE;
+ for( i = 0; i < save_len; i++ ) switch( save_value[i] )
+ {
+ case '/':
+ case '|':
+ case '&':
+ case '#':
+ case '{':
+ case '}':
+ case '^':
+ case '~':
+ case '-':
+
+ if( !quoted_now ) { putc('"', out_fp); quoted_now = TRUE; }
+ EmitRaw(save_value[i]);
+ break;
+
+
+ case '"':
+ case '\\':
+
+ if( !quoted_now ) { putc('"', out_fp); quoted_now = TRUE; }
+ putc('\\', out_fp);
+ EmitRaw(save_value[i]);
+ break;
+
+
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+
+ if( i == 0 )
+ { /* make initial white space significant using "" */
+ putc('"', out_fp);
+ quoted_now = TRUE;
+ }
+ if( quoted_now ) { putc('"', out_fp); quoted_now = FALSE; }
+ EmitRaw(save_value[i]);
+ break;
+
+
+ default:
+
+ /* anything else can be quoted or unquoted ad. lib. */
+ EmitRaw(save_value[i]);
+ break;
+ }
+ /* print closing quote and closing brace if needed */
+ if( quoted_now ) putc('"', out_fp);
+ else if( save_len > 0 && is_whitespace(save_value[save_len-1]) )
+ fputs("\"\"", out_fp); /* makes trailing white space significant */
+ if( com[0] != '\0' ) /*{*/ putc('}', out_fp);
+ break;
+
+
+ case PRINT_WHOLE_UNQUOTED:
+
+ /* print end delimiter, verbatim */
+ fputs(end_delim, out_fp);
+ /* NB NO BREAK */
+
+
+ case PRINT_NODELIMS_UNQUOTED:
+
+ /* print closing brace if required*/
+ if( current_token->command[0] != '\0' ) /*{*/ putc('}', out_fp);
+ break;
+
+
+ case PRINT_NODELIMS_INNER:
+
+ /* always print closing brace */
+ /*{*/ putc('}', out_fp);
+ break;
+
+
+ case PRINT_COMMAND_ONLY:
+
+ break;
+
+
+ default:
+
+ fprintf(err_fp, "%s internal error (print_style)\n", ErrorHeader());
+ abort();
+ break;
}
-} /* end EndEmit */
+ /* print following command if any */
+ if( current_token->following_command != NULL )
+ fprintf(out_fp, "%s", current_token->following_command);
+} /* end EndEmit */
+
/*****************************************************************************/
/* */
-/* Emit(char ch, TOKEN *current_token, FILE *out_fp, FILE *err_fp) */
+/* Emit(TOKEN *current_token, char ch) */
/* */
-/* Emit one character. */
+/* Emit one character of the current token. */
/* */
/*****************************************************************************/
-void Emit(char ch, TOKEN *current_token, FILE *out_fp, FILE *err_fp)
+void Emit(TOKEN *current_token, char ch)
{
- if( !save_on )
+ switch( current_token->print_style )
{
- fprintf(err_fp, "%s internal error (EmitChar)\n", ErrorHeader());
- abort();
+ case PRINT_WHOLE_QUOTED:
+ case PRINT_NODELIMS_QUOTED:
+
+ if( !save_on )
+ { fprintf(err_fp, "%s internal error (EmitChar)\n", ErrorHeader());
+ abort();
+ }
+ save_value[save_len++] = ch;
+ save_value[save_len] = '\0';
+ break;
+
+
+ case PRINT_WHOLE_UNQUOTED:
+ case PRINT_NODELIMS_UNQUOTED:
+
+ /* verbatim output */
+ putc(ch, out_fp);
+ break;
+
+
+ case PRINT_NODELIMS_INNER:
+
+ fprintf(err_fp, "%s internal error (emitting INNER)\n", ErrorHeader());
+ abort();
+ break;
+
+
+ case PRINT_COMMAND_ONLY:
+
+ /* emit nothing since printing the command only */
+ break;
+
+
+ default:
+
+ fprintf(err_fp, "%s internal error (print_style)\n", ErrorHeader());
+ abort();
+ break;
+
}
- if( ch == '\n' || ch == '\t' )
+} /* end Emit */
+
+
+/*****************************************************************************/
+/* */
+/* EmitProtected(char ch) */
+/* */
+/* Emit one character of the current token. If the character is a special */
+/* one in Lout, protect it with quotes. */
+/* */
+/*****************************************************************************/
+
+void EmitProtected(char ch)
+{
+ switch( ch )
{
- EndEmit((LANGUAGE *) NULL, current_token, out_fp, err_fp);
- EmitRaw(ch, out_fp, err_fp);
- StartEmit(err_fp);
+ case '/':
+ case '|':
+ case '&':
+ case '#':
+ case '{':
+ case '}':
+ case '^':
+ case '~':
+ case '-':
+
+ putc('"', out_fp);
+ EmitRaw(ch);
+ putc('"', out_fp);
+ break;
+
+
+ case '"':
+ case '\\':
+
+ putc('"', out_fp);
+ putc('\\', out_fp);
+ EmitRaw(ch);
+ putc('"', out_fp);
+ break;
+
+
+ default:
+
+ EmitRaw(ch);
+ break;
}
- else
+} /* end EmitProtected */
+
+
+/*****************************************************************************/
+/* */
+/* TOKEN *ExpandToken(TOKEN *t, int starts_pos) */
+/* */
+/* Create a new token corresponding to t but using starts2[starts_pos] and */
+/* ends2[starts_pos] only. */
+/* */
+/*****************************************************************************/
+
+char *clone2strings(char *s1, char *s2)
+{ char *res;
+ res = (char *) malloc( (strlen(s1) + strlen(s2) + 1) * sizeof(char));
+ sprintf(res, "%s%s", s1, s2);
+ if( DEBUG_SETUP )
+ fprintf(stderr, "clone2strings(%s, %s) = %s\n", s1, s2, res);
+ return res;
+} /* end clone2strings */
+
+TOKEN *ExpandToken(TOKEN *t, int starts_pos)
+{ TOKEN *res; int i;
+ if( DEBUG_SETUP )
+ fprintf(stderr, "ExpandToken(%s, starts[0] = %s)\n", t->name, t->starts[0]);
+ res = (TOKEN *) calloc(1, sizeof(struct token_rec));
+ res->name = t->name;
+ res->print_style = t->print_style;
+ res->command = t->command;
+ res->alternate_command = t->alternate_command;
+ res->following_command = t->following_command;
+ res->start_line_only = t->start_line_only;
+ for( i = 0; t->starts[i] != NULL; i++ )
{
- save_token[save_len++] = ch;
- save_token[save_len] = '\0';
+ /* the starts of res are the start of t with starts2[starts_pos] added */
+ res->starts[i] = clone2strings(t->starts[i], t->starts2[starts_pos]);
}
-} /* end Emit */
-
+ res->legal = t->legal;
+ res->escape = t->escape;
+ res->escape_legal = t->escape_legal;
+ res->inner_escape = t->inner_escape;
+ res->end_inner_escape = t->end_inner_escape;
+ res->end_delimiter = t->ends2[starts_pos];
+ res->end_start_line_only = t->end_start_line_only;
+ res->want_two_ends = t->want_two_ends;
+ if( DEBUG_SETUP )
+ fprintf(stderr, "ExpandToken returning res = %s, starts[0] = %s)\n",
+ res->name, res->starts[0]);
+ return res;
+} /* end ExpandToken */
+
/*****************************************************************************/
/* */
-/* SetupTokens(LANGUAGE *lang) */
+/* void SetupOneToken(TOKEN *t) */
/* */
-/* Set up the runtime token structures. This involves initializing the */
-/* chtype and escape_chtype fields for each token type in the chosen */
-/* language, and loading the trie with all the opening delimiters of all */
-/* the tokens. */
+/* Set up one token. This involves initializing the chtype and */
+/* escape_chtype fields for the token, and loading the trie with all */
+/* the opening delimiters of the token. */
/* */
/*****************************************************************************/
#define LEGAL 1
#define ESCAPE 2
#define INNER_ESCAPE 3
+TRIE Trie = (TRIE) NULL; /* these tokens allowed anywhere */
+TRIE StartLineTrie = (TRIE) NULL; /* these allowed at line start only */
-TRIE Trie = (TRIE) NULL;
-
-void SetupTokens(LANGUAGE *lang, FILE *err_fp)
-{ TOKEN *t; int i, j;
+void SetupOneToken(TOKEN *t)
+{ int j;
if( DEBUG_SETUP )
- fprintf(stderr, "SetupTokens(%s)\n", lang->names[0]);
+ fprintf(stderr, "SetupOneToken starting %s\n", t->starts[0]);
- /* set up each token in the language */
- for( i = 0; lang->tokens[i] != (TOKEN *) NULL; i++ )
+ /* check that any PRINT_NODELIMS_INNER styles have an end delimiter */
+ if( t->print_style == PRINT_NODELIMS_INNER )
{
- t = lang->tokens[i];
- if( DEBUG_SETUP )
- fprintf(stderr, "SetupTokens token starting %s\n", t->starts[0]);
+ if( t->end_delimiter == NULL || t->end_delimiter[0] == '\0' )
+ {
+ fprintf(err_fp, "%s: token %s is INNER but has no end delimiter\n",
+ t->name, ErrorHeader());
+ }
+ }
- /* set up the chtype table for this token */
+ /* set up the chtype table for this token */
+ if( t->legal == NULL )
+ { /* all characters are legal */
+ for( j = 0; j < MAX_CHAR; j++ )
+ t->chtype[j] = LEGAL;
+ }
+ else
+ { /* the characters in t->legal are legal */
for( j = 0; t->legal[j] != '\0'; j++ )
t->chtype[(int) t->legal[j]] = LEGAL;
- if( t->escape[0] != '\0' )
- t->chtype[(int) t->escape[0]] = ESCAPE;
- if( t->inner_escape[0] != '\0' )
- t->chtype[(int) t->inner_escape[0]] = INNER_ESCAPE;
-
- /* set up the escape_chtype table for this token */
+ }
+ if( t->escape[0] != '\0' )
+ t->chtype[(int) t->escape[0]] = ESCAPE;
+ if( t->inner_escape[0] != '\0' )
+ t->chtype[(int) t->inner_escape[0]] = INNER_ESCAPE;
+
+ /* set up the escape_chtype table for this token */
+ if( t->escape_legal == NULL )
+ { /* all characters are legal after an escape character */
+ for( j = 0; j < MAX_CHAR; j++ )
+ t->escape_chtype[j] = LEGAL;
+ }
+ else
+ { /* the characters in t->escape_legal are legal after an escape character */
for( j = 0; t->escape_legal[j] != '\0'; j++ )
t->escape_chtype[(int) t->escape_legal[j]] = LEGAL;
+ }
- /* load the opening delimiters of this token into the trie */
- for( j = 0; t->starts[j] != (char *) NULL; j++ )
- {
- if( !TrieInsert(&Trie, t->starts[j], t) )
- {
- if( *(t->starts[j]) == '\0' )
- fprintf(err_fp, "%s: empty starting delimiter\n", ErrorHeader());
- else
- fprintf(err_fp, "%s: starting delimiter %s appears twice\n",
- ErrorHeader(), t->starts[j]);
- }
+ /* load the opening delimiters of this token into the trie */
+ for( j = 0; t->starts[j] != (char *) NULL; j++ )
+ {
+ if( !TrieInsert(t->start_line_only ? &StartLineTrie:&Trie,t->starts[j],t) )
+ { if( *(t->starts[j]) == '\0' )
+ fprintf(err_fp, "%s: empty starting delimiter\n", ErrorHeader());
+ else
+ fprintf(err_fp, "%s: starting delimiter %s appears twice\n",
+ ErrorHeader(), t->starts[j]);
}
-
- if( DEBUG_SETUP )
- fprintf(stderr, "SetupTokens token ending %s\n", t->starts[0]);
}
- /* load the keyword hash table */
- for( j = 0; lang->keywords[j] != (char *) NULL; j++ )
- HashTableInsert(lang->keywords[j], err_fp);
-
if( DEBUG_SETUP )
- fprintf(stderr, "SetupTokens(%s) returning.\n", lang->names[0]);
-} /* end SetupTokens */
-
+ fprintf(stderr, "SetupOneToken ending %s\n", t->starts[0]);
+} /* end SetupOneToken */
-/*****************************************************************************/
-/* */
-/* BOOLEAN PrefixEq(char *str, char *prefix) */
-/* */
-/* Returns TRUE if str (unterminated) begins with prefix (terminated). */
-/* */
-/*****************************************************************************/
-
-BOOLEAN PrefixEq(char *str, char *prefix)
-{ char *p, *q;
- for( p = str, q = prefix; *q != '\0' && *p == *q; p++, q++ );
- if( DEBUG_PREFIXEQ )
- fprintf(stderr, "PrefixEq(%s, %s) returning %s\n",
- str, prefix, *q == '\0' ? "TRUE" : "FALSE");
- return (*q == '\0');
-} /* end PrefixEq */
-
/*****************************************************************************/
/* */
-/* NextChar(FILE *in_fp) */
+/* SetupLanguage(LANGUAGE *lang) */
/* */
-/* Move to next character in the input file. This may involve changing */
-/* global variables curr_line, line_num, and line_pos; the new character */
-/* may be found in curr_line[line_pos]. */
+/* Set up the runtime token structures. This involves setting up each */
+/* token (see above), and also loading the hash table with the keywords. */
/* */
-/* NextChar does not skip any characters at all. When end of file is */
-/* reached, curr_line[line_pos] contains '\0'. */
+/* If a token has non-empty start2 and end2 pairs, it is expanded into */
+/* a set of tokens, one for each pair, with the start delimiter set to */
+/* the concatenation of the start delimiters and starts2, and end */
+/* delimiter set to the corresponding end2. */
/* */
/*****************************************************************************/
-void NextChar(FILE *in_fp)
-{
- if( curr_line[line_pos] == '\n' )
+void SetupLanguage(LANGUAGE *lang)
+{ int i, j; TOKEN *t;
+ if( DEBUG_SETUP )
+ fprintf(stderr, "SetupLanguage(%s)\n", lang->names[0]);
+
+ /* set up each token in the language */
+ for( i = 0; lang->tokens[i] != (TOKEN *) NULL; i++ )
{
- /* need a new line */
- line_num++;
- line_pos = 1;
- if( fgets(&curr_line[1], MAX_LINE+2, in_fp) == (char *) NULL )
- curr_line[1] = '\0';
+ if( DEBUG_SETUP )
+ fprintf(stderr, " (1) setting up token %s (starts[0] = %s)\n",
+ lang->tokens[i]->name, lang->tokens[i]->starts[0]);
+ if( lang->tokens[i]->starts2[0] != NULL )
+ {
+ /* starts2, so set up one token for each entry in starts[2] */
+ for( j = 0; lang->tokens[i]->starts2[j] != NULL; j++ )
+ {
+ t = ExpandToken(lang->tokens[i], j);
+ if( DEBUG_SETUP )
+ fprintf(stderr, " (2) setting up token %s (starts[0] = %s)\n",
+ t->name, t->starts[0]);
+ SetupOneToken(t);
+ }
+ }
+ else
+ {
+ /* no starts2, so set up just one token */
+ SetupOneToken(lang->tokens[i]);
+ }
}
- else line_pos++; /* will yield '\0' as desired if EOF before end of line */
- if( DEBUG_NEXTCHAR )
- fprintf(stderr, "after NextChar, line_num %d, line_pos %d, curr_line %s",
- line_num, line_pos, &curr_line[1]);
-} /* end NextChar */
+ /* load the keyword hash table */
+ for( j = 0; lang->keywords[j] != (char *) NULL; j++ )
+ HashInsert(lang->keywords[j]);
+
+ if( DEBUG_SETUP )
+ fprintf(stderr, "SetupLanguage(%s) returning.\n", lang->names[0]);
+} /* end SetupLanguage */
+
/*****************************************************************************/
/* */
@@ -1010,15 +3182,41 @@ BOOLEAN Printable(char ch)
return (*p == ch);
} /* end Printable */
+
+/*****************************************************************************/
+/* */
+/* TOKEN *TokenStartingHere(int *len) */
+/* */
+/* Returns the token starting here if there is one, else NULL. */
+/* If found, the length of its starting delimiter is returned in *len. */
+/* */
+/*****************************************************************************/
+
+TOKEN *TokenStartingHere(int *len)
+{ TOKEN *res;
+
+ if( line_pos == 1 )
+ {
+ res = TrieRetrieve(StartLineTrie, &curr_line[line_pos], len);
+ if( res == (TOKEN *) NULL )
+ res = TrieRetrieve(Trie, &curr_line[line_pos], len);
+ }
+ else
+ {
+ res = TrieRetrieve(Trie, &curr_line[line_pos], len);
+ }
+ return res;
+}
+
/*****************************************************************************/
/* */
-/* Process(LANGUAGE*lang, FILE*in_fp, *out_fp, *err_fp, TOKEN *outer_token) */
+/* Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter) */
/* */
/* Process a sequence of input tokens. If we are currently recursing */
/* inside some other token, outer_token is non-null and is that token, */
-/* and we stop when we reach the ending delimiter of the inner escape */
-/* of that token. Otherwise, we stop at end of file. */
+/* and we stop when we reach outer_end_delimiter outside any token. */
+/* Otherwise we stop at end of file. */
/* */
/*****************************************************************************/
#define START 1
@@ -1027,242 +3225,230 @@ BOOLEAN Printable(char ch)
#define IN_TOKEN_AFTER_INNER_ESCAPE 4
#define STOP 5
-void Process(LANGUAGE *lang, FILE *in_fp, FILE *out_fp, FILE *err_fp,
-TOKEN *outer_token)
+void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter)
{ TOKEN *current_token; int len, i, state;
+ BOOLEAN end_delimiter_seen;
if( DEBUG_PROCESS )
fprintf(stderr, "[ Process(%s, -, -, -, -)\n", lang->names[0]);
state = START;
+ end_delimiter_seen = FALSE;
while( curr_line[line_pos] != '\0' && state != STOP )
{
- if( DEBUG_PROCESS )
- fprintf(stderr, " state %d, ch %c\n", state, curr_line[line_pos]);
- switch( state )
- {
- case START: /* between tokens */
-
- /* check whether outer_token's delimiter is current */
- if( outer_token != (TOKEN *) NULL &&
- outer_token->end_inner_escape[0] != '\0' &&
- PrefixEq(&curr_line[line_pos], outer_token->end_inner_escape) )
- {
- len = strlen(outer_token->end_inner_escape);
- for( i = 0; i < len; i++ )
- NextChar(in_fp);
- state = STOP;
- }
- else switch( curr_line[line_pos] )
- {
+ if( DEBUG_PROCESS )
+ fprintf(stderr, " state %d, ch %c\n", state, curr_line[line_pos]);
+ switch( state )
+ {
- case ' ':
-
- EmitRaw(' ', out_fp, err_fp);
- NextChar(in_fp);
- break;
+ case START: /* between tokens; try each of the following */
+ /* check whether outer_token is ending here, in which case stop */
+ if( outer_token != (TOKEN *) NULL &&
+ curr_line[line_pos] == outer_end_delimiter[0] &&
+ InputMatches(outer_end_delimiter) )
+ {
+ len = strlen(outer_end_delimiter);
+ for( i = 0; i < len; i++ )
+ NextChar();
+ state = STOP;
+ }
- case '\t':
-
- if( outer_token != (TOKEN *) NULL )
- {
- fprintf(err_fp, "%s: replacing tab character in escape by space\n",
- ErrorHeader());
- EmitRaw(' ', out_fp, err_fp);
- NextChar(in_fp);
- }
- else
- {
- EmitRaw('\t', out_fp, err_fp);
- NextChar(in_fp);
+ /* check whether a token is starting here, in which case start it */
+ else if( (current_token = TokenStartingHere(&len)) != (TOKEN *) NULL )
+ {
+ if( DEBUG_PROCESS )
+ { fprintf(stderr, "current_token (len = %d): %s\n",
+ len, EchoToken(current_token));
}
- break;
+ StartEmit(lang, current_token, &curr_line[line_pos], len);
+ /* skip the starting delimiter */
+ for( i = 0; i < len; i++ )
+ NextChar();
- case '\n':
-
- if( outer_token != (TOKEN *) NULL )
+ /* we are now either in a token, or else we have to start an inner */
+ if( current_token->print_style == PRINT_NODELIMS_INNER )
{
- fprintf(err_fp, "%s: inserting %s to fix unterminated escape\n",
- ErrorHeader(), outer_token->end_inner_escape);
- state = STOP;
+ Process(lang, current_token, current_token->end_delimiter);
+ EndEmit(current_token, "");
}
- else
- {
- EmitRaw('\n', out_fp, err_fp);
- NextChar(in_fp);
- }
- break;
+ else state = IN_TOKEN;
+ }
+ /* check whether we have a space */
+ else if( curr_line[line_pos] == ' ' )
+ {
+ EmitRaw(' ');
+ NextChar();
+ }
- case '\f':
-
- if( outer_token != (TOKEN *) NULL )
- {
- fprintf(err_fp, "%s: replacing formfeed character in escape by space\n",
- ErrorHeader());
- EmitRaw(' ', out_fp, err_fp);
- NextChar(in_fp);
- }
- else
- {
+ /* check whether we have a tab character */
+ else if( curr_line[line_pos] == '\t' )
+ {
+ EmitRaw('\t');
+ NextChar();
+ }
+
+ /* check whether we have a newline character */
+ else if( curr_line[line_pos] == '\n' )
+ {
+ EmitRaw('\n');
+ NextChar();
+ }
+
+ /* check whether we have a formfeed character */
+ else if( curr_line[line_pos] == '\f' )
+ {
fprintf(out_fp, "\n@NP");
- EmitRaw('\n', out_fp, err_fp);
- NextChar(in_fp);
- }
- break;
+ EmitRaw('\n');
+ NextChar();
+ }
+ /* check whether we are supposed to echo things that don't match */
+ else if( lang->no_match == NO_MATCH_PRINT )
+ {
+ EmitProtected(curr_line[line_pos]);
+ NextChar();
+ }
- default:
+ /* finally, we have an error and must skip the character */
+ else if( lang->no_match == NO_MATCH_ERROR )
+ {
+ if( Printable(curr_line[line_pos]) )
+ fprintf(err_fp, "%s: skipping unexpected %c character\n",
+ ErrorHeader(), curr_line[line_pos]);
+ else
+ fprintf(err_fp, "%s: %s (octal %o)\n",
+ "skipping unexpected unprintable character",
+ ErrorHeader(), (int) curr_line[line_pos]);
+ NextChar();
+ }
+ else
+ {
+ fprintf(err_fp, "%s internal error: lang->no_match\n", ErrorHeader());
+ }
+ break;
+
+
+ case IN_TOKEN: /* within a token; current_token says which kind */
- /* look up the trie to see which token we are starting */
- current_token = TrieRetrieve(Trie, &curr_line[line_pos], &len);
- if( current_token == (TOKEN *) NULL )
+ /* check for ending delimiter if there is one */
+ if( current_token->end_delimiter[0] != '\0' &&
+ (!current_token->end_start_line_only || line_pos == 1) &&
+ InputMatches(current_token->end_delimiter) )
+ {
+ if( current_token->want_two_ends && !end_delimiter_seen )
{
- /* error; no token starts here, so print error message and skip */
- if( Printable(curr_line[line_pos]) )
- fprintf(err_fp, "%s: skipping unexpected %c character\n",
- ErrorHeader(), curr_line[line_pos]);
- else
- fprintf(err_fp,
- "%s: skipping unexpected unprintable character (octal %o)\n",
- ErrorHeader(), (int) curr_line[line_pos]);
- NextChar(in_fp);
+ /* if we have to see the end delimiter twice before stopping, */
+ /* and we haven't seen it yet, then emit the char and carry on */
+ Emit(current_token, curr_line[line_pos]);
+ NextChar();
+ end_delimiter_seen = TRUE;
}
else
{
- /* have a token, so start emitting it */
if( DEBUG_PROCESS )
- {
- fprintf(stderr, "current_token (len = %d): %s\n",
- len, EchoToken(current_token));
- }
- StartEmit(err_fp);
+ fprintf(stderr, " InputMatches(%s) so finishing token\n",
+ current_token->end_delimiter);
+ len = strlen(current_token->end_delimiter);
for( i = 0; i < len; i++ )
- {
- /* ***
- if( DEBUG_PROCESS )
- fprintf(stderr, " emitting delim (i %d, len %d)\n", i, len);
- *** */
- if( current_token->command != (char *) NULL )
- Emit(curr_line[line_pos], current_token, out_fp, err_fp);
- NextChar(in_fp);
- }
- state = IN_TOKEN;
+ NextChar();
+ EndEmit(current_token, current_token->end_delimiter);
+ state = START;
}
- }
- break;
-
-
- case IN_TOKEN: /* within a token; current_token says which kind */
-
- /* check for ending delimiter if there is one */
- if( current_token->end_delimiter[0] != '\0' &&
- PrefixEq(&curr_line[line_pos], current_token->end_delimiter) )
- {
- if( DEBUG_PROCESS )
- fprintf(stderr, " PrefixEq(-, %s) so finishing token\n",
- current_token->end_delimiter);
- len = strlen(current_token->end_delimiter);
- for( i = 0; i < len; i++ )
- { if( current_token->command != (char *) NULL )
- Emit(curr_line[line_pos], current_token, out_fp, err_fp);
- NextChar(in_fp);
}
- EndEmit(lang, current_token, out_fp, err_fp);
- state = START;
- }
- else switch( current_token->chtype[(int) curr_line[line_pos]] )
- {
+ else switch( current_token->chtype[(int) curr_line[line_pos]] )
+ {
- case LEGAL:
+ case LEGAL:
- Emit(curr_line[line_pos], current_token, out_fp, err_fp);
- NextChar(in_fp);
- break;
+ Emit(current_token, curr_line[line_pos]);
+ NextChar();
+ break;
- case ESCAPE:
+ case ESCAPE:
- NextChar(in_fp);
- state = IN_TOKEN_AFTER_ESCAPE;
- break;
+ NextChar();
+ state = IN_TOKEN_AFTER_ESCAPE;
+ break;
- case INNER_ESCAPE:
+ case INNER_ESCAPE:
- EndEmit(lang, current_token, out_fp, err_fp);
- NextChar(in_fp);
- Process(lang, in_fp, out_fp, err_fp, current_token);
- state = IN_TOKEN_AFTER_INNER_ESCAPE;
- break;
+ EndEmit(current_token, "");
+ NextChar();
+ Process(lang, current_token, current_token->end_inner_escape);
+ state = IN_TOKEN_AFTER_INNER_ESCAPE;
+ break;
- default:
+ default:
- if( current_token->end_delimiter[0] != '\0' )
- {
- /* error: token ends at delimiter, not at unexpected character */
- if( Printable(curr_line[line_pos]) )
- fprintf(err_fp, "%s: skipping unexpected %c character in %s\n",
- ErrorHeader(), curr_line[line_pos], current_token->name);
+ if( current_token->end_delimiter[0] != '\0' )
+ {
+ /* error: token ends at delimiter, not at unexpected character */
+ if( Printable(curr_line[line_pos]) )
+ fprintf(err_fp, "%s: skipping unexpected %c character in %s\n",
+ ErrorHeader(), curr_line[line_pos], current_token->name);
+ else
+ fprintf(err_fp, "%s: %s (octal %o) in %s\n",
+ ErrorHeader(), "skipping unexpected unprintable character",
+ (int) curr_line[line_pos], current_token->name);
+ NextChar();
+ }
else
- fprintf(err_fp,
- "%s: skipping unexpected unprintable character (octal %o) in %s\n",
- ErrorHeader(), (int) curr_line[line_pos], current_token->name);
- NextChar(in_fp);
- }
- else
- {
- /* normal termination after last legal character */
- EndEmit(lang, current_token, out_fp, err_fp);
- state = START;
- }
- break;
+ {
+ /* normal termination after last legal character */
+ EndEmit(current_token, "");
+ state = START;
+ }
+ break;
- }
- break;
+ }
+ break;
- case IN_TOKEN_AFTER_ESCAPE:
+ case IN_TOKEN_AFTER_ESCAPE:
- if( current_token->escape_chtype[(int) curr_line[line_pos]] == LEGAL )
- {
- Emit(current_token->escape[0], current_token, out_fp, err_fp);
- Emit(curr_line[line_pos], current_token, out_fp, err_fp);
- }
- else
- {
- if( Printable(curr_line[line_pos]) )
- fprintf(err_fp, "%s: skipping %c%c in %s, since %c not legal here\n",
- ErrorHeader(), current_token->escape[0], curr_line[line_pos],
- current_token->name, curr_line[line_pos]);
+ if( current_token->escape_chtype[(int) curr_line[line_pos]] == LEGAL )
+ {
+ Emit(current_token, current_token->escape[0]);
+ Emit(current_token, curr_line[line_pos]);
+ }
else
- fprintf(err_fp,
- "%s: skipping %c and unprintable unexpected character (octal %o)\n",
- ErrorHeader(), current_token->escape[0], (int) curr_line[line_pos]);
- }
- NextChar(in_fp);
- state = IN_TOKEN;
- break;
+ {
+ if( Printable(curr_line[line_pos]) )
+ fprintf(err_fp,"%s: skipping %c%c in %s, since %c not legal here\n",
+ ErrorHeader(), current_token->escape[0], curr_line[line_pos],
+ current_token->name, curr_line[line_pos]);
+ else
+ fprintf(err_fp,
+ "%s: skipping %c and %s (octal %o)\n", ErrorHeader(),
+ current_token->escape[0], "unprintable unexpected character",
+ (int) curr_line[line_pos]);
+ }
+ NextChar();
+ state = IN_TOKEN;
+ break;
- case IN_TOKEN_AFTER_INNER_ESCAPE:
+ case IN_TOKEN_AFTER_INNER_ESCAPE:
- /* ending delimiter of inner escape has been read over */
- StartEmit(err_fp);
- state = IN_TOKEN;
- break;
+ /* ending delimiter of inner escape has been read over */
+ StartEmit(lang, current_token, "", 0);
+ state = IN_TOKEN;
+ break;
- default:
+ default:
- fprintf(err_fp, "%s internal error (state = %d)\n",
- ErrorHeader(), state);
- abort();
- break;
- }
+ fprintf(err_fp, "%s internal error (state = %d)\n",
+ ErrorHeader(), state);
+ abort();
+ break;
+ }
}
/* at end, need to tidy up any residual messiness */
@@ -1285,9 +3471,9 @@ TOKEN *outer_token)
fprintf(err_fp, "%s: program text ended within %s\n",
ErrorHeader(), current_token->name);
else
- fprintf(err_fp, "%s: %s escape ended within %s\n",
+ fprintf(err_fp, "%s: %s token ended within %s\n",
ErrorHeader(), outer_token->name, current_token->name);
- EndEmit(lang, current_token, out_fp, err_fp);
+ EndEmit(current_token, "");
}
break;
@@ -1297,7 +3483,7 @@ TOKEN *outer_token)
/* we stopped after the escape character */
fprintf(err_fp, "%s: skipping %c at end of program text\n",
ErrorHeader(), current_token->escape[0]);
- EndEmit(lang, current_token, out_fp, err_fp);
+ EndEmit(current_token, "");
break;
@@ -1310,7 +3496,7 @@ TOKEN *outer_token)
fprintf(err_fp, "%s: program text ended within %s after escape\n",
ErrorHeader(), current_token->name);
else
- fprintf(err_fp, "%s: %s escape ended within %s after escape\n",
+ fprintf(err_fp, "%s: %s token ended within %s after escape\n",
ErrorHeader(), outer_token->name, current_token->name);
}
break;
@@ -1325,44 +3511,50 @@ TOKEN *outer_token)
}
} /* end Process */
-
+
/*****************************************************************************/
/* */
-/* PrintUsage(fp) */
+/* PrintUsage() */
/* */
-/* Print usage message on file fp. */
+/* Print usage message on file err_fp. */
/* */
/*****************************************************************************/
-void PrintUsage(FILE *fp)
+void PrintUsage()
{ int i;
- fprintf(fp, "\n");
- fprintf(fp, "usage: prg2lout <options> <files>\n\n");
- fprintf(fp, " where <options> can be\n");
- fprintf(fp, "\n");
- fprintf(fp, " -r raw mode (used within Lout only)\n");
- fprintf(fp, " -i<file> take input from <file>\n");
- fprintf(fp, " -o<file> send output to <file>\n");
- fprintf(fp, " -e<file> send error messages to <file>\n");
- fprintf(fp, " -l<language> input is in this programming language\n");
- fprintf(fp, " -p<style> print style: fixed, varying, symbol\n");
- fprintf(fp, " -f<family> font family (e.g. Times)\n");
- fprintf(fp, " -s<size> font size (e.g. 10p or 12p)\n");
- fprintf(fp, " -v<space> line spacing (e.g. 1.1fx)\n");
- fprintf(fp, " -t<num> tab interval (e.g. 8 is default)\n");
- fprintf(fp, " -T<dist> output tab interval (e.g. 0.5i)\n");
- fprintf(fp, " -n no file names as page headers\n");
- fprintf(fp, " -V print version information and exit\n");
- fprintf(fp, " -u print this usage message and exit\n");
- fprintf(fp, "\n");
- fprintf(fp, " and <language> (which is compulsory) can be any one of:\n");
+ fprintf(err_fp, "\n");
+ fprintf(err_fp, "usage: prg2lout <options> <files>\n\n");
+ fprintf(err_fp, " where <options> can be\n");
+ fprintf(err_fp, "\n");
+ fprintf(err_fp, " -r raw mode (used within Lout only)\n");
+ fprintf(err_fp, " -i<file> take input from <file>\n");
+ fprintf(err_fp, " -o<file> send output to <file>\n");
+ fprintf(err_fp, " -e<file> send error messages to <file>\n");
+ fprintf(err_fp, " -l<language> input is in this programming language\n");
+ fprintf(err_fp, " -p<style> print style: fixed, varying, symbol\n");
+ fprintf(err_fp, " -f<family> font family (e.g. Times)\n");
+ fprintf(err_fp, " -s<size> font size (e.g. 10p or 12p)\n");
+ fprintf(err_fp, " -v<space> line spacing (e.g. 1.1fx)\n");
+ fprintf(err_fp, " -t<num> tab interval (e.g. 8 is default)\n");
+ fprintf(err_fp, " -T<dist> output tab interval (e.g. 0.5i)\n");
+ fprintf(err_fp, " -S<file> use this as the setup file\n");
+ fprintf(err_fp, " -n no file names as page headers\n");
+ fprintf(err_fp, " -V print version information and exit\n");
+ fprintf(err_fp, " -u print this usage message and exit\n");
+ fprintf(err_fp, "\n");
+ fprintf(err_fp, " <language> (which is compulsory) can be any one of:\n");
for( i = 0; languages[i] != (LANGUAGE *) NULL; i++ )
- fprintf(fp, " %s\n", languages[i]->names[0]);
- fprintf(fp, "\n");
+ fprintf(err_fp, " %s\n", languages[i]->names[0]);
+ fprintf(err_fp, "\n");
+ fprintf(err_fp, "The values of all formatting options not given are\n");
+ fprintf(err_fp, "taken from the setup file: either the file given after\n");
+ fprintf(err_fp, "-S, or the system default setup file for this language\n");
+ fprintf(err_fp, "if there is no -S option.\n");
+ fprintf(err_fp, "\n");
} /* end PrintUsage */
-
+
/*****************************************************************************/
/* */
/* main(argc, argv) */
@@ -1373,10 +3565,9 @@ void PrintUsage(FILE *fp)
/*****************************************************************************/
int main(int argc, char *argv[])
-{ FILE *in_fp, *out_fp, *err_fp;
- BOOLEAN stdin_seen; int i, j, arg_pos;
- char *infilename, *outfilename, *errfilename, *str;
- LANGUAGE *language = (LANGUAGE *) NULL;
+{ BOOLEAN stdin_seen; int i, j, arg_pos;
+ char *infilename, *outfilename, *errfilename;
+ LANGUAGE *lang = NO_LANGUAGE;
char *file_names[1024]; int file_count = 0;
/* echo command line */
@@ -1393,15 +3584,14 @@ int main(int argc, char *argv[])
line_num = 0;
stdin_seen = raw_seen = FALSE;
tab_by_spacing = TRUE;
- style_option = NO_STYLE;
tab_in = 8;
tab_out = 3;
tab_unit = 'f';
headers_option = TRUE;
font_option = size_option = line_option = tabin_option =
- tabout_option = language_option = (char *) NULL;
+ tabout_option = setup_option = language_option = (char *) NULL;
if( argc == 1 )
- { PrintUsage(err_fp);
+ { PrintUsage();
exit(1);
}
for( arg_pos = 1; arg_pos < argc; arg_pos++ )
@@ -1487,27 +3677,12 @@ int main(int argc, char *argv[])
{ fprintf(err_fp, "%s: -p illegal with -r option\n", ErrorHeader());
exit(1);
}
- GetArg(str, "usage: -p<printstyle>", FALSE);
- /* *** -l can set style, so don't do this now
- if( style_option != NO_STYLE )
- { fprintf(err_fp, "%s: -p option appears twice!\n", ErrorHeader());
- exit(1);
- }
- else
- *** */
- if( strcmp(str, "fixed") == 0 )
- { style_option = FIXED_STYLE;
- }
- else if( strcmp(str, "varying") == 0 )
- { style_option = VARYING_STYLE;
- tab_by_spacing = FALSE;
- }
- else if( strcmp(str, "symbol") == 0 )
- { style_option = SYMBOL_STYLE;
- tab_by_spacing = FALSE;
- }
- else
- { fprintf(err_fp, "%s: unknown -p option %s\n", ErrorHeader(), str);
+ GetArg(style_option, "usage: -p<printstyle>", FALSE);
+ if( strcmp(style_option, "fixed") != 0 &&
+ strcmp(style_option, "varying") != 0 &&
+ strcmp(style_option, "symbol") != 0 )
+ { fprintf(err_fp, "%s: unknown -p option %s\n", ErrorHeader(),
+ style_option);
exit(1);
}
break;
@@ -1588,6 +3763,17 @@ int main(int argc, char *argv[])
break;
+ case 'S':
+
+ /* read alternative setup file */
+ if( raw_seen )
+ { fprintf(err_fp, "%s: -s illegal with -r option\n", ErrorHeader());
+ exit(1);
+ }
+ GetArg(setup_option, "usage: -S<filename>", FALSE);
+ break;
+
+
case 'n':
if( raw_seen )
@@ -1615,7 +3801,7 @@ int main(int argc, char *argv[])
{ fprintf(err_fp, "%s: -u illegal with -r option\n", ErrorHeader());
exit(1);
}
- PrintUsage(err_fp);
+ PrintUsage();
exit(0);
break;
@@ -1627,48 +3813,22 @@ int main(int argc, char *argv[])
exit(1);
}
GetArg(language_option, "usage: -l<language>", FALSE);
- for( i = 0; languages[i] != (LANGUAGE *) NULL; i++ )
- {
- for( j = 0; languages[i]->names[j] != (char *) NULL; j++ )
- {
- if( strcmp(languages[i]->names[j], language_option) == 0 )
- break;
- }
- if( languages[i]->names[j] != (char *) NULL )
- break;
- }
- if( languages[i] != (LANGUAGE *) NULL )
+ i = 0; j = 0;
+ while( lang == NO_LANGUAGE && languages[i] != NO_LANGUAGE )
{
- language = languages[i];
+ if( languages[i]->names[j] == (char *) NULL )
+ i++, j = 0;
+ else if( strcmp(languages[i]->names[j], language_option) == 0 )
+ lang = languages[i];
+ else
+ j++;
}
- else
+ if( lang == NO_LANGUAGE )
{
fprintf(err_fp, "%s: unknown language %s\n", ErrorHeader(),
language_option);
exit(1);
}
-
- /* set style unless already done by a -p flag */
- if( style_option == NO_STYLE )
- {
- if( strcmp(language->default_style, "fixed") == 0 )
- { style_option = FIXED_STYLE;
- }
- else if( strcmp(language->default_style, "varying") == 0 )
- { style_option = VARYING_STYLE;
- tab_by_spacing = FALSE;
- }
- else if( strcmp(language->default_style, "symbol") == 0 )
- { style_option = SYMBOL_STYLE;
- tab_by_spacing = FALSE;
- }
- else
- { fprintf(err_fp,
- "%s internal error: bad default print style for language %s\n",
- ErrorHeader(), language_option);
- abort();
- }
- }
break;
@@ -1695,7 +3855,7 @@ int main(int argc, char *argv[])
} /* for */
/* make sure we have a language */
- if( language == (LANGUAGE *) NULL )
+ if( lang == NO_LANGUAGE )
{
fprintf(err_fp, "%s: missing -l option\n", ErrorHeader());
exit(0);
@@ -1713,87 +3873,28 @@ int main(int argc, char *argv[])
}
/* process the file */
- SetupTokens(language, err_fp);
+ SetupLanguage(lang);
line_pos = 1;
curr_line[line_pos] = '\n'; /* forces line read */
line_num = 0;
- NextChar(in_fp);
- Process(language, in_fp, out_fp, err_fp, (TOKEN *) NULL);
+ NextChar();
+ Process(lang, (TOKEN *) NULL, "");
}
else if( file_count > 0 )
- {
- int ch;
- char *style_str, *font_str, *size_str, *line_str, *face_str;
- char *tabin_str, *tabout_str;
-
- /* sort out the options' values */
- switch( style_option )
- {
-
- case FIXED_STYLE:
-
- style_str = "fixed";
- face_str = "Base";
- font_str = font_option != NULL ? font_option : "Courier";
- size_str = size_option != NULL ? size_option : "9p";
- line_str = line_option != NULL ? line_option : "1.1fx";
- tabin_str = tabin_option != NULL ? tabin_option : "8";
- tabout_str = tabout_option != NULL ? tabout_option : "8s";
- break;
+ { int ch;
-
- case NO_STYLE:
- case VARYING_STYLE:
-
- style_str = "varying";
- face_str = "Slope";
- font_str = font_option != NULL ? font_option : "Times";
- size_str = size_option != NULL ? size_option : "10p";
- line_str = line_option != NULL ? line_option : "1.1fx";
- tabin_str = tabin_option != NULL ? tabin_option : "8";
- tabout_str = tabout_option != NULL ? tabout_option : "3f";
- break;
-
-
- case SYMBOL_STYLE:
-
- style_str = "symbol";
- face_str = "Slope";
- font_str = font_option != NULL ? font_option : "Times";
- size_str = size_option != NULL ? size_option : "10p";
- line_str = line_option != NULL ? line_option : "1.1fx";
- tabin_str = tabin_option != NULL ? tabin_option : "8";
- tabout_str = tabout_option != NULL ? tabout_option : "3f";
- break;
-
-
- default:
-
- fprintf(err_fp, "%s internal error in -p option\n", ErrorHeader());
- abort();
- break;
- }
-
/* make sure we have an output file */
if( out_fp == (FILE *) NULL )
out_fp = stdout;
/* print the initial @Use clauses etc.*/
- fprintf(out_fp, "%s%s\n", "@Sy", "sInclude { progf }");
fprintf(out_fp, "%s%s\n", "@Sy", "sInclude { doc }");
- fprintf(out_fp, "@Use { @ProgSetup\n");
- fprintf(out_fp, " language { %s }\n", language_option);
- fprintf(out_fp, " style { %s }\n", style_str);
- fprintf(out_fp, " %sfont { %s }\n", style_str, font_str);
- fprintf(out_fp, " %ssize { %s }\n", style_str, size_str);
- fprintf(out_fp, " %sline { %s }\n", style_str, line_str);
- fprintf(out_fp, " %stabin { %s }\n", style_str, tabin_str);
- fprintf(out_fp, " %stabout { %s }\n", style_str, tabout_str);
- fprintf(out_fp, "}\n");
+ if( setup_option != NULL )
+ fprintf(out_fp, "%s%s { %s }\n", "@In", "clude", setup_option);
+ else
+ fprintf(out_fp, "%s%s { %s }\n", "@Sy", "sInclude", lang->setup_file);
fprintf(out_fp, "@Document\n");
- fprintf(out_fp, " @InitialFont { \"%s\" \"%s\" \"%s\" }\n",
- font_str, face_str, size_str);
- fprintf(out_fp, " @InitialBreak { lines \"%s\" nohyphen }\n", line_str);
+ fprintf(out_fp, " @InitialBreak { lines 1.2fx nohyphen }\n");
fprintf(out_fp, "//\n");
fprintf(out_fp, "%s%s\n", "@Text @Be", "gin");
@@ -1805,7 +3906,7 @@ int main(int argc, char *argv[])
if( in_fp == NULL )
{ fprintf(err_fp, "%s: skipping input file %s (cannot open)\n",
ErrorHeader(), file_names[i]);
- continue;
+ continue;
}
strcpy(file_name, file_names[i]);
@@ -1817,11 +3918,24 @@ int main(int argc, char *argv[])
file_names[i]);
/* print file name and contents (don't format, let Lout call back) */
- /* this string has been disguised so as not to fool progwlout! */
- fprintf(out_fp, "%s%s%s\n", "@P", "rog @Be", "gin");
+ /* this string has been disguised to avoid recognition by prg2lout */
+ fprintf(out_fp, "%s\n", lang->lang_sym);
+ if( style_option != NULL )
+ fprintf(out_fp, " style { %s }\n", style_option );
+ if( font_option != NULL )
+ fprintf(out_fp, " font { %s }\n", font_option );
+ if( size_option != NULL )
+ fprintf(out_fp, " size { %s }\n", size_option );
+ if( line_option != NULL )
+ fprintf(out_fp, " line { %s }\n", line_option );
+ if( tabin_option != NULL )
+ fprintf(out_fp, " tabin { %s }\n", tabin_option );
+ if( tabout_option != NULL )
+ fprintf(out_fp, " tabout { %s }\n", tabout_option );
+ fprintf(out_fp, "%s%s\n", "@Be", "gin");
while( (ch = getc(in_fp)) != EOF )
- putc(ch, out_fp);
- fprintf(out_fp, "%s%s%s\n", "@E", "nd @P", "rog");
+ putc(ch, out_fp);
+ fprintf(out_fp, "%s%s %s\n", "@E", "nd", lang->lang_sym);
}
/* finish off whole input */