diff options
author | Jeffrey H. Kingston <jeff@it.usyd.edu.au> | 2010-09-14 20:36:01 +0000 |
---|---|---|
committer | Jeffrey H. Kingston <jeff@it.usyd.edu.au> | 2010-09-14 20:36:01 +0000 |
commit | 2f4268e5e02216be53cd85816362191373512463 (patch) | |
tree | 57165bf2889337044bc3633854e5aa38f7d89e6b /prg2lout.c | |
parent | 73d840b9f14b65166b92e6b43f930fd0ef7b8267 (diff) | |
download | lout-2f4268e5e02216be53cd85816362191373512463.tar.gz |
Lout 3.20.
git-svn-id: http://svn.savannah.nongnu.org/svn/lout/trunk@9 9365b830-b601-4143-9ba8-b4a8e2c3339c
Diffstat (limited to 'prg2lout.c')
-rw-r--r-- | prg2lout.c | 3598 |
1 files changed, 2856 insertions, 742 deletions
@@ -1,6 +1,7 @@ + /*****************************************************************************/ /* */ -/* PRG2LOUT: A PROGRAM TO CONVERT PROGRAM SOURCES INTO LOUT (VERSION 1.0) */ +/* PRG2LOUT: A PROGRAM TO CONVERT PROGRAM SOURCES INTO LOUT (VERSION 2.0) */ /* COPYRIGHT (C) 2000 Jeffrey H. Kingston */ /* */ /* Jeffrey H. Kingston (jeff@cs.su.oz.au) */ @@ -23,50 +24,123 @@ /* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* */ /*****************************************************************************/ -#include <stdio.h> -#include <stdlib.h> + + +/*****************************************************************************/ +/* */ +/* GENERAL INTRODUCTION TO PRG2LOUT */ +/* */ +/* The best way to see what the aim of prg2lout as currently conceived is, */ +/* is to look in file cprint at the setup file options. You will see that */ +/* the aim is to provide three basic styles: fixed (essentially mono font), */ +/* varying (essentially varying-width font with various faces for different */ +/* elements at the user's choice), and symbol (similar to varying). */ +/* */ +/* The elements currently aimed for are strings, identifiers, comments, */ +/* keywords, numbers, and operators, and the end user is able to choose, */ +/* for each of these kinds of elements, which font to set them in. */ +/* */ +/* This is achieved by a simple division of labour: prg2lout does the */ +/* classifying of the input into a sequence of these elements, and the Lout */ +/* end (cprint and cprintf, or their clones for other languages) does the */ +/* formatting. For example, the C text */ +/* */ +/* inc = inc + 1 */ +/* */ +/* would be classified by prg2lout into identifier, operator, identifier, */ +/* operator, number; and consequently prg2lout would emit */ +/* */ +/* @PI{"inc"} @PO{"="} @PI{"inc"} @PO{"+"} @PN{"1"} */ +/* */ +/* which is readable by Lout, thanks to having quotes around everything */ +/* potentially dangerous, and clearly tells Lout, by means of the commands */ +/* @PC, @PI, etc., how each part of the input has been classified. */ +/* */ +/* The actual classification is carried out by prg2lout as follows. Each */ +/* programming language is described to prg2lout as a collection of tokens; */ +/* you say what the token begins with, what's a legal character inside the */ +/* token, and how it ends. You also say which command (@PC, @PI etc.) to */ +/* emit when a token of that kind is found. Prg2lout does the rest. */ +/* */ +/* Prg2lout knows all about tricky problems such as multi-line tokens (it */ +/* breaks them up into single-line pieces) and backslash in Lout strings */ +/* (it replaces any \ within an output string by \\, " by \", etc.). It */ +/* also handles tab characters and formfeed characters properly, and it */ +/* produces intelligible error messages when unexpected things happen, */ +/* such as input terminating in the middle of a string. This attention to */ +/* detail is a strong reason for using prg2lout rather than something more */ +/* ad-hoc, such as @Verbatim or a quick script. */ +/* */ +/*****************************************************************************/ + /*****************************************************************************/ /* */ -/* How to add another language to prg2lout's list */ +/* HOW TO ADD ANOTHER LANGUAGE TO PRG2LOUT */ +/* */ +/* Step 1. Construct clones of (say) eiffel and eiffelf (these are in */ +/* $LOUTLIB/include) with every occurrence of eiffel or Eiffel in them */ +/* changed to your language as appropriate. Install your files in the */ +/* Lout system include directory alongside eiffel and eiffelf. */ /* */ -/* Step 1. Have a browse through the token declarations below, and work */ +/* It is good to clone the files exactly because that way all program */ +/* formatting works the same way, and one chapter of the User's Guide */ +/* covers the lot. However if your language has some unique element, not */ +/* readily classifiable as a string, identifier, comment, keyword, */ +/* number, or operator, it is possible to emit a different command of */ +/* your choice for the new element; but then your clones of eiffel and */ +/* eiffelf have to be extended to handle that command. */ +/* */ +/* Step 2. Have a browse through the token declarations below, and work */ /* out which of them you need for your language. If you need a token that */ /* isn't there already, you'll have to define it; there are many examples */ -/* there to help you. */ +/* and documentation there to help you. The tokens for Perl are rather */ +/* complicated and don't make a good model for most languages, so look */ +/* more at the C and Eiffel ones. */ /* */ -/* Step 2. Browse through the language declarations, and declare your */ +/* Step 3. Browse through the language declarations, and declare your */ /* language following those examples: first you give a set of one or more */ -/* alternative names for your language, then the default printing style for */ -/* it (either "fixed", "varying", or "symbol"), then you list the tokens */ -/* of the language, then you list the keywords. */ +/* alternative names for your language, then some other things, including */ +/* the list of tokens of the language, and its keywords. */ /* */ -/* Step 3. Add your language variable to the list in the initializer of */ +/* Step 4. Add your language variable to the list in the initializer of */ /* variable languages, as you can see the others have been done. */ /* */ -/* Step 4. If any lists of initializers now contain more than */ +/* Step 5. If any lists of initializers now contain more than */ /* MAX_ARRAY_LENGTH-1 elements, increase MAX_ARRAY_LENGTH until they don't. */ +/* The gcc compiler will warn you if you forget to do this. */ /* */ -/* Step 5. Recompile and reinstall prg2lout, test "prg2lout -u" then */ +/* Step 6. Recompile and reinstall prg2lout, test "prg2lout -u" then */ /* "prg2lout -l <mylanguage> <myfile> | lout -s > out.ps". */ /* */ -/* Step 6. Send your tested and verified changes to jeff@cs.usyd.edu.au */ -/* for incorporation in the next Lout release. If you do this, please */ -/* try hard to ensure that your changes conform to the formal definition */ -/* of your language. */ +/* Step 7. Send your tested and tidied files to me for incorporation */ +/* in the next Lout release. If you do this, please try hard to ensure */ +/* that your new code conforms to the formal definition of your language. */ +/* Feel free to email me for advice as you go along. */ +/* */ +/* Jeff Kingston */ +/* jeff@cs.usyd.edu.au */ /* */ /*****************************************************************************/ - + +#include <stdio.h> +#include <stdlib.h> +#define FALSE 0 +#define TRUE 1 +#define BOOLEAN unsigned + /*****************************************************************************/ /* */ /* Character sets */ /* */ -/* Here are prg2lout's definitions of various commonly needed sets of */ +/* These are prg2lout's definitions of various commonly needed sets of */ /* characters. May need enhancement for Latin1 etc. */ /* */ /*****************************************************************************/ +#define AllCharacters NULL /* code will recognize NULL and do this */ + char AllPrintable[] = " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\ ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ; @@ -75,18 +149,14 @@ char AllPrintablePlusNL[] = " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\ ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n" ; +char AllPrintablePlusTab[] = + " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\ +ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\t" ; + char AllPrintableTabNL[] = " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\ ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n\t" ; -char NotDoubleQuote[] = - " !#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\ -ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ; - -char NotQuote[] = - " !#$%&\"()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\ -ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ; - char Letters[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ; char Letter_Digit[] = @@ -100,287 +170,1797 @@ char Letter_Digit[] = #define SepDigits "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" +#define HexDigits "A", "a", "B", "b", "C", "c", "D", "d", "E", "e", "F", "f" + /*****************************************************************************/ /* */ /* TOKEN - put your token declarations in this section */ /* */ +/* The fields of token_rec have the following meanings: */ +/* */ +/* name */ +/* The name of this token, e.g. "string" or "identifier". This field */ +/* is used only by error messages generated by prg2lout; for example, */ +/* prg2lout might print the message "input ended within string". */ +/* */ +/* print_style */ +/* */ +/* print_style What gets printed */ +/* ------------------------------------------------------- */ +/* PRINT_WHOLE_QUOTED command{"token"} */ +/* PRINT_NODELIMS_QUOTED command{"token-minus-delims"} */ +/* PRINT_WHOLE_UNQUOTED command{token} */ +/* PRINT_NODELIMS_UNQUOTED command{token-minus-delims} */ +/* PRINT_NODELIMS_INNER command{inner} */ +/* PRINT_COMMAND_ONLY command */ +/* */ +/* If command (see next) is empty then the braces {} are not printed. */ +/* */ +/* PRINT_WHOLE_QUOTED. This command is the most frequently used one; */ +/* it prints the token, enclosed in braces and quotes, preceded by the */ +/* command. The quotes ensure that the result is legal Lout; any " or */ +/* \ in the token is printed with a preceding \ as required in Lout. */ +/* The usual arrangement for handling white space is that none of the */ +/* tokens contain it; when it is encountered prg2lout generates the */ +/* appropriate Lout without being told (a space for a space, a newline */ +/* for a newline, @NP for a formfeed, and something clever for tab */ +/* which does the required thing). However, you can define a token */ +/* that contains white space if you wish. If you do this, the */ +/* quotation marks will be temporarily closed off, the white space */ +/* handled as just described, then the quotes opened again. */ +/* */ +/* PRINT_NODELIMS_QUOTED. This is like PRINT_WHOLE_QUOTED except that */ +/* the opening and closing delimiters of the token are omitted from */ +/* the print. This is useful occasionally when these delimiters are */ +/* formatting markers, not intended to be printed. */ +/* */ +/* PRINT_WHOLE_UNQUOTED. This style prints the command and braces */ +/* as usual, but omits the quotes and prints the token absolutely */ +/* verbatim. In general this is not going to produce legal Lout, */ +/* but it is useful in two cases: when the token is a Lout escape, */ +/* so that it is the user's responsibility to ensure that its content */ +/* is legal Lout; and when the command is another filter command, so */ +/* that the token content will not go directly into Lout anyway, it */ +/* will go through the other filter first. */ +/* */ +/* PRINT_NODELIMS_UNQUOTED. This is like PRINT_WHOLE_UNQUOTED except */ +/* that the opening and closing delimiters of the token are omitted. */ +/* */ +/* PRINT_NODELIMS_INNER. Like PRINT_NODELIMS_UNQUOTED except that the */ +/* inner part (i.e. not delimiters) is formatted in the same language. */ +/* */ +/* PRINT_COMMAND_ONLY. This ignores the token and prints just the */ +/* command, presumably because the command says it all for that token. */ +/* When using PRINT_COMMAND_ONLY you will probably need to finish your */ +/* command with a space: since there are no following braces in this */ +/* print style, your command will run into the next one otherwise. */ +/* */ /*****************************************************************************/ -#define MAX_CHAR 256 -#define MAX_STRING_LENGTH 101 -#define MAX_ARRAY_LENGTH 101 + +/*****************************************************************************/ +/* */ +/* Meaning of TOKEN fields (ctd.) */ +/* */ +/* command */ +/* The Lout command to print. This command could be an legal Lout; */ +/* programming language setup files offer the following Lout symbols */ +/* that make the most common commands: */ +/* */ +/* @PI for formatting identifiers */ +/* @PK for formatting keywords */ +/* @PO for formatting operators */ +/* @PN for formatting numbers */ +/* @PS for formatting strings */ +/* @PC for formatting comments */ +/* @PA for printing an asterisk (lower on the line than usual) */ +/* @PM for printing a minus sign (longer than a hyphen) */ +/* @PD for printing a dot (.), only larger than usual */ +/* */ +/* The last three require PRINT_COMMAND_ONLY (they take no parameter). */ +/* If command is NULL or "", then no command will be printed and */ +/* furthermore the token will not be enclosed in the usual braces. */ +/* */ +/* alternate_command */ +/* Every language has a list of keywords. Just before printing each */ +/* token, it is compared against the keywords. If it is one of them, */ +/* then alternate_command is used instead of command. For example, */ +/* identifiers usually have command @PI and alternate_command @PK. */ +/* */ +/* following_command */ +/* Print this Lout command (or commands) after the token. */ +/* */ +/* start_line_only */ +/* A Boolean field. If TRUE, this token is to be recognized only */ +/* if it occurs at the very start of a line. */ +/* */ +/* starts[] */ +/* This field holds an array of strings. If prg2lout discovers any */ +/* one of these strings while it is not reading some other token, */ +/* then it deems that this token has begun. The recognized string */ +/* is the token's "starting delimiter". */ +/* */ +/* starts2[], ends2[] */ +/* These fields each hold an array of strings, and the two arrays */ +/* must have equal length. If they have length zero, these fields */ +/* do not apply. Otherwise, they modify the meaning of starts[] and */ +/* end_delimiter below. Their effect is best seen by looking at some */ +/* examples from Perl, their only user at the time of writing: */ +/* */ +/* q/hello/ qq/hello/ qq?hello? qq{hello} */ +/* */ +/* These strings may begin with q, qq, qx, and several other things; */ +/* this is then followed by a single character which determines the */ +/* string terminator; e.g. / means "terminate with /", { means */ +/* "terminate with }", etc. This is implemented by putting these */ +/* pairs of strings in corresponding places in starts[] and ends[]. */ +/* */ +/* starts: { "q", "qq", "qx" } */ +/* starts2: { "/", "?", "{" } */ +/* ends2: { "/", "?", "}" } */ +/* */ +/* PerlQTypeToken is a larger example of this. */ +/* */ +/*****************************************************************************/ + + +/*****************************************************************************/ +/* */ +/* Meaning of TOKEN fields (ctd.) */ +/* */ +/* legal */ +/* This string defines the set of legal characters inside this token. */ +/* For example, numbers might have "0123456789." for this field, since */ +/* these are the characters that are legal within numbers, usually. */ +/* */ +/* escape */ +/* This string defines a single character which is the escape */ +/* character for this token. That is, if we are reading this token */ +/* and come upon this character, the character following it is */ +/* treated differently. An empty string "" means no escape character. */ +/* */ +/* escape_legal */ +/* This string defines the set of characters which are legal after */ +/* the escape character just mentioned. If any one of these appears */ +/* immediately after the escape character, it is deemed to be part */ +/* of the token even if without the preceding escape it would not be. */ +/* */ +/* inner_escape */ +/* end_inner_escape */ +/* The inner_escape string should be either empty (in which case it */ +/* does not apply), or else it should contain a single character, the */ +/* "inner escape" character. An inner escape is a temporary suspension */ +/* of a token, reverting to the original language. It is used to set */ +/* program text within comments. For example, in Eiffel and Blue, */ +/* inner_escape is "`" and end_inner_escape is "'" and so we can write */ +/* */ +/* -- increment `balance' by `amount' */ +/* */ +/* to treat balance and amount as identifiers within a comment token. */ +/* The inner escape is not limited to one token, it may have any */ +/* number of tokens, and they may have inner escapes too; prg2lout */ +/* imposes no limit on the depth of nesting of inner escapes. */ +/* */ +/* end_delimiter */ +/* This string shows how the token ends; for example, a string would */ +/* have end_delimiter ". If empty, it means that the token ends */ +/* just before the first character encountered that is not legal (see */ +/* "legal" above). For example, identifiers and numbers would have */ +/* empty end_delimiter. If ends2[] is not empty then end_delimiter */ +/* is ignored, since ends2[] explains how the token ends. */ +/* */ +/* end_start_line_only */ +/* A BOOLEAN field. If true, the end delimiter is to be recognized */ +/* only if it occurs at the very start of a line. */ +/* */ +/* want_two_ends */ +/* A Boolean feature used only by Perl; TRUE means that end_delimiter */ +/* (or ends2[]) has to be encountered twice before the token ends, */ +/* rather than the usual once. Used by PerSTypeToken to recognise */ +/* */ +/* s/abc/ABC/ */ +/* */ +/* and similar things as single tokens. Note that this works without */ +/* modification on s{abc}{ABC} provided { is legal within strings: as */ +/* long as the token terminates at the second }, we are safe. */ +/* */ +/*****************************************************************************/ + + +#define MAX_CHAR 256 +#define MAX_ARRAY_LENGTH 401 +#define PRINT_WHOLE_QUOTED 1 +#define PRINT_NODELIMS_QUOTED 2 +#define PRINT_WHOLE_UNQUOTED 3 +#define PRINT_NODELIMS_UNQUOTED 4 +#define PRINT_NODELIMS_INNER 5 +#define PRINT_COMMAND_ONLY 6 typedef struct token_rec { - char *name; /* the name of this token, e.g. "string" */ - char *command; /* the Lout command for it, e.g. "@D" */ - char *alternate_command; /* the alternate command for it, e.g. "@K" */ - char *starts[MAX_ARRAY_LENGTH]; /* strings that start this token */ - char *legal; /* legal characters in this token */ - char *escape; /* the escape character e.g. "\\", or "" */ - char *escape_legal; /* legal characters after escape character */ - char *inner_escape; /* inner escape, e.g. "`", or else "" */ - char *end_inner_escape; /* end inner escape, e.g. "'" */ - char *end_delimiter; /* the end delimiter, else "" */ + char *name; + int print_style; + char *command; + char *alternate_command; + char *following_command; + BOOLEAN start_line_only; + char *starts[MAX_ARRAY_LENGTH]; + char *starts2[MAX_ARRAY_LENGTH]; + char *ends2[MAX_ARRAY_LENGTH]; + char *legal; + char *escape; + char *escape_legal; + char *inner_escape; + char *end_inner_escape; + char *end_delimiter; + BOOLEAN end_start_line_only; + BOOLEAN want_two_ends; /* The following options are initialized by the program, so don't you */ - char chtype[MAX_CHAR]; /* character types within token */ + char chtype[MAX_CHAR]; /* character types within token */ char escape_chtype[MAX_CHAR]; /* character types after escape */ } TOKEN; + +/*****************************************************************************/ +/* */ +/* Tokens defining strings and literal characters in non-Perl languages. */ +/* */ +/*****************************************************************************/ TOKEN CStringToken = { "string", /* used by error messages involving this token */ - "@S", /* Lout command for formatting strings */ - "", /* No alternate command */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PS", /* Lout command for formatting strings */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ { "\"" }, /* strings begin with a " character */ - NotDoubleQuote, /* inside, any printable except " is OK */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllPrintable, /* inside, any printable is OK */ "\\", /* within strings, \\ is the escape character */ AllPrintablePlusNL, /* after escape char, any printable char or nl OK */ "", /* strings do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ - "\"" /* strings end with a " character */ + "\"", /* strings end with a " character */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; TOKEN CCharacterToken = { "character", /* used by error messages involving this token */ - "@C", /* Lout command for formatting characters */ - "", /* No alternate command */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PC", /* Lout command for formatting characters */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ { "'" }, /* characters begin with a ' character */ - NotQuote, /* inside, any printable except ' is OK */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllPrintable, /* inside, any printable character is OK */ "\\", /* within characters, \\ is the escape character */ AllPrintable, /* after escape char, any printable char is OK */ "", /* characters do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ - "'" /* characters end with a ' character */ + "'", /* characters end with a ' character */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; + TOKEN EiffelStringToken = { "string", /* used by error messages involving this token */ - "@S", /* Lout command for formatting strings */ - "", /* No alternate command */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PS", /* Lout command for formatting strings */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ { "\"" }, /* strings begin with a " character */ - NotDoubleQuote, /* inside, any printable except " is OK */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllPrintable, /* inside, any printable except " is OK */ "%", /* within strings, % is the escape character */ AllPrintable, /* after escape char, any printable char is OK */ "", /* strings do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ - "\"" /* strings end with a " character */ + "\"", /* strings end with a " character */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; TOKEN EiffelCharacterToken = { "character", /* used by error messages involving this token */ - "@C", /* Lout command for formatting characters */ - "", /* No alternate command */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PC", /* Lout command for formatting characters */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ { "'" }, /* characters begin with a ' character */ - NotQuote, /* inside, any printable except ' is OK */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllPrintable, /* inside, any printable except ' is OK */ "%", /* within characters, % is the escape character */ AllPrintable, /* after escape char, any printable char is OK */ "", /* characters do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ - "'" /* characters end with a ' character */ + "'", /* characters end with a ' character */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; + + +/*****************************************************************************/ +/* */ +/* Identifiers, in the form common to most programming languages. */ +/* */ +/*****************************************************************************/ TOKEN IdentifierToken = { "identifier", /* used by error messages involving this token */ - "@D", /* Lout command for formatting identifiers */ - "@K", /* Alternate command (for keywords) */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PI", /* Lout command for formatting identifiers */ + "@PK", /* Alternate command (for keywords) */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ { SepLetters, "_" }, /* identifiers begin with any letter or _ */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ Letter_Digit, /* inside, letters, underscores, digits are OK */ "", /* no escape character within identifiers */ "", /* so nothing legal after escape char either */ "", /* identifiers do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ - "" /* identifiers do not end with a delimiter */ + "", /* identifiers do not end with a delimiter */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; + +/*****************************************************************************/ +/* */ +/* Numbers, in the form common to most programming languages. */ +/* */ +/*****************************************************************************/ + TOKEN NumberToken = { "number", /* used by error messages involving this token */ - "@N", /* Lout command for formatting numbers */ - "", /* No alternate command */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PN", /* Lout command for formatting numbers */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ { SepDigits }, /* numbers must begin with a digit */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ "0123456789.eE", /* inside, digits, decimal point, exponent */ "", /* no escape character within numbers */ "", /* so nothing legal after escape char either */ "", /* numbers do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ - "" /* numbers do not end with a delimiter */ + "", /* numbers do not end with a delimiter */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; + + +/*****************************************************************************/ +/* */ +/* Tokens defining comments in various languages. */ +/* */ +/*****************************************************************************/ TOKEN CCommentToken = { "comment", /* used by error messages involving this token */ - "@C", /* Lout command for formatting comments */ - "", /* No alternate command */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PC", /* Lout command for formatting comments */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ { "/*" }, /* comments begin with this character pair */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ AllPrintableTabNL, /* inside, any printable char, tab, or nl is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* C comments do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ - "*/" /* comments end with this character pair */ -}; - -TOKEN CCommentEscapeToken = { - "Lout escape", /* used by error messages involving this token */ - NULL, /* no Lout command since passed through unformatted */ - "", /* No alternate command */ - { "/*@" }, /* comments begin with this character pair */ - AllPrintableTabNL, /* inside, any printable char, tab, or nl is OK */ - "", /* no escape character within comments */ - "", /* so nothing legal after escape char either */ - "", /* no "inner escape" in escape comments */ - "", /* so no end of "inner escape" either */ - "*/" /* comments end with this character pair */ + "*/", /* comments end with this character pair */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; TOKEN CPPCommentToken = { "comment", /* used by error messages involving this token */ - "@C", /* Lout command for formatting comments */ - "", /* No alternate command */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PC", /* Lout command for formatting comments */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ { "//" }, /* comments begin with this character pair */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable char is OK (not NL) */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* C comments do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ - "" /* no end delimiter (end of line will end it) */ + "", /* no end delimiter (end of line will end it) */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; + -TOKEN CPPCommentEscapeToken = { - "Lout escape", /* used by error messages involving this token */ - NULL, /* no Lout command since passed through unformatted */ - "", /* No alternate command */ - { "//@" }, /* comments begin with this character pair */ +TOKEN EiffelCommentToken = { + "comment", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PC", /* Lout command for formatting comments */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "--" }, /* comments begin with this character pair */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ - "", /* no "inner escape" in escape comments */ - "", /* so no end of "inner escape" either */ - "" /* no end delimiter (end of line will end it) */ + "`", /* start of "inner escape" in Eiffel comment */ + "'", /* end of "inner escape" in Eiffel comment */ + "", /* no ending delimiter; end of line will end it */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; -TOKEN EiffelCommentToken = { +TOKEN BlueCommentToken = { "comment", /* used by error messages involving this token */ - "@C", /* Lout command for formatting comments */ - "", /* No alternate command */ - { "--" }, /* comments begin with this character pair */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PC", /* Lout command for formatting comments */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "==", "--" }, /* comments begin with this character pair */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ - "`", /* start of "inner escape" in Eiffel comment */ - "'", /* end of "inner escape" in Eiffel comment */ - "" /* no ending delimiter; end of line will end it */ + "`", /* start of "inner escape" in Blue comment */ + "'", /* end of "inner escape" in Blue comment */ + "", /* no ending delimiter; end of line will end it */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; -TOKEN EiffelCommentEscapeToken = { + +/*****************************************************************************/ +/* */ +/* Tokens defining escape comments in various languages. */ +/* */ +/* See discussion of "inner escapes" above for more information. */ +/* */ +/*****************************************************************************/ + +TOKEN CCommentEscapeToken = { "Lout escape", /* used by error messages involving this token */ - NULL, /* no Lout command since passed through unformatted */ - "", /* No alternate command */ - { "--@" }, /* comments begin with this character pair */ + PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */ + "", /* no Lout command since we are printing raw */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "/*@" }, /* escape comments begin with this delimiter */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllPrintableTabNL, /* inside, any printable char, tab, or nl is OK */ + "", /* no escape character within comments */ + "", /* so nothing legal after escape char either */ + "", /* no "inner escape" in escape comments */ + "", /* so no end of "inner escape" either */ + "*/", /* comments end with this character pair */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN CPPCommentEscapeToken = { + "Lout escape", /* used by error messages involving this token */ + PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */ + "", /* no Lout command since we are printing raw */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "//@" }, /* escape comments begin with this delimiter */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* no "inner escape" in escape comments */ "", /* so no end of "inner escape" either */ - "" /* no ending delimiter; end of line will end it */ + "", /* no end delimiter (end of line will end it) */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; + -TOKEN BlueCommentToken = { - "comment", /* used by error messages involving this token */ - "@C", /* Lout command for formatting comments */ - "", /* No alternate command */ - { "==", "--" }, /* comments begin with this character pair */ +TOKEN EiffelCommentEscapeToken = { + "Lout escape", /* used by error messages involving this token */ + PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */ + "", /* no Lout command since we are printing raw */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "--@" }, /* escape comments begin with this delimiter */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ - "`", /* start of "inner escape" in Blue comment */ - "'", /* end of "inner escape" in Blue comment */ - "" /* no ending delimiter; end of line will end it */ + "", /* no "inner escape" in escape comments */ + "", /* so no end of "inner escape" either */ + "", /* no ending delimiter; end of line will end it */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; TOKEN BlueCommentEscapeToken = { "Lout escape", /* used by error messages involving this token */ - NULL, /* no Lout command since passed through unformatted */ - "", /* No alternate command */ - { "==@", "--@" }, /* comments begin with this character pair */ + PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */ + "", /* no Lout command since we are printing raw */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "==@", "--@" }, /* escape comments begin with these delimiters */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* no "inner escape" in escape comments */ "", /* so no end of "inner escape" either */ - "" /* no ending delimiter; end of line will end it */ + "", /* no ending delimiter; end of line will end it */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ }; + +/*****************************************************************************/ +/* */ +/* Tokens which are fixed strings, hence simpler than the above. */ +/* */ +/*****************************************************************************/ #define FixedToken(str, command) /* define fixed-string token */ \ { \ str, /* name used for debugging only */ \ + PRINT_WHOLE_QUOTED, /* print this token as usual */ \ + command, /* Lout command for formatting this */ \ + "", /* no alternate command */ \ + "", /* no following command */ \ + FALSE, /* token not just start of line */ \ + { str }, /* token begins (and ends!) with this */ \ + { NULL }, /* no start2 needed */ \ + { NULL }, /* so no end2 either */ \ + "", /* nothing inside, since no inside */ \ + "", "", /* no escape character */ \ + "", "", /* no inner escape; no end inner esc */ \ + "", /* no ending delimiter */ \ + FALSE, /* end not have to be at line start */ \ + FALSE, /* don't end delimiter twice to stop */ \ +} + +TOKEN HashToken = FixedToken("#", "@PO"); +TOKEN ExclamationToken = FixedToken("!", "@PO"); +TOKEN PercentToken = FixedToken("%", "@PO"); +TOKEN HatToken = FixedToken("^", "@PO"); +TOKEN AmpersandToken = FixedToken("&", "@PO"); +TOKEN SlashToken = FixedToken("/", "@PO"); +TOKEN ArrowToken = FixedToken("->", "arrowright @A @PO"); +TOKEN BackSlashToken = FixedToken("\\", "@PO"); +TOKEN LeftParenToken = FixedToken("(", "@PO"); +TOKEN RightParenToken = FixedToken(")", "@PO"); +TOKEN PlusToken = FixedToken("+", "plus @A @PO"); +TOKEN EqualToken = FixedToken("=", "equal @A @PO"); +TOKEN LeftBraceToken = FixedToken("{", "@PO"); +TOKEN RightBraceToken = FixedToken("}", "@PO"); +TOKEN BarToken = FixedToken("|", "@PO"); +TOKEN CircumToken = FixedToken("~", "@PO"); +TOKEN LeftBracketToken = FixedToken("[", "@PO"); +TOKEN RightBracketToken = FixedToken("]", "@PO"); +TOKEN SemicolonToken = FixedToken(";", "@PO"); +TOKEN ColonToken = FixedToken(":", "@PO"); +TOKEN LessToken = FixedToken("<", "less @A @PO"); +TOKEN GreaterToken = FixedToken(">", "greater @A @PO"); +TOKEN QuestionToken = FixedToken("?", "@PO"); +TOKEN CommaToken = FixedToken(",", "@PO"); +TOKEN DotToken = FixedToken(".", "@PO"); +TOKEN LessEqualToken = FixedToken("<=", "lessequal @A @PO"); +TOKEN GreaterEqualToken = FixedToken(">=", "greaterequal @A @PO"); +TOKEN CNotEqualToken = FixedToken("!=", "notequal @A @PO"); +TOKEN EiffelNotEqualToken = FixedToken("/=", "notequal @A @PO"); +TOKEN BlueNotEqualToken = FixedToken("<>", "notequal @A @PO"); +TOKEN AssignToken = FixedToken(":=", "@PO"); +TOKEN QuestionAssignToken = FixedToken("?=", "@PO"); +TOKEN DollarToken = FixedToken("$", "@PO"); +TOKEN ImpliesToken = FixedToken("=>", "implies @A @PO"); + + +/*****************************************************************************/ +/* */ +/* Fixed-string tokens that are to be printed COMMAND_ONLY (no parameter). */ +/* */ +/*****************************************************************************/ + +#define NoParameterToken(str, command) /* define fixed-string token */ \ +{ \ + str, /* name used for debugging only */ \ + PRINT_COMMAND_ONLY, /* print only the command */ \ command, /* Lout command for formatting this */ \ - "", /* No alternate command */ \ + "", /* no alternate command */ \ + "", /* no following command */ \ + FALSE, /* token not just start of line */ \ { str }, /* token begins (and ends!) with this */ \ + { NULL }, /* no start2 needed */ \ + { NULL }, /* so no end2 either */ \ "", /* nothing inside, since no inside */ \ - "", /* no escape character */ \ - "", /* so nothing after escape character */ \ - "", /* no inner escape either */ \ - "", /* so no end inner escape */ \ - "" /* no ending delimiter */ \ + "", "", /* no escape character */ \ + "", "", /* no inner escape; no end inner esc */ \ + "", /* no ending delimiter */ \ + FALSE, /* end not have to be at line start */ \ + FALSE, /* don't end delimiter twice to stop */ \ +} + +TOKEN StarToken = NoParameterToken("*", "{@PA}"); +TOKEN MinusToken = NoParameterToken("-", "{@PM}"); +TOKEN EiffelDotToken = NoParameterToken(".", "{@PD}"); + + +/*****************************************************************************/ +/* */ +/* Perl (quarantined from other languages since it's very different). */ +/* */ +/* Perl code co-authored by Jeffrey H. Kingston and Mark Summerfield */ +/* March 2000 */ +/* */ +/* In the comments below, WCS refers to "Programming Perl", Second */ +/* Edition (1996), by Wall, Christiansen, and Schwartz. However Perl */ +/* has changed since then and this code also reflects those changes. */ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* */ +/* Perl's strings and regular expressions */ +/* */ +/* The table in WCS pp. 41 is a good summary of the possibilities: */ +/* */ +/* '' q// */ +/* "" qq// */ +/* `` qx// */ +/* () qw// */ +/* // m// */ +/* s/// s/// */ +/* y/// tr/// */ +/* */ +/* To this must be added the following quotation, which begins just */ +/* below the table: */ +/* */ +/* Any non-alphabetic, non-whitespace delimiter can be used in place */ +/* of /. If the opening delimiter is a parenthesis, bracket, brace, */ +/* or angle bracket, the closing delimiter will be the matching */ +/* construct. (Embedded occurrences of the delimiters must match in */ +/* pairs.) ... Finally, for two-string constructs like s/// and tr///, */ +/* if the first pair of quotes is a bracketing pair, then the second */ +/* part gets its own starting quote character, which needn't be the */ +/* same as the first pair. So you can write things like s{foo}(bar) */ +/* or tr[a-z][A-Z]. Whitespace is allowed between the two inner quote */ +/* characters, so you could even write that last one as */ +/* */ +/* tr [a-z] */ +/* [A-Z] */ +/* */ +/* At present the tokens below have three problems implementing this: */ +/* */ +/* (1) When / appears without anything in front, it will be recognized */ +/* as a division operator, hence the following regular expression */ +/* will be treated as ordinary Perl, not as a regular expression; */ +/* */ +/* (2) When the delimiters are a matching pair, embedded occurrences */ +/* are not recognised as such: the first closing delimiter will be */ +/* taken to close the pair; */ +/* */ +/* (3) The case s{foo}(bar) is not implemented; the second bracketing */ +/* pair must be the same as the first; in fact prg2lout will keep */ +/* going, looking for a second closing brace, in this example. */ +/* */ +/*****************************************************************************/ + + +TOKEN PerlSingleQuoteStringToken = { + "''-string", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PS", /* Lout command for formatting strings */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "'" }, /* strings begin with a ' character */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* inside, any character at all is OK */ + "\\", /* within strings, \\ is the escape character */ + "\\'", /* after escape, only \ and ' are OK (WCS p. 39) */ + "", /* strings do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "\'", /* strings end with a ' character */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PerlDoubleQuoteStringToken = { + "\"\"-string", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PS", /* Lout command for formatting strings */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "\"" }, /* strings begin with a " character */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* inside, any character at all is OK */ + "\\", /* within strings, \\ is the escape character */ + AllCharacters, /* after escape char, any character at all is OK */ + "", /* strings do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "\"", /* strings end with a " character */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PerlBackQuoteStringToken = { + "``-string", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PS", /* Lout command for formatting strings */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "`" }, /* strings begin with a ` character */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* inside, any character at all is OK */ + "\\", /* within strings, \\ is the escape character */ + AllCharacters, /* after escape char, any character at all is OK */ + "", /* strings do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "`", /* strings end with a ` character */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +TOKEN PerlQTypeStringToken = { + "q-type-string", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PS", /* Lout command for formatting strings */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "q", "qq", "qx", "qw", "qr", "m" }, /* q-type strings begin with these */ + { "/", "(", "[", "{", "<", "!", "%", "#", "|", ",", ":", ";" }, /*start2 */ + { "/", ")", "]", "}", ">", "!", "%", "#", "|", ",", ":", ";" }, /* end2 */ + AllCharacters, /* inside, any character at all is OK */ + "\\", /* within strings, \\ is the escape character */ + AllCharacters, /* after escape char, any character at all is OK */ + "", /* strings do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "", /* will be using end2 for the end delimiter here */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PerlSTypeStringToken = { + "s-type-string", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PS", /* Lout command for formatting strings */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "s", "y", "tr" }, /* s-type strings begin with these */ + { "/", "(", "[", "{", "<", "!", "%", "#", "|", ",", ":", ";" }, /*start2 */ + { "/", ")", "]", "}", ">", "!", "%", "#", "|", ",", ":", ";" }, /* end2 */ + AllCharacters, /* inside, any character at all is OK */ + "\\", /* within strings, \\ is the escape character */ + AllCharacters, /* after escape char, any character at all is OK */ + "", /* strings do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "", /* will be using end2 for the end delimiter here */ + FALSE, /* end delimiter does not have to be at line start */ + TRUE, /* need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Perl "bare" regular expressions */ +/* */ +/* By a bare regular expression, we mean one that is not preceded */ +/* by q, qq, m etc. These are distinguished from division by being */ +/* preceded by one of =, =~, !~, split, if, not, unless, for, foreach, */ +/* or while, with up to two white space characters intervening. */ +/* */ +/*****************************************************************************/ + +#define PerlREToken(start, com) \ +{ \ + "regexp", /* used by error messages */ \ + PRINT_NODELIMS_QUOTED,/* no delims since we supply them */ \ + com, /* the command */ \ + "", /* no alternate command */ \ + "@PS{\"/\"}", /* following command (final /) */ \ + FALSE, /* token allowed not just start of line */ \ + { start }, /* preceding token in this case */ \ + { "/", " /", "\t/", " /", " \t/", "\t /", "\t\t/" }, /* start2 */ \ + { "/", "/", "/", "/", "/", "/", "/" }, /* end2 */ \ + AllCharacters, /* any character OK inside */ \ + "\\", /* \\ is the escape character */ \ + AllCharacters, /* after escape char, any is OK */ \ + "", /* no inner escapes */ \ + "", /* no end innner escape either */ \ + "", /* will be using end2 here */ \ + FALSE, /* no need to end at line start */ \ + FALSE, /* don't want end delimiter twice */ \ +} + +TOKEN PerlRegExpLPar = PerlREToken("(", "@PO{\"(\"} @PS{\"/\"}@PS"); +TOKEN PerlRegExpEq = PerlREToken("=", "@PO{\"=\"} @PS{\"/\"}@PS"); +TOKEN PerlRegExpMatch = PerlREToken("=~", "@PO{\"=~\"} @PS{\"/\"}@PS"); +TOKEN PerlRegExpNoMatch = PerlREToken("!~", "@PO{\"!~\"} @PS{\"/\"}@PS"); +TOKEN PerlRegExpSplit = PerlREToken("split", "@PK{split} @PS{\"/\"}@PS"); +TOKEN PerlRegExpIf = PerlREToken("if", "@PK{if} @PS{\"/\"}@PS"); +TOKEN PerlRegExpNot = PerlREToken("not", "@PK{not} @PS{\"/\"}@PS"); +TOKEN PerlRegExpUnless = PerlREToken("unless", "@PK{unless} @PS{\"/\"}@PS"); +TOKEN PerlRegExpFor = PerlREToken("for", "@PK{for} @PS{\"/\"}@PS"); +TOKEN PerlRegExpForEach = PerlREToken("foreach","@PK{foreach} @PS{\"/\"}@PS"); +TOKEN PerlRegExpWhile = PerlREToken("while", "@PK{while} @PS{\"/\"}@PS"); + +TOKEN PerlRegExpStartLineToken = +{ + "regexp", /* used by error messages */ + PRINT_WHOLE_QUOTED, /* we can print the whole thing this time */ + "@PS", /* the command */ + "", /* no alternate command */ + "", /* no following command */ + TRUE, /* token allowed only at start of line */ + { "/" }, /* starting delimiter (so easy!) */ + { NULL }, /* no start2 */ + { NULL }, /* no end2 */ + AllCharacters, /* any character OK inside */ + "\\", /* \\ is the escape character */ + AllCharacters, /* after escape char, any is OK */ + "", /* no inner escapes */ + "", /* no end innner escape either */ + "/", /* ending delimiter */ + FALSE, /* no need to end at line start */ + FALSE, /* don't want end delimiter twice */ +}; + + +/*****************************************************************************/ +/* */ +/* Perl's here-documents */ +/* */ +/* At present the only terminating strings recognized are EOT, EOF, END, */ +/* and the empty string. These may all be quoted in the usual ways. */ +/* */ +/*****************************************************************************/ + +#define X(startstr, endstr, startcom, endcom) \ +{ \ + "here-document", /* used by error messages */ \ + PRINT_NODELIMS_QUOTED,/* no delims since we supply them */ \ + startcom, /* the command */ \ + "", /* no alternate command */ \ + endcom, /* following command (final /) */ \ + FALSE, /* token allowed not just start of line */ \ + { startstr }, /* starting delimiter */ \ + { NULL }, /* no start2 */ \ + { NULL }, /* no end2 */ \ + AllCharacters, /* any character OK inside */ \ + "", "", /* no escape character */ \ + "", "", /* no inner escapes */ \ + endstr, /* token ends with this */ \ + TRUE, /* must be found at line start */ \ + FALSE, /* don't want end delimiter twice */ \ +} + +#define sEOT "\n@PS{\"EOT\"}\n" +#define sEOF "\n@PS{\"EOF\"}\n" +#define sEND "\n@PS{\"END\"}\n" +#define sBLA "\n@PS{\"\"}\n" + +TOKEN HereEOTuq = X("<<EOT", "EOT\n", "@PO{<<}@PS{\"EOT\"}@PS", sEOT); +TOKEN HereEOTdq = X("<<\"EOT\"","EOT\n", "@PO{<<}@PS{\"\\\"EOT\\\"\"}@PS",sEOT); +TOKEN HereEOTfq = X("<<'EOT'", "EOT\n", "@PO{<<}@PS{\"'EOT'\"}@PS", sEOT); +TOKEN HereEOTbq = X("<<`EOT`", "EOT\n", "@PO{<<}@PS{\"`EOT`\"}@PS", sEOT); + +TOKEN HereEOFuq = X("<<EOF", "EOF\n", "@PO{<<}@PS{\"EOF\"}@PS", sEOF); +TOKEN HereEOFdq = X("<<\"EOF\"","EOF\n", "@PO{<<}@PS{\"\\\"EOF\\\"\"}@PS",sEOF); +TOKEN HereEOFfq = X("<<'EOF'", "EOF\n", "@PO{<<}@PS{\"'EOF'\"}@PS", sEOF); +TOKEN HereEOFbq = X("<<`EOF`", "EOF\n", "@PO{<<}@PS{\"`EOF`\"}@PS", sEOF); + +TOKEN HereENDuq = X("<<END", "END\n", "@PO{<<}@PS{\"END\"}@PS", sEND); +TOKEN HereENDdq = X("<<\"END\"","END\n", "@PO{<<}@PS{\"\\\"END\\\"\"}@PS",sEND); +TOKEN HereENDfq = X("<<'END'", "END\n", "@PO{<<}@PS{\"'END'\"}@PS", sEND); +TOKEN HereENDbq = X("<<`END`", "END\n", "@PO{<<}@PS{\"`END`\"}@PS", sEND); + +TOKEN HereBLAuq = X("<< ", "\n", "@PO{<<} @PS", sBLA); +TOKEN HereBLAdq = X("<<\"\"", "\n", "@PO{<<}@PS{\"\\\"\\\"\"}@PS", sBLA); +TOKEN HereBLAfq = X("<<''", "\n", "@PO{<<}@PS{\"''\"}@PS", sBLA); +TOKEN HereBLAbq = X("<<``", "\n", "@PO{<<}@PS{\"``\"}@PS", sBLA); + + +/*****************************************************************************/ +/* */ +/* Perl's identifiers */ +/* */ +/* "Names that start with a letter or underscore may be of any */ +/* length ... and may contain letters, digits, and underscores. */ +/* Names that start with a digit may only contain more digits. */ +/* Names that start with anything else are limited to that one */ +/* character (like $? or $$) and generally have a predefined */ +/* significance to Perl." (WCS page 38) */ +/* */ +/* In addition we have to consider that variable names may be preceded */ +/* by $, @, %, &, or *. Whether these are part of the variable or not is */ +/* rather doubtful. We will treat $ and @ as part of the variable and the */ +/* others not (since they occur elsewhere in the token list anyway); plus */ +/* we have a separate token type for identifiers beginning with $ and */ +/* followed by one character, which we will list explicitly. */ +/* */ +/*****************************************************************************/ + +TOKEN PerlIdentifierToken = { + "identifier", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PI", /* Lout command for formatting identifiers */ + "@PK", /* Alternate command (for keywords) */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + {SepLetters,"_","$","@"}, /* identifiers begin with these */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + Letter_Digit, /* inside, letters, underscores, digits are OK */ + "", /* no escape character within identifiers */ + "", /* so nothing legal after escape char either */ + "", /* identifiers do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "", /* identifiers do not end with a delimiter */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PerlSpecialIdentifierToken = { + "special identifier", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PI", /* Lout command for formatting identifiers */ + "@PK", /* Alternate command (for keywords) */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "$_", "$.", "$/", "$,", "$\\", "$\"", "$;", "$^L", "$:", + "$^A", "$#", "$?", "$!", "$@", "$$", "$<", "$>", "$(", + "$)", "$0", "$[", "$]", "$^D", "$^E", "$^F", "$^H", "$^I", + "$^M", "$^O", "$^P", "$^T", "$^W", "$^X" + }, /* all possible values of special variables */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + "", /* nothing allowed inside, since ends after start */ + "", /* no escape character within identifiers */ + "", /* so nothing legal after escape char either */ + "", /* identifiers do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "", /* identifiers do not end with a delimiter */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Perl's numeric literals */ +/* */ +/* These are defined in WCS page 39 basically by giving these examples: */ +/* */ +/* 12345 # integer */ +/* 12345.67 # floating point */ +/* 6.02E23 # scientific notation */ +/* 0xffff # hexadecimal */ +/* 0377 # octal */ +/* 4_294_967_296 # underline for legibility */ +/* */ +/* Implementation is straightforward; hexadecimal is a separate token. */ +/* */ +/*****************************************************************************/ + +TOKEN PerlLiteralNumberToken = { + "number", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PN", /* Lout command for formatting numbers */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { SepDigits }, /* numbers must begin with a digit */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + "0123456789.eE_", /* inside, digits, point, exponent, underscore */ + "", /* no escape character within numbers */ + "", /* so nothing legal after escape char either */ + "", /* numbers do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "", /* numbers do not end with a delimiter */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PerlHexNumberToken = { + "number", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PN", /* Lout command for formatting numbers */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "0x" }, /* hex numbers must begin with 0x */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + "0123456789AaBbCcDdEeFf", /* inside, hexadecimal digits */ + "", /* no escape character within numbers */ + "", /* so nothing legal after escape char either */ + "", /* numbers do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "", /* numbers do not end with a delimiter */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Perl's comments */ +/* */ +/* "Comments are indicated by the # character and extend to the end of */ +/* the line." (WCS page 35). To this we have added the usual Lout escape */ +/* comment beginning with #@. */ +/* */ +/*****************************************************************************/ + +TOKEN PerlCommentToken = { + "comment", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PC", /* Lout command for formatting comments */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "#" }, /* comments begin with this character */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllPrintable, /* inside, any printable char is OK (not NL) */ + "", /* no escape character within comments */ + "", /* so nothing legal after escape char either */ + "", /* C comments do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "", /* no end delimiter (end of line will end it) */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PerlCommentEscapeToken = { + "Lout escape", /* used by error messages involving this token */ + PRINT_NODELIMS_UNQUOTED, /* print this token unformatted */ + "", /* no Lout command since we are printing raw */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "#@" }, /* comments begin with this character pair */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllPrintable, /* inside, any printable char is OK */ + "", /* no escape character within comments */ + "", /* so nothing legal after escape char either */ + "", /* no "inner escape" in escape comments */ + "", /* so no end of "inner escape" either */ + "", /* no end delimiter (end of line will end it) */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Perl's POD sub-language */ +/* */ +/* Pod is handled as a completely different language. However we need */ +/* one Perl token which recognizes an entire Pod interpolation and prints */ +/* it enclosed in @Pod { ... } so that Lout knows to call back later on it. */ +/* */ +/* "A line beginning with = is assumed to introduce some documentation, */ +/* which continues until another line is reached beginning with =cut" */ +/* (WCS page36). Strictly speaking this is only valid at points where */ +/* a statement would be legal, but that is beyond prg2lout to implement. */ +/* */ +/*****************************************************************************/ + +TOKEN PerlPodToken = { + "perl-pod", /* used by error messages involving this token */ + PRINT_NODELIMS_UNQUOTED, /* unquoted but with a command enclosing it */ + "@DP @Pod", /* Lout command for formatting Pod */ + "", /* no alternate command */ + "@DP\n", /* following command */ + TRUE, /* token allowed at start of line only */ + { "=", "=pod" }, /* pod insert begins with either of these */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* inside, any character at all is OK */ + "", /* no escape character within pod comments */ + "", /* so nothing legal after escape char either */ + "", /* pod comments do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "=cut", /* pod comments end with this string */ + TRUE, /* end delimiter must be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Perl's operators */ +/* */ +/* Only those not already in the C/C++ list are given here. */ +/* */ +/*****************************************************************************/ + +TOKEN PerlIncrementToken = FixedToken( "++", "@PO" ) ; +TOKEN PerlDecrementToken = FixedToken( "--", "@PO" ) ; +TOKEN PerlExponentiateToken = FixedToken( "**", "@PO" ) ; +TOKEN PerlMatchToken = FixedToken( "=~", "@PO" ) ; +TOKEN PerlNotMatchToken = FixedToken( "!~", "@PO" ) ; +TOKEN PerlEqualToken = FixedToken( "==", "@PO" ) ; +TOKEN PerlAssignToken = FixedToken( "=", "@PO" ) ; +TOKEN PerlBitLeftShiftToken = FixedToken( "<<", "@PO" ) ; +TOKEN PerlBitRightShiftToken = FixedToken( ">>", "@PO" ) ; +TOKEN PerlSpaceshipToken = FixedToken( "<=>", "@PO" ) ; +TOKEN PerlAndToken = FixedToken( "&&", "@PO" ) ; +TOKEN PerlOrToken = FixedToken( "||", "@PO" ) ; +TOKEN PerlRange2Token = FixedToken( "..", "@PO" ) ; +TOKEN PerlRange3Token = FixedToken( "...", "@PO" ) ; +TOKEN PerlFileTestrToken = FixedToken( "-r", "@PO" ) ; +TOKEN PerlFileTestwToken = FixedToken( "-w", "@PO" ) ; +TOKEN PerlFileTestxToken = FixedToken( "-x", "@PO" ) ; +TOKEN PerlFileTestoToken = FixedToken( "-o", "@PO" ) ; +TOKEN PerlFileTestRToken = FixedToken( "-R", "@PO" ) ; +TOKEN PerlFileTestWToken = FixedToken( "-W", "@PO" ) ; +TOKEN PerlFileTestXToken = FixedToken( "-X", "@PO" ) ; +TOKEN PerlFileTestOToken = FixedToken( "-O", "@PO" ) ; +TOKEN PerlFileTesteToken = FixedToken( "-e", "@PO" ) ; +TOKEN PerlFileTestzToken = FixedToken( "-z", "@PO" ) ; +TOKEN PerlFileTestsToken = FixedToken( "-s", "@PO" ) ; +TOKEN PerlFileTestfToken = FixedToken( "-f", "@PO" ) ; +TOKEN PerlFileTestdToken = FixedToken( "-d", "@PO" ) ; +TOKEN PerlFileTestlToken = FixedToken( "-l", "@PO" ) ; +TOKEN PerlFileTestpToken = FixedToken( "-p", "@PO" ) ; +TOKEN PerlFileTestSToken = FixedToken( "-S", "@PO" ) ; +TOKEN PerlFileTestbToken = FixedToken( "-b", "@PO" ) ; +TOKEN PerlFileTestcToken = FixedToken( "-c", "@PO" ) ; +TOKEN PerlFileTesttToken = FixedToken( "-t", "@PO" ) ; +TOKEN PerlFileTestuToken = FixedToken( "-u", "@PO" ) ; +TOKEN PerlFileTestgToken = FixedToken( "-g", "@PO" ) ; +TOKEN PerlFileTestkToken = FixedToken( "-k", "@PO" ) ; +TOKEN PerlFileTestTToken = FixedToken( "-T", "@PO" ) ; +TOKEN PerlFileTestBToken = FixedToken( "-B", "@PO" ) ; +TOKEN PerlFileTestMToken = FixedToken( "-M", "@PO" ) ; +TOKEN PerlFileTestAToken = FixedToken( "-A", "@PO" ) ; +TOKEN PerlFileTestCToken = FixedToken( "-C", "@PO" ) ; + + +/*****************************************************************************/ +/* */ +/* Pod (Plain Old Documentation, used with Perl) tokens */ +/* */ +/* Pod is treated as a completely different language to Perl. It is */ +/* quite possible to use Pod alone without Perl; or, thanks to the */ +/* PerlPodToken, to embed Pod in Perl in the usual way. Quotations below */ +/* are from Larry Wall's documentation, communicated by Mark Summerfield. */ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* */ +/* Pod Verbatim paragraphs */ +/* */ +/* "A verbatim paragraph [is] distinguished by being indented (that is, it */ +/* starts with a space or tab). It should be reproduced exactly, with */ +/* tabs assumed to be on 8-column boundaries. There are no special */ +/* formatting escapes." */ +/* */ +/* By a "paragraph" is meant a sequence of lines down to the next empty */ +/* line; but we will handle verbatim paragraphs one line at a time. */ +/* It also seems that an empty line in the input has to become an empty */ +/* line in the output; so we include a token for that. */ +/* */ +/*****************************************************************************/ + +TOKEN PodVerbatimLineToken = { + "verbatim-para", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* printing the whole paragraph quoted */ + "@PV ", /* Lout command for formatting verbatim line */ + "", /* no alternate command */ + "", /* no following command */ + TRUE, /* token allowed at start of line only */ + { "\t", " " }, /* command begins with this */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllPrintablePlusTab, /* inside, any printable char except newline is OK */ + "", "", /* no escape character within verbatim lines */ + "", "", /* no "inner escapes" within verbatim lines */ + "", /* ends at end of line */ + FALSE, /* don't need to be at start of line to end it */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PodEmptyLineToken = { + "pod-empty-line", /* used by error messages involving this token */ + PRINT_COMMAND_ONLY, /* printing just the command */ + "@PPG\n", /* Lout command for formatting Pod empty line */ + "", /* no alternate command */ + "", /* no following command */ + TRUE, /* token allowed at start of line only */ + { "\n" }, /* command begins with this */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + "", /* nothing inside */ + "", "", /* no escape character */ + "", "", /* no inner escape */ + "", /* token will end with the end of the line */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Pod Command paragraphs */ +/* */ +/* "All command paragraphs start with =, followed by an identifier, */ +/* followed by arbitrary text that the command can use." */ +/* */ +/* "[A] command lasts up until the end of the paragraph, not the line. */ +/* Hence, ... you can see the empty lines after each command to end */ +/* its paragraph." */ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* */ +/* Pod command paragraphs: =pod */ +/* */ +/* "The =pod directive does nothing beyond telling the compiler to lay off */ +/* parsing code through the next =cut." */ +/* */ +/*****************************************************************************/ + +TOKEN PodIgnoreToken = { + "pod-cut", /* used by error messages involving this token */ + PRINT_COMMAND_ONLY, /* printing just the command */ + "", /* Lout command for formatting Pod cut (nothing) */ + "", /* no alternate command */ + "", /* no following command */ + TRUE, /* token allowed at start of line only */ + { "=pod", "=cut" }, /* command begins with this */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* anything at all can be inside */ + "", /* no escape character */ + "", /* so nothing legal after escape char either */ + "", /* cut tokens do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "\n", /* token will end with the end of the line */ + TRUE, /* end delimiter (\n) has to be at a line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Pod command paragraphs: =head1, =head2 (and =head3, folklore extension) */ +/* */ +/* "... produce first and second level heading" */ +/* */ +/*****************************************************************************/ + +TOKEN PodHeading1Token = { + "=head1", /* used by error messages involving this token */ + PRINT_NODELIMS_QUOTED, /* print without delimiters */ + "@PHA", /* Lout command for formatting Pod heading */ + "", /* no alternate command */ + "", /* following command */ + TRUE, /* token allowed at start of line only */ + {"=head1","head1"}, /* command begins with this */ + { " ", "\t" }, /* helps to skip following white space */ + { "\n", "\n" }, /* token ends at end of line */ + AllCharacters, /* anything in the heading */ + "", "", /* no escape character; nothing legal after escape */ + "", "", /* no inner escapes; no end inner escape */ + "\n\n", /* token will end with the first blank line */ + FALSE, /* end delimiter (\n) has to be at a line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PodHeading2Token = { + "=head2", /* used by error messages involving this token */ + PRINT_NODELIMS_QUOTED, /* print without delimiters */ + "@PHB", /* Lout command for formatting Pod heading */ + "", /* no alternate command */ + "", /* following command */ + TRUE, /* token allowed at start of line only */ + { "=head2" }, /* command begins with this */ + { " ", "\t" }, /* helps to skip following white space */ + { "\n", "\n" }, /* token ends at end of line */ + AllCharacters, /* anything in the heading */ + "", "", /* no escape character; nothing legal after escape */ + "", "", /* no inner escapes; no end inner escape */ + "\n\n", /* token will end with the first blank line */ + FALSE, /* end delimiter (\n) has to be at a line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PodHeading3Token = { + "=head3", /* used by error messages involving this token */ + PRINT_NODELIMS_QUOTED, /* print without delimiters */ + "@PHC", /* Lout command for formatting Pod heading */ + "", /* no alternate command */ + "", /* following command */ + TRUE, /* token allowed at start of line only */ + { "=head3" }, /* command begins with this */ + { " ", "\t" }, /* helps to skip following white space */ + { "\n", "\n" }, /* token ends at end of line */ + AllCharacters, /* anything in the heading */ + "", "", /* no escape character; nothing legal after escape */ + "", "", /* no inner escapes; no end inner escape */ + "\n\n", /* token will end with the first blank line */ + FALSE, /* end delimiter (\n) has to be at a line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Pod command paragraphs: =over, =item, and =back (for lists) */ +/* */ +/*****************************************************************************/ + +TOKEN PodOverToken = { + "=over", /* used by error messages involving this token */ + PRINT_NODELIMS_UNQUOTED, /* just a number after =over, so this is safe */ + "@RawTaggedList gap{@PLG}indent{@PLI}rightindent{@PLRI}labelwidth{@PLLW ", + "", /* no alternate command */ + "} // {", /* open brace to match } at first item */ + TRUE, /* token allowed at start of line only */ + { "=over" }, /* command begins with this */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* inside, any printable char is OK */ + "", "", /* no escape character; nothing legal after escape */ + "", "", /* no inner escapes; no end inner escape */ + "\n", /* token will end with the end of the line */ + TRUE, /* end delimiter (\n) has to be at a line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PodItemToken = { + "=item", /* used by error messages involving this token */ + PRINT_NODELIMS_QUOTED,/* printing just what follows =item on the line */ + "@Null //}\n@TI {@PLL", /* Lout command for formatting Pod item */ + "", /* no alternate command */ + "} {", /* open brace to enclose the item content */ + TRUE, /* token allowed at start of line only */ + { "=item" }, /* command begins with this */ + { " ", "\t" }, /* helps to skip following white space */ + { "\n\n", "\n\n" }, /* token will end at blank line */ + AllPrintableTabNL, /* any printable inside */ + "", "", /* no escape character; nothing legal after escape */ + "", "", /* no inner escapes; no end inner escape */ + "", /* see ends2[] */ + FALSE, /* end delimiter (\n) must already be at start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PodBackToken = { + "=back", /* used by error messages involving this token */ + PRINT_COMMAND_ONLY, /* printing just the command */ + "@Null // }\n@EndList\n", /* Lout command for formatting Pod back */ + "", /* no alternate command */ + "", /* no following command */ + TRUE, /* token allowed at start of line only */ + { "=back" }, /* command begins with this */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* anything inside (in principle) */ + "", "", /* no escape character; nothing legal after escape */ + "", "", /* no inner escapes; no end inner escape */ + "\n", /* token will end with the next blank line */ + TRUE, /* end delimiter (\n) has to be at a line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Pod command paragraphs: =for, =begin, =end */ +/* */ +/* "passed directly to particular formatters. A formatter that can utilize */ +/* that format will use the section, otherwise it will be ignored." */ +/* */ +/* So I've put in a "=begin lout" token, also recognized as "=begin Lout". */ +/* */ +/*****************************************************************************/ + +TOKEN PodForToken = { + "=for", /* used by error messages involving this token */ + PRINT_COMMAND_ONLY, /* printing just the command */ + "", /* Lout command for formatting Pod for (nothing) */ + "", "", /* no alternate command; no following command */ + TRUE, /* token allowed at start of line only */ + { "=for" }, /* command begins with this */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* anything inside */ + "", "", /* no escape character; nothing legal after escape */ + "", "", /* no inner escapes; no end inner escape */ + "\n", /* token will end with the end of the line */ + TRUE, /* end delimiter (\n) has to be at a line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PodBeginToken = { + "=begin", /* used by error messages involving this token */ + PRINT_COMMAND_ONLY, /* printing just the command */ + "", /* Lout command for formatting Pod for (nothing) */ + "", "", /* no alternate command; no following command */ + TRUE, /* token allowed at start of line only */ + { "=begin" }, /* command begins with this */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* anything inside */ + "", "", /* no escape character; nothing legal after escape */ + "", "", /* no inner escapes; no end inner escape */ + "=end", /* token will end with =end character */ + TRUE, /* end delimiter has to be at a line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +TOKEN PodBeginLoutToken = { + "=begin lout", /* used by error messages involving this token */ + PRINT_NODELIMS_UNQUOTED,/* this is a Lout escape, no delims or quotes */ + "", /* Lout command for formatting Pod for (nothing) */ + "", "", /* no alternate command; no following command */ + TRUE, /* token allowed at start of line only */ + { "=begin lout", "=begin Lout" }, /* command begins with this */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + AllCharacters, /* anything inside */ + "", "", /* no escape character; nothing legal after escape */ + "", "", /* no inner escapes; no end inner escape */ + "=end", /* token will end with =end character */ + TRUE, /* end delimiter has to be at a line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +/*****************************************************************************/ +/* */ +/* Pod "Ordinary Block of Text" paragraphs */ +/* */ +/* "It will be filled, and maybe even justified" - I'm setting the whole */ +/* Pod in adjust @Break, and making sure that verbatim and command */ +/* paragraphs don't get adjusted. So no special requirements here, it */ +/* should all happen without any explicit tokens, given that I've set */ +/* the Pod language up to simply echo any characters (suitably quoted if */ +/* necessary in Lout) that don't match anything else. */ +/* */ +/*****************************************************************************/ + +/*****************************************************************************/ +/* */ +/* Pod interior sequences (recursive) */ +/* */ +/* I<text> Italicize text */ +/* B<text> Embolden text */ +/* S<text> Text containing non-break spaces */ +/* C<code> Code "render in typewriter font, or ..." */ +/* */ +/* Alternatively, instead of "<" .. ">" we may use "<< " .. " >>", or */ +/* "<<< " .. " >>>", etc. (Note the whitespace.) */ +/* */ +/*****************************************************************************/ + +#define RecursiveToken(str, command) /* Pod recursive token */ \ +{ \ + str, /* name used for debugging only */ \ + PRINT_NODELIMS_INNER, /* recursively format the inside */ \ + command, /* Lout command for formatting this */ \ + "", "", /* no alternate command; no following */ \ + FALSE, /* token not just start of line */ \ + { str }, /* token begins with this */ \ + { "<", "<< ", "<<< ", "<<<< " }, /* start2 */ \ + { ">", " >>", " >>>", " >>>>" }, /* end2 */ \ + AllCharacters, /* anything inside (in fact, not used)*/ \ + "", "", /* no escape character */ \ + "", "", /* no inner escape; no end inner esc */ \ + "", /* will use end2 here */ \ + FALSE, /* end not have to be at line start */ \ + FALSE, /* don't end delimiter twice to stop */ \ } -TOKEN HashToken = FixedToken("#", "@O"); -TOKEN ExclamationToken = FixedToken("!", "@O"); -TOKEN PercentToken = FixedToken("%", "@O"); -TOKEN HatToken = FixedToken("^", "@O"); -TOKEN AmpersandToken = FixedToken("&", "@O"); -TOKEN StarToken = FixedToken("*", "@ST"); -TOKEN SlashToken = FixedToken("/", "@O"); -TOKEN ArrowToken = FixedToken("->", "arrowright @A @O"); -TOKEN BackSlashToken = FixedToken("\\", "@O"); -TOKEN LeftParenToken = FixedToken("(", "@O"); -TOKEN RightParenToken = FixedToken(")", "@O"); -TOKEN MinusToken = FixedToken("-", "@M"); -TOKEN PlusToken = FixedToken("+", "plus @A @O"); -TOKEN EqualToken = FixedToken("=", "equal @A @O"); -TOKEN LeftBraceToken = FixedToken("{", "@O"); -TOKEN RightBraceToken = FixedToken("}", "@O"); -TOKEN BarToken = FixedToken("|", "@O"); -TOKEN CircumToken = FixedToken("~", "@O"); -TOKEN LeftBracketToken = FixedToken("[", "@O"); -TOKEN RightBracketToken = FixedToken("]", "@O"); -TOKEN SemicolonToken = FixedToken(";", "@O"); -TOKEN ColonToken = FixedToken(":", "@O"); -TOKEN LessToken = FixedToken("<", "less @A @O"); -TOKEN GreaterToken = FixedToken(">", "greater @A @O"); -TOKEN QuestionToken = FixedToken("?", "@O"); -TOKEN CommaToken = FixedToken(",", "@O"); -TOKEN DotToken = FixedToken(".", "@O"); -TOKEN EiffelDotToken = FixedToken(".", "@ED"); -TOKEN LessEqualToken = FixedToken("<=", "lessequal @A @O"); -TOKEN GreaterEqualToken = FixedToken(">=", "greaterequal @A @O"); -TOKEN CNotEqualToken = FixedToken("!=", "notequal @A @O"); -TOKEN EiffelNotEqualToken = FixedToken("/=", "notequal @A @O"); -TOKEN BlueNotEqualToken = FixedToken("<>", "notequal @A @O"); -TOKEN AssignToken = FixedToken(":=", "@O"); -TOKEN QuestionAssignToken = FixedToken("?=", "@O"); -TOKEN DollarToken = FixedToken("$", "@O"); -TOKEN ImpliesToken = FixedToken("=>", "implies @A @O"); +TOKEN PodItalicToken = RecursiveToken("I", "@PFI"); +TOKEN PodBoldToken = RecursiveToken("B", "@PFB"); +TOKEN PodNoBreakToken = RecursiveToken("S", "@OneCol"); +TOKEN PodCodeToken = RecursiveToken("C", "@PFC"); + +/*****************************************************************************/ +/* */ +/* Pod interior sequences (non-recursive) */ +/* */ +/* L<name> A link; these have an internal format I've not looked at yet. */ +/* F<file> File name */ +/* X<index> Index */ +/* Z<> A zero-width space */ +/* */ +/* Alternatively, instead of "<" .. ">" we may use "<< " .. " >>", or */ +/* "<<< " .. " >>>", etc. (Note the whitespace.) */ +/* */ +/*****************************************************************************/ + +#define InteriorToken(str, command, style) /* Pod delimited token */ \ +{ \ + str, /* name used for debugging only */ \ + style, /* print this token unquoted */ \ + command, /* Lout command for formatting this */ \ + "", "", /* no alternate command; no following */ \ + FALSE, /* token not just start of line */ \ + { str }, /* token begins with this */ \ + { "<", "<< ", "<<< ", "<<<< " }, /* start2 */ \ + { ">", " >>", " >>>", " >>>>" }, /* end2 */ \ + AllCharacters, /* anything inside */ \ + "", "", /* no escape character */ \ + "", "", /* no inner escape; no end inner esc */ \ + "", /* will use end2 here */ \ + FALSE, /* end not have to be at line start */ \ + FALSE, /* don't end delimiter twice to stop */ \ +} + +TOKEN PodFileToken = InteriorToken("F", "@PFF", PRINT_NODELIMS_QUOTED); +TOKEN PodLinkToken = InteriorToken("L", "@PFL", PRINT_NODELIMS_QUOTED); +TOKEN PodIndexToken = InteriorToken("X", "@PFX", PRINT_NODELIMS_QUOTED); +TOKEN PodZeroToken = InteriorToken("Z", "", PRINT_COMMAND_ONLY); + +/*****************************************************************************/ +/* */ +/* Pod interior sequences (escape sequences) */ +/* */ +/* E<escape> A named character ("optional except in other interior */ +/* sequences and when preceded by a capital letter") */ +/* */ +/* E<lt> A literal < */ +/* E<gt> A literal > */ +/* E<sol> A literal / */ +/* E<verbar> A literal | */ +/* E<n> Character number n (probably in ASCII) */ +/* E<html> Some non-numeric HTML entity, such as E<Agrave> */ +/* */ +/* PodNumCharToken not tested. */ +/* */ +/*****************************************************************************/ + +TOKEN PodNumCharToken = { + "E<>", /* used by error messages involving this token */ + PRINT_NODELIMS_UNQUOTED,/* we're doing these manually, since they're funny*/ + "\"\\", /* precede character number with \" */ + "", /* no alternate command */ + "\"", /* follow character number with " */ + FALSE, /* token allowed at start of line only */ + { "E<" }, /* command begins with this */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no end2 either */ + "0123456789", /* digits inside */ + "", "", /* no escape character */ + "", "", /* no "inner escapes" */ + ">", /* token will end with > character */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + +#define PodEscapeToken(str, command) /* Pod delimited token */ \ +{ \ + str, /* name used for debugging only */ \ + PRINT_COMMAND_ONLY, /* print this token unquoted */ \ + command, /* Lout command for formatting this */ \ + "", /* no alternate command */ \ + "", /* no following command */ \ + FALSE, /* token not just start of line */ \ + { str }, /* token begins with this */ \ + { NULL }, /* start2 */ \ + { NULL }, /* end2 */ \ + "", /* nothing inside */ \ + "", "", /* no escape character */ \ + "", "", /* no inner escape either */ \ + "", /* no ending delimiter except end2 */ \ + FALSE, /* end not have to be at line start */ \ + FALSE, /* don't end delimiter twice to stop */ \ +} + +TOKEN PodLessThanToken = PodEscapeToken("E<lt>", "<"); +TOKEN PodGreaterThanToken = PodEscapeToken("E<gt>", ">"); +TOKEN PodSlashToken = PodEscapeToken("E<sol>", "/"); +TOKEN PodVerbarToken = PodEscapeToken("E<verbar>", "|"); + + +/*****************************************************************************/ +/* */ +/* Mark Summerfield writes: */ +/* */ +/* The following table (and most of its comments) is copied from Gisle Aas */ +/* HTML::Entities.pm module with the plain text characters being replaced */ +/* by their Lout equivalents and the HTML entities with their pod */ +/* equivalents. */ +/* */ +/*****************************************************************************/ + +/* Some normal chars that have special meaning in SGML context */ +TOKEN PE00 = PodEscapeToken("E<amp>", "&"); +/* already done above TOKEN PE01 = PodEscapeToken("E<gt>", ">"); */ +/* already done above TOKEN PE02 = PodEscapeToken("E<lt>", "<"); */ +TOKEN PE03 = PodEscapeToken("E<quot>", "\"\\\"\""); + +/* PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML */ +TOKEN PE04 = PodEscapeToken("E<AElig>", "{@Char AE}"); +TOKEN PE05 = PodEscapeToken("E<Aacute>", "{@Char Aacute}"); +TOKEN PE06 = PodEscapeToken("E<Acirc>", "{@Char Acircumflex}"); +TOKEN PE07 = PodEscapeToken("E<Agrave>", "{@Char Agrave}"); +TOKEN PE08 = PodEscapeToken("E<Aring>", "{@Char Aring}"); +TOKEN PE09 = PodEscapeToken("E<Atilde>", "{@Char Atilde}"); +TOKEN PE10 = PodEscapeToken("E<Auml>", "{@Char Adieresis}"); +TOKEN PE11 = PodEscapeToken("E<Ccedil>", "{@Char Ccedilla}"); +TOKEN PE12 = PodEscapeToken("E<ETH>", "{@Char Eth}"); +TOKEN PE13 = PodEscapeToken("E<Eacute>", "{@Char Eacute}"); +TOKEN PE14 = PodEscapeToken("E<Ecirc>", "{@Char Ecircumflex}"); +TOKEN PE15 = PodEscapeToken("E<Egrave>", "{@Char Egrave}"); +TOKEN PE16 = PodEscapeToken("E<Euml>", "{@Char Edieresis}"); +TOKEN PE17 = PodEscapeToken("E<Iacute>", "{@Char Iacute}"); +TOKEN PE18 = PodEscapeToken("E<Icirc>", "{@Char Icircumflex}"); +TOKEN PE19 = PodEscapeToken("E<Igrave>", "{@Char Igrave}"); +TOKEN PE20 = PodEscapeToken("E<Iuml>", "{@Char Idieresis}"); +TOKEN PE21 = PodEscapeToken("E<Ntilde>", "{@Char Ntilde}"); +TOKEN PE22 = PodEscapeToken("E<Oacute>", "{@Char Oacute}"); +TOKEN PE23 = PodEscapeToken("E<Ocirc>", "{@Char Ocircumflex}"); +TOKEN PE24 = PodEscapeToken("E<Ograve>", "{@Char Ograve}"); +TOKEN PE25 = PodEscapeToken("E<Oslash>", "{@Char Oslash}"); +TOKEN PE26 = PodEscapeToken("E<Otilde>", "{@Char Otilde}"); +TOKEN PE27 = PodEscapeToken("E<Ouml>", "{@Char Odieresis}"); +TOKEN PE28 = PodEscapeToken("E<THORN>", "{@Char Thorn}"); +TOKEN PE29 = PodEscapeToken("E<Uacute>", "{@Char Uacute}"); +TOKEN PE30 = PodEscapeToken("E<Ucirc>", "{@Char Ucircumflex}"); +TOKEN PE31 = PodEscapeToken("E<Ugrave>", "{@Char Ugrave}"); +TOKEN PE32 = PodEscapeToken("E<Uuml>", "{@Char Udieresis}"); +TOKEN PE33 = PodEscapeToken("E<Yacute>", "{@Char Yacute}"); +TOKEN PE34 = PodEscapeToken("E<aacute>", "{@Char aacute}"); +TOKEN PE35 = PodEscapeToken("E<acirc>", "{@Char acircumflex}"); +TOKEN PE36 = PodEscapeToken("E<aelig>", "{@Char ae}"); +TOKEN PE37 = PodEscapeToken("E<agrave>", "{@Char agrave}"); +TOKEN PE38 = PodEscapeToken("E<aring>", "{@Char aring}"); +TOKEN PE39 = PodEscapeToken("E<atilde>", "{@Char atilde}"); +TOKEN PE40 = PodEscapeToken("E<auml>", "{@Char adieresis}"); +TOKEN PE41 = PodEscapeToken("E<ccedil>", "{@Char ccedilla}"); +TOKEN PE42 = PodEscapeToken("E<eacute>", "{@Char eacute}"); +TOKEN PE43 = PodEscapeToken("E<ecirc>", "{@Char ecircumflex}"); +TOKEN PE44 = PodEscapeToken("E<egrave>", "{@Char egrave}"); +TOKEN PE45 = PodEscapeToken("E<eth>", "{@Char eth}"); +TOKEN PE46 = PodEscapeToken("E<euml>", "{@Char edieresis}"); +TOKEN PE47 = PodEscapeToken("E<iacute>", "{@Char iacute}"); +TOKEN PE48 = PodEscapeToken("E<icirc>", "{@Char icircumflex}"); +TOKEN PE49 = PodEscapeToken("E<igrave>", "{@Char igrave}"); +TOKEN PE50 = PodEscapeToken("E<iuml>", "{@Char idieresis}"); +TOKEN PE51 = PodEscapeToken("E<ntilde>", "{@Char ntilde}"); +TOKEN PE52 = PodEscapeToken("E<oacute>", "{@Char oacute}"); +TOKEN PE53 = PodEscapeToken("E<ocirc>", "{@Char ocircumflex}"); +TOKEN PE54 = PodEscapeToken("E<ograve>", "{@Char ograve}"); +TOKEN PE55 = PodEscapeToken("E<oslash>", "{@Char oslash}"); +TOKEN PE56 = PodEscapeToken("E<otilde>", "{@Char otilde}"); +TOKEN PE57 = PodEscapeToken("E<ouml>", "{@Char odieresis}"); +TOKEN PE58 = PodEscapeToken("E<szlig>", "{@Char germandbls}"); +TOKEN PE59 = PodEscapeToken("E<thorn>", "{@Char thorn}"); +TOKEN PE60 = PodEscapeToken("E<uacute>", "{@Char uacute}"); +TOKEN PE61 = PodEscapeToken("E<ucirc>", "{@Char ucircumflex}"); +TOKEN PE62 = PodEscapeToken("E<ugrave>", "{@Char ugrave}"); +TOKEN PE63 = PodEscapeToken("E<uuml>", "{@Char udieresis}"); +TOKEN PE64 = PodEscapeToken("E<yacute>", "{@Char yacute}"); +TOKEN PE65 = PodEscapeToken("E<yuml>", "{@Char ydieresis}"); + +/* Some extra Latin 1 chars that are listed in the HTML3.2 draft 1996/05/21 */ +TOKEN PE66 = PodEscapeToken("E<copy>", "{@CopyRight}"); +TOKEN PE67 = PodEscapeToken("E<reg>", "{@Register}"); +TOKEN PE68 = PodEscapeToken("E<nbsp>", "~"); + +/* Additional ISO-8859/1 entities listed in rfc1866 (section 14) */ +TOKEN PE69 = PodEscapeToken("E<iexcl>", "{@Char exclamdown}"); +TOKEN PE70 = PodEscapeToken("E<cent>", "{@Char cent}"); +TOKEN PE71 = PodEscapeToken("E<pound>", "{@Sterling}"); +TOKEN PE72 = PodEscapeToken("E<curren>", "{@Char currency}"); +TOKEN PE73 = PodEscapeToken("E<yen>", "{@Yen}"); +TOKEN PE74 = PodEscapeToken("E<brvbar>", "{@Char bar}"); +TOKEN PE75 = PodEscapeToken("E<sect>", "{@SectSym}"); +TOKEN PE76 = PodEscapeToken("E<uml>", "{@Char dieresis}"); +TOKEN PE77 = PodEscapeToken("E<ordf>", "{@Char ordfeminine}"); +TOKEN PE78 = PodEscapeToken("E<laquo>", "{@Char guillemotleft}"); +TOKEN PE79 = PodEscapeToken("E<not>", "{@Char logicalnot}"); +TOKEN PE80 = PodEscapeToken("E<shy>", "{@Char hyphen}"); +TOKEN PE81 = PodEscapeToken("E<macr>", "{@Char macron}"); +TOKEN PE82 = PodEscapeToken("E<deg>", "{@Char degree}"); +TOKEN PE83 = PodEscapeToken("E<plusmn>", "{@Char plusminus}"); +TOKEN PE84 = PodEscapeToken("E<sup1>", "{@Char onesuperior}"); +TOKEN PE85 = PodEscapeToken("E<sup2>", "{@Char twosuperior}"); +TOKEN PE86 = PodEscapeToken("E<sup3>", "{@Char threesuperior}"); +TOKEN PE87 = PodEscapeToken("E<acute>", "{@Char acute}"); +TOKEN PE88 = PodEscapeToken("E<micro>", "{@Char mu}"); +TOKEN PE89 = PodEscapeToken("E<para>", "{@ParSym}"); +TOKEN PE90 = PodEscapeToken("E<middot>", "{@Char periodcentered}"); +TOKEN PE91 = PodEscapeToken("E<cedil>", "{@Char cedilla}"); +TOKEN PE92 = PodEscapeToken("E<ordm>", "{@Char ordmasculine}"); +TOKEN PE93 = PodEscapeToken("E<raquo>", "{@Char guillemotright}"); +TOKEN PE94 = PodEscapeToken("E<frac14>", "{@Char onequarter}"); +TOKEN PE95 = PodEscapeToken("E<frac12>", "{@Char onehalf}"); +TOKEN PE96 = PodEscapeToken("E<frac34>", "{@Char threequarters}"); +TOKEN PE97 = PodEscapeToken("E<iquest>", "{@Char questiondown}"); +TOKEN PE98 = PodEscapeToken("E<times>", "{@Multiply}"); +TOKEN PE99 = PodEscapeToken("E<divide>", "{@Divide}"); + + /*****************************************************************************/ /* */ /* LANGUAGE - put your language declarations in this section. */ /* */ +/* The field names and their meanings are: */ +/* */ +/* names Set of alternative names for this languages */ +/* setup_file The default Lout setup file (e.g. "cprint", "eiffel") */ +/* lang_sym The symbol for the language (e.g. "@CP", "@Eiffel") */ +/* no_match What to do if something fails to match (see below) */ +/* tokens Set of all tokens of this language */ +/* keywords Set of all keywords for this language */ +/* */ +/* Acceptable values for no_match are: */ +/* */ +/* NO_MATCH_ERROR Generate an error message and skip the character. */ +/* */ +/* NO_MATCH_PRINT Print the character in a way that is Lout-safe; that */ +/* is, mostly raw but in quotes for "/", "@" etc., and */ +/* handling tabs and newlines appropriately. */ +/* */ /*****************************************************************************/ +#define NO_MATCH_ERROR 1 +#define NO_MATCH_PRINT 2 +#define NO_LANGUAGE ((LANGUAGE *) NULL) typedef struct lang_rec { char *names[MAX_ARRAY_LENGTH]; - char *default_style; + char *setup_file; + char *lang_sym; + int no_match; TOKEN *tokens[MAX_ARRAY_LENGTH]; char *keywords[MAX_ARRAY_LENGTH]; } LANGUAGE; + LANGUAGE CLanguage = { { "C", "c", "C++", "c++" }, - "fixed", + "cprint", "@CP", + NO_MATCH_ERROR, { &CStringToken, &CCharacterToken, &IdentifierToken, &NumberToken, &CCommentToken, &CCommentEscapeToken, @@ -395,19 +1975,26 @@ LANGUAGE CLanguage = { }, { "asm", "auto", "break", "case", "catch", "char", "class", "const", - "continue", "default", "delete", "do", "double", "else", "enum", - "extern", "float", "for", "friend", "goto", "if", "inline", "int", - "long", "new", "operator", "private", "protected", "public", "register", - "return", "short", "signed", "sizeof", "static", "struct", "switch", - "template", "this", "throw", "try", "typedef", "union", "unsigned", - "virtual", "void", "volatile", "while", + "continue", "default", "delete", "do", "double", "else", "enum", "extern", + "float", "for", "friend", "goto", "if", "inline", "int", "long", "new", + "operator", "private", "protected", "public", "register", "return", + "short", "signed", "sizeof", "static", "struct", "switch", "template", + "this", "throw", "try", "typedef", "union", "unsigned", "virtual", + "void", "volatile", "while", } }; + +/*****************************************************************************/ +/* */ +/* Eiffel and Blue */ +/* */ +/*****************************************************************************/ LANGUAGE EiffelLanguage = { { "Eiffel", "eiffel" }, - "varying", + "eiffel", "@Eiffel", + NO_MATCH_ERROR, { &EiffelStringToken, &EiffelCharacterToken, &IdentifierToken, &NumberToken, &EiffelCommentToken, &EiffelCommentEscapeToken, @@ -418,22 +2005,21 @@ LANGUAGE EiffelLanguage = { &MinusToken, &DollarToken, &HatToken, &SlashToken, &BackSlashToken, &LessToken, &GreaterToken, &LessEqualToken, &GreaterEqualToken }, - { "alias", "all", "and", "as", "check", "class", "creation", "debug", - "deferred", "do", "else", "elseif", "end", "ensure", "expanded", - "export", "external", "false", "feature", "from", "frozen", "if", - "implies", "indexing", "infix", "inherit", "inspect", "invariant", - "is", "like", "local", "loop", "obsolete", "old", "once", "or", - "prefix", "redefine", "rename", "require", "rescue", "retry", "select", - "separate", "strip", "then", "true", "undefine", "unique", "until", - "variant", "when", "xor", "not", "interface" + "deferred", "do", "else", "elseif", "end", "ensure", "expanded", "export", + "external", "false", "feature", "from", "frozen", "if", "implies", + "indexing", "infix", "inherit", "inspect", "invariant", "is", "like", + "local", "loop", "obsolete", "old", "once", "or", "prefix", "redefine", + "rename", "require", "rescue", "retry", "select", "separate", "strip", + "then", "true", "undefine", "unique", "until", "variant", "when", "xor", + "not", "interface" } }; - LANGUAGE BlueLanguage = { { "Blue", "blue" }, - "varying", + "blue", "@Blue", + NO_MATCH_ERROR, { &CStringToken, &IdentifierToken, &NumberToken, &BlueCommentToken, &BlueCommentEscapeToken, @@ -443,30 +2029,246 @@ LANGUAGE BlueLanguage = { &EqualToken, &BlueNotEqualToken, &LeftBraceToken, &RightBraceToken, &PlusToken, &MinusToken, &StarToken, &SlashToken, &HatToken, &LessEqualToken, &GreaterEqualToken + }, + { "and", "assert", "builtin", "case", "class", "const", "create", "creation", + "deferred", "div", "do", "else", "elseif", "end", "Enumeration", + "enumeration", "exit", "if", "in", "interface", "internal", "invariant", + "is", "loop", "manifest", "mod", "not", "of", "old", "on", "or", "post", + "pre", "redefined", "return", "routines", "super", "then", "uses", "var" + } +}; + +/*****************************************************************************/ +/* */ +/* Perl and Pod */ +/* */ +/*****************************************************************************/ + +LANGUAGE PerlLanguage = { + { "Perl", "perl", }, + "perl", "@Perl", + NO_MATCH_ERROR, + { + &PerlSingleQuoteStringToken, &PerlDoubleQuoteStringToken, + &PerlBackQuoteStringToken, &PerlQTypeStringToken, &PerlSTypeStringToken, + &PerlRegExpLPar, &PerlRegExpEq, &PerlRegExpMatch, &PerlRegExpNoMatch, + &PerlRegExpSplit, &PerlRegExpIf, &PerlRegExpNot, &PerlRegExpUnless, + &PerlRegExpFor, &PerlRegExpForEach, &PerlRegExpWhile, + &PerlRegExpStartLineToken, + &HereEOTuq, &HereEOTdq, &HereEOTfq, &HereEOTbq, + &HereEOFuq, &HereEOFdq, &HereEOFfq, &HereEOFbq, + &HereENDuq, &HereENDdq, &HereENDfq, &HereENDbq, + &HereBLAuq, &HereBLAdq, &HereBLAfq, &HereBLAbq, + &PerlIdentifierToken, &PerlSpecialIdentifierToken, + &PerlLiteralNumberToken, &PerlHexNumberToken, + &PerlCommentToken, &PerlCommentEscapeToken, &PerlPodToken, + &ExclamationToken, &PercentToken, &HatToken, &AmpersandToken, + &StarToken, &SlashToken, &ArrowToken, &BackSlashToken, + &LeftParenToken, &RightParenToken, &MinusToken, &PlusToken, + &LeftBraceToken, &RightBraceToken, &BarToken, &CircumToken, + &LeftBracketToken, &RightBracketToken, &SemicolonToken, &ColonToken, + &LessToken, &GreaterToken, &QuestionToken, &CommaToken, &DotToken, + &LessEqualToken, &GreaterEqualToken, &CNotEqualToken, + &PerlIncrementToken, &PerlDecrementToken, &PerlExponentiateToken, + &PerlMatchToken, &PerlNotMatchToken, + &PerlEqualToken, &PerlAssignToken, &PerlBitLeftShiftToken, + &PerlBitRightShiftToken, &PerlSpaceshipToken, + &PerlAndToken, &PerlOrToken, &PerlRange2Token, &PerlRange3Token, + &PerlFileTestrToken, &PerlFileTestwToken, &PerlFileTestxToken, + &PerlFileTestoToken, &PerlFileTestRToken, &PerlFileTestWToken, + &PerlFileTestXToken, &PerlFileTestOToken, &PerlFileTesteToken, + &PerlFileTestzToken, &PerlFileTestsToken, &PerlFileTestfToken, + &PerlFileTestdToken, &PerlFileTestlToken, &PerlFileTestpToken, + &PerlFileTestSToken, &PerlFileTestbToken, &PerlFileTestcToken, + &PerlFileTesttToken, &PerlFileTestuToken, &PerlFileTestgToken, + &PerlFileTestkToken, &PerlFileTestTToken, &PerlFileTestBToken, + &PerlFileTestMToken, &PerlFileTestAToken, &PerlFileTestCToken, }, - { "and", "assert", "builtin", "case", "class", "const", "create", - "creation", "deferred", "div", "do", "else", "elseif", "end", - "Enumeration", "enumeration", "exit", "if", "in", "interface", - "internal", "invariant", "is", "loop", "manifest", "mod", "not", - "of", "old", "on", "or", "post", "pre", "redefined", "return", - "routines", "super", "then", "uses", "var" + { + /* Built-ins taken from Programming Perl 2nd Ed. */ + "abs", "accept", "alarm", "atan2", + "bind", "binmode", "bless", + "caller", "chdir", "chmod", "chomp", "chop", "chown", "chr", "chroot", + "close", "closedir", "connect", "continue", "cos", "crypt", + "dbmclose", "dbmopen", "defined", "delete", "die", "do", "dump", + "each", "endgrent", "endhostent", "endnetent", "endprotoent", + "endpwent", "endservent", "eof", "eval", "exec", "exists", "exit", + "exp", + "fcntl", "fileno", "flock", "fork", "format", "formline", + "getc", "getgrent", "getgrgid", "getgrnam", "gethostbyaddr", + "gethostbyname", "gethostent", "getlogin", "getnetbyaddr", + "getnetbyname", "getnetent", "getpeername", "getpgrp", "getppid", + "getpriority", "getprotobyname", "getprotobynumber", "getprotoent", + "getpwent", "getpwnam", "getpwuid", "getservbyname", "getservbyport", + "getservent", "getsockname", "getsockopt", "glob", "gmtime", "goto", + "grep", + "hex", + "import", "index", "int", "ioctl", + "join", + "keys", "kill", + "last", "lc", "lcfirst", "length", "link", "listen", "local", + "localtime", "log", "lstat", + "map", "mkdir", "msgctl", "msgget", "msgrcv", "msgsnd", "my", + "next", "no", + "oct", "open", "opendir", "ord", + "pack", "package", "pipe", "pop", "pos", "print", "printf", "push", + "quotemeta", + "rand", "read", "readdir", "readlink", "recv", "redo", "ref", + "rename", "require", "reset", "return", "reverse", "rewinddir", + "rindex", "rmdir", + "scalar", "seek", "seekdir", "select", "semctl", "semget", "semop", + "send", "setgrent", "sethostent", "setnetent", "setpgrp", + "setpriority", "setprotoent", "setpwent", "setservent", + "setsockopt", "shift", "shmctl", "shmget", "shmread", "shmwrite", + "shutdown", "sin", "sleep", "socket", "socketpair", "sort", + "splice", "split", "sprintf", "sqrt", "srand", "stat", "study", + "sub", "substr", "symlink", "syscall", "sysopen", "sysread", "sysseek", + "system", "syswrite", + "tell", "telldir", "tie", "tied", "time", "times", "truncate", + "uc", "ucfirst", "umask", "undef", "unlink", "unpack", "unshift", + "untie", "use", "utime", + "values", "vec", + "wait", "waitpid", "wantarray", "warn", "write", + + /* from later versions of Perl (excluding experimental thread stuff) */ + "our", "prototype", "readline", "readpipe", + + /* operators */ + "lt", "gt", "eq", "ne", "cmp", "le", "ge", + + /* Perl special variables */ + /* NB special variables that begin $^, e.g. $^D can also be written as */ + /* dollar control D (yes literal control D) -- but suggest we ignore */ + "$_", "$ARG", + "$.", "$INPUT_LINE_SEPARATOR", "$NR", + "$/", "$INPUT_RECORD_SEPARATOR", "$RS", + "$,", "$OUTPUT_FIELD_SEPARATOR", "$OFS", + "$\\", "$OUTPUT_RECORD_SEPARATOR", "$ORS", + "$\"", "$LIST_SEPARATOR", + "$;", "$SUBSCRIPT_SEPARATOR", "$SUBSEP", + "$^L", "$FORMAT_FORMFEED", + "$:", "$FORMAT_LINE_BREAK_CHARACTERS", + "$^A", "$ACCUMULATOR", + "$#", "$OFMT", + "$?", "$CHILD_ERROR", + "$!", "$OS_ERROR", "$ERRNO", + "$@", "$EVAL_ERROR", + "$$", "$PROCESS_ID", "$PID", + "$<", "$REAL_USER_ID", "$UID", + "$>", "$EFFECTIVE_USER_ID", "$EUID", + "$(", "$REAL_GROUP_ID", "$GID", + "$)", "$EFFECTIVE_GROUP_ID", "$EGID", + "$0", /* dollar zero */ "$PROGRAM_NAME", + "$[", /* deprecated */ + "$]", "$PERL_VERSION", + "$^D", "$DEBUGGING", + "$^E", "$EXTENDED_OS_ERROR", + "$^F", "$SYSTEM_FD_MAX", + "$^H", + "$^I", "$INPLACE_EDIT", + "$^M", + "$^O", "$OSNAME", + "$^P", "$PERLDB", + "$^T", "$BASETIME", + "$^W", "$WARNING", + "$^X", "$EXECUTABLE_NAME", + "$ARGV", + "@ARGV", "@INC", "@F", "%INC", "%ENV", "%SIG", + "ARGV", "STDERR", "STDIN", "STDOUT", "DATA" + "$&", "$MATCH", + "$`", "$PREMATCH", + "$'", "$POSTMATCH", + "$+", "$LAST_PAREN_MATCH", + "$|", "$OUTPUT_AUTOFLUSH", + "$%", "$FORMAT_PAGE_NUMBER", + "$=", "$FORMAT_LINES_PER_PAGE", + "$-", "$FORMAT_LINES_LEFT", + "$~", "$FORMAT_NAME", + "$^", "$FORMAT_TOP_NAME", + + /* Perl special constants */ + "__END__", "__FILE__", "__LINE__", "__PACKAGE__", + + /* Perl regex variables - first 9 only */ + "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + + /* Perl pragmas */ + "attrs", "autouse", "base", "blib", "constant", "diagnostics", "fields", + "integer", "less", "lib", "locale", "ops", "overload", "re", "sigtrap", + "strict", "subs", "vars", + + /* Perl textual operators */ + "and", "or", "xor", "not", + + /* the x keyword */ + "x", + + /* Perl control structures */ + "if", "elsif", /* yes one e */ "else", "unless", + "while", "for", "foreach", "continue", "until", } }; +LANGUAGE PodLanguage = { + { "Pod", "pod", "POD" }, + "pod", "@Pod", + NO_MATCH_PRINT, + { + &PodVerbatimLineToken, &PodEmptyLineToken, &PodIgnoreToken, + &PodHeading1Token, &PodHeading2Token, + &PodOverToken, &PodItemToken, &PodBackToken, + &PodForToken, &PodBeginToken, &PodBeginLoutToken, + &PodItalicToken, &PodBoldToken, &PodCodeToken, &PodFileToken, + &PodNoBreakToken, &PodLinkToken, &PodIndexToken, &PodZeroToken, + &PodLessThanToken, &PodGreaterThanToken, &PodSlashToken, + &PodVerbarToken, + &PE00, /* &PE01, &PE02, */ &PE03, &PE04, &PE05, &PE06, &PE07, &PE08, &PE09, + &PE10, &PE11, &PE12, &PE13, &PE14, &PE15, &PE16, &PE17, &PE18, &PE19, + &PE20, &PE21, &PE22, &PE23, &PE24, &PE25, &PE26, &PE27, &PE28, &PE29, + &PE30, &PE31, &PE32, &PE33, &PE34, &PE35, &PE36, &PE37, &PE38, &PE39, + &PE40, &PE41, &PE42, &PE43, &PE44, &PE45, &PE46, &PE47, &PE48, &PE49, + &PE50, &PE51, &PE52, &PE53, &PE54, &PE55, &PE56, &PE57, &PE58, &PE59, + &PE60, &PE61, &PE62, &PE63, &PE64, &PE65, &PE66, &PE67, &PE68, &PE69, + &PE70, &PE71, &PE72, &PE73, &PE74, &PE75, &PE76, &PE77, &PE78, &PE79, + &PE80, &PE81, &PE82, &PE83, &PE84, &PE85, &PE86, &PE87, &PE88, &PE89, + &PE90, &PE91, &PE92, &PE93, &PE94, &PE95, &PE96, &PE97, &PE98, &PE99, + &PodNumCharToken, + }, + + { NULL }, +}; + + +/*****************************************************************************/ +/* */ +/* The "languages" variable - add your language to this list */ +/* */ +/*****************************************************************************/ + LANGUAGE *languages[] = { & CLanguage, & EiffelLanguage, & BlueLanguage, + & PerlLanguage, + & PodLanguage, }; + + + + +/*****************************************************************************/ +/*****************************************************************************/ +/*****************************************************************************/ +/*** ***/ +/*** If you are adding a new language, you don't need to change anything ***/ +/*** below this point. Just repeating: don't change anything below here. ***/ +/*** ***/ +/*****************************************************************************/ /*****************************************************************************/ -/* */ -/* If you are adding a new language, you don't need to change anything */ -/* below this point. Just repeating: don't change anything below here. */ -/* */ /*****************************************************************************/ @@ -483,18 +2285,9 @@ LANGUAGE *languages[] = { #define DEBUG_EMIT 0 #define DEBUG_MAIN 0 -#define PRG2LOUT_VERSION "prg2lout Version 1.0 (February 2000)" -#define BOOLEAN unsigned -#define FALSE 0 -#define TRUE 1 +#define PRG2LOUT_VERSION "prg2lout Version 2.0 (April 2000)" #define MAX_LINE 1024 -/* print styles */ -#define NO_STYLE 0 -#define FIXED_STYLE 1 -#define VARYING_STYLE 2 -#define SYMBOL_STYLE 3 - static char file_name[MAX_LINE]; /* current input file name */ static char curr_line[MAX_LINE]; /* current input line */ static int line_num; /* current input line number */ @@ -502,12 +2295,13 @@ static int line_pos; /* current input column number */ static BOOLEAN raw_seen; /* TRUE if -r (raw mode) */ static BOOLEAN headers_option; /* TRUE if no -n option (headers) */ -static int style_option; /* value of -p option, or NO_STYLE */ +static char *style_option; /* value of -p option, else null */ static char *font_option; /* value of -f option, else null */ static char *size_option; /* value of -s option, else null */ static char *line_option; /* value of -v option, else null */ static char *tabin_option; /* value of -t option, else null */ static char *tabout_option; /* value of -T option, else null */ +static char *setup_option; /* value of -S option, else null */ static char *language_option; /* value of -l option, else null */ static BOOLEAN tab_by_spacing; /* TRUE if using space chars to tab */ @@ -515,9 +2309,22 @@ static int tab_in; /* tab interval, value of -t option */ static float tab_out; /* tab interval width (-T option) */ static char tab_unit; /* unit of measurement for tab */ +static FILE *in_fp; /* where input comes from */ +static FILE *out_fp; /* where output goes */ +static FILE *err_fp; /* where error messages go */ + + +/*****************************************************************************/ +/* */ +/* char *ErrorHeader() */ +/* */ +/* Returns a string showing the current file, line, and column. */ +/* */ +/*****************************************************************************/ + char *ErrorHeader() { static char buff[MAX_LINE]; - if( line_num == 0 ) + if( line_num == 0 || line_pos == 0 ) sprintf(buff, "prg2lout"); else if( raw_seen ) sprintf(buff, "prg2lout %d,%d", line_num, line_pos); @@ -525,6 +2332,16 @@ char *ErrorHeader() sprintf(buff, "prg2lout %s %d,%d", file_name, line_num, line_pos); return buff; } + + +/*****************************************************************************/ +/* */ +/* GetArg(arg, message, null_ok) */ +/* */ +/* Get the next command line argument's value into arg. If there isn't */ +/* one, print an error message and quit unless null_ok is true. */ +/* */ +/*****************************************************************************/ #define GetArg(arg, message, null_ok) \ { if( strcmp(argv[arg_pos]+2, "") != 0 ) \ @@ -556,9 +2373,84 @@ char *EchoToken(TOKEN *t) sprintf(buff, "%s", t->name); return buff; } + +/*****************************************************************************/ +/* */ +/* NextChar() */ +/* */ +/* Move to next character in the input file. This may involve changing */ +/* global variables curr_line, line_num, and line_pos; the new character */ +/* may be found in curr_line[line_pos]. */ +/* */ +/* NextChar does not skip any characters at all. When end of file is */ +/* reached, curr_line[line_pos] contains '\0'. */ +/* */ +/* It is possible for code to read ahead of curr_line[line_pos] up to and */ +/* including the newline character at the end of the line after the line */ +/* we are currently on (thus it is possible to recognize an empty line as */ +/* \n\n), but not beyond, using curr_line[line_pos + i] for i > 0. */ +/* */ +/*****************************************************************************/ +void NextChar() +{ + if( curr_line[line_pos] != '\n' ) + { + /* we can carry on with the current line. This will yield '\0' as */ + /* desired if EOF arrives before the end of the line */ + line_pos++; + } + else if( curr_line[line_pos+1] != '\0' ) + { + /* we've already read in the next line; it's at &curr_line[line_pos+1] */ + strcpy(&curr_line[1], &curr_line[line_pos+1]); + line_num++; + line_pos = 1; + } + else + { + /* we need to read in the new line */ + line_num++; + line_pos = 1; + if( fgets(&curr_line[1], MAX_LINE+2, in_fp) == (char *) NULL ) + curr_line[1] = '\0'; + } + if( DEBUG_NEXTCHAR ) + fprintf(stderr, "after NextChar, line_num %d, line_pos %d, curr_line %s", + line_num, line_pos, &curr_line[1]); +} /* end NextChar */ + +/*****************************************************************************/ +/* */ +/* BOOLEAN InputMatches(char *pattern) */ +/* */ +/* Returns TRUE if input starting at curr_line[line_pos] matches pattern. */ +/* To check this we may have to read an extra line or more of input. */ +/* */ +/*****************************************************************************/ + +BOOLEAN InputMatches(char *pattern) +{ char *p, *q; + for(p = &curr_line[line_pos], q = pattern; *q != '\0'; p++, q++ ) + { + if( *p == '\0' ) + { + /* attempt to read another line of input, since we are off the end */ + if( fgets(p, MAX_LINE+2-(p - curr_line), in_fp) == (char *) NULL ) + *p = '\0'; + } + if( *p != *q ) + break; + } + if( DEBUG_PREFIXEQ ) + fprintf(stderr, "InputMatches(%s, %s) returning %s\n", + &curr_line[line_pos], pattern, *q == '\0' ? "TRUE" : "FALSE"); + return (*q == '\0'); +} /* end InputMatches */ + + /*****************************************************************************/ /* */ /* TRIE */ @@ -573,6 +2465,7 @@ typedef struct trie_node { struct trie_node *sub[MAX_CHAR]; TOKEN *value[MAX_CHAR]; } *TRIE; + /*****************************************************************************/ /* */ @@ -591,16 +2484,14 @@ BOOLEAN TrieInsert(TRIE *T, char *str, TOKEN *val) if( *str == '\0' ) res = FALSE; else - { - if( *T == (TRIE) NULL ) + { if( *T == (TRIE) NULL ) *T = (TRIE) calloc(1, sizeof(struct trie_node)); /* will set all to 0 */ if( *(str + 1) != '\0' ) res = TrieInsert(&((*T)->sub[(int) *str]), str + 1, val); else if( (*T)->value[(int) *str] != (TOKEN *) NULL ) res = FALSE; else - { - (*T)->value[(int) *str] = val; + { (*T)->value[(int) *str] = val; res = TRUE; } } @@ -627,12 +2518,10 @@ TOKEN *TrieRetrieve(TRIE T, char *str, int *len) res = (TOKEN *) NULL; *len = 0; for( i = 0; T != (TRIE) NULL; T = T->sub[(int) str[i]], i++ ) - { - if( DEBUG_TRIE ) + { if( DEBUG_TRIE ) fprintf(stderr, " i = %d, res = %s\n", i, EchoToken(res)); if( T->value[(int) str[i]] != (TOKEN *) NULL ) - { - res = T->value[(int) str[i]]; + { res = T->value[(int) str[i]]; *len = i+1; } } @@ -641,8 +2530,8 @@ TOKEN *TrieRetrieve(TRIE T, char *str, int *len) *len, EchoToken(res)); return res; } - + /*****************************************************************************/ /* */ /* HASH_TABLE */ @@ -653,7 +2542,7 @@ TOKEN *TrieRetrieve(TRIE T, char *str, int *len) /* NB MAX_SYM must be somewhat larger than the number of keywords. */ /* */ /*****************************************************************************/ -#define MAX_SYM 309 +#define MAX_SYM 609 static char *HashTable[MAX_SYM]; /* will initialze to NULL */ static int HashTableCount = 0; /* number of entries */ @@ -667,10 +2556,10 @@ static int hash(char *key) return res % MAX_SYM; } /* end hash */ -void HashTableInsert(char *str, FILE *err_fp) +void HashInsert(char *str) { int i; if( DEBUG_SETUP ) - fprintf(stderr, "[ HashTableInsert(%s)\n", str); + fprintf(stderr, "[ HashInsert(%s)\n", str); if( HashTableCount >= MAX_SYM - 20 ) { fprintf(err_fp, "%s internal error: full hash table (increase MAX_SYM)\n", @@ -681,40 +2570,54 @@ void HashTableInsert(char *str, FILE *err_fp) HashTable[i] = str; HashTableCount++; if( DEBUG_SETUP ) - fprintf(stderr, "] HashTableInsert(%s)\n", str); + fprintf(stderr, "] HashInsert(%s)\n", str); } -BOOLEAN HashTableRetrieve(char *str) +BOOLEAN HashRetrieve(char *str) { int i; for( i = hash(str); HashTable[i] != (char *) NULL; i = (i+1) % MAX_SYM ) if( strcmp(HashTable[i], str) == 0 ) return TRUE; return FALSE; } - + /*****************************************************************************/ /* */ /* BACK END */ /* */ -/* This is the code that actually prints the output file. It accumulates */ -/* each token and only prints it at the end of the token, so that it can */ -/* check whether an alternative command ought to be printed (keywords). */ +/* This is the code that actually prints the output file. */ +/* To emit one token, the call sequence should be as follows: */ +/* */ +/* StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, l) */ +/* Emit(TOKEN *current_token, char ch) */ +/* ... */ +/* Emit(TOKEN *current_token, char ch) */ +/* EndEmit(TOKEN *current_token, char *end_delim) */ +/* */ +/* The back end will then take care of all print styles automatically, */ +/* including checking for keywords. When emitting white space each space */ +/* can be sent directly: */ +/* */ +/* EmitRaw(ch) */ /* */ /*****************************************************************************/ -static char save_token[MAX_LINE]; /* the token text */ -static int save_len; /* index of \0 in save_token */ +static char save_value[MAX_LINE]; /* the token text */ +static int save_len; /* index of \0 in save_value */ static BOOLEAN save_on = FALSE; /* TRUE when saving */ +static LANGUAGE *save_language; /* the current language */ + /*****************************************************************************/ /* */ -/* EmitRaw(ch, out_fp, FILE *err_fp) */ +/* EmitRaw(ch) */ /* */ /* Emit this character immediately. This is only legal when not saving. */ -/* All characters printed on the output file should pass through here, */ -/* since EmitRaw keeps track of where we are on the output line, in order */ -/* to handle tab characters correctly. */ +/* All characters printed on the output file that represent actual text */ +/* of the program (i.e. not commands, {}, "", \ in strings etc.) should */ +/* pass through here, since EmitRaw keeps track of where we are on */ +/* the output line, in order to handle tab characters correctly. */ /* */ /* NB out_linepos is the column where the *next* character will go, and */ /* it counts the first column on the line as column zero. It understands */ @@ -723,277 +2626,546 @@ static BOOLEAN save_on = FALSE; /* TRUE when saving */ /* */ /*****************************************************************************/ -void EmitRaw(char ch, FILE *out_fp, FILE *err_fp) -{ - static int out_linepos = 0; /* output line position */ +void EmitRaw(char ch) +{ static int out_linepos = 0; /* output line position */ static BOOLEAN out_linestart = TRUE; /* TRUE if out line start */ if( DEBUG_EMIT ) fprintf(stderr, "EmitRaw(%c); out_linepos %d, out_linestart %s\n", ch, out_linepos, out_linestart ? "TRUE" : "FALSE"); if( save_on ) - { - fprintf(err_fp, "%s internal error (EmitRaw save_on)\n", ErrorHeader()); + { fprintf(err_fp, "%s internal error (EmitRaw save_on)\n", ErrorHeader()); abort(); } if( ch == '\t' ) - { - if( tab_by_spacing ) + { if( tab_by_spacing ) { putc(' ', out_fp); out_linepos++; while( out_linepos % tab_in != 0 ) { putc(' ', out_fp); - out_linepos++; + out_linepos++; } } else - { - out_linepos++; - while( out_linepos % tab_in != 0 ) - { out_linepos++; - } + { out_linepos++; + while( out_linepos % tab_in != 0 ) out_linepos++; if( out_linestart ) - { fprintf(out_fp, "$>%.1f%c {}", tab_out, tab_unit); + { fprintf(out_fp, "$>\"%.1f%c\" {}", tab_out, tab_unit); /* NB {} is required in case nothing follows on this line */ } else - { fprintf(out_fp, "$>%.1f%ct {}", (out_linepos/tab_in)*tab_out, tab_unit); - } + fprintf(out_fp, "$>\"%.1f%ct\" {}", (out_linepos/tab_in)*tab_out, tab_unit); } } else if( ch == '\n' ) - { - fputc(ch, out_fp); + { fputc(ch, out_fp); out_linepos = 0; out_linestart = TRUE; } else - { - fputc(ch, out_fp); + { fputc(ch, out_fp); out_linepos++; - if( ch != ' ' ) - out_linestart = FALSE; + if( ch != ' ' ) out_linestart = FALSE; } if( DEBUG_EMIT ) fprintf(stderr, "EmitRaw(%c) returning; out_linepos %d, out_linestart %s\n", ch, out_linepos, out_linestart ? "TRUE" : "FALSE"); } /* end EmitRaw */ - + /*****************************************************************************/ /* */ -/* StartEmit(FILE *err_fp) */ +/* StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, len) */ +/* */ +/* Start the emission of a token. If it is a PRINT_WHOLE_QUOTED, it has */ +/* to be saved since it might be a keyword. */ /* */ -/* Start the emission of a token. */ +/* The token began with the starting delimiter start_delim[0..len-1]. */ /* */ /*****************************************************************************/ -void StartEmit(FILE *err_fp) -{ +void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len) +{ int i; if( save_on ) { fprintf(err_fp, "%s internal error (StartEmit)\n", ErrorHeader()); abort(); } - save_on = TRUE; - save_len = 0; -} + save_language = lang; + switch( current_token->print_style ) + { + case PRINT_WHOLE_QUOTED: + + /* start_delim is to be printed, but must be saved up */ + save_on = TRUE; + for( i = 0; i < len; i++ ) + save_value[i] = start_delim[i]; + save_len = len; + save_value[save_len] = '\0'; + break; + + + case PRINT_NODELIMS_QUOTED: + + /* easiest to save this, treat like PRINT_WHOLE_QUOTED, but no delims */ + save_on = TRUE; + save_len = 0; + save_value[save_len] = '\0'; + break; + + case PRINT_WHOLE_UNQUOTED: + + /* print command */ + if( current_token->command[0] != '\0' ) + fprintf(out_fp, "%s{", current_token->command); /*}*/ + + /* print opening delimiter, verbatim */ + for( i = 0; i < len; i++ ) + putc(start_delim[i], out_fp); + break; + + + case PRINT_NODELIMS_UNQUOTED: + + /* command is printed but not delimiter */ + if( current_token->command[0] != '\0' ) + fprintf(out_fp, "%s{", current_token->command); /*}*/ + break; + + + case PRINT_NODELIMS_INNER: + + /* command is printed but not delimiter; always print opening brace */ + fprintf(out_fp, "%s{", current_token->command); /*}*/ + break; + + + case PRINT_COMMAND_ONLY: + + /* command is printed but nothing else */ + fprintf(out_fp, "%s", current_token->command); + break; + + + default: + + fprintf(err_fp, "%s internal error (print_style)\n", ErrorHeader()); + abort(); + break; + } +} /* end StartEmit */ + /*****************************************************************************/ /* */ -/* EndEmit(LANGUAGE *lang, TOKEN *current_token, FILE *out_fp, *err_fp) */ +/* EndEmit(TOKEN *current_token, char *end_delim) */ /* */ -/* End emitting a token of this type on file out_fp. */ +/* End emitting the current token. Its ending delimiter was end_delim. */ /* */ /*****************************************************************************/ +#define is_whitespace(ch) ((ch)==' ' || (ch)=='\t' || (ch)=='\n' || (ch)=='\f') -void EndEmit(LANGUAGE *lang, TOKEN *current_token, FILE *out_fp, FILE *err_fp) -{ BOOLEAN altern; int i; - if( !save_on ) +void EndEmit(TOKEN *current_token, char *end_delim) +{ char *com; int i; BOOLEAN quoted_now = FALSE; + switch( current_token->print_style ) { - fprintf(err_fp, "%s internal error (EndEmit)\n", ErrorHeader()); - abort(); - } - save_on = FALSE; - if( save_len == 0 ) - { - /* nothing in token, must be resumed after a newline or tab */ - /* so emit nothing in this case */ - } - else if( current_token->command == (char *) NULL ) - { - /* no command, which means to emit this token without its delimiters */ - for( i = 0; i < save_len; i++ ) - EmitRaw(save_token[i], out_fp, err_fp); - } - else - { - /* if there is an alternate command in this language, check whether the */ - /* completed token is a keyword, and if so apply the alternate command. */ - altern = (lang != (LANGUAGE *) NULL - && current_token->alternate_command[0] != '\0' - && HashTableRetrieve(save_token)); - - putc('{', out_fp); - fputs(altern ? current_token->alternate_command : current_token->command, - out_fp); - putc('"', out_fp); - for( i = 0; i < save_len; i++ ) - { - if( save_token[i] == '"' || save_token[i] == '\\' ) - putc('\\', out_fp); - EmitRaw(save_token[i], out_fp, err_fp); - } - putc('"', out_fp); - putc('}', out_fp); + case PRINT_WHOLE_QUOTED: + + strcpy(&save_value[save_len], end_delim); + save_len += strlen(end_delim); + /* NB NO BREAK */ + + + case PRINT_NODELIMS_QUOTED: + + /* work out whether we are printing the command or its alternative */ + com = (current_token->alternate_command[0]!='\0' && HashRetrieve(save_value)? + current_token->alternate_command : current_token->command); + + /* print command, opening brace */ + if( com[0] != '\0' ) fprintf(out_fp, "%s{", com); /* } */ + + /* omit trailing white space (will not be significant to Lout anyway) */ + /* *** not doing this now + while( save_len>0 && is_whitespace(save_value[save_len-1]) ) save_len--; + *** */ + + /* print the token with appropriate escapes */ + save_on = FALSE; + for( i = 0; i < save_len; i++ ) switch( save_value[i] ) + { + case '/': + case '|': + case '&': + case '#': + case '{': + case '}': + case '^': + case '~': + case '-': + + if( !quoted_now ) { putc('"', out_fp); quoted_now = TRUE; } + EmitRaw(save_value[i]); + break; + + + case '"': + case '\\': + + if( !quoted_now ) { putc('"', out_fp); quoted_now = TRUE; } + putc('\\', out_fp); + EmitRaw(save_value[i]); + break; + + + case ' ': + case '\t': + case '\n': + case '\f': + + if( i == 0 ) + { /* make initial white space significant using "" */ + putc('"', out_fp); + quoted_now = TRUE; + } + if( quoted_now ) { putc('"', out_fp); quoted_now = FALSE; } + EmitRaw(save_value[i]); + break; + + + default: + + /* anything else can be quoted or unquoted ad. lib. */ + EmitRaw(save_value[i]); + break; + } + /* print closing quote and closing brace if needed */ + if( quoted_now ) putc('"', out_fp); + else if( save_len > 0 && is_whitespace(save_value[save_len-1]) ) + fputs("\"\"", out_fp); /* makes trailing white space significant */ + if( com[0] != '\0' ) /*{*/ putc('}', out_fp); + break; + + + case PRINT_WHOLE_UNQUOTED: + + /* print end delimiter, verbatim */ + fputs(end_delim, out_fp); + /* NB NO BREAK */ + + + case PRINT_NODELIMS_UNQUOTED: + + /* print closing brace if required*/ + if( current_token->command[0] != '\0' ) /*{*/ putc('}', out_fp); + break; + + + case PRINT_NODELIMS_INNER: + + /* always print closing brace */ + /*{*/ putc('}', out_fp); + break; + + + case PRINT_COMMAND_ONLY: + + break; + + + default: + + fprintf(err_fp, "%s internal error (print_style)\n", ErrorHeader()); + abort(); + break; } -} /* end EndEmit */ + /* print following command if any */ + if( current_token->following_command != NULL ) + fprintf(out_fp, "%s", current_token->following_command); +} /* end EndEmit */ + /*****************************************************************************/ /* */ -/* Emit(char ch, TOKEN *current_token, FILE *out_fp, FILE *err_fp) */ +/* Emit(TOKEN *current_token, char ch) */ /* */ -/* Emit one character. */ +/* Emit one character of the current token. */ /* */ /*****************************************************************************/ -void Emit(char ch, TOKEN *current_token, FILE *out_fp, FILE *err_fp) +void Emit(TOKEN *current_token, char ch) { - if( !save_on ) + switch( current_token->print_style ) { - fprintf(err_fp, "%s internal error (EmitChar)\n", ErrorHeader()); - abort(); + case PRINT_WHOLE_QUOTED: + case PRINT_NODELIMS_QUOTED: + + if( !save_on ) + { fprintf(err_fp, "%s internal error (EmitChar)\n", ErrorHeader()); + abort(); + } + save_value[save_len++] = ch; + save_value[save_len] = '\0'; + break; + + + case PRINT_WHOLE_UNQUOTED: + case PRINT_NODELIMS_UNQUOTED: + + /* verbatim output */ + putc(ch, out_fp); + break; + + + case PRINT_NODELIMS_INNER: + + fprintf(err_fp, "%s internal error (emitting INNER)\n", ErrorHeader()); + abort(); + break; + + + case PRINT_COMMAND_ONLY: + + /* emit nothing since printing the command only */ + break; + + + default: + + fprintf(err_fp, "%s internal error (print_style)\n", ErrorHeader()); + abort(); + break; + } - if( ch == '\n' || ch == '\t' ) +} /* end Emit */ + + +/*****************************************************************************/ +/* */ +/* EmitProtected(char ch) */ +/* */ +/* Emit one character of the current token. If the character is a special */ +/* one in Lout, protect it with quotes. */ +/* */ +/*****************************************************************************/ + +void EmitProtected(char ch) +{ + switch( ch ) { - EndEmit((LANGUAGE *) NULL, current_token, out_fp, err_fp); - EmitRaw(ch, out_fp, err_fp); - StartEmit(err_fp); + case '/': + case '|': + case '&': + case '#': + case '{': + case '}': + case '^': + case '~': + case '-': + + putc('"', out_fp); + EmitRaw(ch); + putc('"', out_fp); + break; + + + case '"': + case '\\': + + putc('"', out_fp); + putc('\\', out_fp); + EmitRaw(ch); + putc('"', out_fp); + break; + + + default: + + EmitRaw(ch); + break; } - else +} /* end EmitProtected */ + + +/*****************************************************************************/ +/* */ +/* TOKEN *ExpandToken(TOKEN *t, int starts_pos) */ +/* */ +/* Create a new token corresponding to t but using starts2[starts_pos] and */ +/* ends2[starts_pos] only. */ +/* */ +/*****************************************************************************/ + +char *clone2strings(char *s1, char *s2) +{ char *res; + res = (char *) malloc( (strlen(s1) + strlen(s2) + 1) * sizeof(char)); + sprintf(res, "%s%s", s1, s2); + if( DEBUG_SETUP ) + fprintf(stderr, "clone2strings(%s, %s) = %s\n", s1, s2, res); + return res; +} /* end clone2strings */ + +TOKEN *ExpandToken(TOKEN *t, int starts_pos) +{ TOKEN *res; int i; + if( DEBUG_SETUP ) + fprintf(stderr, "ExpandToken(%s, starts[0] = %s)\n", t->name, t->starts[0]); + res = (TOKEN *) calloc(1, sizeof(struct token_rec)); + res->name = t->name; + res->print_style = t->print_style; + res->command = t->command; + res->alternate_command = t->alternate_command; + res->following_command = t->following_command; + res->start_line_only = t->start_line_only; + for( i = 0; t->starts[i] != NULL; i++ ) { - save_token[save_len++] = ch; - save_token[save_len] = '\0'; + /* the starts of res are the start of t with starts2[starts_pos] added */ + res->starts[i] = clone2strings(t->starts[i], t->starts2[starts_pos]); } -} /* end Emit */ - + res->legal = t->legal; + res->escape = t->escape; + res->escape_legal = t->escape_legal; + res->inner_escape = t->inner_escape; + res->end_inner_escape = t->end_inner_escape; + res->end_delimiter = t->ends2[starts_pos]; + res->end_start_line_only = t->end_start_line_only; + res->want_two_ends = t->want_two_ends; + if( DEBUG_SETUP ) + fprintf(stderr, "ExpandToken returning res = %s, starts[0] = %s)\n", + res->name, res->starts[0]); + return res; +} /* end ExpandToken */ + /*****************************************************************************/ /* */ -/* SetupTokens(LANGUAGE *lang) */ +/* void SetupOneToken(TOKEN *t) */ /* */ -/* Set up the runtime token structures. This involves initializing the */ -/* chtype and escape_chtype fields for each token type in the chosen */ -/* language, and loading the trie with all the opening delimiters of all */ -/* the tokens. */ +/* Set up one token. This involves initializing the chtype and */ +/* escape_chtype fields for the token, and loading the trie with all */ +/* the opening delimiters of the token. */ /* */ /*****************************************************************************/ #define LEGAL 1 #define ESCAPE 2 #define INNER_ESCAPE 3 +TRIE Trie = (TRIE) NULL; /* these tokens allowed anywhere */ +TRIE StartLineTrie = (TRIE) NULL; /* these allowed at line start only */ -TRIE Trie = (TRIE) NULL; - -void SetupTokens(LANGUAGE *lang, FILE *err_fp) -{ TOKEN *t; int i, j; +void SetupOneToken(TOKEN *t) +{ int j; if( DEBUG_SETUP ) - fprintf(stderr, "SetupTokens(%s)\n", lang->names[0]); + fprintf(stderr, "SetupOneToken starting %s\n", t->starts[0]); - /* set up each token in the language */ - for( i = 0; lang->tokens[i] != (TOKEN *) NULL; i++ ) + /* check that any PRINT_NODELIMS_INNER styles have an end delimiter */ + if( t->print_style == PRINT_NODELIMS_INNER ) { - t = lang->tokens[i]; - if( DEBUG_SETUP ) - fprintf(stderr, "SetupTokens token starting %s\n", t->starts[0]); + if( t->end_delimiter == NULL || t->end_delimiter[0] == '\0' ) + { + fprintf(err_fp, "%s: token %s is INNER but has no end delimiter\n", + t->name, ErrorHeader()); + } + } - /* set up the chtype table for this token */ + /* set up the chtype table for this token */ + if( t->legal == NULL ) + { /* all characters are legal */ + for( j = 0; j < MAX_CHAR; j++ ) + t->chtype[j] = LEGAL; + } + else + { /* the characters in t->legal are legal */ for( j = 0; t->legal[j] != '\0'; j++ ) t->chtype[(int) t->legal[j]] = LEGAL; - if( t->escape[0] != '\0' ) - t->chtype[(int) t->escape[0]] = ESCAPE; - if( t->inner_escape[0] != '\0' ) - t->chtype[(int) t->inner_escape[0]] = INNER_ESCAPE; - - /* set up the escape_chtype table for this token */ + } + if( t->escape[0] != '\0' ) + t->chtype[(int) t->escape[0]] = ESCAPE; + if( t->inner_escape[0] != '\0' ) + t->chtype[(int) t->inner_escape[0]] = INNER_ESCAPE; + + /* set up the escape_chtype table for this token */ + if( t->escape_legal == NULL ) + { /* all characters are legal after an escape character */ + for( j = 0; j < MAX_CHAR; j++ ) + t->escape_chtype[j] = LEGAL; + } + else + { /* the characters in t->escape_legal are legal after an escape character */ for( j = 0; t->escape_legal[j] != '\0'; j++ ) t->escape_chtype[(int) t->escape_legal[j]] = LEGAL; + } - /* load the opening delimiters of this token into the trie */ - for( j = 0; t->starts[j] != (char *) NULL; j++ ) - { - if( !TrieInsert(&Trie, t->starts[j], t) ) - { - if( *(t->starts[j]) == '\0' ) - fprintf(err_fp, "%s: empty starting delimiter\n", ErrorHeader()); - else - fprintf(err_fp, "%s: starting delimiter %s appears twice\n", - ErrorHeader(), t->starts[j]); - } + /* load the opening delimiters of this token into the trie */ + for( j = 0; t->starts[j] != (char *) NULL; j++ ) + { + if( !TrieInsert(t->start_line_only ? &StartLineTrie:&Trie,t->starts[j],t) ) + { if( *(t->starts[j]) == '\0' ) + fprintf(err_fp, "%s: empty starting delimiter\n", ErrorHeader()); + else + fprintf(err_fp, "%s: starting delimiter %s appears twice\n", + ErrorHeader(), t->starts[j]); } - - if( DEBUG_SETUP ) - fprintf(stderr, "SetupTokens token ending %s\n", t->starts[0]); } - /* load the keyword hash table */ - for( j = 0; lang->keywords[j] != (char *) NULL; j++ ) - HashTableInsert(lang->keywords[j], err_fp); - if( DEBUG_SETUP ) - fprintf(stderr, "SetupTokens(%s) returning.\n", lang->names[0]); -} /* end SetupTokens */ - + fprintf(stderr, "SetupOneToken ending %s\n", t->starts[0]); +} /* end SetupOneToken */ -/*****************************************************************************/ -/* */ -/* BOOLEAN PrefixEq(char *str, char *prefix) */ -/* */ -/* Returns TRUE if str (unterminated) begins with prefix (terminated). */ -/* */ -/*****************************************************************************/ - -BOOLEAN PrefixEq(char *str, char *prefix) -{ char *p, *q; - for( p = str, q = prefix; *q != '\0' && *p == *q; p++, q++ ); - if( DEBUG_PREFIXEQ ) - fprintf(stderr, "PrefixEq(%s, %s) returning %s\n", - str, prefix, *q == '\0' ? "TRUE" : "FALSE"); - return (*q == '\0'); -} /* end PrefixEq */ - /*****************************************************************************/ /* */ -/* NextChar(FILE *in_fp) */ +/* SetupLanguage(LANGUAGE *lang) */ /* */ -/* Move to next character in the input file. This may involve changing */ -/* global variables curr_line, line_num, and line_pos; the new character */ -/* may be found in curr_line[line_pos]. */ +/* Set up the runtime token structures. This involves setting up each */ +/* token (see above), and also loading the hash table with the keywords. */ /* */ -/* NextChar does not skip any characters at all. When end of file is */ -/* reached, curr_line[line_pos] contains '\0'. */ +/* If a token has non-empty start2 and end2 pairs, it is expanded into */ +/* a set of tokens, one for each pair, with the start delimiter set to */ +/* the concatenation of the start delimiters and starts2, and end */ +/* delimiter set to the corresponding end2. */ /* */ /*****************************************************************************/ -void NextChar(FILE *in_fp) -{ - if( curr_line[line_pos] == '\n' ) +void SetupLanguage(LANGUAGE *lang) +{ int i, j; TOKEN *t; + if( DEBUG_SETUP ) + fprintf(stderr, "SetupLanguage(%s)\n", lang->names[0]); + + /* set up each token in the language */ + for( i = 0; lang->tokens[i] != (TOKEN *) NULL; i++ ) { - /* need a new line */ - line_num++; - line_pos = 1; - if( fgets(&curr_line[1], MAX_LINE+2, in_fp) == (char *) NULL ) - curr_line[1] = '\0'; + if( DEBUG_SETUP ) + fprintf(stderr, " (1) setting up token %s (starts[0] = %s)\n", + lang->tokens[i]->name, lang->tokens[i]->starts[0]); + if( lang->tokens[i]->starts2[0] != NULL ) + { + /* starts2, so set up one token for each entry in starts[2] */ + for( j = 0; lang->tokens[i]->starts2[j] != NULL; j++ ) + { + t = ExpandToken(lang->tokens[i], j); + if( DEBUG_SETUP ) + fprintf(stderr, " (2) setting up token %s (starts[0] = %s)\n", + t->name, t->starts[0]); + SetupOneToken(t); + } + } + else + { + /* no starts2, so set up just one token */ + SetupOneToken(lang->tokens[i]); + } } - else line_pos++; /* will yield '\0' as desired if EOF before end of line */ - if( DEBUG_NEXTCHAR ) - fprintf(stderr, "after NextChar, line_num %d, line_pos %d, curr_line %s", - line_num, line_pos, &curr_line[1]); -} /* end NextChar */ + /* load the keyword hash table */ + for( j = 0; lang->keywords[j] != (char *) NULL; j++ ) + HashInsert(lang->keywords[j]); + + if( DEBUG_SETUP ) + fprintf(stderr, "SetupLanguage(%s) returning.\n", lang->names[0]); +} /* end SetupLanguage */ + /*****************************************************************************/ /* */ @@ -1010,15 +3182,41 @@ BOOLEAN Printable(char ch) return (*p == ch); } /* end Printable */ + +/*****************************************************************************/ +/* */ +/* TOKEN *TokenStartingHere(int *len) */ +/* */ +/* Returns the token starting here if there is one, else NULL. */ +/* If found, the length of its starting delimiter is returned in *len. */ +/* */ +/*****************************************************************************/ + +TOKEN *TokenStartingHere(int *len) +{ TOKEN *res; + + if( line_pos == 1 ) + { + res = TrieRetrieve(StartLineTrie, &curr_line[line_pos], len); + if( res == (TOKEN *) NULL ) + res = TrieRetrieve(Trie, &curr_line[line_pos], len); + } + else + { + res = TrieRetrieve(Trie, &curr_line[line_pos], len); + } + return res; +} + /*****************************************************************************/ /* */ -/* Process(LANGUAGE*lang, FILE*in_fp, *out_fp, *err_fp, TOKEN *outer_token) */ +/* Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter) */ /* */ /* Process a sequence of input tokens. If we are currently recursing */ /* inside some other token, outer_token is non-null and is that token, */ -/* and we stop when we reach the ending delimiter of the inner escape */ -/* of that token. Otherwise, we stop at end of file. */ +/* and we stop when we reach outer_end_delimiter outside any token. */ +/* Otherwise we stop at end of file. */ /* */ /*****************************************************************************/ #define START 1 @@ -1027,242 +3225,230 @@ BOOLEAN Printable(char ch) #define IN_TOKEN_AFTER_INNER_ESCAPE 4 #define STOP 5 -void Process(LANGUAGE *lang, FILE *in_fp, FILE *out_fp, FILE *err_fp, -TOKEN *outer_token) +void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter) { TOKEN *current_token; int len, i, state; + BOOLEAN end_delimiter_seen; if( DEBUG_PROCESS ) fprintf(stderr, "[ Process(%s, -, -, -, -)\n", lang->names[0]); state = START; + end_delimiter_seen = FALSE; while( curr_line[line_pos] != '\0' && state != STOP ) { - if( DEBUG_PROCESS ) - fprintf(stderr, " state %d, ch %c\n", state, curr_line[line_pos]); - switch( state ) - { - case START: /* between tokens */ - - /* check whether outer_token's delimiter is current */ - if( outer_token != (TOKEN *) NULL && - outer_token->end_inner_escape[0] != '\0' && - PrefixEq(&curr_line[line_pos], outer_token->end_inner_escape) ) - { - len = strlen(outer_token->end_inner_escape); - for( i = 0; i < len; i++ ) - NextChar(in_fp); - state = STOP; - } - else switch( curr_line[line_pos] ) - { + if( DEBUG_PROCESS ) + fprintf(stderr, " state %d, ch %c\n", state, curr_line[line_pos]); + switch( state ) + { - case ' ': - - EmitRaw(' ', out_fp, err_fp); - NextChar(in_fp); - break; + case START: /* between tokens; try each of the following */ + /* check whether outer_token is ending here, in which case stop */ + if( outer_token != (TOKEN *) NULL && + curr_line[line_pos] == outer_end_delimiter[0] && + InputMatches(outer_end_delimiter) ) + { + len = strlen(outer_end_delimiter); + for( i = 0; i < len; i++ ) + NextChar(); + state = STOP; + } - case '\t': - - if( outer_token != (TOKEN *) NULL ) - { - fprintf(err_fp, "%s: replacing tab character in escape by space\n", - ErrorHeader()); - EmitRaw(' ', out_fp, err_fp); - NextChar(in_fp); - } - else - { - EmitRaw('\t', out_fp, err_fp); - NextChar(in_fp); + /* check whether a token is starting here, in which case start it */ + else if( (current_token = TokenStartingHere(&len)) != (TOKEN *) NULL ) + { + if( DEBUG_PROCESS ) + { fprintf(stderr, "current_token (len = %d): %s\n", + len, EchoToken(current_token)); } - break; + StartEmit(lang, current_token, &curr_line[line_pos], len); + /* skip the starting delimiter */ + for( i = 0; i < len; i++ ) + NextChar(); - case '\n': - - if( outer_token != (TOKEN *) NULL ) + /* we are now either in a token, or else we have to start an inner */ + if( current_token->print_style == PRINT_NODELIMS_INNER ) { - fprintf(err_fp, "%s: inserting %s to fix unterminated escape\n", - ErrorHeader(), outer_token->end_inner_escape); - state = STOP; + Process(lang, current_token, current_token->end_delimiter); + EndEmit(current_token, ""); } - else - { - EmitRaw('\n', out_fp, err_fp); - NextChar(in_fp); - } - break; + else state = IN_TOKEN; + } + /* check whether we have a space */ + else if( curr_line[line_pos] == ' ' ) + { + EmitRaw(' '); + NextChar(); + } - case '\f': - - if( outer_token != (TOKEN *) NULL ) - { - fprintf(err_fp, "%s: replacing formfeed character in escape by space\n", - ErrorHeader()); - EmitRaw(' ', out_fp, err_fp); - NextChar(in_fp); - } - else - { + /* check whether we have a tab character */ + else if( curr_line[line_pos] == '\t' ) + { + EmitRaw('\t'); + NextChar(); + } + + /* check whether we have a newline character */ + else if( curr_line[line_pos] == '\n' ) + { + EmitRaw('\n'); + NextChar(); + } + + /* check whether we have a formfeed character */ + else if( curr_line[line_pos] == '\f' ) + { fprintf(out_fp, "\n@NP"); - EmitRaw('\n', out_fp, err_fp); - NextChar(in_fp); - } - break; + EmitRaw('\n'); + NextChar(); + } + /* check whether we are supposed to echo things that don't match */ + else if( lang->no_match == NO_MATCH_PRINT ) + { + EmitProtected(curr_line[line_pos]); + NextChar(); + } - default: + /* finally, we have an error and must skip the character */ + else if( lang->no_match == NO_MATCH_ERROR ) + { + if( Printable(curr_line[line_pos]) ) + fprintf(err_fp, "%s: skipping unexpected %c character\n", + ErrorHeader(), curr_line[line_pos]); + else + fprintf(err_fp, "%s: %s (octal %o)\n", + "skipping unexpected unprintable character", + ErrorHeader(), (int) curr_line[line_pos]); + NextChar(); + } + else + { + fprintf(err_fp, "%s internal error: lang->no_match\n", ErrorHeader()); + } + break; + + + case IN_TOKEN: /* within a token; current_token says which kind */ - /* look up the trie to see which token we are starting */ - current_token = TrieRetrieve(Trie, &curr_line[line_pos], &len); - if( current_token == (TOKEN *) NULL ) + /* check for ending delimiter if there is one */ + if( current_token->end_delimiter[0] != '\0' && + (!current_token->end_start_line_only || line_pos == 1) && + InputMatches(current_token->end_delimiter) ) + { + if( current_token->want_two_ends && !end_delimiter_seen ) { - /* error; no token starts here, so print error message and skip */ - if( Printable(curr_line[line_pos]) ) - fprintf(err_fp, "%s: skipping unexpected %c character\n", - ErrorHeader(), curr_line[line_pos]); - else - fprintf(err_fp, - "%s: skipping unexpected unprintable character (octal %o)\n", - ErrorHeader(), (int) curr_line[line_pos]); - NextChar(in_fp); + /* if we have to see the end delimiter twice before stopping, */ + /* and we haven't seen it yet, then emit the char and carry on */ + Emit(current_token, curr_line[line_pos]); + NextChar(); + end_delimiter_seen = TRUE; } else { - /* have a token, so start emitting it */ if( DEBUG_PROCESS ) - { - fprintf(stderr, "current_token (len = %d): %s\n", - len, EchoToken(current_token)); - } - StartEmit(err_fp); + fprintf(stderr, " InputMatches(%s) so finishing token\n", + current_token->end_delimiter); + len = strlen(current_token->end_delimiter); for( i = 0; i < len; i++ ) - { - /* *** - if( DEBUG_PROCESS ) - fprintf(stderr, " emitting delim (i %d, len %d)\n", i, len); - *** */ - if( current_token->command != (char *) NULL ) - Emit(curr_line[line_pos], current_token, out_fp, err_fp); - NextChar(in_fp); - } - state = IN_TOKEN; + NextChar(); + EndEmit(current_token, current_token->end_delimiter); + state = START; } - } - break; - - - case IN_TOKEN: /* within a token; current_token says which kind */ - - /* check for ending delimiter if there is one */ - if( current_token->end_delimiter[0] != '\0' && - PrefixEq(&curr_line[line_pos], current_token->end_delimiter) ) - { - if( DEBUG_PROCESS ) - fprintf(stderr, " PrefixEq(-, %s) so finishing token\n", - current_token->end_delimiter); - len = strlen(current_token->end_delimiter); - for( i = 0; i < len; i++ ) - { if( current_token->command != (char *) NULL ) - Emit(curr_line[line_pos], current_token, out_fp, err_fp); - NextChar(in_fp); } - EndEmit(lang, current_token, out_fp, err_fp); - state = START; - } - else switch( current_token->chtype[(int) curr_line[line_pos]] ) - { + else switch( current_token->chtype[(int) curr_line[line_pos]] ) + { - case LEGAL: + case LEGAL: - Emit(curr_line[line_pos], current_token, out_fp, err_fp); - NextChar(in_fp); - break; + Emit(current_token, curr_line[line_pos]); + NextChar(); + break; - case ESCAPE: + case ESCAPE: - NextChar(in_fp); - state = IN_TOKEN_AFTER_ESCAPE; - break; + NextChar(); + state = IN_TOKEN_AFTER_ESCAPE; + break; - case INNER_ESCAPE: + case INNER_ESCAPE: - EndEmit(lang, current_token, out_fp, err_fp); - NextChar(in_fp); - Process(lang, in_fp, out_fp, err_fp, current_token); - state = IN_TOKEN_AFTER_INNER_ESCAPE; - break; + EndEmit(current_token, ""); + NextChar(); + Process(lang, current_token, current_token->end_inner_escape); + state = IN_TOKEN_AFTER_INNER_ESCAPE; + break; - default: + default: - if( current_token->end_delimiter[0] != '\0' ) - { - /* error: token ends at delimiter, not at unexpected character */ - if( Printable(curr_line[line_pos]) ) - fprintf(err_fp, "%s: skipping unexpected %c character in %s\n", - ErrorHeader(), curr_line[line_pos], current_token->name); + if( current_token->end_delimiter[0] != '\0' ) + { + /* error: token ends at delimiter, not at unexpected character */ + if( Printable(curr_line[line_pos]) ) + fprintf(err_fp, "%s: skipping unexpected %c character in %s\n", + ErrorHeader(), curr_line[line_pos], current_token->name); + else + fprintf(err_fp, "%s: %s (octal %o) in %s\n", + ErrorHeader(), "skipping unexpected unprintable character", + (int) curr_line[line_pos], current_token->name); + NextChar(); + } else - fprintf(err_fp, - "%s: skipping unexpected unprintable character (octal %o) in %s\n", - ErrorHeader(), (int) curr_line[line_pos], current_token->name); - NextChar(in_fp); - } - else - { - /* normal termination after last legal character */ - EndEmit(lang, current_token, out_fp, err_fp); - state = START; - } - break; + { + /* normal termination after last legal character */ + EndEmit(current_token, ""); + state = START; + } + break; - } - break; + } + break; - case IN_TOKEN_AFTER_ESCAPE: + case IN_TOKEN_AFTER_ESCAPE: - if( current_token->escape_chtype[(int) curr_line[line_pos]] == LEGAL ) - { - Emit(current_token->escape[0], current_token, out_fp, err_fp); - Emit(curr_line[line_pos], current_token, out_fp, err_fp); - } - else - { - if( Printable(curr_line[line_pos]) ) - fprintf(err_fp, "%s: skipping %c%c in %s, since %c not legal here\n", - ErrorHeader(), current_token->escape[0], curr_line[line_pos], - current_token->name, curr_line[line_pos]); + if( current_token->escape_chtype[(int) curr_line[line_pos]] == LEGAL ) + { + Emit(current_token, current_token->escape[0]); + Emit(current_token, curr_line[line_pos]); + } else - fprintf(err_fp, - "%s: skipping %c and unprintable unexpected character (octal %o)\n", - ErrorHeader(), current_token->escape[0], (int) curr_line[line_pos]); - } - NextChar(in_fp); - state = IN_TOKEN; - break; + { + if( Printable(curr_line[line_pos]) ) + fprintf(err_fp,"%s: skipping %c%c in %s, since %c not legal here\n", + ErrorHeader(), current_token->escape[0], curr_line[line_pos], + current_token->name, curr_line[line_pos]); + else + fprintf(err_fp, + "%s: skipping %c and %s (octal %o)\n", ErrorHeader(), + current_token->escape[0], "unprintable unexpected character", + (int) curr_line[line_pos]); + } + NextChar(); + state = IN_TOKEN; + break; - case IN_TOKEN_AFTER_INNER_ESCAPE: + case IN_TOKEN_AFTER_INNER_ESCAPE: - /* ending delimiter of inner escape has been read over */ - StartEmit(err_fp); - state = IN_TOKEN; - break; + /* ending delimiter of inner escape has been read over */ + StartEmit(lang, current_token, "", 0); + state = IN_TOKEN; + break; - default: + default: - fprintf(err_fp, "%s internal error (state = %d)\n", - ErrorHeader(), state); - abort(); - break; - } + fprintf(err_fp, "%s internal error (state = %d)\n", + ErrorHeader(), state); + abort(); + break; + } } /* at end, need to tidy up any residual messiness */ @@ -1285,9 +3471,9 @@ TOKEN *outer_token) fprintf(err_fp, "%s: program text ended within %s\n", ErrorHeader(), current_token->name); else - fprintf(err_fp, "%s: %s escape ended within %s\n", + fprintf(err_fp, "%s: %s token ended within %s\n", ErrorHeader(), outer_token->name, current_token->name); - EndEmit(lang, current_token, out_fp, err_fp); + EndEmit(current_token, ""); } break; @@ -1297,7 +3483,7 @@ TOKEN *outer_token) /* we stopped after the escape character */ fprintf(err_fp, "%s: skipping %c at end of program text\n", ErrorHeader(), current_token->escape[0]); - EndEmit(lang, current_token, out_fp, err_fp); + EndEmit(current_token, ""); break; @@ -1310,7 +3496,7 @@ TOKEN *outer_token) fprintf(err_fp, "%s: program text ended within %s after escape\n", ErrorHeader(), current_token->name); else - fprintf(err_fp, "%s: %s escape ended within %s after escape\n", + fprintf(err_fp, "%s: %s token ended within %s after escape\n", ErrorHeader(), outer_token->name, current_token->name); } break; @@ -1325,44 +3511,50 @@ TOKEN *outer_token) } } /* end Process */ - + /*****************************************************************************/ /* */ -/* PrintUsage(fp) */ +/* PrintUsage() */ /* */ -/* Print usage message on file fp. */ +/* Print usage message on file err_fp. */ /* */ /*****************************************************************************/ -void PrintUsage(FILE *fp) +void PrintUsage() { int i; - fprintf(fp, "\n"); - fprintf(fp, "usage: prg2lout <options> <files>\n\n"); - fprintf(fp, " where <options> can be\n"); - fprintf(fp, "\n"); - fprintf(fp, " -r raw mode (used within Lout only)\n"); - fprintf(fp, " -i<file> take input from <file>\n"); - fprintf(fp, " -o<file> send output to <file>\n"); - fprintf(fp, " -e<file> send error messages to <file>\n"); - fprintf(fp, " -l<language> input is in this programming language\n"); - fprintf(fp, " -p<style> print style: fixed, varying, symbol\n"); - fprintf(fp, " -f<family> font family (e.g. Times)\n"); - fprintf(fp, " -s<size> font size (e.g. 10p or 12p)\n"); - fprintf(fp, " -v<space> line spacing (e.g. 1.1fx)\n"); - fprintf(fp, " -t<num> tab interval (e.g. 8 is default)\n"); - fprintf(fp, " -T<dist> output tab interval (e.g. 0.5i)\n"); - fprintf(fp, " -n no file names as page headers\n"); - fprintf(fp, " -V print version information and exit\n"); - fprintf(fp, " -u print this usage message and exit\n"); - fprintf(fp, "\n"); - fprintf(fp, " and <language> (which is compulsory) can be any one of:\n"); + fprintf(err_fp, "\n"); + fprintf(err_fp, "usage: prg2lout <options> <files>\n\n"); + fprintf(err_fp, " where <options> can be\n"); + fprintf(err_fp, "\n"); + fprintf(err_fp, " -r raw mode (used within Lout only)\n"); + fprintf(err_fp, " -i<file> take input from <file>\n"); + fprintf(err_fp, " -o<file> send output to <file>\n"); + fprintf(err_fp, " -e<file> send error messages to <file>\n"); + fprintf(err_fp, " -l<language> input is in this programming language\n"); + fprintf(err_fp, " -p<style> print style: fixed, varying, symbol\n"); + fprintf(err_fp, " -f<family> font family (e.g. Times)\n"); + fprintf(err_fp, " -s<size> font size (e.g. 10p or 12p)\n"); + fprintf(err_fp, " -v<space> line spacing (e.g. 1.1fx)\n"); + fprintf(err_fp, " -t<num> tab interval (e.g. 8 is default)\n"); + fprintf(err_fp, " -T<dist> output tab interval (e.g. 0.5i)\n"); + fprintf(err_fp, " -S<file> use this as the setup file\n"); + fprintf(err_fp, " -n no file names as page headers\n"); + fprintf(err_fp, " -V print version information and exit\n"); + fprintf(err_fp, " -u print this usage message and exit\n"); + fprintf(err_fp, "\n"); + fprintf(err_fp, " <language> (which is compulsory) can be any one of:\n"); for( i = 0; languages[i] != (LANGUAGE *) NULL; i++ ) - fprintf(fp, " %s\n", languages[i]->names[0]); - fprintf(fp, "\n"); + fprintf(err_fp, " %s\n", languages[i]->names[0]); + fprintf(err_fp, "\n"); + fprintf(err_fp, "The values of all formatting options not given are\n"); + fprintf(err_fp, "taken from the setup file: either the file given after\n"); + fprintf(err_fp, "-S, or the system default setup file for this language\n"); + fprintf(err_fp, "if there is no -S option.\n"); + fprintf(err_fp, "\n"); } /* end PrintUsage */ - + /*****************************************************************************/ /* */ /* main(argc, argv) */ @@ -1373,10 +3565,9 @@ void PrintUsage(FILE *fp) /*****************************************************************************/ int main(int argc, char *argv[]) -{ FILE *in_fp, *out_fp, *err_fp; - BOOLEAN stdin_seen; int i, j, arg_pos; - char *infilename, *outfilename, *errfilename, *str; - LANGUAGE *language = (LANGUAGE *) NULL; +{ BOOLEAN stdin_seen; int i, j, arg_pos; + char *infilename, *outfilename, *errfilename; + LANGUAGE *lang = NO_LANGUAGE; char *file_names[1024]; int file_count = 0; /* echo command line */ @@ -1393,15 +3584,14 @@ int main(int argc, char *argv[]) line_num = 0; stdin_seen = raw_seen = FALSE; tab_by_spacing = TRUE; - style_option = NO_STYLE; tab_in = 8; tab_out = 3; tab_unit = 'f'; headers_option = TRUE; font_option = size_option = line_option = tabin_option = - tabout_option = language_option = (char *) NULL; + tabout_option = setup_option = language_option = (char *) NULL; if( argc == 1 ) - { PrintUsage(err_fp); + { PrintUsage(); exit(1); } for( arg_pos = 1; arg_pos < argc; arg_pos++ ) @@ -1487,27 +3677,12 @@ int main(int argc, char *argv[]) { fprintf(err_fp, "%s: -p illegal with -r option\n", ErrorHeader()); exit(1); } - GetArg(str, "usage: -p<printstyle>", FALSE); - /* *** -l can set style, so don't do this now - if( style_option != NO_STYLE ) - { fprintf(err_fp, "%s: -p option appears twice!\n", ErrorHeader()); - exit(1); - } - else - *** */ - if( strcmp(str, "fixed") == 0 ) - { style_option = FIXED_STYLE; - } - else if( strcmp(str, "varying") == 0 ) - { style_option = VARYING_STYLE; - tab_by_spacing = FALSE; - } - else if( strcmp(str, "symbol") == 0 ) - { style_option = SYMBOL_STYLE; - tab_by_spacing = FALSE; - } - else - { fprintf(err_fp, "%s: unknown -p option %s\n", ErrorHeader(), str); + GetArg(style_option, "usage: -p<printstyle>", FALSE); + if( strcmp(style_option, "fixed") != 0 && + strcmp(style_option, "varying") != 0 && + strcmp(style_option, "symbol") != 0 ) + { fprintf(err_fp, "%s: unknown -p option %s\n", ErrorHeader(), + style_option); exit(1); } break; @@ -1588,6 +3763,17 @@ int main(int argc, char *argv[]) break; + case 'S': + + /* read alternative setup file */ + if( raw_seen ) + { fprintf(err_fp, "%s: -s illegal with -r option\n", ErrorHeader()); + exit(1); + } + GetArg(setup_option, "usage: -S<filename>", FALSE); + break; + + case 'n': if( raw_seen ) @@ -1615,7 +3801,7 @@ int main(int argc, char *argv[]) { fprintf(err_fp, "%s: -u illegal with -r option\n", ErrorHeader()); exit(1); } - PrintUsage(err_fp); + PrintUsage(); exit(0); break; @@ -1627,48 +3813,22 @@ int main(int argc, char *argv[]) exit(1); } GetArg(language_option, "usage: -l<language>", FALSE); - for( i = 0; languages[i] != (LANGUAGE *) NULL; i++ ) - { - for( j = 0; languages[i]->names[j] != (char *) NULL; j++ ) - { - if( strcmp(languages[i]->names[j], language_option) == 0 ) - break; - } - if( languages[i]->names[j] != (char *) NULL ) - break; - } - if( languages[i] != (LANGUAGE *) NULL ) + i = 0; j = 0; + while( lang == NO_LANGUAGE && languages[i] != NO_LANGUAGE ) { - language = languages[i]; + if( languages[i]->names[j] == (char *) NULL ) + i++, j = 0; + else if( strcmp(languages[i]->names[j], language_option) == 0 ) + lang = languages[i]; + else + j++; } - else + if( lang == NO_LANGUAGE ) { fprintf(err_fp, "%s: unknown language %s\n", ErrorHeader(), language_option); exit(1); } - - /* set style unless already done by a -p flag */ - if( style_option == NO_STYLE ) - { - if( strcmp(language->default_style, "fixed") == 0 ) - { style_option = FIXED_STYLE; - } - else if( strcmp(language->default_style, "varying") == 0 ) - { style_option = VARYING_STYLE; - tab_by_spacing = FALSE; - } - else if( strcmp(language->default_style, "symbol") == 0 ) - { style_option = SYMBOL_STYLE; - tab_by_spacing = FALSE; - } - else - { fprintf(err_fp, - "%s internal error: bad default print style for language %s\n", - ErrorHeader(), language_option); - abort(); - } - } break; @@ -1695,7 +3855,7 @@ int main(int argc, char *argv[]) } /* for */ /* make sure we have a language */ - if( language == (LANGUAGE *) NULL ) + if( lang == NO_LANGUAGE ) { fprintf(err_fp, "%s: missing -l option\n", ErrorHeader()); exit(0); @@ -1713,87 +3873,28 @@ int main(int argc, char *argv[]) } /* process the file */ - SetupTokens(language, err_fp); + SetupLanguage(lang); line_pos = 1; curr_line[line_pos] = '\n'; /* forces line read */ line_num = 0; - NextChar(in_fp); - Process(language, in_fp, out_fp, err_fp, (TOKEN *) NULL); + NextChar(); + Process(lang, (TOKEN *) NULL, ""); } else if( file_count > 0 ) - { - int ch; - char *style_str, *font_str, *size_str, *line_str, *face_str; - char *tabin_str, *tabout_str; - - /* sort out the options' values */ - switch( style_option ) - { - - case FIXED_STYLE: - - style_str = "fixed"; - face_str = "Base"; - font_str = font_option != NULL ? font_option : "Courier"; - size_str = size_option != NULL ? size_option : "9p"; - line_str = line_option != NULL ? line_option : "1.1fx"; - tabin_str = tabin_option != NULL ? tabin_option : "8"; - tabout_str = tabout_option != NULL ? tabout_option : "8s"; - break; + { int ch; - - case NO_STYLE: - case VARYING_STYLE: - - style_str = "varying"; - face_str = "Slope"; - font_str = font_option != NULL ? font_option : "Times"; - size_str = size_option != NULL ? size_option : "10p"; - line_str = line_option != NULL ? line_option : "1.1fx"; - tabin_str = tabin_option != NULL ? tabin_option : "8"; - tabout_str = tabout_option != NULL ? tabout_option : "3f"; - break; - - - case SYMBOL_STYLE: - - style_str = "symbol"; - face_str = "Slope"; - font_str = font_option != NULL ? font_option : "Times"; - size_str = size_option != NULL ? size_option : "10p"; - line_str = line_option != NULL ? line_option : "1.1fx"; - tabin_str = tabin_option != NULL ? tabin_option : "8"; - tabout_str = tabout_option != NULL ? tabout_option : "3f"; - break; - - - default: - - fprintf(err_fp, "%s internal error in -p option\n", ErrorHeader()); - abort(); - break; - } - /* make sure we have an output file */ if( out_fp == (FILE *) NULL ) out_fp = stdout; /* print the initial @Use clauses etc.*/ - fprintf(out_fp, "%s%s\n", "@Sy", "sInclude { progf }"); fprintf(out_fp, "%s%s\n", "@Sy", "sInclude { doc }"); - fprintf(out_fp, "@Use { @ProgSetup\n"); - fprintf(out_fp, " language { %s }\n", language_option); - fprintf(out_fp, " style { %s }\n", style_str); - fprintf(out_fp, " %sfont { %s }\n", style_str, font_str); - fprintf(out_fp, " %ssize { %s }\n", style_str, size_str); - fprintf(out_fp, " %sline { %s }\n", style_str, line_str); - fprintf(out_fp, " %stabin { %s }\n", style_str, tabin_str); - fprintf(out_fp, " %stabout { %s }\n", style_str, tabout_str); - fprintf(out_fp, "}\n"); + if( setup_option != NULL ) + fprintf(out_fp, "%s%s { %s }\n", "@In", "clude", setup_option); + else + fprintf(out_fp, "%s%s { %s }\n", "@Sy", "sInclude", lang->setup_file); fprintf(out_fp, "@Document\n"); - fprintf(out_fp, " @InitialFont { \"%s\" \"%s\" \"%s\" }\n", - font_str, face_str, size_str); - fprintf(out_fp, " @InitialBreak { lines \"%s\" nohyphen }\n", line_str); + fprintf(out_fp, " @InitialBreak { lines 1.2fx nohyphen }\n"); fprintf(out_fp, "//\n"); fprintf(out_fp, "%s%s\n", "@Text @Be", "gin"); @@ -1805,7 +3906,7 @@ int main(int argc, char *argv[]) if( in_fp == NULL ) { fprintf(err_fp, "%s: skipping input file %s (cannot open)\n", ErrorHeader(), file_names[i]); - continue; + continue; } strcpy(file_name, file_names[i]); @@ -1817,11 +3918,24 @@ int main(int argc, char *argv[]) file_names[i]); /* print file name and contents (don't format, let Lout call back) */ - /* this string has been disguised so as not to fool progwlout! */ - fprintf(out_fp, "%s%s%s\n", "@P", "rog @Be", "gin"); + /* this string has been disguised to avoid recognition by prg2lout */ + fprintf(out_fp, "%s\n", lang->lang_sym); + if( style_option != NULL ) + fprintf(out_fp, " style { %s }\n", style_option ); + if( font_option != NULL ) + fprintf(out_fp, " font { %s }\n", font_option ); + if( size_option != NULL ) + fprintf(out_fp, " size { %s }\n", size_option ); + if( line_option != NULL ) + fprintf(out_fp, " line { %s }\n", line_option ); + if( tabin_option != NULL ) + fprintf(out_fp, " tabin { %s }\n", tabin_option ); + if( tabout_option != NULL ) + fprintf(out_fp, " tabout { %s }\n", tabout_option ); + fprintf(out_fp, "%s%s\n", "@Be", "gin"); while( (ch = getc(in_fp)) != EOF ) - putc(ch, out_fp); - fprintf(out_fp, "%s%s%s\n", "@E", "nd @P", "rog"); + putc(ch, out_fp); + fprintf(out_fp, "%s%s %s\n", "@E", "nd", lang->lang_sym); } /* finish off whole input */ |