diff options
author | Jeffrey H. Kingston <jeff@it.usyd.edu.au> | 2010-09-14 20:36:35 +0000 |
---|---|---|
committer | Jeffrey H. Kingston <jeff@it.usyd.edu.au> | 2010-09-14 20:36:35 +0000 |
commit | b10d39aec443165093f8f28bc6f940530b89cdaf (patch) | |
tree | 63a1ef3b3f1d2562c498291cda341a2171a1fe1c /prg2lout.c | |
parent | 2f4268e5e02216be53cd85816362191373512463 (diff) | |
download | lout-b10d39aec443165093f8f28bc6f940530b89cdaf.tar.gz |
Lout 3.21.
git-svn-id: http://svn.savannah.nongnu.org/svn/lout/trunk@11 9365b830-b601-4143-9ba8-b4a8e2c3339c
Diffstat (limited to 'prg2lout.c')
-rw-r--r-- | prg2lout.c | 1460 |
1 files changed, 1029 insertions, 431 deletions
@@ -45,12 +45,12 @@ /* end (cprint and cprintf, or their clones for other languages) does the */ /* formatting. For example, the C text */ /* */ -/* inc = inc + 1 */ +/* inc = inc / 2 */ /* */ /* would be classified by prg2lout into identifier, operator, identifier, */ /* operator, number; and consequently prg2lout would emit */ /* */ -/* @PI{"inc"} @PO{"="} @PI{"inc"} @PO{"+"} @PN{"1"} */ +/* @PI{inc} @PO{=} @PI{inc} @PO{"/"} @PN{2} */ /* */ /* which is readable by Lout, thanks to having quotes around everything */ /* potentially dangerous, and clearly tells Lout, by means of the commands */ @@ -104,11 +104,13 @@ /* the list of tokens of the language, and its keywords. */ /* */ /* Step 4. Add your language variable to the list in the initializer of */ -/* variable languages, as you can see the others have been done. */ +/* variable languages, as you can see the others have been done. Try to */ +/* keep the list alphabetical to deflect any charges of language bias. */ /* */ -/* Step 5. If any lists of initializers now contain more than */ -/* MAX_ARRAY_LENGTH-1 elements, increase MAX_ARRAY_LENGTH until they don't. */ -/* The gcc compiler will warn you if you forget to do this. */ +/* Step 5. If any lists of initializers now contain more than MAX_STARTS, */ +/* MAX_STARTS2, MAX_NAMES, MAX_TOKENS, or MAX_KEYWORDS elements, increase */ +/* these constants until they don't. The gcc compiler will warn you if */ +/* you forget to do this. */ /* */ /* Step 6. Recompile and reinstall prg2lout, test "prg2lout -u" then */ /* "prg2lout -l <mylanguage> <myfile> | lout -s > out.ps". */ @@ -126,9 +128,49 @@ #include <stdio.h> #include <stdlib.h> -#define FALSE 0 -#define TRUE 1 -#define BOOLEAN unsigned +#define FALSE 0 +#define TRUE 1 +#define BOOLEAN unsigned +#define MAX_CHAR 256 +#define is_whitespace(ch) ((ch)==' ' || (ch)=='\t' || (ch)=='\n' || (ch)=='\f') + +/*****************************************************************************/ +/* */ +/* MAX_STARTS 1 + Maximum length of "starts" array in any token */ +/* MAX_STARTS2 1 + Maximum length of "starts2" array in any token */ +/* MAX_NAMES 1 + Maximum number of names for any language */ +/* MAX_TOKENS 1 + Maximum number of tokens in any language */ +/* MAX_KEYWORDS 1 + Maximum number of keywords in any language */ +/* */ +/*****************************************************************************/ + +#define MAX_STARTS 120 +#define MAX_STARTS2 30 +#define MAX_NAMES 10 +#define MAX_TOKENS 150 +#define MAX_KEYWORDS 350 + +/*****************************************************************************/ +/* */ +/* Bracketing pairs */ +/* */ +/* This declaration explains to prg2lout that { matches }, etc. */ +/* */ +/*****************************************************************************/ + +typedef struct { + char *first; + char *second; +} CHAR_PAIR; + +CHAR_PAIR pairs[] = { + { "(", ")" }, + { "{", "}" }, + { "[", "]" }, + { "<", ">" }, + { NULL, NULL } +}; + /*****************************************************************************/ /* */ @@ -157,6 +199,10 @@ char AllPrintableTabNL[] = " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\ ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n\t" ; +char AllPrintableTabNLFF[] = + " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\ +ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n\t\f" ; + char Letters[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ; char Letter_Digit[] = @@ -171,6 +217,26 @@ char Letter_Digit[] = #define SepDigits "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" #define HexDigits "A", "a", "B", "b", "C", "c", "D", "d", "E", "e", "F", "f" + +#define SepPunct "/", "(", "[", "{", "<", "!", "%", "#", "|", ",", \ + ":", ";", "$", "\"","^", "&", "*", "-", "=", "+", \ + "~", "'", "@", "?", ".", "`" + +#define BktPunct "", "(", "[", "{", "<", "", "", "", "", "", \ + "", "", "", "", "", "", "", "", "", "", \ + "", "", "", "", "", "" + +#define EndPunct "/", ")", "]", "}", ">", "!", "%", "#", "|", ",", \ + ":", ";", "$", "\"","^", "&", "*", "-", "=", "+", \ + "~", "'", "@", "?", ".", "`" + +#define PercentLetters \ +"%A", "%B", "%C", "%D", "%E", "%F", "%G", "%H", "%I", "%J", "%K", \ +"%L", "%M", "%N", "%O", "%P", "%Q", "%R", "%S", "%T", "%U", "%V", \ +"%W", "%X", "%Y", "%Z", \ +"%a", "%b", "%c", "%d", "%e", "%f", "%g", "%h", "%i", "%j", "%k", \ +"%l", "%m", "%n", "%o", "%p", "%q", "%r", "%s", "%t", "%u", "%v", \ +"%w", "%x", "%y", "%z", "%_" /*****************************************************************************/ @@ -203,12 +269,21 @@ char Letter_Digit[] = /* \ in the token is printed with a preceding \ as required in Lout. */ /* The usual arrangement for handling white space is that none of the */ /* tokens contain it; when it is encountered prg2lout generates the */ -/* appropriate Lout without being told (a space for a space, a newline */ -/* for a newline, @NP for a formfeed, and something clever for tab */ -/* which does the required thing). However, you can define a token */ -/* that contains white space if you wish. If you do this, the */ -/* quotation marks will be temporarily closed off, the white space */ -/* handled as just described, then the quotes opened again. */ +/* appropriate Lout without being told: a space for a space, a newline */ +/* for a newline (possibly triggering a line number on the next line), */ +/* @NP for a formfeed, and something clever for tab which does the */ +/* required thing. However, you can define a token that contains */ +/* white space if you wish, and then the effect will be: */ +/* */ +/* space and tab The quotation marks will be temporarily */ +/* closed off, the white space handled as just */ +/* described, then the quotes opened again */ +/* */ +/* newline and ff Both the quotation marks and the command */ +/* will be closed off, the white space handled */ +/* as just described, and then a new command */ +/* started. In effect, the token is broken into */ +/* a sequence of tokens at these characters. */ /* */ /* PRINT_NODELIMS_QUOTED. This is like PRINT_WHOLE_QUOTED except that */ /* the opening and closing delimiters of the token are omitted from */ @@ -222,7 +297,18 @@ char Letter_Digit[] = /* so that it is the user's responsibility to ensure that its content */ /* is legal Lout; and when the command is another filter command, so */ /* that the token content will not go directly into Lout anyway, it */ -/* will go through the other filter first. */ +/* will go through the other filter first. Since the result has to */ +/* be verbatim, there is no special treatment of white space characters */ +/* and no insertion of line numbers. However, if braces are printed */ +/* they really ought to match, so prog2lout checks this and will */ +/* complain and insert braces into the verbatim part if necessary. */ +/* */ +/*****************************************************************************/ + + +/*****************************************************************************/ +/* */ +/* Meaning of TOKEN fields (ctd.) */ /* */ /* PRINT_NODELIMS_UNQUOTED. This is like PRINT_WHOLE_UNQUOTED except */ /* that the opening and closing delimiters of the token are omitted. */ @@ -232,19 +318,12 @@ char Letter_Digit[] = /* */ /* PRINT_COMMAND_ONLY. This ignores the token and prints just the */ /* command, presumably because the command says it all for that token. */ -/* When using PRINT_COMMAND_ONLY you will probably need to finish your */ -/* command with a space: since there are no following braces in this */ +/* When using PRINT_COMMAND_ONLY you will probably need to enclose the */ +/* command with braces: since there are no following braces in this */ /* print style, your command will run into the next one otherwise. */ /* */ -/*****************************************************************************/ - - -/*****************************************************************************/ -/* */ -/* Meaning of TOKEN fields (ctd.) */ -/* */ /* command */ -/* The Lout command to print. This command could be an legal Lout; */ +/* The Lout command to print. This command could be any legal Lout; */ /* programming language setup files offer the following Lout symbols */ /* that make the most common commands: */ /* */ @@ -269,7 +348,8 @@ char Letter_Digit[] = /* identifiers usually have command @PI and alternate_command @PK. */ /* */ /* following_command */ -/* Print this Lout command (or commands) after the token. */ +/* Print this Lout command (or commands) after the token. If it is a */ +/* "broken" multi-line token, print this command after each fragment */ /* */ /* start_line_only */ /* A Boolean field. If TRUE, this token is to be recognized only */ @@ -281,33 +361,38 @@ char Letter_Digit[] = /* then it deems that this token has begun. The recognized string */ /* is the token's "starting delimiter". */ /* */ -/* starts2[], ends2[] */ -/* These fields each hold an array of strings, and the two arrays */ -/* must have equal length. If they have length zero, these fields */ -/* do not apply. Otherwise, they modify the meaning of starts[] and */ -/* end_delimiter below. Their effect is best seen by looking at some */ -/* examples from Perl, their only user at the time of writing: */ +/*****************************************************************************/ + + +/*****************************************************************************/ /* */ -/* q/hello/ qq/hello/ qq?hello? qq{hello} */ +/* Meaning of TOKEN fields (ctd.) */ +/* */ +/* starts2[], brackets2[], ends2[] */ +/* These fields each hold an array of strings, and the three arrays */ +/* must have equal length. If starts2[] has length zero, these fields */ +/* do not apply. Otherwise, they modify the meaning of starts[], */ +/* bracket_delimiter, and end_delimiter below. Their effect is best */ +/* seen by looking at some examples from Perl, their main user: */ +/* */ +/* q/hello/ qq/hello/ qq?hello? qq{hel{}lo} */ /* */ /* These strings may begin with q, qq, qx, and several other things; */ /* this is then followed by a single character which determines the */ /* string terminator; e.g. / means "terminate with /", { means */ -/* "terminate with }", etc. This is implemented by putting these */ -/* pairs of strings in corresponding places in starts[] and ends[]. */ +/* "terminate with }", etc. In some cases the start and end delims */ +/* come in matching pairs, and then there may be nested matching */ +/* pairs. This is implemented as follows: */ /* */ -/* starts: { "q", "qq", "qx" } */ -/* starts2: { "/", "?", "{" } */ -/* ends2: { "/", "?", "}" } */ -/* */ -/* PerlQTypeToken is a larger example of this. */ -/* */ -/*****************************************************************************/ - - -/*****************************************************************************/ +/* starts: { "q", "qq" } */ +/* starts2: { "/", "?", "{" } */ +/* brackets2: { "", "", "{" } */ +/* ends2: { "/", "?", "}" } */ /* */ -/* Meaning of TOKEN fields (ctd.) */ +/* Briefly, every token with non-null starts2 is expanded into a set */ +/* of tokens, one for each element i of starts2, whose starting delims */ +/* are starts with starts2[i] added, bracketing delim brackets2[i], */ +/* and end_delim ends2[i]. PerlQTypeToken is a larger example of this. */ /* */ /* legal */ /* This string defines the set of legal characters inside this token. */ @@ -342,6 +427,18 @@ char Letter_Digit[] = /* number of tokens, and they may have inner escapes too; prg2lout */ /* imposes no limit on the depth of nesting of inner escapes. */ /* */ +/*****************************************************************************/ + + +/*****************************************************************************/ +/* */ +/* Meaning of TOKEN fields (ctd.) */ +/* */ +/* bracket_delimiter */ +/* If this string is encountered within a token (not escaped), it */ +/* brackets with the next end_delimiter, meaning that the next end */ +/* delimiter will not end the token. */ +/* */ /* end_delimiter */ /* This string shows how the token ends; for example, a string would */ /* have end_delimiter ". If empty, it means that the token ends */ @@ -361,15 +458,10 @@ char Letter_Digit[] = /* */ /* s/abc/ABC/ */ /* */ -/* and similar things as single tokens. Note that this works without */ -/* modification on s{abc}{ABC} provided { is legal within strings: as */ -/* long as the token terminates at the second }, we are safe. */ +/* etc. as single tokens. If there is a bracket delimiter (see above), */ +/* this will look for a new matching delimiter pair, as in s{}<>. */ /* */ /*****************************************************************************/ - - -#define MAX_CHAR 256 -#define MAX_ARRAY_LENGTH 401 #define PRINT_WHOLE_QUOTED 1 #define PRINT_NODELIMS_QUOTED 2 #define PRINT_WHOLE_UNQUOTED 3 @@ -380,18 +472,18 @@ char Letter_Digit[] = typedef struct token_rec { char *name; int print_style; - char *command; - char *alternate_command; - char *following_command; + char *command, *alternate_command, *following_command; BOOLEAN start_line_only; - char *starts[MAX_ARRAY_LENGTH]; - char *starts2[MAX_ARRAY_LENGTH]; - char *ends2[MAX_ARRAY_LENGTH]; + char *starts[MAX_STARTS]; + char *starts2[MAX_STARTS2]; + char *brackets2[MAX_STARTS2]; + char *ends2[MAX_STARTS2]; char *legal; char *escape; char *escape_legal; char *inner_escape; char *end_inner_escape; + char *bracket_delimiter; char *end_delimiter; BOOLEAN end_start_line_only; BOOLEAN want_two_ends; @@ -417,12 +509,14 @@ TOKEN CStringToken = { FALSE, /* token allowed anywhere, not just start of line */ { "\"" }, /* strings begin with a " character */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable is OK */ "\\", /* within strings, \\ is the escape character */ AllPrintablePlusNL, /* after escape char, any printable char or nl OK */ "", /* strings do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "\"", /* strings end with a " character */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -437,12 +531,14 @@ TOKEN CCharacterToken = { FALSE, /* token allowed anywhere, not just start of line */ { "'" }, /* characters begin with a ' character */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable character is OK */ "\\", /* within characters, \\ is the escape character */ AllPrintable, /* after escape char, any printable char is OK */ "", /* characters do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "'", /* characters end with a ' character */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -458,12 +554,13 @@ TOKEN EiffelStringToken = { FALSE, /* token allowed anywhere, not just start of line */ { "\"" }, /* strings begin with a " character */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable except " is OK */ "%", /* within strings, % is the escape character */ - AllPrintable, /* after escape char, any printable char is OK */ - "", /* strings do not permit "inner escapes" */ + AllPrintable, /* after escape char, any printable char is OK */ "", /* strings do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "\"", /* strings end with a " character */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -478,12 +575,14 @@ TOKEN EiffelCharacterToken = { FALSE, /* token allowed anywhere, not just start of line */ { "'" }, /* characters begin with a ' character */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ AllPrintable, /* inside, any printable except ' is OK */ "%", /* within characters, % is the escape character */ AllPrintable, /* after escape char, any printable char is OK */ "", /* characters do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "'", /* characters end with a ' character */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -505,12 +604,14 @@ TOKEN IdentifierToken = { FALSE, /* token allowed anywhere, not just start of line */ { SepLetters, "_" }, /* identifiers begin with any letter or _ */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ Letter_Digit, /* inside, letters, underscores, digits are OK */ "", /* no escape character within identifiers */ "", /* so nothing legal after escape char either */ "", /* identifiers do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "", /* identifiers do not end with a delimiter */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -526,18 +627,20 @@ TOKEN IdentifierToken = { TOKEN NumberToken = { "number", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ - "@PN", /* Lout command for formatting numbers */ + "@PN", /* Lout command for formatting numbers */ "", /* no alternate command */ "", /* no following command */ FALSE, /* token allowed anywhere, not just start of line */ { SepDigits }, /* numbers must begin with a digit */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ "0123456789.eE", /* inside, digits, decimal point, exponent */ "", /* no escape character within numbers */ "", /* so nothing legal after escape char either */ "", /* numbers do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "", /* numbers do not end with a delimiter */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -559,12 +662,14 @@ TOKEN CCommentToken = { FALSE, /* token allowed anywhere, not just start of line */ { "/*" }, /* comments begin with this character pair */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ - AllPrintableTabNL, /* inside, any printable char, tab, or nl is OK */ + AllPrintableTabNLFF, /* inside, any printable char, tab, nl, ff is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* C comments do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "*/", /* comments end with this character pair */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -579,12 +684,14 @@ TOKEN CPPCommentToken = { FALSE, /* token allowed anywhere, not just start of line */ { "//" }, /* comments begin with this character pair */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ - AllPrintable, /* inside, any printable char is OK (not NL) */ + AllPrintablePlusTab, /* inside, any printable char is OK (not NL) */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* C comments do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "", /* no end delimiter (end of line will end it) */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -600,12 +707,14 @@ TOKEN EiffelCommentToken = { FALSE, /* token allowed anywhere, not just start of line */ { "--" }, /* comments begin with this character pair */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ - AllPrintable, /* inside, any printable char is OK */ + AllPrintablePlusTab, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "`", /* start of "inner escape" in Eiffel comment */ "'", /* end of "inner escape" in Eiffel comment */ + "", /* no bracketing delimiter */ "", /* no ending delimiter; end of line will end it */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -620,12 +729,14 @@ TOKEN BlueCommentToken = { FALSE, /* token allowed anywhere, not just start of line */ { "==", "--" }, /* comments begin with this character pair */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ - AllPrintable, /* inside, any printable char is OK */ + AllPrintablePlusTab, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "`", /* start of "inner escape" in Blue comment */ "'", /* end of "inner escape" in Blue comment */ + "", /* no bracketing delimiter */ "", /* no ending delimiter; end of line will end it */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -649,12 +760,14 @@ TOKEN CCommentEscapeToken = { FALSE, /* token allowed anywhere, not just start of line */ { "/*@" }, /* escape comments begin with this delimiter */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ - AllPrintableTabNL, /* inside, any printable char, tab, or nl is OK */ + AllPrintableTabNLFF, /* inside, any printable char, tab, nl, ff is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* no "inner escape" in escape comments */ "", /* so no end of "inner escape" either */ + "", /* no bracketing delimiter */ "*/", /* comments end with this character pair */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -669,12 +782,14 @@ TOKEN CPPCommentEscapeToken = { FALSE, /* token allowed anywhere, not just start of line */ { "//@" }, /* escape comments begin with this delimiter */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ - AllPrintable, /* inside, any printable char is OK */ + AllPrintablePlusTab, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* no "inner escape" in escape comments */ "", /* so no end of "inner escape" either */ + "", /* no bracketing delimiter */ "", /* no end delimiter (end of line will end it) */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -690,12 +805,14 @@ TOKEN EiffelCommentEscapeToken = { FALSE, /* token allowed anywhere, not just start of line */ { "--@" }, /* escape comments begin with this delimiter */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ - AllPrintable, /* inside, any printable char is OK */ + AllPrintablePlusTab, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* no "inner escape" in escape comments */ "", /* so no end of "inner escape" either */ + "", /* no bracketing delimiter */ "", /* no ending delimiter; end of line will end it */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -710,12 +827,14 @@ TOKEN BlueCommentEscapeToken = { FALSE, /* token allowed anywhere, not just start of line */ { "==@", "--@" }, /* escape comments begin with these delimiters */ { NULL }, /* no start2 needed */ + { NULL }, /* so no brackets2 either */ { NULL }, /* so no end2 either */ - AllPrintable, /* inside, any printable char is OK */ + AllPrintablePlusTab, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* no "inner escape" in escape comments */ "", /* so no end of "inner escape" either */ + "", /* no bracketing delimiter */ "", /* no ending delimiter; end of line will end it */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -738,10 +857,12 @@ TOKEN BlueCommentEscapeToken = { FALSE, /* token not just start of line */ \ { str }, /* token begins (and ends!) with this */ \ { NULL }, /* no start2 needed */ \ + { NULL }, /* so no brackets2 either */ \ { NULL }, /* so no end2 either */ \ "", /* nothing inside, since no inside */ \ "", "", /* no escape character */ \ "", "", /* no inner escape; no end inner esc */ \ + "", /* no bracketing delimiter */ \ "", /* no ending delimiter */ \ FALSE, /* end not have to be at line start */ \ FALSE, /* don't end delimiter twice to stop */ \ @@ -799,10 +920,12 @@ TOKEN ImpliesToken = FixedToken("=>", "implies @A @PO"); FALSE, /* token not just start of line */ \ { str }, /* token begins (and ends!) with this */ \ { NULL }, /* no start2 needed */ \ + { NULL }, /* so no bracket2 either */ \ { NULL }, /* so no end2 either */ \ "", /* nothing inside, since no inside */ \ "", "", /* no escape character */ \ "", "", /* no inner escape; no end inner esc */ \ + "", /* no bracketing delimiter */ \ "", /* no ending delimiter */ \ FALSE, /* end not have to be at line start */ \ FALSE, /* don't end delimiter twice to stop */ \ @@ -822,7 +945,8 @@ TOKEN EiffelDotToken = NoParameterToken(".", "{@PD}"); /* */ /* In the comments below, WCS refers to "Programming Perl", Second */ /* Edition (1996), by Wall, Christiansen, and Schwartz. However Perl */ -/* has changed since then and this code also reflects those changes. */ +/* has changed since then and this code also reflects those changes */ +/* based on the on-line documentation provided with the 5.6.0 release. */ /* */ /*****************************************************************************/ @@ -857,19 +981,11 @@ TOKEN EiffelDotToken = NoParameterToken(".", "{@PD}"); /* tr [a-z] */ /* [A-Z] */ /* */ -/* At present the tokens below have three problems implementing this: */ -/* */ -/* (1) When / appears without anything in front, it will be recognized */ -/* as a division operator, hence the following regular expression */ -/* will be treated as ordinary Perl, not as a regular expression; */ -/* */ -/* (2) When the delimiters are a matching pair, embedded occurrences */ -/* are not recognised as such: the first closing delimiter will be */ -/* taken to close the pair; */ -/* */ -/* (3) The case s{foo}(bar) is not implemented; the second bracketing */ -/* pair must be the same as the first; in fact prg2lout will keep */ -/* going, looking for a second closing brace, in this example. */ +/* Amazingly, the tokens below implement all of this perfectly except that */ +/* when / appears without anything in front, it will be recognized as a */ +/* regular expression provided that one of a long list of things precedes */ +/* it, otherwise it will be a division symbol. This is not perfect but */ +/* seems to come extremely close in practice. */ /* */ /*****************************************************************************/ @@ -877,18 +993,19 @@ TOKEN EiffelDotToken = NoParameterToken(".", "{@PD}"); TOKEN PerlSingleQuoteStringToken = { "''-string", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ - "@PS", /* Lout command for formatting strings */ + "@PS", /* Lout command for formatting strings */ "", /* no alternate command */ "", /* no following command */ FALSE, /* token allowed anywhere, not just start of line */ { "'" }, /* strings begin with a ' character */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* inside, any character at all is OK */ "\\", /* within strings, \\ is the escape character */ - "\\'", /* after escape, only \ and ' are OK (WCS p. 39) */ - "", /* strings do not permit "inner escapes" */ - "", /* and so there is no end innner escape either */ + AllCharacters, /* after escape, any character is OK (trust us) */ + "", "", /* no "inner escapes"; no end innner escape */ + "", /* no bracketing delimiter */ "\'", /* strings end with a ' character */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -897,18 +1014,19 @@ TOKEN PerlSingleQuoteStringToken = { TOKEN PerlDoubleQuoteStringToken = { "\"\"-string", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ - "@PS", /* Lout command for formatting strings */ + "@PS", /* Lout command for formatting strings */ "", /* no alternate command */ "", /* no following command */ FALSE, /* token allowed anywhere, not just start of line */ { "\"" }, /* strings begin with a " character */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* inside, any character at all is OK */ "\\", /* within strings, \\ is the escape character */ AllCharacters, /* after escape char, any character at all is OK */ - "", /* strings do not permit "inner escapes" */ - "", /* and so there is no end innner escape either */ + "", "", /* no "inner escapes"; no end innner escape */ + "", /* no bracketing delimiter */ "\"", /* strings end with a " character */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -923,12 +1041,13 @@ TOKEN PerlBackQuoteStringToken = { FALSE, /* token allowed anywhere, not just start of line */ { "`" }, /* strings begin with a ` character */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* inside, any character at all is OK */ "\\", /* within strings, \\ is the escape character */ AllCharacters, /* after escape char, any character at all is OK */ - "", /* strings do not permit "inner escapes" */ - "", /* and so there is no end innner escape either */ + "", "", /* no "inner escapes"; no end innner escape */ + "", /* no bracketing delimiter */ "`", /* strings end with a ` character */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -936,40 +1055,44 @@ TOKEN PerlBackQuoteStringToken = { TOKEN PerlQTypeStringToken = { - "q-type-string", /* used by error messages involving this token */ + "q-type string", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ - "@PS", /* Lout command for formatting strings */ + "@PS", /* Lout command for formatting strings */ "", /* no alternate command */ "", /* no following command */ FALSE, /* token allowed anywhere, not just start of line */ { "q", "qq", "qx", "qw", "qr", "m" }, /* q-type strings begin with these */ - { "/", "(", "[", "{", "<", "!", "%", "#", "|", ",", ":", ";" }, /*start2 */ - { "/", ")", "]", "}", ">", "!", "%", "#", "|", ",", ":", ";" }, /* end2 */ + { SepPunct }, /* start2 can be any punctuation character */ + { BktPunct }, /* bracketing delimiters to match SepPunct */ + { EndPunct }, /* end2 must match start2 */ AllCharacters, /* inside, any character at all is OK */ "\\", /* within strings, \\ is the escape character */ AllCharacters, /* after escape char, any character at all is OK */ "", /* strings do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* will be using bracket2 for bracket delimiter */ "", /* will be using end2 for the end delimiter here */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ }; TOKEN PerlSTypeStringToken = { - "s-type-string", /* used by error messages involving this token */ + "s-type string", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ - "@PS", /* Lout command for formatting strings */ + "@PS", /* Lout command for formatting strings */ "", /* no alternate command */ "", /* no following command */ FALSE, /* token allowed anywhere, not just start of line */ { "s", "y", "tr" }, /* s-type strings begin with these */ - { "/", "(", "[", "{", "<", "!", "%", "#", "|", ",", ":", ";" }, /*start2 */ - { "/", ")", "]", "}", ">", "!", "%", "#", "|", ",", ":", ";" }, /* end2 */ + { SepPunct }, /* start2 can be any punctuation character */ + { BktPunct }, /* bracketing delimiters to match SepPunct */ + { EndPunct }, /* end2 must match start2 */ AllCharacters, /* inside, any character at all is OK */ "\\", /* within strings, \\ is the escape character */ AllCharacters, /* after escape char, any character at all is OK */ "", /* strings do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* will be using bracket2 for bracket delimiter */ "", /* will be using end2 for the end delimiter here */ FALSE, /* end delimiter does not have to be at line start */ TRUE, /* need to see end delimiter twice to stop */ @@ -980,16 +1103,17 @@ TOKEN PerlSTypeStringToken = { /* */ /* Perl "bare" regular expressions */ /* */ -/* By a bare regular expression, we mean one that is not preceded */ -/* by q, qq, m etc. These are distinguished from division by being */ -/* preceded by one of =, =~, !~, split, if, not, unless, for, foreach, */ -/* or while, with up to two white space characters intervening. */ +/* By a bare regular expression, we mean one that is not preceded by m. */ +/* These are distinguished from division by being preceded by one of (, =, */ +/* =~, !~, split, if, and, &&, not, ||, xor, not, !, unless, for, foreach, */ +/* or while, with up to two white space characters intervening. Also, */ +/* a / at the start of a line is taken to begin a regular expression. */ /* */ /*****************************************************************************/ #define PerlREToken(start, com) \ { \ - "regexp", /* used by error messages */ \ + "regex", /* used by error messages */ \ PRINT_NODELIMS_QUOTED,/* no delims since we supply them */ \ com, /* the command */ \ "", /* no alternate command */ \ @@ -997,16 +1121,19 @@ TOKEN PerlSTypeStringToken = { FALSE, /* token allowed not just start of line */ \ { start }, /* preceding token in this case */ \ { "/", " /", "\t/", " /", " \t/", "\t /", "\t\t/" }, /* start2 */ \ - { "/", "/", "/", "/", "/", "/", "/" }, /* end2 */ \ + { "", "", "", "", "", "", "" }, /* bracket2 */ \ + { "/", "/", "/", "/", "/", "/", "/" }, /* end2 */ \ AllCharacters, /* any character OK inside */ \ "\\", /* \\ is the escape character */ \ AllCharacters, /* after escape char, any is OK */ \ "", /* no inner escapes */ \ "", /* no end innner escape either */ \ + "", /* will be using bracket2 here */ \ "", /* will be using end2 here */ \ FALSE, /* no need to end at line start */ \ FALSE, /* don't want end delimiter twice */ \ } + TOKEN PerlRegExpLPar = PerlREToken("(", "@PO{\"(\"} @PS{\"/\"}@PS"); TOKEN PerlRegExpEq = PerlREToken("=", "@PO{\"=\"} @PS{\"/\"}@PS"); @@ -1014,7 +1141,13 @@ TOKEN PerlRegExpMatch = PerlREToken("=~", "@PO{\"=~\"} @PS{\"/\"}@PS"); TOKEN PerlRegExpNoMatch = PerlREToken("!~", "@PO{\"!~\"} @PS{\"/\"}@PS"); TOKEN PerlRegExpSplit = PerlREToken("split", "@PK{split} @PS{\"/\"}@PS"); TOKEN PerlRegExpIf = PerlREToken("if", "@PK{if} @PS{\"/\"}@PS"); +TOKEN PerlRegExpAnd = PerlREToken("and", "@PK{and} @PS{\"/\"}@PS"); +TOKEN PerlRegExpAnd2 = PerlREToken("&&", "@PO{\"&&\"} @PS{\"/\"}@PS"); +TOKEN PerlRegExpOr = PerlREToken("or", "@PK{or} @PS{\"/\"}@PS"); +TOKEN PerlRegExpOr2 = PerlREToken("||", "@PO{\"||\"} @PS{\"/\"}@PS"); +TOKEN PerlRegExpXor = PerlREToken("xor", "@PK{xor} @PS{\"/\"}@PS"); TOKEN PerlRegExpNot = PerlREToken("not", "@PK{not} @PS{\"/\"}@PS"); +TOKEN PerlRegExpNot2 = PerlREToken("!", "@PO{\"!\"} @PS{\"/\"}@PS"); TOKEN PerlRegExpUnless = PerlREToken("unless", "@PK{unless} @PS{\"/\"}@PS"); TOKEN PerlRegExpFor = PerlREToken("for", "@PK{for} @PS{\"/\"}@PS"); TOKEN PerlRegExpForEach = PerlREToken("foreach","@PK{foreach} @PS{\"/\"}@PS"); @@ -1022,7 +1155,7 @@ TOKEN PerlRegExpWhile = PerlREToken("while", "@PK{while} @PS{\"/\"}@PS"); TOKEN PerlRegExpStartLineToken = { - "regexp", /* used by error messages */ + "regex", /* used by error messages */ PRINT_WHOLE_QUOTED, /* we can print the whole thing this time */ "@PS", /* the command */ "", /* no alternate command */ @@ -1030,12 +1163,14 @@ TOKEN PerlRegExpStartLineToken = TRUE, /* token allowed only at start of line */ { "/" }, /* starting delimiter (so easy!) */ { NULL }, /* no start2 */ - { NULL }, /* no end2 */ + { NULL }, /* so no bracket2 either */ + { NULL }, /* so no end2 either */ AllCharacters, /* any character OK inside */ "\\", /* \\ is the escape character */ AllCharacters, /* after escape char, any is OK */ "", /* no inner escapes */ "", /* no end innner escape either */ + "", /* no bracketing delimiter */ "/", /* ending delimiter */ FALSE, /* no need to end at line start */ FALSE, /* don't want end delimiter twice */ @@ -1044,7 +1179,7 @@ TOKEN PerlRegExpStartLineToken = /*****************************************************************************/ /* */ -/* Perl's here-documents */ +/* Perl's here-documents [OBSOLETE CODE - see following for replacement] */ /* */ /* At present the only terminating strings recognized are EOT, EOF, END, */ /* and the empty string. These may all be quoted in the usual ways. */ @@ -1057,14 +1192,16 @@ TOKEN PerlRegExpStartLineToken = PRINT_NODELIMS_QUOTED,/* no delims since we supply them */ \ startcom, /* the command */ \ "", /* no alternate command */ \ - endcom, /* following command (final /) */ \ + endcom, /* following command */ \ FALSE, /* token allowed not just start of line */ \ { startstr }, /* starting delimiter */ \ { NULL }, /* no start2 */ \ + { NULL }, /* so no bracket2 either */ \ { NULL }, /* no end2 */ \ AllCharacters, /* any character OK inside */ \ "", "", /* no escape character */ \ "", "", /* no inner escapes */ \ + "", /* no bracketing delimiter */ \ endstr, /* token ends with this */ \ TRUE, /* must be found at line start */ \ FALSE, /* don't want end delimiter twice */ \ @@ -1075,6 +1212,7 @@ TOKEN PerlRegExpStartLineToken = #define sEND "\n@PS{\"END\"}\n" #define sBLA "\n@PS{\"\"}\n" +/* *** TOKEN HereEOTuq = X("<<EOT", "EOT\n", "@PO{<<}@PS{\"EOT\"}@PS", sEOT); TOKEN HereEOTdq = X("<<\"EOT\"","EOT\n", "@PO{<<}@PS{\"\\\"EOT\\\"\"}@PS",sEOT); TOKEN HereEOTfq = X("<<'EOT'", "EOT\n", "@PO{<<}@PS{\"'EOT'\"}@PS", sEOT); @@ -1094,6 +1232,58 @@ TOKEN HereBLAuq = X("<< ", "\n", "@PO{<<} @PS", sBLA); TOKEN HereBLAdq = X("<<\"\"", "\n", "@PO{<<}@PS{\"\\\"\\\"\"}@PS", sBLA); TOKEN HereBLAfq = X("<<''", "\n", "@PO{<<}@PS{\"''\"}@PS", sBLA); TOKEN HereBLAbq = X("<<``", "\n", "@PO{<<}@PS{\"``\"}@PS", sBLA); +*** */ + + +/*****************************************************************************/ +/* */ +/* Perl's here-documents [OBSOLETE CODE - see following for replacement] */ +/* */ +/* At present the only terminating strings recognized are EOT, EOF, END, */ +/* and the empty string. These may all be quoted in the usual ways. */ +/* */ +/*****************************************************************************/ + +#define HereToken(startstr, endstr) \ +{ \ + "here-document", /* used by error messages */ \ + PRINT_WHOLE_QUOTED, /* as usual */ \ + "@PS", /* here documents are strings */ \ + "", /* no alternate command */ \ + "", /* no following command */ \ + FALSE, /* token allowed not just start of line */ \ + { startstr }, /* starting delimiter */ \ + { NULL }, /* no start2 */ \ + { NULL }, /* no bracket2 */ \ + { NULL }, /* no end2 */ \ + AllCharacters, /* any character OK inside */ \ + "", "", /* no escape character */ \ + "", "", /* no inner escapes */ \ + "", /* no bracketing delimiter */ \ + endstr, /* token ends with this */ \ + TRUE, /* must be found at line start */ \ + FALSE, /* don't want end delimiter twice */ \ +} + +TOKEN HereEOTuq = HereToken("<<EOT", "EOT\n"); +TOKEN HereEOTdq = HereToken("<<\"EOT\"","EOT\n"); +TOKEN HereEOTfq = HereToken("<<'EOT'", "EOT\n"); +TOKEN HereEOTbq = HereToken("<<`EOT`", "EOT\n"); + +TOKEN HereEOFuq = HereToken("<<EOF", "EOF\n"); +TOKEN HereEOFdq = HereToken("<<\"EOF\"","EOF\n"); +TOKEN HereEOFfq = HereToken("<<'EOF'", "EOF\n"); +TOKEN HereEOFbq = HereToken("<<`EOF`", "EOF\n"); + +TOKEN HereENDuq = HereToken("<<END", "END\n"); +TOKEN HereENDdq = HereToken("<<\"END\"","END\n"); +TOKEN HereENDfq = HereToken("<<'END'", "END\n"); +TOKEN HereENDbq = HereToken("<<`END`", "END\n"); + +TOKEN HereBLAuq = HereToken("<< ", "\n"); +TOKEN HereBLAdq = HereToken("<<\"\"", "\n"); +TOKEN HereBLAfq = HereToken("<<''", "\n"); +TOKEN HereBLAbq = HereToken("<<``", "\n"); /*****************************************************************************/ @@ -1109,52 +1299,66 @@ TOKEN HereBLAbq = X("<<``", "\n", "@PO{<<}@PS{\"``\"}@PS", sBLA); /* */ /* In addition we have to consider that variable names may be preceded */ /* by $, @, %, &, or *. Whether these are part of the variable or not is */ -/* rather doubtful. We will treat $ and @ as part of the variable and the */ -/* others not (since they occur elsewhere in the token list anyway); plus */ -/* we have a separate token type for identifiers beginning with $ and */ +/* rather doubtful. We will treat $, @ and % as part of the variable and */ +/* the others not (since they occur elsewhere in the token list anyway); */ +/* plus we have a separate token type for identifiers beginning with $ and */ /* followed by one character, which we will list explicitly. */ +/* We also deal with the $^. variables, e.g. $^W. */ /* */ /*****************************************************************************/ TOKEN PerlIdentifierToken = { "identifier", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ - "@PI", /* Lout command for formatting identifiers */ + "@PI", /* Lout command for formatting identifiers */ "@PK", /* Alternate command (for keywords) */ "", /* no following command */ FALSE, /* token allowed anywhere, not just start of line */ - {SepLetters,"_","$","@"}, /* identifiers begin with these */ + { SepLetters, "_", "$", "@", PercentLetters}, /* identifier starts */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ Letter_Digit, /* inside, letters, underscores, digits are OK */ "", /* no escape character within identifiers */ "", /* so nothing legal after escape char either */ "", /* identifiers do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "", /* identifiers do not end with a delimiter */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ }; + TOKEN PerlSpecialIdentifierToken = { - "special identifier", /* used by error messages involving this token */ + "special variable", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ - "@PI", /* Lout command for formatting identifiers */ - "@PK", /* Alternate command (for keywords) */ + "@PI", /* Lout command for formatting identifiers */ + "", /* Alternate command (for keywords) */ "", /* no following command */ FALSE, /* token allowed anywhere, not just start of line */ - { "$_", "$.", "$/", "$,", "$\\", "$\"", "$;", "$^L", "$:", - "$^A", "$#", "$?", "$!", "$@", "$$", "$<", "$>", "$(", - "$)", "$0", "$[", "$]", "$^D", "$^E", "$^F", "$^H", "$^I", - "$^M", "$^O", "$^P", "$^T", "$^W", "$^X" - }, /* all possible values of special variables */ + { + /* Taken from 5.6.0's perlvar.pod */ + /* NB special variables that begin $^, e.g. $^D can also be written as */ + /* dollar control D (yes literal control D) -- but we ignore this */ + /* wrinkle. We only list the first 9 regex match variables. */ + + /* Only the ones not recognized elsewhere are being kept now */ + "$&", "$`", "$'", "$+", "@+", "$*", "$.", "$/", "$|", "$,", "$\\", "$\"", + "$;", "$#", "$%", "$=", "$-", "@-", "$~", "$^", "$:", "$^L", "$^A", "$?", + "$!", "$^E", "$@", "$$", "$<", "$>", "$(", "$)", "$0", "$[", "$]", "$^C", + "$^D", "$^F", "$^H", "%^H", "$^I", "$^M", "$^O", "$^P", "$^R", "$^S", + "$^T", "$^V", "$^W", "${^WARNING_BITS}", "${^WIDE_SYSTEM_CALLS}", "$^X", + }, { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ "", /* nothing allowed inside, since ends after start */ "", /* no escape character within identifiers */ "", /* so nothing legal after escape char either */ "", /* identifiers do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "", /* identifiers do not end with a delimiter */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1175,24 +1379,28 @@ TOKEN PerlSpecialIdentifierToken = { /* 4_294_967_296 # underline for legibility */ /* */ /* Implementation is straightforward; hexadecimal is a separate token. */ +/* Binary numbers introduced with 5.6.0 of the form 0b1010 are also */ +/* catered for. */ /* */ /*****************************************************************************/ TOKEN PerlLiteralNumberToken = { "number", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ - "@PN", /* Lout command for formatting numbers */ + "@PN", /* Lout command for formatting numbers */ "", /* no alternate command */ "", /* no following command */ FALSE, /* token allowed anywhere, not just start of line */ { SepDigits }, /* numbers must begin with a digit */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ "0123456789.eE_", /* inside, digits, point, exponent, underscore */ "", /* no escape character within numbers */ "", /* so nothing legal after escape char either */ "", /* numbers do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "", /* numbers do not end with a delimiter */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1201,18 +1409,43 @@ TOKEN PerlLiteralNumberToken = { TOKEN PerlHexNumberToken = { "number", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ - "@PN", /* Lout command for formatting numbers */ + "@PN", /* Lout command for formatting numbers */ "", /* no alternate command */ "", /* no following command */ FALSE, /* token allowed anywhere, not just start of line */ { "0x" }, /* hex numbers must begin with 0x */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ "0123456789AaBbCcDdEeFf", /* inside, hexadecimal digits */ "", /* no escape character within numbers */ "", /* so nothing legal after escape char either */ "", /* numbers do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ + "", /* numbers do not end with a delimiter */ + FALSE, /* end delimiter does not have to be at line start */ + FALSE, /* don't need to see end delimiter twice to stop */ +}; + + +TOKEN PerlBinaryNumberToken = { + "number", /* used by error messages involving this token */ + PRINT_WHOLE_QUOTED, /* print this token in quotes etc. as usual */ + "@PN", /* Lout command for formatting numbers */ + "", /* no alternate command */ + "", /* no following command */ + FALSE, /* token allowed anywhere, not just start of line */ + { "0b" }, /* binary numbers must begin with 0b */ + { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ + { NULL }, /* so no end2 either */ + "01", /* inside, binary digits */ + "", /* no escape character within numbers */ + "", /* so nothing legal after escape char either */ + "", /* numbers do not permit "inner escapes" */ + "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "", /* numbers do not end with a delimiter */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1238,12 +1471,14 @@ TOKEN PerlCommentToken = { FALSE, /* token allowed anywhere, not just start of line */ { "#" }, /* comments begin with this character */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ - AllPrintable, /* inside, any printable char is OK (not NL) */ + AllPrintablePlusTab, /* inside, any printable char is OK (not NL) */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* C comments do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "", /* no end delimiter (end of line will end it) */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1258,12 +1493,14 @@ TOKEN PerlCommentEscapeToken = { FALSE, /* token allowed anywhere, not just start of line */ { "#@" }, /* comments begin with this character pair */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ - AllPrintable, /* inside, any printable char is OK */ + AllPrintablePlusTab, /* inside, any printable char is OK */ "", /* no escape character within comments */ "", /* so nothing legal after escape char either */ "", /* no "inner escape" in escape comments */ "", /* so no end of "inner escape" either */ + "", /* no bracketing delimiter */ "", /* no end delimiter (end of line will end it) */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1280,7 +1517,7 @@ TOKEN PerlCommentEscapeToken = { /* */ /* "A line beginning with = is assumed to introduce some documentation, */ /* which continues until another line is reached beginning with =cut" */ -/* (WCS page36). Strictly speaking this is only valid at points where */ +/* (WCS page 36). Strictly speaking this is only valid at points where */ /* a statement would be legal, but that is beyond prg2lout to implement. */ /* */ /*****************************************************************************/ @@ -1294,17 +1531,19 @@ TOKEN PerlPodToken = { TRUE, /* token allowed at start of line only */ { "=", "=pod" }, /* pod insert begins with either of these */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* inside, any character at all is OK */ "", /* no escape character within pod comments */ "", /* so nothing legal after escape char either */ "", /* pod comments do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "=cut", /* pod comments end with this string */ TRUE, /* end delimiter must be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ }; - + /*****************************************************************************/ /* */ @@ -1328,33 +1567,62 @@ TOKEN PerlAndToken = FixedToken( "&&", "@PO" ) ; TOKEN PerlOrToken = FixedToken( "||", "@PO" ) ; TOKEN PerlRange2Token = FixedToken( "..", "@PO" ) ; TOKEN PerlRange3Token = FixedToken( "...", "@PO" ) ; -TOKEN PerlFileTestrToken = FixedToken( "-r", "@PO" ) ; -TOKEN PerlFileTestwToken = FixedToken( "-w", "@PO" ) ; -TOKEN PerlFileTestxToken = FixedToken( "-x", "@PO" ) ; -TOKEN PerlFileTestoToken = FixedToken( "-o", "@PO" ) ; -TOKEN PerlFileTestRToken = FixedToken( "-R", "@PO" ) ; -TOKEN PerlFileTestWToken = FixedToken( "-W", "@PO" ) ; -TOKEN PerlFileTestXToken = FixedToken( "-X", "@PO" ) ; -TOKEN PerlFileTestOToken = FixedToken( "-O", "@PO" ) ; -TOKEN PerlFileTesteToken = FixedToken( "-e", "@PO" ) ; -TOKEN PerlFileTestzToken = FixedToken( "-z", "@PO" ) ; -TOKEN PerlFileTestsToken = FixedToken( "-s", "@PO" ) ; -TOKEN PerlFileTestfToken = FixedToken( "-f", "@PO" ) ; -TOKEN PerlFileTestdToken = FixedToken( "-d", "@PO" ) ; -TOKEN PerlFileTestlToken = FixedToken( "-l", "@PO" ) ; -TOKEN PerlFileTestpToken = FixedToken( "-p", "@PO" ) ; -TOKEN PerlFileTestSToken = FixedToken( "-S", "@PO" ) ; -TOKEN PerlFileTestbToken = FixedToken( "-b", "@PO" ) ; -TOKEN PerlFileTestcToken = FixedToken( "-c", "@PO" ) ; -TOKEN PerlFileTesttToken = FixedToken( "-t", "@PO" ) ; -TOKEN PerlFileTestuToken = FixedToken( "-u", "@PO" ) ; -TOKEN PerlFileTestgToken = FixedToken( "-g", "@PO" ) ; -TOKEN PerlFileTestkToken = FixedToken( "-k", "@PO" ) ; -TOKEN PerlFileTestTToken = FixedToken( "-T", "@PO" ) ; -TOKEN PerlFileTestBToken = FixedToken( "-B", "@PO" ) ; -TOKEN PerlFileTestMToken = FixedToken( "-M", "@PO" ) ; -TOKEN PerlFileTestAToken = FixedToken( "-A", "@PO" ) ; -TOKEN PerlFileTestCToken = FixedToken( "-C", "@PO" ) ; + + +/*****************************************************************************/ +/* */ +/* FlagToken - for -r and the rest (followed by white space) */ +/* */ +/*****************************************************************************/ + +#define FlagToken(str, command) /* define fixed-string token */ \ +{ \ + str, /* name used for debugging only */ \ + PRINT_WHOLE_QUOTED, /* print this token as usual */ \ + command, /* Lout command for formatting this */ \ + "", /* no alternate command */ \ + "", /* no following command */ \ + FALSE, /* token not just start of line */ \ + { str }, /* token begins (and ends!) with this */ \ + { " ", "\t" }, /* plus a white space char */ \ + { "", "" }, /* no bracket2 though */ \ + { "", "" }, /* no end2 though */ \ + "", /* nothing inside, since no inside */ \ + "", "", /* no escape character */ \ + "", "", /* no inner escape; no end inner esc */ \ + "", /* no bracketing delimiter */ \ + "", /* no ending delimiter */ \ + FALSE, /* end not have to be at line start */ \ + FALSE, /* don't end delimiter twice to stop */ \ +} + +TOKEN PerlFileTestrToken = FlagToken( "-r", "@PO" ) ; +TOKEN PerlFileTestwToken = FlagToken( "-w", "@PO" ) ; +TOKEN PerlFileTestxToken = FlagToken( "-x", "@PO" ) ; +TOKEN PerlFileTestoToken = FlagToken( "-o", "@PO" ) ; +TOKEN PerlFileTestRToken = FlagToken( "-R", "@PO" ) ; +TOKEN PerlFileTestWToken = FlagToken( "-W", "@PO" ) ; +TOKEN PerlFileTestXToken = FlagToken( "-X", "@PO" ) ; +TOKEN PerlFileTestOToken = FlagToken( "-O", "@PO" ) ; +TOKEN PerlFileTesteToken = FlagToken( "-e", "@PO" ) ; +TOKEN PerlFileTestzToken = FlagToken( "-z", "@PO" ) ; +TOKEN PerlFileTestsToken = FlagToken( "-s", "@PO" ) ; +TOKEN PerlFileTestfToken = FlagToken( "-f", "@PO" ) ; +TOKEN PerlFileTestdToken = FlagToken( "-d", "@PO" ) ; +TOKEN PerlFileTestlToken = FlagToken( "-l", "@PO" ) ; +TOKEN PerlFileTestpToken = FlagToken( "-p", "@PO" ) ; +TOKEN PerlFileTestSToken = FlagToken( "-S", "@PO" ) ; +TOKEN PerlFileTestbToken = FlagToken( "-b", "@PO" ) ; +TOKEN PerlFileTestcToken = FlagToken( "-c", "@PO" ) ; +TOKEN PerlFileTesttToken = FlagToken( "-t", "@PO" ) ; +TOKEN PerlFileTestuToken = FlagToken( "-u", "@PO" ) ; +TOKEN PerlFileTestgToken = FlagToken( "-g", "@PO" ) ; +TOKEN PerlFileTestkToken = FlagToken( "-k", "@PO" ) ; +TOKEN PerlFileTestTToken = FlagToken( "-T", "@PO" ) ; +TOKEN PerlFileTestBToken = FlagToken( "-B", "@PO" ) ; +TOKEN PerlFileTestMToken = FlagToken( "-M", "@PO" ) ; +TOKEN PerlFileTestAToken = FlagToken( "-A", "@PO" ) ; +TOKEN PerlFileTestCToken = FlagToken( "-C", "@PO" ) ; /*****************************************************************************/ @@ -1379,8 +1647,7 @@ TOKEN PerlFileTestCToken = FixedToken( "-C", "@PO" ) ; /* */ /* By a "paragraph" is meant a sequence of lines down to the next empty */ /* line; but we will handle verbatim paragraphs one line at a time. */ -/* It also seems that an empty line in the input has to become an empty */ -/* line in the output; so we include a token for that. */ +/* Also, an empty line in the input has to become an empty line in output. */ /* */ /*****************************************************************************/ @@ -1388,15 +1655,16 @@ TOKEN PodVerbatimLineToken = { "verbatim-para", /* used by error messages involving this token */ PRINT_WHOLE_QUOTED, /* printing the whole paragraph quoted */ "@PV ", /* Lout command for formatting verbatim line */ - "", /* no alternate command */ - "", /* no following command */ + "", "", /* no alternate command; no following command */ TRUE, /* token allowed at start of line only */ { "\t", " " }, /* command begins with this */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ AllPrintablePlusTab, /* inside, any printable char except newline is OK */ "", "", /* no escape character within verbatim lines */ "", "", /* no "inner escapes" within verbatim lines */ + "", /* no bracketing delimiter */ "", /* ends at end of line */ FALSE, /* don't need to be at start of line to end it */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1406,15 +1674,16 @@ TOKEN PodEmptyLineToken = { "pod-empty-line", /* used by error messages involving this token */ PRINT_COMMAND_ONLY, /* printing just the command */ "@PPG\n", /* Lout command for formatting Pod empty line */ - "", /* no alternate command */ - "", /* no following command */ + "", "", /* no alternate command; no following command */ TRUE, /* token allowed at start of line only */ { "\n" }, /* command begins with this */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ "", /* nothing inside */ "", "", /* no escape character */ "", "", /* no inner escape */ + "", /* no bracketing delimiter */ "", /* token will end with the end of the line */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1452,12 +1721,14 @@ TOKEN PodIgnoreToken = { TRUE, /* token allowed at start of line only */ { "=pod", "=cut" }, /* command begins with this */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* anything at all can be inside */ "", /* no escape character */ "", /* so nothing legal after escape char either */ "", /* cut tokens do not permit "inner escapes" */ "", /* and so there is no end innner escape either */ + "", /* no bracketing delimiter */ "\n", /* token will end with the end of the line */ TRUE, /* end delimiter (\n) has to be at a line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1468,23 +1739,22 @@ TOKEN PodIgnoreToken = { /* */ /* Pod command paragraphs: =head1, =head2 (and =head3, folklore extension) */ /* */ -/* "... produce first and second level heading" */ -/* */ /*****************************************************************************/ TOKEN PodHeading1Token = { "=head1", /* used by error messages involving this token */ - PRINT_NODELIMS_QUOTED, /* print without delimiters */ + PRINT_NODELIMS_INNER, /* print without delimiters, formatting inner */ "@PHA", /* Lout command for formatting Pod heading */ - "", /* no alternate command */ - "", /* following command */ + "", "", /* no alternate command; no following command */ TRUE, /* token allowed at start of line only */ {"=head1","head1"}, /* command begins with this */ { " ", "\t" }, /* helps to skip following white space */ + { "", "" }, /* no bracket2 */ { "\n", "\n" }, /* token ends at end of line */ AllCharacters, /* anything in the heading */ "", "", /* no escape character; nothing legal after escape */ "", "", /* no inner escapes; no end inner escape */ + "", /* no bracketing delimiter */ "\n\n", /* token will end with the first blank line */ FALSE, /* end delimiter (\n) has to be at a line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1492,17 +1762,18 @@ TOKEN PodHeading1Token = { TOKEN PodHeading2Token = { "=head2", /* used by error messages involving this token */ - PRINT_NODELIMS_QUOTED, /* print without delimiters */ + PRINT_NODELIMS_INNER, /* print without delimiters, formatting inner */ "@PHB", /* Lout command for formatting Pod heading */ - "", /* no alternate command */ - "", /* following command */ + "", "", /* no alternate command; no following command */ TRUE, /* token allowed at start of line only */ { "=head2" }, /* command begins with this */ { " ", "\t" }, /* helps to skip following white space */ + { "", "" }, /* no bracket2 */ { "\n", "\n" }, /* token ends at end of line */ AllCharacters, /* anything in the heading */ "", "", /* no escape character; nothing legal after escape */ "", "", /* no inner escapes; no end inner escape */ + "", /* no bracketing delimiter */ "\n\n", /* token will end with the first blank line */ FALSE, /* end delimiter (\n) has to be at a line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1510,17 +1781,18 @@ TOKEN PodHeading2Token = { TOKEN PodHeading3Token = { "=head3", /* used by error messages involving this token */ - PRINT_NODELIMS_QUOTED, /* print without delimiters */ + PRINT_NODELIMS_INNER, /* print without delimiters, formatting inner */ "@PHC", /* Lout command for formatting Pod heading */ - "", /* no alternate command */ - "", /* following command */ + "", "", /* no alternate command; no following command */ TRUE, /* token allowed at start of line only */ { "=head3" }, /* command begins with this */ { " ", "\t" }, /* helps to skip following white space */ + { "", "" }, /* no bracket2 */ { "\n", "\n" }, /* token ends at end of line */ AllCharacters, /* anything in the heading */ "", "", /* no escape character; nothing legal after escape */ "", "", /* no inner escapes; no end inner escape */ + "", /* no bracketing delimiter */ "\n\n", /* token will end with the first blank line */ FALSE, /* end delimiter (\n) has to be at a line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1542,10 +1814,12 @@ TOKEN PodOverToken = { TRUE, /* token allowed at start of line only */ { "=over" }, /* command begins with this */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* inside, any printable char is OK */ "", "", /* no escape character; nothing legal after escape */ "", "", /* no inner escapes; no end inner escape */ + "", /* no bracketing delimiter */ "\n", /* token will end with the end of the line */ TRUE, /* end delimiter (\n) has to be at a line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1553,18 +1827,19 @@ TOKEN PodOverToken = { TOKEN PodItemToken = { "=item", /* used by error messages involving this token */ - PRINT_NODELIMS_QUOTED,/* printing just what follows =item on the line */ - "@Null //}\n@TI {@PLL", /* Lout command for formatting Pod item */ + PRINT_NODELIMS_INNER, /* printing just what follows =item on the line */ + "@Null //}\n@DTI {@PLL", /* Lout command for formatting Pod item */ "", /* no alternate command */ "} {", /* open brace to enclose the item content */ TRUE, /* token allowed at start of line only */ { "=item" }, /* command begins with this */ { " ", "\t" }, /* helps to skip following white space */ + { "", "" }, /* no bracket2 */ { "\n\n", "\n\n" }, /* token will end at blank line */ AllPrintableTabNL, /* any printable inside */ "", "", /* no escape character; nothing legal after escape */ "", "", /* no inner escapes; no end inner escape */ - "", /* see ends2[] */ + "", "", /* see brackets2[]; see ends2[] */ FALSE, /* end delimiter (\n) must already be at start */ FALSE, /* don't need to see end delimiter twice to stop */ }; @@ -1573,15 +1848,16 @@ TOKEN PodBackToken = { "=back", /* used by error messages involving this token */ PRINT_COMMAND_ONLY, /* printing just the command */ "@Null // }\n@EndList\n", /* Lout command for formatting Pod back */ - "", /* no alternate command */ - "", /* no following command */ + "", "", /* no alternate command; no following command */ TRUE, /* token allowed at start of line only */ { "=back" }, /* command begins with this */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* anything inside (in principle) */ "", "", /* no escape character; nothing legal after escape */ "", "", /* no inner escapes; no end inner escape */ + "", /* no bracketing delimiter */ "\n", /* token will end with the next blank line */ TRUE, /* end delimiter (\n) has to be at a line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1590,12 +1866,51 @@ TOKEN PodBackToken = { /*****************************************************************************/ /* */ +/* Pod narrow items; for these, we are confident in using @TI not @DTI */ +/* */ +/*****************************************************************************/ + +#define PodNarrowItemToken(tag, command) \ +{ \ + "=item", /* used by error messages */ \ + PRINT_NODELIMS_INNER, /* printing just what follows =item */ \ + command, /* Lout command for formatting Pod item */ \ + "", /* no alternate command */ \ + "}} {", /* open brace to enclose the item content*/ \ + TRUE, /* token allowed at start of line only */ \ + { "=item", "=item ", "=item\t", /* starts */ \ + "=item ", "=item \t", "=item\t ", "=item\t\t" }, /* */ \ + { tag }, /* the tag we recognize */ \ + { "" }, /* no bracket2 */ \ + { "\n\n", "\n\n" }, /* token will end at blank line */ \ + AllPrintableTabNL, /* any printable inside */ \ + "", "", /* no escape character */ \ + "", "", /* no inner escapes; no end inner escape */ \ + "", "", /* see brackets2[]; see ends2[] */ \ + FALSE, /* end delimiter (\n) already at start */ \ + FALSE, /* don't need to see end delimiter twice */ \ +} + +TOKEN PodItemBullet = PodNarrowItemToken("*", "@Null //}\n@TI {@PLL {*"); +TOKEN PodItem0 = PodNarrowItemToken("0", "@Null //}\n@TI {@PLL {0"); +TOKEN PodItem1 = PodNarrowItemToken("1", "@Null //}\n@TI {@PLL {1"); +TOKEN PodItem2 = PodNarrowItemToken("2", "@Null //}\n@TI {@PLL {2"); +TOKEN PodItem3 = PodNarrowItemToken("3", "@Null //}\n@TI {@PLL {3"); +TOKEN PodItem4 = PodNarrowItemToken("4", "@Null //}\n@TI {@PLL {4"); +TOKEN PodItem5 = PodNarrowItemToken("5", "@Null //}\n@TI {@PLL {5"); +TOKEN PodItem6 = PodNarrowItemToken("6", "@Null //}\n@TI {@PLL {6"); +TOKEN PodItem7 = PodNarrowItemToken("7", "@Null //}\n@TI {@PLL {7"); +TOKEN PodItem8 = PodNarrowItemToken("8", "@Null //}\n@TI {@PLL {8"); +TOKEN PodItem9 = PodNarrowItemToken("9", "@Null //}\n@TI {@PLL {9"); + + +/*****************************************************************************/ +/* */ /* Pod command paragraphs: =for, =begin, =end */ /* */ /* "passed directly to particular formatters. A formatter that can utilize */ -/* that format will use the section, otherwise it will be ignored." */ -/* */ -/* So I've put in a "=begin lout" token, also recognized as "=begin Lout". */ +/* that format will use the section, otherwise it will be ignored." So */ +/* I've put in a "=begin lout" token, also recognized as "=begin Lout". */ /* */ /*****************************************************************************/ @@ -1606,11 +1921,12 @@ TOKEN PodForToken = { "", "", /* no alternate command; no following command */ TRUE, /* token allowed at start of line only */ { "=for" }, /* command begins with this */ - { NULL }, /* no start2 needed */ + { NULL }, { NULL }, /* no start2 needed; so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* anything inside */ "", "", /* no escape character; nothing legal after escape */ "", "", /* no inner escapes; no end inner escape */ + "", /* no bracketing delimiter */ "\n", /* token will end with the end of the line */ TRUE, /* end delimiter (\n) has to be at a line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1623,11 +1939,12 @@ TOKEN PodBeginToken = { "", "", /* no alternate command; no following command */ TRUE, /* token allowed at start of line only */ { "=begin" }, /* command begins with this */ - { NULL }, /* no start2 needed */ + { NULL }, { NULL }, /* no start2 needed; so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* anything inside */ "", "", /* no escape character; nothing legal after escape */ "", "", /* no inner escapes; no end inner escape */ + "", /* no bracketing delimiter */ "=end", /* token will end with =end character */ TRUE, /* end delimiter has to be at a line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1640,11 +1957,12 @@ TOKEN PodBeginLoutToken = { "", "", /* no alternate command; no following command */ TRUE, /* token allowed at start of line only */ { "=begin lout", "=begin Lout" }, /* command begins with this */ - { NULL }, /* no start2 needed */ + { NULL }, { NULL }, /* no start2 needed; so no bracket2 either */ { NULL }, /* so no end2 either */ AllCharacters, /* anything inside */ "", "", /* no escape character; nothing legal after escape */ "", "", /* no inner escapes; no end inner escape */ + "", /* no bracketing delimiter */ "=end", /* token will end with =end character */ TRUE, /* end delimiter has to be at a line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1687,10 +2005,12 @@ TOKEN PodBeginLoutToken = { FALSE, /* token not just start of line */ \ { str }, /* token begins with this */ \ { "<", "<< ", "<<< ", "<<<< " }, /* start2 */ \ + { "", "", "", "" }, /* no bracket2 */ \ { ">", " >>", " >>>", " >>>>" }, /* end2 */ \ AllCharacters, /* anything inside (in fact, not used)*/ \ "", "", /* no escape character */ \ "", "", /* no inner escape; no end inner esc */ \ + "", /* will use brackets2 here */ \ "", /* will use end2 here */ \ FALSE, /* end not have to be at line start */ \ FALSE, /* don't end delimiter twice to stop */ \ @@ -1725,10 +2045,12 @@ TOKEN PodCodeToken = RecursiveToken("C", "@PFC"); FALSE, /* token not just start of line */ \ { str }, /* token begins with this */ \ { "<", "<< ", "<<< ", "<<<< " }, /* start2 */ \ + { "", "", "", "" }, /* no bracket2 */ \ { ">", " >>", " >>>", " >>>>" }, /* end2 */ \ AllCharacters, /* anything inside */ \ "", "", /* no escape character */ \ "", "", /* no inner escape; no end inner esc */ \ + "", /* will use brackets2 here */ \ "", /* will use end2 here */ \ FALSE, /* end not have to be at line start */ \ FALSE, /* don't end delimiter twice to stop */ \ @@ -1767,10 +2089,12 @@ TOKEN PodNumCharToken = { FALSE, /* token allowed at start of line only */ { "E<" }, /* command begins with this */ { NULL }, /* no start2 needed */ + { NULL }, /* so no bracket2 either */ { NULL }, /* so no end2 either */ "0123456789", /* digits inside */ "", "", /* no escape character */ "", "", /* no "inner escapes" */ + "", /* no bracketing delimiter */ ">", /* token will end with > character */ FALSE, /* end delimiter does not have to be at line start */ FALSE, /* don't need to see end delimiter twice to stop */ @@ -1786,11 +2110,13 @@ TOKEN PodNumCharToken = { FALSE, /* token not just start of line */ \ { str }, /* token begins with this */ \ { NULL }, /* start2 */ \ + { NULL }, /* bracket2 */ \ { NULL }, /* end2 */ \ "", /* nothing inside */ \ "", "", /* no escape character */ \ "", "", /* no inner escape either */ \ - "", /* no ending delimiter except end2 */ \ + "", /* no bracketing delimiter */ \ + "", /* no ending delimiter */ \ FALSE, /* end not have to be at line start */ \ FALSE, /* don't end delimiter twice to stop */ \ } @@ -1948,12 +2274,12 @@ TOKEN PE99 = PodEscapeToken("E<divide>", "{@Divide}"); #define NO_LANGUAGE ((LANGUAGE *) NULL) typedef struct lang_rec { - char *names[MAX_ARRAY_LENGTH]; + char *names[MAX_NAMES]; char *setup_file; char *lang_sym; int no_match; - TOKEN *tokens[MAX_ARRAY_LENGTH]; - char *keywords[MAX_ARRAY_LENGTH]; + TOKEN *tokens[MAX_TOKENS]; + char *keywords[MAX_KEYWORDS]; } LANGUAGE; @@ -2002,8 +2328,9 @@ LANGUAGE EiffelLanguage = { &ExclamationToken, &EqualToken, &EiffelNotEqualToken, &LeftParenToken, &RightParenToken, &LeftBracketToken, &RightBracketToken, &LeftBraceToken, &RightBraceToken, &AssignToken, &QuestionAssignToken, &PlusToken, - &MinusToken, &DollarToken, &HatToken, &SlashToken, &BackSlashToken, - &LessToken, &GreaterToken, &LessEqualToken, &GreaterEqualToken + &MinusToken, &StarToken, &DollarToken, &HatToken, &SlashToken, + &BackSlashToken, &LessToken, &GreaterToken, &LessEqualToken, + &GreaterEqualToken }, { "alias", "all", "and", "as", "check", "class", "creation", "debug", "deferred", "do", "else", "elseif", "end", "ensure", "expanded", "export", @@ -2043,6 +2370,9 @@ LANGUAGE BlueLanguage = { /* */ /* Perl and Pod */ /* */ +/* We list here all keywords, special variables, predefined filehandles, */ +/* and any other identifier that is "built-in". */ +/* */ /*****************************************************************************/ LANGUAGE PerlLanguage = { @@ -2053,15 +2383,16 @@ LANGUAGE PerlLanguage = { &PerlSingleQuoteStringToken, &PerlDoubleQuoteStringToken, &PerlBackQuoteStringToken, &PerlQTypeStringToken, &PerlSTypeStringToken, &PerlRegExpLPar, &PerlRegExpEq, &PerlRegExpMatch, &PerlRegExpNoMatch, - &PerlRegExpSplit, &PerlRegExpIf, &PerlRegExpNot, &PerlRegExpUnless, - &PerlRegExpFor, &PerlRegExpForEach, &PerlRegExpWhile, - &PerlRegExpStartLineToken, + &PerlRegExpSplit, &PerlRegExpIf, &PerlRegExpAnd, &PerlRegExpAnd2, + &PerlRegExpOr, &PerlRegExpOr2, &PerlRegExpXor, &PerlRegExpNot, + &PerlRegExpNot2, &PerlRegExpUnless, &PerlRegExpFor, &PerlRegExpForEach, + &PerlRegExpWhile, &PerlRegExpStartLineToken, &HereEOTuq, &HereEOTdq, &HereEOTfq, &HereEOTbq, &HereEOFuq, &HereEOFdq, &HereEOFfq, &HereEOFbq, &HereENDuq, &HereENDdq, &HereENDfq, &HereENDbq, &HereBLAuq, &HereBLAdq, &HereBLAfq, &HereBLAbq, &PerlIdentifierToken, &PerlSpecialIdentifierToken, - &PerlLiteralNumberToken, &PerlHexNumberToken, + &PerlLiteralNumberToken, &PerlHexNumberToken, &PerlBinaryNumberToken, &PerlCommentToken, &PerlCommentEscapeToken, &PerlPodToken, &ExclamationToken, &PercentToken, &HatToken, &AmpersandToken, &StarToken, &SlashToken, &ArrowToken, &BackSlashToken, @@ -2087,12 +2418,13 @@ LANGUAGE PerlLanguage = { }, { - /* Built-ins taken from Programming Perl 2nd Ed. */ + /* Built-ins taken from WCS and on-line documentation for 5.6.0 */ + /* dbmopen and dbmclose are not included because they are obsolete. */ "abs", "accept", "alarm", "atan2", "bind", "binmode", "bless", - "caller", "chdir", "chmod", "chomp", "chop", "chown", "chr", "chroot", + "caller", "can", "chdir", "chmod", "chomp", "chop", "chown", "chr", "chroot", "close", "closedir", "connect", "continue", "cos", "crypt", - "dbmclose", "dbmopen", "defined", "delete", "die", "do", "dump", + "defined", "delete", "die", "do", "dump", "each", "endgrent", "endhostent", "endnetent", "endprotoent", "endpwent", "endservent", "eof", "eval", "exec", "exists", "exit", "exp", @@ -2105,19 +2437,19 @@ LANGUAGE PerlLanguage = { "getservent", "getsockname", "getsockopt", "glob", "gmtime", "goto", "grep", "hex", - "import", "index", "int", "ioctl", + "import", "index", "int", "ioctl", "isa", "join", "keys", "kill", "last", "lc", "lcfirst", "length", "link", "listen", "local", - "localtime", "log", "lstat", + "localtime", "lock", "log", "lstat", "map", "mkdir", "msgctl", "msgget", "msgrcv", "msgsnd", "my", "next", "no", - "oct", "open", "opendir", "ord", - "pack", "package", "pipe", "pop", "pos", "print", "printf", "push", + "oct", "open", "opendir", "ord", "our", + "pack", "package", "pipe", "pop", "pos", "print", "printf", "prototype", "push", "quotemeta", - "rand", "read", "readdir", "readlink", "recv", "redo", "ref", - "rename", "require", "reset", "return", "reverse", "rewinddir", - "rindex", "rmdir", + "rand", "read", "readdir", "readline", "readlink", "readpipe", "recv", + "redo", "ref", "rename", "require", "reset", "return", "reverse", + "rewinddir", "rindex", "rmdir", "scalar", "seek", "seekdir", "select", "semctl", "semget", "semop", "send", "setgrent", "sethostent", "setnetent", "setpgrp", "setpriority", "setprotoent", "setpwent", "setservent", @@ -2127,87 +2459,56 @@ LANGUAGE PerlLanguage = { "sub", "substr", "symlink", "syscall", "sysopen", "sysread", "sysseek", "system", "syswrite", "tell", "telldir", "tie", "tied", "time", "times", "truncate", + "unimport", "uc", "ucfirst", "umask", "undef", "unlink", "unpack", "unshift", "untie", "use", "utime", - "values", "vec", + "values", "vec", "VERSION", "wait", "waitpid", "wantarray", "warn", "write", - /* from later versions of Perl (excluding experimental thread stuff) */ - "our", "prototype", "readline", "readpipe", - - /* operators */ + /* Comparison operators */ "lt", "gt", "eq", "ne", "cmp", "le", "ge", - /* Perl special variables */ - /* NB special variables that begin $^, e.g. $^D can also be written as */ - /* dollar control D (yes literal control D) -- but suggest we ignore */ - "$_", "$ARG", - "$.", "$INPUT_LINE_SEPARATOR", "$NR", - "$/", "$INPUT_RECORD_SEPARATOR", "$RS", - "$,", "$OUTPUT_FIELD_SEPARATOR", "$OFS", - "$\\", "$OUTPUT_RECORD_SEPARATOR", "$ORS", - "$\"", "$LIST_SEPARATOR", - "$;", "$SUBSCRIPT_SEPARATOR", "$SUBSEP", - "$^L", "$FORMAT_FORMFEED", - "$:", "$FORMAT_LINE_BREAK_CHARACTERS", - "$^A", "$ACCUMULATOR", - "$#", "$OFMT", - "$?", "$CHILD_ERROR", - "$!", "$OS_ERROR", "$ERRNO", - "$@", "$EVAL_ERROR", - "$$", "$PROCESS_ID", "$PID", - "$<", "$REAL_USER_ID", "$UID", - "$>", "$EFFECTIVE_USER_ID", "$EUID", - "$(", "$REAL_GROUP_ID", "$GID", - "$)", "$EFFECTIVE_GROUP_ID", "$EGID", - "$0", /* dollar zero */ "$PROGRAM_NAME", - "$[", /* deprecated */ - "$]", "$PERL_VERSION", - "$^D", "$DEBUGGING", - "$^E", "$EXTENDED_OS_ERROR", - "$^F", "$SYSTEM_FD_MAX", - "$^H", - "$^I", "$INPLACE_EDIT", - "$^M", - "$^O", "$OSNAME", - "$^P", "$PERLDB", - "$^T", "$BASETIME", - "$^W", "$WARNING", - "$^X", "$EXECUTABLE_NAME", - "$ARGV", - "@ARGV", "@INC", "@F", "%INC", "%ENV", "%SIG", - "ARGV", "STDERR", "STDIN", "STDOUT", "DATA" - "$&", "$MATCH", - "$`", "$PREMATCH", - "$'", "$POSTMATCH", - "$+", "$LAST_PAREN_MATCH", - "$|", "$OUTPUT_AUTOFLUSH", - "$%", "$FORMAT_PAGE_NUMBER", - "$=", "$FORMAT_LINES_PER_PAGE", - "$-", "$FORMAT_LINES_LEFT", - "$~", "$FORMAT_NAME", - "$^", "$FORMAT_TOP_NAME", - - /* Perl special constants */ - "__END__", "__FILE__", "__LINE__", "__PACKAGE__", - - /* Perl regex variables - first 9 only */ - "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", - - /* Perl pragmas */ - "attrs", "autouse", "base", "blib", "constant", "diagnostics", "fields", - "integer", "less", "lib", "locale", "ops", "overload", "re", "sigtrap", - "strict", "subs", "vars", - - /* Perl textual operators */ + /* Special markers & constants */ + "__DATA__", "__END__", "__FILE__", "__LINE__", "__PACKAGE__", + + /* Predefined filehandles */ + "ARGV", "ARGVOUT", "STDERR", "STDIN", "STDOUT", "DATA" + + /* Pragmas */ + "attributes", "autouse", "base", "blib", "bytes", + "constant", "charnames", "diagnostics", "fields", "filetest", + "integer", "less", "lib", "locale", + /* "open", Not listed here since its also a function */ + "ops", "overload", "re", "sigtrap", "strict", "subs", "utf8", + "vars", "warnings", + + /* Low-precedence logical operators */ "and", "or", "xor", "not", - /* the x keyword */ + /* The x keyword */ "x", - /* Perl control structures */ + /* Control structures */ "if", "elsif", /* yes one e */ "else", "unless", "while", "for", "foreach", "continue", "until", + + /* Special subroutines */ + "AUTOLOAD", "BEGIN", "CHECK", "END", "DESTROY", "INIT", + + /* Predefined classes & namespaces */ + "CORE", "GLOBAL", "UNIVERSAL", "SUPER", + + /* Tie predefined subroutines */ + "TIESCALAR", + "FETCH", "STORE", + "TIEARRAY", + "FETCHSIZE", "STORESIZE", "EXISTS", "DELETE", + "CLEAR", "PUSH", "POP", "SHIFT", "UNSHIFT", "SPLICE", "EXTEND", + "TIEHASH", + "FIRSTKEY", "NEXTKEY" + "TIEHANDLE", + "PRINT", "PRINTF", "WRITE", "READLINE", "GETC", "READ", "CLOSE", + "BINMODE", "OPEN", "EOF", "FILENO", "SEEK", "TELL", } }; @@ -2220,6 +2521,8 @@ LANGUAGE PodLanguage = { &PodVerbatimLineToken, &PodEmptyLineToken, &PodIgnoreToken, &PodHeading1Token, &PodHeading2Token, &PodOverToken, &PodItemToken, &PodBackToken, + &PodItemBullet, &PodItem0, &PodItem1, &PodItem2, &PodItem3, + &PodItem4, &PodItem5, &PodItem6, &PodItem7, &PodItem8, &PodItem9, &PodForToken, &PodBeginToken, &PodBeginLoutToken, &PodItalicToken, &PodBoldToken, &PodCodeToken, &PodFileToken, &PodNoBreakToken, &PodLinkToken, &PodIndexToken, &PodZeroToken, @@ -2249,9 +2552,9 @@ LANGUAGE PodLanguage = { /*****************************************************************************/ LANGUAGE *languages[] = { + & BlueLanguage, & CLanguage, & EiffelLanguage, - & BlueLanguage, & PerlLanguage, & PodLanguage, }; @@ -2303,11 +2606,14 @@ static char *tabin_option; /* value of -t option, else null */ static char *tabout_option; /* value of -T option, else null */ static char *setup_option; /* value of -S option, else null */ static char *language_option; /* value of -l option, else null */ +static char *numbered_option; /* value of -L option, else null */ static BOOLEAN tab_by_spacing; /* TRUE if using space chars to tab */ static int tab_in; /* tab interval, value of -t option */ static float tab_out; /* tab interval width (-T option) */ static char tab_unit; /* unit of measurement for tab */ +static BOOLEAN print_lines; /* TRUE if we are printing line nums */ +static int print_num; /* current line num for printing */ static FILE *in_fp; /* where input comes from */ static FILE *out_fp; /* where output goes */ @@ -2413,7 +2719,7 @@ void NextChar() /* we need to read in the new line */ line_num++; line_pos = 1; - if( fgets(&curr_line[1], MAX_LINE+2, in_fp) == (char *) NULL ) + if( fgets(&curr_line[1], MAX_LINE-2, in_fp) == (char *) NULL ) curr_line[1] = '\0'; } if( DEBUG_NEXTCHAR ) @@ -2438,7 +2744,7 @@ BOOLEAN InputMatches(char *pattern) if( *p == '\0' ) { /* attempt to read another line of input, since we are off the end */ - if( fgets(p, MAX_LINE+2-(p - curr_line), in_fp) == (char *) NULL ) + if( fgets(p, MAX_LINE-2-(p - curr_line), in_fp) == (char *) NULL ) *p = '\0'; } if( *p != *q ) @@ -2607,6 +2913,45 @@ static char save_value[MAX_LINE]; /* the token text */ static int save_len; /* index of \0 in save_value */ static BOOLEAN save_on = FALSE; /* TRUE when saving */ static LANGUAGE *save_language; /* the current language */ +static int out_linepos = 0; /* output line position */ +static BOOLEAN out_linestart = TRUE; /* TRUE if out line start */ +static BOOLEAN out_formfeed = FALSE; /* TRUE if last was formfeed */ +static int brace_depth; /* brace depth in verbatim */ + +extern void Emit(TOKEN *current_token, char ch); + +/*****************************************************************************/ +/* */ +/* EmitTab(int *out_linepos) */ +/* */ +/* Emit one tab character, keeping track of where we are up to in */ +/* *out_linepos. */ +/* */ +/*****************************************************************************/ + +void EmitTab() +{ + if( tab_by_spacing ) + { putc(' ', out_fp); + out_linepos++; + while( out_linepos % tab_in != 0 ) + { putc(' ', out_fp); + out_linepos++; + } + } + else + { out_linepos++; + while( out_linepos % tab_in != 0 ) out_linepos++; + if( out_linestart ) + { fprintf(out_fp, "$>\"%.1f%c\" {}", tab_out, tab_unit); + /* NB {} is required in case nothing follows on this line */ + } + else + fprintf(out_fp, "$>\"%.1f%ct\" {}", (out_linepos/tab_in)*tab_out, + tab_unit); + } + out_formfeed = FALSE; +} /*****************************************************************************/ @@ -2627,8 +2972,7 @@ static LANGUAGE *save_language; /* the current language */ /*****************************************************************************/ void EmitRaw(char ch) -{ static int out_linepos = 0; /* output line position */ - static BOOLEAN out_linestart = TRUE; /* TRUE if out line start */ +{ if( DEBUG_EMIT ) fprintf(stderr, "EmitRaw(%c); out_linepos %d, out_linestart %s\n", @@ -2637,35 +2981,69 @@ void EmitRaw(char ch) { fprintf(err_fp, "%s internal error (EmitRaw save_on)\n", ErrorHeader()); abort(); } - if( ch == '\t' ) - { if( tab_by_spacing ) - { putc(' ', out_fp); - out_linepos++; - while( out_linepos % tab_in != 0 ) - { putc(' ', out_fp); - out_linepos++; - } - } - else - { out_linepos++; - while( out_linepos % tab_in != 0 ) out_linepos++; - if( out_linestart ) - { fprintf(out_fp, "$>\"%.1f%c\" {}", tab_out, tab_unit); - /* NB {} is required in case nothing follows on this line */ - } - else - fprintf(out_fp, "$>\"%.1f%ct\" {}", (out_linepos/tab_in)*tab_out, tab_unit); - } + + /* drop empty lines following formfeed */ + if( out_formfeed && (ch == '\n' || ch == '\f') ) + { + out_formfeed = (ch == '\f'); + return; } - else if( ch == '\n' ) - { fputc(ch, out_fp); - out_linepos = 0; - out_linestart = TRUE; + + /* emit line number if required */ + if( print_lines && out_linepos == 0 ) + { + char buff[20]; + if( out_formfeed ) print_num--; + sprintf(buff, "%d", print_num++); + fprintf(out_fp, "@PL{\"%s\"}", buff); + out_linepos += strlen(buff); + out_linestart = FALSE; + EmitTab(); } - else - { fputc(ch, out_fp); - out_linepos++; - if( ch != ' ' ) out_linestart = FALSE; + + switch( ch ) + { + case ' ': + + fputc(ch, out_fp); + out_linepos++; + out_formfeed = FALSE; + break; + + + case '\t': + + EmitTab(); + out_formfeed = FALSE; + break; + + + case '\n': + + fputc(ch, out_fp); + out_linepos = 0; + out_linestart = TRUE; + out_formfeed = FALSE; + break; + + + case '\f': + + fputs("\n@NP\n", out_fp); + out_linepos = 0; + out_linestart = TRUE; + out_formfeed = TRUE; + break; + + + default: + + fputc(ch, out_fp); + out_linepos++; + out_linestart = FALSE; + out_formfeed = FALSE; + break; + } if( DEBUG_EMIT ) fprintf(stderr, "EmitRaw(%c) returning; out_linepos %d, out_linestart %s\n", @@ -2692,22 +3070,35 @@ void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len) abort(); } save_language = lang; + + /* emit line number if required */ + if( print_lines && out_linepos == 0 ) + { + char buff[20]; + if( out_formfeed ) print_num--; + sprintf(buff, "%d", print_num++); + fprintf(out_fp, "@PL{\"%s\"}", buff); + out_linepos += strlen(buff); + out_linestart = FALSE; + EmitTab(); + } + switch( current_token->print_style ) { case PRINT_WHOLE_QUOTED: - /* start_delim is to be printed, but must be saved up */ + /* start_delim is to be printed */ save_on = TRUE; - for( i = 0; i < len; i++ ) - save_value[i] = start_delim[i]; - save_len = len; + save_len = 0; save_value[save_len] = '\0'; + for( i = 0; i < len; i++ ) + Emit(current_token, start_delim[i]); break; case PRINT_NODELIMS_QUOTED: - /* easiest to save this, treat like PRINT_WHOLE_QUOTED, but no delims */ + /* like PRINT_WHOLE_QUOTED, but no delims */ save_on = TRUE; save_len = 0; save_value[save_len] = '\0'; @@ -2723,6 +3114,7 @@ void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len) /* print opening delimiter, verbatim */ for( i = 0; i < len; i++ ) putc(start_delim[i], out_fp); + break; @@ -2731,6 +3123,9 @@ void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len) /* command is printed but not delimiter */ if( current_token->command[0] != '\0' ) fprintf(out_fp, "%s{", current_token->command); /*}*/ + + /* record that we are currently inside no braces in the verbatim text */ + brace_depth = 0; break; @@ -2764,38 +3159,37 @@ void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len) /* End emitting the current token. Its ending delimiter was end_delim. */ /* */ /*****************************************************************************/ -#define is_whitespace(ch) ((ch)==' ' || (ch)=='\t' || (ch)=='\n' || (ch)=='\f') +#define at_start_line(s, i) ((i) == 0 || s[(i)-1] == '\n' || s[(i)-1] == '\f' ) void EndEmit(TOKEN *current_token, char *end_delim) -{ char *com; int i; BOOLEAN quoted_now = FALSE; +{ char *com; + int i; + BOOLEAN quoted_now = FALSE; switch( current_token->print_style ) { case PRINT_WHOLE_QUOTED: - strcpy(&save_value[save_len], end_delim); - save_len += strlen(end_delim); + /* first, emit (i.e. save) ending delimiter */ + for( i = 0; end_delim[i] != '\0'; i++ ) + Emit(current_token, end_delim[i]); /* NB NO BREAK */ case PRINT_NODELIMS_QUOTED: /* work out whether we are printing the command or its alternative */ - com = (current_token->alternate_command[0]!='\0' && HashRetrieve(save_value)? + com=(current_token->alternate_command[0]!='\0'&&HashRetrieve(save_value)? current_token->alternate_command : current_token->command); /* print command, opening brace */ - if( com[0] != '\0' ) fprintf(out_fp, "%s{", com); /* } */ - - /* omit trailing white space (will not be significant to Lout anyway) */ - /* *** not doing this now - while( save_len>0 && is_whitespace(save_value[save_len-1]) ) save_len--; - *** */ + if( com[0] != '\0' ) fprintf(out_fp, "%s{", com); /*}*/ /* print the token with appropriate escapes */ save_on = FALSE; for( i = 0; i < save_len; i++ ) switch( save_value[i] ) { + case '@': case '/': case '|': case '&': @@ -2806,7 +3200,10 @@ void EndEmit(TOKEN *current_token, char *end_delim) case '~': case '-': - if( !quoted_now ) { putc('"', out_fp); quoted_now = TRUE; } + if( !quoted_now ) + { putc('"', out_fp); + quoted_now = TRUE; + } EmitRaw(save_value[i]); break; @@ -2814,7 +3211,10 @@ void EndEmit(TOKEN *current_token, char *end_delim) case '"': case '\\': - if( !quoted_now ) { putc('"', out_fp); quoted_now = TRUE; } + if( !quoted_now ) + { putc('"', out_fp); + quoted_now = TRUE; + } putc('\\', out_fp); EmitRaw(save_value[i]); break; @@ -2822,24 +3222,41 @@ void EndEmit(TOKEN *current_token, char *end_delim) case ' ': case '\t': - case '\n': - case '\f': - if( i == 0 ) - { /* make initial white space significant using "" */ + /* make initial white space significant using "" */ + if( !quoted_now && at_start_line(save_value, i) ) + { putc('"', out_fp); quoted_now = TRUE; + out_linestart = FALSE; + } + + /* make sure we aren't in quoted text */ + if( quoted_now ) + { putc('"', out_fp); + quoted_now = FALSE; } - if( quoted_now ) { putc('"', out_fp); quoted_now = FALSE; } + + /* print the character */ EmitRaw(save_value[i]); break; + case '\n': + case '\f': + + /* these characters are not saved */ + fprintf(err_fp, "%s internal error (EndEmit nl/ff)\n", ErrorHeader()); + exit(1); + break; + + default: /* anything else can be quoted or unquoted ad. lib. */ EmitRaw(save_value[i]); break; + } /* print closing quote and closing brace if needed */ if( quoted_now ) putc('"', out_fp); @@ -2859,7 +3276,24 @@ void EndEmit(TOKEN *current_token, char *end_delim) case PRINT_NODELIMS_UNQUOTED: /* print closing brace if required*/ - if( current_token->command[0] != '\0' ) /*{*/ putc('}', out_fp); + if( current_token->command[0] != '\0' ) + { + if( brace_depth > 0 ) + { + if( brace_depth > 1 ) + fprintf(err_fp, "%s: inserted %d closing braces at end of %s\n", + ErrorHeader(), brace_depth, current_token->name); + else + fprintf(err_fp, "%s: inserted one closing brace at end of %s\n", + ErrorHeader(), current_token->name); + while( brace_depth > 0 ) + { + /*{*/ putc('}', out_fp); + brace_depth--; + } + } + /*{*/ putc('}', out_fp); + } break; @@ -2907,14 +3341,44 @@ void Emit(TOKEN *current_token, char ch) { fprintf(err_fp, "%s internal error (EmitChar)\n", ErrorHeader()); abort(); } - save_value[save_len++] = ch; - save_value[save_len] = '\0'; + if( ch == '\n' || ch == '\f' ) + { + /* could save newline too, but uses less memory if print now */ + EndEmit(current_token, ""); + EmitRaw(ch); + StartEmit(save_language, current_token, "", 0); + } + else if( save_len < MAX_LINE - 1 ) + { + save_value[save_len++] = ch; + save_value[save_len] = '\0'; + } + else + { + fprintf(err_fp, "%s internal error (token too long)\n", ErrorHeader()); + exit(1); + } break; case PRINT_WHOLE_UNQUOTED: case PRINT_NODELIMS_UNQUOTED: + /* keep trace of braces, and insert matching braces if required */ + if( ch == '{' ) + brace_depth++; + else if( ch == '}' ) + { + brace_depth--; + if( brace_depth < 0 && current_token->command[0] != '\0' ) + { + fprintf(err_fp, "%s: inserted opening brace within %s\n", + ErrorHeader(), current_token->name); + putc('{', out_fp); /*}*/ + brace_depth++; + } + } + /* verbatim output */ putc(ch, out_fp); break; @@ -2956,6 +3420,7 @@ void EmitProtected(char ch) { switch( ch ) { + case '@': case '/': case '|': case '&': @@ -3029,6 +3494,7 @@ TOKEN *ExpandToken(TOKEN *t, int starts_pos) res->escape_legal = t->escape_legal; res->inner_escape = t->inner_escape; res->end_inner_escape = t->end_inner_escape; + res->bracket_delimiter = t->brackets2[starts_pos]; res->end_delimiter = t->ends2[starts_pos]; res->end_start_line_only = t->end_start_line_only; res->want_two_ends = t->want_two_ends; @@ -3056,30 +3522,21 @@ TRIE StartLineTrie = (TRIE) NULL; /* these allowed at line start only */ void SetupOneToken(TOKEN *t) { int j; - if( DEBUG_SETUP ) - fprintf(stderr, "SetupOneToken starting %s\n", t->starts[0]); + if( DEBUG_SETUP ) fprintf(stderr, "SetupOneToken(%s)\n", t->starts[0]); /* check that any PRINT_NODELIMS_INNER styles have an end delimiter */ if( t->print_style == PRINT_NODELIMS_INNER ) - { - if( t->end_delimiter == NULL || t->end_delimiter[0] == '\0' ) - { - fprintf(err_fp, "%s: token %s is INNER but has no end delimiter\n", + { if( t->end_delimiter == NULL || t->end_delimiter[0] == '\0' ) + { fprintf(err_fp, "%s: token %s is INNER but has no end delimiter\n", t->name, ErrorHeader()); } } /* set up the chtype table for this token */ - if( t->legal == NULL ) - { /* all characters are legal */ - for( j = 0; j < MAX_CHAR; j++ ) - t->chtype[j] = LEGAL; - } - else - { /* the characters in t->legal are legal */ - for( j = 0; t->legal[j] != '\0'; j++ ) - t->chtype[(int) t->legal[j]] = LEGAL; - } + if( t->legal == NULL ) /* all characters are legal in this case */ + for( j = 0; j < MAX_CHAR; j++ ) t->chtype[j] = LEGAL; + else /* the characters in t->legal are legal in this case */ + for( j = 0; t->legal[j] != '\0'; j++ ) t->chtype[(int) t->legal[j]] = LEGAL; if( t->escape[0] != '\0' ) t->chtype[(int) t->escape[0]] = ESCAPE; if( t->inner_escape[0] != '\0' ) @@ -3088,8 +3545,7 @@ void SetupOneToken(TOKEN *t) /* set up the escape_chtype table for this token */ if( t->escape_legal == NULL ) { /* all characters are legal after an escape character */ - for( j = 0; j < MAX_CHAR; j++ ) - t->escape_chtype[j] = LEGAL; + for( j = 0; j < MAX_CHAR; j++ ) t->escape_chtype[j] = LEGAL; } else { /* the characters in t->escape_legal are legal after an escape character */ @@ -3099,8 +3555,7 @@ void SetupOneToken(TOKEN *t) /* load the opening delimiters of this token into the trie */ for( j = 0; t->starts[j] != (char *) NULL; j++ ) - { - if( !TrieInsert(t->start_line_only ? &StartLineTrie:&Trie,t->starts[j],t) ) + { if( !TrieInsert(t->start_line_only ? &StartLineTrie:&Trie,t->starts[j],t) ) { if( *(t->starts[j]) == '\0' ) fprintf(err_fp, "%s: empty starting delimiter\n", ErrorHeader()); else @@ -3109,8 +3564,7 @@ void SetupOneToken(TOKEN *t) } } - if( DEBUG_SETUP ) - fprintf(stderr, "SetupOneToken ending %s\n", t->starts[0]); + if( DEBUG_SETUP ) fprintf(stderr, "SetupOneToken ending %s\n", t->starts[0]); } /* end SetupOneToken */ @@ -3208,7 +3662,26 @@ TOKEN *TokenStartingHere(int *len) return res; } + +/*****************************************************************************/ +/* */ +/* int Matching() */ +/* */ +/* Return the index of the pair that matches the current input. */ +/* */ +/*****************************************************************************/ + +int Matching() +{ int i; + for( i = 0; pairs[i].first != NULL && !InputMatches(pairs[i].first); i++ ); + if( DEBUG_PROCESS ) + fprintf(stderr, "Matching() = %d (\"%s\", \"%s\")\n", i, + pairs[i].first == NULL ? "NULL" : pairs[i].first, + pairs[i].second == NULL ? "NULL" : pairs[i].second); + return i; +} + /*****************************************************************************/ /* */ /* Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter) */ @@ -3221,22 +3694,47 @@ TOKEN *TokenStartingHere(int *len) /*****************************************************************************/ #define START 1 #define IN_TOKEN 2 -#define IN_TOKEN_AFTER_ESCAPE 3 -#define IN_TOKEN_AFTER_INNER_ESCAPE 4 -#define STOP 5 +#define IN_TOKEN_NEEDING_DELIM 3 +#define IN_TOKEN_AFTER_ESCAPE 4 +#define IN_TOKEN_AFTER_INNER_ESCAPE 5 +#define STOP 6 + +char *debug_state(int s) +{ + switch( s ) + { + case START: return "START"; + case IN_TOKEN: return "IN_TOKEN"; + case IN_TOKEN_NEEDING_DELIM: return "IN_TOKEN_NEEDING_DELIM"; + case IN_TOKEN_AFTER_ESCAPE: return "IN_TOKEN_AFTER_ESCAPE"; + case IN_TOKEN_AFTER_INNER_ESCAPE: return "IN_TOKEN_AFTER_INNER_ESCAPE"; + case STOP: return "STOP"; + default: return "?"; + } +} void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter) { TOKEN *current_token; int len, i, state; - BOOLEAN end_delimiter_seen; + int end_delimiter_depth, end_delimiter_count; + char *curr_end_delim, *curr_bracket_delim; if( DEBUG_PROCESS ) fprintf(stderr, "[ Process(%s, -, -, -, -)\n", lang->names[0]); state = START; - end_delimiter_seen = FALSE; while( curr_line[line_pos] != '\0' && state != STOP ) { if( DEBUG_PROCESS ) - fprintf(stderr, " state %d, ch %c\n", state, curr_line[line_pos]); + { + if( state >= IN_TOKEN ) + fprintf(stderr, + " %s, depth %d, count %d, bracket \"%s\", end \"%s\", ch %c\n", + debug_state(state), end_delimiter_depth, end_delimiter_count, + curr_bracket_delim, curr_end_delim, curr_line[line_pos]); + else + fprintf(stderr, " %s, ch %c\n", + debug_state(state), curr_line[line_pos]); + } + switch( state ) { @@ -3272,38 +3770,23 @@ void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter) Process(lang, current_token, current_token->end_delimiter); EndEmit(current_token, ""); } - else state = IN_TOKEN; + else + { + end_delimiter_depth = 1; + end_delimiter_count = current_token->want_two_ends ? 2 : 1; + curr_end_delim = current_token->end_delimiter; + curr_bracket_delim = current_token->bracket_delimiter; + state = IN_TOKEN; + } } /* check whether we have a space */ - else if( curr_line[line_pos] == ' ' ) + else if( is_whitespace(curr_line[line_pos]) ) { - EmitRaw(' '); + EmitRaw(curr_line[line_pos]); NextChar(); } - /* check whether we have a tab character */ - else if( curr_line[line_pos] == '\t' ) - { - EmitRaw('\t'); - NextChar(); - } - - /* check whether we have a newline character */ - else if( curr_line[line_pos] == '\n' ) - { - EmitRaw('\n'); - NextChar(); - } - - /* check whether we have a formfeed character */ - else if( curr_line[line_pos] == '\f' ) - { - fprintf(out_fp, "\n@NP"); - EmitRaw('\n'); - NextChar(); - } - /* check whether we are supposed to echo things that don't match */ else if( lang->no_match == NO_MATCH_PRINT ) { @@ -3319,8 +3802,8 @@ void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter) ErrorHeader(), curr_line[line_pos]); else fprintf(err_fp, "%s: %s (octal %o)\n", - "skipping unexpected unprintable character", - ErrorHeader(), (int) curr_line[line_pos]); + ErrorHeader(), "skipping unexpected unprintable character", + (int) curr_line[line_pos]); NextChar(); } else @@ -3333,79 +3816,162 @@ void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter) case IN_TOKEN: /* within a token; current_token says which kind */ /* check for ending delimiter if there is one */ - if( current_token->end_delimiter[0] != '\0' && + if( curr_end_delim[0] != '\0' && (!current_token->end_start_line_only || line_pos == 1) && - InputMatches(current_token->end_delimiter) ) + InputMatches(curr_end_delim) ) { - if( current_token->want_two_ends && !end_delimiter_seen ) + end_delimiter_depth--; + if( DEBUG_PROCESS ) + fprintf(stderr, " InputMatches(%s) so end_delimiter_depth--\n", + curr_end_delim); + if( end_delimiter_depth > 0 ) { - /* if we have to see the end delimiter twice before stopping, */ - /* and we haven't seen it yet, then emit the char and carry on */ + /* if this end delimiter matches with a bracketing delimiter, */ + /* so is not the end of the token, emit the char and carry on */ Emit(current_token, curr_line[line_pos]); NextChar(); - end_delimiter_seen = TRUE; } else { + end_delimiter_count--; if( DEBUG_PROCESS ) - fprintf(stderr, " InputMatches(%s) so finishing token\n", - current_token->end_delimiter); - len = strlen(current_token->end_delimiter); - for( i = 0; i < len; i++ ) + fprintf(stderr, " InputMatches(%s) so end_delimiter_count--\n", + curr_end_delim); + if( end_delimiter_count == 0 ) + { + /* seen all the end delimiters we need, so token ends */ + len = strlen(curr_end_delim); + for( i = 0; i < len; i++ ) + NextChar(); + EndEmit(current_token, curr_end_delim); + state = START; + } + else + { + /* need more end delimiters yet, so keep scanning */ + Emit(current_token, curr_line[line_pos]); NextChar(); - EndEmit(current_token, current_token->end_delimiter); - state = START; + if( curr_bracket_delim[0] != '\0' ) + state = IN_TOKEN_NEEDING_DELIM; + else + state = IN_TOKEN; + } } } - else switch( current_token->chtype[(int) curr_line[line_pos]] ) + else { + /* check for bracketing delimiter if there is one */ + if( curr_bracket_delim[0] != '\0' && + InputMatches(curr_bracket_delim) ) + { + if( DEBUG_PROCESS ) + fprintf(stderr, " InputMatches(%s) so end_delimiter_depth++\n", + curr_bracket_delim); + end_delimiter_depth++; + } + + /* handle current character as usual */ + switch( current_token->chtype[(int) curr_line[line_pos]] ) + { - case LEGAL: + case LEGAL: - Emit(current_token, curr_line[line_pos]); - NextChar(); - break; + Emit(current_token, curr_line[line_pos]); + NextChar(); + break; - case ESCAPE: + case ESCAPE: - NextChar(); - state = IN_TOKEN_AFTER_ESCAPE; - break; + NextChar(); + state = IN_TOKEN_AFTER_ESCAPE; + break; - case INNER_ESCAPE: + case INNER_ESCAPE: - EndEmit(current_token, ""); + EndEmit(current_token, ""); + NextChar(); + Process(lang, current_token, current_token->end_inner_escape); + state = IN_TOKEN_AFTER_INNER_ESCAPE; + break; + + + default: + + if( curr_end_delim[0] != '\0' ) + { + /* error: token ends at delimiter, not unexpected character */ + if( Printable(curr_line[line_pos]) ) + fprintf(err_fp, + "%s: skipping %c character (not allowed in %s)\n", + ErrorHeader(), curr_line[line_pos], current_token->name); + else if( curr_line[line_pos] == '\t' ) + fprintf(err_fp, + "%s: skipping tab character (not allowed in %s)\n", + ErrorHeader(), current_token->name); + else if( curr_line[line_pos] == '\n' ) + fprintf(err_fp, + "%s: skipping newline character (not allowed in %s)\n", + ErrorHeader(), current_token->name); + else if( curr_line[line_pos] == '\f' ) + fprintf(err_fp, + "%s: skipping formfeed character (not allowed in %s)\n", + ErrorHeader(), current_token->name); + else + fprintf(err_fp, "%s: %s, octal code %o (not allowed in %s)\n", + ErrorHeader(), "skipping unprintable character", + (unsigned) curr_line[line_pos], current_token->name); + NextChar(); + } + else + { + /* normal termination after last legal character */ + EndEmit(current_token, ""); + state = START; + } + break; + + + } + } + break; + + + case IN_TOKEN_NEEDING_DELIM: /* within a token looking for delim */ + + /* looking for either a white space or a new matching delim */ + switch( curr_line[line_pos] ) + { + case ' ': + case '\t': + case '\n': + case '\f': + + Emit(current_token, curr_line[line_pos]); NextChar(); - Process(lang, current_token, current_token->end_inner_escape); - state = IN_TOKEN_AFTER_INNER_ESCAPE; break; default: - if( current_token->end_delimiter[0] != '\0' ) + /* had better match */ + i = Matching(); + if( pairs[i].first == NULL ) { - /* error: token ends at delimiter, not at unexpected character */ - if( Printable(curr_line[line_pos]) ) - fprintf(err_fp, "%s: skipping unexpected %c character in %s\n", - ErrorHeader(), curr_line[line_pos], current_token->name); - else - fprintf(err_fp, "%s: %s (octal %o) in %s\n", - ErrorHeader(), "skipping unexpected unprintable character", - (int) curr_line[line_pos], current_token->name); - NextChar(); - } - else - { - /* normal termination after last legal character */ - EndEmit(current_token, ""); - state = START; + /* this is not a suitable new start for delimiters */ + fprintf(err_fp, "%s: expected new delimiter here, found %c\n", + ErrorHeader(), curr_line[line_pos]); + exit(0); } + curr_bracket_delim = pairs[i].first; + curr_end_delim = pairs[i].second; + Emit(current_token, curr_line[line_pos]); + NextChar(); + end_delimiter_depth++; + state = IN_TOKEN; break; - } break; @@ -3478,6 +4044,19 @@ void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter) break; + case IN_TOKEN_NEEDING_DELIM: + + /* we stopped in a token at a point where we were looking for a delim */ + if( outer_token == (TOKEN *) NULL ) + fprintf(err_fp, "%s: program text ended within %s\n", + ErrorHeader(), current_token->name); + else + fprintf(err_fp, "%s: %s token ended within %s\n", + ErrorHeader(), outer_token->name, current_token->name); + EndEmit(current_token, ""); + break; + + case IN_TOKEN_AFTER_ESCAPE: /* we stopped after the escape character */ @@ -3539,6 +4118,7 @@ void PrintUsage() fprintf(err_fp, " -t<num> tab interval (e.g. 8 is default)\n"); fprintf(err_fp, " -T<dist> output tab interval (e.g. 0.5i)\n"); fprintf(err_fp, " -S<file> use this as the setup file\n"); + fprintf(err_fp, " -L<num> number lines from <num> (default is 1)\n"); fprintf(err_fp, " -n no file names as page headers\n"); fprintf(err_fp, " -V print version information and exit\n"); fprintf(err_fp, " -u print this usage message and exit\n"); @@ -3587,6 +4167,8 @@ int main(int argc, char *argv[]) tab_in = 8; tab_out = 3; tab_unit = 'f'; + print_lines = FALSE; + numbered_option = NULL; headers_option = TRUE; font_option = size_option = line_option = tabin_option = tabout_option = setup_option = language_option = (char *) NULL; @@ -3654,6 +4236,7 @@ int main(int argc, char *argv[]) ErrorHeader(), outfilename); exit(1); } + /* setbuf(out_fp, (char *) NULL); */ break; @@ -3767,13 +4350,26 @@ int main(int argc, char *argv[]) /* read alternative setup file */ if( raw_seen ) - { fprintf(err_fp, "%s: -s illegal with -r option\n", ErrorHeader()); + { fprintf(err_fp, "%s: -S illegal with -r option\n", ErrorHeader()); exit(1); } GetArg(setup_option, "usage: -S<filename>", FALSE); break; + case 'L': + + /* read line numbering */ + GetArg(numbered_option, "usage: -L<number>", TRUE); + print_lines = TRUE; + print_num = 1; + if( numbered_option!=NULL && sscanf(numbered_option,"%d",&print_num)!=1) + { fprintf(err_fp, "%s usage: -L or -L<number>\n", ErrorHeader()); + exit(1); + } + break; + + case 'n': if( raw_seen ) @@ -3932,6 +4528,8 @@ int main(int argc, char *argv[]) fprintf(out_fp, " tabin { %s }\n", tabin_option ); if( tabout_option != NULL ) fprintf(out_fp, " tabout { %s }\n", tabout_option ); + if( print_lines ) + fprintf(out_fp, " numbered { %d }\n", print_num ); fprintf(out_fp, "%s%s\n", "@Be", "gin"); while( (ch = getc(in_fp)) != EOF ) putc(ch, out_fp); |