Lout 3.21.

git-svn-id: http://svn.savannah.nongnu.org/svn/lout/trunk@11 9365b830-b601-4143-9ba8-b4a8e2c3339c
author: Jeffrey H. Kingston <jeff@it.usyd.edu.au> 2010-09-14 20:36:35 +0000
committer: Jeffrey H. Kingston <jeff@it.usyd.edu.au> 2010-09-14 20:36:35 +0000
commit: b10d39aec443165093f8f28bc6f940530b89cdaf (patch)
tree: 63a1ef3b3f1d2562c498291cda341a2171a1fe1c /prg2lout.c
parent: 2f4268e5e02216be53cd85816362191373512463 (diff)
download: lout-b10d39aec443165093f8f28bc6f940530b89cdaf.tar.gz
1 files changed, 1029 insertions, 431 deletions
diff --git a/prg2lout.c b/prg2lout.c
index d592edb..2c81371 100644
--- a/prg2lout.c
+++ b/prg2lout.c
@@ -45,12 +45,12 @@
 /*  end (cprint and cprintf, or their clones for other languages) does the   */
 /*  formatting.  For example, the C text                                     */
 /*                                                                           */
-/*      inc = inc + 1                                                        */
+/*      inc = inc / 2                                                        */
 /*                                                                           */
 /*  would be classified by prg2lout into identifier, operator, identifier,   */
 /*  operator, number; and consequently prg2lout would emit                   */
 /*                                                                           */
-/*      @PI{"inc"} @PO{"="} @PI{"inc"} @PO{"+"} @PN{"1"}                     */
+/*      @PI{inc} @PO{=} @PI{inc} @PO{"/"} @PN{2}                             */
 /*                                                                           */
 /*  which is readable by Lout, thanks to having quotes around everything     */
 /*  potentially dangerous, and clearly tells Lout, by means of the commands  */
@@ -104,11 +104,13 @@
 /*  the list of tokens of the language, and its keywords.                    */
 /*                                                                           */
 /*  Step 4.  Add your language variable to the list in the initializer of    */
-/*  variable languages, as you can see the others have been done.            */
+/*  variable languages, as you can see the others have been done.  Try to    */
+/*  keep the list alphabetical to deflect any charges of language bias.      */
 /*                                                                           */
-/*  Step 5.  If any lists of initializers now contain more than              */
-/*  MAX_ARRAY_LENGTH-1 elements, increase MAX_ARRAY_LENGTH until they don't. */
-/*  The gcc compiler will warn you if you forget to do this.                 */
+/*  Step 5.  If any lists of initializers now contain more than MAX_STARTS,  */
+/*  MAX_STARTS2, MAX_NAMES, MAX_TOKENS, or MAX_KEYWORDS elements, increase   */
+/*  these constants until they don't.  The gcc compiler will warn you if     */
+/*  you forget to do this.                                                   */
 /*                                                                           */
 /*  Step 6.  Recompile and reinstall prg2lout, test "prg2lout -u" then       */
 /*  "prg2lout -l <mylanguage> <myfile> | lout -s > out.ps".                  */
@@ -126,9 +128,49 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#define	FALSE		0
-#define	TRUE		1
-#define	BOOLEAN		unsigned
+#define	FALSE			0
+#define	TRUE			1
+#define	BOOLEAN			unsigned
+#define MAX_CHAR		256
+#define is_whitespace(ch) ((ch)==' ' || (ch)=='\t' || (ch)=='\n' || (ch)=='\f')
+
+/*****************************************************************************/
+/*                                                                           */
+/*  MAX_STARTS        1 + Maximum length of "starts" array in any token      */
+/*  MAX_STARTS2       1 + Maximum length of "starts2" array in any token     */
+/*  MAX_NAMES         1 + Maximum number of names for any language           */
+/*  MAX_TOKENS        1 + Maximum number of tokens in any language           */
+/*  MAX_KEYWORDS      1 + Maximum number of keywords in any language         */
+/*                                                                           */
+/*****************************************************************************/
+
+#define MAX_STARTS	120
+#define	MAX_STARTS2	 30
+#define MAX_NAMES	 10
+#define MAX_TOKENS	150
+#define MAX_KEYWORDS	350
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Bracketing pairs                                                         */
+/*                                                                           */
+/*  This declaration explains to prg2lout that { matches }, etc.             */
+/*                                                                           */
+/*****************************************************************************/
+
+typedef struct {
+  char *first;
+  char *second;
+} CHAR_PAIR;
+
+CHAR_PAIR pairs[] = {
+  { "(", ")" },
+  { "{", "}" },
+  { "[", "]" },
+  { "<", ">" },
+  { NULL, NULL }
+};
+
 
 /*****************************************************************************/
 /*                                                                           */
@@ -157,6 +199,10 @@ char AllPrintableTabNL[] =
   " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\
 ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n\t" ;
 
+char AllPrintableTabNLFF[] =
+  " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`\\{|}~\
+ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\n\t\f" ;
+
 char Letters[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" ;
 
 char Letter_Digit[] =
@@ -171,6 +217,26 @@ char Letter_Digit[] =
 #define SepDigits "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
 
 #define HexDigits "A", "a", "B", "b", "C", "c", "D", "d", "E", "e", "F", "f"
+
+#define SepPunct  "/", "(", "[", "{", "<", "!", "%", "#", "|", ",",	\
+		  ":", ";", "$", "\"","^", "&", "*", "-", "=", "+",	\
+		  "~", "'", "@", "?", ".", "`"
+
+#define BktPunct  "",  "(", "[", "{", "<", "",  "",  "",  "",  "",	\
+		  "",  "",  "",  "",  "",  "",  "",  "",  "",  "",	\
+		  "",  "",  "",  "",  "",  ""
+
+#define EndPunct  "/", ")", "]", "}", ">", "!", "%", "#", "|", ",",	\
+		  ":", ";", "$", "\"","^", "&", "*", "-", "=", "+",	\
+		  "~", "'", "@", "?", ".", "`"
+
+#define	PercentLetters							\
+"%A", "%B", "%C", "%D", "%E", "%F", "%G", "%H", "%I", "%J", "%K",	\
+"%L", "%M", "%N", "%O", "%P", "%Q", "%R", "%S", "%T", "%U", "%V",	\
+"%W", "%X", "%Y", "%Z",							\
+"%a", "%b", "%c", "%d", "%e", "%f", "%g", "%h", "%i", "%j", "%k",	\
+"%l", "%m", "%n", "%o", "%p", "%q", "%r", "%s", "%t", "%u", "%v",	\
+"%w", "%x", "%y", "%z", "%_"
 
 
 /*****************************************************************************/
@@ -203,12 +269,21 @@ char Letter_Digit[] =
 /*      \ in the token is printed with a preceding \ as required in Lout.    */
 /*      The usual arrangement for handling white space is that none of the   */
 /*      tokens contain it; when it is encountered prg2lout generates the     */
-/*      appropriate Lout without being told (a space for a space, a newline  */
-/*      for a newline, @NP for a formfeed, and something clever for tab      */
-/*      which does the required thing).  However, you can define a token     */
-/*      that contains white space if you wish.  If you do this, the          */
-/*      quotation marks will be temporarily closed off, the white space      */
-/*      handled as just described, then the quotes opened again.             */
+/*      appropriate Lout without being told: a space for a space, a newline  */
+/*      for a newline (possibly triggering a line number on the next line),  */
+/*      @NP for a formfeed, and something clever for tab which does the      */
+/*      required thing.  However, you can define a token that contains       */
+/*      white space if you wish, and then the effect will be:                */
+/*                                                                           */
+/*         space and tab    The quotation marks will be temporarily          */
+/*                          closed off, the white space handled as just      */
+/*                          described, then the quotes opened again          */
+/*                                                                           */
+/*         newline and ff   Both the quotation marks and the command         */
+/*                          will be closed off, the white space handled      */
+/*                          as just described, and then a new command        */
+/*                          started.  In effect, the token is broken into    */
+/*                          a sequence of tokens at these characters.        */
 /*                                                                           */
 /*      PRINT_NODELIMS_QUOTED.  This is like PRINT_WHOLE_QUOTED except that  */
 /*      the opening and closing delimiters of the token are omitted from     */
@@ -222,7 +297,18 @@ char Letter_Digit[] =
 /*      so that it is the user's responsibility to ensure that its content   */
 /*      is legal Lout; and when the command is another filter command, so    */
 /*      that the token content will not go directly into Lout anyway, it     */
-/*      will go through the other filter first.                              */
+/*      will go through the other filter first.  Since the result has to     */
+/*      be verbatim, there is no special treatment of white space characters */
+/*      and no insertion of line numbers.  However, if braces are printed    */
+/*      they really ought to match, so prog2lout checks this and will        */
+/*      complain and insert braces into the verbatim part if necessary.      */
+/*                                                                           */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Meaning of TOKEN fields (ctd.)                                           */
 /*                                                                           */
 /*      PRINT_NODELIMS_UNQUOTED.  This is like PRINT_WHOLE_UNQUOTED except   */
 /*      that the opening and closing delimiters of the token are omitted.    */
@@ -232,19 +318,12 @@ char Letter_Digit[] =
 /*                                                                           */
 /*      PRINT_COMMAND_ONLY.  This ignores the token and prints just the      */
 /*      command, presumably because the command says it all for that token.  */
-/*      When using PRINT_COMMAND_ONLY you will probably need to finish your  */
-/*      command with a space: since there are no following braces in this    */
+/*      When using PRINT_COMMAND_ONLY you will probably need to enclose the  */
+/*      command with braces: since there are no following braces in this     */
 /*      print style, your command will run into the next one otherwise.      */
 /*                                                                           */
-/*****************************************************************************/
-
-
-/*****************************************************************************/
-/*                                                                           */
-/*  Meaning of TOKEN fields (ctd.)                                           */
-/*                                                                           */
 /*  command                                                                  */
-/*      The Lout command to print.  This command could be an legal Lout;     */
+/*      The Lout command to print.  This command could be any legal Lout;    */
 /*      programming language setup files offer the following Lout symbols    */
 /*      that make the most common commands:                                  */
 /*                                                                           */
@@ -269,7 +348,8 @@ char Letter_Digit[] =
 /*      identifiers usually have command @PI and alternate_command @PK.      */
 /*                                                                           */
 /*  following_command                                                        */
-/*      Print this Lout command (or commands) after the token.               */
+/*      Print this Lout command (or commands) after the token.  If it is a   */
+/*      "broken" multi-line token, print this command after each fragment    */
 /*                                                                           */
 /*  start_line_only                                                          */
 /*      A Boolean field.  If TRUE, this token is to be recognized only       */
@@ -281,33 +361,38 @@ char Letter_Digit[] =
 /*      then it deems that this token has begun.  The recognized string      */
 /*      is the token's "starting delimiter".                                 */
 /*                                                                           */
-/*  starts2[], ends2[]                                                       */
-/*      These fields each hold an array of strings, and the two arrays       */
-/*      must have equal length.  If they have length zero, these fields      */
-/*      do not apply.  Otherwise, they modify the meaning of starts[] and    */
-/*      end_delimiter below.  Their effect is best seen by looking at some   */
-/*      examples from Perl, their only user at the time of writing:          */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
 /*                                                                           */
-/*         q/hello/  qq/hello/  qq?hello?  qq{hello}                         */
+/*  Meaning of TOKEN fields (ctd.)                                           */
+/*                                                                           */
+/*  starts2[], brackets2[], ends2[]                                          */
+/*      These fields each hold an array of strings, and the three arrays     */
+/*      must have equal length.  If starts2[] has length zero, these fields  */
+/*      do not apply.  Otherwise, they modify the meaning of starts[],       */
+/*      bracket_delimiter, and end_delimiter below.  Their effect is best    */
+/*      seen by looking at some examples from Perl, their main user:         */
+/*                                                                           */
+/*         q/hello/  qq/hello/  qq?hello?  qq{hel{}lo}                       */
 /*                                                                           */
 /*      These strings may begin with q, qq, qx, and several other things;    */
 /*      this is then followed by a single character which determines the     */
 /*      string terminator; e.g. / means "terminate with /", { means          */
-/*      "terminate with }", etc.  This is implemented by putting these       */
-/*      pairs of strings in corresponding places in starts[] and ends[].     */
+/*      "terminate with }", etc.  In some cases the start and end delims     */
+/*      come in matching pairs, and then there may be nested matching        */
+/*      pairs.  This is implemented as follows:                              */
 /*                                                                           */
-/*         starts:  { "q", "qq", "qx" }                                      */
-/*         starts2: { "/", "?",  "{"  }                                      */
-/*         ends2:   { "/", "?",  "}"  }                                      */
-/*                                                                           */
-/*      PerlQTypeToken is a larger example of this.                          */
-/*                                                                           */
-/*****************************************************************************/
-
-
-/*****************************************************************************/
+/*         starts:    { "q", "qq" }                                          */
+/*         starts2:   { "/", "?", "{" }                                      */
+/*         brackets2: { "",  "",  "{" }                                      */
+/*         ends2:     { "/", "?", "}" }                                      */
 /*                                                                           */
-/*  Meaning of TOKEN fields (ctd.)                                           */
+/*      Briefly, every token with non-null starts2 is expanded into a set    */
+/*      of tokens, one for each element i of starts2, whose starting delims  */
+/*      are starts with starts2[i] added, bracketing delim brackets2[i],     */
+/*      and end_delim ends2[i].  PerlQTypeToken is a larger example of this. */
 /*                                                                           */
 /*  legal                                                                    */
 /*      This string defines the set of legal characters inside this token.   */
@@ -342,6 +427,18 @@ char Letter_Digit[] =
 /*      number of tokens, and they may have inner escapes too; prg2lout      */
 /*      imposes no limit on the depth of nesting of inner escapes.           */
 /*                                                                           */
+/*****************************************************************************/
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Meaning of TOKEN fields (ctd.)                                           */
+/*                                                                           */
+/*  bracket_delimiter                                                        */
+/*      If this string is encountered within a token (not escaped), it       */
+/*      brackets with the next end_delimiter, meaning that the next end      */
+/*      delimiter will not end the token.                                    */
+/*                                                                           */
 /*  end_delimiter                                                            */
 /*      This string shows how the token ends; for example, a string would    */
 /*      have end_delimiter ".  If empty, it means that the token ends        */
@@ -361,15 +458,10 @@ char Letter_Digit[] =
 /*                                                                           */
 /*           s/abc/ABC/                                                      */
 /*                                                                           */
-/*      and similar things as single tokens.  Note that this works without   */
-/*      modification on s{abc}{ABC} provided { is legal within strings: as   */
-/*      long as the token terminates at the second }, we are safe.           */
+/*      etc. as single tokens.  If there is a bracket delimiter (see above), */
+/*      this will look for a new matching delimiter pair, as in s{}<>.       */
 /*                                                                           */
 /*****************************************************************************/
-
-
-#define MAX_CHAR		256
-#define MAX_ARRAY_LENGTH	401
 #define	PRINT_WHOLE_QUOTED	1
 #define	PRINT_NODELIMS_QUOTED	2
 #define	PRINT_WHOLE_UNQUOTED	3
@@ -380,18 +472,18 @@ char Letter_Digit[] =
 typedef struct token_rec {
   char *name;
   int print_style;
-  char *command;
-  char *alternate_command;
-  char *following_command;
+  char *command, *alternate_command, *following_command;
   BOOLEAN start_line_only;
-  char *starts[MAX_ARRAY_LENGTH];
-  char *starts2[MAX_ARRAY_LENGTH];
-  char *ends2[MAX_ARRAY_LENGTH];
+  char *starts[MAX_STARTS];
+  char *starts2[MAX_STARTS2];
+  char *brackets2[MAX_STARTS2];
+  char *ends2[MAX_STARTS2];
   char *legal;
   char *escape;
   char *escape_legal;
   char *inner_escape;
   char *end_inner_escape;
+  char *bracket_delimiter;
   char *end_delimiter;
   BOOLEAN end_start_line_only;
   BOOLEAN want_two_ends;
@@ -417,12 +509,14 @@ TOKEN CStringToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "\"" },		/* strings begin with a " character                 */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
   AllPrintable,		/* inside, any printable is OK		            */
   "\\",			/* within strings, \\ is the escape character       */
   AllPrintablePlusNL,	/* after escape char, any printable char or nl OK   */
   "",			/* strings do not permit "inner escapes"            */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "\"",			/* strings end with a " character                   */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -437,12 +531,14 @@ TOKEN CCharacterToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "'" },		/* characters begin with a ' character              */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
   AllPrintable,		/* inside, any printable character is OK            */
   "\\",			/* within characters, \\ is the escape character    */
   AllPrintable,		/* after escape char, any printable char is OK      */
   "",			/* characters do not permit "inner escapes"         */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "'",			/* characters end with a ' character                */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -458,12 +554,13 @@ TOKEN EiffelStringToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "\"" },		/* strings begin with a " character                 */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
   AllPrintable,		/* inside, any printable except " is OK             */
   "%",			/* within strings, % is the escape character        */
-  AllPrintable,		/* after escape char, any printable char is OK      */
-  "",			/* strings do not permit "inner escapes"            */
+  AllPrintable,		/* after escape char, any printable char is OK      */ "",			/* strings do not permit "inner escapes"            */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "\"",			/* strings end with a " character                   */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -478,12 +575,14 @@ TOKEN EiffelCharacterToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "'" },		/* characters begin with a ' character              */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
   AllPrintable,		/* inside, any printable except ' is OK             */
   "%",			/* within characters, % is the escape character     */
   AllPrintable,		/* after escape char, any printable char is OK      */
   "",			/* characters do not permit "inner escapes"         */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "'",			/* characters end with a ' character                */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -505,12 +604,14 @@ TOKEN IdentifierToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { SepLetters, "_" },	/* identifiers begin with any letter or _           */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
   Letter_Digit,		/* inside, letters, underscores, digits are OK      */
   "",			/* no escape character within identifiers           */
   "",			/* so nothing legal after escape char either        */
   "",			/* identifiers do not permit "inner escapes"        */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "",			/* identifiers do not end with a delimiter          */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -526,18 +627,20 @@ TOKEN IdentifierToken = {
 TOKEN NumberToken = {
   "number",		/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
-  "@PN",			/* Lout command for formatting numbers              */
+  "@PN",		/* Lout command for formatting numbers              */
   "",			/* no alternate command                             */
   "",			/* no following command                             */
   FALSE,		/* token allowed anywhere, not just start of line   */
   { SepDigits },	/* numbers must begin with a digit                  */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
   "0123456789.eE",	/* inside, digits, decimal point, exponent          */
   "",			/* no escape character within numbers               */
   "",			/* so nothing legal after escape char either        */
   "",			/* numbers do not permit "inner escapes"            */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "",			/* numbers do not end with a delimiter              */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -559,12 +662,14 @@ TOKEN CCommentToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "/*" },		/* comments begin with this character pair          */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintableTabNL,	/* inside, any printable char, tab, or nl is OK     */
+  AllPrintableTabNLFF,	/* inside, any printable char, tab, nl, ff is OK    */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "",			/* C comments do not permit "inner escapes"         */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "*/",			/* comments end with this character pair            */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -579,12 +684,14 @@ TOKEN CPPCommentToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "//" },		/* comments begin with this character pair          */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintable,		/* inside, any printable char is OK (not NL)        */
+  AllPrintablePlusTab,	/* inside, any printable char is OK (not NL)        */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "",			/* C comments do not permit "inner escapes"         */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "",			/* no end delimiter (end of line will end it)       */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -600,12 +707,14 @@ TOKEN EiffelCommentToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "--" },		/* comments begin with this character pair          */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintable,		/* inside, any printable char is OK                 */
+  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "`",			/* start of "inner escape" in Eiffel comment        */
   "'",			/* end of "inner escape" in Eiffel comment          */
+  "",			/* no bracketing delimiter			    */
   "",			/* no ending delimiter; end of line will end it     */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -620,12 +729,14 @@ TOKEN BlueCommentToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "==", "--" },	/* comments begin with this character pair          */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintable,		/* inside, any printable char is OK                 */
+  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "`",			/* start of "inner escape" in Blue comment          */
   "'",			/* end of "inner escape" in Blue comment            */
+  "",			/* no bracketing delimiter			    */
   "",			/* no ending delimiter; end of line will end it     */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -649,12 +760,14 @@ TOKEN CCommentEscapeToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "/*@" },		/* escape comments begin with this delimiter        */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintableTabNL,	/* inside, any printable char, tab, or nl is OK     */
+  AllPrintableTabNLFF,	/* inside, any printable char, tab, nl, ff is OK    */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "",			/* no "inner escape" in escape comments             */
   "",			/* so no end of "inner escape" either               */
+  "",			/* no bracketing delimiter			    */
   "*/",			/* comments end with this character pair            */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -669,12 +782,14 @@ TOKEN CPPCommentEscapeToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "//@" },		/* escape comments begin with this delimiter        */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintable,		/* inside, any printable char is OK                 */
+  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "",			/* no "inner escape" in escape comments             */
   "",			/* so no end of "inner escape" either               */
+  "",			/* no bracketing delimiter			    */
   "",			/* no end delimiter (end of line will end it)       */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -690,12 +805,14 @@ TOKEN EiffelCommentEscapeToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "--@" },		/* escape comments begin with this delimiter        */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintable,		/* inside, any printable char is OK                 */
+  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "",			/* no "inner escape" in escape comments             */
   "",			/* so no end of "inner escape" either               */
+  "",			/* no bracketing delimiter			    */
   "",			/* no ending delimiter; end of line will end it     */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -710,12 +827,14 @@ TOKEN BlueCommentEscapeToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "==@", "--@" },	/* escape comments begin with these delimiters      */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no brackets2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintable,		/* inside, any printable char is OK                 */
+  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "",			/* no "inner escape" in escape comments             */
   "",			/* so no end of "inner escape" either               */
+  "",			/* no bracketing delimiter			    */
   "",			/* no ending delimiter; end of line will end it     */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -738,10 +857,12 @@ TOKEN BlueCommentEscapeToken = {
   FALSE,		/* token not just start of line       */	\
   { str },		/* token begins (and ends!) with this */	\
   { NULL },		/* no start2 needed		      */	\
+  { NULL },		/* so no brackets2 either	      */	\
   { NULL },		/* so no end2 either		      */	\
   "",			/* nothing inside, since no inside    */	\
   "", "",		/* no escape character                */	\
   "", "",		/* no inner escape; no end inner esc  */	\
+  "",			/* no bracketing delimiter	      */	\
   "",			/* no ending delimiter                */	\
   FALSE,		/* end not have to be at line start   */	\
   FALSE,		/* don't end delimiter twice to stop  */	\
@@ -799,10 +920,12 @@ TOKEN ImpliesToken		= FixedToken("=>", "implies @A @PO");
   FALSE,		/* token not just start of line       */	\
   { str },		/* token begins (and ends!) with this */	\
   { NULL },		/* no start2 needed		      */	\
+  { NULL },		/* so no bracket2 either	      */	\
   { NULL },		/* so no end2 either		      */	\
   "",			/* nothing inside, since no inside    */	\
   "", "",		/* no escape character                */	\
   "", "",		/* no inner escape; no end inner esc  */	\
+  "",			/* no bracketing delimiter            */	\
   "",			/* no ending delimiter                */	\
   FALSE,		/* end not have to be at line start   */	\
   FALSE,		/* don't end delimiter twice to stop  */	\
@@ -822,7 +945,8 @@ TOKEN EiffelDotToken		= NoParameterToken(".",  "{@PD}");
 /*                                                                           */
 /*  In the comments below, WCS refers to "Programming Perl", Second          */
 /*  Edition (1996), by Wall, Christiansen, and Schwartz.  However Perl       */
-/*  has changed since then and this code also reflects those changes.        */
+/*  has changed since then and this code also reflects those changes         */
+/*  based on the on-line documentation provided with the 5.6.0 release.      */
 /*                                                                           */
 /*****************************************************************************/
 
@@ -857,19 +981,11 @@ TOKEN EiffelDotToken		= NoParameterToken(".",  "{@PD}");
 /*          tr [a-z]                                                         */
 /*             [A-Z]                                                         */
 /*                                                                           */
-/*  At present the tokens below have three problems implementing this:       */
-/*                                                                           */
-/*  (1)  When / appears without anything in front, it will be recognized     */
-/*       as a division operator, hence the following regular expression      */
-/*       will be treated as ordinary Perl, not as a regular expression;      */
-/*                                                                           */
-/*  (2)  When the delimiters are a matching pair, embedded occurrences       */
-/*       are not recognised as such: the first closing delimiter will be     */
-/*       taken to close the pair;                                            */
-/*                                                                           */
-/*  (3)  The case s{foo}(bar) is not implemented; the second bracketing      */
-/*       pair must be the same as the first; in fact prg2lout will keep      */
-/*       going, looking for a second closing brace, in this example.         */
+/*  Amazingly, the tokens below implement all of this perfectly except that  */
+/*  when / appears without anything in front, it will be recognized as a     */
+/*  regular expression provided that one of a long list of things precedes   */
+/*  it, otherwise it will be a division symbol.  This is not perfect but     */
+/*  seems to come extremely close in practice.                               */
 /*                                                                           */
 /*****************************************************************************/
 
@@ -877,18 +993,19 @@ TOKEN EiffelDotToken		= NoParameterToken(".",  "{@PD}");
 TOKEN PerlSingleQuoteStringToken = {
   "''-string",		/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
-  "@PS",			/* Lout command for formatting strings              */
+  "@PS",		/* Lout command for formatting strings              */
   "",			/* no alternate command                             */
   "",			/* no following command                             */
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "'" },		/* strings begin with a ' character                 */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* inside, any character at all is OK               */
   "\\",			/* within strings, \\ is the escape character       */
-  "\\'",		/* after escape, only \ and ' are OK (WCS p. 39)    */
-  "",			/* strings do not permit "inner escapes"            */
-  "",			/* and so there is no end innner escape either      */
+  AllCharacters,	/* after escape, any character is OK (trust us)     */
+  "", "",		/* no "inner escapes"; no end innner escape         */
+  "",			/* no bracketing delimiter			    */
   "\'",			/* strings end with a ' character                   */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -897,18 +1014,19 @@ TOKEN PerlSingleQuoteStringToken = {
 TOKEN PerlDoubleQuoteStringToken = {
   "\"\"-string",	/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
-  "@PS",			/* Lout command for formatting strings              */
+  "@PS",		/* Lout command for formatting strings              */
   "",			/* no alternate command                             */
   "",			/* no following command                             */
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "\"" },		/* strings begin with a " character                 */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* inside, any character at all is OK               */
   "\\",			/* within strings, \\ is the escape character       */
   AllCharacters,	/* after escape char, any character at all is OK    */
-  "",			/* strings do not permit "inner escapes"            */
-  "",			/* and so there is no end innner escape either      */
+  "", "",		/* no "inner escapes"; no end innner escape         */
+  "",			/* no bracketing delimiter			    */
   "\"",			/* strings end with a " character                   */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -923,12 +1041,13 @@ TOKEN PerlBackQuoteStringToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "`" },		/* strings begin with a ` character                 */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* inside, any character at all is OK               */
   "\\",			/* within strings, \\ is the escape character       */
   AllCharacters,	/* after escape char, any character at all is OK    */
-  "",			/* strings do not permit "inner escapes"            */
-  "",			/* and so there is no end innner escape either      */
+  "", "",		/* no "inner escapes"; no end innner escape         */
+  "",			/* no bracketing delimiter			    */
   "`",			/* strings end with a ` character                   */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -936,40 +1055,44 @@ TOKEN PerlBackQuoteStringToken = {
 
 
 TOKEN PerlQTypeStringToken = {
-  "q-type-string",	/* used by error messages involving this token      */
+  "q-type string",	/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
-  "@PS",			/* Lout command for formatting strings              */
+  "@PS",		/* Lout command for formatting strings              */
   "",			/* no alternate command                             */
   "",			/* no following command                             */
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "q", "qq", "qx", "qw", "qr", "m" },	/* q-type strings begin with these  */
-  { "/", "(", "[", "{", "<", "!", "%", "#", "|", ",", ":", ";" },  /*start2 */
-  { "/", ")", "]", "}", ">", "!", "%", "#", "|", ",", ":", ";" },  /* end2  */
+  { SepPunct },		/* start2 can be any punctuation character	    */
+  { BktPunct },		/* bracketing delimiters to match SepPunct	    */
+  { EndPunct },		/* end2 must match start2			    */
   AllCharacters,	/* inside, any character at all is OK               */
   "\\",			/* within strings, \\ is the escape character       */
   AllCharacters,	/* after escape char, any character at all is OK    */
   "",			/* strings do not permit "inner escapes"            */
   "",			/* and so there is no end innner escape either      */
+  "",			/* will be using bracket2 for bracket delimiter     */
   "",			/* will be using end2 for the end delimiter here    */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
 };
 
 TOKEN PerlSTypeStringToken = {
-  "s-type-string",	/* used by error messages involving this token      */
+  "s-type string",	/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
-  "@PS",			/* Lout command for formatting strings              */
+  "@PS",		/* Lout command for formatting strings              */
   "",			/* no alternate command                             */
   "",			/* no following command                             */
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "s", "y", "tr" },	/* s-type strings begin with these		    */
-  { "/", "(", "[", "{", "<", "!", "%", "#", "|", ",", ":", ";" },  /*start2 */
-  { "/", ")", "]", "}", ">", "!", "%", "#", "|", ",", ":", ";" },  /* end2  */
+  { SepPunct },		/* start2 can be any punctuation character	    */
+  { BktPunct },		/* bracketing delimiters to match SepPunct	    */
+  { EndPunct },		/* end2 must match start2			    */
   AllCharacters,	/* inside, any character at all is OK               */
   "\\",			/* within strings, \\ is the escape character       */
   AllCharacters,	/* after escape char, any character at all is OK    */
   "",			/* strings do not permit "inner escapes"            */
   "",			/* and so there is no end innner escape either      */
+  "",			/* will be using bracket2 for bracket delimiter     */
   "",			/* will be using end2 for the end delimiter here    */
   FALSE,		/* end delimiter does not have to be at line start  */
   TRUE,			/* need to see end delimiter twice to stop	    */
@@ -980,16 +1103,17 @@ TOKEN PerlSTypeStringToken = {
 /*                                                                           */
 /*  Perl "bare" regular expressions                                          */
 /*                                                                           */
-/*  By a bare regular expression, we mean one that is not preceded           */
-/*  by q, qq, m etc.  These are distinguished from division by being         */
-/*  preceded by one of =, =~, !~, split, if, not, unless, for, foreach,      */
-/*  or while, with up to two white space characters intervening.             */
+/*  By a bare regular expression, we mean one that is not preceded by m.     */
+/*  These are distinguished from division by being preceded by one of (, =,  */
+/*  =~, !~, split, if, and, &&, not, ||, xor, not, !, unless, for, foreach,  */
+/*  or while, with up to two white space characters intervening.  Also,      */
+/*  a / at the start of a line is taken to begin a regular expression.       */
 /*                                                                           */
 /*****************************************************************************/
 
 #define PerlREToken(start, com)						\
 {									\
-  "regexp",		/* used by error messages                 */	\
+  "regex",		/* used by error messages                 */	\
   PRINT_NODELIMS_QUOTED,/* no delims since we supply them         */	\
   com,			/* the command                            */	\
   "",			/* no alternate command                   */	\
@@ -997,16 +1121,19 @@ TOKEN PerlSTypeStringToken = {
   FALSE,		/* token allowed not just start of line   */	\
   { start },		/* preceding token in this case           */	\
   { "/", " /", "\t/", "  /", " \t/", "\t /", "\t\t/" }, /* start2 */	\
-  { "/",  "/",   "/",   "/",    "/",    "/",     "/" }, /* end2   */	\
+  { "",  "",   "",    "",    "",     "",     ""      }, /* bracket2 */	\
+  { "/",  "/", "/",   "/",   "/",    "/",    "/"     }, /* end2   */	\
   AllCharacters,	/* any character OK inside                */	\
   "\\",			/* \\ is the escape character             */	\
   AllCharacters,	/* after escape char, any is OK           */	\
   "",			/* no inner escapes                       */	\
   "",			/* no end innner escape either            */	\
+  "",			/* will be using bracket2 here            */	\
   "",			/* will be using end2 here                */	\
   FALSE,		/* no need to end at line start           */	\
   FALSE,		/* don't want end delimiter twice         */	\
 }
+
 
 TOKEN PerlRegExpLPar    = PerlREToken("(",      "@PO{\"(\"} @PS{\"/\"}@PS");
 TOKEN PerlRegExpEq      = PerlREToken("=",      "@PO{\"=\"} @PS{\"/\"}@PS");
@@ -1014,7 +1141,13 @@ TOKEN PerlRegExpMatch   = PerlREToken("=~",     "@PO{\"=~\"} @PS{\"/\"}@PS");
 TOKEN PerlRegExpNoMatch = PerlREToken("!~",     "@PO{\"!~\"} @PS{\"/\"}@PS");
 TOKEN PerlRegExpSplit   = PerlREToken("split",  "@PK{split} @PS{\"/\"}@PS");
 TOKEN PerlRegExpIf      = PerlREToken("if",     "@PK{if} @PS{\"/\"}@PS");
+TOKEN PerlRegExpAnd     = PerlREToken("and",    "@PK{and} @PS{\"/\"}@PS");
+TOKEN PerlRegExpAnd2    = PerlREToken("&&",     "@PO{\"&&\"} @PS{\"/\"}@PS");
+TOKEN PerlRegExpOr      = PerlREToken("or",     "@PK{or} @PS{\"/\"}@PS");
+TOKEN PerlRegExpOr2     = PerlREToken("||",     "@PO{\"||\"} @PS{\"/\"}@PS");
+TOKEN PerlRegExpXor     = PerlREToken("xor",    "@PK{xor} @PS{\"/\"}@PS");
 TOKEN PerlRegExpNot     = PerlREToken("not",    "@PK{not} @PS{\"/\"}@PS");
+TOKEN PerlRegExpNot2    = PerlREToken("!",      "@PO{\"!\"} @PS{\"/\"}@PS");
 TOKEN PerlRegExpUnless  = PerlREToken("unless", "@PK{unless} @PS{\"/\"}@PS");
 TOKEN PerlRegExpFor     = PerlREToken("for",    "@PK{for} @PS{\"/\"}@PS");
 TOKEN PerlRegExpForEach = PerlREToken("foreach","@PK{foreach} @PS{\"/\"}@PS");
@@ -1022,7 +1155,7 @@ TOKEN PerlRegExpWhile   = PerlREToken("while",  "@PK{while} @PS{\"/\"}@PS");
 
 TOKEN PerlRegExpStartLineToken =
 {
-  "regexp",		/* used by error messages                 */
+  "regex",		/* used by error messages                 */
   PRINT_WHOLE_QUOTED,	/* we can print the whole thing this time */
   "@PS",		/* the command                            */
   "",			/* no alternate command                   */
@@ -1030,12 +1163,14 @@ TOKEN PerlRegExpStartLineToken =
   TRUE,			/* token allowed only at start of line    */
   { "/" },		/* starting delimiter (so easy!)          */
   { NULL },		/* no start2				  */
-  { NULL },		/* no end2				  */
+  { NULL },		/* so no bracket2 either		  */
+  { NULL },		/* so no end2 either			  */
   AllCharacters,	/* any character OK inside                */
   "\\",			/* \\ is the escape character             */
   AllCharacters,	/* after escape char, any is OK           */
   "",			/* no inner escapes                       */
   "",			/* no end innner escape either            */
+  "",			/* no bracketing delimiter                */
   "/",			/* ending delimiter                       */
   FALSE,		/* no need to end at line start           */
   FALSE,		/* don't want end delimiter twice         */
@@ -1044,7 +1179,7 @@ TOKEN PerlRegExpStartLineToken =
 
 /*****************************************************************************/
 /*                                                                           */
-/*  Perl's here-documents                                                    */
+/*  Perl's here-documents [OBSOLETE CODE - see following for replacement]    */
 /*                                                                           */
 /*  At present the only terminating strings recognized are EOT, EOF, END,    */
 /*  and the empty string.  These may all be quoted in the usual ways.        */
@@ -1057,14 +1192,16 @@ TOKEN PerlRegExpStartLineToken =
   PRINT_NODELIMS_QUOTED,/* no delims since we supply them         */	\
   startcom,		/* the command                            */	\
   "",			/* no alternate command                   */	\
-  endcom,		/* following command (final /)            */	\
+  endcom,		/* following command                      */	\
   FALSE,		/* token allowed not just start of line   */	\
   { startstr },		/* starting delimiter                     */	\
   { NULL },		/* no start2				  */	\
+  { NULL },		/* so no bracket2 either		  */	\
   { NULL },		/* no end2				  */	\
   AllCharacters,	/* any character OK inside                */	\
   "", "",		/* no escape character                    */	\
   "", "",		/* no inner escapes                       */	\
+  "",			/* no bracketing delimiter		  */	\
   endstr,		/* token ends with this                   */	\
   TRUE,			/* must be found at line start		  */	\
   FALSE,		/* don't want end delimiter twice         */	\
@@ -1075,6 +1212,7 @@ TOKEN PerlRegExpStartLineToken =
 #define sEND	"\n@PS{\"END\"}\n"
 #define sBLA	"\n@PS{\"\"}\n"
 
+/* ***
 TOKEN HereEOTuq = X("<<EOT",    "EOT\n", "@PO{<<}@PS{\"EOT\"}@PS",        sEOT);
 TOKEN HereEOTdq = X("<<\"EOT\"","EOT\n", "@PO{<<}@PS{\"\\\"EOT\\\"\"}@PS",sEOT);
 TOKEN HereEOTfq = X("<<'EOT'",  "EOT\n", "@PO{<<}@PS{\"'EOT'\"}@PS",      sEOT);
@@ -1094,6 +1232,58 @@ TOKEN HereBLAuq = X("<< ",      "\n",    "@PO{<<} @PS",                  sBLA);
 TOKEN HereBLAdq = X("<<\"\"",   "\n",    "@PO{<<}@PS{\"\\\"\\\"\"}@PS",   sBLA);
 TOKEN HereBLAfq = X("<<''",     "\n",    "@PO{<<}@PS{\"''\"}@PS",         sBLA);
 TOKEN HereBLAbq = X("<<``",     "\n",    "@PO{<<}@PS{\"``\"}@PS",         sBLA);
+*** */
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  Perl's here-documents [OBSOLETE CODE - see following for replacement]    */
+/*                                                                           */
+/*  At present the only terminating strings recognized are EOT, EOF, END,    */
+/*  and the empty string.  These may all be quoted in the usual ways.        */
+/*                                                                           */
+/*****************************************************************************/
+
+#define HereToken(startstr, endstr)					\
+{									\
+  "here-document",	/* used by error messages                 */	\
+  PRINT_WHOLE_QUOTED,	/* as usual                               */	\
+  "@PS",		/* here documents are strings             */	\
+  "",			/* no alternate command                   */	\
+  "",			/* no following command                   */	\
+  FALSE,		/* token allowed not just start of line   */	\
+  { startstr },		/* starting delimiter                     */	\
+  { NULL },		/* no start2				  */	\
+  { NULL },		/* no bracket2				  */	\
+  { NULL },		/* no end2				  */	\
+  AllCharacters,	/* any character OK inside                */	\
+  "", "",		/* no escape character                    */	\
+  "", "",		/* no inner escapes                       */	\
+  "",			/* no bracketing delimiter		  */	\
+  endstr,		/* token ends with this                   */	\
+  TRUE,			/* must be found at line start		  */	\
+  FALSE,		/* don't want end delimiter twice         */	\
+}
+
+TOKEN HereEOTuq = HereToken("<<EOT",    "EOT\n");
+TOKEN HereEOTdq = HereToken("<<\"EOT\"","EOT\n");
+TOKEN HereEOTfq = HereToken("<<'EOT'",  "EOT\n");
+TOKEN HereEOTbq = HereToken("<<`EOT`",  "EOT\n");
+
+TOKEN HereEOFuq = HereToken("<<EOF",    "EOF\n");
+TOKEN HereEOFdq = HereToken("<<\"EOF\"","EOF\n");
+TOKEN HereEOFfq = HereToken("<<'EOF'",  "EOF\n");
+TOKEN HereEOFbq = HereToken("<<`EOF`",  "EOF\n");
+
+TOKEN HereENDuq = HereToken("<<END",    "END\n");
+TOKEN HereENDdq = HereToken("<<\"END\"","END\n");
+TOKEN HereENDfq = HereToken("<<'END'",  "END\n");
+TOKEN HereENDbq = HereToken("<<`END`",  "END\n");
+
+TOKEN HereBLAuq = HereToken("<< ",      "\n");
+TOKEN HereBLAdq = HereToken("<<\"\"",   "\n");
+TOKEN HereBLAfq = HereToken("<<''",     "\n");
+TOKEN HereBLAbq = HereToken("<<``",     "\n");
 
 
 /*****************************************************************************/
@@ -1109,52 +1299,66 @@ TOKEN HereBLAbq = X("<<``",     "\n",    "@PO{<<}@PS{\"``\"}@PS",         sBLA);
 /*                                                                           */
 /*  In addition we have to consider that variable names may be preceded      */
 /*  by $, @, %, &, or *.  Whether these are part of the variable or not is   */
-/*  rather doubtful.  We will treat $ and @ as part of the variable and the  */
-/*  others not (since they occur elsewhere in the token list anyway); plus   */
-/*  we have a separate token type for identifiers beginning with $ and       */
+/*  rather doubtful.  We will treat $, @ and % as part of the variable and   */
+/*  the others not (since they occur elsewhere in the token list anyway);    */
+/*  plus we have a separate token type for identifiers beginning with $ and  */
 /*  followed by one character, which we will list explicitly.                */
+/*  We also deal with the $^. variables, e.g. $^W.                           */
 /*                                                                           */
 /*****************************************************************************/
 
 TOKEN PerlIdentifierToken = {
   "identifier",		/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
-  "@PI",			/* Lout command for formatting identifiers          */
+  "@PI",		/* Lout command for formatting identifiers          */
   "@PK",		/* Alternate command (for keywords)                 */
   "",			/* no following command                             */
   FALSE,		/* token allowed anywhere, not just start of line   */
-  {SepLetters,"_","$","@"},	/* identifiers begin with these		    */
+  { SepLetters, "_", "$", "@", PercentLetters},	/* identifier starts        */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   Letter_Digit,		/* inside, letters, underscores, digits are OK      */
   "",			/* no escape character within identifiers           */
   "",			/* so nothing legal after escape char either        */
   "",			/* identifiers do not permit "inner escapes"        */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "",			/* identifiers do not end with a delimiter          */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
 };
+
 
 TOKEN PerlSpecialIdentifierToken = {
-  "special identifier",	/* used by error messages involving this token      */
+  "special variable",	/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
-  "@PI",			/* Lout command for formatting identifiers          */
-  "@PK",		/* Alternate command (for keywords)                 */
+  "@PI",		/* Lout command for formatting identifiers          */
+  "",			/* Alternate command (for keywords)                 */
   "",			/* no following command                             */
   FALSE,		/* token allowed anywhere, not just start of line   */
-  { "$_", "$.", "$/", "$,", "$\\", "$\"", "$;", "$^L", "$:",
-    "$^A", "$#", "$?", "$!", "$@", "$$", "$<", "$>", "$(",
-    "$)", "$0", "$[", "$]", "$^D", "$^E", "$^F", "$^H", "$^I",
-    "$^M", "$^O", "$^P", "$^T", "$^W", "$^X"
-  },			/* all possible values of special variables         */
+  {
+    /* Taken from 5.6.0's perlvar.pod */
+    /* NB special variables that begin $^, e.g. $^D can also be written as */
+    /* dollar control D (yes literal control D) -- but we ignore this      */
+    /* wrinkle. We only list the first 9 regex match variables.            */
+
+    /* Only the ones not recognized elsewhere are being kept now */
+    "$&", "$`", "$'", "$+", "@+", "$*", "$.", "$/", "$|", "$,", "$\\", "$\"",
+    "$;", "$#", "$%", "$=", "$-", "@-", "$~", "$^", "$:", "$^L", "$^A", "$?",
+    "$!", "$^E", "$@", "$$", "$<", "$>", "$(", "$)", "$0", "$[", "$]", "$^C",
+    "$^D", "$^F", "$^H", "%^H", "$^I", "$^M", "$^O", "$^P", "$^R", "$^S",
+    "$^T", "$^V", "$^W", "${^WARNING_BITS}", "${^WIDE_SYSTEM_CALLS}", "$^X",
+  },
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   "",			/* nothing allowed inside, since ends after start   */
   "",			/* no escape character within identifiers           */
   "",			/* so nothing legal after escape char either        */
   "",			/* identifiers do not permit "inner escapes"        */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "",			/* identifiers do not end with a delimiter          */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1175,24 +1379,28 @@ TOKEN PerlSpecialIdentifierToken = {
 /*      4_294_967_296   # underline for legibility                           */
 /*                                                                           */
 /*  Implementation is straightforward; hexadecimal is a separate token.      */
+/*  Binary numbers introduced with 5.6.0 of the form 0b1010 are also         */
+/*  catered for.                                                             */
 /*                                                                           */
 /*****************************************************************************/
 
 TOKEN PerlLiteralNumberToken = {
   "number",		/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
-  "@PN",			/* Lout command for formatting numbers              */
+  "@PN",		/* Lout command for formatting numbers              */
   "",			/* no alternate command                             */
   "",			/* no following command                             */
   FALSE,		/* token allowed anywhere, not just start of line   */
   { SepDigits },	/* numbers must begin with a digit                  */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   "0123456789.eE_",	/* inside, digits, point, exponent, underscore      */
   "",			/* no escape character within numbers               */
   "",			/* so nothing legal after escape char either        */
   "",			/* numbers do not permit "inner escapes"            */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "",			/* numbers do not end with a delimiter              */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1201,18 +1409,43 @@ TOKEN PerlLiteralNumberToken = {
 TOKEN PerlHexNumberToken = {
   "number",		/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
-  "@PN",			/* Lout command for formatting numbers              */
+  "@PN",		/* Lout command for formatting numbers              */
   "",			/* no alternate command                             */
   "",			/* no following command                             */
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "0x" },		/* hex numbers must begin with 0x		    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   "0123456789AaBbCcDdEeFf",	/* inside, hexadecimal digits		    */
   "",			/* no escape character within numbers               */
   "",			/* so nothing legal after escape char either        */
   "",			/* numbers do not permit "inner escapes"            */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
+  "",			/* numbers do not end with a delimiter              */
+  FALSE,		/* end delimiter does not have to be at line start  */
+  FALSE,		/* don't need to see end delimiter twice to stop    */
+};
+
+
+TOKEN PerlBinaryNumberToken = {
+  "number",		/* used by error messages involving this token      */
+  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
+  "@PN",		/* Lout command for formatting numbers              */
+  "",			/* no alternate command                             */
+  "",			/* no following command                             */
+  FALSE,		/* token allowed anywhere, not just start of line   */
+  { "0b" },		/* binary numbers must begin with 0b		    */
+  { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
+  { NULL },		/* so no end2 either				    */
+  "01",	        	/* inside, binary digits			    */
+  "",			/* no escape character within numbers               */
+  "",			/* so nothing legal after escape char either        */
+  "",			/* numbers do not permit "inner escapes"            */
+  "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "",			/* numbers do not end with a delimiter              */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1238,12 +1471,14 @@ TOKEN PerlCommentToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "#" },		/* comments begin with this character		    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintable,		/* inside, any printable char is OK (not NL)        */
+  AllPrintablePlusTab,	/* inside, any printable char is OK (not NL)        */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "",			/* C comments do not permit "inner escapes"         */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "",			/* no end delimiter (end of line will end it)       */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1258,12 +1493,14 @@ TOKEN PerlCommentEscapeToken = {
   FALSE,		/* token allowed anywhere, not just start of line   */
   { "#@" },		/* comments begin with this character pair	    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
-  AllPrintable,		/* inside, any printable char is OK                 */
+  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
   "",			/* no escape character within comments              */
   "",			/* so nothing legal after escape char either        */
   "",			/* no "inner escape" in escape comments             */
   "",			/* so no end of "inner escape" either               */
+  "",			/* no bracketing delimiter			    */
   "",			/* no end delimiter (end of line will end it)       */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1280,7 +1517,7 @@ TOKEN PerlCommentEscapeToken = {
 /*                                                                           */
 /*  "A line beginning with = is assumed to introduce some documentation,     */
 /*  which continues until another line is reached beginning with =cut"       */
-/*  (WCS page36).  Strictly speaking this is only valid at points where      */
+/*  (WCS page 36).  Strictly speaking this is only valid at points where     */
 /*  a statement would be legal, but that is beyond prg2lout to implement.    */
 /*                                                                           */
 /*****************************************************************************/
@@ -1294,17 +1531,19 @@ TOKEN PerlPodToken = {
   TRUE,			/* token allowed at start of line only              */
   { "=", "=pod" },	/* pod insert begins with either of these	    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* inside, any character at all is OK               */
   "",			/* no escape character within pod comments          */
   "",			/* so nothing legal after escape char either        */
   "",			/* pod comments do not permit "inner escapes"       */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter			    */
   "=cut",		/* pod comments end with this string                */
   TRUE,			/* end delimiter must be at line start		    */
   FALSE,		/* don't need to see end delimiter twice to stop    */
 };
-
+
 
 /*****************************************************************************/
 /*                                                                           */
@@ -1328,33 +1567,62 @@ TOKEN PerlAndToken              = FixedToken( "&&",  "@PO" ) ;
 TOKEN PerlOrToken               = FixedToken( "||",  "@PO" ) ;
 TOKEN PerlRange2Token           = FixedToken( "..",  "@PO" ) ;
 TOKEN PerlRange3Token           = FixedToken( "...", "@PO" ) ;
-TOKEN PerlFileTestrToken        = FixedToken( "-r",  "@PO" ) ;
-TOKEN PerlFileTestwToken        = FixedToken( "-w",  "@PO" ) ;
-TOKEN PerlFileTestxToken        = FixedToken( "-x",  "@PO" ) ;
-TOKEN PerlFileTestoToken        = FixedToken( "-o",  "@PO" ) ;
-TOKEN PerlFileTestRToken        = FixedToken( "-R",  "@PO" ) ;
-TOKEN PerlFileTestWToken        = FixedToken( "-W",  "@PO" ) ;
-TOKEN PerlFileTestXToken        = FixedToken( "-X",  "@PO" ) ;
-TOKEN PerlFileTestOToken        = FixedToken( "-O",  "@PO" ) ;
-TOKEN PerlFileTesteToken        = FixedToken( "-e",  "@PO" ) ;
-TOKEN PerlFileTestzToken        = FixedToken( "-z",  "@PO" ) ;
-TOKEN PerlFileTestsToken        = FixedToken( "-s",  "@PO" ) ;
-TOKEN PerlFileTestfToken        = FixedToken( "-f",  "@PO" ) ;
-TOKEN PerlFileTestdToken        = FixedToken( "-d",  "@PO" ) ;
-TOKEN PerlFileTestlToken        = FixedToken( "-l",  "@PO" ) ;
-TOKEN PerlFileTestpToken        = FixedToken( "-p",  "@PO" ) ;
-TOKEN PerlFileTestSToken        = FixedToken( "-S",  "@PO" ) ;
-TOKEN PerlFileTestbToken        = FixedToken( "-b",  "@PO" ) ;
-TOKEN PerlFileTestcToken        = FixedToken( "-c",  "@PO" ) ;
-TOKEN PerlFileTesttToken        = FixedToken( "-t",  "@PO" ) ;
-TOKEN PerlFileTestuToken        = FixedToken( "-u",  "@PO" ) ;
-TOKEN PerlFileTestgToken        = FixedToken( "-g",  "@PO" ) ;
-TOKEN PerlFileTestkToken        = FixedToken( "-k",  "@PO" ) ;
-TOKEN PerlFileTestTToken        = FixedToken( "-T",  "@PO" ) ;
-TOKEN PerlFileTestBToken        = FixedToken( "-B",  "@PO" ) ;
-TOKEN PerlFileTestMToken        = FixedToken( "-M",  "@PO" ) ;
-TOKEN PerlFileTestAToken        = FixedToken( "-A",  "@PO" ) ;
-TOKEN PerlFileTestCToken        = FixedToken( "-C",  "@PO" ) ;
+
+
+/*****************************************************************************/
+/*                                                                           */
+/*  FlagToken - for -r and the rest (followed by white space)                */
+/*                                                                           */
+/*****************************************************************************/
+
+#define FlagToken(str, command) /* define fixed-string token */	\
+{									\
+  str,			/* name used for debugging only       */	\
+  PRINT_WHOLE_QUOTED,	/* print this token as usual          */	\
+  command,		/* Lout command for formatting this   */	\
+  "",			/* no alternate command               */	\
+  "",			/* no following command               */	\
+  FALSE,		/* token not just start of line       */	\
+  { str },		/* token begins (and ends!) with this */	\
+  { " ", "\t" },	/* plus a white space char	      */	\
+  { "",  ""   },	/* no bracket2 though  		      */	\
+  { "",  ""   },	/* no end2 though   		      */	\
+  "",			/* nothing inside, since no inside    */	\
+  "", "",		/* no escape character                */	\
+  "", "",		/* no inner escape; no end inner esc  */	\
+  "",			/* no bracketing delimiter            */	\
+  "",			/* no ending delimiter                */	\
+  FALSE,		/* end not have to be at line start   */	\
+  FALSE,		/* don't end delimiter twice to stop  */	\
+}
+
+TOKEN PerlFileTestrToken        = FlagToken( "-r",  "@PO" ) ;
+TOKEN PerlFileTestwToken        = FlagToken( "-w",  "@PO" ) ;
+TOKEN PerlFileTestxToken        = FlagToken( "-x",  "@PO" ) ;
+TOKEN PerlFileTestoToken        = FlagToken( "-o",  "@PO" ) ;
+TOKEN PerlFileTestRToken        = FlagToken( "-R",  "@PO" ) ;
+TOKEN PerlFileTestWToken        = FlagToken( "-W",  "@PO" ) ;
+TOKEN PerlFileTestXToken        = FlagToken( "-X",  "@PO" ) ;
+TOKEN PerlFileTestOToken        = FlagToken( "-O",  "@PO" ) ;
+TOKEN PerlFileTesteToken        = FlagToken( "-e",  "@PO" ) ;
+TOKEN PerlFileTestzToken        = FlagToken( "-z",  "@PO" ) ;
+TOKEN PerlFileTestsToken        = FlagToken( "-s",  "@PO" ) ;
+TOKEN PerlFileTestfToken        = FlagToken( "-f",  "@PO" ) ;
+TOKEN PerlFileTestdToken        = FlagToken( "-d",  "@PO" ) ;
+TOKEN PerlFileTestlToken        = FlagToken( "-l",  "@PO" ) ;
+TOKEN PerlFileTestpToken        = FlagToken( "-p",  "@PO" ) ;
+TOKEN PerlFileTestSToken        = FlagToken( "-S",  "@PO" ) ;
+TOKEN PerlFileTestbToken        = FlagToken( "-b",  "@PO" ) ;
+TOKEN PerlFileTestcToken        = FlagToken( "-c",  "@PO" ) ;
+TOKEN PerlFileTesttToken        = FlagToken( "-t",  "@PO" ) ;
+TOKEN PerlFileTestuToken        = FlagToken( "-u",  "@PO" ) ;
+TOKEN PerlFileTestgToken        = FlagToken( "-g",  "@PO" ) ;
+TOKEN PerlFileTestkToken        = FlagToken( "-k",  "@PO" ) ;
+TOKEN PerlFileTestTToken        = FlagToken( "-T",  "@PO" ) ;
+TOKEN PerlFileTestBToken        = FlagToken( "-B",  "@PO" ) ;
+TOKEN PerlFileTestMToken        = FlagToken( "-M",  "@PO" ) ;
+TOKEN PerlFileTestAToken        = FlagToken( "-A",  "@PO" ) ;
+TOKEN PerlFileTestCToken        = FlagToken( "-C",  "@PO" ) ;
 
 
 /*****************************************************************************/
@@ -1379,8 +1647,7 @@ TOKEN PerlFileTestCToken        = FixedToken( "-C",  "@PO" ) ;
 /*                                                                           */
 /*  By a "paragraph" is meant a sequence of lines down to the next empty     */
 /*  line; but we will handle verbatim paragraphs one line at a time.         */
-/*  It also seems that an empty line in the input has to become an empty     */
-/*  line in the output; so we include a token for that.                      */
+/*  Also, an empty line in the input has to become an empty line in output.  */
 /*                                                                           */
 /*****************************************************************************/
 
@@ -1388,15 +1655,16 @@ TOKEN PodVerbatimLineToken = {
   "verbatim-para",	/* used by error messages involving this token      */
   PRINT_WHOLE_QUOTED,	/* printing the whole paragraph quoted		    */
   "@PV ",		/* Lout command for formatting verbatim line        */
-  "",			/* no alternate command                             */
-  "",			/* no following command                             */
+  "", "",		/* no alternate command; no following command       */
   TRUE,			/* token allowed at start of line only              */
   { "\t", " " },	/* command begins with this                   	    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   AllPrintablePlusTab,	/* inside, any printable char except newline is OK  */
   "", "",		/* no escape character within verbatim lines        */
   "", "",		/* no "inner escapes" within verbatim lines         */
+  "",			/* no bracketing delimiter                          */
   "",			/* ends at end of line                              */
   FALSE,		/* don't need to be at start of line to end it      */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1406,15 +1674,16 @@ TOKEN PodEmptyLineToken = {
   "pod-empty-line",	/* used by error messages involving this token      */
   PRINT_COMMAND_ONLY,	/* printing just the command                        */
   "@PPG\n",		/* Lout command for formatting Pod empty line       */
-  "",			/* no alternate command                             */
-  "",			/* no following command                             */
+  "", "",		/* no alternate command; no following command       */
   TRUE,			/* token allowed at start of line only              */
   { "\n" },		/* command begins with this                   	    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   "",			/* nothing inside                                   */
   "", "",		/* no escape character                              */
   "", "",		/* no inner escape                                  */
+  "",			/* no bracketing delimiter                          */
   "",			/* token will end with the end of the line          */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1452,12 +1721,14 @@ TOKEN PodIgnoreToken = {
   TRUE,			/* token allowed at start of line only              */
   { "=pod", "=cut" },	/* command begins with this                   	    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* anything at all can be inside                    */
   "",			/* no escape character                              */
   "",			/* so nothing legal after escape char either        */
   "",			/* cut tokens do not permit "inner escapes"         */
   "",			/* and so there is no end innner escape either      */
+  "",			/* no bracketing delimiter                          */
   "\n",			/* token will end with the end of the line          */
   TRUE,			/* end delimiter (\n) has to be at a line start     */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1468,23 +1739,22 @@ TOKEN PodIgnoreToken = {
 /*                                                                           */
 /*  Pod command paragraphs: =head1, =head2 (and =head3, folklore extension)  */
 /*                                                                           */
-/*  "... produce first and second level heading"                             */
-/*                                                                           */
 /*****************************************************************************/
 
 TOKEN PodHeading1Token = {
   "=head1",		/* used by error messages involving this token      */
-  PRINT_NODELIMS_QUOTED,  /* print without delimiters                       */
+  PRINT_NODELIMS_INNER,	/* print without delimiters, formatting inner       */
   "@PHA",		/* Lout command for formatting Pod heading          */
-  "",			/* no alternate command                             */
-  "",			/* following command                                */
+  "", "",		/* no alternate command; no following command       */
   TRUE,			/* token allowed at start of line only              */
   {"=head1","head1"},	/* command begins with this                   	    */
   { " ", "\t" },	/* helps to skip following white space		    */
+  { "",  ""   },	/* no bracket2                        		    */
   { "\n", "\n" },	/* token ends at end of line			    */
   AllCharacters,	/* anything in the heading			    */
   "", "",		/* no escape character; nothing legal after escape  */
   "", "",		/* no inner escapes; no end inner escape            */
+  "",			/* no bracketing delimiter                          */
   "\n\n",		/* token will end with the first blank line         */
   FALSE,		/* end delimiter (\n) has to be at a line start     */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1492,17 +1762,18 @@ TOKEN PodHeading1Token = {
 
 TOKEN PodHeading2Token = {
   "=head2",		/* used by error messages involving this token      */
-  PRINT_NODELIMS_QUOTED,  /* print without delimiters                       */
+  PRINT_NODELIMS_INNER,	/* print without delimiters, formatting inner       */
   "@PHB",		/* Lout command for formatting Pod heading          */
-  "",			/* no alternate command                             */
-  "",			/* following command                                */
+  "", "",		/* no alternate command; no following command       */
   TRUE,			/* token allowed at start of line only              */
   { "=head2" },		/* command begins with this                   	    */
   { " ", "\t" },	/* helps to skip following white space		    */
+  { "",  ""   },	/* no bracket2                        		    */
   { "\n", "\n" },	/* token ends at end of line			    */
   AllCharacters,	/* anything in the heading			    */
   "", "",		/* no escape character; nothing legal after escape  */
   "", "",		/* no inner escapes; no end inner escape            */
+  "",			/* no bracketing delimiter                          */
   "\n\n",		/* token will end with the first blank line         */
   FALSE,		/* end delimiter (\n) has to be at a line start     */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1510,17 +1781,18 @@ TOKEN PodHeading2Token = {
 
 TOKEN PodHeading3Token = {
   "=head3",		/* used by error messages involving this token      */
-  PRINT_NODELIMS_QUOTED,  /* print without delimiters                       */
+  PRINT_NODELIMS_INNER,	/* print without delimiters, formatting inner       */
   "@PHC",		/* Lout command for formatting Pod heading          */
-  "",			/* no alternate command                             */
-  "",			/* following command                                */
+  "", "",		/* no alternate command; no following command       */
   TRUE,			/* token allowed at start of line only              */
   { "=head3" },		/* command begins with this                   	    */
   { " ", "\t" },	/* helps to skip following white space		    */
+  { "",  ""   },	/* no bracket2                        		    */
   { "\n", "\n" },	/* token ends at end of line			    */
   AllCharacters,	/* anything in the heading			    */
   "", "",		/* no escape character; nothing legal after escape  */
   "", "",		/* no inner escapes; no end inner escape            */
+  "",			/* no bracketing delimiter                          */
   "\n\n",		/* token will end with the first blank line         */
   FALSE,		/* end delimiter (\n) has to be at a line start     */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1542,10 +1814,12 @@ TOKEN PodOverToken = {
   TRUE,			/* token allowed at start of line only              */
   { "=over" },		/* command begins with this                   	    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* inside, any printable char is OK		    */
   "", "",		/* no escape character; nothing legal after escape  */
   "", "",		/* no inner escapes; no end inner escape            */
+  "",			/* no bracketing delimiter                          */
   "\n",			/* token will end with the end of the line          */
   TRUE,			/* end delimiter (\n) has to be at a line start     */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1553,18 +1827,19 @@ TOKEN PodOverToken = {
 
 TOKEN PodItemToken = {
   "=item",		/* used by error messages involving this token      */
-  PRINT_NODELIMS_QUOTED,/* printing just what follows =item on the line     */
-  "@Null //}\n@TI {@PLL", /* Lout command for formatting Pod item           */
+  PRINT_NODELIMS_INNER,	/* printing just what follows =item on the line     */
+  "@Null //}\n@DTI {@PLL", /* Lout command for formatting Pod item           */
   "",			/* no alternate command                             */
   "} {",		/* open brace to enclose the item content           */
   TRUE,			/* token allowed at start of line only              */
   { "=item" },		/* command begins with this                   	    */
   { " ", "\t" },	/* helps to skip following white space		    */
+  { "",  ""   },	/* no bracket2                        		    */
   { "\n\n", "\n\n"  },	/* token will end at blank line			    */
   AllPrintableTabNL,	/* any printable inside				    */
   "", "",		/* no escape character; nothing legal after escape  */
   "", "",		/* no inner escapes; no end inner escape            */
-  "",			/* see ends2[]                                      */
+  "", "",		/* see brackets2[]; see ends2[]                     */
   FALSE,		/* end delimiter (\n) must already be at start      */
   FALSE,		/* don't need to see end delimiter twice to stop    */
 };
@@ -1573,15 +1848,16 @@ TOKEN PodBackToken = {
   "=back",		/* used by error messages involving this token      */
   PRINT_COMMAND_ONLY,	/* printing just the command                        */
   "@Null // }\n@EndList\n", /* Lout command for formatting Pod back         */
-  "",			/* no alternate command                             */
-  "",			/* no following command                             */
+  "", "",		/* no alternate command; no following command       */
   TRUE,			/* token allowed at start of line only              */
   { "=back" },		/* command begins with this                   	    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* anything inside (in principle)   		    */
   "", "",		/* no escape character; nothing legal after escape  */
   "", "",		/* no inner escapes; no end inner escape            */
+  "",			/* no bracketing delimiter                          */
   "\n",			/* token will end with the next blank line          */
   TRUE,			/* end delimiter (\n) has to be at a line start     */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1590,12 +1866,51 @@ TOKEN PodBackToken = {
 
 /*****************************************************************************/
 /*                                                                           */
+/*  Pod narrow items; for these, we are confident in using @TI not @DTI      */
+/*                                                                           */
+/*****************************************************************************/
+
+#define PodNarrowItemToken(tag, command)				\
+{									\
+  "=item",		/* used by error messages		 */	\
+  PRINT_NODELIMS_INNER,	/* printing just what follows =item      */	\
+  command,		/* Lout command for formatting Pod item  */	\
+  "",			/* no alternate command                  */	\
+  "}} {",		/* open brace to enclose the item content*/	\
+  TRUE,			/* token allowed at start of line only   */	\
+  { "=item", "=item ", "=item\t",	/* starts                */	\
+    "=item  ", "=item \t", "=item\t ", "=item\t\t" },	    /*   */	\
+  { tag },		/* the tag we recognize               	 */	\
+  { "" },		/* no bracket2                        	 */	\
+  { "\n\n", "\n\n"  },	/* token will end at blank line		 */	\
+  AllPrintableTabNL,	/* any printable inside			 */	\
+  "", "",		/* no escape character                   */	\
+  "", "",		/* no inner escapes; no end inner escape */	\
+  "", "",		/* see brackets2[]; see ends2[]          */	\
+  FALSE,		/* end delimiter (\n) already at start   */	\
+  FALSE,		/* don't need to see end delimiter twice */	\
+}
+
+TOKEN PodItemBullet = PodNarrowItemToken("*", "@Null //}\n@TI {@PLL {*");
+TOKEN PodItem0      = PodNarrowItemToken("0", "@Null //}\n@TI {@PLL {0");
+TOKEN PodItem1      = PodNarrowItemToken("1", "@Null //}\n@TI {@PLL {1");
+TOKEN PodItem2      = PodNarrowItemToken("2", "@Null //}\n@TI {@PLL {2");
+TOKEN PodItem3      = PodNarrowItemToken("3", "@Null //}\n@TI {@PLL {3");
+TOKEN PodItem4      = PodNarrowItemToken("4", "@Null //}\n@TI {@PLL {4");
+TOKEN PodItem5      = PodNarrowItemToken("5", "@Null //}\n@TI {@PLL {5");
+TOKEN PodItem6      = PodNarrowItemToken("6", "@Null //}\n@TI {@PLL {6");
+TOKEN PodItem7      = PodNarrowItemToken("7", "@Null //}\n@TI {@PLL {7");
+TOKEN PodItem8      = PodNarrowItemToken("8", "@Null //}\n@TI {@PLL {8");
+TOKEN PodItem9      = PodNarrowItemToken("9", "@Null //}\n@TI {@PLL {9");
+
+
+/*****************************************************************************/
+/*                                                                           */
 /*  Pod command paragraphs: =for, =begin, =end                               */
 /*                                                                           */
 /*  "passed directly to particular formatters.  A formatter that can utilize */
-/*   that format will use the section, otherwise it will be ignored."        */
-/*                                                                           */
-/*  So I've put in a "=begin lout" token, also recognized as "=begin Lout".  */
+/*  that format will use the section, otherwise it will be ignored."  So     */
+/*  I've put in a "=begin lout" token, also recognized as "=begin Lout".     */
 /*                                                                           */
 /*****************************************************************************/
 
@@ -1606,11 +1921,12 @@ TOKEN PodForToken = {
   "", "",		/* no alternate command; no following command       */
   TRUE,			/* token allowed at start of line only              */
   { "=for" },		/* command begins with this                   	    */
-  { NULL },		/* no start2 needed				    */
+  { NULL }, { NULL },	/* no start2 needed; so no bracket2 either	    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* anything inside 				    */
   "", "",		/* no escape character; nothing legal after escape  */
   "", "",		/* no inner escapes; no end inner escape            */
+  "",			/* no bracketing delimiter                          */
   "\n",			/* token will end with the end of the line          */
   TRUE,			/* end delimiter (\n) has to be at a line start     */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1623,11 +1939,12 @@ TOKEN PodBeginToken = {
   "", "",		/* no alternate command; no following command       */
   TRUE,			/* token allowed at start of line only              */
   { "=begin" },		/* command begins with this                   	    */
-  { NULL },		/* no start2 needed				    */
+  { NULL }, { NULL },	/* no start2 needed; so no bracket2 either	    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* anything inside 				    */
   "", "",		/* no escape character; nothing legal after escape  */
   "", "",		/* no inner escapes; no end inner escape            */
+  "",			/* no bracketing delimiter                          */
   "=end",		/* token will end with =end character               */
   TRUE,			/* end delimiter has to be at a line start          */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1640,11 +1957,12 @@ TOKEN PodBeginLoutToken = {
   "", "",		/* no alternate command; no following command       */
   TRUE,			/* token allowed at start of line only              */
   { "=begin lout", "=begin Lout" },	/* command begins with this   	    */
-  { NULL },		/* no start2 needed				    */
+  { NULL }, { NULL },	/* no start2 needed; so no bracket2 either	    */
   { NULL },		/* so no end2 either				    */
   AllCharacters,	/* anything inside 				    */
   "", "",		/* no escape character; nothing legal after escape  */
   "", "",		/* no inner escapes; no end inner escape            */
+  "",			/* no bracketing delimiter                          */
   "=end",		/* token will end with =end character               */
   TRUE,			/* end delimiter has to be at a line start          */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1687,10 +2005,12 @@ TOKEN PodBeginLoutToken = {
   FALSE,		/* token not just start of line       */	\
   { str },		/* token begins with this	      */	\
   { "<", "<< ", "<<< ", "<<<< " }, /* start2		      */	\
+  { "",  "",    "",     ""      }, /* no bracket2             */	\
   { ">", " >>", " >>>", " >>>>" }, /* end2		      */	\
   AllCharacters,	/* anything inside (in fact, not used)*/	\
   "", "",		/* no escape character                */	\
   "", "",		/* no inner escape; no end inner esc  */	\
+  "",			/* will use brackets2 here            */	\
   "",			/* will use end2 here                 */	\
   FALSE,		/* end not have to be at line start   */	\
   FALSE,		/* don't end delimiter twice to stop  */	\
@@ -1725,10 +2045,12 @@ TOKEN PodCodeToken	= RecursiveToken("C", "@PFC");
   FALSE,		/* token not just start of line       */	\
   { str },		/* token begins with this	      */	\
   { "<", "<< ", "<<< ", "<<<< " }, /* start2		      */	\
+  { "",  "",    "",     ""      }, /* no bracket2             */	\
   { ">", " >>", " >>>", " >>>>" }, /* end2		      */	\
   AllCharacters,	/* anything inside                    */	\
   "", "",		/* no escape character                */	\
   "", "",		/* no inner escape; no end inner esc  */	\
+  "",			/* will use brackets2 here            */	\
   "",			/* will use end2 here                 */	\
   FALSE,		/* end not have to be at line start   */	\
   FALSE,		/* don't end delimiter twice to stop  */	\
@@ -1767,10 +2089,12 @@ TOKEN PodNumCharToken = {
   FALSE,		/* token allowed at start of line only              */
   { "E<" },		/* command begins with this              	    */
   { NULL },		/* no start2 needed				    */
+  { NULL },		/* so no bracket2 either			    */
   { NULL },		/* so no end2 either				    */
   "0123456789",		/* digits inside 				    */
   "", "",		/* no escape character                              */
   "", "",		/* no "inner escapes"                               */
+  "",			/* no bracketing delimiter                          */
   ">",			/* token will end with > character                  */
   FALSE,		/* end delimiter does not have to be at line start  */
   FALSE,		/* don't need to see end delimiter twice to stop    */
@@ -1786,11 +2110,13 @@ TOKEN PodNumCharToken = {
   FALSE,		/* token not just start of line       */	\
   { str },		/* token begins with this	      */	\
   { NULL },		/* start2			      */	\
+  { NULL },		/* bracket2			      */	\
   { NULL },		/* end2				      */	\
   "",			/* nothing inside                     */	\
   "", "",		/* no escape character                */	\
   "", "",		/* no inner escape either             */	\
-  "",			/* no ending delimiter except end2    */	\
+  "",			/* no bracketing delimiter            */	\
+  "",			/* no ending delimiter                */	\
   FALSE,		/* end not have to be at line start   */	\
   FALSE,		/* don't end delimiter twice to stop  */	\
 }
@@ -1948,12 +2274,12 @@ TOKEN PE99 = PodEscapeToken("E<divide>",	"{@Divide}");
 #define NO_LANGUAGE	((LANGUAGE *) NULL)
 
 typedef struct lang_rec {
-  char *names[MAX_ARRAY_LENGTH];
+  char *names[MAX_NAMES];
   char *setup_file;
   char *lang_sym;
   int no_match;
-  TOKEN *tokens[MAX_ARRAY_LENGTH];
-  char *keywords[MAX_ARRAY_LENGTH];
+  TOKEN *tokens[MAX_TOKENS];
+  char *keywords[MAX_KEYWORDS];
 } LANGUAGE;
 
 
@@ -2002,8 +2328,9 @@ LANGUAGE EiffelLanguage = {
     &ExclamationToken, &EqualToken, &EiffelNotEqualToken, &LeftParenToken,
     &RightParenToken, &LeftBracketToken, &RightBracketToken, &LeftBraceToken,
     &RightBraceToken, &AssignToken, &QuestionAssignToken, &PlusToken,
-    &MinusToken, &DollarToken, &HatToken, &SlashToken, &BackSlashToken,
-    &LessToken, &GreaterToken, &LessEqualToken, &GreaterEqualToken
+    &MinusToken, &StarToken, &DollarToken, &HatToken, &SlashToken,
+    &BackSlashToken, &LessToken, &GreaterToken, &LessEqualToken,
+    &GreaterEqualToken
   },
   { "alias", "all", "and", "as", "check", "class", "creation", "debug",
     "deferred", "do", "else", "elseif", "end", "ensure", "expanded", "export",
@@ -2043,6 +2370,9 @@ LANGUAGE BlueLanguage = {
 /*                                                                           */
 /*  Perl and Pod                                                             */
 /*                                                                           */
+/*  We list here all keywords, special variables, predefined filehandles,    */
+/*  and any other identifier that is "built-in".                             */
+/*                                                                           */
 /*****************************************************************************/
 
 LANGUAGE PerlLanguage = {
@@ -2053,15 +2383,16 @@ LANGUAGE PerlLanguage = {
     &PerlSingleQuoteStringToken, &PerlDoubleQuoteStringToken,
     &PerlBackQuoteStringToken, &PerlQTypeStringToken, &PerlSTypeStringToken,
     &PerlRegExpLPar, &PerlRegExpEq, &PerlRegExpMatch, &PerlRegExpNoMatch,
-    &PerlRegExpSplit, &PerlRegExpIf, &PerlRegExpNot, &PerlRegExpUnless,
-    &PerlRegExpFor, &PerlRegExpForEach, &PerlRegExpWhile,
-    &PerlRegExpStartLineToken,
+    &PerlRegExpSplit, &PerlRegExpIf, &PerlRegExpAnd, &PerlRegExpAnd2,
+    &PerlRegExpOr, &PerlRegExpOr2, &PerlRegExpXor, &PerlRegExpNot,
+    &PerlRegExpNot2, &PerlRegExpUnless, &PerlRegExpFor, &PerlRegExpForEach,
+    &PerlRegExpWhile, &PerlRegExpStartLineToken,
     &HereEOTuq, &HereEOTdq, &HereEOTfq, &HereEOTbq,
     &HereEOFuq, &HereEOFdq, &HereEOFfq, &HereEOFbq,
     &HereENDuq, &HereENDdq, &HereENDfq, &HereENDbq,
     &HereBLAuq, &HereBLAdq, &HereBLAfq, &HereBLAbq,
     &PerlIdentifierToken, &PerlSpecialIdentifierToken,
-    &PerlLiteralNumberToken, &PerlHexNumberToken,
+    &PerlLiteralNumberToken, &PerlHexNumberToken, &PerlBinaryNumberToken,
     &PerlCommentToken, &PerlCommentEscapeToken, &PerlPodToken,
     &ExclamationToken, &PercentToken, &HatToken, &AmpersandToken, 
     &StarToken, &SlashToken, &ArrowToken, &BackSlashToken, 
@@ -2087,12 +2418,13 @@ LANGUAGE PerlLanguage = {
   },
 
   { 
-    /* Built-ins taken from Programming Perl 2nd Ed. */
+    /* Built-ins taken from WCS and on-line documentation for 5.6.0     */
+    /* dbmopen and dbmclose are not included because they are obsolete. */
     "abs", "accept", "alarm", "atan2", 
     "bind", "binmode", "bless", 
-    "caller", "chdir", "chmod", "chomp", "chop", "chown", "chr", "chroot", 
+    "caller", "can", "chdir", "chmod", "chomp", "chop", "chown", "chr", "chroot", 
     "close", "closedir", "connect", "continue", "cos", "crypt", 
-    "dbmclose", "dbmopen", "defined", "delete", "die", "do", "dump", 
+    "defined", "delete", "die", "do", "dump", 
     "each", "endgrent", "endhostent", "endnetent", "endprotoent", 
     "endpwent", "endservent", "eof", "eval", "exec", "exists", "exit", 
     "exp", 
@@ -2105,19 +2437,19 @@ LANGUAGE PerlLanguage = {
     "getservent", "getsockname", "getsockopt", "glob", "gmtime", "goto", 
     "grep", 
     "hex", 
-    "import", "index", "int", "ioctl", 
+    "import", "index", "int", "ioctl", "isa",
     "join", 
     "keys", "kill", 
     "last", "lc", "lcfirst", "length", "link", "listen", "local", 
-    "localtime", "log", "lstat", 
+    "localtime", "lock", "log", "lstat", 
     "map", "mkdir", "msgctl", "msgget", "msgrcv", "msgsnd", "my", 
     "next", "no", 
-    "oct", "open", "opendir", "ord", 
-    "pack", "package", "pipe", "pop", "pos", "print", "printf", "push", 
+    "oct", "open", "opendir", "ord", "our",
+    "pack", "package", "pipe", "pop", "pos", "print", "printf", "prototype", "push", 
     "quotemeta", 
-    "rand", "read", "readdir", "readlink", "recv", "redo", "ref", 
-    "rename", "require", "reset", "return", "reverse", "rewinddir", 
-    "rindex", "rmdir", 
+    "rand", "read", "readdir", "readline", "readlink", "readpipe", "recv", 
+    "redo", "ref", "rename", "require", "reset", "return", "reverse", 
+    "rewinddir", "rindex", "rmdir", 
     "scalar", "seek", "seekdir", "select", "semctl", "semget", "semop", 
     "send", "setgrent", "sethostent", "setnetent", "setpgrp", 
     "setpriority", "setprotoent", "setpwent", "setservent", 
@@ -2127,87 +2459,56 @@ LANGUAGE PerlLanguage = {
     "sub", "substr", "symlink", "syscall", "sysopen", "sysread", "sysseek",
     "system", "syswrite", 
     "tell", "telldir", "tie", "tied", "time", "times", "truncate", 
+    "unimport",
     "uc", "ucfirst", "umask", "undef", "unlink", "unpack", "unshift", 
     "untie", "use", "utime", 
-    "values", "vec", 
+    "values", "vec", "VERSION",
     "wait", "waitpid", "wantarray", "warn", "write", 
 
-    /* from later versions of Perl (excluding experimental thread stuff) */ 
-    "our", "prototype", "readline", "readpipe",
-
-    /* operators */
+    /* Comparison operators */
     "lt", "gt", "eq", "ne", "cmp", "le", "ge",
 
-    /* Perl special variables                                              */
-    /* NB special variables that begin $^, e.g. $^D can also be written as */
-    /* dollar control D (yes literal control D) -- but suggest we ignore   */
-    "$_", "$ARG", 
-    "$.", "$INPUT_LINE_SEPARATOR", "$NR", 
-    "$/", "$INPUT_RECORD_SEPARATOR", "$RS", 
-    "$,", "$OUTPUT_FIELD_SEPARATOR", "$OFS", 
-    "$\\", "$OUTPUT_RECORD_SEPARATOR", "$ORS", 
-    "$\"", "$LIST_SEPARATOR", 
-    "$;", "$SUBSCRIPT_SEPARATOR", "$SUBSEP", 
-    "$^L", "$FORMAT_FORMFEED", 
-    "$:", "$FORMAT_LINE_BREAK_CHARACTERS", 
-    "$^A", "$ACCUMULATOR", 
-    "$#", "$OFMT", 
-    "$?", "$CHILD_ERROR", 
-    "$!", "$OS_ERROR", "$ERRNO", 
-    "$@", "$EVAL_ERROR", 
-    "$$", "$PROCESS_ID", "$PID", 
-    "$<", "$REAL_USER_ID", "$UID", 
-    "$>", "$EFFECTIVE_USER_ID", "$EUID", 
-    "$(", "$REAL_GROUP_ID", "$GID", 
-    "$)", "$EFFECTIVE_GROUP_ID", "$EGID", 
-    "$0", /* dollar zero */ "$PROGRAM_NAME", 
-    "$[", /* deprecated */ 
-    "$]", "$PERL_VERSION", 
-    "$^D", "$DEBUGGING", 
-    "$^E", "$EXTENDED_OS_ERROR", 
-    "$^F", "$SYSTEM_FD_MAX", 
-    "$^H", 
-    "$^I", "$INPLACE_EDIT", 
-    "$^M", 
-    "$^O", "$OSNAME", 
-    "$^P", "$PERLDB", 
-    "$^T", "$BASETIME", 
-    "$^W", "$WARNING", 
-    "$^X", "$EXECUTABLE_NAME", 
-    "$ARGV", 
-    "@ARGV", "@INC", "@F", "%INC", "%ENV", "%SIG",
-    "ARGV", "STDERR", "STDIN", "STDOUT", "DATA"
-    "$&", "$MATCH",
-    "$`", "$PREMATCH",
-    "$'", "$POSTMATCH",
-    "$+", "$LAST_PAREN_MATCH",
-    "$|", "$OUTPUT_AUTOFLUSH",
-    "$%", "$FORMAT_PAGE_NUMBER",
-    "$=", "$FORMAT_LINES_PER_PAGE",
-    "$-", "$FORMAT_LINES_LEFT",
-    "$~", "$FORMAT_NAME",
-    "$^", "$FORMAT_TOP_NAME",
-
-    /* Perl special constants */
-    "__END__", "__FILE__", "__LINE__", "__PACKAGE__",
-
-    /* Perl regex variables - first 9 only */
-    "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9",
-
-    /* Perl pragmas */
-    "attrs", "autouse", "base", "blib", "constant", "diagnostics", "fields",
-    "integer", "less", "lib", "locale", "ops", "overload", "re", "sigtrap",
-    "strict", "subs", "vars",
-
-    /* Perl textual operators */
+    /* Special markers & constants */
+    "__DATA__", "__END__", "__FILE__", "__LINE__", "__PACKAGE__",
+
+    /* Predefined filehandles */
+    "ARGV", "ARGVOUT", "STDERR", "STDIN", "STDOUT", "DATA"
+
+    /* Pragmas */
+    "attributes", "autouse", "base", "blib", "bytes",
+    "constant", "charnames", "diagnostics", "fields", "filetest",
+    "integer", "less", "lib", "locale", 
+    /* "open", Not listed here since its also a function */
+    "ops", "overload", "re", "sigtrap", "strict", "subs", "utf8",
+    "vars", "warnings",
+
+    /* Low-precedence logical operators */
     "and", "or", "xor", "not", 
 
-    /* the x keyword */
+    /* The x keyword */
     "x",
 
-    /* Perl control structures */
+    /* Control structures */
     "if", "elsif", /* yes one e */ "else", "unless", 
     "while", "for", "foreach", "continue", "until",
+
+    /* Special subroutines */
+    "AUTOLOAD", "BEGIN", "CHECK", "END", "DESTROY", "INIT", 
+
+    /* Predefined classes & namespaces */
+    "CORE", "GLOBAL", "UNIVERSAL", "SUPER", 
+
+    /* Tie predefined subroutines */
+    "TIESCALAR",
+    "FETCH", "STORE",
+    "TIEARRAY",
+    "FETCHSIZE", "STORESIZE", "EXISTS", "DELETE",
+    "CLEAR", "PUSH", "POP", "SHIFT", "UNSHIFT", "SPLICE", "EXTEND",
+    "TIEHASH",
+    "FIRSTKEY", "NEXTKEY"
+    "TIEHANDLE",
+    "PRINT", "PRINTF", "WRITE", "READLINE", "GETC", "READ", "CLOSE",
+    "BINMODE", "OPEN", "EOF", "FILENO", "SEEK", "TELL",
   }
 };
 
@@ -2220,6 +2521,8 @@ LANGUAGE PodLanguage = {
     &PodVerbatimLineToken, &PodEmptyLineToken, &PodIgnoreToken,
     &PodHeading1Token, &PodHeading2Token,
     &PodOverToken, &PodItemToken, &PodBackToken,
+    &PodItemBullet, &PodItem0, &PodItem1, &PodItem2, &PodItem3,
+    &PodItem4, &PodItem5, &PodItem6, &PodItem7, &PodItem8, &PodItem9,
     &PodForToken, &PodBeginToken, &PodBeginLoutToken,
     &PodItalicToken, &PodBoldToken, &PodCodeToken, &PodFileToken,
     &PodNoBreakToken, &PodLinkToken, &PodIndexToken, &PodZeroToken,
@@ -2249,9 +2552,9 @@ LANGUAGE PodLanguage = {
 /*****************************************************************************/
 
 LANGUAGE *languages[] = {
+  & BlueLanguage,
   & CLanguage,
   & EiffelLanguage,
-  & BlueLanguage,
   & PerlLanguage,
   & PodLanguage,
 };
@@ -2303,11 +2606,14 @@ static char	*tabin_option;		/* value of -t option, else null     */
 static char	*tabout_option;		/* value of -T option, else null     */
 static char	*setup_option;		/* value of -S option, else null     */
 static char	*language_option;	/* value of -l option, else null     */
+static char	*numbered_option;	/* value of -L option, else null     */
 
 static BOOLEAN	tab_by_spacing;		/* TRUE if using space chars to tab  */
 static int	tab_in;			/* tab interval, value of -t option  */
 static float	tab_out;		/* tab interval width (-T option)    */
 static char 	tab_unit;		/* unit of measurement for tab       */
+static BOOLEAN 	print_lines;		/* TRUE if we are printing line nums */
+static int 	print_num;		/* current line num for printing     */
 
 static FILE	*in_fp;			/* where input comes from	     */
 static FILE	*out_fp;		/* where output goes		     */
@@ -2413,7 +2719,7 @@ void NextChar()
     /* we need to read in the new line */
     line_num++;
     line_pos = 1;
-    if( fgets(&curr_line[1], MAX_LINE+2, in_fp) == (char *) NULL )
+    if( fgets(&curr_line[1], MAX_LINE-2, in_fp) == (char *) NULL )
       curr_line[1] = '\0';
   }
   if( DEBUG_NEXTCHAR )
@@ -2438,7 +2744,7 @@ BOOLEAN InputMatches(char *pattern)
     if( *p == '\0' )
     {
       /* attempt to read another line of input, since we are off the end */
-      if( fgets(p, MAX_LINE+2-(p - curr_line), in_fp) == (char *) NULL )
+      if( fgets(p, MAX_LINE-2-(p - curr_line), in_fp) == (char *) NULL )
       *p = '\0';
     }
     if( *p != *q )
@@ -2607,6 +2913,45 @@ static char	save_value[MAX_LINE];		/* the token text            */
 static int	save_len;                       /* index of \0 in save_value */
 static BOOLEAN	save_on = FALSE;		/* TRUE when saving          */
 static LANGUAGE	*save_language;			/* the current language      */
+static int	out_linepos = 0;		/* output line position      */
+static BOOLEAN	out_linestart = TRUE;		/* TRUE if out line start    */
+static BOOLEAN	out_formfeed = FALSE;		/* TRUE if last was formfeed */
+static int	brace_depth;			/* brace depth in verbatim   */
+
+extern void Emit(TOKEN *current_token, char ch);
+
+/*****************************************************************************/
+/*                                                                           */
+/*  EmitTab(int *out_linepos)                                                */
+/*                                                                           */
+/*  Emit one tab character, keeping track of where we are up to in           */
+/*  *out_linepos.                                                            */
+/*                                                                           */
+/*****************************************************************************/
+
+void EmitTab()
+{
+  if( tab_by_spacing )
+  { putc(' ', out_fp);
+    out_linepos++;
+    while( out_linepos % tab_in != 0 )
+    { putc(' ', out_fp);
+      out_linepos++;
+    }
+  }
+  else
+  { out_linepos++;
+    while( out_linepos % tab_in != 0 )  out_linepos++;
+    if( out_linestart )
+    { fprintf(out_fp, "$>\"%.1f%c\" {}", tab_out, tab_unit);
+      /* NB {} is required in case nothing follows on this line */
+    }
+    else
+      fprintf(out_fp, "$>\"%.1f%ct\" {}", (out_linepos/tab_in)*tab_out,
+	tab_unit);
+  }
+  out_formfeed = FALSE;
+}
 
 
 /*****************************************************************************/
@@ -2627,8 +2972,7 @@ static LANGUAGE	*save_language;			/* the current language      */
 /*****************************************************************************/
 
 void EmitRaw(char ch)
-{ static int	 out_linepos = 0;		/* output line position      */
-  static BOOLEAN out_linestart = TRUE;		/* TRUE if out line start    */
+{
 
   if( DEBUG_EMIT )
     fprintf(stderr, "EmitRaw(%c); out_linepos %d, out_linestart %s\n",
@@ -2637,35 +2981,69 @@ void EmitRaw(char ch)
   { fprintf(err_fp, "%s internal error (EmitRaw save_on)\n", ErrorHeader());
     abort();
   }
-  if( ch == '\t' )
-  { if( tab_by_spacing )
-    { putc(' ', out_fp);
-      out_linepos++;
-      while( out_linepos % tab_in != 0 )
-      { putc(' ', out_fp);
-	out_linepos++;
-      }
-    }
-    else
-    { out_linepos++;
-      while( out_linepos % tab_in != 0 )  out_linepos++;
-      if( out_linestart )
-      { fprintf(out_fp, "$>\"%.1f%c\" {}", tab_out, tab_unit);
-	/* NB {} is required in case nothing follows on this line */
-      }
-      else
-        fprintf(out_fp, "$>\"%.1f%ct\" {}", (out_linepos/tab_in)*tab_out, tab_unit);
-    }
+
+  /* drop empty lines following formfeed */
+  if( out_formfeed && (ch == '\n' || ch == '\f') )
+  {
+    out_formfeed = (ch == '\f');
+    return;
   }
-  else if( ch == '\n' )
-  { fputc(ch, out_fp);
-    out_linepos = 0;
-    out_linestart = TRUE;
+
+  /* emit line number if required */
+  if( print_lines && out_linepos == 0 )
+  {
+    char buff[20];
+    if( out_formfeed )  print_num--;
+    sprintf(buff, "%d", print_num++);
+    fprintf(out_fp, "@PL{\"%s\"}", buff);
+    out_linepos += strlen(buff);
+    out_linestart = FALSE;
+    EmitTab();
   }
-  else
-  { fputc(ch, out_fp);
-    out_linepos++;
-    if( ch != ' ' )  out_linestart = FALSE;
+
+  switch( ch )
+  {
+    case ' ':
+
+      fputc(ch, out_fp);
+      out_linepos++;
+      out_formfeed = FALSE;
+      break;
+
+
+    case '\t':
+
+      EmitTab();
+      out_formfeed = FALSE;
+      break;
+
+
+    case '\n':
+
+      fputc(ch, out_fp);
+      out_linepos = 0;
+      out_linestart = TRUE;
+      out_formfeed = FALSE;
+      break;
+
+
+    case '\f':
+
+      fputs("\n@NP\n", out_fp);
+      out_linepos = 0;
+      out_linestart = TRUE;
+      out_formfeed = TRUE;
+      break;
+
+
+    default:
+
+      fputc(ch, out_fp);
+      out_linepos++;
+      out_linestart = FALSE;
+      out_formfeed = FALSE;
+      break;
+
   }
   if( DEBUG_EMIT )
     fprintf(stderr, "EmitRaw(%c) returning; out_linepos %d, out_linestart %s\n",
@@ -2692,22 +3070,35 @@ void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len)
     abort();
   }
   save_language = lang;
+
+  /* emit line number if required */
+  if( print_lines && out_linepos == 0 )
+  {
+    char buff[20];
+    if( out_formfeed )  print_num--;
+    sprintf(buff, "%d", print_num++);
+    fprintf(out_fp, "@PL{\"%s\"}", buff);
+    out_linepos += strlen(buff);
+    out_linestart = FALSE;
+    EmitTab();
+  }
+
   switch( current_token->print_style )
   {
     case PRINT_WHOLE_QUOTED:
 
-      /* start_delim is to be printed, but must be saved up */
+      /* start_delim is to be printed */
       save_on = TRUE;
-      for( i = 0;  i < len;  i++ )
-	save_value[i] = start_delim[i];
-      save_len = len;
+      save_len = 0;
       save_value[save_len] = '\0';
+      for( i = 0;  i < len;  i++ )
+	Emit(current_token, start_delim[i]);
       break;
 
 
     case PRINT_NODELIMS_QUOTED:
 
-      /* easiest to save this, treat like PRINT_WHOLE_QUOTED, but no delims */
+      /* like PRINT_WHOLE_QUOTED, but no delims */
       save_on = TRUE;
       save_len = 0;
       save_value[save_len] = '\0';
@@ -2723,6 +3114,7 @@ void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len)
       /* print opening delimiter, verbatim */
       for( i = 0;  i < len;  i++ )
 	putc(start_delim[i], out_fp);
+
       break;
 
 
@@ -2731,6 +3123,9 @@ void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len)
       /* command is printed but not delimiter */
       if( current_token->command[0] != '\0' )
 	fprintf(out_fp, "%s{", current_token->command); /*}*/
+
+      /* record that we are currently inside no braces in the verbatim text */
+      brace_depth = 0;
       break;
 
 
@@ -2764,38 +3159,37 @@ void StartEmit(LANGUAGE *lang, TOKEN *current_token, char *start_delim, int len)
 /*  End emitting the current token.  Its ending delimiter was end_delim.     */
 /*                                                                           */
 /*****************************************************************************/
-#define is_whitespace(ch) ((ch)==' ' || (ch)=='\t' || (ch)=='\n' || (ch)=='\f')
+#define at_start_line(s, i) ((i) == 0 || s[(i)-1] == '\n' || s[(i)-1] == '\f' )
 
 void EndEmit(TOKEN *current_token, char *end_delim)
-{ char *com;  int i;  BOOLEAN quoted_now = FALSE;
+{ char *com;
+  int i;
+  BOOLEAN quoted_now = FALSE;
   switch( current_token->print_style )
   {
 
     case PRINT_WHOLE_QUOTED:
 
-      strcpy(&save_value[save_len], end_delim);
-      save_len += strlen(end_delim);
+      /* first, emit (i.e. save) ending delimiter */
+      for( i = 0;  end_delim[i] != '\0';  i++ )
+	Emit(current_token, end_delim[i]);
       /* NB NO BREAK */
 
 
     case PRINT_NODELIMS_QUOTED:
 
       /* work out whether we are printing the command or its alternative */
-      com = (current_token->alternate_command[0]!='\0' && HashRetrieve(save_value)?
+      com=(current_token->alternate_command[0]!='\0'&&HashRetrieve(save_value)?
 	current_token->alternate_command : current_token->command);
 
       /* print command, opening brace */
-      if( com[0] != '\0' )  fprintf(out_fp, "%s{", com);  /* } */
-
-      /* omit trailing white space (will not be significant to Lout anyway) */
-      /* *** not doing this now 
-      while( save_len>0 && is_whitespace(save_value[save_len-1]) )  save_len--;
-      *** */
+      if( com[0] != '\0' )  fprintf(out_fp, "%s{", com);  /*}*/
 
       /* print the token with appropriate escapes */
       save_on = FALSE;
       for( i = 0;  i < save_len;  i++ ) switch( save_value[i] )
       {
+	case '@':
 	case '/':
 	case '|':
 	case '&':
@@ -2806,7 +3200,10 @@ void EndEmit(TOKEN *current_token, char *end_delim)
 	case '~':
 	case '-':
 
-	  if( !quoted_now ) { putc('"', out_fp); quoted_now = TRUE; }
+	  if( !quoted_now )
+	  { putc('"', out_fp);
+	    quoted_now = TRUE;
+	  }
 	  EmitRaw(save_value[i]);
 	  break;
 
@@ -2814,7 +3211,10 @@ void EndEmit(TOKEN *current_token, char *end_delim)
 	case '"':
 	case '\\':
 
-	  if( !quoted_now ) { putc('"', out_fp); quoted_now = TRUE; }
+	  if( !quoted_now )
+	  { putc('"', out_fp);
+	    quoted_now = TRUE;
+	  }
 	  putc('\\', out_fp);
 	  EmitRaw(save_value[i]);
 	  break;
@@ -2822,24 +3222,41 @@ void EndEmit(TOKEN *current_token, char *end_delim)
 
 	case ' ':
 	case '\t':
-	case '\n':
-	case '\f':
 
-	  if( i == 0 )
-	  { /* make initial white space significant using "" */
+	  /* make initial white space significant using "" */
+	  if( !quoted_now && at_start_line(save_value, i) )
+	  {
 	    putc('"', out_fp);
 	    quoted_now = TRUE;
+	    out_linestart = FALSE;
+	  }
+
+	  /* make sure we aren't in quoted text */
+	  if( quoted_now )
+	  { putc('"', out_fp);
+	    quoted_now = FALSE;
 	  }
-	  if( quoted_now ) { putc('"', out_fp); quoted_now = FALSE; }
+
+	  /* print the character */
 	  EmitRaw(save_value[i]);
 	  break;
 
 
+	case '\n':
+	case '\f':
+
+	  /* these characters are not saved */
+	  fprintf(err_fp, "%s internal error (EndEmit nl/ff)\n", ErrorHeader());
+	  exit(1);
+	  break;
+
+
 	default:
 
 	  /* anything else can be quoted or unquoted ad. lib. */
 	  EmitRaw(save_value[i]);
 	  break;
+
       }
       /* print closing quote and closing brace if needed */
       if( quoted_now )  putc('"', out_fp);
@@ -2859,7 +3276,24 @@ void EndEmit(TOKEN *current_token, char *end_delim)
     case PRINT_NODELIMS_UNQUOTED:
 
       /* print closing brace if required*/
-      if( current_token->command[0] != '\0' ) /*{*/ putc('}', out_fp);
+      if( current_token->command[0] != '\0' )
+      {
+	if( brace_depth > 0 )
+	{
+	  if( brace_depth > 1 )
+	    fprintf(err_fp, "%s: inserted %d closing braces at end of %s\n",
+	      ErrorHeader(), brace_depth, current_token->name);
+	  else
+	    fprintf(err_fp, "%s: inserted one closing brace at end of %s\n",
+	      ErrorHeader(), current_token->name);
+	  while( brace_depth > 0 )
+	  {
+	    /*{*/ putc('}', out_fp);
+	    brace_depth--;
+	  }
+	}
+	/*{*/ putc('}', out_fp);
+      }
       break;
 
 
@@ -2907,14 +3341,44 @@ void Emit(TOKEN *current_token, char ch)
       { fprintf(err_fp, "%s internal error (EmitChar)\n", ErrorHeader());
 	abort();
       }
-      save_value[save_len++] = ch;
-      save_value[save_len] = '\0';
+      if( ch == '\n' || ch == '\f' )
+      {
+	/* could save newline too, but uses less memory if print now */
+	EndEmit(current_token, "");
+	EmitRaw(ch);
+	StartEmit(save_language, current_token, "", 0);
+      }
+      else if( save_len < MAX_LINE - 1 )
+      {
+        save_value[save_len++] = ch;
+        save_value[save_len] = '\0';
+      }
+      else
+      {
+	fprintf(err_fp, "%s internal error (token too long)\n", ErrorHeader());
+	exit(1);
+      }
       break;
 
 
     case PRINT_WHOLE_UNQUOTED:
     case PRINT_NODELIMS_UNQUOTED:
 
+      /* keep trace of braces, and insert matching braces if required */
+      if( ch == '{' )
+	brace_depth++;
+      else if( ch == '}' )
+      {
+	brace_depth--;
+	if( brace_depth < 0 && current_token->command[0] != '\0' )
+	{
+	  fprintf(err_fp, "%s: inserted opening brace within %s\n",
+	      ErrorHeader(), current_token->name);
+	  putc('{', out_fp); /*}*/
+	  brace_depth++;
+	}
+      }
+
       /* verbatim output */
       putc(ch, out_fp);
       break;
@@ -2956,6 +3420,7 @@ void EmitProtected(char ch)
 {
   switch( ch )
   {
+    case '@':
     case '/':
     case '|':
     case '&':
@@ -3029,6 +3494,7 @@ TOKEN *ExpandToken(TOKEN *t, int starts_pos)
   res->escape_legal = t->escape_legal;
   res->inner_escape = t->inner_escape;
   res->end_inner_escape = t->end_inner_escape;
+  res->bracket_delimiter = t->brackets2[starts_pos];
   res->end_delimiter = t->ends2[starts_pos];
   res->end_start_line_only = t->end_start_line_only;
   res->want_two_ends = t->want_two_ends;
@@ -3056,30 +3522,21 @@ TRIE StartLineTrie = (TRIE) NULL;	/* these allowed at line start only  */
 
 void SetupOneToken(TOKEN *t)
 { int j;
-  if( DEBUG_SETUP )
-    fprintf(stderr, "SetupOneToken starting %s\n", t->starts[0]);
+  if( DEBUG_SETUP )  fprintf(stderr, "SetupOneToken(%s)\n", t->starts[0]);
 
   /* check that any PRINT_NODELIMS_INNER styles have an end delimiter */
   if( t->print_style == PRINT_NODELIMS_INNER )
-  {
-    if( t->end_delimiter == NULL || t->end_delimiter[0] == '\0' )
-    {
-      fprintf(err_fp, "%s: token %s is INNER but has no end delimiter\n",
+  { if( t->end_delimiter == NULL || t->end_delimiter[0] == '\0' )
+    { fprintf(err_fp, "%s: token %s is INNER but has no end delimiter\n",
 	t->name, ErrorHeader());
     }
   }
 
   /* set up the chtype table for this token */
-  if( t->legal == NULL )
-  { /* all characters are legal */
-    for( j = 0;  j < MAX_CHAR;  j++ )
-      t->chtype[j] = LEGAL;
-  }
-  else
-  { /* the characters in t->legal are legal */
-    for( j = 0;  t->legal[j] != '\0'; j++ )
-      t->chtype[(int) t->legal[j]] = LEGAL;
-  }
+  if( t->legal == NULL )  /* all characters are legal in this case */
+    for( j = 0; j < MAX_CHAR; j++ )  t->chtype[j] = LEGAL;
+  else /* the characters in t->legal are legal in this case */
+    for( j = 0; t->legal[j] != '\0'; j++ ) t->chtype[(int) t->legal[j]] = LEGAL;
   if( t->escape[0] != '\0' )
     t->chtype[(int) t->escape[0]] = ESCAPE;
   if( t->inner_escape[0] != '\0' )
@@ -3088,8 +3545,7 @@ void SetupOneToken(TOKEN *t)
   /* set up the escape_chtype table for this token */
   if( t->escape_legal == NULL )
   { /* all characters are legal after an escape character */
-    for( j = 0;  j < MAX_CHAR;  j++ )
-      t->escape_chtype[j] = LEGAL;
+    for( j = 0;  j < MAX_CHAR;  j++ ) t->escape_chtype[j] = LEGAL;
   }
   else
   { /* the characters in t->escape_legal are legal after an escape character */
@@ -3099,8 +3555,7 @@ void SetupOneToken(TOKEN *t)
 
   /* load the opening delimiters of this token into the trie */
   for( j = 0;  t->starts[j] != (char *) NULL;  j++ )
-  {
-    if( !TrieInsert(t->start_line_only ? &StartLineTrie:&Trie,t->starts[j],t) )
+  { if( !TrieInsert(t->start_line_only ? &StartLineTrie:&Trie,t->starts[j],t) )
     { if( *(t->starts[j]) == '\0' )
 	fprintf(err_fp, "%s: empty starting delimiter\n", ErrorHeader());
       else
@@ -3109,8 +3564,7 @@ void SetupOneToken(TOKEN *t)
     }
   }
 
-  if( DEBUG_SETUP )
-    fprintf(stderr, "SetupOneToken ending %s\n", t->starts[0]);
+  if( DEBUG_SETUP ) fprintf(stderr, "SetupOneToken ending %s\n", t->starts[0]);
 } /* end SetupOneToken */
 
 
@@ -3208,7 +3662,26 @@ TOKEN *TokenStartingHere(int *len)
   return res;
 }
 
+
+/*****************************************************************************/
+/*                                                                           */
+/*  int Matching()                                                           */
+/*                                                                           */
+/*  Return the index of the pair that matches the current input.             */
+/*                                                                           */
+/*****************************************************************************/
+
+int Matching()
+{ int i;
+  for( i = 0; pairs[i].first != NULL && !InputMatches(pairs[i].first); i++ );
+  if( DEBUG_PROCESS )
+    fprintf(stderr, "Matching() = %d (\"%s\", \"%s\")\n", i,
+      pairs[i].first  == NULL ? "NULL" : pairs[i].first,
+      pairs[i].second == NULL ? "NULL" : pairs[i].second);
+  return i;
+}
 
+
 /*****************************************************************************/
 /*                                                                           */
 /*  Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter)   */
@@ -3221,22 +3694,47 @@ TOKEN *TokenStartingHere(int *len)
 /*****************************************************************************/
 #define	START				1
 #define	IN_TOKEN			2
-#define	IN_TOKEN_AFTER_ESCAPE		3
-#define	IN_TOKEN_AFTER_INNER_ESCAPE	4
-#define	STOP				5
+#define	IN_TOKEN_NEEDING_DELIM		3
+#define	IN_TOKEN_AFTER_ESCAPE		4
+#define	IN_TOKEN_AFTER_INNER_ESCAPE	5
+#define	STOP				6
+
+char *debug_state(int s)
+{
+  switch( s )
+  {
+    case START:				return "START";
+    case IN_TOKEN:			return "IN_TOKEN";
+    case IN_TOKEN_NEEDING_DELIM:	return "IN_TOKEN_NEEDING_DELIM";
+    case IN_TOKEN_AFTER_ESCAPE:		return "IN_TOKEN_AFTER_ESCAPE";
+    case IN_TOKEN_AFTER_INNER_ESCAPE:	return "IN_TOKEN_AFTER_INNER_ESCAPE";
+    case STOP:				return "STOP";
+    default:				return "?";
+  }
+}
 
 void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter)
 { TOKEN *current_token; int len, i, state;
-  BOOLEAN end_delimiter_seen;
+  int end_delimiter_depth, end_delimiter_count;
+  char *curr_end_delim, *curr_bracket_delim;
   if( DEBUG_PROCESS )
     fprintf(stderr, "[ Process(%s, -, -, -, -)\n", lang->names[0]);
 
   state = START;
-  end_delimiter_seen = FALSE;
   while( curr_line[line_pos] != '\0' && state != STOP )
   { 
     if( DEBUG_PROCESS )
-      fprintf(stderr, "  state %d, ch %c\n", state, curr_line[line_pos]);
+    {
+      if( state >= IN_TOKEN )
+	fprintf(stderr,
+	  "  %s, depth %d, count %d, bracket \"%s\", end \"%s\", ch %c\n",
+	  debug_state(state), end_delimiter_depth, end_delimiter_count,
+	  curr_bracket_delim, curr_end_delim, curr_line[line_pos]);
+      else
+	fprintf(stderr, "  %s, ch %c\n",
+	  debug_state(state), curr_line[line_pos]);
+    }
+
     switch( state )
     {
 
@@ -3272,38 +3770,23 @@ void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter)
 	    Process(lang, current_token, current_token->end_delimiter);
 	    EndEmit(current_token, "");
 	  }
-	  else state = IN_TOKEN;
+	  else
+	  {
+	    end_delimiter_depth = 1;
+	    end_delimiter_count = current_token->want_two_ends ? 2 : 1;
+	    curr_end_delim = current_token->end_delimiter;
+	    curr_bracket_delim = current_token->bracket_delimiter;
+	    state = IN_TOKEN;
+	  }
 	}
 
 	/* check whether we have a space */
-	else if( curr_line[line_pos] == ' ' )
+	else if( is_whitespace(curr_line[line_pos]) )
 	{
-	  EmitRaw(' ');
+	  EmitRaw(curr_line[line_pos]);
 	  NextChar();
 	}
 
-	/* check whether we have a tab character */
-	else if( curr_line[line_pos] == '\t' )
-	{
-	    EmitRaw('\t');
-	    NextChar();
-	}
-
-	/* check whether we have a newline character */
-	else if( curr_line[line_pos] == '\n' )
-	{
-	    EmitRaw('\n');
-	    NextChar();
-	}
-
-	/* check whether we have a formfeed character */
-	else if( curr_line[line_pos] == '\f' )
-	{
-	    fprintf(out_fp, "\n@NP");
-	    EmitRaw('\n');
-	    NextChar();
-	}
-
 	/* check whether we are supposed to echo things that don't match */
 	else if( lang->no_match == NO_MATCH_PRINT )
 	{
@@ -3319,8 +3802,8 @@ void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter)
 	      ErrorHeader(), curr_line[line_pos]);
 	  else
 	    fprintf(err_fp, "%s: %s (octal %o)\n",
-	      "skipping unexpected unprintable character",
-	      ErrorHeader(), (int) curr_line[line_pos]);
+	      ErrorHeader(), "skipping unexpected unprintable character",
+	      (int) curr_line[line_pos]);
 	  NextChar();
 	}
 	else
@@ -3333,79 +3816,162 @@ void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter)
       case IN_TOKEN:  /* within a token; current_token says which kind */
 
 	/* check for ending delimiter if there is one */
-	if( current_token->end_delimiter[0] != '\0' &&
+	if( curr_end_delim[0] != '\0' &&
 	    (!current_token->end_start_line_only || line_pos == 1) &&
-	    InputMatches(current_token->end_delimiter) )
+	    InputMatches(curr_end_delim) )
 	{
-	  if( current_token->want_two_ends && !end_delimiter_seen )
+	  end_delimiter_depth--;
+	  if( DEBUG_PROCESS )
+	    fprintf(stderr, "  InputMatches(%s) so end_delimiter_depth--\n",
+	      curr_end_delim);
+	  if( end_delimiter_depth > 0 )
 	  {
-	    /* if we have to see the end delimiter twice before stopping,  */
-	    /* and we haven't seen it yet, then emit the char and carry on */
+	    /* if this end delimiter matches with a bracketing delimiter,  */
+	    /* so is not the end of the token, emit the char and carry on  */
 	    Emit(current_token, curr_line[line_pos]);
 	    NextChar();
-	    end_delimiter_seen = TRUE;
 	  }
 	  else
 	  {
+	    end_delimiter_count--;
 	    if( DEBUG_PROCESS )
-	      fprintf(stderr, "  InputMatches(%s) so finishing token\n",
-	        current_token->end_delimiter);
-	    len = strlen(current_token->end_delimiter);
-	    for( i = 0;  i < len;  i++ )
+	      fprintf(stderr, "  InputMatches(%s) so end_delimiter_count--\n",
+	        curr_end_delim);
+	    if( end_delimiter_count == 0 )
+	    {
+	      /* seen all the end delimiters we need, so token ends */
+	      len = strlen(curr_end_delim);
+	      for( i = 0;  i < len;  i++ )
+	        NextChar();
+	      EndEmit(current_token, curr_end_delim);
+	      state = START;
+	    }
+	    else
+	    {
+	      /* need more end delimiters yet, so keep scanning */
+	      Emit(current_token, curr_line[line_pos]);
 	      NextChar();
-	    EndEmit(current_token, current_token->end_delimiter);
-	    state = START;
+	      if( curr_bracket_delim[0] != '\0' )
+	        state = IN_TOKEN_NEEDING_DELIM;
+	      else
+	        state = IN_TOKEN;
+	    }
 	  }
 	}
-	else switch( current_token->chtype[(int) curr_line[line_pos]] )
+	else
 	{
+	  /* check for bracketing delimiter if there is one */
+	  if( curr_bracket_delim[0] != '\0' &&
+	    InputMatches(curr_bracket_delim) )
+	  {
+	    if( DEBUG_PROCESS )
+	      fprintf(stderr, "  InputMatches(%s) so end_delimiter_depth++\n",
+	        curr_bracket_delim);
+	    end_delimiter_depth++;
+	  }
+	
+	  /* handle current character as usual */
+	  switch( current_token->chtype[(int) curr_line[line_pos]] )
+	  {
 
-	  case LEGAL:
+	    case LEGAL:
 
-	    Emit(current_token, curr_line[line_pos]);
-	    NextChar();
-	    break;
+	      Emit(current_token, curr_line[line_pos]);
+	      NextChar();
+	      break;
 
 
-	  case ESCAPE:
+	    case ESCAPE:
 
-	    NextChar();
-	    state = IN_TOKEN_AFTER_ESCAPE;
-	    break;
+	      NextChar();
+	      state = IN_TOKEN_AFTER_ESCAPE;
+	      break;
 
 
-	  case INNER_ESCAPE:
+	    case INNER_ESCAPE:
 
-	    EndEmit(current_token, "");
+	      EndEmit(current_token, "");
+	      NextChar();
+	      Process(lang, current_token, current_token->end_inner_escape);
+	      state = IN_TOKEN_AFTER_INNER_ESCAPE;
+	      break;
+
+
+	    default:
+
+	      if( curr_end_delim[0] != '\0' )
+	      {
+	        /* error: token ends at delimiter, not unexpected character */
+	        if( Printable(curr_line[line_pos]) )
+	          fprintf(err_fp,
+		    "%s: skipping %c character (not allowed in %s)\n",
+		    ErrorHeader(), curr_line[line_pos], current_token->name);
+	        else if( curr_line[line_pos] == '\t' )
+	          fprintf(err_fp,
+		    "%s: skipping tab character (not allowed in %s)\n",
+		    ErrorHeader(), current_token->name);
+		else if( curr_line[line_pos] == '\n' )
+	          fprintf(err_fp,
+		    "%s: skipping newline character (not allowed in %s)\n",
+		    ErrorHeader(), current_token->name);
+		else if( curr_line[line_pos] == '\f' )
+	          fprintf(err_fp,
+		    "%s: skipping formfeed character (not allowed in %s)\n",
+		    ErrorHeader(), current_token->name);
+		else
+	          fprintf(err_fp, "%s: %s, octal code %o (not allowed in %s)\n",
+		    ErrorHeader(), "skipping unprintable character",
+		    (unsigned) curr_line[line_pos], current_token->name);
+	        NextChar();
+	      }
+	      else
+	      {
+	        /* normal termination after last legal character */
+	        EndEmit(current_token, "");
+	        state = START;
+	      }
+	      break;
+
+
+	  }
+	}
+	break;
+
+
+      case IN_TOKEN_NEEDING_DELIM:  /* within a token looking for delim */
+
+	/* looking for either a white space or a new matching delim */
+	switch( curr_line[line_pos] )
+	{
+	  case ' ':
+	  case '\t':
+	  case '\n':
+	  case '\f':
+
+	    Emit(current_token, curr_line[line_pos]);
 	    NextChar();
-	    Process(lang, current_token, current_token->end_inner_escape);
-	    state = IN_TOKEN_AFTER_INNER_ESCAPE;
 	    break;
 
 
 	  default:
 
-	    if( current_token->end_delimiter[0] != '\0' )
+	    /* had better match */
+	    i = Matching();
+	    if( pairs[i].first == NULL )
 	    {
-	      /* error: token ends at delimiter, not at unexpected character */
-	      if( Printable(curr_line[line_pos]) )
-	        fprintf(err_fp, "%s: skipping unexpected %c character in %s\n",
-		  ErrorHeader(), curr_line[line_pos], current_token->name);
-	      else
-	        fprintf(err_fp, "%s: %s (octal %o) in %s\n",
-		  ErrorHeader(), "skipping unexpected unprintable character",
-		  (int) curr_line[line_pos], current_token->name);
-	      NextChar();
-	    }
-	    else
-	    {
-	      /* normal termination after last legal character */
-	      EndEmit(current_token, "");
-	      state = START;
+	      /* this is not a suitable new start for delimiters */
+	      fprintf(err_fp, "%s: expected new delimiter here, found %c\n",
+		ErrorHeader(), curr_line[line_pos]);
+	      exit(0);
 	    }
+	    curr_bracket_delim = pairs[i].first;
+	    curr_end_delim = pairs[i].second;
+	    Emit(current_token, curr_line[line_pos]);
+	    NextChar();
+	    end_delimiter_depth++;
+	    state = IN_TOKEN;
 	    break;
 
-
 	}
 	break;
       
@@ -3478,6 +4044,19 @@ void Process(LANGUAGE *lang, TOKEN *outer_token, char *outer_end_delimiter)
       break;
 
 
+    case IN_TOKEN_NEEDING_DELIM:
+
+      /* we stopped in a token at a point where we were looking for a delim */
+      if( outer_token == (TOKEN *) NULL )
+	fprintf(err_fp, "%s: program text ended within %s\n", 
+	  ErrorHeader(), current_token->name);
+      else
+	fprintf(err_fp, "%s: %s token ended within %s\n",
+	  ErrorHeader(), outer_token->name, current_token->name);
+      EndEmit(current_token, "");
+      break;
+
+
     case IN_TOKEN_AFTER_ESCAPE:
 
       /* we stopped after the escape character */
@@ -3539,6 +4118,7 @@ void PrintUsage()
   fprintf(err_fp, "    -t<num>      tab interval (e.g. 8 is default)\n");
   fprintf(err_fp, "    -T<dist>     output tab interval (e.g. 0.5i)\n");
   fprintf(err_fp, "    -S<file>     use this as the setup file\n");
+  fprintf(err_fp, "    -L<num>      number lines from <num> (default is 1)\n");
   fprintf(err_fp, "    -n           no file names as page headers\n");
   fprintf(err_fp, "    -V           print version information and exit\n");
   fprintf(err_fp, "    -u           print this usage message and exit\n");
@@ -3587,6 +4167,8 @@ int main(int argc, char *argv[])
   tab_in = 8;
   tab_out = 3;
   tab_unit = 'f';
+  print_lines = FALSE;
+  numbered_option = NULL;
   headers_option = TRUE;
   font_option = size_option = line_option = tabin_option =
     tabout_option = setup_option = language_option = (char *) NULL;
@@ -3654,6 +4236,7 @@ int main(int argc, char *argv[])
 	    ErrorHeader(), outfilename);
 	  exit(1);
 	}
+	/* setbuf(out_fp, (char *) NULL); */
 	break;
 
 
@@ -3767,13 +4350,26 @@ int main(int argc, char *argv[])
      
 	/* read alternative setup file */
 	if( raw_seen )
-	{ fprintf(err_fp, "%s: -s illegal with -r option\n", ErrorHeader());
+	{ fprintf(err_fp, "%s: -S illegal with -r option\n", ErrorHeader());
 	  exit(1);
 	}
 	GetArg(setup_option, "usage: -S<filename>", FALSE);
 	break;
 
 
+      case 'L':
+     
+	/* read line numbering */
+	GetArg(numbered_option, "usage: -L<number>", TRUE);
+	print_lines = TRUE;
+	print_num = 1;
+	if( numbered_option!=NULL && sscanf(numbered_option,"%d",&print_num)!=1)
+	{ fprintf(err_fp, "%s usage: -L  or  -L<number>\n", ErrorHeader());
+	  exit(1);
+	}
+	break;
+
+
       case 'n':
      
 	if( raw_seen )
@@ -3932,6 +4528,8 @@ int main(int argc, char *argv[])
 	fprintf(out_fp, "    tabin { %s }\n", tabin_option );
       if( tabout_option != NULL )
 	fprintf(out_fp, "    tabout { %s }\n", tabout_option );
+      if( print_lines )
+	fprintf(out_fp, "    numbered { %d }\n", print_num );
       fprintf(out_fp, "%s%s\n", "@Be", "gin");
       while( (ch = getc(in_fp)) != EOF )
 	putc(ch, out_fp);
author	Jeffrey H. Kingston <jeff@it.usyd.edu.au>	2010-09-14 20:36:35 +0000
committer	Jeffrey H. Kingston <jeff@it.usyd.edu.au>	2010-09-14 20:36:35 +0000
commit	b10d39aec443165093f8f28bc6f940530b89cdaf (patch)
tree	63a1ef3b3f1d2562c498291cda341a2171a1fe1c /prg2lout.c
parent	2f4268e5e02216be53cd85816362191373512463 (diff)
download	lout-b10d39aec443165093f8f28bc6f940530b89cdaf.tar.gz