path: root/prg2lout.c

 

/*****************************************************************************/
/*                                                                           */
/*  PRG2LOUT: A PROGRAM TO CONVERT PROGRAM SOURCES INTO LOUT                 */
/*  COPYRIGHT (C) 2000, 2008 Jeffrey H. Kingston                             */
/*                                                                           */
/*  Part of Lout Version 3.39                                                */
/*                                                                           */
/*  Jeffrey H. Kingston (jeff@cs.su.oz.au)                                   */
/*  Basser Department of Computer Science                                    */
/*  The University of Sydney 2006                                            */
/*  AUSTRALIA                                                                */
/*                                                                           */
/*  C and C++, Eiffel, Blue, Java, and Nonpareil by Jeff Kingston            */
/*  Perl and Pod by Jeff Kingston and Mark Summerfield                       */
/*  Python by Mark Summerfield (Python 2.5 update Nov 2006)                  */
/*  Ruby by Michael Piotrowski                                               */
/*  Haskell by Thorsten Seitz (Nov 2002), mods by Gabor Greif                */
/*  RSL by Darren Bane (February 2003)                                       */
/*  JavaScript by Mark Summerfield (Nov 2010)                                */
/*  Tcl by Mark Summerfield (Nov 2010)                                       */
/*                                                                           */
/*  This program is free software; you can redistribute it and/or modify     */
/*  it under the terms of the GNU General Public License as published by     */
/*  the Free Software Foundation; either Version 3, or (at your option)      */
/*  any later version.                                                       */
/*                                                                           */
/*  This program is distributed in the hope that it will be useful,          */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/*  GNU General Public License for more details.                             */
/*                                                                           */
/*  You should have received a copy of the GNU General Public License        */
/*  along with this program; if not, write to the Free Software              */
/*  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.                */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/*  GENERAL INTRODUCTION TO PRG2LOUT                                         */
/*                                                                           */
/*  The best way to understand the aim of prg2lout, as currently conceived,  */
/*  is to look in file cprint at the setup file options.  You will see that  */
/*  the aim is to provide three basic styles: fixed (essentially mono font), */
/*  varying (essentially varying-width font with various faces for different */
/*  elements at the user's choice), and symbol (similar to varying).         */
/*                                                                           */
/*  The elements currently aimed for are strings, identifiers, comments,     */
/*  keywords, numbers, and operators, and the end user is able to choose,    */
/*  for each of these kinds of elements, which font to set them in.          */
/*                                                                           */
/*  This is achieved by a simple division of labour:  prg2lout does the      */
/*  classifying of the input into a sequence of these elements, and the Lout */
/*  end (cprint and cprintf, or their clones for other languages) does the   */
/*  formatting.  For example, the C text                                     */
/*                                                                           */
/*      inc = inc / 2                                                        */
/*                                                                           */
/*  would be classified by prg2lout into identifier, operator, identifier,   */
/*  operator, number; and consequently prg2lout would emit                   */
/*                                                                           */
/*      @PI{inc} @PO{=} @PI{inc} @PO{"/"} @PN{2}                             */
/*                                                                           */
/*  which is readable by Lout, thanks to having quotes around everything     */
/*  potentially dangerous, and clearly tells Lout, by means of the commands  */
/*  @PC, @PI, etc., how each part of the input has been classified.          */
/*                                                                           */
/*  The actual classification is carried out by prg2lout as follows.  Each   */
/*  programming language is described to prg2lout as a collection of tokens; */
/*  you say what the token begins with, what's a legal character inside the  */
/*  token, and how it ends.  You also say which command (@PC, @PI etc.) to   */
/*  emit when a token of that kind is found.  Prg2lout does the rest.        */
/*                                                                           */
/*  Prg2lout knows all about tricky problems such as multi-line tokens (it   */
/*  breaks them up into single-line pieces) and backslash in Lout strings    */
/*  (it replaces any \ within an output string by \\, " by \", etc.).  It    */
/*  also handles tab characters and formfeed characters properly, and it     */
/*  produces intelligible error messages when unexpected things happen,      */
/*  such as input terminating in the middle of a string.  This attention to  */
/*  detail is a strong reason for using prg2lout rather than something more  */
/*  ad-hoc, such as @Verbatim or a quick script.                             */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/*  HOW TO ADD ANOTHER LANGUAGE TO PRG2LOUT                                  */
/*                                                                           */
/*  Step 1.  Construct clones of (say) eiffel and eiffelf (these are in      */
/*  $LOUTLIB/include) with every occurrence of eiffel or Eiffel in them      */
/*  changed to your language as appropriate.  Install your files in the      */
/*  Lout system include directory alongside eiffel and eiffelf.              */
/*                                                                           */
/*  It is good to clone the files exactly because that way all program       */
/*  formatting works the same way, and one chapter of the User's Guide       */
/*  covers the lot.  However if your language has some unique element, not   */
/*  readily classifiable as a string, identifier, comment, keyword,          */
/*  number, or operator, it is possible to emit a different command of       */
/*  your choice for the new element; but then your clones of eiffel and      */
/*  eiffelf have to be extended to handle that command.                      */
/*                                                                           */
/*  Step 2.  Have a browse through the token declarations below, and work    */
/*  out which of them you need for your language.  If you need a token that  */
/*  isn't there already, you'll have to define it; there are many examples   */
/*  and documentation there to help you.  The tokens for Perl are rather     */
/*  complicated and don't make a good model for most languages, so look      */
/*  more at the C and Eiffel ones.                                           */
/*                                                                           */
/*  Step 3.  Browse through the language declarations, and declare your      */
/*  language following those examples:  first you give a set of one or more  */
/*  alternative names for your language, then some other things, including   */
/*  the list of tokens of the language, and its keywords.                    */
/*                                                                           */
/*  Step 4.  Add your language variable to the list in the initializer of    */
/*  variable languages, as you can see the others have been done.  Try to    */
/*  keep the list alphabetical to deflect any charges of language bias.      */
/*                                                                           */
/*  Step 5.  If any lists of initializers now contain more than MAX_STARTS,  */
/*  MAX_STARTS2, MAX_NAMES, MAX_TOKENS, or MAX_KEYWORDS elements, increase   */
/*  these constants until they don't.  The gcc compiler will warn you if     */
/*  you forget to do this.                                                   */
/*                                                                           */
/*  Step 6.  Recompile and reinstall prg2lout, test "prg2lout -u" then       */
/*  "prg2lout -l <mylanguage> <myfile> | lout -s > out.ps".                  */
/*                                                                           */
/*  Step 7.  Send your tested and tidied files to me for incorporation       */
/*  in the next Lout release.  If you do this, please try hard to ensure     */
/*  that your new code conforms to the formal definition of your language.   */
/*  Feel free to email me for advice as you go along.                        */
/*                                                                           */
/*  Jeff Kingston                                                            */
/*  jeff@it.usyd.edu.au                                                      */
/*                                                                           */
/*****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
/* Size of the per-character tables (e.g. chtype[] and escape_chtype[] in    */
/* TOKEN): one slot for each possible unsigned char value.                   */
#define MAX_CHAR		256
/* True if ch is a space, tab, newline, or formfeed.  NB this is a macro     */
/* and ch may be evaluated up to four times, so pass it an expression        */
/* without side effects.                                                     */
#define is_whitespace(ch) ((ch)==' ' || (ch)=='\t' || (ch)=='\n' || (ch)=='\f')
/* Shorthand cast to (unsigned char *), written as a prefix to a string      */
/* literal, e.g.  U "abc".  It is deliberately a bare cast, not a            */
/* function-like macro.                                                      */
#define U (unsigned char *)

/*****************************************************************************/
/*                                                                           */
/*  MAX_STARTS        1 + Maximum length of "starts" array in any token      */
/*  MAX_STARTS2       1 + Maximum length of "starts2" array in any token     */
/*  MAX_NAMES         1 + Maximum number of names for any language           */
/*  MAX_TOKENS        1 + Maximum number of tokens in any language           */
/*  MAX_KEYWORDS      1 + Maximum number of keywords in any language         */
/*                                                                           */
/*****************************************************************************/

/* Each limit is one more than the longest initializer list it bounds.       */
#define MAX_STARTS    120  /* size of starts[] in a token                    */
#define MAX_STARTS2    30  /* size of starts2[], brackets2[] and ends2[]     */
#define MAX_NAMES      10  /* names of any one language                      */
#define MAX_TOKENS    150  /* tokens of any one language                     */
#define MAX_KEYWORDS  350  /* keywords of any one language                   */

/*****************************************************************************/
/*                                                                           */
/*  Bracketing pairs                                                         */
/*                                                                           */
/*  This declaration explains to prg2lout that { matches }, etc.             */
/*                                                                           */
/*****************************************************************************/

/* One bracketing pair: "first" is the opening string and "second" is the    */
/* string that matches (closes) it.                                          */
typedef struct {
  unsigned char *first;
  unsigned char *second;
} CHAR_PAIR;

/* The known bracketing pairs.  The list is terminated by an entry whose     */
/* fields are both NULL.                                                     */
CHAR_PAIR pairs[] = {
  { .first = (unsigned char *) "(", .second = (unsigned char *) ")" },
  { .first = (unsigned char *) "{", .second = (unsigned char *) "}" },
  { .first = (unsigned char *) "[", .second = (unsigned char *) "]" },
  { .first = (unsigned char *) "<", .second = (unsigned char *) ">" },
  { .first = NULL,                  .second = NULL                  }
};


/*****************************************************************************/
/*                                                                           */
/*  Character sets                                                           */
/*                                                                           */
/*  These are prg2lout's definitions of various commonly needed sets of      */
/*  characters.  May need enhancement for Latin1 etc.                        */
/*                                                                           */
/*****************************************************************************/

/* Stands in for a character set containing every character.  It is NULL     */
/* rather than a string, so it must only be used where the program checks    */
/* for NULL before reading the set (presumably a token's set of legal        */
/* characters -- confirm at the point of use).                               */
#define AllCharacters	NULL	/* code will recognize NULL and do this */

/* It is not possible to further categorize the characters in the G1
 * area of ISO 8859 code sets (code points 0xA0 through 0xFF) because
 * there are no fixed ranges (e.g., 0xA1 is a punctuation mark in
 * Latin 1, but a letter in Latin 2).  However, this is not really a
 * problem since all characters in this area can be considered
 * printable. */

/* The 96 code points 0xA0 through 0xFF (octal \240 to \377), eight per row. */
#define G1_Characters							\
  "\240\241\242\243\244\245\246\247"					\
  "\250\251\252\253\254\255\256\257"					\
  "\260\261\262\263\264\265\266\267"					\
  "\270\271\272\273\274\275\276\277"					\
  "\300\301\302\303\304\305\306\307"					\
  "\310\311\312\313\314\315\316\317"					\
  "\320\321\322\323\324\325\326\327"					\
  "\330\331\332\333\334\335\336\337"					\
  "\340\341\342\343\344\345\346\347"					\
  "\350\351\352\353\354\355\356\357"					\
  "\360\361\362\363\364\365\366\367"					\
  "\370\371\372\373\374\375\376\377"

/* The 95 printable ASCII characters (space through ~), each listed exactly  */
/* once.  Shared by all the AllPrintable sets below so that they cannot      */
/* drift apart; it is local to this group and is undefined again after it.   */
#define PrintableASCII_							\
  " !\"#$%&'()*+,-./0123456789:;<=>?@[\\]^_`{|}~"			\
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

/* Printable ASCII plus the G1 area. */
unsigned char AllPrintable[] = PrintableASCII_ G1_Characters ;

/* As AllPrintable, plus newline. */
unsigned char AllPrintablePlusNL[] = PrintableASCII_ "\n" G1_Characters ;

/* As AllPrintable, plus tab. */
unsigned char AllPrintablePlusTab[] = PrintableASCII_ "\t" G1_Characters ;

/* As AllPrintable, plus newline and tab. */
unsigned char AllPrintableTabNL[] = PrintableASCII_ "\n\t" G1_Characters ;

/* As AllPrintable, plus newline, tab, and formfeed. */
unsigned char AllPrintableTabNLFF[] = PrintableASCII_ "\n\t\f" G1_Characters ;

#undef PrintableASCII_

/* Pieces shared by the sets below; local to this group of definitions and   */
/* undefined again at its end.                                               */
#define UpperAZ_	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define LowerAZ_	"abcdefghijklmnopqrstuvwxyz"
#define UnderDigits_	"_0123456789"

/* Uppercase then lowercase letters. */
unsigned char Letters[] = UpperAZ_ LowerAZ_ ;

/* Lowercase letters only. */
unsigned char lowercaseLetters[] = LowerAZ_ ;

/* Uppercase letters only. */
unsigned char uppercaseLetters[] = UpperAZ_ ;

/* Letters, underscore, and digits. */
unsigned char Letter_Digit[] = UpperAZ_ LowerAZ_ UnderDigits_ ;

/* As Letter_Digit, plus backquote and single quote. */
unsigned char Letter_Digit_Quotes[] = UpperAZ_ LowerAZ_ UnderDigits_ "`'" ;

/* Characters of the Haskell operator set. */
unsigned char HaskellOpCharacters[] = "!#$%&*+./<=>?^|:-~";

/* Characters of the Nonpareil operator punctuation set. */
unsigned char NonpareilOperatorPunct[] = "@$%^&*=+|<>/?`";

/* As Letter_Digit, plus ?, !, and =. */
unsigned char Ruby_Methodname[] = UpperAZ_ LowerAZ_ UnderDigits_ "?!=" ;

#undef UpperAZ_
#undef LowerAZ_
#undef UnderDigits_


/* The macros below each expand to a comma-separated list of strings, one    */
/* string per element, for use inside a brace-enclosed array initializer.    */
/* Each element is written with the U cast to (unsigned char *).             */

/* The 26 uppercase letters, each as a separate one-character string.        */
#define UppercaseSepLetters 						\
U "A", U "B", U "C", U "D", U "E", U "F", U "G", U "H", U "I", U "J",	\
U "K", U "L", U "M", U "N", U "O", U "P", U "Q", U "R", U "S", U "T",	\
U "U", U "V", U "W", U "X", U "Y", U "Z"

/* The 26 lowercase letters, each as a separate one-character string.        */
#define LowercaseSepLetters 						\
U "a", U "b", U "c", U "d", U "e", U "f", U "g", U "h", U "i", U "j",	\
U "k", U "l", U "m", U "n", U "o", U "p", U "q", U "r", U "s", U "t",	\
U "u", U "v", U "w", U "x", U "y", U "z"

/* All 52 letters: the uppercase list followed by the lowercase list.        */
#define SepLetters UppercaseSepLetters, LowercaseSepLetters

/* The ten decimal digits, each as a separate one-character string.          */
#define SepDigits							\
U "0", U "1", U "2", U "3", U "4", U "5", U "6", U "7", U "8", U "9"

/* The letters A to F in both cases.  NB the digits 0-9 are not included;    */
/* add SepDigits alongside if they are wanted.                               */
#define HexDigits							\
U "A", U "a", U "B", U "b", U "C", U "c", U "D", U "d", U "E", U "e",	\
U "F", U "f"

#define SepPunct							\
U "/", U "(", U "[", U "{",  U "<", U "!", U "%", U "#", U "|", U ",",	\
U ":", U ";", U "$", U "\"", U "^", U "&", U "*", U "-", U "=", U "+",	\
U "~", U "'", U "@", U "?",  U ".", U "`"

#define BktPunct							\
U "",  U "(", U "[", U "{",  U "<", U "",  U "",  U "",  U "",  U "",	\
U "",  U "",  U "",  U "",   U "",  U "",  U "",  U "",  U "",  U "",	\
U "",  U "",  U "",  U "",   U "",  U ""

#define EndPunct							\
U "/", U ")", U "]", U "}",  U ">", U "!", U "%", U "#", U "|", U ",",	\
U ":", U ";", U "$", U "\"", U "^", U "&", U "*", U "-", U "=", U "+",	\
U "~", U "'", U "@", U "?",  U ".", U "`"

/* The characters of NonpareilOperatorPunct, in the same order, each as a    */
/* separate one-character string.                                            */
#define SepNonpareilOperatorPunct					\
U "@", U "$", U "%",  U "^", U "&", U "*", U "=", U "+", U "|",		\
U "<", U ">",  U "/", U "?", U "`"

/* The characters of HaskellOpCharacters, in the same order, each as a       */
/* separate one-character string.                                            */
#define HaskellOpChars 							\
U "!", U "#", U "$", U "%", U "&", U "*", U "+", U ".", U "/",		\
U "<", U "=", U ">", U "?", U "^", U "|", U ":", U "-", U "~"

/* As HaskellOpChars, but with each character preceded by "(", giving        */
/* two-character strings.                                                    */
#define HaskellParenOpChars						\
U "(!", U "(#", U "($", U "(%", U "(&", U "(*", U "(+", U "(.", U "(/",	\
U "(<", U "(=", U "(>", U "(?", U "(^", U "(|", U "(:", U "(-", U "(~"

#define	PercentLetters							\
U "%A", U "%B", U "%C", U "%D", U "%E", U "%F", U "%G", U "%H", U "%I",	\
U "%J", U "%K", U "%L", U "%M", U "%N", U "%O", U "%P", U "%Q", U "%R",	\
U "%S", U "%T", U "%U", U "%V", U "%W", U "%X", U "%Y", U "%Z",		\
U "%a", U "%b", U "%c", U "%d", U "%e", U "%f", U "%g", U "%h", U "%i",	\
U "%j", U "%k", U "%l", U "%m", U "%n", U "%o", U "%p", U "%q", U "%r",	\
U "%s", U "%t", U "%u", U "%v", U "%w", U "%x", U "%y", U "%z", U "%_"


/*****************************************************************************/
/*                                                                           */
/*  TOKEN - put your token declarations in this section                      */
/*                                                                           */
/*  The fields of token_rec have the following meanings:                     */
/*                                                                           */
/*  name                                                                     */
/*      The name of this token, e.g. "string" or "identifier".  This field   */
/*      is used only by error messages generated by prg2lout; for example,   */
/*      prg2lout might print the message "input ended within string".        */
/*                                                                           */
/*  print_style                                                              */
/*                                                                           */
/*          print_style               What gets printed                      */
/*          -------------------------------------------------------          */
/*          PRINT_WHOLE_QUOTED        command{"token"}                       */
/*          PRINT_NODELIMS_QUOTED     command{"token-minus-delims"}          */
/*          PRINT_WHOLE_UNQUOTED      command{token}                         */
/*          PRINT_NODELIMS_UNQUOTED   command{token-minus-delims}            */
/*          PRINT_NODELIMS_INNER      command{inner}                         */
/*          PRINT_COMMAND_ONLY        command                                */
/*                                                                           */
/*      If command (see next) is empty then the braces {} are not printed.   */
/*                                                                           */
/*      PRINT_WHOLE_QUOTED.  This style is the most frequently used one;     */
/*      it prints the token, enclosed in braces and quotes, preceded by the  */
/*      command.  The quotes ensure that the result is legal Lout; any " or  */
/*      \ in the token is printed with a preceding \ as required in Lout.    */
/*      The usual arrangement for handling white space is that none of the   */
/*      tokens contain it; when it is encountered prg2lout generates the     */
/*      appropriate Lout without being told: a space for a space, a newline  */
/*      for a newline (possibly triggering a line number on the next line),  */
/*      @NP for a formfeed, and something clever for tab which does the      */
/*      required thing.  However, you can define a token that contains       */
/*      white space if you wish, and then the effect will be:                */
/*                                                                           */
/*         space and tab    The quotation marks will be temporarily          */
/*                          closed off, the white space handled as just      */
/*                          described, then the quotes opened again          */
/*                                                                           */
/*         newline and ff   Both the quotation marks and the command         */
/*                          will be closed off, the white space handled      */
/*                          as just described, and then a new command        */
/*                          started.  In effect, the token is broken into    */
/*                          a sequence of tokens at these characters.        */
/*                                                                           */
/*      PRINT_NODELIMS_QUOTED.  This is like PRINT_WHOLE_QUOTED except that  */
/*      the opening and closing delimiters of the token are omitted from     */
/*      the print.  This is useful occasionally when these delimiters are    */
/*      formatting markers, not intended to be printed.                      */
/*                                                                           */
/*      PRINT_WHOLE_UNQUOTED.  This style prints the command and braces      */
/*      as usual, but omits the quotes and prints the token absolutely       */
/*      verbatim.  In general this is not going to produce legal Lout,       */
/*      but it is useful in two cases:  when the token is a Lout escape,     */
/*      so that it is the user's responsibility to ensure that its content   */
/*      is legal Lout; and when the command is another filter command, so    */
/*      that the token content will not go directly into Lout anyway, it     */
/*      will go through the other filter first.  Since the result has to     */
/*      be verbatim, there is no special treatment of white space characters */
/*      and no insertion of line numbers.  However, if braces are printed    */
/*      they really ought to match, so prg2lout checks this and will         */
/*      complain and insert braces into the verbatim part if necessary.      */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/*  Meaning of TOKEN fields (ctd.)                                           */
/*                                                                           */
/*      PRINT_NODELIMS_UNQUOTED.  This is like PRINT_WHOLE_UNQUOTED except   */
/*      that the opening and closing delimiters of the token are omitted.    */
/*                                                                           */
/*      PRINT_NODELIMS_INNER.  Like PRINT_NODELIMS_UNQUOTED except that the  */
/*      inner part (i.e. not delimiters) is formatted in the same language.  */
/*                                                                           */
/*      PRINT_COMMAND_ONLY.  This ignores the token and prints just the      */
/*      command, presumably because the command says it all for that token.  */
/*      When using PRINT_COMMAND_ONLY you will probably need to enclose the  */
/*      command with braces: since there are no following braces in this     */
/*      print style, your command will run into the next one otherwise.      */
/*                                                                           */
/*  command                                                                  */
/*      The Lout command to print.  This command could be any legal Lout;    */
/*      programming language setup files offer the following Lout symbols    */
/*      that make the most common commands:                                  */
/*                                                                           */
/*          @PI  for formatting identifiers                                  */
/*          @PK  for formatting keywords                                     */
/*          @PO  for formatting operators                                    */
/*          @PN  for formatting numbers                                      */
/*          @PS  for formatting strings                                      */
/*          @PC  for formatting comments                                     */
/*          @PA  for printing an asterisk (lower on the line than usual)     */
/*          @PM  for printing a minus sign (longer than a hyphen)            */
/*          @PD  for printing a dot (.), only larger than usual              */
/*                                                                           */
/*      The last three require PRINT_COMMAND_ONLY (they take no parameter).  */
/*      If command is NULL or "", then no command will be printed and        */
/*      furthermore the token will not be enclosed in the usual braces.      */
/*                                                                           */
/*  alternate_command                                                        */
/*      Every language has a list of keywords.  Just before printing each    */
/*      token, it is compared against the keywords.  If it is one of them,   */
/*      then alternate_command is used instead of command.  For example,     */
/*      identifiers usually have command @PI and alternate_command @PK.      */
/*                                                                           */
/*  following_command                                                        */
/*      Print this Lout command (or commands) after the token.  If it is a   */
/*      "broken" multi-line token, print this command after each fragment    */
/*                                                                           */
/*  start_line_only                                                          */
/*      A Boolean field.  If true, this token is to be recognized only       */
/*      if it occurs at the very start of a line.                            */
/*                                                                           */
/*  starts[]                                                                 */
/*      This field holds an array of strings.  If prg2lout discovers any     */
/*      one of these strings while it is not reading some other token,       */
/*      then it deems that this token has begun.  The recognized string      */
/*      is the token's "starting delimiter".                                 */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/*  Meaning of TOKEN fields (ctd.)                                           */
/*                                                                           */
/*  starts2[], brackets2[], ends2[]                                          */
/*      These fields each hold an array of strings, and the three arrays     */
/*      must have equal length.  If starts2[] has length zero, these fields  */
/*      do not apply.  Otherwise, they modify the meaning of starts[],       */
/*      bracket_delimiter, and end_delimiter below.  Their effect is best    */
/*      seen by looking at some examples from Perl, their main user:         */
/*                                                                           */
/*         q/hello/  qq/hello/  qq?hello?  qq{hel{}lo}                       */
/*                                                                           */
/*      These strings may begin with q, qq, qx, and several other things;    */
/*      this is then followed by a single character which determines the     */
/*      string terminator; e.g. / means "terminate with /", { means          */
/*      "terminate with }", etc.  In some cases the start and end delims     */
/*      come in matching pairs, and then there may be nested matching        */
/*      pairs.  This is implemented as follows:                              */
/*                                                                           */
/*         starts:    { "q", "qq" }                                          */
/*         starts2:   { "/", "?", "{" }                                      */
/*         brackets2: { "",  "",  "{" }                                      */
/*         ends2:     { "/", "?", "}" }                                      */
/*                                                                           */
/*      Briefly, every token with non-null starts2 is expanded into a set    */
/*      of tokens, one for each element i of starts2, whose starting delims  */
/*      are starts with starts2[i] added, bracketing delim brackets2[i],     */
/*      and end_delim ends2[i].  PerlQTypeToken is a larger example of this. */
/*                                                                           */
/*  legal                                                                    */
/*      This string defines the set of legal characters inside this token.   */
/*      For example, numbers might have "0123456789." for this field, since  */
/*      these are the characters that are legal within numbers, usually.     */
/*                                                                           */
/*  escape                                                                   */
/*      This string defines a single character which is the escape           */
/*      character for this token.  That is, if we are reading this token     */
/*      and come upon this character, the character following it is          */
/*      treated differently.  An empty string "" means no escape character.  */
/*                                                                           */
/*  escape_legal                                                             */
/*      This string defines the set of characters which are legal after      */
/*      the escape character just mentioned.  If any one of these appears    */
/*      immediately after the escape character, it is deemed to be part      */
/*      of the token even if without the preceding escape it would not be.   */
/*                                                                           */
/*  inner_escape                                                             */
/*  end_inner_escape                                                         */
/*      The inner_escape string should be either empty (in which case it     */
/*      does not apply), or else it should contain a single character, the   */
/*      "inner escape" character.  An inner escape is a temporary suspension */
/*      of a token, reverting to the original language.  It is used to set   */
/*      program text within comments.  For example, in Eiffel and Blue,      */
/*      inner_escape is "`" and end_inner_escape is "'" and so we can write  */
/*                                                                           */
/*          -- increment `balance' by `amount'                               */
/*                                                                           */
/*      to treat balance and amount as identifiers within a comment token.   */
/*      The inner escape is not limited to one token, it may have any        */
/*      number of tokens, and they may have inner escapes too; prg2lout      */
/*      imposes no limit on the depth of nesting of inner escapes.           */
/*                                                                           */
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/*  Meaning of TOKEN fields (ctd.)                                           */
/*                                                                           */
/*  bracket_delimiter                                                        */
/*      If this string is encountered within a token (not escaped), it       */
/*      brackets with the next end_delimiter, meaning that the next end      */
/*      delimiter will not end the token.                                    */
/*                                                                           */
/*  end_delimiter                                                            */
/*      This string shows how the token ends; for example, a string would    */
/*      have end_delimiter ".  If empty, it means that the token ends        */
/*      just before the first character encountered that is not legal (see   */
/*      "legal" above).  For example, identifiers and numbers would have     */
/*      empty end_delimiter.  If ends2[] is not empty then end_delimiter     */
/*      is ignored, since ends2[] explains how the token ends.               */
/*                                                                           */
/*  end_start_line_only                                                      */
/*      A bool field.  If true, the end delimiter is to be recognized        */
/*      only if it occurs at the very start of a line.                       */
/*                                                                           */
/*  want_two_ends                                                            */
/*      A Boolean feature used only by Perl; true means that end_delimiter   */
/*      (or ends2[]) has to be encountered twice before the token ends,      */
/*      rather than the usual once.  Used by PerlSTypeToken to recognise     */
/*                                                                           */
/*           s/abc/ABC/                                                      */
/*                                                                           */
/*      etc. as single tokens.  If there is a bracket delimiter (see above), */
/*      this will look for a new matching delimiter pair, as in s{}<>.       */
/*                                                                           */
/*****************************************************************************/
/* Values for the print_style field of a TOKEN.  Only styles 1, 2 and 4 are  */
/* used by the token definitions nearby; the notes on the others are         */
/* guesses from their names and should be checked against the printing code. */
#define PRINT_WHOLE_QUOTED       1  /* whole token, quoted as usual          */
#define PRINT_NODELIMS_QUOTED    2  /* quoted, but without its delimiters    */
#define PRINT_WHOLE_UNQUOTED     3  /* presumably whole token, unquoted      */
#define PRINT_NODELIMS_UNQUOTED  4  /* no delimiters, printed unformatted    */
#define PRINT_NODELIMS_INNER     5  /* not used nearby; see printing code    */
#define PRINT_COMMAND_ONLY       6  /* not used nearby; see printing code    */

/* Description of one kind of token.  Token definitions initialize the       */
/* fields positionally, so the order of the fields must not be changed.      */
typedef struct token_rec {
  unsigned char *name;                   /* name used in error messages      */
  int print_style;                       /* one of the PRINT_* values        */
  unsigned char *command;                /* Lout command formatting token    */
  unsigned char *alternate_command;      /* alternate command, "" if none    */
  unsigned char *following_command;      /* following command, "" if none    */
  bool start_line_only;                  /* true: only at start of a line    */
  unsigned char *starts[MAX_STARTS];     /* strings that begin the token     */
  unsigned char *starts2[MAX_STARTS2];   /* optional second starts           */
  unsigned char *brackets2[MAX_STARTS2]; /* brackets going with starts2      */
  unsigned char *ends2[MAX_STARTS2];     /* if used, overrides end_delimiter */
  unsigned char *legal;                  /* characters legal inside token    */
  unsigned char *escape;                 /* escape string, "" if none        */
  unsigned char *escape_legal;           /* characters legal after escape    */
  unsigned char *inner_escape;           /* start of inner escape, or ""     */
  unsigned char *end_inner_escape;       /* end of inner escape, or ""       */
  unsigned char *bracket_delimiter;      /* bracketing delimiter, or ""      */
  unsigned char *end_delimiter;          /* how token ends; "" means it ends */
                                         /* before the first illegal char    */
  bool end_start_line_only;              /* true: end only at line start     */
  bool want_two_ends;                    /* true: end must be seen twice     */

  /* The following fields are initialized by the program, so do not give     */
  /* values for them in token definitions.                                   */
  unsigned char chtype[MAX_CHAR];        /* char types within token          */
  unsigned char escape_chtype[MAX_CHAR]; /* char types after escape          */
} TOKEN;


/*****************************************************************************/
/*                                                                           */
/*  Tokens defining strings and literal characters in non-Perl languages.    */
/*  NB "U" is a cast to (unsigned char *)                                    */
/*                                                                           */
/*****************************************************************************/

TOKEN CStringToken = {
  U "string",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "\"" },		/* strings begin with a " character                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintable,		/* inside, any printable is OK		            */
  U "\\",		/* within strings, \\ is the escape character       */
  AllPrintablePlusNL,	/* after escape char, any printable char or nl OK   */
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "\"",		/* strings end with a " character                   */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN CCharacterToken = {
  U "character",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting characters           */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "'" },		/* characters begin with a ' character              */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintable,		/* inside, any printable character is OK            */
  U "\\",		/* within characters, \\ is the escape character    */
  AllPrintable,		/* after escape char, any printable char is OK      */
  U "",			/* characters do not permit "inner escapes"         */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "'",		/* characters end with a ' character                */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


TOKEN EiffelStringToken = {
  U "string",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "\"" },		/* strings begin with a " character                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintable,		/* inside, any printable except " is OK             */
  U "%",		/* within strings, % is the escape character        */
  AllPrintable,		/* after escape char, any printable char is OK      */
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "\"",		/* strings end with a " character                   */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN EiffelCharacterToken = {
  U "character",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting characters           */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "'" },		/* characters begin with a ' character              */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintable,		/* inside, any printable except ' is OK             */
  U "%",		/* within characters, % is the escape character     */
  AllPrintable,		/* after escape char, any printable char is OK      */
  U "",			/* characters do not permit "inner escapes"         */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "'",		/* characters end with a ' character                */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


TOKEN PythonDblStringToken = {
  U "string",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "\"" },		/* strings begin with a " character                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintable,		/* inside, any printable is OK		            */
  U "\\",		/* within strings, \\ is the escape character       */
  AllPrintablePlusNL,	/* after escape char, any printable char or nl OK   */
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "\"",		/* strings end with a " character                   */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN PythonSnglStringToken = {
  U "string",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "'" },		/* strings begin with a ' character                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintable,		/* inside, any printable is OK		            */
  U "\\",		/* within strings, \\ is the escape character       */
  AllPrintablePlusNL,	/* after escape char, any printable char or nl OK   */
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "'",		/* strings end with a ' character                   */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN PythonTriSnglStringToken = {
  U "string",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "'''" },		/* strings begin with '''                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintableTabNL,	/* inside, any printable is OK		            */
  U "\\",		/* within strings, \\ is the escape character       */
  AllPrintableTabNL,	/* after escape char, any printable char or nl OK   */
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "'''",		/* strings end with '''                   */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN PythonTriDblStringToken = {
  U "string",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "\"\"\"" },	/* strings begin with """                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintableTabNL,	/* inside, any printable is OK		            */
  U "\\",		/* within strings, \\ is the escape character       */
  AllPrintableTabNL,	/* after escape char, any printable char or nl OK   */
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "\"\"\"",		/* strings end with """                    */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN HaskellStringToken = {
  U "string",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "\"" },		/* strings begin with a " character                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintable,		/* inside, any printable except " is OK             */
  U "\\",		/* within strings, \ is the escape character        */
  AllPrintable,		/* after escape char, any printable char is OK      */ 
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "\"",		/* strings end with a " character                   */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN HaskellCharacterToken = {
  U "character",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting characters           */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "'" },		/* characters begin with a ' character              */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintable,		/* inside, any printable except ' is OK             */
  U "\\",		/* within characters, \ is the escape character     */
  AllPrintable,		/* after escape char, any printable char is OK      */
  U "",			/* characters do not permit "inner escapes"         */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "'",		/* characters end with a ' character                */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Identifiers, in the form common to most programming languages.           */
/*                                                                           */
/*****************************************************************************/

TOKEN IdentifierToken = {
  U "identifier",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PI",		/* Lout command for formatting identifiers          */
  U "@PK",		/* Alternate command (for keywords)                 */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { SepLetters, U "_" }, /* identifiers begin with any letter or _          */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  Letter_Digit,		/* inside, letters, underscores, digits are OK      */
  U "",			/* no escape character within identifiers           */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* identifiers do not permit "inner escapes"        */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "",			/* identifiers do not end with a delimiter          */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN HaskellIdentifierToken = {
  U "identifier",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PI",		/* Lout command for formatting identifiers          */
  U "@PK",		/* Alternate command (for keywords)                 */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { SepLetters, U "_", U "`" },	/* identifiers begin with any letter or _   */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  Letter_Digit_Quotes,	/* inside, letters, underscores, digits are OK      */
  U "",			/* no escape character within identifiers           */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* identifiers do not permit "inner escapes"        */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "",			/* identifiers do not end with a delimiter          */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Numbers, in the form common to most programming languages.               */
/*                                                                           */
/*****************************************************************************/

TOKEN NumberToken = {
  U "number",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PN",		/* Lout command for formatting numbers              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { SepDigits },	/* numbers must begin with a digit                  */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  U "0123456789.eE",	/* inside, digits, decimal point, exponent          */
  U "",			/* no escape character within numbers               */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* numbers do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "",			/* numbers do not end with a delimiter              */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Operators, when user-defined from a set of punctuation characters        */
/*                                                                           */
/*****************************************************************************/

/* Initializer template for an operator token.  "start" is placed inside the */
/* braces that initialize starts[]; "legal" is given the U cast and becomes  */
/* the legal field.  All other fields are the same for every operator token. */
#define OperatorToken(start, legal)                                     \
{                                                                       \
  U "operator",         /* name, used for debugging only */             \
  PRINT_WHOLE_QUOTED,   /* print whole token, quoted */                 \
  U "@PO",              /* Lout command for operators */                \
  U "",                 /* no alternate command */                      \
  U "",                 /* no following command */                      \
  false,                /* may begin anywhere on a line */              \
  { start },            /* starts[]: supplied by caller */              \
  { NULL },             /* starts2 unused */                            \
  { NULL },             /* brackets2 unused */                          \
  { NULL },             /* ends2 unused */                              \
  U legal,              /* characters legal inside token */             \
  U "",                 /* no escape character */                       \
  U "",                 /* nothing legal after an escape */             \
  U "",                 /* no inner escape */                           \
  U "",                 /* no end of inner escape */                    \
  U "",                 /* no bracketing delimiter */                   \
  U "",                 /* no ending delimiter */                       \
  false,                /* end need not be at line start */             \
  false,                /* one end is enough */                         \
}

/* Operator token for Nonpareil, built from the OperatorToken template. */
TOKEN NonpareilOperatorToken = OperatorToken(
  SepNonpareilOperatorPunct,    /* goes into starts[] */
  NonpareilOperatorPunct        /* becomes the legal field */
);

/* Operator token for Haskell, built from the OperatorToken template. */
TOKEN HaskellOperatorToken = OperatorToken(
  HaskellOpChars,               /* goes into starts[] */
  HaskellOpCharacters           /* becomes the legal field */
);


/*****************************************************************************/
/*                                                                           */
/*  Tokens defining comments in various languages.                           */
/*                                                                           */
/*****************************************************************************/

TOKEN CCommentToken = {
  U "comment",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PC",		/* Lout command for formatting comments             */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "/*" },		/* comments begin with this character pair          */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintableTabNLFF,	/* inside, any printable char, tab, nl, ff is OK    */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* C comments do not permit "inner escapes"         */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "*/",		/* comments end with this character pair            */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN CPPCommentToken = {
  U "comment",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PC",		/* Lout command for formatting comments             */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "//" },		/* comments begin with this character pair          */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK (not NL)        */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* C comments do not permit "inner escapes"         */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no end delimiter (end of line will end it)       */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


TOKEN EiffelCommentToken = {
  U "comment",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PC",		/* Lout command for formatting comments             */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "--" },		/* comments begin with this character pair          */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "`",		/* start of "inner escape" in Eiffel comment        */
  U "'",		/* end of "inner escape" in Eiffel comment          */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no ending delimiter; end of line will end it     */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN BlueCommentToken = {
  U "comment",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PC",		/* Lout command for formatting comments             */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "==", U "--" },	/* comments begin with this character pair          */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "`",		/* start of "inner escape" in Blue comment          */
  U "'",		/* end of "inner escape" in Blue comment            */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no ending delimiter; end of line will end it     */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN NonpareilCommentToken = {
  U "comment",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PC",		/* Lout command for formatting comments             */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "#" },		/* comments begin with this character pair          */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK (not NL)        */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "`",		/* start of "inner escape" in Nonpareil comment     */
  U "'",		/* end of "inner escape" in Nonpareil comment       */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no end delimiter (end of line will end it)       */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN PythonCommentToken = {
  U "comment",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PC",		/* Lout command for formatting comments             */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "#" },		/* comments begin with this character pair          */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK (not NL)        */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* Python comments do not permit "inner escapes"    */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no end delimiter (end of line will end it)       */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN HaskellLineCommentToken = {
  U "line comment",	/* used by error messages involving this token      */
  PRINT_NODELIMS_QUOTED,/* print this token in quotes without delimiters    */
  U "@PCL",		/* Lout command for formatting comments             */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "--" },		/* comments begin with this character pair          */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "`",		/* start of "inner escape" in Haskell comment       */
  U "'",		/* end of "inner escape" in Haskell comment         */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no ending delimiter; end of line will end it     */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN HaskellCommentToken = {
  U "comment",		/* used by error messages involving this token      */
  PRINT_NODELIMS_QUOTED,/* print this token in quotes without delimiters    */
  U "@PC",		/* Lout command for formatting comments             */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "{-" },		/* comments begin with this character pair          */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintableTabNLFF,	/* inside, any printable char, tab, nl, ff is OK    */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* C comments do not permit "inner escapes"         */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "-}",		/* comments end with this character pair            */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Tokens defining escape comments in various languages.                    */
/*                                                                           */
/*  See discussion of "inner escapes" above for more information.            */
/*                                                                           */
/*****************************************************************************/

TOKEN CCommentEscapeToken = {
  U "Lout escape",	/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED,  /* print this token unformatted                 */
  U "",			/* no Lout command since we are printing raw        */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "/*@" },		/* escape comments begin with this delimiter        */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintableTabNLFF,	/* inside, any printable char, tab, nl, ff is OK    */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* no "inner escape" in escape comments             */
  U "",			/* so no end of "inner escape" either               */
  U "",			/* no bracketing delimiter			    */
  U "*/",		/* comments end with this character pair            */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN CPPCommentEscapeToken = {
  U "Lout escape",	/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED, /* print this token unformatted                  */
  U "",			/* no Lout command since we are printing raw        */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "//@" },		/* escape comments begin with this delimiter        */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* no "inner escape" in escape comments             */
  U "",			/* so no end of "inner escape" either               */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no end delimiter (end of line will end it)       */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


TOKEN EiffelCommentEscapeToken = {
  U "Lout escape",	/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED,  /* print this token unformatted                 */
  U "",			/* no Lout command since we are printing raw        */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "--@" },		/* escape comments begin with this delimiter        */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* no "inner escape" in escape comments             */
  U "",			/* so no end of "inner escape" either               */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no ending delimiter; end of line will end it     */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN BlueCommentEscapeToken = {
  U "Lout escape",	/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED,  /* print this token unformatted                 */
  U "",			/* no Lout command since we are printing raw        */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "==@", U "--@" },	/* escape comments begin with these delimiters      */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* no "inner escape" in escape comments             */
  U "",			/* so no end of "inner escape" either               */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no ending delimiter; end of line will end it     */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN PythonCommentEscapeToken = {
  U "Lout escape",	/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED,  /* print this token unformatted                 */
  U "",			/* no Lout command since we are printing raw        */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "#@" },		/* escape comments begin with this delimiter        */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK (not NL)        */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* no "inner escape" in escape comments             */
  U "",			/* so no end of "inner escape" either               */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no ending delimiter; end of line will end it     */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN HaskellCommentEscapeToken = {
  U "Lout escape",
  PRINT_NODELIMS_UNQUOTED,
  U "",
  U "",
  U "",
  false,
  { U "{-@" },
  { NULL },
  { NULL },
  { NULL },
  AllPrintablePlusTab,
  U "",
  U "",
  U "",
  U "",
  U "",
  U "-}",
  false,
  false,
};

TOKEN HaskellLineCommentEscapeToken = {
  U "Lout escape",	/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED,  /* print this token unformatted                 */
  U "",			/* no Lout command since we are printing raw        */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "--@" },		/* escape comments begin with this delimiter        */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* no "inner escape" in escape comments             */
  U "",			/* so no end of "inner escape" either               */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no ending delimiter; end of line will end it     */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Tokens which are fixed strings, hence simpler than the above.            */
/*                                                                           */
/*****************************************************************************/

#define FixedToken(str, command) /* initializer: fixed-string token */	\
{									\
  U str,                        /* name, used for debugging only   */	\
  PRINT_WHOLE_QUOTED,           /* printed in the usual way        */	\
  U command,                    /* Lout command that formats it    */	\
  U "", U "",                   /* no alternate, no following cmd  */	\
  false,                        /* not limited to start of line    */	\
  { U str },                    /* begins (and ends!) with str     */	\
  { NULL }, { NULL }, { NULL }, /* no start2, brackets2, or end2   */	\
  U "",                         /* no interior, so nothing legal   */	\
  U "", U "",                   /* no escape character             */	\
  U "", U "",                   /* no inner escape, nor its end    */	\
  U "",                         /* no bracketing delimiter         */	\
  U "",                         /* no ending delimiter             */	\
  false,                        /* end need not be at line start   */	\
  false,                        /* no second end delimiter needed  */	\
}

/* Punctuation and operator tokens shared by several languages. */
TOKEN HashToken                 = FixedToken("#", "@PO");
TOKEN ExclamationToken          = FixedToken("!", "@PO");
TOKEN PercentToken              = FixedToken("%", "@PO");
TOKEN HatToken                  = FixedToken("^", "@PO");
TOKEN AmpersandToken            = FixedToken("&", "@PO");
TOKEN SlashToken                = FixedToken("/", "@PO");
TOKEN ArrowToken                = FixedToken("->", "@A sym {arrowright} @PO");
TOKEN BackSlashToken            = FixedToken("\\", "@PO");
TOKEN LeftParenToken            = FixedToken("(", "@PO");
TOKEN RightParenToken           = FixedToken(")", "@PO");
TOKEN PlusToken                 = FixedToken("+", "@A sym {plus} @PO");
TOKEN EqualToken                = FixedToken("=", "@A sym {equal} @PO");
TOKEN LeftBraceToken            = FixedToken("{", "@PO");
TOKEN RightBraceToken           = FixedToken("}", "@PO");
TOKEN BarToken                  = FixedToken("|", "@PO");
TOKEN CircumToken               = FixedToken("~", "@PO");
TOKEN LeftBracketToken          = FixedToken("[", "@PO");
TOKEN LeftBracketBarToken       = FixedToken("[|", "@PO");
TOKEN RightBracketToken         = FixedToken("]", "@PO");
TOKEN RightBracketBarToken      = FixedToken("|]", "@PO");
TOKEN SemicolonToken            = FixedToken(";", "@PO");
TOKEN ColonToken                = FixedToken(":", "@PO");
TOKEN LessToken                 = FixedToken("<", "@A sym {less} @PO");
TOKEN GreaterToken              = FixedToken(">", "@A sym {greater} @PO");
TOKEN QuestionToken             = FixedToken("?", "@PO");
TOKEN CommaToken                = FixedToken(",", "@PO");
TOKEN DotToken                  = FixedToken(".", "@PO");
TOKEN DotDotToken               = FixedToken("..", "@PO");
TOKEN DotDotDotToken            = FixedToken("...", "@PO");
TOKEN LessEqualToken            = FixedToken("<=", "@A sym {lessequal} @PO");
TOKEN GreaterEqualToken         = FixedToken(">=", "@A sym {greaterequal} @PO");

/* Each language spells "not equal" differently; all print the same symbol. */
TOKEN CNotEqualToken            = FixedToken("!=", "@A sym {notequal} @PO");
TOKEN EiffelNotEqualToken       = FixedToken("/=", "@A sym {notequal} @PO");
TOKEN BlueNotEqualToken         = FixedToken("<>", "@A sym {notequal} @PO");

TOKEN AssignToken               = FixedToken(":=", "@PO");
TOKEN QuestionAssignToken       = FixedToken("?=", "@PO");
TOKEN DollarToken               = FixedToken("$", "@PO");
TOKEN ImpliesToken              = FixedToken("=>", "@A sym {arrowdblright} @PO");
TOKEN LeftArrowToken            = FixedToken("<-", "@A sym {arrowleft} @PO");

/* Haskell-named operators (EqvToken sits among them in definition order). */
TOKEN HaskellLambdaToken        = FixedToken("\\", "@PLAMBDA");
TOKEN HaskellAtPatternToken     = FixedToken("@", "@PO");
TOKEN DoubleColonToken          = FixedToken("::", "@PDOUBLECOLON");
TOKEN FunctionCompositionToken  = FixedToken(" . ", "@PCIRC");
TOKEN HaskellEquivalenceToken   = FixedToken("==", "@A sym {equivalence} @PO");
TOKEN HaskellConcatenationToken = FixedToken("++", "@PPLUSPLUS");
TOKEN EqvToken                  = FixedToken("<=>", "@A sym {arrowdblboth} @PO");
TOKEN HaskellOrToken            = FixedToken("||", "@PO");
TOKEN HaskellAndToken           = FixedToken("&&", "@PO");
/* TOKEN HaskellBacktickToken	= FixedToken("`", "@PO"); unused */

/* Python-named operators. */
TOKEN PythonPowerToken          = FixedToken("**", "@PO");
TOKEN PythonBitLeftShiftToken   = FixedToken("<<", "@PO");
TOKEN PythonBitRightShiftToken  = FixedToken(">>", "@PO");
TOKEN PythonBacktickToken       = FixedToken("`", "@PO");
TOKEN PythonDecoratorToken      = FixedToken("@", "@PO");


/*****************************************************************************/
/*                                                                           */
/*  Fixed-string tokens that are to be printed COMMAND_ONLY (no parameter).  */
/*                                                                           */
/*****************************************************************************/

#define NoParameterToken(str, command) /* initializer: command-only */	\
{									\
  U str,                        /* name, used for debugging only   */	\
  PRINT_COMMAND_ONLY,           /* print only the command          */	\
  U command,                    /* Lout command that formats it    */	\
  U "", U "",                   /* alternate and following: empty  */	\
  false,                        /* not limited to start of line    */	\
  { U str },                    /* begins (and ends!) with str     */	\
  { NULL }, { NULL }, { NULL }, /* no start2, brackets2, or end2   */	\
  U "",                         /* no interior, so nothing legal   */	\
  U "", U "",                   /* no escape character             */	\
  U "", U "",                   /* no inner escape, nor its end    */	\
  U "",                         /* no bracketing delimiter         */	\
  U "",                         /* no ending delimiter             */	\
  false,                        /* end need not be at line start   */	\
  false,                        /* no second end delimiter needed  */	\
}

/* Tokens printed as a bare Lout command, with no parameter following it. */
TOKEN StarToken                 = NoParameterToken("*", "{@PA}");
TOKEN MinusToken                = NoParameterToken("-", "{@PM}");
TOKEN EiffelDotToken            = NoParameterToken(".", "{@PD}");
TOKEN NonpareilDotDotToken      = NoParameterToken("..", "{@PDD}");
TOKEN NonpareilExclamationToken = NoParameterToken("!", "@PO{\"!\" &0.1f}");
TOKEN HaskellColonToken         = NoParameterToken(":", "{@PCOLON}");


/*****************************************************************************/
/*                                                                           */
/* Ruby specifics                                                            */
/*                                                                           */
/*****************************************************************************/

TOKEN RubyIdentifierToken = {
  U "identifier",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PI",		/* Lout command for formatting identifiers          */
  U "@PK",		/* Alternate command (for keywords)                 */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { SepLetters, U "_", U "$",
    U "@@", U "@" },	/* identifiers begin with these			    */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  Ruby_Methodname,	/* inside, letters, underscores, digits, !, ?, =    */
  U "",			/* no escape character within identifiers           */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* identifiers do not permit "inner escapes"        */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "",			/* identifiers do not end with a delimiter          */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN RubyGenDelimStringToken = {
  U "generalized string", /* used by error messages involving this token    */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "%", U "%q", U "%Q", U "%w",
    U "%r", U "%x" },   /* generalized strings begin with these 	    */
  { SepPunct },		/* start2 can be any punctuation character	    */
  { BktPunct },		/* bracketing delimiters to match SepPunct	    */
  { EndPunct },		/* end2 must match start2			    */
  AllCharacters,	/* inside, any character at all is OK               */
  U "\\",		/* within strings, \\ is the escape character       */
  AllCharacters,	/* after escape char, any character at all is OK    */
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* will be using bracket2 for bracket delimiter     */
  U "",			/* will be using end2 for the end delimiter here    */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  RSL Specifics                                                            */
/*                                                                           */
/*****************************************************************************/

TOKEN RSLIdentifierToken = {
  U "identifier",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PI",		/* Lout command for formatting identifiers          */
  U "@PK",		/* Alternate command (for keywords)                 */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { SepLetters, U "_", U "`" }, /* identifiers begin with any letter or _   */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no brackets2 either			    */
  { NULL },		/* so no end2 either				    */
  Letter_Digit,		/* inside, letters, underscores, digits are OK      */
  U "",			/* no escape character within identifiers           */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* identifiers do not permit "inner escapes"        */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* no bracketing delimiter			    */
  U "",			/* identifiers do not end with a delimiter          */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* RSL operators and keywords.  In each entry the first string is the text   */
/* matched in the source program and the second is the Lout command that     */
/* prints it.                                                                */
TOKEN RSLProductToken    = FixedToken("><",  "@A sym{multiply} @PO" ) ;
TOKEN RSLPartialMapToken = FixedToken("-~m->",  "@PartialMap @FA @PO" ) ;
TOKEN RSLAndToken        = FixedToken("/\\",  "@A sym{logicaland} @PO" ) ;
TOKEN RSLAlwaysToken     = FixedToken("always",  "@Eq { square } @FA @PO" ) ;
/* The closing brace after "element" was missing, unbalancing the command.   */
TOKEN RSLIsInToken       = FixedToken("isin",  "@A sym{element} @PO" ) ;
TOKEN RSLSubsetToken     = FixedToken("<<=",  "@A sym{reflexsubset} @PO" ) ;
TOKEN RSLUnionToken      = FixedToken("union", "@A sym{union} @PO" ) ;
TOKEN RSLListStartToken  = FixedToken("<.",  "@A sym{angleleft} @PO" ) ;
/* The source text of RSL's parallel operator is "||"; the Lout command had  */
/* been placed in the token-text slot, so the token could never match.       */
TOKEN RSLParToken        = FixedToken("||",  "@Eq { dbar } @FA @PO" ) ;
TOKEN RSLIntChoiceToken  = FixedToken("|^|",  "@IntChoice @FA @PO" ) ;
TOKEN RSLTurnstileToken  = FixedToken("|-",  "@Eq { vdash } @FA @PO" ) ;
TOKEN RSLListToken       = NoParameterToken( "-list",  "{*}" ) ;
TOKEN RSLPartialFnToken  = FixedToken("-~->",  "@PartialFn @FA @PO" ) ;
TOKEN RSLRelationToken   = FixedToken("<->",  "@A sym{arrowboth} @PO" ) ;
TOKEN RSLOrToken         = FixedToken("\\/",  "@A sym{logicalor} @PO" ) ;
/* NOTE(review): spacing inside this command differs from its siblings; it   */
/* is balanced, so it is left exactly as it was.                             */
TOKEN RSLNotIsInToken    = FixedToken("~isin",  "@A sym{notelement }@PO" ) ;
TOKEN RSLProperSuperToken= FixedToken(">>",  "@A sym{propersuperset} @PO" ) ;
TOKEN RSLInterToken      = FixedToken("inter",  "@A sym{intersection} @PO" ) ;
TOKEN RSLListEndToken    = FixedToken(".>",  "@A sym{angleright} @PO" ) ;
TOKEN RSLInterlockToken  = FixedToken("++",  "@Interlock @FA @PO" ) ;
TOKEN RSLLambdaToken     = FixedToken("-\\",  "@A sym{lambda} @PO" ) ;
TOKEN RSLImplRelToken	 = FixedToken("{=",  "@Eq { preceq } @FA @PO" ) ;
TOKEN RSLInfListToken    = FixedToken("-inflist",  "@InfList @FA @PO" ) ;
TOKEN RSLMapToken        = FixedToken("-m->",  "@Map @FA @PO" ) ;
TOKEN RSLSTToken         = FixedToken(":-",  "@A sym{dotmath} @PO" ) ;
TOKEN RSLNotEqualToken   = FixedToken("~=",  "@A sym{notequal} @PO" ) ;
TOKEN RSLPowerToken      = FixedToken("**",  "@A sym{arrowup} @PO" ) ;
TOKEN RSLProperSubsetToken = FixedToken( "<<",  "@A sym{propersubset} @PO" ) ;
TOKEN RSLSupersetToken   = FixedToken(">>=",  "@A sym{reflexsuperset} @PO" ) ;
TOKEN RSLOverrideToken   = FixedToken("!!",  "@Dagger @FA @PO" ) ;
TOKEN RSLMapletToken     = FixedToken("+>",  "@Eq { mapsto } @FA @PO" ) ;
TOKEN RSLExtChoiceToken  = FixedToken("|=|",  "@ExtChoice @FA @PO" ) ;
TOKEN RSLApplyToken      = FixedToken("#",  "@A sym{degree} @PO" ) ;
TOKEN RSLImplExprToken   = FixedToken("[=",  "@Eq { sqsubseteq } @FA @PO" ) ;
TOKEN RSLPrimeToken      = NoParameterToken( "'", "{'}" ) ;
TOKEN RSLExistsOneToken  = FixedToken("exists!",  "{@Sym existential}! @FA @PO" );


/*****************************************************************************/
/*                                                                           */
/*  Perl (quarantined from other languages since it's very different).       */
/*                                                                           */
/*  Perl code co-authored by Jeffrey H. Kingston and Mark Summerfield        */
/*  March 2000                                                               */
/*                                                                           */
/*  In the comments below, WCS refers to "Programming Perl", Second          */
/*  Edition (1996), by Wall, Christiansen, and Schwartz.  However Perl       */
/*  has changed since then and this code also reflects those changes         */
/*  based on the on-line documentation provided with the 5.6.0 release.      */
/*                                                                           */
/*****************************************************************************/

/*****************************************************************************/
/*                                                                           */
/*  Perl's strings and regular expressions                                   */
/*                                                                           */
/*  The table in WCS pp. 41 is a good summary of the possibilities:          */
/*                                                                           */
/*      ''    q//                                                            */
/*      ""    qq//                                                           */
/*      ``    qx//                                                           */
/*      ()    qw//                                                           */
/*      //    m//                                                            */
/*      s///  s///                                                           */
/*      y///  tr///                                                          */
/*                                                                           */
/*  To this must be added the following quotation, which begins just         */
/*  below the table:                                                         */
/*                                                                           */
/*      Any non-alphabetic, non-whitespace delimiter can be used in place    */
/*      of /.  If the opening delimiter is a parenthesis, bracket, brace,    */
/*      or angle bracket, the closing delimiter will be the matching         */
/*      construct.  (Embedded occurrences of the delimiters must match in    */
/*      pairs.) ... Finally, for two-string constructs like s/// and tr///,  */
/*      if the first pair of quotes is a bracketing pair, then the second    */
/*      part gets its own starting quote character, which needn't be the     */
/*      same as the first pair.  So you can write things like s{foo}(bar)    */
/*      or tr[a-z][A-Z].  Whitespace is allowed between the two inner quote  */
/*      characters, so you could even write that last one as                 */
/*                                                                           */
/*          tr [a-z]                                                         */
/*             [A-Z]                                                         */
/*                                                                           */
/*  Amazingly, the tokens below implement all of this perfectly except that  */
/*  when / appears without anything in front, it will be recognized as a     */
/*  regular expression provided that one of a long list of things precedes   */
/*  it, otherwise it will be a division symbol.  This is not perfect but     */
/*  seems to come extremely close in practice.                               */
/*                                                                           */
/*****************************************************************************/


TOKEN PerlSingleQuoteStringToken = {
  U "''-string",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "'" },		/* strings begin with a ' character                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* inside, any character at all is OK               */
  U "\\",		/* within strings, \\ is the escape character       */
  AllCharacters,	/* after escape, any character is OK (trust us)     */
  U "", U "",		/* no "inner escapes"; no end innner escape         */
  U "",			/* no bracketing delimiter			    */
  U "\'",		/* strings end with a ' character                   */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN PerlDoubleQuoteStringToken = {
  U "\"\"-string",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "\"" },		/* strings begin with a " character                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* inside, any character at all is OK               */
  U "\\",		/* within strings, \\ is the escape character       */
  AllCharacters,	/* after escape char, any character at all is OK    */
  U "", U "",		/* no "inner escapes"; no end innner escape         */
  U "",			/* no bracketing delimiter			    */
  U "\"",		/* strings end with a " character                   */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN PerlBackQuoteStringToken = {
  U "``-string",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "`" },		/* strings begin with a ` character                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* inside, any character at all is OK               */
  U "\\",		/* within strings, \\ is the escape character       */
  AllCharacters,	/* after escape char, any character at all is OK    */
  U "", U "",		/* no "inner escapes"; no end innner escape         */
  U "",			/* no bracketing delimiter			    */
  U "`",		/* strings end with a ` character                   */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


TOKEN PerlQTypeStringToken = {
  U "q-type string",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "q", U "qq", U "qx", U "qw", U "qr", U "m" },/* q-type string begins  */
  { SepPunct },		/* start2 can be any punctuation character	    */
  { BktPunct },		/* bracketing delimiters to match SepPunct	    */
  { EndPunct },		/* end2 must match start2			    */
  AllCharacters,	/* inside, any character at all is OK               */
  U "\\",		/* within strings, \\ is the escape character       */
  AllCharacters,	/* after escape char, any character at all is OK    */
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* will be using bracket2 for bracket delimiter     */
  U "",			/* will be using end2 for the end delimiter here    */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

TOKEN PerlSTypeStringToken = {
  U "s-type string",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PS",		/* Lout command for formatting strings              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "s", U "y", U "tr" }, /* s-type strings begin with these		    */
  { SepPunct },		/* start2 can be any punctuation character	    */
  { BktPunct },		/* bracketing delimiters to match SepPunct	    */
  { EndPunct },		/* end2 must match start2			    */
  AllCharacters,	/* inside, any character at all is OK               */
  U "\\",		/* within strings, \\ is the escape character       */
  AllCharacters,	/* after escape char, any character at all is OK    */
  U "",			/* strings do not permit "inner escapes"            */
  U "",			/* and so there is no end innner escape either      */
  U "",			/* will be using bracket2 for bracket delimiter     */
  U "",			/* will be using end2 for the end delimiter here    */
  false,		/* end delimiter does not have to be at line start  */
  true,			/* need to see end delimiter twice to stop	    */
};


/*****************************************************************************/
/*                                                                           */
/*  Perl "bare" regular expressions                                          */
/*                                                                           */
/*  By a bare regular expression, we mean one that is not preceded by m.     */
/*  These are distinguished from division by being preceded by one of (, =,  */
/*  =~, !~, split, if, and, &&, or, ||, xor, not, !, unless, for, foreach,   */
/*  or while, with up to two white space characters intervening.  Also,      */
/*  a / at the start of a line is taken to begin a regular expression.       */
/*                                                                           */
/*****************************************************************************/

/* PerlREToken(start, com): initializer for one "bare regex" token.         */
/*   start - the text that must come just before the regular expression     */
/*   com   - the Lout command printed for the token                         */
/* The three rows without comments sit in the start2, bracket2 and end2     */
/* positions: start2 is the opening / after at most two blanks or tabs,     */
/* bracket2 is empty, and end2 is the closing /, one entry per start2.      */
#define PerlREToken(start, com)						\
{									\
  U "regex",		/* used by error messages                 */	\
  PRINT_NODELIMS_QUOTED,/* no delims since we supply them         */	\
  U com,			/* the command                    */	\
  U "",			/* no alternate command                   */	\
  U "@PS{\"/\"}",		/* following command (final /)    */	\
  false,		/* token allowed not just start of line   */	\
  { U start },		/* preceding token in this case           */	\
  { U "/", U " /", U "\t/", U "  /", U " \t/", U "\t /", U "\t\t/" },	\
  { U "",  U "",   U "",    U "",    U "",     U "",     U ""      },	\
  { U "/", U  "/", U "/",   U "/",   U "/",    U "/",    U "/"     },	\
  AllCharacters,	/* any character OK inside                */	\
  U "\\",		/* \\ is the escape character             */	\
  AllCharacters,	/* after escape char, any is OK           */	\
  U "",			/* no inner escapes                       */	\
  U "",			/* no end inner escape either             */	\
  U "",			/* will be using bracket2 here            */	\
  U "",			/* will be using end2 here                */	\
  false,		/* no need to end at line start           */	\
  false,		/* don't want end delimiter twice         */	\
}


/* One token for each context in which a following / opens a regular       */
/* expression.  The first argument is the preceding text; the second is    */
/* the Lout command, which prints that text with @PO or @PK, then the      */
/* opening / with @PS, and ends in the @PS applied to the regex body.      */
/* Only the "(" form puts no space between the preceding text and the /.   */
TOKEN PerlRegExpLPar    = PerlREToken("(",      "@PO{\"(\"}@PS{\"/\"}@PS");
TOKEN PerlRegExpEq      = PerlREToken("=",      "@PO{\"=\"} @PS{\"/\"}@PS");
TOKEN PerlRegExpMatch   = PerlREToken("=~",     "@PO{\"=~\"} @PS{\"/\"}@PS");
TOKEN PerlRegExpNoMatch = PerlREToken("!~",     "@PO{\"!~\"} @PS{\"/\"}@PS");
TOKEN PerlRegExpSplit   = PerlREToken("split",  "@PK{split} @PS{\"/\"}@PS");
TOKEN PerlRegExpIf      = PerlREToken("if",     "@PK{if} @PS{\"/\"}@PS");
TOKEN PerlRegExpAnd     = PerlREToken("and",    "@PK{and} @PS{\"/\"}@PS");
TOKEN PerlRegExpAnd2    = PerlREToken("&&",     "@PO{\"&&\"} @PS{\"/\"}@PS");
TOKEN PerlRegExpOr      = PerlREToken("or",     "@PK{or} @PS{\"/\"}@PS");
TOKEN PerlRegExpOr2     = PerlREToken("||",     "@PO{\"||\"} @PS{\"/\"}@PS");
TOKEN PerlRegExpXor     = PerlREToken("xor",    "@PK{xor} @PS{\"/\"}@PS");
TOKEN PerlRegExpNot     = PerlREToken("not",    "@PK{not} @PS{\"/\"}@PS");
TOKEN PerlRegExpNot2    = PerlREToken("!",      "@PO{\"!\"} @PS{\"/\"}@PS");
TOKEN PerlRegExpUnless  = PerlREToken("unless", "@PK{unless} @PS{\"/\"}@PS");
TOKEN PerlRegExpFor     = PerlREToken("for",    "@PK{for} @PS{\"/\"}@PS");
TOKEN PerlRegExpForEach = PerlREToken("foreach","@PK{foreach} @PS{\"/\"}@PS");
TOKEN PerlRegExpWhile   = PerlREToken("while",  "@PK{while} @PS{\"/\"}@PS");

/* A regular expression whose opening / is at the start of a line.  The    */
/* token is allowed only at line start, so no preceding text is matched    */
/* and the whole token, both slashes included, is printed with @PS.        */
TOKEN PerlRegExpStartLineToken =
{
  U "regex",		/* used by error messages                 */
  PRINT_WHOLE_QUOTED,	/* we can print the whole thing this time */
  U "@PS",		/* the command                            */
  U "",			/* no alternate command                   */
  U "",			/* no following command                   */
  true,			/* token allowed only at start of line    */
  { U "/" },		/* starting delimiter (so easy!)          */
  { NULL },		/* no start2				  */
  { NULL },		/* so no bracket2 either		  */
  { NULL },		/* so no end2 either			  */
  AllCharacters,	/* any character OK inside                */
  U "\\",		/* \\ is the escape character             */
  AllCharacters,	/* after escape char, any is OK           */
  U "",			/* no inner escapes                       */
  U "",			/* no end inner escape either             */
  U "",			/* no bracketing delimiter                */
  U "/",		/* ending delimiter                       */
  false,		/* no need to end at line start           */
  false,		/* don't want end delimiter twice         */
};


/*****************************************************************************/
/*                                                                           */
/*  Perl's here-documents [OBSOLETE CODE - see following for replacement]    */
/*                                                                           */
/*  At present the only terminating strings recognized are EOT, EOF, END,    */
/*  and the empty string.  These may all be quoted in the usual ways.        */
/*                                                                           */
/*****************************************************************************/

/* X(startstr, endstr, startcom, endcom): initializer for the obsolete     */
/* here-document tokens.  startstr and endstr are the delimiters, startcom */
/* is the command and endcom the following command.  The string fields     */
/* here are not wrapped in U.  The only uses of X visible in this section  */
/* are inside the commented-out definitions that follow.                   */
#define X(startstr, endstr, startcom, endcom)				\
{									\
  "here-document",	/* used by error messages                 */	\
  PRINT_NODELIMS_QUOTED,/* no delims since we supply them         */	\
  startcom,		/* the command                            */	\
  "",			/* no alternate command                   */	\
  endcom,		/* following command                      */	\
  false,		/* token allowed not just start of line   */	\
  { startstr },		/* starting delimiter                     */	\
  { NULL },		/* no start2				  */	\
  { NULL },		/* so no bracket2 either		  */	\
  { NULL },		/* no end2				  */	\
  AllCharacters,	/* any character OK inside                */	\
  "", "",		/* no escape character                    */	\
  "", "",		/* no inner escapes                       */	\
  "",			/* no bracketing delimiter		  */	\
  endstr,		/* token ends with this                   */	\
  true,			/* must be found at line start		  */	\
  false,		/* don't want end delimiter twice         */	\
}

/* Following commands for the obsolete X() tokens: each one prints the     */
/* terminating string (EOT, EOF, END or empty) with @PS on a line of its   */
/* own.  The only uses visible in this section are commented out.          */
#define sEOT	"\n@PS{\"EOT\"}\n"
#define sEOF	"\n@PS{\"EOF\"}\n"
#define sEND	"\n@PS{\"END\"}\n"
#define sBLA	"\n@PS{\"\"}\n"

/* ***
TOKEN HereEOTuq = X("<<EOT",    "EOT\n", "@PO{<<}@PS{\"EOT\"}@PS",        sEOT);
TOKEN HereEOTdq = X("<<\"EOT\"","EOT\n", "@PO{<<}@PS{\"\\\"EOT\\\"\"}@PS",sEOT);
TOKEN HereEOTfq = X("<<'EOT'",  "EOT\n", "@PO{<<}@PS{\"'EOT'\"}@PS",      sEOT);
TOKEN HereEOTbq = X("<<`EOT`",  "EOT\n", "@PO{<<}@PS{\"`EOT`\"}@PS",      sEOT);

TOKEN HereEOFuq = X("<<EOF",    "EOF\n", "@PO{<<}@PS{\"EOF\"}@PS",        sEOF);
TOKEN HereEOFdq = X("<<\"EOF\"","EOF\n", "@PO{<<}@PS{\"\\\"EOF\\\"\"}@PS",sEOF);
TOKEN HereEOFfq = X("<<'EOF'",  "EOF\n", "@PO{<<}@PS{\"'EOF'\"}@PS",      sEOF);
TOKEN HereEOFbq = X("<<`EOF`",  "EOF\n", "@PO{<<}@PS{\"`EOF`\"}@PS",      sEOF);

TOKEN HereENDuq = X("<<END",    "END\n", "@PO{<<}@PS{\"END\"}@PS",        sEND);
TOKEN HereENDdq = X("<<\"END\"","END\n", "@PO{<<}@PS{\"\\\"END\\\"\"}@PS",sEND);
TOKEN HereENDfq = X("<<'END'",  "END\n", "@PO{<<}@PS{\"'END'\"}@PS",      sEND);
TOKEN HereENDbq = X("<<`END`",  "END\n", "@PO{<<}@PS{\"`END`\"}@PS",      sEND);

TOKEN HereBLAuq = X("<< ",      "\n",    "@PO{<<} @PS",                  sBLA);
TOKEN HereBLAdq = X("<<\"\"",   "\n",    "@PO{<<}@PS{\"\\\"\\\"\"}@PS",   sBLA);
TOKEN HereBLAfq = X("<<''",     "\n",    "@PO{<<}@PS{\"''\"}@PS",         sBLA);
TOKEN HereBLAbq = X("<<``",     "\n",    "@PO{<<}@PS{\"``\"}@PS",         sBLA);
*** */


/*****************************************************************************/
/*                                                                           */
/*  Perl's here-documents (replacement for the obsolete code above)          */
/*                                                                           */
/*  At present the only terminating strings recognized are EOT, EOF, END,    */
/*  and the empty string.  These may all be quoted in the usual ways.        */
/*                                                                           */
/*****************************************************************************/

/* HereToken(startstr, endstr): initializer for one here-document token.   */
/*   startstr - the text that opens the here-document                      */
/*   endstr   - the text that closes it; it must be found at line start    */
/* The whole token, delimiters included, is printed as a string with @PS.  */
#define HereToken(startstr, endstr)					\
{									\
  U "here-document",	/* used by error messages                 */	\
  PRINT_WHOLE_QUOTED,	/* as usual                               */	\
  U "@PS",		/* here documents are strings             */	\
  U "",			/* no alternate command                   */	\
  U "",			/* no following command                   */	\
  false,		/* token allowed not just start of line   */	\
  { U startstr },	/* starting delimiter                     */	\
  { NULL },		/* no start2				  */	\
  { NULL },		/* no bracket2				  */	\
  { NULL },		/* no end2				  */	\
  AllCharacters,	/* any character OK inside                */	\
  U "", U "",		/* no escape character                    */	\
  U "", U "",		/* no inner escapes                       */	\
  U "",			/* no bracketing delimiter		  */	\
  U endstr,		/* token ends with this                   */	\
  true,			/* must be found at line start		  */	\
  false,		/* don't want end delimiter twice         */	\
}

/* Each terminator comes in four spellings: unquoted (uq), double-quoted   */
/* (dq), single-quoted (fq) and back-quoted (bq).                          */

/* terminator EOT */
TOKEN HereEOTuq = HereToken("<<EOT", "EOT\n");
TOKEN HereEOTdq = HereToken("<<\"EOT\"", "EOT\n");
TOKEN HereEOTfq = HereToken("<<'EOT'", "EOT\n");
TOKEN HereEOTbq = HereToken("<<`EOT`", "EOT\n");

/* terminator EOF */
TOKEN HereEOFuq = HereToken("<<EOF", "EOF\n");
TOKEN HereEOFdq = HereToken("<<\"EOF\"", "EOF\n");
TOKEN HereEOFfq = HereToken("<<'EOF'", "EOF\n");
TOKEN HereEOFbq = HereToken("<<`EOF`", "EOF\n");

/* terminator END */
TOKEN HereENDuq = HereToken("<<END", "END\n");
TOKEN HereENDdq = HereToken("<<\"END\"", "END\n");
TOKEN HereENDfq = HereToken("<<'END'", "END\n");
TOKEN HereENDbq = HereToken("<<`END`", "END\n");

/* empty terminator: the end string is a newline found at line start */
TOKEN HereBLAuq = HereToken("<< ", "\n");
TOKEN HereBLAdq = HereToken("<<\"\"", "\n");
TOKEN HereBLAfq = HereToken("<<''", "\n");
TOKEN HereBLAbq = HereToken("<<``", "\n");


/*****************************************************************************/
/*                                                                           */
/*  Perl's identifiers                                                       */
/*                                                                           */
/*      "Names that start with a letter or underscore may be of any          */
/*       length ... and may contain letters, digits, and underscores.        */
/*       Names that start with a digit may only contain more digits.         */
/*       Names that start with anything else are limited to that one         */
/*       character (like $? or $$) and generally have a predefined           */
/*       significance to Perl."  (WCS page 38)                               */
/*                                                                           */
/*  In addition we have to consider that variable names may be preceded      */
/*  by $, @, %, &, or *.  Whether these are part of the variable or not is   */
/*  rather doubtful.  We will treat $, @ and % as part of the variable and   */
/*  the others not (since they occur elsewhere in the token list anyway);    */
/*  plus we have a separate token type for identifiers beginning with $ and  */
/*  followed by one character, which we will list explicitly.                */
/*  We also deal with the $^. variables, e.g. $^W.                           */
/*                                                                           */
/*****************************************************************************/

/* Identifier: begins with one of the listed start strings and continues   */
/* with any run of Letter_Digit characters.  It is printed with @PI, with  */
/* @PK as the alternate command for keywords.                              */
TOKEN PerlIdentifierToken = {
  U "identifier",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PI",		/* Lout command for formatting identifiers          */
  U "@PK",		/* Alternate command (for keywords)                 */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { SepLetters, U "_", U "$", U "@", PercentLetters},	/* ident. starts    */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  Letter_Digit,		/* inside, letters, underscores, digits are OK      */
  U "",			/* no escape character within identifiers           */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* identifiers do not permit "inner escapes"        */
  U "",			/* and so there is no end inner escape either       */
  U "",			/* no bracketing delimiter			    */
  U "",			/* identifiers do not end with a delimiter          */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/* Special (punctuation) variables.  Every start string is a complete      */
/* token: nothing is allowed inside, so the token ends right after it.     */
TOKEN PerlSpecialIdentifierToken = {
  U "special variable",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PI",		/* Lout command for formatting identifiers          */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  {
    /* Taken from 5.6.0's perlvar.pod */
    /* NB special variables that begin $^, e.g. $^D can also be written as */
    /* dollar control D (yes literal control D) -- but we ignore this      */
    /* wrinkle. We only list the first 9 regex match variables.            */

    /* Only the ones not recognized elsewhere are being kept now */
    U "$&",  U "$`",  U "$'",  U "$+",  U "@+",  U "$*",  U "$.",  U "$/",
    U "$|",  U "$,",  U "$\\", U "$\"", U "$;",  U "$#",  U "$%",  U "$=",
    U "$-",  U "@-",  U "$~",  U "$^",  U "$:",  U "$^L", U "$^A", U "$?",
    U "$!",  U "$^E", U "$@",  U "$$",  U "$<",  U "$>",  U "$(",  U "$)",
    U "$0",  U "$[",  U "$]",  U "$^C", U "$^D", U "$^F", U "$^H", U "%^H",
    U "$^I", U "$^M", U "$^O", U "$^P", U "$^R", U "$^S", U "$^T", U "$^V",
    U "$^W", U "${^WARNING_BITS}", U "${^WIDE_SYSTEM_CALLS}", U "$^X",
  },
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  U "",			/* nothing allowed inside, since ends after start   */
  U "",			/* no escape character within identifiers           */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* identifiers do not permit "inner escapes"        */
  U "",			/* and so there is no end inner escape either       */
  U "",			/* no bracketing delimiter			    */
  U "",			/* identifiers do not end with a delimiter          */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Perl's numeric literals                                                  */
/*                                                                           */
/*  These are defined in WCS page 39 basically by giving these examples:     */
/*                                                                           */
/*      12345           # integer                                            */
/*      12345.67        # floating point                                     */
/*      6.02E23         # scientific notation                                */
/*      0xffff          # hexadecimal                                        */
/*      0377            # octal                                              */
/*      4_294_967_296   # underline for legibility                           */
/*                                                                           */
/*  Implementation is straightforward; hexadecimal is a separate token.      */
/*  Binary numbers introduced with 5.6.0 of the form 0b1010 are also         */
/*  catered for.                                                             */
/*                                                                           */
/*****************************************************************************/

/* Decimal number: a start taken from SepDigits, then any run of digits,   */
/* '.', 'e', 'E' and '_'.  '+' and '-' are not in the inside set, so a     */
/* sign after the exponent letter is not absorbed into the token.          */
TOKEN PerlLiteralNumberToken = {
  U "number",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PN",		/* Lout command for formatting numbers              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { SepDigits },	/* numbers must begin with a digit                  */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  U "0123456789.eE_",	/* inside, digits, point, exponent, underscore      */
  U "",			/* no escape character within numbers               */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* numbers do not permit "inner escapes"            */
  U "",			/* and so there is no end inner escape either       */
  U "",			/* no bracketing delimiter			    */
  U "",			/* numbers do not end with a delimiter              */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* Hexadecimal number: 0x followed by any run of hexadecimal digits.       */
/* Underscores are accepted inside, as they are in decimal numbers, so     */
/* that a literal such as 0xdead_beef is one token and is not split at     */
/* the underscore.                                                         */
TOKEN PerlHexNumberToken = {
  U "number",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PN",		/* Lout command for formatting numbers              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "0x" },		/* hex numbers must begin with 0x		    */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  U "0123456789AaBbCcDdEeFf_",	/* inside, hex digits and underscore	    */
  U "",			/* no escape character within numbers               */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* numbers do not permit "inner escapes"            */
  U "",			/* and so there is no end inner escape either       */
  U "",			/* no bracketing delimiter			    */
  U "",			/* numbers do not end with a delimiter              */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/* Binary number: 0b followed by any run of binary digits.  Underscores    */
/* are accepted inside, as they are in decimal numbers, so that a literal  */
/* such as 0b1111_0000 is one token and is not split at the underscore.    */
TOKEN PerlBinaryNumberToken = {
  U "number",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PN",		/* Lout command for formatting numbers              */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "0b" },		/* binary numbers must begin with 0b		    */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  U "01_",        	/* inside, binary digits and underscore		    */
  U "",			/* no escape character within numbers               */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* numbers do not permit "inner escapes"            */
  U "",			/* and so there is no end inner escape either       */
  U "",			/* no bracketing delimiter			    */
  U "",			/* numbers do not end with a delimiter              */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Perl's comments                                                          */
/*                                                                           */
/*  "Comments are indicated by the # character and extend to the end of      */
/*  the line." (WCS page 35).  To this we have added the usual Lout escape   */
/*  comment beginning with #@.                                               */
/*                                                                           */
/*****************************************************************************/

/* Ordinary comment: # followed by any run of AllPrintablePlusTab          */
/* characters.  There is no end delimiter; the token stops at the first    */
/* character outside that set.                                             */
TOKEN PerlCommentToken = {
  U "comment",		/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print this token in quotes etc. as usual         */
  U "@PC",		/* Lout command for formatting comments             */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "#" },		/* comments begin with this character		    */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK (not NL)        */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* Perl comments do not permit "inner escapes"      */
  U "",			/* and so there is no end inner escape either       */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no end delimiter (end of line will end it)       */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* Lout escape comment: begins with #@ and is printed raw, with no         */
/* delimiters, no quoting and no Lout command of its own.                  */
TOKEN PerlCommentEscapeToken = {
  U "Lout escape",	/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED, /* print this token unformatted                  */
  U "",			/* no Lout command since we are printing raw        */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  false,		/* token allowed anywhere, not just start of line   */
  { U "#@" },		/* comments begin with this character pair	    */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char is OK                 */
  U "",			/* no escape character within comments              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* no "inner escape" in escape comments             */
  U "",			/* so no end of "inner escape" either               */
  U "",			/* no bracketing delimiter			    */
  U "",			/* no end delimiter (end of line will end it)       */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Perl's POD sub-language                                                  */
/*                                                                           */
/*  Pod is handled as a completely different language.  However we need      */
/*  one Perl token which recognizes an entire Pod interpolation and prints   */
/*  it enclosed in @Pod { ... } so that Lout knows to call back later on it. */
/*                                                                           */
/*  "A line beginning with = is assumed to introduce some documentation,     */
/*  which continues until another line is reached beginning with =cut"       */
/*  (WCS page 36).  Strictly speaking this is only valid at points where     */
/*  a statement would be legal, but that is beyond prg2lout to implement.    */
/*                                                                           */
/*****************************************************************************/

/* A whole Pod interpolation: opens with = at the start of a line and      */
/* runs to a =cut that is also at the start of a line.  It is printed      */
/* unquoted inside "@DP @Pod" and followed by "@DP".  Both "=" and "=pod"  */
/* are listed as starts, although "=" alone already matches any line that  */
/* begins with "=pod".                                                     */
TOKEN PerlPodToken = {
  U "perl-pod",		/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED,  /* unquoted but with a command enclosing it     */
  U "@DP @Pod",		/* Lout command for formatting Pod                  */
  U "",			/* no alternate command                             */
  U "@DP\n",		/* following command                                */
  true,			/* token allowed at start of line only              */
  { U "=", U "=pod" },	/* pod insert begins with either of these	    */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* inside, any character at all is OK               */
  U "",			/* no escape character within pod comments          */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* pod comments do not permit "inner escapes"       */
  U "",			/* and so there is no end inner escape either       */
  U "",			/* no bracketing delimiter			    */
  U "=cut",		/* pod comments end with this string                */
  true,			/* end delimiter must be at line start		    */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Perl's operators                                                         */
/*                                                                           */
/*  Only those not already in the C/C++ list are given here.                 */
/*                                                                           */
/*****************************************************************************/

/* All of these are fixed strings printed with the operator command @PO.   */

/* arithmetic */
TOKEN PerlIncrementToken     = FixedToken("++", "@PO");
TOKEN PerlDecrementToken     = FixedToken("--", "@PO");
TOKEN PerlExponentiateToken  = FixedToken("**", "@PO");

/* pattern binding */
TOKEN PerlMatchToken         = FixedToken("=~", "@PO");
TOKEN PerlNotMatchToken      = FixedToken("!~", "@PO");

/* equality and assignment */
TOKEN PerlEqualToken         = FixedToken("==", "@PO");
TOKEN PerlAssignToken        = FixedToken("=", "@PO");

/* shifts and three-way comparison */
TOKEN PerlBitLeftShiftToken  = FixedToken("<<", "@PO");
TOKEN PerlBitRightShiftToken = FixedToken(">>", "@PO");
TOKEN PerlSpaceshipToken     = FixedToken("<=>", "@PO");

/* logical */
TOKEN PerlAndToken           = FixedToken("&&", "@PO");
TOKEN PerlOrToken            = FixedToken("||", "@PO");

/* ranges */
TOKEN PerlRange2Token        = FixedToken("..", "@PO");
TOKEN PerlRange3Token        = FixedToken("...", "@PO");


/*****************************************************************************/
/*                                                                           */
/*  FlagToken - for -r and the rest (followed by white space)                */
/*                                                                           */
/*****************************************************************************/

#define FlagToken(str, command) /* str followed by white space */	\
{									\
  U str,		/* name used for debugging only       */	\
  PRINT_WHOLE_QUOTED,	/* print the whole token, quoted      */	\
  U command,		/* Lout command for formatting this   */	\
  U "",			/* no alternate command               */	\
  U "",			/* no following command               */	\
  false,		/* token not just start of line       */	\
  { U str },		/* token begins (and ends!) with this */	\
  { U " ", U "\t" },	/* start2: a following space or tab   */	\
  { U "",  U ""   },	/* no bracket2 for either start2      */	\
  { U "",  U ""   },	/* no end2 for either start2          */	\
  U "",			/* no characters allowed inside       */	\
  U "", U "",		/* no escape character                */	\
  U "", U "",		/* no inner escape; no end inner esc  */	\
  U "",			/* no bracketing delimiter            */	\
  U "",			/* no ending delimiter                */	\
  false,		/* end need not be at line start      */	\
  false,		/* no need to see end delim twice     */	\
}

/* One FlagToken per Perl file-test letter, all formatted with "@PO".  Via   */
/* FlagToken's start2 list, each is recognized only when a space or tab      */
/* follows the two-character flag.                                           */
TOKEN PerlFileTestrToken        = FlagToken( "-r",  "@PO" ) ;
TOKEN PerlFileTestwToken        = FlagToken( "-w",  "@PO" ) ;
TOKEN PerlFileTestxToken        = FlagToken( "-x",  "@PO" ) ;
TOKEN PerlFileTestoToken        = FlagToken( "-o",  "@PO" ) ;
TOKEN PerlFileTestRToken        = FlagToken( "-R",  "@PO" ) ;
TOKEN PerlFileTestWToken        = FlagToken( "-W",  "@PO" ) ;
TOKEN PerlFileTestXToken        = FlagToken( "-X",  "@PO" ) ;
TOKEN PerlFileTestOToken        = FlagToken( "-O",  "@PO" ) ;
TOKEN PerlFileTesteToken        = FlagToken( "-e",  "@PO" ) ;
TOKEN PerlFileTestzToken        = FlagToken( "-z",  "@PO" ) ;
TOKEN PerlFileTestsToken        = FlagToken( "-s",  "@PO" ) ;
TOKEN PerlFileTestfToken        = FlagToken( "-f",  "@PO" ) ;
TOKEN PerlFileTestdToken        = FlagToken( "-d",  "@PO" ) ;
TOKEN PerlFileTestlToken        = FlagToken( "-l",  "@PO" ) ;
TOKEN PerlFileTestpToken        = FlagToken( "-p",  "@PO" ) ;
TOKEN PerlFileTestSToken        = FlagToken( "-S",  "@PO" ) ;
TOKEN PerlFileTestbToken        = FlagToken( "-b",  "@PO" ) ;
TOKEN PerlFileTestcToken        = FlagToken( "-c",  "@PO" ) ;
TOKEN PerlFileTesttToken        = FlagToken( "-t",  "@PO" ) ;
TOKEN PerlFileTestuToken        = FlagToken( "-u",  "@PO" ) ;
TOKEN PerlFileTestgToken        = FlagToken( "-g",  "@PO" ) ;
TOKEN PerlFileTestkToken        = FlagToken( "-k",  "@PO" ) ;
TOKEN PerlFileTestTToken        = FlagToken( "-T",  "@PO" ) ;
TOKEN PerlFileTestBToken        = FlagToken( "-B",  "@PO" ) ;
TOKEN PerlFileTestMToken        = FlagToken( "-M",  "@PO" ) ;
TOKEN PerlFileTestAToken        = FlagToken( "-A",  "@PO" ) ;
TOKEN PerlFileTestCToken        = FlagToken( "-C",  "@PO" ) ;


/*****************************************************************************/
/*                                                                           */
/*  Pod (Plain Old Documentation, used with Perl) tokens                     */
/*                                                                           */
/*  Pod is treated as a completely different language to Perl.  It is        */
/*  quite possible to use Pod alone without Perl; or, thanks to the          */
/*  PerlPodToken, to embed Pod in Perl in the usual way.  Quotations below   */
/*  are from Larry Wall's documentation, communicated by Mark Summerfield.   */
/*                                                                           */
/*****************************************************************************/

/*****************************************************************************/
/*                                                                           */
/*  Pod Verbatim paragraphs                                                  */
/*                                                                           */
/*  "A verbatim paragraph [is] distinguished by being indented (that is, it  */
/*   starts with a space or tab).  It should be reproduced exactly, with     */
/*   tabs assumed to be on 8-column boundaries.  There are no special        */
/*   formatting escapes."                                                    */
/*                                                                           */
/*  By a "paragraph" is meant a sequence of lines down to the next empty     */
/*  line; but we will handle verbatim paragraphs one line at a time.         */
/*  Also, an empty line in the input has to become an empty line in output.  */
/*                                                                           */
/*****************************************************************************/

/* One line of a Pod verbatim paragraph: a line whose first character is a   */
/* tab or a space.                                                           */
TOKEN PodVerbatimLineToken = {
  U "verbatim-para",	/* used by error messages involving this token      */
  PRINT_WHOLE_QUOTED,	/* print the whole line, quoted                     */
  U "@PV ",		/* Lout command for formatting verbatim line        */
  U "", U "",		/* no alternate command; no following command       */
  true,			/* token allowed at start of line only              */
  { U "\t", U " " },	/* token begins with a tab or a space               */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllPrintablePlusTab,	/* inside, any printable char except newline is OK  */
  U "", U "",		/* no escape character within verbatim lines        */
  U "", U "",		/* no "inner escapes" within verbatim lines         */
  U "",			/* no bracketing delimiter                          */
  U "",			/* no ending delimiter: ends at end of line         */
  false,		/* don't need to be at start of line to end it      */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* An empty input line in Pod: a newline found at the start of a line.  Only */
/* the command ("@PPG" plus a newline) is printed for it.                    */
TOKEN PodEmptyLineToken = {
  U "pod-empty-line",	/* used by error messages involving this token      */
  PRINT_COMMAND_ONLY,	/* printing just the command                        */
  U "@PPG\n",		/* Lout command for formatting Pod empty line       */
  U "", U "",		/* no alternate command; no following command       */
  true,			/* token allowed at start of line only              */
  { U "\n" },		/* token begins with a newline                      */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  U "",			/* nothing inside                                   */
  U "", U "",		/* no escape character                              */
  U "", U "",		/* no inner escape                                  */
  U "",			/* no bracketing delimiter                          */
  U "",			/* no ending delimiter                              */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Pod Command paragraphs                                                   */
/*                                                                           */
/*  "All command paragraphs start with =, followed by an identifier,         */
/*   followed by arbitrary text that the command can use."                   */
/*                                                                           */
/*  "[A] command lasts up until the end of the paragraph, not the line.      */
/*   Hence, ... you can see the empty lines after each command to end        */
/*   its paragraph."                                                         */
/*                                                                           */
/*****************************************************************************/

/*****************************************************************************/
/*                                                                           */
/*  Pod command paragraphs: =pod                                             */
/*                                                                           */
/*  "The =pod directive does nothing beyond telling the compiler to lay off  */
/*   parsing code through the next =cut."                                    */
/*                                                                           */
/*****************************************************************************/

/* A =pod or =cut command paragraph: the command string is empty and only    */
/* the command is printed, so the paragraph contributes nothing to output.   */
TOKEN PodIgnoreToken = {
  U "pod-cut",		/* used by error messages involving this token      */
  PRINT_COMMAND_ONLY,	/* printing just the command                        */
  U "",			/* Lout command for formatting Pod cut (nothing)    */
  U "",			/* no alternate command                             */
  U "",			/* no following command                             */
  true,			/* token allowed at start of line only              */
  { U "=pod", U "=cut" }, /* token begins with =pod or =cut                 */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* anything at all can be inside                    */
  U "",			/* no escape character                              */
  U "",			/* so nothing legal after escape char either        */
  U "",			/* cut tokens do not permit "inner escapes"         */
  U "",			/* and so there is no end inner escape either       */
  U "",			/* no bracketing delimiter                          */
  U "\n",			/* ending delimiter is a newline ...                */
  true,			/* ... which has to be at a line start (empty line) */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Pod command paragraphs: =head1, =head2 (and =head3, folklore extension)  */
/*                                                                           */
/*****************************************************************************/

/* A =head1 command paragraph, formatted with @PHA; its text is formatted    */
/* recursively (PRINT_NODELIMS_INNER).                                       */
/* NOTE(review): the second start string "head1" has no leading '=', and     */
/* PodHeading2Token / PodHeading3Token list only the '=' form -- confirm     */
/* that this is intentional.                                                 */
/* NOTE(review): end2 is "\n" while the ending delimiter is "\n\n"; which of */
/* the two applies once a start2 has matched is decided by code outside this */
/* view -- confirm.                                                          */
TOKEN PodHeading1Token = {
  U "=head1",		/* used by error messages involving this token      */
  PRINT_NODELIMS_INNER,	/* print without delimiters, formatting inner       */
  U "@PHA",		/* Lout command for formatting Pod heading          */
  U "", U "",		/* no alternate command; no following command       */
  true,			/* token allowed at start of line only              */
  {U "=head1", U "head1"}, /* token begins with one of these                */
  { U " ",  U "\t" },	/* helps to skip following white space		    */
  { U "",   U ""   },	/* no bracket2                        		    */
  { U "\n", U "\n" },	/* end2 for each start2: a newline                  */
  AllCharacters,	/* anything in the heading			    */
  U "", U "",		/* no escape character; nothing legal after escape  */
  U "", U "",		/* no inner escapes; no end inner escape            */
  U "",			/* no bracketing delimiter                          */
  U "\n\n",		/* ending delimiter: two consecutive newlines       */
  false,		/* end delimiter need not be at a line start        */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* A =head2 command paragraph, formatted with @PHB; its text is formatted    */
/* recursively (PRINT_NODELIMS_INNER).                                       */
TOKEN PodHeading2Token = {
  U "=head2",		/* used by error messages involving this token      */
  PRINT_NODELIMS_INNER,	/* print without delimiters, formatting inner       */
  U "@PHB",		/* Lout command for formatting Pod heading          */
  U "", U "",		/* no alternate command; no following command       */
  true,			/* token allowed at start of line only              */
  { U "=head2" },	/* token begins with this                           */
  { U " ",  U "\t" },	/* helps to skip following white space		    */
  { U "",   U ""   },	/* no bracket2                        		    */
  { U "\n", U "\n" },	/* end2 for each start2: a newline                  */
  AllCharacters,	/* anything in the heading			    */
  U "", U "",		/* no escape character; nothing legal after escape  */
  U "", U "",		/* no inner escapes; no end inner escape            */
  U "",			/* no bracketing delimiter                          */
  U "\n\n",		/* ending delimiter: two consecutive newlines       */
  false,		/* end delimiter need not be at a line start        */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* A =head3 command paragraph, formatted with @PHC; its text is formatted    */
/* recursively (PRINT_NODELIMS_INNER).                                       */
TOKEN PodHeading3Token = {
  U "=head3",		/* used by error messages involving this token      */
  PRINT_NODELIMS_INNER,	/* print without delimiters, formatting inner       */
  U "@PHC",		/* Lout command for formatting Pod heading          */
  U "", U "",		/* no alternate command; no following command       */
  true,			/* token allowed at start of line only              */
  { U "=head3" },	/* token begins with this                           */
  { U " ",  U "\t" },	/* helps to skip following white space		    */
  { U "",   U ""   },	/* no bracket2                        		    */
  { U "\n", U "\n" },	/* end2 for each start2: a newline                  */
  AllCharacters,	/* anything in the heading			    */
  U "", U "",		/* no escape character; nothing legal after escape  */
  U "", U "",		/* no inner escapes; no end inner escape            */
  U "",			/* no bracketing delimiter                          */
  U "\n\n",		/* ending delimiter: two consecutive newlines       */
  false,		/* end delimiter need not be at a line start        */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Pod command paragraphs: =over, =item, and =back (for lists)              */
/*                                                                           */
/*****************************************************************************/

/* A =over command paragraph.  What follows =over is printed unquoted after  */
/* the command, so it lands inside the labelwidth{@PLLW ...} option; the     */
/* following command then closes that option and opens a brace that the      */
/* first =item (or =back) closes.                                            */
TOKEN PodOverToken = {
  U "=over",		/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED, /* just a number after =over, so this is safe    */
  /* Lout command: opens a @RawTaggedList, leaving labelwidth{ unclosed     */
  U "@RawTaggedList gap{@PLG}indent{@PLI}rightindent{@PLRI}labelwidth{@PLLW ",
  U "",			/* no alternate command                             */
  U "} // {",		/* open brace to match } at first item              */
  true,			/* token allowed at start of line only              */
  { U "=over" },	/* token begins with this                           */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* anything at all can be inside                    */
  U "", U "",		/* no escape character; nothing legal after escape  */
  U "", U "",		/* no inner escapes; no end inner escape            */
  U "",			/* no bracketing delimiter                          */
  U "\n",		/* ending delimiter is a newline ...                */
  true,			/* ... which has to be at a line start (empty line) */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* A general =item command paragraph.  The command first closes the brace    */
/* left open by the preceding =over or =item, then starts a @DTI whose label */
/* is the item text; the following command opens the item's content brace.   */
TOKEN PodItemToken = {
  U "=item",		/* used by error messages involving this token      */
  PRINT_NODELIMS_INNER,	/* printing just what follows =item on the line     */
  U "@Null //}\n@DTI {@PLL", /* Lout command for formatting Pod item        */
  U "",			/* no alternate command                             */
  U "} {",		/* open brace to enclose the item content           */
  true,			/* token allowed at start of line only              */
  { U "=item" },	/* token begins with this                           */
  { U " ",    U "\t" },	/* helps to skip following white space		    */
  { U "",     U ""   },	/* no bracket2                        		    */
  { U "\n\n", U "\n\n"},/* token will end at blank line			    */
  AllPrintableTabNL,	/* any printable inside				    */
  U "", U "",		/* no escape character; nothing legal after escape  */
  U "", U "",		/* no inner escapes; no end inner escape            */
  U "", U "",		/* see brackets2[]; see ends2[]                     */
  false,		/* end delimiter need not be at a line start        */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* A =back command paragraph: closes the brace left open by the last item    */
/* and emits @EndList.  Only the command is printed.                         */
TOKEN PodBackToken = {
  U "=back",		/* used by error messages involving this token      */
  PRINT_COMMAND_ONLY,	/* printing just the command                        */
  U "@Null // }\n@EndList\n", /* Lout command for formatting Pod back       */
  U "", U "",		/* no alternate command; no following command       */
  true,			/* token allowed at start of line only              */
  { U "=back" },	/* token begins with this                           */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* anything inside (in principle)   		    */
  U "", U "",		/* no escape character; nothing legal after escape  */
  U "", U "",		/* no inner escapes; no end inner escape            */
  U "",			/* no bracketing delimiter                          */
  U "\n",		/* ending delimiter is a newline ...                */
  true,			/* ... which has to be at a line start (empty line) */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Pod narrow items; for these, we are confident in using @TI not @DTI      */
/*                                                                           */
/*****************************************************************************/

#define PodNarrowItemToken(tag, command)				\
{									\
  U "=item",		/* used by error messages		 */	\
  PRINT_NODELIMS_INNER,	/* printing just what follows =item      */	\
  U command,		/* Lout command for formatting Pod item  */	\
  U "",			/* no alternate command                  */	\
  U "}} {",		/* close two braces, open item content   */	\
  true,			/* token allowed at start of line only   */	\
  { U "=item", U "=item ", U "=item\t",	/* =item plus 0, 1 ...   */	\
    U "=item  ", U "=item \t", U "=item\t ", U "=item\t\t" }, /* or 2 */	\
  { U tag },		/* start2: the tag we recognize          */	\
  { U "" },		/* no bracket2                        	 */	\
  { U "\n\n", U "\n\n"  },	/* token will end at blank line  */	\
  AllPrintableTabNL,	/* any printable inside			 */	\
  U "", U "",		/* no escape character                   */	\
  U "", U "",		/* no inner escapes; no end inner escape */	\
  U "", U "",		/* see brackets2[]; see ends2[]          */	\
  false,		/* end need not be at a line start       */	\
  false,		/* don't need to see end delimiter twice */	\
}

/* =item paragraphs whose tag starts with "*" or a decimal digit.  Each      */
/* command closes the previously open brace, then starts a @TI whose label   */
/* begins with the tag character itself.                                     */
TOKEN PodItemBullet = PodNarrowItemToken("*", "@Null //}\n@TI {@PLL {*");
TOKEN PodItem0      = PodNarrowItemToken("0", "@Null //}\n@TI {@PLL {0");
TOKEN PodItem1      = PodNarrowItemToken("1", "@Null //}\n@TI {@PLL {1");
TOKEN PodItem2      = PodNarrowItemToken("2", "@Null //}\n@TI {@PLL {2");
TOKEN PodItem3      = PodNarrowItemToken("3", "@Null //}\n@TI {@PLL {3");
TOKEN PodItem4      = PodNarrowItemToken("4", "@Null //}\n@TI {@PLL {4");
TOKEN PodItem5      = PodNarrowItemToken("5", "@Null //}\n@TI {@PLL {5");
TOKEN PodItem6      = PodNarrowItemToken("6", "@Null //}\n@TI {@PLL {6");
TOKEN PodItem7      = PodNarrowItemToken("7", "@Null //}\n@TI {@PLL {7");
TOKEN PodItem8      = PodNarrowItemToken("8", "@Null //}\n@TI {@PLL {8");
TOKEN PodItem9      = PodNarrowItemToken("9", "@Null //}\n@TI {@PLL {9");


/*****************************************************************************/
/*                                                                           */
/*  Pod command paragraphs: =for, =begin, =end                               */
/*                                                                           */
/*  "passed directly to particular formatters.  A formatter that can utilize */
/*  that format will use the section, otherwise it will be ignored."  So     */
/*  I've put in a "=begin lout" token, also recognized as "=begin Lout".     */
/*                                                                           */
/*****************************************************************************/

/* A =for command paragraph: the command string is empty and only the        */
/* command is printed, so the paragraph contributes nothing to the output.   */
TOKEN PodForToken = {
  U "=for",		/* used by error messages involving this token      */
  PRINT_COMMAND_ONLY,	/* printing just the command                        */
  U "",			/* Lout command for formatting Pod for (nothing)    */
  U "", U "",		/* no alternate command; no following command       */
  true,			/* token allowed at start of line only              */
  { U "=for" },		/* token begins with this                           */
  { NULL }, { NULL },	/* no start2 needed; so no bracket2 either	    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* anything inside 				    */
  U "", U "",		/* no escape character; nothing legal after escape  */
  U "", U "",		/* no inner escapes; no end inner escape            */
  U "",			/* no bracketing delimiter                          */
  U "\n",		/* ending delimiter is a newline ...                */
  true,			/* ... which has to be at a line start (empty line) */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* A =begin ... =end section for some formatter other than Lout: the command */
/* string is empty and only the command is printed, so the whole section     */
/* contributes nothing to the output.                                        */
TOKEN PodBeginToken = {
  U "=begin",		/* used by error messages involving this token      */
  PRINT_COMMAND_ONLY,	/* printing just the command                        */
  U "",			/* Lout command for formatting Pod begin (nothing)  */
  U "", U "",		/* no alternate command; no following command       */
  true,			/* token allowed at start of line only              */
  { U "=begin" },	/* token begins with this                           */
  { NULL }, { NULL },	/* no start2 needed; so no bracket2 either	    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* anything inside 				    */
  U "", U "",		/* no escape character; nothing legal after escape  */
  U "", U "",		/* no inner escapes; no end inner escape            */
  U "",			/* no bracketing delimiter                          */
  U "=end",		/* token will end with the string =end              */
  true,			/* end delimiter has to be at a line start          */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* A "=begin lout" (or "=begin Lout") ... =end section: the text between the */
/* delimiters is printed without delimiters and unquoted.                    */
TOKEN PodBeginLoutToken = {
  U "=begin lout",	/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED,/* this is a Lout escape, no delims or quotes     */
  U "",			/* no Lout command needed for the section           */
  U "", U "",		/* no alternate command; no following command       */
  true,			/* token allowed at start of line only              */
  { U "=begin lout", U "=begin Lout" },	/* token begins with one of these   */
  { NULL }, { NULL },	/* no start2 needed; so no bracket2 either	    */
  { NULL },		/* so no end2 either				    */
  AllCharacters,	/* anything inside 				    */
  U "", U "",		/* no escape character; nothing legal after escape  */
  U "", U "",		/* no inner escapes; no end inner escape            */
  U "",			/* no bracketing delimiter                          */
  U "=end",		/* token will end with the string =end              */
  true,			/* end delimiter has to be at a line start          */
  false,		/* don't need to see end delimiter twice to stop    */
};


/*****************************************************************************/
/*                                                                           */
/*  Pod "Ordinary Block of Text" paragraphs                                  */
/*                                                                           */
/*  "It will be filled, and maybe even justified" - I'm setting the whole    */
/*  Pod in adjust @Break, and making sure that verbatim and command          */
/*  paragraphs don't get adjusted.  So no special requirements here, it      */
/*  should all happen without any explicit tokens, given that I've set       */
/*  the Pod language up to simply echo any characters (suitably quoted if    */
/*  necessary in Lout) that don't match anything else.                       */
/*                                                                           */
/*****************************************************************************/

/*****************************************************************************/
/*                                                                           */
/*  Pod interior sequences (recursive)                                       */
/*                                                                           */
/*  I<text>    Italicize text                                                */
/*  B<text>    Embolden text                                                 */
/*  S<text>    Text containing non-break spaces                              */
/*  C<code>    Code "render in typewriter font, or ..."                      */
/*                                                                           */
/*  Alternatively, instead of "<" .. ">" we may use "<< " .. " >>", or       */
/*  "<<< " .. " >>>", etc. (Note the whitespace.)                            */
/*                                                                           */
/*****************************************************************************/

#define RecursiveToken(str, command) /* Pod recursive token */		\
{									\
  U str,			/* name used for debugging only       */	\
  PRINT_NODELIMS_INNER,	/* recursively format the inside      */	\
  U command,		/* Lout command for formatting this   */	\
  U "", U "",		/* no alternate command; no following */	\
  false,		/* token not just start of line       */	\
  { U str },		/* token begins with this letter      */	\
  { U "<", U "<< ", U "<<< ", U "<<<< " }, /* start2: 1-4 '<'  */	\
  { U "",  U "",    U "",     U ""      }, /* no bracket2     */	\
  { U ">", U " >>", U " >>>", U " >>>>" }, /* matching end2   */	\
  AllCharacters,	/* anything inside (in fact, not used)*/	\
  U "", U "",		/* no escape character                */	\
  U "", U "",		/* no inner escape; no end inner esc  */	\
  U "",			/* will use brackets2 here            */	\
  U "",			/* will use end2 here                 */	\
  false,		/* end need not be at line start      */	\
  false,		/* no need to see end delim twice     */	\
}

/* I<>, B<>, S<> and C<>: each letter is paired with the Lout command that   */
/* is applied to its recursively formatted contents.                         */
TOKEN PodItalicToken	= RecursiveToken("I", "@PFI");
TOKEN PodBoldToken	= RecursiveToken("B", "@PFB");
TOKEN PodNoBreakToken	= RecursiveToken("S", "@OneCol");
TOKEN PodCodeToken	= RecursiveToken("C", "@PFC");


/*****************************************************************************/
/*                                                                           */
/*  Pod interior sequences (non-recursive)                                   */
/*                                                                           */
/*  L<name>    A link; these have an internal format I've not looked at yet. */
/*  F<file>    File name                                                     */
/*  X<index>   Index                                                         */
/*  Z<>        A zero-width space                                            */
/*                                                                           */
/*  Alternatively, instead of "<" .. ">" we may use "<< " .. " >>", or       */
/*  "<<< " .. " >>>", etc. (Note the whitespace.)                            */
/*                                                                           */
/*****************************************************************************/

#define InteriorToken(str, command, style) /* Pod delimited token */	\
{									\
  U str,		/* name used for debugging only       */	\
  style,		/* print style chosen by the caller   */	\
  U command,		/* Lout command for formatting this   */	\
  U "", U "",		/* no alternate command; no following */	\
  false,		/* token not just start of line       */	\
  { U str },		/* token begins with this letter      */	\
  { U "<", U "<< ", U "<<< ", U "<<<< " }, /* start2: 1-4 '<'  */	\
  { U "",  U "",    U "",     U ""      }, /* no bracket2     */	\
  { U ">", U " >>", U " >>>", U " >>>>" }, /* matching end2   */	\
  AllCharacters,	/* anything inside                    */	\
  U "", U "",		/* no escape character                */	\
  U "", U "",		/* no inner escape; no end inner esc  */	\
  U "",			/* will use brackets2 here            */	\
  U "",			/* will use end2 here                 */	\
  false,		/* end need not be at line start      */	\
  false,		/* no need to see end delim twice     */	\
}

/* F<>, L<> and X<> print their contents quoted, without the delimiters;     */
/* Z<> prints only its command, which is the empty string.                   */
TOKEN PodFileToken	= InteriorToken("F", "@PFF",    PRINT_NODELIMS_QUOTED);
TOKEN PodLinkToken	= InteriorToken("L", "@PFL",    PRINT_NODELIMS_QUOTED);
TOKEN PodIndexToken	= InteriorToken("X", "@PFX",    PRINT_NODELIMS_QUOTED);
TOKEN PodZeroToken	= InteriorToken("Z", "",        PRINT_COMMAND_ONLY);


/*****************************************************************************/
/*                                                                           */
/*  Pod interior sequences (escape sequences)                                */
/*                                                                           */
/*  E<escape>  A named character ("optional except in other interior         */
/*             sequences and when preceded by a capital letter")             */
/*                                                                           */
/*                E<lt>     A literal <                                      */
/*                E<gt>     A literal >                                      */
/*                E<sol>    A literal /                                      */
/*                E<verbar> A literal |                                      */
/*                E<n>      Character number n (probably in ASCII)           */
/*                E<html>   Some non-numeric HTML entity, such as E<Agrave>  */
/*                                                                           */
/*  PodNumCharToken not tested.                                              */
/*                                                                           */
/*****************************************************************************/

/* E<n> with n a run of decimal digits.  The digits are printed unquoted     */
/* between the command (a double quote then a backslash) and the following   */
/* command (a double quote).                                                 */
TOKEN PodNumCharToken = {
  U "E<>",		/* used by error messages involving this token      */
  PRINT_NODELIMS_UNQUOTED,/* we're doing these manually, since they're funny*/
  U "\"\\",		/* precede number with a double quote, backslash    */
  U "",			/* no alternate command                             */
  U "\"",		/* follow character number with "                   */
  false,		/* token not restricted to start of line            */
  { U "E<" },		/* token begins with this                           */
  { NULL },		/* no start2 needed				    */
  { NULL },		/* so no bracket2 either			    */
  { NULL },		/* so no end2 either				    */
  U "0123456789",	/* digits inside 				    */
  U "", U "",		/* no escape character                              */
  U "", U "",		/* no "inner escapes"                               */
  U "",			/* no bracketing delimiter                          */
  U ">",		/* token will end with > character                  */
  false,		/* end delimiter does not have to be at line start  */
  false,		/* don't need to see end delimiter twice to stop    */
};

/* PodEscapeToken(pod_seq, lout_cmd): brace initializer for a TOKEN whose    */
/* name and sole starting string are pod_seq and whose Lout command is       */
/* lout_cmd.  The token has no interior, no escapes and no end delimiter.    */
#define PodEscapeToken(pod_seq, lout_cmd)				\
{									\
  U pod_seq,		/* token name (debugging only)        */	\
  PRINT_COMMAND_ONLY,	/* print style for this token         */	\
  U lout_cmd,		/* Lout command printed for the token */	\
  U "",			/* alternate command: none            */	\
  U "",			/* following command: none            */	\
  false,		/* may occur anywhere on a line       */	\
  { U pod_seq },	/* the one string that starts it      */	\
  { NULL },		/* start2: unused                     */	\
  { NULL },		/* bracket2: unused                   */	\
  { NULL },		/* end2: unused                       */	\
  U "",			/* no characters allowed inside       */	\
  U "", U "",		/* escape character: none             */	\
  U "", U "",		/* inner escape: none                 */	\
  U "",			/* bracketing delimiter: none         */	\
  U "",			/* ending delimiter: none             */	\
  false,		/* end need not be at line start      */	\
  false,		/* end delimiter is not needed twice  */	\
}

/* The four named escapes listed in the Pod documentation quoted above       */
/* (E<lt>, E<gt>, E<sol>, E<verbar>); the command for each is the single     */
/* literal character it stands for.                                          */
TOKEN PodLessThanToken = PodEscapeToken("E<lt>", "<");
TOKEN PodGreaterThanToken = PodEscapeToken("E<gt>", ">");
TOKEN PodSlashToken = PodEscapeToken("E<sol>", "/");
TOKEN PodVerbarToken = PodEscapeToken("E<verbar>", "|");


/*****************************************************************************/
/*                                                                           */
/*  Mark Summerfield writes:                                                 */
/*                                                                           */
/*  The following table (and most of its comments) is copied from Gisle Aas  */
/*  HTML::Entities.pm module with the plain text characters being replaced   */
/*  by their Lout equivalents and the HTML entities with their pod           */
/*  equivalents.                                                             */
/*                                                                           */
/*****************************************************************************/

/* Some normal chars that have special meaning in SGML context.              */
/* PE01 and PE02 are deliberately left undefined: E<gt> and E<lt> already    */
/* have their own tokens (PodGreaterThanToken and PodLessThanToken).         */
TOKEN PE00 = PodEscapeToken("E<amp>",		"&");
/* already done above TOKEN PE01 = PodEscapeToken("E<gt>",	">"); */
/* already done above TOKEN PE02 = PodEscapeToken("E<lt>",	"<"); */
/* the command below is the six characters "\"" : a double quote character,  */
/* backslash-escaped, between an opening and a closing double quote          */
TOKEN PE03 = PodEscapeToken("E<quot>",	"\"\\\"\"");

/* PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML                    */
/* Accented letters.  The entity names abbreviate the accent (circ, uml,     */
/* cedil, szlig) where the @Char arguments spell the character name out in   */
/* full (circumflex, dieresis, cedilla, germandbls).  Upper-case entities    */
/* come first (PE04-PE33), then the lower-case ones (PE34-PE65).             */
TOKEN PE04 = PodEscapeToken("E<AElig>",		"{@Char AE}");
TOKEN PE05 = PodEscapeToken("E<Aacute>",	"{@Char Aacute}");
TOKEN PE06 = PodEscapeToken("E<Acirc>",		"{@Char Acircumflex}");
TOKEN PE07 = PodEscapeToken("E<Agrave>",	"{@Char Agrave}");
TOKEN PE08 = PodEscapeToken("E<Aring>",		"{@Char Aring}");
TOKEN PE09 = PodEscapeToken("E<Atilde>",	"{@Char Atilde}");
TOKEN PE10 = PodEscapeToken("E<Auml>",		"{@Char Adieresis}");
TOKEN PE11 = PodEscapeToken("E<Ccedil>",	"{@Char Ccedilla}");
TOKEN PE12 = PodEscapeToken("E<ETH>",		"{@Char Eth}");
TOKEN PE13 = PodEscapeToken("E<Eacute>",	"{@Char Eacute}");
TOKEN PE14 = PodEscapeToken("E<Ecirc>",		"{@Char Ecircumflex}");
TOKEN PE15 = PodEscapeToken("E<Egrave>",	"{@Char Egrave}");
TOKEN PE16 = PodEscapeToken("E<Euml>",		"{@Char Edieresis}");
TOKEN PE17 = PodEscapeToken("E<Iacute>",	"{@Char Iacute}");
TOKEN PE18 = PodEscapeToken("E<Icirc>",		"{@Char Icircumflex}");
TOKEN PE19 = PodEscapeToken("E<Igrave>",	"{@Char Igrave}");
TOKEN PE20 = PodEscapeToken("E<Iuml>",		"{@Char Idieresis}");
TOKEN PE21 = PodEscapeToken("E<Ntilde>",	"{@Char Ntilde}");
TOKEN PE22 = PodEscapeToken("E<Oacute>",	"{@Char Oacute}");
TOKEN PE23 = PodEscapeToken("E<Ocirc>",		"{@Char Ocircumflex}");
TOKEN PE24 = PodEscapeToken("E<Ograve>",	"{@Char Ograve}");
TOKEN PE25 = PodEscapeToken("E<Oslash>",	"{@Char Oslash}");
TOKEN PE26 = PodEscapeToken("E<Otilde>",	"{@Char Otilde}");
TOKEN PE27 = PodEscapeToken("E<Ouml>",		"{@Char Odieresis}");
TOKEN PE28 = PodEscapeToken("E<THORN>",		"{@Char Thorn}");
TOKEN PE29 = PodEscapeToken("E<Uacute>",	"{@Char Uacute}");
TOKEN PE30 = PodEscapeToken("E<Ucirc>",		"{@Char Ucircumflex}");
TOKEN PE31 = PodEscapeToken("E<Ugrave>",	"{@Char Ugrave}");
TOKEN PE32 = PodEscapeToken("E<Uuml>",		"{@Char Udieresis}");
TOKEN PE33 = PodEscapeToken("E<Yacute>",	"{@Char Yacute}");
TOKEN PE34 = PodEscapeToken("E<aacute>",	"{@Char aacute}");
TOKEN PE35 = PodEscapeToken("E<acirc>",		"{@Char acircumflex}");
TOKEN PE36 = PodEscapeToken("E<aelig>",		"{@Char ae}");
TOKEN PE37 = PodEscapeToken("E<agrave>",	"{@Char agrave}");
TOKEN PE38 = PodEscapeToken("E<aring>",		"{@Char aring}");
TOKEN PE39 = PodEscapeToken("E<atilde>",	"{@Char atilde}");
TOKEN PE40 = PodEscapeToken("E<auml>",		"{@Char adieresis}");
TOKEN PE41 = PodEscapeToken("E<ccedil>",	"{@Char ccedilla}");
TOKEN PE42 = PodEscapeToken("E<eacute>",	"{@Char eacute}");
TOKEN PE43 = PodEscapeToken("E<ecirc>",		"{@Char ecircumflex}");
TOKEN PE44 = PodEscapeToken("E<egrave>",	"{@Char egrave}");
TOKEN PE45 = PodEscapeToken("E<eth>",		"{@Char eth}");
TOKEN PE46 = PodEscapeToken("E<euml>",		"{@Char edieresis}");
TOKEN PE47 = PodEscapeToken("E<iacute>",	"{@Char iacute}");
TOKEN PE48 = PodEscapeToken("E<icirc>",		"{@Char icircumflex}");
TOKEN PE49 = PodEscapeToken("E<igrave>",	"{@Char igrave}");
TOKEN PE50 = PodEscapeToken("E<iuml>",		"{@Char idieresis}");
TOKEN PE51 = PodEscapeToken("E<ntilde>",	"{@Char ntilde}");
TOKEN PE52 = PodEscapeToken("E<oacute>",	"{@Char oacute}");
TOKEN PE53 = PodEscapeToken("E<ocirc>",		"{@Char ocircumflex}");
TOKEN PE54 = PodEscapeToken("E<ograve>",	"{@Char ograve}");
TOKEN PE55 = PodEscapeToken("E<oslash>",	"{@Char oslash}");
TOKEN PE56 = PodEscapeToken("E<otilde>",	"{@Char otilde}");
TOKEN PE57 = PodEscapeToken("E<ouml>",		"{@Char odieresis}");
TOKEN PE58 = PodEscapeToken("E<szlig>",		"{@Char germandbls}");
TOKEN PE59 = PodEscapeToken("E<thorn>",		"{@Char thorn}");
TOKEN PE60 = PodEscapeToken("E<uacute>",	"{@Char uacute}");
TOKEN PE61 = PodEscapeToken("E<ucirc>",		"{@Char ucircumflex}");
TOKEN PE62 = PodEscapeToken("E<ugrave>",	"{@Char ugrave}");
TOKEN PE63 = PodEscapeToken("E<uuml>",		"{@Char udieresis}");
TOKEN PE64 = PodEscapeToken("E<yacute>",	"{@Char yacute}");
TOKEN PE65 = PodEscapeToken("E<yuml>",		"{@Char ydieresis}");

/* Some extra Latin 1 chars that are listed in the HTML3.2 draft 1996/05/21. */
/* Unlike the @Char entries, these use dedicated Lout symbols, and E<nbsp>   */
/* is printed as a bare ~ character.                                         */
TOKEN PE66 = PodEscapeToken("E<copy>",		"{@CopyRight}");
TOKEN PE67 = PodEscapeToken("E<reg>",		"{@Register}");
TOKEN PE68 = PodEscapeToken("E<nbsp>",		"~");

/* Additional ISO-8859/1 entities listed in rfc1866 (section 14).            */
/* Most map to {@Char name}; a few use dedicated symbols (@Sterling, @Yen,   */
/* @SectSym, @ParSym, @Multiply, @Divide).                                   */
/* NOTE(review): three entries look like approximations rather than exact    */
/* equivalents -- E<brvbar> (broken bar) is printed as the plain "bar",      */
/* E<shy> (soft hyphen) as an ordinary "hyphen", and E<micro> as "mu".       */
/* Confirm these are intended before changing them.                          */
TOKEN PE69 = PodEscapeToken("E<iexcl>",		"{@Char exclamdown}");
TOKEN PE70 = PodEscapeToken("E<cent>",		"{@Char cent}");
TOKEN PE71 = PodEscapeToken("E<pound>",		"{@Sterling}");
TOKEN PE72 = PodEscapeToken("E<curren>",	"{@Char currency}");
TOKEN PE73 = PodEscapeToken("E<yen>",		"{@Yen}");
TOKEN PE74 = PodEscapeToken("E<brvbar>",	"{@Char bar}");
TOKEN PE75 = PodEscapeToken("E<sect>",		"{@SectSym}");
TOKEN PE76 = PodEscapeToken("E<uml>",		"{@Char dieresis}");
TOKEN PE77 = PodEscapeToken("E<ordf>",		"{@Char ordfeminine}");
TOKEN PE78 = PodEscapeToken("E<laquo>",		"{@Char guillemotleft}");
TOKEN PE79 = PodEscapeToken("E<not>",		"{@Char logicalnot}");
TOKEN PE80 = PodEscapeToken("E<shy>",		"{@Char hyphen}");
TOKEN PE81 = PodEscapeToken("E<macr>",		"{@Char macron}");
TOKEN PE82 = PodEscapeToken("E<deg>",		"{@Char degree}");
TOKEN PE83 = PodEscapeToken("E<plusmn>",	"{@Char plusminus}");
TOKEN PE84 = PodEscapeToken("E<sup1>",		"{@Char onesuperior}");
TOKEN PE85 = PodEscapeToken("E<sup2>",		"{@Char twosuperior}");
TOKEN PE86 = PodEscapeToken("E<sup3>",		"{@Char threesuperior}");
TOKEN PE87 = PodEscapeToken("E<acute>",		"{@Char acute}");
TOKEN PE88 = PodEscapeToken("E<micro>",		"{@Char mu}");
TOKEN PE89 = PodEscapeToken("E<para>",		"{@ParSym}");
TOKEN PE90 = PodEscapeToken("E<middot>",	"{@Char periodcentered}");
TOKEN PE91 = PodEscapeToken("E<cedil>",		"{@Char cedilla}");
TOKEN PE92 = PodEscapeToken("E<ordm>",		"{@Char ordmasculine}");
TOKEN PE93 = PodEscapeToken("E<raquo>",		"{@Char guillemotright}");
TOKEN PE94 = PodEscapeToken("E<frac14>",	"{@Char onequarter}");
TOKEN PE95 = PodEscapeToken("E<frac12>",	"{@Char onehalf}");
TOKEN PE96 = PodEscapeToken("E<frac34>",	"{@Char threequarters}");
TOKEN PE97 = PodEscapeToken("E<iquest>",	"{@Char questiondown}");
TOKEN PE98 = PodEscapeToken("E<times>",		"{@Multiply}");
TOKEN PE99 = PodEscapeToken("E<divide>",	"{@Divide}");


/*****************************************************************************/
/*                                                                           */
/*  LANGUAGE - put your language declarations in this section.               */
/*                                                                           */
/*  The field names and their meanings are:                                  */
/*                                                                           */
/*     names           Set of alternative names for this language            */
/*     setup_file      The default Lout setup file (e.g. "cprint", "eiffel") */
/*     lang_sym        The symbol for the language (e.g. "@CP", "@Eiffel")   */
/*     no_match        What to do if something fails to match (see below)    */
/*     tokens          Set of all tokens of this language                    */
/*     keywords        Set of all keywords for this language                 */
/*                                                                           */
/*  Acceptable values for no_match are:                                      */
/*                                                                           */
/*     NO_MATCH_ERROR  Generate an error message and skip the character.     */
/*                                                                           */
/*     NO_MATCH_PRINT  Print the character in a way that is Lout-safe; that  */
/*                     is, mostly raw but in quotes for "/", "@" etc., and   */
/*                     handling tabs and newlines appropriately.             */
/*                                                                           */
/*****************************************************************************/
/* values for the no_match field of LANGUAGE, as described above */
#define	NO_MATCH_ERROR	1
#define	NO_MATCH_PRINT	2
/* null LANGUAGE pointer */
#define NO_LANGUAGE	((LANGUAGE *) NULL)

/* One record per supported language; the arrays are fixed-size, bounded by  */
/* MAX_NAMES, MAX_TOKENS and MAX_KEYWORDS.                                   */
struct lang_rec {
  char *names[MAX_NAMES];	/* alternative names for this language      */
  char *setup_file;		/* default Lout setup file, e.g. "cprint"   */
  char *lang_sym;		/* Lout symbol for the language, e.g. "@CP" */
  int no_match;			/* NO_MATCH_ERROR or NO_MATCH_PRINT         */
  TOKEN *tokens[MAX_TOKENS];	/* all tokens of this language              */
  char *keywords[MAX_KEYWORDS];	/* all keywords of this language            */
};

typedef struct lang_rec LANGUAGE;


/* C and C++ share this one description: both sets of names select it, and   */
/* the keyword list holds the keywords of both languages.                    */
LANGUAGE CLanguage = {
  /* names */
  { "C", "c", "C++", "c++"  },
  /* setup_file, lang_sym */
  "cprint", "@CP",
  /* no_match: input that matches no token is reported as an error */
  NO_MATCH_ERROR,
  /* tokens */
  {
    &CStringToken, &CCharacterToken, &IdentifierToken, &NumberToken,
    &CCommentToken, &CCommentEscapeToken,
    &CPPCommentToken, &CPPCommentEscapeToken,
    &HashToken, &ExclamationToken, &PercentToken, &HatToken,
    &AmpersandToken, &StarToken, &LeftParenToken, &RightParenToken,
    &MinusToken, &PlusToken, &EqualToken, &LeftBraceToken, &RightBraceToken,
    &BarToken, &CircumToken, &LeftBracketToken, &RightBracketToken,
    &SemicolonToken, &ColonToken, &LessToken, &GreaterToken,
    &QuestionToken, &CommaToken, &DotToken, &SlashToken, &BackSlashToken,
    &ArrowToken, &LessEqualToken, &GreaterEqualToken, &CNotEqualToken
  },

  /* keywords */
  { "asm", "auto", "break", "case", "catch", "char", "class", "const", 
    "continue", "default", "delete", "do", "double", "else", "enum", "extern",
    "float", "for", "friend", "goto", "if", "inline", "int", "long", "new",
    "operator", "private", "protected", "public", "register", "return",
    "short", "signed", "sizeof", "static", "struct", "switch", "template",
    "this", "throw", "try", "typedef", "union", "unsigned", "virtual",
    "void", "volatile", "while", 

    /* these contributed by Isaac To <kkto@csis.hku.hk> */
    "bool", "wchar_t", "typeid", "typename", "false", "true", "const_cast",
    "dynamic_cast", "reinterpret_cast", "static_cast", "namespace", "using",
    "and", "and_eq", "bitand", "bitor", "compl", "not", "not_eq", "or",
    "or_eq", "xor", "xor_eq", "explicit", "export", "mutable",

  }
};


/* Tokens, keywords taken from the on-line documentation supplied with Python
 * 2.5.  The keywords field holds more than the reserved words: built-in
 * constants, exceptions and functions are listed there too, each group
 * under its own comment. */
LANGUAGE PythonLanguage = {
  /* names */
  { "Python", "python"  },
  /* setup_file, lang_sym */
  "python", "@Python",
  /* no_match: input that matches no token is reported as an error */
  NO_MATCH_ERROR,
  /* tokens */
  { &BackSlashToken,
    &PythonDblStringToken, &PythonSnglStringToken,
    &PythonTriSnglStringToken, &PythonTriDblStringToken,
    &PythonCommentToken, &PythonCommentEscapeToken, &IdentifierToken,
    &NumberToken, &PlusToken, &MinusToken, &StarToken, &PythonPowerToken,
    &SlashToken, &PercentToken, &PythonBitLeftShiftToken,
    &PythonBitRightShiftToken, &AmpersandToken, &BarToken,
    &HatToken, &CircumToken, &LessToken, &GreaterToken,
    &LessEqualToken, &GreaterEqualToken, &BlueNotEqualToken,
    &CNotEqualToken,
    &LeftParenToken, &RightParenToken, &LeftBraceToken,
    &RightBraceToken, &LeftBracketToken, &RightBracketToken,
    &CommaToken, &ColonToken, &DotToken, &PythonBacktickToken,
    &EqualToken, &SemicolonToken, &PythonDecoratorToken, &DotDotDotToken
  },

  {
    /* Keywords */
    "and",       "del",       "for",       "is",        "raise",
    "as",	 "elif",      "from",      "lambda",    "return",
    "break",     "else",      "global",    "not",       "try",
    "class",     "except",    "if",        "or",        "while",
    "continue",  "exec",      "import",    "pass",	"with",
    "def",       "finally",   "in",        "print",	"yield",
    /* Built-ins */
    "False", "True", "None", "NotImplemented", "Ellipsis",
    /* Built-in Exceptions */
    "BaseException", "SystemExit", "KeyboardInterrupt", "Exception",
    "GeneratorExit", "StopIteration", "StandardError", "ArithmeticError",
    "FloatingPointError", "OverflowError", "ZeroDivisionError", "AssertionError",
    "AttributeError", "EnvironmentError", "IOError", "OSError",
    "WindowsError", "VMSError", "EOFError", "ImportError", "LookupError",
    "IndexError", "KeyError", "MemoryError", "NameError", "UnboundLocalError",
    "ReferenceError", "RuntimeError", "NotImplementedError", "SyntaxError",
    "IndentationError", "TabError", "SystemError", "TypeError", "ValueError",
    "UnicodeError", "UnicodeDecodeError", "UnicodeEncodeError",
    "UnicodeTranslateError",
    "Warning", "DeprecationWarning", "PendingDeprecationWarning",
    "RuntimeWarning", "SyntaxWarning", "UserWarning", "FutureWarning",
    "ImportWarning", "UnicodeWarning",
    /* Built-in Functions (excluding those designated "non-essential") */
    "__import__",
    "abs", "all", "any",
    "basestring", "bool",
    "callable", "chr", "classmethod", "cmp", "compile", "complex",
    "delattr", "dict", "dir", "divmod",
    "enumerate", "eval", "execfile",
    "file", "filter", "float", "frozenset",
    "getattr", "globals",
    "hasattr", "hash", "help", "hex",
    "id", "input", "int", "isinstance", "issubclass", "iter",
    "len", "list", "locals", "long",
    "map", "max", "min",
    "object", "oct", "open", "ord",
    "pow", "property",
    "range", "raw_input", "reduce", "reload", "repr", "reversed", "round",
    "set", "setattr", "slice", "sorted", "staticmethod", "str", "sum", "super",
    "tuple", "type",
    "unichr", "unicode",
    "vars",
    "xrange",
    "zip",
    /* Built-in Modules */
    /* This has been deleted because the original list was simply wrong.
       Python has a large library of modules but they are not built-in
       or part of the language per-se. */
  }
};


/*****************************************************************************/
/*                                                                           */
/*  Ruby                                                                     */
/*                                                                           */
/*****************************************************************************/

/* Ruby has few tokens of its own here (RubyGenDelimStringToken and          */
/* RubyIdentifierToken); the regular-expression, string, comment and         */
/* here-document tokens are the Perl* and Here* ones also used by Perl.      */
LANGUAGE RubyLanguage = {
  /* names */
  { "Ruby", "ruby" },
  /* setup_file, lang_sym */
  "ruby", "@Ruby",
  /* no_match: input that matches no token is reported as an error */
  NO_MATCH_ERROR,
  /* tokens */
  {
    &BackSlashToken,
    &PerlRegExpLPar, &PerlRegExpEq, &PerlRegExpMatch, &PerlRegExpNoMatch,
    &PerlRegExpSplit, &PerlRegExpIf, &PerlRegExpAnd, &PerlRegExpAnd2,
    &PerlRegExpOr, &PerlRegExpOr2, &PerlRegExpXor, &PerlRegExpNot,
    &PerlRegExpNot2, &PerlRegExpUnless,
    &PerlDoubleQuoteStringToken, &PerlSingleQuoteStringToken,
    &PerlBackQuoteStringToken, &RubyGenDelimStringToken,
    &RubyIdentifierToken, &NumberToken,
    &PerlCommentToken, &PerlCommentEscapeToken,
    &SemicolonToken, &CommaToken, &ColonToken, &EiffelDotToken,
    &HereEOTuq, &HereEOTdq, &HereEOTfq, &HereEOTbq,
    &HereEOFuq, &HereEOFdq, &HereEOFfq, &HereEOFbq,
    &HereENDuq, &HereENDdq, &HereENDfq, &HereENDbq,
    &HereBLAuq, &HereBLAdq, &HereBLAfq, &HereBLAbq,
    &ExclamationToken, &EqualToken, &CNotEqualToken, &LeftParenToken,
    &RightParenToken, &LeftBracketToken, &RightBracketToken, &LeftBraceToken,
    &RightBraceToken, &AssignToken, &QuestionAssignToken, &PlusToken,
    &MinusToken, &StarToken, &PercentToken, &HatToken, &SlashToken, &BarToken,
    &LessToken, &GreaterToken, &LessEqualToken, &CircumToken,
    &GreaterEqualToken
  },
  /* keywords */
  { "alias", "and", "begin", "break", "case", "catch", "class", "def", "do",
    "elsif", "else", "fail", "ensure", "for", "end", "if", "in", "module",
    "next", "not", "or", "raise", "redo", "rescue", "retry", "return", "then",
    "throw", "super", "unless", "undef", "until", "when", "while", "yield"
  }
};


/*****************************************************************************/
/*                                                                           */
/*  Eiffel and Blue                                                          */
/*                                                                           */
/*****************************************************************************/

/* Eiffel: own string, character, comment, dot and not-equal tokens, plus    */
/* the shared identifier, number and punctuation tokens.                     */
LANGUAGE EiffelLanguage = {
  /* names */
  { "Eiffel", "eiffel" },
  /* setup_file, lang_sym */
  "eiffel", "@Eiffel",
  /* no_match: input that matches no token is reported as an error */
  NO_MATCH_ERROR,
  /* tokens */
  {
    &EiffelStringToken, &EiffelCharacterToken, &IdentifierToken, &NumberToken,
    &EiffelCommentToken, &EiffelCommentEscapeToken,
    &SemicolonToken, &CommaToken, &ColonToken, &EiffelDotToken,
    &ExclamationToken, &EqualToken, &EiffelNotEqualToken, &LeftParenToken,
    &RightParenToken, &LeftBracketToken, &RightBracketToken, &LeftBraceToken,
    &RightBraceToken, &AssignToken, &QuestionAssignToken, &PlusToken,
    &MinusToken, &StarToken, &DollarToken, &HatToken, &SlashToken,
    &BackSlashToken, &LessToken, &GreaterToken, &LessEqualToken,
    &GreaterEqualToken
  },
  /* keywords */
  { "alias", "all", "and", "as", "check", "class", "creation", "debug",
    "deferred", "do", "else", "elseif", "end", "ensure", "expanded", "export",
    "external", "false", "feature", "from", "frozen", "if", "implies",
    "indexing", "infix", "inherit", "inspect", "invariant", "is", "like",
    "local", "loop", "obsolete", "old", "once", "or", "prefix", "redefine",
    "rename", "require", "rescue", "retry", "select", "separate", "strip",
    "then", "true", "undefine", "unique", "until", "variant", "when", "xor",
    "not", "interface"
  }
};

/* Blue: uses the C string token and the Eiffel dot token alongside its own  */
/* comment and not-equal tokens.                                             */
LANGUAGE BlueLanguage = {
  /* names */
  { "Blue", "blue" },
  /* setup_file, lang_sym */
  "blue", "@Blue",
  /* no_match: input that matches no token is reported as an error */
  NO_MATCH_ERROR,
  /* tokens */
  {
    &CStringToken, &IdentifierToken, &NumberToken,
    &BlueCommentToken, &BlueCommentEscapeToken,
    &CommaToken, &LessToken, &GreaterToken, &ColonToken, &AssignToken,
    &LeftParenToken, &RightParenToken, &LeftBracketToken, &RightBracketToken,
    &QuestionAssignToken, &ExclamationToken, &EiffelDotToken, &ImpliesToken,
    &EqualToken, &BlueNotEqualToken, &LeftBraceToken, &RightBraceToken,
    &PlusToken, &MinusToken, &StarToken, &SlashToken, &HatToken,
    &LessEqualToken, &GreaterEqualToken
  },
  /* keywords; "Enumeration" and "enumeration" are both listed */
  { "and", "assert", "builtin", "case", "class", "const", "create", "creation",
    "deferred", "div", "do", "else", "elseif", "end", "Enumeration",
    "enumeration", "exit", "if", "in", "interface", "internal", "invariant",
    "is", "loop", "manifest", "mod", "not", "of", "old", "on", "or", "post",
    "pre", "redefined", "return", "routines", "super", "then", "uses", "var"
  }
};


/*****************************************************************************/
/*                                                                           */
/*  Java                                                                     */
/*                                                                           */
/*****************************************************************************/

/* Java: the token list is the same as CLanguage's except that ArrowToken    */
/* is absent; only the keyword list is Java-specific.                        */
LANGUAGE JavaLanguage = {
  /* names */
  { "Java", "java"  },
  /* setup_file, lang_sym */
  "java", "@Java",
  /* no_match: input that matches no token is reported as an error */
  NO_MATCH_ERROR,
  /* tokens */
  {
    &CStringToken, &CCharacterToken, &IdentifierToken, &NumberToken,
    &CCommentToken, &CCommentEscapeToken,
    &CPPCommentToken, &CPPCommentEscapeToken,
    &HashToken, &ExclamationToken, &PercentToken, &HatToken,
    &AmpersandToken, &StarToken, &LeftParenToken, &RightParenToken,
    &MinusToken, &PlusToken, &EqualToken, &LeftBraceToken, &RightBraceToken,
    &BarToken, &CircumToken, &LeftBracketToken, &RightBracketToken,
    &SemicolonToken, &ColonToken, &LessToken, &GreaterToken,
    &QuestionToken, &CommaToken, &DotToken, &SlashToken, &BackSlashToken,
    &LessEqualToken, &GreaterEqualToken, &CNotEqualToken
  },

  /* keywords */
  { "abstract", "boolean", "break", "byte", "case", "catch", "char", "class",
    "const", "continue", "default", "do", "double", "else", "extends", "final",
    "finally", "float", "for", "goto", "if", "implements", "import", "instanceof",
    "int", "interface", "long", "native", "new", "package", "private", "protected",
    "public", "return", "short", "static", "strictfp", "super", "switch",
    "synchronized", "this", "throw", "throws", "transient", "try", "void",
    "volatile", "while",
  }
};


/*****************************************************************************/
/*                                                                           */
/*  Nonpareil (December 2002 - still evolving)                               */
/*                                                                           */
/*****************************************************************************/

/* Nonpareil: C-style strings, characters and comments, with the Python      */
/* comment-escape token and a handful of Nonpareil-specific tokens.          */
LANGUAGE NonpareilLanguage = {
  /* names */
  { "Nonpareil", "nonpareil" },
  /* setup_file, lang_sym */
  "nonpareil", "@Nonpareil",
  /* no_match: input that matches no token is reported as an error */
  NO_MATCH_ERROR,
  /* tokens */
  {
    &CStringToken, &CCharacterToken, &IdentifierToken, &NumberToken,
    &CCommentToken, &CPPCommentToken, &PythonCommentEscapeToken,
    &MinusToken,
    &LeftBraceToken,
    &RightBraceToken,
    &LeftBracketToken,
    &LeftBracketBarToken,
    &RightBracketToken,
    &RightBracketBarToken,
    &CommaToken,
    &ColonToken,
    &AssignToken,
    &LeftParenToken,
    &RightParenToken,
    &EiffelDotToken,
    &NonpareilExclamationToken,
    &NonpareilDotDotToken,
    &DotDotDotToken,
    &NonpareilOperatorToken,
    &SemicolonToken
  },
  /* keywords */
  {
    "abstract", "break", "builtin", "case",
    "class", "coerce", "cometo", "continue", "default",
    "do", "else", "enum", "extension",
    "filter", "for", "fun", "goto",
    "if", "import", "include", "infix",
    "inherit", "is", "meet",
    "methods", "module", "operator", "postfix",
    "predefined", "prefix", "prefun", "private",
    "renames", "return", "self", "switch",
    "typedef", "typeobj", "upto", "while",

    /* not keywords, but conventionally set like them */
    "false", "true",
  }
};


/*****************************************************************************/
/*                                                                           */
/*  Haskell                                                                  */
/*                                                                           */
/*****************************************************************************/

/* Haskell: single-character operator tokens are deliberately left out of    */
/* the token list (see the comment inside it) in favour of                   */
/* HaskellOperatorToken.                                                     */
LANGUAGE HaskellLanguage = {
  /* names */
  { "Haskell", "haskell" },
  /* setup_file, lang_sym */
  "haskell", "@Haskell",
  /* no_match: input that matches no token is reported as an error */
  NO_MATCH_ERROR,
  /* tokens */
  {
	/*&EqualToken, &PlusToken, &MinusToken, &DotToken,
	  &StarToken, &HaskellColonToken, 
	  &LessToken, &GreaterToken,
	  these overlap with HaskellOperatorToken */
    &HaskellStringToken, &HaskellCharacterToken,
    &HaskellIdentifierToken, &NumberToken,
    &HaskellLineCommentToken, &HaskellCommentToken, 
    &HaskellCommentEscapeToken, &HaskellLineCommentEscapeToken,
    &SemicolonToken, &CommaToken, &DoubleColonToken,
    &HaskellEquivalenceToken, &FunctionCompositionToken,
    &ArrowToken, &LeftArrowToken, &HaskellLambdaToken,
    &LeftParenToken, &RightParenToken, &LeftBracketToken, &RightBracketToken,
    &LeftBraceToken, &RightBraceToken, &EiffelNotEqualToken, &LessEqualToken,
    &ImpliesToken, &GreaterEqualToken, &HaskellConcatenationToken,
    &HaskellOperatorToken, &HaskellOrToken, &HaskellAndToken,
    &HaskellAtPatternToken
  },
  /* keywords, in three groups: reserved words; words used with import       */
  /* ("as", "hiding", "qualified"); and the constants True and False         */
  {
    "case", "class", "data", "default", "deriving", "do",
    "else", "if", "import", "in", "infix", "infixl", "infixr", "instance",
    "let", "mdo", "module", "newtype", "of", "then", "type", "where",

    "as", "hiding", "qualified",

    "True", "False"
  }
};


/*****************************************************************************/
/*                                                                           */
/*  RSL                                                                      */
/*                                                                           */
/*****************************************************************************/
/* Tokens, keywords taken from UNU/IIST Report No. 249 */

/* RSL: a large set of RSL-specific operator tokens, plus C and Eiffel       */
/* comment tokens and the shared punctuation tokens.                         */
LANGUAGE RSLLanguage = {
  /* names */
  { "RSL", "rsl" },
  /* setup_file, lang_sym */
  "rsl", "@RSL",
  /* no_match: input that matches no token is reported as an error */
  NO_MATCH_ERROR,
  /* tokens */
  {
    &RSLIdentifierToken, &CommaToken, &EqualToken, &ColonToken,
    &LeftParenToken, &RightParenToken, &LeftBraceToken,
    &RightBraceToken, &EiffelDotToken, &CircumToken, &NumberToken,
    &SemicolonToken, &MinusToken, &LeftBracketToken,
    &RightBracketToken, &PlusToken, &BarToken, &CCommentToken, &HatToken,
    &SlashToken, &LessToken, &GreaterToken, &RSLPrimeToken,
    &RSLProductToken, &ArrowToken, &RSLPartialMapToken, &RSLAndToken,
    &RSLAlwaysToken, &LessEqualToken, &RSLIsInToken,
    &RSLSubsetToken, &RSLUnionToken, &RSLListStartToken, &RSLParToken,
    &RSLIntChoiceToken, &RSLTurnstileToken, &RSLListToken,
    &RSLPartialFnToken, &RSLRelationToken, &RSLOrToken,
    &GreaterEqualToken,
    &RSLNotIsInToken, &RSLProperSuperToken, &RSLInterToken,
    &RSLListEndToken, &RSLInterlockToken, &RSLLambdaToken,
    &RSLImplRelToken, &RSLInfListToken, &RSLMapToken, &ImpliesToken,
    &RSLSTToken, &RSLNotEqualToken, &RSLPowerToken,
    &RSLProperSubsetToken, &RSLSupersetToken, &RSLOverrideToken,
    &RSLMapletToken, &RSLExtChoiceToken, &RSLApplyToken,
    &RSLImplExprToken, &CCommentEscapeToken, &EiffelCommentToken,
    &EiffelCommentEscapeToken, &BackSlashToken, &RSLExistsOneToken, &StarToken
  },
  /* keywords; the capitalised entries at the front are the built-in type    */
  /* names (Bool, Char, Int, Nat, Real, Text, Unit)                          */
  { "Bool", "Char", "Int", "Nat", "Real", "Text", "Unit", "abs", "any",
    "as", "axiom", "card", "case", "channel", "chaos", "class", "do",
    "dom", "elems", "else", "elsif", "end", "extend", "false", "for",
    "hd", "hide", "if", "in", "inds", "initialise", "int", "len", "let",
    "local", "object", "of", "out", "post", "pre", "read", "real",
    "rng", "scheme", "skip", "stop", "swap", "test_case", "then", "tl",
    "true", "type", "until", "use", "value", "variable", "while", "with",
    "write", "is", "exists", "all"
  }
};


/*****************************************************************************/
/*                                                                           */
/*  Perl and Pod                                                             */
/*                                                                           */
/*  We list here all keywords, special variables, predefined filehandles,    */
/*  and any other identifier that is "built-in".                             */
/*                                                                           */
/*****************************************************************************/

/* Perl: setup file "perl", Lout symbol "@Perl"; input that matches no token */
/* is reported as an error.  The keywords field holds every built-in         */
/* identifier, grouped below under one comment per category.                 */
/*                                                                           */
/* Every keyword string must be followed by a comma, including the last one  */
/* before a group comment: the compiler concatenates adjacent string         */
/* literals, so a missing comma silently merges two keywords into one bogus  */
/* entry and neither original keyword is recognised.                         */
/*                                                                           */
/* NOTE(review): this list holds roughly 300 entries; confirm that           */
/* MAX_KEYWORDS (defined elsewhere) leaves room for them.                    */
LANGUAGE PerlLanguage = {
  /* names */
  { "Perl", "perl", },
  /* setup_file, lang_sym */
  "perl", "@Perl",
  /* no_match */
  NO_MATCH_ERROR,
  /* tokens */
  {
    &PerlSingleQuoteStringToken, &PerlDoubleQuoteStringToken,
    &PerlBackQuoteStringToken, &PerlQTypeStringToken, &PerlSTypeStringToken,
    &PerlRegExpLPar, &PerlRegExpEq, &PerlRegExpMatch, &PerlRegExpNoMatch,
    &PerlRegExpSplit, &PerlRegExpIf, &PerlRegExpAnd, &PerlRegExpAnd2,
    &PerlRegExpOr, &PerlRegExpOr2, &PerlRegExpXor, &PerlRegExpNot,
    &PerlRegExpNot2, &PerlRegExpUnless, &PerlRegExpFor, &PerlRegExpForEach,
    &PerlRegExpWhile, &PerlRegExpStartLineToken,
    &HereEOTuq, &HereEOTdq, &HereEOTfq, &HereEOTbq,
    &HereEOFuq, &HereEOFdq, &HereEOFfq, &HereEOFbq,
    &HereENDuq, &HereENDdq, &HereENDfq, &HereENDbq,
    &HereBLAuq, &HereBLAdq, &HereBLAfq, &HereBLAbq,
    &PerlIdentifierToken, &PerlSpecialIdentifierToken,
    &PerlLiteralNumberToken, &PerlHexNumberToken, &PerlBinaryNumberToken,
    &PerlCommentToken, &PerlCommentEscapeToken, &PerlPodToken,
    &ExclamationToken, &PercentToken, &HatToken, &AmpersandToken, 
    &StarToken, &SlashToken, &ArrowToken, &BackSlashToken, 
    &LeftParenToken, &RightParenToken, &MinusToken, &PlusToken, 
    &LeftBraceToken, &RightBraceToken, &BarToken, &CircumToken, 
    &LeftBracketToken, &RightBracketToken, &SemicolonToken, &ColonToken, 
    &LessToken, &GreaterToken, &QuestionToken, &CommaToken, &DotToken, 
    &LessEqualToken, &GreaterEqualToken, &CNotEqualToken, 
    &PerlIncrementToken, &PerlDecrementToken, &PerlExponentiateToken, 
    &PerlMatchToken, &PerlNotMatchToken,
    &PerlEqualToken, &PerlAssignToken, &PerlBitLeftShiftToken, 
    &PerlBitRightShiftToken, &PerlSpaceshipToken, 
    &PerlAndToken, &PerlOrToken, &PerlRange2Token, &PerlRange3Token, 
    &PerlFileTestrToken, &PerlFileTestwToken, &PerlFileTestxToken, 
    &PerlFileTestoToken, &PerlFileTestRToken, &PerlFileTestWToken, 
    &PerlFileTestXToken, &PerlFileTestOToken, &PerlFileTesteToken, 
    &PerlFileTestzToken, &PerlFileTestsToken, &PerlFileTestfToken, 
    &PerlFileTestdToken, &PerlFileTestlToken, &PerlFileTestpToken, 
    &PerlFileTestSToken, &PerlFileTestbToken, &PerlFileTestcToken, 
    &PerlFileTesttToken, &PerlFileTestuToken, &PerlFileTestgToken, 
    &PerlFileTestkToken, &PerlFileTestTToken, &PerlFileTestBToken, 
    &PerlFileTestMToken, &PerlFileTestAToken, &PerlFileTestCToken, 
  },

  { 
    /* Built-ins taken from WCS and on-line documentation for 5.6.0     */
    /* dbmopen and dbmclose are not included because they are obsolete. */
    "abs", "accept", "alarm", "atan2", 
    "bind", "binmode", "bless", 
    "caller", "can", "chdir", "chmod", "chomp", "chop", "chown", "chr", "chroot", 
    "close", "closedir", "connect", "continue", "cos", "crypt", 
    "defined", "delete", "die", "do", "dump", 
    "each", "endgrent", "endhostent", "endnetent", "endprotoent", 
    "endpwent", "endservent", "eof", "eval", "exec", "exists", "exit", 
    "exp", 
    "fcntl", "fileno", "flock", "fork", "format", "formline", 
    "getc", "getgrent", "getgrgid", "getgrnam", "gethostbyaddr", 
    "gethostbyname", "gethostent", "getlogin", "getnetbyaddr", 
    "getnetbyname", "getnetent", "getpeername", "getpgrp", "getppid", 
    "getpriority", "getprotobyname", "getprotobynumber", "getprotoent", 
    "getpwent", "getpwnam", "getpwuid", "getservbyname", "getservbyport", 
    "getservent", "getsockname", "getsockopt", "glob", "gmtime", "goto", 
    "grep", 
    "hex", 
    "import", "index", "int", "ioctl", "isa",
    "join", 
    "keys", "kill", 
    "last", "lc", "lcfirst", "length", "link", "listen", "local", 
    "localtime", "lock", "log", "lstat", 
    "map", "mkdir", "msgctl", "msgget", "msgrcv", "msgsnd", "my", 
    "next", "no", 
    "oct", "open", "opendir", "ord", "our",
    "pack", "package", "pipe", "pop", "pos", "print", "printf", "prototype", "push", 
    "quotemeta", 
    "rand", "read", "readdir", "readline", "readlink", "readpipe", "recv", 
    "redo", "ref", "rename", "require", "reset", "return", "reverse", 
    "rewinddir", "rindex", "rmdir", 
    "scalar", "seek", "seekdir", "select", "semctl", "semget", "semop", 
    "send", "setgrent", "sethostent", "setnetent", "setpgrp", 
    "setpriority", "setprotoent", "setpwent", "setservent", 
    "setsockopt", "shift", "shmctl", "shmget", "shmread", "shmwrite", 
    "shutdown", "sin", "sleep", "socket", "socketpair", "sort", 
    "splice", "split", "sprintf", "sqrt", "srand", "stat", "study", 
    "sub", "substr", "symlink", "syscall", "sysopen", "sysread", "sysseek",
    "system", "syswrite", 
    "tell", "telldir", "tie", "tied", "time", "times", "truncate", 
    "unimport",
    "uc", "ucfirst", "umask", "undef", "unlink", "unpack", "unshift", 
    "untie", "use", "utime", 
    "values", "vec", "VERSION",
    "wait", "waitpid", "wantarray", "warn", "write", 

    /* Comparison operators */
    "lt", "gt", "eq", "ne", "cmp", "le", "ge",

    /* Special markers & constants */
    "__DATA__", "__END__", "__FILE__", "__LINE__", "__PACKAGE__",

    /* Predefined filehandles */
    "ARGV", "ARGVOUT", "STDERR", "STDIN", "STDOUT", "DATA",

    /* Pragmas */
    "attributes", "autouse", "base", "blib", "bytes",
    "constant", "charnames", "diagnostics", "fields", "filetest",
    "integer", "less", "lib", "locale", 
    /* "open" is not listed here since it's also a function (see above) */
    "ops", "overload", "re", "sigtrap", "strict", "subs", "utf8",
    "vars", "warnings",

    /* Low-precedence logical operators */
    "and", "or", "xor", "not", 

    /* The x keyword */
    "x",

    /* Control structures ("continue" also appears among the built-ins) */
    "if", "elsif", /* yes one e */ "else", "unless", 
    "while", "for", "foreach", "continue", "until",

    /* Special subroutines */
    "AUTOLOAD", "BEGIN", "CHECK", "END", "DESTROY", "INIT", 

    /* Predefined classes & namespaces */
    "CORE", "GLOBAL", "UNIVERSAL", "SUPER", 

    /* Tie predefined subroutines */
    "TIESCALAR",
    "FETCH", "STORE",
    "TIEARRAY",
    "FETCHSIZE", "STORESIZE", "EXISTS", "DELETE",
    "CLEAR", "PUSH", "POP", "SHIFT", "UNSHIFT", "SPLICE", "EXTEND",
    "TIEHASH",
    "FIRSTKEY", "NEXTKEY",
    "TIEHANDLE",
    "PRINT", "PRINTF", "WRITE", "READLINE", "GETC", "READ", "CLOSE",
    "BINMODE", "OPEN", "EOF", "FILENO", "SEEK", "TELL",
  }
};


/* Language definition for Pod.  The initializers are, in order: a list of   */
/* names, two strings ("pod" and "@Pod"), the no-match setting               */
/* NO_MATCH_PRINT, the list of tokens, and a final list that is { NULL }     */
/* here (JavaScriptLanguage and TclLanguage hold their keywords in that      */
/* position).                                                                */
/* NOTE(review): the exact meaning of each field is set by the LANGUAGE      */
/* typedef, which is declared elsewhere in this file -- confirm there.       */
LANGUAGE PodLanguage = {
  { "Pod", "pod", "POD" },
  "pod", "@Pod",
  NO_MATCH_PRINT,
  {
    &PodVerbatimLineToken, &PodEmptyLineToken, &PodIgnoreToken,
    &PodHeading1Token, &PodHeading2Token, &PodHeading3Token,
    &PodOverToken, &PodItemToken, &PodBackToken,
    &PodItemBullet, &PodItem0, &PodItem1, &PodItem2, &PodItem3,
    &PodItem4, &PodItem5, &PodItem6, &PodItem7, &PodItem8, &PodItem9,
    &PodForToken, &PodBeginToken, &PodBeginLoutToken,
    &PodItalicToken, &PodBoldToken, &PodCodeToken, &PodFileToken,
    &PodNoBreakToken, &PodLinkToken, &PodIndexToken, &PodZeroToken,
    &PodLessThanToken, &PodGreaterThanToken, &PodSlashToken,
    &PodVerbarToken,
    /* PE00 .. PE99; note that PE01 and PE02 are deliberately left out */
    &PE00, /* &PE01, &PE02, */ &PE03, &PE04, &PE05, &PE06, &PE07, &PE08, &PE09,
    &PE10, &PE11, &PE12, &PE13, &PE14, &PE15, &PE16, &PE17, &PE18, &PE19,
    &PE20, &PE21, &PE22, &PE23, &PE24, &PE25, &PE26, &PE27, &PE28, &PE29,
    &PE30, &PE31, &PE32, &PE33, &PE34, &PE35, &PE36, &PE37, &PE38, &PE39,
    &PE40, &PE41, &PE42, &PE43, &PE44, &PE45, &PE46, &PE47, &PE48, &PE49,
    &PE50, &PE51, &PE52, &PE53, &PE54, &PE55, &PE56, &PE57, &PE58, &PE59,
    &PE60, &PE61, &PE62, &PE63, &PE64, &PE65, &PE66, &PE67, &PE68, &PE69,
    &PE70, &PE71, &PE72, &PE73, &PE74, &PE75, &PE76, &PE77, &PE78, &PE79,
    &PE80, &PE81, &PE82, &PE83, &PE84, &PE85, &PE86, &PE87, &PE88, &PE89,
    &PE90, &PE91, &PE92, &PE93, &PE94, &PE95, &PE96, &PE97, &PE98, &PE99,
    &PodNumCharToken,
  },

  /* presumably the keyword list: empty for Pod */
  { NULL },
};

/*****************************************************************************/
/*                                                                           */
/*  JavaScript                                                               */
/*                                                                           */
/*  Based on ECMA-262 5th Edition December 2009                              */
/*                                                                           */
/*****************************************************************************/

/* Language definition for JavaScript.  The initializers are, in order: a    */
/* list of names, two strings ("javascript" and "@JavaScript"), the          */
/* no-match setting NO_MATCH_ERROR, the list of tokens, and the keywords.    */
/* No JavaScript-specific tokens are defined; the token list is built from   */
/* tokens shared with other languages, including C-named ones (strings,      */
/* comments) and Python-named ones (single-quoted strings, shifts).          */
/* NOTE(review): the exact meaning of each field is set by the LANGUAGE      */
/* typedef, which is declared elsewhere in this file -- confirm there.       */
LANGUAGE JavaScriptLanguage = {
  { "JavaScript", "javascript"  },
  "javascript", "@JavaScript",
  NO_MATCH_ERROR,
  {
    &CStringToken, /* "..." strings */
    &PythonSnglStringToken, /* '...' strings */
    &IdentifierToken, &MinusToken,
    &NumberToken, &CCommentToken, &CPPCommentToken,
    &CPPCommentEscapeToken, &AmpersandToken,
    &ExclamationToken, &PercentToken, &HatToken, &SlashToken,
    &BackSlashToken, &LeftParenToken, &RightParenToken, &LeftBraceToken,
    &RightBraceToken, &BarToken, &LeftBracketToken, &RightBracketToken,
    &SemicolonToken, &ColonToken, &CommaToken, &DotToken, &DollarToken,
    &PythonBitLeftShiftToken, &PythonBitRightShiftToken,
    &PlusToken, &EqualToken, &LessToken, &GreaterToken, &LessEqualToken,
    &GreaterEqualToken, &CNotEqualToken, &QuestionToken,
  },

  /* Keywords.  This includes Future Reserved Words */
  {"break", "case", "catch", "class", "const", "continue", "debugger", "default",
   "delete", "do", "else", "enum", "export", "extends", "finally", "for",
   "function", "if", "implements", "import", "in", "instanceof", "interface",
   "let", "new", "package", "private", "protected", "public", "return", "static",
   "super", "switch", "this", "throw", "try", "typeof", "var", "void", "while",
   "with", "yield",
  }
};


/*****************************************************************************/
/*                                                                           */
/*  Tcl                                                                      */
/*                                                                           */
/*****************************************************************************/

/* Language definition for Tcl.  The initializers are, in order: a list of   */
/* names, two strings ("tcl" and "@Tcl"), the no-match setting               */
/* NO_MATCH_ERROR, the list of tokens, and the words to be treated as        */
/* keywords.  No Tcl-specific tokens are defined; the token list is built    */
/* from tokens shared with other languages, including the Python-named       */
/* comment tokens.                                                           */
/* NOTE(review): the exact meaning of each field is set by the LANGUAGE      */
/* typedef, which is declared elsewhere in this file -- confirm there.       */
LANGUAGE TclLanguage = {
  { "Tcl", "tcl"  },
  "tcl", "@Tcl",
  NO_MATCH_ERROR,
  {
    &CStringToken, /* "..." strings */
    &PythonSnglStringToken, /* '...' strings */
    &IdentifierToken, &MinusToken,
    &NumberToken, &PythonCommentToken, &PythonCommentEscapeToken,
    &ExclamationToken, &PercentToken, &HatToken, &AmpersandToken,
    &SlashToken, &BackSlashToken, &LeftParenToken, &RightParenToken,
    &LeftBraceToken, &RightBraceToken,
    /* NOTE(review): both HatToken (above) and CircumToken are listed;     */
    /* confirm that they do not share an opening pattern                   */
    &BarToken, &CircumToken, &LeftBracketToken, &RightBracketToken,
    &DollarToken, &CommaToken,
    &PlusToken, &EqualToken, &LessToken, &GreaterToken, &LessEqualToken,
    &GreaterEqualToken, &CNotEqualToken, &QuestionToken, &ColonToken,
    &DotToken,
  },

  /* Tcl has no reserved words, so we'll colour the same ones vim does */
  {"after", "append", "apply", "array", "auto_execok", "auto_import",
   "auto_load", "auto_mkindex", "auto_mkindex_old", "auto_qualify",
   "auto_reset", "bgerror", "binary", "catch", "cd", "chan", "clock",
   "close", "concat", "coroutine", "dde", "dict", "encoding", "eof",
   "error", "eval", "exec", "exit", "expr", "fblocked", "fconfigure",
   "fcopy", "file", "fileevent", "filename", "flush", "format", "gets",
   "glob", "global", "history", "incr", "info", "interp", "join",
   "lappend", "lassign", "lindex", "linsert", "list", "llength", "load",
   "lrange", "lrepeat", "lreplace", "lreverse", "lsearch", "lset", "lsort",
   "memory", "my", "namespace", "oo::copy", "oo::define", "oo::objdefine",
   "open", "package", "parray", "pid", "proc", "puts", "pwd", "read",
   "regexp", "registry", "regsub", "rename", "return", "scan", "seek",
   "self", "set", "socket", "source", "split", "string", "subst",
   "tailcall", "tcl_endOfWord", "tcl_findLibrary", "tcl_startOfNextWord",
   "tcl_startOfPreviousWord", "tcl_wordBreakAfter", "tcl_wordBreakBefore",
   "tell", "throw", "time", "trace", "unknown", "unload", "unset",
   "update", "uplevel", "upvar", "variable", "vwait", "yield",}
};


/*****************************************************************************/
/*                                                                           */
/*  The "languages" variable - add your language to this list                */
/*  in alphabetical order and before the concluding NO_LANGUAGE              */
/*                                                                           */
/*****************************************************************************/

/* Table of pointers to every language definition, kept in alphabetical      */
/* order; NO_LANGUAGE must remain the final entry.                           */
LANGUAGE *languages[] = {
  & BlueLanguage,
  & CLanguage,
  & EiffelLanguage,
  & HaskellLanguage,
  & JavaLanguage,
  & JavaScriptLanguage,
  & NonpareilLanguage,
  & PerlLanguage,
  & PodLanguage,
  & PythonLanguage,
  & RSLLanguage,
  & RubyLanguage,
  & TclLanguage,
  NO_LANGUAGE
};


/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************/
/***                                                                       ***/
/***  If you are adding a new language, you don't need to change anything  ***/
/***  below this point.  Just repeating: don't change anything below here. ***/
/***                                                                       ***/
/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************/


/*****************************************************************************/
/*                                                                           */
/*  Global constants and variables                                           */
/*                                                                           */
/*****************************************************************************/
/* Debug switches.  Where they are used in this part of the file (SETUP,     */
/* TRIE, NEXTCHAR, PREFIXEQ, EMIT), a non-zero value turns on tracing        */
/* written to stderr; DEBUG_PROCESS and DEBUG_MAIN are presumably used the   */
/* same way elsewhere.                                                       */
#define DEBUG_SETUP	0
#define DEBUG_PROCESS	0
#define DEBUG_TRIE	0
#define DEBUG_NEXTCHAR	0
#define DEBUG_PREFIXEQ	0
#define DEBUG_EMIT	0
#define DEBUG_MAIN	0

#define PRG2LOUT_VERSION "prg2lout Version 2.5 (November 2006)"
/* size of the input line buffer and of the static message/token buffers */
#define	MAX_LINE	1024

/* how blank lines interact with printed line numbers */
typedef enum {
  BLANKNUMBERED_NO,			/* blank lines have no line numbers  */
  BLANKNUMBERED_NOPRINT,		/* blank line numbers not printed    */
  BLANKNUMBERED_YES			/* blank line numbers printed        */
} BLANKNUMBERED_TYPE;

/* Input state.  NextChar stores each line starting at curr_line[1], so     */
/* line_pos counts columns from 1; ErrorHeader treats a zero line_num or     */
/* line_pos as "no position to report".                                      */
static char	file_name[MAX_LINE/2];	/* current input file name           */
static unsigned char curr_line[MAX_LINE]; /* current input line              */
static int	line_num;		/* current input line number         */
static int	line_pos;		/* current input column number       */
static bool	raw_seen;		/* true if -r (raw mode)             */

/* Command line options */
static bool	headers_option;		/* true if no -n option (headers)    */
static char	*style_option;		/* value of -p option, else null     */
static char	*font_option;		/* value of -f option, else null     */
static char	*size_option;		/* value of -s option, else null     */
static char	*line_option;		/* value of -v option, else null     */
static char	*bls_option;		/* value of -b option, else null     */
static char	*tabin_option;		/* value of -t option, else null     */
static char	*tabout_option;		/* value of -T option, else null     */
static char	*setup_option;		/* value of -S option, else null     */
static char	*language_option;	/* value of -l option, else null     */
static char	*numbered_option;	/* value of -L option, else null     */

/* Tab handling and line numbering state used by the back end */
static bool	tab_by_spacing;		/* true if using space chars to tab  */
static int	tab_in;			/* tab interval, value of -t option  */
static float	tab_out;		/* tab interval width (-T option)    */
static char 	tab_unit;		/* unit of measurement for tab       */
static bool 	print_lines;		/* true if we are printing line nums */
/* NOTE(review): unlike its neighbours, blanknumbered is not static */
BLANKNUMBERED_TYPE blanknumbered;	/* blank line numbering              */
static int 	print_num;		/* current line num for printing     */

/* Streams */
static FILE	*in_fp;			/* where input comes from	     */
static FILE	*out_fp;		/* where output goes		     */
static FILE	*err_fp;		/* where error messages go 	     */


/*****************************************************************************/
/*                                                                           */
/*  char *ErrorHeader()                                                      */
/*                                                                           */
/*  Returns a string showing the current file, line, and column.             */
/*                                                                           */
/*****************************************************************************/

/* Build the prefix that starts every diagnostic message.  The result lives  */
/* in a static buffer which is overwritten by the next call.  The position   */
/* is left out while line_num or line_pos is still zero, and the file name   */
/* is left out in raw mode.                                                  */
char *ErrorHeader()
{ static char header[MAX_LINE];

  /* no input position known yet: the program name alone */
  if( line_num == 0 || line_pos == 0 )
  { sprintf(header, "prg2lout");
    return header;
  }

  if( raw_seen )
    sprintf(header, "prg2lout %d,%d", line_num, line_pos);
  else
    sprintf(header, "prg2lout %s %d,%d", file_name, line_num, line_pos);
  return header;
}


/*****************************************************************************/
/*                                                                           */
/*  GetArg(arg, message, null_ok)                                            */
/*                                                                           */
/*  Get the next command line argument's value into arg.  If there isn't     */
/*  one, print an error message and quit unless null_ok is true.             */
/*                                                                           */
/*****************************************************************************/

/* GetArg expands to a brace-enclosed block, not an expression, and relies   */
/* on argc, argv and arg_pos being in scope where it is used.  Its cases,    */
/* tried in order, are:                                                      */
/*   1. text follows the first two characters of argv[arg_pos]: arg is set   */
/*      to point at that text;                                               */
/*   2. null_ok is false, a following argument exists and it does not begin  */
/*      with '-': arg_pos is advanced and arg is set to that argument;       */
/*   3. null_ok is true: arg is set to NULL;                                 */
/*   4. otherwise message is printed on err_fp and the program exits with    */
/*      status 1.                                                            */
#define GetArg(arg, message, null_ok)					\
{ if( strcmp(argv[arg_pos]+2, "") != 0 )				\
    arg = argv[arg_pos]+2;						\
  else if( !null_ok && arg_pos < argc-1 && *argv[arg_pos+1] != '-' )	\
    arg = argv[++arg_pos];						\
  else if( null_ok )							\
    arg = (char *) NULL;						\
  else									\
  { fprintf(err_fp, "%s: %s\n", ErrorHeader(), message);		\
    exit(1);								\
  }									\
} /* end GetArg */


/*****************************************************************************/
/*                                                                           */
/*  char *EchoToken(TOKEN *t)                                                */
/*                                                                           */
/*  Print a brief resume of token t                                          */
/*                                                                           */
/*****************************************************************************/

/* Return a short printable description of token t for debug output: its     */
/* name, or "(NULL)" when t is null.  The result is held in a static buffer  */
/* which is overwritten by the next call.                                    */
char *EchoToken(TOKEN *t)
{ static char text[MAX_LINE];

  if( t != (TOKEN *) NULL )
  { sprintf(text, "%s", t->name);
    return text;
  }
  sprintf(text, "(NULL)");
  return text;
}


/*****************************************************************************/
/*                                                                           */
/*  NextChar()                                                               */
/*                                                                           */
/*  Move to next character in the input file.  This may involve changing     */
/*  global variables curr_line, line_num, and line_pos; the new character    */
/*  may be found in curr_line[line_pos].                                     */
/*                                                                           */
/*  NextChar does not skip any characters at all.  When end of file is       */
/*  reached, curr_line[line_pos] contains '\0'.                              */
/*                                                                           */
/*  It is possible for code to read ahead of curr_line[line_pos] up to and   */
/*  including the newline character at the end of the line after the line    */
/*  we are currently on (thus it is possible to recognize an empty line as   */
/*  \n\n), but not beyond, using curr_line[line_pos + i] for i > 0.          */
/*                                                                           */
/*****************************************************************************/

/* Advance the input position by one character, moving on to the next line   */
/* (already read ahead, or freshly read from in_fp) when the current         */
/* character is the newline that ends the current line.                      */
void NextChar()
{
  if( curr_line[line_pos] != '\n' )
  {
    /* still within the current line; if EOF came before the end of the */
    /* line this steps onto the terminating '\0', as required           */
    line_pos++;
  }
  else
  {
    /* whatever follows the newline in the buffer is read-ahead text */
    unsigned char *lookahead = &curr_line[line_pos + 1];

    if( *lookahead != '\0' )
    {
      /* the next line is already in the buffer; slide it down to index 1 */
      memmove(&curr_line[1], lookahead, strlen((char *) lookahead) + 1);
    }
    else if( fgets((char *) &curr_line[1], MAX_LINE-2, in_fp) == (char *) NULL )
    {
      /* nothing more to read: leave an empty line to signal end of file */
      curr_line[1] = '\0';
    }
    line_num++;
    line_pos = 1;
  }

  if( DEBUG_NEXTCHAR )
    fprintf(stderr, "after NextChar, line_num %d, line_pos %d, curr_line %s",
      line_num, line_pos, &curr_line[1]);
} /* end NextChar */


/*****************************************************************************/
/*                                                                           */
/*  bool InputMatches(unsigned char *pattern)                                */
/*                                                                           */
/*  Returns true if input starting at curr_line[line_pos] matches pattern.   */
/*  To check this we may have to read an extra line or more of input.        */
/*                                                                           */
/*****************************************************************************/

/* Compare pattern with the input beginning at curr_line[line_pos].  When    */
/* the comparison runs off the end of what has been read so far, another     */
/* line is appended to curr_line from in_fp (or a '\0' is left there at end  */
/* of file).  Returns true only if the whole of pattern was matched.         */
bool InputMatches(unsigned char *pattern)
{ unsigned char *in = &curr_line[line_pos];
  unsigned char *pat = pattern;

  while( *pat != '\0' )
  {
    /* off the end of the buffered input: try to read one more line here */
    if( *in == '\0' &&
	fgets((char *) in, MAX_LINE-2-(in - curr_line), in_fp) == (char *) NULL )
      *in = '\0';

    if( *in != *pat )
      break;
    in++;
    pat++;
  }

  if( DEBUG_PREFIXEQ )
    fprintf(stderr, "InputMatches(%s, %s) returning %s\n",
      &curr_line[line_pos], pattern, *pat == '\0' ? "true" : "false");
  return (*pat == '\0');
} /* end InputMatches */


/*****************************************************************************/
/*                                                                           */
/*  TRIE                                                                     */
/*                                                                           */
/*  We use a trie to match the input against the opening pattern of each     */
/*  token, since some tokens (e.g. <=, // etc.) have multi-character         */
/*  opening patterns.                                                        */
/*                                                                           */
/*****************************************************************************/

/* One trie node.  Both arrays are indexed by the next character of the      */
/* string.  As TrieInsert builds it, value[c] holds the token whose string   */
/* ends with c at this node (NULL if none), and sub[c] is the node for       */
/* strings that continue beyond c (NULL if none).  TRIE is a pointer to a    */
/* node; a NULL TRIE is the empty trie.                                      */
typedef struct trie_node {
  struct trie_node *sub[MAX_CHAR];
  TOKEN		   *value[MAX_CHAR];
} *TRIE;


/*****************************************************************************/
/*                                                                           */
/*  bool TrieInsert(&T, str, val)                                            */
/*                                                                           */
/*  Insert str into trie T.  May need a new root so pass T by reference.     */
/*  Return false if the insertion failed, either because the string was      */
/*  empty, or because it was the same as a previously inserted string.       */
/*                                                                           */
/*****************************************************************************/

/* Insert str into the trie *T with associated value val, creating nodes as  */
/* needed (hence T is passed by reference).  Returns true on success and     */
/* false if str is empty or was inserted previously.  If a new node cannot   */
/* be allocated, an error message is printed on err_fp and the program       */
/* exits with status 1.                                                      */
bool TrieInsert(TRIE *T, unsigned char *str, TOKEN *val)
{ bool res;
  if( DEBUG_TRIE )
    fprintf(stderr, "[ TrieInsert(T, %s, %s)\n", str, EchoToken(val));
  if( *str == '\0' )
    res = false;
  else
  { if( *T == (TRIE) NULL )
    { *T = (TRIE) calloc(1, sizeof(struct trie_node));  /* will set all to 0 */
      if( *T == (TRIE) NULL )
      { /* do not dereference a failed allocation below */
	fprintf(err_fp, "%s: run out of memory while building trie\n",
	  ErrorHeader());
	exit(1);
      }
    }
    if( *(str + 1) != '\0' )
    { /* more characters follow: descend into the subtrie for *str */
      res = TrieInsert(&((*T)->sub[(int) *str]), str + 1, val);
    }
    else if( (*T)->value[(int) *str] != (TOKEN *) NULL )
    { /* last character, but this string has been inserted already */
      res = false;
    }
    else
    { (*T)->value[(int) *str] = val;
      res = true;
    }
  }
  if( DEBUG_TRIE )
    fprintf(stderr, "] TrieInsert(T, %s, %s) returning %s\n", str,
      EchoToken(val), res ? "true" : "false");
  return res;
}

/*****************************************************************************/
/*                                                                           */
/*  TOKEN *TrieRetrieve(T, str, &len)                                        */
/*                                                                           */
/*  Find the longest prefix of string str in T.  If this is empty, return    */
/*  NULL.  If non-empty, return the corresponding value as the result, and   */
/*  the length of the prefix in *len.                                        */
/*                                                                           */
/*****************************************************************************/

/* Walk down trie T along the characters of str, remembering the value of    */
/* the longest prefix of str that has one.  Returns that value with its      */
/* length in *len, or NULL with *len == 0 if no prefix of str is in T.       */
TOKEN *TrieRetrieve(TRIE T, unsigned char *str, int *len)
{ TOKEN *best;
  TRIE node = T;
  int depth = 0;

  if( DEBUG_TRIE )
    fprintf(stderr, "[ TrieRetrieve(T, %s, len)\n", str);
  best = (TOKEN *) NULL;
  *len = 0;

  while( node != (TRIE) NULL )
  { int ch = (int) str[depth];
    if( DEBUG_TRIE )
      fprintf(stderr, "  i = %d, res = %s\n", depth, EchoToken(best));

    /* a value here means str[0..depth] is an inserted string */
    if( node->value[ch] != (TOKEN *) NULL )
    { best = node->value[ch];
      *len = depth + 1;
    }
    node = node->sub[ch];
    depth++;
  }

  if( DEBUG_TRIE )
    fprintf(stderr, "] TrieRetrieve returning (*len = %d) %s\n",
      *len, EchoToken(best));
  return best;
}


/*****************************************************************************/
/*                                                                           */
/*  HASH_TABLE                                                               */
/*                                                                           */
/*  We use a hash table to hold the keywords.  There is no associated        */
/*  value, we just want to know whether they are there or not.               */
/*                                                                           */
/*  NB MAX_SYM must be somewhat larger than the number of keywords.          */
/*                                                                           */
/*****************************************************************************/
#define MAX_SYM 609

static char *HashTable[MAX_SYM];		/* will initialize to NULL   */
static int HashTableCount = 0;			/* number of entries         */

/* Hash function: the sum of the bytes of key, modulo MAX_SYM.  Each byte    */
/* is taken as an unsigned char, so that bytes of 0x80 and above cannot      */
/* make the sum (and hence the table index) negative on platforms where      */
/* plain char is signed.  The result is always in the range 0..MAX_SYM-1.    */
static int hash(char *key)
{ int i, res;
  res = 0;
  for( i = 0;  key[i] != '\0';  i++ )
  { res += (unsigned char) key[i];
  }
  return res % MAX_SYM;
} /* end hash */

/* Add str to the keyword hash table using linear probing.  The pointer      */
/* itself is stored, not a copy of the string.  Aborts with an internal      */
/* error message if the table holds MAX_SYM - 20 entries or more.            */
void HashInsert(char *str)
{ int slot;
  if( DEBUG_SETUP )
    fprintf(stderr, "[ HashInsert(%s)\n", str);

  if( HashTableCount >= MAX_SYM - 20 )
  {
    fprintf(err_fp, "%s internal error: full hash table (increase MAX_SYM)\n",
      ErrorHeader());
    abort();
  }

  /* probe forward, wrapping around, until a free slot turns up */
  slot = hash(str);
  while( HashTable[slot] != (char *) NULL )
    slot = (slot + 1) % MAX_SYM;
  HashTable[slot] = str;
  HashTableCount++;

  if( DEBUG_SETUP )
    fprintf(stderr, "] HashInsert(%s)\n", str);
}

/* Return true if a string equal to str is in the keyword hash table.  The   */
/* probe starts at hash(str) and stops at the first empty slot.              */
bool HashRetrieve(char *str)
{ int slot = hash(str);

  while( HashTable[slot] != (char *) NULL )
  { if( strcmp( (char *) HashTable[slot], (char *) str) == 0 )
      return true;
    slot = (slot + 1) % MAX_SYM;
  }
  return false;
}


/*****************************************************************************/
/*                                                                           */
/*  BACK END                                                                 */
/*                                                                           */
/*  This is the code that actually prints the output file.                   */
/*  To emit one token, the call sequence should be as follows:               */
/*                                                                           */
/*     StartEmit(LANGUAGE *lang, TOKEN *current_token,                       */
/*       unsigned char *start_delim, l)                                      */
/*     Emit(TOKEN *current_token, unsigned char ch)                          */
/*     ...                                                                   */
/*     Emit(TOKEN *current_token, unsigned char ch)                          */
/*     EndEmit(TOKEN *current_token, unsigned char *end_delim)               */
/*                                                                           */
/*  The back end will then take care of all print styles automatically,      */
/*  including checking for keywords.  When emitting white space each space   */
/*  can be sent directly:                                                    */
/*                                                                           */
/*     EmitRaw(ch)                                                           */
/*                                                                           */
/*****************************************************************************/

/* State shared by the back end routines.  While save_on is true a token is  */
/* being accumulated rather than printed; EmitRaw refuses to run in that     */
/* state and StartEmit refuses to start a new token.                         */
static unsigned char save_value[MAX_LINE];	/* the token text            */
static int	save_len;                       /* index of \0 in save_value */
static bool	save_on = false;		/* true when saving          */
static LANGUAGE	*save_language;			/* the current language      */
static int	out_linepos = 0;		/* output line position      */
static bool	out_linestart = true;		/* true if out line start    */
static bool	out_formfeed = false;		/* true if last was formfeed */
static int	brace_depth;			/* brace depth in verbatim   */

/* declared here because StartEmit calls Emit; presumably defined later */
extern void Emit(TOKEN *current_token, unsigned char ch);

/*****************************************************************************/
/*                                                                           */
/*  EmitTab()                                                                */
/*                                                                           */
/*  Emit one tab character, keeping track of where we are up to in the       */
/*  global variable out_linepos.                                             */
/*                                                                           */
/*****************************************************************************/

/* Emit one tab.  out_linepos always advances by at least one column and     */
/* then on to the next multiple of tab_in.  With tab_by_spacing the gap is   */
/* filled with spaces; otherwise a Lout $> tab command is printed, whose      */
/* form depends on whether we are still at the start of the output line.     */
/* Either way out_formfeed is cleared.                                       */
void EmitTab()
{
  if( tab_by_spacing )
  {
    /* one space per column, up to the next tab stop */
    do
    { putc(' ', out_fp);
      out_linepos++;
    } while( out_linepos % tab_in != 0 );
    out_formfeed = false;
    return;
  }

  /* move the column counter to the next tab stop */
  do
  { out_linepos++;
  } while( out_linepos % tab_in != 0 );

  /* NB {} is required in case nothing follows on this line */
  if( out_linestart )
    fprintf(out_fp, "$>\"%.1f%c\" {}", tab_out, tab_unit);
  else
    fprintf(out_fp, "$>\"%.1f%ct\" {}", (out_linepos/tab_in)*tab_out,
      tab_unit);
  out_formfeed = false;
}


/*****************************************************************************/
/*                                                                           */
/*  EmitRaw(ch)                                                              */
/*                                                                           */
/*  Emit this character immediately.  This is only legal when not saving.    */
/*  All characters printed on the output file that represent actual text     */
/*  of the program (i.e. not commands, {}, "", \ in strings etc.) should     */
/*  pass through here, since EmitRaw keeps track of where we are on          */
/*  the output line, in order to handle tab characters correctly.            */
/*                                                                           */
/*  NB out_linepos is the column where the *next* character will go, and     */
/*  it counts the first column on the line as column zero.  It understands   */
/*  that a tab character always produces at least one space, and that the    */
/*  character after a tab goes in a column whose number mod tab_in is zero.  */
/*                                                                           */
/*****************************************************************************/

/* Print ch on the output file at once, keeping out_linepos, out_linestart   */
/* and out_formfeed up to date, and printing a line number first when line   */
/* numbering is on and we are at column zero.  It is an internal error to    */
/* call this while a token is being saved.                                   */
void EmitRaw(unsigned char ch)
{
  if( DEBUG_EMIT )
    fprintf(stderr, "EmitRaw(%c); out_linepos %d, out_linestart %s\n",
      ch, out_linepos, out_linestart ? "true" : "false");

  if( save_on )
  { fprintf(err_fp, "%s internal error (EmitRaw save_on)\n", ErrorHeader());
    abort();
  }

  /* drop empty lines following formfeed */
  if( out_formfeed && (ch == '\n' || ch == '\f') )
  { out_formfeed = (ch == '\f');
    return;
  }

  /* emit line number if required */
  if( print_lines && out_linepos == 0 )
  { int blank_line = (ch == '\n');

    if( out_formfeed )  print_num--;
    if( !blank_line || blanknumbered == BLANKNUMBERED_YES )
    { char num[20];
      sprintf(num, "%d", print_num);
      fprintf(out_fp, "@PL{\"%s\"}", num);
      out_linepos += strlen(num);
      out_linestart = false;
      EmitTab();
    }
    if( !blank_line || blanknumbered != BLANKNUMBERED_NO )
      print_num++;
  }

  if( ch == '\t' )
  {
    /* tabs are expanded by EmitTab, which also moves out_linepos */
    EmitTab();
    out_formfeed = false;
  }
  else if( ch == '\f' )
  {
    /* a formfeed becomes a new page command on a line of its own */
    fputs("\n@NP\n", out_fp);
    out_linepos = 0;
    out_linestart = true;
    out_formfeed = true;
  }
  else
  {
    /* everything else is printed as is */
    fputc(ch, out_fp);
    if( ch == '\n' )
    { out_linepos = 0;
      out_linestart = true;
    }
    else
    { out_linepos++;
      /* a space leaves out_linestart as it was */
      if( ch != ' ' )
	out_linestart = false;
    }
    out_formfeed = false;
  }

  if( DEBUG_EMIT )
    fprintf(stderr, "EmitRaw(%c) returning; out_linepos %d, out_linestart %s\n",
      ch, out_linepos, out_linestart ? "true" : "false");
} /* end EmitRaw */


/*****************************************************************************/
/*                                                                           */
/*  StartEmit(LANGUAGE *lang, TOKEN *current_token,                          */
/*    unsigned char *start_delim, len)                                       */
/*                                                                           */
/*  Start the emission of a token.  If it is a PRINT_WHOLE_QUOTED, it has    */
/*  to be saved since it might be a keyword.                                 */
/*                                                                           */
/*  The token began with the starting delimiter start_delim[0..len-1].       */
/*                                                                           */
/*****************************************************************************/

/* Begin emitting current_token, whose opening delimiter is                  */
/* start_delim[0..len-1].  Records lang as the current language, prints a    */
/* line number if one is due, and then does whatever the token's print       */
/* style requires: start saving the token text (the quoted styles), or       */
/* print the token's command and possibly the delimiter (the others).        */
/* It is an internal error to call this while a token is being saved.        */
void StartEmit(LANGUAGE *lang, TOKEN *current_token,
  unsigned char *start_delim, int len)
{ int k;

  if( save_on )
  {
    fprintf(err_fp, "%s internal error (StartEmit)\n", ErrorHeader());
    abort();
  }
  save_language = lang;

  /* emit line number if required */
  if( print_lines && out_linepos == 0 )
  { char num[20];
    if( out_formfeed )  print_num--;
    sprintf(num, "%d", print_num);
    fprintf(out_fp, "@PL{\"%s\"}", num);
    out_linepos += strlen(num);
    out_linestart = false;
    EmitTab();
    print_num++;
  }

  switch( current_token->print_style )
  {
    case PRINT_WHOLE_QUOTED:
    case PRINT_NODELIMS_QUOTED:

      /* both quoted styles save the token text, starting out empty */
      save_on = true;
      save_len = 0;
      save_value[0] = '\0';

      /* only PRINT_WHOLE_QUOTED includes the opening delimiter */
      if( current_token->print_style == PRINT_WHOLE_QUOTED )
	for( k = 0;  k < len;  k++ )
	  Emit(current_token, start_delim[k]);
      break;


    case PRINT_WHOLE_UNQUOTED:
    case PRINT_NODELIMS_UNQUOTED:

      /* both unquoted styles print the command, if there is one */
      if( current_token->command[0] != '\0' )
	fprintf(out_fp, "%s{", current_token->command); /*}*/

      if( current_token->print_style == PRINT_WHOLE_UNQUOTED )
      {
	/* print opening delimiter, verbatim */
	for( k = 0;  k < len;  k++ )
	  putc(start_delim[k], out_fp);
      }
      else
      {
	/* record that we are currently inside no braces in verbatim text */
	brace_depth = 0;
      }
      break;


    case PRINT_NODELIMS_INNER:

      /* command is printed but not delimiter; always print opening brace */
      fprintf(out_fp, "%s{", current_token->command); /*}*/
      break;


    case PRINT_COMMAND_ONLY:

      /* command is printed but nothing else */
      fprintf(out_fp, "%s", current_token->command);
      break;


    default:

      fprintf(err_fp, "%s internal error (print_style)\n", ErrorHeader());
      abort();
      break;
  }
} /* end StartEmit */


/*****************************************************************************/
/*                                                                           */
/*  EndEmit(TOKEN *current_token, unsigned char *end_delim)                  */
/*                                                                           */
/*  End emitting the current token.  Its ending delimiter was end_delim.     */
/*                                                                           */
/*****************************************************************************/
/* true if position i of string s is at the very start of s, or immediately  */
/* follows a newline or formfeed character within s                          */
#define at_start_line(s, i) ((i) == 0 || s[(i)-1] == '\n' || s[(i)-1] == '\f' )

void EndEmit(TOKEN *current_token, unsigned char *end_delim)
{ unsigned char *com;		/* the command actually printed (quoted styles) */
  int i;
  bool quoted_now = false;	/* true while output is inside a "..." string  */
  switch( current_token->print_style )
  {

    case PRINT_WHOLE_QUOTED:

      /* first, emit (i.e. save) ending delimiter */
      for( i = 0;  end_delim[i] != '\0';  i++ )
	Emit(current_token, end_delim[i]);
      /* NB NO BREAK */
      /* (deliberate fall-through: the saved text is printed by the code   */
      /* shared with PRINT_NODELIMS_QUOTED below)                          */


    case PRINT_NODELIMS_QUOTED:

      /* work out whether we are printing the command or its alternative */
      /* (the alternative is chosen only when it is non-empty and          */
      /* HashRetrieve returns non-zero for the saved token text)           */
      com=(current_token->alternate_command[0]!='\0' &&
	HashRetrieve( (char *) save_value)?
	current_token->alternate_command : current_token->command);

      /* print command, opening brace */
      if( com[0] != '\0' )  fprintf(out_fp, "%s{", com);  /*}*/

      /* print the token with appropriate escapes */
      /* (quoted_now tracks whether a double quote is currently open in    */
      /* the output, so that quotes are opened and closed only on demand)  */
      save_on = false;
      for( i = 0;  i < save_len;  i++ ) switch( save_value[i] )
      {
	/* these characters are printed inside a quoted string */
	case '@':
	case '/':
	case '|':
	case '&':
	case '#':
	case '{':
	case '}':
	case '^':
	case '~':
	case '-':
	case '.':
	case '\'':

	  if( !quoted_now )
	  { putc('"', out_fp);
	    quoted_now = true;
	  }
	  EmitRaw(save_value[i]);
	  break;


	/* these are printed inside a quoted string, preceded by a backslash */
	case '"':
	case '\\':

	  if( !quoted_now )
	  { putc('"', out_fp);
	    quoted_now = true;
	  }
	  putc('\\', out_fp);
	  EmitRaw(save_value[i]);
	  break;


	case ' ':
	case '\t':

	  /* make initial white space significant using "" */
	  /* (the quote opened here is closed again by the test just below, */
	  /* so the output is an empty string "" followed by the space)     */
	  if( !quoted_now && at_start_line(save_value, i) )
	  {
	    putc('"', out_fp);
	    quoted_now = true;
	    out_linestart = false;
	  }

	  /* make sure we aren't in quoted text */
	  if( quoted_now )
	  { putc('"', out_fp);
	    quoted_now = false;
	  }

	  /* print the character */
	  EmitRaw(save_value[i]);
	  break;


	case '\n':
	case '\f':

	  /* these characters are not saved */
	  /* (Emit prints them immediately rather than saving them, so     */
	  /* finding one in save_value is treated as an internal error)    */
	  fprintf(err_fp, "%s internal error (EndEmit nl/ff)\n", ErrorHeader());
	  exit(1);
	  break;


	default:

	  /* anything else can be quoted or unquoted ad. lib. */
	  EmitRaw(save_value[i]);
	  break;

      }
      /* print closing quote and closing brace if needed */
      if( quoted_now )  putc('"', out_fp);
      else if( save_len > 0 && is_whitespace(save_value[save_len-1]) )
	fputs("\"\"", out_fp);  /* makes trailing white space significant */
      if( com[0] != '\0' )  /*{*/ putc('}', out_fp);
      break;


    case PRINT_WHOLE_UNQUOTED:

      /* print end delimiter, verbatim */
      fputs( (char *) end_delim, out_fp);
      /* NB NO BREAK */
      /* (deliberate fall-through: the closing brace logic is shared with  */
      /* PRINT_NODELIMS_UNQUOTED below)                                    */


    case PRINT_NODELIMS_UNQUOTED:

      /* print closing brace if required*/
      /* (brace_depth is the count of unmatched opening braces maintained  */
      /* by Emit; any left open are closed here, with a warning, before    */
      /* the brace that closes the command itself)                         */
      if( current_token->command[0] != '\0' )
      {
	if( brace_depth > 0 )
	{
	  if( brace_depth > 1 )
	    fprintf(err_fp, "%s: inserted %d closing braces at end of %s\n",
	      ErrorHeader(), brace_depth, current_token->name);
	  else
	    fprintf(err_fp, "%s: inserted one closing brace at end of %s\n",
	      ErrorHeader(), current_token->name);
	  while( brace_depth > 0 )
	  {
	    /*{*/ putc('}', out_fp);
	    brace_depth--;
	  }
	}
	/*{*/ putc('}', out_fp);
      }
      break;


    case PRINT_NODELIMS_INNER:

      /* always print closing brace */
      /*{*/ putc('}', out_fp);
      break;


    case PRINT_COMMAND_ONLY:

      /* nothing to close: this style prints no brace and no token text */
      break;


    default:

      fprintf(err_fp, "%s internal error (print_style)\n", ErrorHeader());
      abort();
      break;
  }

  /* print following command if any */
  if( current_token->following_command != NULL )
    fprintf(out_fp, "%s", current_token->following_command);
} /* end EndEmit */


/*****************************************************************************/
/*                                                                           */
/*  Emit(TOKEN *current_token, unsigned char ch)                             */
/*                                                                           */
/*  Emit one character of the current token.                                 */
/*                                                                           */
/*****************************************************************************/

void Emit(TOKEN *current_token, unsigned char ch)
{
  switch( current_token->print_style )
  {

    case PRINT_COMMAND_ONLY:

      /* only the command is printed for this style, so drop the character */
      return;


    case PRINT_NODELIMS_INNER:

      /* the body of an INNER token is handled by a recursive Process call, */
      /* so no character should ever be emitted on its behalf               */
      fprintf(err_fp, "%s internal error (emitting INNER)\n", ErrorHeader());
      abort();
      return;


    case PRINT_WHOLE_UNQUOTED:
    case PRINT_NODELIMS_UNQUOTED:

      /* keep track of brace nesting so that the output stays balanced */
      switch( ch )
      {
	case '{':

	  brace_depth++;
	  break;

	case '}':

	  brace_depth--;
	  if( brace_depth < 0 && current_token->command[0] != '\0' )
	  {
	    /* unmatched closing brace: supply the opening one ourselves */
	    fprintf(err_fp, "%s: inserted opening brace within %s\n",
	      ErrorHeader(), current_token->name);
	    putc('{', out_fp); /*}*/
	    brace_depth++;
	  }
	  break;

	default:

	  break;
      }

      /* the character itself goes out verbatim */
      putc(ch, out_fp);
      return;


    case PRINT_WHOLE_QUOTED:
    case PRINT_NODELIMS_QUOTED:

      /* quoted styles accumulate their text in save_value until EndEmit */
      if( !save_on )
      { fprintf(err_fp, "%s internal error (EmitChar)\n", ErrorHeader());
	abort();
      }

      if( ch == '\n' || ch == '\f' )
      {
	/* line breaks are never saved: flush what we have, print the     */
	/* break, and then reopen the token with no starting delimiter    */
	EndEmit(current_token, U "");
	EmitRaw(ch);
	StartEmit(save_language, current_token, U "", 0);
	return;
      }

      if( save_len >= MAX_LINE - 1 )
      {
	/* no room left for this character plus the terminating '\0' */
	fprintf(err_fp, "%s internal error (token too long)\n", ErrorHeader());
	exit(1);
      }

      /* append the character, keeping save_value null-terminated */
      save_value[save_len++] = ch;
      save_value[save_len] = '\0';
      return;


    default:

      fprintf(err_fp, "%s internal error (print_style)\n", ErrorHeader());
      abort();
      return;

  }
} /* end Emit */


/*****************************************************************************/
/*                                                                           */
/*  EmitProtected(unsigned char ch)                                          */
/*                                                                           */
/*  Emit one character of the current token.  If the character is a special  */
/*  one in Lout, protect it with quotes.                                     */
/*                                                                           */
/*****************************************************************************/

void EmitProtected(unsigned char ch)
{
  switch( ch )
  {
    case '@':
    case '/':
    case '|':
    case '&':
    case '#':
    case '{':
    case '}':
    case '^':
    case '~':
    case '-':

      putc('"', out_fp);
      EmitRaw(ch);
      putc('"', out_fp);
      break;


    case '"':
    case '\\':

      putc('"', out_fp);
      putc('\\', out_fp);
      EmitRaw(ch);
      putc('"', out_fp);
      break;


    default:

      EmitRaw(ch);
      break;
  }
} /* end EmitProtected */


/*****************************************************************************/
/*                                                                           */
/*  TOKEN *ExpandToken(TOKEN *t, int starts_pos)                             */
/*                                                                           */
/*  Create a new token corresponding to t but using starts2[starts_pos] and  */
/*  ends2[starts_pos] only.                                                  */
/*                                                                           */
/*****************************************************************************/

unsigned char *clone2strings(unsigned char *s1, unsigned char *s2)
{ unsigned char *res;
  res = (unsigned char *) malloc(
    (strlen( (char *) s1) + strlen( (char *) s2) + 1) * sizeof(unsigned char));
  sprintf( (char *) res, "%s%s", s1, s2);
  if( DEBUG_SETUP )
    fprintf(stderr, "clone2strings(%s, %s) = %s\n", s1, s2, res);
  return res;
} /* end clone2strings */

TOKEN *ExpandToken(TOKEN *t, int starts_pos)
{ TOKEN *res; int i;
  if( DEBUG_SETUP )
    fprintf(stderr, "ExpandToken(%s, starts[0] = %s)\n", t->name, t->starts[0]);

  /* get a zero-filled token record; fields not assigned below stay zero, */
  /* which is also what terminates the new starts array                   */
  res = (TOKEN *) calloc(1, sizeof(struct token_rec));
  if( res == NULL )
  { fprintf(err_fp, "%s: run out of memory while expanding token %s\n",
      ErrorHeader(), t->name);
    exit(1);
  }

  /* these fields are shared with t unchanged */
  res->name = t->name;
  res->print_style = t->print_style;
  res->command = t->command;
  res->alternate_command = t->alternate_command;
  res->following_command = t->following_command;
  res->start_line_only = t->start_line_only;
  for( i = 0;  t->starts[i] != NULL;  i++ )
  {
    /* the starts of res are the start of t with starts2[starts_pos] added */
    res->starts[i] = clone2strings(t->starts[i], t->starts2[starts_pos]);
  }
  res->legal = t->legal;
  res->escape = t->escape;
  res->escape_legal = t->escape_legal;
  res->inner_escape = t->inner_escape;
  res->end_inner_escape = t->end_inner_escape;

  /* the bracket and end delimiters are those paired with starts2[starts_pos] */
  res->bracket_delimiter = t->brackets2[starts_pos];
  res->end_delimiter = t->ends2[starts_pos];
  res->end_start_line_only = t->end_start_line_only;
  res->want_two_ends = t->want_two_ends;
  if( DEBUG_SETUP )
    fprintf(stderr, "ExpandToken returning res = %s, starts[0] = %s)\n",
      res->name, res->starts[0]);
  return res;
} /* end ExpandToken */


/*****************************************************************************/
/*                                                                           */
/*  void SetupOneToken(TOKEN *t)                                             */
/*                                                                           */
/*  Set up one token.  This involves initializing the chtype and             */
/*  escape_chtype fields for the token, and loading the trie with all        */
/*  the opening delimiters of the token.                                     */
/*                                                                           */
/*****************************************************************************/
#define LEGAL		1
#define ESCAPE		2
#define INNER_ESCAPE	3
TRIE Trie = (TRIE) NULL;		/* these tokens allowed anywhere     */
TRIE StartLineTrie = (TRIE) NULL;	/* these allowed at line start only  */

void SetupOneToken(TOKEN *t)
{ int j;
  if( DEBUG_SETUP )  fprintf(stderr, "SetupOneToken(%s)\n", t->starts[0]);

  /* check that any PRINT_NODELIMS_INNER styles have an end delimiter */
  /* (this is reported but setup carries on regardless)               */
  if( t->print_style == PRINT_NODELIMS_INNER )
  { if( t->end_delimiter == NULL || t->end_delimiter[0] == '\0' )
    { fprintf(err_fp, "%s: token %s is INNER but has no end delimiter\n",
	ErrorHeader(), t->name);
    }
  }

  /* set up the chtype table for this token; entries not assigned here   */
  /* keep whatever value they already had, and the escape and inner      */
  /* escape characters override LEGAL because they are assigned last     */
  if( t->legal == NULL )  /* all characters are legal in this case */
    for( j = 0; j < MAX_CHAR; j++ )  t->chtype[j] = LEGAL;
  else /* the characters in t->legal are legal in this case */
    for( j = 0; t->legal[j] != '\0'; j++ ) t->chtype[(int) t->legal[j]] = LEGAL;
  if( t->escape[0] != '\0' )
    t->chtype[(int) t->escape[0]] = ESCAPE;
  if( t->inner_escape[0] != '\0' )
    t->chtype[(int) t->inner_escape[0]] = INNER_ESCAPE;

  /* set up the escape_chtype table for this token */
  if( t->escape_legal == NULL )
  { /* all characters are legal after an escape character */
    for( j = 0;  j < MAX_CHAR;  j++ ) t->escape_chtype[j] = LEGAL;
  }
  else
  { /* the characters in t->escape_legal are legal after an escape character */
    for( j = 0;  t->escape_legal[j] != '\0';  j++ )
      t->escape_chtype[(int) t->escape_legal[j]] = LEGAL;
  }

  /* load the opening delimiters of this token into the trie; tokens that */
  /* may only start a line go into StartLineTrie, the others into Trie    */
  for( j = 0;  t->starts[j] != (unsigned char *) NULL;  j++ )
  { if( !TrieInsert(t->start_line_only ? &StartLineTrie:&Trie,t->starts[j],t) )
    { if( *(t->starts[j]) == '\0' )
	fprintf(err_fp, "%s: empty starting delimiter\n", ErrorHeader());
      else
	fprintf(err_fp, "%s: starting delimiter %s appears twice\n",
	  ErrorHeader(), t->starts[j]);
    }
  }

  if( DEBUG_SETUP ) fprintf(stderr, "SetupOneToken ending %s\n", t->starts[0]);
} /* end SetupOneToken */


/*****************************************************************************/
/*                                                                           */
/*  SetupLanguage(LANGUAGE *lang)                                            */
/*                                                                           */
/*  Set up the runtime token structures.  This involves setting up each      */
/*  token (see above), and also loading the hash table with the keywords.    */
/*                                                                           */
/*  If a token has non-empty starts2 and ends2 pairs, it is expanded into    */
/*  a set of tokens, one for each pair, with the start delimiter set to      */
/*  the concatenation of the start delimiters and starts2, and end           */
/*  delimiter set to the corresponding end2.                                 */
/*                                                                           */
/*****************************************************************************/

void SetupLanguage(LANGUAGE *lang)
{ TOKEN *tok, *expanded;  int tok_index, pair_index, kw_index;

  if( DEBUG_SETUP )
    fprintf(stderr, "SetupLanguage(%s)\n", lang->names[0]);

  /* visit the tokens of the language in order, up to the NULL sentinel */
  tok_index = 0;
  while( (tok = lang->tokens[tok_index]) != (TOKEN *) NULL )
  {
    if( DEBUG_SETUP )
      fprintf(stderr, "  (1) setting up token %s (starts[0] = %s)\n",
	tok->name, tok->starts[0]);

    if( tok->starts2[0] == NULL )
    {
      /* plain token: set it up exactly as it stands */
      SetupOneToken(tok);
    }
    else
    {
      /* token with starts2: set up one expanded copy per starts2 entry */
      for( pair_index = 0;  tok->starts2[pair_index] != NULL;  pair_index++ )
      {
	expanded = ExpandToken(tok, pair_index);
	if( DEBUG_SETUP )
	  fprintf(stderr, "  (2) setting up token %s (starts[0] = %s)\n",
	    expanded->name, expanded->starts[0]);
	SetupOneToken(expanded);
      }
    }
    tok_index++;
  }

  /* enter every keyword of the language into the hash table */
  for( kw_index = 0;  lang->keywords[kw_index] != NULL;  kw_index++ )
    HashInsert(lang->keywords[kw_index]);

  if( DEBUG_SETUP )
    fprintf(stderr, "SetupLanguage(%s) returning.\n", lang->names[0]);
} /* end SetupLanguage */


/*****************************************************************************/
/*                                                                           */
/*  bool Printable(unsigned char ch)                                         */
/*                                                                           */
/*  Return true if ch is a printable character.  Used only by error          */
/*  messages so can be slow.                                                 */
/*                                                                           */
/*****************************************************************************/

bool Printable(unsigned char ch)
{ unsigned char *p;

  /* the null character is never printable; without this test the scan   */
  /* below would stop on the terminator of AllPrintable and report true  */
  if( ch == '\0' )
    return false;

  /* linear scan of AllPrintable, stopping at ch or at the terminator */
  for( p = AllPrintable;  *p != '\0' && *p != ch;  p++ );
  return (*p == ch);
} /* end Printable */


/*****************************************************************************/
/*                                                                           */
/*  TOKEN *TokenStartingHere(int *len)                                       */
/*                                                                           */
/*  Returns the token starting here if there is one, else NULL.              */
/*  If found, the length of its starting delimiter is returned in *len.      */
/*                                                                           */
/*****************************************************************************/

TOKEN *TokenStartingHere(int *len)
{ TOKEN *found = (TOKEN *) NULL;

  /* tokens restricted to line starts are tried first, and only when */
  /* the current position is the first of its line                  */
  if( line_pos == 1 )
    found = TrieRetrieve(StartLineTrie, &curr_line[line_pos], len);

  /* otherwise, or if that found nothing, try the unrestricted tokens */
  if( found == (TOKEN *) NULL )
    found = TrieRetrieve(Trie, &curr_line[line_pos], len);

  return found;
}


/*****************************************************************************/
/*                                                                           */
/*  int Matching()                                                           */
/*                                                                           */
/*  Return the index of the pair that matches the current input.             */
/*                                                                           */
/*****************************************************************************/

int Matching()
{ int idx = 0;

  /* advance to the first pair whose first string matches the input, */
  /* or to the terminating entry (first == NULL) if no pair matches   */
  while( pairs[idx].first != NULL && !InputMatches(pairs[idx].first) )
    idx++;

  if( DEBUG_PROCESS )
  {
    fprintf(stderr, "Matching() = %d (\"%s\", \"%s\")\n", idx,
      pairs[idx].first  == NULL ? "NULL" : (char *) pairs[idx].first,
      pairs[idx].second == NULL ? "NULL" : (char *) pairs[idx].second);
  }
  return idx;
}


/*****************************************************************************/
/*                                                                           */
/*  Process(LANGUAGE *lang, TOKEN *outer_token,                              */
/*    unsigned char *outer_end_delimiter)                                    */
/*                                                                           */
/*  Process a sequence of input tokens.  If we are currently recursing       */
/*  inside some other token, outer_token is non-null and is that token,      */
/*  and we stop when we reach outer_end_delimiter outside any token.         */
/*  Otherwise we stop at end of file.                                        */
/*                                                                           */
/*****************************************************************************/
#define	START				1
#define	IN_TOKEN			2
#define	IN_TOKEN_NEEDING_DELIM		3
#define	IN_TOKEN_AFTER_ESCAPE		4
#define	IN_TOKEN_AFTER_INNER_ESCAPE	5
#define	STOP				6

/* Return the name of state s as a string, or "?" if s is not one of the  */
/* states defined above.                                                  */

char *debug_state(int s)
{ char *res;
  if( s == START )
    res = "START";
  else if( s == IN_TOKEN )
    res = "IN_TOKEN";
  else if( s == IN_TOKEN_NEEDING_DELIM )
    res = "IN_TOKEN_NEEDING_DELIM";
  else if( s == IN_TOKEN_AFTER_ESCAPE )
    res = "IN_TOKEN_AFTER_ESCAPE";
  else if( s == IN_TOKEN_AFTER_INNER_ESCAPE )
    res = "IN_TOKEN_AFTER_INNER_ESCAPE";
  else if( s == STOP )
    res = "STOP";
  else
    res = "?";
  return res;
}

/* Process is a state machine driven by the current input character,       */
/* curr_line[line_pos].  The states are:                                    */
/*                                                                           */
/*   START                        between tokens                            */
/*   IN_TOKEN                     inside current_token                      */
/*   IN_TOKEN_NEEDING_DELIM       inside a token that wants two ends, after */
/*                                the first end, looking for a new opening  */
/*                                delimiter taken from the pairs table      */
/*   IN_TOKEN_AFTER_ESCAPE        just after the token's escape character   */
/*   IN_TOKEN_AFTER_INNER_ESCAPE  just after a recursive call for an inner  */
/*                                escape has returned                       */
/*   STOP                         outer_end_delimiter has been read         */
/*                                                                           */
/* The loop ends at STOP or when the current character is '\0'; the switch  */
/* after the loop then reports and closes any token left unfinished.        */

void Process(LANGUAGE *lang, TOKEN *outer_token,
  unsigned char *outer_end_delimiter)
{ TOKEN *current_token = (TOKEN *) NULL; int len, i, state;
  /* end_delimiter_depth counts bracket delimiters not yet closed, and    */
  /* end_delimiter_count the end delimiters still needed to end the token */
  int end_delimiter_depth = 0, end_delimiter_count = 0;
  unsigned char *curr_end_delim = U "", *curr_bracket_delim = U "";
  if( DEBUG_PROCESS )
    fprintf(stderr, "[ Process(%s, -, -, -, -)\n", lang->names[0]);

  state = START;
  while( curr_line[line_pos] != '\0' && state != STOP )
  {
    if( DEBUG_PROCESS )
    {
      if( state >= IN_TOKEN )
	fprintf(stderr,
	  "  %s, depth %d, count %d, bracket \"%s\", end \"%s\", ch %c\n",
	  debug_state(state), end_delimiter_depth, end_delimiter_count,
	  curr_bracket_delim, curr_end_delim, curr_line[line_pos]);
      else
	fprintf(stderr, "  %s, ch %c\n",
	  debug_state(state), curr_line[line_pos]);
    }

    switch( state )
    {

      case START:  /* between tokens; try each of the following */

	/* check whether outer_token is ending here, in which case stop */
	if( outer_token != (TOKEN *) NULL &&
	    curr_line[line_pos] == outer_end_delimiter[0] &&
	    InputMatches(outer_end_delimiter) )
	{
	  /* read over the outer end delimiter without emitting it */
	  len = strlen( (char *) outer_end_delimiter);
	  for( i = 0;  i < len;  i++ )
	    NextChar();
	  state = STOP;
	}

	/* check whether a token is starting here, in which case start it */
	else if( (current_token = TokenStartingHere(&len)) != (TOKEN *) NULL )
	{
	  if( DEBUG_PROCESS )
	  { fprintf(stderr, "current_token (len = %d): %s\n",
	      len, EchoToken(current_token));
	  }
	  StartEmit(lang, current_token, &curr_line[line_pos], len);

	  /* skip the starting delimiter */
	  for( i = 0;  i < len;  i++ )
	    NextChar();

	  /* we are now either in a token, or else we have to start an inner */
	  if( current_token->print_style == PRINT_NODELIMS_INNER )
	  {
	    /* the body of an INNER token is itself a sequence of tokens,  */
	    /* so recurse; the state stays at START when the call returns  */
	    Process(lang, current_token, current_token->end_delimiter);
	    EndEmit(current_token, U "");
	  }
	  else
	  {
	    end_delimiter_depth = 1;
	    end_delimiter_count = current_token->want_two_ends ? 2 : 1;
	    curr_end_delim = current_token->end_delimiter;
	    curr_bracket_delim = current_token->bracket_delimiter;
	    state = IN_TOKEN;
	  }
	}

	/* check whether we have a space */
	else if( is_whitespace(curr_line[line_pos]) )
	{
	  EmitRaw(curr_line[line_pos]);
	  NextChar();
	}

	/* check whether we are supposed to echo things that don't match */
	else if( lang->no_match == NO_MATCH_PRINT )
	{
	  EmitProtected(curr_line[line_pos]);
	  NextChar();
	}

	/* finally, we have an error and must skip the character */
	else if( lang->no_match == NO_MATCH_ERROR )
	{
	  if( Printable(curr_line[line_pos]) )
	    fprintf(err_fp, "%s: skipping unexpected %c character\n",
	      ErrorHeader(), curr_line[line_pos]);
	  else
	    fprintf(err_fp, "%s: %s (octal %o)\n",
	      ErrorHeader(), "skipping unexpected unprintable character",
	      (int) curr_line[line_pos]);
	  NextChar();
	}
	else
	{
	  fprintf(err_fp, "%s internal error: lang->no_match\n", ErrorHeader());
	  exit(1);
	}
	break;


      case IN_TOKEN:  /* within a token; current_token says which kind */

	/* check for ending delimiter if there is one */
	if( curr_end_delim[0] != '\0' &&
	    (!current_token->end_start_line_only || line_pos == 1) &&
	    InputMatches(curr_end_delim) )
	{
	  end_delimiter_depth--;
	  if( DEBUG_PROCESS )
	    fprintf(stderr, "  InputMatches(%s) so end_delimiter_depth--\n",
	      curr_end_delim);
	  if( end_delimiter_depth > 0 )
	  {
	    /* if this end delimiter matches with a bracketing delimiter,  */
	    /* so is not the end of the token, emit the char and carry on  */
	    Emit(current_token, curr_line[line_pos]);
	    NextChar();
	  }
	  else
	  {
	    end_delimiter_count--;
	    if( DEBUG_PROCESS )
	      fprintf(stderr, "  InputMatches(%s) so end_delimiter_count--\n",
	        curr_end_delim);
	    if( end_delimiter_count == 0 )
	    {
	      /* seen all the end delimiters we need, so token ends */
	      len = strlen( (char *) curr_end_delim);
	      for( i = 0;  i < len;  i++ )
	        NextChar();
	      EndEmit(current_token, curr_end_delim);
	      state = START;
	    }
	    else
	    {
	      /* need more end delimiters yet, so keep scanning */
	      Emit(current_token, curr_line[line_pos]);
	      NextChar();
	      if( curr_bracket_delim[0] != '\0' )
	        state = IN_TOKEN_NEEDING_DELIM;
	      else
	        state = IN_TOKEN;
	    }
	  }
	}
	else
	{
	  /* check for bracketing delimiter if there is one */
	  if( curr_bracket_delim[0] != '\0' &&
	    InputMatches(curr_bracket_delim) )
	  {
	    if( DEBUG_PROCESS )
	      fprintf(stderr, "  InputMatches(%s) so end_delimiter_depth++\n",
	        curr_bracket_delim);
	    end_delimiter_depth++;
	  }

	  /* handle current character as usual */
	  switch( current_token->chtype[(int) curr_line[line_pos]] )
	  {

	    case LEGAL:

	      Emit(current_token, curr_line[line_pos]);
	      NextChar();
	      break;


	    case ESCAPE:

	      /* the escape character is emitted in the next state, and */
	      /* only if the character following it is legal there      */
	      NextChar();
	      state = IN_TOKEN_AFTER_ESCAPE;
	      break;


	    case INNER_ESCAPE:

	      /* close the token for now, process the escaped text as a  */
	      /* sequence of tokens, then reopen it in the next state    */
	      EndEmit(current_token, U "");
	      NextChar();
	      Process(lang, current_token, current_token->end_inner_escape);
	      state = IN_TOKEN_AFTER_INNER_ESCAPE;
	      break;


	    default:

	      if( curr_end_delim[0] != '\0' )
	      {
	        /* error: token ends at delimiter, not unexpected character */
	        if( Printable(curr_line[line_pos]) )
	          fprintf(err_fp,
		    "%s: skipping %c character (not allowed in %s)\n",
		    ErrorHeader(), curr_line[line_pos], current_token->name);
	        else if( curr_line[line_pos] == '\t' )
	          fprintf(err_fp,
		    "%s: skipping tab character (not allowed in %s)\n",
		    ErrorHeader(), current_token->name);
		else if( curr_line[line_pos] == '\n' )
	          fprintf(err_fp,
		    "%s: skipping newline character (not allowed in %s)\n",
		    ErrorHeader(), current_token->name);
		else if( curr_line[line_pos] == '\f' )
	          fprintf(err_fp,
		    "%s: skipping formfeed character (not allowed in %s)\n",
		    ErrorHeader(), current_token->name);
		else
	          fprintf(err_fp, "%s: %s, octal code %o (not allowed in %s)\n",
		    ErrorHeader(), "skipping unprintable character",
		    (unsigned) curr_line[line_pos], current_token->name);
	        NextChar();
	      }
	      else
	      {
	        /* normal termination after last legal character; the    */
	        /* character is not consumed, it is re-examined in START */
	        EndEmit(current_token, U "");
	        state = START;
	      }
	      break;


	  }
	}
	break;


      case IN_TOKEN_NEEDING_DELIM:  /* within a token looking for delim */

	/* looking for either a white space or a new matching delim */
	switch( curr_line[line_pos] )
	{
	  case ' ':
	  case '\t':
	  case '\n':
	  case '\f':

	    Emit(current_token, curr_line[line_pos]);
	    NextChar();
	    break;


	  default:

	    /* had better match */
	    i = Matching();
	    if( pairs[i].first == NULL )
	    {
	      /* this is not a suitable new start for delimiters; it is a */
	      /* fatal error, so exit with a failure status like the rest */
	      fprintf(err_fp, "%s: expected new delimiter here, found %c\n",
		ErrorHeader(), curr_line[line_pos]);
	      exit(1);
	    }

	    /* the matched pair supplies the delimiters from here on */
	    curr_bracket_delim = pairs[i].first;
	    curr_end_delim = pairs[i].second;
	    Emit(current_token, curr_line[line_pos]);
	    NextChar();
	    end_delimiter_depth++;
	    state = IN_TOKEN;
	    break;

	}
	break;


      case IN_TOKEN_AFTER_ESCAPE:

	if( current_token->escape_chtype[(int) curr_line[line_pos]] == LEGAL )
	{
	  /* emit the escape character held back earlier, then this one */
	  Emit(current_token, current_token->escape[0]);
	  Emit(current_token, curr_line[line_pos]);
	}
	else
	{
	  /* neither the escape character nor this one is emitted */
	  if( Printable(curr_line[line_pos]) )
	    fprintf(err_fp,"%s: skipping %c%c in %s, since %c not legal here\n",
	      ErrorHeader(), current_token->escape[0], curr_line[line_pos],
	      current_token->name, curr_line[line_pos]);
	  else
	    fprintf(err_fp,
	      "%s: skipping %c and %s (octal %o)\n", ErrorHeader(),
	      current_token->escape[0], "unprintable unexpected character",
	       (int) curr_line[line_pos]);
	}
	NextChar();
	state = IN_TOKEN;
	break;


      case IN_TOKEN_AFTER_INNER_ESCAPE:

	/* ending delimiter of inner escape has been read over */
	/* (no input is consumed here; the token is merely reopened) */
	StartEmit(lang, current_token, U "", 0);
	state = IN_TOKEN;
	break;


      default:

	fprintf(err_fp, "%s internal error (state = %d)\n",
	  ErrorHeader(), state);
	abort();
	break;
    }
  }

  /* at end, need to tidy up any residual messiness */
  switch( state )
  {

    case START:
    case STOP:

      /* we stopped outside any token, or after an escape */
      break;


    case IN_TOKEN:

      /* we stopped in a token (only a problem if it ends with a delimiter) */
      /* NOTE(review): a token with no end delimiter gets no EndEmit here;  */
      /* confirm that input cannot end while such a token is still open     */
      if( current_token->end_delimiter[0] != '\0' )
      {
	if( outer_token == (TOKEN *) NULL )
	  fprintf(err_fp, "%s: program text ended within %s\n",
	    ErrorHeader(), current_token->name);
	else
	  fprintf(err_fp, "%s: %s token ended within %s\n",
	    ErrorHeader(), outer_token->name, current_token->name);
	EndEmit(current_token, U "");
      }
      break;


    case IN_TOKEN_NEEDING_DELIM:

      /* we stopped in a token at a point where we were looking for a delim */
      if( outer_token == (TOKEN *) NULL )
	fprintf(err_fp, "%s: program text ended within %s\n",
	  ErrorHeader(), current_token->name);
      else
	fprintf(err_fp, "%s: %s token ended within %s\n",
	  ErrorHeader(), outer_token->name, current_token->name);
      EndEmit(current_token, U "");
      break;


    case IN_TOKEN_AFTER_ESCAPE:

      /* we stopped after the escape character */
      fprintf(err_fp, "%s: skipping %c at end of program text\n",
	ErrorHeader(), current_token->escape[0]);
      EndEmit(current_token, U "");
      break;


    case IN_TOKEN_AFTER_INNER_ESCAPE:

      /* we stopped after an inner escape (NB no EndEmit in this case) */
      if( current_token->end_delimiter[0] != '\0' )
      {
	if( outer_token == (TOKEN *) NULL )
	  fprintf(err_fp, "%s: program text ended within %s after escape\n",
	    ErrorHeader(), current_token->name);
	else
	  fprintf(err_fp, "%s: %s token ended within %s after escape\n",
	    ErrorHeader(), outer_token->name, current_token->name);
      }
      break;


    default:

      fprintf(err_fp, "%s: internal error (state %d)\n",
	ErrorHeader(), state);
      abort();
      break;

  }
} /* end Process */


/*****************************************************************************/
/*                                                                           */
/*  PrintUsage()                                                             */
/*                                                                           */
/*  Print usage message on file err_fp.                                      */
/*                                                                           */
/*****************************************************************************/

void PrintUsage()
{
  /* text printed before the list of language names */
  static const char *before_languages[] = {
    "\n",
    "usage: prg2lout <options> <files>\n\n",
    "where <options> can be\n",
    "\n",
    "    -r           raw mode (used within Lout only)\n",
    "    -i<file>     take input from <file>\n",
    "    -o<file>     send output to <file>\n",
    "    -e<file>     send error messages to <file>\n",
    "    -l<language> input is in this programming language\n",
    "    -p<style>    print style: fixed, varying, symbol\n",
    "    -f<family>   font family (e.g. Times)\n",
    "    -s<size>     font size (e.g. 10p or 12p)\n",
    "    -v<space>    line spacing (e.g. 1.1fx)\n",
    "    -t<num>      tab interval (e.g. 8 is default)\n",
    "    -T<dist>     output tab interval (e.g. 0.5i)\n",
    "    -S<file>     use this as the setup file\n",
    "    -L<num>      number lines from <num> (default is 1)\n",
    "    -n           no file names as page headers\n",
    "    -V           print version information and exit\n",
    "    -u           print this usage message and exit\n",
    "\n",
    "and <language> (which is compulsory) can be any one of:\n\n",
    NULL
  };

  /* text printed after the list of language names */
  static const char *after_languages[] = {
    "\n",
    "The values of all formatting options not given are\n",
    "taken from the setup file: either the file given after\n",
    "-S, or the system default setup file for this language\n",
    "if there is no -S option.\n",
    "\n",
    NULL
  };

  const char **text;  int lang_index;

  for( text = before_languages;  *text != NULL;  text++ )
    fputs(*text, err_fp);

  /* one line per language, showing the first of its names */
  for( lang_index = 0;  languages[lang_index] != (LANGUAGE *) NULL;
       lang_index++ )
    fprintf(err_fp, "    %s\n", languages[lang_index]->names[0]);

  for( text = after_languages;  *text != NULL;  text++ )
    fputs(*text, err_fp);
} /* end PrintUsage */


/*****************************************************************************/
/*                                                                           */
/*  main(argc, argv)                                                         */
/*                                                                           */
/*  Read command line and either process each file in turn, or, in the       */
/*  raw case, do the actual conversion of one file.                          */
/*                                                                           */
/*****************************************************************************/

int main(int argc, char *argv[])
{ int i, j, arg_pos;  char *infilename, *outfilename, *errfilename;
  LANGUAGE *lang = NO_LANGUAGE;
  char *file_names[1024];  int file_count = 0;

  /* echo command line */
  if( DEBUG_MAIN )
  {
    for( i = 0;  i < argc;  i++ )
      fprintf(stderr, i == 0 ? "%s" : " %s", argv[i]);
    fprintf(stderr, "\n\n");
  }

  /* read command line */
  in_fp = out_fp = (FILE *) NULL;
  err_fp = stderr;
  line_num = line_pos = 0;
  raw_seen = false;
  tab_by_spacing = true;
  tab_in = 8;
  tab_out = 3;
  tab_unit = 'f';
  print_lines = false;
  blanknumbered = BLANKNUMBERED_YES;
  numbered_option = NULL;
  headers_option = true;
  style_option = font_option = size_option = line_option = bls_option =
    tabin_option = tabout_option = setup_option = language_option =(char *)NULL;
  if( argc == 1 )
  { PrintUsage();
    exit(1);
  }
  for( arg_pos = 1;  arg_pos < argc;  arg_pos++ )
  {
    if( DEBUG_SETUP )
      fprintf(stderr, "examining argument %d = \"%s\"\n",
	arg_pos, argv[arg_pos]);
    if( *argv[arg_pos] == '-' ) switch( *(argv[arg_pos]+1) )
    {
      case 'r':

	if( arg_pos > 1 )
	{ fprintf(err_fp, "%s: -r must be first if it occurs at all\n",
	    ErrorHeader());
	  exit(1);
	}
	raw_seen = true;
	break;


      case 'i':
     
	/* read name of input file */
	if( !raw_seen )
	{ fprintf(err_fp, "%s: -i illegal without -r\n", ErrorHeader());
	  exit(1);
	}
	if( in_fp != NULL )
	{ fprintf(err_fp, "%s: -i seen twice\n", ErrorHeader());
	  exit(1);
	}
	GetArg(infilename, "usage: -i<filename>", false);

	/* open the file */
	in_fp = fopen(infilename, "r");
	if( in_fp == NULL )
	{ fprintf(err_fp, "%s: cannot open input file %s\n",
	    ErrorHeader(), infilename);
	  exit(1);
	}

	/* initialize file position */
	strcpy(file_name, infilename);
	line_num = 1;
	line_pos = 0;
	break;


      case 'o':
     
	/* read name of output file */
	if( out_fp != NULL )
	{ fprintf(err_fp, "%s: -o seen twice\n", ErrorHeader());
	  exit(1);
	}
	GetArg(outfilename, "usage: -o<filename>", false);
	out_fp = fopen(outfilename, "w");
	if( out_fp == NULL )
	{ fprintf(err_fp, "%s: cannot open output file %s\n",
	    ErrorHeader(), outfilename);
	  exit(1);
	}
	break;


      case 'e':
     
	/* read name of error file */
	GetArg(errfilename, "usage: -e<filename>", false);
	err_fp = fopen(errfilename, "w");
	if( err_fp == NULL )
	{ fprintf(stderr, "%s: cannot open error file %s",
	    ErrorHeader(), errfilename);
	  exit(1);
	}
	break;


      case 'p':
     
	/* read print style */
	if( raw_seen )
	{ fprintf(err_fp, "%s: -p illegal with -r option\n", ErrorHeader());
	  exit(1);
	}
	GetArg(style_option, "usage: -p<printstyle>", false);
	if( strcmp(style_option, "fixed") != 0 &&
	    strcmp(style_option, "varying") != 0 &&
	    strcmp(style_option, "symbol") != 0 )
	{ fprintf(err_fp, "%s: unknown -p option %s\n", ErrorHeader(),
	    style_option);
	  exit(1);
	}
	break;


      case 'f':
     
	/* read font family */
	if( raw_seen )
	{ fprintf(err_fp, "%s: -f illegal with -r option\n", ErrorHeader());
	  exit(1);
	}
	GetArg(font_option, "usage: -f<font_family>", false);
	break;


      case 's':
     
	/* read font size */
	if( raw_seen )
	{ fprintf(err_fp, "%s: -s illegal with -r option\n", ErrorHeader());
	  exit(1);
	}
	GetArg(size_option, "usage: -s<size>", false);
	break;


      case 'v':
     
	/* read line spacing */
	if( raw_seen )
	{ fprintf(err_fp, "%s: -v illegal with -r option\n", ErrorHeader());
	  exit(1);
	}
	GetArg(line_option, "usage: -v<line_spacing>", false);
	break;


      case 'b':
     
	/* read blanklinescale */
	if( raw_seen )
	{ fprintf(err_fp, "%s: -b illegal with -r option\n", ErrorHeader());
	  exit(1);
	}
	GetArg(bls_option, "usage: -b<scale_factor>", false);
	break;


      case 't':
     
	/* read tab interval */
	GetArg(tabin_option, "usage: -t<number>", true);
	if( tabin_option != NULL && sscanf(tabin_option,"%d",&tab_in) != 1 )
	{ fprintf(err_fp, "%s usage: -t<number>\n", ErrorHeader());
	  exit(1);
	}
	if( tab_in <= 0 )
	{ fprintf(err_fp, "%s -t: tab interval must be greater than 0\n",
	    ErrorHeader());
	  exit(1);
	}
	break;


      case 'T':
     
	/* read tab_out and tab_unit */
	GetArg(tabout_option, "usage: -T<number><unit>", true);
	if( tabout_option != NULL )
	{ if( sscanf(tabout_option, "%f%c",&tab_out,&tab_unit) != 2 )
	  { fprintf(err_fp, "%s usage: -T<number><unit>\n", ErrorHeader());
	    exit(1);
	  }
	  if( tab_out <= 0 || tab_out >= 50 )
	  { fprintf(err_fp, "%s -T: unreasonably large or small tab interval\n",
	      ErrorHeader());
	    exit(1);
	  }
	  if( tab_unit != 'c' && tab_unit != 'i' && tab_unit != 'p' &&
	      tab_unit != 'm' && tab_unit != 'f' && tab_unit != 's' &&
	      tab_unit != 'v' )
	  { fprintf(err_fp, "%s -T: tab unit must be one of cipmfsv\n",
	      ErrorHeader());
	    exit(1);
	  }
	  tab_by_spacing = false;
	}
	break;


      case 'S':
     
	/* read alternative setup file */
	if( raw_seen )
	{ fprintf(err_fp, "%s: -S illegal with -r option\n", ErrorHeader());
	  exit(1);
	}
	GetArg(setup_option, "usage: -S<filename>", false);
	break;


      case 'L':
     
	/* read line numbering */
	GetArg(numbered_option, "usage: -L<number>", true);
	print_lines = true;
	print_num = 1;
	if( numbered_option!=NULL && sscanf(numbered_option,"%d",&print_num)!=1)
	{ fprintf(err_fp, "%s usage: -L  or  -L<number>\n", ErrorHeader());
	  exit(1);
	}
	break;


      case 'N':
     
	/* print numbers on non-blank lines only */
	blanknumbered = BLANKNUMBERED_NOPRINT;
	break;


      case 'M':
     
	/* like -N but do not assign line numbers to blank lines */
	blanknumbered = BLANKNUMBERED_NO;
	break;


      case 'n':
     
	if( raw_seen )
	{ fprintf(err_fp, "%s: -n illegal with -r option\n", ErrorHeader());
	  exit(1);
	}
	headers_option = false;
	break;


      case 'V':
     
	if( raw_seen )
	{ fprintf(err_fp, "%s: -V illegal with -r option\n", ErrorHeader());
	  exit(1);
	}
	fprintf(err_fp, "%s\n", PRG2LOUT_VERSION);
	exit(0);
	break;


      case 'u':
     
	if( raw_seen )
	{ fprintf(err_fp, "%s: -u illegal with -r option\n", ErrorHeader());
	  exit(1);
	}
	PrintUsage();
	exit(0);
	break;


      case 'l':
     
	if( language_option != (char *) NULL )
	{ fprintf(err_fp, "%s: -l seen twice\n", ErrorHeader());
	  exit(1);
	}
	GetArg(language_option, "usage: -l<language>", false);
	i = 0;  j = 0;
	while( lang == NO_LANGUAGE && languages[i] != NO_LANGUAGE )
	{
	  if( languages[i]->names[j] == (char *) NULL )
	    i++, j = 0;
	  else if( strcmp(languages[i]->names[j], language_option) == 0 )
	    lang = languages[i];
	  else
	    j++;
	}
	if( lang == NO_LANGUAGE )
	{
	  fprintf(err_fp, "%s: unknown language %s\n", ErrorHeader(),
	    language_option);
	  exit(1);
	}
	break;


      default:
     
	fprintf(err_fp, "%s: unknown command line flag %s\n", ErrorHeader(),
	  argv[arg_pos]);
	exit(1);
	break;

    }
    else
    {
      if( raw_seen )
      { fprintf(err_fp, "%s: file parameter illegal with -r flag\n",
	    ErrorHeader());
	  exit(1);
      }
      if( DEBUG_SETUP )
	fprintf(stderr, "file_names[%d++] = argv[%d] = %s\n",
	  file_count, arg_pos, argv[arg_pos]);
      file_names[file_count++] = argv[arg_pos];
    }
  } /* for */

  /* make sure we have a language */
  if( lang == NO_LANGUAGE )
  {
    fprintf(err_fp, "%s: missing -l option\n", ErrorHeader());
    exit(0);
  }

  /* do the actual work */
  if( raw_seen )
  {
    /* check that input and output files are open */
    if( in_fp == NULL )
      in_fp = stdin;
    if( out_fp == NULL )
    { fprintf(err_fp, "%s -r: missing -o option\n", ErrorHeader());
      exit(1);
    }

    /* process the file */
    SetupLanguage(lang);
    line_pos = 1;
    curr_line[line_pos] = '\n';  /* forces line read */
    curr_line[line_pos + 1] = '\0';
    line_num = 0;
    NextChar();
    Process(lang, (TOKEN *) NULL, U "");
  }
  else if( file_count > 0 )
  { int ch;

    /* make sure we have an output file */
    if( out_fp == (FILE *) NULL )
      out_fp = stdout;

    /* print the initial @Use clauses etc.*/
    fprintf(out_fp, "%s%s\n", "@Sy", "sInclude { doc }");
    if( setup_option != NULL )
      fprintf(out_fp, "%s%s { %s }\n", "@In", "clude", setup_option);
    else
      fprintf(out_fp, "%s%s { %s }\n", "@Sy", "sInclude", lang->setup_file);
    fprintf(out_fp, "@Document\n");
    fprintf(out_fp, "    @InitialBreak { lines 1.2fx nohyphen }\n");
    fprintf(out_fp, "//\n");
    fprintf(out_fp, "%s%s\n", "@Text @Be", "gin");
 
    /* print each file, possibly with a header */
    for( i = 0;  i < file_count;  i++ )
    {
      /* open file and initialize file position */
      in_fp = fopen(file_names[i], "r");
      if( in_fp == NULL )
      { fprintf(err_fp, "%s: skipping input file %s (cannot open)\n",
	  ErrorHeader(), file_names[i]);
	continue;
      }
      strcpy(file_name, file_names[i]);

      /* print @NP if not first, and header if required */
      if( i > 0 )
	fprintf(out_fp, "\n\n@NP\n\n");
      if( headers_option )
	fprintf(out_fp, "{ Times Bold \"+3p\" } @Font \"%s\"\n@DP\n",
	  file_names[i]);

      /* print file name and contents (don't format, let Lout call back) */
      /* this string has been disguised to avoid recognition by prg2lout */
      fprintf(out_fp, "%s\n", lang->lang_sym);
      if( style_option != NULL )
	fprintf(out_fp, "    style { %s }\n", style_option);
      if( font_option != NULL )
	fprintf(out_fp, "    font { %s }\n", font_option);
      if( size_option != NULL )
	fprintf(out_fp, "    size { %s }\n", size_option);
      if( line_option != NULL )
	fprintf(out_fp, "    line { %s }\n", line_option);
      if( bls_option != NULL )
	fprintf(out_fp, "    blanklinescale { %s }\n", bls_option);
      if( tabin_option != NULL )
	fprintf(out_fp, "    tabin { %s }\n", tabin_option);
      if( tabout_option != NULL )
	fprintf(out_fp, "    tabout { %s }\n", tabout_option);
      if( print_lines )
	fprintf(out_fp, "    numbered { %d }\n", print_num);
      if( print_lines && blanknumbered != BLANKNUMBERED_YES )
	fprintf(out_fp, "    blanknumbered { %s }\n",
	  blanknumbered == BLANKNUMBERED_NO ? "No" : "NoPrint");
      fprintf(out_fp, "%s%s\n", "@Be", "gin");
      while( (ch = getc(in_fp)) != EOF )
	putc(ch, out_fp);
      fprintf(out_fp, "%s%s %s\n", "@E", "nd", lang->lang_sym);
    }

    /* finish off whole input */
    fprintf(out_fp, "%s%s%s\n", "@E", "nd @T", "ext");

  }
  exit(0);
} /* end main */