Lout 3.27.

git-svn-id: http://svn.savannah.nongnu.org/svn/lout/trunk@23 9365b830-b601-4143-9ba8-b4a8e2c3339c
author: Jeffrey H. Kingston <jeff@it.usyd.edu.au> 2010-09-14 20:38:23 +0000
committer: Jeffrey H. Kingston <jeff@it.usyd.edu.au> 2010-09-14 20:38:23 +0000
commit: 78c2bcf9e96ab00615ee6f96905bca78fcd52a00 (patch)
tree: 9c7e31f2a59e174433e55b589771005b48a34158 /z02.c
parent: 9daa98ce90ceeeaba9e942d28575d8fcfe36db4b (diff)
download: lout-78c2bcf9e96ab00615ee6f96905bca78fcd52a00.tar.gz
1 files changed, 176 insertions, 48 deletions
diff --git a/z02.c b/z02.c
index 7f9b751..c19a4fc 100644
--- a/z02.c
+++ b/z02.c
@@ -1,9 +1,9 @@
 /*@z02.c:Lexical Analyser:Declarations@***************************************/
 /*                                                                           */
-/*  THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.26)                       */
+/*  THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.27)                       */
 /*  COPYRIGHT (C) 1991, 2002 Jeffrey H. Kingston                             */
 /*                                                                           */
-/*  Jeffrey H. Kingston (jeff@cs.usyd.edu.au)                                */
+/*  Jeffrey H. Kingston (jeff@it.usyd.edu.au)                                */
 /*  Basser Department of Computer Science                                    */
 /*  The University of Sydney 2006                                            */
 /*  AUSTRALIA                                                                */
@@ -31,6 +31,10 @@
 /*  from Waite, W. M.: The Cost of Lexical Analysis, in Software - Practice  */
 /*  and Experience, v16, pp473-488 (May 1986).                               */
 /*                                                                           */
+/*  Converted 8 November 2002 to handle DOS etc. line endings.  The end of   */
+/*  a line is now taken to be the second end-of-line character when there    */
+/*  are two of them.  With this rule it's quite easy to do the conversion.   */
+/*                                                                           */
 /*****************************************************************************/
 #include "externs.h"
 #define	BUFFER_SIZE    8192		/* size of buffer for block read     */
@@ -143,7 +147,7 @@ static void initchtbl(val, str)
 int val;  FULL_CHAR *str;
 { int i;
   for( i = 0;  str[i] != '\0';  i++ )
-	chtbl[ str[i] ] = val;
+    chtbl[ str[i] ] = val;
 } /* end initchtbl */
 
 void LexInit(void)
@@ -165,7 +169,8 @@ void LexInit(void)
   initchtbl(CSPACE,  STR_SPACE);
   initchtbl(FORMFEED,STR_FORMFEED);
   initchtbl(TAB,     STR_TAB);
-  initchtbl(NEWLINE, STR_NEWLINE);
+  chtbl[CH_LF] = NEWLINE;
+  chtbl[CH_CR] = NEWLINE;
   chtbl['\0'] = ENDFILE;
   stack_free = -1;
 } /* end LexInit */
@@ -237,7 +242,7 @@ void LexPush(FILE_NUM x, int offs, int ftyp, int lnum, BOOLEAN same)
     Error(2, 3, "run out of memory when opening file %s",
       FATAL, PosOfFile(x), FullFileName(x));
   buf = chpt = &mem_block[MAX_LINE];
-  last_char = CH_NEWLINE;
+  last_char = CH_CR;
   this_file = x;  offset = offs;
   first_line_num = lnum;  same_file = same;
   ftype = ftyp;  next_token = nilobj;
@@ -301,8 +306,8 @@ void LexPop(void)
 #define setword(typ, res, file_pos, str, len)				\
 { NewWord(res, typ, len, &file_pos);					\
   FposCopy(fpos(res), file_pos);					\
-  for( c = 0;  c < len;  c++ ) string(res)[c] = str[c];			\
-  string(res)[c] = '\0';						\
+  for( ch = 0;  ch < len;  ch++ ) string(res)[ch] = str[ch];		\
+  string(res)[ch] = '\0';						\
 }
 
 
@@ -336,13 +341,16 @@ long LexNextTokenPos(void)
    * need this workaround (I haven't tried compiling lout with gcc
    * though, as the result will need cygwin.dll to run).
    */
+
+/* *** retired by JeffK 2002-11-08 since now reading in binary mode
   {
     register FULL_CHAR *p;
     for (p = chpt; p < limit; ++p) {
-      if (*p == (FULL_CHAR) CH_NEWLINE)
+      if (*p == (FULL_CHAR) CH_LF)
         --res;
     }
   }
+*** */
 #endif /* DB_FIX */
 
   debug1(DLA, DD, "LexNextTokenPos() returning %ld", res);
@@ -368,7 +376,7 @@ static void srcnext(void)
   if( blksize != 0 && chpt < limit )
   { debugcond0(DLA, DD, stack_free <= 1, "srcnext: transferring.");
     col = buf;
-    while( (*--col = *--limit) != CH_NEWLINE );
+    while( chtbl[(*--col = *--limit)] != NEWLINE );
     frst = col + 1;  limit++;  blksize = 0;
   }
 
@@ -384,11 +392,11 @@ static void srcnext(void)
     blksize = fread( (char *) buf, sizeof(char), BUFFER_SIZE, fp);
     if( blksize > 0 )
       last_char = *(buf + blksize - 1);
-    if( blksize < BUFFER_SIZE && last_char != CH_NEWLINE )
+    if( blksize < BUFFER_SIZE && chtbl[last_char] != NEWLINE )
     {
-      /* at end of file since blksize = 0; so add missing newline char */
+      /* at end of file since blksize < BUFFER_SIZE; add missing newline char */
       blksize++;
-      last_char = *(buf+blksize-1) = CH_NEWLINE;
+      last_char = *(buf+blksize-1) = CH_LF;
 
       /* this adjustment breaks LexNextTokenPos, so fatal error if database */
       if( ftype == DATABASE_FILE )
@@ -401,7 +409,7 @@ static void srcnext(void)
     debugcond4(DLA, DD, stack_free <= 1,
       "srcnext: %d = fread(0x%x, %d, %d, fp)",
       blksize, buf, sizeof(char), BUFFER_SIZE);
-    frst = buf;  limit = buf + blksize;  *limit = CH_NEWLINE;
+    frst = buf;  limit = buf + blksize;  *limit = CH_LF;
   }
 
   /* if nothing more to read, make this clear */
@@ -427,7 +435,7 @@ OBJECT LexGetToken(void)
 {
 	   FULL_CHAR *startpos;		/* where the latest token started    */
   register FULL_CHAR *p, *q;		/* pointer to current input char     */
-  register int      c;			/* temporary character (really char) */
+  register int      ch;			/* temporary character (really char) */
   OBJECT   res;				/* result token                      */
   int vcount, hcount;			/* no. of newlines and spaces seen   */
 
@@ -445,7 +453,7 @@ OBJECT LexGetToken(void)
   {
       case ESCAPE:
       
-	if( ftype==DATABASE_FILE && *p>='a' && *p<='z' && *(p+1) == '{' /*}*/ )
+	if( ftype==DATABASE_FILE && *p>='a' && *p<='z' && *(p+1) == '{' )
 	{ res = NewToken(LBR, &file_pos, 0, 0, (unsigned) *p, StartSym);
 	  p += 2;
 	}
@@ -461,9 +469,21 @@ OBJECT LexGetToken(void)
       case COMMENT:
       
 	debug1(DLA, DDD, "LexGetToken%s: comment", EchoFilePos(&file_pos));
-	while( (c = *p++) != CH_NEWLINE && c != '\0' );
-	if( c == CH_NEWLINE )
+	while( chtbl[(ch = *p++)] != NEWLINE && ch != '\0' );
+	if( chtbl[ch] == NEWLINE )
 	{
+	  /* skip over second newline character if any */
+	  if( ch == CH_LF )
+	  {
+	    if( *p == CH_CR )
+	      p++;
+	  }
+	  else /* ch == CH_CR */
+	  {
+	    if( *p == CH_LF )
+	      p++;
+	  }
+
 	  /* do NEWLINE action, only preserve existing horizontal space */
 	  /* and don't count the newline in the vcount. */
 	  chpt = p;  srcnext();
@@ -493,6 +513,19 @@ OBJECT LexGetToken(void)
 
       case NEWLINE:
       
+	/* skip over second newline character if any */
+	if( *(p-1) == CH_LF )
+	{
+	  if( *p == CH_CR )
+	    p++;
+	}
+	else /* *(p-1) == CH_CR */
+	{
+	  if( *p == CH_LF )
+	    p++;
+	}
+
+	/* do newline action */
 	chpt = p;  srcnext();
 	line_num(file_pos)++;
 	col_num(file_pos) = 0;
@@ -533,7 +566,7 @@ OBJECT LexGetToken(void)
 	    line_num(file_pos) = first_line_num;
 	  }
 	  frst = limit = chpt = buf;
-	  blksize = 0;  last_char = CH_NEWLINE;
+	  blksize = 0;  last_char = CH_LF;
 	  srcnext();
 	  startline = (p = chpt) - 1;
 	  hcount = 0;
@@ -557,9 +590,6 @@ OBJECT LexGetToken(void)
 	  
 	    /* input ends with "@End @FilterOut" */
 	    res = NewToken(END, &file_pos, 0, 0, END_PREC, FilterOutSym);
-	    /* ***
-	    next_token = NewToken(CLOSURE,&file_pos,0,0,NO_PREC,FilterOutSym);
-	    *** */
 	    --p;  startline = p;
 	    break;
 
@@ -585,11 +615,12 @@ OBJECT LexGetToken(void)
       
 	col_num(file_pos) = (startpos = p-1) - startline;
 	while( chtbl[*p++] == OTHER );
-	c = p - startpos - 1;
+	/* using ch as a real int here, not a char */
+	ch = p - startpos - 1;
 	do
-	{ res = SearchSym(startpos, c);
-	  --c; --p;
-	} while( c > 0 && res == nilobj );
+	{ res = SearchSym(startpos, ch);
+	  --ch; --p;
+	} while( ch > 0 && res == nilobj );
 	goto MORE;  /* 7 lines down */
 
 
@@ -635,7 +666,7 @@ OBJECT LexGetToken(void)
 	    New(t, LBR);
 	  }
 	  fname = Parse(&t, nilobj, FALSE, FALSE);
-	  fname = ReplaceWithTidy(fname, FALSE);
+	  fname = ReplaceWithTidy(fname, ACAT_TIDY);
 	  if( scope_suppressed ) SuppressScope();
 	  if( !is_word(type(fname)) )
 	  { Error(2, 10, "name of include file expected here",
@@ -647,15 +678,31 @@ OBJECT LexGetToken(void)
 	  len = StringLength(string(fname)) - StringLength(SOURCE_SUFFIX);
 	  if( len >= 0 && StringEqual(&string(fname)[len], SOURCE_SUFFIX) )
 	    StringCopy(&string(fname)[len], STR_EMPTY);
-	  debug0(DFS, D, "  calling DefineFile from LexGetToken");
-	  fnum = DefineFile(string(fname), STR_EMPTY, &fpos(fname),
+	  if( !InDefinitions ||
+	      (FileNum(string(fname), STR_EMPTY) == NO_FILE &&
+	      FileNum(string(fname), SOURCE_SUFFIX) == NO_FILE) )
+	  {
+	    /* need to define and read this include file */
+	    debug1(DFS, D, "  calling DefineFile %s from LexGetToken",
+	      string(fname));
+	    fnum = DefineFile(string(fname), STR_EMPTY, &fpos(fname),
 	      INCLUDE_FILE,
 	      predefined(res)==INCLUDE ? INCLUDE_PATH : SYSINCLUDE_PATH);
-	  Dispose(fname);
-	  LexPush(fnum, 0, INCLUDE_FILE, 1, FALSE);
-	  res = LexGetToken();
-	  vcount++; /** TEST ADDITION! **/
-	  p = chpt;
+	    Dispose(fname);
+	    LexPush(fnum, 0, INCLUDE_FILE, 1, FALSE);
+	    res = LexGetToken();
+	    vcount++; /** TEST ADDITION! **/
+	    p = chpt;
+	  }
+	  else
+	  {
+	    debug1(DFS, D, "  skipping DefineFile %s from LexGetToken",
+	      string(fname));
+	    res = nilobj;
+	    p = chpt;
+	    Dispose(fname);
+	    break;
+	  }
 	}
 	else if( predefined(res) == END )
 	  res = NewToken(predefined(res), &file_pos,0,0,precedence(res),nilobj);
@@ -728,9 +775,14 @@ OBJECT LexGetToken(void)
   chpt = p;
   vspace(res) = vcount;
   hspace(res) = hcount;
+  debug5(DLA, DD, "LexGetToken%s returning %s %s %d.%d",
+    EchoFilePos(&file_pos), Image(type(res)), EchoToken(res),
+    vspace(res), hspace(res));
+  /* ***
   debugcond5(DLA, DD, stack_free <= 1, "LexGetToken%s returning %s %s %d.%d",
     EchoFilePos(&file_pos), Image(type(res)), EchoToken(res),
     vspace(res), hspace(res));
+  *** */
   return res;
 } /* end LexGetToken */
 
@@ -745,40 +797,100 @@ OBJECT LexGetToken(void)
 /*  terminate at matching right brace.                                       */
 /*                                                                           */
 /*  If lessskip is true it means that we should skip only up to and          */
-/*  including the first newline character, as opposed to the usual           */
+/*  including the first newline character sequence, as opposed to the usual  */
 /*  skipping of all initial white space characters.                          */
 /*                                                                           */
 /*****************************************************************************/
 
+/*****************************************************************************/
+/*                                                                           */
+/*  print(ch)                                                                */
+/*                                                                           */
+/*  Add ch to the result of the verbatim scan.  If chtbl[ch] == NEWLINE,     */
+/*  this means to add whatever sequence of characters counts as the end      */
+/*  of line sequence in today's operating system.  Otherwise, just add ch.   */
+/*                                                                           */
+/*  The code that calls print() ensures that only one character is passed    */
+/*  to print() per newline sequence; this could be either CH_LF or CH_CR.    */
+/*                                                                           */
+/*****************************************************************************/
+
 #define print(ch)							\
 { debug2(DLA, D, "print(%c), bufftop = %d", ch, bufftop);		\
   if( fp == NULL )							\
   { if( bufftop < MAX_BUFF )						\
     { if( chtbl[ch] == NEWLINE )					\
-      { res = BuildLines(res, buff, &bufftop);				\
+      { res = BuildLines(res, buff, &bufftop, hs_lnum_overshoot);	\
       }									\
       else buff[bufftop++] = ch;					\
     }									\
   }									\
-  else putc(ch, fp);							\
+  else if( chtbl[ch] != NEWLINE )					\
+    putc(ch, fp);							\
+  else									\
+    StringFPuts(STR_NEWLINE, fp);					\
 }
 
-#define clear()								\
-{ int i;								\
-  for( i = 0;  i < hs_top;  i++ )  print(hs_buff[i]);			\
-  hs_top = 0;								\
-}
+/*****************************************************************************/
+/*                                                                           */
+/*  hold(ch)                                                                 */
+/*                                                                           */
+/*  At this point we are thinking about print(ch) but we are not certain     */
+/*  yet, so ch is held pending a decision.                                   */
+/*                                                                           */
+/*  The code that calls hold() ensures that only one character is passed     */
+/*  to hold() per newline sequence; this could be either CH_LF or CH_CR.     */
+/*                                                                           */
+/*****************************************************************************/
 
 #define hold(ch)							\
 { if( hs_top == MAX_BUFF )  clear();					\
   hs_buff[hs_top++] = ch;						\
+  if( chtbl[ch] == NEWLINE )						\
+    hs_lnum_overshoot++;						\
 }
 
-static OBJECT BuildLines(OBJECT current, FULL_CHAR *buff, int *bufftop)
-{ OBJECT wd, res, gp, gpword;  int c;
+/*****************************************************************************/
+/*                                                                           */
+/*  clear()                                                                  */
+/*                                                                           */
+/*  A decision has been made that all currently held characters are now      */
+/*  to be printed.                                                           */
+/*                                                                           */
+/*****************************************************************************/
+
+#define clear()								\
+{ int i;								\
+  for( i = 0;  i < hs_top;  i++ )					\
+  { print(hs_buff[i]);							\
+    if( chtbl[hs_buff[i]] == NEWLINE )					\
+      hs_lnum_overshoot--;						\
+  }									\
+  hs_top = 0;								\
+  assert(hs_lnum_overshoot == 0, "clear(): hs_lnum_overshoot!");	\
+}
+
+/*****************************************************************************/
+/*                                                                           */
+/*  OBJECT BuildLines(current, buff, bufftop, ladj)                          */
+/*                                                                           */
+/*  Add one line containing word buff to the growing set of verbatim lines.  */
+/*  Adjust the line number of file_pos by ladj to account for extra lines    */
+/*  that we may be holding, which would otherwise cause us to overestimate   */
+/*  which line we are on.                                                    */
+/*                                                                           */
+/*****************************************************************************/
+
+static OBJECT BuildLines(OBJECT current, FULL_CHAR *buff, int *bufftop, int ladj)
+{ OBJECT wd, res, gp, gpword;  int ch;  FILE_POS xfp;
+
+  /* adjust file position since we may have been holding stuff */
+  file_num(xfp) = file_num(file_pos);
+  line_num(xfp) = line_num(file_pos) - ladj;
+  col_num(xfp) = 1;
 
   /* build a new word and reset the buffer */
-  setword(WORD, wd, file_pos, buff, *bufftop);
+  setword(WORD, wd, xfp, buff, *bufftop);
   debug1(DLA, D, "BuildLines(current, %s)", EchoObject(wd));
   *bufftop = 0;
 
@@ -801,8 +913,8 @@ static OBJECT BuildLines(OBJECT current, FULL_CHAR *buff, int *bufftop)
     New(gp, GAP_OBJ);
     mark(gap(gp)) = FALSE;
     join(gap(gp)) = FALSE;
-    FposCopy(fpos(gp), file_pos);
-    gpword = MakeWord(WORD, AsciiToFull("1vx"), &file_pos);
+    FposCopy(fpos(gp), xfp);
+    gpword = MakeWord(WORD, AsciiToFull("1vx"), &xfp);
     Link(gp, gpword);
     Link(res, gp);
     Link(res, wd);
@@ -820,6 +932,7 @@ FILE *fp;  BOOLEAN end_stop;  FILE_POS *err_pos;  BOOLEAN lessskip;
   BOOLEAN skipping;			/* TRUE when skipping initial spaces */
   FULL_CHAR hs_buff[MAX_BUFF];		/* hold spaces here in case last     */
   int hs_top;				/* next free spot in hs_buff         */
+  int hs_lnum_overshoot = 0;		/* subtract this from lnum           */
   FULL_CHAR buff[MAX_BUFF];		/* hold line here if not to file     */
   int bufftop;				/* top of buff                       */
   OBJECT res = nilobj;			/* result object if not to file      */
@@ -857,8 +970,23 @@ FILE *fp;  BOOLEAN end_stop;  FILE_POS *err_pos;  BOOLEAN lessskip;
 
       case NEWLINE:
       
+	/* skip over second newline character if any */
+	if( *(p-1) == CH_LF )
+	{
+	  if( *p == CH_CR )
+	    p++;
+	}
+	else /* c == CH_CR */
+	{
+	  if( *p == CH_LF )
+	    p++;
+	}
+
+	/* sort out skipping and holding */
 	if( !skipping )  hold(*(p-1));
 	if( lessskip ) skipping = FALSE;
+
+	/* perform newline action */
 	chpt = p;  srcnext();
 	line_num(file_pos)++;
 	col_num(file_pos) = 0;
@@ -925,7 +1053,7 @@ FILE *fp;  BOOLEAN end_stop;  FILE_POS *err_pos;  BOOLEAN lessskip;
 		FATAL, &fpos(t), KW_LBR, sysinc ? KW_SYSINCLUDE : KW_INCLUDE);
 	    incl_fname = Parse(&t, nilobj, FALSE, FALSE);
 	    p = chpt;
-	    incl_fname = ReplaceWithTidy(incl_fname, FALSE);
+	    incl_fname = ReplaceWithTidy(incl_fname, ACAT_TIDY);
 	    if( !is_word(type(incl_fname)) )
 	      Error(2, 19, "expected file name here", FATAL,&fpos(incl_fname));
 	    debug0(DFS, D, "  calling DefineFile from LexScanVerbatim");
@@ -960,7 +1088,7 @@ FILE *fp;  BOOLEAN end_stop;  FILE_POS *err_pos;  BOOLEAN lessskip;
 	break;
 
   };
-  print('\n');
+  print(CH_LF);
 
   if( p - startline >= MAX_LINE )
   { col_num(file_pos) = 1;
author	Jeffrey H. Kingston <jeff@it.usyd.edu.au>	2010-09-14 20:38:23 +0000
committer	Jeffrey H. Kingston <jeff@it.usyd.edu.au>	2010-09-14 20:38:23 +0000
commit	78c2bcf9e96ab00615ee6f96905bca78fcd52a00 (patch)
tree	9c7e31f2a59e174433e55b589771005b48a34158 /z02.c
parent	9daa98ce90ceeeaba9e942d28575d8fcfe36db4b (diff)
download	lout-78c2bcf9e96ab00615ee6f96905bca78fcd52a00.tar.gz