aboutsummaryrefslogblamecommitdiffstats
path: root/z45.c
blob: 32e1c7d5abe55e35b9e9f5044be5aa7bef206076 (plain) (tree)
1
2
3
4
5
6
7

                                                                               

                                                                               
                                                                               
                                                                               
                                                                               




                                                                               
                                                                               


























                                                                               































































                                                                               












































































































                                                                               





                                                                               



                                                                               



                                                                               
                                                                               














































                                                                               

                                              




















                                                                          
                                    



                                                     













                                                                               







































                                                                               
                                    




















































                                                                               
                                             




                                                         
                                                










                                                                   
/*@z45.c:External Sort:SortFile()@********************************************/
/*                                                                           */
/*  THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.41)                       */
/*  COPYRIGHT (C) 1991, 2023 Jeffrey H. Kingston                             */
/*                                                                           */
/*  Jeffrey H. Kingston (jeff@it.usyd.edu.au)                                */
/*  School of Information Technologies                                       */
/*  The University of Sydney 2006                                            */
/*  AUSTRALIA                                                                */
/*                                                                           */
/*  This program is free software; you can redistribute it and/or modify     */
/*  it under the terms of the GNU General Public License as published by     */
/*  the Free Software Foundation; either Version 3, or (at your option)      */
/*  any later version.                                                       */
/*                                                                           */
/*  This program is distributed in the hope that it will be useful,          */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/*  GNU General Public License for more details.                             */
/*                                                                           */
/*  You should have received a copy of the GNU General Public License        */
/*  along with this program; if not, write to the Free Software              */
/*  Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA   */
/*                                                                           */
/*  FILE:         z45.c                                                      */
/*  MODULE:       External Sort                                              */
/*  EXTERNS:      SortFile()                                                 */
/*                                                                           */
/*  This simple sort utility assumes that the source file can all be read    */
/*  into memory.  If not, you get an "out of memory" error message.          */
/*                                                                           */
/*****************************************************************************/
#include "externs.h"

#define BUFF_SIZE	4096		/* size of one memory buffer */
#define LINES_GUESS     2000		/* initial guess of number of lines */


/*****************************************************************************/
/*                                                                           */
/*  int ReadOneLine(FILE *fp, char *buff, int buff_len)                      */
/*                                                                           */
/*  Read one line of fp, up to and including the following newline           */
/*  sequence, which may be a CH_LF/CH_CR pair as well as singleton.          */
/*  Place the contents of the line into *buff, including a concluding        */
/*  \0 but never the concluding newline sequence.                            */
/*                                                                           */
/*  The buffer has space for buff_len characters including the concluding    */
/*  \0.  If space runs out, terminate the read early; the unread portion     */
/*  will be read by the next call to ReadOneLine().  This code assumes       */
/*  that buff_len is at least 2.                                             */
/*                                                                           */
/*  Return values are as follows:                                            */
/*                                                                           */
/*     0   Did not read a line because EOF was encountered immediately       */
/*     1   Successfully read a line, empty or otherwise, into *buff          */
/*     2   Read a line but had to stop early owing to buffer overflow        */
/*                                                                           */
/*****************************************************************************/

int ReadOneLine(FILE *fp, FULL_CHAR *buff, int buff_len)
{ int ch;
  int len1 = buff_len - 1;

  /* read characters up to the end of line or file */
  int count = 0;
  while( (ch = getc(fp)) != EOF && ch != CH_LF && ch != CH_CR )
  {
    buff[count++] = ch;
    if( count >= len1 )
    {
      /* out of space, have to stop early */
      buff[count++] = '\0';
      return 2;
    }
  }

  /* terminate gracefully depending what character we stopped at */
  if( ch == EOF )
  {
    if( count == 0 )
      return 0;
  }
  else if( ch == CH_LF )
  {
    /* consume any immediately following CH_CR too */
    ch = getc(fp);
    if( ch != CH_CR )
      ungetc(ch, fp);
  }
  else /* ch == CH_CR */
  {
    /* consume any immediately following CH_LF too */
    ch = getc(fp);
    if( ch != CH_LF )
      ungetc(ch, fp);
  }
  buff[count++] = '\0';
  return 1;
} /* end ReadOneLine */


/*****************************************************************************/
/*                                                                           */
/*  int ReadOneBinaryLine(FILE *fp, char *buff, int buff_len, int *count)    */
/*                                                                           */
/*  Contributed by William Bader, July 2008, mainly to allow included EPS    */
/*  files to contain null characters.                                        */
/*                                                                           */
/*  Read one line of fp, up to and including the following newline           */
/*  sequence, which may be a CH_LF/CH_CR pair as well as singleton.          */
/*  Place the contents of the line into *buff including the line ending.     */
/*  Set count to the total number of characters written into *buff.          */
/*  Adds a terminating null (not included in count).                         */
/*                                                                           */
/*  If remaining_len is >= 0, then read at most that many characters and     */
/*  decrement remaining_len by the number of characters read.                */
/*                                                                           */
/*  The buffer has space for buff_len characters.                            */
/*  If space runs out, terminate the read early; the unread portion          */
/*  will be read by the next call to ReadOneLine().  This code assumes       */
/*  that buff_len is at least 3.                                             */
/*                                                                           */
/*  Return values are as follows:                                            */
/*                                                                           */
/*     0   Did not read a line because EOF was encountered immediately       */
/*     1   Successfully read a line, empty or otherwise, into *buff          */
/*     2   Read a line but had to stop early owing to buffer overflow        */
/*                                                                           */
/*****************************************************************************/

int ReadOneBinaryLine(FILE *fp, FULL_CHAR *buff, int buff_len, int *count_ptr,
  long *remaining_len)
{ int ch;
  int len1 = buff_len - 3;

  /* read characters up to the end of line or file or remaining_len */
  int count = 0;
  while( (ch = getc(fp)) != EOF && ch != CH_LF && ch != CH_CR )
  {
    if( *remaining_len >= 0 )
    {
      if( *remaining_len == 0 )
      {
       ch = EOF;
       break;
      }
      (*remaining_len)--;
    }
    buff[count++] = ch;
    if( count >= len1 )
    {
      *count_ptr = count;
      return 2;
    }
  }

  /* terminate gracefully depending what character we stopped at */
  if( ch == EOF )
  {
    if( count == 0 ) {
      *count_ptr = 0;
      return 0;
    }
  }
  else if( ch == CH_LF )
  {
    /* consume any immediately following CH_CR too */
    if( *remaining_len > 0 )
    {
      (*remaining_len)--;
    }
    buff[count++] = ch;
    ch = getc(fp);
    if( ch != CH_CR )
      ungetc(ch, fp);
    else
    {
      if( *remaining_len > 0 )
      {
        (*remaining_len)--;
      }
      buff[count++] = ch;
    }
  }
  else /* ch == CH_CR */
  {
    /* consume any immediately following CH_LF too */
    if( *remaining_len > 0 )
    {
      (*remaining_len)--;
    }
    buff[count++] = ch;
    ch = getc(fp);
    if( ch != CH_LF )
      ungetc(ch, fp);
    else
    {
      if( *remaining_len > 0 )
      {
        (*remaining_len)--;
      }
      buff[count++] = ch;
    }
  }
  buff[count] = '\0';
  *count_ptr = count;
  return 1;
} /* end ReadOneBinaryLine */


/*****************************************************************************/
/*                                                                           */
/*  LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, *len) */
/*                                                                           */
/*  Read all of the lines of fp into memory and return a null-terminated     */
/*  array of pointers to these lines, and set *len to the number of lines.   */
/*  Make sure the lines themselves are null-terminated, also.                */
/*                                                                           */
/*  As for ReadOneLine above, lines may be terminated by a one or two        */
/*  character newline sequence, but these characters are never included      */
/*  in the lines returned.                                                   */
/*                                                                           */
/*  fname is the name of the file being sorted, and is used for error        */
/*  messages only.                                                           */
/*                                                                           */
/*  if first_line is non-null then it is a pointer to a string which is      */
/*  to become the first line of the result.  This string needs copying.      */
/*                                                                           */
/*****************************************************************************/

LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, int *len)
{
  char *buff;				/* the current input line buffer     */
  char *buff_top;			/* first spot off end of buffer      */
  char *bp;				/* first free spot in buff           */

  LINE *lines;				/* the array of pointers to lines    */
  int  lines_length;			/* the length of the lines array     */
  LINE *lines_top;			/* first spot off end of lines       */
  LINE *lp;				/* first free spot in lines          */

  char *p, *q;
  int ch;
  debug1(DEX, D, "ReadLines(-, %s, -)", fname);

  /* initialize buff to be empty with size BUFF_SIZE */
  buff = malloc(BUFF_SIZE * sizeof(char));
  if( buff == NULL )
    Error(45, 1, "run out of memory when reading index file %s",
      FATAL, no_fpos, fname);
  buff_top = buff + BUFF_SIZE;
  bp = buff;

  /* initialize the lines buffer to be the first line  */
  lines_length = LINES_GUESS;
  lines = malloc(lines_length * sizeof(LINE *));
  lines_top = &lines[lines_length];
  lp = lines;

  /* add first_line to lines buffer if required */
  if( first_line != (FULL_CHAR *) null )
  {
    *lp = malloc((StringLength(first_line) + 1) * sizeof(char));
    StringCopy( (char *) *lp, first_line);
    lp++;
  }

  *lp++ = bp;
  while( (ch = getc(fp)) != EOF )
  {
    debug4(DEX, DD, "lines: [%d  %d(%d)  %d]",
      (int) lines, (int) (lp-1), (int) *(lp-1), (int) lines_top -1);
    debug3(DEX, DD, " buff: [%d   bp %d   %d]",
      (int) buff, (int) bp, (int) buff_top - 1);
    assert( (long) buff >= (long) lines_top ||
	    (long) buff_top <= (long) lines,
	    "ReadLines: lines and buff overlap!" );

    /* get new buffer and copy current line across if out of buff space */
    if( bp == buff_top )
    {
      debug0(DEX, D, "  getting new buff");
      buff = malloc(BUFF_SIZE * sizeof(char));
      if( buff == NULL )
	Error(45, 2, "run out of memory when reading index file %s",
	  FATAL, no_fpos, fname);
      buff_top = buff + BUFF_SIZE;
      for( p = buff, q = *(lp-1);  q != bp;  *p++ = *q++ );
      bp = p; *bp = '\0';
      debug1(DEX, D, "  copied into new buff: %s", buff);
      *(lp-1) = buff;
      if( bp == buff_top )
	Error(45, 3, "line too long when reading index file %s",
	  FATAL, no_fpos, fname);
    }

    /* if newline char, end this line and start the next */
    if( ch == CH_LF || ch == CH_CR )
    {
      *bp++ = '\0';
      debug1(DEX, D, "  finished line: %s", *(lp-1));

      /* get rid of following character if part of two-character line ending */
      if( ch == CH_LF )
      {
	ch = getc(fp);
	if( ch != CH_CR )
	  ungetc(ch, fp);
      }
      else /* ch == CH_CR */
      {
	ch = getc(fp);
	if( ch != CH_LF )
	  ungetc(ch, fp);
      }

      /* if no room in lines for next line, double its size */
      if( lp == lines_top )
      {
        debug1(DEX, D, "  realloc(lines, %d)", 2 * lines_length);
	lines = realloc(lines, 2 * lines_length * sizeof(LINE *));
	if( lines == NULL )
	  Error(45, 4, "run out of memory when reading index file %s",
	    FATAL, no_fpos, fname);
	lp = &lines[lines_length];
	lines_length = 2 * lines_length;
	lines_top = &lines[lines_length];
      }

      *lp++ = bp;
    }
    else /* ordinary char with space available, so just add it */
    {
      *bp++ = ch;
    }
  }

  *len = (lp - lines - 1);
  debug1(DEX, D, "ReadLines returning (len = %d)", *len);
  return lines;

} /* end ReadLines */


/*****************************************************************************/
/*                                                                           */
/*  WriteLines(FILE *fp, LINE *lines, int len)                               */
/*                                                                           */
/*  Write array of lines "lines", of length len, to file fp.                 */
/*                                                                           */
/*****************************************************************************/

void WriteLines(FILE *fp, LINE *lines, int len)
{ int i;
  for( i = 0;  i < len;  i++ )
  { fputs(lines[i], fp);
    fputs((char *) STR_NEWLINE, fp);
  }
}


/*****************************************************************************/
/*                                                                           */
/*  Line comparison functions (for qsort)                                    */
/*                                                                           */
/*  By Jeff Kingston and Valery Ushakov (uwe).                               */
/*                                                                           */
/*****************************************************************************/

static int pstrcmp(const void *a, const void *b)	/* !UseCollate */
{
  return strcmp (*(char **)a, *(char **)b);
}

static int pstrcollcmp(const void *a, const void *b)	/* UseCollate */
{
  return strcollcmp (*(char **)a, *(char**)b);
}


/*****************************************************************************/
/*                                                                           */
/*  void SortLines(LINE *lines, int lines_len)                               */
/*                                                                           */
/*  Sort the given lines.                                                    */
/*                                                                           */
/*****************************************************************************/

void SortLines(LINE *lines, int lines_len)
{
  qsort(lines, lines_len, sizeof(LINE), (UseCollate ? pstrcollcmp : pstrcmp));
}


/*****************************************************************************/
/*                                                                           */
/*  void SortFile(char *infile, char *outfile)                               */
/*                                                                           */
/*  Sort file infile, placing the result on file outfile.                    */
/*                                                                           */
/*****************************************************************************/

void SortFile(FULL_CHAR *infile, FULL_CHAR *outfile)
{
  LINE *lines;
  int lines_len;
  FILE *in_fp, *out_fp; 
  debug2(DEX, D, "SortFile(%s, %s)", infile, outfile);
  
  /* open input file */
  in_fp = fopen( (char *) infile, READ_FILE);
  if( in_fp == (FILE *) NULL )
    Error(45, 5, "cannot open index file %s for reading",
	    FATAL, no_fpos, outfile);

  /* open output file */
  out_fp = fopen( (char *) outfile, WRITE_FILE);
  if( out_fp == (FILE *) NULL )
    Error(45, 6, "cannot open index file %s for writing",
	    FATAL, no_fpos, outfile);

  /* read lines, sort them, and write them out again sorted */
  lines = ReadLines(in_fp, infile, (FULL_CHAR *) NULL, &lines_len);
  SortLines(lines, lines_len);
  fclose(in_fp);
  WriteLines(out_fp, lines, lines_len);
  fclose(out_fp);
}