aboutsummaryrefslogblamecommitdiffstats
path: root/z45.c
blob: 693356ca70bccb60498197147046545d6ed65a0b (plain) (tree)













































































































































































































































                                                                               
/*@z45.c:External Sort:SortFile()@********************************************/
/*                                                                           */
/*  THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.17)                       */
/*  COPYRIGHT (C) 1991, 1999 Jeffrey H. Kingston                             */
/*                                                                           */
/*  Jeffrey H. Kingston (jeff@cs.usyd.edu.au)                                */
/*  Basser Department of Computer Science                                    */
/*  The University of Sydney 2006                                            */
/*  AUSTRALIA                                                                */
/*                                                                           */
/*  This program is free software; you can redistribute it and/or modify     */
/*  it under the terms of the GNU General Public License as published by     */
/*  the Free Software Foundation; either Version 2, or (at your option)      */
/*  any later version.                                                       */
/*                                                                           */
/*  This program is distributed in the hope that it will be useful,          */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/*  GNU General Public License for more details.                             */
/*                                                                           */
/*  You should have received a copy of the GNU General Public License        */
/*  along with this program; if not, write to the Free Software              */
/*  Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA   */
/*                                                                           */
/*  FILE:         z45.c                                                      */
/*  MODULE:       External Sort                                              */
/*  EXTERNS:      SortFile()                                                 */
/*                                                                           */
/*  This simple sort utility assumes that the source file can all be read    */
/*  into memory.  If not, you get an "out of memory" error message.          */
/*                                                                           */
/*****************************************************************************/
#include "externs.h"

#define BUFF_SIZE	4096		/* size of one memory buffer */
#define LINES_GUESS     2000		/* initial guess of number of lines */


/*****************************************************************************/
/*                                                                           */
/*  LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, *len) */
/*                                                                           */
/*  Read all of the lines of fp into memory and return a null-terminated     */
/*  array of pointers to these lines, and set *len to the number of lines.   */
/*  Make sure the lines themselves are null-terminated, also.                */
/*                                                                           */
/*  fname is the name of the file being sorted, and is used for error        */
/*  messages only.                                                           */
/*                                                                           */
/*  if first_line is non-null then it is a pointer to a string which is      */
/*  to become as the first line of the result.  This string needs copying.   */
/*                                                                           */
/*****************************************************************************/

LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, int *len)
{
  char *buff;				/* the current input line buffer     */
  char *buff_top;			/* first spot off end of buffer      */
  char *bp;				/* first free spot in buff           */

  LINE *lines;				/* the array of pointers to lines    */
  int  lines_length;			/* the length of the lines array     */
  LINE *lines_top;			/* first spot off end of lines       */
  LINE *lp;				/* first free spot in lines          */

  char *p, *q;
  int ch;
  debug1(DEX, D, "ReadLines(-, %s, -)", fname);

  /* initialize buff to be empty with size BUFF_SIZE */
  buff = malloc(BUFF_SIZE * sizeof(char));
  if( buff == NULL )
    Error(45, 1, "run out of memory when reading index file %s",
      FATAL, no_fpos, fname);
  buff_top = buff + BUFF_SIZE;
  bp = buff;

  /* initialize the lines buffer to be the first line  */
  lines_length = LINES_GUESS;
  lines = malloc(lines_length * sizeof(LINE *));
  lines_top = &lines[lines_length];
  lp = lines;

  /* add first_line to lines buffer if required */
  if( first_line != (FULL_CHAR *) null )
  {
    *lp = malloc((StringLength(first_line) + 1) * sizeof(char));
    StringCopy( (char *) *lp, first_line);
    lp++;
  }

  *lp++ = bp;
  while( (ch = getc(fp)) != EOF )
  {
    debug4(DEX, DD, "lines: [%d  %d(%d)  %d]",
      (int) lines, (int) (lp-1), (int) *(lp-1), (int) lines_top -1);
    debug3(DEX, DD, " buff: [%d   bp %d   %d]",
      (int) buff, (int) bp, (int) buff_top - 1);
    assert( (int) buff >= (int) lines_top ||
	    (int) buff_top <= (int) lines,
	    "ReadLines: lines and buff overlap!" );

    /* get new buffer and copy current line across if out of buff space */
    if( bp == buff_top )
    {
      debug0(DEX, D, "  getting new buff");
      buff = malloc(BUFF_SIZE * sizeof(char));
      if( buff == NULL )
	Error(45, 2, "run out of memory when reading index file %s",
	  FATAL, no_fpos, fname);
      buff_top = buff + BUFF_SIZE;
      for( p = buff, q = *(lp-1);  q != bp;  *p++ = *q++ );
      bp = p; *bp = '\0';
      debug1(DEX, D, "  copied into new buff: %s", buff);
      *(lp-1) = buff;
      if( bp == buff_top )
	Error(45, 3, "line too long when reading index file %s",
	  FATAL, no_fpos, fname);
    }

    /* if newline char, end this line and start the next */
    if( ch == '\n' )
    {
      *bp++ = '\0';
      debug1(DEX, D, "  finished line: %s", *(lp-1));

      /* if no room in lines for next line, double its size */
      if( lp == lines_top )
      {
        debug1(DEX, D, "  realloc(lines, %d)", 2 * lines_length);
	lines = realloc(lines, 2 * lines_length * sizeof(LINE *));
	if( lines == NULL )
	  Error(45, 4, "run out of memory when reading index file %s",
	    FATAL, no_fpos, fname);
	lp = &lines[lines_length];
	lines_length = 2 * lines_length;
	lines_top = &lines[lines_length];
      }

      *lp++ = bp;
    }
    else /* ordinary char with space available, so just add it */
    {
      *bp++ = ch;
    }
  }

  *len = (lp - lines - 1);
  debug1(DEX, D, "ReadLines returning (len = %d)", *len);
  return lines;

} /* end ReadLines */


/*****************************************************************************/
/*                                                                           */
/*  WriteLines(FILE *fp, LINE *lines, int len)                               */
/*                                                                           */
/*  Write array of lines "lines", of length len, to file fp.                 */
/*                                                                           */
/*****************************************************************************/

void WriteLines(FILE *fp, LINE *lines, int len)
{ int i;
  for( i = 0;  i < len;  i++ )
  { fputs(lines[i], fp);
    fputs("\n", fp);
  }
}


/*****************************************************************************/
/*                                                                           */
/*  Line comparison functions (for qsort)                                    */
/*                                                                           */
/*  By Jeff Kingston and Valery Ushakov (uwe).                               */
/*                                                                           */
/*****************************************************************************/

static int pstrcmp(const void *a, const void *b)	/* !UseCollate */
{
  return strcmp (*(char **)a, *(char **)b);
}

static int pstrcollcmp(const void *a, const void *b)	/* UseCollate */
{
  return strcollcmp (*(char **)a, *(char**)b);
}


/*****************************************************************************/
/*                                                                           */
/*  void SortLines(LINE *lines, int lines_len)                               */
/*                                                                           */
/*  Sort the given lines.                                                    */
/*                                                                           */
/*****************************************************************************/

void SortLines(LINE *lines, int lines_len)
{
  qsort(lines, lines_len, sizeof(LINE), (UseCollate ? pstrcollcmp : pstrcmp));
}


/*****************************************************************************/
/*                                                                           */
/*  void SortFile(char *infile, char *outfile)                               */
/*                                                                           */
/*  Sort file infile, placing the result on file outfile.                    */
/*                                                                           */
/*****************************************************************************/

void SortFile(FULL_CHAR *infile, FULL_CHAR *outfile)
{
  LINE *lines;
  int lines_len;
  FILE *in_fp, *out_fp; 
  debug2(DEX, D, "SortFile(%s, %s)", infile, outfile);
  
  /* open input file */
  in_fp = fopen( (char *) infile, READ_BINARY);
  if( in_fp == (FILE *) NULL )
    Error(45, 5, "cannot open index file %s for reading",
	    FATAL, no_fpos, outfile);

  /* open output file */
  out_fp = fopen( (char *) outfile, WRITE_BINARY);
  if( out_fp == (FILE *) NULL )
    Error(45, 6, "cannot open index file %s for writing",
	    FATAL, no_fpos, outfile);

  /* read lines, sort them, and write them out again sorted */
  lines = ReadLines(in_fp, infile, (FULL_CHAR *) NULL, &lines_len);
  SortLines(lines, lines_len);
  fclose(in_fp);
  WriteLines(out_fp, lines, lines_len);
  fclose(out_fp);
}