diff options
Diffstat (limited to 'z45.c')
-rw-r--r-- | z45.c | 238 |
1 files changed, 238 insertions, 0 deletions
@@ -0,0 +1,238 @@ +/*@z45.c:External Sort:SortFile()@********************************************/ +/* */ +/* THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.17) */ +/* COPYRIGHT (C) 1991, 1999 Jeffrey H. Kingston */ +/* */ +/* Jeffrey H. Kingston (jeff@cs.usyd.edu.au) */ +/* Basser Department of Computer Science */ +/* The University of Sydney 2006 */ +/* AUSTRALIA */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either Version 2, or (at your option) */ +/* any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA */ +/* */ +/* FILE: z45.c */ +/* MODULE: External Sort */ +/* EXTERNS: SortFile() */ +/* */ +/* This simple sort utility assumes that the source file can all be read */ +/* into memory. If not, you get an "out of memory" error message. */ +/* */ +/*****************************************************************************/ +#include "externs.h" + +#define BUFF_SIZE 4096 /* size of one memory buffer */ +#define LINES_GUESS 2000 /* initial guess of number of lines */ + + +/*****************************************************************************/ +/* */ +/* LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, *len) */ +/* */ +/* Read all of the lines of fp into memory and return a null-terminated */ +/* array of pointers to these lines, and set *len to the number of lines. */ +/* Make sure the lines themselves are null-terminated, also. */ +/* */ +/* fname is the name of the file being sorted, and is used for error */ +/* messages only. */ +/* */ +/* if first_line is non-null then it is a pointer to a string which is */ +/* to become as the first line of the result. This string needs copying. */ +/* */ +/*****************************************************************************/ + +LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, int *len) +{ + char *buff; /* the current input line buffer */ + char *buff_top; /* first spot off end of buffer */ + char *bp; /* first free spot in buff */ + + LINE *lines; /* the array of pointers to lines */ + int lines_length; /* the length of the lines array */ + LINE *lines_top; /* first spot off end of lines */ + LINE *lp; /* first free spot in lines */ + + char *p, *q; + int ch; + debug1(DEX, D, "ReadLines(-, %s, -)", fname); + + /* initialize buff to be empty with size BUFF_SIZE */ + buff = malloc(BUFF_SIZE * sizeof(char)); + if( buff == NULL ) + Error(45, 1, "run out of memory when reading index file %s", + FATAL, no_fpos, fname); + buff_top = buff + BUFF_SIZE; + bp = buff; + + /* initialize the lines buffer to be the first line */ + lines_length = LINES_GUESS; + lines = malloc(lines_length * sizeof(LINE *)); + lines_top = &lines[lines_length]; + lp = lines; + + /* add first_line to lines buffer if required */ + if( first_line != (FULL_CHAR *) null ) + { + *lp = malloc((StringLength(first_line) + 1) * sizeof(char)); + StringCopy( (char *) *lp, first_line); + lp++; + } + + *lp++ = bp; + while( (ch = getc(fp)) != EOF ) + { + debug4(DEX, DD, "lines: [%d %d(%d) %d]", + (int) lines, (int) (lp-1), (int) *(lp-1), (int) lines_top -1); + debug3(DEX, DD, " buff: [%d bp %d %d]", + (int) buff, (int) bp, (int) buff_top - 1); + assert( (int) buff >= (int) lines_top || + (int) buff_top <= (int) lines, + "ReadLines: lines and buff overlap!" ); + + /* get new buffer and copy current line across if out of buff space */ + if( bp == buff_top ) + { + debug0(DEX, D, " getting new buff"); + buff = malloc(BUFF_SIZE * sizeof(char)); + if( buff == NULL ) + Error(45, 2, "run out of memory when reading index file %s", + FATAL, no_fpos, fname); + buff_top = buff + BUFF_SIZE; + for( p = buff, q = *(lp-1); q != bp; *p++ = *q++ ); + bp = p; *bp = '\0'; + debug1(DEX, D, " copied into new buff: %s", buff); + *(lp-1) = buff; + if( bp == buff_top ) + Error(45, 3, "line too long when reading index file %s", + FATAL, no_fpos, fname); + } + + /* if newline char, end this line and start the next */ + if( ch == '\n' ) + { + *bp++ = '\0'; + debug1(DEX, D, " finished line: %s", *(lp-1)); + + /* if no room in lines for next line, double its size */ + if( lp == lines_top ) + { + debug1(DEX, D, " realloc(lines, %d)", 2 * lines_length); + lines = realloc(lines, 2 * lines_length * sizeof(LINE *)); + if( lines == NULL ) + Error(45, 4, "run out of memory when reading index file %s", + FATAL, no_fpos, fname); + lp = &lines[lines_length]; + lines_length = 2 * lines_length; + lines_top = &lines[lines_length]; + } + + *lp++ = bp; + } + else /* ordinary char with space available, so just add it */ + { + *bp++ = ch; + } + } + + *len = (lp - lines - 1); + debug1(DEX, D, "ReadLines returning (len = %d)", *len); + return lines; + +} /* end ReadLines */ + + +/*****************************************************************************/ +/* */ +/* WriteLines(FILE *fp, LINE *lines, int len) */ +/* */ +/* Write array of lines "lines", of length len, to file fp. */ +/* */ +/*****************************************************************************/ + +void WriteLines(FILE *fp, LINE *lines, int len) +{ int i; + for( i = 0; i < len; i++ ) + { fputs(lines[i], fp); + fputs("\n", fp); + } +} + + +/*****************************************************************************/ +/* */ +/* Line comparison functions (for qsort) */ +/* */ +/* By Jeff Kingston and Valery Ushakov (uwe). */ +/* */ +/*****************************************************************************/ + +static int pstrcmp(const void *a, const void *b) /* !UseCollate */ +{ + return strcmp (*(char **)a, *(char **)b); +} + +static int pstrcollcmp(const void *a, const void *b) /* UseCollate */ +{ + return strcollcmp (*(char **)a, *(char**)b); +} + + +/*****************************************************************************/ +/* */ +/* void SortLines(LINE *lines, int lines_len) */ +/* */ +/* Sort the given lines. */ +/* */ +/*****************************************************************************/ + +void SortLines(LINE *lines, int lines_len) +{ + qsort(lines, lines_len, sizeof(LINE), (UseCollate ? pstrcollcmp : pstrcmp)); +} + + +/*****************************************************************************/ +/* */ +/* void SortFile(char *infile, char *outfile) */ +/* */ +/* Sort file infile, placing the result on file outfile. */ +/* */ +/*****************************************************************************/ + +void SortFile(FULL_CHAR *infile, FULL_CHAR *outfile) +{ + LINE *lines; + int lines_len; + FILE *in_fp, *out_fp; + debug2(DEX, D, "SortFile(%s, %s)", infile, outfile); + + /* open input file */ + in_fp = fopen( (char *) infile, READ_BINARY); + if( in_fp == (FILE *) NULL ) + Error(45, 5, "cannot open index file %s for reading", + FATAL, no_fpos, outfile); + + /* open output file */ + out_fp = fopen( (char *) outfile, WRITE_BINARY); + if( out_fp == (FILE *) NULL ) + Error(45, 6, "cannot open index file %s for writing", + FATAL, no_fpos, outfile); + + /* read lines, sort them, and write them out again sorted */ + lines = ReadLines(in_fp, infile, (FULL_CHAR *) NULL, &lines_len); + SortLines(lines, lines_len); + fclose(in_fp); + WriteLines(out_fp, lines, lines_len); + fclose(out_fp); +} |