aboutsummaryrefslogtreecommitdiffstats
path: root/z45.c
diff options
context:
space:
mode:
Diffstat (limited to 'z45.c')
-rw-r--r--z45.c238
1 files changed, 238 insertions, 0 deletions
diff --git a/z45.c b/z45.c
new file mode 100644
index 0000000..693356c
--- /dev/null
+++ b/z45.c
@@ -0,0 +1,238 @@
+/*@z45.c:External Sort:SortFile()@********************************************/
+/* */
+/* THE LOUT DOCUMENT FORMATTING SYSTEM (VERSION 3.17) */
+/* COPYRIGHT (C) 1991, 1999 Jeffrey H. Kingston */
+/* */
+/* Jeffrey H. Kingston (jeff@cs.usyd.edu.au) */
+/* Basser Department of Computer Science */
+/* The University of Sydney 2006 */
+/* AUSTRALIA */
+/* */
+/* This program is free software; you can redistribute it and/or modify */
+/* it under the terms of the GNU General Public License as published by */
+/* the Free Software Foundation; either Version 2, or (at your option) */
+/* any later version. */
+/* */
+/* This program is distributed in the hope that it will be useful, */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
+/* GNU General Public License for more details. */
+/* */
+/* You should have received a copy of the GNU General Public License */
+/* along with this program; if not, write to the Free Software */
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA */
+/* */
+/* FILE: z45.c */
+/* MODULE: External Sort */
+/* EXTERNS: SortFile() */
+/* */
+/* This simple sort utility assumes that the source file can all be read */
+/* into memory. If not, you get an "out of memory" error message. */
+/* */
+/*****************************************************************************/
+#include "externs.h"
+
+#define BUFF_SIZE 4096 /* size of one memory buffer */
+#define LINES_GUESS 2000 /* initial guess of number of lines */
+
+
+/*****************************************************************************/
+/* */
+/* LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, *len) */
+/* */
+/* Read all of the lines of fp into memory and return a null-terminated */
+/* array of pointers to these lines, and set *len to the number of lines. */
+/* Make sure the lines themselves are null-terminated, also. */
+/* */
+/* fname is the name of the file being sorted, and is used for error */
+/* messages only. */
+/* */
+/* if first_line is non-null then it is a pointer to a string which is */
+/* to become as the first line of the result. This string needs copying. */
+/* */
+/*****************************************************************************/
+
+LINE *ReadLines(FILE *fp, FULL_CHAR *fname, FULL_CHAR *first_line, int *len)
+{
+ char *buff; /* the current input line buffer */
+ char *buff_top; /* first spot off end of buffer */
+ char *bp; /* first free spot in buff */
+
+ LINE *lines; /* the array of pointers to lines */
+ int lines_length; /* the length of the lines array */
+ LINE *lines_top; /* first spot off end of lines */
+ LINE *lp; /* first free spot in lines */
+
+ char *p, *q;
+ int ch;
+ debug1(DEX, D, "ReadLines(-, %s, -)", fname);
+
+ /* initialize buff to be empty with size BUFF_SIZE */
+ buff = malloc(BUFF_SIZE * sizeof(char));
+ if( buff == NULL )
+ Error(45, 1, "run out of memory when reading index file %s",
+ FATAL, no_fpos, fname);
+ buff_top = buff + BUFF_SIZE;
+ bp = buff;
+
+ /* initialize the lines buffer to be the first line */
+ lines_length = LINES_GUESS;
+ lines = malloc(lines_length * sizeof(LINE *));
+ lines_top = &lines[lines_length];
+ lp = lines;
+
+ /* add first_line to lines buffer if required */
+ if( first_line != (FULL_CHAR *) null )
+ {
+ *lp = malloc((StringLength(first_line) + 1) * sizeof(char));
+ StringCopy( (char *) *lp, first_line);
+ lp++;
+ }
+
+ *lp++ = bp;
+ while( (ch = getc(fp)) != EOF )
+ {
+ debug4(DEX, DD, "lines: [%d %d(%d) %d]",
+ (int) lines, (int) (lp-1), (int) *(lp-1), (int) lines_top -1);
+ debug3(DEX, DD, " buff: [%d bp %d %d]",
+ (int) buff, (int) bp, (int) buff_top - 1);
+ assert( (int) buff >= (int) lines_top ||
+ (int) buff_top <= (int) lines,
+ "ReadLines: lines and buff overlap!" );
+
+ /* get new buffer and copy current line across if out of buff space */
+ if( bp == buff_top )
+ {
+ debug0(DEX, D, " getting new buff");
+ buff = malloc(BUFF_SIZE * sizeof(char));
+ if( buff == NULL )
+ Error(45, 2, "run out of memory when reading index file %s",
+ FATAL, no_fpos, fname);
+ buff_top = buff + BUFF_SIZE;
+ for( p = buff, q = *(lp-1); q != bp; *p++ = *q++ );
+ bp = p; *bp = '\0';
+ debug1(DEX, D, " copied into new buff: %s", buff);
+ *(lp-1) = buff;
+ if( bp == buff_top )
+ Error(45, 3, "line too long when reading index file %s",
+ FATAL, no_fpos, fname);
+ }
+
+ /* if newline char, end this line and start the next */
+ if( ch == '\n' )
+ {
+ *bp++ = '\0';
+ debug1(DEX, D, " finished line: %s", *(lp-1));
+
+ /* if no room in lines for next line, double its size */
+ if( lp == lines_top )
+ {
+ debug1(DEX, D, " realloc(lines, %d)", 2 * lines_length);
+ lines = realloc(lines, 2 * lines_length * sizeof(LINE *));
+ if( lines == NULL )
+ Error(45, 4, "run out of memory when reading index file %s",
+ FATAL, no_fpos, fname);
+ lp = &lines[lines_length];
+ lines_length = 2 * lines_length;
+ lines_top = &lines[lines_length];
+ }
+
+ *lp++ = bp;
+ }
+ else /* ordinary char with space available, so just add it */
+ {
+ *bp++ = ch;
+ }
+ }
+
+ *len = (lp - lines - 1);
+ debug1(DEX, D, "ReadLines returning (len = %d)", *len);
+ return lines;
+
+} /* end ReadLines */
+
+
+/*****************************************************************************/
+/* */
+/* WriteLines(FILE *fp, LINE *lines, int len) */
+/* */
+/* Write array of lines "lines", of length len, to file fp. */
+/* */
+/*****************************************************************************/
+
+void WriteLines(FILE *fp, LINE *lines, int len)
+{ int i;
+ for( i = 0; i < len; i++ )
+ { fputs(lines[i], fp);
+ fputs("\n", fp);
+ }
+}
+
+
+/*****************************************************************************/
+/* */
+/* Line comparison functions (for qsort) */
+/* */
+/* By Jeff Kingston and Valery Ushakov (uwe). */
+/* */
+/*****************************************************************************/
+
+static int pstrcmp(const void *a, const void *b) /* !UseCollate */
+{
+ return strcmp (*(char **)a, *(char **)b);
+}
+
+static int pstrcollcmp(const void *a, const void *b) /* UseCollate */
+{
+ return strcollcmp (*(char **)a, *(char**)b);
+}
+
+
+/*****************************************************************************/
+/* */
+/* void SortLines(LINE *lines, int lines_len) */
+/* */
+/* Sort the given lines. */
+/* */
+/*****************************************************************************/
+
+void SortLines(LINE *lines, int lines_len)
+{
+ qsort(lines, lines_len, sizeof(LINE), (UseCollate ? pstrcollcmp : pstrcmp));
+}
+
+
+/*****************************************************************************/
+/* */
+/* void SortFile(char *infile, char *outfile) */
+/* */
+/* Sort file infile, placing the result on file outfile. */
+/* */
+/*****************************************************************************/
+
+void SortFile(FULL_CHAR *infile, FULL_CHAR *outfile)
+{
+ LINE *lines;
+ int lines_len;
+ FILE *in_fp, *out_fp;
+ debug2(DEX, D, "SortFile(%s, %s)", infile, outfile);
+
+ /* open input file */
+ in_fp = fopen( (char *) infile, READ_BINARY);
+ if( in_fp == (FILE *) NULL )
+ Error(45, 5, "cannot open index file %s for reading",
+ FATAL, no_fpos, outfile);
+
+ /* open output file */
+ out_fp = fopen( (char *) outfile, WRITE_BINARY);
+ if( out_fp == (FILE *) NULL )
+ Error(45, 6, "cannot open index file %s for writing",
+ FATAL, no_fpos, outfile);
+
+ /* read lines, sort them, and write them out again sorted */
+ lines = ReadLines(in_fp, infile, (FULL_CHAR *) NULL, &lines_len);
+ SortLines(lines, lines_len);
+ fclose(in_fp);
+ WriteLines(out_fp, lines, lines_len);
+ fclose(out_fp);
+}