diff options
author | Martin Gruner <mg.pub@gmx.net> | 2005-10-06 09:23:26 +0000 |
---|---|---|
committer | Martin Gruner <mg.pub@gmx.net> | 2005-10-06 09:23:26 +0000 |
commit | 018c96b16ad1e8e594fc642db7ffaa37de41eeb9 (patch) | |
tree | 96632efa6ca1b9b977916b3463c810c015c6fdf0 /modules/mt-lxx-parallel | |
parent | 1243db74059bf964cb8d304f924fd84483677697 (diff) | |
download | sword-tools-018c96b16ad1e8e594fc642db7ffaa37de41eeb9.tar.gz |
updates to conversion software
git-svn-id: https://www.crosswire.org/svn/sword-tools/trunk@43 07627401-56e2-0310-80f4-f8cd0041bdcd
Diffstat (limited to 'modules/mt-lxx-parallel')
-rw-r--r-- | modules/mt-lxx-parallel/convert.pl | 149 | ||||
-rw-r--r-- | modules/mt-lxx-parallel/prepare_files.cpp | 415 | ||||
-rw-r--r-- | modules/mt-lxx-parallel/run.sh | 64 |
3 files changed, 149 insertions, 479 deletions
diff --git a/modules/mt-lxx-parallel/convert.pl b/modules/mt-lxx-parallel/convert.pl new file mode 100644 index 0000000..7858223 --- /dev/null +++ b/modules/mt-lxx-parallel/convert.pl @@ -0,0 +1,149 @@ +#!/usr/bin/perl -w + +# +# This tool is supposed to convert the ccat Parallel MT/LXX +# to a valid OSIS file. +# +# @author Martin Gruner +# @copyright GPL +# + +use strict; + +my $prefix = "parallel/"; + +# +# grabVerseContent - if the Verse can be found, returns its Content, otherwise nothing +# +sub grabVerseContent(){ #Bookname, chapter, verse, @list + my @result; + my $bookname = shift; my $chapter = shift; my $verse = shift; my @buffer = @_; + + my $index=0; +# unless ( grep(/^$bookname $chapter:$verse/, @buffer) ){ return; } #not found + LOOP: foreach my $current_item (@buffer){ + if ($current_item =~ m/^$bookname $chapter:$verse/){ + while ( not $buffer[++$index] =~ m/^\n|^\s*$/ ){ + push(@result, $buffer[$index] ); + } + return @result; + } + $index++; + } + return; +} #Nothing found, don't return a value. + +sub processBook(){ +#bookname, filename + my $bookname = shift; + my $filename = shift; + + open( FILE, "$prefix/$filename") or die("Could not open file $prefix/$filename"); + my @BUF = <FILE>; chomp(@BUF); close( FILE ); + + my @result; + + CHAPTER: foreach my $chapter(1..1000){ + print("Processing $bookname chapter $chapter.\n"); + my $verse_found; + VERSE: foreach my $verse(1..1000){ + my @verseContent = &grabVerseContent($bookname, $chapter, $verse, @BUF); + if (@verseContent) { + push(@result, "$bookname $chapter:$verse"); + push(@result, @verseContent); + $verse_found = 1; + } + else{ #verse nonexistent, goto next chapter + last VERSE; + } + } + if (not $verse_found){ #chapter empty, stop here + last CHAPTER; + } + } + return(@result); + print("done.\n"); +} + +sub processBookVariant(){ +#booknameA, filenameA, variantnameA, +#booknameB, filenameB, variantnameB, +#neutralBookName + my $booknameA = shift; + my $filenameA = shift; + my $variantNameA = shift; + my $booknameB = shift; + my $filenameB = shift; + my $variantNameB = shift; + my $neutralBookname = shift; +# print("Processing $booknameA $filenameA $booknameB $filenameB $neutralBookname... \n"); + + open( FILE, "$prefix/$filenameA") or die("Could not open file $prefix/$filenameA"); + my @BUFA = <FILE>; chomp(@BUFA); close( FILE ); + + open( FILE, "$prefix/$filenameB") or die("Could not open file $prefix/$filenameB"); + my @BUFB = <FILE>; chomp(@BUFB); close( FILE ); + + my @result; + + CHAPTER: foreach my $chapter(1..1000){ + print("Processing $booknameA and $booknameB chapter $chapter.\n"); + my $verse_found; + VERSE: foreach my $verse(1..1000){ + my @verseContentA = &grabVerseContent($booknameA, $chapter, $verse, @BUFA); + my @verseContentB = &grabVerseContent($booknameB, $chapter, $verse, @BUFB); + if (@verseContentA or @verseContentB) { + push(@result, "$neutralBookname $chapter:$verse"); + $verse_found = 1; + } + else{ #verse nonexistent, goto next chapter + last VERSE; + } + if (@verseContentA){ + if (@verseContentB){ push(@result, $variantNameA) }; + push(@result, @verseContentA); + if (@verseContentB){ push(@result, "") }; + } + if (@verseContentB){ + if (@verseContentA){ push(@result, $variantNameB) }; + push(@result, @verseContentB); + } + } + if (not $verse_found){ #chapter empty, stop here + last CHAPTER; + } + } + return(@result); + print("done.\n"); + +} + +sub fixDaniel(){ #@buffer + my @buffer = @_; + my @result; + my $index = 0; + foreach my $currentItem (@buffer){ + if ($buffer[$index] =~ m/^DANIHL/){}#Do not add this line to the result + else{ + if ($buffer[$index + 1] =~ m/^DANIHL/){ #Push both lines on one + push(@result, $buffer[$index] . $buffer[$index +1] ); + } + else{ + push(@result, $buffer[$index] ); #The normal case + } + } + $index++; + } + return @result; +} + +my @result; +#push(@result, &processBookVariant("JoshA", "07.JoshA.par", "Codex Alexandrinus:", "JoshB", "06.JoshB.par", "Codex Vaticanus:", "Josh") ); +#push(@result, &processBookVariant("JudgA", "09.JudgesA.par", "Codex Alexandrinus:", "JudgB", "08.JudgesB.par", "Codex Vaticanus:", "Judges") ); + +push(@result, &processBook("Isa", "40.Isaiah.par") ); + +#my @danielTmp = &processBookVariant("Dan", "45.DanielOG.par", "Old Greek:", "DanTh", "46.DanielTh.par", "Theodotion:", "Daniel"); +#push(@result, &fixDaniel( @danielTmp ) ); + +print( join("\n", @result) ); diff --git a/modules/mt-lxx-parallel/prepare_files.cpp b/modules/mt-lxx-parallel/prepare_files.cpp deleted file mode 100644 index 51780fa..0000000 --- a/modules/mt-lxx-parallel/prepare_files.cpp +++ /dev/null @@ -1,415 +0,0 @@ -/* - April/2005 - - Creates the composite files for needed to process the parallel MS/LXX text. - JoshA.par & JoshB.par >> Joshua_processed.par - DanielOG.par & DanielTh.par >> Daniel_processed.par -*/ - -#include <stdio.h> -#include <string.h> - -// The file paths are hardwired, change them according to their path's on your system. - -void readfile(FILE * fs, char * destination, bool bfix); -bool checkforchapter(const char * source, const char * name, int chapter); - -void processJoshua(); -bool processverseJoshua(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse); - -void processDaniel(); -bool processverseDaniel(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse); - -void processJudges(); -bool processverseJudges(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse); - - -void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse); -int strfind(const char * source, const char * str, int pos); -void strcatrange(const char * source, char * destination, int start, int size); - -char bufa[500000], bufb[500000], dest[200000]; - -int main(int argc, char * argv[], char * envp[]) -{ - processJoshua(); - processDaniel(); - processJudges(); - - return 1; -} - -void processJoshua(){ - FILE * input, * output; - int chapter, verse; - - // Load both Joshua files into memory, and create the output file. - // Note that JoshB is stored in bufa, and JoshA is stored in bufb. - // JoshB is the more complete file. - - input = fopen("06.JoshB.par", "r"); - if (!input) { - printf("Unable to open 06.JoshB.par\n"); - return; - } - readfile(input, bufa, false); - fclose(input); - - input = fopen("07.JoshA.par", "r"); - if (!input) { - printf("Unable to open 06.JoshA.par\n"); - return; - } - readfile(input, bufb, false); - fclose(input); - - output = fopen("Joshua_processed.par", "w+"); - if (!output){ - printf("Unable to open/create Joshua_processed.par\n"); - return ; - } - - chapter = 1; - verse = 1; - - // Break only when no entries for the current chapter can be found in either file. - while ( checkforchapter(bufa, "JoshB", chapter) || checkforchapter(bufb, "JoshA", chapter) ){ - - while (verse < 200){ - // It is possible that both files may fail to include the current verse, - // to avoid premature termination of the process try to find 1-200. - dest[0] = '\0'; - - if ( processverseJoshua(bufb, bufa, dest, chapter, verse) ){ - fputs(dest, output); - printf("%i:%i\n", chapter, verse); - } - - verse ++; - } - chapter ++; - verse = 1; - } - - fclose(output); - printf("\nFinished Joshua\n"); -} - -void processDaniel(){ - FILE * input, * output; - int chapter, verse; - - // Same process as above for Joshua. - input = fopen("45.DanielOG.par", "r"); - if (!input) { - printf("Unable to open 45.DanielOG.par\n"); - return; - } - readfile(input, bufa, true); - fclose(input); - - input = fopen("46.DanielTh.par", "r"); - if (!input){ - printf("Unable to open 46.DanielTh.par\n"); - return; - } - readfile(input, bufb, false); - fclose(input); - - output = fopen("Daniel_processed.par", "w+"); - if (!output){ - printf("Unable to open/create Daniel_processed.par\n"); - return; - } - - chapter = 1; - verse = 1; - - while ( checkforchapter(bufa, "Dan", chapter) || checkforchapter(bufb, "DanTh", chapter) ){ - - while (verse < 200){ - dest[0] = '\0'; - - if ( processverseDaniel(bufa, bufb, dest, chapter, verse) ){ - fputs(dest, output); - printf("%i:%i\n", chapter, verse); - } - verse ++; - } - chapter ++; - verse = 1; - } - - fclose(output); - printf("\nFinished Daniel\n"); -} - -void processJudges(){ - FILE * input, * output; - int chapter, verse; - - input = fopen("08.JudgesB.par", "r"); - - if (!input){ - printf("Unable to open 08.JudgesB.par\n"); - return; - } - readfile(input, bufa, false); - fclose(input); - - input = fopen("09.JudgesA.par", "r"); - if (!input){ - printf("Unable to open 09.JudgesA.par\n"); - return; - } - readfile(input, bufb, false); - fclose(input); - - output = fopen("Judges_processed.par", "w+"); - - if (!output){ - printf("Unable to open/create Judges_processed.par\n"); - return; - } - - chapter = 1; - verse = 1; - - // Break only when no entries for the current chapter can be found in either file. - while (checkforchapter(bufa, "JudgB", chapter) == 1 || checkforchapter(bufb, "JudgA", chapter)){ - - while (verse < 200){ - // It is possible that both files may fail to include the current verse, - // to avoid premature termination of the process try to find 1-200. - dest[0] = '\0'; - - if (processverseJudges(bufb, bufa, dest, chapter, verse)){ - fputs(dest, output); - printf("%i:%i\n", chapter, verse); - } - verse ++; - } - chapter ++; - verse = 1; - } - - fclose(output); - printf("\nFinished Judges\n"); -} - - -void readfile(FILE * fs, char * destination, bool bfix) -{ - // Read a source file completely into memory. - char * pos, buf[1024]; - - while (fgets(buf, 1024, fs)){ - // Fix for verse 3:56 in DanielOG.par. - if (bfix){ - pos = strstr(buf, "Dan 3:56"); - if (pos){ - pos[6] = '2'; - pos[7] = '3'; - bfix = false; - } - } - - // Fix for cases of DANIHL which should have been on the preceeding line. - if (strstr(buf, "DANIHL") == buf) - destination[strlen(destination) - 1] = '\0'; - - // Fix for Linux, input files have have Windows \r\n. - // On Linux output files will not have '\r'. - pos = strchr(buf, '\r'); - if (pos){ - pos[0] = '\n'; - pos[1] = '\0'; - } - strcat(destination, buf); - } -} - -bool checkforchapter(const char * source, const char * name, int chapter) -{ - // Check to see if this chapter id can be found in the buffer. - char title[32]; - sprintf(title, "%s %i:", name, chapter); - - if (strstr(source, title) != NULL) - return true; - else - return false; -} - -bool processverseJoshua(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse) -{ - char title[32], *posa, *posb; - - // Check each buffer to see if it contains the current verse. - sprintf(title, "JoshA %i:%i\n", chapter, verse); - posa = strstr(sourcea, title); - - sprintf(title, "JoshB %i:%i\n", chapter, verse); - posb = strstr(sourceb, title); - - if (!posa && !posb) - return false; - - sprintf(destination, "Josh %i:%i", chapter, verse); - - if (posa) - { - // If both files contain the verse identify which - // file it came from. - if (posb) - strcat(destination, "\nCodex Alexandrinus:"); - - sprintf(title, "JoshA %i:%i\n", chapter, verse); - - // The file may contain multiple entries for the verse, - // collectverses will grab all entries. - collectverses(sourcea, destination, "JoshA", title, chapter, verse); - } - - if (posb) - { - // As above. - if (posa) - strcat(destination, "\nCodex Vaticanus:"); - - sprintf(title, "JoshB %i:%i\n", chapter, verse); - collectverses(sourceb, destination, "JoshB", title, chapter, verse); - } - - strcat(destination, "\n"); - return true; -} - -bool processverseDaniel(const char *sourcea, const char *sourceb, char *destination, int chapter, int verse) -{ - char title[32], *posa, *posb; - - // Same as processverseJoshua except for book name, and file identifiers. - sprintf(title, "Dan %i:%i\n", chapter, verse); - posa = strstr(sourcea, title); - - sprintf(title, "DanTh %i:%i\n", chapter, verse); - posb = strstr(sourceb, title); - - if (!posa && !posb) - return false; - - sprintf(destination, "Dan %i:%i", chapter, verse); - - if (posa) - { - if (posb) - strcat(destination, "\nSeptuagint:"); - - sprintf(title, "Dan %i:%i\n", chapter, verse); - collectverses(sourcea, destination, "Dan ", title, chapter, verse); - } - - if (posb) - { - if (posa) - strcat(destination, "\nTheodotion:"); - - sprintf(title, "DanTh %i:%i\n", chapter, verse); - collectverses(sourceb, destination, "DanTh ", title, chapter, verse); - } - - strcat(destination, "\n"); - return true; -} - -bool processverseJudges(const char *sourcea, const char * sourceb, char *destination, int chapter, int verse) -{ - char title[32], *posa, *posb; - - // Check each buffer to see if it contains the current verse. - sprintf(title, "JudgA %i:%i\n", chapter, verse); - posa = strstr(sourcea, title); - - sprintf(title, "JudgB %i:%i\n", chapter, verse); - posb = strstr(sourceb, title); - - if (!posa && !posb) - return false; - - sprintf(destination, "Judg %i:%i", chapter, verse); - - if (posa){ - // If both files contain the verse identify which - // file it came from. - if (posb) - strcat(destination, "\nCodex Alexandrinus:"); - - sprintf(title, "JudgA %i:%i\n", chapter, verse); - - // The file may contain multiple entries for the verse, - // collectverses will grab all entries. - collectverses(sourcea, destination, "JudgA", title, chapter, verse); - } - - if (posb){ - // As above. - if (posa) - strcat(destination, "\nCodex Vaticanus:"); - - sprintf(title, "JudgB %i:%i\n", chapter, verse); - collectverses(sourceb, destination, "JudgB", title, chapter, verse); - } - - strcat(destination, "\n"); - return true; -} - -void collectverses(const char * source, char * destination, const char * book, const char * title, int chapter, int verse) -{ - // Find all verses that have the specified id in source, - // and add them to destination. - const char * pos = source; - int end, hits = 0; - - while (pos = strstr(pos, title)){ - pos += strlen(title); - end = strfind(pos, book, 0); - - if (end == -1) - end = strlen(pos); - - // Insert a new line only if it is the first entry found. - if (!hits) - strcat(destination, "\n"); - - strcatrange(pos, destination, 0, end); - - end = strlen(destination); - - if (destination[end -1] == '\n') - destination[end -1] = '\0'; - - hits ++; - } -} - - -int strfind(const char * source, const char * str, int pos) -{ - // Get the index position of from strstr instead of a memory pointer. - const char * psz = strstr(source + pos, str); - - if (!psz) - return -1; - - return psz - source; -} - -void strcatrange(const char * source, char * destination, int start, int size) -{ - // Copy a specified range from source to destination, and terminate. - int len = strlen(destination); - memcpy(&destination[len], &source[start], size); - destination[len + size] = '\0'; -} diff --git a/modules/mt-lxx-parallel/run.sh b/modules/mt-lxx-parallel/run.sh deleted file mode 100644 index c4e5ae4..0000000 --- a/modules/mt-lxx-parallel/run.sh +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/bash - -ORIG_FILES="\ -01.Genesis.par \ -02.Exodus.par \ -03.Lev.par \ -04.Num.par \ -05.Deut.par \ -06.JoshB.par \ -07.JoshA.par \ -08.JudgesB.par \ -09.JudgesA.par \ -10.Ruth.par \ -11.1Sam.par \ -12.2Sam.par \ -13.1Kings.par \ -14.2Kings.par \ -15.1Chron.par \ -16.2Chron.par \ -17.1Esdras.par \ -18.Esther.par \ -18.Ezra.par \ -19.Neh.par \ -20.Psalms.par \ -22.Ps151.par \ -23.Prov.par \ -24.Qoh.par \ -25.Cant.par \ -26.Job.par \ -27.Sirach.par \ -28.Hosea.par \ -29.Micah.par \ -30.Amos.par \ -31.Joel.par \ -32.Jonah.par \ -33.Obadiah.par \ -34.Nahum.par \ -35.Hab.par \ -36.Zeph.par \ -37.Haggai.par \ -38.Zech.par \ -39.Malachi.par \ -40.Isaiah.par \ -41.Jer.par \ -42.Baruch.par \ -43.Lam.par \ -44.Ezekiel.par \ -45.DanielOG.par \ -46.DanielTh.par" - -TEMP_DIR="tmp/" - -mkdir $TEMP_DIR; -rm $TEMP_DIR/*; -cp $ORIG_FILES $TEMP_DIR; - -g++ prepare_files.cpp -o $TEMP_DIR/prepare_files; - -#THIS WILL CREATE Joshua_processed.par and Daniel_processed.par -cd $TEMP_DIR; -prepare_files; -#These are not needed in TEMP_DIR any more -rm "06.JoshB.par" "07.JoshA.par" "08.JudgesB.par" "09.JudgesA.par" "45.DanielOG.par" "46.DanielTh.par" "prepare_files" - |