summaryrefslogblamecommitdiffstats
path: root/thml2osis/xslt/thml2osis.xslt
blob: 0ba7fb07887f411267a3a5f5c5580b930f372bf1 (plain) (tree)






























































































































































































































                                                                                                                                        





                                               

              
                                                                                                      
















                                        
                                                       













































































































































































































                                                                                                                                  








                                                                        

         




























                                                                             

                                                                
                                                                  







                                                                              
                 











                                                                            
                 
<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version = '1.0'
		xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
		xmlns='http://www.bibletechnologies.net/2003/OSIS/namespace'>

  <!-- Serialize the result tree as indented, UTF-8 encoded XML -->
  <xsl:output method="xml"
              encoding="UTF-8"
              indent="yes"/>

  <!-- GLOBAL CONSTANTS -->
  <!-- Work identifier, read from the bookID element of the ThML header;
       used below for the osisIDWork and osisWork attributes -->
  <xsl:variable name="workID" select="/ThML/ThML.head/electronicEdInfo/bookID" />

  <!-- xml:lang: -->
  <!-- Document language, taken from the ThML header.  A DC.Language with
       scheme='ISO639-1' is preferred; a DC.Language without any scheme
       attribute is the fallback.  If neither exists the transformation is
       aborted with an explicit message (rather than by referencing an
       undeclared variable, which strict processors reject at compile time
       even for documents that do declare a language). -->
  <xsl:variable name="lang">
    <xsl:choose>
      <xsl:when test="/ThML/ThML.head//DC.Language[@scheme='ISO639-1']">
        <xsl:value-of select="/ThML/ThML.head//DC.Language[@scheme='ISO639-1']" />
      </xsl:when>
      <xsl:when test="/ThML/ThML.head//DC.Language[not(@scheme)]">
        <xsl:value-of select="/ThML/ThML.head//DC.Language[not(@scheme)]" />
      </xsl:when>
      <xsl:otherwise>
        <xsl:message terminate="yes">thml2osis: no language defined for document (expected a DC.Language element in ThML.head, with scheme ISO639-1 or without a scheme)</xsl:message>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>

  <!-- FUNCTIONS -->
  <!-- Converts an ISO 8601 calendar date (YYYY-MM-DD) to the dotted form
       YYYY.MM.DD.
       Param date: the date text (or a node whose string value is the date).
       The year is characters 1-4, the month 6-7 and the day 9-10 of the
       whitespace-normalized value.  Month and day are only emitted when
       present, so a partial date such as "2003" or "2003-05" yields "2003"
       or "2003.05" instead of a value with trailing dots. -->
  <xsl:template name="ISO8601toOSISdate">
    <xsl:param name="date" />
    <!-- trim surrounding whitespace so the fixed offsets below line up -->
    <xsl:variable name="iso" select="normalize-space($date)" />
    <xsl:variable name="month" select="substring($iso, 6, 2)" />
    <xsl:variable name="day" select="substring($iso, 9, 2)" />
    <xsl:value-of select="substring($iso, 1, 4)" />
    <xsl:if test="$month != ''">
      <xsl:value-of select="concat('.', $month)" />
      <!-- a day without a month is meaningless, hence the nesting -->
      <xsl:if test="$day != ''">
        <xsl:value-of select="concat('.', $day)" />
      </xsl:if>
    </xsl:if>
  </xsl:template>

  <!-- Rename helper for the common "same content, different tag" case.
       Emits an element called $name in place of the current element, copies
       the current element's attributes except those named in the
       space-separated list $excludeattributes, then processes the children. -->
  <xsl:template name="changename">
    <xsl:param name="name" />
    <xsl:param name="excludeattributes" />
    <xsl:element name="{$name}">
      <xsl:call-template name="copyattributes">
        <xsl:with-param name="exclude" select="string($excludeattributes)" />
      </xsl:call-template>
      <xsl:apply-templates />
    </xsl:element>
  </xsl:template>

  <!-- Copies the attributes of the current element onto the element that is
       currently being constructed.
       Param exclude: space separated list of attribute names (as returned by
       name()) that must not be copied.
       The attribute nodes are copied as-is instead of being rebuilt by name,
       so attributes carrying a namespace prefix that this stylesheet does
       not declare are copied correctly rather than causing an error. -->
  <xsl:template name="copyattributes">
    <xsl:param name="exclude" />
    <!-- pad with spaces so only whole names match, never substrings -->
    <xsl:variable name="excluded" select="concat(' ', normalize-space($exclude), ' ')" />
    <xsl:copy-of select="@*[not(contains($excluded, concat(' ', name(), ' ')))]" />
  </xsl:template>


  <!-- TEMPLATES -->
  <!-- Document element: wraps the whole conversion in <osis>/<osisText>.
       osisIDWork comes from $workID and xml:lang from $lang; the children
       of ThML are processed inside osisText. -->
  <xsl:template match="ThML">
    <osis xmlns="http://www.bibletechnologies.net/2003/OSIS/namespace"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://www.bibletechnologies.net/2003/OSIS/namespace http://www.bibletechnologies.net/osisCore.2.1.1.xsd">
      <osisText osisRefWork="bible"
                canonical="true"
                osisIDWork="{$workID}"
                xml:lang="{$lang}">
        <xsl:apply-templates />
      </osisText>
    </osis>
  </xsl:template>

  <!-- Builds the OSIS <header> from ThML.head: an optional <revisionDesc>
       followed by one <work> element describing the document. -->
  <xsl:template match="ThML.head">
    <header>
      <!-- revisionHistory cannot be parsed into individual entries, so it is
           emitted as a single revisionDesc, and only when a creation date
           is available to go with it -->
      <xsl:if test="electronicEdInfo/revisionHistory and electronicEdInfo//DC.Date[@sub='Created']">
        <revisionDesc>
          <date>
            <xsl:call-template name="ISO8601toOSISdate">
              <xsl:with-param name="date" select="electronicEdInfo//DC.Date[@sub='Created']" />
            </xsl:call-template>
          </date>
          <p><xsl:value-of select="electronicEdInfo/revisionHistory" /></p>
        </revisionDesc>
      </xsl:if>
      <!-- A single work is assumed -->
      <work osisWork="{$workID}">
        <!-- The OSIS schema defines the children of work as a sequence, so
             the order of the calls below must be maintained. -->
        <!-- TODO: implement the entries marked "not yet implemented", if possible -->
        <xsl:call-template name="header-title" />
        <!-- not yet implemented: header-contributor -->
        <xsl:call-template name="header-creator" />
        <!-- not yet implemented: header-subject -->
        <xsl:call-template name="header-date" />
        <xsl:call-template name="header-description" />
        <xsl:call-template name="header-publisher" />
        <!-- not yet implemented: header-type -->
        <xsl:call-template name="header-format" />
        <!-- not yet implemented: header-identifier, header-source -->
        <xsl:call-template name="header-language" />
        <!-- not yet implemented: header-relation, header-coverage -->
        <xsl:call-template name="header-rights" />
        <!-- not yet implemented: header-scope, header-castList,
             header-teiHeader, header-refSystem -->
      </work>
    </header>
  </xsl:template>


  <!-- Emits the fixed <format> header entry: MIME type text/xml -->
  <xsl:template name="header-format">
    <format type="x-MIME">
      <xsl:text>text/xml</xsl:text>
    </format>
  </xsl:template>

  <!-- Emits the single <title> of the work.
       The DC.Title marked sub='Main' is preferred; otherwise the first
       DC.Title without a sub attribute is used.  Both lookups search the
       whole document: the fallback used to be relative to the caller's
       context node, so it only saw direct children of that node. -->
  <xsl:template name="header-title">
    <xsl:variable name="mainTitle" select="//DC.Title[@sub='Main']" />
    <title>
      <xsl:choose>
        <xsl:when test="$mainTitle">
          <!-- value-of outputs the first node in document order -->
          <xsl:value-of select="$mainTitle" />
        </xsl:when>
        <xsl:otherwise>
          <xsl:value-of select="//DC.Title[not(@sub)]" />
        </xsl:otherwise>
      </xsl:choose>
    </title>
  </xsl:template>

  <!-- Creator: processes every DC.Creator in the document.  Only the
       short-form Author and Translator entries have rules here. -->
  <xsl:template name="header-creator">
    <xsl:apply-templates select="//DC.Creator" />
  </xsl:template>

  <!-- short-form author becomes a creator with role "aut" -->
  <xsl:template match="DC.Creator[@sub='Author'][@scheme='short-form']">
    <creator role="aut"><xsl:value-of select="." /></creator>
  </xsl:template>

  <!-- short-form translator becomes a creator with role "trl" -->
  <xsl:template match="DC.Creator[@sub='Translator'][@scheme='short-form']">
    <creator role="trl"><xsl:value-of select="." /></creator>
  </xsl:template>

  <!-- Publisher: processes every DC.Publisher in the document -->
  <xsl:template name="header-publisher">
    <xsl:apply-templates select="//DC.Publisher" />
  </xsl:template>

  <!-- only a DC.Publisher carrying neither sub nor scheme is converted -->
  <xsl:template match="DC.Publisher[not(@sub or @scheme)]">
    <publisher><xsl:value-of select="." /></publisher>
  </xsl:template>

  <!-- Date: first the firstPublished entries of generalInfo, then every
       DC.Date in the document -->
  <xsl:template name="header-date">
    <xsl:apply-templates select="//generalInfo/firstPublished" />
    <xsl:apply-templates select="//DC.Date" />
  </xsl:template>

  <!-- creation date of the electronic version, reformatted by
       ISO8601toOSISdate -->
  <xsl:template match="DC.Date[@sub='Created'][@scheme='ISO8601']">
    <date event="eversion" type="ISO">
      <xsl:call-template name="ISO8601toOSISdate">
        <xsl:with-param name="date" select="." />
      </xsl:call-template>
    </date>
  </xsl:template>

  <!-- first publication date, copied through without reformatting -->
  <xsl:template match="generalInfo/firstPublished">
    <date event="edition" type="ISO"><xsl:value-of select="." /></date>
  </xsl:template>

  <!-- Language: processes every DC.Language in the document -->
  <xsl:template name="header-language">
    <xsl:apply-templates select="//DC.Language" />
  </xsl:template>

  <!-- only the ISO639-1 scheme has a rule -->
  <xsl:template match="DC.Language[@scheme = 'ISO639-1']">
    <language type="ISO-639-1"><xsl:value-of select="." /></language>
  </xsl:template>

  <!-- Description: processes the description elements of generalInfo -->
  <xsl:template name="header-description">
    <xsl:apply-templates select="//generalInfo/description" />
  </xsl:template>

  <!-- the string value of the description is copied, markup is dropped -->
  <xsl:template match="generalInfo/description">
    <description><xsl:value-of select="." /></description>
  </xsl:template>

  <!-- Rights: processes every DC.Rights in the document -->
  <xsl:template name="header-rights">
    <xsl:apply-templates select="//DC.Rights" />
  </xsl:template>

  <!-- each DC.Rights becomes a rights element holding its string value -->
  <xsl:template match="DC.Rights">
    <rights><xsl:value-of select="." /></rights>
  </xsl:template>

  <!-- Header catch-all: text anywhere below ThML.head produces no output,
       so header elements without a rule of their own leave nothing behind -->
  <xsl:template match="ThML.head//text()" />

  <!-- TODO:

  <titlePage> if possible

  -->


  <!-- BODY -->
  <!-- Comments in the body are reproduced as comments in the output -->
  <xsl:template match="ThML.body//comment()">
    <xsl:comment><xsl:value-of select="string(.)" /></xsl:comment>
  </xsl:template>

  <!-- div1 to div4 all become a plain div; their attributes are dropped -->
  <xsl:template match="ThML.body//div1 | ThML.body//div2 | ThML.body//div3 | ThML.body//div4">
    <div>
      <xsl:apply-templates select="node()" />
    </div>
  </xsl:template>

  <!-- table cell: td becomes cell, attributes dropped -->
  <xsl:template match="ThML.body//td">
    <cell><xsl:apply-templates select="node()" /></cell>
  </xsl:template>

  <!-- table row: tr becomes row, attributes dropped -->
  <xsl:template match="ThML.body//tr">
    <row><xsl:apply-templates select="node()" /></row>
  </xsl:template>

  <!-- i and em become hi with type "italic" -->
  <xsl:template match="ThML.body//i | ThML.body//em">
    <hi type="italic"><xsl:apply-templates select="node()" /></hi>
  </xsl:template>

  <!-- b becomes hi with type "bold" -->
  <xsl:template match="ThML.body//b">
    <hi type="bold"><xsl:apply-templates select="node()" /></hi>
  </xsl:template>

  <!-- scripCom becomes a commentary section.
       annotateRef carries the osisRef unchanged.  osisID is the osisRef with
       a leading "Bible:" work prefix removed; the remainder is taken with
       substring-after so that long references are no longer cut off after
       100 characters. -->
  <xsl:template match="//ThML.body//scripCom">
    <div type="section" annotateType="commentary" annotateRef="{@osisRef}">
      <xsl:attribute name="osisID">
        <xsl:choose>
          <xsl:when test="starts-with(@osisRef, 'Bible:')">
            <xsl:value-of select="substring-after(@osisRef, 'Bible:')" />
          </xsl:when>
          <xsl:otherwise>
            <xsl:value-of select="@osisRef" />
          </xsl:otherwise>
        </xsl:choose>
      </xsl:attribute>
      <xsl:apply-templates />
    </div>
  </xsl:template>

  <!-- scripRef -->

  <!-- OSIS does not allow <reference> inside <a>, so for an <a> that has a
       scripRef child the nesting is swapped: the reference (with the
       scripRef's osisRef) goes outside and the <a> (with the href) inside. -->
  <xsl:template match="ThML.body//a[scripRef]">
    <reference osisRef="{scripRef/@osisRef}">
      <a href="{@href}">
        <!-- only text found inside the child elements is processed; this
             skips the scripRef element that is the immediate child of <a> -->
        <xsl:apply-templates select="*//text()" />
      </a>
    </reference>
  </xsl:template>

  <!-- scripRef: becomes a <reference> carrying the same osisRef.
       Nested scripRef elements are pulled out and emitted after the
       enclosing reference instead of inside it. -->
  <xsl:template match="//ThML.body//scripRef">
    <reference>
      <xsl:attribute name="osisRef">
	<xsl:value-of select="@osisRef" />
      </xsl:attribute>
      <!-- processes the direct text children plus every descendant element
           that is not itself a scripRef.
           NOTE(review): because all descendant elements are selected, an
           element nested inside another selected element looks like it is
           processed twice (once directly, once via its parent) - confirm
           against real input before changing -->
      <xsl:apply-templates select=".//*[name()!='scripRef'] | text()"/>
    </reference>
    <!-- NB: some ThML has scripRef nested within scripRef,
         which doesn't make sense, so we sort it out here -->
    <xsl:apply-templates select=".//scripRef" />
  </xsl:template>

  <!-- span with a lang attribute becomes foreign, the language moving to
       xml:lang -->
  <xsl:template match="ThML.body//span[@lang]">
    <foreign xml:lang="{@lang}">
      <xsl:apply-templates select="node()" />
    </foreign>
  </xsl:template>

  <!-- any other span is unwrapped: only its content is kept -->
  <xsl:template match="ThML.body//span[not(@lang)]">
    <xsl:apply-templates select="node()" />
  </xsl:template>


  <!-- table keeps its name; the border and id attributes are not copied -->
  <xsl:template match="ThML.body//table">
    <xsl:call-template name="changename">
      <xsl:with-param name="name" select="'table'" />
      <xsl:with-param name="excludeattributes" select="'border id'" />
    </xsl:call-template>
  </xsl:template>

  <!-- h1 to h6 become title; the digit of the element name is the level -->
  <xsl:template match="ThML.body//h1 | ThML.body//h2 | ThML.body//h3 | ThML.body//h4 | ThML.body//h5 | ThML.body//h6">
    <!-- the matched names are two characters long, so everything after the
         first character is the single level digit -->
    <title level="{substring(name(), 2)}">
      <xsl:apply-templates select="node()" />
    </title>
  </xsl:template>

  <!-- br becomes an empty lb (line break) element -->
  <xsl:template match="ThML.body//br">
    <xsl:element name="lb" />
  </xsl:template>

  <!-- pb becomes a milestone of type "pb"; the attributes of pb are copied
       onto it, except href and id -->
  <xsl:template match="ThML.body//pb">
    <milestone type="pb">
      <xsl:call-template name="copyattributes">
        <xsl:with-param name="exclude" select="'href id'" />
      </xsl:call-template>
    </milestone>
  </xsl:template>

  <!-- img becomes figure, with all of its attributes copied -->
  <xsl:template match="ThML.body//img">
    <xsl:call-template name="changename">
      <xsl:with-param name="name" select="'figure'" />
    </xsl:call-template>
  </xsl:template>

  <!-- ul and ol both become list; the class attribute is not copied -->
  <xsl:template match="ThML.body//ul | ThML.body//ol">
    <!-- TODO - is there any markup for distinguishing ol and ul? -->
    <xsl:call-template name="changename">
      <xsl:with-param name="name" select="'list'" />
      <xsl:with-param name="excludeattributes" select="'class'" />
    </xsl:call-template>
  </xsl:template>

  <!-- li becomes item; the class attribute is not copied -->
  <xsl:template match="ThML.body//li">
    <xsl:call-template name="changename">
      <xsl:with-param name="name" select="'item'" />
      <xsl:with-param name="excludeattributes" select="'class'" />
    </xsl:call-template>
  </xsl:template>


  <!-- a (without a scripRef child; that case has a rule of its own) -->
  <xsl:template match="ThML.body//a[not(scripRef)]">
    <a>
      <xsl:call-template name="copyattributes">
        <xsl:with-param name="exclude" select="'class'" />
      </xsl:call-template>
      <!-- OSIS only allows <index> and text inside <a>, so nothing else
           below this element is processed -->
      <xsl:apply-templates select="descendant::index | descendant::text()" />
    </a>
  </xsl:template>

  <!-- index: the ThML type attribute becomes the OSIS index attribute and
       each subjectN attribute that is present becomes levelN. -->
  <xsl:template match="ThML.body//index">
    <index index="{@type}">
      <xsl:if test="@subject1">
        <xsl:attribute name="level1"><xsl:value-of select="@subject1" /></xsl:attribute>
      </xsl:if>
      <xsl:if test="@subject2">
        <xsl:attribute name="level2"><xsl:value-of select="@subject2" /></xsl:attribute>
      </xsl:if>
      <xsl:if test="@subject3">
        <xsl:attribute name="level3"><xsl:value-of select="@subject3" /></xsl:attribute>
      </xsl:if>
      <xsl:if test="@subject4">
        <xsl:attribute name="level4"><xsl:value-of select="@subject4" /></xsl:attribute>
      </xsl:if>
      <!-- ThML's 'title' attribute appears to be equivalent to the last
           'levelX' attribute used in OSIS, so it is written to the first
           level whose subject attribute is missing.  This is adequate
           unless there is a subject4 attribute, in which case the title
           is not output at all. -->
      <xsl:choose>
        <xsl:when test="not(@subject1)">
          <xsl:attribute name="level1"><xsl:value-of select="@title" /></xsl:attribute>
        </xsl:when>
        <xsl:when test="not(@subject2)">
          <xsl:attribute name="level2"><xsl:value-of select="@title" /></xsl:attribute>
        </xsl:when>
        <xsl:when test="not(@subject3)">
          <xsl:attribute name="level3"><xsl:value-of select="@title" /></xsl:attribute>
        </xsl:when>
        <xsl:when test="not(@subject4)">
          <xsl:attribute name="level4"><xsl:value-of select="@title" /></xsl:attribute>
        </xsl:when>
      </xsl:choose>

      <xsl:apply-templates />
    </index>
  </xsl:template>


  <!-- added, unclear and insertIndex are unwrapped for now (only their
       content is kept).  TODO: find decent OSIS markup for them. -->
  <xsl:template match="ThML.body//added | ThML.body//unclear | ThML.body//insertIndex">
    <xsl:apply-templates select="node()" />
  </xsl:template>

  <!-- cite is unwrapped: only its content is kept.
       TODO: <cite> should really be translated as <reference>, but that
       requires creating a <work osisWork="someUniqueName"> element in the
       header.  That could possibly be done in a second phase, but knowing
       how to parse the contents of the <cite> element is harder. -->
  <xsl:template match="ThML.body//cite">
    <xsl:apply-templates select="node()" />
  </xsl:template>

  <!-- deleted: the removed text is kept, wrapped in a note with a fixed
       lead-in.  TODO: handle this better? -->
  <xsl:template match="ThML.body//deleted">
    <note>
      <!-- lead-in text, including the line break and indentation that follow it -->
      <xsl:text>Original text contained:&#10;      </xsl:text>
      <xsl:apply-templates select="node()" />
    </note>
  </xsl:template>

  <!-- verse becomes lg; the class and id attributes are not copied -->
  <xsl:template match="ThML.body//verse">
    <xsl:call-template name="changename">
      <xsl:with-param name="name" select="'lg'" />
      <xsl:with-param name="excludeattributes" select="'class id'" />
    </xsl:call-template>
  </xsl:template>


<!-- TODO

- date - conversion

-->
  

  <!-- elements to strip: sup is unwrapped, only its content is kept -->
  <xsl:template match="ThML.body//sup">
    <xsl:apply-templates select="node()" />
  </xsl:template>

  <!-- a without href is dropped together with its content
       (this is really just to correct some ThML errors).
       The explicit priority makes this rule win over the other rules that
       match <a> elements and over the generic element rule; without it all
       of them share the default priority 0.5 and the outcome depends on the
       processor's conflict recovery. -->
  <xsl:template match="//ThML.body//a[not(@href)]" priority="1" />

  <!-- The rest.  Many elements are common between ThML and OSIS -->
  <!-- TODO: check that the above list of exceptions and filters
       are in fact complete -->

  <!-- Common elements between ThML and OSIS that are currently covered here:
       a
       abbr
       caption
       foreign
       note
       P
       q
       table
 -->

  <!-- p: copied into the output unless it is empty (has neither child
       elements nor text).  The id, class, place and style attributes are
       not copied. -->
  <xsl:template match="ThML.body//p">
    <!-- NB: <xsl:copy> is avoided due to namespace issues; the literal
         element below is created in the stylesheet's default namespace -->
    <xsl:if test="* or text()">
      <p>
        <xsl:call-template name="copyattributes">
          <xsl:with-param name="exclude" select="'id class place style'" />
        </xsl:call-template>
        <xsl:apply-templates select="node()" />
      </p>
    </xsl:if>
  </xsl:template>


  <!-- Generic rule for every body element that has no rule of its own: the
       element is recreated under the same name, with its attributes except
       id, class, place and style, and its children are processed.
       The explicit priority is required: by default this pattern has
       priority 0.5, the same as the specific ThML.body rules above, and
       since this rule comes last in the stylesheet a conforming processor
       would resolve the conflict in favour of this rule (or report an
       error).  0.25 keeps it below those rules while leaving it above the
       name-only patterns, which have priority 0. -->
  <xsl:template match="//ThML.body//*" priority="0.25">
    <!-- NB: we avoid using <xsl:copy> due to namespace issues -->
    <xsl:element name="{name()}">
      <xsl:call-template name="copyattributes">
        <xsl:with-param name="exclude">id class place style</xsl:with-param>
      </xsl:call-template>
      <xsl:apply-templates />
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>