aboutsummaryrefslogtreecommitdiffstats
path: root/src/epy_reader/tools/KindleUnpack/mobi_ncx.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/epy_reader/tools/KindleUnpack/mobi_ncx.py')
-rw-r--r--src/epy_reader/tools/KindleUnpack/mobi_ncx.py275
1 files changed, 275 insertions, 0 deletions
diff --git a/src/epy_reader/tools/KindleUnpack/mobi_ncx.py b/src/epy_reader/tools/KindleUnpack/mobi_ncx.py
new file mode 100644
index 0000000..60ef9a0
--- /dev/null
+++ b/src/epy_reader/tools/KindleUnpack/mobi_ncx.py
@@ -0,0 +1,275 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import os
+from .unipath import pathof
+from .compatibility_utils import unescapeit
+
+
+import re
+# note: re requites the pattern to be the exact same type as the data to be searched in python3
+# but u"" is not allowed for the pattern itself only b""
+
+from xml.sax.saxutils import escape as xmlescape
+
+from .mobi_utils import toBase32
+from .mobi_index import MobiIndex
+
+DEBUG_NCX = False
+
+class ncxExtract:
+
+ def __init__(self, mh, files):
+ self.mh = mh
+ self.sect = self.mh.sect
+ self.files = files
+ self.isNCX = False
+ self.mi = MobiIndex(self.sect)
+ self.ncxidx = self.mh.ncxidx
+ self.indx_data = None
+
+ def parseNCX(self):
+ indx_data = []
+ tag_fieldname_map = {
+ 1: ['pos',0],
+ 2: ['len',0],
+ 3: ['noffs',0],
+ 4: ['hlvl',0],
+ 5: ['koffs',0],
+ 6: ['pos_fid',0],
+ 21: ['parent',0],
+ 22: ['child1',0],
+ 23: ['childn',0]
+ }
+ if self.ncxidx != 0xffffffff:
+ outtbl, ctoc_text = self.mi.getIndexData(self.ncxidx, "NCX")
+ if DEBUG_NCX:
+ print(ctoc_text)
+ print(outtbl)
+ num = 0
+ for [text, tagMap] in outtbl:
+ tmp = {
+ 'name': text.decode('utf-8'),
+ 'pos': -1,
+ 'len': 0,
+ 'noffs': -1,
+ 'text' : "Unknown Text",
+ 'hlvl' : -1,
+ 'kind' : "Unknown Kind",
+ 'pos_fid' : None,
+ 'parent' : -1,
+ 'child1' : -1,
+ 'childn' : -1,
+ 'num' : num
+ }
+ for tag in tag_fieldname_map:
+ [fieldname, i] = tag_fieldname_map[tag]
+ if tag in tagMap:
+ fieldvalue = tagMap[tag][i]
+ if tag == 6:
+ pos_fid = toBase32(fieldvalue,4).decode('utf-8')
+ fieldvalue2 = tagMap[tag][i+1]
+ pos_off = toBase32(fieldvalue2,10).decode('utf-8')
+ fieldvalue = 'kindle:pos:fid:%s:off:%s' % (pos_fid, pos_off)
+ tmp[fieldname] = fieldvalue
+ if tag == 3:
+ toctext = ctoc_text.get(fieldvalue, 'Unknown Text')
+ toctext = toctext.decode(self.mh.codec)
+ tmp['text'] = toctext
+ if tag == 5:
+ kindtext = ctoc_text.get(fieldvalue, 'Unknown Kind')
+ kindtext = kindtext.decode(self.mh.codec)
+ tmp['kind'] = kindtext
+ indx_data.append(tmp)
+ if DEBUG_NCX:
+ print("record number: ", num)
+ print("name: ", tmp['name'],)
+ print("position", tmp['pos']," length: ", tmp['len'])
+ print("text: ", tmp['text'])
+ print("kind: ", tmp['kind'])
+ print("heading level: ", tmp['hlvl'])
+ print("parent:", tmp['parent'])
+ print("first child: ",tmp['child1']," last child: ", tmp['childn'])
+ print("pos_fid is ", tmp['pos_fid'])
+ print("\n\n")
+ num += 1
+ self.indx_data = indx_data
+ return indx_data
+
+ def buildNCX(self, htmlfile, title, ident, lang):
+ indx_data = self.indx_data
+
+ ncx_header = \
+'''<?xml version='1.0' encoding='utf-8'?>
+<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="%s">
+<head>
+<meta content="%s" name="dtb:uid"/>
+<meta content="%d" name="dtb:depth"/>
+<meta content="mobiunpack.py" name="dtb:generator"/>
+<meta content="0" name="dtb:totalPageCount"/>
+<meta content="0" name="dtb:maxPageNumber"/>
+</head>
+<docTitle>
+<text>%s</text>
+</docTitle>
+<navMap>
+'''
+
+ ncx_footer = \
+''' </navMap>
+</ncx>
+'''
+
+ ncx_entry = \
+'''<navPoint id="%s" playOrder="%d">
+<navLabel>
+<text>%s</text>
+</navLabel>
+<content src="%s"/>'''
+
+ # recursive part
+ def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1):
+ if start>len(indx_data) or end>len(indx_data):
+ print("Warning: missing INDX child entries", start, end, len(indx_data))
+ return ''
+ if DEBUG_NCX:
+ print("recursINDX lvl %d from %d to %d" % (lvl, start, end))
+ xml = ''
+ if start <= 0:
+ start = 0
+ if end <= 0:
+ end = len(indx_data)
+ if lvl > max_lvl:
+ max_lvl = lvl
+ indent = ' ' * (2 + lvl)
+
+ for i in range(start, end):
+ e = indx_data[i]
+ if not e['hlvl'] == lvl:
+ continue
+ # open entry
+ num += 1
+ link = '%s#filepos%d' % (htmlfile, e['pos'])
+ tagid = 'np_%d' % num
+ entry = ncx_entry % (tagid, num, xmlescape(unescapeit(e['text'])), link)
+ entry = re.sub(re.compile('^', re.M), indent, entry, 0)
+ xml += entry + '\n'
+ # recurs
+ if e['child1']>=0:
+ xmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1,
+ e['child1'], e['childn'] + 1)
+ xml += xmlrec
+ # close entry
+ xml += indent + '</navPoint>\n'
+ return xml, max_lvl, num
+
+ body, max_lvl, num = recursINDX()
+ header = ncx_header % (lang, ident, max_lvl + 1, title)
+ ncx = header + body + ncx_footer
+ if not len(indx_data) == num:
+ print("Warning: different number of entries in NCX", len(indx_data), num)
+ return ncx
+
+ def writeNCX(self, metadata):
+ # build the xml
+ self.isNCX = True
+ print("Write ncx")
+ # htmlname = os.path.basename(self.files.outbase)
+ # htmlname += '.html'
+ htmlname = 'book.html'
+ xml = self.buildNCX(htmlname, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0])
+ # write the ncx file
+ # ncxname = os.path.join(self.files.mobi7dir, self.files.getInputFileBasename() + '.ncx')
+ ncxname = os.path.join(self.files.mobi7dir, 'toc.ncx')
+ with open(pathof(ncxname), 'wb') as f:
+ f.write(xml.encode('utf-8'))
+
+ def buildK8NCX(self, indx_data, title, ident, lang):
+ ncx_header = \
+'''<?xml version='1.0' encoding='utf-8'?>
+<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="%s">
+<head>
+<meta content="%s" name="dtb:uid"/>
+<meta content="%d" name="dtb:depth"/>
+<meta content="mobiunpack.py" name="dtb:generator"/>
+<meta content="0" name="dtb:totalPageCount"/>
+<meta content="0" name="dtb:maxPageNumber"/>
+</head>
+<docTitle>
+<text>%s</text>
+</docTitle>
+<navMap>
+'''
+
+ ncx_footer = \
+''' </navMap>
+</ncx>
+'''
+
+ ncx_entry = \
+'''<navPoint id="%s" playOrder="%d">
+<navLabel>
+<text>%s</text>
+</navLabel>
+<content src="%s"/>'''
+
+ # recursive part
+ def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1):
+ if start>len(indx_data) or end>len(indx_data):
+ print("Warning: missing INDX child entries", start, end, len(indx_data))
+ return ''
+ if DEBUG_NCX:
+ print("recursINDX lvl %d from %d to %d" % (lvl, start, end))
+ xml = ''
+ if start <= 0:
+ start = 0
+ if end <= 0:
+ end = len(indx_data)
+ if lvl > max_lvl:
+ max_lvl = lvl
+ indent = ' ' * (2 + lvl)
+
+ for i in range(start, end):
+ e = indx_data[i]
+ htmlfile = e['filename']
+ desttag = e['idtag']
+ if not e['hlvl'] == lvl:
+ continue
+ # open entry
+ num += 1
+ if desttag == '':
+ link = 'Text/%s' % htmlfile
+ else:
+ link = 'Text/%s#%s' % (htmlfile, desttag)
+ tagid = 'np_%d' % num
+ entry = ncx_entry % (tagid, num, xmlescape(unescapeit(e['text'])), link)
+ entry = re.sub(re.compile('^', re.M), indent, entry, 0)
+ xml += entry + '\n'
+ # recurs
+ if e['child1']>=0:
+ xmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1,
+ e['child1'], e['childn'] + 1)
+ xml += xmlrec
+ # close entry
+ xml += indent + '</navPoint>\n'
+ return xml, max_lvl, num
+
+ body, max_lvl, num = recursINDX()
+ header = ncx_header % (lang, ident, max_lvl + 1, title)
+ ncx = header + body + ncx_footer
+ if not len(indx_data) == num:
+ print("Warning: different number of entries in NCX", len(indx_data), num)
+ return ncx
+
+ def writeK8NCX(self, ncx_data, metadata):
+ # build the xml
+ self.isNCX = True
+ print("Write K8 ncx")
+ xml = self.buildK8NCX(ncx_data, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0])
+ bname = 'toc.ncx'
+ ncxname = os.path.join(self.files.k8oebps,bname)
+ with open(pathof(ncxname), 'wb') as f:
+ f.write(xml.encode('utf-8'))