#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
import os
from .unipath import pathof
from .compatibility_utils import unescapeit
import re
# note: in python3, re requires the pattern to be exactly the same type as the data being searched
# but u"" is not allowed for the pattern itself, only b""
from xml.sax.saxutils import escape as xmlescape
from .mobi_utils import toBase32
from .mobi_index import MobiIndex
DEBUG_NCX = False  # when True, dump the raw index tables and every parsed entry to stdout


class ncxExtract:
    """Parse a book's NCX index and render it as a ``toc.ncx`` document.

    ``parseNCX`` converts the records returned by ``MobiIndex.getIndexData``
    into a list of entry dicts.  ``buildNCX`` / ``buildK8NCX`` render such a
    list as NCX XML, and ``writeNCX`` / ``writeK8NCX`` save that XML as
    ``toc.ncx`` in the mobi7 / K8 output directory.
    """

    # INDX tag number -> name of the entry field that receives the tag's first value.
    _TAG_FIELDS = {
        1: 'pos',
        2: 'len',
        3: 'noffs',
        4: 'hlvl',
        5: 'koffs',
        6: 'pos_fid',
        21: 'parent',
        22: 'child1',
        23: 'childn',
    }

    # Head of the NCX document.
    # Placeholders, in order: xml:lang, dtb:uid, dtb:depth, docTitle text.
    _NCX_HEADER = (
        '<?xml version="1.0" encoding="utf-8"?>\n'
        '<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN" '
        '"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">\n'
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="%s">\n'
        '<head>\n'
        '<meta content="%s" name="dtb:uid"/>\n'
        '<meta content="%d" name="dtb:depth"/>\n'
        '<meta content="mobiunpack.py" name="dtb:generator"/>\n'
        '<meta content="0" name="dtb:totalPageCount"/>\n'
        '<meta content="0" name="dtb:maxPageNumber"/>\n'
        '</head>\n'
        '<docTitle>\n'
        '<text>%s</text>\n'
        '</docTitle>\n'
        '<navMap>\n'
    )

    _NCX_FOOTER = (
        '  </navMap>\n'
        '</ncx>\n'
    )

    # Opening part of one navPoint; the matching </navPoint> is emitted after
    # the children.  Placeholders, in order: id, playOrder, label text, content src.
    _NCX_ENTRY = (
        '<navPoint id="%s" playOrder="%d">\n'
        '  <navLabel>\n'
        '    <text>%s</text>\n'
        '  </navLabel>\n'
        '  <content src="%s"/>'
    )

    def __init__(self, mh, files):
        """Keep references to the mobi header *mh* and the output *files* helper.

        Reads ``mh.sect`` and ``mh.ncxidx`` and builds a ``MobiIndex`` over the
        section data.
        """
        self.mh = mh
        self.sect = self.mh.sect
        self.files = files
        self.isNCX = False
        self.mi = MobiIndex(self.sect)
        self.ncxidx = self.mh.ncxidx
        self.indx_data = None

    def _ctocLabel(self, ctoc_text, offset, fallback):
        """Return the CTOC string stored at *offset*, decoded with the book codec.

        *fallback* is returned unchanged when the offset is absent.  It is
        already text, so it must not be decoded (``str`` has no ``decode`` on
        python3).
        """
        raw = ctoc_text.get(offset)
        if raw is None:
            return fallback
        return raw.decode(self.mh.codec)

    def _parseEntry(self, num, text, tagMap, ctoc_text):
        """Build the entry dict for index record number *num*."""
        entry = {
            'name': text.decode('utf-8'),
            'pos': -1,
            'len': 0,
            'noffs': -1,
            'text': "Unknown Text",
            'hlvl': -1,
            'kind': "Unknown Kind",
            'pos_fid': None,
            'parent': -1,
            'child1': -1,
            'childn': -1,
            'num': num,
        }
        for tag, fieldname in self._TAG_FIELDS.items():
            if tag not in tagMap:
                continue
            values = tagMap[tag]
            fieldvalue = values[0]
            if tag == 6:
                # tag 6 carries two values, combined into a kindle:pos:fid link
                pos_fid = toBase32(fieldvalue, 4).decode('utf-8')
                pos_off = toBase32(values[1], 10).decode('utf-8')
                fieldvalue = 'kindle:pos:fid:%s:off:%s' % (pos_fid, pos_off)
            entry[fieldname] = fieldvalue
            if tag == 3:
                entry['text'] = self._ctocLabel(ctoc_text, fieldvalue, 'Unknown Text')
            elif tag == 5:
                entry['kind'] = self._ctocLabel(ctoc_text, fieldvalue, 'Unknown Kind')
        return entry

    def parseNCX(self):
        """Read the NCX index and return it as a list of entry dicts.

        The list is also stored in ``self.indx_data``.  It is empty when
        ``self.ncxidx`` is 0xffffffff.
        """
        indx_data = []
        if self.ncxidx != 0xffffffff:
            outtbl, ctoc_text = self.mi.getIndexData(self.ncxidx, "NCX")
            if DEBUG_NCX:
                print(ctoc_text)
                print(outtbl)
            for num, (text, tagMap) in enumerate(outtbl):
                tmp = self._parseEntry(num, text, tagMap, ctoc_text)
                indx_data.append(tmp)
                if DEBUG_NCX:
                    print("record number: ", num)
                    print("name: ", tmp['name'],)
                    print("position", tmp['pos']," length: ", tmp['len'])
                    print("text: ", tmp['text'])
                    print("kind: ", tmp['kind'])
                    print("heading level: ", tmp['hlvl'])
                    print("parent:", tmp['parent'])
                    print("first child: ",tmp['child1']," last child: ", tmp['childn'])
                    print("pos_fid is ", tmp['pos_fid'])
                    print("\n\n")
        self.indx_data = indx_data
        return indx_data

    def _renderNCX(self, indx_data, title, ident, lang, make_link):
        """Render *indx_data* as a complete NCX document and return it as text.

        *make_link* maps one entry dict to the ``src`` of its ``<content>``
        element.  A warning is printed when the number of rendered entries
        differs from ``len(indx_data)``.
        """
        line_start = re.compile('^', re.M)
        total = len(indx_data)

        def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1):
            # Returns (xml, deepest level seen, running entry count).
            if start > total or end > total:
                print("Warning: missing INDX child entries", start, end, total)
                # callers unpack three values, so keep the tuple shape here too
                return '', max_lvl, num
            if DEBUG_NCX:
                print("recursINDX lvl %d from %d to %d" % (lvl, start, end))
            if start <= 0:
                start = 0
            if end <= 0:
                end = total
            if lvl > max_lvl:
                max_lvl = lvl
            indent = ' ' * (2 + lvl)
            parts = []
            for e in indx_data[start:end]:
                # only entries of the current heading level belong to this pass
                if e['hlvl'] != lvl:
                    continue
                # open entry
                num += 1
                tagid = 'np_%d' % num
                entry = self._NCX_ENTRY % (tagid, num, xmlescape(unescapeit(e['text'])), make_link(e))
                parts.append(line_start.sub(indent, entry) + '\n')
                # recurse into the child range child1..childn (inclusive)
                if e['child1'] >= 0:
                    xmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1,
                                                      e['child1'], e['childn'] + 1)
                    parts.append(xmlrec)
                # close entry
                parts.append(indent + '</navPoint>\n')
            return ''.join(parts), max_lvl, num

        body, max_lvl, num = recursINDX()
        header = self._NCX_HEADER % (lang, ident, max_lvl + 1, title)
        ncx = header + body + self._NCX_FOOTER
        if total != num:
            print("Warning: different number of entries in NCX", total, num)
        return ncx

    def buildNCX(self, htmlfile, title, ident, lang):
        """Render ``self.indx_data`` as NCX XML linking to ``htmlfile#filepos<pos>``."""
        def make_link(e):
            return '%s#filepos%d' % (htmlfile, e['pos'])

        return self._renderNCX(self.indx_data, title, ident, lang, make_link)

    def writeNCX(self, metadata):
        """Build the mobi7 NCX from *metadata* and write it to ``<mobi7dir>/toc.ncx``.

        Uses the first value of the 'Title', 'UniqueID' and 'Language' metadata
        entries and links every entry to 'book.html'.
        """
        # build the xml
        self.isNCX = True
        print("Write ncx")
        htmlname = 'book.html'
        xml = self.buildNCX(htmlname, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0])
        # write the ncx file
        ncxname = os.path.join(self.files.mobi7dir, 'toc.ncx')
        with open(pathof(ncxname), 'wb') as f:
            f.write(xml.encode('utf-8'))

    def buildK8NCX(self, indx_data, title, ident, lang):
        """Render *indx_data* as NCX XML linking to ``Text/<filename>[#<idtag>]``.

        Each rendered entry must provide 'filename' and 'idtag'; an empty
        'idtag' produces a link without a fragment.
        """
        def make_link(e):
            htmlfile = e['filename']
            desttag = e['idtag']
            if desttag == '':
                return 'Text/%s' % htmlfile
            return 'Text/%s#%s' % (htmlfile, desttag)

        return self._renderNCX(indx_data, title, ident, lang, make_link)

    def writeK8NCX(self, ncx_data, metadata):
        """Build the K8 NCX from *ncx_data* and write it to ``<k8oebps>/toc.ncx``.

        Uses the first value of the 'Title', 'UniqueID' and 'Language' metadata
        entries.
        """
        # build the xml
        self.isNCX = True
        print("Write K8 ncx")
        xml = self.buildK8NCX(ncx_data, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0])
        bname = 'toc.ncx'
        ncxname = os.path.join(self.files.k8oebps, bname)
        with open(pathof(ncxname), 'wb') as f:
            f.write(xml.encode('utf-8'))