aboutsummaryrefslogtreecommitdiffstats
path: root/src/epy_reader/tools/KindleUnpack/mobi_nav.py
diff options
context:
space:
mode:
authorBenawi Adha <benawiadha@gmail.com>2022-10-02 21:22:38 +0700
committerBenawi Adha <benawiadha@gmail.com>2022-10-02 21:22:38 +0700
commit258c30d2e088cd4ab091a53794da3f93af79915d (patch)
treef49340bf565deb20c730358af74a01bcc231de53 /src/epy_reader/tools/KindleUnpack/mobi_nav.py
parentd43533f01d9d5baf5f78b71f832641382bd5962a (diff)
downloadepy-258c30d2e088cd4ab091a53794da3f93af79915d.tar.gz
Major refactor: breakdown epy.py script
into package project structure for easier development Squashed commit of the following: commit 01309b961a4ab32394bff0d90949b57435dfda47 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:15:04 2022 +0700 Fix missing objects commit aab2e773c30b255c81b1250b3b20967d5da40338 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:09:31 2022 +0700 Update README.md commit d4e98926bcd9b00ce0410ad71249d24e6315abc5 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:07:28 2022 +0700 Add keywords in pyproject.toml commit 432055af8245560a3ff2e046aef0b4e87da44930 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:04:34 2022 +0700 Bump version and deprecete setup.py commit 51dd15aab8f8ff5996f822f8378e813f0b9fb80d Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 20:56:38 2022 +0700 Formatting commit 81fb35e3b6fa0e27d79ef1da77202ed81eb99500 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 20:55:08 2022 +0700 Fix speakers module commit 3b852e7c59b38d5a28520038e35f50a95270d2f1 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:52:46 2022 +0700 Fix circular import commit 061e8a2649dabacd28a9e2f972559475316c654c Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:39:27 2022 +0700 Run formatting commit abc2d0ab156992c63dc04745d14a69679a60accb Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:39:00 2022 +0700 Update isort and black config in pyproject commit 5dc2e41bab5b997bd719bdc1561eb51ba0c17a83 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:31:00 2022 +0700 Add app Config commit ed485a2ea8281585bf86dc5772f0c6dd9c803cc4 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:23:02 2022 +0700 Update debugpy script commit 68b0553dd4d63eb4b847132c68ea4018587fa8ec Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:14:11 2022 +0700 Connect reader to main script commit 63c3dd176f18a784a4ed2e88aa72b13d1c2b0990 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 
20:11:17 2022 +0700 Implement reader commit ce5eec8fb4e1db3870a16a07541365cd777d6c4c Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:29:49 2022 +0700 Fix script in pyproject.toml commit 941e8e49f1593731fb582d92084206772b3f0442 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:28:39 2022 +0700 Rename modules commit 5a3e7f766aee774c09b3b5336f3a2968e9cb1d0c Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:28:20 2022 +0700 Rename tool method commit 3c0503ff475cb7eff8b12d3be0bda7a38efe1072 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:27:03 2022 +0700 Add ebooks lib commit b5f71c3296a7d6f36454f6e1cbe84e15a45092ee Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 17:25:11 2022 +0700 Initial reorganization
Diffstat (limited to 'src/epy_reader/tools/KindleUnpack/mobi_nav.py')
-rw-r--r--src/epy_reader/tools/KindleUnpack/mobi_nav.py187
1 files changed, 187 insertions, 0 deletions
diff --git a/src/epy_reader/tools/KindleUnpack/mobi_nav.py b/src/epy_reader/tools/KindleUnpack/mobi_nav.py
new file mode 100644
index 0000000..16fb0be
--- /dev/null
+++ b/src/epy_reader/tools/KindleUnpack/mobi_nav.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+from .compatibility_utils import unicode_str
+import os
+from .unipath import pathof
+
+import re
+# note: re requires the pattern to be the exact same type as the data to be searched in python3
+# but u"" is not allowed for the pattern itself only b""
+
DEBUG_NAV = False
# When True, buildTOC prints one trace line per recursion step.

FORCE_DEFAULT_TITLE = False
""" Set to True to force to use the default title. """

NAVIGATION_FINENAME = 'nav.xhtml'
""" The name for the navigation document. """
# NOTE: "FINENAME" is a misspelling of "FILENAME"; the identifier is kept
# as-is because code outside this module may refer to it.

DEFAULT_TITLE = 'Navigation'
""" The default title for the navigation document. """


class NAVProcessor(object):
    """Build and write the XHTML navigation document (``nav.xhtml``).

    The document consists of an optional ``landmarks`` nav generated from
    the guide ``<reference>`` tags and a ``toc`` nav generated from the
    index entries.

    ``files`` must expose the attributes ``k8text`` and ``k8oebps``
    (directory paths); they are used to compute relative links and the
    output location.
    """

    # Guide reference type -> epub:type used in the landmarks nav.
    # Types mapped to None (and unknown types) are left out of the output.
    _GUIDE_TYPE_MAP = {
        'cover' : 'cover',
        'title-page' : 'title-page',
        # ?: 'frontmatter',
        'text' : 'bodymatter',
        # ?: 'backmatter',
        'toc' : 'toc',
        'loi' : 'loi',
        'lot' : 'lot',
        'preface' : 'preface',
        'bibliography' : 'bibliography',
        'index' : 'index',
        'glossary' : 'glossary',
        'acknowledgements' : 'acknowledgements',
        'colophon' : None,
        'copyright-page' : None,
        'dedication' : None,
        'epigraph' : None,
        'foreword' : None,
        'notes' : None
    }

    # Compiled once instead of on every buildLandmarks call.
    _RE_REFERENCE = re.compile(r'<reference\s+.*?>', re.I)
    _RE_TYPE = re.compile(r'\s+type\s*=\s*"(.*?)"', re.I)
    _RE_TITLE = re.compile(r'\s+title\s*=\s*"(.*?)"', re.I)
    _RE_LINK = re.compile(r'\s+href\s*=\s*"(.*?)"', re.I)

    def __init__(self, files):
        self.files = files
        self.navname = NAVIGATION_FINENAME

    @staticmethod
    def _attribute(pattern, tag):
        """Return group 1 of ``pattern`` found in ``tag``, or None if absent."""
        mo = pattern.search(tag)
        if mo is None:
            return None
        return mo.group(1)

    def buildLandmarks(self, guidetext):
        """Return the ``landmarks`` nav element built from ``guidetext``.

        Every ``<reference ...>`` tag that has a ``title``, an ``href`` and a
        ``type`` with a non-None entry in the type map yields one list item.
        Links are rewritten relative to the text directory.  Returns an empty
        string when no reference qualifies.
        """
        header = ''.join([
            ' <nav epub:type="landmarks" id="landmarks" hidden="">\n',
            ' <h2>Guide</h2>\n',
            ' <ol>\n',
        ])
        element = ' <li><a epub:type="{:s}" href="{:s}">{:s}</a></li>\n'
        footer = ''.join([
            ' </ol>\n',
            ' </nav>\n',
        ])

        dir_ = os.path.relpath(self.files.k8text, self.files.k8oebps).replace('\\', '/')

        items = []
        for reference in self._RE_REFERENCE.findall(unicode_str(guidetext)):
            raw_type = self._attribute(self._RE_TYPE, reference)
            type_ = None if raw_type is None else self._GUIDE_TYPE_MAP.get(raw_type)
            title = self._attribute(self._RE_TITLE, reference)
            link = self._attribute(self._RE_LINK, reference)
            if type_ is None or title is None or link is None:
                continue
            link = os.path.relpath(link, dir_).replace('\\', '/')
            items.append(element.format(type_, link, title))

        if not items:
            return ''
        return header + ''.join(items) + footer

    def buildTOC(self, indx_data):
        """Return the ``toc`` nav element built from ``indx_data``.

        ``indx_data`` is an indexable sequence of mappings; each entry is read
        through the keys ``filename``, ``idtag``, ``text``, ``hlvl``,
        ``child1`` and ``childn``.  Entries at level ``hlvl`` 0 form the outer
        list; an entry whose ``child1`` is >= 0 gets a nested list covering
        indices ``child1`` .. ``childn``.

        A warning is printed when child indices point past the end of
        ``indx_data`` or when not every entry ended up in the output.
        """
        header = ''.join([
            ' <nav epub:type="toc" id="toc">\n',
            ' <h1>Table of contents</h1>\n',
        ])
        footer = ' </nav>\n'

        # recursive part
        def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1):
            """Render entries of level ``lvl`` in [start, end).

            Always returns ``(xhtml, max_lvl, num)``: the markup, the deepest
            level seen so far and the running count of emitted entries.
            """
            if start > len(indx_data) or end > len(indx_data):
                print("Warning (in buildTOC): missing INDX child entries", start, end, len(indx_data))
                # Must stay a 3-tuple: callers unpack three values, so a bare
                # '' here would raise ValueError instead of just skipping the
                # missing children.
                return '', max_lvl, num
            if DEBUG_NAV:
                print("recursINDX (in buildTOC) lvl %d from %d to %d" % (lvl, start, end))
            # Non-positive bounds mean "whole table" (the top-level call).
            if start <= 0:
                start = 0
            if end <= 0:
                end = len(indx_data)
            if lvl > max_lvl:
                max_lvl = lvl

            indent1 = ' ' * (2 + lvl * 2)
            indent2 = ' ' * (3 + lvl * 2)
            parts = [indent1, '<ol>\n']
            for i in range(start, end):
                e = indx_data[i]
                htmlfile = e['filename']
                desttag = e['idtag']
                text = e['text']
                # Entries of other levels are rendered by their own parent.
                if e['hlvl'] != lvl:
                    continue
                num += 1
                if desttag == '':
                    link = htmlfile
                else:
                    link = '{:s}#{:s}'.format(htmlfile, desttag)
                parts.append(indent2 + '<li>')
                parts.append('<a href="{:}">{:s}</a>'.format(link, text))
                # recurs
                if e['child1'] >= 0:
                    parts.append('\n')
                    xhtmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1,
                                                        e['child1'], e['childn'] + 1)
                    parts.append(xhtmlrec)
                    parts.append(indent2)
                # close entry
                parts.append('</li>\n')
            parts.append(indent1 + '</ol>\n')
            return ''.join(parts), max_lvl, num

        data, max_lvl, num = recursINDX()
        if len(indx_data) != num:
            print("Warning (in buildTOC): different number of entries in NCX", len(indx_data), num)
        return header + data + footer

    def buildNAV(self, ncx_data, guidetext, title, lang):
        """Return the complete navigation document as a text string.

        ``title`` is replaced by DEFAULT_TITLE when FORCE_DEFAULT_TITLE is
        set.  ``lang`` is written to both the ``lang`` and ``xml:lang``
        attributes of the root element.  ``title`` and ``lang`` are inserted
        verbatim (no escaping is applied here).
        """
        print("Building Navigation Document.")
        if FORCE_DEFAULT_TITLE:
            title = DEFAULT_TITLE
        nav_header = ''.join([
            '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>',
            '<html xmlns="http://www.w3.org/1999/xhtml"',
            ' xmlns:epub="http://www.idpf.org/2007/ops"',
            ' lang="{0:s}" xml:lang="{0:s}">\n'.format(lang),
            '<head>\n<title>{:s}</title>\n'.format(title),
            '<meta charset="UTF-8" />\n',
            '<style type="text/css">\n',
            'nav#landmarks { display:none; }\n',
            'ol { list-style-type: none; }',
            '</style>\n</head>\n<body>\n',
        ])
        nav_footer = '</body>\n</html>\n'

        landmarks = self.buildLandmarks(guidetext)
        toc = self.buildTOC(ncx_data)

        return nav_header + landmarks + toc + nav_footer

    def getNAVName(self):
        """Return the file name of the navigation document."""
        return self.navname

    def writeNAV(self, ncx_data, guidetext, metadata):
        """Build the navigation document and write it as UTF-8.

        The file is written to ``<files.k8text>/<navname>``.  The title and
        language are taken from the first element of ``metadata['Title']``
        and ``metadata['Language']``; both keys are expected to be present.
        """
        # build the xhtml
        # print("Write Navigation Document.")
        xhtml = self.buildNAV(ncx_data, guidetext, metadata.get('Title')[0], metadata.get('Language')[0])
        fname = os.path.join(self.files.k8text, self.navname)
        with open(pathof(fname), 'wb') as f:
            f.write(xhtml.encode('utf-8'))