diff options
author | Benawi Adha <benawiadha@gmail.com> | 2022-10-02 21:22:38 +0700 |
---|---|---|
committer | Benawi Adha <benawiadha@gmail.com> | 2022-10-02 21:22:38 +0700 |
commit | 258c30d2e088cd4ab091a53794da3f93af79915d (patch) | |
tree | f49340bf565deb20c730358af74a01bcc231de53 /src/epy_reader/tools/KindleUnpack/mobi_nav.py | |
parent | d43533f01d9d5baf5f78b71f832641382bd5962a (diff) | |
download | epy-258c30d2e088cd4ab091a53794da3f93af79915d.tar.gz |
Major refactor: break down epy.py script
into package project structure for easier
development
Squashed commit of the following:
commit 01309b961a4ab32394bff0d90949b57435dfda47
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sun Oct 2 21:15:04 2022 +0700
Fix missing objects
commit aab2e773c30b255c81b1250b3b20967d5da40338
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sun Oct 2 21:09:31 2022 +0700
Update README.md
commit d4e98926bcd9b00ce0410ad71249d24e6315abc5
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sun Oct 2 21:07:28 2022 +0700
Add keywords in pyproject.toml
commit 432055af8245560a3ff2e046aef0b4e87da44930
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sun Oct 2 21:04:34 2022 +0700
Bump version and deprecate setup.py
commit 51dd15aab8f8ff5996f822f8378e813f0b9fb80d
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sun Oct 2 20:56:38 2022 +0700
Formatting
commit 81fb35e3b6fa0e27d79ef1da77202ed81eb99500
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sun Oct 2 20:55:08 2022 +0700
Fix speakers module
commit 3b852e7c59b38d5a28520038e35f50a95270d2f1
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 20:52:46 2022 +0700
Fix circular import
commit 061e8a2649dabacd28a9e2f972559475316c654c
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 20:39:27 2022 +0700
Run formatting
commit abc2d0ab156992c63dc04745d14a69679a60accb
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 20:39:00 2022 +0700
Update isort and black config in pyproject
commit 5dc2e41bab5b997bd719bdc1561eb51ba0c17a83
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 20:31:00 2022 +0700
Add app Config
commit ed485a2ea8281585bf86dc5772f0c6dd9c803cc4
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 20:23:02 2022 +0700
Update debugpy script
commit 68b0553dd4d63eb4b847132c68ea4018587fa8ec
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 20:14:11 2022 +0700
Connect reader to main script
commit 63c3dd176f18a784a4ed2e88aa72b13d1c2b0990
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 20:11:17 2022 +0700
Implement reader
commit ce5eec8fb4e1db3870a16a07541365cd777d6c4c
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 19:29:49 2022 +0700
Fix script in pyproject.toml
commit 941e8e49f1593731fb582d92084206772b3f0442
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 19:28:39 2022 +0700
Rename modules
commit 5a3e7f766aee774c09b3b5336f3a2968e9cb1d0c
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 19:28:20 2022 +0700
Rename tool method
commit 3c0503ff475cb7eff8b12d3be0bda7a38efe1072
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 19:27:03 2022 +0700
Add ebooks lib
commit b5f71c3296a7d6f36454f6e1cbe84e15a45092ee
Author: Benawi Adha <benawiadha@gmail.com>
Date: Sat Oct 1 17:25:11 2022 +0700
Initial reorganization
Diffstat (limited to 'src/epy_reader/tools/KindleUnpack/mobi_nav.py')
-rw-r--r-- | src/epy_reader/tools/KindleUnpack/mobi_nav.py | 187 |
1 files changed, 187 insertions, 0 deletions
diff --git a/src/epy_reader/tools/KindleUnpack/mobi_nav.py b/src/epy_reader/tools/KindleUnpack/mobi_nav.py new file mode 100644 index 0000000..16fb0be --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_nav.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import unicode_str +import os +from .unipath import pathof + +import re +# note: re requites the pattern to be the exact same type as the data to be searched in python3 +# but u"" is not allowed for the pattern itself only b"" + +DEBUG_NAV = False + +FORCE_DEFAULT_TITLE = False +""" Set to True to force to use the default title. """ + +NAVIGATION_FINENAME = 'nav.xhtml' +""" The name for the navigation document. """ + +DEFAULT_TITLE = 'Navigation' +""" The default title for the navigation document. """ + +class NAVProcessor(object): + + def __init__(self, files): + self.files = files + self.navname = NAVIGATION_FINENAME + + def buildLandmarks(self, guidetext): + header = '' + header += ' <nav epub:type="landmarks" id="landmarks" hidden="">\n' + header += ' <h2>Guide</h2>\n' + header += ' <ol>\n' + element = ' <li><a epub:type="{:s}" href="{:s}">{:s}</a></li>\n' + footer = '' + footer += ' </ol>\n' + footer += ' </nav>\n' + + type_map = { + 'cover' : 'cover', + 'title-page' : 'title-page', + # ?: 'frontmatter', + 'text' : 'bodymatter', + # ?: 'backmatter', + 'toc' : 'toc', + 'loi' : 'loi', + 'lot' : 'lot', + 'preface' : 'preface', + 'bibliography' : 'bibliography', + 'index' : 'index', + 'glossary' : 'glossary', + 'acknowledgements' : 'acknowledgements', + 'colophon' : None, + 'copyright-page' : None, + 'dedication' : None, + 'epigraph' : None, + 'foreword' : None, + 'notes' : None + } + + re_type = re.compile(r'\s+type\s*=\s*"(.*?)"', re.I) + re_title = re.compile(r'\s+title\s*=\s*"(.*?)"', re.I) + re_link = 
re.compile(r'\s+href\s*=\s*"(.*?)"', re.I) + dir_ = os.path.relpath(self.files.k8text, self.files.k8oebps).replace('\\', '/') + + data = '' + references = re.findall(r'<reference\s+.*?>', unicode_str(guidetext), re.I) + for reference in references: + mo_type = re_type.search(reference) + mo_title = re_title.search(reference) + mo_link = re_link.search(reference) + if mo_type is not None: + type_ = type_map.get(mo_type.group(1), None) + else: + type_ = None + if mo_title is not None: + title = mo_title.group(1) + else: + title = None + if mo_link is not None: + link = mo_link.group(1) + else: + link = None + + if type_ is not None and title is not None and link is not None: + link = os.path.relpath(link, dir_).replace('\\', '/') + data += element.format(type_, link, title) + if len(data) > 0: + return header + data + footer + else: + return '' + + def buildTOC(self, indx_data): + header = '' + header += ' <nav epub:type="toc" id="toc">\n' + header += ' <h1>Table of contents</h1>\n' + footer = ' </nav>\n' + + # recursive part + def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1): + if start>len(indx_data) or end>len(indx_data): + print("Warning (in buildTOC): missing INDX child entries", start, end, len(indx_data)) + return '' + if DEBUG_NAV: + print("recursINDX (in buildTOC) lvl %d from %d to %d" % (lvl, start, end)) + xhtml = '' + if start <= 0: + start = 0 + if end <= 0: + end = len(indx_data) + if lvl > max_lvl: + max_lvl = lvl + + indent1 = ' ' * (2 + lvl * 2) + indent2 = ' ' * (3 + lvl * 2) + xhtml += indent1 + '<ol>\n' + for i in range(start, end): + e = indx_data[i] + htmlfile = e['filename'] + desttag = e['idtag'] + text = e['text'] + if not e['hlvl'] == lvl: + continue + num += 1 + if desttag == '': + link = htmlfile + else: + link = '{:s}#{:s}'.format(htmlfile, desttag) + xhtml += indent2 + '<li>' + entry = '<a href="{:}">{:s}</a>'.format(link, text) + xhtml += entry + # recurs + if e['child1'] >= 0: + xhtml += '\n' + xhtmlrec, max_lvl, num = 
recursINDX(max_lvl, num, lvl + 1, + e['child1'], e['childn'] + 1) + xhtml += xhtmlrec + xhtml += indent2 + # close entry + xhtml += '</li>\n' + xhtml += indent1 + '</ol>\n' + return xhtml, max_lvl, num + + data, max_lvl, num = recursINDX() + if not len(indx_data) == num: + print("Warning (in buildTOC): different number of entries in NCX", len(indx_data), num) + return header + data + footer + + def buildNAV(self, ncx_data, guidetext, title, lang): + print("Building Navigation Document.") + if FORCE_DEFAULT_TITLE: + title = DEFAULT_TITLE + nav_header = '' + nav_header += '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>' + nav_header += '<html xmlns="http://www.w3.org/1999/xhtml"' + nav_header += ' xmlns:epub="http://www.idpf.org/2007/ops"' + nav_header += ' lang="{0:s}" xml:lang="{0:s}">\n'.format(lang) + nav_header += '<head>\n<title>{:s}</title>\n'.format(title) + nav_header += '<meta charset="UTF-8" />\n' + nav_header += '<style type="text/css">\n' + nav_header += 'nav#landmarks { display:none; }\n' + nav_header += 'ol { list-style-type: none; }' + nav_header += '</style>\n</head>\n<body>\n' + nav_footer = '</body>\n</html>\n' + + landmarks = self.buildLandmarks(guidetext) + toc = self.buildTOC(ncx_data) + + data = nav_header + data += landmarks + data += toc + data += nav_footer + return data + + def getNAVName(self): + return self.navname + + def writeNAV(self, ncx_data, guidetext, metadata): + # build the xhtml + # print("Write Navigation Document.") + xhtml = self.buildNAV(ncx_data, guidetext, metadata.get('Title')[0], metadata.get('Language')[0]) + fname = os.path.join(self.files.k8text, self.navname) + with open(pathof(fname), 'wb') as f: + f.write(xhtml.encode('utf-8')) |