From 258c30d2e088cd4ab091a53794da3f93af79915d Mon Sep 17 00:00:00 2001
From: Benawi Adha
Date: Sun, 2 Oct 2022 21:22:38 +0700
Subject: Major refactor: break down the epy.py script into a package
 structure for easier development

Squashed commit of the following:

commit 01309b961a4ab32394bff0d90949b57435dfda47
Author: Benawi Adha
Date:   Sun Oct 2 21:15:04 2022 +0700

    Fix missing objects

commit aab2e773c30b255c81b1250b3b20967d5da40338
Author: Benawi Adha
Date:   Sun Oct 2 21:09:31 2022 +0700

    Update README.md

commit d4e98926bcd9b00ce0410ad71249d24e6315abc5
Author: Benawi Adha
Date:   Sun Oct 2 21:07:28 2022 +0700

    Add keywords in pyproject.toml

commit 432055af8245560a3ff2e046aef0b4e87da44930
Author: Benawi Adha
Date:   Sun Oct 2 21:04:34 2022 +0700

    Bump version and deprecate setup.py

commit 51dd15aab8f8ff5996f822f8378e813f0b9fb80d
Author: Benawi Adha
Date:   Sun Oct 2 20:56:38 2022 +0700

    Formatting

commit 81fb35e3b6fa0e27d79ef1da77202ed81eb99500
Author: Benawi Adha
Date:   Sun Oct 2 20:55:08 2022 +0700

    Fix speakers module

commit 3b852e7c59b38d5a28520038e35f50a95270d2f1
Author: Benawi Adha
Date:   Sat Oct 1 20:52:46 2022 +0700

    Fix circular import

commit 061e8a2649dabacd28a9e2f972559475316c654c
Author: Benawi Adha
Date:   Sat Oct 1 20:39:27 2022 +0700

    Run formatting

commit abc2d0ab156992c63dc04745d14a69679a60accb
Author: Benawi Adha
Date:   Sat Oct 1 20:39:00 2022 +0700

    Update isort and black config in pyproject

commit 5dc2e41bab5b997bd719bdc1561eb51ba0c17a83
Author: Benawi Adha
Date:   Sat Oct 1 20:31:00 2022 +0700

    Add app Config

commit ed485a2ea8281585bf86dc5772f0c6dd9c803cc4
Author: Benawi Adha
Date:   Sat Oct 1 20:23:02 2022 +0700

    Update debugpy script

commit 68b0553dd4d63eb4b847132c68ea4018587fa8ec
Author: Benawi Adha
Date:   Sat Oct 1 20:14:11 2022 +0700

    Connect reader to main script

commit 63c3dd176f18a784a4ed2e88aa72b13d1c2b0990
Author: Benawi Adha
Date:   Sat Oct 1 20:11:17 2022 +0700

    Implement reader

commit ce5eec8fb4e1db3870a16a07541365cd777d6c4c
Author: Benawi Adha
Date:   Sat Oct 1 19:29:49 2022 +0700

    Fix script in pyproject.toml

commit 941e8e49f1593731fb582d92084206772b3f0442
Author: Benawi Adha
Date:   Sat Oct 1 19:28:39 2022 +0700

    Rename modules

commit 5a3e7f766aee774c09b3b5336f3a2968e9cb1d0c
Author: Benawi Adha
Date:   Sat Oct 1 19:28:20 2022 +0700

    Rename tool method

commit 3c0503ff475cb7eff8b12d3be0bda7a38efe1072
Author: Benawi Adha
Date:   Sat Oct 1 19:27:03 2022 +0700

    Add ebooks lib

commit b5f71c3296a7d6f36454f6e1cbe84e15a45092ee
Author: Benawi Adha
Date:   Sat Oct 1 17:25:11 2022 +0700

    Initial reorganization
---
 Makefile | 13 +-
 README.md | 2 +
 epy_extras/KindleUnpack/__init__.py | 2 -
 epy_extras/KindleUnpack/compatibility_utils.py | 278 ----
 epy_extras/KindleUnpack/kindleunpack.py | 1029 ------------
 epy_extras/KindleUnpack/mobi_cover.py | 238 ---
 epy_extras/KindleUnpack/mobi_dict.py | 377 -----
 epy_extras/KindleUnpack/mobi_header.py | 936 ------------
 epy_extras/KindleUnpack/mobi_html.py | 439 ------
 epy_extras/KindleUnpack/mobi_index.py | 276 ----
 epy_extras/KindleUnpack/mobi_k8proc.py | 496 ------
 epy_extras/KindleUnpack/mobi_k8resc.py | 271 ----
 epy_extras/KindleUnpack/mobi_nav.py | 187 ---
 epy_extras/KindleUnpack/mobi_ncx.py | 275 ----
 epy_extras/KindleUnpack/mobi_opf.py | 686 ---------
 epy_extras/KindleUnpack/mobi_pagemap.py | 158 --
 epy_extras/KindleUnpack/mobi_sectioner.py | 120 --
 epy_extras/KindleUnpack/mobi_split.py | 438 ------
 epy_extras/KindleUnpack/mobi_uncompress.py | 131 --
 epy_extras/KindleUnpack/mobi_utils.py | 191 ---
 epy_extras/KindleUnpack/mobiml2xhtml.py | 527 -------
epy_extras/KindleUnpack/unipath.py | 93 -- epy_extras/KindleUnpack/unpack_structure.py | 167 -- epy_extras/__init__.py | 3 - poetry.lock | 502 +++++- pyproject.toml | 43 +- setup.py | 28 - src/epy_reader/__init__.py | 5 + src/epy_reader/__main__.py | 23 + src/epy_reader/board.py | 148 ++ src/epy_reader/cli.py | 171 +++ src/epy_reader/config.py | 80 + src/epy_reader/ebooks/__init__.py | 15 + src/epy_reader/ebooks/azw.py | 26 + src/epy_reader/ebooks/base.py | 48 + src/epy_reader/ebooks/epub.py | 202 +++ src/epy_reader/ebooks/fictionbook.py | 76 + src/epy_reader/ebooks/mobi.py | 69 + src/epy_reader/ebooks/url.py | 49 + src/epy_reader/lib.py | 63 + src/epy_reader/models.py | 232 +++ src/epy_reader/parser.py | 421 +++++ src/epy_reader/reader.py | 1610 ++++++++++++++++++++ src/epy_reader/settings.py | 133 ++ src/epy_reader/speakers/__init__.py | 9 + src/epy_reader/speakers/base.py | 21 + src/epy_reader/speakers/mimic.py | 31 + src/epy_reader/speakers/pico.py | 43 + src/epy_reader/state.py | 195 +++ src/epy_reader/tools/KindleUnpack/__init__.py | 2 + .../tools/KindleUnpack/compatibility_utils.py | 278 ++++ src/epy_reader/tools/KindleUnpack/kindleunpack.py | 1029 +++++++++++++ src/epy_reader/tools/KindleUnpack/mobi_cover.py | 238 +++ src/epy_reader/tools/KindleUnpack/mobi_dict.py | 377 +++++ src/epy_reader/tools/KindleUnpack/mobi_header.py | 936 ++++++++++++ src/epy_reader/tools/KindleUnpack/mobi_html.py | 439 ++++++ src/epy_reader/tools/KindleUnpack/mobi_index.py | 276 ++++ src/epy_reader/tools/KindleUnpack/mobi_k8proc.py | 496 ++++++ src/epy_reader/tools/KindleUnpack/mobi_k8resc.py | 271 ++++ src/epy_reader/tools/KindleUnpack/mobi_nav.py | 187 +++ src/epy_reader/tools/KindleUnpack/mobi_ncx.py | 275 ++++ src/epy_reader/tools/KindleUnpack/mobi_opf.py | 686 +++++++++ src/epy_reader/tools/KindleUnpack/mobi_pagemap.py | 158 ++ .../tools/KindleUnpack/mobi_sectioner.py | 120 ++ src/epy_reader/tools/KindleUnpack/mobi_split.py | 438 ++++++ .../tools/KindleUnpack/mobi_uncompress.py | 131 ++ src/epy_reader/tools/KindleUnpack/mobi_utils.py | 191 +++ src/epy_reader/tools/KindleUnpack/mobiml2xhtml.py | 527 +++++++ src/epy_reader/tools/KindleUnpack/unipath.py | 93 ++ .../tools/KindleUnpack/unpack_structure.py | 167 ++ src/epy_reader/tools/__init__.py | 3 + src/epy_reader/utils.py | 377 +++++ 72 files changed, 11877 insertions(+), 7394 deletions(-) delete mode 100644 epy_extras/KindleUnpack/__init__.py delete mode 100755 epy_extras/KindleUnpack/compatibility_utils.py delete mode 100644 epy_extras/KindleUnpack/kindleunpack.py delete mode 100644 epy_extras/KindleUnpack/mobi_cover.py delete mode 100644 epy_extras/KindleUnpack/mobi_dict.py delete mode 100644 epy_extras/KindleUnpack/mobi_header.py delete mode 100644 epy_extras/KindleUnpack/mobi_html.py delete mode 100644 epy_extras/KindleUnpack/mobi_index.py delete mode 100644 epy_extras/KindleUnpack/mobi_k8proc.py delete mode 100644 epy_extras/KindleUnpack/mobi_k8resc.py delete mode 100644 epy_extras/KindleUnpack/mobi_nav.py delete mode 100644 epy_extras/KindleUnpack/mobi_ncx.py delete mode 100644 epy_extras/KindleUnpack/mobi_opf.py delete mode 100644 epy_extras/KindleUnpack/mobi_pagemap.py delete mode 100644 epy_extras/KindleUnpack/mobi_sectioner.py delete mode 100755 epy_extras/KindleUnpack/mobi_split.py delete mode 100644 epy_extras/KindleUnpack/mobi_uncompress.py delete mode 100644 epy_extras/KindleUnpack/mobi_utils.py delete mode 100755 epy_extras/KindleUnpack/mobiml2xhtml.py delete mode 100755 epy_extras/KindleUnpack/unipath.py delete mode 100644 
epy_extras/KindleUnpack/unpack_structure.py delete mode 100644 epy_extras/__init__.py delete mode 100644 setup.py create mode 100644 src/epy_reader/__init__.py create mode 100644 src/epy_reader/__main__.py create mode 100644 src/epy_reader/board.py create mode 100644 src/epy_reader/cli.py create mode 100644 src/epy_reader/config.py create mode 100644 src/epy_reader/ebooks/__init__.py create mode 100644 src/epy_reader/ebooks/azw.py create mode 100644 src/epy_reader/ebooks/base.py create mode 100644 src/epy_reader/ebooks/epub.py create mode 100644 src/epy_reader/ebooks/fictionbook.py create mode 100644 src/epy_reader/ebooks/mobi.py create mode 100644 src/epy_reader/ebooks/url.py create mode 100644 src/epy_reader/lib.py create mode 100644 src/epy_reader/models.py create mode 100644 src/epy_reader/parser.py create mode 100644 src/epy_reader/reader.py create mode 100644 src/epy_reader/settings.py create mode 100644 src/epy_reader/speakers/__init__.py create mode 100644 src/epy_reader/speakers/base.py create mode 100644 src/epy_reader/speakers/mimic.py create mode 100644 src/epy_reader/speakers/pico.py create mode 100644 src/epy_reader/state.py create mode 100644 src/epy_reader/tools/KindleUnpack/__init__.py create mode 100755 src/epy_reader/tools/KindleUnpack/compatibility_utils.py create mode 100644 src/epy_reader/tools/KindleUnpack/kindleunpack.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_cover.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_dict.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_header.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_html.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_index.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_k8proc.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_k8resc.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_nav.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_ncx.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_opf.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_pagemap.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_sectioner.py create mode 100755 src/epy_reader/tools/KindleUnpack/mobi_split.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_uncompress.py create mode 100644 src/epy_reader/tools/KindleUnpack/mobi_utils.py create mode 100755 src/epy_reader/tools/KindleUnpack/mobiml2xhtml.py create mode 100755 src/epy_reader/tools/KindleUnpack/unipath.py create mode 100644 src/epy_reader/tools/KindleUnpack/unpack_structure.py create mode 100644 src/epy_reader/tools/__init__.py create mode 100644 src/epy_reader/utils.py diff --git a/Makefile b/Makefile index 3c5fcca..3d03560 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,15 @@ .PHONY: tests -.DEFAULT_GOAL := tests +.DEFAULT_GOAL := check + +check: + mypy --follow-imports=silent src + +format: + isort src + black src debug: - python -m debugpy --listen 5678 --wait-for-client -m epy + python -m debugpy --listen 5678 --wait-for-client -m epy_reader dev: poetry install @@ -16,5 +23,5 @@ coverage: python -m http.server -d htmlcov release: - python setup.py sdist bdist_wheel + python -m build twine upload --skip-existing dist/* diff --git a/README.md b/README.md index 0dcfdd7..98a671a 100644 --- a/README.md +++ b/README.md @@ -134,3 +134,5 @@ so line scrolling navigation will act as scrolling page and textwidth is not adj inside epy (default key: `R`). - `v2022.2.5`: Fix process.join() issue for unstarted process. 
+
+- `v2022.10.2`: Major refactor: break the `epy.py` module down into a package structure for easier development.
diff --git a/epy_extras/KindleUnpack/__init__.py b/epy_extras/KindleUnpack/__init__.py
deleted file mode 100644
index 0077258..0000000
--- a/epy_extras/KindleUnpack/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
diff --git a/epy_extras/KindleUnpack/compatibility_utils.py b/epy_extras/KindleUnpack/compatibility_utils.py
deleted file mode 100755
index c46c0bb..0000000
--- a/epy_extras/KindleUnpack/compatibility_utils.py
+++ /dev/null
@@ -1,278 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-
-# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without modification,
-# are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this list of
-# conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice, this list
-# of conditions and the following disclaimer in the documentation and/or other materials
-# provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
-# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
-# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
-# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from __future__ import unicode_literals, division, absolute_import, print_function
-
-import sys
-import codecs
-
-PY2 = sys.version_info[0] == 2
-PY3 = sys.version_info[0] == 3
-
-iswindows = sys.platform.startswith('win')
-
-try:
-    from urllib.parse import unquote
-except ImportError:
-    from urllib import unquote
-
-if PY2:
-    from HTMLParser import HTMLParser
-    _h = HTMLParser()
-elif sys.version_info[1] < 4:
-    import html.parser
-    _h = html.parser.HTMLParser()
-else:
-    import html as _h
-
-if PY3:
-    text_type = str
-    binary_type = bytes
-    # if will be printing arbitrary binary data to stdout on python 3
-    # sys.stdin = sys.stdin.detach()
-    # sys.stdout = sys.stdout.detach()
-    # sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
-else:
-    range = xrange
-    text_type = unicode
-    binary_type = str
-    # if will be printing unicode under python 2 need to protect
-    # against sys.stdout.encoding being None stupidly forcing ascii encoding of unicode
-    # sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
-    # alternatively set environment variable as follows **before** launching python: export PYTHONIOENCODING=UTF-8
-
-# NOTE: Python 3 is completely broken when accessing single bytes in bytes strings
-# (and they amazingly claim by design and no bug!)
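# Illustration (editorial addition, not part of the original file): the
# portable single-byte access pattern this module settles on; bord() and
# bchar() are defined further below.
#
#     data = b'ABC'
#     data[0:1]       # b'A' on both Python 2 and 3; slicing is always safe
#     bord(data[0])   # 65 on both; the byte's ord() value as an int
#     bchar(data[0])  # b'A' on both; the byte as a one-byte bytestring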
- -# To illustrate: this works for unicode in Python 3 and for all Python 2.X for both bytestrings and unicode -# >>> o = '123456789' -# >>> o[-3] -# '7' -# >>> type(o[-3]) -# -# >>> type(o) -# - -# Unfortunately, this is what Python 3 does for no sane reason and only for bytestrings -# >>> o = b'123456789' -# >>> o[-3] -# 55 -# >>> type(o[-3]) -# -# >>> type(o) -# - -# This mind boggling behaviour also happens when indexing a bytestring and/or -# iteratoring over a bytestring. In other words it will return an int but not -# the byte itself!!!!!!! - -# The only way to access a single byte as a byte in bytestring and get the byte in both -# Python 2 and Python 3 is to use a slice - -# This problem is so common there are horrible hacks floating around the net to **try** -# to work around it, so that code that works on both Python 2 and Python 3 is possible. - -# So in order to write code that works on both Python 2 and Python 3 -# if you index or access a single byte and want its ord() then use the bord() function. -# If instead you want it as a single character byte use the bchar() function -# both of which are defined below. - -if PY3: - # Also Note: if decode a bytestring using 'latin-1' (or any other full range 0-255 encoding) - # in place of ascii you will get a byte value to half-word or integer value - # one-to-one mapping (in the 0 - 255 range) - - def bchr(s): - return bytes([s]) - - def bstr(s): - if isinstance(s, str): - return bytes(s, 'latin-1') - else: - return bytes(s) - - def bord(s): - return s - - def bchar(s): - return bytes([s]) - -else: - def bchr(s): - return chr(s) - - def bstr(s): - return str(s) - - def bord(s): - return ord(s) - - def bchar(s): - return s - -if PY3: - # list-producing versions of the major Python iterating functions - def lrange(*args, **kwargs): - return list(range(*args, **kwargs)) - - def lzip(*args, **kwargs): - return list(zip(*args, **kwargs)) - - def lmap(*args, **kwargs): - return list(map(*args, **kwargs)) - - def lfilter(*args, **kwargs): - return list(filter(*args, **kwargs)) -else: - import __builtin__ - # Python 2-builtin ranges produce lists - lrange = __builtin__.range - lzip = __builtin__.zip - lmap = __builtin__.map - lfilter = __builtin__.filter - -# In Python 3 you can no longer use .encode('hex') on a bytestring -# instead use the following on both platforms -import binascii -def hexlify(bdata): - return (binascii.hexlify(bdata)).decode('ascii') - -# If you: import struct -# Note: struct pack, unpack, unpack_from all *require* bytestring format -# data all the way up to at least Python 2.7.5, Python 3 is okay with either - -# If you: import re -# note: Python 3 "re" requires the pattern to be the exact same type as the data to be -# searched ... 
but u"" is not allowed for the pattern itself only b"" -# Python 2.X allows the pattern to be any type and converts it to match the data -# and returns the same type as the data - -# convert string to be utf-8 encoded -def utf8_str(p, enc='utf-8'): - if p is None: - return None - if isinstance(p, text_type): - return p.encode('utf-8') - if enc != 'utf-8': - return p.decode(enc).encode('utf-8') - return p - -# convert string to be unicode encoded -def unicode_str(p, enc='utf-8'): - if p is None: - return None - if isinstance(p, text_type): - return p - return p.decode(enc) - -ASCII_CHARS = set(chr(x) for x in range(128)) -URL_SAFE = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ' - 'abcdefghijklmnopqrstuvwxyz' - '0123456789' '#' '_.-/~') -IRI_UNSAFE = ASCII_CHARS - URL_SAFE - -# returns a quoted IRI (not a URI) -def quoteurl(href): - if isinstance(href,binary_type): - href = href.decode('utf-8') - result = [] - for char in href: - if char in IRI_UNSAFE: - char = "%%%02x" % ord(char) - result.append(char) - return ''.join(result) - -# unquotes url/iri -def unquoteurl(href): - if isinstance(href,binary_type): - href = href.decode('utf-8') - href = unquote(href) - return href - -# unescape html -def unescapeit(sval): - return _h.unescape(sval) - -# Python 2.X commandline parsing under Windows has been horribly broken for years! -# Use the following code to emulate full unicode commandline parsing on Python 2 -# ie. To get sys.argv arguments and properly encode them as unicode - -def unicode_argv(): - global iswindows - global PY3 - if PY3: - return sys.argv - if iswindows: - # Versions 2.x of Python don't support Unicode in sys.argv on - # Windows, with the underlying Windows API instead replacing multi-byte - # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv - # as a list of Unicode strings - from ctypes import POINTER, byref, cdll, c_int, windll - from ctypes.wintypes import LPCWSTR, LPWSTR - - GetCommandLineW = cdll.kernel32.GetCommandLineW - GetCommandLineW.argtypes = [] - GetCommandLineW.restype = LPCWSTR - - CommandLineToArgvW = windll.shell32.CommandLineToArgvW - CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] - CommandLineToArgvW.restype = POINTER(LPWSTR) - - cmd = GetCommandLineW() - argc = c_int(0) - argv = CommandLineToArgvW(cmd, byref(argc)) - if argc.value > 0: - # Remove Python executable and commands if present - start = argc.value - len(sys.argv) - return [argv[i] for i in - range(start, argc.value)] - # this should never happen - return None - else: - argv = [] - argvencoding = sys.stdin.encoding - if argvencoding is None: - argvencoding = sys.getfilesystemencoding() - if argvencoding is None: - argvencoding = 'utf-8' - for arg in sys.argv: - if isinstance(arg, text_type): - argv.append(arg) - else: - argv.append(arg.decode(argvencoding)) - return argv - - -# Python 2.X is broken in that it does not recognize CP65001 as UTF-8 -def add_cp65001_codec(): - if PY2: - try: - codecs.lookup('cp65001') - except LookupError: - codecs.register( - lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None) - return diff --git a/epy_extras/KindleUnpack/kindleunpack.py b/epy_extras/KindleUnpack/kindleunpack.py deleted file mode 100644 index 317941a..0000000 --- a/epy_extras/KindleUnpack/kindleunpack.py +++ /dev/null @@ -1,1029 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -import os - -__path__ = ["lib", 
os.path.dirname(os.path.realpath(__file__)), "kindleunpack"] - -import sys -import codecs -import traceback - -from .compatibility_utils import PY2, binary_type, utf8_str, unicode_str -from .compatibility_utils import unicode_argv, add_cp65001_codec -from .compatibility_utils import hexlify - -add_cp65001_codec() - -from .unipath import pathof - -if PY2: - range = xrange - # since will be printing unicode under python 2 need to protect - # against sys.stdout.encoding being None stupidly forcing forcing ascii encoding - if sys.stdout.encoding is None: - sys.stdout = codecs.getwriter("utf-8")(sys.stdout) - else: - encoding = sys.stdout.encoding - sys.stdout = codecs.getwriter(encoding)(sys.stdout) - -# Changelog -# 0.11 - Version by adamselene -# 0.11pd - Tweaked version by pdurrant -# 0.12 - extracts pictures too, and all into a folder. -# 0.13 - added back in optional output dir for those who don't want it based on infile -# 0.14 - auto flush stdout and wrapped in main, added proper return codes -# 0.15 - added support for metadata -# 0.16 - metadata now starting to be output as an opf file (PD) -# 0.17 - Also created tweaked text as source for Mobipocket Creator -# 0.18 - removed raw mobi file completely but kept _meta.html file for ease of conversion -# 0.19 - added in metadata for ASIN, Updated Title and Rights to the opf -# 0.20 - remove _meta.html since no longer needed -# 0.21 - Fixed some typos in the opf output, and also updated handling -# of test for trailing data/multibyte characters -# 0.22 - Fixed problem with > 9 images -# 0.23 - Now output Start guide item -# 0.24 - Set firstaddl value for 'TEXtREAd' -# 0.25 - Now added character set metadata to html file for utf-8 files. -# 0.26 - Dictionary support added. Image handling speed improved. -# For huge files create temp files to speed up decoding. -# Language decoding fixed. Metadata is now converted to utf-8 when written to opf file. -# 0.27 - Add idx:entry attribute "scriptable" if dictionary contains entry length tags. -# Don't save non-image sections as images. Extract and save source zip file -# included by kindlegen as kindlegensrc.zip. -# 0.28 - Added back correct image file name extensions, created FastConcat class to simplify and clean up -# 0.29 - Metadata handling reworked, multiple entries of the same type are now supported. -# Several missing types added. -# FastConcat class has been removed as in-memory handling with lists is faster, even for huge files. -# 0.30 - Add support for outputting **all** metadata values - encode content with hex if of unknown type -# 0.31 - Now supports Print Replica ebooks, outputting PDF and mysterious data sections -# 0.32 - Now supports NCX file extraction/building. -# Overhauled the structure of mobiunpack to be more class oriented. 
-# 0.33 - Split Classes ito separate files and added prelim support for KF8 format eBooks -# 0.34 - Improved KF8 support, guide support, bug fixes -# 0.35 - Added splitting combo mobi7/mobi8 into standalone mobi7 and mobi8 files -# Also handle mobi8-only file properly -# 0.36 - very minor changes to support KF8 mobis with no flow items, no ncx, etc -# 0.37 - separate output, add command line switches to control, interface to Mobi_Unpack.pyw -# 0.38 - improve split function by resetting flags properly, fix bug in Thumbnail Images -# 0.39 - improve split function so that ToC info is not lost for standalone mobi8s -# 0.40 - make mobi7 split match official versions, add support for graphic novel metadata, -# improve debug for KF8 -# 0.41 - fix when StartOffset set to 0xffffffff, fix to work with older mobi versions, -# fix other minor metadata issues -# 0.42 - add new class interface to allow it to integrate more easily with internal calibre routines -# 0.43 - bug fixes for new class interface -# 0.44 - more bug fixes and fix for potnetial bug caused by not properly closing created zip archive -# 0.45 - sync to version in the new Mobi_Unpack plugin -# 0.46 - fixes for: obfuscated fonts, improper toc links and ncx, add support for opentype fonts -# 0.47 - minor opf improvements -# 0.48 - ncx link fixes -# 0.49 - use azw3 when splitting mobis -# 0.50 - unknown change -# 0.51 - fix for converting filepos links to hrefs, Added GPL3 notice, made KF8 extension just '.azw3' -# 0.52 - fix for cover metadata (no support for Mobipocket Creator) -# 0.53 - fix for proper identification of embedded fonts, added new metadata items -# 0.54 - Added error-handling so wonky embedded fonts don't bomb the whole unpack process, -# entity escape KF8 metadata to ensure valid OPF. -# 0.55 Strip extra StartOffset EXTH from the mobi8 header when splitting, keeping only the relevant one -# For mobi8 files, don't generate duplicate guide entries from the metadata if we could extract one -# from the OTH table. -# 0.56 - Added further entity escaping of OPF text. -# Allow unicode string file paths to be passed as arguments to the unpackBook method without blowing up later -# when the attempt to "re"-unicode a portion of that filename occurs in the process_all_mobi_headers method. -# 0.57 - Fixed eror when splitting Preview files downloaded from KDP website -# 0.58 - Output original kindlegen build log ('CMET' record) if included in the package. -# 0.58 - Include and extend functionality of DumpMobiHeader, replacing DEBUG with DUMP -# 0.59 - Much added DUMP functionality, including full dumping and descriptions of sections -# 0.60 - Bug fixes in opf, div tables, bad links, page breaks, section descriptions -# - plus a number of other bug fixed that were found by Sergey Dubinets -# - fixs for file/paths that require full unicode to work properly -# - replace subprocess with multiprocessing to remove need for unbuffered stdout -# 0.61 - renamed to be KindleUnpack and more unicode/utf-8 path bug fixes and other minor fixes -# 0.62 - fix for multiprocessing on Windows, split fixes, opf improvements -# 0.63 - Modified to process right to left page progression books properly. -# - Added some id_map_strings and RESC section processing; metadata and -# - spine in the RESC are integrated partly to content.opf. -# 0.63a- Separated K8 RESC processor to an individual file. Bug fixes. Added cover page creation. 
-# 0.64 - minor bug fixes to more properly handle unicode command lines, and support for more jpeg types -# 0.64a- Modifed to handle something irregular mobi and azw3 files. -# 0.64b- Modifed to create k8resc.spine for no RECS files. -# 0.65 - Bug fixes to shorten title and remove epub3 "properties" to make the output epub2 compliant -# 0.65a- Bug fixes to extract RESC section correctly, to prevent item id confliction -# - and to process multiline comments in RESC. -# 0.66 - Bug fix to deal with missing first resource information sometimes generated by calibre -# 0.66a- Fixed minor bugs, which probably do not affect the output anything -# 0.67 - Fixed Mobi Split functionality bug with azw3 images not being properly copied -# 0.68 - preliminary support for handling PAGE sections to create page-map.xml -# 0.69 - preliminary support for CONT and CRES for HD Images -# 0.70 - preliminary support for decoding apnx files when used with azw3 ebooks -# 0.71 - extensive refactoring of kindleunpack.py to make it more manageable -# 0.72 - many bug fixes from tkeo: fix pageProcessing, fix print replica, fix resc usage, fix font mangling, etc. -# 0.72a- fix for still broken PrintReplica support -# 0.72b- preview for primary epub3 support. A parameter epubver(default='2') is added to process_all_mobi_headers(), unpackBook(). -# 0.72c- preview for apnx page support -# 0.72d- more bugs fixed in preview features, much improved GUI with ability to dynaically grow the Log Window with preference support -# 0.72e- more bug fixes, Tk GUI adds support for epub version and HDImage use -# 0.72f- more bug fixes, implement use hd images if present -# 0.72g- minor bug fixes and cleanups from tkeo -# 0.72h- updated mobi_header and mobi_k8proc to use the correct fragment and guide terms in place of div and other -# to better match the terms that both Calibre and Amazon use internally to their own software -# 0.72x- very experimental conversion to use new mobi_k8resc.py and some of its associated changes -# 0.72y- more changes to simplify and integrate in epub3 support in a simpler manner -# 0.72z- remove redundancy in mobi_opf.py and bug fixes for mobi_k8resc.py -# 0.73 faster mobi split, numerous bug fixes in mobi_k8proc, mobi_header, mobi_opf, mobi_k8resc, etc -# 0.74 added refines metadata, fixed language code in ncx and title in nav, added support for opf: from refines -# 0.75 much improved dictioanry support including support for multiple inflection sections, minor mobi_opf fixes -# 0.76 pre-release version only fix name related issues in opf by not using original file name in mobi7 -# 0.77 bug fix for unpacking HDImages with included Fonts -# 0.80 converted to work with both python 2.7 and Python 3.3 and later -# 0.81 various fixes -# 0.82 Handle calibre-generated mobis that can have skeletons with no fragments -# 0.83 Fix header item 114 being mistakenly treated as a string instead of a value - -DUMP = False -""" Set to True to dump all possible information. """ - -WRITE_RAW_DATA = False -""" Set to True to create additional files with raw data for debugging/reverse engineering. """ - -SPLIT_COMBO_MOBIS = False -""" Set to True to split combination mobis into mobi7 and mobi8 pieces. """ - -CREATE_COVER_PAGE = True # XXX experimental -""" Create and insert a cover xhtml page. """ - -EOF_RECORD = b'\xe9\x8e' + b'\r\n' -""" The EOF record content. 
""" - -TERMINATION_INDICATOR1 = b'\x00' -TERMINATION_INDICATOR2 = b'\x00\x00' -TERMINATION_INDICATOR3 = b'\x00\x00\x00' - -KINDLEGENSRC_FILENAME = "kindlegensrc.zip" -""" The name for the kindlegen source archive. """ - -KINDLEGENLOG_FILENAME = "kindlegenbuild.log" -""" The name for the kindlegen build log. """ - -K8_BOUNDARY = b'BOUNDARY' -""" The section data that divides K8 mobi ebooks. """ - -import os -import struct -import re -import zlib -import getopt - -class unpackException(Exception): - pass - - -# import the kindleunpack support libraries -from .unpack_structure import fileNames -from .mobi_sectioner import Sectionizer, describe -from .mobi_header import MobiHeader, dump_contexth -from .mobi_utils import toBase32 -from .mobi_opf import OPFProcessor -from .mobi_html import HTMLProcessor, XHTMLK8Processor -from .mobi_ncx import ncxExtract -from .mobi_k8proc import K8Processor -from .mobi_split import mobi_split -from .mobi_k8resc import K8RESCProcessor -from .mobi_nav import NAVProcessor -from .mobi_cover import CoverProcessor, get_image_type -from .mobi_pagemap import PageMapProcessor -from .mobi_dict import dictSupport - - -def processSRCS(i, files, rscnames, sect, data): - # extract the source zip archive and save it. - print("File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME) - srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME) - with open(pathof(srcname), 'wb') as f: - f.write(data[16:]) - rscnames.append(None) - sect.setsectiondescription(i,"Zipped Source Files") - return rscnames - - -def processPAGE(i, files, rscnames, sect, data, mh, pagemapproc): - # process any page map information and create an apnx file - pagemapproc = PageMapProcessor(mh, data) - rscnames.append(None) - sect.setsectiondescription(i,"PageMap") - apnx_meta = {} - acr = sect.palmname.decode('latin-1').rstrip('\x00') - apnx_meta['acr'] = acr - apnx_meta['cdeType'] = mh.metadata['cdeType'][0] - apnx_meta['contentGuid'] = hex(int(mh.metadata['UniqueID'][0]))[2:] - apnx_meta['asin'] = mh.metadata['ASIN'][0] - apnx_meta['pageMap'] = pagemapproc.getPageMap() - if mh.version == 8: - apnx_meta['format'] = 'MOBI_8' - else: - apnx_meta['format'] = 'MOBI_7' - apnx_data = pagemapproc.generateAPNX(apnx_meta) - if mh.isK8(): - outname = os.path.join(files.outdir, 'mobi8-'+files.getInputFileBasename() + '.apnx') - else: - outname = os.path.join(files.outdir, 'mobi7-'+files.getInputFileBasename() + '.apnx') - with open(pathof(outname), 'wb') as f: - f.write(apnx_data) - return rscnames, pagemapproc - - -def processCMET(i, files, rscnames, sect, data): - # extract the build log - print("File contains kindlegen build log, extracting as %s" % KINDLEGENLOG_FILENAME) - srcname = os.path.join(files.outdir, KINDLEGENLOG_FILENAME) - with open(pathof(srcname), 'wb') as f: - f.write(data[10:]) - rscnames.append(None) - sect.setsectiondescription(i,"Kindlegen log") - return rscnames - - -# fonts only exist in KF8 ebooks -# Format: bytes 0 - 3: 'FONT' -# bytes 4 - 7: uncompressed size -# bytes 8 - 11: flags -# flag bit 0x0001 - zlib compression -# flag bit 0x0002 - obfuscated with xor string -# bytes 12 - 15: offset to start of compressed font data -# bytes 16 - 19: length of xor string stored before the start of the comnpress font data -# bytes 20 - 23: start of xor string -def processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr): - fontname = "font%05d" % i - ext = '.dat' - font_error = False - font_data = data - try: - usize, fflags, dstart, xor_len, xor_start = 
struct.unpack_from(b'>LLLLL',data,4) - except: - print("Failed to extract font: {0:s} from section {1:d}".format(fontname,i)) - font_error = True - ext = '.failed' - pass - if not font_error: - print("Extracting font:", fontname) - font_data = data[dstart:] - extent = len(font_data) - extent = min(extent, 1040) - if fflags & 0x0002: - # obfuscated so need to de-obfuscate the first 1040 bytes - key = bytearray(data[xor_start: xor_start+ xor_len]) - buf = bytearray(font_data) - for n in range(extent): - buf[n] ^= key[n%xor_len] - font_data = bytes(buf) - if fflags & 0x0001: - # ZLIB compressed data - font_data = zlib.decompress(font_data) - hdr = font_data[0:4] - if hdr == b'\0\1\0\0' or hdr == b'true' or hdr == b'ttcf': - ext = '.ttf' - elif hdr == b'OTTO': - ext = '.otf' - else: - print("Warning: unknown font header %s" % hexlify(hdr)) - if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002): - obfuscate_data.append(fontname + ext) - fontname += ext - outfnt = os.path.join(files.imgdir, fontname) - with open(pathof(outfnt), 'wb') as f: - f.write(font_data) - rscnames.append(fontname) - sect.setsectiondescription(i,"Font {0:s}".format(fontname)) - if rsc_ptr == -1: - rsc_ptr = i - beg - return rscnames, obfuscate_data, rsc_ptr - - -def processCRES(i, files, rscnames, sect, data, beg, rsc_ptr, use_hd): - # extract an HDImage - global DUMP - data = data[12:] - imgtype = get_image_type(None, data) - - if imgtype is None: - print("Warning: CRES Section %s does not contain a recognised resource" % i) - rscnames.append(None) - sect.setsectiondescription(i,"Mysterious CRES data, first four bytes %s" % describe(data[0:4])) - if DUMP: - fname = "unknown%05d.dat" % i - outname= os.path.join(files.outdir, fname) - with open(pathof(outname), 'wb') as f: - f.write(data) - sect.setsectiondescription(i,"Mysterious CRES data, first four bytes %s extracting as %s" % (describe(data[0:4]), fname)) - rsc_ptr += 1 - return rscnames, rsc_ptr - - if use_hd: - # overwrite corresponding lower res image with hd version - imgname = rscnames[rsc_ptr] - imgdest = files.imgdir - else: - imgname = "HDimage%05d.%s" % (i, imgtype) - imgdest = files.hdimgdir - print("Extracting HD image: {0:s} from section {1:d}".format(imgname,i)) - outimg = os.path.join(imgdest, imgname) - with open(pathof(outimg), 'wb') as f: - f.write(data) - rscnames.append(None) - sect.setsectiondescription(i,"Optional HD Image {0:s}".format(imgname)) - rsc_ptr += 1 - return rscnames, rsc_ptr - - -def processCONT(i, files, rscnames, sect, data): - global DUMP - # process a container header, most of this is unknown - # right now only extract its EXTH - dt = data[0:12] - if dt == b"CONTBOUNDARY": - rscnames.append(None) - sect.setsectiondescription(i,"CONTAINER BOUNDARY") - else: - sect.setsectiondescription(i,"CONT Header") - rscnames.append(None) - if DUMP: - cpage, = struct.unpack_from(b'>L', data, 12) - contexth = data[48:] - print("\n\nContainer EXTH Dump") - dump_contexth(cpage, contexth) - fname = "CONT_Header%05d.dat" % i - outname= os.path.join(files.outdir, fname) - with open(pathof(outname), 'wb') as f: - f.write(data) - return rscnames - - -def processkind(i, files, rscnames, sect, data): - global DUMP - dt = data[0:12] - if dt == b"kindle:embed": - if DUMP: - print("\n\nHD Image Container Description String") - print(data) - sect.setsectiondescription(i,"HD Image Container Description String") - rscnames.append(None) - return rscnames - - -# spine information from the original content.opf -def processRESC(i, files, rscnames, sect, data, 
k8resc): - global DUMP - if DUMP: - rescname = "RESC%05d.dat" % i - print("Extracting Resource: ", rescname) - outrsc = os.path.join(files.outdir, rescname) - with open(pathof(outrsc), 'wb') as f: - f.write(data) - if True: # try: - # parse the spine and metadata from RESC - k8resc = K8RESCProcessor(data[16:], DUMP) - else: # except: - print("Warning: cannot extract information from RESC.") - k8resc = None - rscnames.append(None) - sect.setsectiondescription(i,"K8 RESC section") - return rscnames, k8resc - - -def processImage(i, files, rscnames, sect, data, beg, rsc_ptr, cover_offset, thumb_offset): - global DUMP - # Extract an Image - imgtype = get_image_type(None, data) - if imgtype is None: - print("Warning: Section %s does not contain a recognised resource" % i) - rscnames.append(None) - sect.setsectiondescription(i,"Mysterious Section, first four bytes %s" % describe(data[0:4])) - if DUMP: - fname = "unknown%05d.dat" % i - outname= os.path.join(files.outdir, fname) - with open(pathof(outname), 'wb') as f: - f.write(data) - sect.setsectiondescription(i,"Mysterious Section, first four bytes %s extracting as %s" % (describe(data[0:4]), fname)) - return rscnames, rsc_ptr - - imgname = "image%05d.%s" % (i, imgtype) - if cover_offset is not None and i == beg + cover_offset: - imgname = "cover%05d.%s" % (i, imgtype) - if thumb_offset is not None and i == beg + thumb_offset: - imgname = "thumb%05d.%s" % (i, imgtype) - print("Extracting image: {0:s} from section {1:d}".format(imgname,i)) - outimg = os.path.join(files.imgdir, imgname) - with open(pathof(outimg), 'wb') as f: - f.write(data) - rscnames.append(imgname) - sect.setsectiondescription(i,"Image {0:s}".format(imgname)) - if rsc_ptr == -1: - rsc_ptr = i - beg - return rscnames, rsc_ptr - - -def processPrintReplica(metadata, files, rscnames, mh): - global DUMP - global WRITE_RAW_DATA - rawML = mh.getRawML() - if DUMP or WRITE_RAW_DATA: - outraw = os.path.join(files.outdir,files.getInputFileBasename() + '.rawpr') - with open(pathof(outraw),'wb') as f: - f.write(rawML) - - fileinfo = [] - print("Print Replica ebook detected") - try: - numTables, = struct.unpack_from(b'>L', rawML, 0x04) - tableIndexOffset = 8 + 4*numTables - # for each table, read in count of sections, assume first section is a PDF - # and output other sections as binary files - for i in range(numTables): - sectionCount, = struct.unpack_from(b'>L', rawML, 0x08 + 4*i) - for j in range(sectionCount): - sectionOffset, sectionLength, = struct.unpack_from(b'>LL', rawML, tableIndexOffset) - tableIndexOffset += 8 - if j == 0: - entryName = os.path.join(files.outdir, files.getInputFileBasename() + ('.%03d.pdf' % (i+1))) - else: - entryName = os.path.join(files.outdir, files.getInputFileBasename() + ('.%03d.%03d.data' % ((i+1),j))) - with open(pathof(entryName), 'wb') as f: - f.write(rawML[sectionOffset:(sectionOffset+sectionLength)]) - except Exception as e: - print('Error processing Print Replica: ' + str(e)) - - fileinfo.append([None,'', files.getInputFileBasename() + '.pdf']) - usedmap = {} - for name in rscnames: - if name is not None: - usedmap[name] = 'used' - opf = OPFProcessor(files, metadata, fileinfo, rscnames, False, mh, usedmap) - opf.writeOPF() - - -def processMobi8(mh, metadata, sect, files, rscnames, pagemapproc, k8resc, obfuscate_data, apnxfile=None, epubver='2'): - global DUMP - global WRITE_RAW_DATA - - # extract raw markup langauge - rawML = mh.getRawML() - if DUMP or WRITE_RAW_DATA: - outraw = os.path.join(files.k8dir,files.getInputFileBasename() + '.rawml') - 
with open(pathof(outraw),'wb') as f: - f.write(rawML) - - # KF8 require other indexes which contain parsing information and the FDST info - # to process the rawml back into the xhtml files, css files, svg image files, etc - k8proc = K8Processor(mh, sect, files, DUMP) - k8proc.buildParts(rawML) - - # collect information for the guide first - guidetext = unicode_str(k8proc.getGuideText()) - - # if the guide was empty, add in any guide info from metadata, such as StartOffset - if not guidetext and 'StartOffset' in metadata: - # Apparently, KG 2.5 carries over the StartOffset from the mobi7 part... - # Taking that into account, we only care about the *last* StartOffset, which - # should always be the correct one in these cases (the one actually pointing - # to the right place in the mobi8 part). - starts = metadata['StartOffset'] - last_start = starts[-1] - last_start = int(last_start) - if last_start == 0xffffffff: - last_start = 0 - seq, idtext = k8proc.getFragTblInfo(last_start) - filename, idtext = k8proc.getIDTagByPosFid(toBase32(seq), b'0000000000') - linktgt = filename - idtext = unicode_str(idtext, mh.codec) - if idtext != '': - linktgt += '#' + idtext - guidetext += '\n' % linktgt - - # if apnxfile is passed in use it for page map information - if apnxfile is not None and pagemapproc is None: - with open(apnxfile, 'rb') as f: - apnxdata = b"00000000" + f.read() - pagemapproc = PageMapProcessor(mh, apnxdata) - - # generate the page map - pagemapxml = '' - if pagemapproc is not None: - pagemapxml = pagemapproc.generateKF8PageMapXML(k8proc) - outpm = os.path.join(files.k8oebps,'page-map.xml') - with open(pathof(outpm),'wb') as f: - f.write(pagemapxml.encode('utf-8')) - if DUMP: - print(pagemapproc.getNames()) - print(pagemapproc.getOffsets()) - print("\n\nPage Map") - print(pagemapxml) - - # process the toc ncx - # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num - print("Processing ncx / toc") - ncx = ncxExtract(mh, files) - ncx_data = ncx.parseNCX() - # extend the ncx data with filenames and proper internal idtags - for i in range(len(ncx_data)): - ncxmap = ncx_data[i] - [junk1, junk2, junk3, fid, junk4, off] = ncxmap['pos_fid'].split(':') - filename, idtag = k8proc.getIDTagByPosFid(fid, off) - ncxmap['filename'] = filename - ncxmap['idtag'] = unicode_str(idtag) - ncx_data[i] = ncxmap - - # convert the rawML to a set of xhtml files - print("Building an epub-like structure") - htmlproc = XHTMLK8Processor(rscnames, k8proc) - usedmap = htmlproc.buildXHTML() - - # write out the xhtml svg, and css files - # fileinfo = [skelid|coverpage, dir, name] - fileinfo = [] - # first create a cover page if none exists - if CREATE_COVER_PAGE: - cover = CoverProcessor(files, metadata, rscnames) - cover_img = utf8_str(cover.getImageName()) - need_to_create_cover_page = False - if cover_img is not None: - if k8resc is None or not k8resc.hasSpine(): - part = k8proc.getPart(0) - if part.find(cover_img) == -1: - need_to_create_cover_page = True - else: - if "coverpage" not in k8resc.spine_idrefs: - part = k8proc.getPart(int(k8resc.spine_order[0])) - if part.find(cover_img) == -1: - k8resc.prepend_to_spine("coverpage", "inserted", "no", None) - if k8resc.spine_order[0] == "coverpage": - need_to_create_cover_page = True - if need_to_create_cover_page: - filename = cover.getXHTMLName() - fileinfo.append(["coverpage", 'Text', filename]) - guidetext += cover.guide_toxml() - cover.writeXHTML() - - n = k8proc.getNumberOfParts() - for i in range(n): - part = 
k8proc.getPart(i) - [skelnum, dir, filename, beg, end, aidtext] = k8proc.getPartInfo(i) - fileinfo.append([str(skelnum), dir, filename]) - fname = os.path.join(files.k8oebps,dir,filename) - with open(pathof(fname),'wb') as f: - f.write(part) - n = k8proc.getNumberOfFlows() - for i in range(1, n): - [ptype, pformat, pdir, filename] = k8proc.getFlowInfo(i) - flowpart = k8proc.getFlow(i) - if pformat == b'file': - fileinfo.append([None, pdir, filename]) - fname = os.path.join(files.k8oebps,pdir,filename) - with open(pathof(fname),'wb') as f: - f.write(flowpart) - - # create the opf - opf = OPFProcessor(files, metadata.copy(), fileinfo, rscnames, True, mh, usedmap, - pagemapxml=pagemapxml, guidetext=guidetext, k8resc=k8resc, epubver=epubver) - uuid = opf.writeOPF(bool(obfuscate_data)) - - if opf.hasNCX(): - # Create a toc.ncx. - ncx.writeK8NCX(ncx_data, metadata) - if opf.hasNAV(): - # Create a navigation document. - nav = NAVProcessor(files) - nav.writeNAV(ncx_data, guidetext, metadata) - - # make an epub-like structure of it all - print("Creating an epub-like file") - files.makeEPUB(usedmap, obfuscate_data, uuid) - - -def processMobi7(mh, metadata, sect, files, rscnames): - global DUMP - global WRITE_RAW_DATA - # An original Mobi - rawML = mh.getRawML() - if DUMP or WRITE_RAW_DATA: - outraw = os.path.join(files.mobi7dir,files.getInputFileBasename() + '.rawml') - with open(pathof(outraw),'wb') as f: - f.write(rawML) - - # process the toc ncx - # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num - ncx = ncxExtract(mh, files) - ncx_data = ncx.parseNCX() - ncx.writeNCX(metadata) - - positionMap = {} - - # if Dictionary build up the positionMap - if mh.isDictionary(): - if mh.DictInLanguage(): - metadata['DictInLanguage'] = [mh.DictInLanguage()] - if mh.DictOutLanguage(): - metadata['DictOutLanguage'] = [mh.DictOutLanguage()] - positionMap = dictSupport(mh, sect).getPositionMap() - - # convert the rawml back to Mobi ml - proc = HTMLProcessor(files, metadata, rscnames) - srctext = proc.findAnchors(rawML, ncx_data, positionMap) - srctext, usedmap = proc.insertHREFS() - - # write the proper mobi html - fileinfo=[] - # fname = files.getInputFileBasename() + '.html' - fname = 'book.html' - fileinfo.append([None,'', fname]) - outhtml = os.path.join(files.mobi7dir, fname) - with open(pathof(outhtml), 'wb') as f: - f.write(srctext) - - # extract guidetext from srctext - guidetext =b'' - # no pagemap support for older mobis - # pagemapxml = None - guidematch = re.search(br'''(.*)''',srctext,re.IGNORECASE+re.DOTALL) - if guidematch: - guidetext = guidematch.group(1) - # sometimes old mobi guide from srctext horribly written so need to clean up - guidetext = guidetext.replace(b"\r", b"") - guidetext = guidetext.replace(b']*>)''', re.IGNORECASE) - guidepieces = ref_tag_pattern.split(guidetext) - for i in range(1,len(guidepieces), 2): - reftag = guidepieces[i] - # remove any href there now to replace with filepos - reftag = re.sub(br'''href\s*=[^'"]*['"][^'"]*['"]''',b'', reftag) - # make sure the reference tag ends properly - if not reftag.endswith(b"/>"): - reftag = reftag[0:-1] + b"/>" - guidepieces[i] = reftag - guidetext = b''.join(guidepieces) - replacetext = br'''href="'''+utf8_str(fileinfo[0][2])+ br'''#filepos\1"''' - guidetext = re.sub(br'''filepos=['"]{0,1}0*(\d+)['"]{0,1}''', replacetext, guidetext) - guidetext += b'\n' - - if 'StartOffset' in metadata: - for value in metadata['StartOffset']: - if int(value) == 0xffffffff: - value = '0' - starting_offset = 
value - # get guide items from metadata - metaguidetext = b'\n' - guidetext += metaguidetext - - if isinstance(guidetext, binary_type): - guidetext = guidetext.decode(mh.codec) - - # create an OPF - opf = OPFProcessor(files, metadata, fileinfo, rscnames, ncx.isNCX, mh, usedmap, guidetext=guidetext) - opf.writeOPF() - - -def processUnknownSections(mh, sect, files, K8Boundary): - global DUMP - global TERMINATION_INDICATOR1 - global TERMINATION_INDICATOR2 - global TERMINATION_INDICATOR3 - if DUMP: - print("Unpacking any remaining unknown records") - beg = mh.start - end = sect.num_sections - if beg < K8Boundary: - # then we're processing the first part of a combination file - end = K8Boundary - for i in range(beg, end): - if sect.sectiondescriptions[i] == "": - data = sect.loadSection(i) - type = data[0:4] - if type == TERMINATION_INDICATOR3: - description = "Termination Marker 3 Nulls" - elif type == TERMINATION_INDICATOR2: - description = "Termination Marker 2 Nulls" - elif type == TERMINATION_INDICATOR1: - description = "Termination Marker 1 Null" - elif type == "INDX": - fname = "Unknown%05d_INDX.dat" % i - description = "Unknown INDX section" - if DUMP: - outname= os.path.join(files.outdir, fname) - with open(pathof(outname), 'wb') as f: - f.write(data) - print("Extracting %s: %s from section %d" % (description, fname, i)) - description = description + ", extracting as %s" % fname - else: - fname = "unknown%05d.dat" % i - description = "Mysterious Section, first four bytes %s" % describe(data[0:4]) - if DUMP: - outname= os.path.join(files.outdir, fname) - with open(pathof(outname), 'wb') as f: - f.write(data) - print("Extracting %s: %s from section %d" % (description, fname, i)) - description = description + ", extracting as %s" % fname - sect.setsectiondescription(i, description) - - -def process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, k8only=False, epubver='2', use_hd=False): - global DUMP - global WRITE_RAW_DATA - rscnames = [] - rsc_ptr = -1 - k8resc = None - obfuscate_data = [] - for mh in mhlst: - pagemapproc = None - if mh.isK8(): - sect.setsectiondescription(mh.start,"KF8 Header") - mhname = os.path.join(files.outdir,"header_K8.dat") - print("Processing K8 section of book...") - elif mh.isPrintReplica(): - sect.setsectiondescription(mh.start,"Print Replica Header") - mhname = os.path.join(files.outdir,"header_PR.dat") - print("Processing PrintReplica section of book...") - else: - if mh.version == 0: - sect.setsectiondescription(mh.start, "PalmDoc Header".format(mh.version)) - else: - sect.setsectiondescription(mh.start,"Mobipocket {0:d} Header".format(mh.version)) - mhname = os.path.join(files.outdir,"header.dat") - print("Processing Mobipocket {0:d} section of book...".format(mh.version)) - - if DUMP: - # write out raw mobi header data - with open(pathof(mhname), 'wb') as f: - f.write(mh.header) - - # process each mobi header - metadata = mh.getMetaData() - mh.describeHeader(DUMP) - if mh.isEncrypted(): - raise unpackException('Book is encrypted') - - pagemapproc = None - - # first handle all of the different resource sections: images, resources, fonts, and etc - # build up a list of image names to use to postprocess the ebook - - print("Unpacking images, resources, fonts, etc") - beg = mh.firstresource - end = sect.num_sections - if beg < K8Boundary: - # processing first part of a combination file - end = K8Boundary - - # Not sure the try/except is necessary, but just in case - try: - thumb_offset = int(metadata.get('ThumbOffset', ['-1'])[0]) - except: - 
thumb_offset = None - - cover_offset = int(metadata.get('CoverOffset', ['-1'])[0]) - if not CREATE_COVER_PAGE: - cover_offset = None - - for i in range(beg, end): - data = sect.loadSection(i) - type = data[0:4] - - # handle the basics first - if type in [b"FLIS", b"FCIS", b"FDST", b"DATP"]: - if DUMP: - fname = unicode_str(type) + "%05d" % i - if mh.isK8(): - fname += "_K8" - fname += '.dat' - outname= os.path.join(files.outdir, fname) - with open(pathof(outname), 'wb') as f: - f.write(data) - print("Dumping section {0:d} type {1:s} to file {2:s} ".format(i,unicode_str(type),outname)) - sect.setsectiondescription(i,"Type {0:s}".format(unicode_str(type))) - rscnames.append(None) - elif type == b"SRCS": - rscnames = processSRCS(i, files, rscnames, sect, data) - elif type == b"PAGE": - rscnames, pagemapproc = processPAGE(i, files, rscnames, sect, data, mh, pagemapproc) - elif type == b"CMET": - rscnames = processCMET(i, files, rscnames, sect, data) - elif type == b"FONT": - rscnames, obfuscate_data, rsc_ptr = processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr) - elif type == b"CRES": - rscnames, rsc_ptr = processCRES(i, files, rscnames, sect, data, beg, rsc_ptr, use_hd) - elif type == b"CONT": - rscnames = processCONT(i, files, rscnames, sect, data) - elif type == b"kind": - rscnames = processkind(i, files, rscnames, sect, data) - elif type == b'\xa0\xa0\xa0\xa0': - sect.setsectiondescription(i,"Empty_HD_Image/Resource_Placeholder") - rscnames.append(None) - rsc_ptr += 1 - elif type == b"RESC": - rscnames, k8resc = processRESC(i, files, rscnames, sect, data, k8resc) - elif data == EOF_RECORD: - sect.setsectiondescription(i,"End Of File") - rscnames.append(None) - elif data[0:8] == b"BOUNDARY": - sect.setsectiondescription(i,"BOUNDARY Marker") - rscnames.append(None) - else: - # if reached here should be an image ow treat as unknown - rscnames, rsc_ptr = processImage(i, files, rscnames, sect, data, beg, rsc_ptr, cover_offset, thumb_offset) - # done unpacking resources - - # Print Replica - if mh.isPrintReplica() and not k8only: - processPrintReplica(metadata, files, rscnames, mh) - continue - - # KF8 (Mobi 8) - if mh.isK8(): - processMobi8(mh, metadata, sect, files, rscnames, pagemapproc, k8resc, obfuscate_data, apnxfile, epubver) - - # Old Mobi (Mobi 7) - elif not k8only: - processMobi7(mh, metadata, sect, files, rscnames) - - # process any remaining unknown sections of the palm file - processUnknownSections(mh, sect, files, K8Boundary) - - return - - -def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=False, dodump=False, dowriteraw=False, dosplitcombos=False): - global DUMP - global WRITE_RAW_DATA - global SPLIT_COMBO_MOBIS - if DUMP or dodump: - DUMP = True - if WRITE_RAW_DATA or dowriteraw: - WRITE_RAW_DATA = True - if SPLIT_COMBO_MOBIS or dosplitcombos: - SPLIT_COMBO_MOBIS = True - - infile = unicode_str(infile) - outdir = unicode_str(outdir) - if apnxfile is not None: - apnxfile = unicode_str(apnxfile) - - files = fileNames(infile, outdir) - - # process the PalmDoc database header and verify it is a mobi - sect = Sectionizer(infile) - if sect.ident != b'BOOKMOBI' and sect.ident != b'TEXtREAd': - raise unpackException('Invalid file format') - if DUMP: - sect.dumppalmheader() - else: - print("Palm DB type: %s, %d sections." % (sect.ident.decode('utf-8'),sect.num_sections)) - - # scan sections to see if this is a compound mobi file (K8 format) - # and build a list of all mobi headers to process. 
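# Illustration (editorial addition, not part of the original file): a combo
# Mobi7/KF8 file stores both books in a single PalmDB, separated by an
# 8-byte section whose payload is b'BOUNDARY' (K8_BOUNDARY above):
#
#     [ mobi7 header | mobi7 records ... | BOUNDARY | mobi8 header | mobi8 records ... ]
#
# The loop below detects that marker by looking for a section exactly
# 8 bytes long whose content equals K8_BOUNDARY, and then reads the KF8
# MobiHeader from the section immediately after it (i + 1).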
- mhlst = [] - mh = MobiHeader(sect,0) - # if this is a mobi8-only file hasK8 here will be true - mhlst.append(mh) - K8Boundary = -1 - - if mh.isK8(): - print("Unpacking a KF8 book...") - hasK8 = True - else: - # This is either a Mobipocket 7 or earlier, or a combi M7/KF8 - # Find out which - hasK8 = False - for i in range(len(sect.sectionoffsets)-1): - before, after = sect.sectionoffsets[i:i+2] - if (after - before) == 8: - data = sect.loadSection(i) - if data == K8_BOUNDARY: - sect.setsectiondescription(i,"Mobi/KF8 Boundary Section") - mh = MobiHeader(sect,i+1) - hasK8 = True - mhlst.append(mh) - K8Boundary = i - break - if hasK8: - print("Unpacking a Combination M{0:d}/KF8 book...".format(mh.version)) - if SPLIT_COMBO_MOBIS: - # if this is a combination mobi7-mobi8 file split them up - mobisplit = mobi_split(infile) - if mobisplit.combo: - outmobi7 = os.path.join(files.outdir, 'mobi7-'+files.getInputFileBasename() + '.mobi') - outmobi8 = os.path.join(files.outdir, 'mobi8-'+files.getInputFileBasename() + '.azw3') - with open(pathof(outmobi7), 'wb') as f: - f.write(mobisplit.getResult7()) - with open(pathof(outmobi8), 'wb') as f: - f.write(mobisplit.getResult8()) - else: - print("Unpacking a Mobipocket {0:d} book...".format(mh.version)) - - if hasK8: - files.makeK8Struct() - - process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, False, epubver, use_hd) - - if DUMP: - sect.dumpsectionsinfo() - return - - -def usage(progname): - print("") - print("Description:") - print(" Unpacks an unencrypted Kindle/MobiPocket ebook to html and images") - print(" or an unencrypted Kindle/Print Replica ebook to PDF and images") - print(" into the specified output folder.") - print("Usage:") - print(" %s -r -s -p apnxfile -d -h --epub_version= infile [outdir]" % progname) - print("Options:") - print(" -h print this help message") - print(" -i use HD Images, if present, to overwrite reduced resolution images") - print(" -s split combination mobis into mobi7 and mobi8 ebooks") - print(" -p APNXFILE path to an .apnx file associated with the azw3 input (optional)") - print(" --epub_version= specify epub version to unpack to: 2, 3, A (for automatic) or ") - print(" F (force to fit to epub2 definitions), default is 2") - print(" -d dump headers and other info to output and extra files") - print(" -r write raw data to the output folder") - - -def main(argv=unicode_argv()): - global DUMP - global WRITE_RAW_DATA - global SPLIT_COMBO_MOBIS - - print("KindleUnpack v0.83") - print(" Based on initial mobipocket version Copyright © 2009 Charles M. Hannum ") - print(" Extensive Extensions and Improvements Copyright © 2009-2020 ") - print(" by: P. Durrant, K. Hendricks, S. 
Siebert, fandrieu, DiapDealer, nickredding, tkeo.") - print(" This program is free software: you can redistribute it and/or modify") - print(" it under the terms of the GNU General Public License as published by") - print(" the Free Software Foundation, version 3.") - - progname = os.path.basename(argv[0]) - try: - opts, args = getopt.getopt(argv[1:], "dhirsp:", ['epub_version=']) - except getopt.GetoptError as err: - print(str(err)) - usage(progname) - sys.exit(2) - - if len(args)<1: - usage(progname) - sys.exit(2) - - apnxfile = None - epubver = '2' - use_hd = False - - for o, a in opts: - if o == "-h": - usage(progname) - sys.exit(0) - if o == "-i": - use_hd = True - if o == "-d": - DUMP = True - if o == "-r": - WRITE_RAW_DATA = True - if o == "-s": - SPLIT_COMBO_MOBIS = True - if o == "-p": - apnxfile = a - if o == "--epub_version": - epubver = a - - if len(args) > 1: - infile, outdir = args - else: - infile = args[0] - outdir = os.path.splitext(infile)[0] - - infileext = os.path.splitext(infile)[1].upper() - if infileext not in ['.MOBI', '.PRC', '.AZW', '.AZW3', '.AZW4']: - print("Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook.") - return 1 - - try: - print('Unpacking Book...') - unpackBook(infile, outdir, apnxfile, epubver, use_hd) - print('Completed') - - except ValueError as e: - print("Error: %s" % e) - print(traceback.format_exc()) - return 1 - - return 0 - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/epy_extras/KindleUnpack/mobi_cover.py b/epy_extras/KindleUnpack/mobi_cover.py deleted file mode 100644 index 3078ac4..0000000 --- a/epy_extras/KindleUnpack/mobi_cover.py +++ /dev/null @@ -1,238 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -from .compatibility_utils import unicode_str - -from .unipath import pathof -import os -import imghdr - -import struct -# note: struct pack, unpack, unpack_from all require bytestring format -# data all the way up to at least python 2.7.5, python 3 okay with bytestring - -USE_SVG_WRAPPER = True -""" Set to True to use svg wrapper for default. """ - -FORCE_DEFAULT_TITLE = False -""" Set to True to force to use the default title. """ - -COVER_PAGE_FINENAME = 'cover_page.xhtml' -""" The name for the cover page. """ - -DEFAULT_TITLE = 'Cover' -""" The default title for the cover page. """ - -MAX_WIDTH = 4096 -""" The max width for the svg cover page. """ - -MAX_HEIGHT = 4096 -""" The max height for the svg cover page. """ - - -def get_image_type(imgname, imgdata=None): - imgtype = unicode_str(imghdr.what(pathof(imgname), imgdata)) - - # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are some - # with only the magic JPEG bytes out there... - # ImageMagick handles those, so, do it too. - if imgtype is None: - if imgdata is None: - with open(pathof(imgname), 'rb') as f: - imgdata = f.read() - if imgdata[0:2] == b'\xFF\xD8': - # Get last non-null bytes - last = len(imgdata) - while (imgdata[last-1:last] == b'\x00'): - last-=1 - # Be extra safe, check the trailing bytes, too. - if imgdata[last-2:last] == b'\xFF\xD9': - imgtype = "jpeg" - return imgtype - - -def get_image_size(imgname, imgdata=None): - '''Determine the image type of imgname (or imgdata) and return its size. - - Originally, - Determine the image type of fhandle and return its size. 
- from draco''' - if imgdata is None: - fhandle = open(pathof(imgname), 'rb') - head = fhandle.read(24) - else: - head = imgdata[0:24] - if len(head) != 24: - return - - imgtype = get_image_type(imgname, imgdata) - if imgtype == 'png': - check = struct.unpack(b'>i', head[4:8])[0] - if check != 0x0d0a1a0a: - return - width, height = struct.unpack(b'>ii', head[16:24]) - elif imgtype == 'gif': - width, height = struct.unpack(b'H', fhandle.read(2))[0] - 2 - # We are at a SOFn block - fhandle.seek(1, 1) # Skip `precision' byte. - height, width = struct.unpack(b'>HH', fhandle.read(4)) - except Exception: # IGNORE:W0703 - return - elif imgtype == 'jpeg' and imgdata is not None: - try: - pos = 0 - size = 2 - ftype = 0 - while not 0xc0 <= ftype <= 0xcf: - pos += size - byte = imgdata[pos:pos+1] - pos += 1 - while ord(byte) == 0xff: - byte = imgdata[pos:pos+1] - pos += 1 - ftype = ord(byte) - size = struct.unpack(b'>H', imgdata[pos:pos+2])[0] - 2 - pos += 2 - # We are at a SOFn block - pos += 1 # Skip `precision' byte. - height, width = struct.unpack(b'>HH', imgdata[pos:pos+4]) - pos += 4 - except Exception: # IGNORE:W0703 - return - else: - return - return width, height - -# XXX experimental -class CoverProcessor(object): - - """Create a cover page. - - """ - def __init__(self, files, metadata, rscnames, imgname=None, imgdata=None): - self.files = files - self.metadata = metadata - self.rscnames = rscnames - self.cover_page = COVER_PAGE_FINENAME - self.use_svg = USE_SVG_WRAPPER # Use svg wrapper. - self.lang = metadata.get('Language', ['en'])[0] - # This should ensure that if the methods to find the cover image's - # dimensions should fail for any reason, the SVG routine will not be used. - [self.width, self.height] = (-1,-1) - if FORCE_DEFAULT_TITLE: - self.title = DEFAULT_TITLE - else: - self.title = metadata.get('Title', [DEFAULT_TITLE])[0] - - self.cover_image = None - if imgname is not None: - self.cover_image = imgname - elif 'CoverOffset' in metadata: - imageNumber = int(metadata['CoverOffset'][0]) - cover_image = self.rscnames[imageNumber] - if cover_image is not None: - self.cover_image = cover_image - else: - print('Warning: Cannot identify the cover image.') - if self.use_svg: - try: - if imgdata is None: - fname = os.path.join(files.imgdir, self.cover_image) - [self.width, self.height] = get_image_size(fname) - else: - [self.width, self.height] = get_image_size(None, imgdata) - except: - self.use_svg = False - width = self.width - height = self.height - if width < 0 or height < 0 or width > MAX_WIDTH or height > MAX_HEIGHT: - self.use_svg = False - return - - def getImageName(self): - return self.cover_image - - def getXHTMLName(self): - return self.cover_page - - def buildXHTML(self): - print('Building a cover page.') - files = self.files - cover_image = self.cover_image - title = self.title - lang = self.lang - - image_dir = os.path.normpath(os.path.relpath(files.k8images, files.k8text)) - image_path = os.path.join(image_dir, cover_image).replace('\\', '/') - - if not self.use_svg: - data = '' - data += '' - data += 'L', idata, 0x14) - count, = struct.unpack_from(b'>L', idata, 0x18) - self.starts.append(start) - self.counts.append(count) - - def lookup(self, lookupvalue): - i = 0 - rvalue = lookupvalue - while rvalue >= self.counts[i]: - rvalue = rvalue - self.counts[i] - i += 1 - if i == len(self.counts): - print("Error: Problem with multiple inflections data sections") - return lookupvalue, self.starts[0], self.counts[0], self.infldatas[0] - return rvalue, self.starts[i], 
self.counts[i], self.infldatas[i] - - def offsets(self, value): - rvalue, start, count, data = self.lookup(value) - offset, = struct.unpack_from(b'>H', data, start + 4 + (2 * rvalue)) - if rvalue + 1 < count: - nextOffset, = struct.unpack_from(b'>H',data, start + 4 + (2 * (rvalue + 1))) - else: - nextOffset = None - return offset, nextOffset, data - - -class dictSupport(object): - - def __init__(self, mh, sect): - self.mh = mh - self.header = mh.header - self.sect = sect - self.metaOrthIndex = mh.metaOrthIndex - self.metaInflIndex = mh.metaInflIndex - - def parseHeader(self, data): - "read INDX header" - if not data[:4] == b'INDX': - print("Warning: index section is not INDX") - return False - words = ( - 'len', 'nul1', 'type', 'gen', 'start', 'count', 'code', - 'lng', 'total', 'ordt', 'ligt', 'nligt', 'nctoc' - ) - num = len(words) - values = struct.unpack(bstr('>%dL' % num), data[4:4*(num+1)]) - header = {} - for n in range(num): - header[words[n]] = values[n] - - ordt1 = None - ordt2 = None - - otype, oentries, op1, op2, otagx = struct.unpack_from(b'>LLLLL',data, 0xa4) - header['otype'] = otype - header['oentries'] = oentries - - if DEBUG_DICT: - print("otype %d, oentries %d, op1 %d, op2 %d, otagx %d" % (otype, oentries, op1, op2, otagx)) - - if header['code'] == 0xfdea or oentries > 0: - # some dictionaries seem to be codepage 65002 (0xFDEA) which seems - # to be some sort of strange EBCDIC utf-8 or 16 encoded strings - # So we need to look for them and store them away to process leading text - # ORDT1 has 1 byte long entries, ORDT2 has 2 byte long entries - # we only ever seem to use the second but ... - # - # if otype = 0, ORDT table uses 16 bit values as offsets into the table - # if otype = 1, ORDT table uses 8 bit values as offsets inot the table - - assert(data[op1:op1+4] == b'ORDT') - assert(data[op2:op2+4] == b'ORDT') - ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1+4) - ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2+4) - - if DEBUG_DICT: - print("parsed INDX header:") - for key in header: - print(key, "%x" % header[key],) - print("\n") - return header, ordt1, ordt2 - - def getPositionMap(self): - sect = self.sect - - positionMap = {} - - metaOrthIndex = self.metaOrthIndex - metaInflIndex = self.metaInflIndex - - decodeInflection = True - if metaOrthIndex != 0xFFFFFFFF: - print("Info: Document contains orthographic index, handle as dictionary") - if metaInflIndex == 0xFFFFFFFF: - decodeInflection = False - else: - metaInflIndexData = sect.loadSection(metaInflIndex) - - print("\nParsing metaInflIndexData") - midxhdr, mhordt1, mhordt2 = self.parseHeader(metaInflIndexData) - - metaIndexCount = midxhdr['count'] - idatas = [] - for j in range(metaIndexCount): - idatas.append(sect.loadSection(metaInflIndex + 1 + j)) - dinfl = InflectionData(idatas) - - inflNameData = sect.loadSection(metaInflIndex + 1 + metaIndexCount) - tagSectionStart = midxhdr['len'] - inflectionControlByteCount, inflectionTagTable = readTagSection(tagSectionStart, metaInflIndexData) - if DEBUG_DICT: - print("inflectionTagTable: %s" % inflectionTagTable) - if self.hasTag(inflectionTagTable, 0x07): - print("Error: Dictionary uses obsolete inflection rule scheme which is not yet supported") - decodeInflection = False - - data = sect.loadSection(metaOrthIndex) - - print("\nParsing metaOrthIndex") - idxhdr, hordt1, hordt2 = self.parseHeader(data) - - tagSectionStart = idxhdr['len'] - controlByteCount, tagTable = readTagSection(tagSectionStart, data) - orthIndexCount = idxhdr['count'] - 
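The thirteen header words that parseHeader pulls out above can be read in a single struct call; a standalone restatement under the same layout (parse_indx_words is a hypothetical name):

    import struct

    INDX_WORDS = ('len', 'nul1', 'type', 'gen', 'start', 'count', 'code',
                  'lng', 'total', 'ordt', 'ligt', 'nligt', 'nctoc')

    def parse_indx_words(data):
        # 4-byte b'INDX' magic, then 13 big-endian uint32 fields
        if data[:4] != b'INDX':
            raise ValueError('not an INDX section')
        return dict(zip(INDX_WORDS, struct.unpack_from(b'>13L', data, 4)))
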
print("orthIndexCount is", orthIndexCount) - if DEBUG_DICT: - print("orthTagTable: %s" % tagTable) - if hordt2 is not None: - print("orth entry uses ordt2 lookup table of type ", idxhdr['otype']) - hasEntryLength = self.hasTag(tagTable, 0x02) - if not hasEntryLength: - print("Info: Index doesn't contain entry length tags") - - print("Read dictionary index data") - for i in range(metaOrthIndex + 1, metaOrthIndex + 1 + orthIndexCount): - data = sect.loadSection(i) - hdrinfo, ordt1, ordt2 = self.parseHeader(data) - idxtPos = hdrinfo['start'] - entryCount = hdrinfo['count'] - idxPositions = [] - for j in range(entryCount): - pos, = struct.unpack_from(b'>H', data, idxtPos + 4 + (2 * j)) - idxPositions.append(pos) - # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!) - idxPositions.append(idxtPos) - for j in range(entryCount): - startPos = idxPositions[j] - endPos = idxPositions[j+1] - textLength = ord(data[startPos:startPos+1]) - text = data[startPos+1:startPos+1+textLength] - if hordt2 is not None: - utext = u"" - if idxhdr['otype'] == 0: - pattern = b'>H' - inc = 2 - else: - pattern = b'>B' - inc = 1 - pos = 0 - while pos < textLength: - off, = struct.unpack_from(pattern, text, pos) - if off < len(hordt2): - utext += unichr(hordt2[off]) - else: - utext += unichr(off) - pos += inc - text = utext.encode('utf-8') - - tagMap = getTagMap(controlByteCount, tagTable, data, startPos+1+textLength, endPos) - if 0x01 in tagMap: - if decodeInflection and 0x2a in tagMap: - inflectionGroups = self.getInflectionGroups(text, inflectionControlByteCount, inflectionTagTable, - dinfl, inflNameData, tagMap[0x2a]) - else: - inflectionGroups = b'' - assert len(tagMap[0x01]) == 1 - entryStartPosition = tagMap[0x01][0] - if hasEntryLength: - # The idx:entry attribute "scriptable" must be present to create entry length tags. - ml = b'' + inflectionGroups + b'' - if entryStartPosition in positionMap: - positionMap[entryStartPosition] = positionMap[entryStartPosition] + ml - else: - positionMap[entryStartPosition] = ml - assert len(tagMap[0x02]) == 1 - entryEndPosition = entryStartPosition + tagMap[0x02][0] - if entryEndPosition in positionMap: - positionMap[entryEndPosition] = b"" + positionMap[entryEndPosition] - else: - positionMap[entryEndPosition] = b"" - - else: - indexTags = b'\n\n' + inflectionGroups + b'\n' - if entryStartPosition in positionMap: - positionMap[entryStartPosition] = positionMap[entryStartPosition] + indexTags - else: - positionMap[entryStartPosition] = indexTags - return positionMap - - def hasTag(self, tagTable, tag): - ''' - Test if tag table contains given tag. - - @param tagTable: The tag table. - @param tag: The tag to search. - @return: True if tag table contains given tag; False otherwise. - ''' - for currentTag, _, _, _ in tagTable: - if currentTag == tag: - return True - return False - - def getInflectionGroups(self, mainEntry, controlByteCount, tagTable, dinfl, inflectionNames, groupList): - ''' - Create string which contains the inflection groups with inflection rules as mobipocket tags. - - @param mainEntry: The word to inflect. - @param controlByteCount: The number of control bytes. - @param tagTable: The tag table. - @param data: The Inflection data object to properly select the right inflection data section to use - @param inflectionNames: The inflection rule name data. - @param groupList: The list of inflection groups to process. - @return: String with inflection groups and rules or empty string if required tags are not available. 
- ''' - result = b"" - for value in groupList: - offset, nextOffset, data = dinfl.offsets(value) - - # First byte seems to be always 0x00 and must be skipped. - assert ord(data[offset:offset+1]) == 0x00 - tagMap = getTagMap(controlByteCount, tagTable, data, offset + 1, nextOffset) - - # Make sure that the required tags are available. - if 0x05 not in tagMap: - print("Error: Required tag 0x05 not found in tagMap") - return "" - if 0x1a not in tagMap: - print("Error: Required tag 0x1a not found in tagMap") - return b'' - - result += b'' - - for i in range(len(tagMap[0x05])): - - # Get name of inflection rule. - value = tagMap[0x05][i] - consumed, textLength = getVariableWidthValue(inflectionNames, value) - inflectionName = inflectionNames[value+consumed:value+consumed+textLength] - - # Get and apply inflection rule across possibly multiple inflection data sections - value = tagMap[0x1a][i] - rvalue, start, count, data = dinfl.lookup(value) - offset, = struct.unpack_from(b'>H', data, start + 4 + (2 * rvalue)) - textLength = ord(data[offset:offset+1]) - inflection = self.applyInflectionRule(mainEntry, data, offset+1, offset+1+textLength) - if inflection is not None: - result += b' ' - - result += b'' - return result - - def applyInflectionRule(self, mainEntry, inflectionRuleData, start, end): - ''' - Apply inflection rule. - - @param mainEntry: The word to inflect. - @param inflectionRuleData: The inflection rules. - @param start: The start position of the inflection rule to use. - @param end: The end position of the inflection rule to use. - @return: The string with the inflected word or None if an error occurs. - ''' - mode = -1 - byteArray = array.array(array_format, mainEntry) - position = len(byteArray) - for charOffset in range(start, end): - char = inflectionRuleData[charOffset:charOffset+1] - abyte = ord(char) - if abyte >= 0x0a and abyte <= 0x13: - # Move cursor backwards - offset = abyte - 0x0a - if mode not in [0x02, 0x03]: - mode = 0x02 - position = len(byteArray) - position -= offset - elif abyte > 0x13: - if mode == -1: - print("Error: Unexpected first byte %i of inflection rule" % abyte) - return None - elif position == -1: - print("Error: Unexpected first byte %i of inflection rule" % abyte) - return None - else: - if mode == 0x01: - # Insert at word start - byteArray.insert(position, abyte) - position += 1 - elif mode == 0x02: - # Insert at word end - byteArray.insert(position, abyte) - elif mode == 0x03: - # Delete at word end - position -= 1 - deleted = byteArray.pop(position) - if bchr(deleted) != char: - if DEBUG_DICT: - print("0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, bchr(deleted))) - print("Error: Delete operation of inflection rule failed") - return None - elif mode == 0x04: - # Delete at word start - deleted = byteArray.pop(position) - if bchr(deleted) != char: - if DEBUG_DICT: - print("0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, bchr(deleted))) - print("Error: Delete operation of inflection rule failed") - return None - else: - print("Error: Inflection rule mode %x is not implemented" % mode) - return None - elif abyte == 0x01: - # Insert at word start - if mode not in [0x01, 0x04]: - position = 0 - mode = abyte - elif abyte == 0x02: - # Insert at word end - if mode not in [0x02, 0x03]: - position = len(byteArray) - mode = abyte - elif abyte == 0x03: - # Delete at word end - if mode not in [0x02, 0x03]: - position = len(byteArray) - mode = abyte - elif abyte == 0x04: - # Delete at word start - if 
mode not in [0x01, 0x04]: - position = 0 - # Delete at word start - mode = abyte - else: - print("Error: Inflection rule mode %x is not implemented" % abyte) - return None - return utf8_str(byteArray.tostring()) diff --git a/epy_extras/KindleUnpack/mobi_header.py b/epy_extras/KindleUnpack/mobi_header.py deleted file mode 100644 index a15f636..0000000 --- a/epy_extras/KindleUnpack/mobi_header.py +++ /dev/null @@ -1,936 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -DEBUG_USE_ORDERED_DICTIONARY = False # OrderedDict is supoorted >= python 2.7. -""" set to True to use OrderedDict for MobiHeader.metadata.""" - -if DEBUG_USE_ORDERED_DICTIONARY: - from collections import OrderedDict as dict_ -else: - dict_ = dict - -from .compatibility_utils import PY2, unicode_str, hexlify, bord - -if PY2: - range = xrange - -import struct -import uuid - -# import the mobiunpack support libraries -from .mobi_utils import getLanguage -from .mobi_uncompress import HuffcdicReader, PalmdocReader, UncompressedReader - -class unpackException(Exception): - pass - - -def sortedHeaderKeys(mheader): - hdrkeys = sorted(list(mheader.keys()), key=lambda akey: mheader[akey][0]) - return hdrkeys - - -# HD Containers have their own headers and their own EXTH -# this is just guesswork so far, making big assumption that -# metavalue key numbers remain the same in the CONT EXTH - -# Note: The layout of the CONT Header is still unknown -# so just deal with their EXTH sections for now - -def dump_contexth(cpage, extheader): - # determine text encoding - codec = 'windows-1252' - codec_map = { - 1252 : 'windows-1252', - 65001: 'utf-8', - } - if cpage in codec_map: - codec = codec_map[cpage] - if extheader == b'': - return - id_map_strings = { - 1 : 'Drm Server Id', - 2 : 'Drm Commerce Id', - 3 : 'Drm Ebookbase Book Id', - 4 : 'Drm Ebookbase Dep Id', - 100 : 'Creator', - 101 : 'Publisher', - 102 : 'Imprint', - 103 : 'Description', - 104 : 'ISBN', - 105 : 'Subject', - 106 : 'Published', - 107 : 'Review', - 108 : 'Contributor', - 109 : 'Rights', - 110 : 'SubjectCode', - 111 : 'Type', - 112 : 'Source', - 113 : 'ASIN', - # 114 : 'versionNumber', - 117 : 'Adult', - 118 : 'Retail-Price', - 119 : 'Retail-Currency', - 120 : 'TSC', - 122 : 'fixed-layout', - 123 : 'book-type', - 124 : 'orientation-lock', - 126 : 'original-resolution', - 127 : 'zero-gutter', - 128 : 'zero-margin', - 129 : 'MetadataResourceURI', - 132 : 'RegionMagnification', - 150 : 'LendingEnabled', - 200 : 'DictShortName', - 501 : 'cdeType', - 502 : 'last_update_time', - 503 : 'Updated_Title', - 504 : 'CDEContentKey', - 505 : 'AmazonContentReference', - 506 : 'Title-Language', - 507 : 'Title-Display-Direction', - 508 : 'Title-Pronunciation', - 509 : 'Title-Collation', - 510 : 'Secondary-Title', - 511 : 'Secondary-Title-Language', - 512 : 'Secondary-Title-Direction', - 513 : 'Secondary-Title-Pronunciation', - 514 : 'Secondary-Title-Collation', - 515 : 'Author-Language', - 516 : 'Author-Display-Direction', - 517 : 'Author-Pronunciation', - 518 : 'Author-Collation', - 519 : 'Author-Type', - 520 : 'Publisher-Language', - 521 : 'Publisher-Display-Direction', - 522 : 'Publisher-Pronunciation', - 523 : 'Publisher-Collation', - 524 : 'Content-Language-Tag', - 525 : 'primary-writing-mode', - 526 : 'NCX-Ingested-By-Software', - 527 : 'page-progression-direction', - 528 : 'override-kindle-fonts', - 529 : 'Compression-Upgraded', - 530 : 
'Soft-Hyphens-In-Content', - 531 : 'Dictionary_In_Langague', - 532 : 'Dictionary_Out_Language', - 533 : 'Font_Converted', - 534 : 'Amazon_Creator_Info', - 535 : 'Creator-Build-Tag', - 536 : 'HD-Media-Containers-Info', # CONT_Header is 0, Ends with CONTAINER_BOUNDARY (or Asset_Type?) - 538 : 'Resource-Container-Fidelity', - 539 : 'HD-Container-Mimetype', - 540 : 'Sample-For_Special-Purpose', - 541 : 'Kindletool-Operation-Information', - 542 : 'Container_Id', - 543 : 'Asset-Type', # FONT_CONTAINER, BW_CONTAINER, HD_CONTAINER - 544 : 'Unknown_544', - } - id_map_values = { - 114 : 'versionNumber', - 115 : 'sample', - 116 : 'StartOffset', - 121 : 'Mobi8-Boundary-Section', - 125 : 'Embedded-Record-Count', - 130 : 'Offline-Sample', - 131 : 'Metadata-Record-Offset', - 201 : 'CoverOffset', - 202 : 'ThumbOffset', - 203 : 'HasFakeCover', - 204 : 'Creator-Software', - 205 : 'Creator-Major-Version', - 206 : 'Creator-Minor-Version', - 207 : 'Creator-Build-Number', - 401 : 'Clipping-Limit', - 402 : 'Publisher-Limit', - 404 : 'Text-to-Speech-Disabled', - 406 : 'Rental-Expiration-Time', - } - id_map_hexstrings = { - 208 : 'Watermark_(hex)', - 209 : 'Tamper-Proof-Keys_(hex)', - 300 : 'Font-Signature_(hex)', - 403 : 'Unknown_(403)_(hex)', - 405 : 'Ownership-Type_(hex)', - 407 : 'Unknown_(407)_(hex)', - 420 : 'Multimedia-Content-Reference_(hex)', - 450 : 'Locations_Match_(hex)', - 451 : 'Full-Story-Length_(hex)', - 452 : 'Sample-Start_Location_(hex)', - 453 : 'Sample-End-Location_(hex)', - } - _length, num_items = struct.unpack(b'>LL', extheader[4:12]) - extheader = extheader[12:] - pos = 0 - for _ in range(num_items): - id, size = struct.unpack(b'>LL', extheader[pos:pos+8]) - content = extheader[pos + 8: pos + size] - if id in id_map_strings: - name = id_map_strings[id] - print('\n Key: "%s"\n Value: "%s"' % (name, content.decode(codec, errors='replace'))) - elif id in id_map_values: - name = id_map_values[id] - if size == 9: - value, = struct.unpack(b'B',content) - print('\n Key: "%s"\n Value: 0x%01x' % (name, value)) - elif size == 10: - value, = struct.unpack(b'>H',content) - print('\n Key: "%s"\n Value: 0x%02x' % (name, value)) - elif size == 12: - value, = struct.unpack(b'>L',content) - print('\n Key: "%s"\n Value: 0x%04x' % (name, value)) - else: - print("\nError: Value for %s has unexpected size of %s" % (name, size)) - elif id in id_map_hexstrings: - name = id_map_hexstrings[id] - print('\n Key: "%s"\n Value: 0x%s' % (name, hexlify(content))) - else: - print("\nWarning: Unknown metadata with id %s found" % id) - name = str(id) + ' (hex)' - print(' Key: "%s"\n Value: 0x%s' % (name, hexlify(content))) - pos += size - return - - -class MobiHeader: - # all values are packed in big endian format - palmdoc_header = { - 'compression_type' : (0x00, b'>H', 2), - 'fill0' : (0x02, b'>H', 2), - 'text_length' : (0x04, b'>L', 4), - 'text_records' : (0x08, b'>H', 2), - 'max_section_size' : (0x0a, b'>H', 2), - 'read_pos ' : (0x0c, b'>L', 4), - } - - mobi6_header = { - 'compression_type' : (0x00, b'>H', 2), - 'fill0' : (0x02, b'>H', 2), - 'text_length' : (0x04, b'>L', 4), - 'text_records' : (0x08, b'>H', 2), - 'max_section_size' : (0x0a, b'>H', 2), - 'crypto_type' : (0x0c, b'>H', 2), - 'fill1' : (0x0e, b'>H', 2), - 'magic' : (0x10, b'4s', 4), - 'header_length (from MOBI)' : (0x14, b'>L', 4), - 'type' : (0x18, b'>L', 4), - 'codepage' : (0x1c, b'>L', 4), - 'unique_id' : (0x20, b'>L', 4), - 'version' : (0x24, b'>L', 4), - 'metaorthindex' : (0x28, b'>L', 4), - 'metainflindex' : (0x2c, b'>L', 4), - 'index_names' : (0x30, 
b'>L', 4), - 'index_keys' : (0x34, b'>L', 4), - 'extra_index0' : (0x38, b'>L', 4), - 'extra_index1' : (0x3c, b'>L', 4), - 'extra_index2' : (0x40, b'>L', 4), - 'extra_index3' : (0x44, b'>L', 4), - 'extra_index4' : (0x48, b'>L', 4), - 'extra_index5' : (0x4c, b'>L', 4), - 'first_nontext' : (0x50, b'>L', 4), - 'title_offset' : (0x54, b'>L', 4), - 'title_length' : (0x58, b'>L', 4), - 'language_code' : (0x5c, b'>L', 4), - 'dict_in_lang' : (0x60, b'>L', 4), - 'dict_out_lang' : (0x64, b'>L', 4), - 'min_version' : (0x68, b'>L', 4), - 'first_resc_offset' : (0x6c, b'>L', 4), - 'huff_offset' : (0x70, b'>L', 4), - 'huff_num' : (0x74, b'>L', 4), - 'huff_tbl_offset' : (0x78, b'>L', 4), - 'huff_tbl_len' : (0x7c, b'>L', 4), - 'exth_flags' : (0x80, b'>L', 4), - 'fill3_a' : (0x84, b'>L', 4), - 'fill3_b' : (0x88, b'>L', 4), - 'fill3_c' : (0x8c, b'>L', 4), - 'fill3_d' : (0x90, b'>L', 4), - 'fill3_e' : (0x94, b'>L', 4), - 'fill3_f' : (0x98, b'>L', 4), - 'fill3_g' : (0x9c, b'>L', 4), - 'fill3_h' : (0xa0, b'>L', 4), - 'unknown0' : (0xa4, b'>L', 4), - 'drm_offset' : (0xa8, b'>L', 4), - 'drm_count' : (0xac, b'>L', 4), - 'drm_size' : (0xb0, b'>L', 4), - 'drm_flags' : (0xb4, b'>L', 4), - 'fill4_a' : (0xb8, b'>L', 4), - 'fill4_b' : (0xbc, b'>L', 4), - 'first_content' : (0xc0, b'>H', 2), - 'last_content' : (0xc2, b'>H', 2), - 'unknown0' : (0xc4, b'>L', 4), - 'fcis_offset' : (0xc8, b'>L', 4), - 'fcis_count' : (0xcc, b'>L', 4), - 'flis_offset' : (0xd0, b'>L', 4), - 'flis_count' : (0xd4, b'>L', 4), - 'unknown1' : (0xd8, b'>L', 4), - 'unknown2' : (0xdc, b'>L', 4), - 'srcs_offset' : (0xe0, b'>L', 4), - 'srcs_count' : (0xe4, b'>L', 4), - 'unknown3' : (0xe8, b'>L', 4), - 'unknown4' : (0xec, b'>L', 4), - 'fill5' : (0xf0, b'>H', 2), - 'traildata_flags' : (0xf2, b'>H', 2), - 'ncx_index' : (0xf4, b'>L', 4), - 'unknown5' : (0xf8, b'>L', 4), - 'unknown6' : (0xfc, b'>L', 4), - 'datp_offset' : (0x100, b'>L', 4), - 'unknown7' : (0x104, b'>L', 4), - 'Unknown ' : (0x108, b'>L', 4), - 'Unknown ' : (0x10C, b'>L', 4), - 'Unknown ' : (0x110, b'>L', 4), - 'Unknown ' : (0x114, b'>L', 4), - 'Unknown ' : (0x118, b'>L', 4), - 'Unknown ' : (0x11C, b'>L', 4), - 'Unknown ' : (0x120, b'>L', 4), - 'Unknown ' : (0x124, b'>L', 4), - 'Unknown ' : (0x128, b'>L', 4), - 'Unknown ' : (0x12C, b'>L', 4), - 'Unknown ' : (0x130, b'>L', 4), - 'Unknown ' : (0x134, b'>L', 4), - 'Unknown ' : (0x138, b'>L', 4), - 'Unknown ' : (0x11C, b'>L', 4), - } - - mobi8_header = { - 'compression_type' : (0x00, b'>H', 2), - 'fill0' : (0x02, b'>H', 2), - 'text_length' : (0x04, b'>L', 4), - 'text_records' : (0x08, b'>H', 2), - 'max_section_size' : (0x0a, b'>H', 2), - 'crypto_type' : (0x0c, b'>H', 2), - 'fill1' : (0x0e, b'>H', 2), - 'magic' : (0x10, b'4s', 4), - 'header_length (from MOBI)' : (0x14, b'>L', 4), - 'type' : (0x18, b'>L', 4), - 'codepage' : (0x1c, b'>L', 4), - 'unique_id' : (0x20, b'>L', 4), - 'version' : (0x24, b'>L', 4), - 'metaorthindex' : (0x28, b'>L', 4), - 'metainflindex' : (0x2c, b'>L', 4), - 'index_names' : (0x30, b'>L', 4), - 'index_keys' : (0x34, b'>L', 4), - 'extra_index0' : (0x38, b'>L', 4), - 'extra_index1' : (0x3c, b'>L', 4), - 'extra_index2' : (0x40, b'>L', 4), - 'extra_index3' : (0x44, b'>L', 4), - 'extra_index4' : (0x48, b'>L', 4), - 'extra_index5' : (0x4c, b'>L', 4), - 'first_nontext' : (0x50, b'>L', 4), - 'title_offset' : (0x54, b'>L', 4), - 'title_length' : (0x58, b'>L', 4), - 'language_code' : (0x5c, b'>L', 4), - 'dict_in_lang' : (0x60, b'>L', 4), - 'dict_out_lang' : (0x64, b'>L', 4), - 'min_version' : (0x68, b'>L', 4), - 'first_resc_offset' : 
(0x6c, b'>L', 4), - 'huff_offset' : (0x70, b'>L', 4), - 'huff_num' : (0x74, b'>L', 4), - 'huff_tbl_offset' : (0x78, b'>L', 4), - 'huff_tbl_len' : (0x7c, b'>L', 4), - 'exth_flags' : (0x80, b'>L', 4), - 'fill3_a' : (0x84, b'>L', 4), - 'fill3_b' : (0x88, b'>L', 4), - 'fill3_c' : (0x8c, b'>L', 4), - 'fill3_d' : (0x90, b'>L', 4), - 'fill3_e' : (0x94, b'>L', 4), - 'fill3_f' : (0x98, b'>L', 4), - 'fill3_g' : (0x9c, b'>L', 4), - 'fill3_h' : (0xa0, b'>L', 4), - 'unknown0' : (0xa4, b'>L', 4), - 'drm_offset' : (0xa8, b'>L', 4), - 'drm_count' : (0xac, b'>L', 4), - 'drm_size' : (0xb0, b'>L', 4), - 'drm_flags' : (0xb4, b'>L', 4), - 'fill4_a' : (0xb8, b'>L', 4), - 'fill4_b' : (0xbc, b'>L', 4), - 'fdst_offset' : (0xc0, b'>L', 4), - 'fdst_flow_count' : (0xc4, b'>L', 4), - 'fcis_offset' : (0xc8, b'>L', 4), - 'fcis_count' : (0xcc, b'>L', 4), - 'flis_offset' : (0xd0, b'>L', 4), - 'flis_count' : (0xd4, b'>L', 4), - 'unknown1' : (0xd8, b'>L', 4), - 'unknown2' : (0xdc, b'>L', 4), - 'srcs_offset' : (0xe0, b'>L', 4), - 'srcs_count' : (0xe4, b'>L', 4), - 'unknown3' : (0xe8, b'>L', 4), - 'unknown4' : (0xec, b'>L', 4), - 'fill5' : (0xf0, b'>H', 2), - 'traildata_flags' : (0xf2, b'>H', 2), - 'ncx_index' : (0xf4, b'>L', 4), - 'fragment_index' : (0xf8, b'>L', 4), - 'skeleton_index' : (0xfc, b'>L', 4), - 'datp_offset' : (0x100, b'>L', 4), - 'guide_index' : (0x104, b'>L', 4), - 'Unknown ' : (0x108, b'>L', 4), - 'Unknown ' : (0x10C, b'>L', 4), - 'Unknown ' : (0x110, b'>L', 4), - 'Unknown ' : (0x114, b'>L', 4), - 'Unknown ' : (0x118, b'>L', 4), - 'Unknown ' : (0x11C, b'>L', 4), - 'Unknown ' : (0x120, b'>L', 4), - 'Unknown ' : (0x124, b'>L', 4), - 'Unknown ' : (0x128, b'>L', 4), - 'Unknown ' : (0x12C, b'>L', 4), - 'Unknown ' : (0x130, b'>L', 4), - 'Unknown ' : (0x134, b'>L', 4), - 'Unknown ' : (0x138, b'>L', 4), - 'Unknown ' : (0x11C, b'>L', 4), - } - - palmdoc_header_sorted_keys = sortedHeaderKeys(palmdoc_header) - mobi6_header_sorted_keys = sortedHeaderKeys(mobi6_header) - mobi8_header_sorted_keys = sortedHeaderKeys(mobi8_header) - - id_map_strings = { - 1 : 'Drm Server Id', - 2 : 'Drm Commerce Id', - 3 : 'Drm Ebookbase Book Id', - 4 : 'Drm Ebookbase Dep Id', - 100 : 'Creator', - 101 : 'Publisher', - 102 : 'Imprint', - 103 : 'Description', - 104 : 'ISBN', - 105 : 'Subject', - 106 : 'Published', - 107 : 'Review', - 108 : 'Contributor', - 109 : 'Rights', - 110 : 'SubjectCode', - 111 : 'Type', - 112 : 'Source', - 113 : 'ASIN', - # 114 : 'versionNumber', - 117 : 'Adult', - 118 : 'Retail-Price', - 119 : 'Retail-Currency', - 120 : 'TSC', - 122 : 'fixed-layout', - 123 : 'book-type', - 124 : 'orientation-lock', - 126 : 'original-resolution', - 127 : 'zero-gutter', - 128 : 'zero-margin', - 129 : 'MetadataResourceURI', - 132 : 'RegionMagnification', - 150 : 'LendingEnabled', - 200 : 'DictShortName', - 501 : 'cdeType', - 502 : 'last_update_time', - 503 : 'Updated_Title', - 504 : 'CDEContentKey', - 505 : 'AmazonContentReference', - 506 : 'Title-Language', - 507 : 'Title-Display-Direction', - 508 : 'Title-Pronunciation', - 509 : 'Title-Collation', - 510 : 'Secondary-Title', - 511 : 'Secondary-Title-Language', - 512 : 'Secondary-Title-Direction', - 513 : 'Secondary-Title-Pronunciation', - 514 : 'Secondary-Title-Collation', - 515 : 'Author-Language', - 516 : 'Author-Display-Direction', - 517 : 'Author-Pronunciation', - 518 : 'Author-Collation', - 519 : 'Author-Type', - 520 : 'Publisher-Language', - 521 : 'Publisher-Display-Direction', - 522 : 'Publisher-Pronunciation', - 523 : 'Publisher-Collation', - 524 : 'Content-Language-Tag', - 525 : 
'primary-writing-mode', - 526 : 'NCX-Ingested-By-Software', - 527 : 'page-progression-direction', - 528 : 'override-kindle-fonts', - 529 : 'Compression-Upgraded', - 530 : 'Soft-Hyphens-In-Content', - 531 : 'Dictionary_In_Langague', - 532 : 'Dictionary_Out_Language', - 533 : 'Font_Converted', - 534 : 'Amazon_Creator_Info', - 535 : 'Creator-Build-Tag', - 536 : 'HD-Media-Containers-Info', # CONT_Header is 0, Ends with CONTAINER_BOUNDARY (or Asset_Type?) - 538 : 'Resource-Container-Fidelity', - 539 : 'HD-Container-Mimetype', - 540 : 'Sample-For_Special-Purpose', - 541 : 'Kindletool-Operation-Information', - 542 : 'Container_Id', - 543 : 'Asset-Type', # FONT_CONTAINER, BW_CONTAINER, HD_CONTAINER - 544 : 'Unknown_544', - } - id_map_values = { - 114 : 'versionNumber', - 115 : 'sample', - 116 : 'StartOffset', - 121 : 'Mobi8-Boundary-Section', - 125 : 'Embedded-Record-Count', - 130 : 'Offline-Sample', - 131 : 'Metadata-Record-Offset', - 201 : 'CoverOffset', - 202 : 'ThumbOffset', - 203 : 'HasFakeCover', - 204 : 'Creator-Software', - 205 : 'Creator-Major-Version', - 206 : 'Creator-Minor-Version', - 207 : 'Creator-Build-Number', - 401 : 'Clipping-Limit', - 402 : 'Publisher-Limit', - 404 : 'Text-to-Speech-Disabled', - 406 : 'Rental-Expiration-Time', - } - id_map_hexstrings = { - 208 : 'Watermark_(hex)', - 209 : 'Tamper-Proof-Keys_(hex)', - 300 : 'Font-Signature_(hex)', - 403 : 'Unknown_(403)_(hex)', - 405 : 'Ownership-Type_(hex)', - 407 : 'Unknown_(407)_(hex)', - 420 : 'Multimedia-Content-Reference_(hex)', - 450 : 'Locations_Match_(hex)', - 451 : 'Full-Story-Length_(hex)', - 452 : 'Sample-Start_Location_(hex)', - 453 : 'Sample-End-Location_(hex)', - } - - def __init__(self, sect, sectNumber): - self.sect = sect - self.start = sectNumber - self.header = self.sect.loadSection(self.start) - if len(self.header)>20 and self.header[16:20] == b'MOBI': - self.sect.setsectiondescription(0,"Mobipocket Header") - self.palm = False - elif self.sect.ident == b'TEXtREAd': - self.sect.setsectiondescription(0, "PalmDOC Header") - self.palm = True - else: - raise unpackException('Unknown File Format') - - self.records, = struct.unpack_from(b'>H', self.header, 0x8) - - # set defaults in case this is a PalmDOC - self.title = self.sect.palmname.decode('latin-1', errors='replace') - self.length = len(self.header)-16 - self.type = 3 - self.codepage = 1252 - self.codec = 'windows-1252' - self.unique_id = 0 - self.version = 0 - self.hasExth = False - self.exth = b'' - self.exth_offset = self.length + 16 - self.exth_length = 0 - self.crypto_type = 0 - self.firstnontext = self.start+self.records + 1 - self.firstresource = self.start+self.records + 1 - self.ncxidx = 0xffffffff - self.metaOrthIndex = 0xffffffff - self.metaInflIndex = 0xffffffff - self.skelidx = 0xffffffff - self.fragidx = 0xffffffff - self.guideidx = 0xffffffff - self.fdst = 0xffffffff - self.mlstart = self.sect.loadSection(self.start+1)[:4] - self.rawSize = 0 - self.metadata = dict_() - - # set up for decompression/unpacking - self.compression, = struct.unpack_from(b'>H', self.header, 0x0) - if self.compression == 0x4448: - reader = HuffcdicReader() - huffoff, huffnum = struct.unpack_from(b'>LL', self.header, 0x70) - huffoff = huffoff + self.start - self.sect.setsectiondescription(huffoff,"Huffman Compression Seed") - reader.loadHuff(self.sect.loadSection(huffoff)) - for i in range(1, huffnum): - self.sect.setsectiondescription(huffoff+i,"Huffman CDIC Compression Seed %d" % i) - reader.loadCdic(self.sect.loadSection(huffoff+i)) - self.unpack = reader.unpack - 
elif self.compression == 2: - self.unpack = PalmdocReader().unpack - elif self.compression == 1: - self.unpack = UncompressedReader().unpack - else: - raise unpackException('invalid compression type: 0x%4x' % self.compression) - - if self.palm: - return - - self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack(b'>LLLLL', self.header[20:40]) - codec_map = { - 1252 : 'windows-1252', - 65001: 'utf-8', - } - if self.codepage in codec_map: - self.codec = codec_map[self.codepage] - - # title - toff, tlen = struct.unpack(b'>II', self.header[0x54:0x5c]) - tend = toff + tlen - self.title=self.header[toff:tend].decode(self.codec, errors='replace') - - exth_flag, = struct.unpack(b'>L', self.header[0x80:0x84]) - self.hasExth = exth_flag & 0x40 - self.exth_offset = self.length + 16 - self.exth_length = 0 - if self.hasExth: - self.exth_length, = struct.unpack_from(b'>L', self.header, self.exth_offset+4) - self.exth_length = ((self.exth_length + 3)>>2)<<2 # round to next 4 byte boundary - self.exth = self.header[self.exth_offset:self.exth_offset+self.exth_length] - - # parse the exth / metadata - self.parseMetaData() - - # self.mlstart = self.sect.loadSection(self.start+1) - # self.mlstart = self.mlstart[0:4] - self.crypto_type, = struct.unpack_from(b'>H', self.header, 0xC) - - # Start sector for additional files such as images, fonts, resources, etc - # Can be missing so fall back to default set previously - ofst, = struct.unpack_from(b'>L', self.header, 0x6C) - if ofst != 0xffffffff: - self.firstresource = ofst + self.start - ofst, = struct.unpack_from(b'>L', self.header, 0x50) - if ofst != 0xffffffff: - self.firstnontext = ofst + self.start - - if self.isPrintReplica(): - return - - if self.version < 8: - # Dictionary metaOrthIndex - self.metaOrthIndex, = struct.unpack_from(b'>L', self.header, 0x28) - if self.metaOrthIndex != 0xffffffff: - self.metaOrthIndex += self.start - - # Dictionary metaInflIndex - self.metaInflIndex, = struct.unpack_from(b'>L', self.header, 0x2C) - if self.metaInflIndex != 0xffffffff: - self.metaInflIndex += self.start - - # handle older headers without any ncxindex info and later - # specifically 0xe4 headers - if self.length + 16 < 0xf8: - return - - # NCX Index - self.ncxidx, = struct.unpack(b'>L', self.header[0xf4:0xf8]) - if self.ncxidx != 0xffffffff: - self.ncxidx += self.start - - # K8 specific Indexes - if self.start != 0 or self.version == 8: - # Index into file skeletons in RawML - self.skelidx, = struct.unpack_from(b'>L', self.header, 0xfc) - if self.skelidx != 0xffffffff: - self.skelidx += self.start - - # Index into
sections in RawML - self.fragidx, = struct.unpack_from(b'>L', self.header, 0xf8) - if self.fragidx != 0xffffffff: - self.fragidx += self.start - - # Index into Other files - self.guideidx, = struct.unpack_from(b'>L', self.header, 0x104) - if self.guideidx != 0xffffffff: - self.guideidx += self.start - - # dictionaries do not seem to use the same approach in K8's - # so disable them - self.metaOrthIndex = 0xffffffff - self.metaInflIndex = 0xffffffff - - # need to use the FDST record to find out how to properly unpack - # the rawML into pieces - # it is simply a table of start and end locations for each flow piece - self.fdst, = struct.unpack_from(b'>L', self.header, 0xc0) - self.fdstcnt, = struct.unpack_from(b'>L', self.header, 0xc4) - # if cnt is 1 or less, fdst section mumber can be garbage - if self.fdstcnt <= 1: - self.fdst = 0xffffffff - if self.fdst != 0xffffffff: - self.fdst += self.start - # setting of fdst section description properly handled in mobi_kf8proc - - def dump_exth(self): - # determine text encoding - codec=self.codec - if (not self.hasExth) or (self.exth_length) == 0 or (self.exth == b''): - return - num_items, = struct.unpack(b'>L', self.exth[8:12]) - pos = 12 - print("Key Size Description Value") - for _ in range(num_items): - id, size = struct.unpack(b'>LL', self.exth[pos:pos+8]) - contentsize = size-8 - content = self.exth[pos + 8: pos + size] - if id in MobiHeader.id_map_strings: - exth_name = MobiHeader.id_map_strings[id] - print('{0: >3d} {1: >4d} {2: <30s} {3:s}'.format(id, contentsize, exth_name, content.decode(codec, errors='replace'))) - elif id in MobiHeader.id_map_values: - exth_name = MobiHeader.id_map_values[id] - if size == 9: - value, = struct.unpack(b'B',content) - print('{0:3d} byte {1:<30s} {2:d}'.format(id, exth_name, value)) - elif size == 10: - value, = struct.unpack(b'>H',content) - print('{0:3d} word {1:<30s} 0x{2:0>4X} ({2:d})'.format(id, exth_name, value)) - elif size == 12: - value, = struct.unpack(b'>L',content) - print('{0:3d} long {1:<30s} 0x{2:0>8X} ({2:d})'.format(id, exth_name, value)) - else: - print('{0: >3d} {1: >4d} {2: <30s} (0x{3:s})'.format(id, contentsize, "Bad size for "+exth_name, hexlify(content))) - elif id in MobiHeader.id_map_hexstrings: - exth_name = MobiHeader.id_map_hexstrings[id] - print('{0:3d} {1:4d} {2:<30s} 0x{3:s}'.format(id, contentsize, exth_name, hexlify(content))) - else: - exth_name = "Unknown EXTH ID {0:d}".format(id) - print("{0: >3d} {1: >4d} {2: <30s} 0x{3:s}".format(id, contentsize, exth_name, hexlify(content))) - pos += size - return - - def dumpheader(self): - # first 16 bytes are not part of the official mobiheader - # but we will treat it as such - # so section 0 is 16 (decimal) + self.length in total == at least 0x108 bytes for Mobi 8 headers - print("Dumping section %d, Mobipocket Header version: %d, total length %d" % (self.start,self.version, self.length+16)) - self.hdr = {} - # set it up for the proper header version - if self.version == 0: - self.mobi_header = MobiHeader.palmdoc_header - self.mobi_header_sorted_keys = MobiHeader.palmdoc_header_sorted_keys - elif self.version < 8: - self.mobi_header = MobiHeader.mobi6_header - self.mobi_header_sorted_keys = MobiHeader.mobi6_header_sorted_keys - else: - self.mobi_header = MobiHeader.mobi8_header - self.mobi_header_sorted_keys = MobiHeader.mobi8_header_sorted_keys - - # parse the header information - for key in self.mobi_header_sorted_keys: - (pos, format, tot_len) = self.mobi_header[key] - if pos < (self.length + 16): - val, = 
struct.unpack_from(format, self.header, pos) - self.hdr[key] = val - - if 'title_offset' in self.hdr: - title_offset = self.hdr['title_offset'] - title_length = self.hdr['title_length'] - else: - title_offset = 0 - title_length = 0 - if title_offset == 0: - title_offset = len(self.header) - title_length = 0 - self.title = self.sect.palmname.decode('latin-1', errors='replace') - else: - self.title = self.header[title_offset:title_offset+title_length].decode(self.codec, errors='replace') - # title record always padded with two nul bytes and then padded with nuls to next 4 byte boundary - title_length = ((title_length+2+3)>>2)<<2 - - self.extra1 = self.header[self.exth_offset+self.exth_length:title_offset] - self.extra2 = self.header[title_offset+title_length:] - - print("Mobipocket header from section %d" % self.start) - print(" Offset Value Hex Dec Description") - for key in self.mobi_header_sorted_keys: - (pos, format, tot_len) = self.mobi_header[key] - if pos < (self.length + 16): - if key != 'magic': - fmt_string = "0x{0:0>3X} ({0:3d}){1: >" + str(9-2*tot_len) +"s}0x{2:0>" + str(2*tot_len) + "X} {2:10d} {3:s}" - else: - self.hdr[key] = unicode_str(self.hdr[key]) - fmt_string = "0x{0:0>3X} ({0:3d}){2:>11s} {3:s}" - print(fmt_string.format(pos, " ",self.hdr[key], key)) - print("") - - if self.exth_length > 0: - print("EXTH metadata, offset %d, padded length %d" % (self.exth_offset,self.exth_length)) - self.dump_exth() - print("") - - if len(self.extra1) > 0: - print("Extra data between EXTH and Title, length %d" % len(self.extra1)) - print(hexlify(self.extra1)) - print("") - - if title_length > 0: - print("Title in header at offset %d, padded length %d: '%s'" %(title_offset,title_length,self.title)) - print("") - - if len(self.extra2) > 0: - print("Extra data between Title and end of header, length %d" % len(self.extra2)) - print(hexlify(self.extra2)) - print("") - - def isPrintReplica(self): - return self.mlstart[0:4] == b"%MOP" - - def isK8(self): - return self.start != 0 or self.version == 8 - - def isEncrypted(self): - return self.crypto_type != 0 - - def hasNCX(self): - return self.ncxidx != 0xffffffff - - def isDictionary(self): - return self.metaOrthIndex != 0xffffffff - - def getncxIndex(self): - return self.ncxidx - - def decompress(self, data): - return self.unpack(data) - - def Language(self): - langcode = struct.unpack(b'!L', self.header[0x5c:0x60])[0] - langid = langcode & 0xFF - sublangid = (langcode >> 8) & 0xFF - return getLanguage(langid, sublangid) - - def DictInLanguage(self): - if self.isDictionary(): - langcode = struct.unpack(b'!L', self.header[0x60:0x64])[0] - langid = langcode & 0xFF - sublangid = (langcode >> 10) & 0xFF - if langid != 0: - return getLanguage(langid, sublangid) - return False - - def DictOutLanguage(self): - if self.isDictionary(): - langcode = struct.unpack(b'!L', self.header[0x64:0x68])[0] - langid = langcode & 0xFF - sublangid = (langcode >> 10) & 0xFF - if langid != 0: - return getLanguage(langid, sublangid) - return False - - def getRawML(self): - def getSizeOfTrailingDataEntry(data): - num = 0 - for v in data[-4:]: - if bord(v) & 0x80: - num = 0 - num = (num << 7) | (bord(v) & 0x7f) - return num - def trimTrailingDataEntries(data): - for _ in range(trailers): - num = getSizeOfTrailingDataEntry(data) - data = data[:-num] - if multibyte: - num = (ord(data[-1:]) & 3) + 1 - data = data[:-num] - return data - multibyte = 0 - trailers = 0 - if self.sect.ident == b'BOOKMOBI': - mobi_length, = struct.unpack_from(b'>L', self.header, 0x14) - 
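Two statements down, getRawML unpacks the trail-data flag word at offset 0xF2 and then loops over its bits; restated on its own (count_trailing_entries is a hypothetical name), the rule is: bit 0 marks the multibyte-overlap bytes at each record's end, and every other set bit adds one trailing data entry to strip.

    def count_trailing_entries(traildata_flags):
        # bit 0: multibyte overlap bytes present at the record end;
        # each remaining set bit contributes one trailing data entry
        multibyte = traildata_flags & 1
        trailers = 0
        flags = traildata_flags >> 1
        while flags:
            if flags & 1:
                trailers += 1
            flags >>= 1
        return multibyte, trailers
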
mobi_version, = struct.unpack_from(b'>L', self.header, 0x68) - if (mobi_length >= 0xE4) and (mobi_version >= 5): - flags, = struct.unpack_from(b'>H', self.header, 0xF2) - multibyte = flags & 1 - while flags > 1: - if flags & 2: - trailers += 1 - flags = flags >> 1 - # get raw mobi markup languge - print("Unpacking raw markup language") - dataList = [] - # offset = 0 - for i in range(1, self.records+1): - data = trimTrailingDataEntries(self.sect.loadSection(self.start + i)) - dataList.append(self.unpack(data)) - if self.isK8(): - self.sect.setsectiondescription(self.start + i,"KF8 Text Section {0:d}".format(i)) - elif self.version == 0: - self.sect.setsectiondescription(self.start + i,"PalmDOC Text Section {0:d}".format(i)) - else: - self.sect.setsectiondescription(self.start + i,"Mobipocket Text Section {0:d}".format(i)) - rawML = b''.join(dataList) - self.rawSize = len(rawML) - return rawML - - # all metadata is stored in a dictionary with key and returns a *list* of values - # a list is used to allow for multiple creators, multiple contributors, etc - def parseMetaData(self): - def addValue(name, value): - if name not in self.metadata: - self.metadata[name] = [value] - else: - self.metadata[name].append(value) - - codec=self.codec - if self.hasExth: - extheader=self.exth - _length, num_items = struct.unpack(b'>LL', extheader[4:12]) - extheader = extheader[12:] - pos = 0 - for _ in range(num_items): - id, size = struct.unpack(b'>LL', extheader[pos:pos+8]) - content = extheader[pos + 8: pos + size] - if id in MobiHeader.id_map_strings: - name = MobiHeader.id_map_strings[id] - addValue(name, content.decode(codec, errors='replace')) - elif id in MobiHeader.id_map_values: - name = MobiHeader.id_map_values[id] - if size == 9: - value, = struct.unpack(b'B',content) - addValue(name, unicode_str(str(value))) - elif size == 10: - value, = struct.unpack(b'>H',content) - addValue(name, unicode_str(str(value))) - elif size == 12: - value, = struct.unpack(b'>L',content) - # handle special case of missing CoverOffset or missing ThumbOffset - if id == 201 or id == 202: - if value != 0xffffffff: - addValue(name, unicode_str(str(value))) - else: - addValue(name, unicode_str(str(value))) - else: - print("Warning: Bad key, size, value combination detected in EXTH ", id, size, hexlify(content)) - addValue(name, hexlify(content)) - elif id in MobiHeader.id_map_hexstrings: - name = MobiHeader.id_map_hexstrings[id] - addValue(name, hexlify(content)) - else: - name = unicode_str(str(id)) + ' (hex)' - addValue(name, hexlify(content)) - pos += size - - # add the basics to the metadata each as a list element - self.metadata['Language'] = [self.Language()] - self.metadata['Title'] = [unicode_str(self.title,self.codec)] - self.metadata['Codec'] = [self.codec] - self.metadata['UniqueID'] = [unicode_str(str(self.unique_id))] - # if no asin create one using a uuid - if 'ASIN' not in self.metadata: - self.metadata['ASIN'] = [unicode_str(str(uuid.uuid4()))] - # if no cdeType set it to "EBOK" - if 'cdeType' not in self.metadata: - self.metadata['cdeType'] = ['EBOK'] - - def getMetaData(self): - return self.metadata - - def describeHeader(self, DUMP): - print("Mobi Version:", self.version) - print("Codec:", self.codec) - print("Title:", self.title) - if 'Updated_Title' in self.metadata: - print("EXTH Title:", self.metadata['Updated_Title'][0]) - if self.compression == 0x4448: - print("Huffdic compression") - elif self.compression == 2: - print("Palmdoc compression") - elif self.compression == 1: - print("No compression") - 
if DUMP: - self.dumpheader() diff --git a/epy_extras/KindleUnpack/mobi_html.py b/epy_extras/KindleUnpack/mobi_html.py deleted file mode 100644 index eda766c..0000000 --- a/epy_extras/KindleUnpack/mobi_html.py +++ /dev/null @@ -1,439 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -from .compatibility_utils import PY2, utf8_str - -if PY2: - range = xrange - -import re -# note: re requires the pattern to be the exact same type as the data to be searched in python3 -# but u"" is not allowed for the pattern itself only b"" - -from .mobi_utils import fromBase32 - -class HTMLProcessor: - - def __init__(self, files, metadata, rscnames): - self.files = files - self.metadata = metadata - self.rscnames = rscnames - # for original style mobis, default to including all image files in the opf manifest - self.used = {} - for name in rscnames: - self.used[name] = 'used' - - def findAnchors(self, rawtext, indx_data, positionMap): - # process the raw text - # find anchors... - print("Find link anchors") - link_pattern = re.compile(br'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''', re.IGNORECASE) - # TEST NCX: merge in filepos from indx - pos_links = [int(m.group(1)) for m in link_pattern.finditer(rawtext)] - if indx_data: - pos_indx = [e['pos'] for e in indx_data if e['pos']>0] - pos_links = list(set(pos_links + pos_indx)) - - for position in pos_links: - if position in positionMap: - positionMap[position] = positionMap[position] + utf8_str('<a id="filepos%d" />' % position) - else: - positionMap[position] = utf8_str('<a id="filepos%d" />' % position) - - # apply dictionary metadata and anchors - print("Insert data into html") - pos = 0 - lastPos = len(rawtext) - dataList = [] - for end in sorted(positionMap.keys()): - if end == 0 or end > lastPos: - continue # something's up - can't put a tag in outside <html>...</html>
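The splice performed by the loop in progress here is easier to see with toy data; a standalone rendition of the same technique (names invented for the demo):

    raw = b'0123456789'
    position_map = {3: b'[a]', 7: b'[b]'}   # offset -> queued markup

    pieces, pos = [], 0
    for end in sorted(position_map):
        pieces.append(raw[pos:end])        # untouched text up to the anchor
        pieces.append(position_map[end])   # markup queued for that filepos
        pos = end
    pieces.append(raw[pos:])
    assert b''.join(pieces) == b'012[a]3456[b]789'
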
- dataList.append(rawtext[pos:end]) - dataList.append(positionMap[end]) - pos = end - dataList.append(rawtext[pos:]) - srctext = b"".join(dataList) - rawtext = None - dataList = None - self.srctext = srctext - self.indx_data = indx_data - return srctext - - def insertHREFS(self): - srctext = self.srctext - rscnames = self.rscnames - metadata = self.metadata - - # put in the hrefs - print("Insert hrefs into html") - # There doesn't seem to be a standard, so search as best as we can - - link_pattern = re.compile(br''']*?)filepos=['"]{0,1}0*(\d+)['"]{0,1}([^>]*?)>''', re.IGNORECASE) - srctext = link_pattern.sub(br'''''', srctext) - - # remove empty anchors - print("Remove empty anchors from html") - srctext = re.sub(br"",br"", srctext) - srctext = re.sub(br"\s*",br"", srctext) - - # convert image references - print("Insert image references into html") - # split string into image tag pieces and other pieces - image_pattern = re.compile(br'''()''', re.IGNORECASE) - image_index_pattern = re.compile(br'''recindex=['"]{0,1}([0-9]+)['"]{0,1}''', re.IGNORECASE) - srcpieces = image_pattern.split(srctext) - srctext = self.srctext = None - - # all odd pieces are image tags (nulls string on even pieces if no space between them in srctext) - for i in range(1, len(srcpieces), 2): - tag = srcpieces[i] - for m in image_index_pattern.finditer(tag): - imageNumber = int(m.group(1)) - imageName = rscnames[imageNumber-1] - if imageName is None: - print("Error: Referenced image %s was not recognized as a valid image" % imageNumber) - else: - replacement = b'src="Images/' + utf8_str(imageName) + b'"' - tag = image_index_pattern.sub(replacement, tag, 1) - srcpieces[i] = tag - srctext = b"".join(srcpieces) - - # add in character set meta into the html header if needed - if 'Codec' in metadata: - srctext = srctext[0:12]+b''+srctext[12:] - return srctext, self.used - - -class XHTMLK8Processor: - - def __init__(self, rscnames, k8proc): - self.rscnames = rscnames - self.k8proc = k8proc - self.used = {} - - def buildXHTML(self): - - # first need to update all links that are internal which - # are based on positions within the xhtml files **BEFORE** - # cutting and pasting any pieces into the xhtml text files - - # kindle:pos:fid:XXXX:off:YYYYYYYYYY (used for internal link within xhtml) - # XXXX is the offset in records into divtbl - # YYYYYYYYYYYY is a base32 number you add to the divtbl insertpos to get final position - - # pos:fid pattern - posfid_pattern = re.compile(br'''()''', re.IGNORECASE) - posfid_index_pattern = re.compile(br'''['"]kindle:pos:fid:([0-9|A-V]+):off:([0-9|A-V]+).*?["']''') - - parts = [] - print("Building proper xhtml for each file") - for i in range(self.k8proc.getNumberOfParts()): - part = self.k8proc.getPart(i) - [partnum, dir, filename, beg, end, aidtext] = self.k8proc.getPartInfo(i) - - # internal links - srcpieces = posfid_pattern.split(part) - for j in range(1, len(srcpieces),2): - tag = srcpieces[j] - if tag.startswith(b'<'): - for m in posfid_index_pattern.finditer(tag): - posfid = m.group(1) - offset = m.group(2) - filename, idtag = self.k8proc.getIDTagByPosFid(posfid, offset) - if idtag == b'': - replacement= b'"' + utf8_str(filename) + b'"' - else: - replacement = b'"' + utf8_str(filename) + b'#' + idtag + b'"' - tag = posfid_index_pattern.sub(replacement, tag, 1) - srcpieces[j] = tag - part = b"".join(srcpieces) - parts.append(part) - - # we are free to cut and paste as we see fit - # we can safely remove all of the Kindlegen generated aid tags - # change aid ids that are in 
k8proc.linked_aids to xhtml ids - find_tag_with_aid_pattern = re.compile(br'''(<[^>]*\said\s*=[^>]*>)''', re.IGNORECASE) - within_tag_aid_position_pattern = re.compile(br'''\said\s*=['"]([^'"]*)['"]''') - for i in range(len(parts)): - part = parts[i] - srcpieces = find_tag_with_aid_pattern.split(part) - for j in range(len(srcpieces)): - tag = srcpieces[j] - if tag.startswith(b'<'): - for m in within_tag_aid_position_pattern.finditer(tag): - try: - aid = m.group(1) - except IndexError: - aid = None - replacement = b'' - if aid in self.k8proc.linked_aids: - replacement = b' id="aid-' + aid + b'"' - tag = within_tag_aid_position_pattern.sub(replacement, tag, 1) - srcpieces[j] = tag - part = b"".join(srcpieces) - parts[i] = part - - # we can safely replace all of the Kindlegen generated data-AmznPageBreak tags - # with page-break-after style patterns - find_tag_with_AmznPageBreak_pattern = re.compile(br'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE) - within_tag_AmznPageBreak_position_pattern = re.compile(br'''\sdata-AmznPageBreak=['"]([^'"]*)['"]''') - for i in range(len(parts)): - part = parts[i] - srcpieces = find_tag_with_AmznPageBreak_pattern.split(part) - for j in range(len(srcpieces)): - tag = srcpieces[j] - if tag.startswith(b'<'): - srcpieces[j] = within_tag_AmznPageBreak_position_pattern.sub( - lambda m:b' style="page-break-after:' + m.group(1) + b'"', tag) - part = b"".join(srcpieces) - parts[i] = part - - # we have to handle substitutions for the flows pieces first as they may - # be inlined into the xhtml text - # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images) - # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc) - # kindle:embed:XXXX (used for fonts) - - flows = [] - flows.append(None) - flowinfo = [] - flowinfo.append([None, None, None, None]) - - # regular expression search patterns - img_pattern = re.compile(br'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE) - img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)[^'"]*['")]''', re.IGNORECASE) - - tag_pattern = re.compile(br'''(<[^>]*>)''') - flow_pattern = re.compile(br'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE) - - url_pattern = re.compile(br'''(url\(.*?\))''', re.IGNORECASE) - url_img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)\?mime=image/[^\)]*["')]''', re.IGNORECASE) - font_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)["')]''', re.IGNORECASE) - url_css_index_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE) - url_svg_image_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=image/svg\+xml[^\)]*''', re.IGNORECASE) - - for i in range(1, self.k8proc.getNumberOfFlows()): - [ftype, format, dir, filename] = self.k8proc.getFlowInfo(i) - flowpart = self.k8proc.getFlow(i) - - # links to raster image files from image tags - # image_pattern - srcpieces = img_pattern.split(flowpart) - for j in range(1, len(srcpieces),2): - tag = srcpieces[j] - if tag.startswith(b']*>)''') - flow_pattern = re.compile(br'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE) - for i in range(len(parts)): - part = parts[i] - [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] - # flow pattern - srcpieces = tag_pattern.split(part) - for j in range(1, len(srcpieces),2): - tag = srcpieces[j] - if tag.startswith(b'<'): - for m in flow_pattern.finditer(tag): - num = fromBase32(m.group(1)) - if num > 0 and num < len(self.k8proc.flowinfo): - [typ, fmt, pdir, 
fnm] = self.k8proc.getFlowInfo(num) - flowpart = flows[num] - if fmt == b'inline': - tag = flowpart - else: - replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"' - tag = flow_pattern.sub(replacement, tag, 1) - self.used[fnm] = 'used' - else: - print("warning: ignoring non-existent flow link", tag, " value 0x%x" % num) - srcpieces[j] = tag - part = b''.join(srcpieces) - - # store away modified version - parts[i] = part - - # Handle any embedded raster images links in style= attributes urls - style_pattern = re.compile(br'''(<[a-zA-Z0-9]+\s[^>]*style\s*=\s*[^>]*>)''', re.IGNORECASE) - img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)[^'"]*['")]''', re.IGNORECASE) - - for i in range(len(parts)): - part = parts[i] - [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] - - # replace urls in style attributes - srcpieces = style_pattern.split(part) - for j in range(1, len(srcpieces),2): - tag = srcpieces[j] - if b'kindle:embed' in tag: - for m in img_index_pattern.finditer(tag): - imageNumber = fromBase32(m.group(1)) - imageName = self.rscnames[imageNumber-1] - osep = m.group()[0:1] - csep = m.group()[-1:] - if imageName is not None: - replacement = osep + b'../Images/'+ utf8_str(imageName) + csep - self.used[imageName] = 'used' - tag = img_index_pattern.sub(replacement, tag, 1) - else: - print("Error: Referenced image %s in style url was not recognized in %s" % (imageNumber, tag)) - srcpieces[j] = tag - part = b"".join(srcpieces) - - # store away modified version - parts[i] = part - - # Handle any embedded raster images links in the xhtml text - # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images) - img_pattern = re.compile(br'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE) - img_index_pattern = re.compile(br'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''') - - for i in range(len(parts)): - part = parts[i] - [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] - - # links to raster image files - # image_pattern - srcpieces = img_pattern.split(part) - for j in range(1, len(srcpieces),2): - tag = srcpieces[j] - if tag.startswith(b' remove value="XX" attributes since these are illegal - tag_pattern = re.compile(br'''(<[^>]*>)''') - li_value_pattern = re.compile(br'''\svalue\s*=\s*['"][^'"]*['"]''', re.IGNORECASE) - - for i in range(len(parts)): - part = parts[i] - [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] - - # tag pattern - srcpieces = tag_pattern.split(part) - for j in range(1, len(srcpieces),2): - tag = srcpieces[j] - if tag.startswith(b'H', data, idxtPos + 4 + (2 * j)) - idxPositions.append(pos) - # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!) 
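# Aside: a standalone sketch of the IDXT slicing idea used above. IDXT stores
# big-endian 16-bit entry offsets after its 4-byte tag, and consecutive
# offsets (with the IDXT position itself as the final sentinel) delimit the
# index entries. The sample blob below is invented for demonstration.
import struct

def slice_entries(data: bytes, idxt_pos: int, entry_count: int) -> list:
    positions = []
    for j in range(entry_count):
        pos, = struct.unpack_from(b'>H', data, idxt_pos + 4 + 2 * j)
        positions.append(pos)
    positions.append(idxt_pos)  # last entry ends where the IDXT section begins
    return [data[positions[j]:positions[j + 1]] for j in range(entry_count)]

blob = b'\x00' * 4 + b'AB' + b'IDXT' + struct.pack(b'>H', 4)
assert slice_entries(blob, 6, 1) == [b'AB']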
- idxPositions.append(idxtPos) - # for each entry in the IDXT build up the tagMap and any associated text - for j in range(entryCount): - startPos = idxPositions[j] - endPos = idxPositions[j+1] - textLength = ord(data[startPos:startPos+1]) - text = data[startPos+1:startPos+1+textLength] - if hordt2 is not None: - text = b''.join(bchr(hordt2[bord(x)]) for x in text) - tagMap = getTagMap(controlByteCount, tagTable, data, startPos+1+textLength, endPos) - outtbl.append([text, tagMap]) - if self.DEBUG: - print(tagMap) - print(text) - return outtbl, ctoc_text - - def parseINDXHeader(self, data): - "read INDX header" - if not data[:4] == b'INDX': - print("Warning: index section is not INDX") - return False - words = ( - 'len', 'nul1', 'type', 'gen', 'start', 'count', 'code', - 'lng', 'total', 'ordt', 'ligt', 'nligt', 'nctoc' - ) - num = len(words) - values = struct.unpack(bstr('>%dL' % num), data[4:4*(num+1)]) - header = {} - for n in range(num): - header[words[n]] = values[n] - - ordt1 = None - ordt2 = None - - ocnt, oentries, op1, op2, otagx = struct.unpack_from(b'>LLLLL',data, 0xa4) - if header['code'] == 0xfdea or ocnt != 0 or oentries > 0: - # horribly hacked up ESP (sample) mobi books use two ORDT sections but never specify - # them in the proper place in the header. They seem to be codepage 65002 which seems - # to be some sort of strange EBCDIC utf-8 or 16 encoded strings - - # so we need to look for them and store them away to process leading text - # ORDT1 has 1 byte long entries, ORDT2 has 2 byte long entries - # we only ever seem to use the seocnd but ... - assert(ocnt == 1) - assert(data[op1:op1+4] == b'ORDT') - assert(data[op2:op2+4] == b'ORDT') - ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1+4) - ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2+4) - - if self.DEBUG: - print("parsed INDX header:") - for n in words: - print(n, "%X" % header[n],) - print("") - return header, ordt1, ordt2 - - def readCTOC(self, txtdata): - # read all blocks from CTOC - ctoc_data = {} - offset = 0 - while offset next bytes: name - name = txtdata[offset:offset+ilen] - offset += ilen - if self.DEBUG: - print("name length is ", ilen) - print(idx_offs, name) - ctoc_data[idx_offs] = name - return ctoc_data - - -def getVariableWidthValue(data, offset): - ''' - Decode variable width value from given bytes. - - @param data: The bytes to decode. - @param offset: The start offset into data. - @return: Tuple of consumed bytes count and decoded value. - ''' - value = 0 - consumed = 0 - finished = False - while not finished: - v = data[offset + consumed: offset + consumed + 1] - consumed += 1 - if ord(v) & 0x80: - finished = True - value = (value << 7) | (ord(v) & 0x7f) - return consumed, value - - -def readTagSection(start, data): - ''' - Read tag section from given data. - - @param start: The start position in the data. - @param data: The data to process. - @return: Tuple of control byte count and list of tag tuples. - ''' - controlByteCount = 0 - tags = [] - if data[start:start+4] == b"TAGX": - firstEntryOffset, = struct.unpack_from(b'>L', data, start + 0x04) - controlByteCount, = struct.unpack_from(b'>L', data, start + 0x08) - - # Skip the first 12 bytes already read above. - for i in range(12, firstEntryOffset, 4): - pos = start + i - tags.append((ord(data[pos:pos+1]), ord(data[pos+1:pos+2]), ord(data[pos+2:pos+3]), ord(data[pos+3:pos+4]))) - return controlByteCount, tags - - -def countSetBits(value, bits=8): - ''' - Count the set bits in the given value. 
- - @param value: Integer value. - @param bits: The number of bits of the input value (defaults to 8). - @return: Number of set bits. - ''' - count = 0 - for _ in range(bits): - if value & 0x01 == 0x01: - count += 1 - value = value >> 1 - return count - - -def getTagMap(controlByteCount, tagTable, entryData, startPos, endPos): - ''' - Create a map of tags and values from the given byte section. - - @param controlByteCount: The number of control bytes. - @param tagTable: The tag table. - @param entryData: The data to process. - @param startPos: The starting position in entryData. - @param endPos: The end position in entryData or None if it is unknown. - @return: Hashmap of tag and list of values. - ''' - tags = [] - tagHashMap = {} - controlByteIndex = 0 - dataStart = startPos + controlByteCount - - for tag, valuesPerEntry, mask, endFlag in tagTable: - if endFlag == 0x01: - controlByteIndex += 1 - continue - cbyte = ord(entryData[startPos + controlByteIndex:startPos + controlByteIndex+1]) - if 0: - print("Control Byte Index %0x , Control Byte Value %0x" % (controlByteIndex, cbyte)) - - value = ord(entryData[startPos + controlByteIndex:startPos + controlByteIndex+1]) & mask - if value != 0: - if value == mask: - if countSetBits(mask) > 1: - # If all bits of masked value are set and the mask has more than one bit, a variable width value - # will follow after the control bytes which defines the length of bytes (NOT the value count!) - # which will contain the corresponding variable width values. - consumed, value = getVariableWidthValue(entryData, dataStart) - dataStart += consumed - tags.append((tag, None, value, valuesPerEntry)) - else: - tags.append((tag, 1, None, valuesPerEntry)) - else: - # Shift bits to get the masked value. - while mask & 0x01 == 0: - mask = mask >> 1 - value = value >> 1 - tags.append((tag, value, None, valuesPerEntry)) - for tag, valueCount, valueBytes, valuesPerEntry in tags: - values = [] - if valueCount is not None: - # Read valueCount * valuesPerEntry variable width values. - for _ in range(valueCount): - for _ in range(valuesPerEntry): - consumed, data = getVariableWidthValue(entryData, dataStart) - dataStart += consumed - values.append(data) - else: - # Convert valueBytes to variable width values. - totalConsumed = 0 - while totalConsumed < valueBytes: - # Does this work for valuesPerEntry != 1? - consumed, data = getVariableWidthValue(entryData, dataStart) - dataStart += consumed - totalConsumed += consumed - values.append(data) - if totalConsumed != valueBytes: - print("Error: Should consume %s bytes, but consumed %s" % (valueBytes, totalConsumed)) - tagHashMap[tag] = values - # Test that all bytes have been processed if endPos is given. - if endPos is not None and dataStart != endPos: - # The last entry might have some zero padding bytes, so complain only if non zero bytes are left. 
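# Aside: the variable-width integers consumed above (via getVariableWidthValue)
# pack 7 value bits per byte; the byte whose high bit (0x80) is set terminates
# the value. A minimal re-implementation of that decoding rule:
def decode_varwidth(data: bytes, offset: int = 0):
    value = 0
    consumed = 0
    while True:
        b = data[offset + consumed]
        consumed += 1
        value = (value << 7) | (b & 0x7F)
        if b & 0x80:  # high bit set -> final byte of this value
            return consumed, value

assert decode_varwidth(bytes([0x01, 0x81])) == (2, 0x81)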
- for char in entryData[dataStart:endPos]: - if bord(char) != 0: - print("Warning: There are unprocessed index bytes left: %s" % toHex(entryData[dataStart:endPos])) - if 0: - print("controlByteCount: %s" % controlByteCount) - print("tagTable: %s" % tagTable) - print("data: %s" % toHex(entryData[startPos:endPos])) - print("tagHashMap: %s" % tagHashMap) - break - - return tagHashMap diff --git a/epy_extras/KindleUnpack/mobi_k8proc.py b/epy_extras/KindleUnpack/mobi_k8proc.py deleted file mode 100644 index 5b8274e..0000000 --- a/epy_extras/KindleUnpack/mobi_k8proc.py +++ /dev/null @@ -1,496 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -from .compatibility_utils import PY2, bstr, utf8_str - -if PY2: - range = xrange - -import os - -import struct -# note: struct pack, unpack, unpack_from all require bytestring format -# data all the way up to at least python 2.7.5, python 3 okay with bytestring - -import re -# note: re requites the pattern to be the exact same type as the data to be searched in python3 -# but u"" is not allowed for the pattern itself only b"" - -from .mobi_index import MobiIndex -from .mobi_utils import fromBase32 -from .unipath import pathof - -_guide_types = [b'cover',b'title-page',b'toc',b'index',b'glossary',b'acknowledgements', - b'bibliography',b'colophon',b'copyright-page',b'dedication', - b'epigraph',b'foreward',b'loi',b'lot',b'notes',b'preface',b'text'] - -# locate beginning and ending positions of tag with specific aid attribute -def locate_beg_end_of_tag(ml, aid): - pattern = utf8_str(r'''<[^>]*\said\s*=\s*['"]%s['"][^>]*>''' % aid) - aid_pattern = re.compile(pattern,re.IGNORECASE) - for m in re.finditer(aid_pattern, ml): - plt = m.start() - pgt = ml.find(b'>',plt+1) - return plt, pgt - return 0, 0 - - -# iterate over all tags in block in reverse order, i.e. 
last ta to first tag -def reverse_tag_iter(block): - end = len(block) - while True: - pgt = block.rfind(b'>', 0, end) - if pgt == -1: - break - plt = block.rfind(b'<', 0, pgt) - if plt == -1: - break - yield block[plt:pgt+1] - end = plt - - -class K8Processor: - - def __init__(self, mh, sect, files, debug=False): - self.sect = sect - self.files = files - self.mi = MobiIndex(sect) - self.mh = mh - self.skelidx = mh.skelidx - self.fragidx = mh.fragidx - self.guideidx = mh.guideidx - self.fdst = mh.fdst - self.flowmap = {} - self.flows = None - self.flowinfo = [] - self.parts = None - self.partinfo = [] - self.linked_aids = set() - self.fdsttbl= [0,0xffffffff] - self.DEBUG = debug - - # read in and parse the FDST info which is very similar in format to the Palm DB section - # parsing except it provides offsets into rawML file and not the Palm DB file - # this is needed to split up the final css, svg, etc flow section - # that can exist at the end of the rawML file - if self.fdst != 0xffffffff: - header = self.sect.loadSection(self.fdst) - if header[0:4] == b"FDST": - num_sections, = struct.unpack_from(b'>L', header, 0x08) - self.fdsttbl = struct.unpack_from(bstr('>%dL' % (num_sections*2)), header, 12)[::2] + (mh.rawSize, ) - sect.setsectiondescription(self.fdst,"KF8 FDST INDX") - if self.DEBUG: - print("\nFDST Section Map: %d sections" % num_sections) - for j in range(num_sections): - print("Section %d: 0x%08X - 0x%08X" % (j, self.fdsttbl[j],self.fdsttbl[j+1])) - else: - print("\nError: K8 Mobi with Missing FDST info") - - # read/process skeleton index info to create the skeleton table - skeltbl = [] - if self.skelidx != 0xffffffff: - # for i in range(2): - # fname = 'skel%04d.dat' % i - # data = self.sect.loadSection(self.skelidx + i) - # with open(pathof(fname), 'wb') as f: - # f.write(data) - outtbl, ctoc_text = self.mi.getIndexData(self.skelidx, "KF8 Skeleton") - fileptr = 0 - for [text, tagMap] in outtbl: - # file number, skeleton name, fragtbl record count, start position, length - skeltbl.append([fileptr, text, tagMap[1][0], tagMap[6][0], tagMap[6][1]]) - fileptr += 1 - self.skeltbl = skeltbl - if self.DEBUG: - print("\nSkel Table: %d entries" % len(self.skeltbl)) - print("table: filenum, skeleton name, frag tbl record count, start position, length") - for j in range(len(self.skeltbl)): - print(self.skeltbl[j]) - - # read/process the fragment index to create the fragment table - fragtbl = [] - if self.fragidx != 0xffffffff: - # for i in range(3): - # fname = 'frag%04d.dat' % i - # data = self.sect.loadSection(self.fragidx + i) - # with open(pathof(fname), 'wb') as f: - # f.write(data) - outtbl, ctoc_text = self.mi.getIndexData(self.fragidx, "KF8 Fragment") - for [text, tagMap] in outtbl: - # insert position, ctoc offset (aidtext), file number, sequence number, start position, length - ctocoffset = tagMap[2][0] - ctocdata = ctoc_text[ctocoffset] - fragtbl.append([int(text), ctocdata, tagMap[3][0], tagMap[4][0], tagMap[6][0], tagMap[6][1]]) - self.fragtbl = fragtbl - if self.DEBUG: - print("\nFragment Table: %d entries" % len(self.fragtbl)) - print("table: file position, link id text, file num, sequence number, start position, length") - for j in range(len(self.fragtbl)): - print(self.fragtbl[j]) - - # read / process guide index for guide elements of opf - guidetbl = [] - if self.guideidx != 0xffffffff: - # for i in range(3): - # fname = 'guide%04d.dat' % i - # data = self.sect.loadSection(self.guideidx + i) - # with open(pathof(fname), 'wb') as f: - # f.write(data) - outtbl, ctoc_text = 
self.mi.getIndexData(self.guideidx, "KF8 Guide elements)") - for [text, tagMap] in outtbl: - # ref_type, ref_title, frag number - ctocoffset = tagMap[1][0] - ref_title = ctoc_text[ctocoffset] - ref_type = text - fileno = None - if 3 in tagMap: - fileno = tagMap[3][0] - if 6 in tagMap: - fileno = tagMap[6][0] - guidetbl.append([ref_type, ref_title, fileno]) - self.guidetbl = guidetbl - if self.DEBUG: - print("\nGuide Table: %d entries" % len(self.guidetbl)) - print("table: ref_type, ref_title, fragtbl entry number") - for j in range(len(self.guidetbl)): - print(self.guidetbl[j]) - - def buildParts(self, rawML): - # now split the rawML into its flow pieces - self.flows = [] - for j in range(0, len(self.fdsttbl)-1): - start = self.fdsttbl[j] - end = self.fdsttbl[j+1] - self.flows.append(rawML[start:end]) - - # the first piece represents the xhtml text - text = self.flows[0] - self.flows[0] = b'' - - # walk the and fragment tables to build original source xhtml files - # *without* destroying any file position information needed for later href processing - # and create final list of file separation start: stop points and etc in partinfo - if self.DEBUG: - print("\nRebuilding flow piece 0: the main body of the ebook") - self.parts = [] - self.partinfo = [] - fragptr = 0 - baseptr = 0 - cnt = 0 - filename = 'part%04d.xhtml' % cnt - for [skelnum, skelname, fragcnt, skelpos, skellen] in self.skeltbl: - baseptr = skelpos + skellen - skeleton = text[skelpos: baseptr] - aidtext = "0" - for i in range(fragcnt): - [insertpos, idtext, filenum, seqnum, startpos, length] = self.fragtbl[fragptr] - aidtext = idtext[12:-2] - if i == 0: - filename = 'part%04d.xhtml' % filenum - slice = text[baseptr: baseptr + length] - insertpos = insertpos - skelpos - head = skeleton[:insertpos] - tail = skeleton[insertpos:] - actual_inspos = insertpos - if (tail.find(b'>') < tail.find(b'<') or head.rfind(b'>') < head.rfind(b'<')): - # There is an incomplete tag in either the head or tail. - # This can happen for some badly formed KF8 files - print('The fragment table for %s has incorrect insert position. Calculating manually.' % skelname) - bp, ep = locate_beg_end_of_tag(skeleton, aidtext) - if bp != ep: - actual_inspos = ep + 1 + startpos - if insertpos != actual_inspos: - print("fixed corrupt fragment table insert position", insertpos+skelpos, actual_inspos+skelpos) - insertpos = actual_inspos - self.fragtbl[fragptr][0] = actual_inspos + skelpos - skeleton = skeleton[0:insertpos] + slice + skeleton[insertpos:] - baseptr = baseptr + length - fragptr += 1 - cnt += 1 - self.parts.append(skeleton) - self.partinfo.append([skelnum, 'Text', filename, skelpos, baseptr, aidtext]) - - assembled_text = b''.join(self.parts) - if self.DEBUG: - outassembled = os.path.join(self.files.k8dir, 'assembled_text.dat') - with open(pathof(outassembled),'wb') as f: - f.write(assembled_text) - - # The primary css style sheet is typically stored next followed by any - # snippets of code that were previously inlined in the - # original xhtml but have been stripped out and placed here. - # This can include local CDATA snippets and and svg sections. 
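# Aside: a toy model of the skeleton/fragment reassembly that buildParts
# performs above; each fragment is spliced into the growing skeleton at its
# recorded insert position. The data below is invented for illustration.
def assemble(skeleton: bytes, fragments) -> bytes:
    # fragments: iterable of (insertpos, payload), with positions measured
    # against the skeleton including any previously spliced-in payloads
    for insertpos, payload in fragments:
        skeleton = skeleton[:insertpos] + payload + skeleton[insertpos:]
    return skeleton

assert assemble(b'<body></body>', [(6, b'<p>hi</p>')]) == b'<body><p>hi</p></body>'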
- - # The problem is that for most browsers and ereaders, you can not - # use to import any svg image that itself - # properly uses an tag to import some raster image - it - # should work according to the spec but does not for almost all browsers - # and ereaders and causes epub validation issues because those raster - # images are in manifest but not in xhtml text - since they only - # referenced from an svg image - - # So we need to check the remaining flow pieces to see if they are css - # or svg images. if svg images, we must check if they have an - # and if so inline them into the xhtml text pieces. - - # there may be other sorts of pieces stored here but until we see one - # in the wild to reverse engineer we won't be able to tell - self.flowinfo.append([None, None, None, None]) - svg_tag_pattern = re.compile(br'''(]*>)''', re.IGNORECASE) - image_tag_pattern = re.compile(br'''(]*>)''', re.IGNORECASE) - for j in range(1,len(self.flows)): - flowpart = self.flows[j] - nstr = '%04d' % j - m = re.search(svg_tag_pattern, flowpart) - if m is not None: - # svg - ptype = b'svg' - start = m.start() - m2 = re.search(image_tag_pattern, flowpart) - if m2 is not None: - pformat = b'inline' - pdir = None - fname = None - # strip off anything before = 0: - ptype = b'css' - flowpart = b'\n' - pformat = b'inline' - pdir = None - fname = None - else: - # css - assume as standalone css file - ptype = b'css' - pformat = b'file' - pdir = "Styles" - fname = 'style' + nstr + '.css' - - self.flows[j] = flowpart - self.flowinfo.append([ptype, pformat, pdir, fname]) - - if self.DEBUG: - print("\nFlow Map: %d entries" % len(self.flowinfo)) - for fi in self.flowinfo: - print(fi) - print("\n") - - print("\nXHTML File Part Position Information: %d entries" % len(self.partinfo)) - for pi in self.partinfo: - print(pi) - - if False: # self.Debug: - # dump all of the locations of the aid tags used in TEXT - # find id links only inside of tags - # inside any < > pair find all "aid=' and return whatever is inside the quotes - # [^>]* means match any amount of chars except for '>' char - # [^'"] match any amount of chars except for the quote character - # \s* means match any amount of whitespace - print("\npositions of all aid= pieces") - id_pattern = re.compile(br'''<[^>]*\said\s*=\s*['"]([^'"]*)['"][^>]*>''',re.IGNORECASE) - for m in re.finditer(id_pattern, rawML): - [filename, partnum, start, end] = self.getFileInfo(m.start()) - [seqnum, idtext] = self.getFragTblInfo(m.start()) - value = fromBase32(m.group(1)) - print(" aid: %s value: %d at: %d -> part: %d, start: %d, end: %d" % (m.group(1), value, m.start(), partnum, start, end)) - print(" %s fragtbl entry %d" % (idtext, seqnum)) - - return - - # get information fragment table entry by pos - def getFragTblInfo(self, pos): - for j in range(len(self.fragtbl)): - [insertpos, idtext, filenum, seqnum, startpos, length] = self.fragtbl[j] - if pos >= insertpos and pos < (insertpos + length): - # why are these "in: and before: added here - return seqnum, b'in: ' + idtext - if pos < insertpos: - return seqnum, b'before: ' + idtext - return None, None - - # get information about the part (file) that exists at pos in original rawML - def getFileInfo(self, pos): - for [partnum, pdir, filename, start, end, aidtext] in self.partinfo: - if pos >= start and pos < end: - return filename, partnum, start, end - return None, None, None, None - - # accessor functions to properly protect the internal structure - def getNumberOfParts(self): - return len(self.parts) - - def getPart(self,i): 
- if i >= 0 and i < len(self.parts): - return self.parts[i] - return None - - def getPartInfo(self, i): - if i >= 0 and i < len(self.partinfo): - return self.partinfo[i] - return None - - def getNumberOfFlows(self): - return len(self.flows) - - def getFlow(self,i): - # note flows[0] is empty - it was all of the original text - if i > 0 and i < len(self.flows): - return self.flows[i] - return None - - def getFlowInfo(self,i): - # note flowinfo[0] is empty - it was all of the original text - if i > 0 and i < len(self.flowinfo): - return self.flowinfo[i] - return None - - def getIDTagByPosFid(self, posfid, offset): - # first convert kindle:pos:fid and offset info to position in file - # (fromBase32 can handle both string types on input) - row = fromBase32(posfid) - off = fromBase32(offset) - [insertpos, idtext, filenum, seqnm, startpos, length] = self.fragtbl[row] - pos = insertpos + off - fname, pn, skelpos, skelend = self.getFileInfo(pos) - if fname is None: - # pos does not exist - # default to skeleton pos instead - print("Link To Position", pos, "does not exist, retargeting to top of target") - pos = self.skeltbl[filenum][3] - fname, pn, skelpos, skelend = self.getFileInfo(pos) - # an existing "id=" or "name=" attribute must exist in original xhtml otherwise it would not have worked for linking. - # Amazon seems to have added its own additional "aid=" inside tags whose contents seem to represent - # some position information encoded into Base32 name. - # so find the closest "id=" before position the file by actually searching in that file - idtext = self.getIDTag(pos) - return fname, idtext - - def getIDTag(self, pos): - # find the first tag with a named anchor (name or id attribute) before pos - fname, pn, skelpos, skelend = self.getFileInfo(pos) - if pn is None and skelpos is None: - print("Error: getIDTag - no file contains ", pos) - textblock = self.parts[pn] - npos = pos - skelpos - # if npos inside a tag then search all text before the its end of tag marker - pgt = textblock.find(b'>',npos) - plt = textblock.find(b'<',npos) - if plt == npos or pgt < plt: - npos = pgt + 1 - # find id and name attributes only inside of tags - # use a reverse tag search since that is faster - # inside any < > pair find "id=" and "name=" attributes return it - # [^>]* means match any amount of chars except for '>' char - # [^'"] match any amount of chars except for the quote character - # \s* means match any amount of whitespace - textblock = textblock[0:npos] - id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE) - name_pattern = re.compile(br'''<[^>]*\sname\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE) - aid_pattern = re.compile(br'''<[^>]+\s(?:aid|AID)\s*=\s*['"]([^'"]+)['"]''') - for tag in reverse_tag_iter(textblock): - # any ids in the body should default to top of file - if tag[0:6] == b'= start and pos < end: - return [partnum, pdir, filename, start, end, aidtext] - return [None, None, None, None, None, None] - - # fileno is actually a reference into fragtbl (a fragment) - def getGuideText(self): - guidetext = b'' - for [ref_type, ref_title, fileno] in self.guidetbl: - if ref_type == b'thumbimagestandard': - continue - if ref_type not in _guide_types and not ref_type.startswith(b'other.'): - if ref_type == b'start': - ref_type = b'text' - else: - ref_type = b'other.' 
+ ref_type - [pos, idtext, filenum, seqnm, startpos, length] = self.fragtbl[fileno] - [pn, pdir, filename, skelpos, skelend, aidtext] = self.getSkelInfo(pos) - idtext = self.getIDTag(pos) - linktgt = filename.encode('utf-8') - if idtext != b'': - linktgt += b'#' + idtext - guidetext += b'\n' - # opf is encoded utf-8 so must convert any titles properly - guidetext = (guidetext.decode(self.mh.codec)).encode("utf-8") - return guidetext - - def getPageIDTag(self, pos): - # find the first tag with a named anchor (name or id attribute) before pos - # but page map offsets need to little more leeway so if the offset points - # into a tag look for the next ending tag "/>" or "',npos) - plt = textblock.find(b'<',npos) - if plt == npos or pgt < plt: - # we are in a tag - # so find first ending tag - pend1 = textblock.find(b'/>', npos) - pend2 = textblock.find(b' pair find "id=" and "name=" attributes return it - # [^>]* means match any amount of chars except for '>' char - # [^'"] match any amount of chars except for the quote character - # \s* means match any amount of whitespace - textblock = textblock[0:npos] - id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE) - name_pattern = re.compile(br'''<[^>]*\sname\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE) - for tag in reverse_tag_iter(textblock): - # any ids in the body should default to top of file - if tag[0:6] == b'= python 2.7. -""" set to True to use OrderedDict for K8RESCProcessor.parsetag.tattr.""" - -if DEBUG_USE_ORDERED_DICTIONARY: - from collections import OrderedDict as dict_ -else: - dict_ = dict - -from .compatibility_utils import unicode_str - -from .mobi_utils import fromBase32 - -_OPF_PARENT_TAGS = ['xml', 'package', 'metadata', 'dc-metadata', - 'x-metadata', 'manifest', 'spine', 'tours', 'guide'] - -class K8RESCProcessor(object): - - def __init__(self, data, debug=False): - self._debug = debug - self.resc = None - self.opos = 0 - self.extrameta = [] - self.cover_name = None - self.spine_idrefs = {} - self.spine_order = [] - self.spine_pageattributes = {} - self.spine_ppd = None - # need3 indicate the book has fields which require epub3. - # but the estimation of the source epub version from the fields is difficult. - self.need3 = False - self.package_ver = None - self.extra_metadata = [] - self.refines_metadata = [] - self.extra_attributes = [] - # get header - start_pos = data.find(b'<') - self.resc_header = data[:start_pos] - # get resc data length - start = self.resc_header.find(b'=') + 1 - end = self.resc_header.find(b'&', start) - resc_size = 0 - if end > 0: - resc_size = fromBase32(self.resc_header[start:end]) - resc_rawbytes = len(data) - start_pos - if resc_rawbytes == resc_size: - self.resc_length = resc_size - else: - # Most RESC has a nul string at its tail but some do not. 
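# Aside: the RESC length above, like the kindle:pos:fid values handled
# earlier, uses Kindle's base-32 alphabet (digits 0-9 then A-V). A
# simplified, bytes-only re-implementation of the fromBase32 helper
# imported from mobi_utils:
def from_base32(s: bytes) -> int:
    value = 0
    for ch in s.upper():
        value = value * 32 + (ch - 0x30 if ch <= 0x39 else ch - 0x41 + 10)
    return value

assert from_base32(b'V') == 31
assert from_base32(b'10') == 32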
- end_pos = data.find(b'\x00', start_pos) - if end_pos < 0: - self.resc_length = resc_rawbytes - else: - self.resc_length = end_pos - start_pos - if self.resc_length != resc_size: - print("Warning: RESC section length({:d}bytes) does not match its size({:d}bytes).".format(self.resc_length, resc_size)) - # now parse RESC after converting it to unicode from utf-8 - try: - self.resc = unicode_str(data[start_pos:start_pos+self.resc_length]) - except UnicodeDecodeError: - self.resc = unicode_str(data[start_pos:start_pos+self.resc_length], enc='latin-1') - self.parseData() - - def prepend_to_spine(self, key, idref, linear, properties): - self.spine_order = [key] + self.spine_order - self.spine_idrefs[key] = idref - attributes = {} - if linear is not None: - attributes['linear'] = linear - if properties is not None: - attributes['properties'] = properties - self.spine_pageattributes[key] = attributes - - # RESC tag iterator - def resc_tag_iter(self): - tcontent = last_tattr = None - prefix = [''] - while True: - text, tag = self.parseresc() - if text is None and tag is None: - break - if text is not None: - tcontent = text.rstrip(' \r\n') - else: # we have a tag - ttype, tname, tattr = self.parsetag(tag) - if ttype == 'begin': - tcontent = None - prefix.append(tname + '.') - if tname in _OPF_PARENT_TAGS: - yield ''.join(prefix), tname, tattr, tcontent - else: - last_tattr = tattr - else: # single or end - if ttype == 'end': - prefix.pop() - tattr = last_tattr - last_tattr = None - if tname in _OPF_PARENT_TAGS: - tname += '-end' - yield ''.join(prefix), tname, tattr, tcontent - tcontent = None - - # now parse the RESC to extract spine and extra metadata info - def parseData(self): - for prefix, tname, tattr, tcontent in self.resc_tag_iter(): - if self._debug: - print(" Parsing RESC: ", prefix, tname, tattr, tcontent) - if tname == 'package': - self.package_ver = tattr.get('version', '2.0') - package_prefix = tattr.get('prefix','') - if self.package_ver.startswith('3') or package_prefix.startswith('rendition'): - self.need3 = True - if tname == 'spine': - self.spine_ppd = tattr.get('page-progession-direction', None) - if self.spine_ppd is not None and self.spine_ppd == 'rtl': - self.need3 = True - if tname == 'itemref': - skelid = tattr.pop('skelid', None) - if skelid is None and len(self.spine_order) == 0: - # assume it was removed initial coverpage - skelid = 'coverpage' - tattr['linear'] = 'no' - self.spine_order.append(skelid) - idref = tattr.pop('idref', None) - if idref is not None: - idref = 'x_' + idref - self.spine_idrefs[skelid] = idref - if 'id' in tattr: - del tattr['id'] - # tattr["id"] = 'x_' + tattr["id"] - if 'properties' in tattr: - self.need3 = True - self.spine_pageattributes[skelid] = tattr - if tname == 'meta' or tname.startswith('dc:'): - if 'refines' in tattr or 'property' in tattr: - self.need3 = True - if tattr.get('name','') == 'cover': - cover_name = tattr.get('content',None) - if cover_name is not None: - cover_name = 'x_' + cover_name - self.cover_name = cover_name - else: - self.extrameta.append([tname, tattr, tcontent]) - - # parse and return either leading text or the next tag - def parseresc(self): - p = self.opos - if p >= len(self.resc): - return None, None - if self.resc[p] != '<': - res = self.resc.find('<',p) - if res == -1 : - res = len(self.resc) - self.opos = res - return self.resc[p:res], None - # handle comment as a special case - if self.resc[p:p+4] == '',p+1) - if te != -1: - te = te+2 - else: - te = self.resc.find('>',p+1) - ntb = 
self.resc.find('<',p+1) - if ntb != -1 and ntb < te: - self.opos = ntb - return self.resc[p:ntb], None - self.opos = te + 1 - return None, self.resc[p:te+1] - - # parses tag to identify: [tname, ttype, tattr] - # tname: tag name - # ttype: tag type ('begin', 'end' or 'single'); - # tattr: dictionary of tag atributes - def parsetag(self, s): - p = 1 - tname = None - ttype = None - tattr = dict_() - while s[p:p+1] == ' ' : - p += 1 - if s[p:p+1] == '/': - ttype = 'end' - p += 1 - while s[p:p+1] == ' ' : - p += 1 - b = p - while s[p:p+1] not in ('>', '/', ' ', '"', "'",'\r','\n') : - p += 1 - tname=s[b:p].lower() - # some special cases - if tname == '?xml': - tname = 'xml' - if tname == '!--': - ttype = 'single' - comment = s[p:-3].strip() - tattr['comment'] = comment - if ttype is None: - # parse any attributes of begin or single tags - while s.find('=',p) != -1 : - while s[p:p+1] == ' ' : - p += 1 - b = p - while s[p:p+1] != '=' : - p += 1 - aname = s[b:p].lower() - aname = aname.rstrip(' ') - p += 1 - while s[p:p+1] == ' ' : - p += 1 - if s[p:p+1] in ('"', "'") : - p = p + 1 - b = p - while s[p:p+1] not in ('"', "'"): - p += 1 - val = s[b:p] - p += 1 - else : - b = p - while s[p:p+1] not in ('>', '/', ' ') : - p += 1 - val = s[b:p] - tattr[aname] = val - if ttype is None: - ttype = 'begin' - if s.find('/',p) >= 0: - ttype = 'single' - return ttype, tname, tattr - - def taginfo_toxml(self, taginfo): - res = [] - tname, tattr, tcontent = taginfo - res.append('<' + tname) - if tattr is not None: - for key in tattr: - res.append(' ' + key + '="'+tattr[key]+'"') - if tcontent is not None: - res.append('>' + tcontent + '\n') - else: - res.append('/>\n') - return "".join(res) - - def hasSpine(self): - return len(self.spine_order) > 0 - - def needEPUB3(self): - return self.need3 - - def hasRefines(self): - for [tname, tattr, tcontent] in self.extrameta: - if 'refines' in tattr: - return True - return False - - def createMetadata(self, epubver): - for taginfo in self.extrameta: - tname, tattr, tcontent = taginfo - if 'refines' in tattr: - if epubver == 'F' and 'property' in tattr: - attr = ' id="%s" opf:%s="%s"\n' % (tattr['refines'], tattr['property'], tcontent) - self.extra_attributes.append(attr) - else: - tag = self.taginfo_toxml(taginfo) - self.refines_metadata.append(tag) - else: - tag = self.taginfo_toxml(taginfo) - self.extra_metadata.append(tag) diff --git a/epy_extras/KindleUnpack/mobi_nav.py b/epy_extras/KindleUnpack/mobi_nav.py deleted file mode 100644 index 16fb0be..0000000 --- a/epy_extras/KindleUnpack/mobi_nav.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -from .compatibility_utils import unicode_str -import os -from .unipath import pathof - -import re -# note: re requites the pattern to be the exact same type as the data to be searched in python3 -# but u"" is not allowed for the pattern itself only b"" - -DEBUG_NAV = False - -FORCE_DEFAULT_TITLE = False -""" Set to True to force to use the default title. """ - -NAVIGATION_FINENAME = 'nav.xhtml' -""" The name for the navigation document. """ - -DEFAULT_TITLE = 'Navigation' -""" The default title for the navigation document. 
""" - -class NAVProcessor(object): - - def __init__(self, files): - self.files = files - self.navname = NAVIGATION_FINENAME - - def buildLandmarks(self, guidetext): - header = '' - header += ' \n' - - type_map = { - 'cover' : 'cover', - 'title-page' : 'title-page', - # ?: 'frontmatter', - 'text' : 'bodymatter', - # ?: 'backmatter', - 'toc' : 'toc', - 'loi' : 'loi', - 'lot' : 'lot', - 'preface' : 'preface', - 'bibliography' : 'bibliography', - 'index' : 'index', - 'glossary' : 'glossary', - 'acknowledgements' : 'acknowledgements', - 'colophon' : None, - 'copyright-page' : None, - 'dedication' : None, - 'epigraph' : None, - 'foreword' : None, - 'notes' : None - } - - re_type = re.compile(r'\s+type\s*=\s*"(.*?)"', re.I) - re_title = re.compile(r'\s+title\s*=\s*"(.*?)"', re.I) - re_link = re.compile(r'\s+href\s*=\s*"(.*?)"', re.I) - dir_ = os.path.relpath(self.files.k8text, self.files.k8oebps).replace('\\', '/') - - data = '' - references = re.findall(r'', unicode_str(guidetext), re.I) - for reference in references: - mo_type = re_type.search(reference) - mo_title = re_title.search(reference) - mo_link = re_link.search(reference) - if mo_type is not None: - type_ = type_map.get(mo_type.group(1), None) - else: - type_ = None - if mo_title is not None: - title = mo_title.group(1) - else: - title = None - if mo_link is not None: - link = mo_link.group(1) - else: - link = None - - if type_ is not None and title is not None and link is not None: - link = os.path.relpath(link, dir_).replace('\\', '/') - data += element.format(type_, link, title) - if len(data) > 0: - return header + data + footer - else: - return '' - - def buildTOC(self, indx_data): - header = '' - header += ' \n' - - # recursive part - def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1): - if start>len(indx_data) or end>len(indx_data): - print("Warning (in buildTOC): missing INDX child entries", start, end, len(indx_data)) - return '' - if DEBUG_NAV: - print("recursINDX (in buildTOC) lvl %d from %d to %d" % (lvl, start, end)) - xhtml = '' - if start <= 0: - start = 0 - if end <= 0: - end = len(indx_data) - if lvl > max_lvl: - max_lvl = lvl - - indent1 = ' ' * (2 + lvl * 2) - indent2 = ' ' * (3 + lvl * 2) - xhtml += indent1 + '
<ol>\n' - for i in range(start, end): - e = indx_data[i] - htmlfile = e['filename'] - desttag = e['idtag'] - text = e['text'] - if not e['hlvl'] == lvl: - continue - num += 1 - if desttag == '': - link = htmlfile - else: - link = '{:s}#{:s}'.format(htmlfile, desttag) - xhtml += indent2 + '
<li>' - entry = '<a href="{:s}">{:s}</a>'.format(link, text) - xhtml += entry - # recurs - if e['child1'] >= 0: - xhtml += '\n' - xhtmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1, - e['child1'], e['childn'] + 1) - xhtml += xhtmlrec - xhtml += indent2 - # close entry - xhtml += '
</li>\n' - xhtml += indent1 + '</ol>
\n' - return xhtml, max_lvl, num - - data, max_lvl, num = recursINDX() - if not len(indx_data) == num: - print("Warning (in buildTOC): different number of entries in NCX", len(indx_data), num) - return header + data + footer - - def buildNAV(self, ncx_data, guidetext, title, lang): - print("Building Navigation Document.") - if FORCE_DEFAULT_TITLE: - title = DEFAULT_TITLE - nav_header = '' - nav_header += '\n' - nav_header += ' - - - - - - - - - -%s - - -''' - - ncx_footer = \ -''' - -''' - - ncx_entry = \ -''' - -%s - -''' - - # recursive part - def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1): - if start>len(indx_data) or end>len(indx_data): - print("Warning: missing INDX child entries", start, end, len(indx_data)) - return '' - if DEBUG_NCX: - print("recursINDX lvl %d from %d to %d" % (lvl, start, end)) - xml = '' - if start <= 0: - start = 0 - if end <= 0: - end = len(indx_data) - if lvl > max_lvl: - max_lvl = lvl - indent = ' ' * (2 + lvl) - - for i in range(start, end): - e = indx_data[i] - if not e['hlvl'] == lvl: - continue - # open entry - num += 1 - link = '%s#filepos%d' % (htmlfile, e['pos']) - tagid = 'np_%d' % num - entry = ncx_entry % (tagid, num, xmlescape(unescapeit(e['text'])), link) - entry = re.sub(re.compile('^', re.M), indent, entry, 0) - xml += entry + '\n' - # recurs - if e['child1']>=0: - xmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1, - e['child1'], e['childn'] + 1) - xml += xmlrec - # close entry - xml += indent + '\n' - return xml, max_lvl, num - - body, max_lvl, num = recursINDX() - header = ncx_header % (lang, ident, max_lvl + 1, title) - ncx = header + body + ncx_footer - if not len(indx_data) == num: - print("Warning: different number of entries in NCX", len(indx_data), num) - return ncx - - def writeNCX(self, metadata): - # build the xml - self.isNCX = True - print("Write ncx") - # htmlname = os.path.basename(self.files.outbase) - # htmlname += '.html' - htmlname = 'book.html' - xml = self.buildNCX(htmlname, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0]) - # write the ncx file - # ncxname = os.path.join(self.files.mobi7dir, self.files.getInputFileBasename() + '.ncx') - ncxname = os.path.join(self.files.mobi7dir, 'toc.ncx') - with open(pathof(ncxname), 'wb') as f: - f.write(xml.encode('utf-8')) - - def buildK8NCX(self, indx_data, title, ident, lang): - ncx_header = \ -''' - - - - - - - - - -%s - - -''' - - ncx_footer = \ -''' - -''' - - ncx_entry = \ -''' - -%s - -''' - - # recursive part - def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1): - if start>len(indx_data) or end>len(indx_data): - print("Warning: missing INDX child entries", start, end, len(indx_data)) - return '' - if DEBUG_NCX: - print("recursINDX lvl %d from %d to %d" % (lvl, start, end)) - xml = '' - if start <= 0: - start = 0 - if end <= 0: - end = len(indx_data) - if lvl > max_lvl: - max_lvl = lvl - indent = ' ' * (2 + lvl) - - for i in range(start, end): - e = indx_data[i] - htmlfile = e['filename'] - desttag = e['idtag'] - if not e['hlvl'] == lvl: - continue - # open entry - num += 1 - if desttag == '': - link = 'Text/%s' % htmlfile - else: - link = 'Text/%s#%s' % (htmlfile, desttag) - tagid = 'np_%d' % num - entry = ncx_entry % (tagid, num, xmlescape(unescapeit(e['text'])), link) - entry = re.sub(re.compile('^', re.M), indent, entry, 0) - xml += entry + '\n' - # recurs - if e['child1']>=0: - xmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1, - e['child1'], e['childn'] + 1) - xml += xmlrec - # close entry - xml += indent + '\n' - 
return xml, max_lvl, num - - body, max_lvl, num = recursINDX() - header = ncx_header % (lang, ident, max_lvl + 1, title) - ncx = header + body + ncx_footer - if not len(indx_data) == num: - print("Warning: different number of entries in NCX", len(indx_data), num) - return ncx - - def writeK8NCX(self, ncx_data, metadata): - # build the xml - self.isNCX = True - print("Write K8 ncx") - xml = self.buildK8NCX(ncx_data, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0]) - bname = 'toc.ncx' - ncxname = os.path.join(self.files.k8oebps,bname) - with open(pathof(ncxname), 'wb') as f: - f.write(xml.encode('utf-8')) diff --git a/epy_extras/KindleUnpack/mobi_opf.py b/epy_extras/KindleUnpack/mobi_opf.py deleted file mode 100644 index 742d776..0000000 --- a/epy_extras/KindleUnpack/mobi_opf.py +++ /dev/null @@ -1,686 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -from .compatibility_utils import unicode_str, unescapeit -from .compatibility_utils import lzip - -from .unipath import pathof - -from xml.sax.saxutils import escape as xmlescape - -import os -import uuid -from datetime import datetime - -# In EPUB3, NCX and MAY exist in OPF, although the NCX is superseded -# by the Navigation Document and the is deprecated. Currently, EPUB3_WITH_NCX -# and EPUB3_WITH_GUIDE are set to True due to compatibility with epub2 reading systems. -# They might be change to set to False in the future. - -EPUB3_WITH_NCX = True # Do not set to False except for debug. -""" Set to True to create a toc.ncx when converting to epub3. """ - -EPUB3_WITH_GUIDE = True # Do not set to False except for debug. -""" Set to True to create a guide element in an opf when converting to epub3. """ - -EPUB_OPF = 'content.opf' -""" The name for the OPF of EPUB. """ - -TOC_NCX = 'toc.ncx' -""" The name for the TOC of EPUB2. """ - -NAVIGATION_DOCUMENT = 'nav.xhtml' -""" The name for the navigation document of EPUB3. """ - -BEGIN_INFO_ONLY = '' -""" The comment to indicate the end of metadata which will be ignored by kindlegen. """ - -EXTH_TITLE_FURIGANA = 'Title-Pronunciation' -""" The name for Title Furigana(similar to file-as) set by KDP. """ - -EXTH_CREATOR_FURIGANA = 'Author-Pronunciation' -""" The name for Creator Furigana(similar to file-as) set by KDP. """ - -EXTH_PUBLISHER_FURIGANA = 'Publisher-Pronunciation' -""" The name for Publisher Furigana(similar to file-as) set by KDP. 
""" - -EXTRA_ENTITIES = {'"': '"', "'": "'"} - -class OPFProcessor(object): - - def __init__(self, files, metadata, fileinfo, rscnames, hasNCX, mh, usedmap, pagemapxml='', guidetext='', k8resc=None, epubver='2'): - self.files = files - self.metadata = metadata - self.fileinfo = fileinfo - self.rscnames = rscnames - self.has_ncx = hasNCX - self.codec = mh.codec - self.isK8 = mh.isK8() - self.printReplica = mh.isPrintReplica() - self.guidetext = unicode_str(guidetext) - self.used = usedmap - self.k8resc = k8resc - self.covername = None - self.cover_id = 'cover_img' - if self.k8resc is not None and self.k8resc.cover_name is not None: - # update cover id info from RESC if available - self.cover_id = self.k8resc.cover_name - # Create a unique urn uuid - self.BookId = unicode_str(str(uuid.uuid4())) - self.pagemap = pagemapxml - - self.ncxname = None - self.navname = None - - # page-progression-direction is only set in spine - self.page_progression_direction = metadata.pop('page-progression-direction', [None])[0] - if 'rl' in metadata.get('primary-writing-mode', [''])[0]: - self.page_progression_direction = 'rtl' - self.epubver = epubver # the epub version set by user - self.target_epubver = epubver # the epub vertion set by user or detected automatically - if self.epubver == 'A': - self.target_epubver = self.autodetectEPUBVersion() - elif self.epubver == 'F': - self.target_epubver = '2' - elif self.epubver != '2' and self.epubver != '3': - self.target_epubver = '2' - - # id for rifine attributes - self.title_id = {} - self.creator_id = {} - self.publisher_id = {} - # extra attributes - self.title_attrib = {} - self.creator_attrib = {} - self.publisher_attrib = {} - self.extra_attributes = [] # for force epub2 option - # Create epub3 metadata from EXTH. - self.exth_solved_refines_metadata = [] - self.exth_refines_metadata = [] - self.exth_fixedlayout_metadata = [] - - self.defineRefinesID() - self.processRefinesMetadata() - if self.k8resc is not None: - # Create metadata in RESC section. - self.k8resc.createMetadata(epubver) - if self.target_epubver == "3": - self.createMetadataForFixedlayout() - - def escapeit(self, sval, EXTRAS=None): - # note, xmlescape and unescape do not work with utf-8 bytestrings - sval = unicode_str(sval) - if EXTRAS: - res = xmlescape(unescapeit(sval), EXTRAS) - else: - res = xmlescape(unescapeit(sval)) - return res - - def createMetaTag(self, data, property, content, refid=''): - refines = '' - if refid: - refines = ' refines="#%s"' % refid - data.append('%s\n' % (property, refines, content)) - - def buildOPFMetadata(self, start_tag, has_obfuscated_fonts=False): - # convert from EXTH metadata format to target epub version metadata - # epub 3 will ignore style metatags - # but allows them to be present for backwards compatibility - # instead the new format is - # property_value - # and DCMES elements such as: - # value - - metadata = self.metadata - k8resc = self.k8resc - - META_TAGS = ['Drm Server Id', 'Drm Commerce Id', 'Drm Ebookbase Book Id', 'ASIN', 'ThumbOffset', 'Fake Cover', - 'Creator Software', 'Creator Major Version', 'Creator Minor Version', 'Creator Build Number', - 'Watermark', 'Clipping Limit', 'Publisher Limit', 'Text to Speech Disabled', 'CDE Type', - 'Updated Title', 'Font Signature (hex)', 'Tamper Proof Keys (hex)',] - - # def handleTag(data, metadata, key, tag, ids={}): - def handleTag(data, metadata, key, tag, attrib={}): - '''Format metadata values. - - @param data: List of formatted metadata entries. - @param metadata: The metadata dictionary. 
- @param key: The key of the metadata value to handle. - @param tag: The opf tag corresponds to the metadata value. - ###@param ids: The ids in tags for refines property of epub3. - @param attrib: The extra attibute for refines or opf prefixs. - ''' - if key in metadata: - for i, value in enumerate(metadata[key]): - closingTag = tag.split(" ")[0] - res = '<%s%s>%s\n' % (tag, attrib.get(i, ''), self.escapeit(value), closingTag) - data.append(res) - del metadata[key] - - # these are allowed but ignored by epub3 - def handleMetaPairs(data, metadata, key, name): - if key in metadata: - for value in metadata[key]: - res = '\n' % (name, self.escapeit(value, EXTRA_ENTITIES)) - data.append(res) - del metadata[key] - - data = [] - data.append(start_tag + '\n') - # Handle standard metadata - if 'Title' in metadata: - handleTag(data, metadata, 'Title', 'dc:title', self.title_attrib) - else: - data.append('Untitled\n') - handleTag(data, metadata, 'Language', 'dc:language') - if 'UniqueID' in metadata: - handleTag(data, metadata, 'UniqueID', 'dc:identifier id="uid"') - else: - # No unique ID in original, give it a generic one. - data.append('0\n') - - if self.target_epubver == '3': - # epub version 3 minimal metadata requires a dcterms:modifed date tag - self.createMetaTag(data, 'dcterms:modified', datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) - - if self.isK8 and has_obfuscated_fonts: - # Use the random generated urn:uuid so obuscated fonts work. - # It doesn't need to be _THE_ unique identifier to work as a key - # for obfuscated fonts in Sigil, ADE and calibre. Its just has - # to use the opf:scheme="UUID" and have the urn:uuid: prefix. - if self.target_epubver == '3': - data.append('urn:uuid:'+self.BookId+'\n') - else: - data.append('urn:uuid:'+self.BookId+'\n') - - handleTag(data, metadata, 'Creator', 'dc:creator', self.creator_attrib) - handleTag(data, metadata, 'Contributor', 'dc:contributor') - handleTag(data, metadata, 'Publisher', 'dc:publisher', self.publisher_attrib) - handleTag(data, metadata, 'Source', 'dc:source') - handleTag(data, metadata, 'Type', 'dc:type') - if self.target_epubver == '3': - if 'ISBN' in metadata: - for i, value in enumerate(metadata['ISBN']): - res = 'urn:isbn:%s\n' % self.escapeit(value) - data.append(res) - else: - handleTag(data, metadata, 'ISBN', 'dc:identifier opf:scheme="ISBN"') - if 'Subject' in metadata: - if 'SubjectCode' in metadata: - codeList = metadata['SubjectCode'] - del metadata['SubjectCode'] - else: - codeList = None - for i in range(len(metadata['Subject'])): - if codeList and i < len(codeList): - data.append('') - else: - data.append('') - data.append(self.escapeit(metadata['Subject'][i])+'\n') - del metadata['Subject'] - handleTag(data, metadata, 'Description', 'dc:description') - if self.target_epubver == '3': - if 'Published' in metadata: - for i, value in enumerate(metadata['Published']): - res = '%s\n' % self.escapeit(value) - data.append(res) - else: - handleTag(data, metadata, 'Published', 'dc:date opf:event="publication"') - handleTag(data, metadata, 'Rights', 'dc:rights') - - if self.epubver == 'F': - if self.extra_attributes or k8resc is not None and k8resc.extra_attributes: - data.append('\n') - else: - # Append refines metadata. - if self.exth_solved_refines_metadata: - data.append('\n') - data += self.exth_solved_refines_metadata - if self.exth_refines_metadata or k8resc is not None and k8resc.refines_metadata: - data.append('\n') - - # Append metadata in RESC section. 
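# Aside: createMetaTag (defined earlier in this class) emits the epub3
# refines metadata appended around here. Its output has roughly this shape;
# the exact format string is a hedged reconstruction:
def meta_tag(property_: str, content: str, refid: str = '') -> str:
    refines = ' refines="#%s"' % refid if refid else ''
    return '<meta property="%s"%s>%s</meta>\n' % (property_, refines, content)

# e.g. meta_tag('file-as', 'Tolstoy, Leo', 'creator01') ->
# <meta property="file-as" refines="#creator01">Tolstoy, Leo</meta>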
- if k8resc is not None and k8resc.extra_metadata: - data.append('\n') - - if 'CoverOffset' in metadata: - imageNumber = int(metadata['CoverOffset'][0]) - self.covername = self.rscnames[imageNumber] - if self.covername is None: - print("Error: Cover image %s was not recognized as a valid image" % imageNumber) - else: - # is obsoleted in EPUB3, but kindlegen v2.9 requires it. - data.append('\n') - self.used[self.covername] = 'used' - del metadata['CoverOffset'] - - handleMetaPairs(data, metadata, 'Codec', 'output encoding') - # handle kindlegen specifc tags - handleTag(data, metadata, 'DictInLanguage', 'DictionaryInLanguage') - handleTag(data, metadata, 'DictOutLanguage', 'DictionaryOutLanguage') - handleMetaPairs(data, metadata, 'RegionMagnification', 'RegionMagnification') - handleMetaPairs(data, metadata, 'book-type', 'book-type') - handleMetaPairs(data, metadata, 'zero-gutter', 'zero-gutter') - handleMetaPairs(data, metadata, 'zero-margin', 'zero-margin') - handleMetaPairs(data, metadata, 'primary-writing-mode', 'primary-writing-mode') - handleMetaPairs(data, metadata, 'fixed-layout', 'fixed-layout') - handleMetaPairs(data, metadata, 'orientation-lock', 'orientation-lock') - handleMetaPairs(data, metadata, 'original-resolution', 'original-resolution') - - # these are not allowed in epub2 or 3 so convert them to meta name content pairs - # perhaps these could better be mapped into the dcterms namespace instead - handleMetaPairs(data, metadata, 'Review', 'review') - handleMetaPairs(data, metadata, 'Imprint', 'imprint') - handleMetaPairs(data, metadata, 'Adult', 'adult') - handleMetaPairs(data, metadata, 'DictShortName', 'DictionaryVeryShortName') - - # these are needed by kobo books upon submission but not sure if legal metadata in epub2 or epub3 - if 'Price' in metadata and 'Currency' in metadata: - priceList = metadata['Price'] - currencyList = metadata['Currency'] - if len(priceList) != len(currencyList): - print("Error: found %s price entries, but %s currency entries.") - else: - for i in range(len(priceList)): - data.append(''+priceList[i]+'\n') - del metadata['Price'] - del metadata['Currency'] - - if self.target_epubver == '3': - # Append metadata for EPUB3. 
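# Aside: handleMetaPairs above emits legacy name/content pairs that epub3
# readers ignore but kindlegen understands; a hedged reconstruction of the
# shape of its output:
def meta_pair(name: str, content: str) -> str:
    return '<meta name="%s" content="%s"/>\n' % (name, content)

# e.g. meta_pair('fixed-layout', 'true') -> <meta name="fixed-layout" content="true"/>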
- if self.exth_fixedlayout_metadata: - data.append('\n') - data += self.exth_fixedlayout_metadata - - # all that remains is extra EXTH info we will store inside a comment inside meta name/content pairs - # so it can not impact anything and will be automatically stripped out if found again in a RESC section - data.append(BEGIN_INFO_ONLY + '\n') - if 'ThumbOffset' in metadata: - imageNumber = int(metadata['ThumbOffset'][0]) - # Some bad books give image indexes that are 'out of range' - try: - imageName = self.rscnames[imageNumber] - except: - print('Number given for Cover Thumbnail is out of range: %s' % imageNumber) - imageName = None - if imageName is None: - print("Error: Cover Thumbnail image %s was not recognized as a valid image" % imageNumber) - else: - data.append('\n') - # self.used[imageName] = 'used' # thumbnail image is always generated by Kindlegen, so don't include in manifest - self.used[imageName] = 'not used' - del metadata['ThumbOffset'] - for metaName in META_TAGS: - if metaName in metadata: - for value in metadata[metaName]: - data.append('\n') - del metadata[metaName] - for key in list(metadata.keys()): - for value in metadata[key]: - data.append('\n') - del metadata[key] - data.append(END_INFO_ONLY + '\n') - data.append('\n') - return data - - def buildOPFManifest(self, ncxname, navname=None): - # buildManifest for mobi7, azw4, epub2 and epub3. - k8resc = self.k8resc - cover_id = self.cover_id - hasK8RescSpine = k8resc is not None and k8resc.hasSpine() - self.ncxname = ncxname - self.navname = navname - - data = [] - data.append('\n') - media_map = { - '.jpg' : 'image/jpeg', - '.jpeg' : 'image/jpeg', - '.png' : 'image/png', - '.gif' : 'image/gif', - '.svg' : 'image/svg+xml', - '.xhtml': 'application/xhtml+xml', - '.html' : 'text/html', # for mobi7 - '.pdf' : 'application/pdf', # for azw4(print replica textbook) - '.ttf' : 'application/x-font-ttf', - '.otf' : 'application/x-font-opentype', # replaced? 
- '.css' : 'text/css', - # '.html' : 'text/x-oeb1-document', # for mobi7 - # '.otf' : 'application/vnd.ms-opentype', # [OpenType] OpenType fonts - # '.woff' : 'application/font-woff', # [WOFF] WOFF fonts - # '.smil' : 'application/smil+xml', # [MediaOverlays301] EPUB Media Overlay documents - # '.pls' : 'application/pls+xml', # [PLS] Text-to-Speech (TTS) Pronunciation lexicons - # '.mp3' : 'audio/mpeg', - # '.mp4' : 'video/mp4', - # '.js' : 'text/javascript', # not supported in K8 - } - spinerefs = [] - - idcnt = 0 - for [key,dir,fname] in self.fileinfo: - name, ext = os.path.splitext(fname) - ext = ext.lower() - media = media_map.get(ext) - ref = "item%d" % idcnt - if hasK8RescSpine: - if key is not None and key in k8resc.spine_idrefs: - ref = k8resc.spine_idrefs[key] - properties = '' - if dir != '': - fpath = dir + '/' + fname - else: - fpath = fname - data.append('\n'.format(ref, media, fpath, properties)) - - if ext in ['.xhtml', '.html']: - spinerefs.append(ref) - idcnt += 1 - - for fname in self.rscnames: - if fname is not None: - if self.used.get(fname,'not used') == 'not used': - continue - name, ext = os.path.splitext(fname) - ext = ext.lower() - media = media_map.get(ext,ext[1:]) - properties = '' - if fname == self.covername: - ref = cover_id - if self.target_epubver == '3': - properties = 'properties="cover-image"' - else: - ref = "item%d" % idcnt - if ext == '.ttf' or ext == '.otf': - if self.isK8: # fonts are only used in Mobi 8 - fpath = 'Fonts/' + fname - data.append('\n'.format(ref, media, fpath, properties)) - else: - fpath = 'Images/' + fname - data.append('\n'.format(ref, media, fpath, properties)) - idcnt += 1 - - if self.target_epubver == '3' and navname is not None: - data.append('\n') - if self.has_ncx and ncxname is not None: - data.append('\n') - if self.pagemap != '': - data.append('\n') - data.append('\n') - return [data, spinerefs] - - def buildOPFSpine(self, spinerefs, isNCX): - # build spine - k8resc = self.k8resc - hasK8RescSpine = k8resc is not None and k8resc.hasSpine() - data = [] - ppd = '' - if self.isK8 and self.page_progression_direction is not None: - ppd = ' page-progression-direction="{:s}"'.format(self.page_progression_direction) - ncx = '' - if isNCX: - ncx = ' toc="ncx"' - map='' - if self.pagemap != '': - map = ' page-map="map"' - if self.epubver == 'F': - if ppd: - ppd = '' - spine_start_tag = '{0:s}\n'.format(ppd, map, ncx) - else: - spine_start_tag = '\n'.format(ppd, map, ncx) - data.append(spine_start_tag) - - if hasK8RescSpine: - for key in k8resc.spine_order: - idref = k8resc.spine_idrefs[key] - attribs = k8resc.spine_pageattributes[key] - tag = '\n' % entry) - start += 1 - for entry in spinerefs[start:]: - data.append('\n') - data.append('\n') - return data - - def buildMobi7OPF(self): - # Build an OPF for mobi7 and azw4. 
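# Aside: a condensed sketch of the manifest/spine pattern used by
# buildOPFManifest and buildOPFSpine above: map each file extension to a
# media type, emit an <item> per file and an <itemref> per xhtml item.
# The media map is abbreviated here and the ids are illustrative.
import os

MEDIA_MAP = {'.xhtml': 'application/xhtml+xml', '.css': 'text/css',
             '.jpg': 'image/jpeg', '.png': 'image/png'}

def manifest_and_spine(paths):
    items, refs = [], []
    for n, path in enumerate(paths):
        ext = os.path.splitext(path)[1].lower()
        media = MEDIA_MAP.get(ext, 'application/octet-stream')
        items.append('<item id="item%d" media-type="%s" href="%s"/>' % (n, media, path))
        if ext == '.xhtml':
            refs.append('<itemref idref="item%d"/>' % n)
    return items, refs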
- print("Building an opf for mobi7/azw4.") - data = [] - data.append('\n') - data.append('\n') - metadata_tag = '' - opf_metadata = self.buildOPFMetadata(metadata_tag) - data += opf_metadata - if self.has_ncx: - # ncxname = self.files.getInputFileBasename() + '.ncx' - ncxname = 'toc.ncx' - else: - ncxname = None - [opf_manifest, spinerefs] = self.buildOPFManifest(ncxname) - data += opf_manifest - opf_spine = self.buildOPFSpine(spinerefs, self.has_ncx) - data += opf_spine - data.append('\n\n') - if not self.printReplica: - guide ='\n' + self.guidetext + '\n' - data.append(guide) - data.append('\n') - return ''.join(data) - - def buildEPUBOPF(self, has_obfuscated_fonts=False): - print("Building an opf for mobi8 using epub version: ", self.target_epubver) - if self.target_epubver == '2': - has_ncx = self.has_ncx - has_guide = True - ncxname = None - ncxname = TOC_NCX - navname = None - package = '\n' - tours = '\n\n' - metadata_tag = '' - else: - has_ncx = EPUB3_WITH_NCX - has_guide = EPUB3_WITH_GUIDE - ncxname = None - if has_ncx: - ncxname = TOC_NCX - navname = NAVIGATION_DOCUMENT - package = '\n' - tours = '' - metadata_tag = '' - - data = [] - data.append('\n') - data.append(package) - opf_metadata = self.buildOPFMetadata(metadata_tag, has_obfuscated_fonts) - data += opf_metadata - [opf_manifest, spinerefs] = self.buildOPFManifest(ncxname, navname) - data += opf_manifest - opf_spine = self.buildOPFSpine(spinerefs, has_ncx) - data += opf_spine - data.append(tours) - if has_guide: - guide ='\n' + self.guidetext + '\n' - data.append(guide) - data.append('\n') - return ''.join(data) - - def writeOPF(self, has_obfuscated_fonts=False): - if self.isK8: - data = self.buildEPUBOPF(has_obfuscated_fonts) - outopf = os.path.join(self.files.k8oebps, EPUB_OPF) - with open(pathof(outopf), 'wb') as f: - f.write(data.encode('utf-8')) - return self.BookId - else: - data = self.buildMobi7OPF() - outopf = os.path.join(self.files.mobi7dir, 'content.opf') - with open(pathof(outopf), 'wb') as f: - f.write(data.encode('utf-8')) - return 0 - - def getBookId(self): - return self.BookId - - def getNCXName(self): - return self.ncxname - - def getNAVName(self): - return self.navname - - def getEPUBVersion(self): - return self.target_epubver - - def hasNCX(self): - return self.ncxname is not None and self.has_ncx - - def hasNAV(self): - return self.navname is not None - - def autodetectEPUBVersion(self): - # Determine EPUB version from metadata and RESC. - metadata = self.metadata - k8resc = self.k8resc - epubver = '2' - if 'true' == metadata.get('fixed-layout', [''])[0].lower(): - epubver = '3' - elif metadata.get('orientation-lock', [''])[0].lower() in ['portrait', 'landscape']: - epubver = '3' - elif self.page_progression_direction == 'rtl': - epubver = '3' - elif EXTH_TITLE_FURIGANA in metadata: - epubver = '3' - elif EXTH_CREATOR_FURIGANA in metadata: - epubver = '3' - elif EXTH_PUBLISHER_FURIGANA in metadata: - epubver = '3' - elif k8resc is not None and k8resc.needEPUB3(): - epubver = '3' - return epubver - - def defineRefinesID(self): - # the following EXTH are set by KDP. - # 'Title_Furigana_(508)' - # 'Creator_Furigana_(517)', - # 'Publisher_Furigana_(522)' - # It is difficult to find correspondence between Title, Creator, Publisher - # and EXTH 508,512, 522 if they have more than two values since KDP seems not preserve the oders of EXTH 508,512 and 522. - # It is also difficult to find correspondence between them and tags which have refine attributes in RESC. - # So editing manually is required. 
- metadata = self.metadata
-
- needRefinesId = False
- if self.k8resc is not None:
- needRefinesId = self.k8resc.hasRefines()
- # Create ids for refines attributes
- if (needRefinesId or EXTH_TITLE_FURIGANA in metadata) and 'Title' in metadata:
- for i in range(len(metadata.get('Title'))):
- self.title_id[i] = 'title%02d' % (i+1)
-
- if (needRefinesId or EXTH_CREATOR_FURIGANA in metadata) and 'Creator' in metadata:
- for i in range(len(metadata.get('Creator'))):
- self.creator_id[i] = 'creator%02d' % (i+1)
-
- if (needRefinesId or EXTH_PUBLISHER_FURIGANA in metadata) and 'Publisher' in metadata:
- for i in range(len(metadata.get('Publisher'))):
- self.publisher_id[i] = 'publisher%02d' % (i+1)
-
- def processRefinesMetadata(self):
- # create the refines metadata defined in epub3, or convert the refines property to opf: attributes for epub2.
- metadata = self.metadata
-
- refines_list = [
- [EXTH_TITLE_FURIGANA, self.title_id, self.title_attrib, 'title00'],
- [EXTH_CREATOR_FURIGANA, self.creator_id, self.creator_attrib, 'creator00'],
- [EXTH_PUBLISHER_FURIGANA, self.publisher_id, self.publisher_attrib, 'publisher00']
- ]
-
- create_refines_metadata = False
- for EXTH in lzip(*refines_list)[0]:
- if EXTH in metadata:
- create_refines_metadata = True
- break
- if create_refines_metadata:
- for [EXTH, id, attrib, defaultid] in refines_list:
- if self.target_epubver == '3':
- for i, value in list(id.items()):
- attrib[i] = ' id="%s"' % value
-
- if EXTH in metadata:
- if len(metadata[EXTH]) == 1 and len(id) == 1:
- self.createMetaTag(self.exth_solved_refines_metadata, 'file-as', metadata[EXTH][0], id[0])
- else:
- for i, value in enumerate(metadata[EXTH]):
- self.createMetaTag(self.exth_refines_metadata, 'file-as', value, id.get(i, defaultid))
- else:
- if EXTH in metadata:
- if len(metadata[EXTH]) == 1 and len(id) == 1:
- attr = ' opf:file-as="%s"' % metadata[EXTH][0]
- attrib[0] = attr
- else:
- for i, value in enumerate(metadata[EXTH]):
- attr = ' id="#%s" opf:file-as="%s"\n' % (id.get(i, defaultid), value)
- self.extra_attributes.append(attr)
-
- def createMetadataForFixedlayout(self):
- # convert fixed-layout metadata to epub3 format if needed.
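# A sketch of the intended conversion, assuming input metadata of
#   {'fixed-layout': ['true'], 'orientation-lock': ['portrait']}
# and meta tags written per the EPUB3 rendition vocabulary:
#   <meta property="rendition:layout">pre-paginated</meta>
#   <meta property="rendition:orientation">portrait</meta>
# Any fixed-layout value other than 'true' falls back to 'reflowable'.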
- metadata = self.metadata
-
- if 'fixed-layout' in metadata:
- fixedlayout = metadata['fixed-layout'][0]
- content = {'true' : 'pre-paginated'}.get(fixedlayout.lower(), 'reflowable')
- self.createMetaTag(self.exth_fixedlayout_metadata, 'rendition:layout', content)
-
- if 'orientation-lock' in metadata:
- content = metadata['orientation-lock'][0].lower()
- if content == 'portrait' or content == 'landscape':
- self.createMetaTag(self.exth_fixedlayout_metadata, 'rendition:orientation', content)
-
- # according to the epub3 spec about correspondence with Amazon,
- # if 'original-resolution' is provided it needs to be converted to a
- # meta viewport property tag stored in the <head> of **each**
- # xhtml page - so this tag would need to be handled by editing each part
- # before reaching this routine
- # we need to add support for this to the k8html routine
- # if 'original-resolution' in metadata.keys():
- # resolution = metadata['original-resolution'][0].lower()
- # width, height = resolution.split('x')
- # if width.isdigit() and int(width) > 0 and height.isdigit() and int(height) > 0:
- # viewport = 'width=%s, height=%s' % (width, height)
- # self.createMetaTag(self.exth_fixedlayout_metadata, 'rendition:viewport', viewport)
diff --git a/epy_extras/KindleUnpack/mobi_pagemap.py b/epy_extras/KindleUnpack/mobi_pagemap.py
deleted file mode 100644
index 5228d4e..0000000
--- a/epy_extras/KindleUnpack/mobi_pagemap.py
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
-
-from __future__ import unicode_literals, division, absolute_import, print_function
-
-from .compatibility_utils import PY2, unicode_str
-
-if PY2:
- range = xrange
-
-import struct
-# note: struct pack, unpack, unpack_from all require bytestring format
-# data all the way up to at least python 2.7.5, python 3 okay with bytestring
-
-import re
-# note: re requires the pattern to be the exact same type as the data to be searched in python3
-# but u"" is not allowed for the pattern itself, only b""
-
-
-_TABLE = [('m', 1000), ('cm', 900), ('d', 500), ('cd', 400), ('c', 100), ('xc', 90), ('l', 50), ('xl', 40), ('x', 10), ('ix', 9), ('v', 5), ('iv', 4), ('i', 1)]
-
-def int_to_roman(i):
- parts = []
- num = i
- for letter, value in _TABLE:
- while value <= num:
- num -= value
- parts.append(letter)
- return ''.join(parts)
-
-def roman_to_int(s):
- result = 0
- rnstr = s
- for letter, value in _TABLE:
- while rnstr.startswith(letter):
- result += value
- rnstr = rnstr[len(letter):]
- return result
-
-_pattern = r'''\(([^\)]*)\)'''
-_tup_pattern = re.compile(_pattern,re.IGNORECASE)
-
-
-def _parseNames(numpages, data):
- data = unicode_str(data)
- pagenames = []
- pageMap = ''
- for i in range(numpages):
- pagenames.append(None)
- for m in re.finditer(_tup_pattern, data):
- tup = m.group(1)
- if pageMap != '':
- pageMap += ','
- pageMap += '(' + tup + ')'
- spos, nametype, svalue = tup.split(",")
- # print(spos, nametype, svalue)
- if nametype == 'a' or nametype == 'r':
- svalue = int(svalue)
- spos = int(spos)
- for i in range(spos - 1, numpages):
- if nametype == 'r':
- pname = int_to_roman(svalue)
- svalue += 1
- elif nametype == 'a':
- pname = "%s" % svalue
- svalue += 1
- elif nametype == 'c':
- sp = svalue.find('|')
- if sp == -1:
- pname = svalue
- else:
- pname = svalue[0:sp]
- svalue = svalue[sp+1:]
- else:
- print("Error: unknown page numbering type", nametype)
- pagenames[i] = pname
- return pagenames, pageMap
-
-
-class PageMapProcessor:
-
- def __init__(self, mh,
data): - self.mh = mh - self.data = data - self.pagenames = [] - self.pageoffsets = [] - self.pageMap = '' - self.pm_len = 0 - self.pm_nn = 0 - self.pn_bits = 0 - self.pmoff = None - self.pmstr = '' - print("Extracting Page Map Information") - rev_len, = struct.unpack_from(b'>L', self.data, 0x10) - # skip over header, revision string length data, and revision string - ptr = 0x14 + rev_len - pm_1, self.pm_len, self.pm_nn, self.pm_bits = struct.unpack_from(b'>4H', self.data, ptr) - # print(pm_1, self.pm_len, self.pm_nn, self.pm_bits) - self.pmstr = self.data[ptr+8:ptr+8+self.pm_len] - self.pmoff = self.data[ptr+8+self.pm_len:] - offsize = b">L" - offwidth = 4 - if self.pm_bits == 16: - offsize = b">H" - offwidth = 2 - ptr = 0 - for i in range(self.pm_nn): - od, = struct.unpack_from(offsize, self.pmoff, ptr) - ptr += offwidth - self.pageoffsets.append(od) - self.pagenames, self.pageMap = _parseNames(self.pm_nn, self.pmstr) - - def getPageMap(self): - return self.pageMap - - def getNames(self): - return self.pagenames - - def getOffsets(self): - return self.pageoffsets - - # page-map.xml will be unicode but encoded to utf-8 immediately before being written to a file - def generateKF8PageMapXML(self, k8proc): - pagemapxml = '\n' - for i in range(len(self.pagenames)): - pos = self.pageoffsets[i] - name = self.pagenames[i] - if name is not None and name != "": - [pn, dir, filename, skelpos, skelend, aidtext] = k8proc.getSkelInfo(pos) - idtext = unicode_str(k8proc.getPageIDTag(pos)) - linktgt = unicode_str(filename) - if idtext != '': - linktgt += '#' + idtext - pagemapxml += '\n' % (name, dir, linktgt) - pagemapxml += "\n" - return pagemapxml - - def generateAPNX(self, apnx_meta): - if apnx_meta['format'] == 'MOBI_8': - content_header = '{"contentGuid":"%(contentGuid)s","asin":"%(asin)s","cdeType":"%(cdeType)s","format":"%(format)s","fileRevisionId":"1","acr":"%(acr)s"}' %apnx_meta - else: - content_header = '{"contentGuid":"%(contentGuid)s","asin":"%(asin)s","cdeType":"%(cdeType)s","fileRevisionId":"1"}' % apnx_meta - content_header = content_header.encode('utf-8') - page_header = '{"asin":"%(asin)s","pageMap":"%(pageMap)s"}' % apnx_meta - page_header = page_header.encode('utf-8') - apnx = struct.pack(b'>H',1) + struct.pack(b'>H',1) - apnx += struct.pack(b'>I', 12 + len(content_header)) - apnx += struct.pack(b'>I', len(content_header)) - apnx += content_header - apnx += struct.pack(b'>H', 1) - apnx += struct.pack(b'>H', len(page_header)) - apnx += struct.pack(b'>H', self.pm_nn) - apnx += struct.pack(b'>H', 32) - apnx += page_header - for page in self.pageoffsets: - apnx += struct.pack(b'>L', page) - return apnx diff --git a/epy_extras/KindleUnpack/mobi_sectioner.py b/epy_extras/KindleUnpack/mobi_sectioner.py deleted file mode 100644 index 81f62bb..0000000 --- a/epy_extras/KindleUnpack/mobi_sectioner.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -from .compatibility_utils import PY2, hexlify, bstr, bord, bchar - -import datetime - -if PY2: - range = xrange - -# note: struct pack, unpack, unpack_from all require bytestring format -# data all the way up to at least python 2.7.5, python 3 okay with bytestring -import struct - -from .unipath import pathof - -DUMP = False -""" Set to True to dump all possible information. 
""" - -class unpackException(Exception): - pass - - -def describe(data): - txtans = '' - hexans = hexlify(data) - for i in data: - if bord(i) < 32 or bord(i) > 127: - txtans += '?' - else: - txtans += bchar(i).decode('latin-1') - return '"' + txtans + '"' + ' 0x'+ hexans - -def datetimefrompalmtime(palmtime): - if palmtime > 0x7FFFFFFF: - pythondatetime = datetime.datetime(year=1904,month=1,day=1)+datetime.timedelta(seconds=palmtime) - else: - pythondatetime = datetime.datetime(year=1970,month=1,day=1)+datetime.timedelta(seconds=palmtime) - return pythondatetime - - -class Sectionizer: - - def __init__(self, filename): - self.data = b'' - with open(pathof(filename), 'rb') as f: - self.data = f.read() - self.palmheader = self.data[:78] - self.palmname = self.data[:32] - self.ident = self.palmheader[0x3C:0x3C+8] - self.num_sections, = struct.unpack_from(b'>H', self.palmheader, 76) - self.filelength = len(self.data) - sectionsdata = struct.unpack_from(bstr('>%dL' % (self.num_sections*2)), self.data, 78) + (self.filelength, 0) - self.sectionoffsets = sectionsdata[::2] - self.sectionattributes = sectionsdata[1::2] - self.sectiondescriptions = ["" for x in range(self.num_sections+1)] - self.sectiondescriptions[-1] = "File Length Only" - return - - def dumpsectionsinfo(self): - print("Section Offset Length UID Attribs Description") - for i in range(self.num_sections): - print("%3d %3X 0x%07X 0x%05X % 8d % 7d %s" % (i,i, self.sectionoffsets[i], self.sectionoffsets[ - i+1] - self.sectionoffsets[i], self.sectionattributes[i]&0xFFFFFF, (self.sectionattributes[i]>>24)&0xFF, self.sectiondescriptions[i])) - print("%3d %3X 0x%07X %s" % - (self.num_sections,self.num_sections, self.sectionoffsets[self.num_sections], self.sectiondescriptions[self.num_sections])) - - def setsectiondescription(self, section, description): - if section < len(self.sectiondescriptions): - self.sectiondescriptions[section] = description - else: - print("Section out of range: %d, description %s" % (section,description)) - - def dumppalmheader(self): - print("Palm Database Header") - print("Database name: " + repr(self.palmheader[:32])) - dbattributes, = struct.unpack_from(b'>H', self.palmheader, 32) - print("Bitfield attributes: 0x%0X" % dbattributes,) - if dbattributes != 0: - print(" (",) - if (dbattributes & 2): - print("Read-only; ",) - if (dbattributes & 4): - print("Dirty AppInfoArea; ",) - if (dbattributes & 8): - print("Needs to be backed up; ",) - if (dbattributes & 16): - print("OK to install over newer; ",) - if (dbattributes & 32): - print("Reset after installation; ",) - if (dbattributes & 64): - print("No copying by PalmPilot beaming; ",) - print(")") - else: - print("") - print("File version: %d" % struct.unpack_from(b'>H', self.palmheader, 34)[0]) - dbcreation, = struct.unpack_from(b'>L', self.palmheader, 36) - print("Creation Date: " + str(datetimefrompalmtime(dbcreation))+ (" (0x%0X)" % dbcreation)) - dbmodification, = struct.unpack_from(b'>L', self.palmheader, 40) - print("Modification Date: " + str(datetimefrompalmtime(dbmodification))+ (" (0x%0X)" % dbmodification)) - dbbackup, = struct.unpack_from(b'>L', self.palmheader, 44) - if dbbackup != 0: - print("Backup Date: " + str(datetimefrompalmtime(dbbackup))+ (" (0x%0X)" % dbbackup)) - print("Modification No.: %d" % struct.unpack_from(b'>L', self.palmheader, 48)[0]) - print("App Info offset: 0x%0X" % struct.unpack_from(b'>L', self.palmheader, 52)[0]) - print("Sort Info offset: 0x%0X" % struct.unpack_from(b'>L', self.palmheader, 56)[0]) - print("Type/Creator: 
%s/%s" % (repr(self.palmheader[60:64]), repr(self.palmheader[64:68]))) - print("Unique seed: 0x%0X" % struct.unpack_from(b'>L', self.palmheader, 68)[0]) - expectedzero, = struct.unpack_from(b'>L', self.palmheader, 72) - if expectedzero != 0: - print("Should be zero but isn't: %d" % struct.unpack_from(b'>L', self.palmheader, 72)[0]) - print("Number of sections: %d" % struct.unpack_from(b'>H', self.palmheader, 76)[0]) - return - - def loadSection(self, section): - before, after = self.sectionoffsets[section:section+2] - return self.data[before:after] diff --git a/epy_extras/KindleUnpack/mobi_split.py b/epy_extras/KindleUnpack/mobi_split.py deleted file mode 100755 index 3535029..0000000 --- a/epy_extras/KindleUnpack/mobi_split.py +++ /dev/null @@ -1,438 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -import struct -# note: struct pack, unpack, unpack_from all require bytestring format -# data all the way up to at least python 2.7.5, python 3 okay with bytestring - -from .unipath import pathof - - -# important pdb header offsets -unique_id_seed = 68 -number_of_pdb_records = 76 - -# important palmdoc header offsets -book_length = 4 -book_record_count = 8 -first_pdb_record = 78 - -# important rec0 offsets -length_of_book = 4 -mobi_header_base = 16 -mobi_header_length = 20 -mobi_type = 24 -mobi_version = 36 -first_non_text = 80 -title_offset = 84 -first_resc_record = 108 -first_content_index = 192 -last_content_index = 194 -kf8_fdst_index = 192 # for KF8 mobi headers -fcis_index = 200 -flis_index = 208 -srcs_index = 224 -srcs_count = 228 -primary_index = 244 -datp_index = 256 -huffoff = 112 -hufftbloff = 120 - -def getint(datain,ofs,sz=b'L'): - i, = struct.unpack_from(b'>'+sz,datain,ofs) - return i - -def writeint(datain,ofs,n,len=b'L'): - if len==b'L': - return datain[:ofs]+struct.pack(b'>L',n)+datain[ofs+4:] - else: - return datain[:ofs]+struct.pack(b'>H',n)+datain[ofs+2:] - -def getsecaddr(datain,secno): - nsec = getint(datain,number_of_pdb_records,b'H') - assert secno>=0 & secnoL',2*nsec+1)) - datalst.append(datain[unique_id_seed+4:number_of_pdb_records]) - datalst.append(struct.pack(b'>H',nsec)) - newstart = zerosecstart - for i in range(0,secno): - ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) - datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L', flgval)) - datalst.append(struct.pack(b'>L', secstart) + struct.pack(b'>L', (2*secno))) - for i in range(secno+1,nsec): - ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) - ofs = ofs + dif - datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L',flgval)) - lpad = newstart - (first_pdb_record + 8*nsec) - if lpad > 0: - datalst.append(b'\0' * lpad) - datalst.append(datain[zerosecstart:secstart]) - datalst.append(secdata) - datalst.append(datain[secend:]) - dataout = b''.join(datalst) - return dataout - -def nullsection(datain,secno): # make it zero-length without deleting it - datalst = [] - nsec = getint(datain,number_of_pdb_records,b'H') - secstart, secend = getsecaddr(datain,secno) - zerosecstart, zerosecend = getsecaddr(datain, 0) - dif = secend-secstart - datalst.append(datain[:first_pdb_record]) - for i in range(0,secno+1): - ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) - datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L', flgval)) - for i in range(secno+1, nsec): - ofs, flgval = 
struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) - ofs = ofs - dif - datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L',flgval)) - lpad = zerosecstart - (first_pdb_record + 8*nsec) - if lpad > 0: - datalst.append(b'\0' * lpad) - datalst.append(datain[zerosecstart: secstart]) - datalst.append(datain[secend:]) - dataout = b''.join(datalst) - return dataout - -def deletesectionrange(datain,firstsec,lastsec): # delete a range of sections - datalst = [] - firstsecstart,firstsecend = getsecaddr(datain,firstsec) - lastsecstart,lastsecend = getsecaddr(datain,lastsec) - zerosecstart, zerosecend = getsecaddr(datain, 0) - dif = lastsecend - firstsecstart + 8*(lastsec-firstsec+1) - nsec = getint(datain,number_of_pdb_records,b'H') - datalst.append(datain[:unique_id_seed]) - datalst.append(struct.pack(b'>L',2*(nsec-(lastsec-firstsec+1))+1)) - datalst.append(datain[unique_id_seed+4:number_of_pdb_records]) - datalst.append(struct.pack(b'>H',nsec-(lastsec-firstsec+1))) - newstart = zerosecstart - 8*(lastsec-firstsec+1) - for i in range(0,firstsec): - ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) - ofs = ofs-8*(lastsec-firstsec+1) - datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L', flgval)) - for i in range(lastsec+1,nsec): - ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) - ofs = ofs - dif - flgval = 2*(i-(lastsec-firstsec+1)) - datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L',flgval)) - lpad = newstart - (first_pdb_record + 8*(nsec - (lastsec - firstsec + 1))) - if lpad > 0: - datalst.append(b'\0' * lpad) - datalst.append(datain[zerosecstart:firstsecstart]) - datalst.append(datain[lastsecend:]) - dataout = b''.join(datalst) - return dataout - -def insertsection(datain,secno,secdata): # insert a new section - datalst = [] - nsec = getint(datain,number_of_pdb_records,b'H') - # print("inserting secno" , secno, "into" ,nsec, "sections") - secstart,secend = getsecaddr(datain,secno) - zerosecstart,zerosecend = getsecaddr(datain,0) - dif = len(secdata) - datalst.append(datain[:unique_id_seed]) - datalst.append(struct.pack(b'>L',2*(nsec+1)+1)) - datalst.append(datain[unique_id_seed+4:number_of_pdb_records]) - datalst.append(struct.pack(b'>H',nsec+1)) - newstart = zerosecstart + 8 - for i in range(0,secno): - ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) - ofs += 8 - datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L', flgval)) - datalst.append(struct.pack(b'>L', secstart + 8) + struct.pack(b'>L', (2*secno))) - for i in range(secno,nsec): - ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) - ofs = ofs + dif + 8 - flgval = 2*(i+1) - datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L',flgval)) - lpad = newstart - (first_pdb_record + 8*(nsec + 1)) - if lpad > 0: - datalst.append(b'\0' * lpad) - datalst.append(datain[zerosecstart:secstart]) - datalst.append(secdata) - datalst.append(datain[secstart:]) - dataout = b''.join(datalst) - return dataout - - -def insertsectionrange(sectionsource,firstsec,lastsec,sectiontarget,targetsec): # insert a range of sections - # print("inserting secno" , firstsec, "to", lastsec, "into" ,targetsec, "sections") - # dataout = sectiontarget - # for idx in range(lastsec,firstsec-1,-1): - # dataout = insertsection(dataout,targetsec,readsection(sectionsource,idx)) - # return dataout - datalst = [] - nsec = getint(sectiontarget,number_of_pdb_records,b'H') - zerosecstart, zerosecend = getsecaddr(sectiontarget,0) - insstart, nul = getsecaddr(sectiontarget,targetsec) 
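# Worked example with assumed numbers: for nins = 2 inserted sections the
# PDB record list grows by 2*8 = 16 bytes, so offsets of records before
# the insertion point shift by 16, while offsets of records at or after
# it shift by 16 plus the size of the copied payload (srcend - srcstart),
# matching the three loops below.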
- nins = lastsec - firstsec + 1 - srcstart, nul = getsecaddr(sectionsource,firstsec) - nul, srcend = getsecaddr(sectionsource,lastsec) - newstart = zerosecstart + 8*nins - - datalst.append(sectiontarget[:unique_id_seed]) - datalst.append(struct.pack(b'>L',2*(nsec+nins)+1)) - datalst.append(sectiontarget[unique_id_seed+4:number_of_pdb_records]) - datalst.append(struct.pack(b'>H',nsec+nins)) - for i in range(0,targetsec): - ofs, flgval = struct.unpack_from(b'>2L',sectiontarget,first_pdb_record+i*8) - ofsnew = ofs + 8*nins - flgvalnew = flgval - datalst.append(struct.pack(b'>L',ofsnew) + struct.pack(b'>L', flgvalnew)) - # print(ofsnew, flgvalnew, ofs, flgval) - srcstart0, nul = getsecaddr(sectionsource,firstsec) - for i in range(nins): - isrcstart, nul = getsecaddr(sectionsource,firstsec+i) - ofsnew = insstart + (isrcstart-srcstart0) + 8*nins - flgvalnew = 2*(targetsec+i) - datalst.append(struct.pack(b'>L',ofsnew) + struct.pack(b'>L', flgvalnew)) - # print(ofsnew, flgvalnew) - dif = srcend - srcstart - for i in range(targetsec,nsec): - ofs, flgval = struct.unpack_from(b'>2L',sectiontarget,first_pdb_record+i*8) - ofsnew = ofs + dif + 8*nins - flgvalnew = 2*(i+nins) - datalst.append(struct.pack(b'>L',ofsnew) + struct.pack(b'>L',flgvalnew)) - # print(ofsnew, flgvalnew, ofs, flgval) - lpad = newstart - (first_pdb_record + 8*(nsec + nins)) - if lpad > 0: - datalst.append(b'\0' * lpad) - datalst.append(sectiontarget[zerosecstart:insstart]) - datalst.append(sectionsource[srcstart:srcend]) - datalst.append(sectiontarget[insstart:]) - dataout = b''.join(datalst) - return dataout - -def get_exth_params(rec0): - ebase = mobi_header_base + getint(rec0,mobi_header_length) - elen = getint(rec0,ebase+4) - enum = getint(rec0,ebase+8) - return ebase,elen,enum - -def add_exth(rec0,exth_num,exth_bytes): - ebase,elen,enum = get_exth_params(rec0) - newrecsize = 8+len(exth_bytes) - newrec0 = rec0[0:ebase+4]+struct.pack(b'>L',elen+newrecsize)+struct.pack(b'>L',enum+1)+\ - struct.pack(b'>L',exth_num)+struct.pack(b'>L',newrecsize)+exth_bytes+rec0[ebase+12:] - newrec0 = writeint(newrec0,title_offset,getint(newrec0,title_offset)+newrecsize) - return newrec0 - -def read_exth(rec0,exth_num): - exth_values = [] - ebase,elen,enum = get_exth_params(rec0) - ebase = ebase+12 - while enum>0: - exth_id = getint(rec0,ebase) - if exth_id == exth_num: - # We might have multiple exths, so build a list. 
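# Layout of a single EXTH record, as the arithmetic below assumes
# (offsets relative to the start of the record at ebase):
#   bytes 0-3  record id           -> getint(rec0, ebase)
#   bytes 4-7  total record length -> getint(rec0, ebase+4)
#   bytes 8-   payload of (length - 8) bytes -> rec0[ebase+8:ebase+length]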
- exth_values.append(rec0[ebase+8:ebase+getint(rec0,ebase+4)]) - enum = enum-1 - ebase = ebase+getint(rec0,ebase+4) - return exth_values - -def write_exth(rec0,exth_num,exth_bytes): - ebase,elen,enum = get_exth_params(rec0) - ebase_idx = ebase+12 - enum_idx = enum - while enum_idx>0: - exth_id = getint(rec0,ebase_idx) - if exth_id == exth_num: - dif = len(exth_bytes)+8-getint(rec0,ebase_idx+4) - newrec0 = rec0 - if dif != 0: - newrec0 = writeint(newrec0,title_offset,getint(newrec0,title_offset)+dif) - return newrec0[:ebase+4]+struct.pack(b'>L',elen+len(exth_bytes)+8-getint(rec0,ebase_idx+4))+\ - struct.pack(b'>L',enum)+rec0[ebase+12:ebase_idx+4]+\ - struct.pack(b'>L',len(exth_bytes)+8)+exth_bytes+\ - rec0[ebase_idx+getint(rec0,ebase_idx+4):] - enum_idx = enum_idx-1 - ebase_idx = ebase_idx+getint(rec0,ebase_idx+4) - return rec0 - -def del_exth(rec0,exth_num): - ebase,elen,enum = get_exth_params(rec0) - ebase_idx = ebase+12 - enum_idx = 0 - while enum_idx < enum: - exth_id = getint(rec0,ebase_idx) - exth_size = getint(rec0,ebase_idx+4) - if exth_id == exth_num: - newrec0 = rec0 - newrec0 = writeint(newrec0,title_offset,getint(newrec0,title_offset)-exth_size) - newrec0 = newrec0[:ebase_idx]+newrec0[ebase_idx+exth_size:] - newrec0 = newrec0[0:ebase+4]+struct.pack(b'>L',elen-exth_size)+struct.pack(b'>L',enum-1)+newrec0[ebase+12:] - return newrec0 - enum_idx += 1 - ebase_idx = ebase_idx+exth_size - return rec0 - - -class mobi_split: - - def __init__(self, infile): - datain = b'' - with open(pathof(infile), 'rb') as f: - datain = f.read() - datain_rec0 = readsection(datain,0) - ver = getint(datain_rec0,mobi_version) - self.combo = (ver!=8) - if not self.combo: - return - exth121 = read_exth(datain_rec0,121) - if len(exth121) == 0: - self.combo = False - return - else: - # only pay attention to first exth121 - # (there should only be one) - datain_kf8, = struct.unpack_from(b'>L',exth121[0],0) - if datain_kf8 == 0xffffffff: - self.combo = False - return - datain_kfrec0 =readsection(datain,datain_kf8) - - # create the standalone mobi7 - num_sec = getint(datain,number_of_pdb_records,b'H') - # remove BOUNDARY up to but not including ELF record - self.result_file7 = deletesectionrange(datain,datain_kf8-1,num_sec-2) - # check if there are SRCS records and delete them - srcs = getint(datain_rec0,srcs_index) - num_srcs = getint(datain_rec0,srcs_count) - if srcs != 0xffffffff and num_srcs > 0: - self.result_file7 = deletesectionrange(self.result_file7,srcs,srcs+num_srcs-1) - datain_rec0 = writeint(datain_rec0,srcs_index,0xffffffff) - datain_rec0 = writeint(datain_rec0,srcs_count,0) - # reset the EXTH 121 KF8 Boundary meta data to 0xffffffff - datain_rec0 = write_exth(datain_rec0,121, struct.pack(b'>L', 0xffffffff)) - # datain_rec0 = del_exth(datain_rec0,121) - # datain_rec0 = del_exth(datain_rec0,534) - # don't remove the EXTH 125 KF8 Count of Resources, seems to be present in mobi6 files as well - # set the EXTH 129 KF8 Masthead / Cover Image string to the null string - datain_rec0 = write_exth(datain_rec0,129, b'') - # don't remove the EXTH 131 KF8 Unidentified Count, seems to be present in mobi6 files as well - - # need to reset flags stored in 0x80-0x83 - # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050 - # Bit Flags - # 0x1000 = Bit 12 indicates if embedded fonts are used or not - # 0x0800 = means this Header points to *shared* images/resource/fonts ?? - # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8? 
- # 0x0040 = exth exists - # 0x0010 = Not sure but this is always set so far - fval, = struct.unpack_from(b'>L',datain_rec0, 0x80) - # need to remove flag 0x0800 for KindlePreviewer 2.8 and unset Bit 12 for embedded fonts - fval = fval & 0x07FF - datain_rec0 = datain_rec0[:0x80] + struct.pack(b'>L',fval) + datain_rec0[0x84:] - - self.result_file7 = writesection(self.result_file7,0,datain_rec0) - - # no need to replace kf8 style fcis with mobi 7 one - # fcis_secnum, = struct.unpack_from(b'>L',datain_rec0, 0xc8) - # if fcis_secnum != 0xffffffff: - # fcis_info = readsection(datain, fcis_secnum) - # text_len, = struct.unpack_from(b'>L', fcis_info, 0x14) - # new_fcis = 'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00' - # new_fcis += struct.pack(b'>L',text_len) - # new_fcis += '\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00' - # self.result_file7 = writesection(self.result_file7, fcis_secnum, new_fcis) - - firstimage = getint(datain_rec0,first_resc_record) - lastimage = getint(datain_rec0,last_content_index,b'H') - # print("Old First Image, last Image", firstimage,lastimage) - if lastimage == 0xffff: - # find the lowest of the next sections and copy up to that. - ofs_list = [(fcis_index,b'L'),(flis_index,b'L'),(datp_index,b'L'),(hufftbloff, b'L')] - for ofs,sz in ofs_list: - n = getint(datain_rec0,ofs,sz) - # print("n",n) - if n > 0 and n < lastimage: - lastimage = n-1 - print("First Image, last Image", firstimage,lastimage) - - # Try to null out FONT and RES, but leave the (empty) PDB record so image refs remain valid - for i in range(firstimage,lastimage): - imgsec = readsection(self.result_file7,i) - if imgsec[0:4] in [b'RESC',b'FONT']: - self.result_file7 = nullsection(self.result_file7,i) - - # mobi7 finished - - # create standalone mobi8 - self.result_file8 = deletesectionrange(datain,0,datain_kf8-1) - target = getint(datain_kfrec0,first_resc_record) - self.result_file8 = insertsectionrange(datain,firstimage,lastimage,self.result_file8,target) - datain_kfrec0 =readsection(self.result_file8,0) - - # Only keep the correct EXTH 116 StartOffset, KG 2.5 carries over the one from the mobi7 part, which then points at garbage in the mobi8 part, and confuses FW 3.4 - kf8starts = read_exth(datain_kfrec0,116) - # If we have multiple StartOffset, keep only the last one - kf8start_count = len(kf8starts) - while kf8start_count > 1: - kf8start_count -= 1 - datain_kfrec0 = del_exth(datain_kfrec0,116) - - # update the EXTH 125 KF8 Count of Images/Fonts/Resources - datain_kfrec0 = write_exth(datain_kfrec0,125,struct.pack(b'>L',lastimage-firstimage+1)) - - # need to reset flags stored in 0x80-0x83 - # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050 - # standalone mobi8 with exth: 0x0050 - # Bit Flags - # 0x1000 = Bit 12 indicates if embedded fonts are used or not - # 0x0800 = means this Header points to *shared* images/resource/fonts ?? - # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8? 
- # 0x0040 = exth exists - # 0x0010 = Not sure but this is always set so far - fval, = struct.unpack_from('>L',datain_kfrec0, 0x80) - fval = fval & 0x1FFF - fval |= 0x0800 - datain_kfrec0 = datain_kfrec0[:0x80] + struct.pack(b'>L',fval) + datain_kfrec0[0x84:] - - # properly update other index pointers that have been shifted by the insertion of images - ofs_list = [(kf8_fdst_index,b'L'),(fcis_index,b'L'),(flis_index,b'L'),(datp_index,b'L'),(hufftbloff, b'L')] - for ofs,sz in ofs_list: - n = getint(datain_kfrec0,ofs,sz) - if n != 0xffffffff: - datain_kfrec0 = writeint(datain_kfrec0,ofs,n+lastimage-firstimage+1,sz) - self.result_file8 = writesection(self.result_file8,0,datain_kfrec0) - - # no need to replace kf8 style fcis with mobi 7 one - # fcis_secnum, = struct.unpack_from(b'>L',datain_kfrec0, 0xc8) - # if fcis_secnum != 0xffffffff: - # fcis_info = readsection(self.result_file8, fcis_secnum) - # text_len, = struct.unpack_from(b'>L', fcis_info, 0x14) - # new_fcis = 'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00' - # new_fcis += struct.pack(b'>L',text_len) - # new_fcis += '\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00' - # self.result_file8 = writesection(self.result_file8, fcis_secnum, new_fcis) - - # mobi8 finished - - def getResult8(self): - return self.result_file8 - - def getResult7(self): - return self.result_file7 diff --git a/epy_extras/KindleUnpack/mobi_uncompress.py b/epy_extras/KindleUnpack/mobi_uncompress.py deleted file mode 100644 index c5fad85..0000000 --- a/epy_extras/KindleUnpack/mobi_uncompress.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -from .compatibility_utils import PY2, bchr, lmap, bstr - -if PY2: - range = xrange - -import struct -# note: struct pack, unpack, unpack_from all require bytestring format -# data all the way up to at least python 2.7.5, python 3 okay with bytestring - - -class unpackException(Exception): - pass - -class UncompressedReader: - - def unpack(self, data): - return data - -class PalmdocReader: - - def unpack(self, i): - o, p = b'', 0 - while p < len(i): - # for python 3 must use slice since i[p] returns int while slice returns character - c = ord(i[p:p+1]) - p += 1 - if (c >= 1 and c <= 8): - o += i[p:p+c] - p += c - elif (c < 128): - o += bchr(c) - elif (c >= 192): - o += b' ' + bchr(c ^ 128) - else: - if p < len(i): - c = (c << 8) | ord(i[p:p+1]) - p += 1 - m = (c >> 3) & 0x07ff - n = (c & 7) + 3 - if (m > n): - o += o[-m:n-m] - else: - for _ in range(n): - # because of completely ass-backwards decision by python mainters for python 3 - # we must use slice for bytes as i[p] returns int while slice returns character - if m == 1: - o += o[-m:] - else: - o += o[-m:-m+1] - return o - -class HuffcdicReader: - q = struct.Struct(b'>Q').unpack_from - - def loadHuff(self, huff): - if huff[0:8] != b'HUFF\x00\x00\x00\x18': - raise unpackException('invalid huff header') - off1, off2 = struct.unpack_from(b'>LL', huff, 8) - - def dict1_unpack(v): - codelen, term, maxcode = v&0x1f, v&0x80, v>>8 - assert codelen != 0 - if codelen <= 8: - assert term - maxcode = ((maxcode + 1) << (32 - codelen)) - 1 - return (codelen, term, maxcode) - self.dict1 = lmap(dict1_unpack, struct.unpack_from(b'>256L', huff, off1)) - - dict2 = struct.unpack_from(b'>64L', huff, off2) - self.mincode, self.maxcode = (), () - for codelen, mincode in 
enumerate((0,) + dict2[0::2]): - self.mincode += (mincode << (32 - codelen), ) - for codelen, maxcode in enumerate((0,) + dict2[1::2]): - self.maxcode += (((maxcode + 1) << (32 - codelen)) - 1, ) - - self.dictionary = [] - - def loadCdic(self, cdic): - if cdic[0:8] != b'CDIC\x00\x00\x00\x10': - raise unpackException('invalid cdic header') - phrases, bits = struct.unpack_from(b'>LL', cdic, 8) - n = min(1<H').unpack_from - def getslice(off): - blen, = h(cdic, 16+off) - slice = cdic[18+off:18+off+(blen&0x7fff)] - return (slice, blen&0x8000) - self.dictionary += lmap(getslice, struct.unpack_from(bstr('>%dH' % n), cdic, 16)) - - def unpack(self, data): - q = HuffcdicReader.q - - bitsleft = len(data) * 8 - data += b"\x00\x00\x00\x00\x00\x00\x00\x00" - pos = 0 - x, = q(data, pos) - n = 32 - - s = b'' - while True: - if n <= 0: - pos += 4 - x, = q(data, pos) - n += 32 - code = (x >> n) & ((1 << 32) - 1) - - codelen, term, maxcode = self.dict1[code >> 24] - if not term: - while code < self.mincode[codelen]: - codelen += 1 - maxcode = self.maxcode[codelen] - - n -= codelen - bitsleft -= codelen - if bitsleft < 0: - break - - r = (maxcode - code) >> (32 - codelen) - slice, flag = self.dictionary[r] - if not flag: - self.dictionary[r] = None - slice = self.unpack(slice) - self.dictionary[r] = (slice, 1) - s += slice - return s diff --git a/epy_extras/KindleUnpack/mobi_utils.py b/epy_extras/KindleUnpack/mobi_utils.py deleted file mode 100644 index 6791e0d..0000000 --- a/epy_extras/KindleUnpack/mobi_utils.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab -# flake8: noqa - -from __future__ import unicode_literals, division, absolute_import, print_function - -from .compatibility_utils import PY2, text_type, bchr, bord - -import binascii - -if PY2: - range = xrange - -from itertools import cycle - -def getLanguage(langID, sublangID): - mobilangdict = { - 54 : {0 : 'af'}, # Afrikaans - 28 : {0 : 'sq'}, # Albanian - 1 : {0 : 'ar' , 5 : 'ar-dz' , 15 : 'ar-bh' , 3 : 'ar-eg' , 2 : 'ar-iq', 11 : 'ar-jo' , 13 : 'ar-kw' , 12 : 'ar-lb' , 4: 'ar-ly', - 6 : 'ar-ma' , 8 : 'ar-om' , 16 : 'ar-qa' , 1 : 'ar-sa' , 10 : 'ar-sy' , 7 : 'ar-tn' , 14 : 'ar-ae' , 9 : 'ar-ye'}, - # Arabic, Arabic (Algeria), Arabic (Bahrain), Arabic (Egypt), Arabic - # (Iraq), Arabic (Jordan), Arabic (Kuwait), Arabic (Lebanon), Arabic - # (Libya), Arabic (Morocco), Arabic (Oman), Arabic (Qatar), Arabic - # (Saudi Arabia), Arabic (Syria), Arabic (Tunisia), Arabic (United Arab - # Emirates), Arabic (Yemen) - 43 : {0 : 'hy'}, # Armenian - 77 : {0 : 'as'}, # Assamese - 44 : {0 : 'az'}, # "Azeri (IANA: Azerbaijani) - 45 : {0 : 'eu'}, # Basque - 35 : {0 : 'be'}, # Belarusian - 69 : {0 : 'bn'}, # Bengali - 2 : {0 : 'bg'}, # Bulgarian - 3 : {0 : 'ca'}, # Catalan - 4 : {0 : 'zh' , 3 : 'zh-hk' , 2 : 'zh-cn' , 4 : 'zh-sg' , 1 : 'zh-tw'}, - # Chinese, Chinese (Hong Kong), Chinese (PRC), Chinese (Singapore), Chinese (Taiwan) - 26 : {0 : 'hr', 3 : 'sr'}, # Croatian, Serbian - 5 : {0 : 'cs'}, # Czech - 6 : {0 : 'da'}, # Danish - 19 : {0: 'nl', 1 : 'nl' , 2 : 'nl-be'}, # Dutch / Flemish, Dutch (Belgium) - 9 : {0: 'en', 1 : 'en' , 3 : 'en-au' , 40 : 'en-bz' , 4 : 'en-ca' , 6 : 'en-ie' , 8 : 'en-jm' , 5 : 'en-nz' , 13 : 'en-ph' , - 7 : 'en-za' , 11 : 'en-tt' , 2 : 'en-gb', 1 : 'en-us' , 12 : 'en-zw'}, - # English, English (Australia), English (Belize), English (Canada), - # English (Ireland), English (Jamaica), English (New Zealand), English - # (Philippines), English (South 
Africa), English (Trinidad), English - # (United Kingdom), English (United States), English (Zimbabwe) - 37 : {0 : 'et'}, # Estonian - 56 : {0 : 'fo'}, # Faroese - 41 : {0 : 'fa'}, # Farsi / Persian - 11 : {0 : 'fi'}, # Finnish - 12 : {0 : 'fr', 1 : 'fr' , 2 : 'fr-be' , 3 : 'fr-ca' , 5 : 'fr-lu' , 6 : 'fr-mc' , 4 : 'fr-ch'}, - # French, French (Belgium), French (Canada), French (Luxembourg), French (Monaco), French (Switzerland) - 55 : {0 : 'ka'}, # Georgian - 7 : {0 : 'de', 1 : 'de' , 3 : 'de-at' , 5 : 'de-li' , 4 : 'de-lu' , 2 : 'de-ch'}, - # German, German (Austria), German (Liechtenstein), German (Luxembourg), German (Switzerland) - 8 : {0 : 'el'}, # Greek, Modern (1453-) - 71 : {0 : 'gu'}, # Gujarati - 13 : {0 : 'he'}, # Hebrew (also code 'iw'?) - 57 : {0 : 'hi'}, # Hindi - 14 : {0 : 'hu'}, # Hungarian - 15 : {0 : 'is'}, # Icelandic - 33 : {0 : 'id'}, # Indonesian - 16 : {0 : 'it', 1 : 'it' , 2 : 'it-ch'}, # Italian, Italian (Switzerland) - 17 : {0 : 'ja'}, # Japanese - 75 : {0 : 'kn'}, # Kannada - 63 : {0 : 'kk'}, # Kazakh - 87 : {0 : 'x-kok'}, # Konkani (real language code is 'kok'?) - 18 : {0 : 'ko'}, # Korean - 38 : {0 : 'lv'}, # Latvian - 39 : {0 : 'lt'}, # Lithuanian - 47 : {0 : 'mk'}, # Macedonian - 62 : {0 : 'ms'}, # Malay - 76 : {0 : 'ml'}, # Malayalam - 58 : {0 : 'mt'}, # Maltese - 78 : {0 : 'mr'}, # Marathi - 97 : {0 : 'ne'}, # Nepali - 20 : {0 : 'no'}, # Norwegian - 72 : {0 : 'or'}, # Oriya - 21 : {0 : 'pl'}, # Polish - 22 : {0 : 'pt', 2 : 'pt' , 1 : 'pt-br'}, # Portuguese, Portuguese (Brazil) - 70 : {0 : 'pa'}, # Punjabi - 23 : {0 : 'rm'}, # "Rhaeto-Romanic" (IANA: Romansh) - 24 : {0 : 'ro'}, # Romanian - 25 : {0 : 'ru'}, # Russian - 59 : {0 : 'sz'}, # "Sami (Lappish)" (not an IANA language code) - # IANA code for "Northern Sami" is 'se' - # 'SZ' is the IANA region code for Swaziland - 79 : {0 : 'sa'}, # Sanskrit - 27 : {0 : 'sk'}, # Slovak - 36 : {0 : 'sl'}, # Slovenian - 46 : {0 : 'sb'}, # "Sorbian" (not an IANA language code) - # 'SB' is IANA region code for 'Solomon Islands' - # Lower Sorbian = 'dsb' - # Upper Sorbian = 'hsb' - # Sorbian Languages = 'wen' - 10 : {0 : 'es' , 4 : 'es' , 44 : 'es-ar' , 64 : 'es-bo' , 52 : 'es-cl' , 36 : 'es-co' , 20 : 'es-cr' , 28 : 'es-do' , - 48 : 'es-ec' , 68 : 'es-sv' , 16 : 'es-gt' , 72 : 'es-hn' , 8 : 'es-mx' , 76 : 'es-ni' , 24 : 'es-pa' , - 60 : 'es-py' , 40 : 'es-pe' , 80 : 'es-pr' , 56 : 'es-uy' , 32 : 'es-ve'}, - # Spanish, Spanish (Mobipocket bug?), Spanish (Argentina), Spanish - # (Bolivia), Spanish (Chile), Spanish (Colombia), Spanish (Costa Rica), - # Spanish (Dominican Republic), Spanish (Ecuador), Spanish (El - # Salvador), Spanish (Guatemala), Spanish (Honduras), Spanish (Mexico), - # Spanish (Nicaragua), Spanish (Panama), Spanish (Paraguay), Spanish - # (Peru), Spanish (Puerto Rico), Spanish (Uruguay), Spanish (Venezuela) - 48 : {0 : 'sx'}, # "Sutu" (not an IANA language code) - # "Sutu" is another name for "Southern Sotho"? 
- # IANA code for "Southern Sotho" is 'st' - 65 : {0 : 'sw'}, # Swahili - 29 : {0 : 'sv' , 1 : 'sv' , 8 : 'sv-fi'}, # Swedish, Swedish (Finland) - 73 : {0 : 'ta'}, # Tamil - 68 : {0 : 'tt'}, # Tatar - 74 : {0 : 'te'}, # Telugu - 30 : {0 : 'th'}, # Thai - 49 : {0 : 'ts'}, # Tsonga - 50 : {0 : 'tn'}, # Tswana - 31 : {0 : 'tr'}, # Turkish - 34 : {0 : 'uk'}, # Ukrainian - 32 : {0 : 'ur'}, # Urdu - 67 : {0 : 'uz', 2 : 'uz'}, # Uzbek - 42 : {0 : 'vi'}, # Vietnamese - 52 : {0 : 'xh'}, # Xhosa - 53 : {0 : 'zu'}, # Zulu - } - lang = "en" - if langID in mobilangdict: - subdict = mobilangdict[langID] - lang = subdict[0] - if sublangID in subdict: - lang = subdict[sublangID] - return lang - - -def toHex(byteList): - return binascii.hexlify(byteList) - -# returns base32 bytestring -def toBase32(value, npad=4): - digits = b'0123456789ABCDEFGHIJKLMNOPQRSTUV' - num_string=b'' - current = value - while current != 0: - next, remainder = divmod(current, 32) - rem_string = digits[remainder:remainder+1] - num_string = rem_string + num_string - current=next - if num_string == b'': - num_string = b'0' - pad = npad - len(num_string) - if pad > 0: - num_string = b'0' * pad + num_string - return num_string - - -# converts base32 string to value -def fromBase32(str_num): - if isinstance(str_num, text_type): - str_num = str_num.encode('latin-1') - scalelst = [1,32,1024,32768,1048576,33554432,1073741824,34359738368] - value = 0 - j = 0 - n = len(str_num) - scale = 0 - for i in range(n): - c = str_num[n-i-1:n-i] - if c in b'0123456789': - v = ord(c) - ord(b'0') - else: - v = ord(c) - ord(b'A') + 10 - if j < len(scalelst): - scale = scalelst[j] - else: - scale = scale * 32 - j += 1 - if v != 0: - value = value + (v * scale) - return value - - -# note: if decode a bytestring using 'latin-1' (or any other 0-255 encoding) -# in place of ascii you will get a byte to half-word or integer -# one to one mapping of values from 0 - 255 - -def mangle_fonts(encryption_key, data): - if isinstance(encryption_key, text_type): - encryption_key = encryption_key.encode('latin-1') - crypt = data[:1024] - key = cycle(iter(map(bord, encryption_key))) - # encrypt = ''.join([chr(ord(x)^key.next()) for x in crypt]) - encrypt = b''.join([bchr(bord(x)^next(key)) for x in crypt]) - return encrypt + data[1024:] diff --git a/epy_extras/KindleUnpack/mobiml2xhtml.py b/epy_extras/KindleUnpack/mobiml2xhtml.py deleted file mode 100755 index 94fc671..0000000 --- a/epy_extras/KindleUnpack/mobiml2xhtml.py +++ /dev/null @@ -1,527 +0,0 @@ -#! 
/usr/bin/python -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - - -# this program works in concert with the output from KindleUnpack - -''' -Convert from Mobi ML to XHTML -''' - -from __future__ import division, absolute_import, print_function - -import os -import sys -import re - -SPECIAL_HANDLING_TAGS = { - '?xml' : ('xmlheader', -1), - '!--' : ('comment', -3), - '!DOCTYPE' : ('doctype', -1), -} - -SPECIAL_HANDLING_TYPES = ['xmlheader', 'doctype', 'comment'] - -SELF_CLOSING_TAGS = ['br' , 'hr', 'input', 'img', 'image', 'meta', 'spacer', 'link', 'frame', 'base', 'col', 'reference'] - -class MobiMLConverter(object): - - PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE) - IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex') - - def __init__(self, filename): - self.base_css_rules = 'blockquote { margin: 0em 0em 0em 1.25em }\n' - self.base_css_rules += 'p { margin: 0em }\n' - self.base_css_rules += '.bold { font-weight: bold }\n' - self.base_css_rules += '.italic { font-style: italic }\n' - self.base_css_rules += '.mbp_pagebreak { page-break-after: always; margin: 0; display: block }\n' - self.tag_css_rules = {} - self.tag_css_rule_cnt = 0 - self.path = [] - self.filename = filename - self.wipml = open(self.filename, 'r').read() - self.pos = 0 - self.opfname = self.filename.rsplit('.',1)[0] + '.opf' - self.opos = 0 - self.meta = '' - self.cssname = os.path.join(os.path.dirname(self.filename),'styles.css') - self.current_font_size = 3 - self.font_history = [] - - def cleanup_html(self): - self.wipml = re.sub(r'
', '', self.wipml) - self.wipml = self.wipml.replace('\r\n', '\n') - self.wipml = self.wipml.replace('> <', '>\n<') - self.wipml = self.wipml.replace(']*>', '', self.wipml) - self.wipml = self.wipml.replace('

','
') - - def replace_page_breaks(self): - self.wipml = self.PAGE_BREAK_PAT.sub( - '
<div class="mbp_pagebreak" />',
- self.wipml)
-
- # parse leading text of ml and tag
- def parseml(self):
- p = self.pos
- if p >= len(self.wipml):
- return None
- if self.wipml[p] != '<':
- res = self.wipml.find('<',p)
- if res == -1 :
- res = len(self.wipml)
- self.pos = res
- return self.wipml[p:res], None
- # handle comment as a special case to deal with multi-line comments
- if self.wipml[p:p+4] == '<!--':
- te = self.wipml.find('-->',p+1)
- if te != -1:
- te = te+2
- else :
- te = self.wipml.find('>',p+1)
- ntb = self.wipml.find('<',p+1)
- if ntb != -1 and ntb < te:
- self.pos = ntb
- return self.wipml[p:ntb], None
- self.pos = te + 1
- return None, self.wipml[p:te+1]
-
- # parses string version of tag to identify its name,
- # its type 'begin', 'end' or 'single',
- # plus builds a hashtable of its attributes
- # code is written to handle the possibility of very poor formatting
- def parsetag(self, s):
- p = 1
- # get the tag name
- tname = None
- ttype = None
- tattr = {}
- while s[p:p+1] == ' ' :
- p += 1
- if s[p:p+1] == '/':
- ttype = 'end'
- p += 1
- while s[p:p+1] == ' ' :
- p += 1
- b = p
- while s[p:p+1] not in ('>', '/', ' ', '"', "'", "\r", "\n") :
- p += 1
- tname=s[b:p].lower()
- if tname == '!doctype':
- tname = '!DOCTYPE'
- # special cases
- if tname in SPECIAL_HANDLING_TAGS:
- ttype, backstep = SPECIAL_HANDLING_TAGS[tname]
- tattr['special'] = s[p:backstep]
- if ttype is None:
- # parse any attributes
- while s.find('=',p) != -1 :
- while s[p:p+1] == ' ' :
- p += 1
- b = p
- while s[p:p+1] != '=' :
- p += 1
- aname = s[b:p].lower()
- aname = aname.rstrip(' ')
- p += 1
- while s[p:p+1] == ' ' :
- p += 1
- if s[p:p+1] in ('"', "'") :
- p = p + 1
- b = p
- while s[p:p+1] not in ('"', "'") :
- p += 1
- val = s[b:p]
- p += 1
- else :
- b = p
- while s[p:p+1] not in ('>', '/', ' ') :
- p += 1
- val = s[b:p]
- tattr[aname] = val
- # label beginning and single tags
- if ttype is None:
- ttype = 'begin'
- if s.find(' /',p) >= 0:
- ttype = 'single_ext'
- elif s.find('/',p) >= 0:
- ttype = 'single'
- return ttype, tname, tattr
-
- # main routine to convert from mobi markup language to html
- def processml(self):
-
- # are these really needed?
- html_done = False
- head_done = False
- body_done = False
-
- skip = False
-
- htmlstr = ''
- self.replace_page_breaks()
- self.cleanup_html()
-
- # now parse the cleaned up ml into standard xhtml
- while True:
-
- r = self.parseml()
- if not r:
- break
-
- text, tag = r
-
- if text:
- if not skip:
- htmlstr += text
-
- if tag:
- ttype, tname, tattr = self.parsetag(tag)
-
- # If we run into a DTD or xml declarations inside the body ... bail.
- if tname in SPECIAL_HANDLING_TAGS and tname != 'comment' and body_done:
- htmlstr += '\n'
- break
-
- # make sure self-closing tags actually self-close
- if ttype == 'begin' and tname in SELF_CLOSING_TAGS:
- ttype = 'single'
-
- # make sure any end tags of self-closing tags are discarded
- if ttype == 'end' and tname in SELF_CLOSING_TAGS:
- continue
-
- # remove embedded guide and references from old mobis
- if tname in ('guide', 'ncx', 'reference') and ttype in ('begin', 'single', 'single_ext'):
- tname = 'removeme:{0}'.format(tname)
- tattr = None
- if tname in ('guide', 'ncx', 'reference', 'font', 'span') and ttype == 'end':
- if self.path[-1] == 'removeme:{0}'.format(tname):
- tname = 'removeme:{0}'.format(tname)
- tattr = None
-
- # Get rid of font tags that only have a color attribute.
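# For example, a hypothetical '<font color="#cc0000">' start tag is
# dropped entirely here: a color-only font tag carries no face or size
# information worth converting to a styled span.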
- if tname == 'font' and ttype in ('begin', 'single', 'single_ext'): - if 'color' in tattr and len(tattr) == 1: - tname = 'removeme:{0}'.format(tname) - tattr = None - - # Get rid of empty spans in the markup. - if tname == 'span' and ttype in ('begin', 'single', 'single_ext') and not len(tattr): - tname = 'removeme:{0}'.format(tname) - - # need to handle fonts outside of the normal methods - # so fonts tags won't be added to the self.path since we keep track - # of font tags separately with self.font_history - if tname == 'font' and ttype == 'begin': - # check for nested font start tags - if len(self.font_history) > 0 : - # inject a font end tag - taginfo = ('end', 'font', None) - htmlstr += self.processtag(taginfo) - self.font_history.append((ttype, tname, tattr)) - # handle the current font start tag - taginfo = (ttype, tname, tattr) - htmlstr += self.processtag(taginfo) - continue - - # check for nested font tags and unnest them - if tname == 'font' and ttype == 'end': - self.font_history.pop() - # handle this font end tag - taginfo = ('end', 'font', None) - htmlstr += self.processtag(taginfo) - # check if we were nested - if len(self.font_history) > 0: - # inject a copy of the most recent font start tag from history - taginfo = self.font_history[-1] - htmlstr += self.processtag(taginfo) - continue - - # keep track of nesting path - if ttype == 'begin': - self.path.append(tname) - elif ttype == 'end': - if tname != self.path[-1]: - print('improper nesting: ', self.path, tname, ttype) - if tname not in self.path: - # handle case of end tag with no beginning by injecting empty begin tag - taginfo = ('begin', tname, None) - htmlstr += self.processtag(taginfo) - print(" - fixed by injecting empty start tag ", tname) - self.path.append(tname) - elif len(self.path) > 1 and tname == self.path[-2]: - # handle case of dangling missing end - taginfo = ('end', self.path[-1], None) - htmlstr += self.processtag(taginfo) - print(" - fixed by injecting end tag ", self.path[-1]) - self.path.pop() - self.path.pop() - - if tname == 'removeme:{0}'.format(tname): - if ttype in ('begin', 'single', 'single_ext'): - skip = True - else: - skip = False - else: - taginfo = (ttype, tname, tattr) - htmlstr += self.processtag(taginfo) - - # handle potential issue of multiple html, head, and body sections - if tname == 'html' and ttype == 'begin' and not html_done: - htmlstr += '\n' - html_done = True - - if tname == 'head' and ttype == 'begin' and not head_done: - htmlstr += '\n' - # also add in metadata and style link tags - htmlstr += self.meta - htmlstr += '\n' - head_done = True - - if tname == 'body' and ttype == 'begin' and not body_done: - htmlstr += '\n' - body_done = True - - # handle issue of possibly missing html, head, and body tags - # I have not seen this but the original did something like this so ... 
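# Rough sketch of the fallback wrapping for an assumed bare input 'Hello':
# it is wrapped in <body>, then a <head> holding self.meta and the
# stylesheet link, then <html>, with the DOCTYPE declaration prepended
# last.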
- if not body_done: - htmlstr = '\n' + htmlstr + '\n' - if not head_done: - headstr = '\n' - headstr += self.meta - headstr += '\n' - headstr += '\n' - htmlstr = headstr + htmlstr - if not html_done: - htmlstr = '\n' + htmlstr + '\n' - - # finally add DOCTYPE info - htmlstr = '\n\n' + htmlstr - - css = self.base_css_rules - for cls, rule in self.tag_css_rules.items(): - css += '.%s { %s }\n' % (cls, rule) - - return (htmlstr, css, self.cssname) - - def ensure_unit(self, raw, unit='px'): - if re.search(r'\d+$', raw) is not None: - raw += unit - return raw - - # flatten possibly modified tag back to string - def taginfo_tostring(self, taginfo): - (ttype, tname, tattr) = taginfo - if ttype is None or tname is None: - return '' - if ttype == 'end': - return '' % tname - if ttype in SPECIAL_HANDLING_TYPES and tattr is not None and 'special' in tattr: - info = tattr['special'] - if ttype == 'comment': - return '<%s %s-->' % (tname, info) - else: - return '<%s %s>' % (tname, info) - res = [] - res.append('<%s' % tname) - if tattr is not None: - for key in tattr: - res.append(' %s="%s"' % (key, tattr[key])) - if ttype == 'single': - res.append('/>') - elif ttype == 'single_ext': - res.append(' />') - else : - res.append('>') - return "".join(res) - - # routines to convert from mobi ml tags atributes to xhtml attributes and styles - def processtag(self, taginfo): - # Converting mobi font sizes to numerics - size_map = { - 'xx-small': '1', - 'x-small': '2', - 'small': '3', - 'medium': '4', - 'large': '5', - 'x-large': '6', - 'xx-large': '7', - } - - size_to_em_map = { - '1': '.65em', - '2': '.75em', - '3': '1em', - '4': '1.125em', - '5': '1.25em', - '6': '1.5em', - '7': '2em', - } - - # current tag to work on - (ttype, tname, tattr) = taginfo - if not tattr: - tattr = {} - - styles = [] - - if tname is None or tname.startswith('removeme'): - return '' - - # have not seen an example of this yet so keep it here to be safe - # until this is better understood - if tname in ('country-region', 'place', 'placetype', 'placename', - 'state', 'city', 'street', 'address', 'content'): - tname = 'div' if tname == 'content' else 'span' - for key in tattr: - tattr.pop(key) - - # handle general case of style, height, width, bgcolor in any tag - if 'style' in tattr: - style = tattr.pop('style').strip() - if style: - styles.append(style) - - if 'align' in tattr: - align = tattr.pop('align').strip() - if align: - if tname in ('table', 'td', 'tr'): - pass - else: - styles.append('text-align: %s' % align) - - if 'height' in tattr: - height = tattr.pop('height').strip() - if height and '<' not in height and '>' not in height and re.search(r'\d+', height): - if tname in ('table', 'td', 'tr'): - pass - elif tname == 'img': - tattr['height'] = height - else: - styles.append('margin-top: %s' % self.ensure_unit(height)) - - if 'width' in tattr: - width = tattr.pop('width').strip() - if width and re.search(r'\d+', width): - if tname in ('table', 'td', 'tr'): - pass - elif tname == 'img': - tattr['width'] = width - else: - styles.append('text-indent: %s' % self.ensure_unit(width)) - if width.startswith('-'): - styles.append('margin-left: %s' % self.ensure_unit(width[1:])) - - if 'bgcolor' in tattr: - # no proprietary html allowed - if tname == 'div': - del tattr['bgcolor'] - - elif tname == 'font': - # Change font tags to span tags - tname = 'span' - if ttype in ('begin', 'single', 'single_ext'): - # move the face attribute to css font-family - if 'face' in tattr: - face = tattr.pop('face').strip() - styles.append('font-family: 
"%s"' % face) - - # Monitor the constantly changing font sizes, change them to ems and move - # them to css. The following will work for 'flat' font tags, but nested font tags - # will cause things to go wonky. Need to revert to the parent font tag's size - # when a closing tag is encountered. - if 'size' in tattr: - sz = tattr.pop('size').strip().lower() - try: - float(sz) - except ValueError: - if sz in size_map: - sz = size_map[sz] - else: - if sz.startswith('-') or sz.startswith('+'): - sz = self.current_font_size + float(sz) - if sz > 7: - sz = 7 - elif sz < 1: - sz = 1 - sz = str(int(sz)) - styles.append('font-size: %s' % size_to_em_map[sz]) - self.current_font_size = int(sz) - - elif tname == 'img': - for attr in ('width', 'height'): - if attr in tattr: - val = tattr[attr] - if val.lower().endswith('em'): - try: - nval = float(val[:-2]) - nval *= 16 * (168.451/72) # Assume this was set using the Kindle profile - tattr[attr] = "%dpx"%int(nval) - except: - del tattr[attr] - elif val.lower().endswith('%'): - del tattr[attr] - - # convert the anchor tags - if 'filepos-id' in tattr: - tattr['id'] = tattr.pop('filepos-id') - if 'name' in tattr and tattr['name'] != tattr['id']: - tattr['name'] = tattr['id'] - - if 'filepos' in tattr: - filepos = tattr.pop('filepos') - try: - tattr['href'] = "#filepos%d" % int(filepos) - except ValueError: - pass - - if styles: - ncls = None - rule = '; '.join(styles) - for sel, srule in self.tag_css_rules.items(): - if srule == rule: - ncls = sel - break - if ncls is None: - self.tag_css_rule_cnt += 1 - ncls = 'rule_%d' % self.tag_css_rule_cnt - self.tag_css_rules[ncls] = rule - cls = tattr.get('class', '') - cls = cls + (' ' if cls else '') + ncls - tattr['class'] = cls - - # convert updated tag back to string representation - if len(tattr) == 0: - tattr = None - taginfo = (ttype, tname, tattr) - return self.taginfo_tostring(taginfo) - -''' main only left in for testing outside of plugin ''' - -def main(argv=sys.argv): - if len(argv) != 2: - return 1 - else: - infile = argv[1] - - try: - print('Converting Mobi Markup Language to XHTML') - mlc = MobiMLConverter(infile) - print('Processing ...') - htmlstr, css, cssname = mlc.processml() - outname = infile.rsplit('.',1)[0] + '_converted.html' - open(outname, 'w').write(htmlstr) - open(cssname, 'w').write(css) - print('Completed') - print('XHTML version of book can be found at: ' + outname) - - except ValueError as e: - print("Error: %s" % e) - return 1 - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/epy_extras/KindleUnpack/unipath.py b/epy_extras/KindleUnpack/unipath.py deleted file mode 100755 index 2416279..0000000 --- a/epy_extras/KindleUnpack/unipath.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without modification, -# are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this list of -# conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, this list -# of conditions and the following disclaimer in the documentation and/or other materials -# provided with the distribution. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY -# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from __future__ import unicode_literals, division, absolute_import, print_function -from .compatibility_utils import PY2, text_type, binary_type - -import sys -import os - -# utility routines to convert all paths to be full unicode - -# Under Python 2, if a bytestring, try to convert it to unicode using sys.getfilesystemencoding -# Under Python 3, if bytes, try to convert it to unicode using os.fsencode() to decode it - -# Mac OS X and Windows will happily support full unicode paths -# Linux can support full unicode paths but allows arbitrary byte paths which may be inconsistent with unicode - -fsencoding = sys.getfilesystemencoding() - -def pathof(s, enc=fsencoding): - if s is None: - return None - if isinstance(s, text_type): - return s - if isinstance(s, binary_type): - try: - return s.decode(enc) - except: - pass - return s - -def exists(s): - return os.path.exists(pathof(s)) - -def isfile(s): - return os.path.isfile(pathof(s)) - -def isdir(s): - return os.path.isdir(pathof(s)) - -def mkdir(s): - return os.mkdir(pathof(s)) - -def listdir(s): - rv = [] - for file in os.listdir(pathof(s)): - rv.append(pathof(file)) - return rv - -def getcwd(): - if PY2: - return os.getcwdu() - return os.getcwd() - -def walk(top): - top = pathof(top) - rv = [] - for base, dnames, names in os.walk(top): - base = pathof(base) - for name in names: - name = pathof(name) - rv.append(relpath(os.path.join(base, name), top)) - return rv - -def relpath(path, start=None): - return os.path.relpath(pathof(path) , pathof(start)) - -def abspath(path): - return os.path.abspath(pathof(path)) diff --git a/epy_extras/KindleUnpack/unpack_structure.py b/epy_extras/KindleUnpack/unpack_structure.py deleted file mode 100644 index 2e66eb8..0000000 --- a/epy_extras/KindleUnpack/unpack_structure.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab - -from __future__ import unicode_literals, division, absolute_import, print_function - -from .compatibility_utils import text_type - -from . import unipath -from .unipath import pathof - -DUMP = False -""" Set to True to dump all possible information. 
""" - -import os - -import re -# note: re requites the pattern to be the exact same type as the data to be searched in python3 -# but u"" is not allowed for the pattern itself only b"" - -import zipfile -import binascii -from .mobi_utils import mangle_fonts - -class unpackException(Exception): - pass - -class ZipInfo(zipfile.ZipInfo): - - def __init__(self, *args, **kwargs): - if 'compress_type' in kwargs: - compress_type = kwargs.pop('compress_type') - super(ZipInfo, self).__init__(*args, **kwargs) - self.compress_type = compress_type - -class fileNames: - - def __init__(self, infile, outdir): - self.infile = infile - self.outdir = outdir - if not unipath.exists(self.outdir): - unipath.mkdir(self.outdir) - self.mobi7dir = os.path.join(self.outdir,'mobi7') - if not unipath.exists(self.mobi7dir): - unipath.mkdir(self.mobi7dir) - self.imgdir = os.path.join(self.mobi7dir, 'Images') - if not unipath.exists(self.imgdir): - unipath.mkdir(self.imgdir) - self.hdimgdir = os.path.join(self.outdir,'HDImages') - if not unipath.exists(self.hdimgdir): - unipath.mkdir(self.hdimgdir) - self.outbase = os.path.join(self.outdir, os.path.splitext(os.path.split(infile)[1])[0]) - - def getInputFileBasename(self): - return os.path.splitext(os.path.basename(self.infile))[0] - - def makeK8Struct(self): - self.k8dir = os.path.join(self.outdir,'mobi8') - if not unipath.exists(self.k8dir): - unipath.mkdir(self.k8dir) - self.k8metainf = os.path.join(self.k8dir,'META-INF') - if not unipath.exists(self.k8metainf): - unipath.mkdir(self.k8metainf) - self.k8oebps = os.path.join(self.k8dir,'OEBPS') - if not unipath.exists(self.k8oebps): - unipath.mkdir(self.k8oebps) - self.k8images = os.path.join(self.k8oebps,'Images') - if not unipath.exists(self.k8images): - unipath.mkdir(self.k8images) - self.k8fonts = os.path.join(self.k8oebps,'Fonts') - if not unipath.exists(self.k8fonts): - unipath.mkdir(self.k8fonts) - self.k8styles = os.path.join(self.k8oebps,'Styles') - if not unipath.exists(self.k8styles): - unipath.mkdir(self.k8styles) - self.k8text = os.path.join(self.k8oebps,'Text') - if not unipath.exists(self.k8text): - unipath.mkdir(self.k8text) - - # recursive zip creation support routine - def zipUpDir(self, myzip, tdir, localname): - currentdir = tdir - if localname != "": - currentdir = os.path.join(currentdir,localname) - list = unipath.listdir(currentdir) - for file in list: - afilename = file - localfilePath = os.path.join(localname, afilename) - realfilePath = os.path.join(currentdir,file) - if unipath.isfile(realfilePath): - myzip.write(pathof(realfilePath), pathof(localfilePath), zipfile.ZIP_DEFLATED) - elif unipath.isdir(realfilePath): - self.zipUpDir(myzip, tdir, localfilePath) - - def makeEPUB(self, usedmap, obfuscate_data, uid): - bname = os.path.join(self.k8dir, self.getInputFileBasename() + '.epub') - # Create an encryption key for Adobe font obfuscation - # based on the epub's uid - if isinstance(uid,text_type): - uid = uid.encode('ascii') - if obfuscate_data: - key = re.sub(br'[^a-fA-F0-9]', b'', uid) - key = binascii.unhexlify((key + key)[:32]) - - # copy over all images and fonts that are actually used in the ebook - # and remove all font files from mobi7 since not supported - imgnames = unipath.listdir(self.imgdir) - for name in imgnames: - if usedmap.get(name,'not used') == 'used': - filein = os.path.join(self.imgdir,name) - if name.endswith(".ttf"): - fileout = os.path.join(self.k8fonts,name) - elif name.endswith(".otf"): - fileout = os.path.join(self.k8fonts,name) - elif name.endswith(".failed"): - 
fileout = os.path.join(self.k8fonts,name)
-                else:
-                    fileout = os.path.join(self.k8images,name)
-                data = b''
-                with open(pathof(filein),'rb') as f:
-                    data = f.read()
-                if obfuscate_data:
-                    if name in obfuscate_data:
-                        data = mangle_fonts(key, data)
-                open(pathof(fileout),'wb').write(data)
-                if name.endswith(".ttf") or name.endswith(".otf"):
-                    os.remove(pathof(filein))
-
-        # opf file name hard coded to "content.opf"
-        container = '<?xml version="1.0" encoding="UTF-8"?>\n'
-        container += '<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">\n'
-        container += '    <rootfiles>\n'
-        container += '<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>'
-        container += '    </rootfiles>\n</container>\n'
-        fileout = os.path.join(self.k8metainf,'container.xml')
-        with open(pathof(fileout),'wb') as f:
-            f.write(container.encode('utf-8'))
-
-        if obfuscate_data:
-            encryption = '<encryption xmlns="urn:oasis:names:tc:opendocument:xmlns:container" xmlns:enc="http://www.w3.org/2001/04/xmlenc#" xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">\n'
-            for font in obfuscate_data:
-                encryption += '  <enc:EncryptedData>\n'
-                encryption += '    <enc:EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>\n'
-                encryption += '    <enc:CipherData>\n'
-                encryption += '      <enc:CipherReference URI="OEBPS/Fonts/' + font + '"/>\n'
-                encryption += '    </enc:CipherData>\n'
-                encryption += '  </enc:EncryptedData>\n'
-            encryption += '</encryption>\n'
-            fileout = os.path.join(self.k8metainf,'encryption.xml')
-            with open(pathof(fileout),'wb') as f:
-                f.write(encryption.encode('utf-8'))
-
-        # ready to build epub
-        self.outzip = zipfile.ZipFile(pathof(bname), 'w')
-
-        # add the mimetype file uncompressed
-        mimetype = b'application/epub+zip'
-        fileout = os.path.join(self.k8dir,'mimetype')
-        with open(pathof(fileout),'wb') as f:
-            f.write(mimetype)
-        nzinfo = ZipInfo('mimetype', compress_type=zipfile.ZIP_STORED)
-        nzinfo.external_attr = 0o600 << 16  # make this a normal file
-        self.outzip.writestr(nzinfo, mimetype)
-        self.zipUpDir(self.outzip,self.k8dir,'META-INF')
-        self.zipUpDir(self.outzip,self.k8dir,'OEBPS')
-        self.outzip.close()
diff --git a/epy_extras/__init__.py b/epy_extras/__init__.py
deleted file mode 100644
index c06e358..0000000
--- a/epy_extras/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-__all__ = ["unpackBook"]
-
-from .KindleUnpack.kindleunpack import unpackBook
diff --git a/poetry.lock b/poetry.lock
index 224fda9..0547abc 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -6,6 +6,20 @@ category = "dev"
 optional = false
 python-versions = "*"
 
+[[package]]
+name = "asttokens"
+version = "2.0.8"
+description = "Annotate AST trees with source code positions"
+category = "dev"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+six = "*"
+
+[package.extras]
+test = ["astroid (<=2.5.3)", "pytest"]
+
 [[package]]
 name = "attrs"
 version = "22.1.0"
@@ -42,7 +56,6 @@ mypy-extensions = ">=0.4.3"
 pathspec = ">=0.9.0"
 platformdirs = ">=2"
 tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""}
-typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""}
 typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
 
 [package.extras]
@@ -51,6 +64,72 @@ d = ["aiohttp (>=3.7.4)"]
 jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
 uvloop = ["uvloop (>=0.15.2)"]
 
+[[package]]
+name = "bleach"
+version = "5.0.1"
+description = "An easy safelist-based HTML-sanitizing tool."
+category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +six = ">=1.9.0" +webencodings = "*" + +[package.extras] +css = ["tinycss2 (>=1.1.0,<1.2)"] +dev = ["build (==0.8.0)", "flake8 (==4.0.1)", "hashin (==0.17.0)", "pip-tools (==6.6.2)", "pytest (==7.1.2)", "Sphinx (==4.3.2)", "tox (==3.25.0)", "twine (==4.0.1)", "wheel (==0.37.1)", "black (==22.3.0)", "mypy (==0.961)"] + +[[package]] +name = "build" +version = "0.8.0" +description = "A simple, correct PEP 517 build frontend" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +colorama = {version = "*", markers = "os_name == \"nt\""} +packaging = ">=19.0" +pep517 = ">=0.9.1" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +virtualenv = ["virtualenv (>=20.0.35)"] +typing = ["typing-extensions (>=3.7.4.3)", "mypy (==0.950)", "importlib-metadata (>=4.6.4)"] +test = ["setuptools (>=56.0.0)", "setuptools (>=42.0.0)", "wheel (>=0.36.0)", "toml (>=0.10.0)", "pytest-xdist (>=1.34)", "pytest-rerunfailures (>=9.1)", "pytest-mock (>=2)", "pytest-cov (>=2.12)", "pytest (>=6.2.4)", "filelock (>=3)"] +docs = ["sphinx-autodoc-typehints (>=1.10)", "sphinx-argparse-cli (>=1.5)", "sphinx (>=4.0,<5.0)", "furo (>=2021.08.31)"] + +[[package]] +name = "certifi" +version = "2022.9.24" +description = "Python package for providing Mozilla's CA Bundle." +category = "dev" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "cffi" +version = "1.15.1" +description = "Foreign Function Interface for Python calling C code." +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "charset-normalizer" +version = "2.1.1" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "dev" +optional = false +python-versions = ">=3.6.0" + +[package.extras] +unicode_backport = ["unicodedata2"] + [[package]] name = "click" version = "8.1.3" @@ -61,7 +140,6 @@ python-versions = ">=3.7" [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} [[package]] name = "colorama" @@ -71,9 +149,20 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "commonmark" +version = "0.9.1" +description = "Python parser for the CommonMark Markdown spec" +category = "dev" +optional = false +python-versions = "*" + +[package.extras] +test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] + [[package]] name = "coverage" -version = "6.4.4" +version = "6.5.0" description = "Code coverage measurement for Python" category = "dev" optional = false @@ -82,6 +171,25 @@ python-versions = ">=3.7" [package.extras] toml = ["tomli"] +[[package]] +name = "cryptography" +version = "38.0.1" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
+category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"] +docstest = ["pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"] +pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"] +sdist = ["setuptools-rust (>=0.11.4)"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"] + [[package]] name = "debugpy" version = "1.6.3" @@ -98,6 +206,25 @@ category = "dev" optional = false python-versions = ">=3.5" +[[package]] +name = "docutils" +version = "0.19" +description = "Docutils -- Python Documentation Utilities" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "executing" +version = "1.1.0" +description = "Get the currently executing AST node of a frame, and other information" +category = "dev" +optional = false +python-versions = "*" + +[package.extras] +tests = ["rich", "littleutils", "pytest", "asttokens"] + [[package]] name = "greenlet" version = "1.1.3" @@ -109,22 +236,29 @@ python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" [package.extras] docs = ["sphinx"] +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "dev" +optional = false +python-versions = ">=3.5" + [[package]] name = "importlib-metadata" -version = "4.12.0" +version = "5.0.0" description = "Read metadata from Python packages" category = "dev" optional = false python-versions = ">=3.7" [package.dependencies] -typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} zipp = ">=0.5" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"] +docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "furo", "jaraco.tidelift (>=1.4)"] perf = ["ipython"] -testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "packaging", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)", "importlib-resources (>=1.3)"] [[package]] name = "iniconfig" @@ -136,11 +270,11 @@ python-versions = "*" [[package]] name = "ipython" -version = "7.34.0" +version = "8.5.0" description = "IPython: Productive Interactive Computing" category = "dev" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" [package.dependencies] appnope = {version = "*", markers = "sys_platform == \"darwin\""} @@ -151,20 +285,52 @@ jedi = ">=0.16" matplotlib-inline = "*" pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} pickleshare = "*" -prompt-toolkit = ">=2.0.0,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.1.0" -pygments = "*" -traitlets = ">=4.2" +prompt-toolkit = ">3.0.1,<3.1.0" +pygments = ">=2.4.0" +stack-data = "*" +traitlets = ">=5" [package.extras] -all = ["Sphinx (>=1.3)", "ipykernel", "ipyparallel", "ipywidgets", "nbconvert", "nbformat", "nose (>=0.10.1)", "notebook", "numpy (>=1.17)", "pygments", "qtconsole", "requests", "testpath"] +all = ["black", "Sphinx (>=1.3)", 
"ipykernel", "nbconvert", "nbformat", "ipywidgets", "notebook", "ipyparallel", "qtconsole", "pytest (<7.1)", "pytest-asyncio", "testpath", "curio", "matplotlib (!=3.2.0)", "numpy (>=1.19)", "pandas", "trio"] +black = ["black"] doc = ["Sphinx (>=1.3)"] kernel = ["ipykernel"] nbconvert = ["nbconvert"] nbformat = ["nbformat"] -notebook = ["notebook", "ipywidgets"] +notebook = ["ipywidgets", "notebook"] parallel = ["ipyparallel"] qtconsole = ["qtconsole"] -test = ["nose (>=0.10.1)", "requests", "testpath", "pygments", "nbformat", "ipykernel", "numpy (>=1.17)"] +test = ["pytest (<7.1)", "pytest-asyncio", "testpath"] +test_extra = ["pytest (<7.1)", "pytest-asyncio", "testpath", "curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.19)", "pandas", "trio"] + +[[package]] +name = "isort" +version = "5.10.1" +description = "A Python utility / library to sort Python imports." +category = "dev" +optional = false +python-versions = ">=3.6.1,<4.0" + +[package.extras] +pipfile_deprecated_finder = ["pipreqs", "requirementslib"] +requirements_deprecated_finder = ["pipreqs", "pip-api"] +colors = ["colorama (>=0.4.3,<0.5.0)"] +plugins = ["setuptools"] + +[[package]] +name = "jaraco.classes" +version = "3.2.3" +description = "Utility functions for Python class constructs" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +more-itertools = "*" + +[package.extras] +docs = ["sphinx (>=3.5)", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] [[package]] name = "jedi" @@ -181,6 +347,37 @@ parso = ">=0.8.0,<0.9.0" qa = ["flake8 (==3.8.3)", "mypy (==0.782)"] testing = ["Django (<3.1)", "colorama", "docopt", "pytest (<7.0.0)"] +[[package]] +name = "jeepney" +version = "0.8.0" +description = "Low-level, pure Python DBus protocol wrapper." +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +trio = ["async-generator", "trio"] +test = ["async-timeout", "trio", "testpath", "pytest-asyncio (>=0.17)", "pytest-trio", "pytest"] + +[[package]] +name = "keyring" +version = "23.9.3" +description = "Store and access your passwords safely." 
+category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} +"jaraco.classes" = "*" +jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""} +pywin32-ctypes = {version = "<0.1.0 || >0.1.0,<0.1.1 || >0.1.1", markers = "sys_platform == \"win32\""} +SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""} + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)", "jaraco.tidelift (>=1.4)"] +testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "flake8 (<5)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] + [[package]] name = "matplotlib-inline" version = "0.1.6" @@ -192,6 +389,14 @@ python-versions = ">=3.5" [package.dependencies] traitlets = "*" +[[package]] +name = "more-itertools" +version = "8.14.0" +description = "More routines for operating on iterables, beyond itertools" +category = "dev" +optional = false +python-versions = ">=3.5" + [[package]] name = "msgpack" version = "1.0.4" @@ -202,16 +407,15 @@ python-versions = "*" [[package]] name = "mypy" -version = "0.971" +version = "0.981" description = "Optional static typing for Python" category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" [package.dependencies] mypy-extensions = ">=0.4.3" tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} -typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} typing-extensions = ">=3.10" [package.extras] @@ -258,6 +462,17 @@ category = "dev" optional = false python-versions = ">=3.7" +[[package]] +name = "pep517" +version = "0.13.0" +description = "Wrappers to build Python packages using PEP 517 hooks" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} + [[package]] name = "pexpect" version = "4.8.0" @@ -277,6 +492,17 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "pkginfo" +version = "1.8.3" +description = "Query metadatdata from sdists / bdists / installed packages." 
+category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" + +[package.extras] +testing = ["nose", "coverage"] + [[package]] name = "platformdirs" version = "2.5.2" @@ -297,9 +523,6 @@ category = "dev" optional = false python-versions = ">=3.6" -[package.dependencies] -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} - [package.extras] dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] @@ -323,6 +546,17 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "pure-eval" +version = "0.2.2" +description = "Safely evaluate AST nodes without side effects" +category = "dev" +optional = false +python-versions = "*" + +[package.extras] +tests = ["pytest"] + [[package]] name = "py" version = "1.11.0" @@ -331,6 +565,14 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + [[package]] name = "pygments" version = "2.13.0" @@ -380,7 +622,6 @@ python-versions = ">=3.7" [package.dependencies] attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} -importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} iniconfig = "*" packaging = "*" pluggy = ">=0.12,<2.0" @@ -390,6 +631,122 @@ tomli = ">=1.0.0" [package.extras] testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] +[[package]] +name = "pywin32-ctypes" +version = "0.2.0" +description = "" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "readme-renderer" +version = "37.2" +description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +bleach = ">=2.1.0" +docutils = ">=0.13.1" +Pygments = ">=2.5.1" + +[package.extras] +md = ["cmarkgfm (>=0.8.0)"] + +[[package]] +name = "requests" +version = "2.28.1" +description = "Python HTTP for Humans." 
+category = "dev" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<3" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-toolbelt" +version = "0.9.1" +description = "A utility belt for advanced users of python-requests" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + +[[package]] +name = "rfc3986" +version = "2.0.0" +description = "Validating URI References per RFC 3986" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.extras] +idna2008 = ["idna"] + +[[package]] +name = "rich" +version = "12.5.1" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +category = "dev" +optional = false +python-versions = ">=3.6.3,<4.0.0" + +[package.dependencies] +commonmark = ">=0.9.0,<0.10.0" +pygments = ">=2.6.0,<3.0.0" +typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9\""} + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<8.0.0)"] + +[[package]] +name = "secretstorage" +version = "3.3.3" +description = "Python bindings to FreeDesktop.org Secret Service API" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +cryptography = ">=2.0" +jeepney = ">=0.6" + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "stack-data" +version = "0.5.1" +description = "Extract data from python stack frames and tracebacks for informative displays" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +asttokens = "*" +executing = "*" +pure-eval = "*" + +[package.extras] +tests = ["cython", "littleutils", "pygments", "typeguard", "pytest"] + [[package]] name = "tomli" version = "2.0.1" @@ -410,12 +767,23 @@ python-versions = ">=3.7" test = ["pre-commit", "pytest"] [[package]] -name = "typed-ast" -version = "1.5.4" -description = "a fork of Python 2 and 3 ast modules with type comment support" +name = "twine" +version = "4.0.1" +description = "Collection of utilities for publishing packages on PyPI" category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" + +[package.dependencies] +importlib-metadata = ">=3.6" +keyring = ">=15.1" +pkginfo = ">=1.8.1" +readme-renderer = ">=35.0" +requests = ">=2.20" +requests-toolbelt = ">=0.8.0,<0.9.0 || >0.9.0" +rfc3986 = ">=1.4.0" +rich = ">=12.0.0" +urllib3 = ">=1.26.0" [[package]] name = "typing-extensions" @@ -425,6 +793,19 @@ category = "dev" optional = false python-versions = ">=3.7" +[[package]] +name = "urllib3" +version = "1.26.12" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" + +[package.extras] +brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] +secure = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "certifi", "urllib3-secure-extra", "ipaddress"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + [[package]] name = "wcwidth" version = "0.2.5" @@ -433,6 +814,14 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "webencodings" +version = "0.5.1" +description = "Character encoding aliases for legacy web content" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "windows-curses" version = "2.3.0" @@ -455,40 +844,62 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" -python-versions = "^3.7" -content-hash = "688a03f5baf4d8cc6850f78d49f42245b7d3ae2eaf21ea326732af64617805ff" +python-versions = "^3.8" +content-hash = "936a54c993790e03f357a092c82f5d0fc8fdeba780d7c66be761b86ff4d00760" [metadata.files] appnope = [ {file = "appnope-0.1.3-py2.py3-none-any.whl", hash = "sha256:265a455292d0bd8a72453494fa24df5a11eb18373a60c7c0430889f22548605e"}, {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"}, ] +asttokens = [] attrs = [] backcall = [ {file = "backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255"}, {file = "backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e"}, ] black = [] +bleach = [] +build = [] +certifi = [] +cffi = [] +charset-normalizer = [] click = [] colorama = [] +commonmark = [ + {file = "commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9"}, + {file = "commonmark-0.9.1.tar.gz", hash = "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60"}, +] coverage = [] +cryptography = [] debugpy = [] decorator = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] +docutils = [] +executing = [] greenlet = [] +idna = [] importlib-metadata = [] iniconfig = [ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, ] ipython = [] +isort = [ + {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"}, + {file = "isort-5.10.1.tar.gz", hash = "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"}, +] +"jaraco.classes" = [] jedi = [ {file = "jedi-0.18.1-py2.py3-none-any.whl", hash = "sha256:637c9635fcf47945ceb91cd7f320234a7be540ded6f3e99a50cb6febdfd1ba8d"}, {file = "jedi-0.18.1.tar.gz", hash = "sha256:74137626a64a99c8eb6ae5832d99b3bdd7d29a3850fe2aa80a4126b2a7d949ab"}, ] +jeepney = [] +keyring = [] matplotlib-inline = [] +more-itertools = [] msgpack = [] mypy = [] mypy-extensions = [ @@ -504,6 +915,7 @@ parso = [ {file = "parso-0.8.3.tar.gz", hash = "sha256:8c07be290bb59f03588915921e29e8a50002acaf2cdc5fa0e0114f91709fafa0"}, ] pathspec = [] +pep517 = [] pexpect = [ {file = 
"pexpect-4.8.0-py2.py3-none-any.whl", hash = "sha256:0b48a55dcb3c05f3329815901ea4fc1537514d6ba867a152b581d69ae3710937"}, {file = "pexpect-4.8.0.tar.gz", hash = "sha256:fc65a43959d153d0114afe13997d439c22823a27cefceb5ff35c2178c6784c0c"}, @@ -512,6 +924,7 @@ pickleshare = [ {file = "pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56"}, {file = "pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca"}, ] +pkginfo = [] platformdirs = [ {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, @@ -525,27 +938,58 @@ ptyprocess = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, ] +pure-eval = [ + {file = "pure_eval-0.2.2-py3-none-any.whl", hash = "sha256:01eaab343580944bc56080ebe0a674b39ec44a945e6d09ba7db3cb8cec289350"}, + {file = "pure_eval-0.2.2.tar.gz", hash = "sha256:2b45320af6dfaa1750f543d714b6d1c520a1688dec6fd24d339063ce0aaa9ac3"}, +] py = [ {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, ] +pycparser = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] pygments = [] pynvim = [ {file = "pynvim-0.4.3.tar.gz", hash = "sha256:3a795378bde5e8092fbeb3a1a99be9c613d2685542f1db0e5c6fd467eed56dff"}, ] pyparsing = [] pytest = [] +pywin32-ctypes = [ + {file = "pywin32-ctypes-0.2.0.tar.gz", hash = "sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942"}, + {file = "pywin32_ctypes-0.2.0-py2.py3-none-any.whl", hash = "sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98"}, +] +readme-renderer = [] +requests = [] +requests-toolbelt = [ + {file = "requests-toolbelt-0.9.1.tar.gz", hash = "sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0"}, + {file = "requests_toolbelt-0.9.1-py2.py3-none-any.whl", hash = "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f"}, +] +rfc3986 = [] +rich = [] +secretstorage = [] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +stack-data = [] tomli = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] traitlets = [] -typed-ast = [] +twine = [] typing-extensions = [] +urllib3 = [] wcwidth = [ {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, {file = "wcwidth-0.2.5.tar.gz", hash = 
"sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, ] +webencodings = [ + {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, + {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, +] windows-curses = [ {file = "windows_curses-2.3.0-cp310-cp310-win32.whl", hash = "sha256:a3a63a0597729e10f923724c2cf972a23ea677b400d2387dee1d668cf7116177"}, {file = "windows_curses-2.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:7a35eda4cb120b9e1a5ae795f3bc06c55b92c9d391baba6be1903285a05f3551"}, diff --git a/pyproject.toml b/pyproject.toml index 41b3483..e567d3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,37 +1,48 @@ [tool.poetry] name = "epy-reader" -version = "2022.9.24" -description = "CLI Ebook Reader" +version = "2022.10.2" +description = "TUI Ebook Reader" authors = ["Benawi Adha "] license = "GPL-3.0" +readme = "README.md" +repository = "https://github.com/wustho/epy" +keywords = ["ebook", "epub", "epub3", "fb2", "mobi", "azw3", "TUI", "ebook reader"] packages = [ - { include = "epy.py" }, - { include = "epy_extras" }, + { include = "epy_reader", from = "src" } ] [tool.poetry.scripts] -epy = "epy:main" +epy = "epy_reader.__main__:main" [tool.poetry.dependencies] -python = "^3.7" +python = "^3.8" windows-curses = { version = "*", markers = "platform_system == 'Windows'" } [tool.poetry.dev-dependencies] -black = "*" -coverage = "*" -debugpy = "*" -ipython = "*" -mypy = "*" -pynvim = "*" -pytest = "*" +pynvim = "^0.4.3" +black = "^22.8.0" +coverage = "^6.5.0" +debugpy = "^1.6.3" +ipython = "^8.5.0" +mypy = "^0.981" +pytest = "^7.1.3" +isort = "^5.10.1" +build = "^0.8.0" +twine = "^4.0.1" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" [tool.mypy] strict_optional = true +follow_imports = "silent" +exclude = ["src/epy_reader/tools/"] [tool.black] line-length = 100 target-version = ['py38'] +exclude = "src/epy_reader/tools/" -[build-system] -requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" +[tool.isort] +skip = "src/epy_reader/tools/" diff --git a/setup.py b/setup.py deleted file mode 100644 index 1cade5b..0000000 --- a/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -import sys -from setuptools import setup - -with open("README.md", "r") as fh: - long_description = fh.read() - -setup( - name="epy-reader", - version="2022.9.24", - description="Terminal/CLI Ebook (epub, fb2, mobi, azw3) Reader", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/wustho/epy", - author="Benawi Adha", - author_email="benawiadha@gmail.com", - license="GPL-3.0", - keywords=["epub", "epub3", "fb2", "mobi", "azw3", "CLI", "Terminal", "Reader"], - python_requires="~=3.7", - py_modules=["epy"], - packages=["epy_extras", "epy_extras.KindleUnpack"], - entry_points={"console_scripts": ["epy = epy:main"]}, - install_requires=["windows-curses;platform_system=='Windows'"], - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", - "Operating System :: OS Independent", - ], -) diff --git a/src/epy_reader/__init__.py b/src/epy_reader/__init__.py new file mode 100644 index 0000000..97e99a2 --- /dev/null +++ b/src/epy_reader/__init__.py @@ -0,0 +1,5 @@ +__version__ = "2022.10.2" +__license__ = "GPL-3.0" +__author__ = "Benawi Adha" +__email__ = "benawiadha@gmail.com" 
+__url__ = "https://github.com/wustho/epy" diff --git a/src/epy_reader/__main__.py b/src/epy_reader/__main__.py new file mode 100644 index 0000000..ce7d1b2 --- /dev/null +++ b/src/epy_reader/__main__.py @@ -0,0 +1,23 @@ +import curses +import multiprocessing +import sys + +import epy_reader.cli as cli +import epy_reader.reader as reader + + +def main(): + # On Windows, calling this method is necessary + # On Linux/OSX, this method does nothing + multiprocessing.freeze_support() + filepath, dump_only = cli.find_file() + if dump_only: + sys.exit(cli.dump_ebook_content(filepath)) + + while True: + filepath = curses.wrapper(reader.start_reading, filepath) + + +# https://setuptools.pypa.io/en/latest/userguide/entry_point.html +if __name__ == "__main__": + main() diff --git a/src/epy_reader/board.py b/src/epy_reader/board.py new file mode 100644 index 0000000..0562d3f --- /dev/null +++ b/src/epy_reader/board.py @@ -0,0 +1,148 @@ +import curses +import re +from typing import Optional, Tuple, Union + +from epy_reader.models import Direction, InlineStyle, Key, NoUpdate +from epy_reader.settings import DoubleSpreadPadding + + +class InfiniBoard: + """ + Wrapper for curses screen to render infinite texts. + The idea is instead of pre render all the text before reading, + this will only renders part of text on demand by which available + page on screen. + + And what this does is only drawing text/string on curses screen + without .clear() or .refresh() to optimize performance. + """ + + def __init__( + self, + screen, + text: Tuple[str, ...], + textwidth: int = 80, + default_style: Tuple[InlineStyle, ...] = tuple(), + spread: int = 1, + ): + self.screen = screen + self.screen_rows, self.screen_cols = self.screen.getmaxyx() + self.textwidth = textwidth + self.x = ((self.screen_cols - self.textwidth) // 2) + 1 + self.text = text + self.total_lines = len(text) + self.default_style: Tuple[InlineStyle, ...] = default_style + self.temporary_style: Tuple[InlineStyle, ...] = () + self.spread = spread + + if self.spread == 2: + self.x = DoubleSpreadPadding.LEFT.value + self.x_alt = ( + DoubleSpreadPadding.LEFT.value + self.textwidth + DoubleSpreadPadding.MIDDLE.value + ) + + def feed_temporary_style(self, styles: Optional[Tuple[InlineStyle, ...]] = None) -> None: + """Reset styling if `styles` is None""" + self.temporary_style = styles if styles else () + + def render_styles( + self, row: int, styles: Tuple[InlineStyle, ...] 
= (), bottom_padding: int = 0 + ) -> None: + for i in styles: + if i.row in range(row, row + self.screen_rows - bottom_padding): + self.chgat(row, i.row, i.col, i.n_letters, self.screen.getbkgd() | i.attr) + + if self.spread == 2 and i.row in range( + row + self.screen_rows - bottom_padding, + row + 2 * (self.screen_rows - bottom_padding), + ): + self.chgat( + row, + i.row - (self.screen_rows - bottom_padding), + -self.x + self.x_alt + i.col, + i.n_letters, + self.screen.getbkgd() | i.attr, + ) + + def getch(self) -> Union[NoUpdate, Key]: + input = self.screen.getch() + if input == -1: + return NoUpdate() + return Key(input) + + def getbkgd(self): + return self.screen.getbkgd() + + def chgat(self, row: int, y: int, x: int, n: int, attr: int) -> None: + self.screen.chgat(y - row, self.x + x, n, attr) + + def write(self, row: int, bottom_padding: int = 0) -> None: + for n_row in range(min(self.screen_rows - bottom_padding, self.total_lines - row)): + text_line = self.text[row + n_row] + self.screen.addstr(n_row, self.x, text_line) + + if ( + self.spread == 2 + and row + self.screen_rows - bottom_padding + n_row < self.total_lines + ): + text_line = self.text[row + self.screen_rows - bottom_padding + n_row] + # TODO: clean this up + if re.search("\\[IMG:[0-9]+\\]", text_line): + self.screen.addstr( + n_row, self.x_alt, text_line.center(self.textwidth), curses.A_BOLD + ) + else: + self.screen.addstr(n_row, self.x_alt, text_line) + + self.render_styles(row, self.default_style, bottom_padding) + self.render_styles(row, self.temporary_style, bottom_padding) + # self.screen.refresh() + + def write_n( + self, + row: int, + n: int = 1, + direction: Direction = Direction.FORWARD, + bottom_padding: int = 0, + ) -> None: + assert n > 0 + for n_row in range(min(self.screen_rows - bottom_padding, self.total_lines - row)): + text_line = self.text[row + n_row] + if direction == Direction.FORWARD: + # self.screen.addnstr(n_row, self.x + self.textwidth - n, self.text[row+n_row], n) + # `+ " " * (self.textwidth - len(self.text[row + n_row]))` is workaround to + # to prevent curses trace because not calling screen.clear() + self.screen.addnstr( + n_row, + self.x + self.textwidth - n, + text_line + " " * (self.textwidth - len(text_line)), + n, + ) + + if ( + self.spread == 2 + and row + self.screen_rows - bottom_padding + n_row < self.total_lines + ): + text_line_alt = self.text[row + n_row + self.screen_rows - bottom_padding] + self.screen.addnstr( + n_row, + self.x_alt + self.textwidth - n, + text_line_alt + " " * (self.textwidth - len(text_line_alt)), + n, + ) + + else: + if text_line[self.textwidth - n :]: + self.screen.addnstr(n_row, self.x, text_line[self.textwidth - n :], n) + + if ( + self.spread == 2 + and row + self.screen_rows - bottom_padding + n_row < self.total_lines + ): + text_line_alt = self.text[row + n_row + self.screen_rows - bottom_padding] + self.screen.addnstr( + n_row, + self.x_alt, + text_line_alt[self.textwidth - n :], + n, + ) diff --git a/src/epy_reader/cli.py b/src/epy_reader/cli.py new file mode 100644 index 0000000..e43b51c --- /dev/null +++ b/src/epy_reader/cli.py @@ -0,0 +1,171 @@ +import argparse +import os +import shutil +import sys +import textwrap +from difflib import SequenceMatcher as SM +from typing import List, Optional, Tuple + +from epy_reader import __version__ +from epy_reader.lib import coerce_to_int, is_url, truncate +from epy_reader.models import LibraryItem +from epy_reader.parser import parse_html +from epy_reader.state import State +from epy_reader.utils import 
get_ebook_obj + + +def cleanup_library(state: State) -> None: + """Cleanup non-existent file from library""" + library_items = state.get_from_history() + for item in library_items: + if not os.path.isfile(item.filepath) and not is_url(item.filepath): + state.delete_from_library(item.filepath) + + +def get_nth_file_from_library(state: State, n) -> Optional[LibraryItem]: + library_items = state.get_from_history() + try: + return library_items[n - 1] + except IndexError: + return None + + +def get_matching_library_item( + state: State, pattern: str, threshold: float = 0.5 +) -> Optional[LibraryItem]: + matches: List[Tuple[LibraryItem, float]] = [] # [(library_item, match_value), ...] + library_items = state.get_from_history() + if not library_items: + return None + + for item in library_items: + tomatch = f"{item.title} - {item.author}" # item.filepath + match_value = sum( + [i.size for i in SM(None, tomatch.lower(), pattern.lower()).get_matching_blocks()] + ) / float(len(pattern)) + matches.append( + ( + item, + match_value, + ) + ) + + sorted_matches = sorted(matches, key=lambda x: -x[1]) + first_match_item, first_match_value = sorted_matches[0] + if first_match_item and first_match_value >= threshold: + return first_match_item + else: + return None + + +def print_reading_history(state: State) -> None: + termc, _ = shutil.get_terminal_size() + library_items = state.get_from_history() + if not library_items: + print("No Reading History.") + return + + print("Reading History:") + dig = len(str(len(library_items) + 1)) + tcols = termc - dig - 2 + for n, item in enumerate(library_items): + print( + "{} {}".format( + str(n + 1).rjust(dig), + truncate(str(item), "...", tcols, tcols - 3), + ) + ) + + +def parse_cli_args() -> argparse.Namespace: + prog = "epy" + positional_arg_help_str = "[PATH | # | PATTERN | URL]" + args_parser = argparse.ArgumentParser( + prog=prog, + usage=f"%(prog)s [-h] [-r] [-d] [-v] {positional_arg_help_str}", + formatter_class=argparse.RawDescriptionHelpFormatter, + description="Read ebook in terminal", + epilog=textwrap.dedent( + f"""\ + examples: + {prog} /path/to/ebook read /path/to/ebook file + {prog} 3 read #3 file from reading history + {prog} count monte read file matching 'count monte' + from reading history + """ + ), + ) + args_parser.add_argument("-r", "--history", action="store_true", help="print reading history") + args_parser.add_argument("-d", "--dump", action="store_true", help="dump the content of ebook") + args_parser.add_argument( + "-v", + "--version", + action="version", + version=f"v{__version__}", + help="print version and exit", + ) + args_parser.add_argument( + "ebook", + action="store", + nargs="*", + metavar=positional_arg_help_str, + help="ebook path, history number, pattern or URL", + ) + return args_parser.parse_args() + + +def find_file() -> Tuple[str, bool]: + args = parse_cli_args() + state = State() + cleanup_library(state) + + if args.history: + print_reading_history(state) + sys.exit() + + if len(args.ebook) == 0: + last_read = state.get_last_read() + if last_read: + return last_read, args.dump + else: + sys.exit("ERROR: Found no last read ebook file.") + + elif len(args.ebook) == 1: + nth = coerce_to_int(args.ebook[0]) + if nth is not None: + file = get_nth_file_from_library(state, nth) + if file: + return file.filepath, args.dump + else: + print(f"ERROR: #{nth} file not found.") + print_reading_history(state) + sys.exit(1) + elif is_url(args.ebook[0]): + return args.ebook[0], args.dump + elif os.path.isfile(args.ebook[0]): + return 
args.ebook[0], args.dump + + pattern = " ".join(args.ebook) + match = get_matching_library_item(state, pattern) + if match: + return match.filepath, args.dump + else: + sys.exit("ERROR: Found no matching ebook from history.") + + +def dump_ebook_content(filepath: str) -> None: + ebook = get_ebook_obj(filepath) + try: + try: + ebook.initialize() + except Exception as e: + sys.exit("ERROR: Badly-structured ebook.\n" + str(e)) + for i in ebook.contents: + content = ebook.get_raw_text(i) + src_lines = parse_html(content) + assert isinstance(src_lines, tuple) + # sys.stdout.reconfigure(encoding="utf-8") # Python>=3.7 + for j in src_lines: + sys.stdout.buffer.write((j + "\n\n").encode("utf-8")) + finally: + ebook.cleanup() diff --git a/src/epy_reader/config.py b/src/epy_reader/config.py new file mode 100644 index 0000000..db70a98 --- /dev/null +++ b/src/epy_reader/config.py @@ -0,0 +1,80 @@ +import dataclasses +import json +import os +import sys +from typing import Mapping, Tuple, Union + +import epy_reader.settings as settings +from epy_reader.models import AppData, Key + + +class Config(AppData): + def __init__(self): + setting_dict = dataclasses.asdict(settings.Settings()) + keymap_dict = dataclasses.asdict(settings.CfgDefaultKeymaps()) + keymap_builtin_dict = dataclasses.asdict(settings.CfgBuiltinKeymaps()) + + if os.path.isfile(self.filepath): + with open(self.filepath) as f: + cfg_user = json.load(f) + setting_dict = Config.update_dict(setting_dict, cfg_user["Setting"]) + keymap_dict = Config.update_dict(keymap_dict, cfg_user["Keymap"]) + else: + self.save({"Setting": setting_dict, "Keymap": keymap_dict}) + + keymap_dict_tuple = {k: tuple(v) for k, v in keymap_dict.items()} + keymap_updated = { + k: tuple([Key(i) for i in v]) + for k, v in Config.update_keys_tuple(keymap_dict_tuple, keymap_builtin_dict).items() + } + + if sys.platform == "win32": + setting_dict["PageScrollAnimation"] = False + + self.setting = settings.Settings(**setting_dict) + self.keymap = settings.Keymap(**keymap_updated) + # to build help menu text + self.keymap_user_dict = keymap_dict + + @property + def filepath(self) -> str: + return os.path.join(self.prefix, "configuration.json") if self.prefix else os.devnull + + def save(self, cfg_dict): + with open(self.filepath, "w") as file: + json.dump(cfg_dict, file, indent=2) + + @staticmethod + def update_dict( + old_dict: Mapping[str, Union[str, int, bool]], + new_dict: Mapping[str, Union[str, int, bool]], + place_new=False, + ) -> Mapping[str, Union[str, int, bool]]: + """Returns a copy of `old_dict` after updating it with `new_dict`""" + + result = {**old_dict} + for k, _ in new_dict.items(): + if k in result: + result[k] = new_dict[k] + elif place_new: + result[k] = new_dict[k] + + return result + + @staticmethod + def update_keys_tuple( + old_keys: Mapping[str, Tuple[str, ...]], + new_keys: Mapping[str, Tuple[str, ...]], + place_new: bool = False, + ) -> Mapping[str, Tuple[str, ...]]: + """Returns a copy of `old_keys` after updating it with `new_keys` + by appending the tuple value and removes duplicate""" + + result = {**old_keys} + for k, _ in new_keys.items(): + if k in result: + result[k] = tuple(set(result[k] + new_keys[k])) + elif place_new: + result[k] = tuple(set(new_keys[k])) + + return result diff --git a/src/epy_reader/ebooks/__init__.py b/src/epy_reader/ebooks/__init__.py new file mode 100644 index 0000000..da5cfc0 --- /dev/null +++ b/src/epy_reader/ebooks/__init__.py @@ -0,0 +1,15 @@ +__all__ = [ + "Ebook", + "Epub", + "FictionBook", + "Mobi", + "Azw", + 
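Config.update_dict and Config.update_keys_tuple above define how a user's configuration.json overlays the defaults: with place_new left False, unknown user keys are silently dropped, and keymap tuples are merged with duplicates removed. A standalone sketch of those semantics (plain dicts stand in for the real Settings/Keymap dataclasses; the "DefaultViewer" and "ScrollUp" names are assumed for illustration, while "PageScrollAnimation" appears in config.py above):

    # Standalone illustration of Config's merge semantics; not the real dataclasses.
    defaults = {"DefaultViewer": "auto", "PageScrollAnimation": True}
    user_cfg = {"PageScrollAnimation": False, "UnknownOption": 1}

    merged = {**defaults}
    for k, v in user_cfg.items():
        if k in merged:  # place_new=False: unknown keys are ignored
            merged[k] = v
    assert merged == {"DefaultViewer": "auto", "PageScrollAnimation": False}

    # Keymap values are tuples; merging unions them and drops duplicates.
    user_keys = {"ScrollUp": ("k",)}
    builtin_keys = {"ScrollUp": ("k", "KEY_UP")}
    merged_keys = {k: tuple(set(user_keys.get(k, ()) + v)) for k, v in builtin_keys.items()}
    assert sorted(merged_keys["ScrollUp"]) == ["KEY_UP", "k"]

In the real class each resulting string is additionally wrapped in Key(...) before being fed to the Keymap dataclass.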
"URL", +] + +from epy_reader.ebooks.azw import Azw +from epy_reader.ebooks.base import Ebook +from epy_reader.ebooks.epub import Epub +from epy_reader.ebooks.fictionbook import FictionBook +from epy_reader.ebooks.mobi import Mobi +from epy_reader.ebooks.url import URL diff --git a/src/epy_reader/ebooks/azw.py b/src/epy_reader/ebooks/azw.py new file mode 100644 index 0000000..139fcc5 --- /dev/null +++ b/src/epy_reader/ebooks/azw.py @@ -0,0 +1,26 @@ +import contextlib +import os +import shutil +import tempfile +import zipfile + +from epy_reader.ebooks.epub import Epub +from epy_reader.tools import unpack_kindle_book + + +class Azw(Epub): + def __init__(self, fileepub): + self.path = os.path.abspath(fileepub) + self.tmpdir = tempfile.mkdtemp(prefix="epy-") + basename, _ = os.path.splitext(os.path.basename(self.path)) + self.tmpepub = os.path.join(self.tmpdir, "mobi8", basename + ".epub") + + def initialize(self): + with contextlib.redirect_stdout(None): + unpack_kindle_book(self.path, self.tmpdir, epubver="A", use_hd=True) + self.file = zipfile.ZipFile(self.tmpepub, "r") + Epub.initialize(self) + + def cleanup(self) -> None: + shutil.rmtree(self.tmpdir) + return diff --git a/src/epy_reader/ebooks/base.py b/src/epy_reader/ebooks/base.py new file mode 100644 index 0000000..0869db9 --- /dev/null +++ b/src/epy_reader/ebooks/base.py @@ -0,0 +1,48 @@ +import xml.etree.ElementTree as ET +from typing import Tuple, Union + +from epy_reader.models import BookMetadata, TocEntry + + +class Ebook: + def __init__(self, fileepub: str): + raise NotImplementedError("Ebook.__init__() not implemented") + + @property + def path(self) -> str: + return self._path + + @path.setter + def path(self, value: str) -> None: + self._path = value + + @property + def contents(self) -> Union[Tuple[str, ...], Tuple[ET.Element, ...]]: + return self._contents + + @contents.setter + def contents(self, value: Union[Tuple[str, ...], Tuple[ET.Element, ...]]) -> None: + self._contents = value + + @property + def toc_entries(self) -> Tuple[TocEntry, ...]: + return self._toc_entries + + @toc_entries.setter + def toc_entries(self, value: Tuple[TocEntry, ...]) -> None: + self._toc_entries = value + + def get_meta(self) -> BookMetadata: + raise NotImplementedError("Ebook.get_meta() not implemented") + + def initialize(self) -> None: + raise NotImplementedError("Ebook.initialize() not implemented") + + def get_raw_text(self, content: Union[str, ET.Element]) -> str: + raise NotImplementedError("Ebook.get_raw_text() not implemented") + + def get_img_bytestr(self, impath: str) -> Tuple[str, bytes]: + raise NotImplementedError("Ebook.get_img_bytestr() not implemented") + + def cleanup(self) -> None: + raise NotImplementedError("Ebook.cleanup() not implemented") diff --git a/src/epy_reader/ebooks/epub.py b/src/epy_reader/ebooks/epub.py new file mode 100644 index 0000000..a8cf0fa --- /dev/null +++ b/src/epy_reader/ebooks/epub.py @@ -0,0 +1,202 @@ +import dataclasses +import os +import xml.etree.ElementTree as ET +import zipfile +import zlib +from typing import Dict, List, Optional, Sequence, Tuple, Union +from urllib.parse import unquote, urljoin + +from epy_reader.ebooks.base import Ebook +from epy_reader.models import BookMetadata, TocEntry + + +# TODO: to be deprecated +DEBUG = False + + +class Epub(Ebook): + NAMESPACE = { + "DAISY": "http://www.daisy.org/z3986/2005/ncx/", + "OPF": "http://www.idpf.org/2007/opf", + "CONT": "urn:oasis:names:tc:opendocument:xmlns:container", + "XHTML": "http://www.w3.org/1999/xhtml", + "EPUB": 
"http://www.idpf.org/2007/ops", + # Dublin Core + "DC": "http://purl.org/dc/elements/1.1/", + } + + def __init__(self, fileepub: str): + self.path: str = os.path.abspath(fileepub) + self.file: Union[zipfile.ZipFile, str] = zipfile.ZipFile(fileepub, "r") + + # populate these attributes + # by calling self.initialize() + self.root_filepath: str + self.root_dirpath: str + + def get_meta(self) -> BookMetadata: + assert isinstance(self.file, zipfile.ZipFile) + # why self.file.read(self.root_filepath) problematic + # content_opf = ET.fromstring(self.file.open(self.root_filepath).read()) + content_opf = ET.parse(self.file.open(self.root_filepath)) + return Epub._get_metadata(content_opf) + + @staticmethod + def _get_metadata(content_opf: ET.ElementTree) -> BookMetadata: + metadata: Dict[str, Optional[str]] = {} + for field in dataclasses.fields(BookMetadata): + element = content_opf.find(f".//DC:{field.name}", Epub.NAMESPACE) + if element is not None: + metadata[field.name] = element.text + + return BookMetadata(**metadata) + + @staticmethod + def _get_contents(content_opf: ET.ElementTree) -> Tuple[str, ...]: + # cont = ET.parse(self.file.open(self.root_filepath)).getroot() + manifests: List[Tuple[str, str]] = [] + for manifest_elem in content_opf.findall("OPF:manifest/*", Epub.NAMESPACE): + # EPUB3 + # if manifest_elem.get("id") != "ncx" and manifest_elem.get("properties") != "nav": + if ( + manifest_elem.get("media-type") != "application/x-dtbncx+xml" + and manifest_elem.get("properties") != "nav" + ): + manifest_id = manifest_elem.get("id") + assert manifest_id is not None + manifest_href = manifest_elem.get("href") + assert manifest_href is not None + manifests.append((manifest_id, manifest_href)) + + spines: List[str] = [] + contents: List[str] = [] + for spine_elem in content_opf.findall("OPF:spine/*", Epub.NAMESPACE): + idref = spine_elem.get("idref") + assert idref is not None + spines.append(idref) + for spine in spines: + for manifest in manifests: + if spine == manifest[0]: + # book_contents.append(root_dirpath + unquote(manifest[1])) + contents.append(unquote(manifest[1])) + manifests.remove(manifest) + # TODO: test is break necessary + break + + return tuple(contents) + + @staticmethod + def _get_tocs(toc: ET.Element, version: str, contents: Sequence[str]) -> Tuple[TocEntry, ...]: + try: + # EPUB3 + if version in {"1.0", "2.0"}: + navPoints = toc.findall("DAISY:navMap//DAISY:navPoint", Epub.NAMESPACE) + elif version == "3.0": + navPoints = toc.findall( + "XHTML:body//XHTML:nav[@EPUB:type='toc']//XHTML:a", Epub.NAMESPACE + ) + + toc_entries: List[TocEntry] = [] + for navPoint in navPoints: + if version in {"1.0", "2.0"}: + src_elem = navPoint.find("DAISY:content", Epub.NAMESPACE) + assert src_elem is not None + src = src_elem.get("src") + + name_elem = navPoint.find("DAISY:navLabel/DAISY:text", Epub.NAMESPACE) + assert name_elem is not None + name = name_elem.text + elif version == "3.0": + src_elem = navPoint + assert src_elem is not None + src = src_elem.get("href") + + name = "".join(list(navPoint.itertext())) + + assert src is not None + src_id = src.split("#") + + try: + idx = contents.index(unquote(src_id[0])) + except ValueError: + continue + + # assert name is not None + # NOTE: skip empty label + if name is not None: + toc_entries.append( + TocEntry( + label=name, + content_index=idx, + section=src_id[1] if len(src_id) == 2 else None, + ) + ) + except AttributeError as e: + # TODO: + if DEBUG: + raise e + + return tuple(toc_entries) + + def initialize(self) -> None: + assert 
isinstance(self.file, zipfile.ZipFile) + + container = ET.parse(self.file.open("META-INF/container.xml")) + rootfile_elem = container.find("CONT:rootfiles/CONT:rootfile", Epub.NAMESPACE) + assert rootfile_elem is not None + self.root_filepath = rootfile_elem.attrib["full-path"] + self.root_dirpath = ( + os.path.dirname(self.root_filepath) + "/" + if os.path.dirname(self.root_filepath) != "" + else "" + ) + + content_opf = ET.parse(self.file.open(self.root_filepath)) + version = content_opf.getroot().get("version") + + contents = Epub._get_contents(content_opf) + self.contents = tuple(urljoin(self.root_dirpath, content) for content in contents) + + if version in {"1.0", "2.0"}: + # "OPF:manifest/*[@id='ncx']" + relative_toc = content_opf.find( + "OPF:manifest/*[@media-type='application/x-dtbncx+xml']", Epub.NAMESPACE + ) + elif version == "3.0": + relative_toc = content_opf.find("OPF:manifest/*[@properties='nav']", Epub.NAMESPACE) + else: + raise RuntimeError(f"Unsupported Epub version: {version}") + assert relative_toc is not None + relative_toc_path = relative_toc.get("href") + assert relative_toc_path is not None + toc_path = self.root_dirpath + relative_toc_path + toc = ET.parse(self.file.open(toc_path)).getroot() + self.toc_entries = Epub._get_tocs(toc, version, contents) # *self.contents (absolute path) + + def get_raw_text(self, content_path: Union[str, ET.Element]) -> str: + assert isinstance(self.file, zipfile.ZipFile) + assert isinstance(content_path, str) + + max_tries: Optional[int] = None # 1 if DEBUG else None + + # use try-except block to catch + # zlib.error: Error -3 while decompressing data: invalid distance too far back + # seems like caused by multiprocessing + tries = 0 + while True: + try: + content = self.file.open(content_path).read() + break + except zlib.error as e: + tries += 1 + if max_tries is not None and tries >= max_tries: + raise e + + return content.decode("utf-8") + + def get_img_bytestr(self, impath: str) -> Tuple[str, bytes]: + assert isinstance(self.file, zipfile.ZipFile) + return impath, self.file.read(impath) + + def cleanup(self) -> None: + pass diff --git a/src/epy_reader/ebooks/fictionbook.py b/src/epy_reader/ebooks/fictionbook.py new file mode 100644 index 0000000..35611b2 --- /dev/null +++ b/src/epy_reader/ebooks/fictionbook.py @@ -0,0 +1,76 @@ +import base64 +import os +import xml.etree.ElementTree as ET +from typing import List, Tuple, Union + +from epy_reader.ebooks import Ebook +from epy_reader.models import BookMetadata, TocEntry + + +class FictionBook(Ebook): + NAMESPACE = {"FB2": "http://www.gribuser.ru/xml/fictionbook/2.0"} + + def __init__(self, filefb: str): + self.path = os.path.abspath(filefb) + self.file = filefb + + # populate these attribute + # by calling self.initialize() + self.root: ET.Element + + def get_meta(self) -> BookMetadata: + title_elem = self.root.find(".//FB2:book-title", FictionBook.NAMESPACE) + first_name_elem = self.root.find(".//FB2:first-name", FictionBook.NAMESPACE) + last_name_elem = self.root.find(".//FB2:last-name", FictionBook.NAMESPACE) + date_elem = self.root.find(".//FB2:date", FictionBook.NAMESPACE) + identifier_elem = self.root.find(".//FB2:id", FictionBook.NAMESPACE) + + author = first_name_elem.text if first_name_elem is not None else None + if last_name_elem is not None: + if author is not None and author != "": + author += f" {last_name_elem.text}" + else: + author = last_name_elem.text + + return BookMetadata( + title=title_elem.text if title_elem is not None else None, + creator=author, + 
date=date_elem.text if date_elem is not None else None, + identifier=identifier_elem.text if identifier_elem is not None else None, + ) + + def initialize(self) -> None: + cont = ET.parse(self.file) + self.root = cont.getroot() + + self.contents = tuple(self.root.findall("FB2:body/*", FictionBook.NAMESPACE)) + + # TODO + toc_entries: List[TocEntry] = [] + for n, i in enumerate(self.contents): + title = i.find("FB2:title", FictionBook.NAMESPACE) + if title is not None: + toc_entries.append( + TocEntry(label="".join(title.itertext()), content_index=n, section=None) + ) + self.toc_entries = tuple(toc_entries) + + def get_raw_text(self, node: Union[str, ET.Element]) -> str: + assert isinstance(node, ET.Element) + ET.register_namespace("", "http://www.gribuser.ru/xml/fictionbook/2.0") + # sys.exit(ET.tostring(node, encoding="utf8", method="html").decode("utf-8").replace("ns1:","")) + return ET.tostring(node, encoding="utf8", method="html").decode("utf-8").replace("ns1:", "") + + def get_img_bytestr(self, imgid: str) -> Tuple[str, bytes]: + # TODO: test if image works + imgid = imgid.replace("#", "") + img_elem = self.root.find("*[@id='{}']".format(imgid)) + assert img_elem is not None + imgtype = img_elem.get("content-type") + img_elem_text = img_elem.text + assert imgtype is not None + assert img_elem_text is not None + return imgid + "." + imgtype.split("/")[1], base64.b64decode(img_elem_text) + + def cleanup(self) -> None: + return diff --git a/src/epy_reader/ebooks/mobi.py b/src/epy_reader/ebooks/mobi.py new file mode 100644 index 0000000..39f3be4 --- /dev/null +++ b/src/epy_reader/ebooks/mobi.py @@ -0,0 +1,69 @@ +import contextlib +import os +import shutil +import tempfile +import xml.etree.ElementTree as ET +from typing import Tuple, Union + +from epy_reader.ebooks.epub import Epub +from epy_reader.models import BookMetadata +from epy_reader.tools import unpack_kindle_book + + +class Mobi(Epub): + def __init__(self, filemobi: str): + self.path = os.path.abspath(filemobi) + self.file = tempfile.mkdtemp(prefix="epy-") + + # populate these attribute + # by calling self.initialize() + self.root_filepath: str + self.root_dirpath: str + + def get_meta(self) -> BookMetadata: + # why self.file.read(self.root_filepath) problematic + with open(os.path.join(self.root_dirpath, "content.opf")) as f: + content_opf = ET.parse(f) # .getroot() + return Epub._get_metadata(content_opf) + + def initialize(self) -> None: + assert isinstance(self.file, str) + + with contextlib.redirect_stdout(None): + unpack_kindle_book(self.path, self.file, epubver="A", use_hd=True) + # TODO: add cleanup here + + self.root_dirpath = os.path.join(self.file, "mobi7") + self.toc_path = os.path.join(self.root_dirpath, "toc.ncx") + version = "2.0" + + with open(os.path.join(self.root_dirpath, "content.opf")) as f: + content_opf = ET.parse(f) # .getroot() + + contents = Epub._get_contents(content_opf) + self.contents = tuple(os.path.join(self.root_dirpath, content) for content in contents) + + with open(self.toc_path) as f: + toc = ET.parse(f).getroot() + self.toc_entries = Epub._get_tocs(toc, version, contents) # *self.contents (absolute path) + + def get_raw_text(self, content_path: Union[str, ET.Element]) -> str: + assert isinstance(content_path, str) + with open(content_path, encoding="utf8") as f: + content = f.read() + # return content.decode("utf-8") + return content + + def get_img_bytestr(self, impath: str) -> Tuple[str, bytes]: + # TODO: test on windows + # if impath "Images/asdf.png" is problematic + image_abspath = 
os.path.join(self.root_dirpath, impath)
+        image_abspath = os.path.normpath(image_abspath)  # handle cross-platform path
+        with open(image_abspath, "rb") as f:
+            src = f.read()
+        return impath, src
+
+    def cleanup(self) -> None:
+        assert isinstance(self.file, str)
+        shutil.rmtree(self.file)
+        return
diff --git a/src/epy_reader/ebooks/url.py b/src/epy_reader/ebooks/url.py
new file mode 100644
index 0000000..4356fa7
--- /dev/null
+++ b/src/epy_reader/ebooks/url.py
@@ -0,0 +1,49 @@
+from pathlib import PurePosixPath
+from typing import Tuple
+from urllib.error import HTTPError, URLError
+from urllib.parse import urljoin, urlparse
+from urllib.request import Request, urlopen
+
+from epy_reader import __version__
+from epy_reader.ebooks import Ebook
+from epy_reader.lib import is_url
+from epy_reader.models import BookMetadata
+
+
+class URL(Ebook):
+    _header = {
+        "User-Agent": f"epy/v{__version__}",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.8",
+    }
+
+    def __init__(self, url: str):
+        self.path = url
+        self.file = url
+        self.contents = ("_",)
+        self.toc_entries = tuple()
+
+    def get_meta(self) -> BookMetadata:
+        return BookMetadata()
+
+    def initialize(self) -> None:
+        try:
+            with urlopen(Request(self.path, headers=URL._header)) as response:
+                self.html = response.read().decode()
+        except HTTPError as e:
+            raise e
+        except URLError as e:
+            raise e
+
+    def get_raw_text(self, _) -> str:
+        return self.html
+
+    def get_img_bytestr(self, src: str) -> Tuple[str, bytes]:
+        image_url = src if is_url(src) else urljoin(self.path, src)
+        # TODO: catch error on request
+        with urlopen(Request(image_url, headers=URL._header)) as response:
+            byte_str = response.read()
+        return PurePosixPath(urlparse(src).path).name, byte_str
+
+    def cleanup(self) -> None:
+        return
diff --git a/src/epy_reader/lib.py b/src/epy_reader/lib.py
new file mode 100644
index 0000000..b010323
--- /dev/null
+++ b/src/epy_reader/lib.py
@@ -0,0 +1,63 @@
+from typing import Any, Optional, Tuple
+from urllib.parse import urljoin, urlparse
+
+
+def is_url(string: str) -> bool:
+    try:
+        tmp = urlparse(string)
+        return all([tmp.scheme, tmp.netloc])
+    except ValueError:
+        return False
+
+
+def coerce_to_int(string: str) -> Optional[int]:
+    try:
+        return int(string)
+    except ValueError:
+        return None
+
+
+def truncate(teks: str, subtitution_text: str, maxlen: int, startsub: int = 0) -> str:
+    """
+    Truncate text
+
+    eg.
+    :param teks: 'This is long silly dummy text'
+    :param subtitution_text: '...'
+    :param maxlen: 12
+    :param startsub: 3
+    :return: 'Thi...y text'
+    """
+    if startsub > maxlen:
+        raise ValueError("Var startsub cannot be bigger than maxlen.")
+    elif len(teks) <= maxlen:
+        return teks
+    else:
+        lensu = len(subtitution_text)
+        beg = teks[:startsub]
+        mid = (
+            subtitution_text
+            if lensu <= maxlen - startsub
+            else subtitution_text[: maxlen - startsub]
+        )
+        end = teks[startsub + lensu - maxlen :] if lensu < maxlen - startsub else ""
+        return beg + mid + end
+
+
+def tuple_subtract(tuple_one: Tuple[Any, ...], tuple_two: Tuple[Any, ...]) -> Tuple[Any, ...]:
+    """
+    Returns tuple with members in tuple_one
+    but not in tuple_two
+    """
+    return tuple(i for i in tuple_one if i not in tuple_two)
+
+
+def resolve_path(current_dir: str, relative_path: str) -> str:
+    """
+    Resolve path containing dots
+    eg. '/foo/bar/book.html' + '../img.png' = '/foo/img.png'
+    NOTE: the '/' suffix is important to indicate that the current dir is 'bar'
+    """
+    # could also use os.path.normpath() here,
+    # but if the image is inside a zipfile then a posix path is mandatory
+    return urljoin(current_dir, relative_path)
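+
+
+# Illustrative usage of the helpers above (values taken from the
+# docstrings, results verified against the implementations):
+#   truncate("This is long silly dummy text", "...", 12, 3) -> 'Thi...y text'
+#   resolve_path("/foo/bar/book.html", "../img.png") -> '/foo/img.png'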
diff --git a/src/epy_reader/models.py b/src/epy_reader/models.py
new file mode 100644
index 0000000..db4701b
--- /dev/null
+++ b/src/epy_reader/models.py
@@ -0,0 +1,232 @@
+import os
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+from typing import Any, Mapping, Optional, Tuple, Union
+
+
+class Direction(Enum):
+    FORWARD = "forward"
+    BACKWARD = "backward"
+
+
+@dataclass(frozen=True)
+class BookMetadata:
+    title: Optional[str] = None
+    creator: Optional[str] = None
+    description: Optional[str] = None
+    publisher: Optional[str] = None
+    date: Optional[str] = None
+    language: Optional[str] = None
+    format: Optional[str] = None
+    identifier: Optional[str] = None
+    source: Optional[str] = None
+
+
+@dataclass(frozen=True)
+class LibraryItem:
+    last_read: datetime
+    filepath: str
+    title: Optional[str] = None
+    author: Optional[str] = None
+    reading_progress: Optional[float] = None
+
+    def __str__(self) -> str:
+        if self.reading_progress is None:
+            reading_progress_str = "N/A"
+        else:
+            reading_progress_str = f"{int(self.reading_progress * 100)}%"
+        reading_progress_str = reading_progress_str.rjust(4)
+
+        book_name: str
+        filename = self.filepath.replace(os.path.expanduser("~"), "~", 1)
+        if self.title is not None and self.author is not None:
+            book_name = f"{self.title} - {self.author} ({filename})"
+        elif self.title is None and self.author:
+            book_name = f"{filename} - {self.author}"
+        else:
+            book_name = filename
+
+        last_read_str = self.last_read.strftime("%I:%M%p %b %d")
+
+        return f"{reading_progress_str} {last_read_str}: {book_name}"
+
+
+@dataclass(frozen=True)
+class ReadingState:
+    """
+    Data model for reading state.
+
+    `row` has to be assigned explicitly because the Seamless
+    feature needs it to adjust between a row relative to the
+    book's current content index and a row absolute to the
+    book's entire content.
+
+    `rel_pctg` and `section` default to None; if either of
+    them is assigned a value, it overrides the `row` value.
+    """
+
+    content_index: int
+    textwidth: int
+    row: int
+    rel_pctg: Optional[float] = None
+    section: Optional[str] = None
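+
+
+# eg. (illustrative, values hypothetical) when the reader loads
+# ReadingState(content_index=2, textwidth=80, row=150, rel_pctg=0.35),
+# row is recomputed as round(0.35 * total lines) rather than taken
+# from 150 directly, since rel_pctg overrides row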
+
+
+@dataclass(frozen=True)
+class SearchData:
+    direction: Direction = Direction.FORWARD
+    value: str = ""
+
+
+@dataclass(frozen=True)
+class LettersCount:
+    """
+    all: total letters in the book
+    cumulative: tuple of running totals of letters before each content
+    eg. cumulative = (0, 50, 89, ...) means
+    0 letters come before contents[0],
+    50 letters come before contents[1] (ie. contents[0] has 50 letters),
+    89 letters come before contents[2] (contents[0] + contents[1])
+    """
+
+    all: int
+    cumulative: Tuple[int, ...]
+
+
+@dataclass(frozen=True)
+class CharPos:
+    """
+    Describes character position in text.
+    eg. ["Lorem ipsum dolor sit amet,",   # row=0
+         "consectetur adipiscing elit."]  # row=1
+            ^CharPos(row=1, col=3)
+    """
+
+    row: int
+    col: int
+
+
+@dataclass(frozen=True)
+class TextMark:
+    """
+    Describes marking in text.
+    eg. Interval [CharPos(row=0, col=3), CharPos(row=1, col=4)];
+    note that the marking is inclusive [] on both sides
+    instead of right-exclusive [)
+    """
+
+    start: CharPos
+    end: Optional[CharPos] = None
+
+    def is_valid(self) -> bool:
+        """
+        Assert validity and check if the mark is unterminated
+        eg. <i> This is italic text
+            Missing </i> tag
+        """
+        if self.end is not None:
+            if self.start.row == self.end.row:
+                return self.start.col <= self.end.col
+            else:
+                return self.start.row < self.end.row
+
+        return False
+
+
+@dataclass(frozen=True)
+class TextSpan:
+    """
+    Like TextMark but using a span of letters (n_letters)
+    """
+
+    start: CharPos
+    n_letters: int
+
+
+@dataclass(frozen=True)
+class InlineStyle:
+    """
+    eg. InlineStyle(attr=curses.A_BOLD, row=3, col=4, n_letters=3)
+    """
+
+    row: int
+    col: int
+    n_letters: int
+    attr: int
+
+
+@dataclass(frozen=True)
+class TocEntry:
+    label: str
+    content_index: int
+    section: Optional[str]
+
+
+@dataclass(frozen=True)
+class TextStructure:
+    """
+    Object that describes how the text
+    should be displayed on screen.
+
+    text_lines: ("list of lines", "of text", ...)
+    image_maps: {line_num: path/to/image/in/ebook/zip}
+    section_rows: {section_id: line_num}
+    formatting: (InlineStyle, ...)
+    """
+
+    text_lines: Tuple[str, ...]
+    image_maps: Mapping[int, str]
+    section_rows: Mapping[str, int]
+    formatting: Tuple[InlineStyle, ...]
+
+
+@dataclass(frozen=True)
+class NoUpdate:
+    pass
+
+
+class Key:
+    """
+    Wrapper for a key, because raw ord("k") and chr(34) are confusing to read
+    """
+
+    def __init__(self, char_or_int: Union[str, int]):
+        self.value: int = char_or_int if isinstance(char_or_int, int) else ord(char_or_int)
+        self.char: str = char_or_int if isinstance(char_or_int, str) else chr(char_or_int)
+
+    def __eq__(self, other: Any) -> bool:
+        if isinstance(other, Key):
+            return self.value == other.value
+        return False
+
+    def __ne__(self, other: Any) -> bool:
+        # negate __eq__; returning self.__eq__(other) here would make
+        # != behave exactly like ==
+        return not self.__eq__(other)
+
+    def __hash__(self) -> int:
+        return hash(self.value)
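+
+
+# eg. (illustrative) Key("q") == Key(113) since ord("q") == 113, so raw
+# int keycodes returned by curses can be compared directly against Key("q")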
+
+
+class AppData:
+    @property
+    def prefix(self) -> Optional[str]:
+        """Return None if neither home dir nor user dir exists"""
+        prefix: Optional[str] = None
+
+        # UNIX filesystem
+        homedir = os.getenv("HOME")
+        # WIN filesystem
+        userdir = os.getenv("USERPROFILE")
+
+        if homedir:
+            if os.path.isdir(os.path.join(homedir, ".config")):
+                prefix = os.path.join(homedir, ".config", "epy")
+            else:
+                prefix = os.path.join(homedir, ".epy")
+        elif userdir:
+            prefix = os.path.join(userdir, ".epy")
+
+        if prefix:
+            os.makedirs(prefix, exist_ok=True)
+
+        return prefix
diff --git a/src/epy_reader/parser.py b/src/epy_reader/parser.py
new file mode 100644
index 0000000..6eced00
--- /dev/null
+++ b/src/epy_reader/parser.py
@@ -0,0 +1,421 @@
+import curses
+import dataclasses
+import re
+import textwrap
+from html import unescape
+from html.parser import HTMLParser
+from typing import Dict, List, Mapping, Optional, Sequence, Set, Tuple, Union
+from urllib.parse import unquote
+
+from epy_reader.models import CharPos, InlineStyle, TextMark, TextSpan, TextStructure
+
+
+class HTMLtoLines(HTMLParser):
+    para = {"p", "div"}
+    inde = {"q", "dt", "dd", "blockquote"}
+    pref = {"pre"}
+    bull = {"li"}
+    hide = {"script", "style", "head"}
+    ital = {"i", "em"}
+    bold = {"b", "strong"}
+    # hide = {"script", "style", "head", "sub"}
+    # sup_lookup = "⁰¹²³⁴⁵⁶⁷⁸⁹"
+    # sub_lookup = "₀₁₂₃₄₅₆₇₈₉"
+
+    attr_bold = curses.A_BOLD
+    try:
+        attr_italic = curses.A_ITALIC
+    except AttributeError:
+        try:
+            attr_italic = curses.A_UNDERLINE
+        except AttributeError:
+            attr_italic = curses.A_NORMAL
+
+    @staticmethod
+    def _mark_to_spans(text: Sequence[str], marks: Sequence[TextMark]) -> List[TextSpan]:
+        """
+        Convert text marks in lines of text into per-line text spans,
+        keeping duplicate spans.
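+
+        eg. (illustrative) with text = ["Lorem ipsum", "dolor sit amet"],
+        a mark from CharPos(row=0, col=6) to CharPos(row=1, col=4) yields
+        two spans: TextSpan(start=CharPos(row=0, col=6), n_letters=5)
+        and TextSpan(start=CharPos(row=1, col=0), n_letters=4)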
+ """ + spans: List[TextSpan] = [] + for mark in marks: + if mark.is_valid(): + # mypy issue, should be handled by mark.is_valid() + assert mark.end is not None + if mark.start.row == mark.end.row: + spans.append( + TextSpan(start=mark.start, n_letters=mark.end.col - mark.start.col) + ) + else: + spans.append( + TextSpan( + start=mark.start, n_letters=len(text[mark.start.row]) - mark.start.col + ) + ) + for nth_line in range(mark.start.row + 1, mark.end.row): + spans.append( + TextSpan( + start=CharPos(row=nth_line, col=0), n_letters=len(text[nth_line]) + ) + ) + spans.append( + TextSpan(start=CharPos(row=mark.end.row, col=0), n_letters=mark.end.col) + ) + + return spans # list(set(spans)) + + @staticmethod + def _adjust_wrapped_spans( + wrapped_lines: Sequence[str], + span: TextSpan, + *, + line_adjustment: int = 0, + left_adjustment: int = 0, + ) -> List[TextSpan]: + """ + Adjust text span to wrapped lines. + Not perfect, but should be good enough considering + the limitation on commandline interface. + """ + + # current_row = span.start.row + line_adjustment + current_row = line_adjustment + start_col = span.start.col + end_col = start_col + span.n_letters + + prev = 0 # chars length before current line + spans: List[TextSpan] = [] + for n, line in enumerate(wrapped_lines): + # + 1 compensates textwrap.wrap(*args, replace_whitespace=True, drop_whitespace=True) + line_len = len(line) + 1 + current = prev + line_len # chars length before next line + + # -:unmarked *:marked + # |------*****--------| + if start_col in range(prev, current) and end_col in range(prev, current): + spans.append( + TextSpan( + start=CharPos(row=current_row + n, col=start_col - prev + left_adjustment), + n_letters=span.n_letters, + ) + ) + + # |----------*********| + elif start_col in range(prev, current): + spans.append( + TextSpan( + start=CharPos(row=current_row + n, col=start_col - prev + left_adjustment), + n_letters=current - start_col - 1, # -1: dropped whitespace + ) + ) + + # |********-----------| + elif end_col in range(prev, current): + spans.append( + TextSpan( + start=CharPos(row=current_row + n, col=0 + left_adjustment), + n_letters=end_col - prev + 1, # +1: dropped whitespace + ) + ) + + # |*******************| + elif prev in range(start_col, end_col) and current in range(start_col, end_col): + spans.append( + TextSpan( + start=CharPos(row=current_row + n, col=0 + left_adjustment), + n_letters=line_len - 1, # -1: dropped whitespace + ) + ) + + elif prev > end_col: + break + + prev = current + + return spans + + @staticmethod + def _group_spans_by_row(blocks: Sequence[TextSpan]) -> Mapping[int, List[TextSpan]]: + groups: Dict[int, List[TextSpan]] = {} + for block in blocks: + row = block.start.row + if row in groups: + groups[row].append(block) + else: + groups[row] = [block] + return groups + + def __init__(self, sects={""}): + HTMLParser.__init__(self) + self.text = [""] + self.ishead = False + self.isinde = False + self.isbull = False + self.ispref = False + self.ishidden = False + self.idhead = set() + self.idinde = set() + self.idbull = set() + self.idpref = set() + self.idimgs = set() + self.sects = sects + self.sectsindex = {} + self.italic_marks: List[TextMark] = [] + self.bold_marks: List[TextMark] = [] + self.imgs: Dict[int, str] = dict() + + def handle_starttag(self, tag, attrs): + if re.match("h[1-6]", tag) is not None: + self.ishead = True + elif tag in self.inde: + self.isinde = True + elif tag in self.pref: + self.ispref = True + elif tag in self.bull: + self.isbull = True + elif tag 
in self.hide: + self.ishidden = True + elif tag == "sup": + self.text[-1] += "^{" + elif tag == "sub": + self.text[-1] += "_{" + # NOTE: "img" and "image" + # In HTML, both are startendtag (no need endtag) + # but in XHTML both need endtag + elif tag in {"img", "image"}: + for i in attrs: + if (tag == "img" and i[0] == "src") or (tag == "image" and i[0].endswith("href")): + this_line = len(self.text) + self.idimgs.add(this_line) + self.imgs[this_line] = unquote(i[1]) + self.text.append("[IMAGE]") + # formatting + elif tag in self.ital: + if len(self.italic_marks) == 0 or self.italic_marks[-1].is_valid(): + char_pos = CharPos(row=len(self.text) - 1, col=len(self.text[-1])) + self.italic_marks.append(TextMark(start=char_pos)) + elif tag in self.bold: + if len(self.bold_marks) == 0 or self.bold_marks[-1].is_valid(): + char_pos = CharPos(row=len(self.text) - 1, col=len(self.text[-1])) + self.bold_marks.append(TextMark(start=char_pos)) + if self.sects != {""}: + for i in attrs: + if i[0] == "id" and i[1] in self.sects: + # self.text[-1] += " (#" + i[1] + ") " + # self.sectsindex.append([len(self.text), i[1]]) + self.sectsindex[len(self.text) - 1] = i[1] + + def handle_startendtag(self, tag, attrs): + if tag == "br": + self.text += [""] + elif tag in {"img", "image"}: + for i in attrs: + # if (tag == "img" and i[0] == "src")\ + # or (tag == "image" and i[0] == "xlink:href"): + if (tag == "img" and i[0] == "src") or (tag == "image" and i[0].endswith("href")): + this_line = len(self.text) + self.idimgs.add(this_line) + self.imgs[this_line] = unquote(i[1]) + self.text.append("[IMAGE]") + self.text.append("") + # sometimes attribute "id" is inside "startendtag" + # especially html from mobi module (kindleunpack fork) + if self.sects != {""}: + for i in attrs: + if i[0] == "id" and i[1] in self.sects: + # self.text[-1] += " (#" + i[1] + ") " + self.sectsindex[len(self.text) - 1] = i[1] + + def handle_endtag(self, tag): + if re.match("h[1-6]", tag) is not None: + self.text.append("") + self.text.append("") + self.ishead = False + elif tag in self.para: + self.text.append("") + elif tag in self.hide: + self.ishidden = False + elif tag in self.inde: + if self.text[-1] != "": + self.text.append("") + self.isinde = False + elif tag in self.pref: + if self.text[-1] != "": + self.text.append("") + self.ispref = False + elif tag in self.bull: + if self.text[-1] != "": + self.text.append("") + self.isbull = False + elif tag in {"sub", "sup"}: + self.text[-1] += "}" + elif tag in {"img", "image"}: + self.text.append("") + # formatting + elif tag in self.ital: + char_pos = CharPos(row=len(self.text) - 1, col=len(self.text[-1])) + last_mark = self.italic_marks[-1] + self.italic_marks[-1] = dataclasses.replace(last_mark, end=char_pos) + elif tag in self.bold: + char_pos = CharPos(row=len(self.text) - 1, col=len(self.text[-1])) + last_mark = self.bold_marks[-1] + self.bold_marks[-1] = dataclasses.replace(last_mark, end=char_pos) + + def handle_data(self, raw): + if raw and not self.ishidden: + if self.text[-1] == "": + tmp = raw.lstrip() + else: + tmp = raw + if self.ispref: + line = unescape(tmp) + else: + line = unescape(re.sub(r"\s+", " ", tmp)) + self.text[-1] += line + if self.ishead: + self.idhead.add(len(self.text) - 1) + elif self.isbull: + self.idbull.add(len(self.text) - 1) + elif self.isinde: + self.idinde.add(len(self.text) - 1) + elif self.ispref: + self.idpref.add(len(self.text) - 1) + + def get_structured_text( + self, textwidth: Optional[int] = 0, starting_line: int = 0 + ) -> Union[Tuple[str, 
...], TextStructure]: + + if not textwidth: + return tuple(self.text) + + text: List[str] = [] + images: Dict[int, str] = dict() # {line_num: path/in/zip} + sect: Dict[str, int] = dict() # {section_id: line_num} + formatting: List[InlineStyle] = [] + + italic_spans: List[TextSpan] = HTMLtoLines._mark_to_spans(self.text, self.italic_marks) + bold_spans: List[TextSpan] = HTMLtoLines._mark_to_spans(self.text, self.bold_marks) + italic_groups = HTMLtoLines._group_spans_by_row(italic_spans) + bold_groups = HTMLtoLines._group_spans_by_row(bold_spans) + + for n, line in enumerate(self.text): + + startline = len(text) + # findsect = re.search(r"(?<= \(#).*?(?=\) )", line) + # if findsect is not None and findsect.group() in self.sects: + # line = line.replace(" (#" + findsect.group() + ") ", "") + # # line = line.replace(" (#" + findsect.group() + ") ", " "*(5+len(findsect.group()))) + # sect[findsect.group()] = len(text) + if n in self.sectsindex.keys(): + sect[self.sectsindex[n]] = starting_line + len(text) + if n in self.idhead: + # text += [line.rjust(textwidth // 2 + len(line) // 2)] + [""] + text += [line.center(textwidth)] + [""] + formatting += [ + InlineStyle( + row=starting_line + i, col=0, n_letters=len(text[i]), attr=self.attr_bold + ) + for i in range(startline, len(text)) + ] + elif n in self.idinde: + text += [" " + i for i in textwrap.wrap(line, textwidth - 3)] + [""] + elif n in self.idbull: + tmp = textwrap.wrap(line, textwidth - 3) + text += [" - " + i if i == tmp[0] else " " + i for i in tmp] + [""] + elif n in self.idpref: + tmp = line.splitlines() + wraptmp = [] + for tmp_line in tmp: + wraptmp += [i for i in textwrap.wrap(tmp_line, textwidth - 6)] + text += [" " + i for i in wraptmp] + [""] + elif n in self.idimgs: + images[starting_line + len(text)] = self.imgs[n] + text += [line.center(textwidth)] + formatting += [ + InlineStyle( + row=starting_line + len(text) - 1, + col=0, + n_letters=len(text[-1]), + attr=self.attr_bold, + ) + ] + text += [""] + else: + text += textwrap.wrap(line, textwidth) + [""] + + endline = len(text) # -1 + + left_adjustment = 3 if n in self.idbull | self.idinde else 0 + + for spans in italic_groups.get(n, []): + italics = HTMLtoLines._adjust_wrapped_spans( + text[startline:endline], + spans, + line_adjustment=startline, + left_adjustment=left_adjustment, + ) + for span in italics: + formatting.append( + InlineStyle( + row=starting_line + span.start.row, + col=span.start.col, + n_letters=span.n_letters, + attr=self.attr_italic, + ) + ) + + for spans in bold_groups.get(n, []): + bolds = HTMLtoLines._adjust_wrapped_spans( + text[startline:endline], + spans, + line_adjustment=startline, + left_adjustment=left_adjustment, + ) + for span in bolds: + formatting.append( + InlineStyle( + row=starting_line + span.start.row, + col=span.start.col, + n_letters=span.n_letters, + attr=self.attr_bold, + ) + ) + + # chapter suffix + text += ["***".center(textwidth)] + + return TextStructure( + text_lines=tuple(text), + image_maps=images, + section_rows=sect, + formatting=tuple(formatting), + ) + + +def parse_html( + html_src: str, + *, + textwidth: Optional[int] = None, + section_ids: Optional[Set[str]] = None, + starting_line: int = 0, +) -> Union[Tuple[str, ...], TextStructure]: + """ + Parse html string into TextStructure + + :param html_src: html str to parse + :param textwidth: textwidth to count max length of returned TextStructure + if None given, sequence of text as paragraph is returned + :param section_ids: set of section ids to look for inside html tag 
attr + :return: Tuple[str, ...] if textwidth not given else TextStructure + """ + if not section_ids: + section_ids = set() + + parser = HTMLtoLines(section_ids) + # try: + parser.feed(html_src) + parser.close() + # except: + # pass + + return parser.get_structured_text(textwidth, starting_line) diff --git a/src/epy_reader/reader.py b/src/epy_reader/reader.py new file mode 100644 index 0000000..a903b62 --- /dev/null +++ b/src/epy_reader/reader.py @@ -0,0 +1,1610 @@ +import curses +import dataclasses +import multiprocessing +import os +import re +import shutil +import signal +import sqlite3 +import subprocess +import sys +import tempfile +import uuid +import xml.etree.ElementTree as ET +from html import unescape +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union + +import epy_reader.settings as settings +from epy_reader.board import InfiniBoard +from epy_reader.config import Config +from epy_reader.ebooks import Azw, Ebook, Epub, Mobi +from epy_reader.lib import resolve_path +from epy_reader.models import ( + Direction, + InlineStyle, + Key, + LettersCount, + NoUpdate, + ReadingState, + SearchData, + TextStructure, + TocEntry, +) +from epy_reader.parser import parse_html +from epy_reader.settings import DoubleSpreadPadding +from epy_reader.speakers import SpeakerBaseModel +from epy_reader.state import State +from epy_reader.utils import ( + choice_win, + construct_relative_reading_state, + construct_speaker, + count_letters, + count_letters_parallel, + find_current_content_index, + get_ebook_obj, + merge_text_structures, + pgend, + safe_curs_set, + text_win, +) + + +# TODO: to be deprecated +DEBUG = False + + +class Reader: + def __init__(self, screen, ebook: Ebook, config: Config, state: State): + + self.setting = config.setting + self.keymap = config.keymap + # to build help menu text + self.keymap_user_dict = config.keymap_user_dict + + self.seamless = self.setting.SeamlessBetweenChapters + + # keys that will make + # windows exit and return the said key + self._win_keys = ( + # curses.KEY_RESIZE is a must + (Key(curses.KEY_RESIZE),) + + self.keymap.TableOfContents + + self.keymap.Metadata + + self.keymap.Help + ) + + # screen initialization + self.screen = screen + self.screen.keypad(True) + safe_curs_set(0) + if self.setting.MouseSupport: + curses.mousemask(-1) + # curses.mouseinterval(0) + self.screen.clear() + + # screen color + self.is_color_supported: bool = False + try: + curses.use_default_colors() + curses.init_pair(1, self.setting.DefaultColorFG, self.setting.DefaultColorBG) + curses.init_pair(2, self.setting.DarkColorFG, self.setting.DarkColorBG) + curses.init_pair(3, self.setting.LightColorFG, self.setting.LightColorBG) + self.screen.bkgd(curses.color_pair(1)) + self.is_color_supported = True + except: + self.is_color_supported = False + + # show loader and start heavy resources processes + self.show_loader(subtext="initalizing ebook") + + # main ebook object + self.ebook = ebook + try: + self.ebook.initialize() + except (KeyboardInterrupt, Exception) as e: + self.ebook.cleanup() + if DEBUG: + raise e + else: + sys.exit("ERROR: Badly-structured ebook.\n" + str(e)) + + # state + self.state = state + + # page scroll animation + self.page_animation: Optional[Direction] = None + + # show reading progress + self.show_reading_progress: bool = self.setting.ShowProgressIndicator + self.reading_progress: Optional[float] = None # calculate after count_letters() + + # search storage + self.search_data: Optional[SearchData] = None + + # double spread + self.spread = 2 
if self.setting.StartWithDoubleSpread else 1 + + # jumps marker container + self.jump_list: Dict[str, ReadingState] = dict() + + # TTS speaker utils + self._tts_speaker: Optional[SpeakerBaseModel] = construct_speaker( + self.setting.PreferredTTSEngine, self.setting.TTSEngineArgs + ) + self.tts_support: bool = bool(self._tts_speaker) + self.is_speaking: bool = False + + # multi process & progress percentage + self._multiprocess_support: bool = False if multiprocessing.cpu_count() == 1 else True + self._process_counting_letter: Optional[multiprocessing.Process] = None + self.letters_count: Optional[LettersCount] = None + + def run_counting_letters(self): + if self._multiprocess_support: + try: + self._proc_parent, self._proc_child = multiprocessing.Pipe() + self._process_counting_letter = multiprocessing.Process( + name="epy-subprocess-counting-letters", + target=count_letters_parallel, + args=(self.ebook, self._proc_child), + ) + # forking will raise + # zlib.error: Error -3 while decompressing data: invalid distance too far back + self._process_counting_letter.start() + except Exception as e: + if DEBUG: + raise e + self._multiprocess_support = False + if not self._multiprocess_support: + self.letters_count = count_letters(self.ebook) + + def try_assign_letters_count(self, *, force_wait=False) -> None: + if isinstance(self._process_counting_letter, multiprocessing.Process): + if force_wait and self._process_counting_letter.is_alive(): + self._process_counting_letter.join() + + if self._process_counting_letter.exitcode == 0: + self.letters_count = self._proc_parent.recv() + self._proc_parent.close() + self._process_counting_letter.terminate() + self._process_counting_letter.close() + self._process_counting_letter = None + + def calculate_reading_progress( + self, letters_per_content: List[int], reading_state: ReadingState + ) -> None: + if self.letters_count: + self.reading_progress = ( + self.letters_count.cumulative[reading_state.content_index] + + sum( + letters_per_content[: reading_state.row + (self.screen_rows * self.spread) - 1] + ) + ) / self.letters_count.all + + @property + def screen_rows(self) -> int: + return self.screen.getmaxyx()[0] + + @property + def screen_cols(self) -> int: + return self.screen.getmaxyx()[1] + + @property + def ext_dict_app(self) -> Optional[str]: + self._ext_dict_app: Optional[str] = None + + if shutil.which(self.setting.DictionaryClient.split()[0]): + self._ext_dict_app = self.setting.DictionaryClient + else: + for i in settings.DICT_PRESET_LIST: + if shutil.which(i) is not None: + self._ext_dict_app = i + break + if self._ext_dict_app in {"sdcv"}: + self._ext_dict_app += " -n" + + return self._ext_dict_app + + @property + def image_viewer(self) -> Optional[str]: + self._image_viewer: Optional[str] = None + + if shutil.which(self.setting.DefaultViewer.split()[0]) is not None: + self._image_viewer = self.setting.DefaultViewer + elif sys.platform == "win32": + self._image_viewer = "start" + elif sys.platform == "darwin": + self._image_viewer = "open" + else: + for i in settings.VIEWER_PRESET_LIST: + if shutil.which(i) is not None: + self._image_viewer = i + break + + if self._image_viewer in {"gio"}: + self._image_viewer += " open" + + return self._image_viewer + + def open_image(self, pad, name, bstr): + sfx = os.path.splitext(name)[1] + fd, path = tempfile.mkstemp(suffix=sfx) + try: + with os.fdopen(fd, "wb") as tmp: + # tmp.write(epub.file.read(src)) + tmp.write(bstr) + # run(VWR + " " + path, shell=True) + subprocess.call( + self.image_viewer + " " + 
path, + shell=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + k = pad.getch() + finally: + os.remove(path) + return k + + def show_loader(self, *, loader_str: str = "\u231B", subtext: Optional[str] = None): + self.screen.clear() + rows, cols = self.screen.getmaxyx() + middle_row = (rows - 1) // 2 + self.screen.addstr(middle_row, 0, loader_str.center(cols)) + if subtext: + self.screen.addstr(middle_row + 1, 0, subtext.center(cols)) + # self.screen.addstr(((rows-2)//2)+1, (cols-len(msg))//2, msg) + self.screen.refresh() + + @choice_win(True) + def show_win_options(self, title, options, active_index, key_set): + return title, options, active_index, key_set + + @text_win + def show_win_error(self, title, msg, key): + return title, msg, key + + @choice_win() + def toc(self, toc_entries: Tuple[TocEntry, ...], index: int): + return ( + "Table of Contents", + [i.label for i in toc_entries], + index, + self.keymap.TableOfContents, + ) + + @text_win + def show_win_metadata(self): + if os.path.isfile(self.ebook.path): + mdata = "[File Info]\nPATH: {}\nSIZE: {} MB\n \n[Book Info]\n".format( + self.ebook.path, round(os.path.getsize(self.ebook.path) / 1024**2, 2) + ) + else: + mdata = "[File Info]\nPATH: {}\n \n[Book Info]\n".format(self.ebook.path) + + book_metadata = self.ebook.get_meta() + for field in dataclasses.fields(book_metadata): + value = getattr(book_metadata, field.name) + if value: + value = unescape(re.sub("<[^>]*>", "", value)) + mdata += f"{field.name.title()}: {value}\n" + + return "Metadata", mdata, self.keymap.Metadata + + @text_win + def show_win_help(self): + src = "Key Bindings:\n" + dig = max([len(i) for i in self.keymap_user_dict.values()]) + 2 + for i in self.keymap_user_dict.keys(): + src += "{} {}\n".format( + self.keymap_user_dict[i].rjust(dig), " ".join(re.findall("[A-Z][^A-Z]*", i)) + ) + return "Help", src, self.keymap.Help + + @text_win + def define_word(self, word): + rows, cols = self.screen.getmaxyx() + hi, wi = 5, 16 + Y, X = (rows - hi) // 2, (cols - wi) // 2 + + p = subprocess.Popen( + "{} {}".format(self.ext_dict_app, word), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True, + ) + + dictwin = curses.newwin(hi, wi, Y, X) + dictwin.box() + dictwin.addstr((hi - 1) // 2, (wi - 10) // 2, "Loading...") + dictwin.refresh() + + out, err = p.communicate() + + dictwin.clear() + dictwin.refresh() + + if err == b"": + return "Definition: " + word.upper(), out.decode(), self.keymap.DefineWord + else: + return "Error: " + self.ext_dict_app, err.decode(), self.keymap.DefineWord + + def show_win_choices_bookmarks(self): + idx = 0 + while True: + bookmarks = [i[0] for i in self.state.get_bookmarks(self.ebook)] + if not bookmarks: + return self.keymap.ShowBookmarks[0], None + + retk, idx, todel = self.show_win_options( + "Bookmarks", bookmarks, idx, self.keymap.ShowBookmarks + ) + if todel is not None: + self.state.delete_bookmark(self.ebook, bookmarks[todel]) + else: + return retk, idx + + def show_win_library(self): + while True: + library_items = self.state.get_from_history() + if not library_items: + return self.keymap.Library[0], None + + retk, choice_index, todel_index = self.show_win_options( + "Library", [str(item) for item in library_items], 0, self.keymap.Library + ) + if todel_index is not None: + self.state.delete_from_library(library_items[todel_index].filepath) + else: + return retk, choice_index + + def input_prompt(self, prompt: str) -> Union[NoUpdate, Key, str]: + """ + :param prompt: prompt text + :return: NoUpdate if cancelled 
or interrupted + Key if curses.KEY_RESIZE triggered + str for successful input + """ + # prevent pad hole when prompting for input while + # other window is active + # pad.refresh(y, 0, 0, x, rows-2, x+width) + rows, cols = self.screen.getmaxyx() + stat = curses.newwin(1, cols, rows - 1, 0) + if self.is_color_supported: + stat.bkgd(self.screen.getbkgd()) + stat.keypad(True) + curses.echo(True) + safe_curs_set(2) + + init_text = "" + + stat.addstr(0, 0, prompt, curses.A_REVERSE) + stat.addstr(0, len(prompt), init_text) + stat.refresh() + + try: + while True: + # NOTE: getch() only handles ascii + # to handle wide char like: é, use get_wch() + ipt = Key(stat.get_wch()) + # get_wch() return ambiguous type + # str for string input but int for function or special keys + # if type(ipt) == str: + # ipt = ord(ipt) + + if ipt == Key(27): + stat.clear() + stat.refresh() + curses.echo(False) + safe_curs_set(0) + return NoUpdate() + elif ipt == Key(10): + stat.clear() + stat.refresh() + curses.echo(False) + safe_curs_set(0) + return init_text + elif ipt in (Key(8), Key(127), Key(curses.KEY_BACKSPACE)): + init_text = init_text[:-1] + elif ipt == Key(curses.KEY_RESIZE): + stat.clear() + stat.refresh() + curses.echo(False) + safe_curs_set(0) + return Key(curses.KEY_RESIZE) + # elif len(init_text) <= maxlen: + else: + init_text += ipt.char + + stat.clear() + stat.addstr(0, 0, prompt, curses.A_REVERSE) + stat.addstr( + 0, + len(prompt), + init_text + if len(prompt + init_text) < cols + else "..." + init_text[len(prompt) - cols + 4 :], + ) + stat.refresh() + except KeyboardInterrupt: + stat.clear() + stat.refresh() + curses.echo(False) + safe_curs_set(0) + return NoUpdate() + + def searching( + self, board: InfiniBoard, src: Sequence[str], reading_state: ReadingState, tot + ) -> Union[NoUpdate, ReadingState, Key]: + # reusable loop indices + i: Any + j: Any + + rows, cols = self.screen.getmaxyx() + # unnecessary + # if self.spread == 2: + # reading_state = dataclasses.replace(reading_state, textwidth=(cols - 7) // 2) + + x = (cols - reading_state.textwidth) // 2 + if self.spread == 1: + x = (cols - reading_state.textwidth) // 2 + else: + x = 2 + + if not self.search_data: + candidate_text = self.input_prompt(" Regex:") + # if isinstance(candidate_text, str) and candidate_text != "": + if isinstance(candidate_text, str) and candidate_text: + self.search_data = SearchData(value=candidate_text) + else: + assert isinstance(candidate_text, NoUpdate) or isinstance(candidate_text, Key) + return candidate_text + + found = [] + try: + pattern = re.compile(self.search_data.value, re.IGNORECASE) + except re.error as reerrmsg: + self.search_data = None + tmpk = self.show_win_error("!Regex Error", str(reerrmsg), tuple()) + return tmpk + + for n, i in enumerate(src): + for j in pattern.finditer(i): + found.append([n, j.span()[0], j.span()[1] - j.span()[0]]) + + if not found: + if ( + self.search_data.direction == Direction.FORWARD + and reading_state.content_index + 1 < tot + ): + return ReadingState( + content_index=reading_state.content_index + 1, + textwidth=reading_state.textwidth, + row=0, + ) + elif ( + self.search_data.direction == Direction.BACKWARD and reading_state.content_index > 0 + ): + return ReadingState( + content_index=reading_state.content_index - 1, + textwidth=reading_state.textwidth, + row=0, + ) + else: + s: Union[NoUpdate, Key] = NoUpdate() + while True: + if s in self.keymap.Quit: + self.search_data = None + self.screen.clear() + self.screen.refresh() + return reading_state + # TODO: maybe >= 0? 
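+                    # ie. when nothing matched in this chapter, "n"/"N" below
+                    # hand the search over to the next/previous chapter by
+                    # returning a fresh ReadingState with row=0; the TODO
+                    # above questions whether the content_index == 0 guard
+                    # should instead be content_index >= 0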
+ elif s == Key("n") and reading_state.content_index == 0: + self.search_data = dataclasses.replace( + self.search_data, direction=Direction.FORWARD + ) + return ReadingState( + content_index=reading_state.content_index + 1, + textwidth=reading_state.textwidth, + row=0, + ) + elif s == Key("N") and reading_state.content_index + 1 == tot: + self.search_data = dataclasses.replace( + self.search_data, direction=Direction.BACKWARD + ) + return ReadingState( + content_index=reading_state.content_index - 1, + textwidth=reading_state.textwidth, + row=0, + ) + + self.screen.clear() + self.screen.addstr( + rows - 1, + 0, + " Finished searching: " + self.search_data.value[: cols - 22] + " ", + curses.A_REVERSE, + ) + board.write(reading_state.row, 1) + self.screen.refresh() + s = board.getch() + + sidx = len(found) - 1 + if self.search_data.direction == Direction.FORWARD: + if reading_state.row > found[-1][0]: + return ReadingState( + content_index=reading_state.content_index + 1, + textwidth=reading_state.textwidth, + row=0, + ) + for n, i in enumerate(found): + if i[0] >= reading_state.row: + sidx = n + break + + s = NoUpdate() + msg = ( + " Searching: " + + self.search_data.value + + " --- Res {}/{} Ch {}/{} ".format( + sidx + 1, len(found), reading_state.content_index + 1, tot + ) + ) + while True: + if s in self.keymap.Quit: + self.search_data = None + # for i in found: + # pad.chgat(i[0], i[1], i[2], pad.getbkgd()) + board.feed_temporary_style() + # pad.format() + # self.screen.clear() + # self.screen.refresh() + return reading_state + elif s == Key("n"): + self.search_data = dataclasses.replace( + self.search_data, direction=Direction.FORWARD + ) + if sidx == len(found) - 1: + if reading_state.content_index + 1 < tot: + return ReadingState( + content_index=reading_state.content_index + 1, + textwidth=reading_state.textwidth, + row=0, + ) + else: + s = NoUpdate() + msg = " Finished searching: " + self.search_data.value + " " + continue + else: + sidx += 1 + msg = ( + " Searching: " + + self.search_data.value + + " --- Res {}/{} Ch {}/{} ".format( + sidx + 1, len(found), reading_state.content_index + 1, tot + ) + ) + elif s == Key("N"): + self.search_data = dataclasses.replace( + self.search_data, direction=Direction.BACKWARD + ) + if sidx == 0: + if reading_state.content_index > 0: + return ReadingState( + content_index=reading_state.content_index - 1, + textwidth=reading_state.textwidth, + row=0, + ) + else: + s = NoUpdate() + msg = " Finished searching: " + self.search_data.value + " " + continue + else: + sidx -= 1 + msg = ( + " Searching: " + + self.search_data.value + + " --- Res {}/{} Ch {}/{} ".format( + sidx + 1, len(found), reading_state.content_index + 1, tot + ) + ) + elif s == Key(curses.KEY_RESIZE): + return Key(curses.KEY_RESIZE) + + # if reading_state.row + rows - 1 > pad.chunks[pad.find_chunkidx(reading_state.row)]: + # reading_state = dataclasses.replace( + # reading_state, row=pad.chunks[pad.find_chunkidx(reading_state.row)] + 1 + # ) + + while found[sidx][0] not in list( + range(reading_state.row, reading_state.row + (rows - 1) * self.spread) + ): + if found[sidx][0] > reading_state.row: + reading_state = dataclasses.replace( + reading_state, row=reading_state.row + ((rows - 1) * self.spread) + ) + else: + reading_state = dataclasses.replace( + reading_state, row=reading_state.row - ((rows - 1) * self.spread) + ) + if reading_state.row < 0: + reading_state = dataclasses.replace(reading_state, row=0) + + # formats = [InlineStyle(row=i[0], col=i[1], n_letters=i[2], 
attr=curses.A_REVERSE) for i in found] + # pad.feed_style(formats) + styles: List[InlineStyle] = [] + for n, i in enumerate(found): + attr = curses.A_REVERSE if n == sidx else curses.A_NORMAL + # pad.chgat(i[0], i[1], i[2], pad.getbkgd() | attr) + styles.append( + InlineStyle(row=i[0], col=i[1], n_letters=i[2], attr=board.getbkgd() | attr) + ) + board.feed_temporary_style(tuple(styles)) + + self.screen.clear() + self.screen.addstr(rows - 1, 0, msg, curses.A_REVERSE) + self.screen.refresh() + # pad.refresh(reading_state.row, 0, 0, x, rows - 2, x + reading_state.textwidth) + board.write(reading_state.row, 1) + s = board.getch() + + def speaking(self, text): + self.is_speaking = True + self.screen.addstr(self.screen_rows - 1, 0, " Speaking! ", curses.A_REVERSE) + self.screen.refresh() + self.screen.timeout(1) + try: + self._tts_speaker.speak(text) + + while True: + if self._tts_speaker.is_done(): + k = self.keymap.PageDown[0] + break + tmp = self.screen.getch() + k = NoUpdate() if tmp == -1 else Key(tmp) + if k == Key(curses.KEY_MOUSE): + mouse_event = curses.getmouse() + if mouse_event[4] == curses.BUTTON2_CLICKED: + k = self.keymap.Quit[0] + elif mouse_event[4] == curses.BUTTON1_CLICKED: + if mouse_event[1] < self.screen_cols // 2: + k = self.keymap.PageUp[0] + else: + k = self.keymap.PageDown[0] + elif mouse_event[4] == curses.BUTTON4_PRESSED: + k = self.keymap.ScrollUp[0] + elif mouse_event[4] == 2097152: + k = self.keymap.ScrollDown[0] + if ( + k + in self.keymap.Quit + + self.keymap.PageUp + + self.keymap.PageDown + + self.keymap.ScrollUp + + self.keymap.ScrollDown + + (curses.KEY_RESIZE,) + ): + self._tts_speaker.stop() + break + finally: + self.screen.timeout(-1) + self._tts_speaker.cleanup() + + if k in self.keymap.Quit: + self.is_speaking = False + k = NoUpdate() + return k + + def savestate(self, reading_state: ReadingState) -> None: + if self.seamless: + reading_state = self.convert_absolute_reading_state_to_relative(reading_state) + self.state.set_last_reading_state(self.ebook, reading_state) + self.state.update_library(self.ebook, self.reading_progress) + + def cleanup(self) -> None: + self.ebook.cleanup() + + if isinstance(self._process_counting_letter, multiprocessing.Process): + if self._process_counting_letter.is_alive(): + self._process_counting_letter.terminate() + # weird python multiprocessing issue, need to call .join() before .close() + # ValueError: Cannot close a process while it is still running. + # You should first call join() or terminate(). 
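+            # in short: for a still-running process the safe teardown
+            # order is terminate() -> join() -> close()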
+ self._process_counting_letter.join() + self._process_counting_letter.close() + + def convert_absolute_reading_state_to_relative(self, reading_state) -> ReadingState: + if not self.seamless: + raise RuntimeError( + "Reader.convert_absolute_reading_state_to_relative() only implemented when Seamless=True" + ) + return construct_relative_reading_state(reading_state, self.totlines_per_content) + + def convert_relative_reading_state_to_absolute( + self, reading_state: ReadingState + ) -> ReadingState: + if not self.seamless: + raise RuntimeError( + "Reader.convert_relative_reading_state_to_absolute() only implemented when Seamless=True" + ) + + absolute_row = reading_state.row + sum( + self.totlines_per_content[: reading_state.content_index] + ) + absolute_pctg = ( + absolute_row / sum(self.totlines_per_content) if reading_state.rel_pctg else None + ) + + return dataclasses.replace( + reading_state, content_index=0, row=absolute_row, rel_pctg=absolute_pctg + ) + + def get_all_book_contents( + self, reading_state: ReadingState + ) -> Tuple[TextStructure, Tuple[TocEntry, ...], Union[Tuple[str, ...], Tuple[ET.Element, ...]]]: + if not self.seamless: + raise RuntimeError("Reader.get_all_book_contents() only implemented when Seamless=True") + + contents = self.ebook.contents + toc_entries = self.ebook.toc_entries + + text_structure: TextStructure = TextStructure( + text_lines=tuple(), image_maps=dict(), section_rows=dict(), formatting=tuple() + ) + toc_entries_tmp: List[TocEntry] = [] + section_rows_tmp: Dict[str, int] = dict() + + # self.totlines_per_content only defined when Seamless=True + self.totlines_per_content: Tuple[int, ...] = tuple() + + for n, content in enumerate(contents): + self.show_loader(subtext=f"loading contents ({n+1}/{len(contents)})") + starting_line = sum(self.totlines_per_content) + assert isinstance(content, str) or isinstance(content, ET.Element) + text_structure_tmp = parse_html( + self.ebook.get_raw_text(content), + textwidth=reading_state.textwidth, + section_ids=set(toc_entry.section for toc_entry in toc_entries), # type: ignore + starting_line=starting_line, + ) + assert isinstance(text_structure_tmp, TextStructure) + # self.totlines_per_content.append(len(text_structure_tmp.text_lines)) + self.totlines_per_content += (len(text_structure_tmp.text_lines),) + + for toc_entry in toc_entries: + if toc_entry.content_index == n: + if toc_entry.section: + toc_entries_tmp.append(dataclasses.replace(toc_entry, content_index=0)) + else: + section_id_tmp = str(uuid.uuid4()) + toc_entries_tmp.append( + TocEntry(label=toc_entry.label, content_index=0, section=section_id_tmp) + ) + section_rows_tmp[section_id_tmp] = starting_line + + text_structure = merge_text_structures(text_structure, text_structure_tmp) + + text_structure = dataclasses.replace( + text_structure, section_rows={**text_structure.section_rows, **section_rows_tmp} + ) + + return text_structure, tuple(toc_entries_tmp), (self.ebook.contents[0],) + + def get_current_book_content( + self, reading_state: ReadingState + ) -> Tuple[TextStructure, Tuple[TocEntry, ...], Union[Tuple[str, ...], Tuple[ET.Element, ...]]]: + contents = self.ebook.contents + toc_entries = self.ebook.toc_entries + content_path = contents[reading_state.content_index] + content = self.ebook.get_raw_text(content_path) + text_structure = parse_html( # type: ignore + content, + textwidth=reading_state.textwidth, + section_ids=set(toc_entry.section for toc_entry in toc_entries), # type: ignore + ) + return text_structure, toc_entries, contents + + def 
read(self, reading_state: ReadingState) -> Union[ReadingState, Ebook]: + # reusable loop indices + i: Any + + k = self.keymap.RegexSearch[0] if self.search_data else NoUpdate() + rows, cols = self.screen.getmaxyx() + + mincols_doublespr = ( + DoubleSpreadPadding.LEFT.value + + 22 + + DoubleSpreadPadding.MIDDLE.value + + 22 + + DoubleSpreadPadding.RIGHT.value + ) + if cols < mincols_doublespr: + self.spread = 1 + if self.spread == 2: + reading_state = dataclasses.replace( + reading_state, + textwidth=( + cols + - sum( + [ + DoubleSpreadPadding.LEFT.value, + DoubleSpreadPadding.MIDDLE.value, + DoubleSpreadPadding.RIGHT.value, + ] + ) + ) + // 2, + ) + x = (cols - reading_state.textwidth) // 2 + if self.spread == 2: + x = DoubleSpreadPadding.LEFT.value + + self.show_loader(subtext="loading contents") + # get text structure, toc entries and contents of the book + if self.seamless: + text_structure, toc_entries, contents = self.get_all_book_contents(reading_state) + # adjustment + reading_state = self.convert_relative_reading_state_to_absolute(reading_state) + else: + text_structure, toc_entries, contents = self.get_current_book_content(reading_state) + + totlines = len(text_structure.text_lines) + + if reading_state.row < 0 and totlines <= rows * self.spread: + reading_state = dataclasses.replace(reading_state, row=0) + elif reading_state.rel_pctg is not None: + reading_state = dataclasses.replace( + reading_state, row=round(reading_state.rel_pctg * totlines) + ) + else: + reading_state = dataclasses.replace(reading_state, row=reading_state.row % totlines) + + board = InfiniBoard( + screen=self.screen, + text=text_structure.text_lines, + textwidth=reading_state.textwidth, + default_style=text_structure.formatting, + spread=self.spread, + ) + + letters_per_content: List[int] = [] + for i in text_structure.text_lines: + letters_per_content.append(len(re.sub(r"\s", "", i))) + + self.screen.clear() + self.screen.refresh() + # try-except clause if there is issue + # with curses resize event + board.write(reading_state.row) + + # if reading_state.section is not None + # then override reading_state.row to follow the section + if reading_state.section: + reading_state = dataclasses.replace( + reading_state, row=text_structure.section_rows.get(reading_state.section, 0) + ) + + checkpoint_row: Optional[int] = None + countstring = "" + + try: + while True: + if countstring == "": + count = 1 + else: + count = int(countstring) + if k in tuple(Key(i) for i in range(48, 58)): # i.e., k is a numeral + countstring = countstring + k.char + else: + if k in self.keymap.Quit: + if k == Key(27) and countstring != "": + countstring = "" + else: + self.try_assign_letters_count(force_wait=True) + self.calculate_reading_progress(letters_per_content, reading_state) + + self.savestate( + dataclasses.replace( + reading_state, rel_pctg=reading_state.row / totlines + ) + ) + sys.exit() + + elif k in self.keymap.TTSToggle and self.tts_support: + tospeak = "" + for i in text_structure.text_lines[ + reading_state.row : reading_state.row + (rows * self.spread) + ]: + if re.match(r"^\s*$", i) is not None: + tospeak += "\n. 
\n" + else: + tospeak += i + " " + k = self.speaking(tospeak) + if ( + totlines - reading_state.row <= rows + and reading_state.content_index == len(contents) - 1 + ): + self.is_speaking = False + continue + + elif k in self.keymap.DoubleSpreadToggle: + if cols < mincols_doublespr: + k = self.show_win_error( + "Screen is too small", + "Min: {} cols x {} rows".format(mincols_doublespr, 12), + (Key("D"),), + ) + self.spread = (self.spread % 2) + 1 + return ReadingState( + content_index=reading_state.content_index, + textwidth=reading_state.textwidth, + row=reading_state.row, + rel_pctg=reading_state.row / totlines, + ) + + elif k in self.keymap.ScrollUp: + if self.spread == 2: + k = self.keymap.PageUp[0] + continue + if count > 1: + checkpoint_row = reading_state.row - 1 + if reading_state.row >= count: + reading_state = dataclasses.replace( + reading_state, row=reading_state.row - count + ) + elif reading_state.row == 0 and reading_state.content_index != 0: + self.page_animation = Direction.BACKWARD + # return -1, width, -rows, None, "" + return ReadingState( + content_index=reading_state.content_index - 1, + textwidth=reading_state.textwidth, + row=-rows, + ) + else: + reading_state = dataclasses.replace(reading_state, row=0) + + elif k in self.keymap.PageUp: + if reading_state.row == 0 and reading_state.content_index != 0: + self.page_animation = Direction.BACKWARD + text_structure_content_before = parse_html( + self.ebook.get_raw_text(contents[reading_state.content_index - 1]), + textwidth=reading_state.textwidth, + ) + assert isinstance(text_structure_content_before, TextStructure) + return ReadingState( + content_index=reading_state.content_index - 1, + textwidth=reading_state.textwidth, + row=rows + * self.spread + * ( + len(text_structure_content_before.text_lines) + // (rows * self.spread) + ), + ) + else: + if reading_state.row >= rows * self.spread * count: + self.page_animation = Direction.BACKWARD + reading_state = dataclasses.replace( + reading_state, + row=reading_state.row - (rows * self.spread * count), + ) + else: + reading_state = dataclasses.replace(reading_state, row=0) + + elif k in self.keymap.ScrollDown: + if self.spread == 2: + k = self.keymap.PageDown[0] + continue + if count > 1: + checkpoint_row = reading_state.row + rows - 1 + if reading_state.row + count <= totlines - rows: + reading_state = dataclasses.replace( + reading_state, row=reading_state.row + count + ) + elif ( + reading_state.row >= totlines - rows + and reading_state.content_index != len(contents) - 1 + ): + self.page_animation = Direction.FORWARD + return ReadingState( + content_index=reading_state.content_index + 1, + textwidth=reading_state.textwidth, + row=0, + ) + + elif k in self.keymap.PageDown: + if totlines - reading_state.row > rows * self.spread: + self.page_animation = Direction.FORWARD + reading_state = dataclasses.replace( + reading_state, row=reading_state.row + (rows * self.spread) + ) + elif reading_state.content_index != len(contents) - 1: + self.page_animation = Direction.FORWARD + return ReadingState( + content_index=reading_state.content_index + 1, + textwidth=reading_state.textwidth, + row=0, + ) + + # elif k in K["HalfScreenUp"] | K["HalfScreenDown"]: + # countstring = str(rows // 2) + # k = list(K["ScrollUp" if k in K["HalfScreenUp"] else "ScrollDown"])[0] + # continue + + elif k in self.keymap.NextChapter: + ntoc = find_current_content_index( + toc_entries, + text_structure.section_rows, + reading_state.content_index, + reading_state.row, + ) + if ntoc < len(toc_entries) - 1: 
+ if reading_state.content_index == toc_entries[ntoc + 1].content_index: + try: + reading_state = dataclasses.replace( + reading_state, + row=text_structure.section_rows[ + toc_entries[ntoc + 1].section # type: ignore + ], + ) + except KeyError: + pass + else: + return ReadingState( + content_index=toc_entries[ntoc + 1].content_index, + textwidth=reading_state.textwidth, + row=0, + section=toc_entries[ntoc + 1].section, + ) + + elif k in self.keymap.PrevChapter: + ntoc = find_current_content_index( + toc_entries, + text_structure.section_rows, + reading_state.content_index, + reading_state.row, + ) + if ntoc > 0: + if reading_state.content_index == toc_entries[ntoc - 1].content_index: + reading_state = dataclasses.replace( + reading_state, + row=text_structure.section_rows.get( + toc_entries[ntoc - 1].section, 0 # type: ignore + ), + ) + else: + return ReadingState( + content_index=toc_entries[ntoc - 1].content_index, + textwidth=reading_state.textwidth, + row=0, + section=toc_entries[ntoc - 1].section, + ) + + elif k in self.keymap.BeginningOfCh: + ntoc = find_current_content_index( + toc_entries, + text_structure.section_rows, + reading_state.content_index, + reading_state.row, + ) + try: + reading_state = dataclasses.replace( + reading_state, + row=text_structure.section_rows[toc_entries[ntoc].section], # type: ignore + ) + except (KeyError, IndexError): + reading_state = dataclasses.replace(reading_state, row=0) + + elif k in self.keymap.EndOfCh: + ntoc = find_current_content_index( + toc_entries, + text_structure.section_rows, + reading_state.content_index, + reading_state.row, + ) + try: + if ( + text_structure.section_rows[toc_entries[ntoc + 1].section] - rows # type: ignore + >= 0 + ): + reading_state = dataclasses.replace( + reading_state, + row=text_structure.section_rows[toc_entries[ntoc + 1].section] # type: ignore + - rows, + ) + else: + reading_state = dataclasses.replace( + reading_state, + row=text_structure.section_rows[toc_entries[ntoc].section], # type: ignore + ) + except (KeyError, IndexError): + reading_state = dataclasses.replace( + reading_state, row=pgend(totlines, rows) + ) + + elif k in self.keymap.TableOfContents: + if not toc_entries: + k = self.show_win_error( + "Table of Contents", + "N/A: TableOfContents is unavailable for this book.", + self.keymap.TableOfContents, + ) + continue + ntoc = find_current_content_index( + toc_entries, + text_structure.section_rows, + reading_state.content_index, + reading_state.row, + ) + rettock, fllwd, _ = self.toc(toc_entries, ntoc) + if rettock is not None: # and rettock in WINKEYS: + k = rettock + continue + elif fllwd is not None: + if reading_state.content_index == toc_entries[fllwd].content_index: + try: + reading_state = dataclasses.replace( + reading_state, + row=text_structure.section_rows[toc_entries[fllwd].section], + ) + except KeyError: + reading_state = dataclasses.replace(reading_state, row=0) + else: + return ReadingState( + content_index=toc_entries[fllwd].content_index, + textwidth=reading_state.textwidth, + row=0, + section=toc_entries[fllwd].section, + ) + + elif k in self.keymap.Metadata: + k = self.show_win_metadata() + if k in self._win_keys: + continue + + elif k in self.keymap.Help: + k = self.show_win_help() + if k in self._win_keys: + continue + + elif ( + k in self.keymap.Enlarge + and (reading_state.textwidth + count) < cols - 4 + and self.spread == 1 + ): + return dataclasses.replace( + reading_state, + textwidth=reading_state.textwidth + count, + rel_pctg=reading_state.row / totlines, + ) + + 
elif ( + k in self.keymap.Shrink + and reading_state.textwidth >= 22 + and self.spread == 1 + ): + return dataclasses.replace( + reading_state, + textwidth=reading_state.textwidth - count, + rel_pctg=reading_state.row / totlines, + ) + + elif k in self.keymap.SetWidth and self.spread == 1: + if countstring == "": + # if called without a count, toggle between 80 cols and full width + if reading_state.textwidth != 80 and cols - 4 >= 80: + return ReadingState( + content_index=reading_state.content_index, + textwidth=80, + row=reading_state.row, + rel_pctg=reading_state.row / totlines, + ) + else: + return ReadingState( + content_index=reading_state.content_index, + textwidth=cols - 4, + row=reading_state.row, + rel_pctg=reading_state.row / totlines, + ) + else: + reading_state = dataclasses.replace(reading_state, textwidth=count) + if reading_state.textwidth < 20: + reading_state = dataclasses.replace(reading_state, textwidth=20) + elif reading_state.textwidth >= cols - 4: + reading_state = dataclasses.replace(reading_state, textwidth=cols - 4) + + return ReadingState( + content_index=reading_state.content_index, + textwidth=reading_state.textwidth, + row=reading_state.row, + rel_pctg=reading_state.row / totlines, + ) + + elif k in self.keymap.RegexSearch: + ret_object = self.searching( + board, + text_structure.text_lines, + reading_state, + len(contents), + ) + if isinstance(ret_object, Key) or isinstance(ret_object, NoUpdate): + k = ret_object + # k = ret_object.value + continue + elif isinstance(ret_object, ReadingState) and self.search_data: + return ret_object + # else: + elif isinstance(ret_object, ReadingState): + # y = ret_object + reading_state = ret_object + + elif k in self.keymap.OpenImage and self.image_viewer: + imgs_in_screen = list( + set( + range(reading_state.row, reading_state.row + rows * self.spread + 1) + ) + & set(text_structure.image_maps.keys()) + ) + if not imgs_in_screen: + k = NoUpdate() + continue + + imgs_in_screen.sort() + image_path: Optional[str] = None + if len(imgs_in_screen) == 1: + image_path = text_structure.image_maps[imgs_in_screen[0]] + elif len(imgs_in_screen) > 1: + imgs_rel_to_row = [i - reading_state.row for i in imgs_in_screen] + p: Union[NoUpdate, Key] = NoUpdate() + i = 0 + while p not in self.keymap.Quit and p not in self.keymap.Follow: + self.screen.move( + imgs_rel_to_row[i] % rows, + ( + x + if imgs_rel_to_row[i] // rows == 0 + else cols + - DoubleSpreadPadding.RIGHT.value + - reading_state.textwidth + ) + + reading_state.textwidth // 2, + ) + self.screen.refresh() + safe_curs_set(2) + p = board.getch() + if p in self.keymap.ScrollDown: + i += 1 + elif p in self.keymap.ScrollUp: + i -= 1 + i = i % len(imgs_rel_to_row) + + safe_curs_set(0) + if p in self.keymap.Follow: + image_path = text_structure.image_maps[imgs_in_screen[i]] + + if image_path: + try: + # if self.ebook.__class__.__name__ in {"Epub", "Mobi", "Azw"}: + if isinstance(self.ebook, (Epub, Mobi, Azw)): + # self.seamless adjustment + if self.seamless: + current_content_index = ( + self.convert_absolute_reading_state_to_relative( + reading_state + ).content_index + ) + else: + current_content_index = reading_state.content_index + # for n, content in enumerate(self.ebook.contents): + # content_path = content + # if reading_state.row < sum(totlines_per_content[:n]): + # break + + content_path = self.ebook.contents[current_content_index] + assert isinstance(content_path, str) + image_path = resolve_path(content_path, image_path) + imgnm, imgbstr = 
self.ebook.get_img_bytestr(image_path) + k = self.open_image(board, imgnm, imgbstr) + continue + except Exception as e: + self.show_win_error("Error Opening Image", str(e), tuple()) + if DEBUG: + raise e + + elif ( + k in self.keymap.SwitchColor + and self.is_color_supported + and countstring in {"", "0", "1", "2"} + ): + if countstring == "": + count_color = curses.pair_number(self.screen.getbkgd()) + if count_color not in {2, 3}: + count_color = 1 + count_color = count_color % 3 + else: + count_color = count + self.screen.bkgd(curses.color_pair(count_color + 1)) + # pad.format() + return ReadingState( + content_index=reading_state.content_index, + textwidth=reading_state.textwidth, + row=reading_state.row, + ) + + elif k in self.keymap.AddBookmark: + bmname = self.input_prompt(" Add bookmark:") + if isinstance(bmname, str) and bmname: + try: + self.state.insert_bookmark( + self.ebook, + bmname, + dataclasses.replace( + reading_state, rel_pctg=reading_state.row / totlines + ), + ) + except sqlite3.IntegrityError: + k = self.show_win_error( + "Error: Add Bookmarks", + f"Bookmark with name '{bmname}' already exists.", + (Key("B"),), + ) + continue + else: + k = bmname + continue + + elif k in self.keymap.ShowBookmarks: + bookmarks = self.state.get_bookmarks(self.ebook) + if not bookmarks: + k = self.show_win_error( + "Bookmarks", + "N/A: Bookmarks are not found in this book.", + self.keymap.ShowBookmarks, + ) + continue + else: + retk, idxchoice = self.show_win_choices_bookmarks() + if retk is not None: + k = retk + continue + elif idxchoice is not None: + bookmark_to_jump = self.state.get_bookmarks(self.ebook)[idxchoice][ + 1 + ] + if ( + bookmark_to_jump.content_index == reading_state.content_index + and bookmark_to_jump.textwidth == reading_state.textwidth + ): + reading_state = bookmark_to_jump + else: + return ReadingState( + content_index=bookmark_to_jump.content_index, + textwidth=reading_state.textwidth, + row=bookmark_to_jump.row, + rel_pctg=bookmark_to_jump.rel_pctg, + ) + + elif k in self.keymap.DefineWord and self.ext_dict_app: + word = self.input_prompt(" Define:") + if isinstance(word, str) and word: + defin = self.define_word(word) + if defin in self._win_keys: + k = defin + continue + else: + k = word + continue + + elif k in self.keymap.MarkPosition: + jumnum = board.getch() + if isinstance(jumnum, Key) and jumnum in tuple( + Key(i) for i in range(48, 58) + ): + self.jump_list[jumnum.char] = reading_state + else: + k = NoUpdate() + continue + + elif k in self.keymap.JumpToPosition: + jumnum = board.getch() + if ( + isinstance(jumnum, Key) + and jumnum in tuple(Key(i) for i in range(48, 58)) + and jumnum.char in self.jump_list + ): + marked_reading_state = self.jump_list[jumnum.char] + return dataclasses.replace( + marked_reading_state, + textwidth=reading_state.textwidth, + rel_pctg=None + if marked_reading_state.textwidth == reading_state.textwidth + else marked_reading_state.rel_pctg, + section="", + ) + else: + k = NoUpdate() + continue + + elif k in self.keymap.ShowHideProgress: + self.show_reading_progress = not self.show_reading_progress + + elif k in self.keymap.Library: + self.try_assign_letters_count(force_wait=True) + self.calculate_reading_progress(letters_per_content, reading_state) + + self.savestate( + dataclasses.replace( + reading_state, rel_pctg=reading_state.row / totlines + ) + ) + library_items = self.state.get_from_history() + if not library_items: + k = self.show_win_error( + "Library", + "N/A: No reading history.", + self.keymap.Library, + ) + 
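# nothing to list; re-handle whatever key closed the error window
+                    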
continue + else: + retk, choice_index = self.show_win_library() + if retk is not None: + k = retk + continue + elif choice_index is not None: + return get_ebook_obj(library_items[choice_index].filepath) + + elif k == Key(curses.KEY_RESIZE): + self.savestate( + dataclasses.replace( + reading_state, rel_pctg=reading_state.row / totlines + ) + ) + # stated in pypi windows-curses page: + # to call resize_term right after KEY_RESIZE + if sys.platform == "win32": + curses.resize_term(rows, cols) + rows, cols = self.screen.getmaxyx() + else: + rows, cols = self.screen.getmaxyx() + curses.resize_term(rows, cols) + if cols < 22 or rows < 12: + sys.exit("ERROR: Screen was too small (min 22cols x 12rows).") + if cols <= reading_state.textwidth + 4: + return ReadingState( + content_index=reading_state.content_index, + textwidth=cols - 4, + row=reading_state.row, + rel_pctg=reading_state.row / totlines, + ) + else: + return ReadingState( + content_index=reading_state.content_index, + textwidth=reading_state.textwidth, + row=reading_state.row, + ) + + countstring = "" + + if checkpoint_row: + board.feed_temporary_style( + ( + InlineStyle( + row=checkpoint_row, + col=0, + n_letters=reading_state.textwidth, + attr=curses.A_UNDERLINE, + ), + ) + ) + + try: + if self.setting.PageScrollAnimation and self.page_animation: + self.screen.clear() + for i in range(1, reading_state.textwidth + 1): + curses.napms(1) + # self.screen.clear() + board.write_n(reading_state.row, i, self.page_animation) + self.screen.refresh() + self.page_animation = None + + self.screen.clear() + self.screen.addstr(0, 0, countstring) + board.write(reading_state.row) + + # check if letters counting process is done + self.try_assign_letters_count() + + # reading progress + self.calculate_reading_progress(letters_per_content, reading_state) + + # display reading progress + if ( + self.reading_progress + and self.show_reading_progress + and (cols - reading_state.textwidth - 2) // 2 > 3 + ): + reading_progress_str = "{}%".format(int(self.reading_progress * 100)) + self.screen.addstr( + 0, cols - len(reading_progress_str), reading_progress_str + ) + + self.screen.refresh() + except curses.error: + pass + + if self.is_speaking: + k = self.keymap.TTSToggle[0] + continue + + k = board.getch() + if k == Key(curses.KEY_MOUSE): + mouse_event = curses.getmouse() + if mouse_event[4] == curses.BUTTON1_CLICKED: + if mouse_event[1] < cols // 2: + k = self.keymap.PageUp[0] + else: + k = self.keymap.PageDown[0] + elif mouse_event[4] == curses.BUTTON3_CLICKED: + k = self.keymap.TableOfContents[0] + elif mouse_event[4] == curses.BUTTON4_PRESSED: + k = self.keymap.ScrollUp[0] + elif mouse_event[4] == 2097152: + k = self.keymap.ScrollDown[0] + elif mouse_event[4] == curses.BUTTON4_PRESSED + curses.BUTTON_CTRL: + k = self.keymap.Enlarge[0] + elif mouse_event[4] == 2097152 + curses.BUTTON_CTRL: + k = self.keymap.Shrink[0] + elif mouse_event[4] == curses.BUTTON2_CLICKED: + k = self.keymap.TTSToggle[0] + + if checkpoint_row: + board.feed_temporary_style() + checkpoint_row = None + + except KeyboardInterrupt: + self.savestate( + dataclasses.replace(reading_state, rel_pctg=reading_state.row / totlines) + ) + sys.exit() + + +def start_reading(stdscr, filepath: str): + + ebook = get_ebook_obj(filepath) + state = State() + config = Config() + + reader = Reader(screen=stdscr, ebook=ebook, config=config, state=state) + + def handle_signal(signum, _): + """ + Method to raise SystemExit based on signal received + to trigger `try-finally` clause + """ + msg = 
f"[{os.getpid()}] killed" + if signal.Signals(signum) == signal.SIGTERM: + msg = f"[{os.getpid()}] terminated" + sys.exit(msg) + + signal.signal(signal.SIGTERM, handle_signal) + + try: + reader.run_counting_letters() + + reading_state = state.get_last_reading_state(reader.ebook) + if reader.screen_cols <= reading_state.textwidth + 4: + reading_state = dataclasses.replace(reading_state, textwidth=reader.screen_cols - 4) + else: + reading_state = dataclasses.replace(reading_state, rel_pctg=None) + + while True: + reading_state_or_ebook = reader.read(reading_state) + + if isinstance(reading_state_or_ebook, Ebook): + return reading_state_or_ebook.path + else: + reading_state = reading_state_or_ebook + if reader.seamless: + reading_state = reader.convert_absolute_reading_state_to_relative(reading_state) + + finally: + reader.cleanup() diff --git a/src/epy_reader/settings.py b/src/epy_reader/settings.py new file mode 100644 index 0000000..f09bc98 --- /dev/null +++ b/src/epy_reader/settings.py @@ -0,0 +1,133 @@ +import curses +from dataclasses import dataclass, field +from enum import Enum +from typing import List, Optional, Tuple + +from epy_reader.models import Key + + +class DoubleSpreadPadding(Enum): + LEFT = 10 + MIDDLE = 7 + RIGHT = 10 + + +# add image viewers here +# sorted by most widely used +VIEWER_PRESET_LIST = ( + "feh", + "imv", + "gio", + "gnome-open", + "gvfs-open", + "xdg-open", + "kde-open", + "firefox", +) + +DICT_PRESET_LIST = ( + "wkdict", + "sdcv", + "dict", +) + + +@dataclass(frozen=True) +class Settings: + DefaultViewer: str = "auto" + DictionaryClient: str = "auto" + ShowProgressIndicator: bool = True + PageScrollAnimation: bool = True + MouseSupport: bool = False + StartWithDoubleSpread: bool = False + # -1 is default terminal fg/bg colors + DefaultColorFG: int = -1 + DefaultColorBG: int = -1 + DarkColorFG: int = 252 + DarkColorBG: int = 235 + LightColorFG: int = 238 + LightColorBG: int = 253 + SeamlessBetweenChapters: bool = False + PreferredTTSEngine: Optional[str] = None + TTSEngineArgs: List[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class CfgDefaultKeymaps: + ScrollUp: str = "k" + ScrollDown: str = "j" + PageUp: str = "h" + PageDown: str = "l" + # HalfScreenUp: str = "h" + # HalfScreenDown: str + NextChapter: str = "L" + PrevChapter: str = "H" + BeginningOfCh: str = "g" + EndOfCh: str = "G" + Shrink: str = "-" + Enlarge: str = "+" + SetWidth: str = "=" + Metadata: str = "M" + DefineWord: str = "d" + TableOfContents: str = "t" + Follow: str = "f" + OpenImage: str = "o" + RegexSearch: str = "/" + ShowHideProgress: str = "s" + MarkPosition: str = "m" + JumpToPosition: str = "`" + AddBookmark: str = "b" + ShowBookmarks: str = "B" + Quit: str = "q" + Help: str = "?" + SwitchColor: str = "c" + TTSToggle: str = "!" + DoubleSpreadToggle: str = "D" + Library: str = "R" + + +@dataclass(frozen=True) +class CfgBuiltinKeymaps: + ScrollUp: Tuple[int, ...] = (curses.KEY_UP,) + ScrollDown: Tuple[int, ...] = (curses.KEY_DOWN,) + PageUp: Tuple[int, ...] = (curses.KEY_PPAGE, curses.KEY_LEFT) + PageDown: Tuple[int, ...] = (curses.KEY_NPAGE, ord(" "), curses.KEY_RIGHT) + BeginningOfCh: Tuple[int, ...] = (curses.KEY_HOME,) + EndOfCh: Tuple[int, ...] = (curses.KEY_END,) + TableOfContents: Tuple[int, ...] = (9, ord("\t")) + Follow: Tuple[int, ...] = (10,) + Quit: Tuple[int, ...] = (3, 27, 304) + + +@dataclass(frozen=True) +class Keymap: + # HalfScreenDown: Tuple[Key, ...] + # HalfScreenUp: Tuple[Key, ...] + AddBookmark: Tuple[Key, ...] + BeginningOfCh: Tuple[Key, ...] 
+ DefineWord: Tuple[Key, ...] + DoubleSpreadToggle: Tuple[Key, ...] + EndOfCh: Tuple[Key, ...] + Enlarge: Tuple[Key, ...] + Follow: Tuple[Key, ...] + Help: Tuple[Key, ...] + JumpToPosition: Tuple[Key, ...] + Library: Tuple[Key, ...] + MarkPosition: Tuple[Key, ...] + Metadata: Tuple[Key, ...] + NextChapter: Tuple[Key, ...] + OpenImage: Tuple[Key, ...] + PageDown: Tuple[Key, ...] + PageUp: Tuple[Key, ...] + PrevChapter: Tuple[Key, ...] + Quit: Tuple[Key, ...] + RegexSearch: Tuple[Key, ...] + ScrollDown: Tuple[Key, ...] + ScrollUp: Tuple[Key, ...] + SetWidth: Tuple[Key, ...] + ShowBookmarks: Tuple[Key, ...] + ShowHideProgress: Tuple[Key, ...] + Shrink: Tuple[Key, ...] + SwitchColor: Tuple[Key, ...] + TTSToggle: Tuple[Key, ...] + TableOfContents: Tuple[Key, ...] diff --git a/src/epy_reader/speakers/__init__.py b/src/epy_reader/speakers/__init__.py new file mode 100644 index 0000000..078be31 --- /dev/null +++ b/src/epy_reader/speakers/__init__.py @@ -0,0 +1,9 @@ +__all__ = [ + "SpeakerBaseModel", + "SpeakerMimic", + "SpeakerPico", +] + +from epy_reader.speakers.base import SpeakerBaseModel +from epy_reader.speakers.mimic import SpeakerMimic +from epy_reader.speakers.pico import SpeakerPico diff --git a/src/epy_reader/speakers/base.py b/src/epy_reader/speakers/base.py new file mode 100644 index 0000000..7c1a8d5 --- /dev/null +++ b/src/epy_reader/speakers/base.py @@ -0,0 +1,21 @@ +from typing import List + + +class SpeakerBaseModel: + cmd: str = "tts_engine_binary" + available: bool = False + + def __init__(self, args: List[str] = []): + self.args = args + + def speak(self, text: str) -> None: + raise NotImplementedError("Speaker.speak() not implemented") + + def is_done(self) -> bool: + raise NotImplementedError("Speaker.is_done() not implemented") + + def stop(self) -> None: + raise NotImplementedError("Speaker.stop() not implemented") + + def cleanup(self) -> None: + raise NotImplementedError("Speaker.cleanup() not implemented") diff --git a/src/epy_reader/speakers/mimic.py b/src/epy_reader/speakers/mimic.py new file mode 100644 index 0000000..0db4ed8 --- /dev/null +++ b/src/epy_reader/speakers/mimic.py @@ -0,0 +1,31 @@ +import shutil +import subprocess + +from epy_reader.speakers.base import SpeakerBaseModel + + +class SpeakerMimic(SpeakerBaseModel): + cmd = "mimic" + available = bool(shutil.which("mimic")) + + def speak(self, text: str) -> None: + self.process = subprocess.Popen( + [self.cmd, *self.args], + text=True, + stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, + stderr=subprocess.STDOUT, + ) + assert self.process.stdin + self.process.stdin.write(text) + self.process.stdin.close() + + def is_done(self) -> bool: + return self.process.poll() is not None + + def stop(self) -> None: + self.process.terminate() + # self.process.kill() + + def cleanup(self) -> None: + pass diff --git a/src/epy_reader/speakers/pico.py b/src/epy_reader/speakers/pico.py new file mode 100644 index 0000000..95065f1 --- /dev/null +++ b/src/epy_reader/speakers/pico.py @@ -0,0 +1,43 @@ +import os +import shutil +import subprocess +import sys +import tempfile + +from epy_reader.speakers.base import SpeakerBaseModel + + +class SpeakerPico(SpeakerBaseModel): + cmd = "pico2wave" + available = all([shutil.which(dep) for dep in ["pico2wave", "play"]]) + + def speak(self, text: str) -> None: + _, self.tmp_path = tempfile.mkstemp(suffix=".wav") + + try: + subprocess.run( + [self.cmd, *self.args, "-w", self.tmp_path, text], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + check=True, + ) + except 
subprocess.CalledProcessError as e: + if "invalid pointer" not in e.output: + sys.exit(e.output) + + self.process = subprocess.Popen( + ["play", self.tmp_path], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + def is_done(self) -> bool: + return self.process.poll() is not None + + def stop(self) -> None: + self.process.terminate() + # self.process.kill() + + def cleanup(self) -> None: + os.remove(self.tmp_path) diff --git a/src/epy_reader/state.py b/src/epy_reader/state.py new file mode 100644 index 0000000..5129394 --- /dev/null +++ b/src/epy_reader/state.py @@ -0,0 +1,195 @@ +import dataclasses +import hashlib +import os +import sqlite3 +from datetime import datetime +from typing import List, Tuple + +from epy_reader.ebooks import Ebook +from epy_reader.models import AppData, LibraryItem, Optional, ReadingState + + +class State(AppData): + """ + Use sqlite3 instead of JSON (in older version) + to shift the weight from memory to process + """ + + def __init__(self): + if not os.path.isfile(self.filepath): + self.init_db() + + @property + def filepath(self) -> str: + return os.path.join(self.prefix, "states.db") if self.prefix else os.devnull + + def get_from_history(self) -> List[LibraryItem]: + try: + conn = sqlite3.connect(self.filepath) + cur = conn.cursor() + cur.execute( + """ + SELECT last_read, filepath, title, author, reading_progress + FROM library ORDER BY last_read DESC + """ + ) + results = cur.fetchall() + library_items: List[LibraryItem] = [] + for result in results: + library_items.append( + LibraryItem( + last_read=datetime.fromisoformat(result[0]), + filepath=result[1], + title=result[2], + author=result[3], + reading_progress=result[4], + ) + ) + return library_items + finally: + conn.close() + + def delete_from_library(self, filepath: str) -> None: + try: + conn = sqlite3.connect(self.filepath) + conn.execute("PRAGMA foreign_keys = ON") + conn.execute("DELETE FROM reading_states WHERE filepath=?", (filepath,)) + conn.commit() + finally: + conn.close() + + def get_last_read(self) -> Optional[str]: + library = self.get_from_history() + return library[0].filepath if library else None + + def update_library(self, ebook: Ebook, reading_progress: Optional[float]) -> None: + try: + metadata = ebook.get_meta() + conn = sqlite3.connect(self.filepath) + conn.execute( + """ + INSERT OR REPLACE INTO library (filepath, title, author, reading_progress) + VALUES (?, ?, ?, ?) 
+ """, + (ebook.path, metadata.title, metadata.creator, reading_progress), + ) + conn.commit() + finally: + conn.close() + + def get_last_reading_state(self, ebook: Ebook) -> ReadingState: + try: + conn = sqlite3.connect(self.filepath) + conn.row_factory = sqlite3.Row + cur = conn.cursor() + cur.execute("SELECT * FROM reading_states WHERE filepath=?", (ebook.path,)) + result = cur.fetchone() + if result: + result = dict(result) + del result["filepath"] + return ReadingState(**result, section=None) + return ReadingState(content_index=0, textwidth=80, row=0, rel_pctg=None, section=None) + finally: + conn.close() + + def set_last_reading_state(self, ebook: Ebook, reading_state: ReadingState) -> None: + try: + conn = sqlite3.connect(self.filepath) + conn.execute( + """ + INSERT OR REPLACE INTO reading_states + VALUES (:filepath, :content_index, :textwidth, :row, :rel_pctg) + """, + {"filepath": ebook.path, **dataclasses.asdict(reading_state)}, + ) + conn.commit() + finally: + conn.close() + + def insert_bookmark(self, ebook: Ebook, name: str, reading_state: ReadingState) -> None: + try: + conn = sqlite3.connect(self.filepath) + conn.execute( + """ + INSERT INTO bookmarks + VALUES (:id, :filepath, :name, :content_index, :textwidth, :row, :rel_pctg) + """, + { + "id": hashlib.sha1(f"{ebook.path}{name}".encode()).hexdigest()[:10], + "filepath": ebook.path, + "name": name, + **dataclasses.asdict(reading_state), + }, + ) + conn.commit() + finally: + conn.close() + + def delete_bookmark(self, ebook: Ebook, name: str) -> None: + try: + conn = sqlite3.connect(self.filepath) + conn.execute("DELETE FROM bookmarks WHERE filepath=? AND name=?", (ebook.path, name)) + conn.commit() + finally: + conn.close() + + def get_bookmarks(self, ebook: Ebook) -> List[Tuple[str, ReadingState]]: + try: + conn = sqlite3.connect(self.filepath) + conn.row_factory = sqlite3.Row + cur = conn.cursor() + cur.execute("SELECT * FROM bookmarks WHERE filepath=?", (ebook.path,)) + results = cur.fetchall() + bookmarks: List[Tuple[str, ReadingState]] = [] + for result in results: + tmp_dict = dict(result) + name = tmp_dict["name"] + tmp_dict = { + k: v + for k, v in tmp_dict.items() + if k in ("content_index", "textwidth", "row", "rel_pctg") + } + bookmarks.append((name, ReadingState(**tmp_dict))) + return bookmarks + finally: + conn.close() + + def init_db(self) -> None: + try: + conn = sqlite3.connect(self.filepath) + conn.executescript( + """ + CREATE TABLE reading_states ( + filepath TEXT PRIMARY KEY, + content_index INTEGER, + textwidth INTEGER, + row INTEGER, + rel_pctg REAL + ); + + CREATE TABLE library ( + last_read DATETIME DEFAULT (datetime('now','localtime')), + filepath TEXT PRIMARY KEY, + title TEXT, + author TEXT, + reading_progress REAL, + FOREIGN KEY (filepath) REFERENCES reading_states(filepath) + ON DELETE CASCADE + ); + + CREATE TABLE bookmarks ( + id TEXT PRIMARY KEY, + filepath TEXT, + name TEXT, + content_index INTEGER, + textwidth INTEGER, + row INTEGER, + rel_pctg REAL, + FOREIGN KEY (filepath) REFERENCES reading_states(filepath) + ON DELETE CASCADE + ); + """ + ) + conn.commit() + finally: + conn.close() diff --git a/src/epy_reader/tools/KindleUnpack/__init__.py b/src/epy_reader/tools/KindleUnpack/__init__.py new file mode 100644 index 0000000..0077258 --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/__init__.py @@ -0,0 +1,2 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai diff --git a/src/epy_reader/tools/KindleUnpack/compatibility_utils.py 
b/src/epy_reader/tools/KindleUnpack/compatibility_utils.py new file mode 100755 index 0000000..c46c0bb --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/compatibility_utils.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of +# conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, this list +# of conditions and the following disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY +# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import unicode_literals, division, absolute_import, print_function + +import sys +import codecs + +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 + +iswindows = sys.platform.startswith('win') + +try: + from urllib.parse import unquote +except ImportError: + from urllib import unquote + +if PY2: + from HTMLParser import HTMLParser + _h = HTMLParser() +elif sys.version_info[1] < 4: + import html.parser + _h = html.parser.HTMLParser() +else: + import html as _h + +if PY3: + text_type = str + binary_type = bytes + # if will be printing arbitraty binary data to stdout on python 3 + # sys.stdin = sys.stdin.detach() + # sys.stdout = sys.stdout.detach() + # sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach()) +else: + range = xrange + text_type = unicode + binary_type = str + # if will be printing unicode under python 2 need to protect + # against sys.stdout.encoding being None stupidly forcing forcing ascii encoding of unicode + # sys.stdout = codecs.getwriter("utf-8")(sys.stdout) + # alternatively set environment variable as follows **before** launching python: export PYTHONIOENCODING=UTF-8 + +# NOTE: Python 3 is completely broken when accessing single bytes in bytes strings +# (and they amazingly claim by design and no bug!) + +# To illustrate: this works for unicode in Python 3 and for all Python 2.X for both bytestrings and unicode +# >>> o = '123456789' +# >>> o[-3] +# '7' +# >>> type(o[-3]) +# +# >>> type(o) +# + +# Unfortunately, this is what Python 3 does for no sane reason and only for bytestrings +# >>> o = b'123456789' +# >>> o[-3] +# 55 +# >>> type(o[-3]) +# +# >>> type(o) +# + +# This mind boggling behaviour also happens when indexing a bytestring and/or +# iteratoring over a bytestring. In other words it will return an int but not +# the byte itself!!!!!!! 
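+
+# For example (Python 3):
+# >>> data = b'123456789'
+# >>> data[-3]           # indexing a bytestring yields an int
+# 55
+# >>> data[-3:-2]        # slicing yields a one-byte bytestring
+# b'7'
+# The bord() and bchar() helpers defined below paper over this difference.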
+ +# The only way to access a single byte as a byte in bytestring and get the byte in both +# Python 2 and Python 3 is to use a slice + +# This problem is so common there are horrible hacks floating around the net to **try** +# to work around it, so that code that works on both Python 2 and Python 3 is possible. + +# So in order to write code that works on both Python 2 and Python 3 +# if you index or access a single byte and want its ord() then use the bord() function. +# If instead you want it as a single character byte use the bchar() function +# both of which are defined below. + +if PY3: + # Also Note: if decode a bytestring using 'latin-1' (or any other full range 0-255 encoding) + # in place of ascii you will get a byte value to half-word or integer value + # one-to-one mapping (in the 0 - 255 range) + + def bchr(s): + return bytes([s]) + + def bstr(s): + if isinstance(s, str): + return bytes(s, 'latin-1') + else: + return bytes(s) + + def bord(s): + return s + + def bchar(s): + return bytes([s]) + +else: + def bchr(s): + return chr(s) + + def bstr(s): + return str(s) + + def bord(s): + return ord(s) + + def bchar(s): + return s + +if PY3: + # list-producing versions of the major Python iterating functions + def lrange(*args, **kwargs): + return list(range(*args, **kwargs)) + + def lzip(*args, **kwargs): + return list(zip(*args, **kwargs)) + + def lmap(*args, **kwargs): + return list(map(*args, **kwargs)) + + def lfilter(*args, **kwargs): + return list(filter(*args, **kwargs)) +else: + import __builtin__ + # Python 2-builtin ranges produce lists + lrange = __builtin__.range + lzip = __builtin__.zip + lmap = __builtin__.map + lfilter = __builtin__.filter + +# In Python 3 you can no longer use .encode('hex') on a bytestring +# instead use the following on both platforms +import binascii +def hexlify(bdata): + return (binascii.hexlify(bdata)).decode('ascii') + +# If you: import struct +# Note: struct pack, unpack, unpack_from all *require* bytestring format +# data all the way up to at least Python 2.7.5, Python 3 is okay with either + +# If you: import re +# note: Python 3 "re" requires the pattern to be the exact same type as the data to be +# searched ... but u"" is not allowed for the pattern itself only b"" +# Python 2.X allows the pattern to be any type and converts it to match the data +# and returns the same type as the data + +# convert string to be utf-8 encoded +def utf8_str(p, enc='utf-8'): + if p is None: + return None + if isinstance(p, text_type): + return p.encode('utf-8') + if enc != 'utf-8': + return p.decode(enc).encode('utf-8') + return p + +# convert string to be unicode encoded +def unicode_str(p, enc='utf-8'): + if p is None: + return None + if isinstance(p, text_type): + return p + return p.decode(enc) + +ASCII_CHARS = set(chr(x) for x in range(128)) +URL_SAFE = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'abcdefghijklmnopqrstuvwxyz' + '0123456789' '#' '_.-/~') +IRI_UNSAFE = ASCII_CHARS - URL_SAFE + +# returns a quoted IRI (not a URI) +def quoteurl(href): + if isinstance(href,binary_type): + href = href.decode('utf-8') + result = [] + for char in href: + if char in IRI_UNSAFE: + char = "%%%02x" % ord(char) + result.append(char) + return ''.join(result) + +# unquotes url/iri +def unquoteurl(href): + if isinstance(href,binary_type): + href = href.decode('utf-8') + href = unquote(href) + return href + +# unescape html +def unescapeit(sval): + return _h.unescape(sval) + +# Python 2.X commandline parsing under Windows has been horribly broken for years! 
+# Use the following code to emulate full unicode commandline parsing on Python 2 +# ie. To get sys.argv arguments and properly encode them as unicode + +def unicode_argv(): + global iswindows + global PY3 + if PY3: + return sys.argv + if iswindows: + # Versions 2.x of Python don't support Unicode in sys.argv on + # Windows, with the underlying Windows API instead replacing multi-byte + # characters with '?'. So use shell32.GetCommandLineArgvW to get sys.argv + # as a list of Unicode strings + from ctypes import POINTER, byref, cdll, c_int, windll + from ctypes.wintypes import LPCWSTR, LPWSTR + + GetCommandLineW = cdll.kernel32.GetCommandLineW + GetCommandLineW.argtypes = [] + GetCommandLineW.restype = LPCWSTR + + CommandLineToArgvW = windll.shell32.CommandLineToArgvW + CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)] + CommandLineToArgvW.restype = POINTER(LPWSTR) + + cmd = GetCommandLineW() + argc = c_int(0) + argv = CommandLineToArgvW(cmd, byref(argc)) + if argc.value > 0: + # Remove Python executable and commands if present + start = argc.value - len(sys.argv) + return [argv[i] for i in + range(start, argc.value)] + # this should never happen + return None + else: + argv = [] + argvencoding = sys.stdin.encoding + if argvencoding is None: + argvencoding = sys.getfilesystemencoding() + if argvencoding is None: + argvencoding = 'utf-8' + for arg in sys.argv: + if isinstance(arg, text_type): + argv.append(arg) + else: + argv.append(arg.decode(argvencoding)) + return argv + + +# Python 2.X is broken in that it does not recognize CP65001 as UTF-8 +def add_cp65001_codec(): + if PY2: + try: + codecs.lookup('cp65001') + except LookupError: + codecs.register( + lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None) + return diff --git a/src/epy_reader/tools/KindleUnpack/kindleunpack.py b/src/epy_reader/tools/KindleUnpack/kindleunpack.py new file mode 100644 index 0000000..317941a --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/kindleunpack.py @@ -0,0 +1,1029 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +import os + +__path__ = ["lib", os.path.dirname(os.path.realpath(__file__)), "kindleunpack"] + +import sys +import codecs +import traceback + +from .compatibility_utils import PY2, binary_type, utf8_str, unicode_str +from .compatibility_utils import unicode_argv, add_cp65001_codec +from .compatibility_utils import hexlify + +add_cp65001_codec() + +from .unipath import pathof + +if PY2: + range = xrange + # since will be printing unicode under python 2 need to protect + # against sys.stdout.encoding being None stupidly forcing forcing ascii encoding + if sys.stdout.encoding is None: + sys.stdout = codecs.getwriter("utf-8")(sys.stdout) + else: + encoding = sys.stdout.encoding + sys.stdout = codecs.getwriter(encoding)(sys.stdout) + +# Changelog +# 0.11 - Version by adamselene +# 0.11pd - Tweaked version by pdurrant +# 0.12 - extracts pictures too, and all into a folder. 
+# 0.13 - added back in optional output dir for those who don't want it based on infile +# 0.14 - auto flush stdout and wrapped in main, added proper return codes +# 0.15 - added support for metadata +# 0.16 - metadata now starting to be output as an opf file (PD) +# 0.17 - Also created tweaked text as source for Mobipocket Creator +# 0.18 - removed raw mobi file completely but kept _meta.html file for ease of conversion +# 0.19 - added in metadata for ASIN, Updated Title and Rights to the opf +# 0.20 - remove _meta.html since no longer needed +# 0.21 - Fixed some typos in the opf output, and also updated handling +# of test for trailing data/multibyte characters +# 0.22 - Fixed problem with > 9 images +# 0.23 - Now output Start guide item +# 0.24 - Set firstaddl value for 'TEXtREAd' +# 0.25 - Now added character set metadata to html file for utf-8 files. +# 0.26 - Dictionary support added. Image handling speed improved. +# For huge files create temp files to speed up decoding. +# Language decoding fixed. Metadata is now converted to utf-8 when written to opf file. +# 0.27 - Add idx:entry attribute "scriptable" if dictionary contains entry length tags. +# Don't save non-image sections as images. Extract and save source zip file +# included by kindlegen as kindlegensrc.zip. +# 0.28 - Added back correct image file name extensions, created FastConcat class to simplify and clean up +# 0.29 - Metadata handling reworked, multiple entries of the same type are now supported. +# Several missing types added. +# FastConcat class has been removed as in-memory handling with lists is faster, even for huge files. +# 0.30 - Add support for outputting **all** metadata values - encode content with hex if of unknown type +# 0.31 - Now supports Print Replica ebooks, outputting PDF and mysterious data sections +# 0.32 - Now supports NCX file extraction/building. +# Overhauled the structure of mobiunpack to be more class oriented. 
+# 0.33 - Split Classes into separate files and added prelim support for KF8 format eBooks
+# 0.34 - Improved KF8 support, guide support, bug fixes
+# 0.35 - Added splitting combo mobi7/mobi8 into standalone mobi7 and mobi8 files
+#        Also handle mobi8-only file properly
+# 0.36 - very minor changes to support KF8 mobis with no flow items, no ncx, etc
+# 0.37 - separate output, add command line switches to control, interface to Mobi_Unpack.pyw
+# 0.38 - improve split function by resetting flags properly, fix bug in Thumbnail Images
+# 0.39 - improve split function so that ToC info is not lost for standalone mobi8s
+# 0.40 - make mobi7 split match official versions, add support for graphic novel metadata,
+#        improve debug for KF8
+# 0.41 - fix when StartOffset set to 0xffffffff, fix to work with older mobi versions,
+#        fix other minor metadata issues
+# 0.42 - add new class interface to allow it to integrate more easily with internal calibre routines
+# 0.43 - bug fixes for new class interface
+# 0.44 - more bug fixes and fix for potential bug caused by not properly closing created zip archive
+# 0.45 - sync to version in the new Mobi_Unpack plugin
+# 0.46 - fixes for: obfuscated fonts, improper toc links and ncx, add support for opentype fonts
+# 0.47 - minor opf improvements
+# 0.48 - ncx link fixes
+# 0.49 - use azw3 when splitting mobis
+# 0.50 - unknown change
+# 0.51 - fix for converting filepos links to hrefs, Added GPL3 notice, made KF8 extension just '.azw3'
+# 0.52 - fix for cover metadata (no support for Mobipocket Creator)
+# 0.53 - fix for proper identification of embedded fonts, added new metadata items
+# 0.54 - Added error-handling so wonky embedded fonts don't bomb the whole unpack process,
+#        entity escape KF8 metadata to ensure valid OPF.
+# 0.55 - Strip extra StartOffset EXTH from the mobi8 header when splitting, keeping only the relevant one
+#        For mobi8 files, don't generate duplicate guide entries from the metadata if we could extract one
+#        from the OTH table.
+# 0.56 - Added further entity escaping of OPF text.
+#        Allow unicode string file paths to be passed as arguments to the unpackBook method without blowing up later
+#        when the attempt to "re"-unicode a portion of that filename occurs in the process_all_mobi_headers method.
+# 0.57 - Fixed error when splitting Preview files downloaded from KDP website
+# 0.58 - Output original kindlegen build log ('CMET' record) if included in the package.
+# 0.58 - Include and extend functionality of DumpMobiHeader, replacing DEBUG with DUMP
+# 0.59 - Much added DUMP functionality, including full dumping and descriptions of sections
+# 0.60 - Bug fixes in opf, div tables, bad links, page breaks, section descriptions
+#      - plus a number of other bugs fixed that were found by Sergey Dubinets
+#      - fixes for file/paths that require full unicode to work properly
+#      - replace subprocess with multiprocessing to remove need for unbuffered stdout
+# 0.61 - renamed to be KindleUnpack and more unicode/utf-8 path bug fixes and other minor fixes
+# 0.62 - fix for multiprocessing on Windows, split fixes, opf improvements
+# 0.63 - Modified to process right to left page progression books properly.
+#      - Added some id_map_strings and RESC section processing; metadata and
+#      - spine in the RESC are integrated partly to content.opf.
+# 0.63a- Separated K8 RESC processor to an individual file. Bug fixes. Added cover page creation.
+# 0.64 - minor bug fixes to more properly handle unicode command lines, and support for more jpeg types
+# 0.64a- Modified to handle some irregular mobi and azw3 files.
+# 0.64b- Modified to create k8resc.spine for no RESC files.
+# 0.65 - Bug fixes to shorten title and remove epub3 "properties" to make the output epub2 compliant
+# 0.65a- Bug fixes to extract RESC section correctly, to prevent item id confliction
+#      - and to process multiline comments in RESC.
+# 0.66 - Bug fix to deal with missing first resource information sometimes generated by calibre
+# 0.66a- Fixed minor bugs, which probably do not affect the output at all
+# 0.67 - Fixed Mobi Split functionality bug with azw3 images not being properly copied
+# 0.68 - preliminary support for handling PAGE sections to create page-map.xml
+# 0.69 - preliminary support for CONT and CRES for HD Images
+# 0.70 - preliminary support for decoding apnx files when used with azw3 ebooks
+# 0.71 - extensive refactoring of kindleunpack.py to make it more manageable
+# 0.72 - many bug fixes from tkeo: fix pageProcessing, fix print replica, fix resc usage, fix font mangling, etc.
+# 0.72a- fix for still broken PrintReplica support
+# 0.72b- preview for primary epub3 support. A parameter epubver (default='2') is added to process_all_mobi_headers(), unpackBook().
+# 0.72c- preview for apnx page support
+# 0.72d- more bugs fixed in preview features, much improved GUI with ability to dynamically grow the Log Window with preference support
+# 0.72e- more bug fixes, Tk GUI adds support for epub version and HDImage use
+# 0.72f- more bug fixes, implement use hd images if present
+# 0.72g- minor bug fixes and cleanups from tkeo
+# 0.72h- updated mobi_header and mobi_k8proc to use the correct fragment and guide terms in place of div and other
+#        to better match the terms that both Calibre and Amazon use internally to their own software
+# 0.72x- very experimental conversion to use new mobi_k8resc.py and some of its associated changes
+# 0.72y- more changes to simplify and integrate in epub3 support in a simpler manner
+# 0.72z- remove redundancy in mobi_opf.py and bug fixes for mobi_k8resc.py
+# 0.73 - faster mobi split, numerous bug fixes in mobi_k8proc, mobi_header, mobi_opf, mobi_k8resc, etc
+# 0.74 - added refines metadata, fixed language code in ncx and title in nav, added support for opf: from refines
+# 0.75 - much improved dictionary support including support for multiple inflection sections, minor mobi_opf fixes
+# 0.76 - pre-release version, only fix name related issues in opf by not using original file name in mobi7
+# 0.77 - bug fix for unpacking HDImages with included Fonts
+# 0.80 - converted to work with both python 2.7 and Python 3.3 and later
+# 0.81 - various fixes
+# 0.82 - Handle calibre-generated mobis that can have skeletons with no fragments
+# 0.83 - Fix header item 114 being mistakenly treated as a string instead of a value
+
+DUMP = False
+""" Set to True to dump all possible information. """
+
+WRITE_RAW_DATA = False
+""" Set to True to create additional files with raw data for debugging/reverse engineering. """
+
+SPLIT_COMBO_MOBIS = False
+""" Set to True to split combination mobis into mobi7 and mobi8 pieces. """
+
+CREATE_COVER_PAGE = True  # XXX experimental
+""" Create and insert a cover xhtml page. """
+
+EOF_RECORD = b'\xe9\x8e' + b'\r\n'
+""" The EOF record content.
""" + +TERMINATION_INDICATOR1 = b'\x00' +TERMINATION_INDICATOR2 = b'\x00\x00' +TERMINATION_INDICATOR3 = b'\x00\x00\x00' + +KINDLEGENSRC_FILENAME = "kindlegensrc.zip" +""" The name for the kindlegen source archive. """ + +KINDLEGENLOG_FILENAME = "kindlegenbuild.log" +""" The name for the kindlegen build log. """ + +K8_BOUNDARY = b'BOUNDARY' +""" The section data that divides K8 mobi ebooks. """ + +import os +import struct +import re +import zlib +import getopt + +class unpackException(Exception): + pass + + +# import the kindleunpack support libraries +from .unpack_structure import fileNames +from .mobi_sectioner import Sectionizer, describe +from .mobi_header import MobiHeader, dump_contexth +from .mobi_utils import toBase32 +from .mobi_opf import OPFProcessor +from .mobi_html import HTMLProcessor, XHTMLK8Processor +from .mobi_ncx import ncxExtract +from .mobi_k8proc import K8Processor +from .mobi_split import mobi_split +from .mobi_k8resc import K8RESCProcessor +from .mobi_nav import NAVProcessor +from .mobi_cover import CoverProcessor, get_image_type +from .mobi_pagemap import PageMapProcessor +from .mobi_dict import dictSupport + + +def processSRCS(i, files, rscnames, sect, data): + # extract the source zip archive and save it. + print("File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME) + srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME) + with open(pathof(srcname), 'wb') as f: + f.write(data[16:]) + rscnames.append(None) + sect.setsectiondescription(i,"Zipped Source Files") + return rscnames + + +def processPAGE(i, files, rscnames, sect, data, mh, pagemapproc): + # process any page map information and create an apnx file + pagemapproc = PageMapProcessor(mh, data) + rscnames.append(None) + sect.setsectiondescription(i,"PageMap") + apnx_meta = {} + acr = sect.palmname.decode('latin-1').rstrip('\x00') + apnx_meta['acr'] = acr + apnx_meta['cdeType'] = mh.metadata['cdeType'][0] + apnx_meta['contentGuid'] = hex(int(mh.metadata['UniqueID'][0]))[2:] + apnx_meta['asin'] = mh.metadata['ASIN'][0] + apnx_meta['pageMap'] = pagemapproc.getPageMap() + if mh.version == 8: + apnx_meta['format'] = 'MOBI_8' + else: + apnx_meta['format'] = 'MOBI_7' + apnx_data = pagemapproc.generateAPNX(apnx_meta) + if mh.isK8(): + outname = os.path.join(files.outdir, 'mobi8-'+files.getInputFileBasename() + '.apnx') + else: + outname = os.path.join(files.outdir, 'mobi7-'+files.getInputFileBasename() + '.apnx') + with open(pathof(outname), 'wb') as f: + f.write(apnx_data) + return rscnames, pagemapproc + + +def processCMET(i, files, rscnames, sect, data): + # extract the build log + print("File contains kindlegen build log, extracting as %s" % KINDLEGENLOG_FILENAME) + srcname = os.path.join(files.outdir, KINDLEGENLOG_FILENAME) + with open(pathof(srcname), 'wb') as f: + f.write(data[10:]) + rscnames.append(None) + sect.setsectiondescription(i,"Kindlegen log") + return rscnames + + +# fonts only exist in KF8 ebooks +# Format: bytes 0 - 3: 'FONT' +# bytes 4 - 7: uncompressed size +# bytes 8 - 11: flags +# flag bit 0x0001 - zlib compression +# flag bit 0x0002 - obfuscated with xor string +# bytes 12 - 15: offset to start of compressed font data +# bytes 16 - 19: length of xor string stored before the start of the comnpress font data +# bytes 20 - 23: start of xor string +def processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr): + fontname = "font%05d" % i + ext = '.dat' + font_error = False + font_data = data + try: + usize, fflags, dstart, xor_len, xor_start = 
struct.unpack_from(b'>LLLLL',data,4) + except: + print("Failed to extract font: {0:s} from section {1:d}".format(fontname,i)) + font_error = True + ext = '.failed' + pass + if not font_error: + print("Extracting font:", fontname) + font_data = data[dstart:] + extent = len(font_data) + extent = min(extent, 1040) + if fflags & 0x0002: + # obfuscated so need to de-obfuscate the first 1040 bytes + key = bytearray(data[xor_start: xor_start+ xor_len]) + buf = bytearray(font_data) + for n in range(extent): + buf[n] ^= key[n%xor_len] + font_data = bytes(buf) + if fflags & 0x0001: + # ZLIB compressed data + font_data = zlib.decompress(font_data) + hdr = font_data[0:4] + if hdr == b'\0\1\0\0' or hdr == b'true' or hdr == b'ttcf': + ext = '.ttf' + elif hdr == b'OTTO': + ext = '.otf' + else: + print("Warning: unknown font header %s" % hexlify(hdr)) + if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002): + obfuscate_data.append(fontname + ext) + fontname += ext + outfnt = os.path.join(files.imgdir, fontname) + with open(pathof(outfnt), 'wb') as f: + f.write(font_data) + rscnames.append(fontname) + sect.setsectiondescription(i,"Font {0:s}".format(fontname)) + if rsc_ptr == -1: + rsc_ptr = i - beg + return rscnames, obfuscate_data, rsc_ptr + + +def processCRES(i, files, rscnames, sect, data, beg, rsc_ptr, use_hd): + # extract an HDImage + global DUMP + data = data[12:] + imgtype = get_image_type(None, data) + + if imgtype is None: + print("Warning: CRES Section %s does not contain a recognised resource" % i) + rscnames.append(None) + sect.setsectiondescription(i,"Mysterious CRES data, first four bytes %s" % describe(data[0:4])) + if DUMP: + fname = "unknown%05d.dat" % i + outname= os.path.join(files.outdir, fname) + with open(pathof(outname), 'wb') as f: + f.write(data) + sect.setsectiondescription(i,"Mysterious CRES data, first four bytes %s extracting as %s" % (describe(data[0:4]), fname)) + rsc_ptr += 1 + return rscnames, rsc_ptr + + if use_hd: + # overwrite corresponding lower res image with hd version + imgname = rscnames[rsc_ptr] + imgdest = files.imgdir + else: + imgname = "HDimage%05d.%s" % (i, imgtype) + imgdest = files.hdimgdir + print("Extracting HD image: {0:s} from section {1:d}".format(imgname,i)) + outimg = os.path.join(imgdest, imgname) + with open(pathof(outimg), 'wb') as f: + f.write(data) + rscnames.append(None) + sect.setsectiondescription(i,"Optional HD Image {0:s}".format(imgname)) + rsc_ptr += 1 + return rscnames, rsc_ptr + + +def processCONT(i, files, rscnames, sect, data): + global DUMP + # process a container header, most of this is unknown + # right now only extract its EXTH + dt = data[0:12] + if dt == b"CONTBOUNDARY": + rscnames.append(None) + sect.setsectiondescription(i,"CONTAINER BOUNDARY") + else: + sect.setsectiondescription(i,"CONT Header") + rscnames.append(None) + if DUMP: + cpage, = struct.unpack_from(b'>L', data, 12) + contexth = data[48:] + print("\n\nContainer EXTH Dump") + dump_contexth(cpage, contexth) + fname = "CONT_Header%05d.dat" % i + outname= os.path.join(files.outdir, fname) + with open(pathof(outname), 'wb') as f: + f.write(data) + return rscnames + + +def processkind(i, files, rscnames, sect, data): + global DUMP + dt = data[0:12] + if dt == b"kindle:embed": + if DUMP: + print("\n\nHD Image Container Description String") + print(data) + sect.setsectiondescription(i,"HD Image Container Description String") + rscnames.append(None) + return rscnames + + +# spine information from the original content.opf +def processRESC(i, files, rscnames, sect, data, 
k8resc): + global DUMP + if DUMP: + rescname = "RESC%05d.dat" % i + print("Extracting Resource: ", rescname) + outrsc = os.path.join(files.outdir, rescname) + with open(pathof(outrsc), 'wb') as f: + f.write(data) + if True: # try: + # parse the spine and metadata from RESC + k8resc = K8RESCProcessor(data[16:], DUMP) + else: # except: + print("Warning: cannot extract information from RESC.") + k8resc = None + rscnames.append(None) + sect.setsectiondescription(i,"K8 RESC section") + return rscnames, k8resc + + +def processImage(i, files, rscnames, sect, data, beg, rsc_ptr, cover_offset, thumb_offset): + global DUMP + # Extract an Image + imgtype = get_image_type(None, data) + if imgtype is None: + print("Warning: Section %s does not contain a recognised resource" % i) + rscnames.append(None) + sect.setsectiondescription(i,"Mysterious Section, first four bytes %s" % describe(data[0:4])) + if DUMP: + fname = "unknown%05d.dat" % i + outname= os.path.join(files.outdir, fname) + with open(pathof(outname), 'wb') as f: + f.write(data) + sect.setsectiondescription(i,"Mysterious Section, first four bytes %s extracting as %s" % (describe(data[0:4]), fname)) + return rscnames, rsc_ptr + + imgname = "image%05d.%s" % (i, imgtype) + if cover_offset is not None and i == beg + cover_offset: + imgname = "cover%05d.%s" % (i, imgtype) + if thumb_offset is not None and i == beg + thumb_offset: + imgname = "thumb%05d.%s" % (i, imgtype) + print("Extracting image: {0:s} from section {1:d}".format(imgname,i)) + outimg = os.path.join(files.imgdir, imgname) + with open(pathof(outimg), 'wb') as f: + f.write(data) + rscnames.append(imgname) + sect.setsectiondescription(i,"Image {0:s}".format(imgname)) + if rsc_ptr == -1: + rsc_ptr = i - beg + return rscnames, rsc_ptr + + +def processPrintReplica(metadata, files, rscnames, mh): + global DUMP + global WRITE_RAW_DATA + rawML = mh.getRawML() + if DUMP or WRITE_RAW_DATA: + outraw = os.path.join(files.outdir,files.getInputFileBasename() + '.rawpr') + with open(pathof(outraw),'wb') as f: + f.write(rawML) + + fileinfo = [] + print("Print Replica ebook detected") + try: + numTables, = struct.unpack_from(b'>L', rawML, 0x04) + tableIndexOffset = 8 + 4*numTables + # for each table, read in count of sections, assume first section is a PDF + # and output other sections as binary files + for i in range(numTables): + sectionCount, = struct.unpack_from(b'>L', rawML, 0x08 + 4*i) + for j in range(sectionCount): + sectionOffset, sectionLength, = struct.unpack_from(b'>LL', rawML, tableIndexOffset) + tableIndexOffset += 8 + if j == 0: + entryName = os.path.join(files.outdir, files.getInputFileBasename() + ('.%03d.pdf' % (i+1))) + else: + entryName = os.path.join(files.outdir, files.getInputFileBasename() + ('.%03d.%03d.data' % ((i+1),j))) + with open(pathof(entryName), 'wb') as f: + f.write(rawML[sectionOffset:(sectionOffset+sectionLength)]) + except Exception as e: + print('Error processing Print Replica: ' + str(e)) + + fileinfo.append([None,'', files.getInputFileBasename() + '.pdf']) + usedmap = {} + for name in rscnames: + if name is not None: + usedmap[name] = 'used' + opf = OPFProcessor(files, metadata, fileinfo, rscnames, False, mh, usedmap) + opf.writeOPF() + + +def processMobi8(mh, metadata, sect, files, rscnames, pagemapproc, k8resc, obfuscate_data, apnxfile=None, epubver='2'): + global DUMP + global WRITE_RAW_DATA + + # extract raw markup langauge + rawML = mh.getRawML() + if DUMP or WRITE_RAW_DATA: + outraw = os.path.join(files.k8dir,files.getInputFileBasename() + '.rawml') + 
with open(pathof(outraw),'wb') as f:
+            f.write(rawML)
+
+    # KF8 requires other indexes which contain parsing information and the FDST info
+    # to process the rawml back into the xhtml files, css files, svg image files, etc
+    k8proc = K8Processor(mh, sect, files, DUMP)
+    k8proc.buildParts(rawML)
+
+    # collect information for the guide first
+    guidetext = unicode_str(k8proc.getGuideText())
+
+    # if the guide was empty, add in any guide info from metadata, such as StartOffset
+    if not guidetext and 'StartOffset' in metadata:
+        # Apparently, KG 2.5 carries over the StartOffset from the mobi7 part...
+        # Taking that into account, we only care about the *last* StartOffset, which
+        # should always be the correct one in these cases (the one actually pointing
+        # to the right place in the mobi8 part).
+        starts = metadata['StartOffset']
+        last_start = starts[-1]
+        last_start = int(last_start)
+        if last_start == 0xffffffff:
+            last_start = 0
+        seq, idtext = k8proc.getFragTblInfo(last_start)
+        filename, idtext = k8proc.getIDTagByPosFid(toBase32(seq), b'0000000000')
+        linktgt = filename
+        idtext = unicode_str(idtext, mh.codec)
+        if idtext != '':
+            linktgt += '#' + idtext
+        guidetext += '<reference type="text" href="%s" />\n' % linktgt
+
+    # if apnxfile is passed in use it for page map information
+    if apnxfile is not None and pagemapproc is None:
+        with open(apnxfile, 'rb') as f:
+            apnxdata = b"00000000" + f.read()
+        pagemapproc = PageMapProcessor(mh, apnxdata)
+
+    # generate the page map
+    pagemapxml = ''
+    if pagemapproc is not None:
+        pagemapxml = pagemapproc.generateKF8PageMapXML(k8proc)
+        outpm = os.path.join(files.k8oebps, 'page-map.xml')
+        with open(pathof(outpm),'wb') as f:
+            f.write(pagemapxml.encode('utf-8'))
+        if DUMP:
+            print(pagemapproc.getNames())
+            print(pagemapproc.getOffsets())
+            print("\n\nPage Map")
+            print(pagemapxml)
+
+    # process the toc ncx
+    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
+    print("Processing ncx / toc")
+    ncx = ncxExtract(mh, files)
+    ncx_data = ncx.parseNCX()
+    # extend the ncx data with filenames and proper internal idtags
+    for i in range(len(ncx_data)):
+        ncxmap = ncx_data[i]
+        [junk1, junk2, junk3, fid, junk4, off] = ncxmap['pos_fid'].split(':')
+        filename, idtag = k8proc.getIDTagByPosFid(fid, off)
+        ncxmap['filename'] = filename
+        ncxmap['idtag'] = unicode_str(idtag)
+        ncx_data[i] = ncxmap
+
+    # convert the rawML to a set of xhtml files
+    print("Building an epub-like structure")
+    htmlproc = XHTMLK8Processor(rscnames, k8proc)
+    usedmap = htmlproc.buildXHTML()
+
+    # write out the xhtml, svg, and css files
+    # fileinfo = [skelid|coverpage, dir, name]
+    fileinfo = []
+    # first create a cover page if none exists
+    if CREATE_COVER_PAGE:
+        cover = CoverProcessor(files, metadata, rscnames)
+        cover_img = utf8_str(cover.getImageName())
+        need_to_create_cover_page = False
+        if cover_img is not None:
+            if k8resc is None or not k8resc.hasSpine():
+                part = k8proc.getPart(0)
+                if part.find(cover_img) == -1:
+                    need_to_create_cover_page = True
+            else:
+                if "coverpage" not in k8resc.spine_idrefs:
+                    part = k8proc.getPart(int(k8resc.spine_order[0]))
+                    if part.find(cover_img) == -1:
+                        k8resc.prepend_to_spine("coverpage", "inserted", "no", None)
+                if k8resc.spine_order[0] == "coverpage":
+                    need_to_create_cover_page = True
+            if need_to_create_cover_page:
+                filename = cover.getXHTMLName()
+                fileinfo.append(["coverpage", 'Text', filename])
+                guidetext += cover.guide_toxml()
+                cover.writeXHTML()
+
+    n = k8proc.getNumberOfParts()
+    for i in range(n):
+        part = k8proc.getPart(i)
+        [skelnum, dir, filename, beg, end, aidtext] = k8proc.getPartInfo(i)
+        fileinfo.append([str(skelnum), dir, filename])
+        fname = os.path.join(files.k8oebps, dir, filename)
+        with open(pathof(fname),'wb') as f:
+            f.write(part)
+    n = k8proc.getNumberOfFlows()
+    for i in range(1, n):
+        [ptype, pformat, pdir, filename] = k8proc.getFlowInfo(i)
+        flowpart = k8proc.getFlow(i)
+        if pformat == b'file':
+            fileinfo.append([None, pdir, filename])
+            fname = os.path.join(files.k8oebps, pdir, filename)
+            with open(pathof(fname),'wb') as f:
+                f.write(flowpart)
+
+    # create the opf
+    opf = OPFProcessor(files, metadata.copy(), fileinfo, rscnames, True, mh, usedmap,
+                       pagemapxml=pagemapxml, guidetext=guidetext, k8resc=k8resc, epubver=epubver)
+    uuid = opf.writeOPF(bool(obfuscate_data))
+
+    if opf.hasNCX():
+        # Create a toc.ncx.
+        ncx.writeK8NCX(ncx_data, metadata)
+    if opf.hasNAV():
+        # Create a navigation document.
+        nav = NAVProcessor(files)
+        nav.writeNAV(ncx_data, guidetext, metadata)
+
+    # make an epub-like structure of it all
+    print("Creating an epub-like file")
+    files.makeEPUB(usedmap, obfuscate_data, uuid)
+
+
+def processMobi7(mh, metadata, sect, files, rscnames):
+    global DUMP
+    global WRITE_RAW_DATA
+    # An original Mobi
+    rawML = mh.getRawML()
+    if DUMP or WRITE_RAW_DATA:
+        outraw = os.path.join(files.mobi7dir, files.getInputFileBasename() + '.rawml')
+        with open(pathof(outraw),'wb') as f:
+            f.write(rawML)
+
+    # process the toc ncx
+    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
+    ncx = ncxExtract(mh, files)
+    ncx_data = ncx.parseNCX()
+    ncx.writeNCX(metadata)
+
+    positionMap = {}
+
+    # if Dictionary build up the positionMap
+    if mh.isDictionary():
+        if mh.DictInLanguage():
+            metadata['DictInLanguage'] = [mh.DictInLanguage()]
+        if mh.DictOutLanguage():
+            metadata['DictOutLanguage'] = [mh.DictOutLanguage()]
+        positionMap = dictSupport(mh, sect).getPositionMap()
+
+    # convert the rawml back to Mobi ml
+    proc = HTMLProcessor(files, metadata, rscnames)
+    srctext = proc.findAnchors(rawML, ncx_data, positionMap)
+    srctext, usedmap = proc.insertHREFS()
+
+    # write the proper mobi html
+    fileinfo = []
+    # fname = files.getInputFileBasename() + '.html'
+    fname = 'book.html'
+    fileinfo.append([None, '', fname])
+    outhtml = os.path.join(files.mobi7dir, fname)
+    with open(pathof(outhtml), 'wb') as f:
+        f.write(srctext)
+
+    # extract guidetext from srctext
+    guidetext = b''
+    # no pagemap support for older mobis
+    # pagemapxml = None
+    guidematch = re.search(br'''<guide>(.*)</guide>''', srctext, re.IGNORECASE + re.DOTALL)
+    if guidematch:
+        guidetext = guidematch.group(1)
+        # sometimes old mobi guide from srctext horribly written so need to clean up
+        guidetext = guidetext.replace(b"\r", b"")
+        guidetext = guidetext.replace(b"<REFERENCE", b"<reference")
+        ref_tag_pattern = re.compile(br'''(<reference [^>]*>)''', re.IGNORECASE)
+        guidepieces = ref_tag_pattern.split(guidetext)
+        for i in range(1, len(guidepieces), 2):
+            reftag = guidepieces[i]
+            # remove any href there now to replace with filepos
+            reftag = re.sub(br'''href\s*=[^'"]*['"][^'"]*['"]''', b'', reftag)
+            # make sure the reference tag ends properly
+            if not reftag.endswith(b"/>"):
+                reftag = reftag[0:-1] + b"/>"
+            guidepieces[i] = reftag
+        guidetext = b''.join(guidepieces)
+        replacetext = br'''href="''' + utf8_str(fileinfo[0][2]) + br'''#filepos\1"'''
+        guidetext = re.sub(br'''filepos=['"]{0,1}0*(\d+)['"]{0,1}''', replacetext, guidetext)
+        guidetext += b'\n'
+
+    if 'StartOffset' in metadata:
+        for value in metadata['StartOffset']:
+            if int(value) == 0xffffffff:
+                value = '0'
+            starting_offset = value
+        # get guide items from metadata
+        metaguidetext = b'<reference type="text" href="' + utf8_str(fileinfo[0][2]) + b'#filepos' + utf8_str(starting_offset) + b'" />\n'
+        guidetext += metaguidetext
+
+    if isinstance(guidetext, binary_type):
+        guidetext = guidetext.decode(mh.codec)
+
+    # create an OPF
+    opf = OPFProcessor(files, metadata, fileinfo, rscnames, ncx.isNCX, mh, usedmap, guidetext=guidetext)
+    opf.writeOPF()
+
+
+def processUnknownSections(mh, sect, files, K8Boundary):
+    global DUMP
+    global TERMINATION_INDICATOR1
+    global TERMINATION_INDICATOR2
+    global TERMINATION_INDICATOR3
+    if DUMP:
+        print("Unpacking any remaining unknown records")
+    beg = mh.start
+    end = sect.num_sections
+    if beg < K8Boundary:
+        # then we're processing the first part of a combination file
+        end = K8Boundary
+    for i in range(beg, end):
+        if sect.sectiondescriptions[i] == "":
+            data = sect.loadSection(i)
+            type = data[0:4]
+            if type == TERMINATION_INDICATOR3:
+                description = "Termination Marker 3 Nulls"
+            elif type == TERMINATION_INDICATOR2:
+                description = "Termination Marker 2 Nulls"
+            elif type == TERMINATION_INDICATOR1:
+                description = "Termination Marker 1 Null"
+            elif type == b"INDX":
+                fname = "Unknown%05d_INDX.dat" % i
+                description = "Unknown INDX section"
+                if DUMP:
+                    outname = os.path.join(files.outdir, fname)
+                    with open(pathof(outname), 'wb') as f:
+                        f.write(data)
+                    print("Extracting %s: %s from section %d" % (description, fname, i))
+                    description = description + ", extracting as %s" % fname
+            else:
+                fname = "unknown%05d.dat" % i
+                description = "Mysterious Section, first four bytes %s" % describe(data[0:4])
+                if DUMP:
+                    outname = os.path.join(files.outdir, fname)
+                    with open(pathof(outname), 'wb') as f:
+                        f.write(data)
+                    print("Extracting %s: %s from section %d" % (description, fname, i))
+                    description = description + ", extracting as %s" % fname
+            sect.setsectiondescription(i, description)
+
+
+def process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, k8only=False, epubver='2', use_hd=False):
+    global DUMP
+    global WRITE_RAW_DATA
+    rscnames = []
+    rsc_ptr = -1
+    k8resc = None
+    obfuscate_data = []
+    for mh in mhlst:
+        pagemapproc = None
+        if mh.isK8():
+            sect.setsectiondescription(mh.start, "KF8 Header")
+            mhname = os.path.join(files.outdir, "header_K8.dat")
+            print("Processing K8 section of book...")
+        elif mh.isPrintReplica():
+            sect.setsectiondescription(mh.start, "Print Replica Header")
+            mhname = os.path.join(files.outdir, "header_PR.dat")
+            print("Processing PrintReplica section of book...")
+        else:
+            if mh.version == 0:
+                sect.setsectiondescription(mh.start, "PalmDoc Header")
+            else:
+                sect.setsectiondescription(mh.start, "Mobipocket {0:d} Header".format(mh.version))
+            mhname = os.path.join(files.outdir, "header.dat")
+            print("Processing Mobipocket {0:d} section of book...".format(mh.version))
+
+        if DUMP:
+            # write out raw mobi header data
+            with open(pathof(mhname), 'wb') as f:
+                f.write(mh.header)
+
+        # process each mobi header
+        metadata = mh.getMetaData()
+        mh.describeHeader(DUMP)
+        if mh.isEncrypted():
+            raise unpackException('Book is encrypted')
+
+        pagemapproc = None
+
+        # first handle all of the different resource sections: images, resources, fonts, etc.
+        # build up a list of image names to use to postprocess the ebook
+
+        print("Unpacking images, resources, fonts, etc")
+        beg = mh.firstresource
+        end = sect.num_sections
+        if beg < K8Boundary:
+            # processing first part of a combination file
+            end = K8Boundary
+
+        # Not sure the try/except is necessary, but just in case
+        try:
+            thumb_offset = int(metadata.get('ThumbOffset', ['-1'])[0])
+        except:
thumb_offset = None + + cover_offset = int(metadata.get('CoverOffset', ['-1'])[0]) + if not CREATE_COVER_PAGE: + cover_offset = None + + for i in range(beg, end): + data = sect.loadSection(i) + type = data[0:4] + + # handle the basics first + if type in [b"FLIS", b"FCIS", b"FDST", b"DATP"]: + if DUMP: + fname = unicode_str(type) + "%05d" % i + if mh.isK8(): + fname += "_K8" + fname += '.dat' + outname= os.path.join(files.outdir, fname) + with open(pathof(outname), 'wb') as f: + f.write(data) + print("Dumping section {0:d} type {1:s} to file {2:s} ".format(i,unicode_str(type),outname)) + sect.setsectiondescription(i,"Type {0:s}".format(unicode_str(type))) + rscnames.append(None) + elif type == b"SRCS": + rscnames = processSRCS(i, files, rscnames, sect, data) + elif type == b"PAGE": + rscnames, pagemapproc = processPAGE(i, files, rscnames, sect, data, mh, pagemapproc) + elif type == b"CMET": + rscnames = processCMET(i, files, rscnames, sect, data) + elif type == b"FONT": + rscnames, obfuscate_data, rsc_ptr = processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr) + elif type == b"CRES": + rscnames, rsc_ptr = processCRES(i, files, rscnames, sect, data, beg, rsc_ptr, use_hd) + elif type == b"CONT": + rscnames = processCONT(i, files, rscnames, sect, data) + elif type == b"kind": + rscnames = processkind(i, files, rscnames, sect, data) + elif type == b'\xa0\xa0\xa0\xa0': + sect.setsectiondescription(i,"Empty_HD_Image/Resource_Placeholder") + rscnames.append(None) + rsc_ptr += 1 + elif type == b"RESC": + rscnames, k8resc = processRESC(i, files, rscnames, sect, data, k8resc) + elif data == EOF_RECORD: + sect.setsectiondescription(i,"End Of File") + rscnames.append(None) + elif data[0:8] == b"BOUNDARY": + sect.setsectiondescription(i,"BOUNDARY Marker") + rscnames.append(None) + else: + # if reached here should be an image ow treat as unknown + rscnames, rsc_ptr = processImage(i, files, rscnames, sect, data, beg, rsc_ptr, cover_offset, thumb_offset) + # done unpacking resources + + # Print Replica + if mh.isPrintReplica() and not k8only: + processPrintReplica(metadata, files, rscnames, mh) + continue + + # KF8 (Mobi 8) + if mh.isK8(): + processMobi8(mh, metadata, sect, files, rscnames, pagemapproc, k8resc, obfuscate_data, apnxfile, epubver) + + # Old Mobi (Mobi 7) + elif not k8only: + processMobi7(mh, metadata, sect, files, rscnames) + + # process any remaining unknown sections of the palm file + processUnknownSections(mh, sect, files, K8Boundary) + + return + + +def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=False, dodump=False, dowriteraw=False, dosplitcombos=False): + global DUMP + global WRITE_RAW_DATA + global SPLIT_COMBO_MOBIS + if DUMP or dodump: + DUMP = True + if WRITE_RAW_DATA or dowriteraw: + WRITE_RAW_DATA = True + if SPLIT_COMBO_MOBIS or dosplitcombos: + SPLIT_COMBO_MOBIS = True + + infile = unicode_str(infile) + outdir = unicode_str(outdir) + if apnxfile is not None: + apnxfile = unicode_str(apnxfile) + + files = fileNames(infile, outdir) + + # process the PalmDoc database header and verify it is a mobi + sect = Sectionizer(infile) + if sect.ident != b'BOOKMOBI' and sect.ident != b'TEXtREAd': + raise unpackException('Invalid file format') + if DUMP: + sect.dumppalmheader() + else: + print("Palm DB type: %s, %d sections." % (sect.ident.decode('utf-8'),sect.num_sections)) + + # scan sections to see if this is a compound mobi file (K8 format) + # and build a list of all mobi headers to process. 
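+ # (Editorial sketch, not part of KindleUnpack: the boundary scan below leans
+ # on the PalmDB record table, i.e. a big-endian uint16 record count at byte
+ # 76 followed by 8-byte entries at byte 78 whose first four bytes are each
+ # section's file offset. A combination file shows up as a section exactly 8
+ # bytes long whose payload equals the KF8 boundary marker. A toy reader:
+ #
+ #     import struct
+ #     def palmdb_offsets(data):
+ #         n, = struct.unpack_from(b'>H', data, 76)
+ #         offs = [struct.unpack_from(b'>L', data, 78 + 8 * i)[0] for i in range(n)]
+ #         return offs + [len(data)]  # sentinel so section sizes can be computed
+ # )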
+ mhlst = [] + mh = MobiHeader(sect,0) + # if this is a mobi8-only file hasK8 here will be true + mhlst.append(mh) + K8Boundary = -1 + + if mh.isK8(): + print("Unpacking a KF8 book...") + hasK8 = True + else: + # This is either a Mobipocket 7 or earlier, or a combi M7/KF8 + # Find out which + hasK8 = False + for i in range(len(sect.sectionoffsets)-1): + before, after = sect.sectionoffsets[i:i+2] + if (after - before) == 8: + data = sect.loadSection(i) + if data == K8_BOUNDARY: + sect.setsectiondescription(i,"Mobi/KF8 Boundary Section") + mh = MobiHeader(sect,i+1) + hasK8 = True + mhlst.append(mh) + K8Boundary = i + break + if hasK8: + print("Unpacking a Combination M{0:d}/KF8 book...".format(mh.version)) + if SPLIT_COMBO_MOBIS: + # if this is a combination mobi7-mobi8 file split them up + mobisplit = mobi_split(infile) + if mobisplit.combo: + outmobi7 = os.path.join(files.outdir, 'mobi7-'+files.getInputFileBasename() + '.mobi') + outmobi8 = os.path.join(files.outdir, 'mobi8-'+files.getInputFileBasename() + '.azw3') + with open(pathof(outmobi7), 'wb') as f: + f.write(mobisplit.getResult7()) + with open(pathof(outmobi8), 'wb') as f: + f.write(mobisplit.getResult8()) + else: + print("Unpacking a Mobipocket {0:d} book...".format(mh.version)) + + if hasK8: + files.makeK8Struct() + + process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, False, epubver, use_hd) + + if DUMP: + sect.dumpsectionsinfo() + return + + +def usage(progname): + print("") + print("Description:") + print(" Unpacks an unencrypted Kindle/MobiPocket ebook to html and images") + print(" or an unencrypted Kindle/Print Replica ebook to PDF and images") + print(" into the specified output folder.") + print("Usage:") + print(" %s -r -s -p apnxfile -d -h --epub_version= infile [outdir]" % progname) + print("Options:") + print(" -h print this help message") + print(" -i use HD Images, if present, to overwrite reduced resolution images") + print(" -s split combination mobis into mobi7 and mobi8 ebooks") + print(" -p APNXFILE path to an .apnx file associated with the azw3 input (optional)") + print(" --epub_version= specify epub version to unpack to: 2, 3, A (for automatic) or ") + print(" F (force to fit to epub2 definitions), default is 2") + print(" -d dump headers and other info to output and extra files") + print(" -r write raw data to the output folder") + + +def main(argv=unicode_argv()): + global DUMP + global WRITE_RAW_DATA + global SPLIT_COMBO_MOBIS + + print("KindleUnpack v0.83") + print(" Based on initial mobipocket version Copyright © 2009 Charles M. Hannum ") + print(" Extensive Extensions and Improvements Copyright © 2009-2020 ") + print(" by: P. Durrant, K. Hendricks, S. 
Siebert, fandrieu, DiapDealer, nickredding, tkeo.") + print(" This program is free software: you can redistribute it and/or modify") + print(" it under the terms of the GNU General Public License as published by") + print(" the Free Software Foundation, version 3.") + + progname = os.path.basename(argv[0]) + try: + opts, args = getopt.getopt(argv[1:], "dhirsp:", ['epub_version=']) + except getopt.GetoptError as err: + print(str(err)) + usage(progname) + sys.exit(2) + + if len(args)<1: + usage(progname) + sys.exit(2) + + apnxfile = None + epubver = '2' + use_hd = False + + for o, a in opts: + if o == "-h": + usage(progname) + sys.exit(0) + if o == "-i": + use_hd = True + if o == "-d": + DUMP = True + if o == "-r": + WRITE_RAW_DATA = True + if o == "-s": + SPLIT_COMBO_MOBIS = True + if o == "-p": + apnxfile = a + if o == "--epub_version": + epubver = a + + if len(args) > 1: + infile, outdir = args + else: + infile = args[0] + outdir = os.path.splitext(infile)[0] + + infileext = os.path.splitext(infile)[1].upper() + if infileext not in ['.MOBI', '.PRC', '.AZW', '.AZW3', '.AZW4']: + print("Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook.") + return 1 + + try: + print('Unpacking Book...') + unpackBook(infile, outdir, apnxfile, epubver, use_hd) + print('Completed') + + except ValueError as e: + print("Error: %s" % e) + print(traceback.format_exc()) + return 1 + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/epy_reader/tools/KindleUnpack/mobi_cover.py b/src/epy_reader/tools/KindleUnpack/mobi_cover.py new file mode 100644 index 0000000..3078ac4 --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_cover.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import unicode_str + +from .unipath import pathof +import os +import imghdr + +import struct +# note: struct pack, unpack, unpack_from all require bytestring format +# data all the way up to at least python 2.7.5, python 3 okay with bytestring + +USE_SVG_WRAPPER = True +""" Set to True to use svg wrapper for default. """ + +FORCE_DEFAULT_TITLE = False +""" Set to True to force to use the default title. """ + +COVER_PAGE_FINENAME = 'cover_page.xhtml' +""" The name for the cover page. """ + +DEFAULT_TITLE = 'Cover' +""" The default title for the cover page. """ + +MAX_WIDTH = 4096 +""" The max width for the svg cover page. """ + +MAX_HEIGHT = 4096 +""" The max height for the svg cover page. """ + + +def get_image_type(imgname, imgdata=None): + imgtype = unicode_str(imghdr.what(pathof(imgname), imgdata)) + + # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are some + # with only the magic JPEG bytes out there... + # ImageMagick handles those, so, do it too. + if imgtype is None: + if imgdata is None: + with open(pathof(imgname), 'rb') as f: + imgdata = f.read() + if imgdata[0:2] == b'\xFF\xD8': + # Get last non-null bytes + last = len(imgdata) + while (imgdata[last-1:last] == b'\x00'): + last-=1 + # Be extra safe, check the trailing bytes, too. + if imgdata[last-2:last] == b'\xFF\xD9': + imgtype = "jpeg" + return imgtype + + +def get_image_size(imgname, imgdata=None): + '''Determine the image type of imgname (or imgdata) and return its size. + + Originally, + Determine the image type of fhandle and return its size. 
+ from draco''' + if imgdata is None: + fhandle = open(pathof(imgname), 'rb') + head = fhandle.read(24) + else: + head = imgdata[0:24] + if len(head) != 24: + return + + imgtype = get_image_type(imgname, imgdata) + if imgtype == 'png': + check = struct.unpack(b'>i', head[4:8])[0] + if check != 0x0d0a1a0a: + return + width, height = struct.unpack(b'>ii', head[16:24]) + elif imgtype == 'gif': + width, height = struct.unpack(b'H', fhandle.read(2))[0] - 2 + # We are at a SOFn block + fhandle.seek(1, 1) # Skip `precision' byte. + height, width = struct.unpack(b'>HH', fhandle.read(4)) + except Exception: # IGNORE:W0703 + return + elif imgtype == 'jpeg' and imgdata is not None: + try: + pos = 0 + size = 2 + ftype = 0 + while not 0xc0 <= ftype <= 0xcf: + pos += size + byte = imgdata[pos:pos+1] + pos += 1 + while ord(byte) == 0xff: + byte = imgdata[pos:pos+1] + pos += 1 + ftype = ord(byte) + size = struct.unpack(b'>H', imgdata[pos:pos+2])[0] - 2 + pos += 2 + # We are at a SOFn block + pos += 1 # Skip `precision' byte. + height, width = struct.unpack(b'>HH', imgdata[pos:pos+4]) + pos += 4 + except Exception: # IGNORE:W0703 + return + else: + return + return width, height + +# XXX experimental +class CoverProcessor(object): + + """Create a cover page. + + """ + def __init__(self, files, metadata, rscnames, imgname=None, imgdata=None): + self.files = files + self.metadata = metadata + self.rscnames = rscnames + self.cover_page = COVER_PAGE_FINENAME + self.use_svg = USE_SVG_WRAPPER # Use svg wrapper. + self.lang = metadata.get('Language', ['en'])[0] + # This should ensure that if the methods to find the cover image's + # dimensions should fail for any reason, the SVG routine will not be used. + [self.width, self.height] = (-1,-1) + if FORCE_DEFAULT_TITLE: + self.title = DEFAULT_TITLE + else: + self.title = metadata.get('Title', [DEFAULT_TITLE])[0] + + self.cover_image = None + if imgname is not None: + self.cover_image = imgname + elif 'CoverOffset' in metadata: + imageNumber = int(metadata['CoverOffset'][0]) + cover_image = self.rscnames[imageNumber] + if cover_image is not None: + self.cover_image = cover_image + else: + print('Warning: Cannot identify the cover image.') + if self.use_svg: + try: + if imgdata is None: + fname = os.path.join(files.imgdir, self.cover_image) + [self.width, self.height] = get_image_size(fname) + else: + [self.width, self.height] = get_image_size(None, imgdata) + except: + self.use_svg = False + width = self.width + height = self.height + if width < 0 or height < 0 or width > MAX_WIDTH or height > MAX_HEIGHT: + self.use_svg = False + return + + def getImageName(self): + return self.cover_image + + def getXHTMLName(self): + return self.cover_page + + def buildXHTML(self): + print('Building a cover page.') + files = self.files + cover_image = self.cover_image + title = self.title + lang = self.lang + + image_dir = os.path.normpath(os.path.relpath(files.k8images, files.k8text)) + image_path = os.path.join(image_dir, cover_image).replace('\\', '/') + + if not self.use_svg: + data = '' + data += '' + data += 'L', idata, 0x14) + count, = struct.unpack_from(b'>L', idata, 0x18) + self.starts.append(start) + self.counts.append(count) + + def lookup(self, lookupvalue): + i = 0 + rvalue = lookupvalue + while rvalue >= self.counts[i]: + rvalue = rvalue - self.counts[i] + i += 1 + if i == len(self.counts): + print("Error: Problem with multiple inflections data sections") + return lookupvalue, self.starts[0], self.counts[0], self.infldatas[0] + return rvalue, self.starts[i], 
self.counts[i], self.infldatas[i] + + def offsets(self, value): + rvalue, start, count, data = self.lookup(value) + offset, = struct.unpack_from(b'>H', data, start + 4 + (2 * rvalue)) + if rvalue + 1 < count: + nextOffset, = struct.unpack_from(b'>H',data, start + 4 + (2 * (rvalue + 1))) + else: + nextOffset = None + return offset, nextOffset, data + + +class dictSupport(object): + + def __init__(self, mh, sect): + self.mh = mh + self.header = mh.header + self.sect = sect + self.metaOrthIndex = mh.metaOrthIndex + self.metaInflIndex = mh.metaInflIndex + + def parseHeader(self, data): + "read INDX header" + if not data[:4] == b'INDX': + print("Warning: index section is not INDX") + return False + words = ( + 'len', 'nul1', 'type', 'gen', 'start', 'count', 'code', + 'lng', 'total', 'ordt', 'ligt', 'nligt', 'nctoc' + ) + num = len(words) + values = struct.unpack(bstr('>%dL' % num), data[4:4*(num+1)]) + header = {} + for n in range(num): + header[words[n]] = values[n] + + ordt1 = None + ordt2 = None + + otype, oentries, op1, op2, otagx = struct.unpack_from(b'>LLLLL',data, 0xa4) + header['otype'] = otype + header['oentries'] = oentries + + if DEBUG_DICT: + print("otype %d, oentries %d, op1 %d, op2 %d, otagx %d" % (otype, oentries, op1, op2, otagx)) + + if header['code'] == 0xfdea or oentries > 0: + # some dictionaries seem to be codepage 65002 (0xFDEA) which seems + # to be some sort of strange EBCDIC utf-8 or 16 encoded strings + # So we need to look for them and store them away to process leading text + # ORDT1 has 1 byte long entries, ORDT2 has 2 byte long entries + # we only ever seem to use the second but ... + # + # if otype = 0, ORDT table uses 16 bit values as offsets into the table + # if otype = 1, ORDT table uses 8 bit values as offsets inot the table + + assert(data[op1:op1+4] == b'ORDT') + assert(data[op2:op2+4] == b'ORDT') + ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1+4) + ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2+4) + + if DEBUG_DICT: + print("parsed INDX header:") + for key in header: + print(key, "%x" % header[key],) + print("\n") + return header, ordt1, ordt2 + + def getPositionMap(self): + sect = self.sect + + positionMap = {} + + metaOrthIndex = self.metaOrthIndex + metaInflIndex = self.metaInflIndex + + decodeInflection = True + if metaOrthIndex != 0xFFFFFFFF: + print("Info: Document contains orthographic index, handle as dictionary") + if metaInflIndex == 0xFFFFFFFF: + decodeInflection = False + else: + metaInflIndexData = sect.loadSection(metaInflIndex) + + print("\nParsing metaInflIndexData") + midxhdr, mhordt1, mhordt2 = self.parseHeader(metaInflIndexData) + + metaIndexCount = midxhdr['count'] + idatas = [] + for j in range(metaIndexCount): + idatas.append(sect.loadSection(metaInflIndex + 1 + j)) + dinfl = InflectionData(idatas) + + inflNameData = sect.loadSection(metaInflIndex + 1 + metaIndexCount) + tagSectionStart = midxhdr['len'] + inflectionControlByteCount, inflectionTagTable = readTagSection(tagSectionStart, metaInflIndexData) + if DEBUG_DICT: + print("inflectionTagTable: %s" % inflectionTagTable) + if self.hasTag(inflectionTagTable, 0x07): + print("Error: Dictionary uses obsolete inflection rule scheme which is not yet supported") + decodeInflection = False + + data = sect.loadSection(metaOrthIndex) + + print("\nParsing metaOrthIndex") + idxhdr, hordt1, hordt2 = self.parseHeader(data) + + tagSectionStart = idxhdr['len'] + controlByteCount, tagTable = readTagSection(tagSectionStart, data) + orthIndexCount = idxhdr['count'] + 
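+ # (Editorial aside, not from the source: each index record below carries an
+ # IDXT position table, big-endian uint16 offsets located just past a 4-byte
+ # b'IDXT' tag at hdrinfo['start'], and consecutive offsets delimit one
+ # dictionary entry each:
+ #
+ #     positions = [struct.unpack_from(b'>H', data, idxtPos + 4 + 2 * j)[0]
+ #                  for j in range(entryCount)]
+ #
+ # which is exactly the inline loop that follows.)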
print("orthIndexCount is", orthIndexCount) + if DEBUG_DICT: + print("orthTagTable: %s" % tagTable) + if hordt2 is not None: + print("orth entry uses ordt2 lookup table of type ", idxhdr['otype']) + hasEntryLength = self.hasTag(tagTable, 0x02) + if not hasEntryLength: + print("Info: Index doesn't contain entry length tags") + + print("Read dictionary index data") + for i in range(metaOrthIndex + 1, metaOrthIndex + 1 + orthIndexCount): + data = sect.loadSection(i) + hdrinfo, ordt1, ordt2 = self.parseHeader(data) + idxtPos = hdrinfo['start'] + entryCount = hdrinfo['count'] + idxPositions = [] + for j in range(entryCount): + pos, = struct.unpack_from(b'>H', data, idxtPos + 4 + (2 * j)) + idxPositions.append(pos) + # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!) + idxPositions.append(idxtPos) + for j in range(entryCount): + startPos = idxPositions[j] + endPos = idxPositions[j+1] + textLength = ord(data[startPos:startPos+1]) + text = data[startPos+1:startPos+1+textLength] + if hordt2 is not None: + utext = u"" + if idxhdr['otype'] == 0: + pattern = b'>H' + inc = 2 + else: + pattern = b'>B' + inc = 1 + pos = 0 + while pos < textLength: + off, = struct.unpack_from(pattern, text, pos) + if off < len(hordt2): + utext += unichr(hordt2[off]) + else: + utext += unichr(off) + pos += inc + text = utext.encode('utf-8') + + tagMap = getTagMap(controlByteCount, tagTable, data, startPos+1+textLength, endPos) + if 0x01 in tagMap: + if decodeInflection and 0x2a in tagMap: + inflectionGroups = self.getInflectionGroups(text, inflectionControlByteCount, inflectionTagTable, + dinfl, inflNameData, tagMap[0x2a]) + else: + inflectionGroups = b'' + assert len(tagMap[0x01]) == 1 + entryStartPosition = tagMap[0x01][0] + if hasEntryLength: + # The idx:entry attribute "scriptable" must be present to create entry length tags. + ml = b'' + inflectionGroups + b'' + if entryStartPosition in positionMap: + positionMap[entryStartPosition] = positionMap[entryStartPosition] + ml + else: + positionMap[entryStartPosition] = ml + assert len(tagMap[0x02]) == 1 + entryEndPosition = entryStartPosition + tagMap[0x02][0] + if entryEndPosition in positionMap: + positionMap[entryEndPosition] = b"" + positionMap[entryEndPosition] + else: + positionMap[entryEndPosition] = b"" + + else: + indexTags = b'\n\n' + inflectionGroups + b'\n' + if entryStartPosition in positionMap: + positionMap[entryStartPosition] = positionMap[entryStartPosition] + indexTags + else: + positionMap[entryStartPosition] = indexTags + return positionMap + + def hasTag(self, tagTable, tag): + ''' + Test if tag table contains given tag. + + @param tagTable: The tag table. + @param tag: The tag to search. + @return: True if tag table contains given tag; False otherwise. + ''' + for currentTag, _, _, _ in tagTable: + if currentTag == tag: + return True + return False + + def getInflectionGroups(self, mainEntry, controlByteCount, tagTable, dinfl, inflectionNames, groupList): + ''' + Create string which contains the inflection groups with inflection rules as mobipocket tags. + + @param mainEntry: The word to inflect. + @param controlByteCount: The number of control bytes. + @param tagTable: The tag table. + @param data: The Inflection data object to properly select the right inflection data section to use + @param inflectionNames: The inflection rule name data. + @param groupList: The list of inflection groups to process. + @return: String with inflection groups and rules or empty string if required tags are not available. 
+ ''' + result = b"" + for value in groupList: + offset, nextOffset, data = dinfl.offsets(value) + + # First byte seems to be always 0x00 and must be skipped. + assert ord(data[offset:offset+1]) == 0x00 + tagMap = getTagMap(controlByteCount, tagTable, data, offset + 1, nextOffset) + + # Make sure that the required tags are available. + if 0x05 not in tagMap: + print("Error: Required tag 0x05 not found in tagMap") + return "" + if 0x1a not in tagMap: + print("Error: Required tag 0x1a not found in tagMap") + return b'' + + result += b'' + + for i in range(len(tagMap[0x05])): + + # Get name of inflection rule. + value = tagMap[0x05][i] + consumed, textLength = getVariableWidthValue(inflectionNames, value) + inflectionName = inflectionNames[value+consumed:value+consumed+textLength] + + # Get and apply inflection rule across possibly multiple inflection data sections + value = tagMap[0x1a][i] + rvalue, start, count, data = dinfl.lookup(value) + offset, = struct.unpack_from(b'>H', data, start + 4 + (2 * rvalue)) + textLength = ord(data[offset:offset+1]) + inflection = self.applyInflectionRule(mainEntry, data, offset+1, offset+1+textLength) + if inflection is not None: + result += b' ' + + result += b'' + return result + + def applyInflectionRule(self, mainEntry, inflectionRuleData, start, end): + ''' + Apply inflection rule. + + @param mainEntry: The word to inflect. + @param inflectionRuleData: The inflection rules. + @param start: The start position of the inflection rule to use. + @param end: The end position of the inflection rule to use. + @return: The string with the inflected word or None if an error occurs. + ''' + mode = -1 + byteArray = array.array(array_format, mainEntry) + position = len(byteArray) + for charOffset in range(start, end): + char = inflectionRuleData[charOffset:charOffset+1] + abyte = ord(char) + if abyte >= 0x0a and abyte <= 0x13: + # Move cursor backwards + offset = abyte - 0x0a + if mode not in [0x02, 0x03]: + mode = 0x02 + position = len(byteArray) + position -= offset + elif abyte > 0x13: + if mode == -1: + print("Error: Unexpected first byte %i of inflection rule" % abyte) + return None + elif position == -1: + print("Error: Unexpected first byte %i of inflection rule" % abyte) + return None + else: + if mode == 0x01: + # Insert at word start + byteArray.insert(position, abyte) + position += 1 + elif mode == 0x02: + # Insert at word end + byteArray.insert(position, abyte) + elif mode == 0x03: + # Delete at word end + position -= 1 + deleted = byteArray.pop(position) + if bchr(deleted) != char: + if DEBUG_DICT: + print("0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, bchr(deleted))) + print("Error: Delete operation of inflection rule failed") + return None + elif mode == 0x04: + # Delete at word start + deleted = byteArray.pop(position) + if bchr(deleted) != char: + if DEBUG_DICT: + print("0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, bchr(deleted))) + print("Error: Delete operation of inflection rule failed") + return None + else: + print("Error: Inflection rule mode %x is not implemented" % mode) + return None + elif abyte == 0x01: + # Insert at word start + if mode not in [0x01, 0x04]: + position = 0 + mode = abyte + elif abyte == 0x02: + # Insert at word end + if mode not in [0x02, 0x03]: + position = len(byteArray) + mode = abyte + elif abyte == 0x03: + # Delete at word end + if mode not in [0x02, 0x03]: + position = len(byteArray) + mode = abyte + elif abyte == 0x04: + # Delete at word start + if 
mode not in [0x01, 0x04]: + position = 0 + # Delete at word start + mode = abyte + else: + print("Error: Inflection rule mode %x is not implemented" % abyte) + return None + return utf8_str(byteArray.tostring()) diff --git a/src/epy_reader/tools/KindleUnpack/mobi_header.py b/src/epy_reader/tools/KindleUnpack/mobi_header.py new file mode 100644 index 0000000..a15f636 --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_header.py @@ -0,0 +1,936 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +DEBUG_USE_ORDERED_DICTIONARY = False # OrderedDict is supoorted >= python 2.7. +""" set to True to use OrderedDict for MobiHeader.metadata.""" + +if DEBUG_USE_ORDERED_DICTIONARY: + from collections import OrderedDict as dict_ +else: + dict_ = dict + +from .compatibility_utils import PY2, unicode_str, hexlify, bord + +if PY2: + range = xrange + +import struct +import uuid + +# import the mobiunpack support libraries +from .mobi_utils import getLanguage +from .mobi_uncompress import HuffcdicReader, PalmdocReader, UncompressedReader + +class unpackException(Exception): + pass + + +def sortedHeaderKeys(mheader): + hdrkeys = sorted(list(mheader.keys()), key=lambda akey: mheader[akey][0]) + return hdrkeys + + +# HD Containers have their own headers and their own EXTH +# this is just guesswork so far, making big assumption that +# metavalue key numbers remain the same in the CONT EXTH + +# Note: The layout of the CONT Header is still unknown +# so just deal with their EXTH sections for now + +def dump_contexth(cpage, extheader): + # determine text encoding + codec = 'windows-1252' + codec_map = { + 1252 : 'windows-1252', + 65001: 'utf-8', + } + if cpage in codec_map: + codec = codec_map[cpage] + if extheader == b'': + return + id_map_strings = { + 1 : 'Drm Server Id', + 2 : 'Drm Commerce Id', + 3 : 'Drm Ebookbase Book Id', + 4 : 'Drm Ebookbase Dep Id', + 100 : 'Creator', + 101 : 'Publisher', + 102 : 'Imprint', + 103 : 'Description', + 104 : 'ISBN', + 105 : 'Subject', + 106 : 'Published', + 107 : 'Review', + 108 : 'Contributor', + 109 : 'Rights', + 110 : 'SubjectCode', + 111 : 'Type', + 112 : 'Source', + 113 : 'ASIN', + # 114 : 'versionNumber', + 117 : 'Adult', + 118 : 'Retail-Price', + 119 : 'Retail-Currency', + 120 : 'TSC', + 122 : 'fixed-layout', + 123 : 'book-type', + 124 : 'orientation-lock', + 126 : 'original-resolution', + 127 : 'zero-gutter', + 128 : 'zero-margin', + 129 : 'MetadataResourceURI', + 132 : 'RegionMagnification', + 150 : 'LendingEnabled', + 200 : 'DictShortName', + 501 : 'cdeType', + 502 : 'last_update_time', + 503 : 'Updated_Title', + 504 : 'CDEContentKey', + 505 : 'AmazonContentReference', + 506 : 'Title-Language', + 507 : 'Title-Display-Direction', + 508 : 'Title-Pronunciation', + 509 : 'Title-Collation', + 510 : 'Secondary-Title', + 511 : 'Secondary-Title-Language', + 512 : 'Secondary-Title-Direction', + 513 : 'Secondary-Title-Pronunciation', + 514 : 'Secondary-Title-Collation', + 515 : 'Author-Language', + 516 : 'Author-Display-Direction', + 517 : 'Author-Pronunciation', + 518 : 'Author-Collation', + 519 : 'Author-Type', + 520 : 'Publisher-Language', + 521 : 'Publisher-Display-Direction', + 522 : 'Publisher-Pronunciation', + 523 : 'Publisher-Collation', + 524 : 'Content-Language-Tag', + 525 : 'primary-writing-mode', + 526 : 'NCX-Ingested-By-Software', + 527 : 'page-progression-direction', + 528 : 'override-kindle-fonts', + 529 : 
'Compression-Upgraded', + 530 : 'Soft-Hyphens-In-Content', + 531 : 'Dictionary_In_Langague', + 532 : 'Dictionary_Out_Language', + 533 : 'Font_Converted', + 534 : 'Amazon_Creator_Info', + 535 : 'Creator-Build-Tag', + 536 : 'HD-Media-Containers-Info', # CONT_Header is 0, Ends with CONTAINER_BOUNDARY (or Asset_Type?) + 538 : 'Resource-Container-Fidelity', + 539 : 'HD-Container-Mimetype', + 540 : 'Sample-For_Special-Purpose', + 541 : 'Kindletool-Operation-Information', + 542 : 'Container_Id', + 543 : 'Asset-Type', # FONT_CONTAINER, BW_CONTAINER, HD_CONTAINER + 544 : 'Unknown_544', + } + id_map_values = { + 114 : 'versionNumber', + 115 : 'sample', + 116 : 'StartOffset', + 121 : 'Mobi8-Boundary-Section', + 125 : 'Embedded-Record-Count', + 130 : 'Offline-Sample', + 131 : 'Metadata-Record-Offset', + 201 : 'CoverOffset', + 202 : 'ThumbOffset', + 203 : 'HasFakeCover', + 204 : 'Creator-Software', + 205 : 'Creator-Major-Version', + 206 : 'Creator-Minor-Version', + 207 : 'Creator-Build-Number', + 401 : 'Clipping-Limit', + 402 : 'Publisher-Limit', + 404 : 'Text-to-Speech-Disabled', + 406 : 'Rental-Expiration-Time', + } + id_map_hexstrings = { + 208 : 'Watermark_(hex)', + 209 : 'Tamper-Proof-Keys_(hex)', + 300 : 'Font-Signature_(hex)', + 403 : 'Unknown_(403)_(hex)', + 405 : 'Ownership-Type_(hex)', + 407 : 'Unknown_(407)_(hex)', + 420 : 'Multimedia-Content-Reference_(hex)', + 450 : 'Locations_Match_(hex)', + 451 : 'Full-Story-Length_(hex)', + 452 : 'Sample-Start_Location_(hex)', + 453 : 'Sample-End-Location_(hex)', + } + _length, num_items = struct.unpack(b'>LL', extheader[4:12]) + extheader = extheader[12:] + pos = 0 + for _ in range(num_items): + id, size = struct.unpack(b'>LL', extheader[pos:pos+8]) + content = extheader[pos + 8: pos + size] + if id in id_map_strings: + name = id_map_strings[id] + print('\n Key: "%s"\n Value: "%s"' % (name, content.decode(codec, errors='replace'))) + elif id in id_map_values: + name = id_map_values[id] + if size == 9: + value, = struct.unpack(b'B',content) + print('\n Key: "%s"\n Value: 0x%01x' % (name, value)) + elif size == 10: + value, = struct.unpack(b'>H',content) + print('\n Key: "%s"\n Value: 0x%02x' % (name, value)) + elif size == 12: + value, = struct.unpack(b'>L',content) + print('\n Key: "%s"\n Value: 0x%04x' % (name, value)) + else: + print("\nError: Value for %s has unexpected size of %s" % (name, size)) + elif id in id_map_hexstrings: + name = id_map_hexstrings[id] + print('\n Key: "%s"\n Value: 0x%s' % (name, hexlify(content))) + else: + print("\nWarning: Unknown metadata with id %s found" % id) + name = str(id) + ' (hex)' + print(' Key: "%s"\n Value: 0x%s' % (name, hexlify(content))) + pos += size + return + + +class MobiHeader: + # all values are packed in big endian format + palmdoc_header = { + 'compression_type' : (0x00, b'>H', 2), + 'fill0' : (0x02, b'>H', 2), + 'text_length' : (0x04, b'>L', 4), + 'text_records' : (0x08, b'>H', 2), + 'max_section_size' : (0x0a, b'>H', 2), + 'read_pos ' : (0x0c, b'>L', 4), + } + + mobi6_header = { + 'compression_type' : (0x00, b'>H', 2), + 'fill0' : (0x02, b'>H', 2), + 'text_length' : (0x04, b'>L', 4), + 'text_records' : (0x08, b'>H', 2), + 'max_section_size' : (0x0a, b'>H', 2), + 'crypto_type' : (0x0c, b'>H', 2), + 'fill1' : (0x0e, b'>H', 2), + 'magic' : (0x10, b'4s', 4), + 'header_length (from MOBI)' : (0x14, b'>L', 4), + 'type' : (0x18, b'>L', 4), + 'codepage' : (0x1c, b'>L', 4), + 'unique_id' : (0x20, b'>L', 4), + 'version' : (0x24, b'>L', 4), + 'metaorthindex' : (0x28, b'>L', 4), + 'metainflindex' : (0x2c, 
b'>L', 4), + 'index_names' : (0x30, b'>L', 4), + 'index_keys' : (0x34, b'>L', 4), + 'extra_index0' : (0x38, b'>L', 4), + 'extra_index1' : (0x3c, b'>L', 4), + 'extra_index2' : (0x40, b'>L', 4), + 'extra_index3' : (0x44, b'>L', 4), + 'extra_index4' : (0x48, b'>L', 4), + 'extra_index5' : (0x4c, b'>L', 4), + 'first_nontext' : (0x50, b'>L', 4), + 'title_offset' : (0x54, b'>L', 4), + 'title_length' : (0x58, b'>L', 4), + 'language_code' : (0x5c, b'>L', 4), + 'dict_in_lang' : (0x60, b'>L', 4), + 'dict_out_lang' : (0x64, b'>L', 4), + 'min_version' : (0x68, b'>L', 4), + 'first_resc_offset' : (0x6c, b'>L', 4), + 'huff_offset' : (0x70, b'>L', 4), + 'huff_num' : (0x74, b'>L', 4), + 'huff_tbl_offset' : (0x78, b'>L', 4), + 'huff_tbl_len' : (0x7c, b'>L', 4), + 'exth_flags' : (0x80, b'>L', 4), + 'fill3_a' : (0x84, b'>L', 4), + 'fill3_b' : (0x88, b'>L', 4), + 'fill3_c' : (0x8c, b'>L', 4), + 'fill3_d' : (0x90, b'>L', 4), + 'fill3_e' : (0x94, b'>L', 4), + 'fill3_f' : (0x98, b'>L', 4), + 'fill3_g' : (0x9c, b'>L', 4), + 'fill3_h' : (0xa0, b'>L', 4), + 'unknown0' : (0xa4, b'>L', 4), + 'drm_offset' : (0xa8, b'>L', 4), + 'drm_count' : (0xac, b'>L', 4), + 'drm_size' : (0xb0, b'>L', 4), + 'drm_flags' : (0xb4, b'>L', 4), + 'fill4_a' : (0xb8, b'>L', 4), + 'fill4_b' : (0xbc, b'>L', 4), + 'first_content' : (0xc0, b'>H', 2), + 'last_content' : (0xc2, b'>H', 2), + 'unknown0' : (0xc4, b'>L', 4), + 'fcis_offset' : (0xc8, b'>L', 4), + 'fcis_count' : (0xcc, b'>L', 4), + 'flis_offset' : (0xd0, b'>L', 4), + 'flis_count' : (0xd4, b'>L', 4), + 'unknown1' : (0xd8, b'>L', 4), + 'unknown2' : (0xdc, b'>L', 4), + 'srcs_offset' : (0xe0, b'>L', 4), + 'srcs_count' : (0xe4, b'>L', 4), + 'unknown3' : (0xe8, b'>L', 4), + 'unknown4' : (0xec, b'>L', 4), + 'fill5' : (0xf0, b'>H', 2), + 'traildata_flags' : (0xf2, b'>H', 2), + 'ncx_index' : (0xf4, b'>L', 4), + 'unknown5' : (0xf8, b'>L', 4), + 'unknown6' : (0xfc, b'>L', 4), + 'datp_offset' : (0x100, b'>L', 4), + 'unknown7' : (0x104, b'>L', 4), + 'Unknown ' : (0x108, b'>L', 4), + 'Unknown ' : (0x10C, b'>L', 4), + 'Unknown ' : (0x110, b'>L', 4), + 'Unknown ' : (0x114, b'>L', 4), + 'Unknown ' : (0x118, b'>L', 4), + 'Unknown ' : (0x11C, b'>L', 4), + 'Unknown ' : (0x120, b'>L', 4), + 'Unknown ' : (0x124, b'>L', 4), + 'Unknown ' : (0x128, b'>L', 4), + 'Unknown ' : (0x12C, b'>L', 4), + 'Unknown ' : (0x130, b'>L', 4), + 'Unknown ' : (0x134, b'>L', 4), + 'Unknown ' : (0x138, b'>L', 4), + 'Unknown ' : (0x11C, b'>L', 4), + } + + mobi8_header = { + 'compression_type' : (0x00, b'>H', 2), + 'fill0' : (0x02, b'>H', 2), + 'text_length' : (0x04, b'>L', 4), + 'text_records' : (0x08, b'>H', 2), + 'max_section_size' : (0x0a, b'>H', 2), + 'crypto_type' : (0x0c, b'>H', 2), + 'fill1' : (0x0e, b'>H', 2), + 'magic' : (0x10, b'4s', 4), + 'header_length (from MOBI)' : (0x14, b'>L', 4), + 'type' : (0x18, b'>L', 4), + 'codepage' : (0x1c, b'>L', 4), + 'unique_id' : (0x20, b'>L', 4), + 'version' : (0x24, b'>L', 4), + 'metaorthindex' : (0x28, b'>L', 4), + 'metainflindex' : (0x2c, b'>L', 4), + 'index_names' : (0x30, b'>L', 4), + 'index_keys' : (0x34, b'>L', 4), + 'extra_index0' : (0x38, b'>L', 4), + 'extra_index1' : (0x3c, b'>L', 4), + 'extra_index2' : (0x40, b'>L', 4), + 'extra_index3' : (0x44, b'>L', 4), + 'extra_index4' : (0x48, b'>L', 4), + 'extra_index5' : (0x4c, b'>L', 4), + 'first_nontext' : (0x50, b'>L', 4), + 'title_offset' : (0x54, b'>L', 4), + 'title_length' : (0x58, b'>L', 4), + 'language_code' : (0x5c, b'>L', 4), + 'dict_in_lang' : (0x60, b'>L', 4), + 'dict_out_lang' : (0x64, b'>L', 4), + 'min_version' : (0x68, 
b'>L', 4), + 'first_resc_offset' : (0x6c, b'>L', 4), + 'huff_offset' : (0x70, b'>L', 4), + 'huff_num' : (0x74, b'>L', 4), + 'huff_tbl_offset' : (0x78, b'>L', 4), + 'huff_tbl_len' : (0x7c, b'>L', 4), + 'exth_flags' : (0x80, b'>L', 4), + 'fill3_a' : (0x84, b'>L', 4), + 'fill3_b' : (0x88, b'>L', 4), + 'fill3_c' : (0x8c, b'>L', 4), + 'fill3_d' : (0x90, b'>L', 4), + 'fill3_e' : (0x94, b'>L', 4), + 'fill3_f' : (0x98, b'>L', 4), + 'fill3_g' : (0x9c, b'>L', 4), + 'fill3_h' : (0xa0, b'>L', 4), + 'unknown0' : (0xa4, b'>L', 4), + 'drm_offset' : (0xa8, b'>L', 4), + 'drm_count' : (0xac, b'>L', 4), + 'drm_size' : (0xb0, b'>L', 4), + 'drm_flags' : (0xb4, b'>L', 4), + 'fill4_a' : (0xb8, b'>L', 4), + 'fill4_b' : (0xbc, b'>L', 4), + 'fdst_offset' : (0xc0, b'>L', 4), + 'fdst_flow_count' : (0xc4, b'>L', 4), + 'fcis_offset' : (0xc8, b'>L', 4), + 'fcis_count' : (0xcc, b'>L', 4), + 'flis_offset' : (0xd0, b'>L', 4), + 'flis_count' : (0xd4, b'>L', 4), + 'unknown1' : (0xd8, b'>L', 4), + 'unknown2' : (0xdc, b'>L', 4), + 'srcs_offset' : (0xe0, b'>L', 4), + 'srcs_count' : (0xe4, b'>L', 4), + 'unknown3' : (0xe8, b'>L', 4), + 'unknown4' : (0xec, b'>L', 4), + 'fill5' : (0xf0, b'>H', 2), + 'traildata_flags' : (0xf2, b'>H', 2), + 'ncx_index' : (0xf4, b'>L', 4), + 'fragment_index' : (0xf8, b'>L', 4), + 'skeleton_index' : (0xfc, b'>L', 4), + 'datp_offset' : (0x100, b'>L', 4), + 'guide_index' : (0x104, b'>L', 4), + 'Unknown ' : (0x108, b'>L', 4), + 'Unknown ' : (0x10C, b'>L', 4), + 'Unknown ' : (0x110, b'>L', 4), + 'Unknown ' : (0x114, b'>L', 4), + 'Unknown ' : (0x118, b'>L', 4), + 'Unknown ' : (0x11C, b'>L', 4), + 'Unknown ' : (0x120, b'>L', 4), + 'Unknown ' : (0x124, b'>L', 4), + 'Unknown ' : (0x128, b'>L', 4), + 'Unknown ' : (0x12C, b'>L', 4), + 'Unknown ' : (0x130, b'>L', 4), + 'Unknown ' : (0x134, b'>L', 4), + 'Unknown ' : (0x138, b'>L', 4), + 'Unknown ' : (0x11C, b'>L', 4), + } + + palmdoc_header_sorted_keys = sortedHeaderKeys(palmdoc_header) + mobi6_header_sorted_keys = sortedHeaderKeys(mobi6_header) + mobi8_header_sorted_keys = sortedHeaderKeys(mobi8_header) + + id_map_strings = { + 1 : 'Drm Server Id', + 2 : 'Drm Commerce Id', + 3 : 'Drm Ebookbase Book Id', + 4 : 'Drm Ebookbase Dep Id', + 100 : 'Creator', + 101 : 'Publisher', + 102 : 'Imprint', + 103 : 'Description', + 104 : 'ISBN', + 105 : 'Subject', + 106 : 'Published', + 107 : 'Review', + 108 : 'Contributor', + 109 : 'Rights', + 110 : 'SubjectCode', + 111 : 'Type', + 112 : 'Source', + 113 : 'ASIN', + # 114 : 'versionNumber', + 117 : 'Adult', + 118 : 'Retail-Price', + 119 : 'Retail-Currency', + 120 : 'TSC', + 122 : 'fixed-layout', + 123 : 'book-type', + 124 : 'orientation-lock', + 126 : 'original-resolution', + 127 : 'zero-gutter', + 128 : 'zero-margin', + 129 : 'MetadataResourceURI', + 132 : 'RegionMagnification', + 150 : 'LendingEnabled', + 200 : 'DictShortName', + 501 : 'cdeType', + 502 : 'last_update_time', + 503 : 'Updated_Title', + 504 : 'CDEContentKey', + 505 : 'AmazonContentReference', + 506 : 'Title-Language', + 507 : 'Title-Display-Direction', + 508 : 'Title-Pronunciation', + 509 : 'Title-Collation', + 510 : 'Secondary-Title', + 511 : 'Secondary-Title-Language', + 512 : 'Secondary-Title-Direction', + 513 : 'Secondary-Title-Pronunciation', + 514 : 'Secondary-Title-Collation', + 515 : 'Author-Language', + 516 : 'Author-Display-Direction', + 517 : 'Author-Pronunciation', + 518 : 'Author-Collation', + 519 : 'Author-Type', + 520 : 'Publisher-Language', + 521 : 'Publisher-Display-Direction', + 522 : 'Publisher-Pronunciation', + 523 : 'Publisher-Collation', + 
524 : 'Content-Language-Tag', + 525 : 'primary-writing-mode', + 526 : 'NCX-Ingested-By-Software', + 527 : 'page-progression-direction', + 528 : 'override-kindle-fonts', + 529 : 'Compression-Upgraded', + 530 : 'Soft-Hyphens-In-Content', + 531 : 'Dictionary_In_Langague', + 532 : 'Dictionary_Out_Language', + 533 : 'Font_Converted', + 534 : 'Amazon_Creator_Info', + 535 : 'Creator-Build-Tag', + 536 : 'HD-Media-Containers-Info', # CONT_Header is 0, Ends with CONTAINER_BOUNDARY (or Asset_Type?) + 538 : 'Resource-Container-Fidelity', + 539 : 'HD-Container-Mimetype', + 540 : 'Sample-For_Special-Purpose', + 541 : 'Kindletool-Operation-Information', + 542 : 'Container_Id', + 543 : 'Asset-Type', # FONT_CONTAINER, BW_CONTAINER, HD_CONTAINER + 544 : 'Unknown_544', + } + id_map_values = { + 114 : 'versionNumber', + 115 : 'sample', + 116 : 'StartOffset', + 121 : 'Mobi8-Boundary-Section', + 125 : 'Embedded-Record-Count', + 130 : 'Offline-Sample', + 131 : 'Metadata-Record-Offset', + 201 : 'CoverOffset', + 202 : 'ThumbOffset', + 203 : 'HasFakeCover', + 204 : 'Creator-Software', + 205 : 'Creator-Major-Version', + 206 : 'Creator-Minor-Version', + 207 : 'Creator-Build-Number', + 401 : 'Clipping-Limit', + 402 : 'Publisher-Limit', + 404 : 'Text-to-Speech-Disabled', + 406 : 'Rental-Expiration-Time', + } + id_map_hexstrings = { + 208 : 'Watermark_(hex)', + 209 : 'Tamper-Proof-Keys_(hex)', + 300 : 'Font-Signature_(hex)', + 403 : 'Unknown_(403)_(hex)', + 405 : 'Ownership-Type_(hex)', + 407 : 'Unknown_(407)_(hex)', + 420 : 'Multimedia-Content-Reference_(hex)', + 450 : 'Locations_Match_(hex)', + 451 : 'Full-Story-Length_(hex)', + 452 : 'Sample-Start_Location_(hex)', + 453 : 'Sample-End-Location_(hex)', + } + + def __init__(self, sect, sectNumber): + self.sect = sect + self.start = sectNumber + self.header = self.sect.loadSection(self.start) + if len(self.header)>20 and self.header[16:20] == b'MOBI': + self.sect.setsectiondescription(0,"Mobipocket Header") + self.palm = False + elif self.sect.ident == b'TEXtREAd': + self.sect.setsectiondescription(0, "PalmDOC Header") + self.palm = True + else: + raise unpackException('Unknown File Format') + + self.records, = struct.unpack_from(b'>H', self.header, 0x8) + + # set defaults in case this is a PalmDOC + self.title = self.sect.palmname.decode('latin-1', errors='replace') + self.length = len(self.header)-16 + self.type = 3 + self.codepage = 1252 + self.codec = 'windows-1252' + self.unique_id = 0 + self.version = 0 + self.hasExth = False + self.exth = b'' + self.exth_offset = self.length + 16 + self.exth_length = 0 + self.crypto_type = 0 + self.firstnontext = self.start+self.records + 1 + self.firstresource = self.start+self.records + 1 + self.ncxidx = 0xffffffff + self.metaOrthIndex = 0xffffffff + self.metaInflIndex = 0xffffffff + self.skelidx = 0xffffffff + self.fragidx = 0xffffffff + self.guideidx = 0xffffffff + self.fdst = 0xffffffff + self.mlstart = self.sect.loadSection(self.start+1)[:4] + self.rawSize = 0 + self.metadata = dict_() + + # set up for decompression/unpacking + self.compression, = struct.unpack_from(b'>H', self.header, 0x0) + if self.compression == 0x4448: + reader = HuffcdicReader() + huffoff, huffnum = struct.unpack_from(b'>LL', self.header, 0x70) + huffoff = huffoff + self.start + self.sect.setsectiondescription(huffoff,"Huffman Compression Seed") + reader.loadHuff(self.sect.loadSection(huffoff)) + for i in range(1, huffnum): + self.sect.setsectiondescription(huffoff+i,"Huffman CDIC Compression Seed %d" % i) + 
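+ # (Editorial note, not in the source: a HUFF-compressed book stores its
+ # Huffman tables in the single seed section at 'huff_offset' and its code
+ # dictionaries in the huff_num-1 CDIC sections loaded here; every seed must
+ # be read before any text record can be decompressed, while PalmDoc (type 2)
+ # and uncompressed (type 1) books need no seed sections at all.)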
reader.loadCdic(self.sect.loadSection(huffoff+i)) + self.unpack = reader.unpack + elif self.compression == 2: + self.unpack = PalmdocReader().unpack + elif self.compression == 1: + self.unpack = UncompressedReader().unpack + else: + raise unpackException('invalid compression type: 0x%4x' % self.compression) + + if self.palm: + return + + self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack(b'>LLLLL', self.header[20:40]) + codec_map = { + 1252 : 'windows-1252', + 65001: 'utf-8', + } + if self.codepage in codec_map: + self.codec = codec_map[self.codepage] + + # title + toff, tlen = struct.unpack(b'>II', self.header[0x54:0x5c]) + tend = toff + tlen + self.title=self.header[toff:tend].decode(self.codec, errors='replace') + + exth_flag, = struct.unpack(b'>L', self.header[0x80:0x84]) + self.hasExth = exth_flag & 0x40 + self.exth_offset = self.length + 16 + self.exth_length = 0 + if self.hasExth: + self.exth_length, = struct.unpack_from(b'>L', self.header, self.exth_offset+4) + self.exth_length = ((self.exth_length + 3)>>2)<<2 # round to next 4 byte boundary + self.exth = self.header[self.exth_offset:self.exth_offset+self.exth_length] + + # parse the exth / metadata + self.parseMetaData() + + # self.mlstart = self.sect.loadSection(self.start+1) + # self.mlstart = self.mlstart[0:4] + self.crypto_type, = struct.unpack_from(b'>H', self.header, 0xC) + + # Start sector for additional files such as images, fonts, resources, etc + # Can be missing so fall back to default set previously + ofst, = struct.unpack_from(b'>L', self.header, 0x6C) + if ofst != 0xffffffff: + self.firstresource = ofst + self.start + ofst, = struct.unpack_from(b'>L', self.header, 0x50) + if ofst != 0xffffffff: + self.firstnontext = ofst + self.start + + if self.isPrintReplica(): + return + + if self.version < 8: + # Dictionary metaOrthIndex + self.metaOrthIndex, = struct.unpack_from(b'>L', self.header, 0x28) + if self.metaOrthIndex != 0xffffffff: + self.metaOrthIndex += self.start + + # Dictionary metaInflIndex + self.metaInflIndex, = struct.unpack_from(b'>L', self.header, 0x2C) + if self.metaInflIndex != 0xffffffff: + self.metaInflIndex += self.start + + # handle older headers without any ncxindex info and later + # specifically 0xe4 headers + if self.length + 16 < 0xf8: + return + + # NCX Index + self.ncxidx, = struct.unpack(b'>L', self.header[0xf4:0xf8]) + if self.ncxidx != 0xffffffff: + self.ncxidx += self.start + + # K8 specific Indexes + if self.start != 0 or self.version == 8: + # Index into file skeletons in RawML + self.skelidx, = struct.unpack_from(b'>L', self.header, 0xfc) + if self.skelidx != 0xffffffff: + self.skelidx += self.start + + # Index into
sections in RawML + self.fragidx, = struct.unpack_from(b'>L', self.header, 0xf8) + if self.fragidx != 0xffffffff: + self.fragidx += self.start + + # Index into Other files + self.guideidx, = struct.unpack_from(b'>L', self.header, 0x104) + if self.guideidx != 0xffffffff: + self.guideidx += self.start + + # dictionaries do not seem to use the same approach in K8's + # so disable them + self.metaOrthIndex = 0xffffffff + self.metaInflIndex = 0xffffffff + + # need to use the FDST record to find out how to properly unpack + # the rawML into pieces + # it is simply a table of start and end locations for each flow piece + self.fdst, = struct.unpack_from(b'>L', self.header, 0xc0) + self.fdstcnt, = struct.unpack_from(b'>L', self.header, 0xc4) + # if cnt is 1 or less, fdst section mumber can be garbage + if self.fdstcnt <= 1: + self.fdst = 0xffffffff + if self.fdst != 0xffffffff: + self.fdst += self.start + # setting of fdst section description properly handled in mobi_kf8proc + + def dump_exth(self): + # determine text encoding + codec=self.codec + if (not self.hasExth) or (self.exth_length) == 0 or (self.exth == b''): + return + num_items, = struct.unpack(b'>L', self.exth[8:12]) + pos = 12 + print("Key Size Description Value") + for _ in range(num_items): + id, size = struct.unpack(b'>LL', self.exth[pos:pos+8]) + contentsize = size-8 + content = self.exth[pos + 8: pos + size] + if id in MobiHeader.id_map_strings: + exth_name = MobiHeader.id_map_strings[id] + print('{0: >3d} {1: >4d} {2: <30s} {3:s}'.format(id, contentsize, exth_name, content.decode(codec, errors='replace'))) + elif id in MobiHeader.id_map_values: + exth_name = MobiHeader.id_map_values[id] + if size == 9: + value, = struct.unpack(b'B',content) + print('{0:3d} byte {1:<30s} {2:d}'.format(id, exth_name, value)) + elif size == 10: + value, = struct.unpack(b'>H',content) + print('{0:3d} word {1:<30s} 0x{2:0>4X} ({2:d})'.format(id, exth_name, value)) + elif size == 12: + value, = struct.unpack(b'>L',content) + print('{0:3d} long {1:<30s} 0x{2:0>8X} ({2:d})'.format(id, exth_name, value)) + else: + print('{0: >3d} {1: >4d} {2: <30s} (0x{3:s})'.format(id, contentsize, "Bad size for "+exth_name, hexlify(content))) + elif id in MobiHeader.id_map_hexstrings: + exth_name = MobiHeader.id_map_hexstrings[id] + print('{0:3d} {1:4d} {2:<30s} 0x{3:s}'.format(id, contentsize, exth_name, hexlify(content))) + else: + exth_name = "Unknown EXTH ID {0:d}".format(id) + print("{0: >3d} {1: >4d} {2: <30s} 0x{3:s}".format(id, contentsize, exth_name, hexlify(content))) + pos += size + return + + def dumpheader(self): + # first 16 bytes are not part of the official mobiheader + # but we will treat it as such + # so section 0 is 16 (decimal) + self.length in total == at least 0x108 bytes for Mobi 8 headers + print("Dumping section %d, Mobipocket Header version: %d, total length %d" % (self.start,self.version, self.length+16)) + self.hdr = {} + # set it up for the proper header version + if self.version == 0: + self.mobi_header = MobiHeader.palmdoc_header + self.mobi_header_sorted_keys = MobiHeader.palmdoc_header_sorted_keys + elif self.version < 8: + self.mobi_header = MobiHeader.mobi6_header + self.mobi_header_sorted_keys = MobiHeader.mobi6_header_sorted_keys + else: + self.mobi_header = MobiHeader.mobi8_header + self.mobi_header_sorted_keys = MobiHeader.mobi8_header_sorted_keys + + # parse the header information + for key in self.mobi_header_sorted_keys: + (pos, format, tot_len) = self.mobi_header[key] + if pos < (self.length + 16): + val, = 
struct.unpack_from(format, self.header, pos) + self.hdr[key] = val + + if 'title_offset' in self.hdr: + title_offset = self.hdr['title_offset'] + title_length = self.hdr['title_length'] + else: + title_offset = 0 + title_length = 0 + if title_offset == 0: + title_offset = len(self.header) + title_length = 0 + self.title = self.sect.palmname.decode('latin-1', errors='replace') + else: + self.title = self.header[title_offset:title_offset+title_length].decode(self.codec, errors='replace') + # title record always padded with two nul bytes and then padded with nuls to next 4 byte boundary + title_length = ((title_length+2+3)>>2)<<2 + + self.extra1 = self.header[self.exth_offset+self.exth_length:title_offset] + self.extra2 = self.header[title_offset+title_length:] + + print("Mobipocket header from section %d" % self.start) + print(" Offset Value Hex Dec Description") + for key in self.mobi_header_sorted_keys: + (pos, format, tot_len) = self.mobi_header[key] + if pos < (self.length + 16): + if key != 'magic': + fmt_string = "0x{0:0>3X} ({0:3d}){1: >" + str(9-2*tot_len) +"s}0x{2:0>" + str(2*tot_len) + "X} {2:10d} {3:s}" + else: + self.hdr[key] = unicode_str(self.hdr[key]) + fmt_string = "0x{0:0>3X} ({0:3d}){2:>11s} {3:s}" + print(fmt_string.format(pos, " ",self.hdr[key], key)) + print("") + + if self.exth_length > 0: + print("EXTH metadata, offset %d, padded length %d" % (self.exth_offset,self.exth_length)) + self.dump_exth() + print("") + + if len(self.extra1) > 0: + print("Extra data between EXTH and Title, length %d" % len(self.extra1)) + print(hexlify(self.extra1)) + print("") + + if title_length > 0: + print("Title in header at offset %d, padded length %d: '%s'" %(title_offset,title_length,self.title)) + print("") + + if len(self.extra2) > 0: + print("Extra data between Title and end of header, length %d" % len(self.extra2)) + print(hexlify(self.extra2)) + print("") + + def isPrintReplica(self): + return self.mlstart[0:4] == b"%MOP" + + def isK8(self): + return self.start != 0 or self.version == 8 + + def isEncrypted(self): + return self.crypto_type != 0 + + def hasNCX(self): + return self.ncxidx != 0xffffffff + + def isDictionary(self): + return self.metaOrthIndex != 0xffffffff + + def getncxIndex(self): + return self.ncxidx + + def decompress(self, data): + return self.unpack(data) + + def Language(self): + langcode = struct.unpack(b'!L', self.header[0x5c:0x60])[0] + langid = langcode & 0xFF + sublangid = (langcode >> 8) & 0xFF + return getLanguage(langid, sublangid) + + def DictInLanguage(self): + if self.isDictionary(): + langcode = struct.unpack(b'!L', self.header[0x60:0x64])[0] + langid = langcode & 0xFF + sublangid = (langcode >> 10) & 0xFF + if langid != 0: + return getLanguage(langid, sublangid) + return False + + def DictOutLanguage(self): + if self.isDictionary(): + langcode = struct.unpack(b'!L', self.header[0x64:0x68])[0] + langid = langcode & 0xFF + sublangid = (langcode >> 10) & 0xFF + if langid != 0: + return getLanguage(langid, sublangid) + return False + + def getRawML(self): + def getSizeOfTrailingDataEntry(data): + num = 0 + for v in data[-4:]: + if bord(v) & 0x80: + num = 0 + num = (num << 7) | (bord(v) & 0x7f) + return num + def trimTrailingDataEntries(data): + for _ in range(trailers): + num = getSizeOfTrailingDataEntry(data) + data = data[:-num] + if multibyte: + num = (ord(data[-1:]) & 3) + 1 + data = data[:-num] + return data + multibyte = 0 + trailers = 0 + if self.sect.ident == b'BOOKMOBI': + mobi_length, = struct.unpack_from(b'>L', self.header, 0x14) + 
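+ # (Editorial note, not in the source: the flags word read from offset 0xF2
+ # below drives trimTrailingDataEntries above; bit 0 marks a multibyte
+ # character-overlap entry at each text record's tail, and every further set
+ # bit adds one backward variable-width entry, so e.g. flags == 0b0011 strips
+ # one sized trailer plus the multibyte entry before decompression.)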
mobi_version, = struct.unpack_from(b'>L', self.header, 0x68) + if (mobi_length >= 0xE4) and (mobi_version >= 5): + flags, = struct.unpack_from(b'>H', self.header, 0xF2) + multibyte = flags & 1 + while flags > 1: + if flags & 2: + trailers += 1 + flags = flags >> 1 + # get raw mobi markup languge + print("Unpacking raw markup language") + dataList = [] + # offset = 0 + for i in range(1, self.records+1): + data = trimTrailingDataEntries(self.sect.loadSection(self.start + i)) + dataList.append(self.unpack(data)) + if self.isK8(): + self.sect.setsectiondescription(self.start + i,"KF8 Text Section {0:d}".format(i)) + elif self.version == 0: + self.sect.setsectiondescription(self.start + i,"PalmDOC Text Section {0:d}".format(i)) + else: + self.sect.setsectiondescription(self.start + i,"Mobipocket Text Section {0:d}".format(i)) + rawML = b''.join(dataList) + self.rawSize = len(rawML) + return rawML + + # all metadata is stored in a dictionary with key and returns a *list* of values + # a list is used to allow for multiple creators, multiple contributors, etc + def parseMetaData(self): + def addValue(name, value): + if name not in self.metadata: + self.metadata[name] = [value] + else: + self.metadata[name].append(value) + + codec=self.codec + if self.hasExth: + extheader=self.exth + _length, num_items = struct.unpack(b'>LL', extheader[4:12]) + extheader = extheader[12:] + pos = 0 + for _ in range(num_items): + id, size = struct.unpack(b'>LL', extheader[pos:pos+8]) + content = extheader[pos + 8: pos + size] + if id in MobiHeader.id_map_strings: + name = MobiHeader.id_map_strings[id] + addValue(name, content.decode(codec, errors='replace')) + elif id in MobiHeader.id_map_values: + name = MobiHeader.id_map_values[id] + if size == 9: + value, = struct.unpack(b'B',content) + addValue(name, unicode_str(str(value))) + elif size == 10: + value, = struct.unpack(b'>H',content) + addValue(name, unicode_str(str(value))) + elif size == 12: + value, = struct.unpack(b'>L',content) + # handle special case of missing CoverOffset or missing ThumbOffset + if id == 201 or id == 202: + if value != 0xffffffff: + addValue(name, unicode_str(str(value))) + else: + addValue(name, unicode_str(str(value))) + else: + print("Warning: Bad key, size, value combination detected in EXTH ", id, size, hexlify(content)) + addValue(name, hexlify(content)) + elif id in MobiHeader.id_map_hexstrings: + name = MobiHeader.id_map_hexstrings[id] + addValue(name, hexlify(content)) + else: + name = unicode_str(str(id)) + ' (hex)' + addValue(name, hexlify(content)) + pos += size + + # add the basics to the metadata each as a list element + self.metadata['Language'] = [self.Language()] + self.metadata['Title'] = [unicode_str(self.title,self.codec)] + self.metadata['Codec'] = [self.codec] + self.metadata['UniqueID'] = [unicode_str(str(self.unique_id))] + # if no asin create one using a uuid + if 'ASIN' not in self.metadata: + self.metadata['ASIN'] = [unicode_str(str(uuid.uuid4()))] + # if no cdeType set it to "EBOK" + if 'cdeType' not in self.metadata: + self.metadata['cdeType'] = ['EBOK'] + + def getMetaData(self): + return self.metadata + + def describeHeader(self, DUMP): + print("Mobi Version:", self.version) + print("Codec:", self.codec) + print("Title:", self.title) + if 'Updated_Title' in self.metadata: + print("EXTH Title:", self.metadata['Updated_Title'][0]) + if self.compression == 0x4448: + print("Huffdic compression") + elif self.compression == 2: + print("Palmdoc compression") + elif self.compression == 1: + print("No compression") + 
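+ # (Editorial sketch, not KindleUnpack code, of the EXTH walk that
+ # parseMetaData() performs above: a 12-byte header of b'EXTH' magic, total
+ # length and record count, followed by records of big-endian id, total size,
+ # and size-8 payload bytes:
+ #
+ #     _length, count = struct.unpack(b'>LL', exth[4:12])
+ #     pos, body = 0, exth[12:]
+ #     for _ in range(count):
+ #         rec_id, size = struct.unpack(b'>LL', body[pos:pos + 8])
+ #         payload = body[pos + 8:pos + size]
+ #         pos += size
+ # )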
if DUMP: + self.dumpheader() diff --git a/src/epy_reader/tools/KindleUnpack/mobi_html.py b/src/epy_reader/tools/KindleUnpack/mobi_html.py new file mode 100644 index 0000000..eda766c --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_html.py @@ -0,0 +1,439 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import PY2, utf8_str + +if PY2: + range = xrange + +import re +# note: re requites the pattern to be the exact same type as the data to be searched in python3 +# but u"" is not allowed for the pattern itself only b"" + +from .mobi_utils import fromBase32 + +class HTMLProcessor: + + def __init__(self, files, metadata, rscnames): + self.files = files + self.metadata = metadata + self.rscnames = rscnames + # for original style mobis, default to including all image files in the opf manifest + self.used = {} + for name in rscnames: + self.used[name] = 'used' + + def findAnchors(self, rawtext, indx_data, positionMap): + # process the raw text + # find anchors... + print("Find link anchors") + link_pattern = re.compile(br'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''', re.IGNORECASE) + # TEST NCX: merge in filepos from indx + pos_links = [int(m.group(1)) for m in link_pattern.finditer(rawtext)] + if indx_data: + pos_indx = [e['pos'] for e in indx_data if e['pos']>0] + pos_links = list(set(pos_links + pos_indx)) + + for position in pos_links: + if position in positionMap: + positionMap[position] = positionMap[position] + utf8_str('' % position) + else: + positionMap[position] = utf8_str('' % position) + + # apply dictionary metadata and anchors + print("Insert data into html") + pos = 0 + lastPos = len(rawtext) + dataList = [] + for end in sorted(positionMap.keys()): + if end == 0 or end > lastPos: + continue # something's up - can't put a tag in outside ... 
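findAnchors() collects every filepos target into positionMap and then splices the anchor strings in ascending offset order, so no insertion ever shifts a later offset. The same technique in isolation, as a sketch (splice_at_offsets is an illustrative name, not part of this patch):

    def splice_at_offsets(raw, insertions):
        # Walk offsets in sorted order, accumulate the pieces in a list
        # and join once at the end; offsets outside the text are skipped,
        # mirroring the guard in the loop above.
        out, pos = [], 0
        for off in sorted(insertions):
            if off <= 0 or off > len(raw):
                continue
            out.append(raw[pos:off])
            out.append(insertions[off])
            pos = off
        out.append(raw[pos:])
        return b''.join(out)

    # e.g. splice_at_offsets(b'abcdef', {3: b'<a id="filepos3"/>'})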
+ dataList.append(rawtext[pos:end]) + dataList.append(positionMap[end]) + pos = end + dataList.append(rawtext[pos:]) + srctext = b"".join(dataList) + rawtext = None + dataList = None + self.srctext = srctext + self.indx_data = indx_data + return srctext + + def insertHREFS(self): + srctext = self.srctext + rscnames = self.rscnames + metadata = self.metadata + + # put in the hrefs + print("Insert hrefs into html") + # There doesn't seem to be a standard, so search as best as we can + + link_pattern = re.compile(br''']*?)filepos=['"]{0,1}0*(\d+)['"]{0,1}([^>]*?)>''', re.IGNORECASE) + srctext = link_pattern.sub(br'''''', srctext) + + # remove empty anchors + print("Remove empty anchors from html") + srctext = re.sub(br"",br"", srctext) + srctext = re.sub(br"\s*",br"", srctext) + + # convert image references + print("Insert image references into html") + # split string into image tag pieces and other pieces + image_pattern = re.compile(br'''()''', re.IGNORECASE) + image_index_pattern = re.compile(br'''recindex=['"]{0,1}([0-9]+)['"]{0,1}''', re.IGNORECASE) + srcpieces = image_pattern.split(srctext) + srctext = self.srctext = None + + # all odd pieces are image tags (nulls string on even pieces if no space between them in srctext) + for i in range(1, len(srcpieces), 2): + tag = srcpieces[i] + for m in image_index_pattern.finditer(tag): + imageNumber = int(m.group(1)) + imageName = rscnames[imageNumber-1] + if imageName is None: + print("Error: Referenced image %s was not recognized as a valid image" % imageNumber) + else: + replacement = b'src="Images/' + utf8_str(imageName) + b'"' + tag = image_index_pattern.sub(replacement, tag, 1) + srcpieces[i] = tag + srctext = b"".join(srcpieces) + + # add in character set meta into the html header if needed + if 'Codec' in metadata: + srctext = srctext[0:12]+b''+srctext[12:] + return srctext, self.used + + +class XHTMLK8Processor: + + def __init__(self, rscnames, k8proc): + self.rscnames = rscnames + self.k8proc = k8proc + self.used = {} + + def buildXHTML(self): + + # first need to update all links that are internal which + # are based on positions within the xhtml files **BEFORE** + # cutting and pasting any pieces into the xhtml text files + + # kindle:pos:fid:XXXX:off:YYYYYYYYYY (used for internal link within xhtml) + # XXXX is the offset in records into divtbl + # YYYYYYYYYYYY is a base32 number you add to the divtbl insertpos to get final position + + # pos:fid pattern + posfid_pattern = re.compile(br'''()''', re.IGNORECASE) + posfid_index_pattern = re.compile(br'''['"]kindle:pos:fid:([0-9|A-V]+):off:([0-9|A-V]+).*?["']''') + + parts = [] + print("Building proper xhtml for each file") + for i in range(self.k8proc.getNumberOfParts()): + part = self.k8proc.getPart(i) + [partnum, dir, filename, beg, end, aidtext] = self.k8proc.getPartInfo(i) + + # internal links + srcpieces = posfid_pattern.split(part) + for j in range(1, len(srcpieces),2): + tag = srcpieces[j] + if tag.startswith(b'<'): + for m in posfid_index_pattern.finditer(tag): + posfid = m.group(1) + offset = m.group(2) + filename, idtag = self.k8proc.getIDTagByPosFid(posfid, offset) + if idtag == b'': + replacement= b'"' + utf8_str(filename) + b'"' + else: + replacement = b'"' + utf8_str(filename) + b'#' + idtag + b'"' + tag = posfid_index_pattern.sub(replacement, tag, 1) + srcpieces[j] = tag + part = b"".join(srcpieces) + parts.append(part) + + # we are free to cut and paste as we see fit + # we can safely remove all of the Kindlegen generated aid tags + # change aid ids that are in 
k8proc.linked_aids to xhtml ids + find_tag_with_aid_pattern = re.compile(br'''(<[^>]*\said\s*=[^>]*>)''', re.IGNORECASE) + within_tag_aid_position_pattern = re.compile(br'''\said\s*=['"]([^'"]*)['"]''') + for i in range(len(parts)): + part = parts[i] + srcpieces = find_tag_with_aid_pattern.split(part) + for j in range(len(srcpieces)): + tag = srcpieces[j] + if tag.startswith(b'<'): + for m in within_tag_aid_position_pattern.finditer(tag): + try: + aid = m.group(1) + except IndexError: + aid = None + replacement = b'' + if aid in self.k8proc.linked_aids: + replacement = b' id="aid-' + aid + b'"' + tag = within_tag_aid_position_pattern.sub(replacement, tag, 1) + srcpieces[j] = tag + part = b"".join(srcpieces) + parts[i] = part + + # we can safely replace all of the Kindlegen generated data-AmznPageBreak tags + # with page-break-after style patterns + find_tag_with_AmznPageBreak_pattern = re.compile(br'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE) + within_tag_AmznPageBreak_position_pattern = re.compile(br'''\sdata-AmznPageBreak=['"]([^'"]*)['"]''') + for i in range(len(parts)): + part = parts[i] + srcpieces = find_tag_with_AmznPageBreak_pattern.split(part) + for j in range(len(srcpieces)): + tag = srcpieces[j] + if tag.startswith(b'<'): + srcpieces[j] = within_tag_AmznPageBreak_position_pattern.sub( + lambda m:b' style="page-break-after:' + m.group(1) + b'"', tag) + part = b"".join(srcpieces) + parts[i] = part + + # we have to handle substitutions for the flows pieces first as they may + # be inlined into the xhtml text + # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images) + # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc) + # kindle:embed:XXXX (used for fonts) + + flows = [] + flows.append(None) + flowinfo = [] + flowinfo.append([None, None, None, None]) + + # regular expression search patterns + img_pattern = re.compile(br'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE) + img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)[^'"]*['")]''', re.IGNORECASE) + + tag_pattern = re.compile(br'''(<[^>]*>)''') + flow_pattern = re.compile(br'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE) + + url_pattern = re.compile(br'''(url\(.*?\))''', re.IGNORECASE) + url_img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)\?mime=image/[^\)]*["')]''', re.IGNORECASE) + font_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)["')]''', re.IGNORECASE) + url_css_index_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE) + url_svg_image_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=image/svg\+xml[^\)]*''', re.IGNORECASE) + + for i in range(1, self.k8proc.getNumberOfFlows()): + [ftype, format, dir, filename] = self.k8proc.getFlowInfo(i) + flowpart = self.k8proc.getFlow(i) + + # links to raster image files from image tags + # image_pattern + srcpieces = img_pattern.split(flowpart) + for j in range(1, len(srcpieces),2): + tag = srcpieces[j] + if tag.startswith(b']*>)''') + flow_pattern = re.compile(br'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE) + for i in range(len(parts)): + part = parts[i] + [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] + # flow pattern + srcpieces = tag_pattern.split(part) + for j in range(1, len(srcpieces),2): + tag = srcpieces[j] + if tag.startswith(b'<'): + for m in flow_pattern.finditer(tag): + num = fromBase32(m.group(1)) + if num > 0 and num < len(self.k8proc.flowinfo): + [typ, fmt, pdir, 
fnm] = self.k8proc.getFlowInfo(num) + flowpart = flows[num] + if fmt == b'inline': + tag = flowpart + else: + replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"' + tag = flow_pattern.sub(replacement, tag, 1) + self.used[fnm] = 'used' + else: + print("warning: ignoring non-existent flow link", tag, " value 0x%x" % num) + srcpieces[j] = tag + part = b''.join(srcpieces) + + # store away modified version + parts[i] = part + + # Handle any embedded raster images links in style= attributes urls + style_pattern = re.compile(br'''(<[a-zA-Z0-9]+\s[^>]*style\s*=\s*[^>]*>)''', re.IGNORECASE) + img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)[^'"]*['")]''', re.IGNORECASE) + + for i in range(len(parts)): + part = parts[i] + [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] + + # replace urls in style attributes + srcpieces = style_pattern.split(part) + for j in range(1, len(srcpieces),2): + tag = srcpieces[j] + if b'kindle:embed' in tag: + for m in img_index_pattern.finditer(tag): + imageNumber = fromBase32(m.group(1)) + imageName = self.rscnames[imageNumber-1] + osep = m.group()[0:1] + csep = m.group()[-1:] + if imageName is not None: + replacement = osep + b'../Images/'+ utf8_str(imageName) + csep + self.used[imageName] = 'used' + tag = img_index_pattern.sub(replacement, tag, 1) + else: + print("Error: Referenced image %s in style url was not recognized in %s" % (imageNumber, tag)) + srcpieces[j] = tag + part = b"".join(srcpieces) + + # store away modified version + parts[i] = part + + # Handle any embedded raster images links in the xhtml text + # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images) + img_pattern = re.compile(br'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE) + img_index_pattern = re.compile(br'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''') + + for i in range(len(parts)): + part = parts[i] + [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] + + # links to raster image files + # image_pattern + srcpieces = img_pattern.split(part) + for j in range(1, len(srcpieces),2): + tag = srcpieces[j] + if tag.startswith(b' remove value="XX" attributes since these are illegal + tag_pattern = re.compile(br'''(<[^>]*>)''') + li_value_pattern = re.compile(br'''\svalue\s*=\s*['"][^'"]*['"]''', re.IGNORECASE) + + for i in range(len(parts)): + part = parts[i] + [partnum, dir, filename, beg, end, aidtext] = self.k8proc.partinfo[i] + + # tag pattern + srcpieces = tag_pattern.split(part) + for j in range(1, len(srcpieces),2): + tag = srcpieces[j] + if tag.startswith(b'H', data, idxtPos + 4 + (2 * j)) + idxPositions.append(pos) + # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!) 
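The index reader above locates each entry through the IDXT section: after the 4-byte tag come one 2-byte big-endian offset per entry, with the IDXT position itself appended as a final sentinel so that entry j spans offsets[j]:offsets[j+1]. A sketch under those assumptions (read_idxt_offsets is an illustrative name; trimming any zero fill before IDXT is left to the caller, as the comment above notes):

    import struct

    def read_idxt_offsets(data, idxt_pos, entry_count):
        # One big-endian uint16 per entry, then idxt_pos as sentinel.
        assert data[idxt_pos:idxt_pos + 4] == b'IDXT'
        offsets = [struct.unpack_from(b'>H', data, idxt_pos + 4 + 2 * j)[0]
                   for j in range(entry_count)]
        offsets.append(idxt_pos)
        return offsets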
+ idxPositions.append(idxtPos) + # for each entry in the IDXT build up the tagMap and any associated text + for j in range(entryCount): + startPos = idxPositions[j] + endPos = idxPositions[j+1] + textLength = ord(data[startPos:startPos+1]) + text = data[startPos+1:startPos+1+textLength] + if hordt2 is not None: + text = b''.join(bchr(hordt2[bord(x)]) for x in text) + tagMap = getTagMap(controlByteCount, tagTable, data, startPos+1+textLength, endPos) + outtbl.append([text, tagMap]) + if self.DEBUG: + print(tagMap) + print(text) + return outtbl, ctoc_text + + def parseINDXHeader(self, data): + "read INDX header" + if not data[:4] == b'INDX': + print("Warning: index section is not INDX") + return False + words = ( + 'len', 'nul1', 'type', 'gen', 'start', 'count', 'code', + 'lng', 'total', 'ordt', 'ligt', 'nligt', 'nctoc' + ) + num = len(words) + values = struct.unpack(bstr('>%dL' % num), data[4:4*(num+1)]) + header = {} + for n in range(num): + header[words[n]] = values[n] + + ordt1 = None + ordt2 = None + + ocnt, oentries, op1, op2, otagx = struct.unpack_from(b'>LLLLL',data, 0xa4) + if header['code'] == 0xfdea or ocnt != 0 or oentries > 0: + # horribly hacked up ESP (sample) mobi books use two ORDT sections but never specify + # them in the proper place in the header. They seem to be codepage 65002 which seems + # to be some sort of strange EBCDIC utf-8 or 16 encoded strings + + # so we need to look for them and store them away to process leading text + # ORDT1 has 1 byte long entries, ORDT2 has 2 byte long entries + # we only ever seem to use the seocnd but ... + assert(ocnt == 1) + assert(data[op1:op1+4] == b'ORDT') + assert(data[op2:op2+4] == b'ORDT') + ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1+4) + ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2+4) + + if self.DEBUG: + print("parsed INDX header:") + for n in words: + print(n, "%X" % header[n],) + print("") + return header, ordt1, ordt2 + + def readCTOC(self, txtdata): + # read all blocks from CTOC + ctoc_data = {} + offset = 0 + while offset next bytes: name + name = txtdata[offset:offset+ilen] + offset += ilen + if self.DEBUG: + print("name length is ", ilen) + print(idx_offs, name) + ctoc_data[idx_offs] = name + return ctoc_data + + +def getVariableWidthValue(data, offset): + ''' + Decode variable width value from given bytes. + + @param data: The bytes to decode. + @param offset: The start offset into data. + @return: Tuple of consumed bytes count and decoded value. + ''' + value = 0 + consumed = 0 + finished = False + while not finished: + v = data[offset + consumed: offset + consumed + 1] + consumed += 1 + if ord(v) & 0x80: + finished = True + value = (value << 7) | (ord(v) & 0x7f) + return consumed, value + + +def readTagSection(start, data): + ''' + Read tag section from given data. + + @param start: The start position in the data. + @param data: The data to process. + @return: Tuple of control byte count and list of tag tuples. + ''' + controlByteCount = 0 + tags = [] + if data[start:start+4] == b"TAGX": + firstEntryOffset, = struct.unpack_from(b'>L', data, start + 0x04) + controlByteCount, = struct.unpack_from(b'>L', data, start + 0x08) + + # Skip the first 12 bytes already read above. + for i in range(12, firstEntryOffset, 4): + pos = start + i + tags.append((ord(data[pos:pos+1]), ord(data[pos+1:pos+2]), ord(data[pos+2:pos+3]), ord(data[pos+3:pos+4]))) + return controlByteCount, tags + + +def countSetBits(value, bits=8): + ''' + Count the set bits in the given value. 
+ + @param value: Integer value. + @param bits: The number of bits of the input value (defaults to 8). + @return: Number of set bits. + ''' + count = 0 + for _ in range(bits): + if value & 0x01 == 0x01: + count += 1 + value = value >> 1 + return count + + +def getTagMap(controlByteCount, tagTable, entryData, startPos, endPos): + ''' + Create a map of tags and values from the given byte section. + + @param controlByteCount: The number of control bytes. + @param tagTable: The tag table. + @param entryData: The data to process. + @param startPos: The starting position in entryData. + @param endPos: The end position in entryData or None if it is unknown. + @return: Hashmap of tag and list of values. + ''' + tags = [] + tagHashMap = {} + controlByteIndex = 0 + dataStart = startPos + controlByteCount + + for tag, valuesPerEntry, mask, endFlag in tagTable: + if endFlag == 0x01: + controlByteIndex += 1 + continue + cbyte = ord(entryData[startPos + controlByteIndex:startPos + controlByteIndex+1]) + if 0: + print("Control Byte Index %0x , Control Byte Value %0x" % (controlByteIndex, cbyte)) + + value = ord(entryData[startPos + controlByteIndex:startPos + controlByteIndex+1]) & mask + if value != 0: + if value == mask: + if countSetBits(mask) > 1: + # If all bits of masked value are set and the mask has more than one bit, a variable width value + # will follow after the control bytes which defines the length of bytes (NOT the value count!) + # which will contain the corresponding variable width values. + consumed, value = getVariableWidthValue(entryData, dataStart) + dataStart += consumed + tags.append((tag, None, value, valuesPerEntry)) + else: + tags.append((tag, 1, None, valuesPerEntry)) + else: + # Shift bits to get the masked value. + while mask & 0x01 == 0: + mask = mask >> 1 + value = value >> 1 + tags.append((tag, value, None, valuesPerEntry)) + for tag, valueCount, valueBytes, valuesPerEntry in tags: + values = [] + if valueCount is not None: + # Read valueCount * valuesPerEntry variable width values. + for _ in range(valueCount): + for _ in range(valuesPerEntry): + consumed, data = getVariableWidthValue(entryData, dataStart) + dataStart += consumed + values.append(data) + else: + # Convert valueBytes to variable width values. + totalConsumed = 0 + while totalConsumed < valueBytes: + # Does this work for valuesPerEntry != 1? + consumed, data = getVariableWidthValue(entryData, dataStart) + dataStart += consumed + totalConsumed += consumed + values.append(data) + if totalConsumed != valueBytes: + print("Error: Should consume %s bytes, but consumed %s" % (valueBytes, totalConsumed)) + tagHashMap[tag] = values + # Test that all bytes have been processed if endPos is given. + if endPos is not None and dataStart != endPos: + # The last entry might have some zero padding bytes, so complain only if non zero bytes are left. 
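getTagMap() leans on getVariableWidthValue(), MOBI's forward-read varint: seven data bits per byte, most significant group first, with the high bit marking the final byte. A hypothetical encoder, useful for checking the decoder above, could look like this:

    def encode_variable_width(value):
        # Inverse of getVariableWidthValue() above: emit 7-bit groups,
        # most significant first, and set the high bit on the last byte.
        groups = [value & 0x7F]
        value >>= 7
        while value:
            groups.append(value & 0x7F)
            value >>= 7
        groups.reverse()
        groups[-1] |= 0x80  # terminator flag
        return bytes(groups)

    # round trip: getVariableWidthValue(encode_variable_width(300), 0) == (2, 300)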
+ for char in entryData[dataStart:endPos]: + if bord(char) != 0: + print("Warning: There are unprocessed index bytes left: %s" % toHex(entryData[dataStart:endPos])) + if 0: + print("controlByteCount: %s" % controlByteCount) + print("tagTable: %s" % tagTable) + print("data: %s" % toHex(entryData[startPos:endPos])) + print("tagHashMap: %s" % tagHashMap) + break + + return tagHashMap diff --git a/src/epy_reader/tools/KindleUnpack/mobi_k8proc.py b/src/epy_reader/tools/KindleUnpack/mobi_k8proc.py new file mode 100644 index 0000000..5b8274e --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_k8proc.py @@ -0,0 +1,496 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import PY2, bstr, utf8_str + +if PY2: + range = xrange + +import os + +import struct +# note: struct pack, unpack, unpack_from all require bytestring format +# data all the way up to at least python 2.7.5, python 3 okay with bytestring + +import re +# note: re requites the pattern to be the exact same type as the data to be searched in python3 +# but u"" is not allowed for the pattern itself only b"" + +from .mobi_index import MobiIndex +from .mobi_utils import fromBase32 +from .unipath import pathof + +_guide_types = [b'cover',b'title-page',b'toc',b'index',b'glossary',b'acknowledgements', + b'bibliography',b'colophon',b'copyright-page',b'dedication', + b'epigraph',b'foreward',b'loi',b'lot',b'notes',b'preface',b'text'] + +# locate beginning and ending positions of tag with specific aid attribute +def locate_beg_end_of_tag(ml, aid): + pattern = utf8_str(r'''<[^>]*\said\s*=\s*['"]%s['"][^>]*>''' % aid) + aid_pattern = re.compile(pattern,re.IGNORECASE) + for m in re.finditer(aid_pattern, ml): + plt = m.start() + pgt = ml.find(b'>',plt+1) + return plt, pgt + return 0, 0 + + +# iterate over all tags in block in reverse order, i.e. 
last ta to first tag +def reverse_tag_iter(block): + end = len(block) + while True: + pgt = block.rfind(b'>', 0, end) + if pgt == -1: + break + plt = block.rfind(b'<', 0, pgt) + if plt == -1: + break + yield block[plt:pgt+1] + end = plt + + +class K8Processor: + + def __init__(self, mh, sect, files, debug=False): + self.sect = sect + self.files = files + self.mi = MobiIndex(sect) + self.mh = mh + self.skelidx = mh.skelidx + self.fragidx = mh.fragidx + self.guideidx = mh.guideidx + self.fdst = mh.fdst + self.flowmap = {} + self.flows = None + self.flowinfo = [] + self.parts = None + self.partinfo = [] + self.linked_aids = set() + self.fdsttbl= [0,0xffffffff] + self.DEBUG = debug + + # read in and parse the FDST info which is very similar in format to the Palm DB section + # parsing except it provides offsets into rawML file and not the Palm DB file + # this is needed to split up the final css, svg, etc flow section + # that can exist at the end of the rawML file + if self.fdst != 0xffffffff: + header = self.sect.loadSection(self.fdst) + if header[0:4] == b"FDST": + num_sections, = struct.unpack_from(b'>L', header, 0x08) + self.fdsttbl = struct.unpack_from(bstr('>%dL' % (num_sections*2)), header, 12)[::2] + (mh.rawSize, ) + sect.setsectiondescription(self.fdst,"KF8 FDST INDX") + if self.DEBUG: + print("\nFDST Section Map: %d sections" % num_sections) + for j in range(num_sections): + print("Section %d: 0x%08X - 0x%08X" % (j, self.fdsttbl[j],self.fdsttbl[j+1])) + else: + print("\nError: K8 Mobi with Missing FDST info") + + # read/process skeleton index info to create the skeleton table + skeltbl = [] + if self.skelidx != 0xffffffff: + # for i in range(2): + # fname = 'skel%04d.dat' % i + # data = self.sect.loadSection(self.skelidx + i) + # with open(pathof(fname), 'wb') as f: + # f.write(data) + outtbl, ctoc_text = self.mi.getIndexData(self.skelidx, "KF8 Skeleton") + fileptr = 0 + for [text, tagMap] in outtbl: + # file number, skeleton name, fragtbl record count, start position, length + skeltbl.append([fileptr, text, tagMap[1][0], tagMap[6][0], tagMap[6][1]]) + fileptr += 1 + self.skeltbl = skeltbl + if self.DEBUG: + print("\nSkel Table: %d entries" % len(self.skeltbl)) + print("table: filenum, skeleton name, frag tbl record count, start position, length") + for j in range(len(self.skeltbl)): + print(self.skeltbl[j]) + + # read/process the fragment index to create the fragment table + fragtbl = [] + if self.fragidx != 0xffffffff: + # for i in range(3): + # fname = 'frag%04d.dat' % i + # data = self.sect.loadSection(self.fragidx + i) + # with open(pathof(fname), 'wb') as f: + # f.write(data) + outtbl, ctoc_text = self.mi.getIndexData(self.fragidx, "KF8 Fragment") + for [text, tagMap] in outtbl: + # insert position, ctoc offset (aidtext), file number, sequence number, start position, length + ctocoffset = tagMap[2][0] + ctocdata = ctoc_text[ctocoffset] + fragtbl.append([int(text), ctocdata, tagMap[3][0], tagMap[4][0], tagMap[6][0], tagMap[6][1]]) + self.fragtbl = fragtbl + if self.DEBUG: + print("\nFragment Table: %d entries" % len(self.fragtbl)) + print("table: file position, link id text, file num, sequence number, start position, length") + for j in range(len(self.fragtbl)): + print(self.fragtbl[j]) + + # read / process guide index for guide elements of opf + guidetbl = [] + if self.guideidx != 0xffffffff: + # for i in range(3): + # fname = 'guide%04d.dat' % i + # data = self.sect.loadSection(self.guideidx + i) + # with open(pathof(fname), 'wb') as f: + # f.write(data) + outtbl, ctoc_text = 
self.mi.getIndexData(self.guideidx, "KF8 Guide elements)") + for [text, tagMap] in outtbl: + # ref_type, ref_title, frag number + ctocoffset = tagMap[1][0] + ref_title = ctoc_text[ctocoffset] + ref_type = text + fileno = None + if 3 in tagMap: + fileno = tagMap[3][0] + if 6 in tagMap: + fileno = tagMap[6][0] + guidetbl.append([ref_type, ref_title, fileno]) + self.guidetbl = guidetbl + if self.DEBUG: + print("\nGuide Table: %d entries" % len(self.guidetbl)) + print("table: ref_type, ref_title, fragtbl entry number") + for j in range(len(self.guidetbl)): + print(self.guidetbl[j]) + + def buildParts(self, rawML): + # now split the rawML into its flow pieces + self.flows = [] + for j in range(0, len(self.fdsttbl)-1): + start = self.fdsttbl[j] + end = self.fdsttbl[j+1] + self.flows.append(rawML[start:end]) + + # the first piece represents the xhtml text + text = self.flows[0] + self.flows[0] = b'' + + # walk the and fragment tables to build original source xhtml files + # *without* destroying any file position information needed for later href processing + # and create final list of file separation start: stop points and etc in partinfo + if self.DEBUG: + print("\nRebuilding flow piece 0: the main body of the ebook") + self.parts = [] + self.partinfo = [] + fragptr = 0 + baseptr = 0 + cnt = 0 + filename = 'part%04d.xhtml' % cnt + for [skelnum, skelname, fragcnt, skelpos, skellen] in self.skeltbl: + baseptr = skelpos + skellen + skeleton = text[skelpos: baseptr] + aidtext = "0" + for i in range(fragcnt): + [insertpos, idtext, filenum, seqnum, startpos, length] = self.fragtbl[fragptr] + aidtext = idtext[12:-2] + if i == 0: + filename = 'part%04d.xhtml' % filenum + slice = text[baseptr: baseptr + length] + insertpos = insertpos - skelpos + head = skeleton[:insertpos] + tail = skeleton[insertpos:] + actual_inspos = insertpos + if (tail.find(b'>') < tail.find(b'<') or head.rfind(b'>') < head.rfind(b'<')): + # There is an incomplete tag in either the head or tail. + # This can happen for some badly formed KF8 files + print('The fragment table for %s has incorrect insert position. Calculating manually.' % skelname) + bp, ep = locate_beg_end_of_tag(skeleton, aidtext) + if bp != ep: + actual_inspos = ep + 1 + startpos + if insertpos != actual_inspos: + print("fixed corrupt fragment table insert position", insertpos+skelpos, actual_inspos+skelpos) + insertpos = actual_inspos + self.fragtbl[fragptr][0] = actual_inspos + skelpos + skeleton = skeleton[0:insertpos] + slice + skeleton[insertpos:] + baseptr = baseptr + length + fragptr += 1 + cnt += 1 + self.parts.append(skeleton) + self.partinfo.append([skelnum, 'Text', filename, skelpos, baseptr, aidtext]) + + assembled_text = b''.join(self.parts) + if self.DEBUG: + outassembled = os.path.join(self.files.k8dir, 'assembled_text.dat') + with open(pathof(outassembled),'wb') as f: + f.write(assembled_text) + + # The primary css style sheet is typically stored next followed by any + # snippets of code that were previously inlined in the + # original xhtml but have been stripped out and placed here. + # This can include local CDATA snippets and and svg sections. 
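The splice in buildParts() above rebuilds each original xhtml file by inserting fragments into its skeleton at the positions recorded in the fragment table, with each position first rebased against the skeleton assembled so far. Reduced to its core, the operation is (assemble_part is an illustrative name, assuming positions already made skeleton-relative):

    def assemble_part(skeleton, fragments):
        # fragments: iterable of (insertpos, data) in table order; this is
        # the "skeleton[0:insertpos] + slice + skeleton[insertpos:]" step
        # from buildParts() above, without the corruption repair.
        for insertpos, data in fragments:
            skeleton = skeleton[:insertpos] + data + skeleton[insertpos:]
        return skeleton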
+ + # The problem is that for most browsers and ereaders, you can not + # use to import any svg image that itself + # properly uses an tag to import some raster image - it + # should work according to the spec but does not for almost all browsers + # and ereaders and causes epub validation issues because those raster + # images are in manifest but not in xhtml text - since they only + # referenced from an svg image + + # So we need to check the remaining flow pieces to see if they are css + # or svg images. if svg images, we must check if they have an + # and if so inline them into the xhtml text pieces. + + # there may be other sorts of pieces stored here but until we see one + # in the wild to reverse engineer we won't be able to tell + self.flowinfo.append([None, None, None, None]) + svg_tag_pattern = re.compile(br'''(]*>)''', re.IGNORECASE) + image_tag_pattern = re.compile(br'''(]*>)''', re.IGNORECASE) + for j in range(1,len(self.flows)): + flowpart = self.flows[j] + nstr = '%04d' % j + m = re.search(svg_tag_pattern, flowpart) + if m is not None: + # svg + ptype = b'svg' + start = m.start() + m2 = re.search(image_tag_pattern, flowpart) + if m2 is not None: + pformat = b'inline' + pdir = None + fname = None + # strip off anything before = 0: + ptype = b'css' + flowpart = b'\n' + pformat = b'inline' + pdir = None + fname = None + else: + # css - assume as standalone css file + ptype = b'css' + pformat = b'file' + pdir = "Styles" + fname = 'style' + nstr + '.css' + + self.flows[j] = flowpart + self.flowinfo.append([ptype, pformat, pdir, fname]) + + if self.DEBUG: + print("\nFlow Map: %d entries" % len(self.flowinfo)) + for fi in self.flowinfo: + print(fi) + print("\n") + + print("\nXHTML File Part Position Information: %d entries" % len(self.partinfo)) + for pi in self.partinfo: + print(pi) + + if False: # self.Debug: + # dump all of the locations of the aid tags used in TEXT + # find id links only inside of tags + # inside any < > pair find all "aid=' and return whatever is inside the quotes + # [^>]* means match any amount of chars except for '>' char + # [^'"] match any amount of chars except for the quote character + # \s* means match any amount of whitespace + print("\npositions of all aid= pieces") + id_pattern = re.compile(br'''<[^>]*\said\s*=\s*['"]([^'"]*)['"][^>]*>''',re.IGNORECASE) + for m in re.finditer(id_pattern, rawML): + [filename, partnum, start, end] = self.getFileInfo(m.start()) + [seqnum, idtext] = self.getFragTblInfo(m.start()) + value = fromBase32(m.group(1)) + print(" aid: %s value: %d at: %d -> part: %d, start: %d, end: %d" % (m.group(1), value, m.start(), partnum, start, end)) + print(" %s fragtbl entry %d" % (idtext, seqnum)) + + return + + # get information fragment table entry by pos + def getFragTblInfo(self, pos): + for j in range(len(self.fragtbl)): + [insertpos, idtext, filenum, seqnum, startpos, length] = self.fragtbl[j] + if pos >= insertpos and pos < (insertpos + length): + # why are these "in: and before: added here + return seqnum, b'in: ' + idtext + if pos < insertpos: + return seqnum, b'before: ' + idtext + return None, None + + # get information about the part (file) that exists at pos in original rawML + def getFileInfo(self, pos): + for [partnum, pdir, filename, start, end, aidtext] in self.partinfo: + if pos >= start and pos < end: + return filename, partnum, start, end + return None, None, None, None + + # accessor functions to properly protect the internal structure + def getNumberOfParts(self): + return len(self.parts) + + def getPart(self,i): 
+ if i >= 0 and i < len(self.parts): + return self.parts[i] + return None + + def getPartInfo(self, i): + if i >= 0 and i < len(self.partinfo): + return self.partinfo[i] + return None + + def getNumberOfFlows(self): + return len(self.flows) + + def getFlow(self,i): + # note flows[0] is empty - it was all of the original text + if i > 0 and i < len(self.flows): + return self.flows[i] + return None + + def getFlowInfo(self,i): + # note flowinfo[0] is empty - it was all of the original text + if i > 0 and i < len(self.flowinfo): + return self.flowinfo[i] + return None + + def getIDTagByPosFid(self, posfid, offset): + # first convert kindle:pos:fid and offset info to position in file + # (fromBase32 can handle both string types on input) + row = fromBase32(posfid) + off = fromBase32(offset) + [insertpos, idtext, filenum, seqnm, startpos, length] = self.fragtbl[row] + pos = insertpos + off + fname, pn, skelpos, skelend = self.getFileInfo(pos) + if fname is None: + # pos does not exist + # default to skeleton pos instead + print("Link To Position", pos, "does not exist, retargeting to top of target") + pos = self.skeltbl[filenum][3] + fname, pn, skelpos, skelend = self.getFileInfo(pos) + # an existing "id=" or "name=" attribute must exist in original xhtml otherwise it would not have worked for linking. + # Amazon seems to have added its own additional "aid=" inside tags whose contents seem to represent + # some position information encoded into Base32 name. + # so find the closest "id=" before position the file by actually searching in that file + idtext = self.getIDTag(pos) + return fname, idtext + + def getIDTag(self, pos): + # find the first tag with a named anchor (name or id attribute) before pos + fname, pn, skelpos, skelend = self.getFileInfo(pos) + if pn is None and skelpos is None: + print("Error: getIDTag - no file contains ", pos) + textblock = self.parts[pn] + npos = pos - skelpos + # if npos inside a tag then search all text before the its end of tag marker + pgt = textblock.find(b'>',npos) + plt = textblock.find(b'<',npos) + if plt == npos or pgt < plt: + npos = pgt + 1 + # find id and name attributes only inside of tags + # use a reverse tag search since that is faster + # inside any < > pair find "id=" and "name=" attributes return it + # [^>]* means match any amount of chars except for '>' char + # [^'"] match any amount of chars except for the quote character + # \s* means match any amount of whitespace + textblock = textblock[0:npos] + id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE) + name_pattern = re.compile(br'''<[^>]*\sname\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE) + aid_pattern = re.compile(br'''<[^>]+\s(?:aid|AID)\s*=\s*['"]([^'"]+)['"]''') + for tag in reverse_tag_iter(textblock): + # any ids in the body should default to top of file + if tag[0:6] == b'= start and pos < end: + return [partnum, pdir, filename, start, end, aidtext] + return [None, None, None, None, None, None] + + # fileno is actually a reference into fragtbl (a fragment) + def getGuideText(self): + guidetext = b'' + for [ref_type, ref_title, fileno] in self.guidetbl: + if ref_type == b'thumbimagestandard': + continue + if ref_type not in _guide_types and not ref_type.startswith(b'other.'): + if ref_type == b'start': + ref_type = b'text' + else: + ref_type = b'other.' 
+ ref_type + [pos, idtext, filenum, seqnm, startpos, length] = self.fragtbl[fileno] + [pn, pdir, filename, skelpos, skelend, aidtext] = self.getSkelInfo(pos) + idtext = self.getIDTag(pos) + linktgt = filename.encode('utf-8') + if idtext != b'': + linktgt += b'#' + idtext + guidetext += b'\n' + # opf is encoded utf-8 so must convert any titles properly + guidetext = (guidetext.decode(self.mh.codec)).encode("utf-8") + return guidetext + + def getPageIDTag(self, pos): + # find the first tag with a named anchor (name or id attribute) before pos + # but page map offsets need to little more leeway so if the offset points + # into a tag look for the next ending tag "/>" or "',npos) + plt = textblock.find(b'<',npos) + if plt == npos or pgt < plt: + # we are in a tag + # so find first ending tag + pend1 = textblock.find(b'/>', npos) + pend2 = textblock.find(b' pair find "id=" and "name=" attributes return it + # [^>]* means match any amount of chars except for '>' char + # [^'"] match any amount of chars except for the quote character + # \s* means match any amount of whitespace + textblock = textblock[0:npos] + id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE) + name_pattern = re.compile(br'''<[^>]*\sname\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE) + for tag in reverse_tag_iter(textblock): + # any ids in the body should default to top of file + if tag[0:6] == b'= python 2.7. +""" set to True to use OrderedDict for K8RESCProcessor.parsetag.tattr.""" + +if DEBUG_USE_ORDERED_DICTIONARY: + from collections import OrderedDict as dict_ +else: + dict_ = dict + +from .compatibility_utils import unicode_str + +from .mobi_utils import fromBase32 + +_OPF_PARENT_TAGS = ['xml', 'package', 'metadata', 'dc-metadata', + 'x-metadata', 'manifest', 'spine', 'tours', 'guide'] + +class K8RESCProcessor(object): + + def __init__(self, data, debug=False): + self._debug = debug + self.resc = None + self.opos = 0 + self.extrameta = [] + self.cover_name = None + self.spine_idrefs = {} + self.spine_order = [] + self.spine_pageattributes = {} + self.spine_ppd = None + # need3 indicate the book has fields which require epub3. + # but the estimation of the source epub version from the fields is difficult. + self.need3 = False + self.package_ver = None + self.extra_metadata = [] + self.refines_metadata = [] + self.extra_attributes = [] + # get header + start_pos = data.find(b'<') + self.resc_header = data[:start_pos] + # get resc data length + start = self.resc_header.find(b'=') + 1 + end = self.resc_header.find(b'&', start) + resc_size = 0 + if end > 0: + resc_size = fromBase32(self.resc_header[start:end]) + resc_rawbytes = len(data) - start_pos + if resc_rawbytes == resc_size: + self.resc_length = resc_size + else: + # Most RESC has a nul string at its tail but some do not. 
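The RESC length read here, like the kindle:pos:fid and off values earlier, uses Kindle's base-32 alphabet: the digits 0-9 followed by A-V, matching the [0-9A-V] character classes in the regexes above. A minimal reading of fromBase32() under that assumption (the real implementation lives in mobi_utils and may differ in its case handling):

    def from_base32(text):
        # 32 symbols: '0'-'9' then 'A'-'V'; input is a small bytestring.
        digits = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
        value = 0
        for ch in text.upper():
            value = value * 32 + digits.index(ch)
        return value

    # from_base32(b'10') == 32, from_base32(b'V') == 31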
+ end_pos = data.find(b'\x00', start_pos) + if end_pos < 0: + self.resc_length = resc_rawbytes + else: + self.resc_length = end_pos - start_pos + if self.resc_length != resc_size: + print("Warning: RESC section length({:d}bytes) does not match its size({:d}bytes).".format(self.resc_length, resc_size)) + # now parse RESC after converting it to unicode from utf-8 + try: + self.resc = unicode_str(data[start_pos:start_pos+self.resc_length]) + except UnicodeDecodeError: + self.resc = unicode_str(data[start_pos:start_pos+self.resc_length], enc='latin-1') + self.parseData() + + def prepend_to_spine(self, key, idref, linear, properties): + self.spine_order = [key] + self.spine_order + self.spine_idrefs[key] = idref + attributes = {} + if linear is not None: + attributes['linear'] = linear + if properties is not None: + attributes['properties'] = properties + self.spine_pageattributes[key] = attributes + + # RESC tag iterator + def resc_tag_iter(self): + tcontent = last_tattr = None + prefix = [''] + while True: + text, tag = self.parseresc() + if text is None and tag is None: + break + if text is not None: + tcontent = text.rstrip(' \r\n') + else: # we have a tag + ttype, tname, tattr = self.parsetag(tag) + if ttype == 'begin': + tcontent = None + prefix.append(tname + '.') + if tname in _OPF_PARENT_TAGS: + yield ''.join(prefix), tname, tattr, tcontent + else: + last_tattr = tattr + else: # single or end + if ttype == 'end': + prefix.pop() + tattr = last_tattr + last_tattr = None + if tname in _OPF_PARENT_TAGS: + tname += '-end' + yield ''.join(prefix), tname, tattr, tcontent + tcontent = None + + # now parse the RESC to extract spine and extra metadata info + def parseData(self): + for prefix, tname, tattr, tcontent in self.resc_tag_iter(): + if self._debug: + print(" Parsing RESC: ", prefix, tname, tattr, tcontent) + if tname == 'package': + self.package_ver = tattr.get('version', '2.0') + package_prefix = tattr.get('prefix','') + if self.package_ver.startswith('3') or package_prefix.startswith('rendition'): + self.need3 = True + if tname == 'spine': + self.spine_ppd = tattr.get('page-progession-direction', None) + if self.spine_ppd is not None and self.spine_ppd == 'rtl': + self.need3 = True + if tname == 'itemref': + skelid = tattr.pop('skelid', None) + if skelid is None and len(self.spine_order) == 0: + # assume it was removed initial coverpage + skelid = 'coverpage' + tattr['linear'] = 'no' + self.spine_order.append(skelid) + idref = tattr.pop('idref', None) + if idref is not None: + idref = 'x_' + idref + self.spine_idrefs[skelid] = idref + if 'id' in tattr: + del tattr['id'] + # tattr["id"] = 'x_' + tattr["id"] + if 'properties' in tattr: + self.need3 = True + self.spine_pageattributes[skelid] = tattr + if tname == 'meta' or tname.startswith('dc:'): + if 'refines' in tattr or 'property' in tattr: + self.need3 = True + if tattr.get('name','') == 'cover': + cover_name = tattr.get('content',None) + if cover_name is not None: + cover_name = 'x_' + cover_name + self.cover_name = cover_name + else: + self.extrameta.append([tname, tattr, tcontent]) + + # parse and return either leading text or the next tag + def parseresc(self): + p = self.opos + if p >= len(self.resc): + return None, None + if self.resc[p] != '<': + res = self.resc.find('<',p) + if res == -1 : + res = len(self.resc) + self.opos = res + return self.resc[p:res], None + # handle comment as a special case + if self.resc[p:p+4] == '',p+1) + if te != -1: + te = te+2 + else: + te = self.resc.find('>',p+1) + ntb = 
self.resc.find('<',p+1) + if ntb != -1 and ntb < te: + self.opos = ntb + return self.resc[p:ntb], None + self.opos = te + 1 + return None, self.resc[p:te+1] + + # parses tag to identify: [tname, ttype, tattr] + # tname: tag name + # ttype: tag type ('begin', 'end' or 'single'); + # tattr: dictionary of tag atributes + def parsetag(self, s): + p = 1 + tname = None + ttype = None + tattr = dict_() + while s[p:p+1] == ' ' : + p += 1 + if s[p:p+1] == '/': + ttype = 'end' + p += 1 + while s[p:p+1] == ' ' : + p += 1 + b = p + while s[p:p+1] not in ('>', '/', ' ', '"', "'",'\r','\n') : + p += 1 + tname=s[b:p].lower() + # some special cases + if tname == '?xml': + tname = 'xml' + if tname == '!--': + ttype = 'single' + comment = s[p:-3].strip() + tattr['comment'] = comment + if ttype is None: + # parse any attributes of begin or single tags + while s.find('=',p) != -1 : + while s[p:p+1] == ' ' : + p += 1 + b = p + while s[p:p+1] != '=' : + p += 1 + aname = s[b:p].lower() + aname = aname.rstrip(' ') + p += 1 + while s[p:p+1] == ' ' : + p += 1 + if s[p:p+1] in ('"', "'") : + p = p + 1 + b = p + while s[p:p+1] not in ('"', "'"): + p += 1 + val = s[b:p] + p += 1 + else : + b = p + while s[p:p+1] not in ('>', '/', ' ') : + p += 1 + val = s[b:p] + tattr[aname] = val + if ttype is None: + ttype = 'begin' + if s.find('/',p) >= 0: + ttype = 'single' + return ttype, tname, tattr + + def taginfo_toxml(self, taginfo): + res = [] + tname, tattr, tcontent = taginfo + res.append('<' + tname) + if tattr is not None: + for key in tattr: + res.append(' ' + key + '="'+tattr[key]+'"') + if tcontent is not None: + res.append('>' + tcontent + '\n') + else: + res.append('/>\n') + return "".join(res) + + def hasSpine(self): + return len(self.spine_order) > 0 + + def needEPUB3(self): + return self.need3 + + def hasRefines(self): + for [tname, tattr, tcontent] in self.extrameta: + if 'refines' in tattr: + return True + return False + + def createMetadata(self, epubver): + for taginfo in self.extrameta: + tname, tattr, tcontent = taginfo + if 'refines' in tattr: + if epubver == 'F' and 'property' in tattr: + attr = ' id="%s" opf:%s="%s"\n' % (tattr['refines'], tattr['property'], tcontent) + self.extra_attributes.append(attr) + else: + tag = self.taginfo_toxml(taginfo) + self.refines_metadata.append(tag) + else: + tag = self.taginfo_toxml(taginfo) + self.extra_metadata.append(tag) diff --git a/src/epy_reader/tools/KindleUnpack/mobi_nav.py b/src/epy_reader/tools/KindleUnpack/mobi_nav.py new file mode 100644 index 0000000..16fb0be --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_nav.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import unicode_str +import os +from .unipath import pathof + +import re +# note: re requites the pattern to be the exact same type as the data to be searched in python3 +# but u"" is not allowed for the pattern itself only b"" + +DEBUG_NAV = False + +FORCE_DEFAULT_TITLE = False +""" Set to True to force to use the default title. """ + +NAVIGATION_FINENAME = 'nav.xhtml' +""" The name for the navigation document. """ + +DEFAULT_TITLE = 'Navigation' +""" The default title for the navigation document. 
""" + +class NAVProcessor(object): + + def __init__(self, files): + self.files = files + self.navname = NAVIGATION_FINENAME + + def buildLandmarks(self, guidetext): + header = '' + header += ' \n' + + type_map = { + 'cover' : 'cover', + 'title-page' : 'title-page', + # ?: 'frontmatter', + 'text' : 'bodymatter', + # ?: 'backmatter', + 'toc' : 'toc', + 'loi' : 'loi', + 'lot' : 'lot', + 'preface' : 'preface', + 'bibliography' : 'bibliography', + 'index' : 'index', + 'glossary' : 'glossary', + 'acknowledgements' : 'acknowledgements', + 'colophon' : None, + 'copyright-page' : None, + 'dedication' : None, + 'epigraph' : None, + 'foreword' : None, + 'notes' : None + } + + re_type = re.compile(r'\s+type\s*=\s*"(.*?)"', re.I) + re_title = re.compile(r'\s+title\s*=\s*"(.*?)"', re.I) + re_link = re.compile(r'\s+href\s*=\s*"(.*?)"', re.I) + dir_ = os.path.relpath(self.files.k8text, self.files.k8oebps).replace('\\', '/') + + data = '' + references = re.findall(r'', unicode_str(guidetext), re.I) + for reference in references: + mo_type = re_type.search(reference) + mo_title = re_title.search(reference) + mo_link = re_link.search(reference) + if mo_type is not None: + type_ = type_map.get(mo_type.group(1), None) + else: + type_ = None + if mo_title is not None: + title = mo_title.group(1) + else: + title = None + if mo_link is not None: + link = mo_link.group(1) + else: + link = None + + if type_ is not None and title is not None and link is not None: + link = os.path.relpath(link, dir_).replace('\\', '/') + data += element.format(type_, link, title) + if len(data) > 0: + return header + data + footer + else: + return '' + + def buildTOC(self, indx_data): + header = '' + header += ' \n' + + # recursive part + def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1): + if start>len(indx_data) or end>len(indx_data): + print("Warning (in buildTOC): missing INDX child entries", start, end, len(indx_data)) + return '' + if DEBUG_NAV: + print("recursINDX (in buildTOC) lvl %d from %d to %d" % (lvl, start, end)) + xhtml = '' + if start <= 0: + start = 0 + if end <= 0: + end = len(indx_data) + if lvl > max_lvl: + max_lvl = lvl + + indent1 = ' ' * (2 + lvl * 2) + indent2 = ' ' * (3 + lvl * 2) + xhtml += indent1 + '
<ol>\n' + for i in range(start, end): + e = indx_data[i] + htmlfile = e['filename'] + desttag = e['idtag'] + text = e['text'] + if not e['hlvl'] == lvl: + continue + num += 1 + if desttag == '': + link = htmlfile + else: + link = '{:s}#{:s}'.format(htmlfile, desttag) + xhtml += indent2 + '<li>' + entry = '<a href="{:s}">{:s}</a>'.format(link, text) + xhtml += entry + # recurs + if e['child1'] >= 0: + xhtml += '\n' + xhtmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1, + e['child1'], e['childn'] + 1) + xhtml += xhtmlrec + xhtml += indent2 + # close entry + xhtml += '</li>\n' + xhtml += indent1 + '</ol>
\n' + return xhtml, max_lvl, num + + data, max_lvl, num = recursINDX() + if not len(indx_data) == num: + print("Warning (in buildTOC): different number of entries in NCX", len(indx_data), num) + return header + data + footer + + def buildNAV(self, ncx_data, guidetext, title, lang): + print("Building Navigation Document.") + if FORCE_DEFAULT_TITLE: + title = DEFAULT_TITLE + nav_header = '' + nav_header += '\n' + nav_header += ' + + + + + + + + + +%s + + +''' + + ncx_footer = \ +''' + +''' + + ncx_entry = \ +''' + +%s + +''' + + # recursive part + def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1): + if start>len(indx_data) or end>len(indx_data): + print("Warning: missing INDX child entries", start, end, len(indx_data)) + return '' + if DEBUG_NCX: + print("recursINDX lvl %d from %d to %d" % (lvl, start, end)) + xml = '' + if start <= 0: + start = 0 + if end <= 0: + end = len(indx_data) + if lvl > max_lvl: + max_lvl = lvl + indent = ' ' * (2 + lvl) + + for i in range(start, end): + e = indx_data[i] + if not e['hlvl'] == lvl: + continue + # open entry + num += 1 + link = '%s#filepos%d' % (htmlfile, e['pos']) + tagid = 'np_%d' % num + entry = ncx_entry % (tagid, num, xmlescape(unescapeit(e['text'])), link) + entry = re.sub(re.compile('^', re.M), indent, entry, 0) + xml += entry + '\n' + # recurs + if e['child1']>=0: + xmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1, + e['child1'], e['childn'] + 1) + xml += xmlrec + # close entry + xml += indent + '\n' + return xml, max_lvl, num + + body, max_lvl, num = recursINDX() + header = ncx_header % (lang, ident, max_lvl + 1, title) + ncx = header + body + ncx_footer + if not len(indx_data) == num: + print("Warning: different number of entries in NCX", len(indx_data), num) + return ncx + + def writeNCX(self, metadata): + # build the xml + self.isNCX = True + print("Write ncx") + # htmlname = os.path.basename(self.files.outbase) + # htmlname += '.html' + htmlname = 'book.html' + xml = self.buildNCX(htmlname, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0]) + # write the ncx file + # ncxname = os.path.join(self.files.mobi7dir, self.files.getInputFileBasename() + '.ncx') + ncxname = os.path.join(self.files.mobi7dir, 'toc.ncx') + with open(pathof(ncxname), 'wb') as f: + f.write(xml.encode('utf-8')) + + def buildK8NCX(self, indx_data, title, ident, lang): + ncx_header = \ +''' + + + + + + + + + +%s + + +''' + + ncx_footer = \ +''' + +''' + + ncx_entry = \ +''' + +%s + +''' + + # recursive part + def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1): + if start>len(indx_data) or end>len(indx_data): + print("Warning: missing INDX child entries", start, end, len(indx_data)) + return '' + if DEBUG_NCX: + print("recursINDX lvl %d from %d to %d" % (lvl, start, end)) + xml = '' + if start <= 0: + start = 0 + if end <= 0: + end = len(indx_data) + if lvl > max_lvl: + max_lvl = lvl + indent = ' ' * (2 + lvl) + + for i in range(start, end): + e = indx_data[i] + htmlfile = e['filename'] + desttag = e['idtag'] + if not e['hlvl'] == lvl: + continue + # open entry + num += 1 + if desttag == '': + link = 'Text/%s' % htmlfile + else: + link = 'Text/%s#%s' % (htmlfile, desttag) + tagid = 'np_%d' % num + entry = ncx_entry % (tagid, num, xmlescape(unescapeit(e['text'])), link) + entry = re.sub(re.compile('^', re.M), indent, entry, 0) + xml += entry + '\n' + # recurs + if e['child1']>=0: + xmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1, + e['child1'], e['childn'] + 1) + xml += xmlrec + # close entry + xml += indent + '\n' + 
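buildNCX() and buildK8NCX() emit one navPoint per TOC entry, with playOrder following the depth-first visiting order and child navPoints nested before the closing tag, which is what the recursINDX() helpers implement. A sketch of a single entry under those assumptions (nav_point is an illustrative helper, not the patch's actual template):

    from xml.sax.saxutils import escape

    def nav_point(play_order, label, src, children=()):
        # Children are pre-rendered navPoint strings, embedded before
        # the closing tag exactly as recursINDX() appends its recursion.
        inner = ''.join(children)
        return ('<navPoint id="np_%d" playOrder="%d">'
                '<navLabel><text>%s</text></navLabel>'
                '<content src="%s"/>%s</navPoint>'
                % (play_order, play_order, escape(label), src, inner))

    # nav_point(1, 'Chapter 1', 'Text/part0000.xhtml#ch1')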
return xml, max_lvl, num + + body, max_lvl, num = recursINDX() + header = ncx_header % (lang, ident, max_lvl + 1, title) + ncx = header + body + ncx_footer + if not len(indx_data) == num: + print("Warning: different number of entries in NCX", len(indx_data), num) + return ncx + + def writeK8NCX(self, ncx_data, metadata): + # build the xml + self.isNCX = True + print("Write K8 ncx") + xml = self.buildK8NCX(ncx_data, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0]) + bname = 'toc.ncx' + ncxname = os.path.join(self.files.k8oebps,bname) + with open(pathof(ncxname), 'wb') as f: + f.write(xml.encode('utf-8')) diff --git a/src/epy_reader/tools/KindleUnpack/mobi_opf.py b/src/epy_reader/tools/KindleUnpack/mobi_opf.py new file mode 100644 index 0000000..742d776 --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_opf.py @@ -0,0 +1,686 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import unicode_str, unescapeit +from .compatibility_utils import lzip + +from .unipath import pathof + +from xml.sax.saxutils import escape as xmlescape + +import os +import uuid +from datetime import datetime + +# In EPUB3, NCX and MAY exist in OPF, although the NCX is superseded +# by the Navigation Document and the is deprecated. Currently, EPUB3_WITH_NCX +# and EPUB3_WITH_GUIDE are set to True due to compatibility with epub2 reading systems. +# They might be change to set to False in the future. + +EPUB3_WITH_NCX = True # Do not set to False except for debug. +""" Set to True to create a toc.ncx when converting to epub3. """ + +EPUB3_WITH_GUIDE = True # Do not set to False except for debug. +""" Set to True to create a guide element in an opf when converting to epub3. """ + +EPUB_OPF = 'content.opf' +""" The name for the OPF of EPUB. """ + +TOC_NCX = 'toc.ncx' +""" The name for the TOC of EPUB2. """ + +NAVIGATION_DOCUMENT = 'nav.xhtml' +""" The name for the navigation document of EPUB3. """ + +BEGIN_INFO_ONLY = '' +""" The comment to indicate the end of metadata which will be ignored by kindlegen. """ + +EXTH_TITLE_FURIGANA = 'Title-Pronunciation' +""" The name for Title Furigana(similar to file-as) set by KDP. """ + +EXTH_CREATOR_FURIGANA = 'Author-Pronunciation' +""" The name for Creator Furigana(similar to file-as) set by KDP. """ + +EXTH_PUBLISHER_FURIGANA = 'Publisher-Pronunciation' +""" The name for Publisher Furigana(similar to file-as) set by KDP. 
""" + +EXTRA_ENTITIES = {'"': '"', "'": "'"} + +class OPFProcessor(object): + + def __init__(self, files, metadata, fileinfo, rscnames, hasNCX, mh, usedmap, pagemapxml='', guidetext='', k8resc=None, epubver='2'): + self.files = files + self.metadata = metadata + self.fileinfo = fileinfo + self.rscnames = rscnames + self.has_ncx = hasNCX + self.codec = mh.codec + self.isK8 = mh.isK8() + self.printReplica = mh.isPrintReplica() + self.guidetext = unicode_str(guidetext) + self.used = usedmap + self.k8resc = k8resc + self.covername = None + self.cover_id = 'cover_img' + if self.k8resc is not None and self.k8resc.cover_name is not None: + # update cover id info from RESC if available + self.cover_id = self.k8resc.cover_name + # Create a unique urn uuid + self.BookId = unicode_str(str(uuid.uuid4())) + self.pagemap = pagemapxml + + self.ncxname = None + self.navname = None + + # page-progression-direction is only set in spine + self.page_progression_direction = metadata.pop('page-progression-direction', [None])[0] + if 'rl' in metadata.get('primary-writing-mode', [''])[0]: + self.page_progression_direction = 'rtl' + self.epubver = epubver # the epub version set by user + self.target_epubver = epubver # the epub vertion set by user or detected automatically + if self.epubver == 'A': + self.target_epubver = self.autodetectEPUBVersion() + elif self.epubver == 'F': + self.target_epubver = '2' + elif self.epubver != '2' and self.epubver != '3': + self.target_epubver = '2' + + # id for rifine attributes + self.title_id = {} + self.creator_id = {} + self.publisher_id = {} + # extra attributes + self.title_attrib = {} + self.creator_attrib = {} + self.publisher_attrib = {} + self.extra_attributes = [] # for force epub2 option + # Create epub3 metadata from EXTH. + self.exth_solved_refines_metadata = [] + self.exth_refines_metadata = [] + self.exth_fixedlayout_metadata = [] + + self.defineRefinesID() + self.processRefinesMetadata() + if self.k8resc is not None: + # Create metadata in RESC section. + self.k8resc.createMetadata(epubver) + if self.target_epubver == "3": + self.createMetadataForFixedlayout() + + def escapeit(self, sval, EXTRAS=None): + # note, xmlescape and unescape do not work with utf-8 bytestrings + sval = unicode_str(sval) + if EXTRAS: + res = xmlescape(unescapeit(sval), EXTRAS) + else: + res = xmlescape(unescapeit(sval)) + return res + + def createMetaTag(self, data, property, content, refid=''): + refines = '' + if refid: + refines = ' refines="#%s"' % refid + data.append('%s\n' % (property, refines, content)) + + def buildOPFMetadata(self, start_tag, has_obfuscated_fonts=False): + # convert from EXTH metadata format to target epub version metadata + # epub 3 will ignore style metatags + # but allows them to be present for backwards compatibility + # instead the new format is + # property_value + # and DCMES elements such as: + # value + + metadata = self.metadata + k8resc = self.k8resc + + META_TAGS = ['Drm Server Id', 'Drm Commerce Id', 'Drm Ebookbase Book Id', 'ASIN', 'ThumbOffset', 'Fake Cover', + 'Creator Software', 'Creator Major Version', 'Creator Minor Version', 'Creator Build Number', + 'Watermark', 'Clipping Limit', 'Publisher Limit', 'Text to Speech Disabled', 'CDE Type', + 'Updated Title', 'Font Signature (hex)', 'Tamper Proof Keys (hex)',] + + # def handleTag(data, metadata, key, tag, ids={}): + def handleTag(data, metadata, key, tag, attrib={}): + '''Format metadata values. + + @param data: List of formatted metadata entries. + @param metadata: The metadata dictionary. 
+ @param key: The key of the metadata value to handle. + @param tag: The opf tag corresponds to the metadata value. + ###@param ids: The ids in tags for refines property of epub3. + @param attrib: The extra attibute for refines or opf prefixs. + ''' + if key in metadata: + for i, value in enumerate(metadata[key]): + closingTag = tag.split(" ")[0] + res = '<%s%s>%s\n' % (tag, attrib.get(i, ''), self.escapeit(value), closingTag) + data.append(res) + del metadata[key] + + # these are allowed but ignored by epub3 + def handleMetaPairs(data, metadata, key, name): + if key in metadata: + for value in metadata[key]: + res = '\n' % (name, self.escapeit(value, EXTRA_ENTITIES)) + data.append(res) + del metadata[key] + + data = [] + data.append(start_tag + '\n') + # Handle standard metadata + if 'Title' in metadata: + handleTag(data, metadata, 'Title', 'dc:title', self.title_attrib) + else: + data.append('Untitled\n') + handleTag(data, metadata, 'Language', 'dc:language') + if 'UniqueID' in metadata: + handleTag(data, metadata, 'UniqueID', 'dc:identifier id="uid"') + else: + # No unique ID in original, give it a generic one. + data.append('0\n') + + if self.target_epubver == '3': + # epub version 3 minimal metadata requires a dcterms:modifed date tag + self.createMetaTag(data, 'dcterms:modified', datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")) + + if self.isK8 and has_obfuscated_fonts: + # Use the random generated urn:uuid so obuscated fonts work. + # It doesn't need to be _THE_ unique identifier to work as a key + # for obfuscated fonts in Sigil, ADE and calibre. Its just has + # to use the opf:scheme="UUID" and have the urn:uuid: prefix. + if self.target_epubver == '3': + data.append('urn:uuid:'+self.BookId+'\n') + else: + data.append('urn:uuid:'+self.BookId+'\n') + + handleTag(data, metadata, 'Creator', 'dc:creator', self.creator_attrib) + handleTag(data, metadata, 'Contributor', 'dc:contributor') + handleTag(data, metadata, 'Publisher', 'dc:publisher', self.publisher_attrib) + handleTag(data, metadata, 'Source', 'dc:source') + handleTag(data, metadata, 'Type', 'dc:type') + if self.target_epubver == '3': + if 'ISBN' in metadata: + for i, value in enumerate(metadata['ISBN']): + res = 'urn:isbn:%s\n' % self.escapeit(value) + data.append(res) + else: + handleTag(data, metadata, 'ISBN', 'dc:identifier opf:scheme="ISBN"') + if 'Subject' in metadata: + if 'SubjectCode' in metadata: + codeList = metadata['SubjectCode'] + del metadata['SubjectCode'] + else: + codeList = None + for i in range(len(metadata['Subject'])): + if codeList and i < len(codeList): + data.append('') + else: + data.append('') + data.append(self.escapeit(metadata['Subject'][i])+'\n') + del metadata['Subject'] + handleTag(data, metadata, 'Description', 'dc:description') + if self.target_epubver == '3': + if 'Published' in metadata: + for i, value in enumerate(metadata['Published']): + res = '%s\n' % self.escapeit(value) + data.append(res) + else: + handleTag(data, metadata, 'Published', 'dc:date opf:event="publication"') + handleTag(data, metadata, 'Rights', 'dc:rights') + + if self.epubver == 'F': + if self.extra_attributes or k8resc is not None and k8resc.extra_attributes: + data.append('\n') + else: + # Append refines metadata. + if self.exth_solved_refines_metadata: + data.append('\n') + data += self.exth_solved_refines_metadata + if self.exth_refines_metadata or k8resc is not None and k8resc.refines_metadata: + data.append('\n') + + # Append metadata in RESC section. 
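handleTag() is why parseMetaData() stores every metadata value as a list: each value under a key becomes one more repeated element, so a book can carry several creators, contributors or subjects. Stripped of the id/attribute plumbing, the idea is (dc_elements is an illustrative name):

    from xml.sax.saxutils import escape

    def dc_elements(metadata, key, tag):
        # One element per stored value, in order.
        return ['<%s>%s</%s>\n' % (tag, escape(v), tag)
                for v in metadata.get(key, [])]

    # dc_elements({'Creator': ['A', 'B']}, 'Creator', 'dc:creator')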
+ if k8resc is not None and k8resc.extra_metadata: + data.append('\n') + + if 'CoverOffset' in metadata: + imageNumber = int(metadata['CoverOffset'][0]) + self.covername = self.rscnames[imageNumber] + if self.covername is None: + print("Error: Cover image %s was not recognized as a valid image" % imageNumber) + else: + # is obsoleted in EPUB3, but kindlegen v2.9 requires it. + data.append('\n') + self.used[self.covername] = 'used' + del metadata['CoverOffset'] + + handleMetaPairs(data, metadata, 'Codec', 'output encoding') + # handle kindlegen specifc tags + handleTag(data, metadata, 'DictInLanguage', 'DictionaryInLanguage') + handleTag(data, metadata, 'DictOutLanguage', 'DictionaryOutLanguage') + handleMetaPairs(data, metadata, 'RegionMagnification', 'RegionMagnification') + handleMetaPairs(data, metadata, 'book-type', 'book-type') + handleMetaPairs(data, metadata, 'zero-gutter', 'zero-gutter') + handleMetaPairs(data, metadata, 'zero-margin', 'zero-margin') + handleMetaPairs(data, metadata, 'primary-writing-mode', 'primary-writing-mode') + handleMetaPairs(data, metadata, 'fixed-layout', 'fixed-layout') + handleMetaPairs(data, metadata, 'orientation-lock', 'orientation-lock') + handleMetaPairs(data, metadata, 'original-resolution', 'original-resolution') + + # these are not allowed in epub2 or 3 so convert them to meta name content pairs + # perhaps these could better be mapped into the dcterms namespace instead + handleMetaPairs(data, metadata, 'Review', 'review') + handleMetaPairs(data, metadata, 'Imprint', 'imprint') + handleMetaPairs(data, metadata, 'Adult', 'adult') + handleMetaPairs(data, metadata, 'DictShortName', 'DictionaryVeryShortName') + + # these are needed by kobo books upon submission but not sure if legal metadata in epub2 or epub3 + if 'Price' in metadata and 'Currency' in metadata: + priceList = metadata['Price'] + currencyList = metadata['Currency'] + if len(priceList) != len(currencyList): + print("Error: found %s price entries, but %s currency entries.") + else: + for i in range(len(priceList)): + data.append(''+priceList[i]+'\n') + del metadata['Price'] + del metadata['Currency'] + + if self.target_epubver == '3': + # Append metadata for EPUB3. 
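+ # (self.exth_fixedlayout_metadata is filled by createMetadataForFixedlayout(), called from __init__ when targeting epub3)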
+ if self.exth_fixedlayout_metadata: + data.append('\n') + data += self.exth_fixedlayout_metadata + + # all that remains is extra EXTH info we will store inside a comment inside meta name/content pairs + # so it can not impact anything and will be automatically stripped out if found again in a RESC section + data.append(BEGIN_INFO_ONLY + '\n') + if 'ThumbOffset' in metadata: + imageNumber = int(metadata['ThumbOffset'][0]) + # Some bad books give image indexes that are 'out of range' + try: + imageName = self.rscnames[imageNumber] + except: + print('Number given for Cover Thumbnail is out of range: %s' % imageNumber) + imageName = None + if imageName is None: + print("Error: Cover Thumbnail image %s was not recognized as a valid image" % imageNumber) + else: + data.append('\n') + # self.used[imageName] = 'used' # thumbnail image is always generated by Kindlegen, so don't include in manifest + self.used[imageName] = 'not used' + del metadata['ThumbOffset'] + for metaName in META_TAGS: + if metaName in metadata: + for value in metadata[metaName]: + data.append('\n') + del metadata[metaName] + for key in list(metadata.keys()): + for value in metadata[key]: + data.append('\n') + del metadata[key] + data.append(END_INFO_ONLY + '\n') + data.append('\n') + return data + + def buildOPFManifest(self, ncxname, navname=None): + # buildManifest for mobi7, azw4, epub2 and epub3. + k8resc = self.k8resc + cover_id = self.cover_id + hasK8RescSpine = k8resc is not None and k8resc.hasSpine() + self.ncxname = ncxname + self.navname = navname + + data = [] + data.append('\n') + media_map = { + '.jpg' : 'image/jpeg', + '.jpeg' : 'image/jpeg', + '.png' : 'image/png', + '.gif' : 'image/gif', + '.svg' : 'image/svg+xml', + '.xhtml': 'application/xhtml+xml', + '.html' : 'text/html', # for mobi7 + '.pdf' : 'application/pdf', # for azw4(print replica textbook) + '.ttf' : 'application/x-font-ttf', + '.otf' : 'application/x-font-opentype', # replaced? 
+ '.css' : 'text/css', + # '.html' : 'text/x-oeb1-document', # for mobi7 + # '.otf' : 'application/vnd.ms-opentype', # [OpenType] OpenType fonts + # '.woff' : 'application/font-woff', # [WOFF] WOFF fonts + # '.smil' : 'application/smil+xml', # [MediaOverlays301] EPUB Media Overlay documents + # '.pls' : 'application/pls+xml', # [PLS] Text-to-Speech (TTS) Pronunciation lexicons + # '.mp3' : 'audio/mpeg', + # '.mp4' : 'video/mp4', + # '.js' : 'text/javascript', # not supported in K8 + } + spinerefs = [] + + idcnt = 0 + for [key,dir,fname] in self.fileinfo: + name, ext = os.path.splitext(fname) + ext = ext.lower() + media = media_map.get(ext) + ref = "item%d" % idcnt + if hasK8RescSpine: + if key is not None and key in k8resc.spine_idrefs: + ref = k8resc.spine_idrefs[key] + properties = '' + if dir != '': + fpath = dir + '/' + fname + else: + fpath = fname + data.append('\n'.format(ref, media, fpath, properties)) + + if ext in ['.xhtml', '.html']: + spinerefs.append(ref) + idcnt += 1 + + for fname in self.rscnames: + if fname is not None: + if self.used.get(fname,'not used') == 'not used': + continue + name, ext = os.path.splitext(fname) + ext = ext.lower() + media = media_map.get(ext,ext[1:]) + properties = '' + if fname == self.covername: + ref = cover_id + if self.target_epubver == '3': + properties = 'properties="cover-image"' + else: + ref = "item%d" % idcnt + if ext == '.ttf' or ext == '.otf': + if self.isK8: # fonts are only used in Mobi 8 + fpath = 'Fonts/' + fname + data.append('\n'.format(ref, media, fpath, properties)) + else: + fpath = 'Images/' + fname + data.append('\n'.format(ref, media, fpath, properties)) + idcnt += 1 + + if self.target_epubver == '3' and navname is not None: + data.append('\n') + if self.has_ncx and ncxname is not None: + data.append('\n') + if self.pagemap != '': + data.append('\n') + data.append('\n') + return [data, spinerefs] + + def buildOPFSpine(self, spinerefs, isNCX): + # build spine + k8resc = self.k8resc + hasK8RescSpine = k8resc is not None and k8resc.hasSpine() + data = [] + ppd = '' + if self.isK8 and self.page_progression_direction is not None: + ppd = ' page-progression-direction="{:s}"'.format(self.page_progression_direction) + ncx = '' + if isNCX: + ncx = ' toc="ncx"' + map='' + if self.pagemap != '': + map = ' page-map="map"' + if self.epubver == 'F': + if ppd: + ppd = '' + spine_start_tag = '{0:s}\n'.format(ppd, map, ncx) + else: + spine_start_tag = '\n'.format(ppd, map, ncx) + data.append(spine_start_tag) + + if hasK8RescSpine: + for key in k8resc.spine_order: + idref = k8resc.spine_idrefs[key] + attribs = k8resc.spine_pageattributes[key] + tag = '\n' % entry) + start += 1 + for entry in spinerefs[start:]: + data.append('\n') + data.append('\n') + return data + + def buildMobi7OPF(self): + # Build an OPF for mobi7 and azw4. 
+ print("Building an opf for mobi7/azw4.") + data = [] + data.append('\n') + data.append('\n') + metadata_tag = '' + opf_metadata = self.buildOPFMetadata(metadata_tag) + data += opf_metadata + if self.has_ncx: + # ncxname = self.files.getInputFileBasename() + '.ncx' + ncxname = 'toc.ncx' + else: + ncxname = None + [opf_manifest, spinerefs] = self.buildOPFManifest(ncxname) + data += opf_manifest + opf_spine = self.buildOPFSpine(spinerefs, self.has_ncx) + data += opf_spine + data.append('\n\n') + if not self.printReplica: + guide ='\n' + self.guidetext + '\n' + data.append(guide) + data.append('\n') + return ''.join(data) + + def buildEPUBOPF(self, has_obfuscated_fonts=False): + print("Building an opf for mobi8 using epub version: ", self.target_epubver) + if self.target_epubver == '2': + has_ncx = self.has_ncx + has_guide = True + ncxname = None + ncxname = TOC_NCX + navname = None + package = '\n' + tours = '\n\n' + metadata_tag = '' + else: + has_ncx = EPUB3_WITH_NCX + has_guide = EPUB3_WITH_GUIDE + ncxname = None + if has_ncx: + ncxname = TOC_NCX + navname = NAVIGATION_DOCUMENT + package = '\n' + tours = '' + metadata_tag = '' + + data = [] + data.append('\n') + data.append(package) + opf_metadata = self.buildOPFMetadata(metadata_tag, has_obfuscated_fonts) + data += opf_metadata + [opf_manifest, spinerefs] = self.buildOPFManifest(ncxname, navname) + data += opf_manifest + opf_spine = self.buildOPFSpine(spinerefs, has_ncx) + data += opf_spine + data.append(tours) + if has_guide: + guide ='\n' + self.guidetext + '\n' + data.append(guide) + data.append('\n') + return ''.join(data) + + def writeOPF(self, has_obfuscated_fonts=False): + if self.isK8: + data = self.buildEPUBOPF(has_obfuscated_fonts) + outopf = os.path.join(self.files.k8oebps, EPUB_OPF) + with open(pathof(outopf), 'wb') as f: + f.write(data.encode('utf-8')) + return self.BookId + else: + data = self.buildMobi7OPF() + outopf = os.path.join(self.files.mobi7dir, 'content.opf') + with open(pathof(outopf), 'wb') as f: + f.write(data.encode('utf-8')) + return 0 + + def getBookId(self): + return self.BookId + + def getNCXName(self): + return self.ncxname + + def getNAVName(self): + return self.navname + + def getEPUBVersion(self): + return self.target_epubver + + def hasNCX(self): + return self.ncxname is not None and self.has_ncx + + def hasNAV(self): + return self.navname is not None + + def autodetectEPUBVersion(self): + # Determine EPUB version from metadata and RESC. + metadata = self.metadata + k8resc = self.k8resc + epubver = '2' + if 'true' == metadata.get('fixed-layout', [''])[0].lower(): + epubver = '3' + elif metadata.get('orientation-lock', [''])[0].lower() in ['portrait', 'landscape']: + epubver = '3' + elif self.page_progression_direction == 'rtl': + epubver = '3' + elif EXTH_TITLE_FURIGANA in metadata: + epubver = '3' + elif EXTH_CREATOR_FURIGANA in metadata: + epubver = '3' + elif EXTH_PUBLISHER_FURIGANA in metadata: + epubver = '3' + elif k8resc is not None and k8resc.needEPUB3(): + epubver = '3' + return epubver + + def defineRefinesID(self): + # the following EXTH are set by KDP. + # 'Title_Furigana_(508)' + # 'Creator_Furigana_(517)', + # 'Publisher_Furigana_(522)' + # It is difficult to find correspondence between Title, Creator, Publisher + # and EXTH 508,512, 522 if they have more than two values since KDP seems not preserve the oders of EXTH 508,512 and 522. + # It is also difficult to find correspondence between them and tags which have refine attributes in RESC. + # So editing manually is required. 
+ metadata = self.metadata + + needRefinesId = False + if self.k8resc is not None: + needRefinesId = self.k8resc.hasRefines() + # Create id for rifine attributes + if (needRefinesId or EXTH_TITLE_FURIGANA in metadata) and 'Title' in metadata: + for i in range(len(metadata.get('Title'))): + self.title_id[i] = 'title%02d' % (i+1) + + if (needRefinesId or EXTH_CREATOR_FURIGANA in metadata) and 'Creator' in metadata: + for i in range(len(metadata.get('Creator'))): + self.creator_id[i] = 'creator%02d' % (i+1) + + if (needRefinesId or EXTH_PUBLISHER_FURIGANA in metadata) and 'Publisher' in metadata: + for i in range(len(metadata.get('Publisher'))): + self.publisher_id[i] = 'publisher%02d' % (i+1) + + def processRefinesMetadata(self): + # create refines metadata defined in epub3 or convert refines property to opf: attribues for epub2. + metadata = self.metadata + + refines_list = [ + [EXTH_TITLE_FURIGANA, self.title_id, self.title_attrib, 'title00'], + [EXTH_CREATOR_FURIGANA, self.creator_id, self.creator_attrib, 'creator00'], + [EXTH_PUBLISHER_FURIGANA, self.publisher_id, self.publisher_attrib, 'publisher00'] + ] + + create_refines_metadata = False + for EXTH in lzip(*refines_list)[0]: + if EXTH in metadata: + create_refines_metadata = True + break + if create_refines_metadata: + for [EXTH, id, attrib, defaultid] in refines_list: + if self.target_epubver == '3': + for i, value in list(id.items()): + attrib[i] = ' id="%s"' % value + + if EXTH in metadata: + if len(metadata[EXTH]) == 1 and len(id) == 1: + self.createMetaTag(self.exth_solved_refines_metadata, 'file-as', metadata[EXTH][0], id[0]) + else: + for i, value in enumerate(metadata[EXTH]): + self.createMetaTag(self.exth_refines_metadata, 'file-as', value, id.get(i, defaultid)) + else: + if EXTH in metadata: + if len(metadata[EXTH]) == 1 and len(id) == 1: + attr = ' opf:file-as="%s"' % metadata[EXTH][0] + attrib[0] = attr + else: + for i, value in enumerate(metadata[EXTH]): + attr = ' id="#%s" opf:file-as="%s"\n' % (id.get(i, defaultid), value) + self.extra_attributes.append(attr) + + def createMetadataForFixedlayout(self): + # convert fixed layout to epub3 format if needed. 
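+ # EXTH 'fixed-layout' == 'true' becomes rendition:layout 'pre-paginated'; any other value becomes 'reflowable'.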
+ metadata = self.metadata + + if 'fixed-layout' in metadata: + fixedlayout = metadata['fixed-layout'][0] + content = {'true' : 'pre-paginated'}.get(fixedlayout.lower(), 'reflowable') + self.createMetaTag(self.exth_fixedlayout_metadata, 'rendition:layout', content) + + if 'orientation-lock' in metadata: + content = metadata['orientation-lock'][0].lower() + if content == 'portrait' or content == 'landscape': + self.createMetaTag(self.exth_fixedlayout_metadata, 'rendition:orientation', content) + + # according to epub3 spec about correspondence with Amazon + # if 'original-resolution' is provided it needs to be converted to + # meta viewport property tag stored in the of **each** + # xhtml page - so this tag would need to be handled by editing each part + # before reaching this routine + # we need to add support for this to the k8html routine + # if 'original-resolution' in metadata.keys(): + # resolution = metadata['original-resolution'][0].lower() + # width, height = resolution.split('x') + # if width.isdigit() and int(width) > 0 and height.isdigit() and int(height) > 0: + # viewport = 'width=%s, height=%s' % (width, height) + # self.createMetaTag(self.exth_fixedlayout_metadata, 'rendition:viewport', viewport) diff --git a/src/epy_reader/tools/KindleUnpack/mobi_pagemap.py b/src/epy_reader/tools/KindleUnpack/mobi_pagemap.py new file mode 100644 index 0000000..5228d4e --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_pagemap.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import PY2, unicode_str + +if PY2: + range = xrange + +import struct +# note: struct pack, unpack, unpack_from all require bytestring format +# data all the way up to at least python 2.7.5, python 3 okay with bytestring + +import re +# note: re requites the pattern to be the exact same type as the data to be searched in python3 +# but u"" is not allowed for the pattern itself only b"" + + +_TABLE = [('m', 1000), ('cm', 900), ('d', 500), ('cd', 400), ('c', 100), ('xc', 90), ('l', 50), ('xl', 40), ('x', 10), ('ix', 9), ('v', 5), ('iv', 4), ('i', 1)] + +def int_to_roman(i): + parts = [] + num = i + for letter, value in _TABLE: + while value <= num: + num -= value + parts.append(letter) + return ''.join(parts) + +def roman_to_int(s): + result = 0 + rnstr = s + for letter, value in _TABLE: + while rnstr.startswith(letter): + result += value + rnstr = rnstr[len(letter):] + return result + +_pattern = r'''\(([^\)]*)\)''' +_tup_pattern = re.compile(_pattern,re.IGNORECASE) + + +def _parseNames(numpages, data): + data = unicode_str(data) + pagenames = [] + pageMap = '' + for i in range(numpages): + pagenames.append(None) + for m in re.finditer(_tup_pattern, data): + tup = m.group(1) + if pageMap != '': + pageMap += ',' + pageMap += '(' + tup + ')' + spos, nametype, svalue = tup.split(",") + # print(spos, nametype, svalue) + if nametype == 'a' or nametype == 'r': + svalue = int(svalue) + spos = int(spos) + for i in range(spos - 1, numpages): + if nametype == 'r': + pname = int_to_roman(svalue) + svalue += 1 + elif nametype == 'a': + pname = "%s" % svalue + svalue += 1 + elif nametype == 'c': + sp = svalue.find('|') + if sp == -1: + pname = svalue + else: + pname = svalue[0:sp] + svalue = svalue[sp+1:] + else: + print("Error: unknown page numbering type", nametype) + pagenames[i] = pname + return pagenames, pageMap + + +class PageMapProcessor: 
+ + def __init__(self, mh, data): + self.mh = mh + self.data = data + self.pagenames = [] + self.pageoffsets = [] + self.pageMap = '' + self.pm_len = 0 + self.pm_nn = 0 + self.pn_bits = 0 + self.pmoff = None + self.pmstr = '' + print("Extracting Page Map Information") + rev_len, = struct.unpack_from(b'>L', self.data, 0x10) + # skip over header, revision string length data, and revision string + ptr = 0x14 + rev_len + pm_1, self.pm_len, self.pm_nn, self.pm_bits = struct.unpack_from(b'>4H', self.data, ptr) + # print(pm_1, self.pm_len, self.pm_nn, self.pm_bits) + self.pmstr = self.data[ptr+8:ptr+8+self.pm_len] + self.pmoff = self.data[ptr+8+self.pm_len:] + offsize = b">L" + offwidth = 4 + if self.pm_bits == 16: + offsize = b">H" + offwidth = 2 + ptr = 0 + for i in range(self.pm_nn): + od, = struct.unpack_from(offsize, self.pmoff, ptr) + ptr += offwidth + self.pageoffsets.append(od) + self.pagenames, self.pageMap = _parseNames(self.pm_nn, self.pmstr) + + def getPageMap(self): + return self.pageMap + + def getNames(self): + return self.pagenames + + def getOffsets(self): + return self.pageoffsets + + # page-map.xml will be unicode but encoded to utf-8 immediately before being written to a file + def generateKF8PageMapXML(self, k8proc): + pagemapxml = '\n' + for i in range(len(self.pagenames)): + pos = self.pageoffsets[i] + name = self.pagenames[i] + if name is not None and name != "": + [pn, dir, filename, skelpos, skelend, aidtext] = k8proc.getSkelInfo(pos) + idtext = unicode_str(k8proc.getPageIDTag(pos)) + linktgt = unicode_str(filename) + if idtext != '': + linktgt += '#' + idtext + pagemapxml += '\n' % (name, dir, linktgt) + pagemapxml += "\n" + return pagemapxml + + def generateAPNX(self, apnx_meta): + if apnx_meta['format'] == 'MOBI_8': + content_header = '{"contentGuid":"%(contentGuid)s","asin":"%(asin)s","cdeType":"%(cdeType)s","format":"%(format)s","fileRevisionId":"1","acr":"%(acr)s"}' %apnx_meta + else: + content_header = '{"contentGuid":"%(contentGuid)s","asin":"%(asin)s","cdeType":"%(cdeType)s","fileRevisionId":"1"}' % apnx_meta + content_header = content_header.encode('utf-8') + page_header = '{"asin":"%(asin)s","pageMap":"%(pageMap)s"}' % apnx_meta + page_header = page_header.encode('utf-8') + apnx = struct.pack(b'>H',1) + struct.pack(b'>H',1) + apnx += struct.pack(b'>I', 12 + len(content_header)) + apnx += struct.pack(b'>I', len(content_header)) + apnx += content_header + apnx += struct.pack(b'>H', 1) + apnx += struct.pack(b'>H', len(page_header)) + apnx += struct.pack(b'>H', self.pm_nn) + apnx += struct.pack(b'>H', 32) + apnx += page_header + for page in self.pageoffsets: + apnx += struct.pack(b'>L', page) + return apnx diff --git a/src/epy_reader/tools/KindleUnpack/mobi_sectioner.py b/src/epy_reader/tools/KindleUnpack/mobi_sectioner.py new file mode 100644 index 0000000..81f62bb --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_sectioner.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import PY2, hexlify, bstr, bord, bchar + +import datetime + +if PY2: + range = xrange + +# note: struct pack, unpack, unpack_from all require bytestring format +# data all the way up to at least python 2.7.5, python 3 okay with bytestring +import struct + +from .unipath import pathof + +DUMP = False +""" Set to True to dump all possible information. 
""" + +class unpackException(Exception): + pass + + +def describe(data): + txtans = '' + hexans = hexlify(data) + for i in data: + if bord(i) < 32 or bord(i) > 127: + txtans += '?' + else: + txtans += bchar(i).decode('latin-1') + return '"' + txtans + '"' + ' 0x'+ hexans + +def datetimefrompalmtime(palmtime): + if palmtime > 0x7FFFFFFF: + pythondatetime = datetime.datetime(year=1904,month=1,day=1)+datetime.timedelta(seconds=palmtime) + else: + pythondatetime = datetime.datetime(year=1970,month=1,day=1)+datetime.timedelta(seconds=palmtime) + return pythondatetime + + +class Sectionizer: + + def __init__(self, filename): + self.data = b'' + with open(pathof(filename), 'rb') as f: + self.data = f.read() + self.palmheader = self.data[:78] + self.palmname = self.data[:32] + self.ident = self.palmheader[0x3C:0x3C+8] + self.num_sections, = struct.unpack_from(b'>H', self.palmheader, 76) + self.filelength = len(self.data) + sectionsdata = struct.unpack_from(bstr('>%dL' % (self.num_sections*2)), self.data, 78) + (self.filelength, 0) + self.sectionoffsets = sectionsdata[::2] + self.sectionattributes = sectionsdata[1::2] + self.sectiondescriptions = ["" for x in range(self.num_sections+1)] + self.sectiondescriptions[-1] = "File Length Only" + return + + def dumpsectionsinfo(self): + print("Section Offset Length UID Attribs Description") + for i in range(self.num_sections): + print("%3d %3X 0x%07X 0x%05X % 8d % 7d %s" % (i,i, self.sectionoffsets[i], self.sectionoffsets[ + i+1] - self.sectionoffsets[i], self.sectionattributes[i]&0xFFFFFF, (self.sectionattributes[i]>>24)&0xFF, self.sectiondescriptions[i])) + print("%3d %3X 0x%07X %s" % + (self.num_sections,self.num_sections, self.sectionoffsets[self.num_sections], self.sectiondescriptions[self.num_sections])) + + def setsectiondescription(self, section, description): + if section < len(self.sectiondescriptions): + self.sectiondescriptions[section] = description + else: + print("Section out of range: %d, description %s" % (section,description)) + + def dumppalmheader(self): + print("Palm Database Header") + print("Database name: " + repr(self.palmheader[:32])) + dbattributes, = struct.unpack_from(b'>H', self.palmheader, 32) + print("Bitfield attributes: 0x%0X" % dbattributes,) + if dbattributes != 0: + print(" (",) + if (dbattributes & 2): + print("Read-only; ",) + if (dbattributes & 4): + print("Dirty AppInfoArea; ",) + if (dbattributes & 8): + print("Needs to be backed up; ",) + if (dbattributes & 16): + print("OK to install over newer; ",) + if (dbattributes & 32): + print("Reset after installation; ",) + if (dbattributes & 64): + print("No copying by PalmPilot beaming; ",) + print(")") + else: + print("") + print("File version: %d" % struct.unpack_from(b'>H', self.palmheader, 34)[0]) + dbcreation, = struct.unpack_from(b'>L', self.palmheader, 36) + print("Creation Date: " + str(datetimefrompalmtime(dbcreation))+ (" (0x%0X)" % dbcreation)) + dbmodification, = struct.unpack_from(b'>L', self.palmheader, 40) + print("Modification Date: " + str(datetimefrompalmtime(dbmodification))+ (" (0x%0X)" % dbmodification)) + dbbackup, = struct.unpack_from(b'>L', self.palmheader, 44) + if dbbackup != 0: + print("Backup Date: " + str(datetimefrompalmtime(dbbackup))+ (" (0x%0X)" % dbbackup)) + print("Modification No.: %d" % struct.unpack_from(b'>L', self.palmheader, 48)[0]) + print("App Info offset: 0x%0X" % struct.unpack_from(b'>L', self.palmheader, 52)[0]) + print("Sort Info offset: 0x%0X" % struct.unpack_from(b'>L', self.palmheader, 56)[0]) + print("Type/Creator: 
%s/%s" % (repr(self.palmheader[60:64]), repr(self.palmheader[64:68]))) + print("Unique seed: 0x%0X" % struct.unpack_from(b'>L', self.palmheader, 68)[0]) + expectedzero, = struct.unpack_from(b'>L', self.palmheader, 72) + if expectedzero != 0: + print("Should be zero but isn't: %d" % struct.unpack_from(b'>L', self.palmheader, 72)[0]) + print("Number of sections: %d" % struct.unpack_from(b'>H', self.palmheader, 76)[0]) + return + + def loadSection(self, section): + before, after = self.sectionoffsets[section:section+2] + return self.data[before:after] diff --git a/src/epy_reader/tools/KindleUnpack/mobi_split.py b/src/epy_reader/tools/KindleUnpack/mobi_split.py new file mode 100755 index 0000000..3535029 --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_split.py @@ -0,0 +1,438 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +import struct +# note: struct pack, unpack, unpack_from all require bytestring format +# data all the way up to at least python 2.7.5, python 3 okay with bytestring + +from .unipath import pathof + + +# important pdb header offsets +unique_id_seed = 68 +number_of_pdb_records = 76 + +# important palmdoc header offsets +book_length = 4 +book_record_count = 8 +first_pdb_record = 78 + +# important rec0 offsets +length_of_book = 4 +mobi_header_base = 16 +mobi_header_length = 20 +mobi_type = 24 +mobi_version = 36 +first_non_text = 80 +title_offset = 84 +first_resc_record = 108 +first_content_index = 192 +last_content_index = 194 +kf8_fdst_index = 192 # for KF8 mobi headers +fcis_index = 200 +flis_index = 208 +srcs_index = 224 +srcs_count = 228 +primary_index = 244 +datp_index = 256 +huffoff = 112 +hufftbloff = 120 + +def getint(datain,ofs,sz=b'L'): + i, = struct.unpack_from(b'>'+sz,datain,ofs) + return i + +def writeint(datain,ofs,n,len=b'L'): + if len==b'L': + return datain[:ofs]+struct.pack(b'>L',n)+datain[ofs+4:] + else: + return datain[:ofs]+struct.pack(b'>H',n)+datain[ofs+2:] + +def getsecaddr(datain,secno): + nsec = getint(datain,number_of_pdb_records,b'H') + assert secno>=0 & secnoL',2*nsec+1)) + datalst.append(datain[unique_id_seed+4:number_of_pdb_records]) + datalst.append(struct.pack(b'>H',nsec)) + newstart = zerosecstart + for i in range(0,secno): + ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) + datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L', flgval)) + datalst.append(struct.pack(b'>L', secstart) + struct.pack(b'>L', (2*secno))) + for i in range(secno+1,nsec): + ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) + ofs = ofs + dif + datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L',flgval)) + lpad = newstart - (first_pdb_record + 8*nsec) + if lpad > 0: + datalst.append(b'\0' * lpad) + datalst.append(datain[zerosecstart:secstart]) + datalst.append(secdata) + datalst.append(datain[secend:]) + dataout = b''.join(datalst) + return dataout + +def nullsection(datain,secno): # make it zero-length without deleting it + datalst = [] + nsec = getint(datain,number_of_pdb_records,b'H') + secstart, secend = getsecaddr(datain,secno) + zerosecstart, zerosecend = getsecaddr(datain, 0) + dif = secend-secstart + datalst.append(datain[:first_pdb_record]) + for i in range(0,secno+1): + ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) + datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L', flgval)) + for i in range(secno+1, nsec): + ofs, flgval = 
struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) + ofs = ofs - dif + datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L',flgval)) + lpad = zerosecstart - (first_pdb_record + 8*nsec) + if lpad > 0: + datalst.append(b'\0' * lpad) + datalst.append(datain[zerosecstart: secstart]) + datalst.append(datain[secend:]) + dataout = b''.join(datalst) + return dataout + +def deletesectionrange(datain,firstsec,lastsec): # delete a range of sections + datalst = [] + firstsecstart,firstsecend = getsecaddr(datain,firstsec) + lastsecstart,lastsecend = getsecaddr(datain,lastsec) + zerosecstart, zerosecend = getsecaddr(datain, 0) + dif = lastsecend - firstsecstart + 8*(lastsec-firstsec+1) + nsec = getint(datain,number_of_pdb_records,b'H') + datalst.append(datain[:unique_id_seed]) + datalst.append(struct.pack(b'>L',2*(nsec-(lastsec-firstsec+1))+1)) + datalst.append(datain[unique_id_seed+4:number_of_pdb_records]) + datalst.append(struct.pack(b'>H',nsec-(lastsec-firstsec+1))) + newstart = zerosecstart - 8*(lastsec-firstsec+1) + for i in range(0,firstsec): + ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) + ofs = ofs-8*(lastsec-firstsec+1) + datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L', flgval)) + for i in range(lastsec+1,nsec): + ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) + ofs = ofs - dif + flgval = 2*(i-(lastsec-firstsec+1)) + datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L',flgval)) + lpad = newstart - (first_pdb_record + 8*(nsec - (lastsec - firstsec + 1))) + if lpad > 0: + datalst.append(b'\0' * lpad) + datalst.append(datain[zerosecstart:firstsecstart]) + datalst.append(datain[lastsecend:]) + dataout = b''.join(datalst) + return dataout + +def insertsection(datain,secno,secdata): # insert a new section + datalst = [] + nsec = getint(datain,number_of_pdb_records,b'H') + # print("inserting secno" , secno, "into" ,nsec, "sections") + secstart,secend = getsecaddr(datain,secno) + zerosecstart,zerosecend = getsecaddr(datain,0) + dif = len(secdata) + datalst.append(datain[:unique_id_seed]) + datalst.append(struct.pack(b'>L',2*(nsec+1)+1)) + datalst.append(datain[unique_id_seed+4:number_of_pdb_records]) + datalst.append(struct.pack(b'>H',nsec+1)) + newstart = zerosecstart + 8 + for i in range(0,secno): + ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) + ofs += 8 + datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L', flgval)) + datalst.append(struct.pack(b'>L', secstart + 8) + struct.pack(b'>L', (2*secno))) + for i in range(secno,nsec): + ofs, flgval = struct.unpack_from(b'>2L',datain,first_pdb_record+i*8) + ofs = ofs + dif + 8 + flgval = 2*(i+1) + datalst.append(struct.pack(b'>L',ofs) + struct.pack(b'>L',flgval)) + lpad = newstart - (first_pdb_record + 8*(nsec + 1)) + if lpad > 0: + datalst.append(b'\0' * lpad) + datalst.append(datain[zerosecstart:secstart]) + datalst.append(secdata) + datalst.append(datain[secstart:]) + dataout = b''.join(datalst) + return dataout + + +def insertsectionrange(sectionsource,firstsec,lastsec,sectiontarget,targetsec): # insert a range of sections + # print("inserting secno" , firstsec, "to", lastsec, "into" ,targetsec, "sections") + # dataout = sectiontarget + # for idx in range(lastsec,firstsec-1,-1): + # dataout = insertsection(dataout,targetsec,readsection(sectionsource,idx)) + # return dataout + datalst = [] + nsec = getint(sectiontarget,number_of_pdb_records,b'H') + zerosecstart, zerosecend = getsecaddr(sectiontarget,0) + insstart, nul = getsecaddr(sectiontarget,targetsec) 
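+ # The pdb header grows by 8 bytes per inserted record (8*nins below), so offsets before the
+ # insertion point shift by 8*nins; offsets after it also shift by the length of the inserted
+ # data (srcend - srcstart).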
+ nins = lastsec - firstsec + 1 + srcstart, nul = getsecaddr(sectionsource,firstsec) + nul, srcend = getsecaddr(sectionsource,lastsec) + newstart = zerosecstart + 8*nins + + datalst.append(sectiontarget[:unique_id_seed]) + datalst.append(struct.pack(b'>L',2*(nsec+nins)+1)) + datalst.append(sectiontarget[unique_id_seed+4:number_of_pdb_records]) + datalst.append(struct.pack(b'>H',nsec+nins)) + for i in range(0,targetsec): + ofs, flgval = struct.unpack_from(b'>2L',sectiontarget,first_pdb_record+i*8) + ofsnew = ofs + 8*nins + flgvalnew = flgval + datalst.append(struct.pack(b'>L',ofsnew) + struct.pack(b'>L', flgvalnew)) + # print(ofsnew, flgvalnew, ofs, flgval) + srcstart0, nul = getsecaddr(sectionsource,firstsec) + for i in range(nins): + isrcstart, nul = getsecaddr(sectionsource,firstsec+i) + ofsnew = insstart + (isrcstart-srcstart0) + 8*nins + flgvalnew = 2*(targetsec+i) + datalst.append(struct.pack(b'>L',ofsnew) + struct.pack(b'>L', flgvalnew)) + # print(ofsnew, flgvalnew) + dif = srcend - srcstart + for i in range(targetsec,nsec): + ofs, flgval = struct.unpack_from(b'>2L',sectiontarget,first_pdb_record+i*8) + ofsnew = ofs + dif + 8*nins + flgvalnew = 2*(i+nins) + datalst.append(struct.pack(b'>L',ofsnew) + struct.pack(b'>L',flgvalnew)) + # print(ofsnew, flgvalnew, ofs, flgval) + lpad = newstart - (first_pdb_record + 8*(nsec + nins)) + if lpad > 0: + datalst.append(b'\0' * lpad) + datalst.append(sectiontarget[zerosecstart:insstart]) + datalst.append(sectionsource[srcstart:srcend]) + datalst.append(sectiontarget[insstart:]) + dataout = b''.join(datalst) + return dataout + +def get_exth_params(rec0): + ebase = mobi_header_base + getint(rec0,mobi_header_length) + elen = getint(rec0,ebase+4) + enum = getint(rec0,ebase+8) + return ebase,elen,enum + +def add_exth(rec0,exth_num,exth_bytes): + ebase,elen,enum = get_exth_params(rec0) + newrecsize = 8+len(exth_bytes) + newrec0 = rec0[0:ebase+4]+struct.pack(b'>L',elen+newrecsize)+struct.pack(b'>L',enum+1)+\ + struct.pack(b'>L',exth_num)+struct.pack(b'>L',newrecsize)+exth_bytes+rec0[ebase+12:] + newrec0 = writeint(newrec0,title_offset,getint(newrec0,title_offset)+newrecsize) + return newrec0 + +def read_exth(rec0,exth_num): + exth_values = [] + ebase,elen,enum = get_exth_params(rec0) + ebase = ebase+12 + while enum>0: + exth_id = getint(rec0,ebase) + if exth_id == exth_num: + # We might have multiple exths, so build a list. 
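+ # (e.g. EXTH 116 StartOffset may occur more than once; mobi_split keeps only the last one below)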
+ exth_values.append(rec0[ebase+8:ebase+getint(rec0,ebase+4)]) + enum = enum-1 + ebase = ebase+getint(rec0,ebase+4) + return exth_values + +def write_exth(rec0,exth_num,exth_bytes): + ebase,elen,enum = get_exth_params(rec0) + ebase_idx = ebase+12 + enum_idx = enum + while enum_idx>0: + exth_id = getint(rec0,ebase_idx) + if exth_id == exth_num: + dif = len(exth_bytes)+8-getint(rec0,ebase_idx+4) + newrec0 = rec0 + if dif != 0: + newrec0 = writeint(newrec0,title_offset,getint(newrec0,title_offset)+dif) + return newrec0[:ebase+4]+struct.pack(b'>L',elen+len(exth_bytes)+8-getint(rec0,ebase_idx+4))+\ + struct.pack(b'>L',enum)+rec0[ebase+12:ebase_idx+4]+\ + struct.pack(b'>L',len(exth_bytes)+8)+exth_bytes+\ + rec0[ebase_idx+getint(rec0,ebase_idx+4):] + enum_idx = enum_idx-1 + ebase_idx = ebase_idx+getint(rec0,ebase_idx+4) + return rec0 + +def del_exth(rec0,exth_num): + ebase,elen,enum = get_exth_params(rec0) + ebase_idx = ebase+12 + enum_idx = 0 + while enum_idx < enum: + exth_id = getint(rec0,ebase_idx) + exth_size = getint(rec0,ebase_idx+4) + if exth_id == exth_num: + newrec0 = rec0 + newrec0 = writeint(newrec0,title_offset,getint(newrec0,title_offset)-exth_size) + newrec0 = newrec0[:ebase_idx]+newrec0[ebase_idx+exth_size:] + newrec0 = newrec0[0:ebase+4]+struct.pack(b'>L',elen-exth_size)+struct.pack(b'>L',enum-1)+newrec0[ebase+12:] + return newrec0 + enum_idx += 1 + ebase_idx = ebase_idx+exth_size + return rec0 + + +class mobi_split: + + def __init__(self, infile): + datain = b'' + with open(pathof(infile), 'rb') as f: + datain = f.read() + datain_rec0 = readsection(datain,0) + ver = getint(datain_rec0,mobi_version) + self.combo = (ver!=8) + if not self.combo: + return + exth121 = read_exth(datain_rec0,121) + if len(exth121) == 0: + self.combo = False + return + else: + # only pay attention to first exth121 + # (there should only be one) + datain_kf8, = struct.unpack_from(b'>L',exth121[0],0) + if datain_kf8 == 0xffffffff: + self.combo = False + return + datain_kfrec0 =readsection(datain,datain_kf8) + + # create the standalone mobi7 + num_sec = getint(datain,number_of_pdb_records,b'H') + # remove BOUNDARY up to but not including ELF record + self.result_file7 = deletesectionrange(datain,datain_kf8-1,num_sec-2) + # check if there are SRCS records and delete them + srcs = getint(datain_rec0,srcs_index) + num_srcs = getint(datain_rec0,srcs_count) + if srcs != 0xffffffff and num_srcs > 0: + self.result_file7 = deletesectionrange(self.result_file7,srcs,srcs+num_srcs-1) + datain_rec0 = writeint(datain_rec0,srcs_index,0xffffffff) + datain_rec0 = writeint(datain_rec0,srcs_count,0) + # reset the EXTH 121 KF8 Boundary meta data to 0xffffffff + datain_rec0 = write_exth(datain_rec0,121, struct.pack(b'>L', 0xffffffff)) + # datain_rec0 = del_exth(datain_rec0,121) + # datain_rec0 = del_exth(datain_rec0,534) + # don't remove the EXTH 125 KF8 Count of Resources, seems to be present in mobi6 files as well + # set the EXTH 129 KF8 Masthead / Cover Image string to the null string + datain_rec0 = write_exth(datain_rec0,129, b'') + # don't remove the EXTH 131 KF8 Unidentified Count, seems to be present in mobi6 files as well + + # need to reset flags stored in 0x80-0x83 + # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050 + # Bit Flags + # 0x1000 = Bit 12 indicates if embedded fonts are used or not + # 0x0800 = means this Header points to *shared* images/resource/fonts ?? + # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8? 
+ # 0x0040 = exth exists + # 0x0010 = Not sure but this is always set so far + fval, = struct.unpack_from(b'>L',datain_rec0, 0x80) + # need to remove flag 0x0800 for KindlePreviewer 2.8 and unset Bit 12 for embedded fonts + fval = fval & 0x07FF + datain_rec0 = datain_rec0[:0x80] + struct.pack(b'>L',fval) + datain_rec0[0x84:] + + self.result_file7 = writesection(self.result_file7,0,datain_rec0) + + # no need to replace kf8 style fcis with mobi 7 one + # fcis_secnum, = struct.unpack_from(b'>L',datain_rec0, 0xc8) + # if fcis_secnum != 0xffffffff: + # fcis_info = readsection(datain, fcis_secnum) + # text_len, = struct.unpack_from(b'>L', fcis_info, 0x14) + # new_fcis = 'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00' + # new_fcis += struct.pack(b'>L',text_len) + # new_fcis += '\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00' + # self.result_file7 = writesection(self.result_file7, fcis_secnum, new_fcis) + + firstimage = getint(datain_rec0,first_resc_record) + lastimage = getint(datain_rec0,last_content_index,b'H') + # print("Old First Image, last Image", firstimage,lastimage) + if lastimage == 0xffff: + # find the lowest of the next sections and copy up to that. + ofs_list = [(fcis_index,b'L'),(flis_index,b'L'),(datp_index,b'L'),(hufftbloff, b'L')] + for ofs,sz in ofs_list: + n = getint(datain_rec0,ofs,sz) + # print("n",n) + if n > 0 and n < lastimage: + lastimage = n-1 + print("First Image, last Image", firstimage,lastimage) + + # Try to null out FONT and RES, but leave the (empty) PDB record so image refs remain valid + for i in range(firstimage,lastimage): + imgsec = readsection(self.result_file7,i) + if imgsec[0:4] in [b'RESC',b'FONT']: + self.result_file7 = nullsection(self.result_file7,i) + + # mobi7 finished + + # create standalone mobi8 + self.result_file8 = deletesectionrange(datain,0,datain_kf8-1) + target = getint(datain_kfrec0,first_resc_record) + self.result_file8 = insertsectionrange(datain,firstimage,lastimage,self.result_file8,target) + datain_kfrec0 =readsection(self.result_file8,0) + + # Only keep the correct EXTH 116 StartOffset, KG 2.5 carries over the one from the mobi7 part, which then points at garbage in the mobi8 part, and confuses FW 3.4 + kf8starts = read_exth(datain_kfrec0,116) + # If we have multiple StartOffset, keep only the last one + kf8start_count = len(kf8starts) + while kf8start_count > 1: + kf8start_count -= 1 + datain_kfrec0 = del_exth(datain_kfrec0,116) + + # update the EXTH 125 KF8 Count of Images/Fonts/Resources + datain_kfrec0 = write_exth(datain_kfrec0,125,struct.pack(b'>L',lastimage-firstimage+1)) + + # need to reset flags stored in 0x80-0x83 + # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050 + # standalone mobi8 with exth: 0x0050 + # Bit Flags + # 0x1000 = Bit 12 indicates if embedded fonts are used or not + # 0x0800 = means this Header points to *shared* images/resource/fonts ?? + # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8? 
+ # 0x0040 = exth exists + # 0x0010 = Not sure but this is always set so far + fval, = struct.unpack_from('>L',datain_kfrec0, 0x80) + fval = fval & 0x1FFF + fval |= 0x0800 + datain_kfrec0 = datain_kfrec0[:0x80] + struct.pack(b'>L',fval) + datain_kfrec0[0x84:] + + # properly update other index pointers that have been shifted by the insertion of images + ofs_list = [(kf8_fdst_index,b'L'),(fcis_index,b'L'),(flis_index,b'L'),(datp_index,b'L'),(hufftbloff, b'L')] + for ofs,sz in ofs_list: + n = getint(datain_kfrec0,ofs,sz) + if n != 0xffffffff: + datain_kfrec0 = writeint(datain_kfrec0,ofs,n+lastimage-firstimage+1,sz) + self.result_file8 = writesection(self.result_file8,0,datain_kfrec0) + + # no need to replace kf8 style fcis with mobi 7 one + # fcis_secnum, = struct.unpack_from(b'>L',datain_kfrec0, 0xc8) + # if fcis_secnum != 0xffffffff: + # fcis_info = readsection(self.result_file8, fcis_secnum) + # text_len, = struct.unpack_from(b'>L', fcis_info, 0x14) + # new_fcis = 'FCIS\x00\x00\x00\x14\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00' + # new_fcis += struct.pack(b'>L',text_len) + # new_fcis += '\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x08\x00\x01\x00\x01\x00\x00\x00\x00' + # self.result_file8 = writesection(self.result_file8, fcis_secnum, new_fcis) + + # mobi8 finished + + def getResult8(self): + return self.result_file8 + + def getResult7(self): + return self.result_file7 diff --git a/src/epy_reader/tools/KindleUnpack/mobi_uncompress.py b/src/epy_reader/tools/KindleUnpack/mobi_uncompress.py new file mode 100644 index 0000000..c5fad85 --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_uncompress.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import PY2, bchr, lmap, bstr + +if PY2: + range = xrange + +import struct +# note: struct pack, unpack, unpack_from all require bytestring format +# data all the way up to at least python 2.7.5, python 3 okay with bytestring + + +class unpackException(Exception): + pass + +class UncompressedReader: + + def unpack(self, data): + return data + +class PalmdocReader: + + def unpack(self, i): + o, p = b'', 0 + while p < len(i): + # for python 3 must use slice since i[p] returns int while slice returns character + c = ord(i[p:p+1]) + p += 1 + if (c >= 1 and c <= 8): + o += i[p:p+c] + p += c + elif (c < 128): + o += bchr(c) + elif (c >= 192): + o += b' ' + bchr(c ^ 128) + else: + if p < len(i): + c = (c << 8) | ord(i[p:p+1]) + p += 1 + m = (c >> 3) & 0x07ff + n = (c & 7) + 3 + if (m > n): + o += o[-m:n-m] + else: + for _ in range(n): + # because of completely ass-backwards decision by python mainters for python 3 + # we must use slice for bytes as i[p] returns int while slice returns character + if m == 1: + o += o[-m:] + else: + o += o[-m:-m+1] + return o + +class HuffcdicReader: + q = struct.Struct(b'>Q').unpack_from + + def loadHuff(self, huff): + if huff[0:8] != b'HUFF\x00\x00\x00\x18': + raise unpackException('invalid huff header') + off1, off2 = struct.unpack_from(b'>LL', huff, 8) + + def dict1_unpack(v): + codelen, term, maxcode = v&0x1f, v&0x80, v>>8 + assert codelen != 0 + if codelen <= 8: + assert term + maxcode = ((maxcode + 1) << (32 - codelen)) - 1 + return (codelen, term, maxcode) + self.dict1 = lmap(dict1_unpack, struct.unpack_from(b'>256L', huff, off1)) + + dict2 = struct.unpack_from(b'>64L', huff, off2) + self.mincode, self.maxcode = (), () + for 
codelen, mincode in enumerate((0,) + dict2[0::2]): + self.mincode += (mincode << (32 - codelen), ) + for codelen, maxcode in enumerate((0,) + dict2[1::2]): + self.maxcode += (((maxcode + 1) << (32 - codelen)) - 1, ) + + self.dictionary = [] + + def loadCdic(self, cdic): + if cdic[0:8] != b'CDIC\x00\x00\x00\x10': + raise unpackException('invalid cdic header') + phrases, bits = struct.unpack_from(b'>LL', cdic, 8) + n = min(1<H').unpack_from + def getslice(off): + blen, = h(cdic, 16+off) + slice = cdic[18+off:18+off+(blen&0x7fff)] + return (slice, blen&0x8000) + self.dictionary += lmap(getslice, struct.unpack_from(bstr('>%dH' % n), cdic, 16)) + + def unpack(self, data): + q = HuffcdicReader.q + + bitsleft = len(data) * 8 + data += b"\x00\x00\x00\x00\x00\x00\x00\x00" + pos = 0 + x, = q(data, pos) + n = 32 + + s = b'' + while True: + if n <= 0: + pos += 4 + x, = q(data, pos) + n += 32 + code = (x >> n) & ((1 << 32) - 1) + + codelen, term, maxcode = self.dict1[code >> 24] + if not term: + while code < self.mincode[codelen]: + codelen += 1 + maxcode = self.maxcode[codelen] + + n -= codelen + bitsleft -= codelen + if bitsleft < 0: + break + + r = (maxcode - code) >> (32 - codelen) + slice, flag = self.dictionary[r] + if not flag: + self.dictionary[r] = None + slice = self.unpack(slice) + self.dictionary[r] = (slice, 1) + s += slice + return s diff --git a/src/epy_reader/tools/KindleUnpack/mobi_utils.py b/src/epy_reader/tools/KindleUnpack/mobi_utils.py new file mode 100644 index 0000000..6791e0d --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobi_utils.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab +# flake8: noqa + +from __future__ import unicode_literals, division, absolute_import, print_function + +from .compatibility_utils import PY2, text_type, bchr, bord + +import binascii + +if PY2: + range = xrange + +from itertools import cycle + +def getLanguage(langID, sublangID): + mobilangdict = { + 54 : {0 : 'af'}, # Afrikaans + 28 : {0 : 'sq'}, # Albanian + 1 : {0 : 'ar' , 5 : 'ar-dz' , 15 : 'ar-bh' , 3 : 'ar-eg' , 2 : 'ar-iq', 11 : 'ar-jo' , 13 : 'ar-kw' , 12 : 'ar-lb' , 4: 'ar-ly', + 6 : 'ar-ma' , 8 : 'ar-om' , 16 : 'ar-qa' , 1 : 'ar-sa' , 10 : 'ar-sy' , 7 : 'ar-tn' , 14 : 'ar-ae' , 9 : 'ar-ye'}, + # Arabic, Arabic (Algeria), Arabic (Bahrain), Arabic (Egypt), Arabic + # (Iraq), Arabic (Jordan), Arabic (Kuwait), Arabic (Lebanon), Arabic + # (Libya), Arabic (Morocco), Arabic (Oman), Arabic (Qatar), Arabic + # (Saudi Arabia), Arabic (Syria), Arabic (Tunisia), Arabic (United Arab + # Emirates), Arabic (Yemen) + 43 : {0 : 'hy'}, # Armenian + 77 : {0 : 'as'}, # Assamese + 44 : {0 : 'az'}, # "Azeri (IANA: Azerbaijani) + 45 : {0 : 'eu'}, # Basque + 35 : {0 : 'be'}, # Belarusian + 69 : {0 : 'bn'}, # Bengali + 2 : {0 : 'bg'}, # Bulgarian + 3 : {0 : 'ca'}, # Catalan + 4 : {0 : 'zh' , 3 : 'zh-hk' , 2 : 'zh-cn' , 4 : 'zh-sg' , 1 : 'zh-tw'}, + # Chinese, Chinese (Hong Kong), Chinese (PRC), Chinese (Singapore), Chinese (Taiwan) + 26 : {0 : 'hr', 3 : 'sr'}, # Croatian, Serbian + 5 : {0 : 'cs'}, # Czech + 6 : {0 : 'da'}, # Danish + 19 : {0: 'nl', 1 : 'nl' , 2 : 'nl-be'}, # Dutch / Flemish, Dutch (Belgium) + 9 : {0: 'en', 1 : 'en' , 3 : 'en-au' , 40 : 'en-bz' , 4 : 'en-ca' , 6 : 'en-ie' , 8 : 'en-jm' , 5 : 'en-nz' , 13 : 'en-ph' , + 7 : 'en-za' , 11 : 'en-tt' , 2 : 'en-gb', 1 : 'en-us' , 12 : 'en-zw'}, + # English, English (Australia), English (Belize), English (Canada), + # English (Ireland), English (Jamaica), English (New Zealand), 
English + # (Philippines), English (South Africa), English (Trinidad), English + # (United Kingdom), English (United States), English (Zimbabwe) + 37 : {0 : 'et'}, # Estonian + 56 : {0 : 'fo'}, # Faroese + 41 : {0 : 'fa'}, # Farsi / Persian + 11 : {0 : 'fi'}, # Finnish + 12 : {0 : 'fr', 1 : 'fr' , 2 : 'fr-be' , 3 : 'fr-ca' , 5 : 'fr-lu' , 6 : 'fr-mc' , 4 : 'fr-ch'}, + # French, French (Belgium), French (Canada), French (Luxembourg), French (Monaco), French (Switzerland) + 55 : {0 : 'ka'}, # Georgian + 7 : {0 : 'de', 1 : 'de' , 3 : 'de-at' , 5 : 'de-li' , 4 : 'de-lu' , 2 : 'de-ch'}, + # German, German (Austria), German (Liechtenstein), German (Luxembourg), German (Switzerland) + 8 : {0 : 'el'}, # Greek, Modern (1453-) + 71 : {0 : 'gu'}, # Gujarati + 13 : {0 : 'he'}, # Hebrew (also code 'iw'?) + 57 : {0 : 'hi'}, # Hindi + 14 : {0 : 'hu'}, # Hungarian + 15 : {0 : 'is'}, # Icelandic + 33 : {0 : 'id'}, # Indonesian + 16 : {0 : 'it', 1 : 'it' , 2 : 'it-ch'}, # Italian, Italian (Switzerland) + 17 : {0 : 'ja'}, # Japanese + 75 : {0 : 'kn'}, # Kannada + 63 : {0 : 'kk'}, # Kazakh + 87 : {0 : 'x-kok'}, # Konkani (real language code is 'kok'?) + 18 : {0 : 'ko'}, # Korean + 38 : {0 : 'lv'}, # Latvian + 39 : {0 : 'lt'}, # Lithuanian + 47 : {0 : 'mk'}, # Macedonian + 62 : {0 : 'ms'}, # Malay + 76 : {0 : 'ml'}, # Malayalam + 58 : {0 : 'mt'}, # Maltese + 78 : {0 : 'mr'}, # Marathi + 97 : {0 : 'ne'}, # Nepali + 20 : {0 : 'no'}, # Norwegian + 72 : {0 : 'or'}, # Oriya + 21 : {0 : 'pl'}, # Polish + 22 : {0 : 'pt', 2 : 'pt' , 1 : 'pt-br'}, # Portuguese, Portuguese (Brazil) + 70 : {0 : 'pa'}, # Punjabi + 23 : {0 : 'rm'}, # "Rhaeto-Romanic" (IANA: Romansh) + 24 : {0 : 'ro'}, # Romanian + 25 : {0 : 'ru'}, # Russian + 59 : {0 : 'sz'}, # "Sami (Lappish)" (not an IANA language code) + # IANA code for "Northern Sami" is 'se' + # 'SZ' is the IANA region code for Swaziland + 79 : {0 : 'sa'}, # Sanskrit + 27 : {0 : 'sk'}, # Slovak + 36 : {0 : 'sl'}, # Slovenian + 46 : {0 : 'sb'}, # "Sorbian" (not an IANA language code) + # 'SB' is IANA region code for 'Solomon Islands' + # Lower Sorbian = 'dsb' + # Upper Sorbian = 'hsb' + # Sorbian Languages = 'wen' + 10 : {0 : 'es' , 4 : 'es' , 44 : 'es-ar' , 64 : 'es-bo' , 52 : 'es-cl' , 36 : 'es-co' , 20 : 'es-cr' , 28 : 'es-do' , + 48 : 'es-ec' , 68 : 'es-sv' , 16 : 'es-gt' , 72 : 'es-hn' , 8 : 'es-mx' , 76 : 'es-ni' , 24 : 'es-pa' , + 60 : 'es-py' , 40 : 'es-pe' , 80 : 'es-pr' , 56 : 'es-uy' , 32 : 'es-ve'}, + # Spanish, Spanish (Mobipocket bug?), Spanish (Argentina), Spanish + # (Bolivia), Spanish (Chile), Spanish (Colombia), Spanish (Costa Rica), + # Spanish (Dominican Republic), Spanish (Ecuador), Spanish (El + # Salvador), Spanish (Guatemala), Spanish (Honduras), Spanish (Mexico), + # Spanish (Nicaragua), Spanish (Panama), Spanish (Paraguay), Spanish + # (Peru), Spanish (Puerto Rico), Spanish (Uruguay), Spanish (Venezuela) + 48 : {0 : 'sx'}, # "Sutu" (not an IANA language code) + # "Sutu" is another name for "Southern Sotho"? 
+ # IANA code for "Southern Sotho" is 'st' + 65 : {0 : 'sw'}, # Swahili + 29 : {0 : 'sv' , 1 : 'sv' , 8 : 'sv-fi'}, # Swedish, Swedish (Finland) + 73 : {0 : 'ta'}, # Tamil + 68 : {0 : 'tt'}, # Tatar + 74 : {0 : 'te'}, # Telugu + 30 : {0 : 'th'}, # Thai + 49 : {0 : 'ts'}, # Tsonga + 50 : {0 : 'tn'}, # Tswana + 31 : {0 : 'tr'}, # Turkish + 34 : {0 : 'uk'}, # Ukrainian + 32 : {0 : 'ur'}, # Urdu + 67 : {0 : 'uz', 2 : 'uz'}, # Uzbek + 42 : {0 : 'vi'}, # Vietnamese + 52 : {0 : 'xh'}, # Xhosa + 53 : {0 : 'zu'}, # Zulu + } + lang = "en" + if langID in mobilangdict: + subdict = mobilangdict[langID] + lang = subdict[0] + if sublangID in subdict: + lang = subdict[sublangID] + return lang + + +def toHex(byteList): + return binascii.hexlify(byteList) + +# returns base32 bytestring +def toBase32(value, npad=4): + digits = b'0123456789ABCDEFGHIJKLMNOPQRSTUV' + num_string=b'' + current = value + while current != 0: + next, remainder = divmod(current, 32) + rem_string = digits[remainder:remainder+1] + num_string = rem_string + num_string + current=next + if num_string == b'': + num_string = b'0' + pad = npad - len(num_string) + if pad > 0: + num_string = b'0' * pad + num_string + return num_string + + +# converts base32 string to value +def fromBase32(str_num): + if isinstance(str_num, text_type): + str_num = str_num.encode('latin-1') + scalelst = [1,32,1024,32768,1048576,33554432,1073741824,34359738368] + value = 0 + j = 0 + n = len(str_num) + scale = 0 + for i in range(n): + c = str_num[n-i-1:n-i] + if c in b'0123456789': + v = ord(c) - ord(b'0') + else: + v = ord(c) - ord(b'A') + 10 + if j < len(scalelst): + scale = scalelst[j] + else: + scale = scale * 32 + j += 1 + if v != 0: + value = value + (v * scale) + return value + + +# note: if decode a bytestring using 'latin-1' (or any other 0-255 encoding) +# in place of ascii you will get a byte to half-word or integer +# one to one mapping of values from 0 - 255 + +def mangle_fonts(encryption_key, data): + if isinstance(encryption_key, text_type): + encryption_key = encryption_key.encode('latin-1') + crypt = data[:1024] + key = cycle(iter(map(bord, encryption_key))) + # encrypt = ''.join([chr(ord(x)^key.next()) for x in crypt]) + encrypt = b''.join([bchr(bord(x)^next(key)) for x in crypt]) + return encrypt + data[1024:] diff --git a/src/epy_reader/tools/KindleUnpack/mobiml2xhtml.py b/src/epy_reader/tools/KindleUnpack/mobiml2xhtml.py new file mode 100755 index 0000000..94fc671 --- /dev/null +++ b/src/epy_reader/tools/KindleUnpack/mobiml2xhtml.py @@ -0,0 +1,527 @@ +#! 
/usr/bin/python +# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab + + +# this program works in concert with the output from KindleUnpack + +''' +Convert from Mobi ML to XHTML +''' + +from __future__ import division, absolute_import, print_function + +import os +import sys +import re + +SPECIAL_HANDLING_TAGS = { + '?xml' : ('xmlheader', -1), + '!--' : ('comment', -3), + '!DOCTYPE' : ('doctype', -1), +} + +SPECIAL_HANDLING_TYPES = ['xmlheader', 'doctype', 'comment'] + +SELF_CLOSING_TAGS = ['br' , 'hr', 'input', 'img', 'image', 'meta', 'spacer', 'link', 'frame', 'base', 'col', 'reference'] + +class MobiMLConverter(object): + + PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE) + IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex') + + def __init__(self, filename): + self.base_css_rules = 'blockquote { margin: 0em 0em 0em 1.25em }\n' + self.base_css_rules += 'p { margin: 0em }\n' + self.base_css_rules += '.bold { font-weight: bold }\n' + self.base_css_rules += '.italic { font-style: italic }\n' + self.base_css_rules += '.mbp_pagebreak { page-break-after: always; margin: 0; display: block }\n' + self.tag_css_rules = {} + self.tag_css_rule_cnt = 0 + self.path = [] + self.filename = filename + self.wipml = open(self.filename, 'r').read() + self.pos = 0 + self.opfname = self.filename.rsplit('.',1)[0] + '.opf' + self.opos = 0 + self.meta = '' + self.cssname = os.path.join(os.path.dirname(self.filename),'styles.css') + self.current_font_size = 3 + self.font_history = [] + + def cleanup_html(self): + self.wipml = re.sub(r'
', '', self.wipml) + self.wipml = self.wipml.replace('\r\n', '\n') + self.wipml = self.wipml.replace('> <', '>\n<') + self.wipml = self.wipml.replace(']*>', '', self.wipml) + self.wipml = self.wipml.replace('

','
') + + def replace_page_breaks(self): + self.wipml = self.PAGE_BREAK_PAT.sub( + '
', + self.wipml) + + # parse leading text of ml and tag + def parseml(self): + p = self.pos + if p >= len(self.wipml): + return None + if self.wipml[p] != '<': + res = self.wipml.find('<',p) + if res == -1 : + res = len(self.wipml) + self.pos = res + return self.wipml[p:res], None + # handle comment as a special case to deal with multi-line comments + if self.wipml[p:p+4] == '',p+1) + if te != -1: + te = te+2 + else : + te = self.wipml.find('>',p+1) + ntb = self.wipml.find('<',p+1) + if ntb != -1 and ntb < te: + self.pos = ntb + return self.wipml[p:ntb], None + self.pos = te + 1 + return None, self.wipml[p:te+1] + + # parses string version of tag to identify its name, + # its type 'begin', 'end' or 'single', + # plus build a hashtable of its attributes + # code is written to handle the possiblity of very poor formating + def parsetag(self, s): + p = 1 + # get the tag name + tname = None + ttype = None + tattr = {} + while s[p:p+1] == ' ' : + p += 1 + if s[p:p+1] == '/': + ttype = 'end' + p += 1 + while s[p:p+1] == ' ' : + p += 1 + b = p + while s[p:p+1] not in ('>', '/', ' ', '"', "'", "\r", "\n") : + p += 1 + tname=s[b:p].lower() + if tname == '!doctype': + tname = '!DOCTYPE' + # special cases + if tname in SPECIAL_HANDLING_TAGS: + ttype, backstep = SPECIAL_HANDLING_TAGS[tname] + tattr['special'] = s[p:backstep] + if ttype is None: + # parse any attributes + while s.find('=',p) != -1 : + while s[p:p+1] == ' ' : + p += 1 + b = p + while s[p:p+1] != '=' : + p += 1 + aname = s[b:p].lower() + aname = aname.rstrip(' ') + p += 1 + while s[p:p+1] == ' ' : + p += 1 + if s[p:p+1] in ('"', "'") : + p = p + 1 + b = p + while s[p:p+1] not in ('"', "'") : + p += 1 + val = s[b:p] + p += 1 + else : + b = p + while s[p:p+1] not in ('>', '/', ' ') : + p += 1 + val = s[b:p] + tattr[aname] = val + # label beginning and single tags + if ttype is None: + ttype = 'begin' + if s.find(' /',p) >= 0: + ttype = 'single_ext' + elif s.find('/',p) >= 0: + ttype = 'single' + return ttype, tname, tattr + + # main routine to convert from mobi markup language to html + def processml(self): + + # are these really needed + html_done = False + head_done = False + body_done = False + + skip = False + + htmlstr = '' + self.replace_page_breaks() + self.cleanup_html() + + # now parse the cleaned up ml into standard xhtml + while True: + + r = self.parseml() + if not r: + break + + text, tag = r + + if text: + if not skip: + htmlstr += text + + if tag: + ttype, tname, tattr = self.parsetag(tag) + + # If we run into a DTD or xml declarations inside the body ... bail. + if tname in SPECIAL_HANDLING_TAGS and tname != 'comment' and body_done: + htmlstr += '\n' + break + + # make sure self-closing tags actually self-close + if ttype == 'begin' and tname in SELF_CLOSING_TAGS: + ttype = 'single' + + # make sure any end tags of self-closing tags are discarded + if ttype == 'end' and tname in SELF_CLOSING_TAGS: + continue + + # remove embedded guide and refernces from old mobis + if tname in ('guide', 'ncx', 'reference') and ttype in ('begin', 'single', 'single_ext'): + tname = 'removeme:{0}'.format(tname) + tattr = None + if tname in ('guide', 'ncx', 'reference', 'font', 'span') and ttype == 'end': + if self.path[-1] == 'removeme:{0}'.format(tname): + tname = 'removeme:{0}'.format(tname) + tattr = None + + # Get rid of font tags that only have a color attribute. 
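+ # (their face and size attributes are mapped to css in processtag(); a color-only font tag carries nothing we keep)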
+                if tname == 'font' and ttype in ('begin', 'single', 'single_ext'):
+                    if 'color' in tattr and len(tattr) == 1:
+                        tname = 'removeme:{0}'.format(tname)
+                        tattr = None
+
+                # Get rid of empty spans in the markup.
+                if tname == 'span' and ttype in ('begin', 'single', 'single_ext') and not len(tattr):
+                    tname = 'removeme:{0}'.format(tname)
+
+                # need to handle fonts outside of the normal methods
+                # so font tags won't be added to the self.path since we keep track
+                # of font tags separately with self.font_history
+                if tname == 'font' and ttype == 'begin':
+                    # check for nested font start tags
+                    if len(self.font_history) > 0:
+                        # inject a font end tag
+                        taginfo = ('end', 'font', None)
+                        htmlstr += self.processtag(taginfo)
+                    self.font_history.append((ttype, tname, tattr))
+                    # handle the current font start tag
+                    taginfo = (ttype, tname, tattr)
+                    htmlstr += self.processtag(taginfo)
+                    continue
+
+                # check for nested font tags and unnest them
+                if tname == 'font' and ttype == 'end':
+                    self.font_history.pop()
+                    # handle this font end tag
+                    taginfo = ('end', 'font', None)
+                    htmlstr += self.processtag(taginfo)
+                    # check if we were nested
+                    if len(self.font_history) > 0:
+                        # inject a copy of the most recent font start tag from history
+                        taginfo = self.font_history[-1]
+                        htmlstr += self.processtag(taginfo)
+                    continue
+
+                # keep track of nesting path
+                if ttype == 'begin':
+                    self.path.append(tname)
+                elif ttype == 'end':
+                    if tname != self.path[-1]:
+                        print('improper nesting: ', self.path, tname, ttype)
+                        if tname not in self.path:
+                            # handle case of end tag with no beginning by injecting empty begin tag
+                            taginfo = ('begin', tname, None)
+                            htmlstr += self.processtag(taginfo)
+                            print(" - fixed by injecting empty start tag ", tname)
+                            self.path.append(tname)
+                        elif len(self.path) > 1 and tname == self.path[-2]:
+                            # handle case of dangling missing end
+                            taginfo = ('end', self.path[-1], None)
+                            htmlstr += self.processtag(taginfo)
+                            print(" - fixed by injecting end tag ", self.path[-1])
+                            self.path.pop()
+                    self.path.pop()
+
+                if tname.startswith('removeme'):
+                    if ttype in ('begin', 'single', 'single_ext'):
+                        skip = True
+                    else:
+                        skip = False
+                else:
+                    taginfo = (ttype, tname, tattr)
+                    htmlstr += self.processtag(taginfo)
+
+                # handle potential issue of multiple html, head, and body sections
+                if tname == 'html' and ttype == 'begin' and not html_done:
+                    htmlstr += '\n'
+                    html_done = True
+
+                if tname == 'head' and ttype == 'begin' and not head_done:
+                    htmlstr += '\n'
+                    # also add in metadata and style link tags
+                    htmlstr += self.meta
+                    htmlstr += '<link href="styles.css" rel="stylesheet" type="text/css" />\n'
+                    head_done = True
+
+                if tname == 'body' and ttype == 'begin' and not body_done:
+                    htmlstr += '\n'
+                    body_done = True
+
+        # handle issue of possibly missing html, head, and body tags
+        # I have not seen this but the original did something like this so ...
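+        # (the wrappers are patched on inside-out: body first, then head,
+        # then html, so the pieces nest correctly)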
+        if not body_done:
+            htmlstr = '<body>\n' + htmlstr + '</body>\n'
+        if not head_done:
+            headstr = '<head>\n'
+            headstr += self.meta
+            headstr += '<link href="styles.css" rel="stylesheet" type="text/css" />\n'
+            headstr += '</head>\n'
+            htmlstr = headstr + htmlstr
+        if not html_done:
+            htmlstr = '<html>\n' + htmlstr + '</html>\n'
+
+        # finally add DOCTYPE info
+        htmlstr = '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n' + htmlstr
+
+        css = self.base_css_rules
+        for cls, rule in self.tag_css_rules.items():
+            css += '.%s { %s }\n' % (cls, rule)
+
+        return (htmlstr, css, self.cssname)
+
+    def ensure_unit(self, raw, unit='px'):
+        if re.search(r'\d+$', raw) is not None:
+            raw += unit
+        return raw
+
+    # flatten possibly modified tag back to string
+    def taginfo_tostring(self, taginfo):
+        (ttype, tname, tattr) = taginfo
+        if ttype is None or tname is None:
+            return ''
+        if ttype == 'end':
+            return '</%s>' % tname
+        if ttype in SPECIAL_HANDLING_TYPES and tattr is not None and 'special' in tattr:
+            info = tattr['special']
+            if ttype == 'comment':
+                return '<%s %s-->' % (tname, info)
+            else:
+                return '<%s %s>' % (tname, info)
+        res = []
+        res.append('<%s' % tname)
+        if tattr is not None:
+            for key in tattr:
+                res.append(' %s="%s"' % (key, tattr[key]))
+        if ttype == 'single':
+            res.append('/>')
+        elif ttype == 'single_ext':
+            res.append(' />')
+        else:
+            res.append('>')
+        return "".join(res)
+
+    # routine to convert mobi ml tag attributes to xhtml attributes and styles
+    def processtag(self, taginfo):
+        # Converting mobi font sizes to numerics
+        size_map = {
+            'xx-small': '1',
+            'x-small': '2',
+            'small': '3',
+            'medium': '4',
+            'large': '5',
+            'x-large': '6',
+            'xx-large': '7',
+        }
+
+        size_to_em_map = {
+            '1': '.65em',
+            '2': '.75em',
+            '3': '1em',
+            '4': '1.125em',
+            '5': '1.25em',
+            '6': '1.5em',
+            '7': '2em',
+        }
+
+        # current tag to work on
+        (ttype, tname, tattr) = taginfo
+        if not tattr:
+            tattr = {}
+
+        styles = []
+
+        if tname is None or tname.startswith('removeme'):
+            return ''
+
+        # have not seen an example of this yet so keep it here to be safe
+        # until this is better understood
+        if tname in ('country-region', 'place', 'placetype', 'placename',
+                'state', 'city', 'street', 'address', 'content'):
+            tname = 'div' if tname == 'content' else 'span'
+            for key in list(tattr):
+                tattr.pop(key)
+
+        # handle general case of style, height, width, bgcolor in any tag
+        if 'style' in tattr:
+            style = tattr.pop('style').strip()
+            if style:
+                styles.append(style)
+
+        if 'align' in tattr:
+            align = tattr.pop('align').strip()
+            if align:
+                if tname in ('table', 'td', 'tr'):
+                    pass
+                else:
+                    styles.append('text-align: %s' % align)
+
+        if 'height' in tattr:
+            height = tattr.pop('height').strip()
+            if height and '<' not in height and '>' not in height and re.search(r'\d+', height):
+                if tname in ('table', 'td', 'tr'):
+                    pass
+                elif tname == 'img':
+                    tattr['height'] = height
+                else:
+                    styles.append('margin-top: %s' % self.ensure_unit(height))
+
+        if 'width' in tattr:
+            width = tattr.pop('width').strip()
+            if width and re.search(r'\d+', width):
+                if tname in ('table', 'td', 'tr'):
+                    pass
+                elif tname == 'img':
+                    tattr['width'] = width
+                else:
+                    styles.append('text-indent: %s' % self.ensure_unit(width))
+                    if width.startswith('-'):
+                        styles.append('margin-left: %s' % self.ensure_unit(width[1:]))
+
+        if 'bgcolor' in tattr:
+            # no proprietary html allowed
+            if tname == 'div':
+                del tattr['bgcolor']
+
+        elif tname == 'font':
+            # Change font tags to span tags
+            tname = 'span'
+            if ttype in ('begin', 'single', 'single_ext'):
+                # move the face attribute to css font-family
+                if 'face' in tattr:
+                    face = tattr.pop('face').strip()
+                    styles.append('font-family: "%s"' % face)
+
+                # Monitor the constantly changing font sizes, change them to ems and move
+                # them to css. The following will work for 'flat' font tags, but nested font tags
+                # will cause things to go wonky. Need to revert to the parent font tag's size
+                # when a closing tag is encountered.
+                if 'size' in tattr:
+                    sz = tattr.pop('size').strip().lower()
+                    try:
+                        float(sz)
+                    except ValueError:
+                        if sz in size_map:
+                            sz = size_map[sz]
+                    else:
+                        if sz.startswith('-') or sz.startswith('+'):
+                            sz = self.current_font_size + float(sz)
+                            if sz > 7:
+                                sz = 7
+                            elif sz < 1:
+                                sz = 1
+                            sz = str(int(sz))
+                    styles.append('font-size: %s' % size_to_em_map[sz])
+                    self.current_font_size = int(sz)
+
+        elif tname == 'img':
+            for attr in ('width', 'height'):
+                if attr in tattr:
+                    val = tattr[attr]
+                    if val.lower().endswith('em'):
+                        try:
+                            nval = float(val[:-2])
+                            nval *= 16 * (168.451/72)  # Assume this was set using the Kindle profile
+                            tattr[attr] = "%dpx" % int(nval)
+                        except ValueError:
+                            del tattr[attr]
+                    elif val.lower().endswith('%'):
+                        del tattr[attr]
+
+        # convert the anchor tags
+        if 'filepos-id' in tattr:
+            tattr['id'] = tattr.pop('filepos-id')
+            if 'name' in tattr and tattr['name'] != tattr['id']:
+                tattr['name'] = tattr['id']
+
+        if 'filepos' in tattr:
+            filepos = tattr.pop('filepos')
+            try:
+                tattr['href'] = "#filepos%d" % int(filepos)
+            except ValueError:
+                pass
+
+        if styles:
+            ncls = None
+            rule = '; '.join(styles)
+            for sel, srule in self.tag_css_rules.items():
+                if srule == rule:
+                    ncls = sel
+                    break
+            if ncls is None:
+                self.tag_css_rule_cnt += 1
+                ncls = 'rule_%d' % self.tag_css_rule_cnt
+                self.tag_css_rules[ncls] = rule
+            cls = tattr.get('class', '')
+            cls = cls + (' ' if cls else '') + ncls
+            tattr['class'] = cls
+
+        # convert updated tag back to string representation
+        if len(tattr) == 0:
+            tattr = None
+        taginfo = (ttype, tname, tattr)
+        return self.taginfo_tostring(taginfo)
+
+
+''' main only left in for testing outside of plugin '''
+
+def main(argv=sys.argv):
+    if len(argv) != 2:
+        return 1
+    else:
+        infile = argv[1]
+
+    try:
+        print('Converting Mobi Markup Language to XHTML')
+        mlc = MobiMLConverter(infile)
+        print('Processing ...')
+        htmlstr, css, cssname = mlc.processml()
+        outname = infile.rsplit('.',1)[0] + '_converted.html'
+        open(outname, 'w').write(htmlstr)
+        open(cssname, 'w').write(css)
+        print('Completed')
+        print('XHTML version of book can be found at: ' + outname)
+
+    except ValueError as e:
+        print("Error: %s" % e)
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/epy_reader/tools/KindleUnpack/unipath.py b/src/epy_reader/tools/KindleUnpack/unipath.py
new file mode 100755
index 0000000..2416279
--- /dev/null
+++ b/src/epy_reader/tools/KindleUnpack/unipath.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this list of
+# conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice, this list
+# of conditions and the following disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+from .compatibility_utils import PY2, text_type, binary_type
+
+import sys
+import os
+
+# utility routines to convert all paths to be full unicode
+
+# Under Python 2, if a bytestring, try to convert it to unicode using sys.getfilesystemencoding
+# Under Python 3, if bytes, try to convert it to unicode using os.fsdecode()
+
+# Mac OS X and Windows will happily support full unicode paths
+# Linux can support full unicode paths but allows arbitrary byte paths
+# which may be inconsistent with unicode
+
+fsencoding = sys.getfilesystemencoding()
+
+def pathof(s, enc=fsencoding):
+    if s is None:
+        return None
+    if isinstance(s, text_type):
+        return s
+    if isinstance(s, binary_type):
+        try:
+            return s.decode(enc)
+        except UnicodeDecodeError:
+            pass
+    return s
+
+def exists(s):
+    return os.path.exists(pathof(s))
+
+def isfile(s):
+    return os.path.isfile(pathof(s))
+
+def isdir(s):
+    return os.path.isdir(pathof(s))
+
+def mkdir(s):
+    return os.mkdir(pathof(s))
+
+def listdir(s):
+    rv = []
+    for file in os.listdir(pathof(s)):
+        rv.append(pathof(file))
+    return rv
+
+def getcwd():
+    if PY2:
+        return os.getcwdu()
+    return os.getcwd()
+
+def walk(top):
+    top = pathof(top)
+    rv = []
+    for base, dnames, names in os.walk(top):
+        base = pathof(base)
+        for name in names:
+            name = pathof(name)
+            rv.append(relpath(os.path.join(base, name), top))
+    return rv
+
+def relpath(path, start=None):
+    return os.path.relpath(pathof(path), pathof(start))
+
+def abspath(path):
+    return os.path.abspath(pathof(path))
diff --git a/src/epy_reader/tools/KindleUnpack/unpack_structure.py b/src/epy_reader/tools/KindleUnpack/unpack_structure.py
new file mode 100644
index 0000000..2e66eb8
--- /dev/null
+++ b/src/epy_reader/tools/KindleUnpack/unpack_structure.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+from .compatibility_utils import text_type
+
+from . import unipath
+from .unipath import pathof
+
+DUMP = False
+""" Set to True to dump all possible information. """
+
+import os
+
+import re
+# note: re requires the pattern to be the exact same type as the data to be
+# searched in python3, but u"" is not allowed for the pattern itself, only b""
+
+import zipfile
+import binascii
+from .mobi_utils import mangle_fonts
+
+class unpackException(Exception):
+    pass
+
+class ZipInfo(zipfile.ZipInfo):
+
+    def __init__(self, *args, **kwargs):
+        compress_type = kwargs.pop('compress_type', None)
+        super(ZipInfo, self).__init__(*args, **kwargs)
+        if compress_type is not None:
+            self.compress_type = compress_type
+
+class fileNames:
+
+    def __init__(self, infile, outdir):
+        self.infile = infile
+        self.outdir = outdir
+        if not unipath.exists(self.outdir):
+            unipath.mkdir(self.outdir)
+        self.mobi7dir = os.path.join(self.outdir,'mobi7')
+        if not unipath.exists(self.mobi7dir):
+            unipath.mkdir(self.mobi7dir)
+        self.imgdir = os.path.join(self.mobi7dir, 'Images')
+        if not unipath.exists(self.imgdir):
+            unipath.mkdir(self.imgdir)
+        self.hdimgdir = os.path.join(self.outdir,'HDImages')
+        if not unipath.exists(self.hdimgdir):
+            unipath.mkdir(self.hdimgdir)
+        self.outbase = os.path.join(self.outdir, os.path.splitext(os.path.split(infile)[1])[0])
+
+    def getInputFileBasename(self):
+        return os.path.splitext(os.path.basename(self.infile))[0]
+
+    def makeK8Struct(self):
+        self.k8dir = os.path.join(self.outdir,'mobi8')
+        if not unipath.exists(self.k8dir):
+            unipath.mkdir(self.k8dir)
+        self.k8metainf = os.path.join(self.k8dir,'META-INF')
+        if not unipath.exists(self.k8metainf):
+            unipath.mkdir(self.k8metainf)
+        self.k8oebps = os.path.join(self.k8dir,'OEBPS')
+        if not unipath.exists(self.k8oebps):
+            unipath.mkdir(self.k8oebps)
+        self.k8images = os.path.join(self.k8oebps,'Images')
+        if not unipath.exists(self.k8images):
+            unipath.mkdir(self.k8images)
+        self.k8fonts = os.path.join(self.k8oebps,'Fonts')
+        if not unipath.exists(self.k8fonts):
+            unipath.mkdir(self.k8fonts)
+        self.k8styles = os.path.join(self.k8oebps,'Styles')
+        if not unipath.exists(self.k8styles):
+            unipath.mkdir(self.k8styles)
+        self.k8text = os.path.join(self.k8oebps,'Text')
+        if not unipath.exists(self.k8text):
+            unipath.mkdir(self.k8text)
+
+    # recursive zip creation support routine
+    def zipUpDir(self, myzip, tdir, localname):
+        currentdir = tdir
+        if localname != "":
+            currentdir = os.path.join(currentdir,localname)
+        list = unipath.listdir(currentdir)
+        for file in list:
+            afilename = file
+            localfilePath = os.path.join(localname, afilename)
+            realfilePath = os.path.join(currentdir,file)
+            if unipath.isfile(realfilePath):
+                myzip.write(pathof(realfilePath), pathof(localfilePath), zipfile.ZIP_DEFLATED)
+            elif unipath.isdir(realfilePath):
+                self.zipUpDir(myzip, tdir, localfilePath)
+
+    def makeEPUB(self, usedmap, obfuscate_data, uid):
+        bname = os.path.join(self.k8dir, self.getInputFileBasename() + '.epub')
+        # Create an encryption key for Adobe font obfuscation
+        # based on the epub's uid
+        if isinstance(uid,text_type):
+            uid = uid.encode('ascii')
+        if obfuscate_data:
+            key = re.sub(br'[^a-fA-F0-9]', b'', uid)
+            key = binascii.unhexlify((key + key)[:32])
+
+        # copy over all images and fonts that are actually used in the ebook
+        # and remove all font files from mobi7 since fonts are not supported there
+        imgnames = unipath.listdir(self.imgdir)
+        for name in imgnames:
+            if usedmap.get(name,'not used') == 'used':
+                filein = os.path.join(self.imgdir,name)
+                if name.endswith(".ttf"):
+                    fileout = os.path.join(self.k8fonts,name)
+                elif name.endswith(".otf"):
+                    fileout = os.path.join(self.k8fonts,name)
+                elif name.endswith(".failed"):
+                    fileout = os.path.join(self.k8fonts,name)
+                else:
+                    fileout = os.path.join(self.k8images,name)
+                data = b''
+                with open(pathof(filein),'rb') as f:
+                    data = f.read()
+                if obfuscate_data:
+                    if name in obfuscate_data:
+                        data = mangle_fonts(key, data)
+                open(pathof(fileout),'wb').write(data)
+                if name.endswith(".ttf") or name.endswith(".otf"):
+                    os.remove(pathof(filein))
+
+        # opf file name hard coded to "content.opf"
+        container = '<?xml version="1.0" encoding="UTF-8"?>\n'
+        container += '<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">\n'
+        container += '    <rootfiles>\n'
+        container += '<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>'
+        container += '    </rootfiles>\n</container>\n'
+        fileout = os.path.join(self.k8metainf,'container.xml')
+        with open(pathof(fileout),'wb') as f:
+            f.write(container.encode('utf-8'))
+
+        if obfuscate_data:
+            encryption = '<encryption xmlns="urn:oasis:names:tc:opendocument:xmlns:container">\n'
+            for font in obfuscate_data:
+                encryption += '  <EncryptedData xmlns="http://www.w3.org/2001/04/xmlenc#">\n'
+                encryption += '    <EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>\n'
+                encryption += '    <CipherData>\n'
+                encryption += '      <CipherReference URI="OEBPS/Fonts/' + font + '"/>\n'
+                encryption += '    </CipherData>\n'
+                encryption += '  </EncryptedData>\n'
+            encryption += '</encryption>\n'
+            fileout = os.path.join(self.k8metainf,'encryption.xml')
+            with open(pathof(fileout),'wb') as f:
+                f.write(encryption.encode('utf-8'))
+
+        # ready to build epub
+        self.outzip = zipfile.ZipFile(pathof(bname), 'w')
+
+        # add the mimetype file uncompressed
+        mimetype = b'application/epub+zip'
+        fileout = os.path.join(self.k8dir,'mimetype')
+        with open(pathof(fileout),'wb') as f:
+            f.write(mimetype)
+        nzinfo = ZipInfo('mimetype', compress_type=zipfile.ZIP_STORED)
+        nzinfo.external_attr = 0o600 << 16  # make this a normal file
+        self.outzip.writestr(nzinfo, mimetype)
+        self.zipUpDir(self.outzip,self.k8dir,'META-INF')
+        self.zipUpDir(self.outzip,self.k8dir,'OEBPS')
+        self.outzip.close()
diff --git a/src/epy_reader/tools/__init__.py b/src/epy_reader/tools/__init__.py
new file mode 100644
index 0000000..d97cba1
--- /dev/null
+++ b/src/epy_reader/tools/__init__.py
@@ -0,0 +1,3 @@
+__all__ = ["unpack_kindle_book"]
+
+from epy_reader.tools.KindleUnpack.kindleunpack import unpackBook as unpack_kindle_book
diff --git a/src/epy_reader/utils.py b/src/epy_reader/utils.py
new file mode 100644
index 0000000..5bba7f6
--- /dev/null
+++ b/src/epy_reader/utils.py
@@ -0,0 +1,377 @@
+import curses
+import os
+import re
+import sys
+import textwrap
+from functools import wraps
+from typing import List, Mapping, Optional, Sequence, Tuple, Union
+
+from epy_reader.ebooks import URL, Azw, Ebook, Epub, FictionBook, Mobi
+from epy_reader.lib import is_url, tuple_subtract
+from epy_reader.models import Key, LettersCount, NoUpdate, ReadingState, TextStructure, TocEntry
+from epy_reader.parser import parse_html
+from epy_reader.speakers import SpeakerBaseModel, SpeakerMimic, SpeakerPico
+
+
+def get_ebook_obj(filepath: str) -> Ebook:
+    file_ext = os.path.splitext(filepath)[1].lower()
+    if is_url(filepath):
+        return URL(filepath)
+    elif file_ext in {".epub", ".epub3"}:
+        return Epub(filepath)
+    elif file_ext == ".fb2":
+        return FictionBook(filepath)
+    elif file_ext == ".mobi":
+        return Mobi(filepath)
+    elif file_ext in {".azw", ".azw3"}:
+        return Azw(filepath)
+    else:
+        sys.exit("ERROR: Format not supported. (Supported: epub, epub3, fb2, mobi, azw, azw3)")
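+
+# Usage sketch: get_ebook_obj("novel.epub") returns an Epub instance, while
+# anything matching is_url() returns a URL instance regardless of extension.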
+
+
+def safe_curs_set(state: int) -> None:
+    try:
+        curses.curs_set(state)
+    except curses.error:
+        return
+
+
+def find_current_content_index(
+    toc_entries: Tuple[TocEntry, ...], toc_secid: Mapping[str, int], index: int, y: int
+) -> int:
+    ntoc = 0
+    for n, toc_entry in enumerate(toc_entries):
+        if toc_entry.content_index <= index:
+            if y >= toc_secid.get(toc_entry.section, 0):  # type: ignore
+                ntoc = n
+    return ntoc
+
+
+def pgup(current_row: int, window_height: int, counter: int = 1) -> int:
+    if current_row >= (window_height) * counter:
+        return current_row - (window_height) * counter
+    else:
+        return 0
+
+
+def pgdn(current_row: int, total_lines: int, window_height: int, counter: int = 1) -> int:
+    if current_row + (window_height * counter) <= total_lines - window_height:
+        return current_row + (window_height * counter)
+    else:
+        current_row = total_lines - window_height
+        if current_row < 0:
+            return 0
+        return current_row
+
+
+def pgend(total_lines: int, window_height: int) -> int:
+    if total_lines - window_height >= 0:
+        return total_lines - window_height
+    else:
+        return 0
+
+
+def choice_win(allowdel=False):
+    """
+    Conjure an options window by wrapping a window function
+    whose return type is a tuple in the form of
+    (title, list_to_choose, initial_active_index, windows_key_to_toggle)
+    and return a tuple of (returned_key, chosen_index, chosen_index_to_delete).
+    """
+
+    def inner_f(listgen):
+        @wraps(listgen)
+        def wrapper(self, *args, **kwargs):
+            rows, cols = self.screen.getmaxyx()
+            hi, wi = rows - 4, cols - 4
+            Y, X = 2, 2
+            chwin = curses.newwin(hi, wi, Y, X)
+            if self.is_color_supported:
+                chwin.bkgd(self.screen.getbkgd())
+
+            title, ch_list, index, key = listgen(self, *args, **kwargs)
+
+            if len(title) > cols - 8:
+                title = title[: cols - 8]
+
+            chwin.box()
+            chwin.keypad(True)
+            chwin.addstr(1, 2, title)
+            chwin.addstr(2, 2, "-" * len(title))
+            if allowdel:
+                chwin.addstr(3, 2, "HINT: Press 'd' to delete.")
+            key_chwin = 0
+
+            totlines = len(ch_list)
+            chwin.refresh()
+            pad = curses.newpad(totlines, wi - 2)
+            if self.is_color_supported:
+                pad.bkgd(self.screen.getbkgd())
+
+            pad.keypad(True)
+
+            padhi = rows - 5 - Y - 4 + 1 - (1 if allowdel else 0)
+            # padhi = rows - 5 - Y - 4 + 1 - 1
+            y = 0
+            if index in range(padhi // 2, totlines - padhi // 2):
+                y = index - padhi // 2 + 1
+            span = []
+
+            for n, i in enumerate(ch_list):
+                # strs = " " + str(n+1).rjust(d) + " " + i[0]
+                # remove newline from choice entries
+                # mostly happens in FictionBook (.fb2) format
+                strs = "  " + i.replace("\n", " ")
+                strs = strs[0 : wi - 3]
+                pad.addstr(n, 0, strs)
+                span.append(len(strs))
+
+            countstring = ""
+            while key_chwin not in self.keymap.Quit + key:
+                if countstring == "":
+                    count = 1
+                else:
+                    count = int(countstring)
+                if key_chwin in tuple(Key(i) for i in range(48, 58)):  # i.e., k is a numeral
+                    countstring = countstring + key_chwin.char
+                else:
+                    if key_chwin in self.keymap.ScrollUp + self.keymap.PageUp:
+                        index -= count
+                        if index < 0:
+                            index = 0
+                    elif key_chwin in self.keymap.ScrollDown or key_chwin in self.keymap.PageDown:
+                        index += count
+                        if index + 1 >= totlines:
+                            index = totlines - 1
+                    elif key_chwin in self.keymap.Follow:
+                        chwin.clear()
+                        chwin.refresh()
+                        return None, index, None
+                    elif key_chwin in self.keymap.BeginningOfCh:
+                        index = 0
+                    elif key_chwin in self.keymap.EndOfCh:
+                        index = totlines - 1
+                    elif key_chwin == Key("D") and allowdel:
+                        return None, (0 if index == 0 else index - 1), index
+                        # chwin.redrawwin()
+                        # chwin.refresh()
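+                    # 'd' asks for confirmation first, whereas 'D' above
+                    # deletes immediately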
+                    elif key_chwin == Key("d") and allowdel:
+                        resk, resp, _ = self.show_win_options(
+                            "Delete '{}'?".format(ch_list[index]),
+                            ["(Y)es", "(N)o"],
+                            0,
+                            (Key("n"),),
+                        )
+                        if resk is not None:
+                            key_chwin = resk
+                            continue
+                        elif resp == 0:
+                            return None, (0 if index == 0 else index - 1), index
+                        chwin.redrawwin()
+                        chwin.refresh()
+                    elif key_chwin in {Key(i) for i in ["Y", "y", "N", "n"]} and ch_list == [
+                        "(Y)es",
+                        "(N)o",
+                    ]:
+                        if key_chwin in {Key("Y"), Key("y")}:
+                            return None, 0, None
+                        else:
+                            return None, 1, None
+                    elif key_chwin in tuple_subtract(self._win_keys, key):
+                        chwin.clear()
+                        chwin.refresh()
+                        return key_chwin, index, None
+                    countstring = ""
+
+                while index not in range(y, y + padhi):
+                    if index < y:
+                        y -= 1
+                    else:
+                        y += 1
+
+                for n in range(totlines):
+                    att = curses.A_REVERSE if index == n else curses.A_NORMAL
+                    pre = ">>" if index == n else "  "
+                    pad.addstr(n, 0, pre)
+                    pad.chgat(n, 0, span[n], pad.getbkgd() | att)
+
+                pad.refresh(y, 0, Y + 4 + (1 if allowdel else 0), X + 4, rows - 5, cols - 6)
+                # pad.refresh(y, 0, Y+5, X+4, rows - 5, cols - 6)
+                key_chwin = Key(chwin.getch())
+                if key_chwin == Key(curses.KEY_MOUSE):
+                    mouse_event = curses.getmouse()
+                    if mouse_event[4] == curses.BUTTON4_PRESSED:
+                        key_chwin = self.keymap.ScrollUp[0]
+                    elif mouse_event[4] == 2097152:
+                        key_chwin = self.keymap.ScrollDown[0]
+                    elif mouse_event[4] == curses.BUTTON1_DOUBLE_CLICKED:
+                        if (
+                            mouse_event[2] >= 6
+                            and mouse_event[2] < rows - 4
+                            and mouse_event[2] < 6 + totlines
+                        ):
+                            index = mouse_event[2] - 6 + y
+                            key_chwin = self.keymap.Follow[0]
+                    elif (
+                        mouse_event[4] == curses.BUTTON1_CLICKED
+                        and mouse_event[2] >= 6
+                        and mouse_event[2] < rows - 4
+                        and mouse_event[2] < 6 + totlines
+                    ):
+                        if index == mouse_event[2] - 6 + y:
+                            key_chwin = self.keymap.Follow[0]
+                            continue
+                        index = mouse_event[2] - 6 + y
+                    elif mouse_event[4] == curses.BUTTON3_CLICKED:
+                        key_chwin = self.keymap.Quit[0]
+
+            chwin.clear()
+            chwin.refresh()
+            return None, None, None
+
+        return wrapper
+
+    return inner_f
+
+
+def text_win(textfunc):
+    @wraps(textfunc)
+    def wrapper(self, *args, **kwargs) -> Union[NoUpdate, Key]:
+        rows, cols = self.screen.getmaxyx()
+        hi, wi = rows - 4, cols - 4
+        Y, X = 2, 2
+        textw = curses.newwin(hi, wi, Y, X)
+        if self.is_color_supported:
+            textw.bkgd(self.screen.getbkgd())
+
+        title, raw_texts, key = textfunc(self, *args, **kwargs)
+
+        if len(title) > cols - 8:
+            title = title[: cols - 8]
+
+        texts = []
+        for i in raw_texts.splitlines():
+            texts += textwrap.wrap(i, wi - 6, drop_whitespace=False)
+
+        textw.box()
+        textw.keypad(True)
+        textw.addstr(1, 2, title)
+        textw.addstr(2, 2, "-" * len(title))
+        key_textw: Union[NoUpdate, Key] = NoUpdate()
+
+        totlines = len(texts)
+
+        pad = curses.newpad(totlines, wi - 2)
+        if self.is_color_supported:
+            pad.bkgd(self.screen.getbkgd())
+
+        pad.keypad(True)
+        for n, i in enumerate(texts):
+            pad.addstr(n, 0, i)
+        y = 0
+        textw.refresh()
+        pad.refresh(y, 0, Y + 4, X + 4, rows - 5, cols - 6)
+        padhi = rows - 8 - Y
+
+        while key_textw not in self.keymap.Quit + key:
+            if key_textw in self.keymap.ScrollUp and y > 0:
+                y -= 1
+            elif key_textw in self.keymap.ScrollDown and y < totlines - hi + 6:
+                y += 1
+            elif key_textw in self.keymap.PageUp:
+                y = pgup(y, padhi)
+            elif key_textw in self.keymap.PageDown:
+                y = pgdn(y, totlines, padhi)
+            elif key_textw in self.keymap.BeginningOfCh:
+                y = 0
+            elif key_textw in self.keymap.EndOfCh:
+                y = pgend(totlines, padhi)
+            elif key_textw in tuple_subtract(self._win_keys, key):
+                textw.clear()
+                textw.refresh()
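+                # any other registered key is handed back to the caller so the
+                # main reader loop can process it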
+                return key_textw
+            pad.refresh(y, 0, 6, 5, rows - 5, cols - 5)
+            key_textw = Key(textw.getch())
+
+        textw.clear()
+        textw.refresh()
+        return NoUpdate()
+
+    return wrapper
+
+
+def merge_text_structures(
+    text_structure_first: TextStructure, text_structure_second: TextStructure
+) -> TextStructure:
+    return TextStructure(
+        text_lines=text_structure_first.text_lines + text_structure_second.text_lines,
+        image_maps={**text_structure_first.image_maps, **text_structure_second.image_maps},
+        section_rows={**text_structure_first.section_rows, **text_structure_second.section_rows},
+        formatting=text_structure_first.formatting + text_structure_second.formatting,
+    )
+
+
+def construct_relative_reading_state(
+    abs_reading_state: ReadingState, totlines_per_content: Sequence[int]
+) -> ReadingState:
+    """
+    :param abs_reading_state: ReadingState absolute to whole book when Setting.Seamless==True
+    :param totlines_per_content: sequence of total lines per book content
+    :return: new ReadingState relative to per content of the book
+    """
+    index = 0
+    cumulative_contents_lines = 0
+    all_contents_lines = sum(totlines_per_content)
+    # for n, content_lines in enumerate(totlines_per_content):
+    #     cumulative_contents_lines += content_lines
+    #     if cumulative_contents_lines > abs_reading_state.row:
+    #         return
+    while True:
+        content_lines = totlines_per_content[index]
+        cumulative_contents_lines += content_lines
+        if cumulative_contents_lines > abs_reading_state.row:
+            break
+        index += 1
+
+    return ReadingState(
+        content_index=index,
+        textwidth=abs_reading_state.textwidth,
+        row=abs_reading_state.row - cumulative_contents_lines + content_lines,
+        rel_pctg=abs_reading_state.rel_pctg
+        - ((cumulative_contents_lines - content_lines) / all_contents_lines)
+        if abs_reading_state.rel_pctg
+        else None,
+        section=abs_reading_state.section,
+    )
+
+
+def count_letters(ebook: Ebook) -> LettersCount:
+    per_content_counts: List[int] = []
+    cumulative_counts: List[int] = []
+    # assert isinstance(ebook.contents, tuple)
+    for i in ebook.contents:
+        content = ebook.get_raw_text(i)
+        src_lines = parse_html(content)
+        assert isinstance(src_lines, tuple)
+        cumulative_counts.append(sum(per_content_counts))
+        per_content_counts.append(sum([len(re.sub(r"\s", "", j)) for j in src_lines]))
+
+    return LettersCount(all=sum(per_content_counts), cumulative=tuple(cumulative_counts))
+
+
+def count_letters_parallel(ebook: Ebook, child_conn) -> None:
+    child_conn.send(count_letters(ebook))
+    child_conn.close()
+
+
+def construct_speaker(
+    preferred: Optional[str] = None, args: List[str] = []
+) -> Optional[SpeakerBaseModel]:
+    available_speakers = [SpeakerMimic, SpeakerPico]
+    sorted_speakers = (
+        sorted(available_speakers, key=lambda x: int(x.cmd == preferred), reverse=True)
+        if preferred
+        else available_speakers
+    )
+    speaker = next((speaker for speaker in sorted_speakers if speaker.available), None)
+    return speaker(args) if speaker else None
--
cgit