aboutsummaryrefslogtreecommitdiffstats
path: root/src/epy_reader/tools/KindleUnpack/compatibility_utils.py
diff options
context:
space:
mode:
authorBenawi Adha <benawiadha@gmail.com>2022-10-02 21:22:38 +0700
committerBenawi Adha <benawiadha@gmail.com>2022-10-02 21:22:38 +0700
commit258c30d2e088cd4ab091a53794da3f93af79915d (patch)
treef49340bf565deb20c730358af74a01bcc231de53 /src/epy_reader/tools/KindleUnpack/compatibility_utils.py
parentd43533f01d9d5baf5f78b71f832641382bd5962a (diff)
downloadepy-258c30d2e088cd4ab091a53794da3f93af79915d.tar.gz
Major refactor: breakdown epy.py script
into package project structure for easier development Squashed commit of the following: commit 01309b961a4ab32394bff0d90949b57435dfda47 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:15:04 2022 +0700 Fix missing objects commit aab2e773c30b255c81b1250b3b20967d5da40338 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:09:31 2022 +0700 Update README.md commit d4e98926bcd9b00ce0410ad71249d24e6315abc5 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:07:28 2022 +0700 Add keywords in pyproject.toml commit 432055af8245560a3ff2e046aef0b4e87da44930 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:04:34 2022 +0700 Bump version and deprecete setup.py commit 51dd15aab8f8ff5996f822f8378e813f0b9fb80d Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 20:56:38 2022 +0700 Formatting commit 81fb35e3b6fa0e27d79ef1da77202ed81eb99500 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 20:55:08 2022 +0700 Fix speakers module commit 3b852e7c59b38d5a28520038e35f50a95270d2f1 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:52:46 2022 +0700 Fix circular import commit 061e8a2649dabacd28a9e2f972559475316c654c Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:39:27 2022 +0700 Run formatting commit abc2d0ab156992c63dc04745d14a69679a60accb Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:39:00 2022 +0700 Update isort and black config in pyproject commit 5dc2e41bab5b997bd719bdc1561eb51ba0c17a83 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:31:00 2022 +0700 Add app Config commit ed485a2ea8281585bf86dc5772f0c6dd9c803cc4 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:23:02 2022 +0700 Update debugpy script commit 68b0553dd4d63eb4b847132c68ea4018587fa8ec Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:14:11 2022 +0700 Connect reader to main script commit 63c3dd176f18a784a4ed2e88aa72b13d1c2b0990 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 
20:11:17 2022 +0700 Implement reader commit ce5eec8fb4e1db3870a16a07541365cd777d6c4c Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:29:49 2022 +0700 Fix script in pyproject.toml commit 941e8e49f1593731fb582d92084206772b3f0442 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:28:39 2022 +0700 Rename modules commit 5a3e7f766aee774c09b3b5336f3a2968e9cb1d0c Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:28:20 2022 +0700 Rename tool method commit 3c0503ff475cb7eff8b12d3be0bda7a38efe1072 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:27:03 2022 +0700 Add ebooks lib commit b5f71c3296a7d6f36454f6e1cbe84e15a45092ee Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 17:25:11 2022 +0700 Initial reorganization
Diffstat (limited to 'src/epy_reader/tools/KindleUnpack/compatibility_utils.py')
-rwxr-xr-xsrc/epy_reader/tools/KindleUnpack/compatibility_utils.py278
1 files changed, 278 insertions, 0 deletions
diff --git a/src/epy_reader/tools/KindleUnpack/compatibility_utils.py b/src/epy_reader/tools/KindleUnpack/compatibility_utils.py
new file mode 100755
index 0000000..c46c0bb
--- /dev/null
+++ b/src/epy_reader/tools/KindleUnpack/compatibility_utils.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this list of
+# conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice, this list
+# of conditions and the following disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import sys
+import codecs
+
+# Major version of the running interpreter; the flags below drive every
+# version-specific branch in this module.
+_PY_MAJOR = sys.version_info[0]
+PY2 = (_PY_MAJOR == 2)
+PY3 = (_PY_MAJOR == 3)
+
+# True when sys.platform begins with 'win'.
+iswindows = sys.platform[:3] == 'win'
+
+try:
+ from urllib.parse import unquote
+except ImportError:
+ from urllib import unquote
+
+# Bind _h to an object exposing unescape(): the html module itself when the
+# minor version is 4 or later on a non-Python-2 interpreter, otherwise an
+# HTMLParser instance from whichever module provides it.
+if not PY2 and sys.version_info[1] >= 4:
+    import html as _h
+elif not PY2:
+    import html.parser
+    _h = html.parser.HTMLParser()
+else:
+    from HTMLParser import HTMLParser
+    _h = HTMLParser()
+
+# Canonical names for the text and binary string types of the running
+# interpreter.
+if not PY3:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+    # If you will be printing unicode under Python 2 you need to protect
+    # against sys.stdout.encoding being None, which stupidly forces ascii encoding of unicode
+    # sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
+    # alternatively set environment variable as follows **before** launching python: export PYTHONIOENCODING=UTF-8
+else:
+    text_type = str
+    binary_type = bytes
+    # If you will be printing arbitrary binary data to stdout on Python 3
+    # sys.stdin = sys.stdin.detach()
+    # sys.stdout = sys.stdout.detach()
+    # sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
+
+# NOTE: Python 3 is completely broken when accessing single bytes in bytes strings
+# (and they amazingly claim by design and no bug!)
+
+# To illustrate: this works for unicode in Python 3 and for all Python 2.X for both bytestrings and unicode
+# >>> o = '123456789'
+# >>> o[-3]
+# '7'
+# >>> type(o[-3])
+# <class 'str'>
+# >>> type(o)
+# <class 'str'>
+
+# Unfortunately, this is what Python 3 does for no sane reason and only for bytestrings
+# >>> o = b'123456789'
+# >>> o[-3]
+# 55
+# >>> type(o[-3])
+# <class 'int'>
+# >>> type(o)
+# <class 'bytes'>
+
+# This mind boggling behaviour also happens when indexing a bytestring and/or
+# iterating over a bytestring. In other words it will return an int but not
+# the byte itself!!!!!!!
+
+# The only way to access a single byte as a byte in bytestring and get the byte in both
+# Python 2 and Python 3 is to use a slice
+
+# This problem is so common there are horrible hacks floating around the net to **try**
+# to work around it, so that code that works on both Python 2 and Python 3 is possible.
+
+# So in order to write code that works on both Python 2 and Python 3
+# if you index or access a single byte and want its ord() then use the bord() function.
+# If instead you want it as a single character byte use the bchar() function
+# both of which are defined below.
+
+if PY3:
+    # Also Note: if you decode a bytestring using 'latin-1' (or any other full range 0-255 encoding)
+    # in place of ascii you will get a byte value to half-word or integer value
+    # one-to-one mapping (in the 0 - 255 range)
+
+    def bchr(s):
+        """Return a one-byte bytestring built from the integer s."""
+        return bytes((s,))
+
+    def bstr(s):
+        """Return s as bytes; text is encoded with 'latin-1'."""
+        return bytes(s, 'latin-1') if isinstance(s, str) else bytes(s)
+
+    def bord(s):
+        """Return s unchanged (indexing bytes already yields an int here)."""
+        return s
+
+    def bchar(s):
+        """Return a one-byte bytestring built from the integer s."""
+        return bytes((s,))
+
+else:
+    def bchr(s):
+        """Return chr(s)."""
+        return chr(s)
+
+    def bstr(s):
+        """Return str(s)."""
+        return str(s)
+
+    def bord(s):
+        """Return ord(s)."""
+        return ord(s)
+
+    def bchar(s):
+        """Return s unchanged."""
+        return s
+
+if PY3:
+    # list-producing versions of the major Python iterating functions
+    def lrange(*args, **kwargs):
+        """Return range(*args, **kwargs) materialised as a list."""
+        lazy = range(*args, **kwargs)
+        return list(lazy)
+
+    def lzip(*args, **kwargs):
+        """Return zip(*args, **kwargs) materialised as a list."""
+        lazy = zip(*args, **kwargs)
+        return list(lazy)
+
+    def lmap(*args, **kwargs):
+        """Return map(*args, **kwargs) materialised as a list."""
+        lazy = map(*args, **kwargs)
+        return list(lazy)
+
+    def lfilter(*args, **kwargs):
+        """Return filter(*args, **kwargs) materialised as a list."""
+        lazy = filter(*args, **kwargs)
+        return list(lazy)
+else:
+    import __builtin__
+    # Python 2 builtins already produce lists, so alias them directly
+    lrange, lzip, lmap, lfilter = (
+        __builtin__.range,
+        __builtin__.zip,
+        __builtin__.map,
+        __builtin__.filter,
+    )
+
+# In Python 3 you can no longer use .encode('hex') on a bytestring
+# instead use the following on both platforms
+import binascii
+def hexlify(bdata):
+    """Return the hexadecimal representation of bytestring bdata as text."""
+    hex_bytes = binascii.hexlify(bdata)
+    return hex_bytes.decode('ascii')
+
+# If you: import struct
+# Note: struct pack, unpack, unpack_from all *require* bytestring format
+# data all the way up to at least Python 2.7.5, Python 3 is okay with either
+
+# If you: import re
+# note: Python 3 "re" requires the pattern to be the exact same type as the data to be
+# searched ... but u"" is not allowed for the pattern itself only b""
+# Python 2.X allows the pattern to be any type and converts it to match the data
+# and returns the same type as the data
+
+# convert string to be utf-8 encoded
+def utf8_str(p, enc='utf-8'):
+    """Return p as a utf-8 encoded bytestring.
+
+    None is passed through. Text is encoded as utf-8. A bytestring is
+    returned untouched when enc is 'utf-8'; otherwise it is decoded with
+    enc and re-encoded as utf-8.
+    """
+    if p is None:
+        return None
+    if not isinstance(p, text_type):
+        if enc == 'utf-8':
+            # already in the target encoding, nothing to do
+            return p
+        p = p.decode(enc)
+    return p.encode('utf-8')
+
+# convert string to be unicode encoded
+def unicode_str(p, enc='utf-8'):
+    """Return p as text, decoding a bytestring with enc.
+
+    None and values that are already text are returned unchanged.
+    """
+    if p is None or isinstance(p, text_type):
+        return p
+    return p.decode(enc)
+
+# All 128 ASCII characters.
+ASCII_CHARS = set(map(chr, range(128)))
+# Characters that quoteurl() leaves untouched.
+URL_SAFE = set(
+    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    'abcdefghijklmnopqrstuvwxyz'
+    '0123456789'
+    '#_.-/~'
+)
+# ASCII characters that quoteurl() percent-encodes.
+IRI_UNSAFE = ASCII_CHARS.difference(URL_SAFE)
+
+# returns a quoted IRI (not a URI)
+def quoteurl(href):
+    """Return href as a quoted IRI.
+
+    A bytestring is first decoded as utf-8. Every character found in
+    IRI_UNSAFE is replaced by its %xx escape (lowercase hex); all other
+    characters are kept as they are.
+    """
+    if isinstance(href, binary_type):
+        href = href.decode('utf-8')
+    return ''.join(
+        "%%%02x" % ord(ch) if ch in IRI_UNSAFE else ch
+        for ch in href
+    )
+
+# unquotes url/iri
+def unquoteurl(href):
+    """Return href with its %xx escapes undone by unquote().
+
+    A bytestring is decoded as utf-8 before unquoting.
+    """
+    text = href.decode('utf-8') if isinstance(href, binary_type) else href
+    return unquote(text)
+
+# unescape html
+def unescapeit(sval):
+    """Return sval passed through the unescape() of the module-level _h."""
+    unescape = _h.unescape
+    return unescape(sval)
+
+# Python 2.X commandline parsing under Windows has been horribly broken for years!
+# Use the following code to emulate full unicode commandline parsing on Python 2
+# ie. To get sys.argv arguments and properly encode them as unicode
+
+def unicode_argv():
+    """Return the command-line arguments as a list of text strings.
+
+    On Python 3 sys.argv is returned as is. On Python 2 under Windows the
+    command line is re-read through GetCommandLineW / CommandLineToArgvW;
+    None is returned if that reports no arguments. On Python 2 elsewhere,
+    each non-text argument is decoded with sys.stdin.encoding, falling back
+    to the filesystem encoding and then to 'utf-8'.
+    """
+    if PY3:
+        return sys.argv
+
+    if not iswindows:
+        encoding = sys.stdin.encoding
+        if encoding is None:
+            encoding = sys.getfilesystemencoding()
+        if encoding is None:
+            encoding = 'utf-8'
+        return [
+            arg if isinstance(arg, text_type) else arg.decode(encoding)
+            for arg in sys.argv
+        ]
+
+    # Versions 2.x of Python don't support Unicode in sys.argv on
+    # Windows, with the underlying Windows API instead replacing multi-byte
+    # characters with '?'. So use shell32.CommandLineToArgvW to get sys.argv
+    # as a list of Unicode strings
+    from ctypes import POINTER, byref, cdll, c_int, windll
+    from ctypes.wintypes import LPCWSTR, LPWSTR
+
+    get_command_line = cdll.kernel32.GetCommandLineW
+    get_command_line.argtypes = []
+    get_command_line.restype = LPCWSTR
+
+    split_command_line = windll.shell32.CommandLineToArgvW
+    split_command_line.argtypes = [LPCWSTR, POINTER(c_int)]
+    split_command_line.restype = POINTER(LPWSTR)
+
+    raw_command_line = get_command_line()
+    argc = c_int(0)
+    argv = split_command_line(raw_command_line, byref(argc))
+    if argc.value <= 0:
+        # this should never happen
+        return None
+    # Remove Python executable and commands if present
+    first = argc.value - len(sys.argv)
+    return [argv[i] for i in range(first, argc.value)]
+
+
+# Python 2.X is broken in that it does not recognize CP65001 as UTF-8
+def add_cp65001_codec():
+    """Register 'cp65001' as an alias of the utf-8 codec on Python 2.
+
+    Does nothing on other versions, or when codecs.lookup('cp65001')
+    already succeeds.
+    """
+    if not PY2:
+        return
+    try:
+        codecs.lookup('cp65001')
+    except LookupError:
+        def _cp65001_search(name):
+            # codec search function: answer only for the cp65001 name
+            if name == 'cp65001':
+                return codecs.lookup('utf-8')
+            return None
+
+        codecs.register(_cp65001_search)