Major refactor: breakdown epy.py script

into package project structure for easier development Squashed commit of the following: commit 01309b961a4ab32394bff0d90949b57435dfda47 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:15:04 2022 +0700 Fix missing objects commit aab2e773c30b255c81b1250b3b20967d5da40338 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:09:31 2022 +0700 Update README.md commit d4e98926bcd9b00ce0410ad71249d24e6315abc5 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:07:28 2022 +0700 Add keywords in pyproject.toml commit 432055af8245560a3ff2e046aef0b4e87da44930 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 21:04:34 2022 +0700 Bump version and deprecete setup.py commit 51dd15aab8f8ff5996f822f8378e813f0b9fb80d Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 20:56:38 2022 +0700 Formatting commit 81fb35e3b6fa0e27d79ef1da77202ed81eb99500 Author: Benawi Adha <benawiadha@gmail.com> Date: Sun Oct 2 20:55:08 2022 +0700 Fix speakers module commit 3b852e7c59b38d5a28520038e35f50a95270d2f1 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:52:46 2022 +0700 Fix circular import commit 061e8a2649dabacd28a9e2f972559475316c654c Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:39:27 2022 +0700 Run formatting commit abc2d0ab156992c63dc04745d14a69679a60accb Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:39:00 2022 +0700 Update isort and black config in pyproject commit 5dc2e41bab5b997bd719bdc1561eb51ba0c17a83 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:31:00 2022 +0700 Add app Config commit ed485a2ea8281585bf86dc5772f0c6dd9c803cc4 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:23:02 2022 +0700 Update debugpy script commit 68b0553dd4d63eb4b847132c68ea4018587fa8ec Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:14:11 2022 +0700 Connect reader to main script commit 63c3dd176f18a784a4ed2e88aa72b13d1c2b0990 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 20:11:17 2022 +0700 Implement reader commit ce5eec8fb4e1db3870a16a07541365cd777d6c4c Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:29:49 2022 +0700 Fix script in pyproject.toml commit 941e8e49f1593731fb582d92084206772b3f0442 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:28:39 2022 +0700 Rename modules commit 5a3e7f766aee774c09b3b5336f3a2968e9cb1d0c Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:28:20 2022 +0700 Rename tool method commit 3c0503ff475cb7eff8b12d3be0bda7a38efe1072 Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 19:27:03 2022 +0700 Add ebooks lib commit b5f71c3296a7d6f36454f6e1cbe84e15a45092ee Author: Benawi Adha <benawiadha@gmail.com> Date: Sat Oct 1 17:25:11 2022 +0700 Initial reorganization
author: Benawi Adha <benawiadha@gmail.com> 2022-10-02 21:22:38 +0700
committer: Benawi Adha <benawiadha@gmail.com> 2022-10-02 21:22:38 +0700
commit: 258c30d2e088cd4ab091a53794da3f93af79915d (patch)
tree: f49340bf565deb20c730358af74a01bcc231de53 /src/epy_reader/tools/KindleUnpack/compatibility_utils.py
parent: d43533f01d9d5baf5f78b71f832641382bd5962a (diff)
download: epy-258c30d2e088cd4ab091a53794da3f93af79915d.tar.gz
1 files changed, 278 insertions, 0 deletions
diff --git a/src/epy_reader/tools/KindleUnpack/compatibility_utils.py b/src/epy_reader/tools/KindleUnpack/compatibility_utils.py
new file mode 100755
index 0000000..c46c0bb
--- /dev/null
+++ b/src/epy_reader/tools/KindleUnpack/compatibility_utils.py
@@ -0,0 +1,278 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without modification,
+# are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this list of
+# conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice, this list
+# of conditions and the following disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import sys
+import codecs
+
+PY2 = sys.version_info[0] == 2
+PY3 = sys.version_info[0] == 3
+
+iswindows = sys.platform.startswith('win')
+
+try:
+    from urllib.parse import unquote
+except ImportError:
+    from urllib import unquote
+
+if PY2:
+    from HTMLParser import HTMLParser
+    _h = HTMLParser()
+elif sys.version_info[1] < 4:
+    import html.parser
+    _h = html.parser.HTMLParser()
+else:
+    import html as _h
+
+if PY3:
+    text_type = str
+    binary_type = bytes
+    # if will be printing arbitraty binary data to stdout on python 3
+    # sys.stdin = sys.stdin.detach()
+    # sys.stdout = sys.stdout.detach()
+    # sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
+else:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+    # if will be printing unicode under python 2 need to protect
+    # against sys.stdout.encoding being None stupidly forcing forcing ascii encoding of unicode
+    # sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
+    # alternatively set environment variable as follows **before** launching python:  export PYTHONIOENCODING=UTF-8
+
+# NOTE: Python 3 is completely broken when accessing single bytes in bytes strings
+# (and they amazingly claim by design and no bug!)
+
+# To illustrate: this works for unicode in Python 3 and for all Python 2.X for both bytestrings and unicode
+# >>> o = '123456789'
+# >>> o[-3]
+# '7'
+# >>> type(o[-3])
+# <class 'str'>
+# >>> type(o)
+# <class 'str'>
+
+# Unfortunately, this is what Python 3 does for no sane reason and only for bytestrings
+# >>> o = b'123456789'
+# >>> o[-3]
+# 55
+# >>> type(o[-3])
+# <class 'int'>
+# >>> type(o)
+# <class 'bytes'>
+
+# This mind boggling  behaviour also happens when indexing a bytestring and/or
+# iteratoring over a bytestring.  In other words it will return an int but not
+# the byte itself!!!!!!!
+
+# The only way to access a single byte as a byte in bytestring and get the byte in both
+# Python 2 and Python 3 is to use a slice
+
+# This problem is so common there are horrible hacks floating around the net to **try**
+# to work around it, so that code that works on both Python 2 and Python 3 is possible.
+
+# So in order to write code that works on both Python 2 and Python 3
+# if you index or access a single byte and want its ord() then use the bord() function.
+# If instead you want it as a single character byte use the bchar() function
+# both of which are defined below.
+
+if PY3:
+    # Also Note: if decode a bytestring using 'latin-1' (or any other full range 0-255 encoding)
+    # in place of ascii you will get a byte value to half-word or integer value
+    # one-to-one mapping (in the 0 - 255 range)
+
+    def bchr(s):
+        return bytes([s])
+
+    def bstr(s):
+        if isinstance(s, str):
+            return bytes(s, 'latin-1')
+        else:
+            return bytes(s)
+
+    def bord(s):
+        return s
+
+    def bchar(s):
+        return bytes([s])
+
+else:
+    def bchr(s):
+        return chr(s)
+
+    def bstr(s):
+        return str(s)
+
+    def bord(s):
+        return ord(s)
+
+    def bchar(s):
+        return s
+
+if PY3:
+    # list-producing versions of the major Python iterating functions
+    def lrange(*args, **kwargs):
+        return list(range(*args, **kwargs))
+
+    def lzip(*args, **kwargs):
+        return list(zip(*args, **kwargs))
+
+    def lmap(*args, **kwargs):
+        return list(map(*args, **kwargs))
+
+    def lfilter(*args, **kwargs):
+        return list(filter(*args, **kwargs))
+else:
+    import __builtin__
+    # Python 2-builtin ranges produce lists
+    lrange = __builtin__.range
+    lzip = __builtin__.zip
+    lmap = __builtin__.map
+    lfilter = __builtin__.filter
+
+# In Python 3 you can no longer use .encode('hex') on a bytestring
+# instead use the following on both platforms
+import binascii
+def hexlify(bdata):
+    return (binascii.hexlify(bdata)).decode('ascii')
+
+# If you: import struct
+# Note:  struct pack, unpack, unpack_from all *require* bytestring format
+# data all the way up to at least Python 2.7.5, Python 3 is okay with either
+
+# If you: import re
+# note: Python 3 "re" requires the pattern to be the exact same type as the data to be
+# searched ... but u"" is not allowed for the pattern itself only b""
+# Python 2.X allows the pattern to be any type and converts it to match the data
+# and returns the same type as the data
+
+# convert string to be utf-8 encoded
+def utf8_str(p, enc='utf-8'):
+    if p is None:
+        return None
+    if isinstance(p, text_type):
+        return p.encode('utf-8')
+    if enc != 'utf-8':
+        return p.decode(enc).encode('utf-8')
+    return p
+
+# convert string to be unicode encoded
+def unicode_str(p, enc='utf-8'):
+    if p is None:
+        return None
+    if isinstance(p, text_type):
+        return p
+    return p.decode(enc)
+
+ASCII_CHARS   = set(chr(x) for x in range(128))
+URL_SAFE      = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+                    'abcdefghijklmnopqrstuvwxyz'
+                    '0123456789' '#' '_.-/~')
+IRI_UNSAFE = ASCII_CHARS - URL_SAFE
+
+# returns a quoted IRI (not a URI)
+def quoteurl(href):
+    if isinstance(href,binary_type):
+        href = href.decode('utf-8')
+    result = []
+    for char in href:
+        if char in IRI_UNSAFE:
+            char = "%%%02x" % ord(char)
+        result.append(char)
+    return ''.join(result)
+
+# unquotes url/iri
+def unquoteurl(href):
+    if isinstance(href,binary_type):
+        href = href.decode('utf-8')
+    href = unquote(href)
+    return href
+
+# unescape html
+def unescapeit(sval):
+    return _h.unescape(sval)
+
+# Python 2.X commandline parsing under Windows has been horribly broken for years!
+# Use the following code to emulate full unicode commandline parsing on Python 2
+# ie. To get  sys.argv arguments and properly encode them as unicode
+
+def unicode_argv():
+    global iswindows
+    global PY3
+    if PY3:
+        return sys.argv
+    if iswindows:
+        # Versions 2.x of Python don't support Unicode in sys.argv on
+        # Windows, with the underlying Windows API instead replacing multi-byte
+        # characters with '?'.  So use shell32.GetCommandLineArgvW to get sys.argv
+        # as a list of Unicode strings
+        from ctypes import POINTER, byref, cdll, c_int, windll
+        from ctypes.wintypes import LPCWSTR, LPWSTR
+
+        GetCommandLineW = cdll.kernel32.GetCommandLineW
+        GetCommandLineW.argtypes = []
+        GetCommandLineW.restype = LPCWSTR
+
+        CommandLineToArgvW = windll.shell32.CommandLineToArgvW
+        CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
+        CommandLineToArgvW.restype = POINTER(LPWSTR)
+
+        cmd = GetCommandLineW()
+        argc = c_int(0)
+        argv = CommandLineToArgvW(cmd, byref(argc))
+        if argc.value > 0:
+            # Remove Python executable and commands if present
+            start = argc.value - len(sys.argv)
+            return [argv[i] for i in
+                    range(start, argc.value)]
+        # this should never happen
+        return None
+    else:
+        argv = []
+        argvencoding = sys.stdin.encoding
+        if argvencoding is None:
+            argvencoding = sys.getfilesystemencoding()
+        if argvencoding is None:
+            argvencoding = 'utf-8'
+        for arg in sys.argv:
+            if isinstance(arg, text_type):
+                argv.append(arg)
+            else:
+                argv.append(arg.decode(argvencoding))
+        return argv
+
+
+# Python 2.X is broken in that it does not recognize CP65001 as UTF-8
+def add_cp65001_codec():
+    if PY2:
+        try:
+            codecs.lookup('cp65001')
+        except LookupError:
+            codecs.register(
+                lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None)
+    return
author	Benawi Adha <benawiadha@gmail.com>	2022-10-02 21:22:38 +0700
committer	Benawi Adha <benawiadha@gmail.com>	2022-10-02 21:22:38 +0700
commit	258c30d2e088cd4ab091a53794da3f93af79915d (patch)
tree	f49340bf565deb20c730358af74a01bcc231de53 /src/epy_reader/tools/KindleUnpack/compatibility_utils.py
parent	d43533f01d9d5baf5f78b71f832641382bd5962a (diff)
download	epy-258c30d2e088cd4ab091a53794da3f93af79915d.tar.gz