Initial mobi support.

author: wustho <benawiadha@gmail.com> 2020-09-20 19:50:52 +0700
committer: wustho <benawiadha@gmail.com> 2020-09-20 19:50:52 +0700
commit: b376bc44d45213596578b9afa378c62f9743bf56 (patch)
tree: 6d6f47b100530061cd525f33874f9688e3ead8ca /epy.py
parent: 62741b8302d28405afac8f0178dfba331ce6bdc8 (diff)
download: epy-b376bc44d45213596578b9afa378c62f9743bf56.tar.gz
1 files changed, 37 insertions, 13 deletions
diff --git a/epy.py b/epy.py
index 41e0e92..c0d445c 100755
--- a/epy.py
+++ b/epy.py
@@ -14,8 +14,8 @@ Options:
 """
 
 
-__version__ = "2020.6.3"
-__license__ = "MIT"
+__version__ = "2020.9.20"
+__license__ = "GPL-3.0"
 __author__ = "Benawi Adha"
 __url__ = "https://github.com/wustho/epy"
 
@@ -227,23 +227,34 @@ class Epub:
     def get_img_bytestr(self, impath):
         return impath, self.file.read(impath)
 
+    def cleanup(self):
+        """No-op hook; Mobi overrides it to delete its extracted files."""
+
 
 class Mobi(Epub):
     def __init__(self, filemobi):
         self.path = os.path.abspath(filemobi)
-        # self.file = zipfile.ZipFile(fileepub, "r")
         self.file, _ = mobi.extract(filemobi)
-        self.toc = os.path.join(self.file, "mobi7", "toc.ncx")
         self.rootdir = os.path.join(self.file, "mobi7")
+        self.toc = os.path.join(self.rootdir, "toc.ncx")
         self.version = "2.0"
 
         self.contents = []
         self.toc_entries = [[], [], []]
 
+    def get_meta(self):
+        """Return [tag, text] pairs for non-empty content.opf metadata entries."""
+        # self.file is a directory path here (it has no .read()), so the OPF is
+        # opened from disk; binary mode lets the XML parser use the declared encoding
+        with open(os.path.join(self.rootdir, "content.opf"), "rb") as f:
+            cont = ET.parse(f).getroot()
+        entries = cont.findall("OPF:metadata/*", self.NS)
+        pairs = ((re.sub("{.*?}", "", i.tag), i.text) for i in entries)
+        return [[tag, text] for tag, text in pairs if text is not None]
+
     def initialize(self):
-        tmpfile = open(os.path.join(self.file, "mobi7", "content.opf"))
-        cont = ET.parse(tmpfile).getroot()
-        tmpfile.close()
+        with open(os.path.join(self.rootdir, "content.opf")) as f:
+            cont = ET.parse(f).getroot()
         manifest = []
         for i in cont.findall("OPF:manifest/*", self.NS):
             # EPUB3
@@ -267,9 +278,8 @@ class Mobi(Epub):
                     # TODO: test is break necessary
                     break
 
-        tmpfile = open(self.toc)
-        toc = ET.parse(tmpfile).getroot()
-        tmpfile.close()
+        with open(self.toc) as f:
+            toc = ET.parse(f).getroot()
         # EPUB3
         if self.version == "2.0":
             navPoints = toc.findall("DAISY:navMap//DAISY:navPoint", self.NS)
@@ -300,15 +310,18 @@ class Mobi(Epub):
         # caused by forking PROC_COUNTLETTERS
         while True:
             try:
-                tmpfile = open(chpath)
-                content = tmpfile.read()
-                tmpfile.close()
+                with open(chpath) as f:
+                    content = f.read()
                 break
             except:
                 continue
         # return content.decode("utf-8")
         return content
 
+    def cleanup(self):
+        """Remove the directory that mobi.extract() unpacked the book into."""
+        shutil.rmtree(self.file)
+
 
 class FictionBook:
     NS = {
@@ -349,6 +362,9 @@ class FictionBook:
         imgtype = img.get("content-type").split("/")[1]
         return imgid+"."+imgtype, base64.b64decode(img.text)
 
+    def cleanup(self):
+        """No-op; defined so reader() can call cleanup() on any ebook type."""
+
 
 class HTMLtoLines(HTMLParser):
     para = {"p", "div"}
@@ -409,6 +425,12 @@ class HTMLtoLines(HTMLParser):
                     self.text.append("[IMG:{}]".format(len(self.imgs)))
                     self.imgs.append(unquote(i[1]))
                     self.text.append("")
+        # sometimes the "id" attribute is on a "startendtag" element,
+        # especially in HTML produced by the mobi module (a KindleUnpack fork)
+        if self.sects != {""}:
+            for i in attrs:
+                if i[1] in self.sects:
+                    self.text[-1] += " (#" + i[1] + ") "
 
     def handle_endtag(self, tag):
         if re.match("h[1-6]", tag) is not None:
@@ -1308,6 +1330,7 @@ def reader(ebook, index, width, y, pctg, sect):
                         countstring = ""
                     else:
                         savestate(ebook.path, index, width, y, y/totlines)
+                        ebook.cleanup()
                         sys.exit()
                 elif k in K["ScrollUp"]:
                     if count > 1:
@@ -1588,6 +1611,7 @@ def reader(ebook, index, width, y, pctg, sect):
                 svline = "dontsave"
     except KeyboardInterrupt:
         savestate(ebook.path, index, width, y, y/totlines)
+        ebook.cleanup()
         sys.exit()
author	wustho <benawiadha@gmail.com>	2020-09-20 19:50:52 +0700
committer	wustho <benawiadha@gmail.com>	2020-09-20 19:50:52 +0700
commit	b376bc44d45213596578b9afa378c62f9743bf56 (patch)
tree	6d6f47b100530061cd525f33874f9688e3ead8ca /epy.py
parent	62741b8302d28405afac8f0178dfba331ce6bdc8 (diff)
download	epy-b376bc44d45213596578b9afa378c62f9743bf56.tar.gz