Added support for FictionBook (fb2).

author: wustho <benawiadha@gmail.com> 2020-04-17 06:21:25 +0700
committer: wustho <benawiadha@gmail.com> 2020-04-17 06:21:25 +0700
commit: 883e26bcef74a11b0f9a54ef8817bf936d0155eb (patch)
tree: befbb62031846a2e29a034b02f308cf22533dacf
parent: 769001c70705b776e4ef4c6c3d0cfbfee5697036 (diff)
download: epy-883e26bcef74a11b0f9a54ef8817bf936d0155eb.tar.gz
1 files changed, 92 insertions, 26 deletions
diff --git a/epy.py b/epy.py
index b940f3e..20ec2d4 100755
--- a/epy.py
+++ b/epy.py
@@ -14,12 +14,13 @@ Options:
 """
 
 
-__version__ = "2020.4.16"
+__version__ = "2020.4.17"
 __license__ = "MIT"
 __author__ = "Benawi Adha"
 __url__ = "https://github.com/wustho/epy"
 
 
+import base64
 import curses
 import zipfile
 import sys
@@ -202,6 +203,53 @@ class Epub:
             elif len(src) == 1:
                 self.toc_entries[2].append("")
 
+    def get_raw_text(self, chpath):
+        content = self.file.open(chpath).read()
+        return content.decode("utf-8")
+
+    def get_img_bytestr(self, impath):
+        return impath, self.file.read(impath)
+
+
+class FictionBook:
+    NS = {
+        "FB2": "http://www.gribuser.ru/xml/fictionbook/2.0"
+    }
+
+    def __init__(self, filefb):
+        self.path = os.path.abspath(filefb)
+        cont = ET.parse(filefb)
+        self.root = cont.getroot()
+
+        self.contents = []
+        self.toc_entries = [[], [], []]
+
+    def get_meta(self):
+        desc = self.root.find("FB2:description", self.NS)
+        alltags = desc.findall("*/*")
+        return [[re.sub("{.*?}", "", i.tag), " ".join(i.itertext())] for i in alltags]
+
+    def initialize(self):
+        self.contents = self.root.findall("FB2:body/*", self.NS)
+        # TODO
+        for n, i in enumerate(self.contents):
+            title = i.find("FB2:title", self.NS)
+            if title is not None:
+                self.toc_entries[0].append("".join(title.itertext()))
+                self.toc_entries[1].append(n)
+                self.toc_entries[2].append("")
+
+    def get_raw_text(self, node):
+        ET.register_namespace("", "http://www.gribuser.ru/xml/fictionbook/2.0")
+        # sys.exit(ET.tostring(node, encoding="utf8", method="html").decode("utf-8").replace("ns1:",""))
+        return ET.tostring(node, encoding="utf8", method="html").decode("utf-8").replace("ns1:","")
+
+    def get_img_bytestr(self, imgid):
+        imgid = imgid.replace("#", "")
+        img = self.root.find("*[@id='{}']".format(imgid))
+        imgtype = img.get("content-type").split("/")[1]
+        return imgid+"."+imgtype, base64.b64decode(img.text)
+
 
 class HTMLtoLines(HTMLParser):
     para = {"p", "div"}
@@ -243,7 +291,8 @@ class HTMLtoLines(HTMLParser):
             self.text[-1] += "_{"
         elif tag == "image":
             for i in attrs:
-                if i[0] == "xlink:href":
+                # if i[0] == "xlink:href":
+                if i[0].endswith("href"):
                     self.text.append("[IMG:{}]".format(len(self.imgs)))
                     self.imgs.append(unquote(i[1]))
         if self.sects != {""}:
@@ -645,6 +694,16 @@ def input_prompt(prompt):
         return
 
 
+def det_ebook_cls(file):
+    filext = os.path.splitext(file)[1]
+    if filext == ".epub":
+        return Epub(file)
+    elif filext == ".fb2":
+        return FictionBook(file)
+    else:
+        sys.exit("ERR: Format not supported. (Supported: epub, fb2)")
+
+
 def dots_path(curr, tofi):
     candir = curr.split("/")
     tofi = tofi.split("/")
@@ -703,12 +762,13 @@ def find_media_viewer():
         VWR += " open"
 
 
-def open_media(scr, epub, src):
-    sfx = os.path.splitext(src)[1]
+def open_media(scr, name, bstr):
+    sfx = os.path.splitext(name)[1]
     fd, path = tempfile.mkstemp(suffix=sfx)
     try:
         with os.fdopen(fd, "wb") as tmp:
-            tmp.write(epub.file.read(src))
+            # tmp.write(epub.file.read(src))
+            tmp.write(bstr)
         # run(VWR + " " + path, shell=True)
         subprocess.call(
             VWR + " " + path,
@@ -911,8 +971,7 @@ def reader(ebook, index, width, y, pctg, sect):
     toc_sect = ebook.toc_entries[2]
     toc_secid = {}
     chpath = contents[index]
-    content = ebook.file.open(chpath).read()
-    content = content.decode("utf-8")
+    content = ebook.get_raw_text(chpath)
 
     parser = HTMLtoLines(set(toc_sect))
     # parser = HTMLtoLines()
@@ -1039,6 +1098,13 @@ def reader(ebook, index, width, y, pctg, sect):
                     except (KeyError, IndexError):
                         y = pgend(totlines, rows)
                 elif k in K["ToC"]:
+                    if ebook.toc_entries == [[], [], []]:
+                        k = text_win(lambda: (
+                            "Table of Contents",
+                            "N/A: ToC is unavailable for this book.",
+                            K["ToC"]
+                            ))()
+                        continue
                     ntoc = find_curr_toc_id(toc_idx, toc_sect, toc_secid, index, y)
                     fllwd = toc(toc_name, ntoc)
                     if fllwd is not None:
@@ -1128,8 +1194,10 @@ def reader(ebook, index, width, y, pctg, sect):
                             impath = imgs[int(gambar[i])]
 
                     if impath != "":
-                        imgsrc = dots_path(chpath, impath)
-                        k = open_media(pad, ebook, imgsrc)
+                        if ebook.__class__.__name__ == "Epub":
+                            impath = dots_path(chpath, impath)
+                        imgnm, imgbstr = ebook.get_img_bytestr(impath)
+                        k = open_media(pad, imgnm, imgbstr)
                         continue
                 elif k in K["SwitchColor"] and COLORSUPPORT and countstring in {"", "0", "1", "2"}:
                     if countstring == "":
@@ -1227,14 +1295,14 @@ def preread(stdscr, file):
     SCREEN.addstr(rows-1, 0, "Loading...")
     SCREEN.refresh()
 
-    epub = Epub(file)
+    ebook = det_ebook_cls(file)
 
-    if epub.path in STATE["States"]:
-        idx = STATE["States"][epub.path]["index"]
-        width = STATE["States"][epub.path]["width"]
-        y = STATE["States"][epub.path]["pos"]
+    if ebook.path in STATE["States"]:
+        idx = STATE["States"][ebook.path]["index"]
+        width = STATE["States"][ebook.path]["width"]
+        y = STATE["States"][ebook.path]["pos"]
     else:
-        STATE["States"][epub.path] = {}
+        STATE["States"][ebook.path] = {}
         idx = 0
         y = 0
         width = 80
@@ -1242,18 +1310,17 @@ def preread(stdscr, file):
 
     if cols <= width:
         width = cols - 2
-        pctg = STATE["States"][epub.path].get("pctg", None)
+        pctg = STATE["States"][ebook.path].get("pctg", None)
 
-    epub.initialize()
+    ebook.initialize()
     find_media_viewer()
     find_dict_client()
     parse_keys()
 
     SHOWPROGRESS = CFG["EnableProgressIndicator"]
     if SHOWPROGRESS:
-        for i in epub.contents:
-            content = epub.file.open(i).read()
-            content = content.decode("utf-8")
+        for i in ebook.contents:
+            content = ebook.get_raw_text(i)
             parser = HTMLtoLines()
             # try:
             parser.feed(content)
@@ -1266,7 +1333,7 @@ def preread(stdscr, file):
     sec = ""
     while True:
         incr, width, y, pctg, sec = reader(
-            epub, idx, width, y, pctg, sec
+            ebook, idx, width, y, pctg, sec
         )
         idx += incr
 
@@ -1350,11 +1417,10 @@ def main():
             sys.exit(xitmsg)
 
     if dump:
-        epub = Epub(file)
-        epub.initialize()
-        for i in epub.contents:
-            content = epub.file.open(i).read()
-            content = content.decode("utf-8")
+        ebook = det_ebook_cls(file)
+        ebook.initialize()
+        for i in ebook.contents:
+            content = ebook.get_raw_text(i)
             parser = HTMLtoLines()
             # try:
             parser.feed(content)
author	wustho <benawiadha@gmail.com>	2020-04-17 06:21:25 +0700
committer	wustho <benawiadha@gmail.com>	2020-04-17 06:21:25 +0700
commit	883e26bcef74a11b0f9a54ef8817bf936d0155eb (patch)
tree	befbb62031846a2e29a034b02f308cf22533dacf
parent	769001c70705b776e4ef4c6c3d0cfbfee5697036 (diff)
download	epy-883e26bcef74a11b0f9a54ef8817bf936d0155eb.tar.gz