1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
import base64
import os
import xml.etree.ElementTree as ET
from typing import List, Tuple, Union
from epy_reader.ebooks import Ebook
from epy_reader.models import BookMetadata, TocEntry
class FictionBook(Ebook):
    """Ebook backend for FictionBook 2 (``.fb2``) XML files.

    The document is parsed with ``xml.etree.ElementTree``. Construction only
    records the file location; ``initialize()`` must be called before any
    method that reads ``self.root``, ``self.contents`` or ``self.toc_entries``.
    """

    # Prefix map handed to ElementTree's find()/findall() so that paths can
    # refer to FictionBook 2.0 elements as ``FB2:<tag>``.
    NAMESPACE = {"FB2": "http://www.gribuser.ru/xml/fictionbook/2.0"}

    def __init__(self, filefb: str):
        """Remember where the book lives; no file I/O happens here.

        Args:
            filefb: Path to the FictionBook file, as given by the caller.
        """
        self.path = os.path.abspath(filefb)
        self.file = filefb

        # Populated by self.initialize(); declared here for type checkers only.
        self.root: ET.Element

    def _find_text(self, tag: str) -> Union[str, None]:
        """Return the text of the first ``FB2:<tag>`` anywhere under the root.

        Returns ``None`` when no such element exists or when it has no text.
        """
        elem = self.root.find(f".//FB2:{tag}", FictionBook.NAMESPACE)
        return elem.text if elem is not None else None

    def get_meta(self) -> BookMetadata:
        """Collect title, author, date and identifier from the document.

        Each field is taken from the first matching element found anywhere in
        the tree; a missing element leaves the corresponding field ``None``.
        The author is the first and last name joined by a single space, using
        whichever of the two actually carries text.
        """
        # Only keep name parts that have text, so an empty <last-name/> can
        # no longer produce an author such as "First None".
        name_parts = [
            part
            for part in (self._find_text("first-name"), self._find_text("last-name"))
            if part
        ]
        author = " ".join(name_parts) if name_parts else None

        return BookMetadata(
            title=self._find_text("book-title"),
            creator=author,
            date=self._find_text("date"),
            identifier=self._find_text("id"),
        )

    def initialize(self) -> None:
        """Parse the file and build ``root``, ``contents`` and ``toc_entries``.

        ``contents`` holds every direct child element of each top-level
        ``<body>``. A child that has its own ``<title>`` contributes one TOC
        entry whose label is the concatenated text of that title and whose
        ``content_index`` is the child's position in ``contents``.
        """
        self.root = ET.parse(self.file).getroot()
        self.contents = tuple(self.root.findall("FB2:body/*", FictionBook.NAMESPACE))

        # TODO (carried over from the original code; intent not stated there)
        entries: List[TocEntry] = []
        for index, element in enumerate(self.contents):
            title = element.find("FB2:title", FictionBook.NAMESPACE)
            if title is None:
                continue
            entries.append(
                TocEntry(label="".join(title.itertext()), content_index=index, section=None)
            )
        self.toc_entries = tuple(entries)

    def get_raw_text(self, node: Union[str, ET.Element]) -> str:
        """Serialize ``node`` to an HTML-flavoured markup string.

        Args:
            node: An element, normally one item of ``self.contents``.

        Returns:
            The element serialized with ElementTree's ``html`` method, with
            any literal ``"ns1:"`` substrings removed.

        Raises:
            TypeError: If ``node`` is not an ``ET.Element``.
        """
        # Explicit check instead of `assert`, which is stripped under -O.
        if not isinstance(node, ET.Element):
            raise TypeError(
                f"FictionBook.get_raw_text() expects an Element, got {type(node).__name__}"
            )

        # Register the FB2 namespace as the default one so that FB2 tags are
        # written without a prefix. This mutates ElementTree's global registry.
        ET.register_namespace("", FictionBook.NAMESPACE["FB2"])
        markup = ET.tostring(node, encoding="utf8", method="html").decode("utf-8")

        # Presumably removes an auto-generated prefix for a second,
        # unregistered namespace (e.g. xlink attributes) - verify against
        # real files before changing.
        return markup.replace("ns1:", "")

    def get_img_bytestr(self, imgid: str) -> Tuple[str, bytes]:
        """Return the file name and decoded bytes of an embedded image.

        Args:
            imgid: Value of the image reference; any ``#`` characters are
                dropped before it is matched against ``id`` attributes.

        Returns:
            ``(name, data)`` where ``name`` is ``"<id>.<subtype>"`` derived
            from the element's ``content-type`` and ``data`` is the
            base64-decoded element text.

        Raises:
            ValueError: If no direct child of the root has that ``id``, or the
                matching element lacks a ``content-type`` or any text.
        """
        # TODO: test if image works
        imgid = imgid.replace("#", "")

        # Compare the attribute directly instead of formatting the id into an
        # XPath predicate, which breaks when the id contains a quote.
        img_elem = next((child for child in self.root if child.get("id") == imgid), None)
        if img_elem is None:
            raise ValueError(f"no embedded image with id {imgid!r}")

        imgtype = img_elem.get("content-type")
        if imgtype is None:
            raise ValueError(f"embedded image {imgid!r} has no content-type")

        encoded = img_elem.text
        if encoded is None:
            raise ValueError(f"embedded image {imgid!r} has no data")

        # "image/png" -> "png"; a value without a slash is used as-is rather
        # than raising IndexError.
        extension = imgtype.split("/")[-1]
        return imgid + "." + extension, base64.b64decode(encoded)

    def cleanup(self) -> None:
        """Do nothing: this backend creates nothing that needs releasing."""
        return
|