aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbenadha <benawiadha@gmail.com>2022-01-15 14:07:52 +0700
committerbenadha <benawiadha@gmail.com>2022-01-15 14:07:52 +0700
commit75b5e6a04a116902703cc99d12889b10bcd23023 (patch)
tree7c64fe14788ff55210bd255eed7fe7ac1aa5a535
parentbde59b1d0eeac75301459331e76df337b06f6749 (diff)
downloadepy-75b5e6a04a116902703cc99d12889b10bcd23023.tar.gz
URL early support
-rw-r--r--README.md1
-rwxr-xr-xepy.py83
-rw-r--r--tests/test_text_parser.py9
3 files changed, 78 insertions, 15 deletions
diff --git a/README.md b/README.md
index 5865152..6a3bfa4 100644
--- a/README.md
+++ b/README.md
@@ -13,6 +13,7 @@ This is just a fork of my own [epr](https://github.com/wustho/epr) with these ex
- FictionBook (.fb2)
- Mobi (.mobi)
- AZW3 (.azw, .azw3)
+ - URL
- Reading progress percentage
- Bookmarks
- External dictionary integration (`sdcv` or `dict`)
diff --git a/epy.py b/epy.py
index 695cc81..5060850 100755
--- a/epy.py
+++ b/epy.py
@@ -59,7 +59,9 @@ from enum import Enum
from functools import wraps
from html import unescape
from html.parser import HTMLParser
-from urllib.parse import unquote, urljoin
+from urllib.parse import unquote, urljoin, urlparse
+from urllib.request import Request, urlopen
+from urllib.error import HTTPError, URLError
try:
from epy_extras import unpackBook # type: ignore
@@ -158,13 +160,13 @@ class LibraryItem:
reading_progress_str = reading_progress_str.rjust(4)
book_name: str
- file_basename = os.path.basename(self.filepath)
+ filename = self.filepath.replace(os.path.expanduser("~"), "~", 1)
if self.title is not None and self.author is not None:
- book_name = f"{self.title} - {self.author} ({file_basename})"
- elif self.title is None:
- book_name = f"{file_basename} - {self.author}"
+            book_name = f"{self.title} - {self.author} ({filename})"
+ elif self.title is None and self.author:
+            book_name = f"{filename} - {self.author}"
else:
- book_name = file_basename
+ book_name = filename
last_read_str = self.last_read.strftime("%I:%M%p %b %d")
@@ -245,11 +247,12 @@ class TextMark:
Missing </i> tag
"""
if self.end is not None:
- assert self.start.row <= self.end.row
- if self.start.row <= self.end.row:
- assert self.start.col <= self.end.col
+ if self.start.row == self.end.row:
+ return self.start.col <= self.end.col
+ else:
+ return self.start.row < self.end.row
- return self.end is not None
+ return False
@dataclass(frozen=True)
@@ -899,6 +902,45 @@ class FictionBook(Ebook):
return
+class URL(Ebook):
+ _header = {
+ "User-Agent": f"epy/v{__version__}",
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+ "Accept-Language": "en-US,en;q=0.8",
+ }
+
+ def __init__(self, url: str):
+ self.path = url
+ self.file = url
+ self.contents = ("_",)
+ self.toc_entries = tuple()
+
+ def get_meta(self) -> BookMetadata:
+ return BookMetadata()
+
+ def initialize(self) -> None:
+ try:
+ with urlopen(Request(self.path, headers=URL._header)) as response:
+ self.html = response.read().decode()
+ except HTTPError as e:
+ raise e
+ except URLError as e:
+ raise e
+
+ def get_raw_text(self, _) -> str:
+ return self.html
+
+ def get_img_bytestr(self, src: str) -> Tuple[str, bytes]:
+ image_url = urljoin(self.path, src)
+ # TODO: catch error on request
+ with urlopen(Request(image_url, headers=URL._header)) as response:
+ byte_str = response.read()
+ return src.split("/")[-1], byte_str
+
+ def cleanup(self) -> None:
+ return
+
+
# }}}
@@ -1744,7 +1786,7 @@ def cleanup_library(state: State) -> None:
"""Cleanup non-existent file from library"""
library_items = state.get_from_history()
for item in library_items:
- if not os.path.isfile(item.filepath):
+ if not os.path.isfile(item.filepath) and not is_url(item.filepath):
state.delete_from_library(item.filepath)
@@ -1800,6 +1842,14 @@ def print_reading_history(state: State) -> None:
)
+def is_url(string: str) -> bool:
+ try:
+ tmp = urlparse(string)
+ return all([tmp.scheme, tmp.netloc])
+ except ValueError:
+ return False
+
+
def construct_speaker(
preferred: Optional[str] = None, args: List[str] = []
) -> Optional[SpeakerBaseModel]:
@@ -1906,7 +1956,9 @@ def construct_relative_reading_state(
def get_ebook_obj(filepath: str) -> Ebook:
file_ext = os.path.splitext(filepath)[1].lower()
- if file_ext == ".epub":
+ if is_url(filepath):
+ return URL(filepath)
+ elif file_ext in {".epub", ".epub3"}:
return Epub(filepath)
elif file_ext == ".fb2":
return FictionBook(filepath)
@@ -3814,9 +3866,10 @@ def parse_cli_args() -> Tuple[str, bool]:
print(f"ERROR: #{nth} file not found.")
print_reading_history(state)
sys.exit(1)
- else:
- if os.path.isfile(args.ebook[0]):
- return args.ebook[0], args.dump
+ elif is_url(args.ebook[0]):
+ return args.ebook[0], args.dump
+ elif os.path.isfile(args.ebook[0]):
+ return args.ebook[0], args.dump
pattern = " ".join(args.ebook)
match = get_matching_library_item(state, pattern)
diff --git a/tests/test_text_parser.py b/tests/test_text_parser.py
index 42a98d3..9573915 100644
--- a/tests/test_text_parser.py
+++ b/tests/test_text_parser.py
@@ -1,6 +1,15 @@
from epy import CharPos, TextMark, TextSpan, HTMLtoLines
+def test_text_mark_validation_check():
+ assert TextMark(start=CharPos(row=3, col=1), end=CharPos(row=3, col=5)).is_valid()
+ assert TextMark(start=CharPos(row=3, col=5), end=CharPos(row=3, col=5)).is_valid()
+ assert not TextMark(start=CharPos(row=3, col=5), end=CharPos(row=3, col=2)).is_valid()
+ assert TextMark(start=CharPos(row=3, col=5), end=CharPos(row=5, col=2)).is_valid()
+ assert not TextMark(start=CharPos(row=8, col=5), end=CharPos(row=5, col=2)).is_valid()
+ assert not TextMark(start=CharPos(row=0, col=3)).is_valid()
+
+
def test_mark_to_span():
text = [
"Lorem ipsum dolor sit amet,",