import argparse
import os
import shutil
import sys
import textwrap
from difflib import SequenceMatcher as SM
from typing import List, Optional, Tuple
from epy_reader import __version__
from epy_reader.lib import coerce_to_int, is_url, truncate
from epy_reader.models import LibraryItem
from epy_reader.parser import parse_html
from epy_reader.state import State
from epy_reader.utils import get_ebook_obj
def cleanup_library(state: State) -> None:
    """Remove history entries whose file is gone.

    An entry is kept when its ``filepath`` is an existing regular file or
    is recognised as a URL by ``is_url``; every other entry is deleted
    from the library through ``state.delete_from_library``.
    """
    for entry in state.get_from_history():
        path = entry.filepath
        if os.path.isfile(path) or is_url(path):
            continue
        state.delete_from_library(path)
def get_nth_file_from_library(state: State, n: int) -> Optional[LibraryItem]:
    """Return the *n*-th entry of the reading history, counting from 1.

    Args:
        state: Object providing ``get_from_history()``.
        n: 1-based position in the history listing.

    Returns:
        The entry at position *n*, or ``None`` when *n* is outside
        ``1..len(history)``.
    """
    library_items = state.get_from_history()
    # Bounds are checked explicitly: with plain indexing, n <= 0 would turn
    # into a negative index and silently pick an entry from the end of the
    # history instead of reporting "not found".
    if 1 <= n <= len(library_items):
        return library_items[n - 1]
    return None
def get_matching_library_item(
    state: State, pattern: str, threshold: float = 0.5
) -> Optional[LibraryItem]:
    """Return the history entry that best matches *pattern*.

    Each entry is compared, case-insensitively, as ``"<title> - <author>"``
    against *pattern*. The score is the total size of the matching blocks
    found by ``difflib.SequenceMatcher`` divided by ``len(pattern)``.

    Args:
        state: Object providing ``get_from_history()``.
        pattern: Free-text search string.
        threshold: Minimum score the best entry must reach.

    Returns:
        The highest-scoring entry (the earliest one on ties) when its score
        is at least *threshold*; ``None`` when the pattern is empty, the
        history is empty, or no entry scores high enough.
    """
    if not pattern:
        # Nothing to match against; this also avoids dividing by zero below.
        return None

    library_items = state.get_from_history()
    if not library_items:
        return None

    # SequenceMatcher caches its analysis of the second sequence, so the
    # constant pattern is set once and only the first sequence changes.
    matcher = SM(None)
    matcher.set_seq2(pattern.lower())

    matches: List[Tuple[LibraryItem, float]] = []  # [(library_item, match_value), ...]
    for item in library_items:
        matcher.set_seq1(f"{item.title} - {item.author}".lower())
        matched_chars = sum(block.size for block in matcher.get_matching_blocks())
        matches.append((item, matched_chars / len(pattern)))

    # max() keeps the first of several equally good entries, which is what a
    # stable descending sort followed by taking element 0 would give.
    best_item, best_value = max(matches, key=lambda pair: pair[1])
    if best_item and best_value >= threshold:
        return best_item
    return None
def print_reading_history(state: State) -> None:
    """Print the reading history as a numbered list, one entry per line.

    Prints ``No Reading History.`` when the history is empty. Otherwise each
    entry is numbered from 1, the number is right-justified, and the entry's
    string form is passed through ``truncate`` with a width derived from the
    terminal's column count.
    """
    history = state.get_from_history()
    if not history:
        print("No Reading History.")
        return

    columns = shutil.get_terminal_size().columns
    # Width of the number column, taken from the digit count of len + 1.
    number_width = len(str(len(history) + 1))
    text_width = columns - number_width - 2

    print("Reading History:")
    for position, entry in enumerate(history, start=1):
        label = str(position).rjust(number_width)
        summary = truncate(str(entry), "...", text_width, text_width - 3)
        print("{} {}".format(label, summary))
def parse_cli_args() -> argparse.Namespace:
    """Parse the process's command-line arguments for ``epy``.

    Recognised options are ``-r/--history``, ``-d/--dump`` and
    ``-v/--version``; any remaining words are collected into the ``ebook``
    list (zero or more items).

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    program_name = "epy"
    target_hint = "[PATH | # | PATTERN | URL]"

    # The literal below is written flush-left, so dedent() leaves it as is.
    examples = textwrap.dedent(
        f"""\
examples:
{program_name} /path/to/ebook read /path/to/ebook file
{program_name} 3 read #3 file from reading history
{program_name} count monte read file matching 'count monte'
from reading history
"""
    )

    parser = argparse.ArgumentParser(
        prog=program_name,
        usage=f"%(prog)s [-h] [-r] [-d] [-v] {target_hint}",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Read ebook in terminal",
        epilog=examples,
    )

    # Boolean switches.
    parser.add_argument(
        "-r",
        "--history",
        action="store_true",
        help="print reading history",
    )
    parser.add_argument(
        "-d",
        "--dump",
        action="store_true",
        help="dump the content of ebook",
    )

    # The "version" action prints the version string and exits.
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"v{__version__}",
        help="print version and exit",
    )

    # Everything else: a path, a history number, pattern words or a URL.
    parser.add_argument(
        "ebook",
        action="store",
        nargs="*",
        metavar=target_hint,
        help="ebook path, history number, pattern or URL",
    )

    return parser.parse_args()
def find_file() -> Tuple[str, bool]:
    """Work out which ebook to open from the CLI arguments and the history.

    The library is cleaned up first. Then, in order:

    * ``--history`` prints the reading history and exits.
    * With no positional argument, the last-read ebook is used; the process
      exits with an error message when there is none.
    * A single argument that converts to an integer selects that numbered
      history entry; a missing entry prints an error plus the history and
      exits with status 1.
    * A single argument that is a URL or an existing file is used as given.
    * Anything else is joined with spaces and matched against the history;
      the process exits with an error message when nothing matches.

    Returns:
        ``(path_or_url, dump)`` where ``dump`` is the ``--dump`` flag.
    """
    cli = parse_cli_args()
    state = State()
    cleanup_library(state)

    if cli.history:
        print_reading_history(state)
        sys.exit()

    targets = cli.ebook

    if not targets:
        recent = state.get_last_read()
        if not recent:
            sys.exit("ERROR: Found no last read ebook file.")
        return recent, cli.dump

    if len(targets) == 1:
        candidate = targets[0]
        index = coerce_to_int(candidate)
        if index is not None:
            entry = get_nth_file_from_library(state, index)
            if entry:
                return entry.filepath, cli.dump
            print(f"ERROR: #{index} file not found.")
            print_reading_history(state)
            sys.exit(1)
        if is_url(candidate) or os.path.isfile(candidate):
            return candidate, cli.dump

    # Several words, or one word that is neither a number, a URL nor an
    # existing file: treat the input as a search pattern.
    found = get_matching_library_item(state, " ".join(targets))
    if not found:
        sys.exit("ERROR: Found no matching ebook from history.")
    return found.filepath, cli.dump
def dump_ebook_content(filepath: str) -> None:
    """Write the text of the ebook at *filepath* to standard output.

    Every entry in the ebook's ``contents`` is read with ``get_raw_text`` and
    passed through ``parse_html``. Each resulting line is written as UTF-8
    bytes followed by a blank line. ``ebook.cleanup()`` runs in all cases,
    including when initialisation fails and the process exits with an error
    message.
    """
    ebook = get_ebook_obj(filepath)
    try:
        try:
            ebook.initialize()
        except Exception as err:
            sys.exit("ERROR: Badly-structured ebook.\n" + str(err))

        for entry in ebook.contents:
            raw_text = ebook.get_raw_text(entry)
            text_lines = parse_html(raw_text)
            assert isinstance(text_lines, tuple)
            # Bytes go straight to the underlying buffer so the output is
            # UTF-8 without reconfiguring sys.stdout (which needs Python >= 3.7).
            for line in text_lines:
                encoded = (line + "\n\n").encode("utf-8")
                sys.stdout.buffer.write(encoded)
    finally:
        ebook.cleanup()