aboutsummaryrefslogtreecommitdiffstats
path: root/epubgrep.py
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@cepl.eu> 2018-11-30 11:35:50 +0100
committer Matěj Cepl <mcepl@cepl.eu> 2018-11-30 11:35:50 +0100
commit 0c6d0acdf3b93eee221a3093ce22c0c76b0b2127 (patch)
tree 13359b6f5f6ef1c42de85f604c25ed9a65305139 /epubgrep.py
download epubgrep-0c6d0acdf3b93eee221a3093ce22c0c76b0b2127.tar.gz
Initial version0.1.0
Diffstat (limited to 'epubgrep.py')
-rwxr-xr-x epubgrep.py 54
1 files changed, 54 insertions, 0 deletions
diff --git a/epubgrep.py b/epubgrep.py
new file mode 100755
index 0000000..e3051bf
--- /dev/null
+++ b/epubgrep.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+import argparse
+import os.path
+import re
+import zipfile
+
+from typing import Any, Dict, List, Optional, Tuple
+
+import epub_meta
+
+parser = argparse.ArgumentParser(description='Grep through EPub book')
+parser.add_argument('pattern')
+parser.add_argument('filename')
+parser.add_argument('-i', '--ignore-case',
+ action='store_true',
+ help="make search case insensitive")
+args = parser.parse_args()
+
+search_flags = 0
+if args.ignore_case:
+ search_flags |= re.I
+
+
+def get_chapter_title(mdata:List[Dict[str, Any]], fname:str) -> Optional[Tuple[str, int]]:
+ found_list = [(x['title'], x['index']) for x in mdata if x['src'] == fname]
+ if len(found_list) > 0:
+ chap_title = found_list[0][0].strip(' \t.0123456789')
+ return chap_title, found_list[0][1]
+ else:
+ return ('Unknown', 0)
+
+
+def grep_book(filename:str, pattern:str, flags):
+ sought_RE = re.compile(pattern, flags)
+
+ metadata = epub_meta.get_epub_metadata(filename)
+ book = zipfile.ZipFile(filename)
+
+ for zif in book.infolist():
+ with book.open(zif) as inf:
+ printed_title = False
+ for line in inf:
+ decoded_line = line.decode()
+ if sought_RE.search(decoded_line):
+ if not printed_title:
+ chap_info = get_chapter_title(metadata.toc,
+ zif.filename)
+ print("{}. {}:\n".format(chap_info[1], chap_info[0]))
+ printed_title = True
+ print(decoded_line)
+
+assert os.path.isfile(args.filename), "{} is not EPub file.".format(args.filename)
+book_fname = os.path.realpath(args.filename)
+grep_book(book_fname, args.pattern, search_flags)