about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matěj Cepl <mcepl@cepl.eu> 2019-05-05 17:35:31 +0200
committer: Matěj Cepl <mcepl@cepl.eu> 2019-05-05 17:38:16 +0200
commit: 61904c3386e00e8c213dad4261ffa1450f28bc2b (patch)
tree: 89d6152ad5effbfe95c85363b534726d1fac44b0
parent: fa0946dd7b53d0fae3b9c118bf82945ce9aa60c1 (diff)
download: epubgrep-61904c3386e00e8c213dad4261ffa1450f28bc2b.tar.gz
Print also the filename of the EPub where the line was found.
-rwxr-xr-x epubgrep.py 20
1 files changed, 16 insertions, 4 deletions
diff --git a/epubgrep.py b/epubgrep.py
index 578c20b..f2da727 100755
--- a/epubgrep.py
+++ b/epubgrep.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
import argparse
+import logging
import os.path
import re
import zipfile
@@ -8,8 +9,14 @@ from typing import Any, Dict, List, Optional, Tuple
import epub_meta
-def get_chapter_title(mdata:List[Dict[str, Any]], fname:str) -> Optional[Tuple[str, int]]:
+logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
+ level=logging.INFO)
+log = logging.getLogger('epubgrep')
+
+
+def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple[str, int]]:
found_list = [(x['title'], x['index']) for x in mdata if x['src'] == fname]
+ log.debug('found_list = %s', found_list)
if len(found_list) > 0:
chap_title = found_list[0][0].strip(' \t.0123456789')
return chap_title, found_list[0][1]
@@ -17,26 +24,31 @@ def get_chapter_title(mdata:List[Dict[str, Any]], fname:str) -> Optional[Tuple[s
return ('Unknown', 0)
-def grep_book(filename:str, pattern:str, flags:int):
+def grep_book(filename: str, pattern: str, flags: int):
assert os.path.isfile(filename), "{} is not EPub file.".format(filename)
sought_RE = re.compile(pattern, flags)
metadata = epub_meta.get_epub_metadata(filename)
book = zipfile.ZipFile(filename)
+ printed_booktitle = False
for zif in book.infolist():
with book.open(zif) as inf:
printed_title = False
for line in inf:
- decoded_line = line.decode()
+ decoded_line = line.decode(errors='replace').strip()
if sought_RE.search(decoded_line):
+ if not printed_booktitle:
+ print('{}'.format(filename))
+ printed_booktitle = True
if not printed_title:
chap_info = get_chapter_title(metadata.toc,
- zif.filename)
+ zif.filename)
print("{}. {}:\n".format(chap_info[1], chap_info[0]))
printed_title = True
print(decoded_line)
+
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Grep through EPub book')
parser.add_argument('pattern')