diff options
author | Matěj Cepl <mcepl@cepl.eu> | 2019-05-05 17:35:31 +0200 |
---|---|---|
committer | Matěj Cepl <mcepl@cepl.eu> | 2019-05-05 17:38:16 +0200 |
commit | 61904c3386e00e8c213dad4261ffa1450f28bc2b (patch) | |
tree | 89d6152ad5effbfe95c85363b534726d1fac44b0 | |
parent | fa0946dd7b53d0fae3b9c118bf82945ce9aa60c1 (diff) | |
download | epubgrep-61904c3386e00e8c213dad4261ffa1450f28bc2b.tar.gz |
Also print the filename of the EPub in which the matching line was found.
-rwxr-xr-x | epubgrep.py | 20 |
1 file changed, 16 insertions, 4 deletions
diff --git a/epubgrep.py b/epubgrep.py index 578c20b..f2da727 100755 --- a/epubgrep.py +++ b/epubgrep.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import argparse +import logging import os.path import re import zipfile @@ -8,8 +9,14 @@ from typing import Any, Dict, List, Optional, Tuple import epub_meta -def get_chapter_title(mdata:List[Dict[str, Any]], fname:str) -> Optional[Tuple[str, int]]: +logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s', + level=logging.INFO) +log = logging.getLogger('epubgrep') + + +def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple[str, int]]: found_list = [(x['title'], x['index']) for x in mdata if x['src'] == fname] + log.debug('found_list = %s', found_list) if len(found_list) > 0: chap_title = found_list[0][0].strip(' \t.0123456789') return chap_title, found_list[0][1] @@ -17,26 +24,31 @@ def get_chapter_title(mdata:List[Dict[str, Any]], fname:str) -> Optional[Tuple[s return ('Unknown', 0) -def grep_book(filename:str, pattern:str, flags:int): +def grep_book(filename: str, pattern: str, flags: int): assert os.path.isfile(filename), "{} is not EPub file.".format(filename) sought_RE = re.compile(pattern, flags) metadata = epub_meta.get_epub_metadata(filename) book = zipfile.ZipFile(filename) + printed_booktitle = False for zif in book.infolist(): with book.open(zif) as inf: printed_title = False for line in inf: - decoded_line = line.decode() + decoded_line = line.decode(errors='replace').strip() if sought_RE.search(decoded_line): + if not printed_booktitle: + print('{}'.format(filename)) + printed_booktitle = True if not printed_title: chap_info = get_chapter_title(metadata.toc, - zif.filename) + zif.filename) print("{}. {}:\n".format(chap_info[1], chap_info[0])) printed_title = True print(decoded_line) + if __name__ == "__main__": parser = argparse.ArgumentParser(description='Grep through EPub book') parser.add_argument('pattern') |