diff options
author | Matěj Cepl <mcepl@cepl.eu> | 2020-09-06 17:14:27 +0200 |
---|---|---|
committer | Matěj Cepl <mcepl@cepl.eu> | 2020-09-06 17:17:28 +0200 |
commit | f9544bfe5bc10065526e2a2a81bb30a7a334b472 (patch) | |
tree | d125412f01f6ac7ded3b2a149e65c30f1c11b6cc | |
parent | 4aa027030c3d404f5094d74f55182236c6b0f3b9 (diff) | |
download | epubgrep-f9544bfe5bc10065526e2a2a81bb30a7a334b472.tar.gz |
Add --weighted-count option. (tag: 0.5.0)
-rwxr-xr-x | epubgrep.py | 19 | ||||
-rw-r--r-- | setup.py | 2 |
2 files changed, 14 insertions, 7 deletions
diff --git a/epubgrep.py b/epubgrep.py index 0b960a2..4122906 100755 --- a/epubgrep.py +++ b/epubgrep.py @@ -27,7 +27,7 @@ def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple return ('Unknown', 0) -def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, color: bool=False): +def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, w_counting: bool=False, color: bool=False): assert os.path.isfile(filename), "{} is not EPub file.".format(filename) sought_RE = re.compile('(' + pattern + ')', flags) count = 0 @@ -52,7 +52,7 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col if not printed_booktitle: out += '{}\n'.format(filename) printed_booktitle = True - if counting: + if counting or w_counting: count += 1 else: chap_info = get_chapter_title(metadata.toc, @@ -67,7 +67,7 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col if res: if not out_title: out_title = '{}'.format(filename) - if counting: + if counting or w_counting: count += 1 else: if not printed_booktitle: @@ -82,7 +82,7 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col printed_title = True # https://stackoverflow.com/a/33206814 # print("\033[31;1;4mHello\033[0m") - if not counting: + if not (counting or w_counting): if color: found_line = decoded_line.replace( res.group(1), @@ -92,7 +92,11 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col out += decoded_line + '\n' if count > 0: - out += '{:02d}:{}'.format(count, out_title) + if counting: + out += '{:02d}:{}'.format(count, out_title) + if w_counting: + size = metadata['file_size_in_bytes'] + out += '{:05d}:{}'.format(int((count/size)*1e5), out_title) return out @@ -104,6 +108,9 @@ def main(): parser.add_argument('-c', '--count', action='store_true', help="just counts of found patterns") + parser.add_argument('-C', '--weighted-count', + action='store_true', + 
help="counts of found patterns as a proportion of whole text") parser.add_argument('-i', '--ignore-case', action='store_true', help="make search case insensitive") @@ -127,7 +134,7 @@ def main(): fut_to_fname = {executor.submit(grep_book, os.path.realpath(filename), args.pattern, search_flags, - args.count, args.color): + args.count, args.weighted_count, args.color): filename for filename in args.files} for future in concurrent.futures.as_completed(fut_to_fname): try: @@ -4,7 +4,7 @@ from setuptools import setup, find_packages setup( name="epubgrep", - version="0.4.1", + version="0.5.0", description='Grep through EPub files', author=u'Matěj Cepl', author_email='mcepl@cepl.eu', |