about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Matěj Cepl <mcepl@cepl.eu> 2020-09-06 17:14:27 +0200
committer Matěj Cepl <mcepl@cepl.eu> 2020-09-06 17:17:28 +0200
commit f9544bfe5bc10065526e2a2a81bb30a7a334b472 (patch)
tree d125412f01f6ac7ded3b2a149e65c30f1c11b6cc
parent 4aa027030c3d404f5094d74f55182236c6b0f3b9 (diff)
download epubgrep-f9544bfe5bc10065526e2a2a81bb30a7a334b472.tar.gz
Add --weighted-count option. 0.5.0
-rwxr-xr-x epubgrep.py 19
-rw-r--r-- setup.py 2
2 files changed, 14 insertions, 7 deletions
diff --git a/epubgrep.py b/epubgrep.py
index 0b960a2..4122906 100755
--- a/epubgrep.py
+++ b/epubgrep.py
@@ -27,7 +27,7 @@ def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple
return ('Unknown', 0)
-def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, color: bool=False):
+def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, w_counting: bool=False, color: bool=False):
assert os.path.isfile(filename), "{} is not EPub file.".format(filename)
sought_RE = re.compile('(' + pattern + ')', flags)
count = 0
@@ -52,7 +52,7 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
if not printed_booktitle:
out += '{}\n'.format(filename)
printed_booktitle = True
- if counting:
+ if counting or w_counting:
count += 1
else:
chap_info = get_chapter_title(metadata.toc,
@@ -67,7 +67,7 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
if res:
if not out_title:
out_title = '{}'.format(filename)
- if counting:
+ if counting or w_counting:
count += 1
else:
if not printed_booktitle:
@@ -82,7 +82,7 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
printed_title = True
# https://stackoverflow.com/a/33206814
# print("\033[31;1;4mHello\033[0m")
- if not counting:
+ if not (counting or w_counting):
if color:
found_line = decoded_line.replace(
res.group(1),
@@ -92,7 +92,11 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
out += decoded_line + '\n'
if count > 0:
- out += '{:02d}:{}'.format(count, out_title)
+ if counting:
+ out += '{:02d}:{}'.format(count, out_title)
+ if w_counting:
+ size = metadata['file_size_in_bytes']
+ out += '{:05d}:{}'.format(int((count/size)*1e5), out_title)
return out
@@ -104,6 +108,9 @@ def main():
parser.add_argument('-c', '--count',
action='store_true',
help="just counts of found patterns")
+ parser.add_argument('-C', '--weighted-count',
+ action='store_true',
+ help="counts of found patterns as a proportion of whole text")
parser.add_argument('-i', '--ignore-case',
action='store_true',
help="make search case insensitive")
@@ -127,7 +134,7 @@ def main():
fut_to_fname = {executor.submit(grep_book,
os.path.realpath(filename),
args.pattern, search_flags,
- args.count, args.color):
+ args.count, args.weighted_count, args.color):
filename for filename in args.files}
for future in concurrent.futures.as_completed(fut_to_fname):
try:
diff --git a/setup.py b/setup.py
index 9ba5c6f..804ccd4 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
setup(
name="epubgrep",
- version="0.4.1",
+ version="0.5.0",
description='Grep through EPub files',
author=u'Matěj Cepl',
author_email='mcepl@cepl.eu',