Add --weighted-count option. (tag: 0.5.0)

author: Matěj Cepl <mcepl@cepl.eu> 2020-09-06 17:14:27 +0200
committer: Matěj Cepl <mcepl@cepl.eu> 2020-09-06 17:17:28 +0200
commit: f9544bfe5bc10065526e2a2a81bb30a7a334b472 (patch)
tree: d125412f01f6ac7ded3b2a149e65c30f1c11b6cc
parent: 4aa027030c3d404f5094d74f55182236c6b0f3b9 (diff)
download: epubgrep-f9544bfe5bc10065526e2a2a81bb30a7a334b472.tar.gz
2 files changed, 14 insertions, 7 deletions
diff --git a/epubgrep.py b/epubgrep.py
index 0b960a2..4122906 100755
--- a/epubgrep.py
+++ b/epubgrep.py
@@ -27,7 +27,7 @@ def get_chapter_title(mdata: List[Dict[str, Any]], fname: str) -> Optional[Tuple
             return ('Unknown', 0)
 
 
-def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, color: bool=False):
+def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, w_counting: bool=False, color: bool=False):
     assert os.path.isfile(filename), "{} is not EPub file.".format(filename)
     sought_RE = re.compile('(' + pattern + ')', flags)
     count = 0
@@ -52,7 +52,7 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
                     if not printed_booktitle:
                         out += '{}\n'.format(filename)
                         printed_booktitle = True
-                    if counting:
+                    if counting or w_counting:
                         count += 1
                     else:
                         chap_info = get_chapter_title(metadata.toc,
@@ -67,7 +67,7 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
                     if res:
                         if not out_title:
                             out_title = '{}'.format(filename)
-                        if counting:
+                        if counting or w_counting:
                             count += 1
                         else:
                             if not printed_booktitle:
@@ -82,7 +82,7 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
                                     printed_title = True
                         # https://stackoverflow.com/a/33206814
                         # print("\033[31;1;4mHello\033[0m")
-                        if not counting:
+                        if not (counting or w_counting):
                             if color:
                                 found_line = decoded_line.replace(
                                     res.group(1),
@@ -92,7 +92,11 @@ def grep_book(filename: str, pattern: str, flags: int, counting: bool=False, col
                                 out += decoded_line + '\n'
 
     if count > 0:
-        out += '{:02d}:{}'.format(count, out_title)
+        if counting:
+            out += '{:02d}:{}'.format(count, out_title)
+        if w_counting:
+            size = metadata['file_size_in_bytes']
+            out += '{:05d}:{}'.format(int((count/size)*1e5), out_title)
 
     return out
 
@@ -104,6 +108,9 @@ def main():
     parser.add_argument('-c', '--count',
                         action='store_true',
                         help="just counts of found patterns")
+    parser.add_argument('-C', '--weighted-count',
+                        action='store_true',
+                        help="counts of found patterns as a proportion of whole text")
     parser.add_argument('-i', '--ignore-case',
                         action='store_true',
                         help="make search case insensitive")
@@ -127,7 +134,7 @@ def main():
         fut_to_fname = {executor.submit(grep_book,
                                         os.path.realpath(filename),
                                         args.pattern, search_flags,
-                                        args.count, args.color):
+                                        args.count, args.weighted_count, args.color):
                         filename for filename in args.files}
     for future in concurrent.futures.as_completed(fut_to_fname):
         try:
diff --git a/setup.py b/setup.py
index 9ba5c6f..804ccd4 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
 
 setup(
     name="epubgrep",
-    version="0.4.1",
+    version="0.5.0",
     description='Grep through EPub files',
     author=u'Matěj Cepl',
     author_email='mcepl@cepl.eu',
author	Matěj Cepl <mcepl@cepl.eu>	2020-09-06 17:14:27 +0200
committer	Matěj Cepl <mcepl@cepl.eu>	2020-09-06 17:17:28 +0200
commit	f9544bfe5bc10065526e2a2a81bb30a7a334b472 (patch)
tree	d125412f01f6ac7ded3b2a149e65c30f1c11b6cc
parent	4aa027030c3d404f5094d74f55182236c6b0f3b9 (diff)
download	epubgrep-f9544bfe5bc10065526e2a2a81bb30a7a334b472.tar.gz