diff options
author | benadha <benawiadha@gmail.com> | 2022-01-07 05:47:08 +0700 |
---|---|---|
committer | benadha <benawiadha@gmail.com> | 2022-01-07 05:47:08 +0700 |
commit | 25af4c4e3adcf8318d39ffd43ee643cd7e02184b (patch) | |
tree | b8888b166db792e33f854aa35c6fd57b69da14e9 | |
parent | edcd6d672ec6bbb9a82e41c6e6b78b43963a8bf2 (diff) | |
download | epy-25af4c4e3adcf8318d39ffd43ee643cd7e02184b.tar.gz |
Add text mark/span grouper method
-rwxr-xr-x | epy.py | 13 | ||||
-rw-r--r-- | tests.py | 24 |
2 files changed, 37 insertions, 0 deletions
```diff
@@ -914,6 +914,19 @@ class HTMLtoLines(HTMLParser):
 
         return spans
 
+    @staticmethod
+    def _group_span_by_row(
+        blocks: Sequence[Union[TextMark, TextSpan]]
+    ) -> Mapping[int, List[Union[TextMark, TextSpan]]]:
+        groups: Dict[int, List[Union[TextMark, TextSpan]]] = {}
+        for block in blocks:
+            row = block.start.row
+            if row in groups:
+                groups[row].append(block)
+            else:
+                groups[row] = [block]
+        return groups
+
     def __init__(self, sects={""}):
         HTMLParser.__init__(self)
         self.text = [""]
@@ -102,3 +102,27 @@ def test_span_adjustment():
     # assert HTMLtoLines._adjust_wrapped_spans(
     #     text, TextSpan(start=CharPos(row=1, col=7), n_letters=20)
     # ) == [TextSpan(start=CharPos(row=0, col=14), n_letters=3), TextSpan(start=CharPos(row=1, col=0), n_letters=4)]
+
+
+def test_group_blocks():
+    block_list = [
+        TextSpan(start=CharPos(row=0, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=1, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+    ]
+
+    assert HTMLtoLines._group_span_by_row(block_list) == {
+        0: [TextSpan(start=CharPos(row=0, col=0), n_letters=4)],
+        1: [TextSpan(start=CharPos(row=1, col=0), n_letters=4)],
+        3: [
+            TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+            TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+        ],
+        15: [
+            TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+            TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+        ],
+    }
```