diff options
author | benadha <benawiadha@gmail.com> | 2022-01-10 20:38:20 +0700 |
---|---|---|
committer | benadha <benawiadha@gmail.com> | 2022-01-10 20:38:20 +0700 |
commit | e93199843aa9a03e3fdd2ce6ee2d60ac36e43518 (patch) | |
tree | 8211c2a4627ce7a60c4a47006175b9a7df386f84 /tests | |
parent | bb20ff2710af248a5e8b2e91f66018ce028a98ee (diff) | |
download | epy-e93199843aa9a03e3fdd2ce6ee2d60ac36e43518.tar.gz |
Restructured tests
Diffstat (limited to 'tests')
-rw-r--r-- | tests/test_text_parser.py | 132 |
1 files changed, 132 insertions, 0 deletions
diff --git a/tests/test_text_parser.py b/tests/test_text_parser.py
new file mode 100644
index 0000000..3c61657
--- /dev/null
+++ b/tests/test_text_parser.py
@@ -0,0 +1,132 @@
+from collections import namedtuple
+
+from epy import CharPos, TextMark, TextSpan, resolve_path, HTMLtoLines
+
+
+def test_resolve_path():
+    UnresolvedPath = namedtuple("UnresolvedPath", ["current_dir", "relative_path"])
+
+    inputs = [
+        UnresolvedPath("/aaa/bbb/book.html", "../ccc.png"),
+        UnresolvedPath("/aaa/bbb/book.html", "../../ccc.png"),
+        UnresolvedPath("aaa/bbb/book.html", "../../ccc.png"),
+    ]
+
+    expecteds = [
+        "/aaa/ccc.png",
+        "/ccc.png",
+        "ccc.png",
+    ]
+
+    for input, expected in zip(inputs, expecteds):
+        assert resolve_path(input.current_dir, input.relative_path) == expected
+
+
+def test_mark_to_span():
+    text = [
+        "Lorem ipsum dolor sit amet,",
+        "consectetur adipiscing elit.",
+        "Curabitur rutrum massa",  # 2
+        "pretium, pulvinar ligula a,",  # 3
+        "aliquam est. Proin ut lectus",  # 4
+        "ac massa fermentum commodo.",  # 5
+        "Duis ac urna a felis mollis",
+        "laoreet. Nullam finibus nibh",
+        "convallis, commodo nisl sit",
+        "amet, vestibulum mauris. Nulla",
+        "lacinia ultrices lacinia. Duis",
+        "auctor nunc non felis",
+        "ultricies, ut egestas tellus",
+        "rhoncus. Aenean ultrices",
+        "efficitur lacinia. Aliquam",
+        "eros lacus, luctus eu lacinia",
+        "in, eleifend nec nunc. Nam",
+        "condimentum malesuada",
+        "facilisis.",
+    ]
+
+    assert HTMLtoLines._mark_to_spans(
+        text, [TextMark(start=CharPos(row=2, col=3), end=CharPos(row=2, col=19))]
+    ) == [TextSpan(start=CharPos(row=2, col=3), n_letters=16)]
+
+    assert HTMLtoLines._mark_to_spans(
+        text,
+        [
+            TextMark(start=CharPos(row=2, col=3), end=CharPos(row=3, col=5)),
+        ],
+    ) == [
+        TextSpan(start=CharPos(row=2, col=3), n_letters=19),
+        TextSpan(start=CharPos(row=3, col=0), n_letters=5),
+    ]
+
+    assert HTMLtoLines._mark_to_spans(
+        text,
+        [
+            TextMark(start=CharPos(row=2, col=3), end=CharPos(row=5, col=3)),
+        ],
+    ) == [
+        TextSpan(start=CharPos(row=2, col=3), n_letters=19),
+        TextSpan(start=CharPos(row=3, col=0), n_letters=27),
+        TextSpan(start=CharPos(row=4, col=0), n_letters=28),
+        TextSpan(start=CharPos(row=5, col=0), n_letters=3),
+    ]
+
+
+def test_span_adjustment():
+    # 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur rutrum massa.'
+
+    text = [
+        "Lorem ipsum dolor",
+        "sit amet,",
+        "consectetur",
+        "adipiscing elit.",
+        "Curabitur rutrum",
+        "massa.",
+    ]
+
+    assert HTMLtoLines._adjust_wrapped_spans(
+        text, TextSpan(start=CharPos(row=0, col=2), n_letters=0)
+    ) == [TextSpan(start=CharPos(row=0, col=2), n_letters=0)]
+
+    assert HTMLtoLines._adjust_wrapped_spans(
+        text, TextSpan(start=CharPos(row=0, col=2), n_letters=5)
+    ) == [TextSpan(start=CharPos(row=0, col=2), n_letters=5)]
+
+    assert HTMLtoLines._adjust_wrapped_spans(
+        text, TextSpan(start=CharPos(row=0, col=15), n_letters=2)
+    ) == [TextSpan(start=CharPos(row=0, col=15), n_letters=2)]
+
+    assert HTMLtoLines._adjust_wrapped_spans(
+        text, TextSpan(start=CharPos(row=0, col=14), n_letters=7)
+    ) == [
+        TextSpan(start=CharPos(row=0, col=14), n_letters=3),
+        TextSpan(start=CharPos(row=1, col=0), n_letters=4),
+    ]
+
+    # assert HTMLtoLines._adjust_wrapped_spans(
+    #     text, TextSpan(start=CharPos(row=1, col=7), n_letters=20)
+    # ) == [TextSpan(start=CharPos(row=0, col=14), n_letters=3), TextSpan(start=CharPos(row=1, col=0), n_letters=4)]
+
+
+def test_group_blocks():
+    block_list = [
+        TextSpan(start=CharPos(row=0, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=1, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+        TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+    ]
+
+    assert HTMLtoLines._group_spans_by_row(block_list) == {
+        0: [TextSpan(start=CharPos(row=0, col=0), n_letters=4)],
+        1: [TextSpan(start=CharPos(row=1, col=0), n_letters=4)],
+        3: [
+            TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+            TextSpan(start=CharPos(row=3, col=0), n_letters=4),
+        ],
+        15: [
+            TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+            TextSpan(start=CharPos(row=15, col=0), n_letters=4),
+        ],
+    }