author      Florian Fischer <florian.fl.fischer@fau.de>    2020-08-26 14:54:49 +0200
committer   Florian Fischer <florian.fl.fischer@fau.de>    2020-08-26 15:02:38 +0200
commit      bf63501ac14207e687b85eba2aa585fc103111f7 (patch)
tree        c7cf55580eee1718bb4f23d971818ffc18494b0f
parent      e0c59627c5adf95a2da57867f82c81970d123a93 (diff)
download    vis-spellcheck-bf63501ac14207e687b85eba2aa585fc103111f7.tar.gz
implement a new algorithm which calls the external spellchecker at most once
We collect all typos for the current viewport. (This is not sound, because
the viewport we get from vis.win.viewport may not belong to the window we are
currently lexing.)
Then we iterate over the token stream and the typo stream together.
If the current token ends before the current typo starts, or it is not a token
type we spellcheck, we append its end to the new token stream and advance the
token stream. Otherwise we append the token part before the current typo (if
present), append a highlight token covering the typo, and advance the typo
stream. After all typos are handled, every leftover token is appended to the
new token stream.
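
The merge can be pictured with the following minimal, self-contained sketch.
It is an illustration, not the plugin's actual code: `merge`, `checked` and the
typo table layout are made up here; only the token stream convention is real
(a flat Scintillua-style list of alternating token names and 1-based exclusive
end offsets).

```lua
-- Sketch of merging a sorted typo list into a lexer token stream.
-- `tokens`  : {name1, end1, name2, end2, ...} with exclusive end offsets
-- `typos`   : sorted, non-overlapping {start, finish} ranges (inclusive)
-- `checked` : set of token names that are spellchecked
local ERROR = "error"

local function merge(tokens, typos, checked)
    local new_tokens = {}
    local i = 1 -- index of the current token name in `tokens`

    for _, typo in ipairs(typos) do
        -- copy tokens that end before this typo or are never spellchecked
        while i <= #tokens - 1 do
            local token_type, token_end = tokens[i], tokens[i + 1]
            if token_end > typo.start and checked[token_type] then break end
            table.insert(new_tokens, token_type)
            table.insert(new_tokens, token_end)
            i = i + 2
        end

        -- the current token overlaps the typo: split it
        if i <= #tokens - 1 then
            local token_type  = tokens[i]
            local token_start = tokens[i - 1] or 1
            if typo.start > token_start then
                -- unchanged token part before the typo
                table.insert(new_tokens, token_type)
                table.insert(new_tokens, typo.start)
            end
            -- highlight token covering the typo
            table.insert(new_tokens, ERROR)
            table.insert(new_tokens, typo.finish + 1)
        end
    end

    -- leftover tokens after the last typo keep their original style
    for j = i, #tokens do
        table.insert(new_tokens, tokens[j])
    end
    return new_tokens
end

-- Example: "# wrld hello" lexed as one comment token (end offset 13),
-- with the misspelled "wrld" at positions 3..6:
local merged = merge({"comment", 13}, {{start = 3, finish = 6}}, {comment = true})
-- merged is {"comment", 3, "error", 7, "comment", 13}
```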
Typos are cached and reused if the possible viewport and the data we lex are
unchanged. (This is sound: either the data has changed, in which case we call
the external spellchecker again, possibly for nothing, or the data is the same
as last time, in which case the typos haven't changed either.)
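
The cache itself is plain memoization keyed on the viewport range and its
text. A rough sketch under assumed names (`typos_for` and `run_spellchecker`
are placeholders, not the plugin's functions):

```lua
-- Illustrative memoization of the spellchecker output: one cache per wrapped
-- lexer, keyed on the viewport range and the text it covers.
local cache = { viewport = nil, text = nil, typos = nil }

local function typos_for(viewport, text, run_spellchecker)
    if not cache.viewport
       or cache.viewport.start ~= viewport.start
       or cache.viewport.finish ~= viewport.finish
       or cache.text ~= text then
        cache.viewport = viewport
        cache.text = text
        cache.typos = run_spellchecker(text) -- the only external process call
    end
    return cache.typos
end
```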
-rw-r--r--   spellcheck.lua   113
1 file changed, 62 insertions, 51 deletions
```diff
diff --git a/spellcheck.lua b/spellcheck.lua
index 73b4650..fe6c976 100644
--- a/spellcheck.lua
+++ b/spellcheck.lua
@@ -122,6 +122,10 @@ end)
 local wrapped_lex_funcs = {}
 
 local wrap_lex_func = function(old_lex_func)
+    local old_viewport = vis.win.viewport
+    local old_viewport_text = ""
+    local old_typos = {}
+
     return function(lexer, data, index, redrawtime_max)
         local tokens, timedout = old_lex_func(lexer, data, index, redrawtime_max)
 
@@ -135,68 +139,75 @@ local wrap_lex_func = function(old_lex_func)
         local win = vis.win
         local new_tokens = {}
 
-        -- get file position we lex
+        -- get possible file position we lex
         -- duplicated code with vis-std.lua
+        -- this is totally broken and unsound
+        -- to be sound we have to spellcheck all data that was passed to us
+        -- investigate if a stateless approach is much slower
         local viewport = win.viewport
         local horizon_max = win.horizon or 32768
         local horizon = viewport.start < horizon_max and viewport.start or horizon_max
         local view_start = viewport.start
         local lex_start = viewport.start - horizon
-        local token_end = lex_start + (tokens[#tokens] or 1) - 1
-
-        for i = 1, #tokens - 1, 2 do
-            local token_start = lex_start + (tokens[i-1] or 1) - 1
-            local token_end = tokens[i+1]
-            local token_range = {start = token_start, finish = token_end - 1}
-
-            -- check if token is visable
-            if token_start >= view_start or token_end > view_start then
-                local token_name = tokens[i]
-                -- token is not listed for spellchecking just add it to the token stream
-                if not spellcheck.check_tokens[token_name] then
-                    table.insert(new_tokens, tokens[i])
+
+        local viewport_text = data:sub(view_start)
+
+        local typos = ""
+        if old_viewport.start ~= view_start
+           or old_viewport.finish ~= viewport.finish
+           or old_viewport_text ~= viewport_text
+        then
+            typos = get_typos(viewport_text)
+            old_typos = typos
+            old_viewport = viewport
+            old_viewport_text = viewport_text
+        else
+            typos = old_typos
+        end
+
+        local i = 1
+        for typo, start, finish in typo_iter(viewport_text, typos, ignored) do
+            local typo_start = view_start + start
+            local typo_end = view_start + finish
+            repeat
+                -- no tokens left
+                if i > #tokens -1 then
+                    break
+                end
+
+                local token_type = tokens[i]
+                local token_start = lex_start + (tokens[i-1] or 1) - 1
+                local token_end = tokens[i+1]
+
+                -- the current token ends before our typo -> append to new stream
+                -- or is not spellchecked
+                if token_end < typo_start or not spellcheck.check_tokens[token_type] then
+                    table.insert(new_tokens, token_type)
                     table.insert(new_tokens, token_end)
-                -- spellcheck the token
+
+                    -- done with this token -> advance token stream
+                    i = i + 2
+                -- typo and checked token overlap
                 else
-                    local ret, stdout, stderr = vis:pipe(win.file, token_range, cmd)
-                    if ret ~= 0 then
-                        vis:info("calling cmd: `" .. cmd .. "` failed ("..ret..")")
-                    -- we got misspellings
-                    elseif stdout then
-                        local typo_iter = stdout:gmatch("(.-)\n")
-                        local token_content = win.file:content(token_range)
-
-                        -- current position in token_content
-                        local index = 1
-                        for typo in typo_iter do
-                            if not ignored[typo] then
-                                local start, finish = token_content:find(typo, index, true)
-                                -- split token
-                                local pre_typo_end = start - 1
-                                -- correct part before typo
-                                if pre_typo_end > index then
-                                    table.insert(new_tokens, token_name)
-                                    table.insert(new_tokens, token_start + pre_typo_end)
-                                end
-                                -- typo
-                                -- TODO make style configurable
-                                table.insert(new_tokens, vis.lexers.ERROR)
-                                table.insert(new_tokens, token_start + finish + 1)
-                                index = finish
-                            end
-                        end
-                        -- rest which is not already inserted into the token stream
-                        table.insert(new_tokens, token_name)
-                        table.insert(new_tokens, token_end)
-                    -- found no misspellings just add it to the token stream
-                    else
-                        table.insert(new_tokens, token_name)
-                        table.insert(new_tokens, token_end)
+                    local pre_typo_end = typo_start - 1
+                    -- unchanged token part before typo
+                    if pre_typo_end > token_start then
+                        table.insert(new_tokens, token_type)
+                        table.insert(new_tokens, pre_typo_end + 1)
                     end
-                    -- comment with mispellings anf oter stuf
+
+                    -- highlight typo
+                    table.insert(new_tokens, vis.lexers.ERROR)
+                    table.insert(new_tokens, typo_end + 1)
                 end
-            end
+            until(not token_end or token_end > typo_end)
         end
+
+        -- add tokens left after we handled all typos
+        for i = i, #tokens, 1 do
+            table.insert(new_tokens, tokens[i])
+        end
+
         return new_tokens, timedout
     end
 end
```