Module:Diff and Module:Diff/sandbox: Difference between pages
(Difference between pages)
imported>Awesome Aasim No edit summary |
imported>Waddie96 fixx22 |
||
| Line 1: | Line 1: | ||
--- Provides functions for | ----------------------------------------------------------------------------- | ||
-- Provides functions for getting the difference between two pieces of text. | |||
-- | -- | ||
-- (c) 2007, 2008 Yuri Takhteyev (yuri@freewisdom.org) | -- (c) 2007, 2008 Yuri Takhteyev (yuri@freewisdom.org) | ||
-- (c) 2007 Hisham Muhammad | -- (c) 2007 Hisham Muhammad | ||
-- Adapted to MediaWiki Lua originally by | -- Adapted to MediaWiki Lua originally by User:Ebrahim | ||
-- | |||
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License | -- License: MIT/X, see http://sputnik.freewisdom.org/en/License | ||
----------------------------------------------------------------------------- | |||
--- Localization of globals | |||
local lstring = mw.ustring | |||
local ltext = mw.text | |||
local table_insert = table.insert | |||
local table_remove = table.remove | |||
local html_create = mw.html.create | |||
local p = {} | |||
local SKIP_SEPARATOR = true | local SKIP_SEPARATOR = true -- a constant | ||
local STYLES = 'src = "Module:Diff/styles.css"' | |||
-- | -- Token statuses | ||
local IN | local IN = "in" | ||
local OUT | local OUT = "out" | ||
local SAME = "same" | local SAME = "same" | ||
--- Split a string into tokens. | ----------------------------------------------------------------------------- | ||
--- Split a string into tokens. | |||
-- (Adapted from Gavin Kistner's split on http://lua-users.org/wiki/SplitJoin.) | |||
-- | -- | ||
-- @param text | --- @param text string Input string to be split. | ||
-- @param separator | --- @param separator string|'%s+'? the separator pattern (default: whitespace) | ||
--- @param skip_separator boolean? don't include the separator in the results. | |||
-- @param skip_separator | --- @return table token_list list of tokens. | ||
-- @return | ----------------------------------------------------------------------------- | ||
local function split(text, separator, skip_separator) | local function split(text, separator, skip_separator) | ||
skip_separator = skip_separator or SKIP_SEPARATOR | |||
separator = separator or "%s+" | separator = separator or "%s+" | ||
local parts = {} | local parts = {} | ||
local start = 1 | local start = 1 | ||
local split_start, split_end = | local split_start, split_end = lstring.find(text, separator, start) | ||
while split_start do | while split_start do | ||
table_insert(parts, lstring.sub(text, start, split_start - 1)) | |||
if not skip_separator then | if not skip_separator then | ||
table_insert(parts, lstring.sub(text, split_start, split_end)) | |||
end | end | ||
start = split_end + 1 | start = split_end + 1 | ||
split_start, split_end = | split_start, split_end = lstring.find(text, separator, start) | ||
end | end | ||
if | if lstring.sub(text, start) ~= "" then | ||
table_insert(parts, lstring.sub(text, start)) | |||
end | end | ||
return parts | return parts | ||
end | end | ||
----------------------------------------------------------------------------- | |||
--- Derives the longest common subsequence of two strings. This is a faster | --- Derives the longest common subsequence of two strings. This is a faster | ||
-- implementation than one provided by stdlib. Submitted by Hisham Muhammad. | --- implementation than one provided by stdlib. Submitted by Hisham Muhammad. | ||
-- The algorithm was taken from: | -- (The algorithm was taken from: | ||
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence | -- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence) | ||
-- | -- | ||
-- @param t1 | --- NOTE: High-level design choice. | ||
-- @param t2 | --- This implementation makes use of nested metatables to create a sparse, | ||
-- @return | --- auto-initializing 2D array. This is clever but complex and slightly | ||
--- unconventional in order to create the LCS matrix only as needed. | |||
--- It's a neat trick, but it may be slower than a more traditional approach, | |||
--- especially for large strings. It also uses more memory, since the matrix | |||
--- is not a sparse array and the matrix is of size (m+1)×(n+1), where m and n | |||
--- are the lengths of the two strings t1 and t2. | |||
--- While the metatable approach is compact, it increases mental overhead too. | |||
--- Sparse array initialization and explicitly managing state with tables would | |||
--- just as helpful and could improve performance. | |||
--- | |||
--- @param t1 any first string. | |||
--- @param t2 any second string. | |||
--- @return table lcs_matrix least common subsequence as a matrix. | |||
----------------------------------------------------------------------------- | |||
local function quick_LCS(t1, t2) | local function quick_LCS(t1, t2) | ||
local m = #t1 | local m = #t1 | ||
local n = #t2 | local n = #t2 | ||
-- Build matrix on demand | -- Build the dynamic programming matrix C on demand using nested metatables. | ||
-- This creates a "sparse array" structure where C[i][j] automatically | |||
-- initializes to 0 when first accessed, avoiding the need for explicit | |||
-- initialization of a potentially large (m+1) x (n+1) table. | |||
local C = {} | local C = {} | ||
local setmetatable = setmetatable | local setmetatable = setmetatable | ||
| Line 69: | Line 90: | ||
t[k] = 0 | t[k] = 0 | ||
return 0 | return 0 | ||
end | end, | ||
} | } | ||
local mt_C = { | local mt_C = { | ||
| Line 77: | Line 98: | ||
t[k] = tbl | t[k] = tbl | ||
return tbl | return tbl | ||
end | end, | ||
} | } | ||
setmetatable(C, mt_C) | setmetatable(C, mt_C) | ||
local max = math.max | local max = math.max | ||
for i = 1, m | |||
local ci1 = C[i+1] | -- Loops run from 1 to m and 1 to n. | ||
for i = 1, m do | |||
local ci1 = C[i + 1] | |||
local ci = C[i] | local ci = C[i] | ||
for j = 1, n | for j = 1, n do | ||
if t1[i | -- Compare the tokens directly using 1-based indices (t1[i] and t2[j]). | ||
ci1[j+1] = ci[j] + 1 | if t1[i] == t2[j] then | ||
ci1[j + 1] = ci[j] + 1 | |||
else | else | ||
ci1[j+1] = max(ci1[j], ci[j+1]) | ci1[j + 1] = max(ci1[j], ci[j + 1]) | ||
end | end | ||
end | end | ||
| Line 95: | Line 119: | ||
end | end | ||
----------------------------------------------------------------------------- | |||
--- Formats an inline diff as HTML, with <ins> and <del> tags. | --- Formats an inline diff as HTML, with <ins> and <del> tags. | ||
-- | -- | ||
-- @param tokens | --- @param tokens table table of {token, status} key-value pairs. | ||
-- @return | --- @return string diff_buffer HTML string. | ||
----------------------------------------------------------------------------- | |||
local function format_as_html(tokens) | local function format_as_html(tokens) | ||
local diff_buffer = "" | local diff_buffer = "" | ||
local token, status | local token, status | ||
for | for _, token_record in ipairs(tokens) do | ||
token = | token = ltext.nowiki(token_record[1]) | ||
status = token_record[2] | status = token_record[2] | ||
if status == "in" then | if status == "in" then | ||
diff_buffer = diff_buffer.. | diff_buffer = diff_buffer .. "<ins>" .. token .. "</ins>" | ||
elseif status == "out" then | elseif status == "out" then | ||
diff_buffer = diff_buffer.. | diff_buffer = diff_buffer .. "<del>" .. token .. "</del>" | ||
else | else | ||
diff_buffer = diff_buffer..token | diff_buffer = diff_buffer .. token | ||
end | end | ||
end | end | ||
| Line 118: | Line 142: | ||
end | end | ||
--- Returns a diff of two strings | ----------------------------------------------------------------------------- | ||
-- | -- Returns a diff of two strings in tuples/two-element array, where the key | ||
-- is a token and the value the token's status ("same", "in", "out"). | |||
-- | -- | ||
- | --- @param old string "old" text string | ||
-- @param old | --- @param new string "new" text string | ||
-- @param new | --- @param separator string|'%s+'? separator pattern (default: whitespace). | ||
-- @param separator | --- @return table<string, 'same'|'in'|'out'> token_list Two-element array/tuples of annotated tokens. | ||
----------------------------------------------------------------------------- | |||
-- @return | |||
local function diff(old, new, separator) | local function diff(old, new, separator) | ||
assert(old) | assert(old) | ||
assert(new) | |||
local tbl_new = split(new, separator) | |||
local tbl_old = split(old, separator) | |||
-- First, compare the beginnings and ends of strings to remove the common | -- First, compare the beginnings and ends of strings to remove the common | ||
-- prefix and suffix | -- prefix and suffix. | ||
local prefix = "" -- common text in the beginning | local prefix = "" -- common text in the beginning | ||
local suffix = "" -- common text in the end | local suffix = "" -- common text in the end | ||
while | |||
local token = | while tbl_old[1] and tbl_old[1] == tbl_new[1] do | ||
local token = table_remove(tbl_old, 1) | |||
prefix = prefix..token | table_remove(tbl_new, 1) | ||
prefix = prefix .. token | |||
end | end | ||
while | while tbl_old[#tbl_old] and tbl_old[#tbl_old] == tbl_new[#tbl_new] do | ||
local token = | local token = table_remove(tbl_old) | ||
table_remove(tbl_new) | |||
suffix = token..suffix | suffix = token .. suffix | ||
end | end | ||
-- Setup | -- Setup the table that will store the diff in reverse order. | ||
-- | -- ... (rest of the function is now fine as it uses tbl_old/tbl_new) | ||
-- | |||
local | local rev_diff = {} | ||
-- Define local helper functions to manage token insertion into rev_diff. | |||
local function put(token, type) | |||
table_insert(rev_diff, { token, type }) | |||
end | |||
local function ins(token) | |||
put(token, IN) | |||
end | |||
local function del(token) | |||
put(token, OUT) | |||
end | |||
local function same(token) | |||
if token then | |||
put(token, SAME) | |||
end | |||
end | |||
-- Put the suffix as the first token (we are storing the diff in the | -- Put the suffix as the first token (we are storing the diff in the | ||
-- reverse order) | -- reverse order) | ||
same(suffix) | |||
-- Define a function that will scan the LCS matrix backwards and build the | -- Define a function that will scan the LCS matrix backwards and build the | ||
-- diff output recursively. | -- diff output recursively. | ||
local function get_diff(C, | local function get_diff(C, tbl_old, tbl_new, i, j) | ||
local old_i = | local old_i = tbl_old[i] | ||
local new_j = | local new_j = tbl_new[j] | ||
if i >= 1 and j >= 1 and old_i == new_j then | if i >= 1 and j >= 1 and old_i == new_j then | ||
same(old_i) | |||
return get_diff(C, | return get_diff(C, tbl_old, tbl_new, i - 1, j - 1) | ||
else | else | ||
local Cij1 = C[i][j-1] | local Cij1 = C[i][j - 1] | ||
local Ci1j = C[i-1][j] | local Ci1j = C[i - 1][j] | ||
if j >= 1 and (i == 0 or Cij1 >= Ci1j) then | if j >= 1 and (i == 0 or Cij1 >= Ci1j) then | ||
ins(new_j) | |||
return get_diff(C, | return get_diff(C, tbl_old, tbl_new, i, j - 1) | ||
elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then | elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then | ||
del(old_i) | |||
return get_diff(C, | return get_diff(C, tbl_old, tbl_new, i - 1, j) | ||
end | end | ||
end | end | ||
end | end | ||
-- Then call it. | -- Then call it. | ||
get_diff(quick_LCS( | get_diff(quick_LCS(tbl_old, tbl_new), tbl_old, tbl_new, #tbl_old + 1, #tbl_new + 1) | ||
-- Put the prefix in at the end | -- Put the prefix in at the end | ||
same(prefix) | |||
-- Reverse the diff. | -- Reverse the diff. | ||
local | local tbl_diff = {} | ||
for i = #rev_diff, 1, -1 do | |||
table_insert(tbl_diff, rev_diff[i]) | |||
end | |||
tbl_diff.to_html = format_as_html | |||
return tbl_diff | |||
end | |||
local function attach_styles(base) | |||
local templatestyles_tag | |||
if not templatestyles_tag then | |||
templatestyles_tag = lstring.format( | |||
"%s %s", | |||
mw.getCurrentFrame():extensionTag({ | |||
name = "templatestyles", | |||
content = "", | |||
args = { STYLES }, | |||
}), | |||
base | |||
) | |||
else | |||
return lstring.format("%s%s", templatestyles_tag, base) | |||
end | end | ||
end | end | ||
--- Wiki diff style, currently just for a line | ----------------------------------------------------------------------------- | ||
-- | -- Wiki diff style, currently just for a line | ||
-- | ----------------------------------------------------------------------------- | ||
-- | |||
-- | |||
-- | |||
-- | |||
local function wikiDiff(old, new, separator) | local function wikiDiff(old, new, separator) | ||
local tokens = diff(old, new, separator) | local tokens = diff(old, new, separator) | ||
local root = | local root = html_create("") | ||
local token, status | local token, status | ||
local invalidStyles = "-webkit-border-end-width: 1px;" | |||
.. "-webkit-border-start-width: 4px;" | |||
.. "-moz-border-end-width: 1px;" | |||
.. "-moz-border-start-width: 4px;" | |||
local | local tr = root | ||
:tag("table") | |||
:addClass("mdl-diff") | |||
:attr("role", "none") | |||
-- :attr("aria-label", "text difference, table, two columns," .. | |||
-- " first column content removed, second column content added") | |||
:tag( | |||
"tr" | |||
) | |||
tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("−") | |||
tr:tag( | |||
local deleted = tr | local deleted = tr | ||
:tag( | :tag("td") | ||
:attr("aria-label", "Removed") | |||
:addClass("mdl-diff-deletedline") | |||
:addClass("mdl-diff-changedline") | |||
:css(invalidStyles) -- Workaround for a bug in Firefox where border styles are ignored on table cells | |||
:addClass("monospace") | |||
:addClass("monospaced") | |||
:tag("div") | |||
:addClass("mdl-diff-div") | |||
for | for _, token_record in ipairs(tokens) do | ||
token = | token = ltext.nowiki(token_record[1]):gsub("\n", " ") -- Force all newlines to encode to avoid linter issues | ||
status = token_record[2] | status = token_record[2] | ||
if status == OUT then | if status == OUT then | ||
deleted | deleted:tag("del"):addClass("mdl-diff-deletedline"):addClass("mdl-diff-change-inline"):wikitext(token) | ||
elseif status == SAME then | elseif status == SAME then | ||
deleted:wikitext(token) | deleted:wikitext(token) | ||
| Line 252: | Line 294: | ||
end | end | ||
tr:tag( | tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("+") | ||
local inserted = tr | local inserted = tr:tag("td") | ||
:attr("aria-label", "Added") | |||
:addClass("mdl-diff-addedline") | |||
:addClass("mdl-diff-changedline") | |||
:css(invalidStyles) | |||
:addClass("monospace") | |||
:addClass("monospaced") | |||
:tag("div") | |||
:addClass("mdl-diff-div") | |||
for | for _, token_record in ipairs(tokens) do | ||
token = | token = ltext.nowiki(token_record[1]):gsub("\n", " ") -- Force all newlines to encode to avoid linter issues | ||
status = token_record[2] | status = token_record[2] | ||
if status == IN then | if status == IN then | ||
inserted | inserted:tag("ins"):addClass("mdl-diff-addedline"):addClass("mdl-diff-change-inline"):wikitext(token) | ||
elseif status == SAME then | elseif status == SAME then | ||
inserted:wikitext(token) | inserted:wikitext(token) | ||
| Line 278: | Line 316: | ||
end | end | ||
return tostring(root) | return attach_styles(tostring(root)) | ||
end | end | ||
local function main(frame) | local function main(frame) | ||
return wikiDiff( | return wikiDiff( | ||
ltext.decode(ltext.unstrip(frame.args[1])), | |||
ltext.decode(ltext.unstrip(frame.args[2])), | |||
frame.args[3] or "[%s%.:-]+" | |||
) | |||
end | end | ||
p = { | |||
diff = diff, | diff = diff, | ||
wikiDiff = wikiDiff, | wikiDiff = wikiDiff, | ||
main = main | main = main, | ||
} | } | ||
return p | |||