Module:Diff/sandbox
Jump to navigation
Jump to search
| File:Edit In Sandbox Icon - Color.svg | This is the module sandbox page for Module:Diff (diff). See also the companion subpage for test cases (run). |
-----------------------------------------------------------------------------
-- Provides functions for getting the difference between two pieces of text.
--
-- (c) 2007, 2008 Yuri Takhteyev (yuri@freewisdom.org)
-- (c) 2007 Hisham Muhammad
-- Adapted to MediaWiki Lua originally by User:Ebrahim
--
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
-----------------------------------------------------------------------------
--- Localization of globals
local lstring = mw.ustring
local ltext = mw.text
local table_insert = table.insert
local table_remove = table.remove
local html_create = mw.html.create
local p = {}
local SKIP_SEPARATOR = true -- a constant
local STYLES = 'src = "Module:Diff/styles.css"'
-- Token statuses
local IN = "in"
local OUT = "out"
local SAME = "same"
-----------------------------------------------------------------------------
--- Split a string into tokens.
-- (Adapted from Gavin Kistner's split on http://lua-users.org/wiki/SplitJoin.)
--
--- @param text string Input string to be split.
--- @param separator string|'%s+'? the separator pattern (default: whitespace)
--- @param skip_separator boolean? don't include the separator in the results.
--- @return table token_list list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
skip_separator = skip_separator or SKIP_SEPARATOR
separator = separator or "%s+"
local parts = {}
local start = 1
local split_start, split_end = lstring.find(text, separator, start)
while split_start do
table_insert(parts, lstring.sub(text, start, split_start - 1))
if not skip_separator then
table_insert(parts, lstring.sub(text, split_start, split_end))
end
start = split_end + 1
split_start, split_end = lstring.find(text, separator, start)
end
if lstring.sub(text, start) ~= "" then
table_insert(parts, lstring.sub(text, start))
end
return parts
end
-----------------------------------------------------------------------------
--- Derives the longest common subsequence of two strings. This is a faster
--- implementation than one provided by stdlib. Submitted by Hisham Muhammad.
-- (The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence)
--
--- NOTE: High-level design choice.
--- This implementation makes use of nested metatables to create a sparse,
--- auto-initializing 2D array. This is clever but complex and slightly
--- unconventional in order to create the LCS matrix only as needed.
--- It's a neat trick, but it may be slower than a more traditional approach,
--- especially for large strings. It also uses more memory, since the matrix
--- is not a sparse array and the matrix is of size (m+1)×(n+1), where m and n
--- are the lengths of the two strings t1 and t2.
--- While the metatable approach is compact, it increases mental overhead too.
--- Sparse array initialization and explicitly managing state with tables would
--- just as helpful and could improve performance.
---
--- @param t1 any first string.
--- @param t2 any second string.
--- @return table lcs_matrix least common subsequence as a matrix.
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
local m = #t1
local n = #t2
-- Build the dynamic programming matrix C on demand using nested metatables.
-- This creates a "sparse array" structure where C[i][j] automatically
-- initializes to 0 when first accessed, avoiding the need for explicit
-- initialization of a potentially large (m+1) x (n+1) table.
local C = {}
local setmetatable = setmetatable
local mt_tbl = {
__index = function(t, k)
t[k] = 0
return 0
end,
}
local mt_C = {
__index = function(t, k)
local tbl = {}
setmetatable(tbl, mt_tbl)
t[k] = tbl
return tbl
end,
}
setmetatable(C, mt_C)
local max = math.max
-- Loops run from 1 to m and 1 to n.
for i = 1, m do
local ci1 = C[i + 1]
local ci = C[i]
for j = 1, n do
-- Compare the tokens directly using 1-based indices (t1[i] and t2[j]).
if t1[i] == t2[j] then
ci1[j + 1] = ci[j] + 1
else
ci1[j + 1] = max(ci1[j], ci[j + 1])
end
end
end
return C
end
-----------------------------------------------------------------------------
--- Formats an inline diff as HTML, with <ins> and <del> tags.
--
--- @param tokens table table of {token, status} key-value pairs.
--- @return string diff_buffer HTML string.
-----------------------------------------------------------------------------
local function format_as_html(tokens)
local diff_buffer = ""
local token, status
for _, token_record in ipairs(tokens) do
token = ltext.nowiki(token_record[1])
status = token_record[2]
if status == "in" then
diff_buffer = diff_buffer .. "<ins>" .. token .. "</ins>"
elseif status == "out" then
diff_buffer = diff_buffer .. "<del>" .. token .. "</del>"
else
diff_buffer = diff_buffer .. token
end
end
return diff_buffer
end
-----------------------------------------------------------------------------
-- Returns a diff of two strings in tuples/two-element array, where the key
-- is a token and the value the token's status ("same", "in", "out").
--
--- @param old string "old" text string
--- @param new string "new" text string
--- @param separator string|'%s+'? separator pattern (default: whitespace).
--- @return table<string, 'same'|'in'|'out'> token_list Two-element array/tuples of annotated tokens.
-----------------------------------------------------------------------------
local function diff(old, new, separator)
assert(old)
assert(new)
local tbl_new = split(new, separator)
local tbl_old = split(old, separator)
-- First, compare the beginnings and ends of strings to remove the common
-- prefix and suffix.
local prefix = "" -- common text in the beginning
local suffix = "" -- common text in the end
while tbl_old[1] and tbl_old[1] == tbl_new[1] do
local token = table_remove(tbl_old, 1)
table_remove(tbl_new, 1)
prefix = prefix .. token
end
while tbl_old[#tbl_old] and tbl_old[#tbl_old] == tbl_new[#tbl_new] do
local token = table_remove(tbl_old)
table_remove(tbl_new)
suffix = token .. suffix
end
-- Setup the table that will store the diff in reverse order.
-- ... (rest of the function is now fine as it uses tbl_old/tbl_new)
local rev_diff = {}
-- Define local helper functions to manage token insertion into rev_diff.
local function put(token, type)
table_insert(rev_diff, { token, type })
end
local function ins(token)
put(token, IN)
end
local function del(token)
put(token, OUT)
end
local function same(token)
if token then
put(token, SAME)
end
end
-- Put the suffix as the first token (we are storing the diff in the
-- reverse order)
same(suffix)
-- Define a function that will scan the LCS matrix backwards and build the
-- diff output recursively.
local function get_diff(C, tbl_old, tbl_new, i, j)
local old_i = tbl_old[i]
local new_j = tbl_new[j]
if i >= 1 and j >= 1 and old_i == new_j then
same(old_i)
return get_diff(C, tbl_old, tbl_new, i - 1, j - 1)
else
local Cij1 = C[i][j - 1]
local Ci1j = C[i - 1][j]
if j >= 1 and (i == 0 or Cij1 >= Ci1j) then
ins(new_j)
return get_diff(C, tbl_old, tbl_new, i, j - 1)
elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then
del(old_i)
return get_diff(C, tbl_old, tbl_new, i - 1, j)
end
end
end
-- Then call it.
get_diff(quick_LCS(tbl_old, tbl_new), tbl_old, tbl_new, #tbl_old + 1, #tbl_new + 1)
-- Put the prefix in at the end
same(prefix)
-- Reverse the diff.
local tbl_diff = {}
for i = #rev_diff, 1, -1 do
table_insert(tbl_diff, rev_diff[i])
end
tbl_diff.to_html = format_as_html
return tbl_diff
end
local function attach_styles(base)
local templatestyles_tag
if not templatestyles_tag then
templatestyles_tag = lstring.format(
"%s %s",
mw.getCurrentFrame():extensionTag({
name = "templatestyles",
content = "",
args = { STYLES },
}),
base
)
else
return lstring.format("%s%s", templatestyles_tag, base)
end
end
-----------------------------------------------------------------------------
-- Wiki diff style, currently just for a line
-----------------------------------------------------------------------------
local function wikiDiff(old, new, separator)
local tokens = diff(old, new, separator)
local root = html_create("")
local token, status
local invalidStyles = "-webkit-border-end-width: 1px;"
.. "-webkit-border-start-width: 4px;"
.. "-moz-border-end-width: 1px;"
.. "-moz-border-start-width: 4px;"
local tr = root
:tag("table")
:addClass("mdl-diff")
:attr("role", "none")
-- :attr("aria-label", "text difference, table, two columns," ..
-- " first column content removed, second column content added")
:tag(
"tr"
)
tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("−")
local deleted = tr
:tag("td")
:attr("aria-label", "Removed")
:addClass("mdl-diff-deletedline")
:addClass("mdl-diff-changedline")
:css(invalidStyles) -- Workaround for a bug in Firefox where border styles are ignored on table cells
:addClass("monospace")
:addClass("monospaced")
:tag("div")
:addClass("mdl-diff-div")
for _, token_record in ipairs(tokens) do
token = ltext.nowiki(token_record[1]):gsub("\n", " ") -- Force all newlines to encode to avoid linter issues
status = token_record[2]
if status == OUT then
deleted:tag("del"):addClass("mdl-diff-deletedline"):addClass("mdl-diff-change-inline"):wikitext(token)
elseif status == SAME then
deleted:wikitext(token)
end
end
tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("+")
local inserted = tr:tag("td")
:attr("aria-label", "Added")
:addClass("mdl-diff-addedline")
:addClass("mdl-diff-changedline")
:css(invalidStyles)
:addClass("monospace")
:addClass("monospaced")
:tag("div")
:addClass("mdl-diff-div")
for _, token_record in ipairs(tokens) do
token = ltext.nowiki(token_record[1]):gsub("\n", " ") -- Force all newlines to encode to avoid linter issues
status = token_record[2]
if status == IN then
inserted:tag("ins"):addClass("mdl-diff-addedline"):addClass("mdl-diff-change-inline"):wikitext(token)
elseif status == SAME then
inserted:wikitext(token)
end
end
return attach_styles(tostring(root))
end
local function main(frame)
return wikiDiff(
ltext.decode(ltext.unstrip(frame.args[1])),
ltext.decode(ltext.unstrip(frame.args[2])),
frame.args[3] or "[%s%.:-]+"
)
end
p = {
diff = diff,
wikiDiff = wikiDiff,
main = main,
}
return p