Module:Diff and Module:Diff/sandbox: Difference between pages

(Difference between pages)
Jump to navigation Jump to search
Page 1
Page 2
imported>Awesome Aasim
No edit summary
 
imported>Waddie96
fixx22
 
Line 1: Line 1:
--- Provides functions for diffing text.
-----------------------------------------------------------------------------
-- Provides functions for getting the difference between two pieces of text.
--
--
--
-- @module diff
-- @alias p
-- @author Yuri Takhteyev (yuri@freewisdom.org)
-- @author Hisham Muhammad
-- @author [[User:Ebrahim]]
-- @release stable
-- @license MIT/X
-- (c) 2007, 2008  Yuri Takhteyev (yuri@freewisdom.org)
-- (c) 2007, 2008  Yuri Takhteyev (yuri@freewisdom.org)
-- (c) 2007 Hisham Muhammad
-- (c) 2007 Hisham Muhammad
-- Adapted to MediaWiki Lua originally by [[User:Ebrahim]]
-- Adapted to MediaWiki Lua originally by User:Ebrahim
--
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
-- License: MIT/X, see http://sputnik.freewisdom.org/en/License
-----------------------------------------------------------------------------
--- Localization of globals
local lstring = mw.ustring
local ltext = mw.text
local table_insert = table.insert
local table_remove = table.remove
local html_create = mw.html.create
local p = {}


local SKIP_SEPARATOR = true -- a constant
local SKIP_SEPARATOR = true -- a constant
local STYLES = 'src = "Module:Diff/styles.css"'


-- token statuses
-- Token statuses
local IN   = "in"
local IN = "in"
local OUT = "out"
local OUT = "out"
local SAME = "same"
local SAME = "same"


--- Split a string into tokens. (Adapted from Gavin Kistner's split on
-----------------------------------------------------------------------------
-- http://lua-users.org/wiki/SplitJoin.
--- Split a string into tokens.
-- (Adapted from Gavin Kistner's split on http://lua-users.org/wiki/SplitJoin.)
--
--
-- @param text           A string to be split.
--- @param text string Input string to be split.
-- @param separator     [optional] the separator pattern (defaults to any
--- @param separator string|'%s+'? the separator pattern (default: whitespace)
--                      whitespace - %s+).
--- @param skip_separator boolean? don't include the separator in the results.
-- @param skip_separator [optional] don't include the separator in the results.
--- @return table token_list list of tokens.
-- @return               A list of tokens.
-----------------------------------------------------------------------------
local function split(text, separator, skip_separator)
local function split(text, separator, skip_separator)
skip_separator = skip_separator or SKIP_SEPARATOR
separator = separator or "%s+"
separator = separator or "%s+"
local parts = {}
local parts = {}
local start = 1
local start = 1
local split_start, split_end = mw.ustring.find(text, separator, start)
local split_start, split_end = lstring.find(text, separator, start)
while split_start do
while split_start do
table.insert(parts, mw.ustring.sub(text, start, split_start-1))
table_insert(parts, lstring.sub(text, start, split_start - 1))
if not skip_separator then
if not skip_separator then
table.insert(parts, mw.ustring.sub(text, split_start, split_end))
table_insert(parts, lstring.sub(text, split_start, split_end))
end
end
start = split_end + 1
start = split_end + 1
split_start, split_end = mw.ustring.find(text, separator, start)
split_start, split_end = lstring.find(text, separator, start)
end
end
if mw.ustring.sub(text, start) ~= "" then
if lstring.sub(text, start) ~= "" then
table.insert(parts, mw.ustring.sub(text, start))
table_insert(parts, lstring.sub(text, start))
end
end
return parts
return parts
end
end


 
-----------------------------------------------------------------------------
--- Derives the longest common subsequence of two strings.  This is a faster
--- Derives the longest common subsequence of two strings.  This is a faster
-- implementation than one provided by stdlib.  Submitted by Hisham Muhammad.
--- implementation than one provided by stdlib.  Submitted by Hisham Muhammad.
-- The algorithm was taken from:
-- (The algorithm was taken from:
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence
-- http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Longest_common_subsequence)
--
--
-- @param t1             the first string.
--- NOTE: High-level design choice.
-- @param t2             the second string.
--- This implementation makes use of nested metatables to create a sparse,
-- @return               the least common subsequence as a matrix.
--- auto-initializing 2D array. This is clever but complex and slightly
--- unconventional in order to create the LCS matrix only as needed.
--- It's a neat trick, but it may be slower than a more traditional approach,
--- especially for large strings.  It also uses more memory, since the matrix
--- is not a sparse array and the matrix is of size (m+1)×(n+1), where m and n
--- are the lengths of the two strings t1 and t2.
--- While the metatable approach is compact, it increases mental overhead too.
--- Sparse array initialization and explicitly managing state with tables would
--- just as helpful and could improve performance.
---
--- @param t1 any first string.
--- @param t2 any second string.
--- @return table lcs_matrix least common subsequence as a matrix.
-----------------------------------------------------------------------------
local function quick_LCS(t1, t2)
local function quick_LCS(t1, t2)
local m = #t1
local m = #t1
local n = #t2
local n = #t2


-- Build matrix on demand
-- Build the dynamic programming matrix C on demand using nested metatables.
-- This creates a "sparse array" structure where C[i][j] automatically
-- initializes to 0 when first accessed, avoiding the need for explicit
-- initialization of a potentially large (m+1) x (n+1) table.
local C = {}
local C = {}
local setmetatable = setmetatable
local setmetatable = setmetatable
Line 69: Line 90:
t[k] = 0
t[k] = 0
return 0
return 0
end
end,
}
}
local mt_C = {
local mt_C = {
Line 77: Line 98:
t[k] = tbl
t[k] = tbl
return tbl
return tbl
end
end,
}
}
setmetatable(C, mt_C)
setmetatable(C, mt_C)
local max = math.max
local max = math.max
for i = 1, m+1 do
 
local ci1 = C[i+1]
-- Loops run from 1 to m and 1 to n.
for i = 1, m do
local ci1 = C[i + 1]
local ci = C[i]
local ci = C[i]
for j = 1, n+1 do
for j = 1, n do
if t1[i-1] == t2[j-1] then
-- Compare the tokens directly using 1-based indices (t1[i] and t2[j]).
ci1[j+1] = ci[j] + 1
if t1[i] == t2[j] then
ci1[j + 1] = ci[j] + 1
else
else
ci1[j+1] = max(ci1[j], ci[j+1])
ci1[j + 1] = max(ci1[j], ci[j + 1])
end
end
end
end
Line 95: Line 119:
end
end


 
-----------------------------------------------------------------------------
 
--- Formats an inline diff as HTML, with <ins> and <del> tags.
--- Formats an inline diff as HTML, with <ins> and <del> tags.
--
--
-- @param tokens         a table of {token, status} pairs.
--- @param tokens table table of {token, status} key-value pairs.
-- @return               an HTML string.
--- @return string diff_buffer HTML string.
-----------------------------------------------------------------------------
local function format_as_html(tokens)
local function format_as_html(tokens)
local diff_buffer = ""
local diff_buffer = ""
local token, status
local token, status
for i, token_record in ipairs(tokens) do
for _, token_record in ipairs(tokens) do
token = mw.text.nowiki(token_record[1])
token = ltext.nowiki(token_record[1])
status = token_record[2]
status = token_record[2]
if status == "in" then
if status == "in" then
diff_buffer = diff_buffer..'<ins>'..token..'</ins>'
diff_buffer = diff_buffer .. "<ins>" .. token .. "</ins>"
elseif status == "out" then
elseif status == "out" then
diff_buffer = diff_buffer..'<del>'..token..'</del>'
diff_buffer = diff_buffer .. "<del>" .. token .. "</del>"
else
else
diff_buffer = diff_buffer..token
diff_buffer = diff_buffer .. token
end
end
end
end
Line 118: Line 142:
end
end


--- Returns a diff of two strings as a list of pairs, where the first value
-----------------------------------------------------------------------------
-- represents a token and the second the token's status ("same", "in", "out").
-- Returns a diff of two strings in tuples/two-element array, where the key
-- is a token and the value the token's status ("same", "in", "out").
--
--
-- @function p.diff
--- @param old string "old" text string
-- @param old             The "old" text string
--- @param new string "new" text string
-- @param new             The "new" text string
--- @param separator string|'%s+'? separator pattern (default: whitespace).
-- @param separator     [optional] the separator pattern (defaults to any
--- @return table<string, 'same'|'in'|'out'> token_list Two-element array/tuples of annotated tokens.
--                      whitespace).
-----------------------------------------------------------------------------
-- @return               A list of annotated tokens.
local function diff(old, new, separator)
local function diff(old, new, separator)
assert(old); assert(new)
assert(old)
new = split(new, separator); old = split(old, separator)
assert(new)
local tbl_new = split(new, separator)
local tbl_old = split(old, separator)


-- First, compare the beginnings and ends of strings to remove the common
-- First, compare the beginnings and ends of strings to remove the common
-- prefix and suffix.  Chances are, there is only a small number of tokens
-- prefix and suffix.
-- in the middle that differ, in which case  we can save ourselves a lot
-- in terms of LCS computation.
local prefix = "" -- common text in the beginning
local prefix = "" -- common text in the beginning
local suffix = "" -- common text in the end
local suffix = "" -- common text in the end
while old[1] and old[1] == new[1] do
 
local token = table.remove(old, 1)
while tbl_old[1] and tbl_old[1] == tbl_new[1] do
table.remove(new, 1)
local token = table_remove(tbl_old, 1)
prefix = prefix..token
table_remove(tbl_new, 1)
prefix = prefix .. token
end
end
while old[#old] and old[#old] == new[#new] do
while tbl_old[#tbl_old] and tbl_old[#tbl_old] == tbl_new[#tbl_new] do
local token = table.remove(old)
local token = table_remove(tbl_old)
table.remove(new)
table_remove(tbl_new)
suffix = token..suffix
suffix = token .. suffix
end
end


-- Setup a table that will store the diff (an upvalue for get_diff). We'll
-- Setup the table that will store the diff in reverse order.
-- store it in the reverse order to allow for tail calls.  We'll also keep
-- ... (rest of the function is now fine as it uses tbl_old/tbl_new)
-- in this table functions to handle different events.
 
local rev_diff = {
local rev_diff = {}
put = function(self, token, type) table.insert(self, {token,type}) end,
-- Define local helper functions to manage token insertion into rev_diff.
ins = function(self, token) self:put(token, IN) end,
local function put(token, type)
del = function(self, token) self:put(token, OUT) end,
table_insert(rev_diff, { token, type })
same = function(self, token) if token then self:put(token, SAME) end end,
end
}
local function ins(token)
put(token, IN)
end
local function del(token)
put(token, OUT)
end
local function same(token)
if token then
put(token, SAME)
end
end


-- Put the suffix as the first token (we are storing the diff in the
-- Put the suffix as the first token (we are storing the diff in the
-- reverse order)
-- reverse order)
 
same(suffix)
rev_diff:same(suffix)
 
-- Define a function that will scan the LCS matrix backwards and build the
-- Define a function that will scan the LCS matrix backwards and build the
-- diff output recursively.
-- diff output recursively.
local function get_diff(C, old, new, i, j)
local function get_diff(C, tbl_old, tbl_new, i, j)
local old_i = old[i]
local old_i = tbl_old[i]
local new_j = new[j]
local new_j = tbl_new[j]
if i >= 1 and j >= 1 and old_i == new_j then
if i >= 1 and j >= 1 and old_i == new_j then
rev_diff:same(old_i)
same(old_i)
return get_diff(C, old, new, i-1, j-1)
return get_diff(C, tbl_old, tbl_new, i - 1, j - 1)
else
else
local Cij1 = C[i][j-1]
local Cij1 = C[i][j - 1]
local Ci1j = C[i-1][j]
local Ci1j = C[i - 1][j]
if j >= 1 and (i == 0 or Cij1 >= Ci1j) then
if j >= 1 and (i == 0 or Cij1 >= Ci1j) then
rev_diff:ins(new_j)
ins(new_j)
return get_diff(C, old, new, i, j-1)
return get_diff(C, tbl_old, tbl_new, i, j - 1)
elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then
elseif i >= 1 and (j == 0 or Cij1 < Ci1j) then
rev_diff:del(old_i)
del(old_i)
return get_diff(C, old, new, i-1, j)
return get_diff(C, tbl_old, tbl_new, i - 1, j)
end
end
end
end
end
end
-- Then call it.
-- Then call it.
get_diff(quick_LCS(old, new), old, new, #old + 1, #new + 1)
get_diff(quick_LCS(tbl_old, tbl_new), tbl_old, tbl_new, #tbl_old + 1, #tbl_new + 1)
 
-- Put the prefix in at the end
-- Put the prefix in at the end
rev_diff:same(prefix)
same(prefix)


-- Reverse the diff.
-- Reverse the diff.
local diff = {}
local tbl_diff = {}
for i = #rev_diff, 1, -1 do
table_insert(tbl_diff, rev_diff[i])
end
tbl_diff.to_html = format_as_html
return tbl_diff
end


for i = #rev_diff, 1, -1 do
local function attach_styles(base)
table.insert(diff, rev_diff[i])
local templatestyles_tag
if not templatestyles_tag then
templatestyles_tag = lstring.format(
"%s %s",
mw.getCurrentFrame():extensionTag({
name = "templatestyles",
content = "",
args = { STYLES },
}),
base
)
else
return lstring.format("%s%s", templatestyles_tag, base)
end
end
diff.to_html = format_as_html
return diff
end
end


--- Wiki diff style, currently just for a line
-----------------------------------------------------------------------------
-- @function p.wikiDiff
-- Wiki diff style, currently just for a line
-- @param old            The "old" text string
-----------------------------------------------------------------------------
-- @param new            The "new" text string
-- @param separator      [optional] the separator pattern (defaults to any
--                       whitespace).
-- @return     HTML formatted diff
local function wikiDiff(old, new, separator)
local function wikiDiff(old, new, separator)
local tokens = diff(old, new, separator)
local tokens = diff(old, new, separator)
local root = mw.html.create('')
local root = html_create("")


local token, status
local token, status
local invalidStyles = "-webkit-border-end-width: 1px;"
.. "-webkit-border-start-width: 4px;"
.. "-moz-border-end-width: 1px;"
.. "-moz-border-start-width: 4px;"


local plusMinusStyle = 'width: 2%; padding: 0.25em; font-weight: bold;' ..
local tr = root
'font-size: 1.25em; text-align: end;'
:tag("table")
local tdDivStyle = 'word-wrap: break-word; direction: ltr;'
:addClass("mdl-diff")
:attr("role", "none")
-- :attr("aria-label", "text difference, table, two columns," ..
-- " first column content removed, second column content added")
:tag(
"tr"
)


local tdSharedStyle = 'vertical-align:top; width: 48%; border-style: solid; border-radius: 0.33em; ' ..
tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("")
'padding: 0.33em 0.5em; color: inherit; font-size: 1em; font-family: monospace; white-space: pre-wrap; border-width: 1px 1px 1px 4px; ' ..
'-webkit-border-end-width: 1px; -webkit-border-start-width: 4px; ' ..
'-moz-border-end-width: 1px; -moz-border-start-width: 4px;' -- these override default border-width for browsers that support them, needed for RTL UI on commons
local insDelSharedStyle = 'padding: 0.25em 0; font-weight: bold; text-decoration: initial;'
 
 
local tr = root:tag('table'):addClass('diff'):css('width', '100%'):tag('tr')
 
tr:tag('td')
:addClass('diff-marker')
:cssText(plusMinusStyle)
:wikitext('')


local deleted = tr
local deleted = tr
:tag('td')
:tag("td")
:cssText('border-color: var(--background-color-content-removed,#ffe49c); ' .. tdSharedStyle)
:attr("aria-label", "Removed")
:addClass('diff-deletedline')
:addClass("mdl-diff-deletedline")
:tag('div')
:addClass("mdl-diff-changedline")
:cssText(tdDivStyle)
:css(invalidStyles) -- Workaround for a bug in Firefox where border styles are ignored on table cells
:addClass("monospace")
:addClass("monospaced")
:tag("div")
:addClass("mdl-diff-div")


for i, token_record in ipairs(tokens) do
for _, token_record in ipairs(tokens) do
token = mw.text.nowiki(token_record[1]):gsub("\n", "&#10;") -- Force all newlines to encode to avoid linter issues
token = ltext.nowiki(token_record[1]):gsub("\n", "&#10;") -- Force all newlines to encode to avoid linter issues
status = token_record[2]
status = token_record[2]
if status == OUT then
if status == OUT then
deleted
deleted:tag("del"):addClass("mdl-diff-deletedline"):addClass("mdl-diff-change-inline"):wikitext(token)
:tag('del')
:cssText('background: var(--background-color-content-removed,#ffe49c); color: inherit; ' .. insDelSharedStyle)
:addClass('diffchange')
:addClass('diffchange-inline')
:wikitext(token)
elseif status == SAME then
elseif status == SAME then
deleted:wikitext(token)
deleted:wikitext(token)
Line 252: Line 294:
end
end


tr:tag('td')
tr:tag("td"):addClass("mdl-diff-marker"):attr("aria-hidden", "true"):wikitext("+")
:cssText(plusMinusStyle)
:wikitext('+')


local inserted = tr
local inserted = tr:tag("td")
:tag('td')
:attr("aria-label", "Added")
:cssText('border-color: var(--background-color-content-added,#a3d3ff); ' .. tdSharedStyle)
:addClass("mdl-diff-addedline")
:addClass('diff-addedline')
:addClass("mdl-diff-changedline")
:tag('div')
:css(invalidStyles)
:cssText(tdDivStyle)
:addClass("monospace")
:addClass("monospaced")
:tag("div")
:addClass("mdl-diff-div")


for i, token_record in ipairs(tokens) do
for _, token_record in ipairs(tokens) do
token = mw.text.nowiki(token_record[1]):gsub("\n", "&#10;") -- Force all newlines to encode to avoid linter issues
token = ltext.nowiki(token_record[1]):gsub("\n", "&#10;") -- Force all newlines to encode to avoid linter issues
status = token_record[2]
status = token_record[2]
if status == IN then
if status == IN then
inserted
inserted:tag("ins"):addClass("mdl-diff-addedline"):addClass("mdl-diff-change-inline"):wikitext(token)
:tag('ins')
:cssText('background: var(--background-color-content-added,#a3d3ff); color: inherit; ' .. insDelSharedStyle)
:addClass('diffchange')
:addClass('diffchange-inline')
:wikitext(token)
elseif status == SAME then
elseif status == SAME then
inserted:wikitext(token)
inserted:wikitext(token)
Line 278: Line 316:
end
end


return tostring(root)
return attach_styles(tostring(root))
end
end


--- Main entry point.
-- @function p.main
-- @param {table} frame Calling frame.
-- @return wikitext output
local function main(frame)
local function main(frame)
return wikiDiff(mw.text.decode(mw.text.unstrip(frame.args[1])), mw.text.decode(mw.text.unstrip(frame.args[2])), frame.args[3] or '[%s%.:-]+')
return wikiDiff(
ltext.decode(ltext.unstrip(frame.args[1])),
ltext.decode(ltext.unstrip(frame.args[2])),
frame.args[3] or "[%s%.:-]+"
)
end
end


--- Exports
p = {
return {
diff = diff,
diff = diff,
wikiDiff = wikiDiff,
wikiDiff = wikiDiff,
main = main
main = main,
}
}
return p