Module:a or an/sandbox
Jump to navigation
Jump to search
| File:Edit In Sandbox Icon - Color.svg | This is the module sandbox page for Module:A or an (diff). See also the companion subpage for test cases (run). |
File:Test Template Info-Icon - Version (2).svg Module documentation[view] [edit] [history] [purge]
| File:Green check.svg | This module is rated as ready for general use. It has reached a mature state, is considered relatively stable and bug-free, and may be used wherever appropriate. It can be mentioned on help pages and other Wikipedia resources as an option for new users. To minimise server load and avoid disruptive output, improvements should be developed through sandbox testing rather than repeated trial-and-error editing. |
| Warning | This Lua module is used on approximately 12,000 pages and changes may be widely noticed. Test changes in the module's /sandbox or /testcases subpages, or in your own module sandbox. Consider discussing changes on the talk page before implementing them. |
| File:Lua-Logo.svg | This module depends on the following other modules: Module:A or an/words |
This module implements Template:a or an. Exception words are stored at Module:a or an/words.
-- Table of functions exported by this module (returned at the end of the file).
local p = {}
-- Exception lists loaded from the data submodule. Every list read from it in
-- this file is passed to findWord, which uses each entry as a pattern.
local words = mw.loadData('Module:A or an/words')
-- Body of a character set (it is wrapped in [...] where used) tested against
-- the first character of a lower-cased word to decide whether it begins with
-- a vowel.
local lcVChars = 'aeiouà-æè-ïò-öø-üāăąēĕėęěĩīĭįıijōŏőœũūŭůűų'
-- Body of a character set tested against the first character of an acronym;
-- a match selects "an" unless an exception list says otherwise.
-- NOTE(review): presumably these are letters whose spoken names begin with a
-- vowel sound (F, H, M, ...) -- confirm against the template documentation.
local ucVvChars = 'AEFHILMNORSXÀ-ÆÈ-ÏÒ-ÖØĀĂĄĒĔĖĘĚĨĪĬĮıIJŌŎŐŒÑĤĦĹĻĽĿŁŃŅŇŊŔŖŘŚŜŞ'
-- The two indefinite articles this module can emit.
local article = {
a = "a",
an = "an",
}
--- Test whether `text` fully matches any pattern in a list.
-- Each entry of `array` is anchored at both ends before matching, so an entry
-- must describe the whole of `text`, not just a part of it.
-- @param text   string to test
-- @param array  sequence of pattern strings (iterated with ipairs)
-- @return true on the first entry that matches; nothing (nil) otherwise
local function findWord(text, array)
	for _, pattern in ipairs(array) do
		local anchored = '^' .. pattern .. '$'
		local first = mw.ustring.find(text, anchored)
		if first ~= nil then
			return true
		end
	end
end
--- Choose the article for text that looks like an acronym.
-- "an" is chosen when the first character is in ucVvChars and the text is not
-- listed in words.cvAcronyms, or when the text is listed in words.vvAcronyms.
-- @param text  the cleaned acronym
-- @return article.an or article.a
local function get_article_from_acronym(text)
	local initial_pattern = '^[' .. ucVvChars .. ']'
	-- First letter calls for "an", unless listed as an exception ('NASA' etc.).
	if mw.ustring.find(text, initial_pattern)
		and not findWord(text, words.cvAcronyms)
	then
		return article.an
	end
	-- Acronyms that take "an" regardless of the first-letter test ('UNRWA' etc.).
	if findWord(text, words.vvAcronyms) then
		return article.an
	end
	return article.a
end
--- Choose the article for text that begins with digits.
-- Only the leading run of ASCII digits is considered; it is compared against
-- the patterns in words.vNums ('18' etc.).
-- @param text  cleaned text; the caller is expected to pass text that starts
--              with a digit
-- @return article.an when the leading number is listed, otherwise article.a
local function get_article_from_number_word(text)
	local leading_digits = mw.ustring.match(text, '^[0-9]+')
	local takes_an = findWord(leading_digits, words.vNums)
	-- Both articles are non-empty strings, so the and/or idiom is safe here.
	return takes_an and article.an or article.a
end
--- Reduce raw wikitext to the first word that determines the article.
-- Steps, in order: drop HTML tags, replace piped wikilinks with their label,
-- drop any remaining link brackets, strip leading quote/bracket/currency
-- symbols, then keep only the first run of letters and digits (optionally
-- preceded by a single dot).
-- @param text  trimmed input string
-- @return the extracted first word, or the partly cleaned text when no word
--         can be extracted
local function clean_text(text)
	-- Each gsub result is assigned to a single variable so that the
	-- replacement count (second return value) is discarded.
	text = mw.ustring.gsub(text, '</?[A-Za-z][^>]->', '') -- Remove HTML tags
	-- Replace [[target|label]] with label. The target class excludes ']' as
	-- well as '|', so the match cannot start at an unpiped link and run into
	-- a later piped one ('[[apple]] or [[banana|b]]' must keep 'apple').
	text = mw.ustring.gsub(text, '%[%[[^%|%]]+%|(..-)%]%]', '%1')
	-- Remove the brackets of unpiped links
	text = mw.ustring.gsub(text, '%[%[', '')
	text = mw.ustring.gsub(text, '%]%]', '')
	text = mw.ustring.gsub(text, '^["%$\'%(<%[%{¢-¥₠-₿]+', '') -- Strip some symbols at the beginning
	text = mw.ustring.match(text, '^%.?[0-9%u%l]+') or text -- Extract the first word
	return text
end
--- Prefix a piece of text with the appropriate indefinite article.
-- @param args  table of template arguments:
--   args[1]       the text to prefix (surrounding whitespace is trimmed)
--   args.variety  optional; when it equals 'us' (case-insensitive) the
--                 words.cvWordsUS list is consulted as well ('herb' etc.)
-- @return "<article> <trimmed text>", or '' when args[1] is missing or empty
function p._main(args)
	local original_text = args[1] and mw.text.trim(args[1])
	if not original_text or original_text == '' then
		return ''
	end

	local text = clean_text(original_text)
	-- This local must not be called `article`: that would shadow the
	-- module-level table, turning every later `article.an` into an index on
	-- a string (nil) and making the final concatenation fail.
	local chosen = article.a

	if mw.ustring.find(text, '^[0-9]') then -- It begins with a number
		chosen = get_article_from_number_word(text)
	elseif mw.ustring.match(text, '^[0-9%u]+$') then -- It looks like an acronym
		chosen = get_article_from_acronym(text)
	else
		text = mw.ustring.lower(text) -- Uncapitalize
		if mw.ustring.find(text, '^[' .. lcVChars .. ']') then -- It begins with a vowel
			if not findWord(text, words.vcWords) -- Exclude 'euro' etc.
				or findWord(text, words.vvWords) -- But not 'Euler' etc.
			then
				chosen = article.an
			end
		elseif args.variety and mw.ustring.lower(args.variety) == 'us' -- 'herb' etc.
			and findWord(text, words.cvWordsUS)
			or findWord(text, words.cvWords) -- 'hour' etc.
		then
			chosen = article.an
		end
	end

	-- The article is prepended to the trimmed input, not to the cleaned word.
	return chosen .. ' ' .. original_text
end
--- Template entry point.
-- Reads the arguments from the parent frame of `frame` and hands them to
-- p._main.
-- @param frame  frame object supplied by the caller
-- @return the result of p._main for the parent frame's arguments
function p.main(frame)
	local parent = frame:getParent()
	return p._main(parent.args)
end
-- Export the module table (p.main and p._main).
return p