-- Module:Sandbox/BlaueBlüte/Wikibase/lexpression
--
-- From Wikipedia, the free encyclopedia
-- Jump to navigation Jump to search
-- a module (eventually) providing functions to express Wikidata Lexemes
-- (or rather, their Forms) as strings
-- compare also [[Module:Sandbox/BlaueBlüte/Wikibase/lexFormSimple]]

libraryUtil = require( 'libraryUtil' )
helperFunctions = require("Module:Sandbox/BlaueBlüte/Wikibase/test")

local m = {}

-- The is…Id() functions do not test whether the entity actually exists
-- in Wikidata; they only check that the syntax of the id is valid.

-- Tells whether `s` has the syntax of an Item id: the letter "Q" followed by
-- one or more ASCII digits and nothing else. Whether such an entity exists
-- is not checked.
-- @param s  string to test; its type is validated with libraryUtil.checkType
-- @return   boolean
function m.isItemId(s)
	libraryUtil.checkType('isItemId', 1, s, 'string', false)
	-- Deliberately the byte-oriented string library with an explicit [0-9]:
	-- in mw.ustring patterns %d matches every Unicode decimal digit, which
	-- would accept ids such as "Q٣" that are not valid entity ids.
	return string.find(s, '^Q[0-9]+$') ~= nil
end

-- Tells whether `s` has the syntax of a Lexeme id: the letter "L" followed by
-- one or more ASCII digits and nothing else. Whether such an entity exists
-- is not checked.
-- @param s  string to test; its type is validated with libraryUtil.checkType
-- @return   boolean
function m.isLexemeId(s)
	libraryUtil.checkType('isLexemeId', 1, s, 'string', false)
	-- Deliberately the byte-oriented string library with an explicit [0-9]:
	-- in mw.ustring patterns %d matches every Unicode decimal digit, which
	-- would accept ids such as "L٣" that are not valid entity ids.
	return string.find(s, '^L[0-9]+$') ~= nil
end

-- Tells whether `s` has the syntax of a Sense id: "L", ASCII digits, a hyphen,
-- "S", ASCII digits, and nothing else (e.g. "L123-S4"). Whether such an
-- entity exists is not checked.
-- @param s  string to test; its type is validated with libraryUtil.checkType
-- @return   boolean
function m.isSenseId(s)
	libraryUtil.checkType('isSenseId', 1, s, 'string', false)
	-- [0-9] instead of mw.ustring's %d (which matches any Unicode decimal
	-- digit), and the hyphen escaped as %- so that it can never be read as
	-- the lazy-repetition operator.
	return string.find(s, '^L[0-9]+%-S[0-9]+$') ~= nil
end

-- Tells whether `s` has the syntax of a Form id: "L", ASCII digits, a hyphen,
-- "F", ASCII digits, and nothing else (e.g. "L123-F4"). Whether such an
-- entity exists is not checked.
-- @param s  string to test; its type is validated with libraryUtil.checkType
-- @return   boolean
function m.isFormId(s)
	libraryUtil.checkType('isFormId', 1, s, 'string', false)
	-- [0-9] instead of mw.ustring's %d (which matches any Unicode decimal
	-- digit), and the hyphen escaped as %- so that it can never be read as
	-- the lazy-repetition operator.
	return string.find(s, '^L[0-9]+%-F[0-9]+$') ~= nil
end

-- Increments the counter stored under key `i` of table `t` in place; an
-- absent entry is treated as 0, so the first call for a key stores 1.
-- @param t  table holding the counters (modified in place)
-- @param i  key whose counter is incremented
function m.incAtIndex(t, i)
	-- Same argument validation as the is…Id() functions, so that a wrong
	-- argument produces a readable message instead of a message-less error.
	libraryUtil.checkType('incAtIndex', 1, t, 'table', false)
	t[i] = (t[i] or 0) + 1
end


-- Sums, over the grammatical features carried by `form`, the weight that
-- `targetFeatures` assigns to each of them (features without a weight
-- contribute 0). A form without a grammaticalFeatures field scores 0.
local function scoreFormFeatures(form, targetFeatures)
	local score = 0
	for _, featureId in pairs(form.grammaticalFeatures or {}) do
		score = score + (targetFeatures[featureId] or 0)
	end
	return score
end

-- Returns the value of the representation of `form` whose language occurs
-- most often in `languages`. On a tie the representation visited first wins
-- (pairs order, i.e. unspecified). Returns '' when the form has no
-- representations.
local function pickRepresentation(form, languages)
	local chosen = ''
	local bestLangVal = nil
	for _, representation in pairs(form.representations or {}) do
		local langVal = 0
		for _, lang in pairs(languages) do
			if representation.language == lang then
				langVal = langVal + 1
			end
		end
		if bestLangVal == nil or langVal > bestLangVal then
			chosen = representation.value
			bestLangVal = langVal
		end
	end
	return chosen
end

-- Expresses a lexeme as a string by choosing one of its forms.
--
-- Arguments are read from frame.args (presumably supplied via {{#invoke}}):
--   * positional "L<n>"       – lexeme to load
--   * positional "L<n>-S<n>"  – sense id; its lexeme is loaded
--   * positional "L<n>-F<n>"  – form id; its lexeme is loaded
--   * positional "Q<n>"       – taken to be a grammatical feature the wanted
--                               form should have; repeating it raises its weight
--   * positional "lang:<code>" or named lang=<code> – preferred language(s)
--                               of the representation
--
-- Every form of the loaded lexeme is scored by the summed weights of the
-- requested features it carries; the first form reaching the highest score
-- wins, and its representation in the best-matching language is returned.
--
-- NOTE(review): the requested sense/form ids are only used to find the lexeme
-- to load; they do not restrict which form is chosen.
--
-- @param frame  table with an `args` table
-- @return       the chosen representation string; nil when the lexeme has no
--               forms; nothing when no lexeme id was passed or the entity
--               could not be loaded as a table
function m.write(frame)
	libraryUtil.checkType('write', 1, frame, 'table', false)
	libraryUtil.checkTypeForNamedArg('write', 'frame.args', frame.args, 'table', false)

	local languages = {}
	local requestedLexemeId = nil
	local requestedSenseId = nil
	local requestedFormId = nil
	local lexemeIdToLoad = nil
	local targetFormGrammaticalFeatures = {}

	mw.log('Processing arguments …')
	for p, v in pairs(frame.args) do
		-- log arguments for debugging purposes
		mw.log('  ', p, '(' .. type(p) .. ' index)', v)
		if type(p) == 'number' then
			-- Positional arguments keep their surrounding whitespace, so
			-- "| L1 " would otherwise match none of the id patterns.
			v = mw.text.trim(v)
			if m.isLexemeId(v) then
				requestedLexemeId = v
				lexemeIdToLoad = requestedLexemeId
			elseif m.isSenseId(v) then
				requestedSenseId = v
				-- only the first return value is needed; no throw-away
				-- assignment to a global `_`
				lexemeIdToLoad = mw.wikibase.lexeme.splitLexemeId(requestedSenseId)
			elseif m.isFormId(v) then
				requestedFormId = v
				lexemeIdToLoad = mw.wikibase.lexeme.splitLexemeId(requestedFormId)
			elseif m.isItemId(v) then
				-- we assume any Q number (item identifier) passed is to refer to a grammatical feature
				m.incAtIndex(targetFormGrammaticalFeatures, v)
			else
				-- "lang:<code>": capture the code directly instead of
				-- locating the colon a second time
				local langCode = mw.ustring.match(v, '^lang:([-%w]+)$')
				if langCode then
					table.insert(languages, langCode)
				end
			end
		elseif p == 'lang' then
			table.insert(languages, v)
		end
	end

	mw.log('requested lexeme: ' .. tostring(requestedLexemeId))
	mw.log('requested sense: ' .. tostring(requestedSenseId))
	mw.log('requested form: ' .. tostring(requestedFormId))
	mw.log('requested language(s):' .. helperFunctions.dump(languages))
	mw.log('lexeme to load: ' .. tostring(lexemeIdToLoad))
	mw.log('looking for target features:' .. helperFunctions.dump(targetFormGrammaticalFeatures))

	if not lexemeIdToLoad then
		return
	end

	mw.log('about to load lexeme ' .. lexemeIdToLoad)
	local lexeme = mw.wikibase.getEntity( lexemeIdToLoad )
	mw.log('loaded lexeme is ' .. tostring(lexeme))
	if type(lexeme) ~= 'table' then
		return
	end

	local bestRepresentationString = nil
	local bestVal = nil
	-- `or {}` keeps a lexeme without a forms field from raising in pairs()
	for _, form in pairs(lexeme.forms or {}) do
		local formVal = scoreFormFeatures(form, targetFormGrammaticalFeatures)
		local aRepresentationString = pickRepresentation(form, languages)
		-- strict ">" means the first form reaching a score keeps it on ties
		if bestVal == nil or formVal > bestVal then
			mw.log('Form “' .. aRepresentationString .. '” is better (' .. formVal .. ') than what we have so far.')
			bestRepresentationString = aRepresentationString
			bestVal = formVal
		end
	end
	return bestRepresentationString
end


return m