Difference between revisions of "Module:Language"
m (1 revision) |
m (1 revision: From PNW foraging book - part 3) |
||
(One intermediate revision by the same user not shown) | |||
Line 1: | Line 1: | ||
require('Module:No globals') | require('Module:No globals') | ||
local m_data = mw.loadData("Module:Language/data") | local m_data = mw.loadData("Module:Language/data") | ||
+ | local langData = m_data.languages or m_data | ||
local p = {} | local p = {} | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
local function checkForString(variable) | local function checkForString(variable) | ||
Line 16: | Line 10: | ||
local function makeLinkedName(languageCode) | local function makeLinkedName(languageCode) | ||
− | local data = | + | local data = langData[languageCode] |
local article = data["article"] | local article = data["article"] | ||
local name = data["Wikipedia_name"] or data["name"] | local name = data["Wikipedia_name"] or data["name"] | ||
Line 23: | Line 17: | ||
local function makeEntryName(word, languageCode) | local function makeEntryName(word, languageCode) | ||
− | local data = | + | local data = langData[languageCode] |
+ | local ugsub = mw.ustring.gsub | ||
word = tostring(word) | word = tostring(word) | ||
if word == nil then | if word == nil then | ||
Line 31: | Line 26: | ||
else | else | ||
-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping. | -- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping. | ||
− | word = gsub( | + | word = word:gsub("\'\'\'", "") |
− | word = gsub( | + | word = word:gsub("\'\'", "") |
if data == nil then | if data == nil then | ||
return word | return word | ||
Line 40: | Line 35: | ||
return word | return word | ||
else | else | ||
− | for regex, replacement in pairs(replacements) do | + | -- Decompose so that the diacritics of characters such |
− | + | -- as á can be removed in one go. | |
+ | -- No need to compose at the end, because the MediaWiki software | ||
+ | -- will handle that. | ||
+ | if replacements.decompose then | ||
+ | word = mw.ustring.toNFD(word) | ||
+ | for i, from in ipairs(replacements.from) do | ||
+ | word = ugsub( | ||
+ | word, | ||
+ | from, | ||
+ | replacements.to and replacements.to[i] or "") | ||
+ | end | ||
+ | else | ||
+ | for regex, replacement in pairs(replacements) do | ||
+ | word = ugsub(word, regex, replacement) | ||
+ | end | ||
end | end | ||
return word | return word | ||
Line 47: | Line 56: | ||
end | end | ||
end | end | ||
+ | end | ||
+ | |||
+ | p.makeEntryName = makeEntryName | ||
+ | |||
+ | local function fixScriptCode(firstLetter, threeLetters) | ||
+ | return string.upper(firstLetter) .. string.lower(threeLetters) | ||
end | end | ||
Line 54: | Line 69: | ||
if codes == nil or codes == "" then | if codes == nil or codes == "" then | ||
errorText = 'no language or script code provided' | errorText = 'no language or script code provided' | ||
− | elseif find(codes, "^%s*%a%a%a?%s*$") or find( | + | |
+ | -- Private-use subtag: x followed by one or more sequences of 1-8 lowercase | ||
+ | -- letters separated by hyphens. This only allows for one sequence, as it is | ||
+ | -- needed for proto-languages such as ine-x-proto (Proto-Indo-European). | ||
+ | elseif codes:find("^%s*%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then | ||
+ | languageCode, scriptCode = | ||
+ | codes:match("^%s*(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$") | ||
+ | if not languageCode then | ||
+ | errorText = '<code>'..codes..'</code> is not a valid language or script code.' | ||
+ | elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then | ||
+ | errorText = '<code>'..scriptCode..'</code> is not a valid script code.' | ||
+ | else | ||
+ | scriptCode = scriptCode:gsub( | ||
+ | "(%a)(%a%a%a)", | ||
+ | fixScriptCode, | ||
+ | 1 | ||
+ | ) | ||
+ | end | ||
+ | |||
+ | elseif codes:find("^%s*%a%a%a?%s*$") or codes:find("^%s*%a%a%a?%-%a%a%a%a%s*$") then | ||
-- A three- or two-letter lowercase sequence at beginning of first parameter | -- A three- or two-letter lowercase sequence at beginning of first parameter | ||
languageCode = | languageCode = | ||
− | find( | + | codes:find("^%s*%a%a%a?") and ( |
− | match( | + | codes:match("^%s*(%l%l%l?)") |
− | or | + | or codes:match("^%s*(%a%a%a?)") |
− | + | :gsub("(%a%a%a?)", string.lower, 1) | |
− | "(%a%a%a?)", | ||
− | |||
− | |||
− | |||
− | |||
− | |||
) | ) | ||
-- One uppercase and three lowercase letters at the end of the first parameter | -- One uppercase and three lowercase letters at the end of the first parameter | ||
scriptCode = | scriptCode = | ||
− | find( | + | codes:find("%a%a%a%a%s*$") and ( |
− | match( | + | codes:match("(%u%l%l%l)%s*$") |
or gsub( | or gsub( | ||
− | match( | + | codes:match("(%a%a%a%a)%s*$"), |
"(%a)(%a%a%a)", | "(%a)(%a%a%a)", | ||
− | + | fixScriptCode, | |
− | |||
− | |||
1 | 1 | ||
) | ) | ||
) | ) | ||
− | elseif find( | + | elseif codes:find("^%s*%a%a%a%-%a%a%a$") then |
− | languageCode = match( | + | languageCode = codes:match("^%s*%l%l%l%-%l%l%l$") and codes:match("^%s*%l%l%l%-%l%l%l$") |
− | elseif find( | + | or codes:match("^%s*%a%a%a%-%a%a%a$"):gsub("(%a%a%a?)", string.lower, 1) |
− | languageCode, invalidCode = match( | + | elseif codes:find("^%s*%a%a%a?") then |
− | languageCode = lower(languageCode) | + | languageCode, invalidCode = codes:match("^%s*(%a%a%a?)%-?(.*)") |
+ | languageCode = string.lower(languageCode) | ||
errorText = '<code>'..invalidCode..'</code> is not a valid script code.' | errorText = '<code>'..invalidCode..'</code> is not a valid script code.' | ||
− | elseif find( | + | elseif codes:find("%-?%a%a%a%a%s*$") then |
− | invalidCode, scriptCode = match( | + | invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)%s*$") |
scriptCode = gsub( | scriptCode = gsub( | ||
scriptCode, | scriptCode, | ||
"(%a)(%a%a%a)", | "(%a)(%a%a%a)", | ||
− | + | fixScriptCode | |
− | |||
− | |||
) | ) | ||
errorText = '<code>'..invalidCode..'</code> is not a valid language code.' | errorText = '<code>'..invalidCode..'</code> is not a valid language code.' | ||
Line 100: | Line 125: | ||
errorText = '<code>'..codes..'</code> is not a valid language or script code.' | errorText = '<code>'..codes..'</code> is not a valid language or script code.' | ||
end | end | ||
− | if not scriptCode then | + | if not scriptCode or scriptCode == "" then |
− | scriptCode = require("Module: | + | scriptCode = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown" |
end | end | ||
if errorText then | if errorText then | ||
Line 108: | Line 133: | ||
errorText = "" | errorText = "" | ||
end | end | ||
+ | languageCode = m_data.redirects[languageCode] or languageCode | ||
return languageCode, scriptCode, errorText | return languageCode, scriptCode, errorText | ||
end | end | ||
local function tag(text, languageCode, script, italics) | local function tag(text, languageCode, script, italics) | ||
− | local data = | + | local data = langData[languageCode] |
+ | -- Use Wikipedia code if it has been given: for instance, | ||
+ | -- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia | ||
+ | -- code "ine-x-proto". | ||
+ | languageCode = data and data.Wikipedia_code or languageCode | ||
local italicize = script == "Latn" and italics | local italicize = script == "Latn" and italics | ||
Line 133: | Line 163: | ||
return table.concat(out) | return table.concat(out) | ||
end | end | ||
+ | |||
+ | |||
function p.lang(frame) | function p.lang(frame) | ||
Line 150: | Line 182: | ||
local function linkToWiktionary(entry, linkText, languageCode) | local function linkToWiktionary(entry, linkText, languageCode) | ||
− | local data = | + | local data = langData[languageCode] |
local name | local name | ||
if languageCode then | if languageCode then | ||
Line 159: | Line 191: | ||
name = mw.language.fetchLanguageName(languageCode, 'en') | name = mw.language.fetchLanguageName(languageCode, 'en') | ||
else | else | ||
− | error("No name for the language " .. (languageCode or | + | error("No name for the language " .. ("%q"):format(languageCode or nil) .. " could be found") |
end | end | ||
− | if sub( | + | if entry:sub(1, 1) == "*" then |
+ | if name ~= "" then | ||
+ | entry = "Reconstruction:" .. name .. "/" .. entry:sub(2) | ||
+ | else | ||
+ | error("Language name is empty") | ||
+ | end | ||
+ | elseif data and data.type == "reconstructed" then | ||
+ | mw.log("Reconstructed language without asterisk:", languageCode, name, entry) | ||
+ | local frame = mw.getCurrentFrame() | ||
+ | -- Track reconstructed entries with no asterisk by transcluding | ||
+ | -- a nonexistent template. This technique is used in Wiktionary: | ||
+ | -- see [[wikt:Module:debug]]. | ||
+ | -- [[Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk]] | ||
+ | pcall(frame.expandTemplate, frame, | ||
+ | { title = 'tracking/wikt-lang/reconstructed with no asterisk' }) | ||
+ | if name ~= "" then | ||
+ | entry = "Reconstruction:" .. name .. "/" .. entry | ||
+ | else | ||
+ | error("Language name is empty") | ||
+ | end | ||
+ | elseif data and data.type == "appendix" then | ||
if name ~= "" then | if name ~= "" then | ||
− | entry = " | + | entry = "Appendix:" .. name .. "/" .. entry |
else | else | ||
error("Language name is empty") | error("Language name is empty") | ||
Line 185: | Line 237: | ||
local word1 = args[2] or nil | local word1 = args[2] or nil | ||
local word2 = args[3] or nil | local word2 = args[3] or nil | ||
+ | |||
+ | if not args[2] then | ||
+ | error("Parameter 2 is required") | ||
+ | end | ||
local languageCode, scriptCode, errorText = getCodes(codes, word1) | local languageCode, scriptCode, errorText = getCodes(codes, word1) |
Latest revision as of 22:10, 13 August 2018
require('Module:No globals') local m_data = mw.loadData("Module:Language/data") local langData = m_data.languages or m_data
local p = {}
--- Report whether a value is present and not the empty string.
-- @param variable any value, typically a template argument
-- @return boolean `false` for `nil` and for "", `true` for everything else
local function checkForString(variable)
	if variable == nil or variable == "" then
		return false
	end
	return true
end
--- Build the "<language name>: " label for a language code.
-- The Wikipedia-specific name is preferred over the generic name.
-- Raises an error when the code has no entry in the language data.
-- @param languageCode key into the language data table
-- @return string the language name followed by ": "
local function makeLinkedName(languageCode)
	local entry = langData[languageCode]
	-- NOTE(review): `article` is looked up but never used below, and the
	-- returned string contains no link markup; the link syntax may have been
	-- lost when this text was extracted -- confirm against the live module.
	local article = entry["article"]
	local name = entry["Wikipedia_name"]
	if not name then
		name = entry["name"]
	end
	return "" .. name .. ": "
end
--- Convert a displayed word into the title of its dictionary entry.
--
-- Bold/italic wiki markup is stripped, then the language's "replacements"
-- rules (if any) are applied. Two rule layouts are supported:
--   * `decompose` set: the word is NFD-decomposed and each pattern in the
--     array `from` is replaced by the same-index item of `to` (or by "" when
--     `to` or that item is missing);
--   * otherwise: the table is a map from pattern to replacement.
--
-- @param word          value to convert; converted with tostring()
-- @param languageCode  key into the language data; may have no entry
-- @return string       the entry name ("" for an empty word)
-- Raises an error when `word` is nil.
local function makeEntryName(word, languageCode)
	-- This check has to run before tostring(): tostring(nil) is the string
	-- "nil", which would otherwise be treated as an ordinary word.
	if word == nil then
		error("The function makeEntryName requires a string argument")
	end

	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or emphasis
	-- can be linked without piping.
	word = word:gsub("\'\'\'", "")
	word = word:gsub("\'\'", "")

	local data = langData[languageCode]
	local replacements = data and data["replacements"]
	if replacements == nil then
		return word
	end

	local ugsub = mw.ustring.gsub
	if replacements.decompose then
		-- Decompose so that the diacritics of characters such as á can be
		-- removed in one go. No need to compose at the end, because the
		-- MediaWiki software will handle that.
		word = mw.ustring.toNFD(word)
		for i, from in ipairs(replacements.from) do
			word = ugsub(
				word,
				from,
				replacements.to and replacements.to[i] or "")
		end
	else
		-- pairs() visits keys in unspecified order, so rules in this layout
		-- are applied in no particular sequence.
		for regex, replacement in pairs(replacements) do
			word = ugsub(word, regex, replacement)
		end
	end
	return word
end

p.makeEntryName = makeEntryName
--- Normalise the capitalisation of a four-letter script code.
-- Written to be used as a string.gsub replacement callback for the pattern
-- "(%a)(%a%a%a)".
-- @param firstLetter   first letter of the code; upper-cased
-- @param threeLetters  remaining letters of the code; lower-cased
-- @return string       e.g. ("l", "ATN") gives "Latn"
local function fixScriptCode(firstLetter, threeLetters)
	local head = string.upper(firstLetter)
	local tail = string.lower(threeLetters)
	return head .. tail
end
--- Split a combined "language[-Script]" code into its parts.
--
-- Accepted shapes of `codes` (leading whitespace is ignored):
--   * "ll" / "lll"                 language code only
--   * "ll-Ssss" / "lll-Ssss"       language code plus four-letter script code
--   * "lll-x-subtag"               language code with a private-use subtag
--   * "lll-lll"                    two three-letter subtags (e.g. "ine-pro")
-- Anything else produces an error message instead of raising.
--
-- @param codes string|nil  the code(s) to parse
-- @param text              text that is handed to `is_Latin` of
--                          "Module:Unicode data" when no script code was
--                          supplied, to choose between "Latn" and "unknown"
-- @return languageCode     string or nil; replaced by its entry in
--                          `m_data.redirects` when one exists
-- @return scriptCode       string; first letter upper case, rest lower case,
--                          or "Latn"/"unknown" when none was supplied
-- @return errorText        "" on success, otherwise ' [message]'
local function getCodes(codes, text)
	local languageCode, scriptCode, invalidCode
	local errorText

	if codes == nil or codes == "" then
		errorText = 'no language or script code provided'

	-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
	-- letters separated by hyphens. This only allows for one sequence, as it is
	-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
	elseif codes:find("^%s*%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
		-- NOTE(review): the guard above anchors `$` directly after the
		-- private-use subtag, so the script capture below is always empty
		-- (or nil for a two-letter primary subtag, which the capture pattern
		-- rejects). Confirm whether "lll-x-subtag-Ssss" should be accepted.
		languageCode, scriptCode =
			codes:match("^%s*(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
		if not languageCode then
			errorText = '<code>'..codes..'</code> is not a valid language or script code.'
		elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
			errorText = '<code>'..scriptCode..'</code> is not a valid script code.'
		else
			-- Parentheses drop gsub's second return value (the match count).
			scriptCode = (scriptCode:gsub("(%a)(%a%a%a)", fixScriptCode, 1))
		end

	elseif codes:find("^%s*%a%a%a?%s*$") or codes:find("^%s*%a%a%a?%-%a%a%a%a%s*$") then
		-- Two or three letters at the beginning of the first parameter.
		-- The whole run is captured before lower-casing, so that a mixed-case
		-- code such as "enG" yields "eng" rather than a truncated "en".
		languageCode = codes:match("^%s*(%a%a%a?)"):lower()

		-- Four letters at the end of the first parameter, normalised to one
		-- uppercase letter followed by three lowercase letters. Absent (nil)
		-- when only a language code was given.
		local rawScript = codes:match("(%a%a%a%a)%s*$")
		if rawScript then
			scriptCode = fixScriptCode(rawScript:sub(1, 1), rawScript:sub(2))
		end

	elseif codes:find("^%s*%a%a%a%-%a%a%a$") then
		-- Capture without the leading whitespace and lower-case both subtags,
		-- so that the result can be used as a key into the language data.
		languageCode = codes:match("^%s*(%a%a%a%-%a%a%a)$"):lower()

	elseif codes:find("^%s*%a%a%a?") then
		languageCode, invalidCode = codes:match("^%s*(%a%a%a?)%-?(.*)")
		languageCode = languageCode:lower()
		errorText = '<code>'..invalidCode..'</code> is not a valid script code.'

	elseif codes:find("%-?%a%a%a%a%s*$") then
		-- The first capture is lazy so that the separating hyphen is not
		-- reported as part of the invalid language code.
		invalidCode, scriptCode = codes:match("(.-)%-?(%a%a%a%a)%s*$")
		scriptCode = fixScriptCode(scriptCode:sub(1, 1), scriptCode:sub(2))
		errorText = '<code>'..invalidCode..'</code> is not a valid language code.'

	else
		errorText = '<code>'..codes..'</code> is not a valid language or script code.'
	end

	if not scriptCode or scriptCode == "" then
		scriptCode = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown"
	end

	if errorText then
		errorText = ' [' .. errorText .. ']'
	else
		errorText = ""
	end

	-- The data module may lack a `redirects` table (the module already allows
	-- for data without a `languages` subtable), and languageCode may be nil
	-- on the error paths above.
	local redirects = m_data.redirects
	if redirects and languageCode then
		languageCode = redirects[languageCode] or languageCode
	end

	return languageCode, scriptCode, errorText
end
--- Wrap text for display in a given language and script.
-- @param text          text to wrap; "[text?]" is substituted when missing
-- @param languageCode  key into the language data
-- @param script        script code; the italic branch requires "Latn"
-- @param italics       whether italics were requested by the caller
-- @return string       the text surrounded by the direction markers
local function tag(text, languageCode, script, italics)
	local data = langData[languageCode]

	-- Use Wikipedia code if it has been given: for instance,
	-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
	-- code "ine-x-proto".
	if data and data.Wikipedia_code then
		languageCode = data.Wikipedia_code
	end

	-- The italic branch is taken only for Latin-script text.
	local italicize = script == "Latn" and italics

	text = text or "[text?]"

	-- Slot 1 is an attribute string, slots 2 and 3 surround the text.
	local markers = { "", "", "" }
	if data and data["direction"] == "rtl" then
		markers = { ' dir="rtl"', '', '' }
	end

	-- NOTE(review): both branches currently build the same string, and
	-- neither `languageCode` nor markers[1] is used in the output; the
	-- surrounding HTML markup may have been lost when this text was
	-- extracted -- confirm against the live module.
	local wrapped
	if italicize then
		wrapped = "" .. text .. ""
	else
		wrapped = "" .. text .. ""
	end

	return table.concat({ markers[2], wrapped, markers[3] })
end
--- Template entry point: tag a piece of text with its language.
-- Arguments are taken from the parent frame when it has a first positional
-- argument, otherwise from the invoking frame.
--   1        language code, optionally with a script code
--   2        text to tag (required)
--   italics / i   "n" or "-" switches italics off
-- @param frame  Scribunto frame object
-- @return string the tagged text followed by any error text from getCodes
function p.lang(frame)
	local args = frame:getParent().args
	if not args[1] then
		args = frame.args
	end

	local codes = args[1]
	local text = args[2]
	if not text then
		error("Provide text in the second parameter")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, text)

	-- Italics are on unless explicitly disabled with "n" or "-".
	local setting = args.italics or args.i
	local italics = setting ~= "n" and setting ~= "-"

	return tag(text, languageCode, scriptCode, italics) .. errorText
end
--- Work out the Wiktionary page for an entry and return its link text.
--
-- With a language code, the language name is taken from the language data or,
-- failing that, from mw.language.fetchLanguageName. Entries that start with
-- "*" or belong to a language of type "reconstructed" are placed under
-- "Reconstruction:<name>/", and entries of "appendix"-type languages under
-- "Appendix:<name>/".
--
-- @param entry         entry name (must be a string when languageCode is set)
-- @param linkText      text to display
-- @param languageCode  optional language code
-- @return string
-- Raises an error when no language name can be found, when the name is
-- empty, or when entry/linkText is missing.
local function linkToWiktionary(entry, linkText, languageCode)
	local data = langData[languageCode]

	if not languageCode then
		return "" .. linkText .. ""
	end

	local name
	if data and data.name then
		name = data.name
	else
		-- On other languages' wikis, use mw.getContentLanguage():getCode(),
		-- or replace with that wiki's language code.
		local fetched = mw.language.fetchLanguageName(languageCode, 'en')
		if fetched == "" then
			error("No name for the language " .. string.format("%q", languageCode) .. " could be found")
		end
		name = fetched
	end

	-- Decide which namespace prefix (if any) the entry needs, and which part
	-- of the entry follows it.
	local prefix, page
	if entry:sub(1, 1) == "*" then
		prefix, page = "Reconstruction:", entry:sub(2)
	elseif data and data.type == "reconstructed" then
		mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
		local frame = mw.getCurrentFrame()
		-- Track reconstructed entries with no asterisk by transcluding
		-- a nonexistent template. This technique is used in Wiktionary:
		-- see [[wikt:Module:debug]].
		-- [[Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk]]
		pcall(frame.expandTemplate, frame,
			{ title = 'tracking/wikt-lang/reconstructed with no asterisk' })
		prefix, page = "Reconstruction:", entry
	elseif data and data.type == "appendix" then
		prefix, page = "Appendix:", entry
	end

	if prefix then
		if name == "" then
			error("Language name is empty")
		end
		entry = prefix .. name .. "/" .. page
	end

	if not (entry and linkText) then
		error("linkToWiktionary needs a Wiktionary entry or link text, or both")
	end
	-- NOTE(review): `entry` is computed above but does not appear in the
	-- returned string; the link markup may have been lost when this text was
	-- extracted -- confirm against the live module.
	return "" .. linkText .. ""
end
--- Template entry point: link a word to Wiktionary and tag it with its
-- language.
-- Arguments are taken from the parent frame when it has a first positional
-- argument, otherwise from the invoking frame.
--   1        language code, optionally with a script code
--   2        word used for the entry name (required)
--   3        optional display text; defaults to parameter 2
--   italics / i   "n" or "-" switches italics off
-- @param frame  Scribunto frame object
-- @return string the tagged link followed by any error text from getCodes
function p.wiktlang(frame)
	local args = frame:getParent().args
	if not args[1] then
		args = frame.args
	end

	local codes, word1, word2 = args[1] or nil, args[2] or nil, args[3] or nil

	if not args[2] then
		error("Parameter 2 is required")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, word1)

	-- Italics are on unless explicitly disabled with "n" or "-".
	local setting = args.italics or args.i
	local italics = setting ~= "n" and setting ~= "-"

	-- The entry is always derived from word1; word2 only changes what is
	-- displayed.
	local entry, linkText
	if checkForString(word1) then
		entry = makeEntryName(word1, languageCode)
		linkText = checkForString(word2) and word2 or word1
	end

	local out
	if not (entry and linkText) then
		out = '[text?]'
	elseif languageCode then
		local link = linkToWiktionary(entry, linkText, languageCode)
		out = tag(link, languageCode, scriptCode, italics)
	else
		out = linkToWiktionary(entry, linkText)
	end

	if out and errorText then
		return out .. errorText
	end
	return errorText or error("The function wiktlang generated nothing")
end
--- Template entry point: link a word to Wiktionary without language tagging.
-- Arguments are taken from the parent frame when it has a first positional
-- argument, otherwise from the invoking frame.
--   1   language code, optionally with a script code
--   2   word used for the entry name (required)
--   3   optional display text; defaults to parameter 2
-- @param frame  Scribunto frame object
-- @return string the link followed by any error text from getCodes
function p.wikt(frame)
	local args = frame:getParent().args
	if not args[1] then
		args = frame.args
	end

	local codes, word1, word2 = args[1] or nil, args[2] or nil, args[3] or nil

	if not word1 then
		error("Provide a word in parameter 2.")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, word1)

	-- The entry is always derived from word1; word2 only changes what is
	-- displayed.
	local entry, linkText
	if checkForString(word1) then
		entry = makeEntryName(word1, languageCode)
		if checkForString(word2) then
			linkText = word2
		else
			linkText = word1
		end
	end

	local out
	if not (entry and linkText) then
		out = '[text?]'
	elseif languageCode then
		out = linkToWiktionary(entry, linkText, languageCode)
	else
		out = linkToWiktionary(entry, linkText)
	end

	if out and errorText then
		return out .. errorText
	end
	return errorText or error("The function wikt generated nothing")
end
return p