Difference between revisions of "Module:Language"

From Eat Every Plant
Jump to navigation Jump to search
m (1 revision)
 
m (1 revision: From PNW foraging book - part 3)
 
(One intermediate revision by the same user not shown)
Line 1: Line 1:
 
require('Module:No globals')
 
require('Module:No globals')
 
local m_data = mw.loadData("Module:Language/data")
 
local m_data = mw.loadData("Module:Language/data")
 +
local langData = m_data.languages or m_data
  
 
local p = {}
 
local p = {}
 
local sub = mw.ustring.sub
 
local gsub = mw.ustring.gsub
 
local find = mw.ustring.find
 
local match = mw.ustring.match
 
local lower = mw.ustring.lower
 
local upper = mw.ustring.upper
 
  
 
local function checkForString(variable)
 
local function checkForString(variable)
Line 16: Line 10:
  
 
local function makeLinkedName(languageCode)
 
local function makeLinkedName(languageCode)
local data = m_data[languageCode]
+
local data = langData[languageCode]
 
local article = data["article"]
 
local article = data["article"]
 
local name = data["Wikipedia_name"] or data["name"]
 
local name = data["Wikipedia_name"] or data["name"]
Line 23: Line 17:
  
 
local function makeEntryName(word, languageCode)
 
local function makeEntryName(word, languageCode)
local data = m_data[languageCode]
+
local data = langData[languageCode]
 +
local ugsub = mw.ustring.gsub
 
word = tostring(word)
 
word = tostring(word)
 
if word == nil then
 
if word == nil then
Line 31: Line 26:
 
else
 
else
 
-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping.
 
-- Remove bold and italics, so that words that contain bolding or emphasis can be linked without piping.
word = gsub(word, "\'\'\'", "")
+
word = word:gsub("\'\'\'", "")
word = gsub(word, "\'\'", "")
+
word = word:gsub("\'\'", "")
 
if data == nil then
 
if data == nil then
 
return word
 
return word
Line 40: Line 35:
 
return word
 
return word
 
else
 
else
for regex, replacement in pairs(replacements) do
+
-- Decompose so that the diacritics of characters such
word = gsub(word, regex, replacement)
+
-- as á can be removed in one go.
 +
-- No need to compose at the end, because the MediaWiki software
 +
-- will handle that.
 +
if replacements.decompose then
 +
word = mw.ustring.toNFD(word)
 +
for i, from in ipairs(replacements.from) do
 +
word = ugsub(
 +
word,
 +
from,
 +
replacements.to and replacements.to[i] or "")
 +
end
 +
else
 +
for regex, replacement in pairs(replacements) do
 +
word = ugsub(word, regex, replacement)
 +
end
 
end
 
end
 
return word
 
return word
Line 47: Line 56:
 
end
 
end
 
end
 
end
 +
end
 +
 +
p.makeEntryName = makeEntryName
 +
 +
local function fixScriptCode(firstLetter, threeLetters)
 +
return string.upper(firstLetter) .. string.lower(threeLetters)
 
end
 
end
  
Line 54: Line 69:
 
if codes == nil or codes == "" then
 
if codes == nil or codes == "" then
 
errorText = 'no language or script code provided'
 
errorText = 'no language or script code provided'
elseif find(codes, "^%s*%a%a%a?%s*$") or find(codes, "^%s*%a%a%a?%-%a%a%a%a%s*$") then
+
 +
-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
 +
-- letters separated by hyphens. This only allows for one sequence, as it is
 +
-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
 +
elseif codes:find("^%s*%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
 +
languageCode, scriptCode =
 +
codes:match("^%s*(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
 +
if not languageCode then
 +
errorText = '<code>'..codes..'</code> is not a valid language or script code.'
 +
elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
 +
errorText = '<code>'..scriptCode..'</code> is not a valid script code.'
 +
else
 +
scriptCode = scriptCode:gsub(
 +
"(%a)(%a%a%a)",
 +
fixScriptCode,
 +
1
 +
)
 +
end
 +
 +
elseif codes:find("^%s*%a%a%a?%s*$") or codes:find("^%s*%a%a%a?%-%a%a%a%a%s*$") then
 
-- A three- or two-letter lowercase sequence at beginning of first parameter
 
-- A three- or two-letter lowercase sequence at beginning of first parameter
 
languageCode =
 
languageCode =
find(codes, "^%s*%a%a%a?") and (
+
codes:find("^%s*%a%a%a?") and (
match(codes, "^%s*(%l%l%l?)")
+
codes:match("^%s*(%l%l%l?)")
or gsub(
+
or codes:match("^%s*(%a%a%a?)")
match(codes, "^%s*(%a%a%a?)"),
+
:gsub("(%a%a%a?)", string.lower, 1)
"(%a%a%a?)",
 
function(a)
 
return lower(a)
 
end,
 
1
 
)
 
 
)
 
)
 
-- One uppercase and three lowercase letters at the end of the first parameter
 
-- One uppercase and three lowercase letters at the end of the first parameter
 
scriptCode =
 
scriptCode =
find(codes, "%a%a%a%a%s*$") and (
+
codes:find("%a%a%a%a%s*$") and (
match(codes, "(%u%l%l%l)%s*$")
+
codes:match("(%u%l%l%l)%s*$")
 
or gsub(
 
or gsub(
match(codes, "(%a%a%a%a)%s*$"),
+
codes:match("(%a%a%a%a)%s*$"),
 
"(%a)(%a%a%a)",
 
"(%a)(%a%a%a)",
function(a, b)
+
fixScriptCode,
return upper(a) .. lower(b)
 
end,
 
 
1
 
1
 
)
 
)
 
)
 
)
elseif find(codes, "^%s*%a%a%a%-%a%a%a$") then
+
elseif codes:find("^%s*%a%a%a%-%a%a%a$") then
languageCode = match(codes, "^%s*%l%l%l%-%l%l%l$") and match (codes, "^%s*%l%l%l%-%l%l%l$") or gsub(match(codes, "^%s*%a%a%a%-%a%a%a$"), "(%a%a%a?)", function(a) return lower(a) end, 1)
+
languageCode = codes:match("^%s*%l%l%l%-%l%l%l$") and codes:match("^%s*%l%l%l%-%l%l%l$")
elseif find(codes, "^%s*%a%a%a?") then
+
or codes:match("^%s*%a%a%a%-%a%a%a$"):gsub("(%a%a%a?)", string.lower, 1)
languageCode, invalidCode = match(codes, "^%s*(%a%a%a?)%-?(.*)")
+
elseif codes:find("^%s*%a%a%a?") then
languageCode = lower(languageCode)
+
languageCode, invalidCode = codes:match("^%s*(%a%a%a?)%-?(.*)")
 +
languageCode = string.lower(languageCode)
 
errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
 
errorText = '<code>'..invalidCode..'</code> is not a valid script code.'
elseif find(codes, "%-?%a%a%a%a%s*$") then
+
elseif codes:find("%-?%a%a%a%a%s*$") then
invalidCode, scriptCode = match(codes, "(.*)%-?(%a%a%a%a)%s*$")
+
invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)%s*$")
 
scriptCode = gsub(
 
scriptCode = gsub(
 
scriptCode,
 
scriptCode,
 
"(%a)(%a%a%a)",
 
"(%a)(%a%a%a)",
function(a, b)
+
fixScriptCode
return upper(a) .. lower(b)
 
end
 
 
)
 
)
 
errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
 
errorText = '<code>'..invalidCode..'</code> is not a valid language code.'
Line 100: Line 125:
 
errorText = '<code>'..codes..'</code> is not a valid language or script code.'
 
errorText = '<code>'..codes..'</code> is not a valid language or script code.'
 
end
 
end
if not scriptCode then
+
if not scriptCode or scriptCode == "" then
scriptCode = require("Module:Language/scripts").isLatn(text) and "Latn" or "unknown"
+
scriptCode = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown"
 
end
 
end
 
if errorText then
 
if errorText then
Line 108: Line 133:
 
errorText = ""
 
errorText = ""
 
end
 
end
 +
languageCode = m_data.redirects[languageCode] or languageCode
 
return languageCode, scriptCode, errorText
 
return languageCode, scriptCode, errorText
 
end
 
end
  
 
local function tag(text, languageCode, script, italics)
 
local function tag(text, languageCode, script, italics)
local data = m_data[languageCode]
+
local data = langData[languageCode]
 +
-- Use Wikipedia code if it has been given: for instance,
 +
-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
 +
-- code "ine-x-proto".
 +
languageCode = data and data.Wikipedia_code or languageCode
 
 
 
local italicize = script == "Latn" and italics
 
local italicize = script == "Latn" and italics
Line 133: Line 163:
 
return table.concat(out)
 
return table.concat(out)
 
end
 
end
 +
 +
  
 
function p.lang(frame)
 
function p.lang(frame)
Line 150: Line 182:
  
 
local function linkToWiktionary(entry, linkText, languageCode)
 
local function linkToWiktionary(entry, linkText, languageCode)
local data = m_data[languageCode]
+
local data = langData[languageCode]
 
local name
 
local name
 
if languageCode then
 
if languageCode then
Line 159: Line 191:
 
name = mw.language.fetchLanguageName(languageCode, 'en')
 
name = mw.language.fetchLanguageName(languageCode, 'en')
 
else
 
else
error("No name for the language " .. (languageCode or "nil") .. " could be found")
+
error("No name for the language " .. ("%q"):format(languageCode or nil) .. " could be found")
 
end
 
end
if sub(entry, 1, 1) == "*" then
+
if entry:sub(1, 1) == "*" then
 +
if name ~= "" then
 +
entry = "Reconstruction:" .. name .. "/" .. entry:sub(2)
 +
else
 +
error("Language name is empty")
 +
end
 +
elseif data and data.type == "reconstructed" then
 +
mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
 +
local frame = mw.getCurrentFrame()
 +
-- Track reconstructed entries with no asterisk by transcluding
 +
-- a nonexistent template. This technique is used in Wiktionary:
 +
-- see [[wikt:Module:debug]].
 +
-- [[Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk]]
 +
pcall(frame.expandTemplate, frame,
 +
{ title = 'tracking/wikt-lang/reconstructed with no asterisk' })
 +
if name ~= "" then
 +
entry = "Reconstruction:" .. name .. "/" .. entry
 +
else
 +
error("Language name is empty")
 +
end
 +
elseif data and data.type == "appendix" then
 
if name ~= "" then
 
if name ~= "" then
entry = "Reconstruction:" .. name .. "/" .. sub(entry, 2)
+
entry = "Appendix:" .. name .. "/" .. entry
 
else
 
else
 
error("Language name is empty")
 
error("Language name is empty")
Line 185: Line 237:
 
local word1 = args[2] or nil
 
local word1 = args[2] or nil
 
local word2 = args[3] or nil
 
local word2 = args[3] or nil
 +
 +
if not args[2] then
 +
error("Parameter 2 is required")
 +
end
 
 
 
local languageCode, scriptCode, errorText = getCodes(codes, word1)
 
local languageCode, scriptCode, errorText = getCodes(codes, word1)

Latest revision as of 23:10, 13 August 2018

-- Loading 'Module:No globals' first; NOTE(review): presumably this makes any
-- read or write of an undeclared global raise an error -- confirm.
require('Module:No globals')
-- Language data table, loaded through mw.loadData.
local m_data = mw.loadData("Module:Language/data")
-- Per-language records: the `languages` sub-table when the data module has
-- one, otherwise the data table itself.
local langData = m_data.languages or m_data

-- Export table; it is returned at the end of the module.
local p = {}

--- Report whether a value counts as supplied text.
-- @param variable value to test
-- @return boolean false for nil or the empty string, true for anything else
local function checkForString(variable)
	if variable == nil or variable == "" then
		return false
	end
	return true
end

--- Build the "<language name>: " label for a language code.
-- The name is the record's `Wikipedia_name` when present, otherwise `name`.
-- Raises if `languageCode` has no record in langData.
-- @param languageCode key into langData
-- @return string the name followed by ": "
local function makeLinkedName(languageCode)
	local record = langData[languageCode]
	-- NOTE(review): `article` is read but not used in the visible return
	-- expression, and the surrounding string literals are empty; link markup
	-- may have been lost from this copy -- confirm against the live module.
	local article = record.article
	local displayName = record.Wikipedia_name
	if not displayName then
		displayName = record.name
	end
	return "" .. displayName .. ": "
end

--- Turn a displayed word into the page title used for its dictionary entry.
-- Bold/italic apostrophe markup is stripped, then the language's
-- `replacements` (if any) are applied.
-- @param word the displayed form; converted with tostring()
-- @param languageCode key into langData; unknown or nil codes skip the
--        replacement step
-- @return string the entry name ("" when `word` is the empty string)
-- Raises when `word` is nil.
local function makeEntryName(word, languageCode)
	-- The nil check has to come before tostring(): tostring(nil) is the
	-- string "nil", which would otherwise be used as the entry name.
	if word == nil then
		error("The function makeEntryName requires a string argument")
	end

	word = tostring(word)
	if word == "" then
		return ""
	end

	-- Remove bold and italics, so that words that contain bolding or emphasis
	-- can be linked without piping.
	word = word:gsub("'''", "")
	word = word:gsub("''", "")

	local data = langData[languageCode]
	local replacements = data and data["replacements"]
	if replacements == nil then
		return word
	end

	local ugsub = mw.ustring.gsub
	if replacements.decompose then
		-- Decompose so that the diacritics of characters such as á can be
		-- removed in one go. No need to compose at the end, because the
		-- MediaWiki software will handle that.
		word = mw.ustring.toNFD(word)
		for i, from in ipairs(replacements.from) do
			-- A missing `to` list (or entry) means "delete the match".
			word = ugsub(word, from, replacements.to and replacements.to[i] or "")
		end
	else
		-- Plain pattern -> replacement map.
		for regex, replacement in pairs(replacements) do
			word = ugsub(word, regex, replacement)
		end
	end
	return word
end

-- Expose makeEntryName on the export table.
p.makeEntryName = makeEntryName

--- Normalise the capitalisation of a script code given in two pieces.
-- @param firstLetter the leading letter; upper-cased
-- @param threeLetters the remaining letters; lower-cased
-- @return string the two pieces joined, e.g. "l", "ATN" -> "Latn"
local function fixScriptCode(firstLetter, threeLetters)
	local head = string.upper(firstLetter)
	local tail = string.lower(threeLetters)
	return head .. tail
end

--- Split a combined "language[-script]" parameter into its parts.
-- Recognised shapes (leading whitespace is tolerated):
--   "xx" / "xxx"              language only
--   "xx-Ssss" / "xxx-Ssss"    language plus a four-letter script code
--   "xxx-yyy"                 language code with a three-letter suffix
--   "xxx-x-private"           language code with a private-use subtag
-- Anything else produces an error message instead of raising.
-- @param codes string or nil: the code parameter
-- @param text the text being tagged; only passed to is_Latin() to guess the
--        script when no script code was found
-- @return languageCode string or nil, after applying m_data.redirects
-- @return scriptCode string; "Latn" or "unknown" when it had to be guessed
-- @return errorText "" on success, otherwise the message wrapped as " [...]"
local function getCodes(codes, text)
	-- Only the string library is used below: no `gsub` alias is in scope in
	-- this module, so a bare gsub(...) call would hit an undeclared global.
	-- NOTE(review): the diff view of this revision shows the "is not a valid"
	-- messages wrapped in <code> tags; the literals here carry no markup --
	-- confirm which form is wanted.
	local languageCode, scriptCode, invalidCode
	local errorText

	if codes == nil or codes == "" then
		errorText = 'no language or script code provided'

	-- Private-use subtag: x followed by one or more sequences of 1-8 lowercase
	-- letters separated by hyphens. This only allows for one sequence, as it is
	-- needed for proto-languages such as ine-x-proto (Proto-Indo-European).
	elseif codes:find("^%s*%a%a%a?%-x%-%a%a?%a?%a?%a?%a?%a?%a?$") then
		languageCode, scriptCode =
			codes:match("^%s*(%a%a%a%-x%-%a%a?%a?%a?%a?%a?%a?%a?)%-?(.*)$")
		if not languageCode then
			errorText = '' .. codes .. ' is not a valid language or script code.'
		elseif scriptCode ~= "" and not scriptCode:find("%a%a%a%a") then
			errorText = '' .. scriptCode .. ' is not a valid script code.'
		else
			-- Parentheses keep only the string result of gsub.
			scriptCode = (scriptCode:gsub("(%a)(%a%a%a)", fixScriptCode, 1))
		end

	elseif codes:find("^%s*%a%a%a?%s*$") or codes:find("^%s*%a%a%a?%-%a%a%a%a%s*$") then
		-- A two- or three-letter sequence at the beginning of the parameter.
		-- Lower-casing the whole capture (rather than preferring an
		-- all-lowercase prefix match) keeps "enG" as "eng" instead of "en".
		languageCode = string.lower(codes:match("^%s*(%a%a%a?)"))
		-- Four letters at the end of the parameter, normalised to one
		-- uppercase letter followed by three lowercase letters.
		local first, rest = codes:match("(%a)(%a%a%a)%s*$")
		if first then
			scriptCode = fixScriptCode(first, rest)
		end

	elseif codes:find("^%s*%a%a%a%-%a%a%a$") then
		-- Capture without the leading whitespace and lower-case every letter,
		-- so that "INE-PRO" becomes "ine-pro" rather than "ine-PRO".
		languageCode = string.lower(codes:match("^%s*(%a%a%a%-%a%a%a)$"))

	elseif codes:find("^%s*%a%a%a?") then
		-- Plausible language code followed by something that is not a script.
		languageCode, invalidCode = codes:match("^%s*(%a%a%a?)%-?(.*)")
		languageCode = string.lower(languageCode)
		errorText = '' .. invalidCode .. ' is not a valid script code.'

	elseif codes:find("%-?%a%a%a%a%s*$") then
		-- Plausible script code preceded by something that is not a language.
		invalidCode, scriptCode = codes:match("(.*)%-?(%a%a%a%a)%s*$")
		scriptCode = (scriptCode:gsub("(%a)(%a%a%a)", fixScriptCode))
		errorText = '' .. invalidCode .. ' is not a valid language code.'

	else
		errorText = '' .. codes .. ' is not a valid language or script code.'
	end

	if not scriptCode or scriptCode == "" then
		scriptCode = require("Module:Unicode data").is_Latin(text) and "Latn" or "unknown"
	end

	if errorText then
		errorText = ' [' .. errorText .. ']'
	else
		errorText = ""
	end

	-- The data module may be a flat table without a `redirects` field (see
	-- the `m_data.languages or m_data` fallback), so guard the lookup.
	local redirects = m_data.redirects
	if redirects and languageCode then
		languageCode = redirects[languageCode] or languageCode
	end

	return languageCode, scriptCode, errorText
end

--- Wrap text for display in a given language and script.
-- @param text the text to wrap; "[text?]" is substituted when it is missing
-- @param languageCode key into langData
-- @param script script code; italics are only considered for "Latn"
-- @param italics truthy when italics are wanted
-- @return string the text surrounded by direction markers (empty strings
--         unless the language record has direction == "rtl")
local function tag(text, languageCode, script, italics)
	local record = langData[languageCode]

	-- Use Wikipedia code if it has been given: for instance,
	-- Proto-Indo-European has the Wiktionary code "ine-pro" but the Wikipedia
	-- code "ine-x-proto".
	if record and record.Wikipedia_code then
		languageCode = record.Wikipedia_code
	end

	local italicize = italics and script == "Latn"

	text = text or "[text?]"

	-- NOTE(review): `attribute` and the adjusted languageCode are not used in
	-- the visible output, and both branches below build the same string; the
	-- HTML wrapper appears to have been lost from this copy -- confirm.
	local markers = { attribute = "", before = "", after = "" }
	if record and record.direction == "rtl" then
		-- The two marks below are invisible characters (presumably the
		-- right-to-left and left-to-right marks -- confirm).
		markers = { attribute = ' dir="rtl"', before = '‏', after = '‎' }
	end

	local body
	if italicize then
		body = "" .. text .. ""
	else
		body = "" .. text .. ""
	end

	return markers.before .. body .. markers.after
end


--- Entry point: tag text as being in a given language.
-- Arguments come from the parent frame when it has a first positional
-- argument, otherwise from the invoking frame.
--   1            language (and optionally script) code
--   2            the text; required
--   italics / i  "n" or "-" switches italics off
-- @param frame the frame object
-- @return string the tagged text followed by any code-parsing error text
function p.lang(frame)
	local parentArgs = frame:getParent().args
	local args = frame.args
	if parentArgs[1] then
		args = parentArgs
	end

	local codes = args[1]
	local text = args[2]
	if not text then
		error("Provide text in the second parameter")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, text)

	local italicsArg = args.italics or args.i
	local italics = italicsArg ~= "n" and italicsArg ~= "-"

	local tagged = tag(text, languageCode, scriptCode, italics)
	return tagged .. errorText
end

--- Build the link for a dictionary entry.
-- When a language code is given, the language name is taken from the
-- language record or, failing that, from mw.language.fetchLanguageName, and
-- the entry is moved under "Reconstruction:<name>/" or "Appendix:<name>/"
-- where applicable.
-- @param entry page title of the entry
-- @param linkText text to display
-- @param languageCode optional key into langData
-- @return string the link
-- Raises when no language name can be found, when the name is empty and a
-- prefixed title is needed, or when entry/linkText is missing.
local function linkToWiktionary(entry, linkText, languageCode)
	-- NOTE(review): as visible here the returned strings contain only
	-- linkText and the computed entry is not interpolated; the link markup
	-- appears to have been lost from this copy -- confirm.
	local data = langData[languageCode]

	if not languageCode then
		return "" .. linkText .. ""
	end

	local name = data and data.name
	if not name then
		-- On other languages' wikis, use mw.getContentLanguage():getCode(),
		-- or replace with that wiki's language code.
		local fetched = mw.language.fetchLanguageName(languageCode, 'en')
		if fetched == "" then
			error("No name for the language " .. ("%q"):format(languageCode) .. " could be found")
		end
		name = fetched
	end

	-- Decide which prefix (if any) the entry lives under.
	local prefix, title
	if entry:sub(1, 1) == "*" then
		-- Leading asterisk: drop it and use the Reconstruction prefix.
		prefix, title = "Reconstruction:", entry:sub(2)
	elseif data and data.type == "reconstructed" then
		mw.log("Reconstructed language without asterisk:", languageCode, name, entry)
		local frame = mw.getCurrentFrame()
		-- Track reconstructed entries with no asterisk by transcluding
		-- a nonexistent template. This technique is used in Wiktionary:
		-- see wikt:Module:debug.
		-- Special:WhatLinksHere/tracking/wikt-lang/reconstructed with no asterisk
		pcall(frame.expandTemplate, frame,
			{ title = 'tracking/wikt-lang/reconstructed with no asterisk' })
		prefix, title = "Reconstruction:", entry
	elseif data and data.type == "appendix" then
		prefix, title = "Appendix:", entry
	end

	if prefix then
		if name == "" then
			error("Language name is empty")
		end
		entry = prefix .. name .. "/" .. title
	end

	if not (entry and linkText) then
		error("linkToWiktionary needs a Wiktionary entry or link text, or both")
	end
	return "" .. linkText .. ""
end

--- Entry point: link a word to its dictionary entry and tag its language.
-- Arguments come from the parent frame when it has a first positional
-- argument, otherwise from the invoking frame.
--   1            language (and optionally script) code
--   2            the word to link; required
--   3            optional display text
--   italics / i  "n" or "-" switches italics off
-- @param frame the frame object
-- @return string the tagged link followed by any code-parsing error text
function p.wiktlang(frame)
	local parentArgs = frame:getParent().args
	local args = frame.args
	if parentArgs[1] then
		args = parentArgs
	end

	local codes, word1, word2 = args[1], args[2], args[3]

	if not args[2] then
		error("Parameter 2 is required")
	end

	local languageCode, scriptCode, errorText = getCodes(codes, word1)

	local italicsArg = args.italics or args.i
	local italics = italicsArg ~= "n" and italicsArg ~= "-"

	-- The entry is always derived from parameter 2; parameter 3, when
	-- non-empty, only changes the displayed text.
	local entry, linkText
	if checkForString(word1) then
		entry = makeEntryName(word1, languageCode)
		if checkForString(word2) then
			linkText = word2
		else
			linkText = word1
		end
	end

	local out
	if entry and linkText then
		if languageCode then
			local link = linkToWiktionary(entry, linkText, languageCode)
			out = tag(link, languageCode, scriptCode, italics)
		else
			out = linkToWiktionary(entry, linkText)
		end
	else
		out = '[text?]'
	end

	if out and errorText then
		return out .. errorText
	end
	return errorText or error("The function wiktlang generated nothing")
end

--- Entry point: link a word to its dictionary entry without language tagging.
-- Arguments come from the parent frame when it has a first positional
-- argument, otherwise from the invoking frame.
--   1  language (and optionally script) code
--   2  the word to link; required
--   3  optional display text
-- @param frame the frame object
-- @return string the link followed by any code-parsing error text
function p.wikt(frame)
	local parentArgs = frame:getParent().args
	local args = frame.args
	if parentArgs[1] then
		args = parentArgs
	end

	local codes, word1, word2 = args[1], args[2], args[3]

	if not word1 then
		error("Provide a word in parameter 2.")
	end

	-- The script code (second result) is not needed here.
	local languageCode, _, errorText = getCodes(codes, word1)

	-- The entry is always derived from parameter 2; parameter 3, when
	-- non-empty, only changes the displayed text.
	local entry, linkText
	if checkForString(word1) then
		entry = makeEntryName(word1, languageCode)
		if checkForString(word2) then
			linkText = word2
		else
			linkText = word1
		end
	end

	local out
	if entry and linkText then
		if languageCode then
			out = linkToWiktionary(entry, linkText, languageCode)
		else
			out = linkToWiktionary(entry, linkText)
		end
	else
		out = '[text?]'
	end

	if out and errorText then
		return out .. errorText
	end
	return errorText or error("The function wikt generated nothing")
end

-- Return the export table as the module's value.
return p