Difference between revisions of "Module:Language/data"
m (1 revision) |
m (1 revision: From PNW foraging book - part 3) |
||
(One intermediate revision by the same user not shown) | |||
Line 1: | Line 1: | ||
local U = mw.ustring.char | local U = mw.ustring.char | ||
− | -- | + | -- Diacritics, from the [[Combining Diacritical Marks]] block. |
− | local grave | + | local grave = U(0x300) |
− | local acute | + | local acute = U(0x301) |
+ | local circumflex = U(0x302) | ||
+ | local tilde = U(0x303) | ||
+ | local macron = U(0x304) | ||
+ | local breve = U(0x306) | ||
+ | local dot = U(0x307) | ||
+ | local diaeresis = U(0x308) | ||
local double_acute = U(0x30B) | local double_acute = U(0x30B) | ||
− | local | + | local double_grave = U(0x30F) |
− | local | + | local invbreve = U(0x311) |
− | local | + | local undertie = U(0x35C) |
− | |||
− | |||
− | |||
+ | --[[ | ||
+ | |||
+ | This is a table of Wiktionary language codes with data belonging to them. | ||
+ | Name is the "canonical name" used on Wiktionary. | ||
+ | Article is the Wikipedia article. | ||
+ | Script is the ISO 15924 code. | ||
+ | ]] | ||
local data = { | local data = { | ||
− | [" | + | ["languages"] = { |
− | [" | + | ["ab"] = { |
− | + | ["name"] = "Abkhaz", | |
− | + | }, | |
− | + | ["ang"] = { | |
− | [" | + | ["name"] = "Old English", |
− | [" | + | ["article"] = {"Old English"}, |
− | [" | + | -- ["scripts"] = {"Latn"}, |
− | [" | + | -- Remove macrons, acutes, and overdots |
− | + | ["replacements"] = { | |
− | + | decompose = true, | |
− | [" | + | from = { "[" .. macron .. acute .. dot .. "]" }, |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
}, | }, | ||
}, | }, | ||
− | + | ["ar"] = { | |
− | + | ["name"] = "Arabic", | |
− | + | ["article"] = "Arabic language", | |
− | + | -- ["scripts"] = { "Arab" }, | |
− | --[ | + | ["direction"] = "rtl", -- Should be in the script data module. |
− | + | ["replacements"] = { | |
− | + | -- ālif with wasla is replaced by ālif; | |
− | + | [U(0x0671)] = U(0x0627), | |
− | + | -- taṭwīl, fatḥatan, ḍammatan, kasratan, | |
− | + | -- fatḥa, ḍamma, kasra, | |
− | + | -- shadda, sukūn, and superscript (dagger) ālif are removed. | |
− | + | ["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D) | |
− | + | ..U(0x064E)..U(0x064F)..U(0x0650) | |
− | + | ..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "", | |
}, | }, | ||
}, | }, | ||
− | + | ["av"] = { | |
− | + | ["name"] = "Avar" | |
− | + | }, | |
− | + | ["be"] = { | |
− | + | ["article"] = "Belarusian language", | |
+ | -- ["scripts"] = { "Cyrl" }, | ||
+ | -- Combining acute accent is removed. | ||
+ | ["replacements"] = { [U(0x0301)] = "", }, | ||
+ | }, | ||
+ | ["bn"] = { | ||
+ | ["name"] = "Bengali", | ||
+ | ["article"] = "Bengali language", | ||
+ | -- ["scripts"] = { "Beng" }, | ||
}, | }, | ||
− | + | ["bua"] = { | |
− | + | ["name"] = "Buryat", | |
− | |||
− | |||
}, | }, | ||
− | + | ["cel-pro"] = { | |
− | + | ["name"] = "Proto-Celtic", | |
− | + | ["Wikipedia_code"] = "cel-x-proto", | |
− | |||
}, | }, | ||
− | + | ["cu"] = { | |
− | + | ["name"] = "Old Church Slavonic", | |
− | + | ["article"] = "Old Church Slavonic", | |
− | + | -- ["scripts"] = { "Cyrs" }, | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
}, | }, | ||
− | ]] | + | ["de"] = { |
+ | ["name"] = "German", | ||
+ | ["article"] = "German language", | ||
+ | -- ["scripts"] = { "Latn" }, | ||
+ | --[[ | ||
+ | ["replacements"] = { | ||
+ | ["ae"] = "ä", | ||
+ | ["oe"] = "ö", | ||
+ | ["ue"] = "ü", | ||
+ | ["A[Ee]"] = "Ä", | ||
+ | ["O[Ee]"] = "Ö", | ||
+ | ["U[Ee]"] = "Ü", | ||
+ | }, | ||
+ | ]] | ||
}, | }, | ||
− | + | ["en"] = { | |
− | + | ["name"] = "English", | |
− | + | ["article"] = "English language", | |
− | + | -- ["scripts"] = { "Latn" }, | |
}, | }, | ||
− | + | ["es"] = { | |
− | + | ["name"] = "Spanish", | |
− | + | ["article"] = "Spanish language", | |
− | + | -- ["scripts"] = { "Latn" }, | |
}, | }, | ||
− | + | ["egy"] = { | |
− | + | ["name"] = "Egyptian", | |
− | |||
− | |||
}, | }, | ||
− | + | ["fr"] = { | |
− | + | ["name"] = "French", | |
− | + | ["article"] = "French language", | |
− | + | -- ["scripts"] = { "Latn" }, | |
}, | }, | ||
− | + | ["frm"] = { | |
− | + | ["name"] = "Middle French", | |
− | + | ["article"] = "Middle French", | |
− | + | -- ["scripts"] = { "Latn" }, | |
− | |||
− | |||
}, | }, | ||
− | + | ["frp"] = { | |
− | ["name"] = " | + | ["name"] = "Franco-Provençal", |
− | ["article"] = " | + | }, |
− | + | ["ff"] = { | |
− | + | ["name"] = "Fula", | |
− | -- | + | }, |
− | ["[ | + | ["gem-pro"] = { |
− | ["[ | + | ["name"] = "Proto-Germanic", |
− | + | ["article"] = "Proto-Germanic language", | |
− | + | -- ["scripts"] = { "Latn" }, | |
− | + | ["type"] = "reconstructed", | |
− | + | ["replacements"] = {}, | |
− | + | ["Wikipedia_code"] = "gem-x-proto", | |
− | + | }, | |
− | + | ["gmw-ecg"] = { | |
− | + | ["name"] = "East Central German", | |
− | + | }, | |
− | + | ["got"] = { | |
− | + | ["name"] = "Gothic", | |
+ | ["article"] = "Gothic language", | ||
+ | -- ["scripts"] = { "Goth" }, | ||
+ | ["replacements"] = { | ||
+ | -- Latin to Gothic since people will not want to have to copy | ||
+ | -- and paste Gothic letters in | ||
+ | ["[AÁaáĀā]"] = "𐌰", | ||
+ | ["[Bb]"] = "𐌱", | ||
+ | ["[Gg]"] = "𐌲", | ||
+ | ["[Dd]"] = "𐌳", | ||
+ | ["[EeĒē]"] = "𐌴", | ||
+ | ["[Qq]"] = "𐌵", | ||
+ | ["[Zz]"] = "𐌶", | ||
+ | ["[Hh]"] = "𐌷", | ||
+ | ["[Þþ]"] = "𐌸", | ||
+ | ["[IiÍí]"] = "𐌹", | ||
+ | ["[Kk]"] = "𐌺", | ||
+ | ["[Ll]"] = "𐌻", | ||
+ | ["[Mm]"] = "𐌼", | ||
+ | ["[Nn]"] = "𐌽", | ||
+ | ["[Jj]"] = "𐌾", | ||
+ | ["[UuÚúŪū]"] = "𐌿", | ||
+ | ["[Pp]"] = "𐍀", | ||
+ | ["[Rr]"] = "𐍂", | ||
+ | ["[Ss]"] = "𐍃", | ||
+ | ["[Tt]"] = "𐍄", | ||
+ | ["[WwYy]"] = "𐍅", | ||
+ | ["[Ff]"] = "𐍆", | ||
+ | ["[Xx]"] = "𐍇", | ||
+ | ["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted | ||
+ | ["[OoŌō]"] = "𐍉", | ||
}, | }, | ||
}, | }, | ||
− | + | ["gsw"] = { | |
− | + | ["name"] = "Alemannic German", | |
− | + | }, | |
− | + | ["grc"] = { | |
− | [" | + | ["name"] = "Ancient Greek", |
− | + | ["article"] = "Ancient Greek", | |
− | [" | + | -- ["scripts"] = { "Grek" }, |
+ | ["replacements"] = { | ||
+ | decompose = true, | ||
+ | from = { | ||
+ | -- Replace variant letterforms with standard ones. | ||
+ | "ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ", | ||
+ | -- Remove macrons and breves. | ||
+ | "[" .. macron .. breve .. undertie .. "]" | ||
+ | }, | ||
+ | to = { | ||
+ | "β", "ε", "θ", "κ", "ρ", "σ", "φ", | ||
+ | } | ||
+ | }, | ||
+ | }, | ||
+ | ["grk-pro"] = { | ||
+ | ["name"] = "Proto-Hellenic", | ||
+ | ["Wikipedia_name"] = "Proto-Greek", | ||
+ | ["article"] = "Proto-Greek language", | ||
+ | -- ["scripts"] = { "Latn" }, | ||
+ | ["type"] = "reconstructed", | ||
+ | ["replacements"] = {}, | ||
+ | }, | ||
+ | ["ha"] = { | ||
+ | ["name"] = "Hausa", | ||
+ | -- remove tilde, grave, acute, macron, circumflex | ||
+ | ["replacements"] = { | ||
+ | decompose = true, | ||
+ | from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" }, | ||
+ | }, | ||
+ | }, | ||
+ | ["hi"] = { | ||
+ | ["name"] = "Hindi", | ||
+ | ["article"] = "Hindi", | ||
+ | -- ["scripts"] = { "Deva" }, | ||
+ | }, | ||
+ | ["ine-pro"] = { | ||
+ | ["name"] = "Proto-Indo-European", | ||
+ | ["article"] = "Proto-Indo-European language", | ||
+ | -- ["scripts"] = { "Latn" }, | ||
+ | ["type"] = "reconstructed", | ||
+ | ["replacements"] = {}, | ||
+ | ["Wikipedia_code"] = "ine-x-proto", | ||
+ | }, | ||
+ | ["ja"] = { | ||
+ | ["name"] = "Japanese", | ||
+ | ["article"] = "Japanese language", | ||
+ | -- ["scripts"] = { "Jpan" }, | ||
}, | }, | ||
− | + | ["jbo"] = { -- Lojban | |
− | + | ["type"] = "appendix", | |
− | |||
− | |||
}, | }, | ||
− | + | ["la"] = { | |
− | + | ["name"] = "Latin", | |
− | + | ["article"] = "Latin", | |
− | + | -- ["scripts"] = { "Latn" }, | |
− | + | ["replacements"] = { | |
− | + | -- Remove macrons, breves, and diaereses. | |
+ | decompose = true, | ||
+ | from = { "[" .. macron .. breve .. diaeresis .. "]" }, | ||
+ | }, | ||
}, | }, | ||
− | + | ["lt"] = { | |
− | + | ["name"] = "Lithuanian", | |
− | + | -- remove acute, tilde, grave | |
− | + | ["replacements"] = { | |
+ | decompose = true, | ||
+ | from = { "[" .. acute .. tilde .. grave .. "]" }, | ||
+ | }, | ||
}, | }, | ||
− | + | ["moe"] = { | |
− | + | ["name"] = "Cree", | |
− | + | }, | |
− | + | ["mul"] = { | |
− | [" | + | ["name"] = "Translingual", |
− | + | ["article"] = "", | |
− | [" | + | -- ["scripts"] = { "" }, |
− | [" | + | }, |
− | [" | + | ["nci"] = { |
− | + | ["name"] = "Classical Nahuatl", | |
− | [" | + | ["article"] = "Classical Nahuatl", |
− | [" | + | -- ["scripts"] = {"Latn"}, |
− | [" | + | -- Remove macrons, acutes, circumflexes and graves |
− | + | ["replacements"] = { | |
− | [" | + | decompose = true, |
− | + | -- Remove macrons, acutes, circumflexes, graves, and saltillo; | |
− | + | -- see [[Saltillo (linguistics)]]. | |
− | + | from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" }, | |
}, | }, | ||
}, | }, | ||
− | + | ["nds-de"] = { | |
− | + | ["name"] = "German Low German", | |
− | [" | + | }, |
− | + | ["oj"] = { | |
+ | ["name"] = "Ojibwe", | ||
}, | }, | ||
− | + | ["orv"] = { | |
− | + | ["name"] = "Old East Slavic", | |
− | + | ["article"] = "Old East Slavic", | |
− | + | -- ["scripts"] = { "Cyrs" }, | |
− | + | ["replacements"] = { | |
− | + | [U(0x484)] = "", | |
}, | }, | ||
}, | }, | ||
− | + | ["pt"] = { | |
− | + | ["name"] = "Portuguese", | |
− | + | ["article"] = "Portuguese language", | |
− | ["scripts"] = { " | + | -- ["scripts"] = { "Latn" }, |
+ | }, | ||
+ | ["pa"] = { | ||
+ | ["name"] = "Punjabi", | ||
+ | ["article"] = "Punjabi language", | ||
+ | -- ["scripts"] = { "Guru", "Arab", }, | ||
}, | }, | ||
− | + | ["ru"] = { | |
− | + | ["name"] = "Russian", | |
− | + | ["article"] = "Russian language", | |
− | + | -- ["scripts"] = { "Cyrl" }, | |
+ | -- Combining acute accent is removed. | ||
+ | ["replacements"] = { [U(0x0301)] = "", }, | ||
}, | }, | ||
− | + | ["rw"] = { | |
− | + | ["name"] = "Rwanda-Rundi", | |
− | |||
− | |||
− | |||
− | |||
}, | }, | ||
− | + | ["se"] = { | |
− | [" | + | ["replacements"] = { |
− | + | ["([đflmnŋrsšŧv])'%1"] = "%1%1", | |
− | + | }, | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
}, | }, | ||
− | + | ["sh"] = { | |
− | + | ["article"] = "Serbo-Croatian language", | |
− | + | -- ["scripts"] = { "Latn", "Cyrl" }, | |
− | + | ["replacements"] = { | |
− | + | decompose = true, | |
− | + | from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave | |
− | + | .. grave .. invbreve .. acute .. macron .. tilde .. "]" }, | |
− | + | to = { "%1" }, | |
− | + | }, | |
− | + | }, | |
− | + | ["sl"] = { | |
− | + | ["name"] = "Slovene", | |
− | + | ["replacements"] = { | |
− | + | -- remove tonal orthography | |
− | + | ["[ÁÀÂȂȀ]"] = "A", | |
− | + | ["[áàâȃȁ]"] = "a", | |
− | + | ["[ÉÈÊȆȄỆẸ]"] = "e", | |
− | + | ["[éèêȇȅệẹə]"] = "e", | |
− | + | ["[ÍÌÎȊȈ]"] = "I", | |
− | + | ["[íìîȋȉ]"] = "i", | |
− | + | ["[ÓÒÔȎȌỘỌ]"] = "O", | |
+ | ["[óòôȏȍộọ]"] = "o", | ||
+ | ["[ŔȒȐ]"] = "R", | ||
+ | ["[ŕȓȑ]"] = "r", | ||
+ | ["[ÚÙÛȖȔ]"] = "U", | ||
+ | ["[úùûȗȕ]"] = "u", | ||
+ | ["ł"] = "l", | ||
+ | }, | ||
+ | }, | ||
+ | ["sla-pro"] = { | ||
+ | ["name"] = "Proto-Slavic", -- also Common Slavic | ||
+ | ["type"] = "reconstructed", | ||
+ | -- ["scripts"] = { "Latn" }, | ||
+ | ["replacements"] = { | ||
+ | ["[ÀÁÃĀȀȂ]"] = "A", | ||
+ | ["[àáãāȁȃ]"] = "a", | ||
+ | ["[ÈÉẼĒȄȆ]"] = "E", | ||
+ | ["[èéẽēȅȇ]"] = "e", | ||
+ | ["[ÌÍĨĪȈȊ]"] = "I", | ||
+ | ["[ìíĩīȉȋ]"] = "i", | ||
+ | ["[ÒÓÕŌȌȎŐ]"] = "O", | ||
+ | ["[òóõōȍȏő]"] = "o", | ||
+ | ["[ÙÚŨŪȔȖŰ]"] = "U", | ||
+ | ["[ùúũūȕȗű]"] = "u", | ||
+ | ["[ỲÝỸȲ]"] = "Y", | ||
+ | ["[ỳýỹȳ]"] = "y", | ||
+ | ["Ǭ"] = "Ǫ", | ||
+ | ["ǭ"] = "ǫ", | ||
+ | ["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "", | ||
+ | ["ĭ"] = "ь", | ||
+ | ["ŭ"] = "ъ", | ||
}, | }, | ||
}, | }, | ||
− | + | ["uk"] = { | |
− | + | ["article"] = "Ukrainian language", | |
− | + | -- ["scripts"] = { "Cyrl" }, | |
− | + | -- Combining acute accent is removed. | |
− | + | ["replacements"] = { [U(0x0301)] = "", } | |
}, | }, | ||
− | + | ["ur"] = { | |
− | + | ["name"] = "Urdu", | |
− | + | ["article"] = "Urdu", | |
− | + | -- ["scripts"] = { "Arab" }, | |
}, | }, | ||
− | + | ["zh"] = { | |
− | + | ["name"] = "Chinese", | |
− | + | ["article"] = "Chinese language", | |
− | + | -- ["scripts"] = { "Hani" }, | |
}, | }, | ||
− | + | ["xcl"] = { | |
− | + | ["name"] = "Old Armenian", | |
− | + | ["article"] = "Classical Armenian", | |
− | + | -- ["scripts"] = { "Armn" }, | |
− | + | ["replacements"] = { | |
− | + | ["[՞՜՛՟]"] = "", | |
− | + | ["և"] = "եւ", | |
}, | }, | ||
}, | }, | ||
− | + | ["xvn"] = { | |
− | + | ["name"] = "Vandalic", | |
+ | ["article"] = "Vandalic language", | ||
+ | -- ["scripts"] = { "Latn" }, | ||
+ | }, | ||
--[[ | --[[ | ||
− | + | [""] = { | |
− | + | ["name"] = "", | |
− | + | ["article"] = "", | |
− | + | -- ["scripts"] = { "" }, | |
− | |||
}, | }, | ||
− | + | ||
− | + | [""] = { | |
− | + | ["name"] = "", | |
− | + | ["article"] = "", | |
− | + | -- ["scripts"] = { "" }, | |
− | + | ["replacements"] = { | |
}, | }, | ||
}, | }, | ||
]] | ]] | ||
+ | }, | ||
+ | |||
+ | -- Here, keys (for example, "gem") are Wikipedia language codes used in | ||
+ | -- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary | ||
+ | -- code. | ||
+ | -- Subtags are not currently supported. | ||
+ | ["redirects"] = { | ||
+ | ["aae"] = "sq", | ||
+ | ["aiq"] = "fa", | ||
+ | ["aln"] = "sq", | ||
+ | ["als"] = "sq", | ||
+ | ["azb"] = "az", | ||
+ | ["azj"] = "az", | ||
+ | ["bgn"] = "bal", | ||
+ | ["bs"] = "sh", | ||
+ | ["bxr"] = "bua", | ||
+ | ["cel-x-proto"] = "cel-pro", | ||
+ | ["ciw"] = "oj", | ||
+ | ["cnr"] = "sh", | ||
+ | ["fil"] = "tl", | ||
+ | ["fuf"] = "ff", | ||
+ | ["gem"] = "gem-pro", -- Not correct, but is commonly used. | ||
+ | ["gem-x-proto"] = "gem-pro", | ||
+ | ["hak"] = "zh", | ||
+ | ["hbo"] = "he", | ||
+ | ["hr"] = "sh", | ||
+ | ["ine"] = "ine-pro", -- Not correct, but might be commonly used. | ||
+ | ["ine-x-proto"] = "ine-pro", | ||
+ | ["nan"] = "zh", | ||
+ | ["prs"] = "fa", | ||
+ | ["rn"] = "rw", | ||
+ | ["sli"] = "gmw-ecg", | ||
+ | ["sr"] = "sh", | ||
+ | ["src"] = "sc", | ||
+ | ["sro"] = "sc", | ||
+ | ["tw"] = "ak", | ||
+ | ["wae"] = "gsw", | ||
+ | ["wep"] = "nds-de", | ||
+ | ["yue"] = "zh", | ||
+ | ["xno"] = "fro", | ||
+ | }, | ||
+ | } | ||
return data | return data |
Latest revision as of 22:10, 13 August 2018
local U = mw.ustring.char

-- Diacritics, from the [[Combining Diacritical Marks]] block.
-- Each local holds the single combining character for that code point, so
-- the names can be concatenated into Lua character classes below.
local grave = U(0x300)
local acute = U(0x301)
local circumflex = U(0x302)
local tilde = U(0x303)
local macron = U(0x304)
local breve = U(0x306)
local dot = U(0x307)
local diaeresis = U(0x308)
local double_acute = U(0x30B)
local double_grave = U(0x30F)
local invbreve = U(0x311)
local undertie = U(0x35C)

--[[

This is a table of Wiktionary language codes with data belonging to them.
Name is the "canonical name" used on Wiktionary.
Article is the Wikipedia article.
Script is the ISO 15924 code.
]]
-- The "replacements" field appears in two shapes in this table:
--   1. a map whose keys are patterns and whose values are replacement strings;
--   2. a table with the fields decompose, from (list of patterns) and,
--      optionally, to (list of replacements, matched to "from" by position).
-- NOTE(review): how each shape is applied (ordering, Unicode decomposition,
-- handling of a "from" entry with no "to" counterpart) is decided by the
-- consuming module, which is not visible here -- confirm there before
-- relying on it.
local data = {
	["languages"] = {
		["ab"] = {
			["name"] = "Abkhaz",
		},
		["ang"] = {
			["name"] = "Old English",
			-- Plain string, like the "article" field of every other entry.
			["article"] = "Old English",
			-- ["scripts"] = {"Latn"},
			-- Remove macrons, acutes, and overdots
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. acute .. dot .. "]" },
			},
		},
		["ar"] = {
			["name"] = "Arabic",
			["article"] = "Arabic language",
			-- ["scripts"] = { "Arab" },
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = {
				-- ālif with wasla is replaced by ālif;
				[U(0x0671)] = U(0x0627),
				-- taṭwīl, fatḥatan, ḍammatan, kasratan,
				-- fatḥa, ḍamma, kasra,
				-- shadda, sukūn, and superscript (dagger) ālif are removed.
				["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
					..U(0x064E)..U(0x064F)..U(0x0650)
					..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
		["av"] = {
			["name"] = "Avar"
		},
		["be"] = {
			["article"] = "Belarusian language",
			-- ["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "", },
		},
		["bn"] = {
			["name"] = "Bengali",
			["article"] = "Bengali language",
			-- ["scripts"] = { "Beng" },
		},
		["bua"] = {
			["name"] = "Buryat",
		},
		["cel-pro"] = {
			["name"] = "Proto-Celtic",
			["Wikipedia_code"] = "cel-x-proto",
		},
		["cu"] = {
			["name"] = "Old Church Slavonic",
			["article"] = "Old Church Slavonic",
			-- ["scripts"] = { "Cyrs" },
		},
		["de"] = {
			["name"] = "German",
			["article"] = "German language",
			-- ["scripts"] = { "Latn" },
			--[[
			["replacements"] = {
				["ae"] = "ä",
				["oe"] = "ö",
				["ue"] = "ü",
				["A[Ee]"] = "Ä",
				["O[Ee]"] = "Ö",
				["U[Ee]"] = "Ü",
			},
			]]
		},
		["en"] = {
			["name"] = "English",
			["article"] = "English language",
			-- ["scripts"] = { "Latn" },
		},
		["es"] = {
			["name"] = "Spanish",
			["article"] = "Spanish language",
			-- ["scripts"] = { "Latn" },
		},
		["egy"] = {
			["name"] = "Egyptian",
		},
		["fr"] = {
			["name"] = "French",
			["article"] = "French language",
			-- ["scripts"] = { "Latn" },
		},
		["frm"] = {
			["name"] = "Middle French",
			["article"] = "Middle French",
			-- ["scripts"] = { "Latn" },
		},
		["frp"] = {
			["name"] = "Franco-Provençal",
		},
		["ff"] = {
			["name"] = "Fula",
		},
		["gem-pro"] = {
			["name"] = "Proto-Germanic",
			["article"] = "Proto-Germanic language",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "gem-x-proto",
		},
		["gmw-ecg"] = {
			["name"] = "East Central German",
		},
		["got"] = {
			["name"] = "Gothic",
			["article"] = "Gothic language",
			-- ["scripts"] = { "Goth" },
			["replacements"] = {
				-- Latin to Gothic since people will not want to have to copy
				-- and paste Gothic letters in
				["[AÁaáĀā]"] = "𐌰",
				["[Bb]"] = "𐌱",
				["[Gg]"] = "𐌲",
				["[Dd]"] = "𐌳",
				["[EeĒē]"] = "𐌴",
				["[Qq]"] = "𐌵",
				["[Zz]"] = "𐌶",
				["[Hh]"] = "𐌷",
				["[Þþ]"] = "𐌸",
				["[IiÍí]"] = "𐌹",
				["[Kk]"] = "𐌺",
				["[Ll]"] = "𐌻",
				["[Mm]"] = "𐌼",
				["[Nn]"] = "𐌽",
				["[Jj]"] = "𐌾",
				["[UuÚúŪū]"] = "𐌿",
				["[Pp]"] = "𐍀",
				["[Rr]"] = "𐍂",
				["[Ss]"] = "𐍃",
				["[Tt]"] = "𐍄",
				["[WwYy]"] = "𐍅",
				["[Ff]"] = "𐍆",
				["[Xx]"] = "𐍇",
				["[Ƕƕ]"] = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
				["[OoŌō]"] = "𐍉",
			},
		},
		["gsw"] = {
			["name"] = "Alemannic German",
		},
		["grc"] = {
			["name"] = "Ancient Greek",
			["article"] = "Ancient Greek",
			-- ["scripts"] = { "Grek" },
			["replacements"] = {
				decompose = true,
				from = {
					-- Replace variant letterforms with standard ones.
					"ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ",
					-- Remove macrons, breves, and underties. This eighth
					-- pattern deliberately has no counterpart in "to".
					"[" .. macron .. breve .. undertie .. "]"
				},
				to = {
					"β", "ε", "θ", "κ", "ρ", "σ", "φ",
				}
			},
		},
		["grk-pro"] = {
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["ha"] = {
			["name"] = "Hausa",
			-- remove tilde, grave, acute, macron, circumflex
			["replacements"] = {
				decompose = true,
				from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" },
			},
		},
		["hi"] = {
			["name"] = "Hindi",
			["article"] = "Hindi",
			-- ["scripts"] = { "Deva" },
		},
		["ine-pro"] = {
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "ine-x-proto",
		},
		["ja"] = {
			["name"] = "Japanese",
			["article"] = "Japanese language",
			-- ["scripts"] = { "Jpan" },
		},
		["jbo"] = { -- Lojban
			["type"] = "appendix",
		},
		["la"] = {
			["name"] = "Latin",
			["article"] = "Latin",
			-- ["scripts"] = { "Latn" },
			["replacements"] = {
				-- Remove macrons, breves, and diaereses.
				decompose = true,
				from = { "[" .. macron .. breve .. diaeresis .. "]" },
			},
		},
		["lt"] = {
			["name"] = "Lithuanian",
			-- remove acute, tilde, grave
			["replacements"] = {
				decompose = true,
				from = { "[" .. acute .. tilde .. grave .. "]" },
			},
		},
		["moe"] = {
			["name"] = "Cree",
		},
		["mul"] = {
			["name"] = "Translingual",
			["article"] = "",
			-- ["scripts"] = { "" },
		},
		["nci"] = {
			["name"] = "Classical Nahuatl",
			["article"] = "Classical Nahuatl",
			-- ["scripts"] = {"Latn"},
			["replacements"] = {
				decompose = true,
				-- Remove macrons, acutes, circumflexes, graves, and saltillo;
				-- see [[Saltillo (linguistics)]].
				from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" },
			},
		},
		["nds-de"] = {
			["name"] = "German Low German",
		},
		["oj"] = {
			["name"] = "Ojibwe",
		},
		["orv"] = {
			["name"] = "Old East Slavic",
			["article"] = "Old East Slavic",
			-- ["scripts"] = { "Cyrs" },
			["replacements"] = {
				[U(0x484)] = "",
			},
		},
		["pt"] = {
			["name"] = "Portuguese",
			["article"] = "Portuguese language",
			-- ["scripts"] = { "Latn" },
		},
		["pa"] = {
			["name"] = "Punjabi",
			["article"] = "Punjabi language",
			-- ["scripts"] = { "Guru", "Arab", },
		},
		["ru"] = {
			["name"] = "Russian",
			["article"] = "Russian language",
			-- ["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "", },
		},
		["rw"] = {
			["name"] = "Rwanda-Rundi",
		},
		["se"] = {
			["replacements"] = {
				-- A consonant, an apostrophe, then the same consonant again
				-- (%1 back-reference) collapses to the doubled consonant.
				["([đflmnŋrsšŧv])'%1"] = "%1%1",
			},
		},
		["sh"] = {
			["article"] = "Serbo-Croatian language",
			-- ["scripts"] = { "Latn", "Cyrl" },
			["replacements"] = {
				decompose = true,
				-- Keep the captured Latin or Cyrillic base letter and drop
				-- the single combining accent that follows it.
				from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave
					.. grave .. invbreve .. acute .. macron .. tilde .. "]" },
				to = { "%1" },
			},
		},
		["sl"] = {
			["name"] = "Slovene",
			["replacements"] = {
				-- remove tonal orthography
				["[ÁÀÂȂȀ]"] = "A",
				["[áàâȃȁ]"] = "a",
				-- Uppercase variants keep their case, as in the A/I/O/R/U rows.
				["[ÉÈÊȆȄỆẸ]"] = "E",
				["[éèêȇȅệẹə]"] = "e",
				["[ÍÌÎȊȈ]"] = "I",
				["[íìîȋȉ]"] = "i",
				["[ÓÒÔȎȌỘỌ]"] = "O",
				["[óòôȏȍộọ]"] = "o",
				["[ŔȒȐ]"] = "R",
				["[ŕȓȑ]"] = "r",
				["[ÚÙÛȖȔ]"] = "U",
				["[úùûȗȕ]"] = "u",
				["ł"] = "l",
			},
		},
		["sla-pro"] = {
			["name"] = "Proto-Slavic", -- also Common Slavic
			["type"] = "reconstructed",
			-- ["scripts"] = { "Latn" },
			["replacements"] = {
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				-- Any remaining free-standing combining accents are removed.
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
				["ĭ"] = "ь",
				["ŭ"] = "ъ",
			},
		},
		["uk"] = {
			["article"] = "Ukrainian language",
			-- ["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "", }
		},
		["ur"] = {
			["name"] = "Urdu",
			["article"] = "Urdu",
			-- ["scripts"] = { "Arab" },
		},
		["zh"] = {
			["name"] = "Chinese",
			["article"] = "Chinese language",
			-- ["scripts"] = { "Hani" },
		},
		["xcl"] = {
			["name"] = "Old Armenian",
			["article"] = "Classical Armenian",
			-- ["scripts"] = { "Armn" },
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
		["xvn"] = {
			["name"] = "Vandalic",
			["article"] = "Vandalic language",
			-- ["scripts"] = { "Latn" },
		},
		-- Blank templates for adding new entries.
		--[[
		[""] = {
			["name"] = "",
			["article"] = "",
			-- ["scripts"] = { "" },
		},

		[""] = {
			["name"] = "",
			["article"] = "",
			-- ["scripts"] = { "" },
			["replacements"] = {
			},
		},
		]]
	},

	-- Here, keys (for example, "gem") are Wikipedia language codes used in
	-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
	-- code.
	-- Subtags are not currently supported.
	["redirects"] = {
		["aae"] = "sq",
		["aiq"] = "fa",
		["aln"] = "sq",
		["als"] = "sq",
		["azb"] = "az",
		["azj"] = "az",
		["bgn"] = "bal",
		["bs"] = "sh",
		["bxr"] = "bua",
		["cel-x-proto"] = "cel-pro",
		["ciw"] = "oj",
		["cnr"] = "sh",
		["fil"] = "tl",
		["fuf"] = "ff",
		["gem"] = "gem-pro", -- Not correct, but is commonly used.
		["gem-x-proto"] = "gem-pro",
		["hak"] = "zh",
		["hbo"] = "he",
		["hr"] = "sh",
		["ine"] = "ine-pro", -- Not correct, but might be commonly used.
		["ine-x-proto"] = "ine-pro",
		["nan"] = "zh",
		["prs"] = "fa",
		["rn"] = "rw",
		["sli"] = "gmw-ecg",
		["sr"] = "sh",
		["src"] = "sc",
		["sro"] = "sc",
		["tw"] = "ak",
		["wae"] = "gsw",
		["wep"] = "nds-de",
		["yue"] = "zh",
		["xno"] = "fro",
	},
}

return data