Difference between revisions of "Module:Language/data"

Latest revision as of 22:10, 13 August 2018

local U = mw.ustring.char

-- Diacritics, from the Combining Diacritical Marks block.
-- Each local holds a single combining character; they are concatenated into
-- the character classes used by the "replacements" tables below.
local grave        = U(0x300)
local acute        = U(0x301)
local circumflex   = U(0x302)
local tilde        = U(0x303)
local macron       = U(0x304)
local breve        = U(0x306)
local dot          = U(0x307)
local diaeresis    = U(0x308)
local double_acute = U(0x30B)
local double_grave = U(0x30F)
local invbreve     = U(0x311)
local undertie     = U(0x35C)

--[[
	This is a table of Wiktionary language codes with data belonging to them.
	Name is the "canonical name" used on Wiktionary.
	Article is the Wikipedia article.
	Script is the ISO 15924 code (currently commented out in every entry).

	The returned table has two fields:
	  languages  Wiktionary language code -> data for that language.
	  redirects  Wikipedia language code  -> equivalent Wiktionary code.

	"replacements" appears in two shapes in this file:
	  * a map from a pattern to its replacement string, or
	  * a table with "decompose", "from" (list of patterns) and, optionally,
	    "to" (list of replacements, paired with "from" by position).
	NOTE(review): how "decompose" is applied, and what happens to a "from"
	pattern that has no "to" counterpart, is decided by the consuming module,
	which is not visible here. Presumably the text is decomposed before
	matching and a missing replacement means "delete" -- confirm there.
]]
local data = {
	["languages"] = {
		["ab"] = {
			["name"] = "Abkhaz",
		},
		["ang"] = {
			["name"] = "Old English",
			-- NOTE(review): every other entry stores "article" as a string;
			-- this one is a table. Left as is -- confirm what the consumer expects.
			["article"] = {"Old English"},
			-- ["scripts"] = {"Latn"},
			-- Remove macrons, acutes, and overdots
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. acute .. dot .. "]" },
			},
		},
		["ar"] = {
			["name"] = "Arabic",
			["article"] = "Arabic language",
			-- ["scripts"] = { "Arab" },
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = {
				-- ālif with wasla is replaced by ālif;
				[U(0x0671)] = U(0x0627),
				-- taṭwīl, fatḥatan, ḍammatan, kasratan,
				-- fatḥa, ḍamma, kasra,
				-- shadda, sukūn, and superscript (dagger) ālif are removed.
				["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
					..U(0x064E)..U(0x064F)..U(0x0650)
					..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
			},
		},
		["av"] = {
			["name"] = "Avar"
		},
		["be"] = {
			["article"] = "Belarusian language",
			-- ["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "", },
		},
		["bn"] = {
			["name"] = "Bengali",
			["article"] = "Bengali language",
			-- ["scripts"] = { "Beng" },
		},
		["bua"] = {
			["name"] = "Buryat",
		},
		["cel-pro"] = {
			["name"] = "Proto-Celtic",
			["Wikipedia_code"] = "cel-x-proto",
		},
		["cu"] = {
			["name"] = "Old Church Slavonic",
			["article"] = "Old Church Slavonic",
			-- ["scripts"] = { "Cyrs" },
		},
		["de"] = {
			["name"] = "German",
			["article"] = "German language",
			-- ["scripts"] = { "Latn" },
			--[[
			["replacements"] = {
				["ae"]    = "ä",
				["oe"]    = "ö",
				["ue"]    = "ü",
				["A[Ee]"] = "Ä",
				["O[Ee]"] = "Ö",
				["U[Ee]"] = "Ü",
			},
			]]
		},
		["en"] = {
			["name"] = "English",
			["article"] = "English language",
			-- ["scripts"] = { "Latn" },
		},
		["es"] = {
			["name"] = "Spanish",
			["article"] = "Spanish language",
			-- ["scripts"] = { "Latn" },
		},
		["egy"] = {
			["name"] = "Egyptian",
		},
		["fr"] = {
			["name"] = "French",
			["article"] = "French language",
			-- ["scripts"] = { "Latn" },
		},
		["frm"] = {
			["name"] = "Middle French",
			["article"] = "Middle French",
			-- ["scripts"] = { "Latn" },
		},
		["frp"] = {
			["name"] = "Franco-Provençal",
		},
		["ff"] = {
			["name"] = "Fula",
		},
		["gem-pro"] = {
			["name"] = "Proto-Germanic",
			["article"] = "Proto-Germanic language",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "gem-x-proto",
		},
		["gmw-ecg"] = {
			["name"] = "East Central German",
		},
		["got"] = {
			["name"] = "Gothic",
			["article"] = "Gothic language",
			-- ["scripts"] = { "Goth" },
			["replacements"] = {
				-- Latin to Gothic since people will not want to have to copy
				-- and paste Gothic letters in
				["[AÁaáĀā]"] = "𐌰",
				["[Bb]"]     = "𐌱",
				["[Gg]"]     = "𐌲",
				["[Dd]"]     = "𐌳",
				["[EeĒē]"]   = "𐌴",
				["[Qq]"]     = "𐌵",
				["[Zz]"]     = "𐌶",
				["[Hh]"]     = "𐌷",
				["[Þþ]"]     = "𐌸",
				["[IiÍí]"]   = "𐌹",
				["[Kk]"]     = "𐌺",
				["[Ll]"]     = "𐌻",
				["[Mm]"]     = "𐌼",
				["[Nn]"]     = "𐌽",
				["[Jj]"]     = "𐌾",
				["[UuÚúŪū]"] = "𐌿",
				["[Pp]"]     = "𐍀",
				["[Rr]"]     = "𐍂",
				["[Ss]"]     = "𐍃",
				["[Tt]"]     = "𐍄",
				["[WwYy]"]   = "𐍅",
				["[Ff]"]     = "𐍆",
				["[Xx]"]     = "𐍇",
				["[Ƕƕ]"]    = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
				["[OoŌō]"]   = "𐍉",
			},
		},
		["gsw"] = {
			["name"] = "Alemannic German",
		},
		["grc"] = {
			["name"] = "Ancient Greek",
			["article"] = "Ancient Greek",
			-- ["scripts"] = { "Grek" },
			["replacements"] = {
				decompose = true,
				from = {
					-- Replace variant letterforms with standard ones.
					"ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ",
					-- Remove macrons, breves, and underties.
					-- This eighth pattern has no counterpart in "to".
					"[" .. macron .. breve .. undertie .. "]"
				},
				to   = {
					"β", "ε", "θ", "κ", "ρ", "σ", "φ",
				}
			},
		},
		["grk-pro"] = {
			["name"] = "Proto-Hellenic",
			["Wikipedia_name"] = "Proto-Greek",
			["article"] = "Proto-Greek language",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
		},
		["ha"] = {
			["name"] = "Hausa",
			-- remove tilde, grave, acute, macron, circumflex
			["replacements"] = {
				decompose = true,
				from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" },
			},
		},
		["hi"] = {
			["name"] = "Hindi",
			["article"] = "Hindi",
			-- ["scripts"] = { "Deva" },
		},
		["ine-pro"] = {
			["name"] = "Proto-Indo-European",
			["article"] = "Proto-Indo-European language",
			-- ["scripts"] = { "Latn" },
			["type"] = "reconstructed",
			["replacements"] = {},
			["Wikipedia_code"] = "ine-x-proto",
		},
		["ja"] = {
			["name"] = "Japanese",
			["article"] = "Japanese language",
			-- ["scripts"] = { "Jpan" },
		},
		["jbo"] = { -- Lojban
			["type"] = "appendix",
		},
		["la"] = {
			["name"] = "Latin",
			["article"] = "Latin",
			-- ["scripts"] = { "Latn" },
			["replacements"] = {
				-- Remove macrons, breves, and diaereses.
				decompose = true,
				from = { "[" .. macron .. breve .. diaeresis .. "]" },
			},
		},
		["lt"] = {
			["name"] = "Lithuanian",
			-- remove acute, tilde, grave
			["replacements"] = {
				decompose = true,
				from = { "[" .. acute .. tilde .. grave .. "]" },
			},
		},
		["moe"] = {
			["name"] = "Cree",
		},
		["mul"] = {
			["name"] = "Translingual",
			["article"] = "",
			-- ["scripts"] = { "" },
		},
		["nci"] = {
			["name"] = "Classical Nahuatl",
			["article"] = "Classical Nahuatl",
			-- ["scripts"] = {"Latn"},
			["replacements"] = {
				decompose = true,
				-- Remove macrons, acutes, circumflexes, graves, and saltillo;
				-- see [[Saltillo (linguistics)]].
				from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" },
			},
		},
		["nds-de"] = {
			["name"] = "German Low German",
		},
		["oj"] = {
			["name"] = "Ojibwe",
		},
		["orv"] = {
			["name"] = "Old East Slavic",
			["article"] = "Old East Slavic",
			-- ["scripts"] = { "Cyrs" },
			["replacements"] = {
				[U(0x484)] = "",
			},
		},
		["pt"] = {
			["name"] = "Portuguese",
			["article"] = "Portuguese language",
			-- ["scripts"] = { "Latn" },
		},
		["pa"] = {
			["name"] = "Punjabi",
			["article"] = "Punjabi language",
			-- ["scripts"] = { "Guru", "Arab", },
		},
		["ru"] = {
			["name"] = "Russian",
			["article"] = "Russian language",
			-- ["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "", },
		},
		["rw"] = {
			["name"] = "Rwanda-Rundi",
		},
		["se"] = {
			["replacements"] = {
				-- A consonant, an apostrophe, then the same consonant again
				-- becomes the doubled consonant (the apostrophe is dropped).
				["([đflmnŋrsšŧv])'%1"] = "%1%1",
			},
		},
		["sh"] = {
			["article"] = "Serbo-Croatian language",
			-- ["scripts"] = { "Latn", "Cyrl" },
			["replacements"] = {
				decompose = true,
				-- Drop an accent mark that directly follows one of the listed
				-- Latin or Cyrillic letters, keeping the letter itself.
				from =  { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave
					.. grave .. invbreve .. acute .. macron .. tilde .. "]" },
				to   = { "%1" },
			},
		},
		["sl"] = {
			["name"] = "Slovene",
			["replacements"] = {
				-- remove tonal orthography
				["[ÁÀÂȂȀ]"] = "A",
				["[áàâȃȁ]"] = "a",
				-- Uppercase class maps to uppercase "E", matching the other
				-- uppercase rows (this row previously produced lowercase "e").
				["[ÉÈÊȆȄỆẸ]"] = "E",
				["[éèêȇȅệẹə]"] = "e",
				["[ÍÌÎȊȈ]"] = "I",
				["[íìîȋȉ]"] = "i",
				["[ÓÒÔȎȌỘỌ]"] = "O",
				["[óòôȏȍộọ]"] = "o",
				["[ŔȒȐ]"] = "R",
				["[ŕȓȑ]"] = "r",
				["[ÚÙÛȖȔ]"] = "U",
				["[úùûȗȕ]"] = "u",
				["ł"] = "l",
			},
		},
		["sla-pro"] = {
			["name"] = "Proto-Slavic", -- also Common Slavic
			["type"] = "reconstructed",
			-- ["scripts"] = { "Latn" },
			["replacements"] = {
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
				["ĭ"] = "ь",
				["ŭ"] = "ъ",
			},
		},
		["uk"] = {
			["article"] = "Ukrainian language",
			-- ["scripts"] = { "Cyrl" },
			-- Combining acute accent is removed.
			["replacements"] = { [U(0x0301)] = "", }
		},
		["ur"] = {
			["name"] = "Urdu",
			["article"] = "Urdu",
			-- ["scripts"] = { "Arab" },
		},
		["zh"] = {
			["name"] = "Chinese",
			["article"] = "Chinese language",
			-- ["scripts"] = { "Hani" },
		},
		["xcl"] = {
			["name"] = "Old Armenian",
			["article"] = "Classical Armenian",
			-- ["scripts"] = { "Armn" },
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
		["xvn"] = {
			["name"] = "Vandalic",
			["article"] = "Vandalic language",
			-- ["scripts"] = { "Latn" },
		},
--[[
		Templates for new entries (without and with replacements):

		[""] = {
			["name"] = "",
			["article"] = "",
			-- ["scripts"] = { "" },
		},

		[""] = {
			["name"] = "",
			["article"] = "",
			-- ["scripts"] = { "" },
			["replacements"] = {
			},
		},

]]
	},

-- Here, keys (for example, "gem") are Wikipedia language codes used in
-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
-- code.
-- Subtags are not currently supported.
	["redirects"] = {
		["aae"] = "sq",
		["aiq"] = "fa",
		["aln"] = "sq",
		["als"] = "sq",
		["azb"] = "az",
		["azj"] = "az",
		["bgn"] = "bal",
		["bs"] = "sh",
		["bxr"] = "bua",
		["cel-x-proto"] = "cel-pro",
		["ciw"] = "oj",
		["cnr"] = "sh",
		["fil"] = "tl",
		["fuf"] = "ff",
		["gem"] = "gem-pro", -- Not correct, but is commonly used.
		["gem-x-proto"] = "gem-pro",
		["hak"] = "zh",
		["hbo"] = "he",
		["hr"] = "sh",
		["ine"] = "ine-pro", -- Not correct, but might be commonly used.
		["ine-x-proto"] = "ine-pro",
		["nan"] = "zh",
		["prs"] = "fa",
		["rn"] = "rw",
		["sli"] = "gmw-ecg",
		["sr"] = "sh",
		["src"] = "sc",
		["sro"] = "sc",
		["tw"] = "ak",
		["wae"] = "gsw",
		["wep"] = "nds-de",
		["yue"] = "zh",
		["xno"] = "fro",
	},
}

return data

Difference between revisions of "Module:Language/data"

Latest revision as of 22:10, 13 August 2018

Navigation menu

Search

@@ Line 1: / Line 1: @@
 local U = mw.ustring.char
--- diacritics
+-- Diacritics, from the [[Combining Diacritical Marks]] block.
-local grave     = U(0x300)
+local grave        = U(0x300)
-local acute     = U(0x301)
+local acute        = U(0x301)
+local circumflex   = U(0x302)
+local tilde        = U(0x303)
+local macron       = U(0x304)
+local breve        = U(0x306)
+local dot          = U(0x307)
+local diaeresis    = U(0x308)
 local double_acute = U(0x30B)
-local tilde     = U(0x303)
+local double_grave = U(0x30F)
-local macron    = U(0x304)
+local invbreve     = U(0x311)
-local dgrave    = U(0x30F)
+local undertie     = U(0x35C)
-local invbreve  = U(0x311)
---[[ Name is the "canonical name" used on Wiktionary. Article is the Wikipedia article. Script is the ISO 15924 code. ]]
+--[[
+	This is a table of Wiktionary language codes with data belonging to them.
+	Name is the "canonical name" used on Wiktionary.
+	Article is the Wikipedia article.
+	Script is the ISO 15924 code.
+]]
 local data = {
-	["ang"] = {
+	["languages"] = {
-		["name"] = "Old English",
+		["ab"] = {
-		["article"] = {"Old English"},
+			["name"] = "Abkhaz",
-		["scripts"] = {"Latn"},
+		},
-		-- Remove macrons, acutes, and overdots
+		["ang"] = {
-		["replacements"] = {
+			["name"] = "Old English",
-			["[ĀÁ]"] = "A",
+			["article"] = {"Old English"},
-			["[āá]"] = "a",
+			-- ["scripts"] = {"Latn"},
-			["[ǢǼ]"] = "Æ",
+			-- Remove macrons, acutes, and overdots
-			["[ǣǽ]"] = "æ",
+			["replacements"] = {
-			["Ċ"]    = "C",
+				decompose = true,
-			["ċ"]    = "c",
+				from = { "[" .. macron .. acute .. dot .. "]" },
-			["[ĒÉ]"] = "E",
-			["[ēé]"] = "e",
-			["Ġ"]    = "G",
-			["ġ"]    = "g",
-			["[ĪÍ]"] = "I",
-			["[īí]"] = "i",
-			["[ŌÓ]"] = "O",
-			["[ōó]"] = "o",
-			["[ŪÚ]"] = "U",
-			["[ūú]"] = "u",
-			["[ȲÝ]"] = "Y",
-			["[ȳý]"] = "y",
 			},
 		},
-	["ar"] = {
+		["ar"] = {
-		["name"] = "Arabic",
+			["name"] = "Arabic",
-		["article"] = "Arabic language",
+			["article"] = "Arabic language",
-		["scripts"] = { "Arab" },
+			-- ["scripts"] = { "Arab" },
-			--[[ ālif with wasla is replaced by ālif;
+			["direction"] = "rtl", -- Should be in the script data module.
-			taṭwīl, fatḥatan, ḍammatan, kasratan,
+			["replacements"] = {
-			fatḥa, ḍamma, kasra,
+				-- ālif with wasla is replaced by ālif;
-			shadda, sukūn, and superscript (dagger) ālif are removed. ]]
+				[U(0x0671)] = U(0x0627),
-		["direction"] = "rtl", -- Should be in the script data module.
+				-- taṭwīl, fatḥatan, ḍammatan, kasratan,
-		["replacements"] = {
+				-- fatḥa, ḍamma, kasra,
-			[U(0x0671)] = U(0x0627),
+				-- shadda, sukūn, and superscript (dagger) ālif are removed.
-			["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
+				["["..U(0x0640)..U(0x064B)..U(0x064C)..U(0x064D)
-				..U(0x064E)..U(0x064F)..U(0x0650)
+					..U(0x064E)..U(0x064F)..U(0x0650)
-				..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
+					..U(0x0651)..U(0x0652)..U(0x0670).."]"] = "",
 			},
 		},
-	["be"] = {
+		["av"] = {
-		["article"] = "Belarusian language",
+			["name"] = "Avar"
-		["scripts"] = { "Cyrl" },
+		},
-		-- Combining acute accent is removed.
+		["be"] = {
-		["replacements"] = { [U(0x0301)] = "", }
+			["article"] = "Belarusian language",
+			-- ["scripts"] = { "Cyrl" },
+			-- Combining acute accent is removed.
+			["replacements"] = { [U(0x0301)] = "", },
+		},
+		["bn"] = {
+			["name"] = "Bengali",
+			["article"] = "Bengali language",
+			-- ["scripts"] = { "Beng" },
 		},
-	["bn"] = {
+		["bua"] = {
-		["name"] = "Bengali",
+			["name"] = "Buryat",
-		["article"] = "Bengali language",
-		["scripts"] = { "Beng" },
 		},
-	["cu"] = {
+		["cel-pro"] = {
-		["name"] = "Old Church Slavonic",
+			["name"] = "Proto-Celtic",
-		["article"] = "Old Church Slavonic",
+			["Wikipedia_code"] = "cel-x-proto",
-		["scripts"] = { "Cyrs" },
 		},
-	["de"] = {
+		["cu"] = {
-		["name"] = "German",
+			["name"] = "Old Church Slavonic",
-		["article"] = "German language",
+			["article"] = "Old Church Slavonic",
-		["scripts"] = { "Latn" },
+			-- ["scripts"] = { "Cyrs" },
-		--[[
-		["replacements"] = {
-			["ae"]    = "ä",
-			["oe"]    = "ö",
-			["ue"]    = "ü",
-			["A[Ee]"] = "Ä",
-			["O[Ee]"] = "Ö",
-			["U[Ee]"] = "Ü",
 		},
-		]]
+		["de"] = {
+			["name"] = "German",
+			["article"] = "German language",
+			-- ["scripts"] = { "Latn" },
+			--[[
+			["replacements"] = {
+				["ae"]    = "ä",
+				["oe"]    = "ö",
+				["ue"]    = "ü",
+				["A[Ee]"] = "Ä",
+				["O[Ee]"] = "Ö",
+				["U[Ee]"] = "Ü",
+			},
+			]]
 		},
-	["en"] = {
+		["en"] = {
-		["name"] = "English",
+			["name"] = "English",
-		["article"] = "English language",
+			["article"] = "English language",
-		["scripts"] = { "Latn" },
+			-- ["scripts"] = { "Latn" },
 		},
-	["es"] = {
+		["es"] = {
-		["name"] = "Spanish",
+			["name"] = "Spanish",
-		["article"] = "Spanish language",
+			["article"] = "Spanish language",
-		["scripts"] = { "Latn" },
+			-- ["scripts"] = { "Latn" },
 		},
-	["fr"] = {
+		["egy"] = {
-		["name"] = "French",
+			["name"] = "Egyptian",
-		["article"] = "French language",
-		["scripts"] = { "Latn" },
 		},
-	["frm"] = {
+		["fr"] = {
-		["name"] = "Middle French",
+			["name"] = "French",
-		["article"] = "Middle French",
+			["article"] = "French language",
-		["scripts"] = { "Latn" },
+			-- ["scripts"] = { "Latn" },
 		},
-	["gem-pro"] = {
+		["frm"] = {
-		["name"] = "Proto-Germanic",
+			["name"] = "Middle French",
-		["article"] = "Proto-Germanic language",
+			["article"] = "Middle French",
-		["script"] = { "Latn" },
+			-- ["scripts"] = { "Latn" },
-		["type"] = "reconstructed",
-		["replacements"] = {},
 		},
-	["grc"] = {
+		["frp"] = {
-		["name"] = "Ancient Greek",
+			["name"] = "Franco-Provençal",
-		["article"] = "Ancient Greek",
+		},
-		["scripts"] = { "Grek" },
+		["ff"] = {
-		["replacements"] = {
+			["name"] = "Fula",
-			-- Vowels with macrons or breves are replaced with plain letters.
+		},
-			["[ᾱᾰ]"] = "α",
+		["gem-pro"] = {
-			["[ᾹᾸ]"] = "Α",
+			["name"] = "Proto-Germanic",
-			["[ῑῐ]"] = "ι",
+			["article"] = "Proto-Germanic language",
-			["[ῙῘ]"] = "Ι",
+			-- ["scripts"] = { "Latn" },
-			["[ῡῠ]"] = "υ",
+			["type"] = "reconstructed",
-			["[ῩῨ]"] = "Υ",
+			["replacements"] = {},
-			["ϐ"]    = "β",
+			["Wikipedia_code"] = "gem-x-proto",
-			["ϵ"]    = "ε",
+		},
-			["ϑ"]    = "θ",
+		["gmw-ecg"] = {
-			["ϰ"]    = "κ",
+			["name"] = "East Central German",
-			["ϱ"]    = "ρ",
+		},
-			["ϲ"]    = "σ",
+		["got"] = {
-			["ϕ"]    = "φ",
+			["name"] = "Gothic",
+			["article"] = "Gothic language",
+			-- ["scripts"] = { "Goth" },
+			["replacements"] = {
+				-- Latin to Gothic since people will not want to have to copy
+				-- and paste Gothic letters in
+				["[AÁaáĀā]"] = "𐌰",
+				["[Bb]"]     = "𐌱",
+				["[Gg]"]     = "𐌲",
+				["[Dd]"]     = "𐌳",
+				["[EeĒē]"]   = "𐌴",
+				["[Qq]"]     = "𐌵",
+				["[Zz]"]     = "𐌶",
+				["[Hh]"]     = "𐌷",
+				["[Þþ]"]     = "𐌸",
+				["[IiÍí]"]   = "𐌹",
+				["[Kk]"]     = "𐌺",
+				["[Ll]"]     = "𐌻",
+				["[Mm]"]     = "𐌼",
+				["[Nn]"]     = "𐌽",
+				["[Jj]"]     = "𐌾",
+				["[UuÚúŪū]"] = "𐌿",
+				["[Pp]"]     = "𐍀",
+				["[Rr]"]     = "𐍂",
+				["[Ss]"]     = "𐍃",
+				["[Tt]"]     = "𐍄",
+				["[WwYy]"]   = "𐍅",
+				["[Ff]"]     = "𐍆",
+				["[Xx]"]     = "𐍇",
+				["[Ƕƕ]"]    = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
+				["[OoŌō]"]   = "𐍉",
 			},
 		},
-	["grk-pro"] = {
+		["gsw"] = {
-		["name"] = "Proto-Hellenic",
+			["name"] = "Alemannic German",
-		["Wikipedia_name"] = "Proto-Greek",
+		},
-		["article"] = "Proto-Greek language",
+		["grc"] = {
-		["script"] = { "Latn" },
+			["name"] = "Ancient Greek",
-		["type"] = "reconstructed",
+			["article"] = "Ancient Greek",
-		["replacements"] = {},
+			-- ["scripts"] = { "Grek" },
+			["replacements"] = {
+				decompose = true,
+				from = {
+					-- Replace variant letterforms with standard ones.
+					"ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ",
+					-- Remove macrons and breves.
+					"[" .. macron .. breve .. undertie .. "]"
+				},
+				to   = {
+					"β", "ε", "θ", "κ", "ρ", "σ", "φ",
+				}
+			},
+		},
+		["grk-pro"] = {
+			["name"] = "Proto-Hellenic",
+			["Wikipedia_name"] = "Proto-Greek",
+			["article"] = "Proto-Greek language",
+			-- ["scripts"] = { "Latn" },
+			["type"] = "reconstructed",
+			["replacements"] = {},
+		},
+		["ha"] = {
+			["name"] = "Hausa",
+			-- remove tilde, grave, acute, macron, circumflex
+			["replacements"] = {
+				decompose = true,
+				from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" },
+			},
+		},
+		["hi"] = {
+			["name"] = "Hindi",
+			["article"] = "Hindi",
+			-- ["scripts"] = { "Deva" },
+		},
+		["ine-pro"] = {
+			["name"] = "Proto-Indo-European",
+			["article"] = "Proto-Indo-European language",
+			-- ["scripts"] = { "Latn" },
+			["type"] = "reconstructed",
+			["replacements"] = {},
+			["Wikipedia_code"] = "ine-x-proto",
+		},
+		["ja"] = {
+			["name"] = "Japanese",
+			["article"] = "Japanese language",
+			-- ["scripts"] = { "Jpan" },
 		},
-	["hi"] = {
+		["jbo"] = { -- Lojban
-		["name"] = "Hindi",
+			["type"] = "appendix",
-		["article"] = "Hindi",
-		["scripts"] = { "Deva" },
 		},
-	["ine-pro"] = {
+		["la"] = {
-		["name"] = "Proto-Indo-European",
+			["name"] = "Latin",
-		["article"] = "Proto-Indo-European language",
+			["article"] = "Latin",
-		["script"] = { "Latn" },
+			-- ["scripts"] = { "Latn" },
-		["type"] = "reconstructed",
+			["replacements"] = {
-		["replacements"] = {},
+				-- Remove macrons, breves, and diaereses.
+				decompose = true,
+				from = { "[" .. macron .. breve .. diaeresis .. "]" },
+			},
 		},
-	["ja"] = {
+		["lt"] = {
-		["name"] = "Japanese",
+			["name"] = "Lithuanian",
-		["article"] = "Japanese language",
+			-- remove acute, tilde, grave
-		["scripts"] = { "Jpan" },
+			["replacements"] = {
+				decompose = true,
+				from = { "[" .. acute .. tilde .. grave .. "]" },
+			},
 		},
-	["la"] = {
+		["moe"] = {
-		["name"] = "Latin",
+			["name"] = "Cree",
-		["article"] = "Latin",
+		},
-		["scripts"] = { "Latn" },
+		["mul"] = {
-		["replacements"] = {
+			["name"] = "Translingual",
-			-- Vowels with macrons, breves, or diaereses are replaced with plain letters.
+			["article"] = "",
-			["[ĀĂ]"]  = "A",
+			-- ["scripts"] = { "" },
-			["[āă]"]  = "a",
+		},
-			["[ĒĔ]"]  = "E",
+		["nci"] = {
-			["[ēĕë]"] = "e",
+			["name"] = "Classical Nahuatl",
-			["[ĪĬÏ]"] = "I",
+			["article"] = "Classical Nahuatl",
-			["[īĭï]"] = "i",
+			-- ["scripts"] = {"Latn"},
-			["[ŌŎ]"]  = "O",
+			-- Remove macrons, acutes, circumflexes and graves
-			["[ōŏ]"]  = "o",
+			["replacements"] = {
-			["[ŪŬÜ]"] = "U",
+				decompose = true,
-			["[ūŭü]"] = "u",
+				-- Remove macrons, acutes, circumflexes, graves, and saltillo;
-			["Ȳ"]     = "Y",
+				-- see [[Saltillo (linguistics)]].
-			["ȳ"]     = "y"
+				from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" },
 			},
 		},
-	["mul"] = {
+		["nds-de"] = {
-		["name"] = "Translingual",
+			["name"] = "German Low German",
-		["article"] = "",
+		},
-		["script"] = { "" },
+		["oj"] = {
+			["name"] = "Ojibwe",
 		},
-	["orv"] = {
+		["orv"] = {
-		["name"] = "Old East Slavic",
+			["name"] = "Old East Slavic",
-		["article"] = "Old East Slavic",
+			["article"] = "Old East Slavic",
-		["script"] = { "Cyrs" },
+			-- ["scripts"] = { "Cyrs" },
-		["replacements"] = {
+			["replacements"] = {
-			[U(0x484)] = "",
+				[U(0x484)] = "",
 			},
 		},
-	["pt"] = {
+		["pt"] = {
-		["name"] = "Portuguese",
+			["name"] = "Portuguese",
-		["article"] = "Portuguese language",
+			["article"] = "Portuguese language",
-		["scripts"] = { "Latn" },
+			-- ["scripts"] = { "Latn" },
+		},
+		["pa"] = {
+			["name"] = "Punjabi",
+			["article"] = "Punjabi language",
+			-- ["scripts"] = { "Guru", "Arab", },
 		},
-	["pa"] = {
+		["ru"] = {
-		["name"] = "Punjabi",
+			["name"] = "Russian",
-		["article"] = "Punjabi language",
+			["article"] = "Russian language",
-		["scripts"] = { "Guru", "Arab", }
+			-- ["scripts"] = { "Cyrl" },
+			-- Combining acute accent is removed.
+			["replacements"] = { [U(0x0301)] = "", },
 		},
-	["ru"] = {
+		["rw"] = {
-		["name"] = "Russian",
+			["name"] = "Rwanda-Rundi",
-		["article"] = "Russian language",
-		["scripts"] = { "Cyrl" },
-		-- Combining acute accent is removed.
-		["replacements"] = { [U(0x0301)] = "", }
 		},
-	["sh"] = {
+		["se"] = {
-		["article"] = "Serbo-Croatian language",
+			["replacements"] = {
-		["scripts"] = { "Latn", "Cyrl" },
+				["([đflmnŋrsšŧv])'%1"] = "%1%1",
-		["replacements"] = {
+			},
-			["[ȀÀȂÁĀÃ]"]	= "A",
-			["[ȁàȃáāã]"]	= "a",
-			["[ȄÈȆÉĒẼ]"]	= "E",
-			["[ȅèȇéēẽ]"]	= "e",
-			["[ȈÌȊÍĪĨ]"]	= "I",
-			["[ȉìȋíīĩ]"]	= "i",
-			["[ȌÒȎÓŌÕ]"]	= "O",
-			["[ȍòȏóōõ]"]	= "o",
-			["[ȐȒŔ]"]		= "R",
-			["[ȑȓŕ]"]		= "r",
-			["[ȔÙȖÚŪŨ]"]	= "U",
-			["[ȕùȗúūũ]"]	= "u",
-			["Ѐ"]			= "Е",
-			["ѐ"]			= "е",
-			["[ӢЍ]"]		= "И",
-			["[ӣѝ]"]		= "и",
-			["[Ӯ]"]			= "У",
-			["[ӯ]"]			= "у"
 		},
-	},
+		["sh"] = {
-	["sla-pro"] = {
+			["article"] = "Serbo-Croatian language",
-		["name"] = "Proto-Slavic", -- also Common Slavic
+			-- ["scripts"] = { "Latn", "Cyrl" },
-		["type"] = "reconstructed",
+			["replacements"] = {
-		["scripts"] = { "Latn" },
+				decompose = true,
-		["replacements"] = {
+				from =  { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave
-			["[ÀÁÃĀȀȂ]"] = "A",
+					.. grave .. invbreve .. acute .. macron .. tilde .. "]" },
-			["[àáãāȁȃ]"] = "a",
+				to   = { "%1" },
-			["[ÈÉẼĒȄȆ]"] = "E",
+			},
-			["[èéẽēȅȇ]"] = "e",
+		},
-			["[ÌÍĨĪȈȊ]"] = "I",
+		["sl"] = {
-			["[ìíĩīȉȋ]"] = "i",
+			["name"] = "Slovene",
-			["[ÒÓÕŌȌȎŐ]"] = "O",
+			["replacements"] = {
-			["[òóõōȍȏő]"] = "o",
+				-- remove tonal orthography
-			["[ÙÚŨŪȔȖŰ]"] = "U",
+				["[ÁÀÂȂȀ]"] = "A",
-			["[ùúũūȕȗű]"] = "u",
+				["[áàâȃȁ]"] = "a",
-			["[ỲÝỸȲ]"] = "Y",
+				["[ÉÈÊȆȄỆẸ]"] = "e",
-			["[ỳýỹȳ]"] = "y",
+				["[éèêȇȅệẹə]"] = "e",
-			["Ǭ"] = "Ǫ",
+				["[ÍÌÎȊȈ]"] = "I",
-			["ǭ"] = "ǫ",
+				["[íìîȋȉ]"] = "i",
-			["[" .. grave .. acute .. double_acute .. tilde .. macron .. dgrave .. invbreve .. "]"] = "",
+				["[ÓÒÔȎȌỘỌ]"] = "O",
+				["[óòôȏȍộọ]"] = "o",
+				["[ŔȒȐ]"] = "R",
+				["[ŕȓȑ]"] = "r",
+				["[ÚÙÛȖȔ]"] = "U",
+				["[úùûȗȕ]"] = "u",
+				["ł"] = "l",
+			},
+		},
+		["sla-pro"] = {
+			["name"] = "Proto-Slavic", -- also Common Slavic
+			["type"] = "reconstructed",
+			-- ["scripts"] = { "Latn" },
+			["replacements"] = {
+				["[ÀÁÃĀȀȂ]"] = "A",
+				["[àáãāȁȃ]"] = "a",
+				["[ÈÉẼĒȄȆ]"] = "E",
+				["[èéẽēȅȇ]"] = "e",
+				["[ÌÍĨĪȈȊ]"] = "I",
+				["[ìíĩīȉȋ]"] = "i",
+				["[ÒÓÕŌȌȎŐ]"] = "O",
+				["[òóõōȍȏő]"] = "o",
+				["[ÙÚŨŪȔȖŰ]"] = "U",
+				["[ùúũūȕȗű]"] = "u",
+				["[ỲÝỸȲ]"] = "Y",
+				["[ỳýỹȳ]"] = "y",
+				["Ǭ"] = "Ǫ",
+				["ǭ"] = "ǫ",
+				["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
+				["ĭ"] = "ь",
+				["ŭ"] = "ъ",
 			},
 		},
-	["uk"] = {
+		["uk"] = {
-		["article"] = "Ukrainian language",
+			["article"] = "Ukrainian language",
-		["scripts"] = { "Cyrl" },
+			-- ["scripts"] = { "Cyrl" },
-		-- Combining acute accent is removed.
+			-- Combining acute accent is removed.
-		["replacements"] = { [U(0x0301)] = "", }
+			["replacements"] = { [U(0x0301)] = "", }
 		},
-	["ur"] = {
+		["ur"] = {
-		["name"] = "Urdu",
+			["name"] = "Urdu",
-		["article"] = "Urdu",
+			["article"] = "Urdu",
-		["scripts"] = { "Arab" },
+			-- ["scripts"] = { "Arab" },
 		},
-	["zh"] = {
+		["zh"] = {
-		["name"] = "Chinese",
+			["name"] = "Chinese",
-		["article"] = "Chinese language",
+			["article"] = "Chinese language",
-		["scripts"] = { "Hani" },
+			-- ["scripts"] = { "Hani" },
 		},
-	["xcl"] = {
+		["xcl"] = {
-		["name"] = "Old Armenian",
+			["name"] = "Old Armenian",
-		["article"] = "Classical Armenian",
+			["article"] = "Classical Armenian",
-		["script"] = { "Armn" },
+			-- ["scripts"] = { "Armn" },
-		["replacements"] = {
+			["replacements"] = {
-			["[՞՜՛՟]"] = "",
+				["[՞՜՛՟]"] = "",
-			["և"] = "եւ",
+				["և"] = "եւ",
 			},
 		},
-	}
+		["xvn"] = {
+			["name"] = "Vandalic",
+			["article"] = "Vandalic language",
+			-- ["scripts"] = { "Latn" },
+		},
 --[[
+		[""] = {
-	[""] = {
+			["name"] = "",
-		["name"] = "",
+			["article"] = "",
-		["article"] = "",
+			-- ["scripts"] = { "" },
-		["script"] = { "" },
 		},
-	[""] = {
+		[""] = {
-		["name"] = "",
+			["name"] = "",
-		["article"] = "",
+			["article"] = "",
-		["script"] = { "" },
+			-- ["scripts"] = { "" },
-		["replacements"] = {
+			["replacements"] = {
 			},
 		},
 ]]
+	},
+-- Here, keys (for example, "gem") are Wikipedia language codes used in
+-- {{lang}}, and values (for example, "gem-pro") are the equivalent Wiktionary
+-- code.
+-- Subtags are not currently supported.
+	["redirects"] = {
+		["aae"] = "sq",
+		["aiq"] = "fa",
+		["aln"] = "sq",
+		["als"] = "sq",
+		["azb"] = "az",
+		["azj"] = "az",
+		["bgn"] = "bal",
+		["bs"] = "sh",
+		["bxr"] = "bua",
+		["cel-x-proto"] = "cel-pro",
+		["ciw"] = "oj",
+		["cnr"] = "sh",
+		["fil"] = "tl",
+		["fuf"] = "ff",
+		["gem"] = "gem-pro", -- Not correct, but is commonly used.
+		["gem-x-proto"] = "gem-pro",
+		["hak"] = "zh",
+		["hbo"] = "he",
+		["hr"] = "sh",
+		["ine"] = "ine-pro", -- Not correct, but might be commonly used.
+		["ine-x-proto"] = "ine-pro",
+		["nan"] = "zh",
+		["prs"] = "fa",
+		["rn"] = "rw",
+		["sli"] = "gmw-ecg",
+		["sr"] = "sh",
+		["src"] = "sc",
+		["sro"] = "sc",
+		["tw"] = "ak",
+		["wae"] = "gsw",
+		["wep"] = "nds-de",
+		["yue"] = "zh",
+		["xno"] = "fro",
+	},
+}
 return data