Difference between revisions of "Module:TaxonItalics"

From Eat Every Plant
Jump to navigation Jump to search
m (1 revision: Empetrum nigrum)
 
m (1 revision: Suaeda)
 
(5 intermediate revisions by the same user not shown)
Line 1: Line 1:
 +
local p = {}
 +
 
--[[=========================================================================
 
--[[=========================================================================
 
Italicize a taxon name appropriately by invoking italicizeTaxonName.
 
Italicize a taxon name appropriately by invoking italicizeTaxonName.
Line 10: Line 12:
 
   * Else if the name is made up of three words and the second word is a
 
   * Else if the name is made up of three words and the second word is a
 
     botanical connecting term or a variant of "cf.", de-italicize the
 
     botanical connecting term or a variant of "cf.", de-italicize the
     connecting term and add italic markup to the outside of the name.  
+
     connecting term and add italic markup to the outside of the name.
 
   * Else just add italic markup to the outside of the name.
 
   * Else just add italic markup to the outside of the name.
 +
The module also:
 +
* Ensures that the hybrid symbol, ×, and parentheses are not italicized
 +
* Has an option to abbreviate all parts of taxon names other than the last
 +
  to the first letter (e.g. "Pinus sylvestris var. sylvestris" becomes
 +
  "P. s. var. sylvestris").
 +
* Has an option to wikilink the italicized name to the input name.
 
=============================================================================]]
 
=============================================================================]]
  
local p = {}
+
--connecting terms in three part names (e.g. Pinus sylvestris var. sylvestris)
 
 
 
local cTerms3 = {
 
local cTerms3 = {
subspecies = "subsp.",
+
--subsp.
["subsp."] = "subsp.",
+
    subspecies = "subsp.",
subsp = "subsp.",
+
    ["subsp."] = "subsp.",
["ssp."] = "subsp.",
+
    subsp = "subsp.",
ssp = "subsp.",
+
    ["ssp."] = "subsp.",
varietas = "var.",
+
    ssp = "subsp.",
["var."] = "var.",
+
    --var.
var = "var.",
+
    varietas = "var.",
subvarietas = "subvar.",
+
    ["var."] = "var.",
["subvar."] = "subvar.",
+
    var = "var.",
subvar = "subvar.",
+
    --subvar.
forma = "f.",
+
    subvarietas = "subvar.",
["f."] = "f.",
+
    ["subvar."] = "subvar.",
f = "f.",
+
    subvar = "subvar.",
subforma = "subf.",
+
    --f.
["subf."] = "subf.",
+
    forma = "f.",
subf = "subf."
+
    ["f."] = "f.",
}
+
    f = "f.",
 +
    --subf.
 +
    subforma = "subf.",
 +
    ["subf."] = "subf.",
 +
    subf = "subf."
 +
    }
 +
--connecting terms in two part names (e.g. Pinus sect. Pinus)
 
local cTerms2 = {
 
local cTerms2 = {
subgenus = "subg.",
+
--subg.
["subg."] = "subg.",
+
    subgenus = "subg.",
subg = "subg.",
+
    ["subg."] = "subg.",
section = "sect.",
+
    subg = "subg.",
["sect."] = "sect.",
+
    --sect.
["cf."] = "cf.",
+
    section = "sect.",
cf = "cf.",
+
    ["sect."] = "sect.",
["c.f."] = "cf."
+
    sect = "sect.",
}
+
    --subsect.
 +
    subsection = "subsect.",
 +
    ["subsect."] = "subsect.",
 +
    subsect = "subsect.",
 +
    --ser.
 +
    series = "ser.",
 +
    ["ser."] = "ser.",
 +
    ser = "ser.",
 +
    --subser.
 +
    subseries = "subser.",
 +
    ["subser."] = "subser.",
 +
    subser = "subser.",
 +
    --cf.
 +
    cf = "cf.",
 +
    ["cf."] = "cf.",
 +
    ["c.f."] = "cf."
 +
    }
  
 
--[[=========================================================================
 
--[[=========================================================================
Italicize a taxon name appropriately.
+
Main function to italicize a taxon name appropriately.
 
=============================================================================]]
 
=============================================================================]]
 
function p.main(frame)
 
function p.main(frame)
local name = frame.args[1] or ''
+
    local name = frame.args[1] or ''
local linked = frame.args['linked'] == 'yes'
+
    local linked = frame.args['linked'] == 'yes'
return p.italicizeTaxonName(name, linked)
+
    local abbreviated = frame.args['abbreviated'] == 'yes'
 +
    return p.italicizeTaxonName(name, linked, abbreviated)
 
end
 
end
  
function p.italicizeTaxonName(name, linked)
+
--[[=========================================================================
local italMarker = "''"
+
Utility function to abbreviate an input string to its first character
-- trim the name and replace any use of the HTML italic tags by Wikimedia markup
+
followed by ".".
name = string.gsub(string.gsub(mw.text.trim(name), "<i>", italMarker), "</i>", italMarker)
+
Both "×" and an HTML entity at the start of the string are skipped over in
local result = name
+
determining first character, as is an opening parenthesis, which causes a
if name ~= '' then
+
closing parenthesis to be included.
if string.sub(name, 1, 2) == "''" or string.sub(name, -2) == "''" then
+
=============================================================================]]
-- do nothing if the name already has italic markers at the start or end
+
function p.abbreviate(str)
else
+
local result = ""
name = string.gsub(name, "''", "") -- first remove internal italics
+
local hasParentheses = false
local words = mw.text.split(name, " ", true)
+
if mw.ustring.len(str) < 2 then
local deitalicized = false
+
--single character strings are left unchanged
if #words == 4 then
+
result = str
-- test for the third word of a four word name being a connecting term
+
else
if cTerms3[words[3]] then
+
--skip over an opening parenthesis that could be present at the start of the string
-- de-italicize the connecting term by adding internal italic markup
+
if mw.ustring.sub(str,1,1) == "(" then
result = words[1] .. " " .. words[2] .. italMarker .. " " .. cTerms3[words[3]] .. italMarker .. " " .. words[4]
+
hasParentheses = true
deitalicized = true
+
result = "("  
end
+
str = mw.ustring.sub(str,2,mw.ustring.len(str))
elseif #words == 3 then
+
end
-- test for the second word of a three word name being a connecting term
+
--skip over a hybrid symbol that could be present at the start of the string
if cTerms2[words[2]] then
+
if mw.ustring.sub(str,1,1) == "×" then
-- de-italicize the connecting term by adding internal italic markup
+
result = "×"  
result = words[1] .. " " .. italMarker .. cTerms2[words[2]] .. italMarker .. " " .. words[3]
+
str = mw.ustring.sub(str,2,mw.ustring.len(str))
deitalicized = true
+
end
end
+
--skip over an HTML entity that could be present at the start of the string
else
+
if mw.ustring.sub(str,1,1) == "&" then
-- do nothing
+
local i,dummy = mw.ustring.find(str,";",2,plain)
result = name
+
result = result .. mw.ustring.sub(str,1,i)
end
+
str = mw.ustring.sub(str,i+1,mw.ustring.len(str))
-- add outside markup
+
end
if linked then
+
--if there's anything left, reduce it to its first character plus ".",
if deitalicized then
+
--adding the closing parenthesis if required
result = "[[" .. name .. "|" .. italMarker .. result .. italMarker .. "]]"
+
if str ~= "" then  
else
+
result = result .. mw.ustring.sub(str,1,1) .. "."
result = italMarker .. "[[" .. name .. "]]" .. italMarker
+
if hasParentheses then result = result .. ")" end
end
 
else
 
result = italMarker .. result .. italMarker
 
end
 
 
end
 
end
 
end
 
end
 
return result
 
return result
 +
end
 +
 +
--[[=========================================================================
 +
The function which does the italicization.
 +
=============================================================================]]
 +
function p.italicizeTaxonName(name, linked, abbreviated)
 +
    local italMarker = "''"
 +
    -- begin by tidying the input name: trim; replace any use of the HTML
 +
    -- italic tags by Wikimedia markup; replace any alternatives to the hybrid
 +
    -- symbol by the symbol itself; prevent the hybrid symbol being treated as
 +
    -- a 'word' by converting a following space to the HTML entity
 +
    name = string.gsub(mw.text.trim(name), "</?i>", italMarker)
 +
    name = string.gsub(string.gsub(name, "&#215;", "×"), "&times;", "×")
 +
    name = string.gsub(name, "</?span.->", "") -- remove any span markup
 +
    name = string.gsub(name, "× ", "×&#32;")
 +
    -- now italicize and abbreviate if required
 +
    local result = name
 +
    if name ~= '' then
 +
        if string.sub(name,1,2) == "''" or string.sub(name,-2) == "''" then
 +
            -- do nothing if the name already has italic markers at the start or end
 +
        else
 +
            name = string.gsub(name, "''", "") -- first remove any internal italics
 +
            local words = mw.text.split(name, " ", true)
 +
            if #words == 4 and cTerms3[words[3]] then
 +
                -- the third word of a four word name is a connecting term
 +
                -- ensure the connecting term isn't italicized
 +
                words[3] = '<span style="font-style:normal;">' .. cTerms3[words[3]] .. '</span>'
 +
                if abbreviated then
 +
                words[1] = p.abbreviate(words[1])
 +
                    words[2] = p.abbreviate(words[2])
 +
            end
 +
                result = words[1] .. " " .. words[2] .. " " .. words[3] .. " " .. words[4]
 +
            elseif #words == 3 and cTerms2[words[2]] then
 +
                -- the second word of a three word name is a connecting term
 +
                -- ensure the connecting term isn't italicized
 +
                words[2] = '<span style="font-style:normal;">' .. cTerms2[words[2]] .. '</span>'
 +
                if abbreviated then
 +
                words[1] = p.abbreviate(words[1])
 +
                end
 +
                result = words[1] .. " " .. words[2] .. " " .. words[3]
 +
            else
 +
                -- not a name as above; only deal with abbreviation
 +
                if abbreviated then
 +
                if #words > 1 then
 +
                result = p.abbreviate(words[1])
 +
                for i = 2, #words-1, 1 do
 +
                result = result .. " " .. p.abbreviate(words[i])
 +
                end
 +
                result = result .. " " .. words[#words]
 +
                end
 +
                else
 +
                result = name
 +
                end
 +
            end
 +
            -- deal with any hybrid symbol as it should not be italicized
 +
            result = string.gsub(result, "×", '<span style="font-style:normal;">×</span>')
 +
            -- deal with any parentheses as they should not be italicized
 +
            result = string.gsub(string.gsub(result,"%(",'<span style="font-style:normal;">(</span>'),"%)",'<span style="font-style:normal;">)</span>')
 +
          -- add outside markup
 +
            if linked then
 +
                if result ~= name then
 +
                    result = "[[" .. name .. "|" .. italMarker .. result .. italMarker .. "]]"
 +
                else
 +
                    result = italMarker .. "[[" .. name .. "]]" .. italMarker
 +
                end
 +
            else
 +
                result = italMarker .. result .. italMarker
 +
            end
 +
        end
 +
    end
 +
    return result
 
end
 
end
  
 
return p
 
return p

Latest revision as of 17:35, 18 December 2018

-- Table of functions exported by this module; it is returned at the end of the file.
local p = {}

--[[=========================================================================
Italicize a taxon name appropriately by invoking italicizeTaxonName.
The algorithm used is:
* If the name has italic markup at the start or the end, do nothing.
* Else
  * Remove (internal) italic markup.
  * If the name is made up of four words and the third word is a
    botanical connecting term, de-italicize the connecting term and add italic
    markup to the outside of the name.
  * Else if the name is made up of three words and the second word is a
    botanical connecting term or a variant of "cf.", de-italicize the
    connecting term and add italic markup to the outside of the name.
  * Else just add italic markup to the outside of the name.
The module also:
* Ensures that the hybrid symbol, ×, and parentheses are not italicized
* Has an option to abbreviate all parts of taxon names other than the last
  to the first letter (e.g. "Pinus sylvestris var. sylvestris" becomes
  "P. s. var. sylvestris").
* Has an option to wikilink the italicized name to the input name.
=============================================================================]]

-- Connecting terms in three part names (e.g. Pinus sylvestris var. sylvestris).
-- Each key is a spelling accepted in the input; the value is the abbreviation
-- that replaces it in the output.
local cTerms3 = {
    --subsp.
    subspecies = "subsp.",
    ["subsp."] = "subsp.",
    subsp = "subsp.",
    ["ssp."] = "subsp.",
    ssp = "subsp.",
    --var.
    varietas = "var.",
    ["var."] = "var.",
    var = "var.",
    --subvar.
    subvarietas = "subvar.",
    ["subvar."] = "subvar.",
    subvar = "subvar.",
    --f.
    forma = "f.",
    ["f."] = "f.",
    f = "f.",
    --subf.
    subforma = "subf.",
    ["subf."] = "subf.",
    subf = "subf."
}

-- Connecting terms in two part names (e.g. Pinus sect. Pinus).
-- Each key is a spelling accepted in the input; the value is the abbreviation
-- that replaces it in the output.
local cTerms2 = {
    --subg.
    subgenus = "subg.",
    ["subg."] = "subg.",
    subg = "subg.",
    --sect.
    section = "sect.",
    ["sect."] = "sect.",
    sect = "sect.",
    --subsect.
    subsection = "subsect.",
    ["subsect."] = "subsect.",
    subsect = "subsect.",
    --ser.
    series = "ser.",
    ["ser."] = "ser.",
    ser = "ser.",
    --subser.
    subseries = "subser.",
    ["subser."] = "subser.",
    subser = "subser.",
    --cf.
    cf = "cf.",
    ["cf."] = "cf.",
    ["c.f."] = "cf."
}

--[[=========================================================================
Main function to italicize a taxon name appropriately.
Reads its arguments from the frame:
* args[1]             the taxon name; a missing value is treated as ''
* args['linked']      'yes' to wikilink the italicized name to the input name
* args['abbreviated'] 'yes' to abbreviate every part of the name but the last
Returns whatever p.italicizeTaxonName returns for those values.
=============================================================================]]
function p.main(frame)
    -- the empty-string default keeps the later string operations safe when
    -- the argument is omitted
    local name = frame.args[1] or ''
    -- only the exact value 'yes' switches an option on
    local linked = frame.args['linked'] == 'yes'
    local abbreviated = frame.args['abbreviated'] == 'yes'
    return p.italicizeTaxonName(name, linked, abbreviated)
end

--[[=========================================================================
Utility function to abbreviate an input string to its first character
followed by ".".
Both "×" and an HTML entity at the start of the string are skipped over in
determining first character, as is an opening parenthesis, which causes a
closing parenthesis to be included.
Strings of fewer than two characters are returned unchanged.
Parameter: str, the word to abbreviate.
Returns: the abbreviated word, with any skipped prefix kept in front of it.
=============================================================================]]
function p.abbreviate(str)
    local result = ""
    local hasParentheses = false
    if mw.ustring.len(str) < 2 then
        --single character strings are left unchanged
        result = str
    else
        --skip over an opening parenthesis that could be present at the start of the string
        if mw.ustring.sub(str, 1, 1) == "(" then
            hasParentheses = true
            result = "("
            str = mw.ustring.sub(str, 2)
        end
        --skip over a hybrid symbol that could be present at the start of the string;
        --append rather than assign so that a preceding "(" is not lost
        if mw.ustring.sub(str, 1, 1) == "×" then
            result = result .. "×"
            str = mw.ustring.sub(str, 2)
        end
        --skip over an HTML entity that could be present at the start of the string
        if mw.ustring.sub(str, 1, 1) == "&" then
            --plain search for the terminating ";"
            local i = mw.ustring.find(str, ";", 2, true)
            --an "&" with no terminating ";" is not an entity; leave it in
            --place so that it is treated as the first character below
            if i then
                result = result .. mw.ustring.sub(str, 1, i)
                str = mw.ustring.sub(str, i + 1)
            end
        end
        --if there's anything left, reduce it to its first character plus ".",
        --adding the closing parenthesis if required
        if str ~= "" then
            result = result .. mw.ustring.sub(str, 1, 1) .. "."
            if hasParentheses then result = result .. ")" end
        end
    end
    return result
end

--[[=========================================================================
The function which does the italicization.
Parameters:
* name        the taxon name to format
* linked      if true, the output is a wikilink to the tidied input name
* abbreviated if true, every word before the last (other than a connecting
              term) is reduced by p.abbreviate
Returns the formatted name. A name that is empty after tidying, or that
already starts or ends with italic markup, is returned as tidied with no
further change.
=============================================================================]]
function p.italicizeTaxonName(name, linked, abbreviated)
    local italMarker = "''"
    -- wraps text that must stay upright inside the italicized name
    local function upright(text)
        return '<span style="font-style:normal;">' .. text .. '</span>'
    end
    -- begin by tidying the input name: trim; replace any use of the HTML
    -- italic tags by Wikimedia markup; replace any alternatives to the hybrid
    -- symbol by the symbol itself; prevent the hybrid symbol being treated as
    -- a 'word' by converting a following space to the HTML entity
    name = string.gsub(mw.text.trim(name), "</?i>", italMarker)
    name = string.gsub(string.gsub(name, "&#215;", "×"), "&times;", "×")
    name = string.gsub(name, "</?span.->", "") -- remove any span markup
    name = string.gsub(name, "× ", "×&#32;")
    -- now italicize and abbreviate if required
    local result = name
    if name ~= '' then
        if string.sub(name, 1, 2) == "''" or string.sub(name, -2) == "''" then
            -- do nothing if the name already has italic markers at the start or end
        else
            name = string.gsub(name, "''", "") -- first remove any internal italics
            local words = mw.text.split(name, " ", true)
            if #words == 4 and cTerms3[words[3]] then
                -- the third word of a four word name is a connecting term
                -- ensure the connecting term isn't italicized
                words[3] = upright(cTerms3[words[3]])
                if abbreviated then
                    words[1] = p.abbreviate(words[1])
                    words[2] = p.abbreviate(words[2])
                end
                result = words[1] .. " " .. words[2] .. " " .. words[3] .. " " .. words[4]
            elseif #words == 3 and cTerms2[words[2]] then
                -- the second word of a three word name is a connecting term
                -- ensure the connecting term isn't italicized
                words[2] = upright(cTerms2[words[2]])
                if abbreviated then
                    words[1] = p.abbreviate(words[1])
                end
                result = words[1] .. " " .. words[2] .. " " .. words[3]
            else
                -- not a name as above; only deal with abbreviation
                if abbreviated then
                    -- a single word name has nothing to abbreviate, so
                    -- result keeps the value it was initialised with
                    if #words > 1 then
                        result = p.abbreviate(words[1])
                        for i = 2, #words - 1, 1 do
                            result = result .. " " .. p.abbreviate(words[i])
                        end
                        result = result .. " " .. words[#words]
                    end
                else
                    result = name
                end
            end
            -- deal with any hybrid symbol as it should not be italicized
            result = string.gsub(result, "×", upright("×"))
            -- deal with any parentheses as they should not be italicized
            result = string.gsub(result, "%(", upright("("))
            result = string.gsub(result, "%)", upright(")"))
            -- add outside markup
            if linked then
                if result ~= name then
                    -- the displayed text differs from the link target, so
                    -- a piped link is needed
                    result = "[[" .. name .. "|" .. italMarker .. result .. italMarker .. "]]"
                else
                    result = italMarker .. "[[" .. name .. "]]" .. italMarker
                end
            else
                result = italMarker .. result .. italMarker
            end
        end
    end
    return result
end

-- Hand the table of module functions back to whatever loads this module.
return p