Difference between revisions of "Module:TaxonItalics"

From Eat Every Plant
Jump to navigation Jump to search
m (1 revision: Cornus (two species))
 
m (1 revision: Pyrus communis)
(One intermediate revision by the same user not shown)
Line 12: Line 12:
 
     connecting term and add italic markup to the outside of the name.
 
     connecting term and add italic markup to the outside of the name.
 
   * Else just add italic markup to the outside of the name.
 
   * Else just add italic markup to the outside of the name.
 +
The module also:
 +
* Ensures that the hybrid symbol, ×, and parentheses are not italicized
 +
* Has an option to abbreviate all parts of taxon names other than the last
 +
  to the first letter (e.g. "Pinus sylvestris var. sylvestris" becomes
 +
  "P. s. var. sylvestris").
 +
* Has an option to wikilink the italicized name to the input name.
 
=============================================================================]]
 
=============================================================================]]
  
 
local p = {}
 
local p = {}
  
 +
--connecting terms in three part names (e.g. Pinus sylvestris var. sylvestris)
 
local cTerms3 = {
 
local cTerms3 = {
 +
--subsp.
 
     subspecies = "subsp.",
 
     subspecies = "subsp.",
 
     ["subsp."] = "subsp.",
 
     ["subsp."] = "subsp.",
Line 22: Line 30:
 
     ["ssp."] = "subsp.",
 
     ["ssp."] = "subsp.",
 
     ssp = "subsp.",
 
     ssp = "subsp.",
 +
    --var.
 
     varietas = "var.",
 
     varietas = "var.",
 
     ["var."] = "var.",
 
     ["var."] = "var.",
 
     var = "var.",
 
     var = "var.",
 +
    --subvar.
 
     subvarietas = "subvar.",
 
     subvarietas = "subvar.",
 
     ["subvar."] = "subvar.",
 
     ["subvar."] = "subvar.",
 
     subvar = "subvar.",
 
     subvar = "subvar.",
 +
    --f.
 
     forma = "f.",
 
     forma = "f.",
 
     ["f."] = "f.",
 
     ["f."] = "f.",
 
     f = "f.",
 
     f = "f.",
 +
    --subf.
 
     subforma = "subf.",
 
     subforma = "subf.",
 
     ["subf."] = "subf.",
 
     ["subf."] = "subf.",
 
     subf = "subf."
 
     subf = "subf."
 
     }
 
     }
 +
--connecting terms in two part names (e.g. Pinus sect. Pinus)
 
local cTerms2 = {
 
local cTerms2 = {
 +
--subg.
 
     subgenus = "subg.",
 
     subgenus = "subg.",
 
     ["subg."] = "subg.",
 
     ["subg."] = "subg.",
 
     subg = "subg.",
 
     subg = "subg.",
 +
    --sect.
 
     section = "sect.",
 
     section = "sect.",
 
     ["sect."] = "sect.",
 
     ["sect."] = "sect.",
 +
    sect = "sect.",
 +
    --subsect.
 
     subsection = "subsect.",
 
     subsection = "subsect.",
 +
    ["subsect."] = "subsect.",
 
     subsect = "subsect.",
 
     subsect = "subsect.",
     ["subsect."] = "subsect.",
+
     --ser.
 
     series = "ser.",
 
     series = "ser.",
 +
    ["ser."] = "ser.",
 
     ser = "ser.",
 
     ser = "ser.",
     ["ser."] = "ser.",
+
     --subser.
 
     subseries = "subser.",
 
     subseries = "subser.",
 +
    ["subser."] = "subser.",
 
     subser = "subser.",
 
     subser = "subser.",
     ["subser."] = "subser.",
+
     --cf.
 +
    cf = "cf.",
 
     ["cf."] = "cf.",
 
     ["cf."] = "cf.",
    cf = "cf.",
 
 
     ["c.f."] = "cf."
 
     ["c.f."] = "cf."
 
     }
 
     }
  
 
--[[=========================================================================
 
--[[=========================================================================
Italicize a taxon name appropriately.
+
Main function to italicize a taxon name appropriately.
 
=============================================================================]]
 
=============================================================================]]
 
function p.main(frame)
 
function p.main(frame)
 
     local name = frame.args[1] or ''
 
     local name = frame.args[1] or ''
 
     local linked = frame.args['linked'] == 'yes'
 
     local linked = frame.args['linked'] == 'yes'
     return p.italicizeTaxonName(name, linked)
+
    local abbreviated = frame.args['abbreviated'] == 'yes'
 +
     return p.italicizeTaxonName(name, linked, abbreviated)
 +
end
 +
 
 +
--[[=========================================================================
 +
Utility function to abbreviate an input string to its first character
 +
followed by ".".
 +
Both "×" and an HTML entity at the start of the string are skipped over in
 +
determining first character, as is an opening parenthesis, which causes a
 +
closing parenthesis to be included.
 +
=============================================================================]]
 +
function p.abbreviate(str)
 +
local result = ""
 +
local hasParentheses = false
 +
if mw.ustring.len(str) < 2 then
 +
--single character strings are left unchanged
 +
result = str
 +
else
 +
--skip over an opening parenthesis that could be present at the start of the string
 +
if mw.ustring.sub(str,1,1) == "(" then
 +
hasParentheses = true
 +
result = "("
 +
str = mw.ustring.sub(str,2,mw.ustring.len(str))
 +
end
 +
--skip over a hybrid symbol that could be present at the start of the string
 +
if mw.ustring.sub(str,1,1) == "×" then
 +
result = "×"
 +
str = mw.ustring.sub(str,2,mw.ustring.len(str))
 +
end
 +
--skip over an HTML entity that could be present at the start of the string
 +
if mw.ustring.sub(str,1,1) == "&" then
 +
local i,dummy = mw.ustring.find(str,";",2,plain)
 +
result = result .. mw.ustring.sub(str,1,i)
 +
str = mw.ustring.sub(str,i+1,mw.ustring.len(str))
 +
end
 +
--if there's anything left, reduce it to its first character plus ".",
 +
--adding the closing parenthesis if required
 +
if str ~= "" then
 +
result = result .. mw.ustring.sub(str,1,1) .. "."
 +
if hasParentheses then result = result .. ")" end
 +
end
 +
end
 +
return result
 
end
 
end
  
function p.italicizeTaxonName(name, linked)
+
--[[=========================================================================
 +
The function which does the italicization.
 +
=============================================================================]]
 +
function p.italicizeTaxonName(name, linked, abbreviated)
 
     local italMarker = "''"
 
     local italMarker = "''"
     -- trim the name and replace any use of the HTML italic tags by Wikimedia markup
+
     -- begin by tidying the input name: trim; replace any use of the HTML
 +
    -- italic tags by Wikimedia markup; replace any alternatives to the hybrid
 +
    -- symbol by the symbol itself; prevent the hybrid symbol being treated as
 +
    -- a 'word' by converting a following space to the HTML entity
 
     name = string.gsub(mw.text.trim(name), "</?i>", italMarker)
 
     name = string.gsub(mw.text.trim(name), "</?i>", italMarker)
 +
    name = string.gsub(string.gsub(name, "&#215;", "×"), "&times;", "×")
 +
    name = string.gsub(name, "</?span.->", "") -- remove any span markup
 +
    name = string.gsub(name, "× ", "×&#32;")
 +
    -- now italicize and abbreviate if required
 
     local result = name
 
     local result = name
 
     if name ~= '' then
 
     if name ~= '' then
         if string.sub(name, 1, 2) == "''" or string.sub(name, -2) == "''" then
+
         if string.sub(name,1,2) == "''" or string.sub(name,-2) == "''" then
 
             -- do nothing if the name already has italic markers at the start or end
 
             -- do nothing if the name already has italic markers at the start or end
 
         else
 
         else
             name = string.gsub(name, "''", "") -- first remove internal italics
+
             name = string.gsub(name, "''", "") -- first remove any internal italics
 
             local words = mw.text.split(name, " ", true)
 
             local words = mw.text.split(name, " ", true)
             if #words == 4 then
+
             if #words == 4 and cTerms3[words[3]] then
                 -- test for the third word of a four word name being a connecting term
+
                 -- the third word of a four word name is a connecting term
                 if cTerms3[words[3]] then
+
                 -- ensure the connecting term isn't italicized
                     -- de-italicize the connecting term by adding internal italic markup
+
                words[3] = '<span style="font-style:normal;">' .. cTerms3[words[3]] .. '</span>'
                    result = words[1] .. " " .. words[2] .. italMarker .. " " .. cTerms3[words[3]] .. " " .. italMarker .. words[4]
+
                if abbreviated then
 +
                words[1] = p.abbreviate(words[1])
 +
                     words[2] = p.abbreviate(words[2])
 +
            end
 +
                result = words[1] .. " " .. words[2] .. " " .. words[3] .. " " .. words[4]
 +
            elseif #words == 3 and cTerms2[words[2]] then
 +
                -- the second word of a three word name is a connecting term
 +
                -- ensure the connecting term isn't italicized
 +
                words[2] = '<span style="font-style:normal;">' .. cTerms2[words[2]] .. '</span>'
 +
                if abbreviated then
 +
                words[1] = p.abbreviate(words[1])
 
                 end
 
                 end
            elseif #words == 3 then
+
                result = words[1] .. " " .. words[2] .. " " .. words[3]
                 -- test for the second word of a three word name being a connecting term
+
            else
                 if cTerms2[words[2]] then
+
                 -- not a name as above; only deal with abbreviation
                    -- de-italicize the connecting term by adding internal italic markup
+
                 if abbreviated then
                    result = words[1] .. italMarker .. " " .. cTerms2[words[2]] .. " " .. italMarker .. words[3]
+
                if #words > 1 then
 +
                result = p.abbreviate(words[1])
 +
                for i = 2, #words-1, 1 do
 +
                result = result .. " " .. p.abbreviate(words[i])
 +
                end
 +
                result = result .. " " .. words[#words]
 +
                end
 +
                else
 +
                result = name
 
                 end
 
                 end
            else
 
                -- do nothing
 
                result = name
 
 
             end
 
             end
 
             -- deal with any hybrid symbol as it should not be italicized
 
             -- deal with any hybrid symbol as it should not be italicized
 
             result = string.gsub(result, "×", '<span style="font-style:normal;">×</span>')
 
             result = string.gsub(result, "×", '<span style="font-style:normal;">×</span>')
             result = string.gsub(result, "&times;", '<span style="font-style:normal;">×</span>')
+
            -- deal with any parentheses as they should not be italicized
            result = string.gsub(result, "&#215;", '<span style="font-style:normal;">×</span>')
+
             result = string.gsub(string.gsub(result,"%(",'<span style="font-style:normal;">(</span>'),"%)",'<span style="font-style:normal;">)</span>')
            -- add outside markup
+
          -- add outside markup
 
             if linked then
 
             if linked then
 
                 if result ~= name then
 
                 if result ~= name then

Revision as of 14:26, 26 August 2018

--[[=========================================================================
Italicize a taxon name appropriately by invoking italicizeTaxonName.
The algorithm used is:
* If the name has italic markup at the start or the end, do nothing.
* Else
  * Remove (internal) italic markup.
  * If the name is made up of four words and the third word is a
    botanical connecting term, de-italicize the connecting term and add italic
    markup to the outside of the name.
  * Else if the name is made up of three words and the second word is a
    botanical connecting term or a variant of "cf.", de-italicize the
    connecting term and add italic markup to the outside of the name.
  * Else just add italic markup to the outside of the name.
The module also:
* Ensures that the hybrid symbol, ×, and parentheses are not italicized
* Has an option to abbreviate all parts of taxon names other than the last
  to the first letter (e.g. "Pinus sylvestris var. sylvestris" becomes
  "P. s. var. sylvestris").
* Has an option to wikilink the italicized name to the input name.

=============================================================================]]

local p = {}

-- Connecting terms in three part names (e.g. Pinus sylvestris var. sylvestris).
-- Every accepted spelling of a term (full word, abbreviation with the full
-- stop, abbreviation without it) is a key; the value is the canonical
-- abbreviation that is written to the output.
local cTerms3 = {
    --subsp.
    subspecies = "subsp.",
    ["subsp."] = "subsp.",
    subsp = "subsp.",
    ["ssp."] = "subsp.",
    ssp = "subsp.",
    --var.
    varietas = "var.",
    ["var."] = "var.",
    var = "var.",
    --subvar.
    subvarietas = "subvar.",
    ["subvar."] = "subvar.",
    subvar = "subvar.",
    --f.
    forma = "f.",
    ["f."] = "f.",
    f = "f.",
    --subf.
    subforma = "subf.",
    ["subf."] = "subf.",
    subf = "subf."
}

-- Connecting terms in two part names (e.g. Pinus sect. Pinus).
-- Every accepted spelling of a term is a key; the value is the canonical
-- abbreviation that is written to the output. Variants of "cf." are included
-- so that they are treated in the same way as the rank terms.
local cTerms2 = {
    --subg.
    subgenus = "subg.",
    ["subg."] = "subg.",
    subg = "subg.",
    --sect.
    section = "sect.",
    ["sect."] = "sect.",
    sect = "sect.",
    --subsect.
    subsection = "subsect.",
    ["subsect."] = "subsect.",
    subsect = "subsect.",
    --ser.
    series = "ser.",
    ["ser."] = "ser.",
    ser = "ser.",
    --subser.
    subseries = "subser.",
    ["subser."] = "subser.",
    subser = "subser.",
    --cf.
    cf = "cf.",
    ["cf."] = "cf.",
    ["c.f."] = "cf."
}

--[[=========================================================================
Main function to italicize a taxon name appropriately.
Reads its arguments from frame.args:
  1            the taxon name; a missing value is treated as the empty string
  linked       'yes' to wikilink the italicized name to the input name
  abbreviated  'yes' to abbreviate all parts of the name other than the last
Returns whatever p.italicizeTaxonName returns for those arguments.
=============================================================================]]
function p.main(frame)
    local name = frame.args[1] or ''
    -- any value other than the exact string 'yes' leaves the option off
    local linked = frame.args['linked'] == 'yes'
    local abbreviated = frame.args['abbreviated'] == 'yes'
    return p.italicizeTaxonName(name, linked, abbreviated)
end

--[[=========================================================================
Utility function to abbreviate an input string to its first character
followed by ".".
Both "×" and an HTML entity at the start of the string are skipped over in
determining first character, as is an opening parenthesis, which causes a
closing parenthesis to be included.
Strings of fewer than two characters are returned unchanged.
=============================================================================]]
function p.abbreviate(str)
    local result = ""
    local hasParentheses = false
    if mw.ustring.len(str) < 2 then
        --single character strings are left unchanged
        result = str
    else
        --skip over an opening parenthesis that could be present at the start of the string
        if mw.ustring.sub(str, 1, 1) == "(" then
            hasParentheses = true
            result = "("
            str = mw.ustring.sub(str, 2)
        end
        --skip over a hybrid symbol that could be present at the start of the string;
        --append rather than assign so that a preceding "(" is not lost
        if mw.ustring.sub(str, 1, 1) == "×" then
            result = result .. "×"
            str = mw.ustring.sub(str, 2)
        end
        --skip over an HTML entity that could be present at the start of the string;
        --the search for ";" is a plain (non-pattern) search, and an "&" with no
        --terminating ";" is not an entity so is treated as an ordinary character
        if mw.ustring.sub(str, 1, 1) == "&" then
            local entityEnd = mw.ustring.find(str, ";", 2, true)
            if entityEnd then
                result = result .. mw.ustring.sub(str, 1, entityEnd)
                str = mw.ustring.sub(str, entityEnd + 1)
            end
        end
        --if there's anything left, reduce it to its first character plus ".",
        --adding the closing parenthesis if required
        if str ~= "" then
            result = result .. mw.ustring.sub(str, 1, 1) .. "."
            if hasParentheses then result = result .. ")" end
        end
    end
    return result
end

--[[=========================================================================
The function which does the italicization.
Parameters:
  name         the taxon name as a string (may contain wiki or HTML italic
               markup, span markup and alternatives to the hybrid symbol)
  linked       if true, the output is wikilinked to the tidied input name
  abbreviated  if true, every word of the name other than the last (and
               other than a connecting term) is passed to p.abbreviate
Returns the marked-up name; an empty name is returned as the empty string,
and a name that already starts or ends with italic markup is returned as
tidied, without further changes.
=============================================================================]]
function p.italicizeTaxonName(name, linked, abbreviated)
    local italMarker = "''"
    -- begin by tidying the input name: trim; replace any use of the HTML
    -- italic tags by Wikimedia markup; replace any alternatives to the hybrid
    -- symbol by the symbol itself; prevent the hybrid symbol being treated as
    -- a 'word' by converting a following space to the HTML entity
    name = string.gsub(mw.text.trim(name), "</?i>", italMarker)
    name = string.gsub(string.gsub(name, "&#215;", "×"), "&times;", "×")
    name = string.gsub(name, "</?span.->", "") -- remove any span markup
    name = string.gsub(name, "× ", "×&#32;")
    -- now italicize and abbreviate if required
    local result = name
    if name ~= '' then
        if string.sub(name, 1, 2) == "''" or string.sub(name, -2) == "''" then
            -- do nothing if the name already has italic markers at the start or end
        else
            name = string.gsub(name, "''", "") -- first remove any internal italics
            -- default output is the cleaned name, so that no branch below can
            -- leave the internal italic markup in the result
            result = name
            local words = mw.text.split(name, " ", true)
            if #words == 4 and cTerms3[words[3]] then
                -- the third word of a four word name is a connecting term
                -- ensure the connecting term isn't italicized
                words[3] = '<span style="font-style:normal;">' .. cTerms3[words[3]] .. '</span>'
                if abbreviated then
                    words[1] = p.abbreviate(words[1])
                    words[2] = p.abbreviate(words[2])
                end
                result = words[1] .. " " .. words[2] .. " " .. words[3] .. " " .. words[4]
            elseif #words == 3 and cTerms2[words[2]] then
                -- the second word of a three word name is a connecting term
                -- ensure the connecting term isn't italicized
                words[2] = '<span style="font-style:normal;">' .. cTerms2[words[2]] .. '</span>'
                if abbreviated then
                    words[1] = p.abbreviate(words[1])
                end
                result = words[1] .. " " .. words[2] .. " " .. words[3]
            elseif abbreviated and #words > 1 then
                -- not a name as above; only deal with abbreviation:
                -- every word but the last is abbreviated
                result = p.abbreviate(words[1])
                for i = 2, #words - 1 do
                    result = result .. " " .. p.abbreviate(words[i])
                end
                result = result .. " " .. words[#words]
            end
            -- deal with any hybrid symbol as it should not be italicized
            result = string.gsub(result, "×", '<span style="font-style:normal;">×</span>')
            -- deal with any parentheses as they should not be italicized
            result = string.gsub(string.gsub(result, "%(", '<span style="font-style:normal;">(</span>'), "%)", '<span style="font-style:normal;">)</span>')
            -- add outside markup
            if linked then
                if result ~= name then
                    -- the displayed text differs from the name, so use a piped link
                    result = "[[" .. name .. "|" .. italMarker .. result .. italMarker .. "]]"
                else
                    result = italMarker .. "[[" .. name .. "]]" .. italMarker
                end
            else
                result = italMarker .. result .. italMarker
            end
        end
    end
    return result
end

return p