Difference between revisions of "Module:Webarchive"

From Eat Every Plant
Jump to navigation Jump to search
m (1 revision: Prunus laurocerasus)
 
m (1 revision: Equisetum)
(One intermediate revision by the same user not shown)
Line 1: Line 1:
 
--[[ ----------------------------------

Lua module implementing the {{webarchive}} template.

A merger of the functionality of three templates: {{wayback}}, {{webcite}} and {{cite archives}}

]]

local p = {};																	-- retained from the earlier revision; harmless if unused

require('Module:No globals');
local getArgs = require ('Module:Arguments').getArgs;
local this_page = mw.title.getCurrentTitle();


--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------
]]

local categories = {};															-- category names from ./data
local err_warn_msgs = {};														-- error and warning messages from ./data
local excepted_pages = {};
local prefixes = {};															-- service provider tail string prefixes from ./data
local services = {};															-- archive service provider data from ./data
local uncategorized_namespaces = {};											-- list of namespaces that we should not categorize
local uncategorized_subpages = {};												-- list of subpages that should not be categorized

local ulx = {};																	-- Associative array to hold template data
local track = {};																-- Associative array to hold tracking categories


--[[--------------------------< G L O B A L   C O N F I G U R A T I O N   S E T T I N G S >--------------------
]]

local maxurls = 10;																-- Max number of URLs allowed.
local tname = 'Webarchive'														-- name of calling template. Change if template rename.
local verifydates = 'yes'														-- See documentation. Set "no" to disable.


--[[--------------------------< inlineError >-----------------------

Critical error. Render output completely in red. Add to tracking category.

arg is the name of the offending template parameter; msg is the explanatory text appended after it.
Records categories.error in the track table as a side effect.

]]

local function inlineError(arg, msg)
	track[categories.error] = 1
	-- nil arg / msg are rendered as empty text rather than raising a concatenation error
	return '<span style="font-size:100%" class="error citation-comment">Error in webarchive template: Check <code style="color:inherit; border:inherit; padding:inherit;">&#124;' .. (arg or '') .. '=</code> value. ' .. (msg or '') .. '</span>'
end


--[[--------------------------< inlineRed >-----------------------

Render a text fragment in red, such as a warning as part of the final output.
Add tracking category.

trackmsg selects the tracking category: "warning" records categories.warning, "error" records
categories.error; any other value (including nil) records nothing.

]]

local function inlineRed(msg, trackmsg)
	if trackmsg == "warning" then
		track[categories.warning] = 1;
	elseif trackmsg == "error" then
		track[categories.error] = 1;
	end

	-- nil msg is rendered as empty text rather than raising a concatenation error
	return '<span style="font-size:100%" class="error citation-comment">' .. (msg or '') .. '</span>'
end


--[[--------------------------< trimArg >-----------------------

trimArg returns nil if arg is "" while trimArg2 returns 'true' if arg is ""
trimArg2 is for args that might accept an empty value, as an on/off switch like nolink=

Both helpers come from the earlier revision and are kept so that any remaining callers keep working.

]]

local function trimArg(arg)
	if arg == "" or arg == nil then
		return nil
	else
		return mw.text.trim(arg)
	end
end

local function trimArg2(arg)
	if arg == nil then
		return nil
	else
		return mw.text.trim(arg)
	end
end

 
--[[--------------------------< base62 >-----------------------

Convert base-62 to base-10
Credit: https://de.wikipedia.org/wiki/Modul:Expr

Digit values: '0'-'9' => 0-9, 'A'-'Z' => 10-35, 'a'-'z' => 36-61.

Returns 1 when value is malformed, i.e. when it is not a non-empty string made up only of
characters from the set [0-9A-Za-z].

]]

local function base62( value )
	local r = 1																	-- default return value if input value is malformed

	if 'string' == type (value) and value:match ('^[0-9A-Za-z]+$') then		-- value must only be in the set [0-9a-zA-Z]
		local k = 1																-- weight of the current digit position (62^position)
		local c

		r = 0
		for i = #value, 1, -1 do												-- loop through all characters in value from ls digit to ms digit
			c = value:byte (i)
			if c >= 48 and c <= 57 then											-- character is digit 0-9
				c = c - 48
			elseif c >= 65 and c <= 90 then										-- character is ascii A-Z
				c = c - 55
			else																-- must be ascii a-z
				c = c - 61
			end
			r = r + c * k														-- accumulate this base62 character's value
			k = k * 62															-- bump for next
		end
	end

	return r
end
 
  
 
--[[--------------------------< tableLength >-----------------------

Given a 1-D table, return number of elements

Counts every key visited by pairs(), so it also works for tables that are not sequences
(where the # operator is unreliable).

]]

local function tableLength(T)
	local count = 0
	for _ in pairs(T) do
		count = count + 1
	end
	return count
end
  
Line 113: Line 113:
 
--[[--------------------------< dateFormat >-----------------------

Given a date string, return its format: dmy, mdy, iso, ymd
If unable to determine return nil

A format is only reported when the four-digit year captured from the date lies strictly
between 1900 and 2200.

]]

local function dateFormat(date)
	local patterns = {
		['iso'] = '(%d%d%d%d)%-%d%d%-%d%d',
		['dmy'] = '%d%d? +%a+ +(%d%d%d%d)',
		['mdy'] = '%a+ %d%d?, +(%d%d%d%d)',
		['ymd'] = '(%d%d%d%d) +%a+ %d%d?',										-- TODO: not mos compliant; delete?
	};

	local form, y;																-- both must be local; the module runs with Module:No globals

	if 'string' ~= type (date) then												-- nothing to inspect
		return nil;
	end

	for k, v in pairs (patterns) do												-- loop through the patterns table
		y = mw.ustring.match (date, v);											-- looking for a match
		if y then																-- not nil when found
			form = k;															-- save that
			break;																-- and done
		end
	end

	y = tonumber (y);															-- nil when no pattern matched
	if y and 1900 < y and 2200 > y then											-- TODO: why 1900? shouldn't that be birth-of-internet year?  why 2200? shouldn't that be current year?
		return form;
	end

	return nil;																	-- unable to determine
end

 
--[[--------------------------< makeDate >-----------------------

Given a zero-padded 4-digit year, 2-digit month and 2-digit day, return a full date in df format
df = mdy, dmy, iso, ymd

on entry, year, month, day are presumed to be correct for the date that they represent; all are required

Returns nil when year, month or day is missing or an empty string.  When df is not one of the four
recognized names the mdy layout is used, as in the earlier revision of this function.

]]

local function makeDate(year, month, day, df)
	local format = {
		['dmy'] = 'j F Y',
		['mdy'] = 'F j, Y',
		['ymd'] = 'Y F j',
		['iso'] = 'Y-m-d',
	};

	if not year or '' == year or not month or '' == month or not day or '' == day then
		return nil;																-- something missing
	end

	local date = table.concat ({year, month, day}, '-');						-- assemble iso format date
	-- never hand formatDate() a nil format string: unknown df falls back to mdy
	return mw.getContentLanguage():formatDate (format[df] or format['mdy'], date);
end

--[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------

Returns true if date is after 31 December 1899 (why is 1900 the min year? shouldn't the internet's date-of-birth
be min year?), not after today's date, and represents a valid date (29 February 2017 is not a valid date). Applies
Gregorian leapyear rules.

all arguments are required

Arguments may be numbers or numeric strings; anything missing, empty, non-numeric or non-integer
yields false.

]]

local function is_valid_date (year, month, day)
	local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
	local month_length;
	local y, m, d;
	local today = os.date ('*t');												-- fetch a table of current date parts

	if not year or '' == year or not month or '' == month or not day or '' == day then
		return false;															-- something missing
	end

	y = tonumber (year);
	m = tonumber (month);
	d = tonumber (day);

	if not (y and m and d) then													-- at least one part is not numeric
		return false;
	end

	if 0 ~= y % 1 or 0 ~= m % 1 or 0 ~= d % 1 then								-- date parts must be whole numbers
		return false;
	end

	if 1900 > y or today.year < y or 1 > m or 12 < m then						-- year and month are within bounds TODO: 1900?
		return false;
	end

	if (2 == m) then															-- if February
		month_length = 28;														-- then 28 days unless
		if (0 == (y % 4) and (0 ~= (y % 100) or 0 == (y % 400))) then			-- is a leap year?
			month_length = 29;													-- if leap year then 29 days in February
		end
	else
		month_length = days_in_month[m];
	end

	if 1 > d or month_length < d then											-- day is within bounds
		return false;
	end

	-- here when date parts represent a valid date; make sure it is not in the future.
	-- compared part-by-part so that os.time() is never asked to convert a pre-1970 date
	if y < today.year then
		return true;
	end
	if m ~= today.month then
		return m < today.month;
	end
	return d <= today.day;
end
  
Line 208: Line 214:
 
--[[--------------------------< decodeWebciteDate >-----------------------

Given a URI-path to Webcite (eg. /67xHmVFWP) return the encoded date in df format

Returns the string "query" for the URL forms that do not carry a base62 snapshot id, and a red
error message (which also records the error tracking category) when no valid date can be decoded.

]]

local function decodeWebciteDate(path, df)
	local id = mw.text.split (path, "/")[2];									-- first path segment after the leading '/'

	if not id or '' == id then													-- nothing to decode
		return inlineRed (err_warn_msgs.date_err, 'error');
	end

	-- valid URL formats that are not base62

	-- http://www.webcitation.org/query?id=1138911916587475
	-- http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
	-- http://www.webcitation.org/1138911916587475
	-- http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
	-- http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e

	if mw.ustring.find (id, "query", 1, true) or
		mw.ustring.find (id, "cache", 1, true) or
		mw.ustring.find (id, "getfile", 1, true) or
		tonumber (id) then
			return "query"
	end

	-- base62 string -> number -> text -> first 10 characters -> number -> a table of date parts
	local timestamp = tonumber (string.format ("%d", base62 (id)):sub (1, 10));
	local dt = os.date ('*t', timestamp);

	if not is_valid_date (dt.year, dt.month, dt.day) then
		return inlineRed (err_warn_msgs.date_err, 'error');
	end

	return makeDate (dt.year, dt.month, dt.day, df) or inlineRed (err_warn_msgs.date4, 'error');
end
 
  
 
--[[--------------------------< decodeWaybackDate >-----------------------

Given a URI-path to Wayback (eg. /web/20160901010101/http://example.com )
or Library of Congress Web Archives (/all/20160901010101/http://example.com)
return the formatted date eg. "September 1, 2016" in df format
Handle non-digits in snapshot ID such as "re_" and "-" and "*"

Returns the string "index" when the timestamp segment is just "*", and a red error message
(which also records the error tracking category) when no valid date can be extracted.

]]

local function decodeWaybackDate(path, df)
	local snapdate;

	snapdate = path:gsub ('^/all/', ''):gsub ('^/web/', ''):gsub ('^/', '');	-- remove leading '/all/', leading '/web/' or leading '/'
	snapdate = snapdate:match ('^[^/]+');										-- get timestamp

	if not snapdate then														-- path did not hold a timestamp segment
		return inlineRed (err_warn_msgs.date2, 'error');
	end

	if snapdate == "*" then														-- eg. /web/*/http.. or /all/*/http..
		return "index"
	end

	snapdate = snapdate:gsub ('%a%a_%d?$', ''):gsub ('%-', '');					-- from date, remove any trailing "re_", dashes
	snapdate = snapdate:gsub ('%*$', '');										-- a trailing '*' causes calendar display at archive .org; remove so not part of length calc
	-- TODO: emit a warning for the trailing '*' and for timestamps shorter than 14 digits once those messages exist

	if not tonumber (snapdate) then
		return inlineRed (err_warn_msgs.date2, 'error');
	end

	local dlen = string.len (snapdate)
	if dlen < 8 then															-- we need 8 digits TODO: but shouldn't this be testing for 14 digits?
		return inlineRed (err_warn_msgs.date3, 'error');
	end

	local year, month, day = snapdate:match ('(%d%d%d%d)(%d%d)(%d%d)');		-- all nil when snapdate does not contain eight consecutive digits

	if not is_valid_date (year, month, day) then
		return inlineRed (err_warn_msgs.date_err, 'error');
	end

	return makeDate (year, month, day, df) or inlineRed (err_warn_msgs.date7, 'error');
end

 
--[[--------------------------< decodeArchiveisDate >-----------------------

Given an Archive.is "long link" URI-path (e.g. /2016.08.28-144552/http://example.com)
return the date in df format (e.g. if df = dmy, return 28 August 2016)
Handles "." and "-" in snapshot date, so 2016.08.28-144552 is same as 20160828144552

Returns the string "short link" for short-form paths.  For a valid date whose timestamp is not
exactly 14 digits long a second return value carries a red warning message.  Returns a red error
message when no valid date can be extracted.

]]

local function decodeArchiveisDate(path, df)
	local snapdate

	if path:match ('^/%w+$') then												-- short form url path is '/' followed by some number of base 62 digits and nothing else
		return "short link"														-- e.g. http://archive.is/hD1qz
	end

	snapdate = mw.text.split (path, '/')[2];									-- get snapshot date, e.g. 2016.08.28-144552
	if not snapdate then														-- path did not hold a timestamp segment
		return inlineRed (err_warn_msgs.date3, 'error');
	end
	snapdate = snapdate:gsub ('[%.%-]', '');									-- remove periods and hyphens

	local dlen = string.len (snapdate)
	if dlen < 8 then															-- we need 8 digits TODO: but shouldn't this be testing for 14 digits?
		return inlineRed (err_warn_msgs.date3, 'error');
	end

	local year, month, day = snapdate:match ('^(%d%d%d%d)(%d%d)(%d%d)');		-- anchored: the date must be the leading eight digits

	if not is_valid_date (year, month, day) then
		return inlineRed (err_warn_msgs.date_err, 'error');
	end

	snapdate = makeDate (year, month, day, df);
	if snapdate then
		if 14 == dlen then
			return snapdate;													-- return date
		else
			return snapdate, inlineRed (err_warn_msgs.ts_len, 'warning');		-- return date with warning message
		end
	else
		return inlineRed (err_warn_msgs.date7, 'error');						-- return error message
	end
end
  
  
--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Makes a wikilink; when both link and display text is provided, returns a wikilink in the form [[L|D]]; if only
link is provided, returns a wikilink in the form [[L]]; if neither are provided or link is omitted, returns an
empty string.

When no_link is set (any value other than nil or false) no wikilink markup is produced: the display text is
returned if there is any, else the link target, else an empty string.

]=]

local function make_wikilink (link, display, no_link)
	local has_display = display and ('' ~= display);

	if not no_link then															-- linking wanted; an explicit false is treated the same as nil
		if link and ('' ~= link) then
			if has_display then
				return table.concat ({'[[', link, '|', display, ']]'});
			else
				return table.concat ({'[[', link, ']]'});
			end
		end
		return display or '';													-- link not set so return the display text
	end

	-- no_link
	if has_display then															-- if there is display text
		return display;															-- return that
	end
	return link or '';															-- return the target article name or empty string
end

--[[--------------------------< serviceName >-----------------------

Given a domain extracted by mw.uri.new() (eg. web.archive.org) set tail string and service ID

Writes ulx.url1.service and ulx.url1.tail and records one tracking category in the track table;
returns nothing.  ulx.url1 must already exist when this function is called.

Each services[] record is read positionally:
	[1] prefix selector: false => prefixes.at, true => prefixes.atthe, anything else is used as the prefix text
	[2] wikilink target, [3] wikilink display text
	[4] service id (defaults to 'other'), [5] tracking category (defaults to categories.other)
	[6] optional text appended after the wikilink

]]

local function serviceName(host, no_link)
	local tracking;
	local index;

	host = host:lower():gsub ('^web%.(.+)', '%1'):gsub ('^www%.(.+)', '%1');	-- lowercase, remove web. and www. subdomains

	if services[host] then														-- exact match
		index = host;
	else
		for k, _ in pairs (services) do
			local pattern = '%f[%a]' .. (k:gsub ('([%^%$%(%)%%%.%[%]%*%+%-%?])', '%%%1'));	-- escape every magic character in the key
			if host:find (pattern) then
				-- pairs() order is unspecified, so when several keys match prefer the longest
				-- (most specific) one, with ties broken alphabetically, to keep the result deterministic
				if not index or #k > #index or (#k == #index and k < index) then
					index = k;
				end
			end
		end
	end

	if index then
		local service = services[index];
		local out = {''};														-- empty string in [1] so that concatenated result has leading single space

		ulx.url1.service = service[4] or 'other';
		tracking = service[5] or categories.other;

		-- build tail string
		if false == service[1] then												-- select prefix
			table.insert (out, prefixes.at);
		elseif true == service[1] then
			table.insert (out, prefixes.atthe);
		else
			table.insert (out, service[1]);
		end

		table.insert (out, make_wikilink (service[2], service[3], no_link));	-- add article wikilink
		if service[6] then														-- add tail postfix if it exists
			table.insert (out, service[6]);
		end

		ulx.url1.tail = table.concat (out, ' ');								-- put it all together; result has leading space character

	else																		-- here when unknown archive
		ulx.url1.service = 'other';
		tracking = categories.unknown;
		ulx.url1.tail = table.concat ({'', prefixes.at, host, inlineRed (err_warn_msgs.unknown_url)}, ' ');	-- TODO: call to inlineRed() does not specify 'error' or 'warning'; should it?
	end

	track[tracking] = 1
end

 
--[[--------------------------< parseExtraArgs >-----------------------
 
--[[--------------------------< parseExtraArgs >-----------------------
  
    Parse numbered arguments starting at 2, such as url2..url10, date2..date10, title2..title10
+
Parse numbered arguments starting at 2, such as url2..url10, date2..date10, title2..title10
      For example: {{webarchive |url=.. |url4=.. |url7=..}}
+
For example: {{webarchive |url=.. |url4=.. |url7=..}}
        Three url arguments not in numeric sequence (1..4..7).  
+
Three url arguments not in numeric sequence (1..4..7).  
        Function only processes arguments numbered 2 or greater (in this case 4 and 7)
+
Function only processes arguments numbered 2 or greater (in this case 4 and 7)
        It creates numeric sequenced table entries like:
+
It creates numeric sequenced table entries like:
          urlx.url2.url = <argument value for url4>
+
urlx.url2.url = <argument value for url4>
          urlx.url3.url = <argument value for url7>
+
urlx.url3.url = <argument value for url7>
      Returns the number of URL arguments found numbered 2 or greater (in this case returns "2")
+
Returns the number of URL arguments found numbered 2 or greater (in this case returns "2")
  
 
  ]]
 
  ]]
  
local function parseExtraArgs()
+
local function parseExtraArgs(args)
  
  local i, j, argurl, argurl2, argdate, argtitle
+
local i, j, argurl, argurl2, argdate, argtitle
  
  j = 2
+
j = 2
  for i = 2, maxurls do
+
for i = 2, maxurls do
    argurl = "url" .. i
+
argurl = "url" .. i
    if trimArg(args[argurl]) then
+
if args[argurl] then
      argurl2 = "url" .. j
+
argurl2 = "url" .. j
      ulx[argurl2] = {}
+
ulx[argurl2] = {}
      ulx[argurl2]["url"] = args[argurl]
+
ulx[argurl2]["url"] = args[argurl]
      argdate = "date" .. j
+
argdate = "date" .. j
      if trimArg(args[argdate]) then
+
if args[argdate] then
        ulx[argurl2]["date"] = args[argdate]
+
ulx[argurl2]["date"] = args[argdate]
      else
+
else
        ulx[argurl2]["date"] = inlineRed("[Date missing]", "warning")
+
ulx[argurl2]["date"] = inlineRed (err_warn_msgs.date_miss, 'warning');
      end
+
end
      argtitle = "title" .. j
+
      if trimArg(args[argtitle]) then
+
argtitle = "title" .. j
        ulx[argurl2]["title"] = args[argtitle]
+
if args[argtitle] then
      else
+
ulx[argurl2]["title"] = args[argtitle]
        ulx[argurl2]["title"] = nil
+
else
      end
+
ulx[argurl2]["title"] = nil
      j = j + 1
+
end
    end
+
j = j + 1
  end
+
end
 +
end
  
  if j == 2 then
+
if j == 2 then
    return 0
+
return 0
  else
+
else
    return j - 2
+
return j - 2
  end
+
end
 +
end
  
end
 
  
 
--[[--------------------------< comma >-----------------------
 
--[[--------------------------< comma >-----------------------
  
    Given a date string, return "," if it's MDY  
+
Given a date string, return "," if it's MDY  
  
  ]]
+
]]
  
 
local function comma(date)
 
local function comma(date)
  local N = mw.text.split(date, " ")
+
return (date and date:match ('%a+ +%d%d?(,) +%d%d%d%d')) or '';
  local O = mw.text.split(N[1], "-") -- for ISO
 
  if O[1] == "index" then return "" end
 
  if not tonumber(O[1]) then
 
    return ","
 
  else
 
    return ""
 
  end
 
 
end
 
end
 +
  
 
--[[--------------------------< createTracking >-----------------------
 
--[[--------------------------< createTracking >-----------------------
  
    Return data in track[] ie. tracking categories
+
Return data in track[] ie. tracking categories
  
  ]]
+
]]
  
 
local function createTracking()
 
local function createTracking()
 +
if not excepted_pages[this_page.fullText] then -- namespace:title/fragment is allowed to be categorized (typically this module's / template's testcases page(s))
 +
if uncategorized_namespaces[this_page.nsText] then -- TODO: enable this chunk
 +
return ''; -- this page not to be categorized so return empty string
 +
end
 +
for _,v in ipairs (uncategorized_subpages) do -- cycle through page name patterns
 +
if this_page.text:match (v) then -- test page name against each pattern
 +
return ''; -- this subpage type not to be categorized so return empty string
 +
end
 +
end
 +
end
  
  local sand = ""
+
local out = {};
  if tableLength(track) > 0 then                      
+
if tableLength(track) > 0 then
    for key,_ in pairs(track) do
+
for key, _ in pairs(track) do -- loop through table
      sand = sand .. "[[" .. key .. "]]"
+
table.insert (out, make_wikilink (key)); -- and convert category names to links
    end
+
end
  end
+
end
  return sand
+
return table.concat (out); -- concat into one big string; empty string if table is empty
  
 
end
 
end
 +
  
 
--[[--------------------------< createRendering >-----------------------
 
--[[--------------------------< createRendering >-----------------------
  
    Return a rendering of the data in ulx[][]
+
Return a rendering of the data in ulx[][]
 +
 
 +
TODO: when archive date is '*' ('index') leading archive extlink should be [<url> Archive index] instead of
 +
[<url> Archived] index; code to support this has been added but is commented out for the time being; look for TODO1
  
  ]]
+
]]
  
 
local function createRendering()
 
local function createRendering()
  
    local sand, displayheader, displayfield
+
local displayfield
 +
local out = {};
 +
 +
local period1 = ''; -- For backwards compat with {{wayback}}
 +
local period2 = '.';
 +
 
 +
if 'none' == ulx.url1.format then -- For {{wayback}}, {{webcite}}
 +
table.insert (out, '['); -- open extlink markup
 +
table.insert (out, ulx.url1.url); -- add url
 +
 
 +
if ulx.url1.title then
 +
table.insert (out, ' ') -- the required space
 +
table.insert (out, ulx.url1.title) -- the title
 +
table.insert (out, ']'); -- close extlink markup
 +
table.insert (out, ulx.url1.tail); -- tail text
 +
if ulx.url1.date then
 +
table.insert (out, '&#32;('); -- open date text; TODO: why the html entity?
 +
table.insert (out, 'index' == ulx.url1.date and 'archive' or 'archived'); -- add text
 +
table.insert (out, ' '); -- insert a space
 +
table.insert (out, ulx.url1.date); -- add date
 +
table.insert (out, ')'); -- close date text
 +
end
 +
else -- no title
 +
table.insert (out, ' Archived]') -- close extlink markup TODO1: remove this line
 +
--TODO1 table.insert (out, 'index' == ulx.url1.date and ' Archive index]' or ' Archived]'); -- begin link label-- use this line for correct link label when date is 'index'
 +
if ulx.url1.date then
 +
if 'wayback' == ulx.url1.service then
 +
period1 = '.';
 +
period2 = '';
 +
end
 +
table.insert (out, table.concat ({' ', ulx.url1.date})); -- add date TODO1: remove this line
 +
--[[TODO1 if 'index' ~= ulx.url1.date then -- TODO1: add this line -- use this if for correct link label when date is 'index'
 +
table.insert (out, ulx.url1.date); -- add date TODO1: add this line -- use this if for correct link label when date is 'index'
 +
end -- TODO1: add this line -- use this if for correct link label when date is 'index'
 +
]] table.insert (out, comma(ulx.url1.date)); -- add ',' if date format is mdy
 +
table.insert (out, ulx.url1.tail); -- add tail text
 +
table.insert (out, period1); -- terminate
 +
else -- no date
 +
table.insert (out, ulx.url1.tail); -- add tail text
 +
end
 +
end
 +
 
 +
if 0 < ulx.url1.extraurls then -- For multiple archive URLs
 +
local tot = ulx.url1.extraurls + 1
 +
table.insert (out, period2); -- terminate first url
 +
table.insert (out, ' Additional archives: '); -- add header text
 +
 
 +
for i=2, tot do -- loop through the additionals
 +
local index = table.concat ({'url', i}); -- make an index
 +
displayfield = ulx[index]['title'] and 'title' or 'date'; -- choose display text
 +
table.insert (out, '['); -- open extlink markup
 +
table.insert (out, ulx[index]['url']); -- add the url
 +
table.insert (out, ' '); -- the required space
 +
table.insert (out, ulx[index][displayfield]); -- add the label
 +
table.insert (out, ']'); -- close extlink markup
 +
table.insert (out, i==tot and '.' or ', '); -- add terminator
 +
end
 +
end
 +
return table.concat (out); -- make a big string and done
  
    local period1 = ""  -- For backwards compat with {{wayback}}
+
else -- For {{cite archives}}
    local period2 = "."                                                           
+
if 'addlarchives' == ulx.url1.format then -- Multiple archive services
 
+
table.insert (out, 'Additional archives: '); -- add header text
    local indexstr = "archived"
+
else -- Multiple pages from the same archive
    if ulx.url1.date == "index" then
+
table.insert (out, 'Additional pages archived&nbsp;on '); -- add header text
      indexstr = "archive"
+
table.insert (out, ulx.url1.date); -- add date to header text
    end
+
table.insert (out, ': '); -- close header text
                                                                                          -- For {{wayback}}, {{webcite}}
+
end
  
    if ulx.url1.format == "none" then                                                   
+
local tot = ulx.url1.extraurls + 1;
      if not ulx.url1.title and not ulx.url1.date then                                    -- No title. No date
+
for i=1, tot do -- loop through the additionals
        sand = "[" .. ulx.url1.url .. " Archived]" .. ulx.url1.tail
+
local index = table.concat ({'url', i}); -- make an index
      elseif not ulx.url1.title and ulx.url1.date then                                    -- No title. Date.
+
table.insert (out, '['); -- open extlink markup
        if ulx.url1.service == "wayback" then
+
table.insert (out, ulx[index]['url']); -- add url
          period1 = "."
+
table.insert (out, ' '); -- add required space
          period2 = ""
 
        end
 
        sand = "[" .. ulx.url1.url .. " Archived] " .. ulx.url1.date .. comma(ulx.url1.date) .. ulx.url1.tail .. period1
 
      elseif ulx.url1.title and not ulx.url1.date then                                    -- Title. No date.
 
        sand = "[" .. ulx.url1.url .. " " .. ulx.url1.title .. "]" .. ulx.url1.tail
 
      elseif ulx.url1.title and ulx.url1.date then                                        -- Title. Date.
 
        sand = "[" .. ulx.url1.url .. " " .. ulx.url1.title .. "]" .. ulx.url1.tail .. "&#32;(" .. indexstr .. " " .. ulx.url1.date .. ")"
 
      else
 
        return nil
 
      end
 
      if ulx.url1.extraurls > 0 then                                                      -- For multiple archive URLs
 
        local tot = ulx.url1.extraurls + 1
 
        sand = sand .. period2 .. " Additional archives: "
 
        for i=2,tot do
 
          local indx = "url" .. i
 
          if ulx[indx]["title"] then
 
            displayfield = "title"
 
          else
 
            displayfield = "date"
 
          end
 
          sand = sand .. "[" .. ulx[indx]["url"] .. " " .. ulx[indx][displayfield] .. "]"
 
          if i == tot then
 
            sand = sand .. "."
 
          else
 
            sand = sand .. ", "
 
          end
 
        end
 
      else
 
        return sand 
 
      end
 
      return sand
 
                                                                                          -- For {{cite archives}}
 
  
    else                                                                 
+
displayfield = ulx[index]['title'];
      if ulx.url1.format == "addlarchives" then                          -- Multiple archive services
+
if 'addlarchives' == ulx.url1.format then
        displayheader = "Additional archives: "
+
if not displayfield then  
      else                                                                -- Multiple pages from the same archive
+
displayfield = ulx[index]['date']
        displayheader = "Additional pages archived&nbsp;on " .. ulx.url1.date .. ": "
+
end
      end
+
else -- must be addlpages
      local tot = 1 + ulx.url1.extraurls
+
if not displayfield then  
      local sand = displayheader
+
displayfield = table.concat ({'Page ', i});
      for i=1,tot do
+
end
        local indx = "url" .. i
+
end
        displayfield = ulx[indx]["title"]
+
table.insert (out, displayfield); -- add title, date, page label text
        if ulx.url1.format == "addlarchives" then
+
table.insert (out, ']'); -- close extlink markup
          if not displayfield then  
+
table.insert (out, (i==tot and '.' or ', ')); -- add terminator
            displayfield = ulx[indx]["date"]
+
end
          end
+
return table.concat (out); -- make a big string and done
        else
+
end
          if not displayfield then  
 
            displayfield = "Page " .. i
 
          end
 
        end
 
        sand = sand .. "[" .. ulx[indx]["url"] .. " " .. displayfield .. "]"
 
        if i == tot then
 
          sand = sand .. "."
 
        else
 
          sand = sand .. ", "
 
        end
 
      end
 
      return sand
 
    end
 
 
end
 
end
  
function p.webarchive(frame)
 
  args = frame.args
 
  if (args[1]==nil) and (args["url"]==nil) then          -- if no argument provided than check parent template/module args
 
    args = frame:getParent().args
 
  end
 
 
  local tname = "Webarchive"                              -- name of calling template. Change if template rename.
 
  ulx = {}                                                -- Associative array to hold template data
 
  track = {}                                              -- Associative array to hold tracking categories
 
  maxurls = 10                                            -- Max number of URLs allowed.
 
  local verifydates = "yes"                              -- See documentation. Set "no" to disable.
 
  
                                                          -- URL argument (first)
+
--[[--------------------------< W E B A R C H I V E >----------------------------------------------------------
 +
 
 +
template entry point
  
  local url1 = trimArg(args.url) or trimArg(args.url1)         
+
TODO: deprecate empty |nolink= as a 'positive' assertion that archive service is not to be linked
  if not url1 then
 
    return inlineError("url", "Empty.") .. createTracking()
 
  end
 
  if mw.ustring.find( url1, "https://web.http", 1, true ) then    -- track bug
 
    track["Category:Webarchive template errors"] = 1
 
    return inlineError("url", "https://web.http") .. createTracking()
 
  end
 
  if url1 == "https://web.archive.org/http:/" then                -- track bug
 
    track["Category:Webarchive template errors"] = 1
 
    return inlineError("url", "Invalid URL") .. createTracking()
 
  end
 
  
  ulx.url1 = {}
+
]]
  ulx.url1.url = url1
 
  if not mw.ustring.find( mw.ustring.lower(url1), "^http") then
 
    if not mw.ustring.find( url1, "^//") then
 
      ulx.url1.url = "http://" .. url1
 
    end
 
  end
 
  local uri1 = mw.uri.new(ulx.url1.url)
 
  ulx.url1.host = uri1.host
 
  ulx.url1.extraurls = parseExtraArgs()
 
  
                                                          -- Nolink argument
+
local function webarchive(frame)
 +
local args = getArgs (frame, { -- TODO: delete this assignment
 +
valueFunc = function (key, value) -- this code so that we can detect and handle the oddity that is |nolink=
 +
if 'nolink' == key then -- |nolink= is 'set' when present with or without assigned value; TODO: deprecate this peculiar use
 +
return value; -- don't trim; we don't care (right now) what the value is except when nil and we can't trim nil
 +
elseif value then -- all other values: if the value is not nil
 +
value = mw.text.trim (value); -- trim whitespace
 +
if '' ~= value then -- empty string when value was only whitespace or was empty
 +
return value; -- return non-nil, non-empty values
 +
end
 +
end
 +
return nil; -- value was nil, empty, or contained only whitespace
 +
end -- end of valueFunc
 +
});
  
  local nolink = trimArg2(args.nolink)
+
-- local args = getArgs (frame); -- TODO: replace the above with this
 +
 +
local data = mw.loadData (table.concat ({ -- make a data module name; sandbox or live
 +
'Module:Webarchive/data',
 +
frame:getTitle():find('sandbox', 1, true) and '/sandbox' or '' -- this instance is ./sandbox then append /sandbox
 +
}));
 +
categories = data.categories; -- fill in the forward declarations
 +
err_warn_msgs = data.err_warn_msgs;
 +
excepted_pages = data.excepted_pages;
 +
prefixes = data.prefixes;
 +
services = data.services;
 +
uncategorized_namespaces = data.uncategorized_namespaces;
 +
uncategorized_subpages = data.uncategorized_subpages;
 +
  
  serviceName(uri1.host, nolink)
+
local date, format, msg, uri, url;
 +
 +
verifydates = 'yes' == verifydates; -- convert to boolean
  
                                                          -- Date argument
+
if args.url and args.url1 then -- URL argument (first)
 +
return inlineError("url", "Conflicting |url= and |url1=.") .. createTracking();
 +
end
 +
 +
url = args.url or args.url1;
 +
 +
if not url then
 +
return inlineError("url", "Empty.") .. createTracking()
 +
end
 +
if mw.ustring.find( url, "https://web.http", 1, true ) then -- track bug - TODO: IAbot bug; not known if the bug has been fixed; deferred
 +
track[categories.error] = 1;
 +
return inlineError("url", "https://web.http") .. createTracking()
 +
end
 +
if url == "https://web.archive.org/http:/" then -- track bug - TODO: IAbot bug; not known if the bug has been fixed; deferred
 +
track[categories.error] = 1;
 +
return inlineError("url", "Invalid URL") .. createTracking()
 +
end
  
  local date = trimArg(args.date) or trimArg(args.date1)
+
ulx.url1 = {}
  if date == "*" and (ulx.url1.service == "wayback" or ulx.url1.service == "locwebarchives") then
+
ulx.url1.url = url
    date = "index"
+
if not (url:lower():find ('^http') or url:find ('^//')) then -- TODO: is this a good idea? isn't it better to simply throw an error when url is malformed ...
  elseif date and (ulx.url1.service == "wayback" or ulx.url1.service == "locwebarchives") and verifydates == "yes" then
+
ulx.url1.url = 'http://' .. url -- ... rather than apply this 'fix' that might not fix anything?
    local ldf = dateFormat(date)
+
end
    if ldf then
 
      local udate = decodeWaybackDate( uri1.path, ldf )
 
      if udate ~= date then
 
        date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning")     
 
      end
 
    end
 
  elseif date and ulx.url1.service == "webcite" and verifydates == "yes" then
 
    local ldf = dateFormat(date)
 
    if ldf then
 
      local udate = decodeWebciteDate( uri1.path, ldf )
 
      if udate == "query" then -- skip
 
      elseif udate ~= date then
 
        date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning")     
 
      end
 
    end
 
  elseif date and ulx.url1.service == "archiveis" and verifydates == "yes" then
 
    local ldf = dateFormat(date)
 
    if ldf then
 
        local udate = decodeArchiveisDate( uri1.path, ldf )
 
        if udate == "short link" then -- skip
 
        elseif udate ~= date then
 
          date = udate .. inlineRed("<sup>[Date mismatch]</sup>", "warning")     
 
        end
 
    end
 
  elseif not date and (ulx.url1.service == "wayback" or ulx.url1.service == "locwebarchives") then
 
    date = decodeWaybackDate( uri1.path, "iso" )
 
    if not date then
 
      date = inlineRed("[Date error] (1)", "error")
 
    end
 
  elseif not date and ulx.url1.service == "webcite" then
 
    date = decodeWebciteDate( uri1.path, "iso" )
 
    if date == "query" then
 
      date = inlineRed("[Date missing]", "warning")
 
    elseif not date then
 
      date = inlineRed("[Date error] (1)", "error")
 
    end
 
  elseif not date and ulx.url1.service == "archiveis" then
 
    date = decodeArchiveisDate( uri1.path, "iso" )
 
    if date == "short link" then
 
        date = inlineRed("[Date missing]", "warning")
 
    elseif not date then
 
        date = inlineRed("[Date error] (1)", "error")
 
    end
 
  elseif not date then
 
    date = inlineRed("[Date missing]", "warning")
 
  end
 
  ulx.url1.date = date
 
  
                                                          -- Format argument
+
ulx.url1.extraurls = parseExtraArgs(args)
  
  local format = trimArg(args.format)
+
uri = mw.uri.new (ulx.url1.url); -- get a table of uri parts from this url
  if not format then
+
serviceName(uri.host, args.nolink)
    format = "none"
 
  else
 
    if format == "addlpages" then
 
      if not ulx.url1.date then
 
        format = "none"
 
      end
 
    elseif format == "addlarchives" then
 
      format = "addlarchives"
 
    else
 
      format = "none"
 
    end
 
  end
 
  ulx.url1.format = format
 
  
                                                          -- Title argument  
+
if args.date and args.date1 then -- Date argument
 +
return inlineError("date", "Conflicting |date= and |date1=.") .. createTracking();
 +
end
 +
 +
date = args.date or args.date1
  
  local title = trimArg(args.title) or trimArg(args.title1)
+
if 'wayback' == ulx.url1.service or 'locwebarchives' == ulx.url1.service then
  ulx.url1.title = title
+
if '*' == date then -- TODO: why is this not compared to url date?
 
+
date = 'index';
 +
end
 +
if date then
 +
if verifydates then
 +
local ldf = dateFormat(date)
 +
if ldf then
 +
local udate, msg = decodeWaybackDate( uri.path, ldf ) -- get the url date in the same format as date in |date=; 'index' when wayback date is *
 +
if udate ~= date then
 +
date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning') .. (msg or ''); -- mismatch us url date; add message if there is one
 +
else
 +
date = date .. (msg or ''); -- add message if there is one
 +
end
 +
end
 +
end
 +
else -- no |date=
 +
date, msg = decodeWaybackDate( uri.path, "iso" )
 +
if not date then
 +
date = inlineRed (err_warn_msgs.date1, 'error'); -- TODO: change this type of message so that it identifies url as source of error?
 +
else
 +
date = date .. (msg or ''); -- add message if there is one
 +
end
 +
end
  
  local rend = createRendering()
+
elseif 'webcite' == ulx.url1.service then
  if not rend then
+
if date then
    rend = '<span style="font-size:100%" class="error citation-comment">Error in [[:Template:' .. tname .. ']]: Unknown problem. Please report on template talk page.</span>'
+
if verifydates then
    track["Category:Webarchive template errors"] = 1
+
local ldf = dateFormat(date)
  end
+
if ldf then
 +
local udate = decodeWebciteDate( uri.path, ldf ) -- get the url date in the same format as date in |date=
 +
if 'query' ~= udate then -- skip if query
 +
if udate ~= date then
 +
date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning');
 +
end
 +
end
 +
end
 +
end
 +
else
 +
date = decodeWebciteDate( uri.path, "iso" )
 +
if date == "query" then
 +
date = inlineRed (err_warn_msgs.date_miss, 'warning');
 +
elseif not date then
 +
date = inlineRed (err_warn_msgs.date1, 'error');
 +
end
 +
end
  
  return rend .. createTracking()
+
elseif 'archiveis' == ulx.url1.service then
 +
if date then
 +
if verifydates then
 +
local ldf = dateFormat(date)
 +
if ldf then
 +
local udate, msg = decodeArchiveisDate( uri.path, ldf ) -- get the url date in the same format as date in |date=
 +
if 'short link' ~= udate then -- skip if short link
 +
if udate ~= date then
 +
date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning') .. (msg or ''); -- mismatch: use url date; add message if there is one
 +
else
 +
date = date .. (msg or ''); -- add message if there is one
 +
end
 +
end
 +
end
 +
end
 +
else -- no |date=
 +
date, msg = decodeArchiveisDate( uri.path, "iso" )
 +
if date == "short link" then
 +
date = inlineRed (err_warn_msgs.date_miss, 'warning');
 +
elseif not date then
 +
date = inlineRed (err_warn_msgs.date1, 'error');
 +
else
 +
date = date .. (msg or ''); -- add message if there is one
 +
end
 +
end
 +
 +
else -- some other service
 +
if not date then
 +
date = inlineRed (err_warn_msgs.date_miss, 'warning');
 +
end
 +
end
  
 +
ulx.url1.date = date
 +
 +
format = args.format; -- Format argument
 +
 +
if not format then
 +
format = "none"
 +
else
 +
if format == "addlpages" then
 +
if not ulx.url1.date then
 +
format = "none"
 +
end
 +
elseif format == "addlarchives" then
 +
format = "addlarchives"
 +
else
 +
format = "none"
 +
end
 +
end
 +
ulx.url1.format = format
 +
 +
if args.title and args.title1 then -- Title argument
 +
return inlineError("title", "Conflicting |title= and |title1=.") .. createTracking();
 +
end
 +
 +
ulx.url1.title = args.title or args.title1;
 +
 +
local rend = createRendering()
 +
if not rend then
 +
rend = '<span style="font-size:100%" class="error citation-comment">Error in [[:Template:' .. tname .. ']]: Unknown problem. Please report on template talk page.</span>'
 +
track[categories.error] = 1;
 +
end
 +
 +
return rend .. createTracking()
 
end
 
end
  
return p
+
 
 +
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------
 +
]]
 +
 
 +
return {webarchive = webarchive};

Revision as of 15:04, 13 September 2018

--[[ ----------------------------------

Lua module implementing the {{webarchive}} template.

A merger of the functionality of three templates: {{wayback}}, {{webcite}} and {{cite archives}}

]]

require('Module:No globals'); local getArgs = require ('Module:Arguments').getArgs; local this_page = mw.title.getCurrentTitle();


--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- ]]

-- Each of these is an empty placeholder at load time; webarchive() replaces them
-- with the tables read from the ./data module before any other function runs.
local categories = {};							-- category names from ./data
local err_warn_msgs = {};						-- error and warning messages from ./data
local excepted_pages = {};						-- pages that may always be categorized, from ./data
local prefixes = {};							-- service provider tail string prefixes from ./data
local services = {};							-- archive service provider data from ./data
local uncategorized_namespaces = {};			-- list of namespaces that we should not categorize
local uncategorized_subpages = {};				-- list of subpages that should not be categorized

local ulx = {};									-- Associative array to hold template data
local track = {};								-- Associative array to hold tracking categories


--[[--------------------------< G L O B A L C O N F I G U R A T I O N S E T T I N G S >--------------------

]]

local maxurls = 10;								-- Max number of URLs allowed.
local tname = 'Webarchive'						-- name of calling template. Change if template rename.
local verifydates = 'yes'						-- See documentation. Set "no" to disable.


--[[--------------------------< inlineError >-----------------------

Critical error. Render output completely in red. Add to tracking category.

]]

-- Build the critical-error message for parameter |<arg>= and record the error
-- tracking category in track[].
--   arg: name of the offending template parameter (without the pipe or '=')
--   msg: short explanation appended after the standard wording
-- Returns the message wrapped in an error-class span so that it renders in red.
local function inlineError(arg, msg)
	track[categories.error] = 1
	return '<span style="font-size:100%" class="error citation-comment">Error in webarchive template: Check <code style="color:inherit; border:inherit; padding:inherit;">&#124;' .. arg .. '=</code> value. ' .. msg .. '</span>'
end


--[[--------------------------< inlineRed >-----------------------

Render a text fragment in red, such as a warning as part of the final output. Add tracking category.

]]

-- Wrap a text fragment in an error-class span so that it renders in red, and
-- optionally record a tracking category.
--   msg:      the text to render
--   trackmsg: "warning" or "error" selects the matching tracking category;
--             any other value (including nil) adds no category
-- NOTE(review): the span attributes mirror those used by inlineError(); confirm
-- against the live module if the exact markup matters.
local function inlineRed(msg, trackmsg)
	if trackmsg == "warning" then
		track[categories.warning] = 1;
	elseif trackmsg == "error" then
		track[categories.error] = 1;
	end

	return '<span style="font-size:100%" class="error citation-comment">' .. msg .. '</span>'
end


--[[--------------------------< base62 >-----------------------

Convert base-62 to base-10 Credit: https://de.wikipedia.org/wiki/Modul:Expr

]]

-- Convert a base-62 string to its base-10 numeric value.
-- Digit order is 0-9 (0..9), then A-Z (10..35), then a-z (36..61).
--   value: string to convert
-- Returns the number, or 1 when value is empty or holds a non-alphanumeric character.
local function base62( value )
	local r = 1									-- default return value if input value is malformed

	if value:match( "^%w+$" ) then				-- value must only be in the set [0-9a-zA-Z]
		local n = #value						-- number of characters in value
		local k = 1								-- weight of the current digit position (62^position)
		local c
		r = 0
		for i = n, 1, -1 do						-- loop through all characters in value from ls digit to ms digit
			c = value:byte( i, i )
			if c >= 48 and c <= 57 then			-- character is digit 0-9
				c = c - 48
			elseif c >= 65 and c <= 90 then		-- character is ascii A-Z; maps to 10..35
				c = c - 55
			else								-- must be ascii a-z; maps to 36..61
				c = c - 61
			end
			r = r + c * k						-- accumulate this base62 character's value
			k = k * 62							-- bump for next
		end
	end
	return r
end


--[[--------------------------< tableLength >-----------------------

Given a 1-D table, return number of elements

]]

-- Count every key/value pair in T (array part and hash part alike).
local function tableLength(T)
	local entries = 0
	for _ in pairs(T) do
		entries = entries + 1
	end
	return entries
end


--[[--------------------------< dateFormat >-----------------------

Given a date string, return its format: dmy, mdy, iso, ymd If unable to determine return nil

]]

-- Identify the format of a date string.
--   date: date string to classify
-- Returns 'iso', 'dmy', 'mdy' or 'ymd' when a pattern matches and the captured
-- year lies strictly between 1900 and 2200; false when a pattern matches but the
-- year is out of range; nil when no pattern matches.
local function dateFormat(date)

	-- ordered list so that the pattern tried first is the same on every call
	-- (pairs() over a keyed table has no defined order)
	local patterns = {
		{'iso', '(%d%d%d%d)%-%d%d%-%d%d'},
		{'dmy', '%d%d? +%a+ +(%d%d%d%d)'},
		{'mdy', '%a+ %d%d?, +(%d%d%d%d)'},
		{'ymd', '(%d%d%d%d) +%a+ %d%d?'},		-- TODO: not mos compliant; delete?
		};

	local form, y;

	for _, entry in ipairs (patterns) do		-- loop through the patterns list
		y = mw.ustring.match (date, entry[2]);	-- looking for a match; capture is the year
		if y then								-- not nil when found
			form = entry[1];					-- save that
			break;								-- and done
		end
	end

	return (y and (1900 < tonumber(y) and 2200 > tonumber(y))) and form;	-- TODO: why 1900? shouldn't that be birth-of-internet year? why 2200? shouldn't that be current year?
end


--[[--------------------------< makeDate >-----------------------

Given a zero-padded 4-digit year, 2-digit month and 2-digit day, return a full date in df format df = mdy, dmy, iso, ymd

on entry, year, month, day are presumed to be correct for the date that they represent; all are required

]]

-- Format a date in the requested style.
--   year, month, day: date parts; all required and presumed already valid
--   df: one of 'dmy', 'mdy', 'ymd', 'iso'
-- Returns the formatted date string, or nil when any part is missing/empty or
-- df is not a known format.
local function makeDate(year, month, day, df)
	local format = {
		['dmy'] = 'j F Y',
		['mdy'] = 'F j, Y',
		['ymd'] = 'Y F j',
		['iso'] = 'Y-m-d',
		};

	-- 'not format[df]' is its own clause: the earlier "'' == day and format[df]"
	-- bound as a single term, so an unknown df slipped through to formatDate()
	if not year or '' == year or not month or '' == month or not day or '' == day or not format[df] then
		return nil;
	end

	local date = table.concat ({year, month, day}, '-');	-- assemble iso format date
	return mw.getContentLanguage():formatDate (format[df], date);
end


--[[--------------------------< I S _ V A L I D _ D A T E >----------------------------------------------------

Returns true if date is after 31 December 1899 (why is 1900 the min year? shouldn't the internet's date-of-birth be min year?), not after today's date, and represents a valid date (29 February 2017 is not a valid date). Applies Gregorian leapyear rules.

all arguments are required

]]

-- Check that year, month, day name a real calendar date (Gregorian leap-year
-- rules) that is not before 1900 and not later than the current date/time.
--   year, month, day: date parts as numbers or numeric strings; all required
-- Returns true when valid; false when a part is missing, empty, non-numeric,
-- out of range, or the date lies in the future.
local function is_valid_date (year, month, day)
	local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
	local month_length;
	local today = os.date ('*t');				-- fetch a table of current date parts

	if not year or '' == year or not month or '' == month or not day or '' == day then
		return false;							-- something missing
	end

	local y, m, d = tonumber (year), tonumber (month), tonumber (day);
	if not (y and m and d) then
		return false;							-- a part is not numeric; comparisons below would raise on nil
	end

	if 1900 > y or today.year < y or 1 > m or 12 < m then	-- year or month out of bounds TODO: 1900?
		return false;
	end

	if 2 == m then								-- if February
		month_length = 28;						-- then 28 days unless
		if 0 == (y % 4) and (0 ~= (y % 100) or 0 == (y % 400)) then	-- is a leap year?
			month_length = 29;					-- if leap year then 29 days in February
		end
	else
		month_length = days_in_month[m];
	end

	if 1 > d or month_length < d then			-- day out of bounds
		return false;
	end
												-- here when date parts represent a valid date
	return os.time({['year']=y, ['month']=m, ['day']=d, ['hour']=0}) <= os.time();	-- date at midnight must be less than or equal to current date/time
end


--[[--------------------------< decodeWebciteDate >-----------------------

Given a URI-path to Webcite (eg. /67xHmVFWP) return the encoded date in df format

returns:
	"query" when the path is one of the webcitation.org url forms that do not carry a base62 identifier
	the formatted date when the base62 identifier decodes to a valid date
	a red inline error message (and sets the error tracking category via inlineRed()) otherwise, including
		when the path has no identifier segment at all

]]

local function decodeWebciteDate(path, df)
	local id = mw.text.split (path, "/")[2];									-- the path segment that follows the leading '/'

	if not id or '' == id then													-- no identifier: nothing to search or decode
		return inlineRed (err_warn_msgs.date_err, 'error');
	end

	-- valid URL formats that are not base62

	-- http://www.webcitation.org/query?id=1138911916587475
	-- http://www.webcitation.org/query?url=http..&date=2012-06-01+21:40:03
	-- http://www.webcitation.org/1138911916587475
	-- http://www.webcitation.org/cache/73e53dd1f16cf8c5da298418d2a6e452870cf50e
	-- http://www.webcitation.org/getfile.php?fileid=1c46e791d68e89e12d0c2532cc3cf629b8bc8c8e

	if mw.ustring.find (id, "query", 1, true) or
		mw.ustring.find (id, "cache", 1, true) or
		mw.ustring.find (id, "getfile", 1, true) or
		tonumber (id) then
			return "query"
	end

	-- base62 string -> exponential number -> text -> first 10 characters -> a table of date parts
	local dt = os.date ('*t', string.format ("%d", base62 (id)):sub (1, 10));

	if not is_valid_date (dt.year, dt.month, dt.day) then
		return inlineRed (err_warn_msgs.date_err, 'error');
	end

	return makeDate (dt.year, dt.month, dt.day, df) or inlineRed (err_warn_msgs.date4, 'error');
end


--[[--------------------------< decodeWaybackDate >-----------------------

Given a URI-path to Wayback (eg. /web/20160901010101/http://example.com )
or Library of Congress Web Archives (/all/20160901010101/http://example.com)
return the formatted date eg. "September 1, 2016" in df format
Handle non-digits in snapshot ID such as "re_" and "-" and "*"

returns:
	"index" when the timestamp portion of the path is a lone '*'
	the formatted date when the timestamp begins with eight digits that make a valid date
	a red inline error message (and sets the error tracking category via inlineRed()) otherwise, including
		when the path has no timestamp segment at all

]]

local function decodeWaybackDate(path, df)

	local msg, snapdate;

	snapdate = path:gsub ('^/all/', ''):gsub ('^/web/', ''):gsub ('^/', '');	-- remove leading '/all/', leading '/web/' or leading '/'
	snapdate = snapdate:match ('^[^/]+');										-- get timestamp
	if not snapdate then														-- empty path or nothing before the next '/'
		return inlineRed (err_warn_msgs.date2, 'error');
	end
	if snapdate == "*" then														-- eg. /web/*/http.. or /all/*/http..
		return "index"
	end

	snapdate = snapdate:gsub ('%a%a_%d?$', ''):gsub ('%-', '');					-- from date, remove any trailing "re_", dashes

	msg = '';																	-- only used by the disabled TODO code at the end of this function
	if snapdate:match ('%*$') then												-- a trailing '*' causes calendar display at archive .org
		snapdate = snapdate:gsub ('%*$', '');									-- remove so not part of length calc later
--		msg = inlineRed(err_warn_msgs.ts_cal, 'warning');						-- TODO: enable this -- make a message
	end

	if not snapdate:match ('^%d+$') then										-- digits only; tonumber() would also accept hex, exponents, whitespace
		return inlineRed (err_warn_msgs.date2, 'error');
	end
	local dlen = string.len (snapdate)
	if dlen < 8 then															-- we need 8 digits TODO: but shouldn't this be testing for 14 digits?
		return inlineRed (err_warn_msgs.date3, 'error');
	end

	local year, month, day = snapdate:match ('^(%d%d%d%d)(%d%d)(%d%d)');		-- no need for snapdatelong here

	if not is_valid_date (year, month, day) then
		return inlineRed (err_warn_msgs.date_err, 'error');
	end

	return makeDate (year, month, day, df) or inlineRed (err_warn_msgs.date7, 'error');

--[[	snapdate = makeDate(year, month, day, df);								-- TODO: enable this
	if snapdate then
		if 14 == dlen then
			return snapdate, msg;												-- return date with message if any
		else
			return snapdate, msg .. inlineRed(err_warn_msgs.ts_len, 'warning');	-- return date with warning message(s)
		end
	else
		return inlineRed (err_warn_msgs.date7, 'error');						-- return error message
	end
]]
end


--[[--------------------------< decodeArchiveisDate >-----------------------

Given an Archive.is "long link" URI-path (e.g. /2016.08.28-144552/http://example.com)
return the date in df format (e.g. if df = dmy, return 28 August 2016)
Handles "." and "-" in snapshot date, so 2016.08.28-144552 is same as 20160828144552

returns:
	"short link" when the path is a '/' followed only by alphanumeric characters
	the formatted date when the snapshot segment holds eight consecutive digits that make a valid date
	a red inline error message (and sets the error tracking category via inlineRed()) otherwise, including
		when the path has no snapshot segment at all

]]

local function decodeArchiveisDate(path, df)
	local snapdate

	if path:match ('^/%w+$') then												-- short form url path is '/' followed by some number of base 62 digits and nothing else
		return "short link"														-- e.g. http://archive.is/hD1qz
	end

	snapdate = mw.text.split (path, '/')[2];									-- get snapshot date, e.g. 2016.08.28-144552
	if not snapdate then														-- path without a '/': nothing to decode
		return inlineRed (err_warn_msgs.date3, 'error');
	end
	snapdate = snapdate:gsub ('[%.%-]', '');									-- remove periods and hyphens

	local dlen = string.len (snapdate)
	if dlen < 8 then															-- we need 8 digits TODO: but shouldn't this be testing for 14 digits?
		return inlineRed (err_warn_msgs.date3, 'error');
	end

	local year, month, day = snapdate:match ('(%d%d%d%d)(%d%d)(%d%d)');			-- no need for snapdatelong here

	if not is_valid_date (year, month, day) then
		return inlineRed (err_warn_msgs.date_err, 'error');
	end

	return makeDate (year, month, day, df) or inlineRed (err_warn_msgs.date7, 'error');

--[[	snapdate = makeDate(year, month, day, df);								-- TODO: enable this
	if snapdate then
		if 14 == dlen then
			return snapdate;													-- return date
		else
			return snapdate, inlineRed(err_warn_msgs.ts_len, 'warning');		-- return date with warning message
		end
	else
		return inlineRed (err_warn_msgs.date7, 'error');						-- return error message
	end
]]
end


--[=[-------------------------< M A K E _ W I K I L I N K >----------------------------------------------------

Makes a wikilink; when both link and display text is provided, returns a wikilink in the form [[L|D]]; if only
link is provided, returns a wikilink in the form [[L]]; if neither are provided or link is omitted, returns an
empty string (or the display text alone when only display is provided).

When no_link is anything other than nil, no wikilink markup is produced: returns the display text when there is
some, else the link text, else an empty string.

]=]

local function make_wikilink (link, display, no_link)
	if nil == no_link then
		if link and ('' ~= link) then
			if display and ('' ~= display) then
				return table.concat ({'[[', link, '|', display, ']]'});
			else
				return table.concat ({'[[', link, ']]'});
			end
		end
		return display or '';													-- link not set so return the display text

	else																		-- no_link
		if display and ('' ~= display) then										-- if there is display text
			return display;														-- return that
		else
			return link or '';													-- return the target article name or empty string
		end
	end
end


--[[--------------------------< serviceName >-----------------------

Given a domain extracted by mw.uri.new() (eg. web.archive.org) set tail string and service ID

host: the host name; a nil host is treated as an empty (and therefore unknown) host
no_link: passed through to make_wikilink(); when not nil the service name is not wikilinked

Writes ulx.url1.service and ulx.url1.tail, and sets one key in track[].  Returns nothing.

Each services[] entry is read positionally: [1] prefix selector (false, true, or a string), [2] and [3] the
link and display arguments for make_wikilink(), [4] service ID, [5] tracking category, [6] optional postfix.

]]

local function serviceName(host, no_link)
	local tracking;
	local index;

	host = (host or ''):lower():gsub ('^web%.(.+)', '%1'):gsub ('^www%.(.+)', '%1');	-- lowercase, remove web. and www. subdomains

	if services[host] then
		index = host;
	else
		for k, _ in pairs (services) do
			if host:find ('%f[%a]'..k:gsub ('([%.%-])', '%%%1')) then			-- escape '.' and '-' so that the key is matched literally
				index = k;
				break;
			end
		end
	end

	if index then
		local out = {''};														-- empty string in [1] so that concatenated result has leading single space
		ulx.url1.service = services[index][4] or 'other';
		tracking = services[index][5] or categories.other;
																				-- build tail string
		if false == services[index][1] then										-- select prefix
			table.insert (out, prefixes.at);
		elseif true == services[index][1] then
			table.insert (out, prefixes.atthe);
		else
			table.insert (out, services[index][1]);
		end

		table.insert (out, make_wikilink (services[index][2], services[index][3], no_link));	-- add article wikilink
		if services[index][6] then												-- add tail postfix if it exists
			table.insert (out, services[index][6]);
		end

		ulx.url1.tail = table.concat (out, ' ');								-- put it all together; result has leading space character

	else																		-- here when unknown archive
		ulx.url1.service = 'other';
		tracking = categories.unknown;
		ulx.url1.tail = table.concat ({'', prefixes.at, host, inlineRed (err_warn_msgs.unknown_url)}, ' ');	-- TODO: call to inlineRed() does not specify 'error' or 'warning'; should it?
	end

	track[tracking] = 1
end


--[[--------------------------< parseExtraArgs >-----------------------

Parse numbered arguments starting at 2, such as url2..url10, date2..date10, title2..title10
	For example: {{webarchive |url=.. |url4=.. |url7=..}}
		Three url arguments not in numeric sequence (1..4..7).
		Function only processes arguments numbered 2 or greater (in this case 4 and 7)
		It creates numeric sequenced table entries like:
			urlx.url2.url = <argument value for url4>
			urlx.url3.url = <argument value for url7>
Returns the number of URL arguments found numbered 2 or greater (in this case returns "2")

The date and title stored with each url are taken from the arguments that carry the same number as the url
argument (|url4= pairs with |date4= and |title4=).  When the matching date argument is missing, a red
'date missing' warning message is stored in its place; a missing title is left unset.

]]

local function parseExtraArgs(args)
	local j = 2;																-- next free sequential slot in ulx[]

	for i = 2, maxurls do
		local url = args['url' .. i];
		if url then
			local entry = {url = url};

			-- look the companions up by the argument's own number (i), not by the compacted slot number (j)
			entry.date = args['date' .. i] or inlineRed (err_warn_msgs.date_miss, 'warning');
			entry.title = args['title' .. i];

			ulx['url' .. j] = entry;
			j = j + 1;
		end
	end

	return j - 2;																-- count of extra urls; 0 when none
end


--[[--------------------------< comma >-----------------------

Given a date string, return "," if it's MDY

returns an empty string for any other date format and when date is nil

]]

local function comma(date)
	return (date and date:match ('%a+ +%d%d?(,) +%d%d%d%d')) or '';			-- capture is the comma that follows the day in 'Month D, YYYY'
end


--[[--------------------------< createTracking >-----------------------

Return data in track[] ie. tracking categories

returns a string of category wikilinks, one for every key in track[], in sorted order so that the output does
not depend on table traversal order; returns an empty string when track[] is empty or when the current page
is in a namespace, or matches a subpage pattern, that is not to be categorized.  Pages listed in
excepted_pages[] are always categorized.

]]

local function createTracking()
	if not excepted_pages[this_page.fullText] then								-- namespace:title/fragment is allowed to be categorized (typically this module's / template's testcases page(s))
		if uncategorized_namespaces[this_page.nsText] then						-- TODO: enable this chunk
			return '';															-- this page not to be categorized so return empty string
		end
		for _, v in ipairs (uncategorized_subpages) do							-- cycle through page name patterns
			if this_page.text:match (v) then									-- test page name against each pattern
				return '';														-- this subpage type not to be categorized so return empty string
			end
		end
	end

	local names = {};
	for key, _ in pairs (track) do												-- collect the category names
		table.insert (names, key);
	end
	table.sort (names);															-- pairs() order is unspecified; sort for stable output

	local out = {};
	for _, name in ipairs (names) do
		table.insert (out, make_wikilink (name));								-- convert category names to links
	end
	return table.concat (out);													-- concat into one big string; empty string if table is empty
end


--[[--------------------------< createRendering >-----------------------

Return a rendering of the data in ulx[][]

Reads ulx.url1.format to choose the layout:
	'none': a single archive extlink (labelled with the title, or with 'Archived' followed by the date), the
		service tail text, and when ulx.url1.extraurls is greater than zero an 'Additional archives:' list
	'addlarchives': an 'Additional archives:' list of all urls, each labelled with its title or else its date
	anything else (addlpages): an 'Additional pages archived on <date>:' list of all urls, each labelled with
		its title or else 'Page <n>'

TODO: when archive date is '*' ('index') leading archive extlink should be [<url> Archive index] instead of
[<url> Archived] index; code to support this has been added but is commented out for the time being; look for TODO1

]]

local function createRendering()

	local displayfield
	local out = {};

	local period1 = '';															-- For backwards compat with Template:Wayback
	local period2 = '.';

	if 'none' == ulx.url1.format then											-- For Template:Wayback, Template:Webcite
		table.insert (out, '[');												-- open extlink markup
		table.insert (out, ulx.url1.url);										-- add url

		if ulx.url1.title then
			table.insert (out, ' ')												-- the required space
			table.insert (out, ulx.url1.title)									-- the title
			table.insert (out, ']');											-- close extlink markup
			table.insert (out, ulx.url1.tail);									-- tail text
			if ulx.url1.date then
				table.insert (out, '&#32;(');									-- open date text; TODO: why the html entity?
				table.insert (out, 'index' == ulx.url1.date and 'archive' or 'archived');	-- add text
				table.insert (out, ' ');										-- insert a space
				table.insert (out, ulx.url1.date);								-- add date
				table.insert (out, ')');										-- close date text
			end
		else																	-- no title
			table.insert (out, ' Archived]')									-- close extlink markup TODO1: remove this line
--TODO1			table.insert (out, 'index' == ulx.url1.date and ' Archive index]' or ' Archived]');	-- begin link label-- use this line for correct link label when date is 'index'
			if ulx.url1.date then
				if 'wayback' == ulx.url1.service then
					period1 = '.';
					period2 = '';
				end
				table.insert (out, table.concat ({' ', ulx.url1.date}));		-- add date TODO1: remove this line
--[[TODO1		if 'index' ~= ulx.url1.date then								-- TODO1: add this line -- use this if for correct link label when date is 'index'
					table.insert (out, ulx.url1.date);							-- add date TODO1: add this line -- use this if for correct link label when date is 'index'
				end																-- TODO1: add this line -- use this if for correct link label when date is 'index'
]]
				table.insert (out, comma (ulx.url1.date));						-- add ',' if date format is mdy
				table.insert (out, ulx.url1.tail);								-- add tail text
				table.insert (out, period1);									-- terminate
			else																-- no date
				table.insert (out, ulx.url1.tail);								-- add tail text
			end
		end

		if 0 < ulx.url1.extraurls then											-- For multiple archive URLs
			local tot = ulx.url1.extraurls + 1
			table.insert (out, period2);										-- terminate first url
			table.insert (out, ' Additional archives: ');						-- add header text

			for i=2, tot do														-- loop through the additionals
				local index = table.concat ({'url', i});						-- make an index
				displayfield = ulx[index]['title'] and 'title' or 'date';		-- choose display text
				table.insert (out, '[');										-- open extlink markup
				table.insert (out, ulx[index]['url']);							-- add the url
				table.insert (out, ' ');										-- the required space
				table.insert (out, ulx[index][displayfield]);					-- add the label
				table.insert (out, ']');										-- close extlink markup
				table.insert (out, i==tot and '.' or ', ');						-- add terminator
			end
		end
		return table.concat (out);												-- make a big string and done

	else																		-- For Template:Cite archives
		if 'addlarchives' == ulx.url1.format then								-- Multiple archive services
			table.insert (out, 'Additional archives: ');						-- add header text
		else																	-- Multiple pages from the same archive
			table.insert (out, 'Additional pages archived on ');				-- add header text
			table.insert (out, ulx.url1.date);									-- add date to header text
			table.insert (out, ': ');											-- close header text
		end

		local tot = ulx.url1.extraurls + 1;
		for i=1, tot do															-- loop through the additionals
			local index = table.concat ({'url', i});							-- make an index
			table.insert (out, '[');											-- open extlink markup
			table.insert (out, ulx[index]['url']);								-- add url
			table.insert (out, ' ');											-- add required space

			displayfield = ulx[index]['title'];
			if 'addlarchives' == ulx.url1.format then
				if not displayfield then
					displayfield = ulx[index]['date']
				end
			else																-- must be addlpages
				if not displayfield then
					displayfield = table.concat ({'Page ', i});
				end
			end
			table.insert (out, displayfield);									-- add title, date, page label text
			table.insert (out, ']');											-- close extlink markup
			table.insert (out, (i==tot and '.' or ', '));						-- add terminator
		end
		return table.concat (out);												-- make a big string and done
	end
end


--[[--------------------------< W E B A R C H I V E >----------------------------------------------------------

template entry point

Reads the template arguments (|url=, |date=, |title=, |format=, |nolink= and their numbered forms), loads the
service, message, and category tables from the ./data (or ./data/sandbox) module, determines the archive
service from the url's host, derives or verifies the archive date from the url where the service encodes one,
and returns the rendered citation text followed by any tracking categories.

Conflicting or missing required arguments return a red inline error message (plus tracking categories)
instead of a rendering.

TODO: deprecate empty |nolink= as a 'positive' assertion that archive service is not to be linked

]]

local function webarchive(frame)
	local args = getArgs (frame, {												-- TODO: delete this assignment
		valueFunc = function (key, value)										-- this code so that we can detect and handle the oddity that is |nolink=
			if 'nolink' == key then												-- |nolink= is 'set' when present with or without assigned value; TODO: deprecate this peculiar use
				return value;													-- don't trim; we don't care (right now) what the value is except when nil and we can't trim nil
			elseif value then													-- all other values: if the value is not nil
				value = mw.text.trim (value);									-- trim whitespace
				if '' ~= value then												-- empty string when value was only whitespace or was empty
					return value;												-- return non-nil, non-empty values
				end
			end
			return nil;															-- value was nil, empty, or contained only whitespace
		end																		-- end of valueFunc
		});

--	local args = getArgs (frame);												-- TODO: replace the above with this

	local data = mw.loadData (table.concat ({									-- make a data module name; sandbox or live
		'Module:Webarchive/data',
		frame:getTitle():find('sandbox', 1, true) and '/sandbox' or ''			-- this instance is ./sandbox then append /sandbox
		}));
	categories = data.categories;												-- fill in the forward declarations
	err_warn_msgs = data.err_warn_msgs;
	excepted_pages = data.excepted_pages;
	prefixes = data.prefixes;
	services = data.services;
	uncategorized_namespaces = data.uncategorized_namespaces;
	uncategorized_subpages = data.uncategorized_subpages;

	local date, format, msg, uri, url;

	-- convert to boolean; also accept an already-converted true so that a second call through the same module
	-- instance does not flip the setting to false
	verifydates = ('yes' == verifydates) or (true == verifydates);

	if args.url and args.url1 then												-- URL argument (first)
		return inlineError("url", "Conflicting |url= and |url1=.") .. createTracking();
	end

	url = args.url or args.url1;

	if not url then
		return inlineError("url", "Empty.") .. createTracking()
	end
	if mw.ustring.find( url, "https://web.http", 1, true ) then					-- track bug - TODO: IAbot bug; not known if the bug has been fixed; deferred
		track[categories.error] = 1;
		return inlineError("url", "https://web.http") .. createTracking()
	end
	if url == "https://web.archive.org/http:/" then								-- track bug - TODO: IAbot bug; not known if the bug has been fixed; deferred
		track[categories.error] = 1;
		return inlineError("url", "Invalid URL") .. createTracking()
	end

	ulx.url1 = {}
	ulx.url1.url = url
	if not (url:lower():find ('^http') or url:find ('^//')) then				-- TODO: is this a good idea? isn't it better to simply throw an error when url is malformed ...
		ulx.url1.url = 'http://' .. url											-- ... rather than apply this 'fix' that might not fix anything?
	end

	ulx.url1.extraurls = parseExtraArgs(args)

	uri = mw.uri.new (ulx.url1.url);											-- get a table of uri parts from this url
	serviceName(uri.host or '', args.nolink)									-- a url without a host is rendered as an unknown archive

	if args.date and args.date1 then											-- Date argument
		return inlineError("date", "Conflicting |date= and |date1=.") .. createTracking();
	end

	date = args.date or args.date1

	if 'wayback' == ulx.url1.service or 'locwebarchives' == ulx.url1.service then
		if '*' == date then														-- TODO: why is this not compared to url date?
			date = 'index';
		end
		if date then
			if verifydates then
				local ldf = dateFormat(date)
				if ldf then
					local udate, msg = decodeWaybackDate( uri.path, ldf )		-- get the url date in the same format as date in |date=; 'index' when wayback date is *
					if udate ~= date then
						date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning') .. (msg or '');	-- mismatch: use url date; add message if there is one
					else
						date = date .. (msg or '');								-- add message if there is one
					end
				end
			end
		else																	-- no |date=
			date, msg = decodeWaybackDate( uri.path, "iso" )
			if not date then
				date = inlineRed (err_warn_msgs.date1, 'error');				-- TODO: change this type of message so that it identifies url as source of error?
			else
				date = date .. (msg or '');										-- add message if there is one
			end
		end

	elseif 'webcite' == ulx.url1.service then
		if date then
			if verifydates then
				local ldf = dateFormat(date)
				if ldf then
					local udate = decodeWebciteDate( uri.path, ldf )			-- get the url date in the same format as date in |date=
					if 'query' ~= udate then									-- skip if query
						if udate ~= date then
							date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning');
						end
					end
				end
			end
		else
			date = decodeWebciteDate( uri.path, "iso" )
			if date == "query" then
				date = inlineRed (err_warn_msgs.date_miss, 'warning');
			elseif not date then
				date = inlineRed (err_warn_msgs.date1, 'error');
			end
		end

	elseif 'archiveis' == ulx.url1.service then
		if date then
			if verifydates then
				local ldf = dateFormat(date)
				if ldf then
					local udate, msg = decodeArchiveisDate( uri.path, ldf )		-- get the url date in the same format as date in |date=
					if 'short link' ~= udate then								-- skip if short link
						if udate ~= date then
							date = udate .. inlineRed (err_warn_msgs.mismatch, 'warning') .. (msg or '');	-- mismatch: use url date; add message if there is one
						else
							date = date .. (msg or '');							-- add message if there is one
						end
					end
				end
			end
		else																	-- no |date=
			date, msg = decodeArchiveisDate( uri.path, "iso" )
			if date == "short link" then
				date = inlineRed (err_warn_msgs.date_miss, 'warning');
			elseif not date then
				date = inlineRed (err_warn_msgs.date1, 'error');
			else
				date = date .. (msg or '');										-- add message if there is one
			end
		end

	else																		-- some other service
		if not date then
			date = inlineRed (err_warn_msgs.date_miss, 'warning');
		end
	end

	ulx.url1.date = date

	format = args.format;														-- Format argument

	-- only 'addlarchives', and 'addlpages' when there is a date, are honored; everything else renders as 'none'
	if not ('addlarchives' == format or ('addlpages' == format and ulx.url1.date)) then
		format = 'none';
	end
	ulx.url1.format = format

	if args.title and args.title1 then											-- Title argument
		return inlineError("title", "Conflicting |title= and |title1=.") .. createTracking();
	end

	ulx.url1.title = args.title or args.title1;

	local rend = createRendering()
	if not rend then
		rend = 'Error in Template:' .. tname .. ': Unknown problem. Please report on template talk page.'
		track[categories.error] = 1;
	end

	return rend .. createTracking()
end


--[[--------------------------< E X P O R T E D   F U N C T I O N S >------------------------------------------

The module's public interface: the single entry point called by the template's #invoke.

]]

local exports = {};
exports.webarchive = webarchive;

return exports;