Module:Country extract
From ERPC Wiki Archive
Documentation for this module may be created at Module:Country extract/doc
local p = {}

--
-- Replicates the behaviour of the CountryAbbr and CountryAbbr2 templates.
-- The wikitext passed in may take any of these shapes:
--   1 alternative names (the lookup tables below list the known aliases)
--   2 {{<name>}}
--   3 [[<name>]]
--   4 [[<name>|<junk>]]
--   5 [[image:flag of <country>.[svg|gif|png|jpg]|\d+px]]
--

-- Character run accepted as a country name:
--   ( ) for Cocos (Keeling) Islands
--   '   for People's
--   .   for U.S. etc.
local COUNTRY_NAME = "%u[%a%(%)%.' %-]+"

-- Character run accepted as a subdivision name inside p.main. Kept in one
-- place so that the primary and the fallback patterns cannot drift apart.
local SUBDIVISION_NAME = "[ǁ'‘ʻ]?%u[%aZ̧z̄'‘ʻ %.%,%-]+[%a%.'%d]+"

-- Remove every match of `pattern` from `s`.
-- The outer parentheses drop gsub's second result (the substitution count),
-- so the helper is safe inside table constructors and argument lists.
local function strip(s, pattern)
    return (mw.ustring.gsub(s, pattern, ""))
end

-- NFC-normalise a template argument and trim surrounding whitespace.
-- A missing argument is treated as the empty string.
local function trimmedArg(value)
    return (mw.ustring.gsub(mw.ustring.toNFC(value or ''), '^%s*(.-)%s*$', '%1'))
end

-- Look `name` up in `tbl`, retrying with a trailing " (...)" group removed,
-- with everything from the first comma removed, and with everything from
-- the first " (" removed. Returns nil when no variant is a key of `tbl`.
local function lookup(tbl, name)
    return tbl[name]
        or tbl[strip(name, " %(.*%)$")]
        or tbl[strip(name, ",.*$")]
        or tbl[strip(name, " %(.*$")]
end

-- Pull a name out of `text`. `namePattern` is the character run that counts
-- as a name (COUNTRY_NAME or SUBDIVISION_NAME). Returns "" when nothing fits.
local function extractName(text, namePattern)
    local name = mw.ustring.match(text, "Flag of the ([^%.]*)")
        or mw.ustring.match(text, "Flag of ([^%.]*)")
        or mw.ustring.match(text, "[Tt]he %A*(" .. namePattern .. ")")
        or mw.ustring.match(text, "(" .. namePattern .. ")")
        or ""
    -- The first capitalised word was the file namespace of an image link:
    -- drop it and take the name that follows a "|" or "[".
    if name == "Image" or name == "File" then
        text = strip(text, name)
        name = mw.ustring.match(text, "[%|%[](" .. namePattern .. ")") or ""
    end
    -- The candidate is an image size such as "20px" or "20x15px":
    -- drop it and take the name that follows a "[".
    if mw.ustring.match(name, "^%d*x?%d+px$") then
        text = strip(text, name)
        name = mw.ustring.match(text, "%[(" .. namePattern .. ")") or ""
    end
    return name
end

-- True when `titleText` is a valid title and the page exists.
-- mw.title.new returns nil for an invalid title, hence the guard.
local function templateExists(titleText)
    local title = mw.title.new(titleText)
    return title ~= nil and title.exists
end

-- Build the tracking-category link. Outside namespace 0 the output of
-- {{Namespace Greek}} is appended as the sort key.
local function trackingCategory(frame, categoryName)
    if mw.title.getCurrentTitle().namespace == 0 then
        return '[[Category:' .. categoryName .. ']]'
    end
    return '[[Category:' .. categoryName .. '|'
        .. frame:expandTemplate{ title = 'Namespace Greek' } .. ']]'
end

-- Extract a country name from the first template argument.
-- @param frame  Scribunto frame; frame.args[1] is the wikitext to inspect.
-- @return       the extracted name, or the (normalised) input if none found.
function p.extractCountry(frame)
    local text = mw.ustring.toNFC(frame.args[1] or '')
    local name = mw.ustring.match(text, "Flag of ([^%.]*)")
    if name then
        return name
    end
    name = mw.ustring.match(text, "(" .. COUNTRY_NAME .. ")")
    if name == "Image" then
        text = strip(text, name)
        name = mw.ustring.match(text, "[%|%[](" .. COUNTRY_NAME .. ")")
    end
    if name == "20px" then
        text = strip(text, name)
        name = mw.ustring.match(text, "%[(" .. COUNTRY_NAME .. ")")
    end
    return name or text
end

-- Extract a subdivision name from the first template argument.
-- @param frame  Scribunto frame; frame.args[1] is the wikitext to inspect.
-- @return       the extracted name, or the (normalised) input if none found.
function p.extractSubdivision(frame)
    local text = mw.ustring.toNFC(frame.args[1] or '')
    -- Needed for {{flag|Greenland}}
    local name = mw.ustring.match(text, "Flag of ([^%.]*)")
    if name then
        return name
    end
    -- .  needed for Washington D.C.
    -- '  for cote d'azur
    -- leading ' for 'Aden
    -- ‘ and trailing ' for Şan‘ā'
    -- %d lets the name end in digits.
    name = mw.ustring.match(text, "('?%u[%a'‘ %.%,%-]+[%a%.'%d]+)")
    return name or text
end

-- Country name (including common aliases) -> code.
local countries = {
    ["Andorra"] = "AD",
    ["United Arab Emirates"] = "AE", ["UAE"] = "AE",
    ["Afghanistan"] = "AF",
    ["Antigua and Barbuda"] = "AG",
    ["Anguilla"] = "AI",
    ["Albania"] = "AL",
    ["Armenia"] = "AM",
    ["Angola"] = "AO",
    ["Antarctica"] = "AQ",
    ["Argentina"] = "AR",
    ["American Samoa"] = "AS",
    ["Austria"] = "AT",
    ["Australia"] = "AU",
    ["Aruba"] = "AW",
    ["Åland Islands"] = "AX", ["Aland Islands"] = "AX",
    ["Azerbaijan"] = "AZ",
    ["Nagorno-Karabakh"] = "AZ", -- disputed
    ["Nagorno-Karabakh Republic"] = "AZ", -- disputed
    ["Bosnia and Herzegovina"] = "BA", ["Bosnia"] = "BA",
    ["Barbados"] = "BB",
    ["Bangladesh"] = "BD",
    ["Belgium"] = "BE",
    ["Burkina Faso"] = "BF",
    ["Bulgaria"] = "BG",
    ["Bahrain"] = "BH",
    ["Burundi"] = "BI",
    ["Benin"] = "BJ",
    ["Saint Barthélemy"] = "BL", ["Saint Barthelemy"] = "BL",
    ["St. Barthélemy"] = "BL", ["St. Barthelemy"] = "BL",
    ["St Barthélemy"] = "BL", ["St Barthelemy"] = "BL",
    ["Bermuda"] = "BM",
    ["Brunei"] = "BN", ["Brunei Darussalam"] = "BN",
    ["Bolivia"] = "BO", ["Bolivia, Plurinational State of"] = "BO",
    ["Bonaire, Sint Eustatius and Saba"] = "BQ",
    ["Brazil"] = "BR",
    ["Bahamas"] = "BS", ["The Bahamas"] = "BS", ["the Bahamas"] = "BS",
    ["Bhutan"] = "BT",
    ["Bouvet Island"] = "BV",
    ["Botswana"] = "BW",
    ["Belarus"] = "BY",
    ["Belize"] = "BZ",
    ["Canada"] = "CA",
    ["Cocos (Keeling) Islands"] = "CC", ["Cocos Islands"] = "CC", ["Keeling Islands"] = "CC",
    ["Congo, the Democratic Republic of the"] = "CD",
    ["Democratic Republic of the Congo"] = "CD",
    ["DR Congo"] = "CD", ["Congo-Kinshasa"] = "CD", ["DROC"] = "CD", ["DRC"] = "CD",
    ["Central African Republic"] = "CF", ["CAR"] = "CF",
    ["Congo"] = "CG", ["Republic of the Congo"] = "CG", ["Congo Republic"] = "CG",
    ["Congo-Brazzaville"] = "CG", ["West Congo"] = "CG",
    ["Switzerland"] = "CH",
    ["Côte d'Ivoire"] = "CI", ["Ivory Coast"] = "CI",
    ["Cook Islands"] = "CK",
    ["Chile"] = "CL",
    ["Cameroon"] = "CM",
    ["China"] = "CN", ["PRC"] = "CN", ["People's Republic of China"] = "CN",
    ["Colombia"] = "CO",
    ["Costa Rica"] = "CR",
    ["Cuba"] = "CU",
    ["Cape Verde"] = "CV",
    ["Curaçao"] = "CW", ["Curacao"] = "CW",
    ["Christmas Island"] = "CX",
    ["Cyprus"] = "CY", ["Republic of Cyprus"] = "CY",
    ["Northern Cyprus"] = "CY", -- disputed
    ["Turkish Republic of Northern Cyprus"] = "CY", -- disputed
    ["Czech Republic"] = "CZ",
    ["Germany"] = "DE",
    ["Djibouti"] = "DJ",
    ["Denmark"] = "DK",
    ["Dominica"] = "DM",
    ["Dominican Republic"] = "DO",
    ["Algeria"] = "DZ",
    ["Ecuador"] = "EC",
    ["Estonia"] = "EE",
    ["Egypt"] = "EG",
    ["Western Sahara"] = "EH",
    ["Eritrea"] = "ER",
    ["Spain"] = "ES",
    ["Ethiopia"] = "ET",
    ["Finland"] = "FI",
    ["Fiji"] = "FJ",
    ["Falkland Islands (Malvinas)"] = "FK", ["Falklands"] = "FK", ["Falkland Islands"] = "FK",
    ["Malvinas"] = "FK", ["Malvinas Islands"] = "FK",
    ["Micronesia, Federated States of"] = "FM", ["Micronesia"] = "FM",
    ["Faroe Islands"] = "FO",
    ["France"] = "FR",
    ["Gabon"] = "GA",
    ["United Kingdom"] = "GB", ["the United Kingdom"] = "GB",
    ["U. K."] = "GB", ["U.K."] = "GB", ["the U.K."] = "GB",
    ["UK"] = "GB", ["the UK"] = "GB",
    ["Great Britain"] = "GB",
    ["England"] = "GB-ENG",
    ["Scotland"] = "GB-SCT",
    ["Wales"] = "GB-WLS",
    ["Northern Ireland"] = "GB-NIR",
    ["England and Wales"] = "GB-EAW",
    ["Grenada"] = "GD",
    ["Georgia"] = "GE",
    ["South Ossetia"] = "GE", -- disputed
    ["Tskhinvali Region"] = "GE", -- disputed
    ["Abkhazia"] = "GE-AB", -- disputed
    ["Republic of Abkhazia"] = "GE-AB", -- disputed
    ["French Guiana"] = "GF",
    ["Guernsey"] = "GG",
    ["Ghana"] = "GH",
    ["Gibraltar"] = "GI",
    ["Greenland"] = "GL",
    ["Gambia"] = "GM",
    ["Guinea"] = "GN",
    ["Guadeloupe"] = "GP",
    ["Equatorial Guinea"] = "GQ",
    ["Greece"] = "GR",
    ["South Georgia and the South Sandwich Islands"] = "GS",
    ["Guatemala"] = "GT",
    ["Guam"] = "GU",
    ["Guinea-Bissau"] = "GW",
    ["Guyana"] = "GY",
    ["Hong Kong"] = "HK",
    ["Heard Island and McDonald Islands"] = "HM", ["Heard and McDonald Islands"] = "HM",
    ["Honduras"] = "HN",
    ["Croatia"] = "HR",
    ["Haiti"] = "HT",
    ["Hungary"] = "HU",
    ["Indonesia"] = "ID",
    ["Ireland"] = "IE",
    ["Israel"] = "IL",
    ["Isle of Man"] = "IM",
    ["India"] = "IN",
    ["British Indian Ocean Territory"] = "IO",
    ["Iraq"] = "IQ",
    ["Iran, Islamic Republic of"] = "IR", ["Iran"] = "IR",
    ["Iceland"] = "IS",
    ["Italy"] = "IT",
    ["Jersey"] = "JE",
    ["Jamaica"] = "JM",
    ["Jordan"] = "JO",
    ["Japan"] = "JP",
    ["Kenya"] = "KE",
    ["Kyrgyzstan"] = "KG",
    ["Cambodia"] = "KH",
    ["Kiribati"] = "KI",
    ["Comoros"] = "KM", ["The Comoros"] = "KM", ["the Comoros"] = "KM",
    ["Saint Kitts and Nevis"] = "KN", ["St. Kitts and Nevis"] = "KN", ["St Kitts and Nevis"] = "KN",
    ["Korea, Democratic People's Republic of"] = "KP",
    ["Democratic People's Republic of Korea"] = "KP",
    ["North Korea"] = "KP",
    ["Korea, Republic of"] = "KR", ["Republic of Korea"] = "KR", ["South Korea"] = "KR",
    ["Kuwait"] = "KW",
    ["Cayman Islands"] = "KY", ["The Cayman Islands"] = "KY", ["the Cayman Islands"] = "KY",
    ["Kazakhstan"] = "KZ",
    ["Lao People's Democratic Republic"] = "LA", ["Laos"] = "LA", ["Lao"] = "LA",
    ["Lebanon"] = "LB",
    ["Saint Lucia"] = "LC", ["St. Lucia"] = "LC", ["St Lucia"] = "LC",
    ["Liechtenstein"] = "LI",
    ["Sri Lanka"] = "LK",
    ["Liberia"] = "LR",
    ["Lesotho"] = "LS",
    ["Lithuania"] = "LT",
    ["Luxembourg"] = "LU",
    ["Latvia"] = "LV",
    ["Libyan Arab Jamahiriya"] = "LY", ["Libya"] = "LY",
    ["Morocco"] = "MA",
    ["Monaco"] = "MC",
    ["Moldova, Republic of"] = "MD", ["Moldova"] = "MD",
    ["Transnistria"] = "MD", -- disputed
    ["Montenegro"] = "ME",
    ["Saint Martin (French part)"] = "MF", ["Saint Martin (France)"] = "MF",
    ["Collectivity of Saint Martin"] = "MF", ["Collectivity of St. Martin"] = "MF",
    ["Collectivity of St Martin"] = "MF", ["Saint-Martin"] = "MF",
    ["Madagascar"] = "MG",
    ["Marshall Islands"] = "MH",
    ["Macedonia"] = "MK",
    ["Mali"] = "ML",
    ["Myanmar"] = "MM", ["Burma"] = "MM",
    ["Mongolia"] = "MN",
    ["Macau"] = "MO", ["Macao"] = "MO",
    ["Northern Mariana Islands"] = "MP",
    ["Martinique"] = "MQ",
    ["Mauritania"] = "MR",
    ["Montserrat"] = "MS",
    ["Malta"] = "MT",
    ["Mauritius"] = "MU",
    ["Maldives"] = "MV",
    ["Malawi"] = "MW",
    ["Mexico"] = "MX",
    ["Malaysia"] = "MY",
    ["Mozambique"] = "MZ",
    ["Namibia"] = "NA",
    ["New Caledonia"] = "NC",
    ["Niger"] = "NE",
    ["Norfolk Island"] = "NF",
    ["Nigeria"] = "NG",
    ["Nicaragua"] = "NI",
    ["Netherlands"] = "NL", ["The Netherlands"] = "NL", ["the Netherlands"] = "NL",
    ["Norway"] = "NO",
    ["Nepal"] = "NP",
    ["Nauru"] = "NR",
    ["Niue"] = "NU",
    ["New Zealand"] = "NZ", ["Aotearoa"] = "NZ",
    ["Oman"] = "OM",
    ["Panama"] = "PA",
    ["Peru"] = "PE",
    ["French Polynesia"] = "PF",
    ["Papua New Guinea"] = "PG",
    ["Philippines"] = "PH", ["The Philippines"] = "PH", ["the Philippines"] = "PH",
    ["Pakistan"] = "PK",
    ["Poland"] = "PL",
    ["Saint Pierre and Miquelon"] = "PM", ["St. Pierre and Miquelon"] = "PM",
    ["St Pierre and Miquelon"] = "PM",
    ["Pitcairn"] = "PN",
    ["Puerto Rico"] = "PR",
    ["State of Palestine"] = "PS", ["Palestine"] = "PS",
    ["Portugal"] = "PT",
    ["Palau"] = "PW",
    ["Paraguay"] = "PY",
    ["Qatar"] = "QA",
    ["Réunion"] = "RE", ["Reunion"] = "RE",
    ["Romania"] = "RO",
    ["Serbia"] = "RS",
    ["Kosovo"] = "RS", -- disputed
    ["Russian Federation"] = "RU", ["Russia"] = "RU",
    ["Rwanda"] = "RW",
    ["Saudi Arabia"] = "SA",
    ["Solomon Islands"] = "SB",
    ["Seychelles"] = "SC",
    ["Sudan"] = "SD",
    ["Sweden"] = "SE",
    ["Singapore"] = "SG",
    ["Saint Helena, Ascension and Tristan da Cunha"] = "SH",
    ["Slovenia"] = "SI",
    ["Svalbard and Jan Mayen"] = "SJ",
    ["Slovakia"] = "SK",
    ["Sierra Leone"] = "SL",
    ["San Marino"] = "SM",
    ["Senegal"] = "SN",
    ["Somalia"] = "SO",
    ["Somaliland"] = "SO", -- disputed
    ["Suriname"] = "SR",
    ["South Sudan"] = "SS",
    ["São Tomé and Príncipe"] = "ST", ["Sao Tome and Principe"] = "ST",
    ["El Salvador"] = "SV",
    ["Sint Maarten"] = "SX", ["Saint Martin (Dutch part)"] = "SX",
    ["Saint Martin (Netherlands)"] = "SX",
    ["Syrian Arab Republic"] = "SY", ["Syria"] = "SY",
    ["Swaziland"] = "SZ",
    ["Turks and Caicos Islands"] = "TC",
    ["Chad"] = "TD",
    ["French Southern Territories"] = "TF",
    ["Togo"] = "TG",
    ["Thailand"] = "TH",
    ["Tajikistan"] = "TJ",
    ["Tokelau"] = "TK",
    ["Timor-Leste"] = "TL", ["East Timor"] = "TL",
    ["Turkmenistan"] = "TM",
    ["Tunisia"] = "TN",
    ["Tonga"] = "TO",
    ["Turkey"] = "TR",
    ["Trinidad and Tobago"] = "TT",
    ["Tuvalu"] = "TV",
    ["Taiwan"] = "TW", ["Republic of China"] = "TW",
    ["Tanzania, United Republic of"] = "TZ", ["Tanzania"] = "TZ",
    ["Ukraine"] = "UA", ["The Ukraine"] = "UA", ["the Ukraine"] = "UA",
    ["Uganda"] = "UG",
    ["United States Minor Outlying Islands"] = "UM",
    ["United States"] = "US", ["the United States"] = "US",
    ["United States of America"] = "US", ["the United States of America"] = "US",
    ["U. S."] = "US", ["U.S."] = "US", ["the U.S."] = "US",
    ["US"] = "US", ["the US"] = "US",
    ["Uruguay"] = "UY",
    ["Uzbekistan"] = "UZ",
    ["Holy See (Vatican City State)"] = "VA", ["Holy See"] = "VA",
    ["Vatican City State"] = "VA", ["Vatican City"] = "VA",
    ["Saint Vincent and the Grenadines"] = "VC", ["St. Vincent and the Grenadines"] = "VC",
    ["St Vincent and the Grenadines"] = "VC",
    ["Venezuela, Bolivarian Republic of"] = "VE", ["Venezuela"] = "VE",
    ["Virgin Islands, British"] = "VG", ["British Virgin Islands"] = "VG",
    ["UK Virgin Islands"] = "VG", ["Virgin Islands of the United Kingdom"] = "VG",
    ["Virgin Islands of the UK"] = "VG",
    ["Virgin Islands, U. S."] = "VI", ["Virgin Islands, U.S."] = "VI",
    ["Virgin Islands, US"] = "VI", ["United States Virgin Islands"] = "VI",
    ["American Virgin Islands"] = "VI", ["U. S. Virgin Islands"] = "VI",
    ["U.S. Virgin Islands"] = "VI", ["US Virgin Islands"] = "VI",
    ["Virgin Islands of the United States"] = "VI", ["Virgin Islands of the U.S."] = "VI",
    ["Virgin Islands of the US"] = "VI",
    ["Vietnam"] = "VN", ["Viet Nam"] = "VN",
    ["Vanuatu"] = "VU",
    ["Wallis and Futuna"] = "WF",
    ["Samoa"] = "WS",
    ["Yemen"] = "YE",
    ["Mayotte"] = "YT",
    ["South Africa"] = "ZA",
    ["Zambia"] = "ZM",
    ["Zimbabwe"] = "ZW",
}

-- Country codes for which p.main loads a 'Module:Country extract/<code>'
-- data subpage. Built once at module load instead of on every call.
local countrytables = {
    ["AD"] = true, ["AE"] = true, ["AF"] = true, ["AG"] = true, ["AL"] = true, ["AM"] = true,
    ["AO"] = true, ["AR"] = true, ["AT"] = true, ["AU"] = true, ["AZ"] = true, ["BA"] = true,
    ["BB"] = true, ["BD"] = true, ["BE"] = true, ["BF"] = true, ["BG"] = true, ["BH"] = true,
    ["BI"] = true, ["BJ"] = true, ["BN"] = true, ["BO"] = true, ["BQ"] = true, ["BR"] = true,
    ["BS"] = true, ["BT"] = true, ["BW"] = true, ["BY"] = true, ["BZ"] = true, ["CA"] = true,
    ["CD"] = true, ["CF"] = true, ["CG"] = true, ["CH"] = true, ["CI"] = true, ["CL"] = true,
    ["CM"] = true, ["CN"] = true, ["CO"] = true, ["CR"] = true, ["CU"] = true, ["CV"] = true,
    ["CY"] = true, ["CZ"] = true, ["DE"] = true, ["DJ"] = true, ["DK"] = true, ["DM"] = true,
    ["DO"] = true, ["DZ"] = true, ["EC"] = true, ["EE"] = true, ["EG"] = true, ["ER"] = true,
    ["ES"] = true, ["ET"] = true, ["FI"] = true, ["FJ"] = true, ["FM"] = true, ["FR"] = true,
    ["GA"] = true, ["GB"] = true, ["GD"] = true, ["GE"] = true, ["GH"] = true, ["GL"] = true,
    ["GM"] = true, ["GN"] = true, ["GQ"] = true, ["GR"] = true, ["GT"] = true, ["GW"] = true,
    ["GY"] = true, ["HN"] = true, ["HR"] = true, ["HT"] = true, ["HU"] = true, ["ID"] = true,
    ["IE"] = true, ["IL"] = true, ["IN"] = true, ["IQ"] = true, ["IR"] = true, ["IS"] = true,
    ["IT"] = true, ["JM"] = true, ["JO"] = true, ["JP"] = true, ["KE"] = true, ["KG"] = true,
    ["KH"] = true, ["KI"] = true, ["KM"] = true, ["KN"] = true, ["KP"] = true, ["KR"] = true,
    ["KW"] = true, ["KZ"] = true, ["LA"] = true, ["LB"] = true, ["LC"] = true, ["LI"] = true,
    ["LK"] = true, ["LR"] = true, ["LS"] = true, ["LT"] = true, ["LU"] = true, ["LV"] = true,
    ["LY"] = true, ["MA"] = true, ["MC"] = true, ["MD"] = true, ["ME"] = true, ["MG"] = true,
    ["MH"] = true, ["MK"] = true, ["ML"] = true, ["MM"] = true, ["MN"] = true, ["MR"] = true,
    ["MT"] = true, ["MU"] = true, ["MV"] = true, ["MW"] = true, ["MX"] = true, ["MY"] = true,
    ["MZ"] = true, ["NA"] = true, ["NE"] = true, ["NG"] = true, ["NI"] = true, ["NL"] = true,
    ["NO"] = true, ["NP"] = true, ["NR"] = true, ["NZ"] = true, ["OM"] = true, ["PA"] = true,
    ["PE"] = true, ["PG"] = true, ["PH"] = true, ["PK"] = true, ["PL"] = true, ["PS"] = true,
    ["PT"] = true, ["PW"] = true, ["PY"] = true, ["QA"] = true, ["RO"] = true, ["RS"] = true,
    ["RU"] = true, ["RW"] = true, ["SA"] = true, ["SB"] = true, ["SC"] = true, ["SD"] = true,
    ["SE"] = true, ["SG"] = true, ["SH"] = true, ["SI"] = true, ["SK"] = true, ["SL"] = true,
    ["SM"] = true, ["SN"] = true, ["SO"] = true, ["SR"] = true, ["SS"] = true, ["ST"] = true,
    ["SV"] = true, ["SY"] = true, ["SZ"] = true, ["TD"] = true, ["TG"] = true, ["TH"] = true,
    ["TJ"] = true, ["TL"] = true, ["TM"] = true, ["TN"] = true, ["TO"] = true, ["TR"] = true,
    ["TT"] = true, ["TV"] = true, ["TW"] = true, ["TZ"] = true, ["UA"] = true, ["UG"] = true,
    ["UM"] = true, ["US"] = true, ["UY"] = true, ["UZ"] = true, ["VC"] = true, ["VE"] = true,
    ["VN"] = true, ["VU"] = true, ["WF"] = true, ["WS"] = true, ["YE"] = true, ["ZA"] = true,
    ["ZM"] = true, ["ZW"] = true,
}

-- Resolve a country (and optionally a subdivision) to a code.
-- @param frame  Scribunto frame; frame.args[1] is the country wikitext and
--               frame.args[2] the optional subdivision wikitext.
-- @return       the subdivision code when one is found; otherwise the country
--               code, followed by a tracking category if a subdivision was
--               given but not resolved; or only a tracking category when the
--               country itself is unknown.
function p.main(frame)
    local country = trimmedArg(frame.args[1])
    local match1 = ''
    if country and country ~= '' then
        match1 = extractName(country, COUNTRY_NAME)
    end

    local code = lookup(countries, match1)
    if not code then
        -- Not in the built-in table: fall back to a per-country template.
        local countryTemplate = 'Template:ISO 3166 code ' .. match1
        if not templateExists(countryTemplate) then
            return trackingCategory(frame, 'Wikipedia page with obscure country')
        end
        code = frame:expandTemplate{ title = countryTemplate }
    end

    local subdivision = trimmedArg(frame.args[2])
    if not subdivision or subdivision == '' then
        return code
    end

    local match2 = extractName(subdivision, SUBDIVISION_NAME)

    -- Only the first two characters select the data subpage, so codes such
    -- as "GB-ENG" use the "GB" table.
    local code1 = mw.ustring.sub(code, 1, 2)
    if not countrytables[code1] then
        return code
    end

    local subdivisions = mw.loadData('Module:Country extract/' .. code1)
    local code2 = lookup(subdivisions, match2)
    if code2 then
        return code2
    end

    local legacytemplate = 'Template:ISO 3166 code ' .. match1
    if templateExists(legacytemplate) then
        -- Same candidate order as before; each expansion now happens only if
        -- the previous candidates produced nothing usable.
        local candidates = {
            strip(match2, " %(.*%)$"),
            strip(match2, ",.*$"),
            strip(match2, " %(.*$"),
            match2,
        }
        for _, candidate in ipairs(candidates) do
            local expanded = frame:expandTemplate{ title = legacytemplate, args = { candidate } }
            -- A result equal to the bare country code is treated as "no
            -- subdivision found".
            if expanded ~= code and expanded ~= '' then
                return expanded
            end
        end
    end

    return code .. trackingCategory(frame, 'Wikipedia page with obscure country or subdivision')
end

return p