-- Module:Language/scripts
-- Export table: the public functions below are attached to it and it is
-- returned at the end of the module.
local p = {}
-- Local aliases for library functions used below, plus UTF8Char, a Lua pattern
-- made of one lead byte followed by any number of continuation bytes.
-- NOTE(review): UTF8Char is not referenced anywhere in the visible code.
local gsub = mw.ustring.gsub local length = mw.ustring.len local floor = math.floor local UTF8Char = "[%z\1-\127\194-\244][\128-\191]*"
-- Codepoint data read by p.codepointToScript through its `individual` and
-- `ranges` fields.
local codepoint_data = mw.loadData("Module:language/scripts/codepoints")
-- Script data indexed by script code; entries are read through their
-- `characters` field.
local data = require("Module:Language/scripts/data")
--[[
Returns the `characters` entry of the script named by the first argument.

frame.args[1] is the script code used to index the script data table.

Returns the script's `characters` value; "Please supply a valid script code."
when the code is missing or has no entry in the data table; or a
"No characters found for <code>." message when the entry exists but has no
`characters` field.
]]
function p.print(frame)
	local scriptCode = frame.args[1]
	local scriptData = scriptCode and data[scriptCode]

	-- Reject a missing or unknown code up front: the code must never be
	-- concatenated into a message while it is nil, and the help text must be
	-- returned to the caller rather than indexed as if it were script data.
	if not scriptData then
		return "Please supply a valid script code."
	end

	return scriptData.characters or ("No characters found for " .. scriptCode .. ".")
end
-- NOTE(review): this table is never read in the visible code; the functions
-- below that use the name `script` declare their own local or parameter,
-- which shadows it.
local script = {}
-- Based on the Script:countCharacters() function of Module:scripts on Wiktionary.
--
-- Counts how many characters of `text` match the character class built from
-- data[scriptCode].characters.
--
-- text:       string to examine.
-- scriptCode: key into the script data table.
-- Returns the number of matching characters, or 0 when the script code has no
-- entry in the data table or the entry has no `characters` field.
local function countCharacters(text, scriptCode)
	local scriptData = data[scriptCode]
	-- An unknown script code is treated like a script without character data
	-- instead of raising an "attempt to index a nil value" error.
	local characters = scriptData and scriptData["characters"]
	if not characters then
		return 0
	end

	-- The second return value of gsub is the number of substitutions made,
	-- i.e. the number of characters that matched the class.
	local _, count = gsub(text, "[" .. characters .. "]", "")
	return count
end
--[[
Reports whether `text` is predominantly written in the Latin script.

text: a string (a number is converted with tostring first).

Returns false when fewer than 25% of the characters in the text are counted
as "Latn", true otherwise (an empty string therefore yields true, because
0 is not less than 0). Returns nil when `text` is neither a string nor a
number.
]]
function p.isLatn(text)
	if type(text) == "number" then
		text = tostring(text)
	end
	-- Test the value itself: tostring() always produces a string, so checking
	-- the type of its result can never reject anything.
	if type(text) ~= "string" then
		return nil
	end

	local latinCount = countCharacters(text, "Latn")
	-- At least a quarter of the characters must be Latin.
	return latinCount >= length(text) / 4
end
-- Frame-based wrapper around p.isLatn: applies it to the first positional
-- argument of the frame and returns its result unchanged.
function p.Latin(frame)
	local firstArgument = frame.args[1]
	local verdict = p.isLatn(firstArgument)
	return verdict
end
-- Script codes that p.getScript disregards when deciding which script a text
-- is written in. The result is looked up by code (ignore_script[code]).
local ignore_script = require("Module:table").listToSet({
	"Zinh",
	"Zmth",
	"Zsym",
	"Zsye",
	"Zxxx",
	"Zyyy",
	"Zzzz",
})
-- Applies `func` to every entry of `t` and collects the results in a new array.
--
-- func: called as func(value, key, t).
-- t:    table to traverse. When t[1] is set it is walked with ipairs and each
--       result keeps the index of its source; otherwise it is walked with
--       pairs and the results are numbered in traversal order.
-- Returns the array of results.
local function map(func, t)
	local results = {}

	if not t[1] then
		local position = 0
		for key, value in pairs(t) do
			position = position + 1
			results[position] = func(value, key, t)
		end
		return results
	end

	for index, value in ipairs(t) do
		results[index] = func(value, index, t)
	end
	return results
end
-- Returns a new table holding the entries of `t` for which `func` returns a
-- true value.
--
-- t:    table to traverse. When t[1] is set it is walked with ipairs and the
--       kept values are renumbered consecutively from 1; otherwise it is
--       walked with pairs and the kept values retain their original keys.
-- func: predicate, called as func(value, key, t).
local function filter(t, func)
	local kept = {}

	if not t[1] then
		for key, value in pairs(t) do
			if func(value, key, t) then
				kept[key] = value
			end
		end
		return kept
	end

	local keptCount = 0
	for index, value in ipairs(t) do
		if func(value, index, t) then
			keptCount = keptCount + 1
			kept[keptCount] = value
		end
	end
	return kept
end
-- Comparator for table.sort: orders two range arrays by their first element.
local function sortRange(range1, range2)
	local start1, start2 = range1[1], range2[1]
	return start1 < start2
end
--[[
Binary search: efficient for long lists of codepoint ranges.

ranges: array of range arrays such as { 0x41, 0x7A, "Latn" }, with the number
        of entries stored in its `length` field; may be nil.
value:  number to look for.

Returns the range array whose first and second elements enclose `value`
(the whole array, so that the caller can cache it), or nil when `ranges` is
nil, empty, or contains no enclosing range.
]]
local function binarySearch(ranges, value)
	if not ranges then
		return nil
	end

	local low, high = 1, ranges.length
	if high == 0 then
		return nil
	end

	while low <= high do
		local middle = floor((low + high) / 2)
		local candidate = ranges[middle]

		if value < candidate[1] then
			-- Value lies below this range: continue in the lower half.
			high = middle - 1
		elseif value <= candidate[2] then
			return candidate
		else
			-- Value lies above this range: continue in the upper half.
			low = middle + 1
		end
	end

	return nil
end
--[[ -- For debugging local function toHex(number) return ("0x%X"):format(number) end
local function logRange(range, number) return mw.log(toHex(range[1]), toHex(number) .. " (" .. mw.ustring.char(number) .. ")", toHex(range[2]), range[3]) end --]]
-- Linear scan over `ranges`, an array of { first, last, value } arrays that is
-- expected to be ordered by `first`.
--
-- Returns the third element of the first range enclosing `number`. The scan
-- gives up (returning nil) as soon as it meets a range that starts above
-- `number`; nothing is returned when the array is exhausted.
local function lookUpInOrder(number, ranges)
	for _, entry in ipairs(ranges) do
		local first, last = entry[1], entry[2]
		if number < first then
			return nil
		end
		if number <= last then
			return entry[3]
		end
	end
end
-- Codepoint ranges that have already been matched are kept here, in case a
-- later character falls into the same range.
local rangesCache = {}
--[=[
Maps a codepoint to a script code, using the data module
Module:Language/scripts/codepoints.

The data module uses the official Unicode script codes.

Lookup order: the codepoint-to-script map (`individual`), then the cache of
ranges matched earlier, then a binary search of the full array of ranges.
A range found by the binary search is added to the cache, which is kept
sorted by range start.

Returns the matching script code, or "Zzzz" when nothing matches.
Raises an error when `codepoint` is not a number.
]=]
function p.codepointToScript(codepoint)
	local argumentType = type(codepoint)
	if argumentType ~= "number" then
		error("Argument to codepointToScript should be a number, but its type is " .. argumentType .. ".")
	end

	local exactMatch = codepoint_data.individual[codepoint]
	if exactMatch then
		return exactMatch
	end

	local cachedScript = lookUpInOrder(codepoint, rangesCache)
	if cachedScript then
		return cachedScript
	end

	local matchedRange = binarySearch(codepoint_data.ranges, codepoint)
	if not matchedRange then
		return "Zzzz"
	end

	-- Remember the range and restore the ordering lookUpInOrder relies on.
	table.insert(rangesCache, matchedRange)
	table.sort(rangesCache, sortRange)
	return matchedRange[3]
end
-- Returns the script code for `char` by passing the codepoint obtained from
-- mw.ustring.codepoint to p.codepointToScript.
local function charToScript(char)
	local codepoint = mw.ustring.codepoint(char)
	return p.codepointToScript(codepoint)
end
-- Tallies the scripts used in `text`.
--
-- text: string to analyse; any other type raises an error.
-- Returns a table mapping each script code reported by p.codepointToScript to
-- the number of codepoints of `text` assigned to it.
function p.countScripts(text)
	if type(text) ~= "string" then
		error("countScripts requires a string")
	end

	local toScript = p.codepointToScript
	local tally = {}

	for codepoint in mw.ustring.gcodepoint(text) do
		local code = toScript(codepoint)
		if code then
			tally[code] = (tally[code] or 0) + 1
		end
	end

	return tally
end
-- Determines the single script a text is written in.
--
-- Script codes listed in ignore_script are disregarded. Returns the one
-- remaining script code, or nil when none remains; raises an error when more
-- than one remains.
function p.getScript(text)
	local candidates = {}
	local candidateCount = 0

	-- Collect every counted script code that is not on the ignore list.
	for code in pairs(p.countScripts(text)) do
		if not ignore_script[code] then
			candidateCount = candidateCount + 1
			candidates[candidateCount] = code
		end
	end

	if candidates[2] then
		error("More than one script was found for " .. text)
	end
	return candidates[1]
end
-- Builds a bulleted list with one line per frame argument, in the form
-- "* <argument>: <script> (<count>), <script> (<count>), ...", using the
-- counts from p.countScripts. Lines are joined with newlines.
function p.showScripts(frame)
	-- Formats one entry of a script tally, e.g. "Latn (5)".
	local function describeCount(count, script)
		return script .. " (" .. count .. ")"
	end

	-- Formats the bullet line for a single argument.
	local function describeArgument(arg)
		local summaries = map(describeCount, p.countScripts(arg))
		return "* " .. arg .. ": " .. table.concat(summaries, ", ")
	end

	local lines = map(describeArgument, frame.args)
	return table.concat(lines, "\n")
end
return p