Module:Unicode data: Difference between revisions
Split noncharacter per request |
m 1 revision imported |
||
(4 intermediate revisions by 3 users not shown) | |||
Line 440: | Line 440: | ||
local Latn = false | local Latn = false | ||
local i = 0; -- indexer for use in error messages | |||
for codepoint in mw.ustring.gcodepoint(str) do | for codepoint in mw.ustring.gcodepoint(str) do | ||
i = i + 1; -- bump the indexer | |||
local script = lookup_script(codepoint) | local script = lookup_script(codepoint) | ||
Line 448: | Line 450: | ||
elseif not (script == "Zyyy" or script == "Zinh" | elseif not (script == "Zyyy" or script == "Zinh" | ||
or script == "Zzzz") then | or script == "Zzzz") then | ||
return false | return false, i -- abandon as not Latn; identify the offending character's position | ||
end | end | ||
end | end | ||
return Latn | return Latn, (not Latn and i) or nil -- when <Latn> false, return offending character's position as second return value; nil else | |
end | end | ||
Line 547: | Line 549: | ||
local codepoint = get_codepoint(frame.args, 2) | local codepoint = get_codepoint(frame.args, 2) | ||
return (func(codepoint)) -- Adjust to one result. | return (func(codepoint)) -- Adjust to one result. | ||
end | |||
end | |||
function p.lookup_kCantonese(codepoint) | |||
local data = loader[('Unihan/kCantonese/%02X'):format(floor(codepoint / 0x1000))] | |||
if data then | |||
return data[codepoint] | |||
end | end | ||
end | end | ||
return p | return p |