Module:Formatnum/sandbox

From Wikimedia Commons, the free media repository
Jump to navigation Jump to search
Lua
CodeDiscussionEditHistoryLinksLink count Subpages:DocumentationTestsResultsSandboxLive code All modules

Documentation for this module may be created at Module:Formatnum/sandbox/doc

Code

-- This module is intended to replace the functionality of Template:Formatnum and related templates.
local p = {}

-- Template entry point: reads the parameters given to the calling template and
-- returns the formatted number.
-- Parameters read from the parent frame:
--   1 or number : the value to format (defaults to an empty string);
--   2 or lang   : the requested language code (defaults to an empty string);
--   prec        : the requested number of decimals (optional);
--   sep         : when not empty, the compact form is requested from p.formatNum.
function p.main(frame)
    local params    = frame:getParent().args
    local value     = params[1] or params.number or ''
    local requested = params[2] or params.lang or ''
    local precision = params.prec or ''
    local separator = params.sep or ''

    -- Resolve the language code within MediaWiki's caller frame.
    local lang
    if requested == 'arabic-indic' then
        -- Kept only for back-compatibility: "arabic-indic" is not a supported language code.
        -- It is mapped to "fa" (better support than "ks").
        lang = 'fa'
    elseif requested ~= '' and mw.language.isSupportedLanguage(requested) then
        -- Supported codes are valid language codes (usable in page titles, all lowercase),
        -- even though they are not necessarily known languages with a localized name, and
        -- they may deviate from the standard BCP47 format for specific needs in MediaWiki.
        lang = requested
    else
        -- Empty/unspecified and unsupported languages are treated here in Commons using
        -- the user's chosen language, instead of the content language of the wiki.
        lang = frame:callParserFunction('Int', 'Lang')
    end

    local compact = separator ~= ''
    return p.formatNum(value, lang, precision, compact)
end

-- Substitution of decimal digits for languages not supported by mw.language:formatNum() in core Lua libraries for MediaWiki.
-- Each entry of `localizedDigits` maps the ten ASCII digits '0'..'9' (as one-character strings)
-- to the matching digits of another numbering system. Entries are keyed either by a pseudo-tag
-- '*-<numbering system>' or by a language code aliased to one of these pseudo-tags.
local localizedDigits = {}; do
    -- Builds the substitution table for a numbering system whose ten digits are encoded
    -- contiguously, given the code point of its digit zero.
    local function makeDigits(zero)
        local digits = {}
        for i = 0, 9 do
            digits[string.char(0x30 + i)] = mw.ustring.char(zero + i)
        end
        return digits
    end
    -- Numbering systems, sorted by the code point of their digit zero.
    for k, v in pairs({
        arab      = 0x00660, -- Arabic
        arabext   = 0x006F0, -- Extended Arabic
        nkoo      = 0x007C0, -- N’ko
        deva      = 0x00966, -- Devanagari
        beng      = 0x009E6, -- Bengali
        guru      = 0x00A66, -- Gurmukhi
        gujr      = 0x00AE6, -- Gujarati
        orya      = 0x00B66, -- Oriya (or Odiya)
        taml      = 0x00BE6, -- Tamil
        telu      = 0x00C66, -- Telugu
        knda      = 0x00CE6, -- Kannada (or Kannara)
        mlymold   = 0x00D66, -- Malayalam (old)
        sinhold   = 0x00DE6, -- Sinhalese (old)
        thai      = 0x00E50, -- Thai
        laoo      = 0x00ED0, -- Lao
        tibt      = 0x00F20, -- Tibetan
        mymr      = 0x01040, -- Myanmar
        mymrshn   = 0x01090, -- Myanmar (Shan variant)
        khmr      = 0x017E0, -- Khmer
        mongold   = 0x01810, -- Mongolian (old)
        limb      = 0x01946, -- Limbu
        talu      = 0x019D0, -- New Tai Lue
        lanahor   = 0x01A80, -- Tai Tham Hora
        lana      = 0x01A90, -- Tai Tham Tham (Lanna)
        bali      = 0x01B50, -- Balinese
        sund      = 0x01BB0, -- Sundanese
        lepc      = 0x01C40, -- Lepcha (Rong)
        olck      = 0x01C50, -- Ol Chiki
        vaii      = 0x0A620, -- Vai
        saur      = 0x0A8D0, -- Saurashtra
        kali      = 0x0A900, -- Kayah Li
        java      = 0x0A9D0, -- Javanese
        mymrtai   = 0x0A9F0, -- Myanmar Tai Laing (distinct from the basic Myanmar digits at U+1040)
        cham      = 0x0AA50, -- Cham
        mtei      = 0x0ABF0, -- Meetei Mayek
        zyyyfull  = 0x0FF10, -- Fullwidth (for CJK)
        osma      = 0x104A0, -- Osmanya
        rohg      = 0x10D30, -- Hanifi Rohingya
        brah      = 0x11066, -- Brahmi
        sora      = 0x110F0, -- Sora Sompeng
        cakm      = 0x11136, -- Chakma
        shrd      = 0x111D0, -- Sharada
        sind      = 0x112F0, -- Khudawadi (Sindhi)
        newa      = 0x11450, -- Newa
        tirh      = 0x114D0, -- Tirhuta
        modi      = 0x11650, -- Modi
        takr      = 0x116C0, -- Takri
        ahom      = 0x11730, -- Ahom
        wara      = 0x118E0, -- Warang Citi
        diak      = 0x11950, -- Dives Akuru
        bhks      = 0x11C50, -- Bhaiksuki
        gonm      = 0x11D50, -- Masaram Gondi
        gong      = 0x11DA0, -- Gunjala Gondi
        mroo      = 0x16A60, -- Mro
        hmng      = 0x16B50, -- Pahawh Hmong
        zmthb     = 0x1D7CE, -- Mathematical Bold
        zmthd     = 0x1D7D8, -- Mathematical Double-Struck
        zmths     = 0x1D7E2, -- Mathematical Sans-Serif
        zmthsb    = 0x1D7EC, -- Mathematical Sans-Serif Bold
        zmthm     = 0x1D7F6, -- Mathematical Monospace (distinct from Mathematical Bold at U+1D7CE)
        hmnp      = 0x1E140, -- Nyiakeng Puachue Hmong
        wcho      = 0x1E2F0, -- Wancho
        adlm      = 0x1E950, -- Adlam
        zyyysegm  = 0x1FBF0, -- Segmented
    }) do
        localizedDigits['*-' .. k] = makeDigits(v)
    end
    -- Aliases: every target must be one of the '*-<numbering system>' keys created above,
    -- otherwise the alias silently resolves to nil and no substitution takes place.
    for k, v in pairs({
        ['*-aran'] = '*-arabext', -- Alias for ISO 15924 script variant code.
        -- Languages with missing support in mw.language:formatNum
        ['ml-old'] = '*-mlymold', -- Legacy alias in Wikimedia Commons.
        ta         = '*-taml',
        te         = '*-telu',
        th         = '*-thai',
    }) do
        localizedDigits[k] = localizedDigits[v]
    end
end

-- Adds one unit in the last place to an unsigned string of ASCII decimal digits
-- (e.g. '19' -> '20', '99' -> '100'). It works on the string only, so that no digit
-- is lost to the limited precision of tostring() on large values.
local function incrementDigits(digits)
    local pos = string.len(digits)
    -- Trailing nines roll over to zeroes and propagate the carry to the left.
    while pos > 0 and string.sub(digits, pos, pos) == '9' do
        pos = pos - 1
    end
    local zeroes = string.rep('0', string.len(digits) - pos)
    if pos == 0 then
        return '1' .. zeroes -- Every digit was a nine: the result gets one more digit.
    end
    return string.sub(digits, 1, pos - 1) .. string.char(string.byte(digits, pos) + 1) .. zeroes
end

-- Formats a number for a given language.
-- Parameters:
--   number  : the value to format (number, or string convertible with tonumber());
--             anything else is returned unchanged.
--   lang    : language code (string, case-insensitive). When it is a known language tag, digits,
--             decimal separator and grouping come from mw.language:formatNum(); a key of
--             `localizedDigits` additionally substitutes the ASCII digits.
--   prec    : requested number of decimals (number or numeric string); the value is padded with
--             zeroes or rounded half away from zero. Ignored when missing, invalid or negative.
--   compact : when true, no group separators are inserted and the scientific notation is never
--             bracketed nor rendered as HTML.
-- Returns the formatted string (or `number` itself when it is not a valid number).
function p.formatNum(number, lang, prec, compact)
    -- Do not alter the specified value when it is not a valid number, return it as is.
    local value = tonumber(number)
    if value == nil then
        return number
    end
    -- NaN and infinities have no decimals to pad or round: appending decimals to their
    -- textual form would produce a string that tonumber() can no longer convert.
    local finite = value == value and value ~= math.huge and value ~= -math.huge
    -- Basic ASCII-only formatting (without paddings).
    number = tostring(value) -- May return an English number ASCII string with scientific notation.
    -- Check the presence of an exponent (handled separately from the mantissa, with its sign
    -- but without grouping separators).
    local exponent
    local sep = string.find(number, '[Ee]')
    if sep then
        -- Strip the '[Ee]' found in the exponent, keep its sign.
        exponent = string.sub(number, sep + 1)
        -- Leading part of the number with its sign, and possible dot and decimals.
        number = string.sub(number, 1, sep - 1)
    end
    -- Check the minimum precision requested (in the number string now without its exponent part).
    prec = finite and tonumber(prec) or nil -- nil if not specified as a true number.
    if prec then
        prec = math.floor(prec)
        if prec < 0 then
            prec = nil -- Discard an incorrect precision (must be positive or zero).
        -- tostring() prints at most 14 significant digits beside the exponent part, e.g.:
        -- tostring(1.00000000000006) displays 13 decimals after '.' (all digits but the last '6');
        -- tostring(9.99999999999995) displays '10';
        -- tostring(99999999999999.5) displays '1e+14';
        -- tostring(0.00000000000005) displays '5e-14'.
        -- There may also be up to 3 non-significant zeroes after the dot before the first
        -- significant decimal, so there may be up to 17 decimals including these 3 zeroes.
        -- Below 1e-4 or starting at 1e+14, tostring uses an exponential notation with 1 digit before the '.'
        elseif prec > 14 then
            local magnitude = math.abs(tonumber(number))
            if magnitude >= 1 or magnitude < 1e-4 then
                prec = 14
            elseif prec > 15 and magnitude >= 1e-1 then
                prec = 15
            elseif prec > 16 and magnitude >= 1e-2 then
                prec = 16
            elseif prec > 17 then
                prec = 17
            end
        end
    end
    -- Preprocess the precision of number in the ASCII-only string (round it if necessary).
    if prec then
        sep = string.find(number, '.', 1, true) -- Plain search, no pattern.
        if sep then
            local len = string.len(number)
            local last = sep + prec -- Index of the last character to keep (the dot itself when prec is 0).
            if last < len then -- Removing decimals in excess requires rounding.
                if string.sub(number, last + 1, last + 1) >= '5' then
                    -- Rounding up: increment the kept digits (integer part and decimals together,
                    -- without sign nor dot) so that a carry propagates into the integer part,
                    -- then put the dot back before the same number of decimals.
                    local first = string.sub(number, 1, 1) == '-' and 2 or 1
                    local digits = incrementDigits(
                        string.sub(number, first, sep - 1) .. string.sub(number, sep + 1, last))
                    local split = string.len(digits) - (last - sep)
                    number = string.sub(number, 1, first - 1)
                        .. string.sub(digits, 1, split) .. '.' .. string.sub(digits, split + 1)
                else
                    -- Rounding down the decimals is just truncating them.
                    number = string.sub(number, 1, last)
                end
                if string.sub(number, -1) == '.' then
                    -- Special case where rounding up or down leaves a trailing dot.
                    number = string.sub(number, 1, -2)
                end
            elseif last > len then -- Adding missing decimals (trailing zeroes).
                number = number .. string.rep('0', last - len)
            end
        elseif prec > 0 then -- Adding missing decimals (trailing zeroes).
            number = number .. '.' .. string.rep('0', prec)
        end
    end
    -- Language-dependent grouping, and remapping of signs, digits, and separators in number and exponent.
    if type(lang) == 'string' then
        lang = string.lower(lang)
        if mw.language.isKnownLanguageTag(lang) then
            -- Convert number to localized digits, decimal separator, and group separators.
            local language = mw.getLanguage(lang) -- Caveat: can load localized resources for up to 20 languages.
            -- NOTE(review): the string is converted back to a Lua number here, which presumably
            -- discards the trailing zeroes padded above for the requested precision -- confirm
            -- against the behaviour of mw.language:formatNum().
            -- Non-finite values are passed as the original number rather than re-parsed from text.
            number = language:formatNum(finite and tonumber(number) or value, { noCommafy = compact })
            -- Localize the base-10 exponent (with its sign but without grouping separators).
            if exponent then
                exponent = language:formatNum(tonumber(exponent), { noCommafy = true })
            end
        end
    end
    if exponent then
        -- A few Indic languages need bracketing to make sense for the scientific notation with a Latin 'e'. See CLDR:
        -- https://unicode-org.github.io/cldr-staging/charts/latest/by_type/numbers.number_formatting_patterns.html#Standard_Patterns_
        local notation = ({
            gu = 1, hi = 1, mai = 1, mr = 1, pa = 1, sa = 1, html = 2,
        })[not compact and lang or '']
        if notation == 1 then
            number = '[' .. number .. 'e' .. exponent .. ']'
        elseif notation == 2 then
            number = number .. '&#x202F;×&#x202F;10<sup>' .. exponent .. '</sup>'
        else
            number = number .. 'e' .. exponent
        end
    end
    -- Special cases for substitution of ASCII digits (missing support in Lua core libraries for some languages, known or not).
    if lang and localizedDigits[lang] then
        -- Assigning to a single variable keeps only the first result of gsub (the new string).
        number = string.gsub(number, '[0-9]', localizedDigits[lang])
    end
    return number
end

return p