Jump to content

Module:Linb-translit

From Wiktionary, the free dictionary

This module will transliterate text in the Linear B script. It is used to transliterate Mycenaean Greek. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Linb-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
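When the transliteration fails, returns nil. Note, however, that the code below raises an error (rather than returning nil) when it meets an unrecognised symbol or a malformed number.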
-- In the |subst= parameter, as there is not a better way, use @ to force ideogram
-- reading for ambiguous characters, and use + to indicate superimposition.

-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Shorthand for the ustring gsub from the `mw` library; every substitution in
-- this module goes through it.
local gsub = mw.ustring.gsub

-- Special character to ignore formatting in replacements. Only one character
-- because bolding should be ignored, but tables shouldn't.
-- This is a pattern fragment (an apostrophe followed by the lazy repetition
-- operator), so it matches a run of zero or more apostrophes. It is spliced
-- into the patterns built in export.tr.
local format_char = "'-"

-- Numeric value of each number sign handled by export.tr.
-- The empty string maps to 0 so that a decimal place that is absent from a
-- numeral (an empty capture in the numeral pattern) adds nothing to the sum.
local dig_chars = {
	[""] = 0,
	-- Units.
	["𐄇"] = 1,
	["𐄈"] = 2,
	["𐄉"] = 3,
	["𐄊"] = 4,
	["𐄋"] = 5,
	["𐄌"] = 6,
	["𐄍"] = 7,
	["𐄎"] = 8,
	["𐄏"] = 9,
	-- Tens.
	["𐄐"] = 10,
	["𐄑"] = 20,
	["𐄒"] = 30,
	["𐄓"] = 40,
	["𐄔"] = 50,
	["𐄕"] = 60,
	["𐄖"] = 70,
	["𐄗"] = 80,
	["𐄘"] = 90,
	-- Hundreds.
	["𐄙"] = 100,
	["𐄚"] = 200,
	["𐄛"] = 300,
	["𐄜"] = 400,
	["𐄝"] = 500,
	["𐄞"] = 600,
	["𐄟"] = 700,
	["𐄠"] = 800,
	["𐄡"] = 900,
	-- Thousands.
	["𐄢"] = 1000,
	["𐄣"] = 2000,
	["𐄤"] = 3000,
	["𐄥"] = 4000,
	["𐄦"] = 5000,
	["𐄧"] = 6000,
	["𐄨"] = 7000,
	["𐄩"] = 8000,
	["𐄪"] = 9000,
	-- Tens of thousands.
	["𐄫"] = 10000,
	["𐄬"] = 20000,
	["𐄭"] = 30000,
	["𐄮"] = 40000,
	["𐄯"] = 50000,
	["𐄰"] = 60000,
	["𐄱"] = 70000,
	["𐄲"] = 80000,
	["𐄳"] = 90000,
}

-- Lookup table used by export.tr to transliterate individual signs.
-- Keys are a single sign, optionally prefixed with "@"; the "@" keys give the
-- ideogram reading of a sign that also has a plain entry.
-- Each value is a table with these fields:
--   id   sign number as a string; used to build the fallback output "*<id>"
--        when no transliteration is given.
--   tl   transliteration to output (optional).
--   vas  when true and `tl` is absent, "<sup>VAS</sup>" is appended to the
--        "*<id>" fallback. It is not consulted when `tl` is present.
local chars = {
	-- SYLLABOGRAMS --
	["𐀅"] = { id = "1", tl = "da" },
	["𐀫"] = { id = "2", tl = "ro" },
	["𐀞"] = { id = "3", tl = "pa" },
	["𐀳"] = { id = "4", tl = "te" },
	["𐀵"] = { id = "5", tl = "to" },
	["𐀙"] = { id = "6", tl = "na" },
	["𐀇"] = { id = "7", tl = "di" },
	["𐀀"] = { id = "8", tl = "a" },
	["𐀮"] = { id = "9", tl = "se" },
	["𐀄"] = { id = "10", tl = "u" },
	["𐀡"] = { id = "11", tl = "po" },
	["𐀰"] = { id = "12", tl = "so" },
	["𐀕"] = { id = "13", tl = "me" },
	["𐀈"] = { id = "14", tl = "do" },
	["𐀗"] = { id = "15", tl = "mo" },
	["𐀣"] = { id = "16", tl = "qa" },
	["𐀼"] = { id = "17", tl = "za" },
	["𐁐"] = { id = "18" },
	["𐁑"] = { id = "19" },
	["𐀿"] = { id = "20", tl = "zo" },
	["𐀥"] = { id = "21", tl = "qi" },
	["𐁒"] = { id = "22" },
	["𐀘"] = { id = "23", tl = "mu" },
	["𐀚"] = { id = "24", tl = "ne" },
	["𐁀"] = { id = "25", tl = "a2" },
	["𐀬"] = { id = "26", tl = "ru" },
	["𐀩"] = { id = "27", tl = "re" },
	["𐀂"] = { id = "28", tl = "i" },
	["𐁆"] = { id = "29", tl = "pu2" },
	["𐀛"] = { id = "30", tl = "ni" },
	["𐀭"] = { id = "31", tl = "sa" },
	["𐀦"] = { id = "32", tl = "qo" },
	["𐁉"] = { id = "33", tl = "ra3" },
	["𐁓"] = { id = "34" },
	-- *35 is a flipped variant of *34
	["𐀍"] = { id = "36", tl = "jo" },
	["𐀴"] = { id = "37", tl = "ti" },
	["𐀁"] = { id = "38", tl = "e" },
	["𐀠"] = { id = "39", tl = "pi" },
	["𐀹"] = { id = "40", tl = "wi" },
	["𐀯"] = { id = "41", tl = "si" },
	["𐀺"] = { id = "42", tl = "wo" },
	["𐁁"] = { id = "43", tl = "a3" },
	["𐀐"] = { id = "44", tl = "ke" },
	["𐀆"] = { id = "45", tl = "de" },
	["𐀋"] = { id = "46", tl = "je" },
	["𐁔"] = { id = "47" },
	["𐁅"] = { id = "48", tl = "nwa" },
	["𐁕"] = { id = "49" },
	["𐀢"] = { id = "50", tl = "pu" },
	["𐀉"] = { id = "51", tl = "du" },
	["𐀜"] = { id = "52", tl = "no" },
	["𐀪"] = { id = "53", tl = "ri" },
	["𐀷"] = { id = "54", tl = "wa" },
	["𐀝"] = { id = "55", tl = "nu" },
	["𐁖"] = { id = "56" },
	["𐀊"] = { id = "57", tl = "ja" },
	["𐀱"] = { id = "58", tl = "su" },
	["𐀲"] = { id = "59", tl = "ta" },
	["𐀨"] = { id = "60", tl = "ra" },
	["𐀃"] = { id = "61", tl = "o" },
	["𐁇"] = { id = "62", tl = "pte" },
	["𐁗"] = { id = "63" },
	["𐁘"] = { id = "64" },
	["𐀎"] = { id = "65", tl = "ju" },
	["𐁋"] = { id = "66", tl = "ta2" },
	["𐀑"] = { id = "67", tl = "ki" },
	["𐁊"] = { id = "68", tl = "ro2" },
	["𐀶"] = { id = "69", tl = "tu" },
	["𐀒"] = { id = "70", tl = "ko" },
	["𐁃"] = { id = "71", tl = "dwe" },
	["𐀟"] = { id = "72", tl = "pe" },
	["𐀖"] = { id = "73", tl = "mi" },
	["𐀽"] = { id = "74", tl = "ze" },
	["𐀸"] = { id = "75", tl = "we" },
	["𐁈"] = { id = "76", tl = "ra2" },
	["𐀏"] = { id = "77", tl = "ka" },
	["𐀤"] = { id = "78", tl = "qe" },
	["𐁙"] = { id = "79" },
	["𐀔"] = { id = "80", tl = "ma" },
	["𐀓"] = { id = "81", tl = "ku" },
	["𐁚"] = { id = "82" },
	["𐁛"] = { id = "83" },
	-- *84
	["𐁂"] = { id = "85", tl = "au" },
	["𐁜"] = { id = "86" },
	["𐁌"] = { id = "87", tl = "twe" },
	-- *88
	["𐁝"] = { id = "89" },
	["𐁄"] = { id = "90", tl = "dwo" },
	["𐁍"] = { id = "91", tl = "two" },
	-- IDEOGRAMS --
	["𐂀"] = { id = "100", tl = "VIR" },
	-- *101 is a variant of *100
	["𐂁"] = { id = "102", tl = "MUL" },
	-- *103 is a variant of *102
	["𐂂"] = { id = "104", tl = "CERV" },
	["𐂃"] = { id = "105", tl = "EQU" },
	["𐂄"] = { id = "105f", tl = "EQU<sup>f</sup>" },
	["𐂅"] = { id = "105m", tl = "EQU<sup>m</sup>" },
	["@𐀥"] = { id = "106", tl = "OVIS" }, -- same as *21
	["𐂆"] = { id = "106f", tl = "OVIS<sup>f</sup>" },
	["𐂇"] = { id = "106m", tl = "OVIS<sup>m</sup>" },
	["@𐁒"] = { id = "107", tl = "CAP" }, -- same as *22
	["𐂈"] = { id = "107f", tl = "CAP<sup>f</sup>" },
	["𐂉"] = { id = "107m", tl = "CAP<sup>m</sup>" },
	["@𐁂"] = { id = "108", tl = "SUS" }, -- same as *85
	["𐂊"] = { id = "108f", tl = "SUS<sup>f</sup>" },
	["𐂋"] = { id = "108m", tl = "SUS<sup>m</sup>" },
	["@𐀘"] = { id = "109", tl = "BOS" }, -- same as *23
	["𐂌"] = { id = "109f", tl = "BOS<sup>f</sup>" },
	["𐂍"] = { id = "109m", tl = "BOS<sup>m</sup>" },
	["𐄿"] = { id = "110", tl = "Z" },
	["𐄾"] = { id = "111", tl = "V" },
	["𐄼"] = { id = "112", tl = "T" },
	["𐄽"] = { id = "113", tl = "S" },
	["𐄻"] = { id = "114", tl = "Q" },
	["𐄺"] = { id = "115", tl = "P" },
	["𐄹"] = { id = "116", tl = "N" },
	["𐄸"] = { id = "117", tl = "M" },
	["𐄷"] = { id = "118", tl = "L" },
	["𐂎"] = { id = "120", tl = "GRA" },
	["𐂏"] = { id = "121", tl = "HORD" },
	["𐂐"] = { id = "122", tl = "OLIV" },
	["𐂑"] = { id = "123", tl = "AROM" },
	-- *124 is a flipped variant of *125, traditionally transliterated as PYC
	["𐂒"] = { id = "125", tl = "CYP" },
	-- *126
	["𐂓"] = { id = "127", tl = "ka+po" },
	["𐂔"] = { id = "128", tl = "ka+na+ko" },
	["@𐀎"] = { id = "129", tl = "FAR" }, -- same as *65
	["𐂕"] = { id = "130", tl = "OLE" },
	["𐂖"] = { id = "131", tl = "VIN" },
	["𐂗"] = { id = "132" },
	["𐂘"] = { id = "133", tl = "a+re+pa" },
	-- *134
	["𐂙"] = { id = "135", tl = "me+ri", },
	["𐂚"] = { id = "140", tl = "AES" },
	["𐂛"] = { id = "141", tl = "AUR" },
	["𐂜"] = { id = "142" },
	-- *143
	["@𐁉"] = { id = "144", tl = "CROC" }, -- same as *33
	["𐂝"] = { id = "145", tl = "LANA" },
	["𐂞"] = { id = "146" },
	-- *147, *148, *149
	["𐂟"] = { id = "150" },
	["𐂠"] = { id = "151", tl = "CORN" },
	["𐂡"] = { id = "152" },
	["𐂢"] = { id = "153" },
	["𐂣"] = { id = "154" },
	["𐃞"] = { id = "155", vas = true },
	["𐂤"] = { id = "156", tl = "tu+ro2" },
	["𐂥"] = { id = "157" },
	["𐂦"] = { id = "158" },
	["𐂧"] = { id = "159", tl = "TELA" },
	["𐂨"] = { id = "160" },
	["𐂩"] = { id = "161" },
	["𐂪"] = { id = "162", tl = "TUN" },
	["𐂫"] = { id = "163", tl = "ARM" },
	["𐂬"] = { id = "164" },
	["𐂭"] = { id = "165" },
	["𐂮"] = { id = "166" },
	["𐂯"] = { id = "167" },
	["𐂰"] = { id = "168" },
	["𐂱"] = { id = "169" },
	["𐂲"] = { id = "170" },
	["𐂳"] = { id = "171" },
	["𐂴"] = { id = "172" },
	["𐂵"] = { id = "173", tl = "LUNA" },
	["𐂶"] = { id = "174" },
	-- *175
	["𐂷"] = { id = "176", tl = "ARB" },
	["𐂸"] = { id = "177" },
	["𐂹"] = { id = "178" },
	["𐂺"] = { id = "179" },
	["𐂻"] = { id = "180" },
	["𐂼"] = { id = "181" },
	["𐂽"] = { id = "182" },
	["𐂾"] = { id = "183" },
	["𐂿"] = { id = "184" },
	["𐃀"] = { id = "185" },
	-- *186, *187, *188
	["𐃁"] = { id = "189" },
	["𐃂"] = { id = "190" },
	["𐃃"] = { id = "191", tl = "GAL" },
	["𐃟"] = { id = "200", vas = true },
	["𐃠"] = { id = "201", vas = true },
	["𐃡"] = { id = "202", vas = true },
	["𐃢"] = { id = "203", vas = true },
	["𐃣"] = { id = "204", vas = true },
	["𐃤"] = { id = "205", vas = true },
	["𐃥"] = { id = "206", vas = true },
	["𐃦"] = { id = "207", vas = true },
	["𐃧"] = { id = "208", vas = true },
	["𐃨"] = { id = "209", tl = "AMPH", vas = true },
	["𐃩"] = { id = "210", vas = true },
	["𐃪"] = { id = "211", vas = true },
	["𐃫"] = { id = "212", vas = true },
	["𐃬"] = { id = "213", vas = true },
	["𐃭"] = { id = "214", vas = true },
	["𐃮"] = { id = "215", vas = true },
	["𐃯"] = { id = "216", vas = true },
	["𐃰"] = { id = "217", vas = true },
	["𐃱"] = { id = "218", vas = true },
	["𐃲"] = { id = "219", vas = true },
	["𐃄"] = { id = "220" },
	["𐃳"] = { id = "221", vas = true },
	["𐃴"] = { id = "222", vas = true },
	["𐃅"] = { id = "225", tl = "ALV" },
	["𐃵"] = { id = "226", vas = true },
	["𐃶"] = { id = "227", vas = true },
	["𐃷"] = { id = "228", vas = true },
	["𐃸"] = { id = "229", vas = true },
	["𐃆"] = { id = "230", tl = "HAS" },
	["𐃇"] = { id = "231", tl = "SAG" },
	["𐃈"] = { id = "232" },
	["𐃉"] = { id = "233", tl = "PUG" },
	["𐃊"] = { id = "234" },
	-- *235
	["𐃋"] = { id = "236", tl = "GUP" },
	-- *237, *238, *239
	["𐃌"] = { id = "240", tl = "BIG" },
	["𐃍"] = { id = "241", tl = "CUR" },
	["𐃎"] = { id = "242", tl = "CAPS" },
	["𐃏"] = { id = "243", tl = "ROTA" },
	-- *244, variant of *243 ?
	["𐃐"] = { id = "245" },
	["𐃑"] = { id = "246" },
	["𐃒"] = { id = "247", tl = "di+pte" },
	["𐃓"] = { id = "248" },
	["𐃔"] = { id = "249" },
	["𐃹"] = { id = "250", vas = true },
	["𐃕"] = { id = "251" },
	["𐃖"] = { id = "252" },
	["𐃗"] = { id = "253" },
	["𐃘"] = { id = "254", tl = "JAC" },
	["𐃙"] = { id = "255" },
	["𐃚"] = { id = "256" },
	["𐃛"] = { id = "257" },
	["𐃜"] = { id = "258" },
	["𐃝"] = { id = "259" },
	["𐃺"] = { id = "305", vas = true },

	-- enwikt-original ideogram abbreviations
	["@𐀭"] = { id = "31", tl = "LIN" }, -- often SA despite being an ideogram
}

-- Transliterates Linear B text.
--
-- Parameters:
--   text  the string to transliterate. A sign may be preceded by "@" to select
--         its "@"-prefixed entry in `chars` (the ideogram reading).
--   lang  language code; not used by this function.
--   sc    script code; not used by this function.
--
-- Returns the transliterated string.
--
-- Raises an error when a run of number signs is not a well-formed numeral
-- (at most one sign per decimal place, highest place first), or when a sign,
-- together with its optional "@" prefix, has no entry in `chars`.
function export.tr(text, lang, sc)
	-- Put a hyphen between syllabograms. The left side may be a syllabogram or
	-- "]", optionally followed by formatting apostrophes and at most one
	-- bracket or slash. Two passes are needed because gsub matches cannot
	-- overlap: a syllabogram consumed as the right-hand side of one match
	-- cannot also be the left-hand side of the next one in the same pass.
	for _ = 1, 2 do
		text = gsub(text, "([𐀀-𐁝%]]"..format_char .. "[%[%]%{%}⟦⟧⟨⟩/]?" .. format_char..")([𐀀-𐁝])", "%1-%2")
	end
	-- Numerals. Each run of number signs is replaced by the sum of the values
	-- of its signs.
	text = gsub(text, "[𐄇-𐄳]+", function(str)
		-- The anchored pattern accepts at most one sign per decimal place, in
		-- descending order; an absent place is captured as "", which
		-- dig_chars maps to 0.
		local ret, count = gsub(str, "^([𐄫-𐄳]?)([𐄢-𐄪]?)([𐄙-𐄡]?)([𐄐-𐄘]?)([𐄇-𐄏]?)$", function (u5, u4, u3, u2, u1)
			return dig_chars[u5] + dig_chars[u4] + dig_chars[u3] + dig_chars[u2] + dig_chars[u1]
		end)
		-- No replacement means the signs were repeated or out of order.
		if count ~= 1 then
			error("The number " .. str .. " is weird. Are you sure?")
		end
		return ret
	end)
	-- Syllabograms and ideograms.
	text = gsub(text, "@?[𐀀-𐃺𐄷-𐄿]", function(x_text)
		-- Declared local so that the lookup result does not leak into the
		-- global environment.
		local entry = chars[x_text]
		if entry == nil then error("The symbol " .. x_text .. " is not recognised.") end
		-- Signs without a transliteration are shown by number, e.g. "*18",
		-- with a VAS superscript when the entry is flagged as such.
		return entry.tl or ("*"..entry.id..(entry.vas and "<sup>VAS</sup>" or ""))
	end)
	-- Symbols.
	-- Drop the hyphen inserted after a "]" that directly follows a word
	-- separator (spaces and formatting apostrophes may intervene).
	text = gsub(text, "(𐄀 ?"..format_char.." ?%] ?"..format_char.." ?)-", "%1")
	-- The word separator becomes a comma followed by a space.
	text = gsub(text, " ?𐄀 ?", ", ")
	-- Move the space from before a closing bracket to after it...
	text = gsub(text, " ([%]%}⟧⟩]) *", "%1 ")
	-- ...and the space from after an opening bracket to before it.
	text = gsub(text, " *([%[%{⟦⟨]) ", " %1")
	-- The end.
	return text
end

-- Hand the table of public functions back to whoever loads this module.
return export