1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
|
--===========================================================================--
-- Serbian --
--===========================================================================--
local translit = thirddata.translit
local pcache = translit.parser_cache
local lpegmatch = lpeg.match
-- Special thanks to Mojca Miklavec and Arthur Reutenauer for their
-- assistance in creating these transliteration routines.
if not translit.done_serbian then
--------------------------------------------
-- Lowercase Serbian (Cyrillic -> Latin) --
--------------------------------------------
-- One entry per lowercase Cyrillic letter (30 in total), keyed by the
-- Cyrillic character.  Three letters (љ, њ, џ) expand to the Latin digraphs
-- lj, nj and dž; every other letter maps to a single Latin character.
-- The literal is passed through translit.make_add_dict, which is defined
-- outside this file; presumably that is what lets dictionaries be combined
-- with `+`, as sr() does further down -- confirm in the main module.
translit.sr_tolt_lower = translit.make_add_dict{
["а"] = "a",
["б"] = "b",
["в"] = "v",
["г"] = "g",
["д"] = "d",
["ђ"] = "đ",
["е"] = "e",
["ж"] = "ž",
["з"] = "z",
["и"] = "i",
["ј"] = "j",
["к"] = "k",
["л"] = "l",
["љ"] = "lj",
["м"] = "m",
["н"] = "n",
["њ"] = "nj",
["о"] = "o",
["п"] = "p",
["р"] = "r",
["с"] = "s",
["т"] = "t",
["ћ"] = "ć",
["у"] = "u",
["ф"] = "f",
["х"] = "h",
["ц"] = "c",
["ч"] = "č",
["џ"] = "dž",
["ш"] = "š",
}
-- Also make the dictionary reachable through translit.tables under a
-- descriptive name.
translit.tables["Serbian Cyr->Lat Transliteration lowercase"] = translit.sr_tolt_lower
--------------------------------------------
-- Uppercase Serbian (Cyrillic -> Latin) --
--------------------------------------------
-- Uppercase counterpart of the lowercase dictionary: one entry per capital
-- Cyrillic letter.  The three digraph letters (Љ, Њ, Џ) map to title-case
-- digraphs (Lj, Nj, Dž), not to all-caps ones; all-caps digraphs (LJ, NJ, DŽ)
-- have no entry in this table.
translit.sr_tolt_upper = translit.make_add_dict{
["А"] = "A",
["Б"] = "B",
["В"] = "V",
["Г"] = "G",
["Д"] = "D",
["Ђ"] = "Đ",
["Е"] = "E",
["Ж"] = "Ž",
["З"] = "Z",
["И"] = "I",
["Ј"] = "J",
["К"] = "K",
["Л"] = "L",
["Љ"] = "Lj",
["М"] = "M",
["Н"] = "N",
["Њ"] = "Nj",
["О"] = "O",
["П"] = "P",
["Р"] = "R",
["С"] = "S",
["Т"] = "T",
["Ћ"] = "Ć",
["У"] = "U",
["Ф"] = "F",
["Х"] = "H",
["Ц"] = "C",
["Ч"] = "Č",
["Џ"] = "Dž",
["Ш"] = "Š",
}
-- Also make the dictionary reachable through translit.tables under a
-- descriptive name.
translit.tables["Serbian Cyr->Lat Transliteration uppercase"] = translit.sr_tolt_upper
--- Return a new table with the keys and values of `t` swapped.
-- `t` itself is left unmodified.  If several keys of `t` share one value,
-- only one of them survives in the result (whichever is visited last).
local function __inverse_tab (t)
    local flipped = { }
    local key, value = next(t)
    while key ~= nil do
        flipped[value] = key
        key, value = next(t, key)
    end
    return flipped
end
-- The Latin -> Cyrillic dictionaries are the Cyrillic -> Latin ones with
-- keys and values swapped.
local lower_flipped = __inverse_tab(translit.sr_tolt_lower)
translit.sr_tocy_lower = translit.make_add_dict(lower_flipped)
local upper_flipped = __inverse_tab(translit.sr_tolt_upper)
translit.sr_tocy_upper = translit.make_add_dict(upper_flipped)
--- Good reading up front:
--- <http://en.wikipedia.org/wiki/User:Aleksandar_Šušnjar/Serbian_Wikipedia's_Challenges#Real-time_transliteration_for_display>
--- <http://www.vokabular.org/forum/index.php?topic=3817.15>
-- Exception list: lowercase Latin word stems (keys) and their Cyrillic
-- spelling (values).  In each stem a Latin "nj" or "dž" stands for two
-- separate letters (н + ј, д + ж) rather than for the single letter њ / џ
-- that the plain dictionaries would produce.  The loop following this table
-- derives capitalised and all-caps variants and builds patterns for both
-- directions from these pairs.
local except = {
-- "nj" spelled as н + ј
["konjug"] = "конјуг",
["konjunk"] = "конјунк",
["injekc"] = "инјекц",
["injunkt"] = "инјункт",
["panjelin"] = "панјелин",
["tanjug"] = "танјуг",
["vanjezič"] = "ванјезич",
["vanjadransk"] = "ванјадранск",
-- "dž" spelled as д + ж
["nadžanj"] = "наджањ",
["nadždrel"] = "надждрел",
["nadžet"] = "наджет",
["nadživ"] = "наджив",
["nadžnj"] = "наджњ",
["nadžup"] = "наджуп",
["odžal"] = "оджал",
["odžar"] = "оджар",
["odživ"] = "оджив",
["odžubor"] = "оджубор",
["odžur"] = "оджур",
["odžvak"] = "оджвак",
["podžanr"] = "поджанр",
["podže"] = "подже", -- “поджећи”
}
local P = lpeg.P
-- Use the first UTF-8 string library that is available.
local utf8 = unicode and unicode.utf8 or utf or utf8
local sub = utf8.sub
local toupper = lpeg.patterns.toupper
--- Uppercase `s` by matching it against the toupper pattern.
local function upper (s)
    return lpegmatch (toupper, s)
end
-- Accumulated alternatives over all exception stems.  The p_i_* patterns
-- match a bare stem; the others require a space in front of it.
local p_tocy, p_i_tocy, p_tolt, p_i_tolt
--- Append `extra` as a further alternative to `choice`; on the first call,
-- when `choice` is still nil, the result is just `extra`.
local function alternate (choice, extra)
    if choice then
        return choice + extra
    end
    return extra
end
for latin, cyrillic in next, except do -- generating exception patterns for both sides
    -- three spellings per stem: as listed, first letter capitalised, all caps
    local Latin = upper(sub(latin, 1, 1)) .. sub(latin, 2)
    local Cyrillic = upper(sub(cyrillic, 1, 1)) .. sub(cyrillic, 2)
    local LATIN, CYRILLIC = upper(latin), upper(cyrillic)
    local to_cyrillic = P(latin) / cyrillic + P(Latin) / Cyrillic + P(LATIN) / CYRILLIC
    local to_latin = P(cyrillic) / latin + P(Cyrillic) / Latin + P(CYRILLIC) / LATIN
    p_tocy = alternate(p_tocy, P" " * to_cyrillic)
    p_i_tocy = alternate(p_i_tocy, to_cyrillic)
    p_tolt = alternate(p_tolt, P" " * to_latin)
    p_i_tolt = alternate(p_i_tolt, to_latin)
end
-- A literal "*" acts as the hint marker; it is replaced by the empty string.
local _p_hintchar = P"*" / ""
-- Latin letters for which hint patterns are generated.
local hintme = "dln"
local _p_tocy_hint, _p_tolt_hint
--- Extend `choice` with the alternatives `first` and `second`, in that
-- order; on the first call, when `choice` is still nil, start a new choice.
local function extend (choice, first, second)
    if choice then
        return choice + first + second
    end
    return first + second
end
for latin in hintme:utfcharacters() do
    local cyrillic = translit.sr_tocy_lower[latin]
    local LATIN, CYRILLIC = upper(latin), upper(cyrillic)
    -- lowercase and uppercase form of the letter, for either direction
    _p_tocy_hint = extend(_p_tocy_hint, P(latin) / cyrillic, P(LATIN) / CYRILLIC)
    _p_tolt_hint = extend(_p_tolt_hint, P(cyrillic) / latin, P(CYRILLIC) / LATIN)
end
-- Publish the generated patterns.  The keys follow the scheme
-- "p_" .. direction [.. "_init" | "_hint"], which is how sr() looks them up.
-- A hint pattern is one hintable letter immediately followed by the marker.
translit.serbian_exceptions = {
    p_tocy      = p_tocy,
    p_tolt      = p_tolt,
    p_tocy_init = p_i_tocy,
    p_tolt_init = p_i_tolt,
    p_tocy_hint = _p_tocy_hint * _p_hintchar,
    p_tolt_hint = _p_tolt_hint * _p_hintchar,
}
-- Flag checked by the guard around this section so the tables and patterns
-- are built only once.
translit.done_serbian = true
end
--===========================================================================--
-- End Of Tables --
--===========================================================================--
local t = translit
--- Build the LPeg substitution pattern for one transliteration direction.
-- The options translit.hinting and translit.sr_except are read at build
-- time, so a pattern reflects the settings in effect when it was created.
-- @param mode  "sr_tolt" or "sr_tocy"; used as the prefix of the dictionary
--              names (mode .. "_upper" / "_lower") and, through its trailing
--              "tolt" / "tocy", to select the exception patterns.
-- @return      an lpeg.Cs pattern that rewrites a whole string.
local function sr (mode)
    local P, R, Cs = lpeg.P, lpeg.R, lpeg.Cs
    local utfchar = translit.utfchar
    local modestr = "p_" .. mode:match("to..$")
    local _p_sre = t.serbian_exceptions[modestr]
    local _p_sre_i = t.serbian_exceptions[modestr .. "_init"]
    -- combined upper- and lowercase dictionary for this direction
    local trl_sr = t[mode.."_upper"] + t[mode.."_lower"]
    -- transliteration from latin script requires macro handling …
    -- A backslash followed by ASCII letters is matched without any
    -- replacement, so control sequences pass through untouched.
    local _p_macro = P[[\]] * R("az", "AZ")^1 -- assuming standard catcodes
    -- There are no earlier rules to extend, so nil is passed explicitly.
    -- (Naming the local being declared here would not refer to it: a local
    -- is not yet in scope inside its own initialiser, so the name would
    -- resolve to a global instead.)
    local _p_sr = translit.addrules (trl_sr, nil) / trl_sr
    if translit.hinting then
        -- hinted letters take precedence over the dictionary rules
        _p_sr = t.serbian_exceptions[modestr .. "_hint"] + _p_sr
    end
    local p_sr
    if translit.sr_except then
        -- _p_sre_i may match once at the very start of the input, where no
        -- space precedes the word; _p_sre covers space-prefixed exceptions.
        p_sr = Cs(_p_sre_i^-1 * (_p_macro + _p_sre + _p_sr + utfchar)^0)
    else
        p_sr = Cs((_p_macro + _p_sr + utfchar)^0)
    end
    return p_sr
end
--- Transliterate `text` with the "sr_tolt" (Cyrillic -> Latin) parser.
-- The parser is built on first use and cached; the cache key includes the
-- current translit.hinting and translit.sr_except settings, so every
-- combination of the two gets its own parser.
translit.methods["sr_tolt"] = function (text)
    local cache_key = "sr_tolt" .. tostring(translit.hinting) .. tostring(translit.sr_except)
    local parser = pcache[cache_key]
    if parser then
        return lpegmatch(parser, text)
    end
    parser = sr("sr_tolt")
    pcache[cache_key] = parser
    return lpegmatch(parser, text)
end
--- Transliterate `text` with the "sr_tocy" (Latin -> Cyrillic) parser.
-- The parser is built on first use and cached; the cache key includes the
-- current translit.hinting and translit.sr_except settings, so every
-- combination of the two gets its own parser.
translit.methods["sr_tocy"] = function (text)
    local cache_key = "sr_tocy" .. tostring(translit.hinting) .. tostring(translit.sr_except)
    local parser = pcache[cache_key]
    if parser then
        return lpegmatch(parser, text)
    end
    parser = sr("sr_tocy")
    pcache[cache_key] = parser
    return lpegmatch(parser, text)
end
-- vim:ft=lua:sw=4:ts=4
|