Skip to content

Commit 07bcf05

Browse files
Issue #28563: Fixed possible DoS and arbitrary code execution when handling
plural form selections in the gettext module. The expression parser now supports exactly the syntax supported by GNU gettext.
1 parent d751040 commit 07bcf05

File tree

3 files changed

+216
-45
lines changed

3 files changed

+216
-45
lines changed

‎Lib/gettext.py

+128-44
Original file line numberDiff line numberDiff line change
@@ -57,55 +57,139 @@
5757

5858
_default_localedir = os.path.join(sys.base_prefix, 'share', 'locale')
5959

60+
# Expression parsing for plural form selection.
61+
#
62+
# The gettext library supports a small subset of C syntax. The only
63+
# incompatible difference is that integer literals starting with zero are
64+
# decimal.
65+
#
66+
# https://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms
67+
# http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/plural.y
68+
69+
_token_pattern = re.compile(r"""
70+
(?P<WHITESPACES>[ \t]+) | # spaces and horizontal tabs
71+
(?P<NUMBER>[0-9]+\b) | # decimal integer
72+
(?P<NAME>n\b) | # only n is allowed
73+
(?P<PARENTHESIS>[()]) |
74+
(?P<OPERATOR>[-*/%+?:]|[><!]=?|==|&&|\|\|) | # !, *, /, %, +, -, <, >,
75+
# <=, >=, ==, !=, &&, ||,
76+
# ? :
77+
# unary and bitwise ops
78+
# not allowed
79+
(?P<INVALID>\w+|.) # invalid token
80+
""", re.VERBOSE|re.DOTALL)
81+
82+
def _tokenize(plural):
83+
for mo in re.finditer(_token_pattern, plural):
84+
kind = mo.lastgroup
85+
if kind == 'WHITESPACES':
86+
continue
87+
value = mo.group(kind)
88+
if kind == 'INVALID':
89+
raise ValueError('invalid token in plural form: %s' % value)
90+
yield value
91+
yield ''
92+
93+
def _error(value):
94+
if value:
95+
return ValueError('unexpected token in plural form: %s' % value)
96+
else:
97+
return ValueError('unexpected end of plural form')
98+
99+
_binary_ops = (
100+
('||',),
101+
('&&',),
102+
('==', '!='),
103+
('<', '>', '<=', '>='),
104+
('+', '-'),
105+
('*', '/', '%'),
106+
)
107+
_binary_ops = {op: i for i, ops in enumerate(_binary_ops, 1) for op in ops}
108+
_c2py_ops = {'||': 'or', '&&': 'and', '/': '//'}
109+
110+
def _parse(tokens, priority=-1):
111+
result = ''
112+
nexttok = next(tokens)
113+
while nexttok == '!':
114+
result += 'not '
115+
nexttok = next(tokens)
116+
117+
if nexttok == '(':
118+
sub, nexttok = _parse(tokens)
119+
result = '%s(%s)' % (result, sub)
120+
if nexttok != ')':
121+
raise ValueError('unbalanced parenthesis in plural form')
122+
elif nexttok == 'n':
123+
result = '%s%s' % (result, nexttok)
124+
else:
125+
try:
126+
value = int(nexttok, 10)
127+
except ValueError:
128+
raise _error(nexttok) from None
129+
result = '%s%d' % (result, value)
130+
nexttok = next(tokens)
131+
132+
j = 100
133+
while nexttok in _binary_ops:
134+
i = _binary_ops[nexttok]
135+
if i < priority:
136+
break
137+
# Break chained comparisons
138+
if i in (3, 4) and j in (3, 4): # '==', '!=', '<', '>', '<=', '>='
139+
result = '(%s)' % result
140+
# Replace some C operators by their Python equivalents
141+
op = _c2py_ops.get(nexttok, nexttok)
142+
right, nexttok = _parse(tokens, i + 1)
143+
result = '%s %s %s' % (result, op, right)
144+
j = i
145+
if j == priority == 4: # '<', '>', '<=', '>='
146+
result = '(%s)' % result
147+
148+
if nexttok == '?' and priority <= 0:
149+
if_true, nexttok = _parse(tokens, 0)
150+
if nexttok != ':':
151+
raise _error(nexttok)
152+
if_false, nexttok = _parse(tokens)
153+
result = '%s if %s else %s' % (if_true, result, if_false)
154+
if priority == 0:
155+
result = '(%s)' % result
156+
157+
return result, nexttok
60158

61159
def c2py(plural):
    """Gets a C expression as used in PO files for plural forms and returns a
    Python function that implements an equivalent expression.

    The returned function takes one argument ``n`` and returns an int; it
    raises ValueError if ``n`` is not an int.

    Raises ValueError if *plural* is longer than 1000 characters, contains
    a token or construct the parser rejects, or is too deeply nested.
    """

    # Bound the work done on untrusted catalog data before parsing it.
    if len(plural) > 1000:
        raise ValueError('plural form expression is too long')
    try:
        result, nexttok = _parse(_tokenize(plural))
        # Anything other than the end-of-input marker is trailing garbage.
        if nexttok:
            raise _error(nexttok)

        # Reject deeply nested parentheses in the generated source before
        # handing it to the compiler.
        depth = 0
        for c in result:
            if c == '(':
                depth += 1
                if depth > 20:
                    # Python compiler limit is about 90.
                    # The most complex example has 2.
                    raise ValueError('plural form expression is too complex')
            elif c == ')':
                depth -= 1

        # `result` was built by _parse() from validated tokens only; the
        # leading "if True:" lets the indented template compile as-is.
        ns = {}
        exec('''if True:
            def func(n):
                if not isinstance(n, int):
                    raise ValueError('Plural value must be an integer.')
                return int(%s)
            ''' % result, ns)
        return ns['func']
    except RuntimeError:
        # Recursion error can be raised in _parse() or exec().
        raise ValueError('plural form expression is too complex')
109193

110194

111195
def _expand_lang(loc):

‎Lib/test/test_gettext.py

+84-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,9 @@ def test_plural_forms2(self):
230230
x = t.ngettext('There is %s file', 'There are %s files', 2)
231231
eq(x, 'Hay %s ficheros')
232232

233-
def test_hu(self):
233+
# Examples from http://www.gnu.org/software/gettext/manual/gettext.html
234+
235+
def test_ja(self):
234236
eq = self.assertEqual
235237
f = gettext.c2py('0')
236238
s = ''.join([ str(f(x)) for x in range(200) ])
@@ -248,6 +250,12 @@ def test_fr(self):
248250
s = ''.join([ str(f(x)) for x in range(200) ])
249251
eq(s, "00111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111")
250252

253+
def test_lv(self):
254+
eq = self.assertEqual
255+
f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2')
256+
s = ''.join([ str(f(x)) for x in range(200) ])
257+
eq(s, "20111111111111111111101111111110111111111011111111101111111110111111111011111111101111111110111111111011111111111111111110111111111011111111101111111110111111111011111111101111111110111111111011111111")
258+
251259
def test_gd(self):
252260
eq = self.assertEqual
253261
f = gettext.c2py('n==1 ? 0 : n==2 ? 1 : 2')
@@ -261,6 +269,12 @@ def test_gd2(self):
261269
s = ''.join([ str(f(x)) for x in range(200) ])
262270
eq(s, "20122222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222")
263271

272+
def test_ro(self):
273+
eq = self.assertEqual
274+
f = gettext.c2py('n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2')
275+
s = ''.join([ str(f(x)) for x in range(200) ])
276+
eq(s, "10111111111111111111222222222222222222222222222222222222222222222222222222222222222222222222222222222111111111111111111122222222222222222222222222222222222222222222222222222222222222222222222222222222")
277+
264278
def test_lt(self):
265279
eq = self.assertEqual
266280
f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2')
@@ -273,6 +287,12 @@ def test_ru(self):
273287
s = ''.join([ str(f(x)) for x in range(200) ])
274288
eq(s, "20111222222222222222201112222220111222222011122222201112222220111222222011122222201112222220111222222011122222222222222220111222222011122222201112222220111222222011122222201112222220111222222011122222")
275289

290+
def test_cs(self):
291+
eq = self.assertEqual
292+
f = gettext.c2py('(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2')
293+
s = ''.join([ str(f(x)) for x in range(200) ])
294+
eq(s, "20111222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222")
295+
276296
def test_pl(self):
277297
eq = self.assertEqual
278298
f = gettext.c2py('n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2')
@@ -285,10 +305,73 @@ def test_sl(self):
285305
s = ''.join([ str(f(x)) for x in range(200) ])
286306
eq(s, "30122333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333012233333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333")
287307

308+
def test_ar(self):
309+
eq = self.assertEqual
310+
f = gettext.c2py('n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5')
311+
s = ''.join([ str(f(x)) for x in range(200) ])
312+
eq(s, "01233333333444444444444444444444444444444444444444444444444444444444444444444444444444444444444444445553333333344444444444444444444444444444444444444444444444444444444444444444444444444444444444444444")
313+
288314
def test_security(self):
    """c2py() must reject hostile or pathologically large expressions."""
    raises = self.assertRaises
    # Test for a dangerous expression
    raises(ValueError, gettext.c2py, "os.chmod('/etc/passwd',0777)")
    # issue28563
    raises(ValueError, gettext.c2py, '"(eval(foo) && ""')
    raises(ValueError, gettext.c2py, 'f"{os.system(\'sh\')}"')
    # Maximum recursion depth exceeded during compilation
    raises(ValueError, gettext.c2py, 'n+'*10000 + 'n')
    self.assertEqual(gettext.c2py('n+'*100 + 'n')(1), 101)
    # MemoryError during compilation
    raises(ValueError, gettext.c2py, '('*100 + 'n' + ')'*100)
    # Maximum recursion depth exceeded in C to Python translator
    raises(ValueError, gettext.c2py, '('*10000 + 'n' + ')'*10000)
    self.assertEqual(gettext.c2py('('*20 + 'n' + ')'*20)(1), 1)
329+
330+
def test_chained_comparison(self):
    """Comparisons must follow C semantics, not Python chaining."""
    # C evaluates a == b == c as (a == b) == c, whereas Python chains it
    # as a == b and b == c, so e.g. 2 == 2 == 2 gives different results.
    f = gettext.c2py('n == n == n')
    self.assertEqual(''.join(str(f(x)) for x in range(3)), '010')
    f = gettext.c2py('1 < n == n')
    self.assertEqual(''.join(str(f(x)) for x in range(3)), '100')
    f = gettext.c2py('n == n < 2')
    self.assertEqual(''.join(str(f(x)) for x in range(3)), '010')
    f = gettext.c2py('0 < n < 2')
    self.assertEqual(''.join(str(f(x)) for x in range(3)), '111')
340+
341+
def test_decimal_number(self):
    """A literal with a leading zero is read as decimal, not octal."""
    self.assertEqual(gettext.c2py('0123')(1), 123)
343+
344+
def test_invalid_syntax(self):
    """Expressions outside the supported grammar raise ValueError."""
    invalid_expressions = [
        'x>1', '(n>1', 'n>1)', '42**42**42', '0xa', '1.0', '1e2',
        'n>0x1', '+n', '-n', 'n()', 'n(1)', '1+', 'nn', 'n n',
    ]
    for expr in invalid_expressions:
        with self.assertRaises(ValueError):
            gettext.c2py(expr)
352+
353+
def test_nested_condition_operator(self):
    """?: nests correctly in both the true and the false branch."""
    self.assertEqual(gettext.c2py('n?1?2:3:4')(0), 4)
    self.assertEqual(gettext.c2py('n?1?2:3:4')(1), 2)
    self.assertEqual(gettext.c2py('n?1:3?4:5')(0), 4)
    self.assertEqual(gettext.c2py('n?1:3?4:5')(1), 1)
358+
359+
def test_division(self):
    """'/' is integer division and associates left with '*'."""
    f = gettext.c2py('2/n*3')
    self.assertEqual(f(1), 6)
    self.assertEqual(f(2), 3)
    self.assertEqual(f(3), 0)
    self.assertEqual(f(-1), -6)
    self.assertRaises(ZeroDivisionError, f, 0)
366+
367+
def test_plural_number(self):
    """The compiled function accepts ints only."""
    f = gettext.c2py('1')
    self.assertEqual(f(1), 1)
    self.assertRaises(ValueError, f, 1.0)
    self.assertRaises(ValueError, f, '1')
    self.assertRaises(ValueError, f, [])
    self.assertRaises(ValueError, f, object())
374+
292375

293376

294377
class UnicodeTranslationsTest(GettextBaseTest):

‎Misc/NEWS

+4
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ Core and Builtins
3232
Library
3333
-------
3434

35+
- Issue #28563: Fixed possible DoS and arbitrary code execution when handling
36+
plural form selections in the gettext module. The expression parser now
37+
supports exactly the syntax supported by GNU gettext.
38+
3539
- Issue #27783: Fix possible usage of uninitialized memory in operator.methodcaller.
3640

3741
- Issue #27774: Fix possible Py_DECREF on unowned object in _sre.

0 commit comments

Comments
 (0)