Disallow surrogate escapes in dicts and lists in the config
In Dict.to_str() and List.to_str() we use json.dump to get a value. However, JSON includes surrogate escapes in the dumped values, which breaks round trips.

    >>> yaml.load(json.dumps({'\U00010000': True}))
    {'\ud800\udc00': True}

    >>> yaml.load(json.dumps({'\U00010000': True}, ensure_ascii=False))
    yaml.reader.ReaderError: unacceptable character #x10000: special characters are not allowed

See:
https://stackoverflow.com/a/38552626/2085149
https://news.ycombinator.com/item?id=12798032
This commit is contained in:
parent
fa0f4e1101
commit
9ac2dbcc80
@ -181,6 +181,17 @@ class BaseType:
|
|||||||
raise configexc.ValidationError(
|
raise configexc.ValidationError(
|
||||||
value, "may not contain unprintable chars!")
|
value, "may not contain unprintable chars!")
|
||||||
|
|
||||||
|
def _validate_surrogate_escapes(self, full_value, value):
    """Reject strings containing characters above U+FFFF.

    Values that are not strings are accepted without being inspected. A
    string is rejected as soon as one of its characters has a code point
    greater than 0xFFFF; by default the json module writes such characters
    as a pair of surrogate escapes.

    Args:
        full_value: The complete value under validation; this is what gets
                    attached to the raised error.
        value: The individual item to inspect (presumably a key or element
               taken from full_value - verify against the callers).

    Raises:
        configexc.ValidationError: If value is a str holding at least one
                                   character with a code point above 0xFFFF.
    """
    if isinstance(value, str):
        for char in value:
            # Anything beyond the Basic Multilingual Plane is refused.
            if ord(char) > 0xFFFF:
                raise configexc.ValidationError(
                    full_value, "may not contain surrogate escapes!")
|
||||||
|
|
||||||
def _validate_valid_values(self, value):
|
def _validate_valid_values(self, value):
|
||||||
"""Validate value against possible values.
|
"""Validate value against possible values.
|
||||||
|
|
||||||
@ -418,6 +429,9 @@ class List(BaseType):
|
|||||||
if not value:
|
if not value:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
for val in value:
|
||||||
|
self._validate_surrogate_escapes(value, val)
|
||||||
|
|
||||||
if self.length is not None and len(value) != self.length:
|
if self.length is not None and len(value) != self.length:
|
||||||
raise configexc.ValidationError(value, "Exactly {} values need to "
|
raise configexc.ValidationError(value, "Exactly {} values need to "
|
||||||
"be set!".format(self.length))
|
"be set!".format(self.length))
|
||||||
@ -1089,6 +1103,9 @@ class Dict(BaseType):
|
|||||||
return self._fill_fixed_keys({})
|
return self._fill_fixed_keys({})
|
||||||
|
|
||||||
self._validate_keys(value)
|
self._validate_keys(value)
|
||||||
|
for key, val in value.items():
|
||||||
|
self._validate_surrogate_escapes(value, key)
|
||||||
|
self._validate_surrogate_escapes(value, val)
|
||||||
|
|
||||||
d = {self.keytype.to_py(key): self.valtype.to_py(val)
|
d = {self.keytype.to_py(key): self.valtype.to_py(val)
|
||||||
for key, val in value.items()}
|
for key, val in value.items()}
|
||||||
|
@ -429,6 +429,8 @@ class TestString:
|
|||||||
({'minlen': 2, 'maxlen': 3}, 'abc'),
|
({'minlen': 2, 'maxlen': 3}, 'abc'),
|
||||||
# valid_values
|
# valid_values
|
||||||
({'valid_values': configtypes.ValidValues('abcd')}, 'abcd'),
|
({'valid_values': configtypes.ValidValues('abcd')}, 'abcd'),
|
||||||
|
# Surrogate escapes are allowed in strings
|
||||||
|
({}, '\U00010000'),
|
||||||
])
|
])
|
||||||
def test_to_py(self, klass, kwargs, val):
|
def test_to_py(self, klass, kwargs, val):
|
||||||
assert klass(**kwargs).to_py(val) == val
|
assert klass(**kwargs).to_py(val) == val
|
||||||
@ -535,7 +537,7 @@ class TestList:
|
|||||||
def test_to_py(self, klass, val):
|
def test_to_py(self, klass, val):
|
||||||
assert klass().to_py(val) == val
|
assert klass().to_py(val) == val
|
||||||
|
|
||||||
@pytest.mark.parametrize('val', [[42], '["foo"]'])
|
@pytest.mark.parametrize('val', [[42], '["foo"]', ['\U00010000']])
|
||||||
def test_to_py_invalid(self, klass, val):
|
def test_to_py_invalid(self, klass, val):
|
||||||
with pytest.raises(configexc.ValidationError):
|
with pytest.raises(configexc.ValidationError):
|
||||||
klass().to_py(val)
|
klass().to_py(val)
|
||||||
@ -1427,6 +1429,8 @@ class TestDict:
|
|||||||
assert klass(keytype=keytype, valtype=valtype).to_py(val) == val
|
assert klass(keytype=keytype, valtype=valtype).to_py(val) == val
|
||||||
|
|
||||||
@pytest.mark.parametrize('val', [
|
@pytest.mark.parametrize('val', [
|
||||||
|
{'\U00010000': 'foo'}, # UTF-16 surrogate in key
|
||||||
|
{'foo': '\U00010000'}, # UTF-16 surrogate in value
|
||||||
{0: 'foo'}, # Invalid key type
|
{0: 'foo'}, # Invalid key type
|
||||||
{'foo': 0}, # Invalid value type
|
{'foo': 0}, # Invalid value type
|
||||||
])
|
])
|
||||||
|
Loading…
Reference in New Issue
Block a user