From 9ac2dbcc80330c6090ecdce656046931e8cf591b Mon Sep 17 00:00:00 2001 From: Florian Bruhin <git@the-compiler.org> Date: Tue, 4 Jul 2017 10:16:27 +0200 Subject: [PATCH] Disallow surrogate escapes in dicts and lists in the config In Dict.to_str() and List.to_str() we use json.dump to get a value. However, JSON includes surrogate escapes in the dumped values, which breaks round trips. >>> yaml.load(json.dumps({'\U00010000': True})) {'\ud800\udc00': True} >>> yaml.load(json.dumps({'\U00010000': True}, ensure_ascii=False)) yaml.reader.ReaderError: unacceptable character #x10000: special characters are not allowed See: https://stackoverflow.com/a/38552626/2085149 https://news.ycombinator.com/item?id=12798032 --- qutebrowser/config/configtypes.py | 17 +++++++++++++++++ tests/unit/config/test_configtypes.py | 6 +++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/qutebrowser/config/configtypes.py b/qutebrowser/config/configtypes.py index e266bd38a..ca01cfd60 100644 --- a/qutebrowser/config/configtypes.py +++ b/qutebrowser/config/configtypes.py @@ -181,6 +181,17 @@ class BaseType: raise configexc.ValidationError( value, "may not contain unprintable chars!") + def _validate_surrogate_escapes(self, full_value, value): + """Make sure the given value doesn't contain surrogate escapes. + + This is used for values passed to json.dump, as it can't handle those. + """ + if not isinstance(value, str): + return + if any(ord(c) > 0xFFFF for c in value): + raise configexc.ValidationError( + full_value, "may not contain surrogate escapes!") + def _validate_valid_values(self, value): """Validate value against possible values. @@ -418,6 +429,9 @@ class List(BaseType): if not value: return [] + for val in value: + self._validate_surrogate_escapes(value, val) + if self.length is not None and len(value) != self.length: raise configexc.ValidationError(value, "Exactly {} values need to " "be set!".format(self.length)) @@ -1089,6 +1103,9 @@ class Dict(BaseType): return self._fill_fixed_keys({}) self._validate_keys(value) + for key, val in value.items(): + self._validate_surrogate_escapes(value, key) + self._validate_surrogate_escapes(value, val) d = {self.keytype.to_py(key): self.valtype.to_py(val) for key, val in value.items()} diff --git a/tests/unit/config/test_configtypes.py b/tests/unit/config/test_configtypes.py index 05f0f1922..9aa91b372 100644 --- a/tests/unit/config/test_configtypes.py +++ b/tests/unit/config/test_configtypes.py @@ -429,6 +429,8 @@ class TestString: ({'minlen': 2, 'maxlen': 3}, 'abc'), # valid_values ({'valid_values': configtypes.ValidValues('abcd')}, 'abcd'), + # Surrogate escapes are allowed in strings + ({}, '\U00010000'), ]) def test_to_py(self, klass, kwargs, val): assert klass(**kwargs).to_py(val) == val @@ -535,7 +537,7 @@ class TestList: def test_to_py(self, klass, val): assert klass().to_py(val) == val - @pytest.mark.parametrize('val', [[42], '["foo"]']) + @pytest.mark.parametrize('val', [[42], '["foo"]', ['\U00010000']]) def test_to_py_invalid(self, klass, val): with pytest.raises(configexc.ValidationError): klass().to_py(val) @@ -1427,6 +1429,8 @@ class TestDict: assert klass(keytype=keytype, valtype=valtype).to_py(val) == val @pytest.mark.parametrize('val', [ + {'\U00010000': 'foo'}, # UTF-16 surrogate in key + {'foo': '\U00010000'}, # UTF-16 surrogate in value {0: 'foo'}, # Invalid key type {'foo': 0}, # Invalid value type ])