From 1c1534b5f6971bfc2d006672945f0a237ee1c6cd Mon Sep 17 00:00:00 2001
From: Florian Bruhin
Date: Thu, 18 Sep 2014 16:57:07 +0200
Subject: [PATCH] safe_shlex_split: Use real lexer object and handle ' correctly.

---
 qutebrowser/utils/utils.py | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/qutebrowser/utils/utils.py b/qutebrowser/utils/utils.py
index 70da55176..22835bf0d 100644
--- a/qutebrowser/utils/utils.py
+++ b/qutebrowser/utils/utils.py
@@ -98,6 +98,16 @@ def dotted_getattr(obj, path):
     return functools.reduce(getattr, path.split('.'), obj)
 
 
+def _get_lexer(s):
+    """Get an shlex lexer for safe_shlex_split."""
+    if s is None:
+        raise TypeError("Refusing to create a lexer with s=None!")
+    lexer = shlex.shlex(s, posix=True)
+    lexer.whitespace_split = True
+    lexer.commenters = ''
+    return lexer
+
+
 def safe_shlex_split(s):
     r"""Split a string via shlex safely (don't bail out on unbalanced quotes).
 
@@ -113,24 +123,22 @@ def safe_shlex_split(s):
 
     We try 3 times so multiple errors can be fixed.
     """
-    if s is None:
-        raise TypeError("Can't split None!")
     tokens = None
     orig_s = s
     for i in range(3):
+        lexer = _get_lexer(s)
         try:
-            tokens = shlex.split(s)
+            tokens = list(lexer)
         except ValueError as e:
-            if str(e) == "No closing quotation":
-                # e.g. eggs "bacon ham
-                # -> we fix this as eggs "bacon ham"
-                s += '"'
-            elif str(e) == "No escaped character":
-                # e.g. eggs\
-                # -> we fix this as eggs\\
-                s += '\\'
-            else:
+            if str(e) not in ("No closing quotation", "No escaped character"):
                 raise
+            # eggs "bacon ham -> eggs "bacon ham"
+            # eggs\ -> eggs\\
+            if lexer.state not in "\"'\\":
+                raise AssertionError(
+                    "Lexer state is >{}< while parsing >{}< (attempted fixup: "
+                    ">{}<)".format(lexer.state, orig_s, s))
+            s += lexer.state
     if tokens is None:
         raise AssertionError("Gave up splitting >{}< after {} tries. "
                              "Attempted fixup: >{}<. This is a bug.".format(