Handle safe parsing directly in ShellLexer

This commit is contained in:
Florian Bruhin 2014-11-04 20:24:42 +01:00
parent 95f7940996
commit 409eba76ca

View File

@@ -77,8 +77,8 @@ class ShellLexer:
 quoted = True
 if not nextchar: # end of file
 log.shlexer.vdebug("I see EOF in quotes state")
-# XXX what error should be raised here?
-raise ValueError("No closing quotation")
+self.state = None
+break
 if nextchar == self.state:
 self.state = 'a'
 elif (nextchar in self.escape and
@@ -90,8 +90,9 @@ class ShellLexer:
 elif self.state in self.escape:
 if not nextchar: # end of file
 log.shlexer.vdebug("I see EOF in escape state")
-# XXX what error should be raised here?
-raise ValueError("No escaped character")
+self.token += self.state
+self.state = None
+break
 # In posix shells, only the quote itself or the escape
 # character may be escaped within quotes.
 if (escapedstate in self.quotes and nextchar != self.state and
@@ -135,38 +136,6 @@ class ShellLexer:
 def split(s):
-r"""Split a string via shlex safely (don't bail out on unbalanced quotes).
-
-We split while the user is typing (for completion), and as
-soon as ", ' or \ is typed, the string is invalid for shlex,
-because it encounters EOF while in quote/escape state.
-Here we fix this error temporarily so shlex doesn't blow up,
-and then retry splitting again.
-Since shlex raises ValueError in both cases we unfortunately
-have to parse the exception string...
-We try 3 times so multiple errors can be fixed.
-"""
-orig_s = s
-for i in range(3):
-lexer = ShellLexer(s)
-try:
-tokens = list(lexer)
-except ValueError as e:
-if str(e) not in ("No closing quotation", "No escaped character"):
-raise
-# eggs "bacon ham -> eggs "bacon ham"
-# eggs\ -> eggs\\
-if lexer.state not in lexer.escape + lexer.quotes:
-raise AssertionError(
-"Lexer state is >{}< while parsing >{}< (attempted fixup: "
-">{}<)".format(lexer.state, orig_s, s))
-s += lexer.state
-else:
-return tokens
-# We should never arrive here.
-raise AssertionError(
-"Gave up splitting >{}< after {} tries. Attempted fixup: >{}<.".format(
-orig_s, i, s)) # pylint: disable=undefined-loop-variable
+"""Split a string via ShellLexer."""
+lexer = ShellLexer(s)
+return list(lexer)