Add a keep-mode to shlexer.

This commit is contained in:
Florian Bruhin 2014-11-05 07:41:17 +01:00
parent 9ed466b536
commit 22da17bbca
2 changed files with 36 additions and 4 deletions

View File

@ -112,3 +112,12 @@ class SplitTests(unittest.TestCase):
with self.subTest(cmd=cmd): with self.subTest(cmd=cmd):
items = split.split(cmd) items = split.split(cmd)
self.assertEqual(items, out) self.assertEqual(items, out)
def test_split_keep(self):
    """Check that splitting with keep=True loses no characters.

    Every line of test_data is split on '|'; the first field is the
    command and the remaining fields are ignored here. Literal
    backslash-n sequences in the command are turned into real newlines
    before splitting. Joining the tokens returned by
    split.split(cmd, keep=True) must reproduce the command exactly.
    """
    for line in test_data.strip().splitlines():
        # The piece after the last '|' is dropped; only the leading
        # command field matters for this round-trip check.
        raw_cmd, *_expected = line.split('|')[:-1]
        cmd = raw_cmd.replace(r'\n', '\n')
        with self.subTest(cmd=cmd):
            tokens = split.split(cmd, keep=True)
            self.assertEqual(''.join(tokens), cmd)

View File

@ -41,6 +41,7 @@ class ShellLexer:
self.escapedquotes = '"' self.escapedquotes = '"'
self.state = ' ' self.state = ' '
self.token = '' self.token = ''
self.keep = False
def read_token(self): def read_token(self):
"""Read a raw token from the input stream.""" """Read a raw token from the input stream."""
@ -63,15 +64,21 @@ class ShellLexer:
break break
elif nextchar in self.whitespace: elif nextchar in self.whitespace:
log.shlexer.vdebug("I see whitespace in whitespace state") log.shlexer.vdebug("I see whitespace in whitespace state")
if self.keep:
self.token += nextchar
if self.token or quoted: if self.token or quoted:
# emit current token # emit current token
break break
else: else:
continue continue
elif nextchar in self.escape: elif nextchar in self.escape:
if self.keep:
self.token += nextchar
escapedstate = 'a' escapedstate = 'a'
self.state = nextchar self.state = nextchar
elif nextchar in self.quotes: elif nextchar in self.quotes:
if self.keep:
self.token += nextchar
self.state = nextchar self.state = nextchar
else: else:
self.token = nextchar self.token = nextchar
@ -83,9 +90,13 @@ class ShellLexer:
self.state = None self.state = None
break break
if nextchar == self.state: if nextchar == self.state:
if self.keep:
self.token += nextchar
self.state = 'a' self.state = 'a'
elif (nextchar in self.escape and elif (nextchar in self.escape and
self.state in self.escapedquotes): self.state in self.escapedquotes):
if self.keep:
self.token += nextchar
escapedstate = self.state escapedstate = self.state
self.state = nextchar self.state = nextchar
else: else:
@ -93,13 +104,14 @@ class ShellLexer:
elif self.state in self.escape: elif self.state in self.escape:
if nextchar is None: if nextchar is None:
log.shlexer.vdebug("I see EOF in escape state") log.shlexer.vdebug("I see EOF in escape state")
self.token += self.state if not self.keep:
self.token += self.state
self.state = None self.state = None
break break
# In posix shells, only the quote itself or the escape # In posix shells, only the quote itself or the escape
# character may be escaped within quotes. # character may be escaped within quotes.
if (escapedstate in self.quotes and nextchar != self.state and if (escapedstate in self.quotes and nextchar != self.state and
nextchar != escapedstate): nextchar != escapedstate and not self.keep):
self.token += self.state self.token += self.state
self.token += nextchar self.token += nextchar
self.state = escapedstate self.state = escapedstate
@ -110,13 +122,19 @@ class ShellLexer:
elif nextchar in self.whitespace: elif nextchar in self.whitespace:
log.shlexer.vdebug("shlex: I see whitespace in word state") log.shlexer.vdebug("shlex: I see whitespace in word state")
self.state = ' ' self.state = ' '
if self.keep:
self.token += nextchar
if self.token or quoted: if self.token or quoted:
break # emit current token break # emit current token
else: else:
continue continue
elif nextchar in self.quotes: elif nextchar in self.quotes:
if self.keep:
self.token += nextchar
self.state = nextchar self.state = nextchar
elif nextchar in self.escape: elif nextchar in self.escape:
if self.keep:
self.token += nextchar
escapedstate = 'a' escapedstate = 'a'
self.state = nextchar self.state = nextchar
else: else:
@ -138,7 +156,12 @@ class ShellLexer:
return token return token
def split(s): def split(s, keep=False):
"""Split a string via ShellLexer.""" """Split a string via ShellLexer.
Args:
keep: Whether to keep special chars in the split output.
"""
lexer = ShellLexer(s) lexer = ShellLexer(s)
lexer.keep = keep
return list(lexer) return list(lexer)