Add a keep-mode to shlexer.
This commit is contained in:
parent
9ed466b536
commit
22da17bbca
@ -112,3 +112,12 @@ class SplitTests(unittest.TestCase):
|
|||||||
with self.subTest(cmd=cmd):
|
with self.subTest(cmd=cmd):
|
||||||
items = split.split(cmd)
|
items = split.split(cmd)
|
||||||
self.assertEqual(items, out)
|
self.assertEqual(items, out)
|
||||||
|
|
||||||
|
def test_split_keep(self):
|
||||||
|
"""Test splitting with keep=True."""
|
||||||
|
for case in test_data.strip().splitlines():
|
||||||
|
cmd, *_out = case.split('|')[:-1]
|
||||||
|
cmd = cmd.replace(r'\n', '\n')
|
||||||
|
with self.subTest(cmd=cmd):
|
||||||
|
items = split.split(cmd, keep=True)
|
||||||
|
self.assertEqual(''.join(items), cmd)
|
||||||
|
@ -41,6 +41,7 @@ class ShellLexer:
|
|||||||
self.escapedquotes = '"'
|
self.escapedquotes = '"'
|
||||||
self.state = ' '
|
self.state = ' '
|
||||||
self.token = ''
|
self.token = ''
|
||||||
|
self.keep = False
|
||||||
|
|
||||||
def read_token(self):
|
def read_token(self):
|
||||||
"""Read a raw token from the input stream."""
|
"""Read a raw token from the input stream."""
|
||||||
@ -63,15 +64,21 @@ class ShellLexer:
|
|||||||
break
|
break
|
||||||
elif nextchar in self.whitespace:
|
elif nextchar in self.whitespace:
|
||||||
log.shlexer.vdebug("I see whitespace in whitespace state")
|
log.shlexer.vdebug("I see whitespace in whitespace state")
|
||||||
|
if self.keep:
|
||||||
|
self.token += nextchar
|
||||||
if self.token or quoted:
|
if self.token or quoted:
|
||||||
# emit current token
|
# emit current token
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
elif nextchar in self.escape:
|
elif nextchar in self.escape:
|
||||||
|
if self.keep:
|
||||||
|
self.token += nextchar
|
||||||
escapedstate = 'a'
|
escapedstate = 'a'
|
||||||
self.state = nextchar
|
self.state = nextchar
|
||||||
elif nextchar in self.quotes:
|
elif nextchar in self.quotes:
|
||||||
|
if self.keep:
|
||||||
|
self.token += nextchar
|
||||||
self.state = nextchar
|
self.state = nextchar
|
||||||
else:
|
else:
|
||||||
self.token = nextchar
|
self.token = nextchar
|
||||||
@ -83,9 +90,13 @@ class ShellLexer:
|
|||||||
self.state = None
|
self.state = None
|
||||||
break
|
break
|
||||||
if nextchar == self.state:
|
if nextchar == self.state:
|
||||||
|
if self.keep:
|
||||||
|
self.token += nextchar
|
||||||
self.state = 'a'
|
self.state = 'a'
|
||||||
elif (nextchar in self.escape and
|
elif (nextchar in self.escape and
|
||||||
self.state in self.escapedquotes):
|
self.state in self.escapedquotes):
|
||||||
|
if self.keep:
|
||||||
|
self.token += nextchar
|
||||||
escapedstate = self.state
|
escapedstate = self.state
|
||||||
self.state = nextchar
|
self.state = nextchar
|
||||||
else:
|
else:
|
||||||
@ -93,13 +104,14 @@ class ShellLexer:
|
|||||||
elif self.state in self.escape:
|
elif self.state in self.escape:
|
||||||
if nextchar is None:
|
if nextchar is None:
|
||||||
log.shlexer.vdebug("I see EOF in escape state")
|
log.shlexer.vdebug("I see EOF in escape state")
|
||||||
self.token += self.state
|
if not self.keep:
|
||||||
|
self.token += self.state
|
||||||
self.state = None
|
self.state = None
|
||||||
break
|
break
|
||||||
# In posix shells, only the quote itself or the escape
|
# In posix shells, only the quote itself or the escape
|
||||||
# character may be escaped within quotes.
|
# character may be escaped within quotes.
|
||||||
if (escapedstate in self.quotes and nextchar != self.state and
|
if (escapedstate in self.quotes and nextchar != self.state and
|
||||||
nextchar != escapedstate):
|
nextchar != escapedstate and not self.keep):
|
||||||
self.token += self.state
|
self.token += self.state
|
||||||
self.token += nextchar
|
self.token += nextchar
|
||||||
self.state = escapedstate
|
self.state = escapedstate
|
||||||
@ -110,13 +122,19 @@ class ShellLexer:
|
|||||||
elif nextchar in self.whitespace:
|
elif nextchar in self.whitespace:
|
||||||
log.shlexer.vdebug("shlex: I see whitespace in word state")
|
log.shlexer.vdebug("shlex: I see whitespace in word state")
|
||||||
self.state = ' '
|
self.state = ' '
|
||||||
|
if self.keep:
|
||||||
|
self.token += nextchar
|
||||||
if self.token or quoted:
|
if self.token or quoted:
|
||||||
break # emit current token
|
break # emit current token
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
elif nextchar in self.quotes:
|
elif nextchar in self.quotes:
|
||||||
|
if self.keep:
|
||||||
|
self.token += nextchar
|
||||||
self.state = nextchar
|
self.state = nextchar
|
||||||
elif nextchar in self.escape:
|
elif nextchar in self.escape:
|
||||||
|
if self.keep:
|
||||||
|
self.token += nextchar
|
||||||
escapedstate = 'a'
|
escapedstate = 'a'
|
||||||
self.state = nextchar
|
self.state = nextchar
|
||||||
else:
|
else:
|
||||||
@ -138,7 +156,12 @@ class ShellLexer:
|
|||||||
return token
|
return token
|
||||||
|
|
||||||
|
|
||||||
def split(s):
|
def split(s, keep=False):
|
||||||
"""Split a string via ShellLexer."""
|
"""Split a string via ShellLexer.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
keep: Whether to keep the special chars in the split output.
|
||||||
|
"""
|
||||||
lexer = ShellLexer(s)
|
lexer = ShellLexer(s)
|
||||||
|
lexer.keep = keep
|
||||||
return list(lexer)
|
return list(lexer)
|
||||||
|
Loading…
Reference in New Issue
Block a user