Use yield
This commit is contained in:
parent
9591e86054
commit
cb24ec58cc
@ -41,108 +41,101 @@ class ShellLexer:
|
|||||||
self.escapedquotes = '"'
|
self.escapedquotes = '"'
|
||||||
self.keep = False
|
self.keep = False
|
||||||
|
|
||||||
def read_token(self):
|
def reset(self):
|
||||||
|
self.quoted = False
|
||||||
|
self.escapedstate = ' '
|
||||||
|
self.token = ''
|
||||||
|
self.state = ' '
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
"""Read a raw token from the input stream."""
|
"""Read a raw token from the input stream."""
|
||||||
quoted = False
|
self.quoted = False
|
||||||
escapedstate = ' '
|
self.escapedstate = ' '
|
||||||
token = ''
|
self.token = ''
|
||||||
state = ' '
|
self.state = ' '
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
nextchar = next(self.iterator)
|
nextchar = next(self.iterator)
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
nextchar = None
|
nextchar = None
|
||||||
log.shlexer.vdebug("in state {!r} I see character: {!r}".format(
|
log.shlexer.vdebug("in state {!r} I see character: {!r}".format(
|
||||||
state, nextchar))
|
self.state, nextchar))
|
||||||
if state == ' ':
|
|
||||||
if nextchar is None:
|
if nextchar is None:
|
||||||
break
|
if self.state in self.escape and not self.keep:
|
||||||
elif nextchar in self.whitespace:
|
self.token += self.state
|
||||||
|
if self.token or self.quoted:
|
||||||
|
yield self.token
|
||||||
|
return
|
||||||
|
elif self.state == ' ':
|
||||||
|
if nextchar in self.whitespace:
|
||||||
log.shlexer.vdebug("I see whitespace in whitespace state")
|
log.shlexer.vdebug("I see whitespace in whitespace state")
|
||||||
if self.keep:
|
if self.keep:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
if token or quoted:
|
if self.token or self.quoted:
|
||||||
# emit current token
|
# emit current token
|
||||||
break
|
yield self.token
|
||||||
|
self.reset()
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
elif nextchar in self.escape:
|
elif nextchar in self.escape:
|
||||||
if self.keep:
|
if self.keep:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
escapedstate = 'a'
|
self.escapedstate = 'a'
|
||||||
state = nextchar
|
self.state = nextchar
|
||||||
elif nextchar in self.quotes:
|
elif nextchar in self.quotes:
|
||||||
if self.keep:
|
if self.keep:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
state = nextchar
|
self.state = nextchar
|
||||||
else:
|
else:
|
||||||
token = nextchar
|
self.token = nextchar
|
||||||
state = 'a'
|
self.state = 'a'
|
||||||
elif state in self.quotes:
|
elif self.state in self.quotes:
|
||||||
quoted = True
|
self.quoted = True
|
||||||
if nextchar is None:
|
if nextchar == self.state:
|
||||||
log.shlexer.vdebug("I see EOF in quotes state")
|
|
||||||
break
|
|
||||||
if nextchar == state:
|
|
||||||
if self.keep:
|
if self.keep:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
state = 'a'
|
self.state = 'a'
|
||||||
elif (nextchar in self.escape and
|
elif (nextchar in self.escape and
|
||||||
state in self.escapedquotes):
|
self.state in self.escapedquotes):
|
||||||
if self.keep:
|
if self.keep:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
escapedstate = state
|
self.escapedstate = self.state
|
||||||
state = nextchar
|
self.state = nextchar
|
||||||
else:
|
else:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
elif state in self.escape:
|
elif self.state in self.escape:
|
||||||
if nextchar is None:
|
|
||||||
log.shlexer.vdebug("I see EOF in escape state")
|
|
||||||
if not self.keep:
|
|
||||||
token += state
|
|
||||||
break
|
|
||||||
# In posix shells, only the quote itself or the escape
|
# In posix shells, only the quote itself or the escape
|
||||||
# character may be escaped within quotes.
|
# character may be escaped within quotes.
|
||||||
if (escapedstate in self.quotes and nextchar != state and
|
if (self.escapedstate in self.quotes and
|
||||||
nextchar != escapedstate and not self.keep):
|
nextchar != self.state and
|
||||||
token += state
|
nextchar != self.escapedstate and not self.keep):
|
||||||
token += nextchar
|
self.token += self.state
|
||||||
state = escapedstate
|
self.token += nextchar
|
||||||
elif state == 'a':
|
self.state = self.escapedstate
|
||||||
if nextchar is None:
|
elif self.state == 'a':
|
||||||
break
|
if nextchar in self.whitespace:
|
||||||
elif nextchar in self.whitespace:
|
|
||||||
log.shlexer.vdebug("shlex: I see whitespace in word state")
|
log.shlexer.vdebug("shlex: I see whitespace in word state")
|
||||||
state = ' '
|
self.state = ' '
|
||||||
if self.keep:
|
if self.keep:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
if token or quoted:
|
if self.token or self.quoted:
|
||||||
break # emit current token
|
yield self.token
|
||||||
|
self.reset()
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
elif nextchar in self.quotes:
|
elif nextchar in self.quotes:
|
||||||
if self.keep:
|
if self.keep:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
state = nextchar
|
self.state = nextchar
|
||||||
elif nextchar in self.escape:
|
elif nextchar in self.escape:
|
||||||
if self.keep:
|
if self.keep:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
escapedstate = 'a'
|
self.escapedstate = 'a'
|
||||||
state = nextchar
|
self.state = nextchar
|
||||||
else:
|
else:
|
||||||
token += nextchar
|
self.token += nextchar
|
||||||
if not quoted and token == '':
|
|
||||||
token = None
|
|
||||||
log.shlexer.vdebug("token={!r}".format(token))
|
|
||||||
return token
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
while True:
|
|
||||||
token = self.read_token()
|
|
||||||
if token is None:
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
yield token
|
|
||||||
|
|
||||||
|
|
||||||
def split(s, keep=False):
|
def split(s, keep=False):
|
||||||
@ -154,6 +147,8 @@ def split(s, keep=False):
|
|||||||
lexer = ShellLexer(s)
|
lexer = ShellLexer(s)
|
||||||
lexer.keep = keep
|
lexer.keep = keep
|
||||||
tokens = list(lexer)
|
tokens = list(lexer)
|
||||||
|
if not tokens:
|
||||||
|
return []
|
||||||
out = []
|
out = []
|
||||||
if tokens[0].isspace():
|
if tokens[0].isspace():
|
||||||
out.append(tokens[0] + tokens[1])
|
out.append(tokens[0] + tokens[1])
|
||||||
|
Loading…
Reference in New Issue
Block a user