Try splitting so that whitespace is attached to the next token.
This commit is contained in:
parent
ca1ca7db36
commit
76b72d3438
@ -41,7 +41,7 @@ one\/one\/one\/
|
|||||||
one "two\/one|two\/one| "two\/
|
one "two\/one|two\/one| "two\/
|
||||||
foo bar/foo|bar/foo| bar/
|
foo bar/foo|bar/foo| bar/
|
||||||
foo bar/foo|bar/ foo| bar/
|
foo bar/foo|bar/ foo| bar/
|
||||||
foo bar /foo|bar/ foo |bar /
|
foo bar /foo|bar/ foo| bar| /
|
||||||
foo bar bla fasel/foo|bar|bla|fasel/foo| bar| bla| fasel/
|
foo bar bla fasel/foo|bar|bla|fasel/foo| bar| bla| fasel/
|
||||||
x y z xxxx/x|y|z|xxxx/x| y| z| xxxx/
|
x y z xxxx/x|y|z|xxxx/x| y| z| xxxx/
|
||||||
\x bar/x|bar/\x| bar/
|
\x bar/x|bar/\x| bar/
|
||||||
|
@ -56,27 +56,25 @@ class ShellLexer:
|
|||||||
except StopIteration:
|
except StopIteration:
|
||||||
if self.state in self.escape and not self.keep:
|
if self.state in self.escape and not self.keep:
|
||||||
self.token += self.state
|
self.token += self.state
|
||||||
|
if self.state in self.whitespace:
|
||||||
|
yield self.state
|
||||||
if self.token or self.quoted:
|
if self.token or self.quoted:
|
||||||
yield self.token
|
yield self.token
|
||||||
return
|
return
|
||||||
log.shlexer.vdebug("in state {!r} I see character: {!r}".format(
|
log.shlexer.vdebug("in state {!r} I see character: {!r}".format(
|
||||||
self.state, nextchar))
|
self.state, nextchar))
|
||||||
if self.state == ' ':
|
if self.state == ' ':
|
||||||
if nextchar in self.whitespace:
|
|
||||||
log.shlexer.vdebug("I see whitespace in whitespace state")
|
|
||||||
if self.keep:
|
if self.keep:
|
||||||
self.token += nextchar
|
self.token += nextchar
|
||||||
|
if nextchar in self.whitespace:
|
||||||
|
log.shlexer.vdebug("I see whitespace in whitespace state")
|
||||||
if self.token or self.quoted:
|
if self.token or self.quoted:
|
||||||
yield self.token
|
yield self.token
|
||||||
self.reset()
|
self.reset()
|
||||||
elif nextchar in self.escape:
|
elif nextchar in self.escape:
|
||||||
if self.keep:
|
|
||||||
self.token += nextchar
|
|
||||||
self.escapedstate = 'a'
|
self.escapedstate = 'a'
|
||||||
self.state = nextchar
|
self.state = nextchar
|
||||||
elif nextchar in self.quotes:
|
elif nextchar in self.quotes:
|
||||||
if self.keep:
|
|
||||||
self.token += nextchar
|
|
||||||
self.state = nextchar
|
self.state = nextchar
|
||||||
else:
|
else:
|
||||||
self.token = nextchar
|
self.token = nextchar
|
||||||
@ -108,11 +106,11 @@ class ShellLexer:
|
|||||||
if nextchar in self.whitespace:
|
if nextchar in self.whitespace:
|
||||||
log.shlexer.vdebug("shlex: I see whitespace in word state")
|
log.shlexer.vdebug("shlex: I see whitespace in word state")
|
||||||
self.state = ' '
|
self.state = ' '
|
||||||
if self.keep:
|
|
||||||
self.token += nextchar
|
|
||||||
if self.token or self.quoted:
|
if self.token or self.quoted:
|
||||||
yield self.token
|
yield self.token
|
||||||
self.reset()
|
self.reset()
|
||||||
|
if self.keep:
|
||||||
|
yield nextchar
|
||||||
elif nextchar in self.quotes:
|
elif nextchar in self.quotes:
|
||||||
if self.keep:
|
if self.keep:
|
||||||
self.token += nextchar
|
self.token += nextchar
|
||||||
@ -138,12 +136,14 @@ def split(s, keep=False):
|
|||||||
if not tokens:
|
if not tokens:
|
||||||
return []
|
return []
|
||||||
out = []
|
out = []
|
||||||
if tokens[0].isspace():
|
spaces = ""
|
||||||
out.append(tokens[0] + tokens[1])
|
|
||||||
tokens = tokens[2:]
|
log.shlexer.vdebug("{!r} -> {!r}".format(s, tokens))
|
||||||
|
|
||||||
for t in tokens:
|
for t in tokens:
|
||||||
if t.isspace():
|
if t.isspace():
|
||||||
out[-1] += t
|
spaces += t
|
||||||
else:
|
else:
|
||||||
out.append(t)
|
out.append(spaces + t)
|
||||||
|
spaces = ""
|
||||||
return out
|
return out
|
||||||
|
Loading…
Reference in New Issue
Block a user