Try splitting with whitespace attached to the start of the next token.

This commit is contained in:
Florian Bruhin 2014-11-06 07:14:36 +01:00
parent ca1ca7db36
commit 76b72d3438
2 changed files with 49 additions and 49 deletions

View File

@ -41,7 +41,7 @@ one\/one\/one\/
one "two\/one|two\/one| "two\/ one "two\/one|two\/one| "two\/
foo bar/foo|bar/foo| bar/ foo bar/foo|bar/foo| bar/
foo bar/foo|bar/ foo| bar/ foo bar/foo|bar/ foo| bar/
foo bar /foo|bar/ foo |bar / foo bar /foo|bar/ foo| bar| /
foo bar bla fasel/foo|bar|bla|fasel/foo| bar| bla| fasel/ foo bar bla fasel/foo|bar|bla|fasel/foo| bar| bla| fasel/
x y z xxxx/x|y|z|xxxx/x| y| z| xxxx/ x y z xxxx/x|y|z|xxxx/x| y| z| xxxx/
\x bar/x|bar/\x| bar/ \x bar/x|bar/\x| bar/

View File

@ -56,27 +56,25 @@ class ShellLexer:
except StopIteration: except StopIteration:
if self.state in self.escape and not self.keep: if self.state in self.escape and not self.keep:
self.token += self.state self.token += self.state
if self.state in self.whitespace:
yield self.state
if self.token or self.quoted: if self.token or self.quoted:
yield self.token yield self.token
return return
log.shlexer.vdebug("in state {!r} I see character: {!r}".format( log.shlexer.vdebug("in state {!r} I see character: {!r}".format(
self.state, nextchar)) self.state, nextchar))
if self.state == ' ': if self.state == ' ':
if nextchar in self.whitespace:
log.shlexer.vdebug("I see whitespace in whitespace state")
if self.keep: if self.keep:
self.token += nextchar self.token += nextchar
if nextchar in self.whitespace:
log.shlexer.vdebug("I see whitespace in whitespace state")
if self.token or self.quoted: if self.token or self.quoted:
yield self.token yield self.token
self.reset() self.reset()
elif nextchar in self.escape: elif nextchar in self.escape:
if self.keep:
self.token += nextchar
self.escapedstate = 'a' self.escapedstate = 'a'
self.state = nextchar self.state = nextchar
elif nextchar in self.quotes: elif nextchar in self.quotes:
if self.keep:
self.token += nextchar
self.state = nextchar self.state = nextchar
else: else:
self.token = nextchar self.token = nextchar
@ -108,11 +106,11 @@ class ShellLexer:
if nextchar in self.whitespace: if nextchar in self.whitespace:
log.shlexer.vdebug("shlex: I see whitespace in word state") log.shlexer.vdebug("shlex: I see whitespace in word state")
self.state = ' ' self.state = ' '
if self.keep:
self.token += nextchar
if self.token or self.quoted: if self.token or self.quoted:
yield self.token yield self.token
self.reset() self.reset()
if self.keep:
yield nextchar
elif nextchar in self.quotes: elif nextchar in self.quotes:
if self.keep: if self.keep:
self.token += nextchar self.token += nextchar
@ -138,12 +136,14 @@ def split(s, keep=False):
if not tokens: if not tokens:
return [] return []
out = [] out = []
if tokens[0].isspace(): spaces = ""
out.append(tokens[0] + tokens[1])
tokens = tokens[2:] log.shlexer.vdebug("{!r} -> {!r}".format(s, tokens))
for t in tokens: for t in tokens:
if t.isspace(): if t.isspace():
out[-1] += t spaces += t
else: else:
out.append(t) out.append(spaces + t)
spaces = ""
return out return out