Try splitting so that whitespace is attached to the start of the next token (instead of the end of the previous one).

This commit is contained in:
Florian Bruhin 2014-11-06 07:14:36 +01:00
parent ca1ca7db36
commit 76b72d3438
2 changed files with 49 additions and 49 deletions

View File

@ -30,48 +30,48 @@ from qutebrowser.utils import split
# Format: input/split|output|without|keep/split|output|with|keep/
test_data = r"""
one two/one|two/one |two/
one "two three" four/one|two three|four/one |"two three" |four/
one 'two three' four/one|two three|four/one |'two three' |four/
one "two\" three" four/one|two" three|four/one |"two\" three" |four/
one 'two'\'' three' four/one|two' three|four/one |'two'\'' three' |four/
one "two three/one|two three/one |"two three/
one 'two three/one|two three/one |'two three/
one two/one|two/one| two/
one "two three" four/one|two three|four/one| "two three"| four/
one 'two three' four/one|two three|four/one| 'two three'| four/
one "two\" three" four/one|two" three|four/one| "two\" three"| four/
one 'two'\'' three' four/one|two' three|four/one| 'two'\'' three'| four/
one "two three/one|two three/one| "two three/
one 'two three/one|two three/one| 'two three/
one\/one\/one\/
one "two\/one|two\/one |"two\/
foo bar/foo|bar/foo |bar/
foo bar/foo|bar/ foo |bar/
foo bar /foo|bar/ foo |bar /
foo bar bla fasel/foo|bar|bla|fasel/foo |bar |bla |fasel/
x y z xxxx/x|y|z|xxxx/x |y |z |xxxx/
\x bar/x|bar/\x |bar/
\ x bar/ x|bar/\ x |bar/
one "two\/one|two\/one| "two\/
foo bar/foo|bar/foo| bar/
foo bar/foo|bar/ foo| bar/
foo bar /foo|bar/ foo| bar| /
foo bar bla fasel/foo|bar|bla|fasel/foo| bar| bla| fasel/
x y z xxxx/x|y|z|xxxx/x| y| z| xxxx/
\x bar/x|bar/\x| bar/
\ x bar/ x|bar/\ x| bar/
\ bar/ bar/\ bar/
foo \x bar/foo|x|bar/foo |\x |bar/
foo \ x bar/foo| x|bar/foo |\ x |bar/
foo \ bar/foo| bar/foo |\ bar/
foo "bar" bla/foo|bar|bla/foo |"bar" |bla/
"foo" "bar" "bla"/foo|bar|bla/"foo" |"bar" |"bla"/
"foo" bar "bla"/foo|bar|bla/"foo" |bar |"bla"/
"foo" bar bla/foo|bar|bla/"foo" |bar |bla/
foo 'bar' bla/foo|bar|bla/foo |'bar' |bla/
'foo' 'bar' 'bla'/foo|bar|bla/'foo' |'bar' |'bla'/
'foo' bar 'bla'/foo|bar|bla/'foo' |bar |'bla'/
'foo' bar bla/foo|bar|bla/'foo' |bar |bla/
blurb foo"bar"bar"fasel" baz/blurb|foobarbarfasel|baz/blurb |foo"bar"bar"fasel" |baz/
blurb foo'bar'bar'fasel' baz/blurb|foobarbarfasel|baz/blurb |foo'bar'bar'fasel' |baz/
foo \x bar/foo|x|bar/foo| \x| bar/
foo \ x bar/foo| x|bar/foo| \ x| bar/
foo \ bar/foo| bar/foo| \ bar/
foo "bar" bla/foo|bar|bla/foo| "bar"| bla/
"foo" "bar" "bla"/foo|bar|bla/"foo"| "bar"| "bla"/
"foo" bar "bla"/foo|bar|bla/"foo"| bar| "bla"/
"foo" bar bla/foo|bar|bla/"foo"| bar| bla/
foo 'bar' bla/foo|bar|bla/foo| 'bar'| bla/
'foo' 'bar' 'bla'/foo|bar|bla/'foo'| 'bar'| 'bla'/
'foo' bar 'bla'/foo|bar|bla/'foo'| bar| 'bla'/
'foo' bar bla/foo|bar|bla/'foo'| bar| bla/
blurb foo"bar"bar"fasel" baz/blurb|foobarbarfasel|baz/blurb| foo"bar"bar"fasel"| baz/
blurb foo'bar'bar'fasel' baz/blurb|foobarbarfasel|baz/blurb| foo'bar'bar'fasel'| baz/
""//""/
''//''/
foo "" bar/foo||bar/foo |"" |bar/
foo '' bar/foo||bar/foo |'' |bar/
foo "" "" "" bar/foo||||bar/foo |"" |"" |"" |bar/
foo '' '' '' bar/foo||||bar/foo |'' |'' |'' |bar/
foo "" bar/foo||bar/foo| ""| bar/
foo '' bar/foo||bar/foo| ''| bar/
foo "" "" "" bar/foo||||bar/foo| ""| ""| ""| bar/
foo '' '' '' bar/foo||||bar/foo| ''| ''| ''| bar/
\"/"/\"/
"\""/"/"\""/
"foo\ bar"/foo\ bar/"foo\ bar"/
"foo\\ bar"/foo\ bar/"foo\\ bar"/
"foo\\ bar\""/foo\ bar"/"foo\\ bar\""/
"foo\\" bar\"/foo\|bar"/"foo\\" |bar\"/
"foo\\" bar\"/foo\|bar"/"foo\\"| bar\"/
"foo\\ bar\" dfadf"/foo\ bar" dfadf/"foo\\ bar\" dfadf"/
"foo\\\ bar\" dfadf"/foo\\ bar" dfadf/"foo\\\ bar\" dfadf"/
"foo\\\x bar\" dfadf"/foo\\x bar" dfadf/"foo\\\x bar\" dfadf"/
@ -92,12 +92,12 @@ foo\ x\x\"/foo xx"/foo\ x\x\"/
"foo\ x\x\\""foobar"/foo\ x\x\foobar/"foo\ x\x\\""foobar"/
"foo\ x\x\\"\'"foobar"/foo\ x\x\'foobar/"foo\ x\x\\"\'"foobar"/
"foo\ x\x\\"\'"fo'obar"/foo\ x\x\'fo'obar/"foo\ x\x\\"\'"fo'obar"/
"foo\ x\x\\"\'"fo'obar" 'don'\''t'/foo\ x\x\'fo'obar|don't/"foo\ x\x\\"\'"fo'obar" |'don'\''t'/
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\/foo\ x\x\'fo'obar|don't|\/"foo\ x\x\\"\'"fo'obar" |'don'\''t' |\\/
"foo\ x\x\\"\'"fo'obar" 'don'\''t'/foo\ x\x\'fo'obar|don't/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'/
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\/foo\ x\x\'fo'obar|don't|\/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'| \\/
'foo\ bar'/foo\ bar/'foo\ bar'/
'foo\\ bar'/foo\\ bar/'foo\\ bar'/
foo\ bar/foo bar/foo\ bar/
:-) ;-)/:-)|;-)/:-) |;-)/
:-) ;-)/:-)|;-)/:-)| ;-)/
áéíóú/áéíóú/áéíóú/
"""

View File

@ -56,27 +56,25 @@ class ShellLexer:
except StopIteration:
if self.state in self.escape and not self.keep:
self.token += self.state
if self.state in self.whitespace:
yield self.state
if self.token or self.quoted:
yield self.token
return
log.shlexer.vdebug("in state {!r} I see character: {!r}".format(
self.state, nextchar))
if self.state == ' ':
if self.keep:
self.token += nextchar
if nextchar in self.whitespace:
log.shlexer.vdebug("I see whitespace in whitespace state")
if self.keep:
self.token += nextchar
if self.token or self.quoted:
yield self.token
self.reset()
elif nextchar in self.escape:
if self.keep:
self.token += nextchar
self.escapedstate = 'a'
self.state = nextchar
elif nextchar in self.quotes:
if self.keep:
self.token += nextchar
self.state = nextchar
else:
self.token = nextchar
@ -108,11 +106,11 @@ class ShellLexer:
if nextchar in self.whitespace:
log.shlexer.vdebug("shlex: I see whitespace in word state")
self.state = ' '
if self.keep:
self.token += nextchar
if self.token or self.quoted:
yield self.token
self.reset()
if self.keep:
yield nextchar
elif nextchar in self.quotes:
if self.keep:
self.token += nextchar
@ -138,12 +136,14 @@ def split(s, keep=False):
if not tokens:
return []
out = []
if tokens[0].isspace():
out.append(tokens[0] + tokens[1])
tokens = tokens[2:]
spaces = ""
log.shlexer.vdebug("{!r} -> {!r}".format(s, tokens))
for t in tokens:
if t.isspace():
out[-1] += t
spaces += t
else:
out.append(t)
out.append(spaces + t)
spaces = ""
return out