Try splitting with whitespace attached to the start of the next token.
This commit is contained in:
parent
ca1ca7db36
commit
76b72d3438
@ -30,48 +30,48 @@ from qutebrowser.utils import split
|
||||
# Format: input/split|output|without|keep/split|output|with|keep/
|
||||
|
||||
test_data = r"""
|
||||
one two/one|two/one |two/
|
||||
one "two three" four/one|two three|four/one |"two three" |four/
|
||||
one 'two three' four/one|two three|four/one |'two three' |four/
|
||||
one "two\" three" four/one|two" three|four/one |"two\" three" |four/
|
||||
one 'two'\'' three' four/one|two' three|four/one |'two'\'' three' |four/
|
||||
one "two three/one|two three/one |"two three/
|
||||
one 'two three/one|two three/one |'two three/
|
||||
one two/one|two/one| two/
|
||||
one "two three" four/one|two three|four/one| "two three"| four/
|
||||
one 'two three' four/one|two three|four/one| 'two three'| four/
|
||||
one "two\" three" four/one|two" three|four/one| "two\" three"| four/
|
||||
one 'two'\'' three' four/one|two' three|four/one| 'two'\'' three'| four/
|
||||
one "two three/one|two three/one| "two three/
|
||||
one 'two three/one|two three/one| 'two three/
|
||||
one\/one\/one\/
|
||||
one "two\/one|two\/one |"two\/
|
||||
foo bar/foo|bar/foo |bar/
|
||||
foo bar/foo|bar/ foo |bar/
|
||||
foo bar /foo|bar/ foo |bar /
|
||||
foo bar bla fasel/foo|bar|bla|fasel/foo |bar |bla |fasel/
|
||||
x y z xxxx/x|y|z|xxxx/x |y |z |xxxx/
|
||||
\x bar/x|bar/\x |bar/
|
||||
\ x bar/ x|bar/\ x |bar/
|
||||
one "two\/one|two\/one| "two\/
|
||||
foo bar/foo|bar/foo| bar/
|
||||
foo bar/foo|bar/ foo| bar/
|
||||
foo bar /foo|bar/ foo| bar| /
|
||||
foo bar bla fasel/foo|bar|bla|fasel/foo| bar| bla| fasel/
|
||||
x y z xxxx/x|y|z|xxxx/x| y| z| xxxx/
|
||||
\x bar/x|bar/\x| bar/
|
||||
\ x bar/ x|bar/\ x| bar/
|
||||
\ bar/ bar/\ bar/
|
||||
foo \x bar/foo|x|bar/foo |\x |bar/
|
||||
foo \ x bar/foo| x|bar/foo |\ x |bar/
|
||||
foo \ bar/foo| bar/foo |\ bar/
|
||||
foo "bar" bla/foo|bar|bla/foo |"bar" |bla/
|
||||
"foo" "bar" "bla"/foo|bar|bla/"foo" |"bar" |"bla"/
|
||||
"foo" bar "bla"/foo|bar|bla/"foo" |bar |"bla"/
|
||||
"foo" bar bla/foo|bar|bla/"foo" |bar |bla/
|
||||
foo 'bar' bla/foo|bar|bla/foo |'bar' |bla/
|
||||
'foo' 'bar' 'bla'/foo|bar|bla/'foo' |'bar' |'bla'/
|
||||
'foo' bar 'bla'/foo|bar|bla/'foo' |bar |'bla'/
|
||||
'foo' bar bla/foo|bar|bla/'foo' |bar |bla/
|
||||
blurb foo"bar"bar"fasel" baz/blurb|foobarbarfasel|baz/blurb |foo"bar"bar"fasel" |baz/
|
||||
blurb foo'bar'bar'fasel' baz/blurb|foobarbarfasel|baz/blurb |foo'bar'bar'fasel' |baz/
|
||||
foo \x bar/foo|x|bar/foo| \x| bar/
|
||||
foo \ x bar/foo| x|bar/foo| \ x| bar/
|
||||
foo \ bar/foo| bar/foo| \ bar/
|
||||
foo "bar" bla/foo|bar|bla/foo| "bar"| bla/
|
||||
"foo" "bar" "bla"/foo|bar|bla/"foo"| "bar"| "bla"/
|
||||
"foo" bar "bla"/foo|bar|bla/"foo"| bar| "bla"/
|
||||
"foo" bar bla/foo|bar|bla/"foo"| bar| bla/
|
||||
foo 'bar' bla/foo|bar|bla/foo| 'bar'| bla/
|
||||
'foo' 'bar' 'bla'/foo|bar|bla/'foo'| 'bar'| 'bla'/
|
||||
'foo' bar 'bla'/foo|bar|bla/'foo'| bar| 'bla'/
|
||||
'foo' bar bla/foo|bar|bla/'foo'| bar| bla/
|
||||
blurb foo"bar"bar"fasel" baz/blurb|foobarbarfasel|baz/blurb| foo"bar"bar"fasel"| baz/
|
||||
blurb foo'bar'bar'fasel' baz/blurb|foobarbarfasel|baz/blurb| foo'bar'bar'fasel'| baz/
|
||||
""//""/
|
||||
''//''/
|
||||
foo "" bar/foo||bar/foo |"" |bar/
|
||||
foo '' bar/foo||bar/foo |'' |bar/
|
||||
foo "" "" "" bar/foo||||bar/foo |"" |"" |"" |bar/
|
||||
foo '' '' '' bar/foo||||bar/foo |'' |'' |'' |bar/
|
||||
foo "" bar/foo||bar/foo| ""| bar/
|
||||
foo '' bar/foo||bar/foo| ''| bar/
|
||||
foo "" "" "" bar/foo||||bar/foo| ""| ""| ""| bar/
|
||||
foo '' '' '' bar/foo||||bar/foo| ''| ''| ''| bar/
|
||||
\"/"/\"/
|
||||
"\""/"/"\""/
|
||||
"foo\ bar"/foo\ bar/"foo\ bar"/
|
||||
"foo\\ bar"/foo\ bar/"foo\\ bar"/
|
||||
"foo\\ bar\""/foo\ bar"/"foo\\ bar\""/
|
||||
"foo\\" bar\"/foo\|bar"/"foo\\" |bar\"/
|
||||
"foo\\" bar\"/foo\|bar"/"foo\\"| bar\"/
|
||||
"foo\\ bar\" dfadf"/foo\ bar" dfadf/"foo\\ bar\" dfadf"/
|
||||
"foo\\\ bar\" dfadf"/foo\\ bar" dfadf/"foo\\\ bar\" dfadf"/
|
||||
"foo\\\x bar\" dfadf"/foo\\x bar" dfadf/"foo\\\x bar\" dfadf"/
|
||||
@ -92,12 +92,12 @@ foo\ x\x\"/foo xx"/foo\ x\x\"/
|
||||
"foo\ x\x\\""foobar"/foo\ x\x\foobar/"foo\ x\x\\""foobar"/
|
||||
"foo\ x\x\\"\'"foobar"/foo\ x\x\'foobar/"foo\ x\x\\"\'"foobar"/
|
||||
"foo\ x\x\\"\'"fo'obar"/foo\ x\x\'fo'obar/"foo\ x\x\\"\'"fo'obar"/
|
||||
"foo\ x\x\\"\'"fo'obar" 'don'\''t'/foo\ x\x\'fo'obar|don't/"foo\ x\x\\"\'"fo'obar" |'don'\''t'/
|
||||
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\/foo\ x\x\'fo'obar|don't|\/"foo\ x\x\\"\'"fo'obar" |'don'\''t' |\\/
|
||||
"foo\ x\x\\"\'"fo'obar" 'don'\''t'/foo\ x\x\'fo'obar|don't/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'/
|
||||
"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\/foo\ x\x\'fo'obar|don't|\/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'| \\/
|
||||
'foo\ bar'/foo\ bar/'foo\ bar'/
|
||||
'foo\\ bar'/foo\\ bar/'foo\\ bar'/
|
||||
foo\ bar/foo bar/foo\ bar/
|
||||
:-) ;-)/:-)|;-)/:-) |;-)/
|
||||
:-) ;-)/:-)|;-)/:-)| ;-)/
|
||||
áéíóú/áéíóú/áéíóú/
|
||||
"""
|
||||
|
||||
|
@ -56,27 +56,25 @@ class ShellLexer:
|
||||
except StopIteration:
|
||||
if self.state in self.escape and not self.keep:
|
||||
self.token += self.state
|
||||
if self.state in self.whitespace:
|
||||
yield self.state
|
||||
if self.token or self.quoted:
|
||||
yield self.token
|
||||
return
|
||||
log.shlexer.vdebug("in state {!r} I see character: {!r}".format(
|
||||
self.state, nextchar))
|
||||
if self.state == ' ':
|
||||
if self.keep:
|
||||
self.token += nextchar
|
||||
if nextchar in self.whitespace:
|
||||
log.shlexer.vdebug("I see whitespace in whitespace state")
|
||||
if self.keep:
|
||||
self.token += nextchar
|
||||
if self.token or self.quoted:
|
||||
yield self.token
|
||||
self.reset()
|
||||
elif nextchar in self.escape:
|
||||
if self.keep:
|
||||
self.token += nextchar
|
||||
self.escapedstate = 'a'
|
||||
self.state = nextchar
|
||||
elif nextchar in self.quotes:
|
||||
if self.keep:
|
||||
self.token += nextchar
|
||||
self.state = nextchar
|
||||
else:
|
||||
self.token = nextchar
|
||||
@ -108,11 +106,11 @@ class ShellLexer:
|
||||
if nextchar in self.whitespace:
|
||||
log.shlexer.vdebug("shlex: I see whitespace in word state")
|
||||
self.state = ' '
|
||||
if self.keep:
|
||||
self.token += nextchar
|
||||
if self.token or self.quoted:
|
||||
yield self.token
|
||||
self.reset()
|
||||
if self.keep:
|
||||
yield nextchar
|
||||
elif nextchar in self.quotes:
|
||||
if self.keep:
|
||||
self.token += nextchar
|
||||
@ -138,12 +136,14 @@ def split(s, keep=False):
|
||||
if not tokens:
|
||||
return []
|
||||
out = []
|
||||
if tokens[0].isspace():
|
||||
out.append(tokens[0] + tokens[1])
|
||||
tokens = tokens[2:]
|
||||
spaces = ""
|
||||
|
||||
log.shlexer.vdebug("{!r} -> {!r}".format(s, tokens))
|
||||
|
||||
for t in tokens:
|
||||
if t.isspace():
|
||||
out[-1] += t
|
||||
spaces += t
|
||||
else:
|
||||
out.append(t)
|
||||
out.append(spaces + t)
|
||||
spaces = ""
|
||||
return out
|
||||
|
Loading…
Reference in New Issue
Block a user