diff --git a/qutebrowser/test/utils/test_split.py b/qutebrowser/test/utils/test_split.py index 58e9a71bd..c937020f7 100644 --- a/qutebrowser/test/utils/test_split.py +++ b/qutebrowser/test/utils/test_split.py @@ -30,48 +30,48 @@ from qutebrowser.utils import split # Format: input/split|output|without|keep/split|output|with|keep/ test_data = r""" -one two/one|two/one |two/ -one "two three" four/one|two three|four/one |"two three" |four/ -one 'two three' four/one|two three|four/one |'two three' |four/ -one "two\" three" four/one|two" three|four/one |"two\" three" |four/ -one 'two'\'' three' four/one|two' three|four/one |'two'\'' three' |four/ -one "two three/one|two three/one |"two three/ -one 'two three/one|two three/one |'two three/ +one two/one|two/one| two/ +one "two three" four/one|two three|four/one| "two three"| four/ +one 'two three' four/one|two three|four/one| 'two three'| four/ +one "two\" three" four/one|two" three|four/one| "two\" three"| four/ +one 'two'\'' three' four/one|two' three|four/one| 'two'\'' three'| four/ +one "two three/one|two three/one| "two three/ +one 'two three/one|two three/one| 'two three/ one\/one\/one\/ -one "two\/one|two\/one |"two\/ -foo bar/foo|bar/foo |bar/ - foo bar/foo|bar/ foo |bar/ - foo bar /foo|bar/ foo |bar / -foo bar bla fasel/foo|bar|bla|fasel/foo |bar |bla |fasel/ -x y z xxxx/x|y|z|xxxx/x |y |z |xxxx/ -\x bar/x|bar/\x |bar/ -\ x bar/ x|bar/\ x |bar/ +one "two\/one|two\/one| "two\/ +foo bar/foo|bar/foo| bar/ + foo bar/foo|bar/ foo| bar/ + foo bar /foo|bar/ foo| bar| / +foo bar bla fasel/foo|bar|bla|fasel/foo| bar| bla| fasel/ +x y z xxxx/x|y|z|xxxx/x| y| z| xxxx/ +\x bar/x|bar/\x| bar/ +\ x bar/ x|bar/\ x| bar/ \ bar/ bar/\ bar/ -foo \x bar/foo|x|bar/foo |\x |bar/ -foo \ x bar/foo| x|bar/foo |\ x |bar/ -foo \ bar/foo| bar/foo |\ bar/ -foo "bar" bla/foo|bar|bla/foo |"bar" |bla/ -"foo" "bar" "bla"/foo|bar|bla/"foo" |"bar" |"bla"/ -"foo" bar "bla"/foo|bar|bla/"foo" |bar |"bla"/ -"foo" bar bla/foo|bar|bla/"foo" |bar |bla/ -foo 'bar' bla/foo|bar|bla/foo |'bar' |bla/ -'foo' 'bar' 'bla'/foo|bar|bla/'foo' |'bar' |'bla'/ -'foo' bar 'bla'/foo|bar|bla/'foo' |bar |'bla'/ -'foo' bar bla/foo|bar|bla/'foo' |bar |bla/ -blurb foo"bar"bar"fasel" baz/blurb|foobarbarfasel|baz/blurb |foo"bar"bar"fasel" |baz/ -blurb foo'bar'bar'fasel' baz/blurb|foobarbarfasel|baz/blurb |foo'bar'bar'fasel' |baz/ +foo \x bar/foo|x|bar/foo| \x| bar/ +foo \ x bar/foo| x|bar/foo| \ x| bar/ +foo \ bar/foo| bar/foo| \ bar/ +foo "bar" bla/foo|bar|bla/foo| "bar"| bla/ +"foo" "bar" "bla"/foo|bar|bla/"foo"| "bar"| "bla"/ +"foo" bar "bla"/foo|bar|bla/"foo"| bar| "bla"/ +"foo" bar bla/foo|bar|bla/"foo"| bar| bla/ +foo 'bar' bla/foo|bar|bla/foo| 'bar'| bla/ +'foo' 'bar' 'bla'/foo|bar|bla/'foo'| 'bar'| 'bla'/ +'foo' bar 'bla'/foo|bar|bla/'foo'| bar| 'bla'/ +'foo' bar bla/foo|bar|bla/'foo'| bar| bla/ +blurb foo"bar"bar"fasel" baz/blurb|foobarbarfasel|baz/blurb| foo"bar"bar"fasel"| baz/ +blurb foo'bar'bar'fasel' baz/blurb|foobarbarfasel|baz/blurb| foo'bar'bar'fasel'| baz/ ""//""/ ''//''/ -foo "" bar/foo||bar/foo |"" |bar/ -foo '' bar/foo||bar/foo |'' |bar/ -foo "" "" "" bar/foo||||bar/foo |"" |"" |"" |bar/ -foo '' '' '' bar/foo||||bar/foo |'' |'' |'' |bar/ +foo "" bar/foo||bar/foo| ""| bar/ +foo '' bar/foo||bar/foo| ''| bar/ +foo "" "" "" bar/foo||||bar/foo| ""| ""| ""| bar/ +foo '' '' '' bar/foo||||bar/foo| ''| ''| ''| bar/ \"/"/\"/ "\""/"/"\""/ "foo\ bar"/foo\ bar/"foo\ bar"/ "foo\\ bar"/foo\ bar/"foo\\ bar"/ "foo\\ bar\""/foo\ bar"/"foo\\ bar\""/ -"foo\\" bar\"/foo\|bar"/"foo\\" |bar\"/ +"foo\\" bar\"/foo\|bar"/"foo\\"| bar\"/ "foo\\ bar\" dfadf"/foo\ bar" dfadf/"foo\\ bar\" dfadf"/ "foo\\\ bar\" dfadf"/foo\\ bar" dfadf/"foo\\\ bar\" dfadf"/ "foo\\\x bar\" dfadf"/foo\\x bar" dfadf/"foo\\\x bar\" dfadf"/ @@ -92,12 +92,12 @@ foo\ x\x\"/foo xx"/foo\ x\x\"/ "foo\ x\x\\""foobar"/foo\ x\x\foobar/"foo\ x\x\\""foobar"/ "foo\ x\x\\"\'"foobar"/foo\ x\x\'foobar/"foo\ x\x\\"\'"foobar"/ "foo\ x\x\\"\'"fo'obar"/foo\ x\x\'fo'obar/"foo\ x\x\\"\'"fo'obar"/ -"foo\ x\x\\"\'"fo'obar" 'don'\''t'/foo\ x\x\'fo'obar|don't/"foo\ x\x\\"\'"fo'obar" |'don'\''t'/ -"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\/foo\ x\x\'fo'obar|don't|\/"foo\ x\x\\"\'"fo'obar" |'don'\''t' |\\/ +"foo\ x\x\\"\'"fo'obar" 'don'\''t'/foo\ x\x\'fo'obar|don't/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'/ +"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\/foo\ x\x\'fo'obar|don't|\/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'| \\/ 'foo\ bar'/foo\ bar/'foo\ bar'/ 'foo\\ bar'/foo\\ bar/'foo\\ bar'/ foo\ bar/foo bar/foo\ bar/ -:-) ;-)/:-)|;-)/:-) |;-)/ +:-) ;-)/:-)|;-)/:-)| ;-)/ áéíóú/áéíóú/áéíóú/ """ diff --git a/qutebrowser/utils/split.py b/qutebrowser/utils/split.py index de8174f3d..2e69a8acd 100644 --- a/qutebrowser/utils/split.py +++ b/qutebrowser/utils/split.py @@ -56,27 +56,25 @@ class ShellLexer: except StopIteration: if self.state in self.escape and not self.keep: self.token += self.state + if self.state in self.whitespace: + yield self.state if self.token or self.quoted: yield self.token return log.shlexer.vdebug("in state {!r} I see character: {!r}".format( self.state, nextchar)) if self.state == ' ': + if self.keep: + self.token += nextchar if nextchar in self.whitespace: log.shlexer.vdebug("I see whitespace in whitespace state") - if self.keep: - self.token += nextchar if self.token or self.quoted: yield self.token self.reset() elif nextchar in self.escape: - if self.keep: - self.token += nextchar self.escapedstate = 'a' self.state = nextchar elif nextchar in self.quotes: - if self.keep: - self.token += nextchar self.state = nextchar else: self.token = nextchar @@ -108,11 +106,11 @@ class ShellLexer: if nextchar in self.whitespace: log.shlexer.vdebug("shlex: I see whitespace in word state") self.state = ' ' - if self.keep: - self.token += nextchar if self.token or self.quoted: yield self.token self.reset() + if self.keep: + yield nextchar elif nextchar in self.quotes: if self.keep: self.token += nextchar @@ -138,12 +136,14 @@ def split(s, keep=False): if not tokens: return [] out = [] - if tokens[0].isspace(): - out.append(tokens[0] + tokens[1]) - tokens = tokens[2:] + spaces = "" + + log.shlexer.vdebug("{!r} -> {!r}".format(s, tokens)) + for t in tokens: if t.isspace(): - out[-1] += t + spaces += t else: - out.append(t) + out.append(spaces + t) + spaces = "" return out