Try splitting with whitespace at next token.

2014-11-06 07:14:36 +01:00 · 2014-11-06 07:14:36 +01:00 · 76b72d3438
commit 76b72d3438
parent ca1ca7db36
2 changed files with 49 additions and 49 deletions
--- a/qutebrowser/test/utils/test_split.py
+++ b/qutebrowser/test/utils/test_split.py
@@ -30,48 +30,48 @@ from qutebrowser.utils import split
 # Format: input/split|output|without|keep/split|output|with|keep/

 test_data = r"""
-one two/one|two/one |two/
-one "two three" four/one|two three|four/one |"two three" |four/
-one 'two three' four/one|two three|four/one |'two three' |four/
-one "two\" three" four/one|two" three|four/one |"two\" three" |four/
-one 'two'\'' three' four/one|two' three|four/one |'two'\'' three' |four/
-one "two three/one|two three/one |"two three/
-one 'two three/one|two three/one |'two three/
+one two/one|two/one| two/
+one "two three" four/one|two three|four/one| "two three"| four/
+one 'two three' four/one|two three|four/one| 'two three'| four/
+one "two\" three" four/one|two" three|four/one| "two\" three"| four/
+one 'two'\'' three' four/one|two' three|four/one| 'two'\'' three'| four/
+one "two three/one|two three/one| "two three/
+one 'two three/one|two three/one| 'two three/
 one\/one\/one\/
-one "two\/one|two\/one |"two\/
-foo bar/foo|bar/foo |bar/
- foo bar/foo|bar/ foo |bar/
- foo bar /foo|bar/ foo |bar /
-foo   bar    bla     fasel/foo|bar|bla|fasel/foo   |bar    |bla     |fasel/
-x y  z              xxxx/x|y|z|xxxx/x |y  |z              |xxxx/
-\x bar/x|bar/\x |bar/
-\ x bar/ x|bar/\ x |bar/
+one "two\/one|two\/one| "two\/
+foo bar/foo|bar/foo| bar/
+ foo bar/foo|bar/ foo| bar/
+ foo bar /foo|bar/ foo| bar| /
+foo   bar    bla     fasel/foo|bar|bla|fasel/foo|   bar|    bla|     fasel/
+x y  z              xxxx/x|y|z|xxxx/x| y|  z|              xxxx/
+\x bar/x|bar/\x| bar/
+\ x bar/ x|bar/\ x| bar/
 \ bar/ bar/\ bar/
-foo \x bar/foo|x|bar/foo |\x |bar/
-foo \ x bar/foo| x|bar/foo |\ x |bar/
-foo \ bar/foo| bar/foo |\ bar/
-foo "bar" bla/foo|bar|bla/foo |"bar" |bla/
-"foo" "bar" "bla"/foo|bar|bla/"foo" |"bar" |"bla"/
-"foo" bar "bla"/foo|bar|bla/"foo" |bar |"bla"/
-"foo" bar bla/foo|bar|bla/"foo" |bar |bla/
-foo 'bar' bla/foo|bar|bla/foo |'bar' |bla/
-'foo' 'bar' 'bla'/foo|bar|bla/'foo' |'bar' |'bla'/
-'foo' bar 'bla'/foo|bar|bla/'foo' |bar |'bla'/
-'foo' bar bla/foo|bar|bla/'foo' |bar |bla/
-blurb foo"bar"bar"fasel" baz/blurb|foobarbarfasel|baz/blurb |foo"bar"bar"fasel" |baz/
-blurb foo'bar'bar'fasel' baz/blurb|foobarbarfasel|baz/blurb |foo'bar'bar'fasel' |baz/
+foo \x bar/foo|x|bar/foo| \x| bar/
+foo \ x bar/foo| x|bar/foo| \ x| bar/
+foo \ bar/foo| bar/foo| \ bar/
+foo "bar" bla/foo|bar|bla/foo| "bar"| bla/
+"foo" "bar" "bla"/foo|bar|bla/"foo"| "bar"| "bla"/
+"foo" bar "bla"/foo|bar|bla/"foo"| bar| "bla"/
+"foo" bar bla/foo|bar|bla/"foo"| bar| bla/
+foo 'bar' bla/foo|bar|bla/foo| 'bar'| bla/
+'foo' 'bar' 'bla'/foo|bar|bla/'foo'| 'bar'| 'bla'/
+'foo' bar 'bla'/foo|bar|bla/'foo'| bar| 'bla'/
+'foo' bar bla/foo|bar|bla/'foo'| bar| bla/
+blurb foo"bar"bar"fasel" baz/blurb|foobarbarfasel|baz/blurb| foo"bar"bar"fasel"| baz/
+blurb foo'bar'bar'fasel' baz/blurb|foobarbarfasel|baz/blurb| foo'bar'bar'fasel'| baz/
 ""//""/
 ''//''/
-foo "" bar/foo||bar/foo |"" |bar/
-foo '' bar/foo||bar/foo |'' |bar/
-foo "" "" "" bar/foo||||bar/foo |"" |"" |"" |bar/
-foo '' '' '' bar/foo||||bar/foo |'' |'' |'' |bar/
+foo "" bar/foo||bar/foo| ""| bar/
+foo '' bar/foo||bar/foo| ''| bar/
+foo "" "" "" bar/foo||||bar/foo| ""| ""| ""| bar/
+foo '' '' '' bar/foo||||bar/foo| ''| ''| ''| bar/
 \"/"/\"/
 "\""/"/"\""/
 "foo\ bar"/foo\ bar/"foo\ bar"/
 "foo\\ bar"/foo\ bar/"foo\\ bar"/
 "foo\\ bar\""/foo\ bar"/"foo\\ bar\""/
-"foo\\" bar\"/foo\|bar"/"foo\\" |bar\"/
+"foo\\" bar\"/foo\|bar"/"foo\\"| bar\"/
 "foo\\ bar\" dfadf"/foo\ bar" dfadf/"foo\\ bar\" dfadf"/
 "foo\\\ bar\" dfadf"/foo\\ bar" dfadf/"foo\\\ bar\" dfadf"/
 "foo\\\x bar\" dfadf"/foo\\x bar" dfadf/"foo\\\x bar\" dfadf"/
@@ -92,12 +92,12 @@ foo\ x\x\"/foo xx"/foo\ x\x\"/
 "foo\ x\x\\""foobar"/foo\ x\x\foobar/"foo\ x\x\\""foobar"/
 "foo\ x\x\\"\'"foobar"/foo\ x\x\'foobar/"foo\ x\x\\"\'"foobar"/
 "foo\ x\x\\"\'"fo'obar"/foo\ x\x\'fo'obar/"foo\ x\x\\"\'"fo'obar"/
-"foo\ x\x\\"\'"fo'obar" 'don'\''t'/foo\ x\x\'fo'obar|don't/"foo\ x\x\\"\'"fo'obar" |'don'\''t'/
-"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\/foo\ x\x\'fo'obar|don't|\/"foo\ x\x\\"\'"fo'obar" |'don'\''t' |\\/
+"foo\ x\x\\"\'"fo'obar" 'don'\''t'/foo\ x\x\'fo'obar|don't/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'/
+"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\/foo\ x\x\'fo'obar|don't|\/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'| \\/
 'foo\ bar'/foo\ bar/'foo\ bar'/
 'foo\\ bar'/foo\\ bar/'foo\\ bar'/
 foo\ bar/foo bar/foo\ bar/
-:-) ;-)/:-)|;-)/:-) |;-)/
+:-) ;-)/:-)|;-)/:-)| ;-)/
 áéíóú/áéíóú/áéíóú/
 """

--- a/qutebrowser/utils/split.py
+++ b/qutebrowser/utils/split.py
@@ -56,27 +56,25 @@ class ShellLexer:
            except StopIteration:
                if self.state in self.escape and not self.keep:
                    self.token += self.state
+                if self.state in self.whitespace:
+                    yield self.state
                if self.token or self.quoted:
                    yield self.token
                return
            log.shlexer.vdebug("in state {!r} I see character: {!r}".format(
                self.state, nextchar))
            if self.state == ' ':
+                if self.keep:
+                    self.token += nextchar
                if nextchar in self.whitespace:
                    log.shlexer.vdebug("I see whitespace in whitespace state")
-                    if self.keep:
-                        self.token += nextchar
                    if self.token or self.quoted:
                        yield self.token
                        self.reset()
                elif nextchar in self.escape:
-                    if self.keep:
-                        self.token += nextchar
                    self.escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.quotes:
-                    if self.keep:
-                        self.token += nextchar
                    self.state = nextchar
                else:
                    self.token = nextchar
@@ -108,11 +106,11 @@ class ShellLexer:
                if nextchar in self.whitespace:
                    log.shlexer.vdebug("shlex: I see whitespace in word state")
                    self.state = ' '
-                    if self.keep:
-                        self.token += nextchar
                    if self.token or self.quoted:
                        yield self.token
                        self.reset()
+                    if self.keep:
+                        yield nextchar
                elif nextchar in self.quotes:
                    if self.keep:
                        self.token += nextchar
@@ -138,12 +136,14 @@ def split(s, keep=False):
    if not tokens:
        return []
    out = []
-    if tokens[0].isspace():
-        out.append(tokens[0] + tokens[1])
-        tokens = tokens[2:]
+    spaces = ""
+
+    log.shlexer.vdebug("{!r} -> {!r}".format(s, tokens))
+
    for t in tokens:
        if t.isspace():
-            out[-1] += t
+            spaces += t
        else:
-            out.append(t)
+            out.append(spaces + t)
+            spaces = ""
    return out