Add a keep-mode to shlexer.

2014-11-05 07:41:17 +01:00 · 2014-11-05 07:41:17 +01:00 · 22da17bbca
commit 22da17bbca
parent 9ed466b536
2 changed files with 36 additions and 4 deletions
--- a/qutebrowser/test/utils/test_split.py
+++ b/qutebrowser/test/utils/test_split.py
@ -112,3 +112,12 @@ class SplitTests(unittest.TestCase):
            with self.subTest(cmd=cmd):
                items = split.split(cmd)
                self.assertEqual(items, out)
    def test_split_keep(self):
        """Test splitting with keep=True.

        For every case in test_data, the tokens returned by
        split.split(cmd, keep=True) must concatenate back to the original
        command string.
        """
        for case in test_data.strip().splitlines():
            # Each case is '|'-separated: the first field is the command,
            # the remaining fields are collected in _out and not used here.
            # The [:-1] slice drops whatever follows the last '|'.
            cmd, *_out = case.split('|')[:-1]
            # A literal backslash-n in the test data stands for a newline.
            cmd = cmd.replace(r'\n', '\n')
            with self.subTest(cmd=cmd):
                items = split.split(cmd, keep=True)
                # Joining the tokens must reproduce the input exactly.
                self.assertEqual(''.join(items), cmd)
--- a/qutebrowser/utils/split.py
+++ b/qutebrowser/utils/split.py
@ -41,6 +41,7 @@ class ShellLexer:
        self.escapedquotes = '"'
        self.state = ' '
        self.token = ''
        self.keep = False
    def read_token(self):
        """Read a raw token from the input stream."""
@ -63,15 +64,21 @@ class ShellLexer:
                    break
                elif nextchar in self.whitespace:
                    log.shlexer.vdebug("I see whitespace in whitespace state")
                    if self.keep:
                        self.token += nextchar
                    if self.token or quoted:
                        # emit current token
                        break
                    else:
                        continue
                elif nextchar in self.escape:
                    if self.keep:
                        self.token += nextchar
                    escapedstate = 'a'
                    self.state = nextchar
                elif nextchar in self.quotes:
                    if self.keep:
                        self.token += nextchar
                    self.state = nextchar
                else:
                    self.token = nextchar
@ -83,9 +90,13 @@ class ShellLexer:
                    self.state = None
                    break
                if nextchar == self.state:
                    if self.keep:
                        self.token += nextchar
                    self.state = 'a'
                elif (nextchar in self.escape and
                        self.state in self.escapedquotes):
                    if self.keep:
                        self.token += nextchar
                    escapedstate = self.state
                    self.state = nextchar
                else:
@ -93,13 +104,14 @@ class ShellLexer:
            elif self.state in self.escape:
                if nextchar is None:
                    log.shlexer.vdebug("I see EOF in escape state")
-                    self.token += self.state
+                    if not self.keep:
                        self.token += self.state
                    self.state = None
                    break
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes.
                if (escapedstate in self.quotes and nextchar != self.state and
-                        nextchar != escapedstate):
+                        nextchar != escapedstate and not self.keep):
                    self.token += self.state
                self.token += nextchar
                self.state = escapedstate
@ -110,13 +122,19 @@ class ShellLexer:
                elif nextchar in self.whitespace:
                    log.shlexer.vdebug("shlex: I see whitespace in word state")
                    self.state = ' '
                    if self.keep:
                        self.token += nextchar
                    if self.token or quoted:
                        break   # emit current token
                    else:
                        continue
                elif nextchar in self.quotes:
                    if self.keep:
                        self.token += nextchar
                    self.state = nextchar
                elif nextchar in self.escape:
                    if self.keep:
                        self.token += nextchar
                    escapedstate = 'a'
                    self.state = nextchar
                else:
@ -138,7 +156,12 @@ class ShellLexer:
        return token
-def split(s):
+def split(s, keep=False):
-    """Split a string via ShellLexer."""
+    """Split a string via ShellLexer.
    Args:
        keep: Whether to keep special chars in the split output.
    """
    lexer = ShellLexer(s)
    lexer.keep = keep
    return list(lexer)