diff --git a/qutebrowser/commands/runners.py b/qutebrowser/commands/runners.py index c28c0b96f..037f2dd21 100644 --- a/qutebrowser/commands/runners.py +++ b/qutebrowser/commands/runners.py @@ -24,7 +24,7 @@ from PyQt5.QtWebKitWidgets import QWebPage from qutebrowser.config import config from qutebrowser.commands import cmdexc, cmdutils -from qutebrowser.utils import message, log, utils, objreg +from qutebrowser.utils import message, log, utils, objreg, split def replace_variables(win_id, arglist): @@ -190,7 +190,8 @@ class CommandRunner(QObject): new_cmd += ' ' return new_cmd - def parse(self, text, aliases=True, fallback=False, alias_no_args=True): + def parse(self, text, aliases=True, fallback=False, alias_no_args=True, + keep=False): """Split the commandline text into command and arguments. Args: @@ -199,18 +200,14 @@ class CommandRunner(QObject): fallback: Whether to do a fallback splitting when the command was unknown. alias_no_args: Whether to apply an alias if there are no arguments. 
+ keep: Whether to keep special chars and whitespace Return: A split string commandline, e.g ['open', 'www.google.com'] """ - parts = text.strip().split(maxsplit=1) - if not parts: + cmdstr, sep, argstr = text.partition(' ') + if not cmdstr: raise cmdexc.NoSuchCommandError("No command given") - elif len(parts) > 1: - cmdstr, argstr = parts - else: - cmdstr = parts[0] - argstr = None if aliases: new_cmd = self._get_alias(text, alias_no_args) if new_cmd is not None: @@ -220,25 +217,35 @@ class CommandRunner(QObject): self._cmd = cmdutils.cmd_dict[cmdstr] except KeyError: if fallback: - parts = text.split(' ') - if text.endswith(' '): - parts.append('') - return parts + # FIXME test this + cmdstr, sep, argstr = text.partition(' ') + return [cmdstr, sep] + argstr.split(' ') else: raise cmdexc.NoSuchCommandError( '{}: no such command'.format(cmdstr)) - self._split_args(argstr) + self._split_args(argstr, keep) retargs = self._args[:] - if text.endswith(' '): - retargs.append('') - return [cmdstr] + retargs + if keep and retargs: + return [cmdstr, sep + retargs[0]] + retargs[1:] + elif keep: + return [cmdstr, sep] + else: + return [cmdstr] + retargs - def _split_args(self, argstr): - """Split the arguments from an arg string.""" - if argstr is None: + def _split_args(self, argstr, keep): + """Split the arguments from an arg string. + + Args: + argstr: An argument string. + keep: Whether to keep special chars and whitespace + + Return: + A list containing the split strings. + """ + if not argstr: self._args = [] elif self._cmd.split: - self._args = utils.safe_shlex_split(argstr) + self._args = split.split(argstr, keep=keep) else: # If split=False, we still want to split the flags, but not # everything after that. 
diff --git a/qutebrowser/test/utils/test_split.py b/qutebrowser/test/utils/test_split.py new file mode 100644 index 000000000..0d4315ffd --- /dev/null +++ b/qutebrowser/test/utils/test_split.py @@ -0,0 +1,131 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2014 Florian Bruhin (The Compiler) +# +# This file is part of qutebrowser. +# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. + +"""Tests for qutebrowser.utils.split.""" + +import unittest + +from qutebrowser.utils import split + + +# Most tests copied from Python's shlex. +# The original test data set was from shellwords, by Hartmut Goebel. 
+ +# Format: input/split|output|without|keep/split|output|with|keep/ + +test_data = r""" +one two/one|two/one| two/ +one "two three" four/one|two three|four/one| "two three"| four/ +one 'two three' four/one|two three|four/one| 'two three'| four/ +one "two\" three" four/one|two" three|four/one| "two\" three"| four/ +one 'two'\'' three' four/one|two' three|four/one| 'two'\'' three'| four/ +one "two three/one|two three/one| "two three/ +one 'two three/one|two three/one| 'two three/ +one\/one\/one\/ +one "two\/one|two\/one| "two\/ +one /one/one| / +foo bar/foo|bar/foo| bar/ + foo bar/foo|bar/ foo| bar/ + foo bar /foo|bar/ foo| bar| / +foo bar bla fasel/foo|bar|bla|fasel/foo| bar| bla| fasel/ +x y z xxxx/x|y|z|xxxx/x| y| z| xxxx/ +\x bar/x|bar/\x| bar/ +\ x bar/ x|bar/\ x| bar/ +\ bar/ bar/\ bar/ +foo \x bar/foo|x|bar/foo| \x| bar/ +foo \ x bar/foo| x|bar/foo| \ x| bar/ +foo \ bar/foo| bar/foo| \ bar/ +foo "bar" bla/foo|bar|bla/foo| "bar"| bla/ +"foo" "bar" "bla"/foo|bar|bla/"foo"| "bar"| "bla"/ +"foo" bar "bla"/foo|bar|bla/"foo"| bar| "bla"/ +"foo" bar bla/foo|bar|bla/"foo"| bar| bla/ +foo 'bar' bla/foo|bar|bla/foo| 'bar'| bla/ +'foo' 'bar' 'bla'/foo|bar|bla/'foo'| 'bar'| 'bla'/ +'foo' bar 'bla'/foo|bar|bla/'foo'| bar| 'bla'/ +'foo' bar bla/foo|bar|bla/'foo'| bar| bla/ +blurb foo"bar"bar"fasel" baz/blurb|foobarbarfasel|baz/blurb| foo"bar"bar"fasel"| baz/ +blurb foo'bar'bar'fasel' baz/blurb|foobarbarfasel|baz/blurb| foo'bar'bar'fasel'| baz/ +""//""/ +''//''/ +foo "" bar/foo||bar/foo| ""| bar/ +foo '' bar/foo||bar/foo| ''| bar/ +foo "" "" "" bar/foo||||bar/foo| ""| ""| ""| bar/ +foo '' '' '' bar/foo||||bar/foo| ''| ''| ''| bar/ +\"/"/\"/ +"\""/"/"\""/ +"foo\ bar"/foo\ bar/"foo\ bar"/ +"foo\\ bar"/foo\ bar/"foo\\ bar"/ +"foo\\ bar\""/foo\ bar"/"foo\\ bar\""/ +"foo\\" bar\"/foo\|bar"/"foo\\"| bar\"/ +"foo\\ bar\" dfadf"/foo\ bar" dfadf/"foo\\ bar\" dfadf"/ +"foo\\\ bar\" dfadf"/foo\\ bar" dfadf/"foo\\\ bar\" dfadf"/ +"foo\\\x bar\" dfadf"/foo\\x bar" dfadf/"foo\\\x bar\" 
dfadf"/ +"foo\x bar\" dfadf"/foo\x bar" dfadf/"foo\x bar\" dfadf"/ +\'/'/\'/ +'foo\ bar'/foo\ bar/'foo\ bar'/ +'foo\\ bar'/foo\\ bar/'foo\\ bar'/ +"foo\\\x bar\" df'a\ 'df"/foo\\x bar" df'a\ 'df/"foo\\\x bar\" df'a\ 'df"/ +\"foo/"foo/\"foo/ +\"foo\x/"foox/\"foo\x/ +"foo\x"/foo\x/"foo\x"/ +"foo\ "/foo\ /"foo\ "/ +foo\ xx/foo xx/foo\ xx/ +foo\ x\x/foo xx/foo\ x\x/ +foo\ x\x\"/foo xx"/foo\ x\x\"/ +"foo\ x\x"/foo\ x\x/"foo\ x\x"/ +"foo\ x\x\\"/foo\ x\x\/"foo\ x\x\\"/ +"foo\ x\x\\""foobar"/foo\ x\x\foobar/"foo\ x\x\\""foobar"/ +"foo\ x\x\\"\'"foobar"/foo\ x\x\'foobar/"foo\ x\x\\"\'"foobar"/ +"foo\ x\x\\"\'"fo'obar"/foo\ x\x\'fo'obar/"foo\ x\x\\"\'"fo'obar"/ +"foo\ x\x\\"\'"fo'obar" 'don'\''t'/foo\ x\x\'fo'obar|don't/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'/ +"foo\ x\x\\"\'"fo'obar" 'don'\''t' \\/foo\ x\x\'fo'obar|don't|\/"foo\ x\x\\"\'"fo'obar"| 'don'\''t'| \\/ +'foo\ bar'/foo\ bar/'foo\ bar'/ +'foo\\ bar'/foo\\ bar/'foo\\ bar'/ +foo\ bar/foo bar/foo\ bar/ +:-) ;-)/:-)|;-)/:-)| ;-)/ +áéíóú/áéíóú/áéíóú/ +""" + +class SplitTests(unittest.TestCase): + + """Test split.""" + + def test_split(self): + """Test splitting.""" + for case in test_data.strip().splitlines(): + cmd, *out = case.split('/')[:-1] + with self.subTest(cmd=cmd): + items = split.split(cmd) + self.assertEqual(items, out[0].split('|')) + + def test_split_keep_original(self): + """Test if splitting with keep=True yields the original string.""" + for case in test_data.strip().splitlines(): + cmd, *_out = case.split('/')[:-1] + with self.subTest(cmd=cmd): + items = split.split(cmd, keep=True) + self.assertEqual(''.join(items), cmd) + + def test_split_keep(self): + """Test splitting with keep=True.""" + for case in test_data.strip().splitlines(): + cmd, *out = case.split('/')[:-1] + with self.subTest(cmd=cmd): + items = split.split(cmd, keep=True) + self.assertEqual(items, out[1].split('|')) diff --git a/qutebrowser/test/utils/test_utils.py b/qutebrowser/test/utils/test_utils.py index 470cd449a..6b8ad1553 100644 --- 
a/qutebrowser/test/utils/test_utils.py +++ b/qutebrowser/test/utils/test_utils.py @@ -110,56 +110,6 @@ class DottedGetattrTests(unittest.TestCase): _ = utils.dotted_getattr(self, 'test.foo.baz') -class SafeShlexSplitTests(unittest.TestCase): - - """Test safe_shlex_split.""" - - def test_normal(self): - """Test safe_shlex_split with a simple string.""" - items = utils.safe_shlex_split('one two') - self.assertEqual(items, ['one', 'two']) - - def test_quoted(self): - """Test safe_shlex_split with a normally quoted string.""" - items = utils.safe_shlex_split('one "two three" four') - self.assertEqual(items, ['one', 'two three', 'four']) - - def test_single_quoted(self): - """Test safe_shlex_split with a single quoted string.""" - items = utils.safe_shlex_split("one 'two three' four") - self.assertEqual(items, ['one', 'two three', 'four']) - - def test_escaped(self): - """Test safe_shlex_split with a normal escaped string.""" - items = utils.safe_shlex_split(r'one "two\" three" four') - self.assertEqual(items, ['one', 'two" three', 'four']) - - def test_escaped_single(self): - """Test safe_shlex_split with a single escaped string.""" - items = utils.safe_shlex_split(r"one 'two'\'' three' four") - self.assertEqual(items, ['one', "two' three", 'four']) - - def test_unbalanced_quotes(self): - """Test safe_shlex_split with unbalanded quotes.""" - items = utils.safe_shlex_split(r'one "two three') - self.assertEqual(items, ['one', 'two three']) - - def test_unbalanced_single_quotes(self): - """Test safe_shlex_split with unbalanded single quotes.""" - items = utils.safe_shlex_split(r"one 'two three") - self.assertEqual(items, ['one', "two three"]) - - def test_unfinished_escape(self): - """Test safe_shlex_split with an unfinished escape.""" - items = utils.safe_shlex_split('one\\') - self.assertEqual(items, ['one\\']) - - def test_both(self): - """Test safe_shlex_split with an unfinished escape and quotes..""" - items = utils.safe_shlex_split('one "two\\') - 
self.assertEqual(items, ['one', 'two\\']) - - class InterpolateColorTests(unittest.TestCase): """Tests for interpolate_color. diff --git a/qutebrowser/utils/completer.py b/qutebrowser/utils/completer.py index cdb682815..5875ec70c 100644 --- a/qutebrowser/utils/completer.py +++ b/qutebrowser/utils/completer.py @@ -174,9 +174,12 @@ class Completer(QObject): Return: A completion model. """ - if parts[cursor_part].startswith('-'): - # cursor on a flag - return + try: + if parts[cursor_part].startswith('-'): + # cursor on a flag + return + except IndexError: + pass parts, cursor_part = self._filter_cmdline_parts(parts, cursor_part) if cursor_part == 0: # '|' or 'set|' @@ -302,7 +305,10 @@ class Completer(QObject): self._parts)) return - pattern = self._parts[self._cursor_part] if self._parts else '' + try: + pattern = self._parts[self._cursor_part] if self._parts else '' + except IndexError: + pattern = '' self._model().set_pattern(pattern) log.completion.debug( diff --git a/qutebrowser/utils/log.py b/qutebrowser/utils/log.py index a5e27d9d8..f88c406b8 100644 --- a/qutebrowser/utils/log.py +++ b/qutebrowser/utils/log.py @@ -125,6 +125,7 @@ qt = logging.getLogger('qt') # Warnings produced by Qt style = logging.getLogger('style') rfc6266 = logging.getLogger('rfc6266') ipc = logging.getLogger('ipc') +shlexer = logging.getLogger('shlexer') ram_handler = None diff --git a/qutebrowser/utils/split.py b/qutebrowser/utils/split.py new file mode 100644 index 000000000..ca8e11391 --- /dev/null +++ b/qutebrowser/utils/split.py @@ -0,0 +1,146 @@ +# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: + +# Copyright 2014 Florian Bruhin (The Compiler) +# +# This file is part of qutebrowser. +# +# qutebrowser is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# qutebrowser is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. + +"""Our own fork of shlex.split with some added and removed features.""" + +from qutebrowser.utils import log + + +class ShellLexer: + + """A lexical analyzer class for simple shell-like syntaxes. + + Based on Python's shlex, but cleaned up, removed some features, and added + some features useful for qutebrowser. + + Attributes: + FIXME + """ + + def __init__(self, s): + self.string = s + self.whitespace = ' \t\r' + self.quotes = '\'"' + self.escape = '\\' + self.escapedquotes = '"' + self.keep = False + + def reset(self): + self.quoted = False + self.escapedstate = ' ' + self.token = '' + self.state = ' ' + + def __iter__(self): + """Read a raw token from the input stream.""" + self.reset() + for nextchar in self.string: + log.shlexer.vdebug("in state {!r} I see character: {!r}".format( + self.state, nextchar)) + if self.state == ' ': + if self.keep: + self.token += nextchar + if nextchar in self.whitespace: + log.shlexer.vdebug("I see whitespace in whitespace state") + if self.token or self.quoted: + yield self.token + self.reset() + elif nextchar in self.escape: + self.escapedstate = 'a' + self.state = nextchar + elif nextchar in self.quotes: + self.state = nextchar + else: + self.token = nextchar + self.state = 'a' + elif self.state in self.quotes: + self.quoted = True + if nextchar == self.state: + if self.keep: + self.token += nextchar + self.state = 'a' + elif (nextchar in self.escape and + self.state in self.escapedquotes): + if self.keep: + self.token += nextchar + self.escapedstate = self.state + self.state = nextchar + else: + self.token += nextchar + elif self.state in self.escape: + # In posix 
shells, only the quote itself or the escape + # character may be escaped within quotes. + if (self.escapedstate in self.quotes and + nextchar != self.state and + nextchar != self.escapedstate and not self.keep): + self.token += self.state + self.token += nextchar + self.state = self.escapedstate + elif self.state == 'a': + if nextchar in self.whitespace: + log.shlexer.vdebug("shlex: I see whitespace in word state") + self.state = ' ' + if self.token or self.quoted: + yield self.token + self.reset() + if self.keep: + yield nextchar + elif nextchar in self.quotes: + if self.keep: + self.token += nextchar + self.state = nextchar + elif nextchar in self.escape: + if self.keep: + self.token += nextchar + self.escapedstate = 'a' + self.state = nextchar + else: + self.token += nextchar + if self.state in self.escape and not self.keep: + self.token += self.state + if self.token or self.quoted: + yield self.token + + +def split(s, keep=False): + """Split a string via ShellLexer. + + Args: + keep: Whether to keep special chars in the split output. 
+ """ + lexer = ShellLexer(s) + lexer.keep = keep + tokens = list(lexer) + if not tokens: + return [] + out = [] + spaces = "" + + log.shlexer.vdebug("{!r} -> {!r}".format(s, tokens)) + + for t in tokens: + if t.isspace(): + spaces += t + else: + out.append(spaces + t) + spaces = "" + if spaces: + out.append(spaces) + + return out diff --git a/qutebrowser/utils/utils.py b/qutebrowser/utils/utils.py index 3a96bc09d..1dd13ee77 100644 --- a/qutebrowser/utils/utils.py +++ b/qutebrowser/utils/utils.py @@ -22,7 +22,6 @@ import io import sys import enum -import shlex import inspect import os.path import urllib.request @@ -99,54 +98,6 @@ def dotted_getattr(obj, path): return functools.reduce(getattr, path.split('.'), obj) -def _get_lexer(s): - """Get an shlex lexer for safe_shlex_split.""" - if s is None: - raise TypeError("Refusing to create a lexer with s=None!") - lexer = shlex.shlex(s, posix=True) - lexer.whitespace_split = True - lexer.commenters = '' - return lexer - - -def safe_shlex_split(s): - r"""Split a string via shlex safely (don't bail out on unbalanced quotes). - - We split while the user is typing (for completion), and as - soon as ", ' or \ is typed, the string is invalid for shlex, - because it encounters EOF while in quote/escape state. - - Here we fix this error temporarily so shlex doesn't blow up, - and then retry splitting again. - - Since shlex raises ValueError in both cases we unfortunately - have to parse the exception string... - - We try 3 times so multiple errors can be fixed. 
- """ - orig_s = s - for i in range(3): - lexer = _get_lexer(s) - try: - tokens = list(lexer) - except ValueError as e: - if str(e) not in ("No closing quotation", "No escaped character"): - raise - # eggs "bacon ham -> eggs "bacon ham" - # eggs\ -> eggs\\ - if lexer.state not in lexer.escape + lexer.quotes: - raise AssertionError( - "Lexer state is >{}< while parsing >{}< (attempted fixup: " - ">{}<)".format(lexer.state, orig_s, s)) - s += lexer.state - else: - return tokens - # We should never arrive here. - raise AssertionError( - "Gave up splitting >{}< after {} tries. Attempted fixup: >{}<.".format( - orig_s, i, s)) # pylint: disable=undefined-loop-variable - - def pastebin(name, title, text, parent=None): """Paste the text into a pastebin and return the URL. diff --git a/qutebrowser/widgets/statusbar/command.py b/qutebrowser/widgets/statusbar/command.py index 9c794d8d0..9788dffb7 100644 --- a/qutebrowser/widgets/statusbar/command.py +++ b/qutebrowser/widgets/statusbar/command.py @@ -87,8 +87,12 @@ class Command(misc.MinimalLineEditMixin, misc.CommandLineEdit): else: return '' - def split(self): - """Get the text split up in parts.""" + def split(self, keep=False): + """Get the text split up in parts. + + Args: + keep: Whether to keep special chars and whitespace. + """ text = self.text()[len(self.prefix()):] if not text: # When only ":" is entered, we already have one imaginary part, @@ -99,7 +103,8 @@ class Command(misc.MinimalLineEditMixin, misc.CommandLineEdit): # the whitespace. 
return [text] runner = runners.CommandRunner(self._win_id) - parts = runner.parse(text, fallback=True, alias_no_args=False) + parts = runner.parse(text, fallback=True, alias_no_args=False, + keep=keep) if self._empty_item_idx is not None: log.completion.debug("Empty element queued at {}, " "inserting.".format(self._empty_item_idx)) @@ -117,7 +122,7 @@ class Command(misc.MinimalLineEditMixin, misc.CommandLineEdit): else: spaces = False cursor_pos -= len(self.prefix()) - parts = self.split() + parts = self.split(keep=True) log.completion.vdebug( "text: {}, parts: {}, cursor_pos after removing prefix '{}': " "{}".format(self.text(), parts, self.prefix(), cursor_pos)) @@ -135,12 +140,10 @@ class Command(misc.MinimalLineEditMixin, misc.CommandLineEdit): "{}".format(cursor_pos, len(part), i, self._empty_item_idx)) break - # FIXME are spaces always 1 char? - # https://github.com/The-Compiler/qutebrowser/issues/122 - cursor_pos -= (len(part) + 1) + cursor_pos -= len(part) log.completion.vdebug( - "Removing len({!r}) + 1 -> {} from cursor_pos -> {}".format( - part, len(part) + 1, cursor_pos)) + "Removing len({!r}) -> {} from cursor_pos -> {}".format( + part, len(part), cursor_pos)) log.completion.debug("cursor_part {}, spaces {}".format( self._cursor_part, spaces)) return @@ -211,7 +214,10 @@ class Command(misc.MinimalLineEditMixin, misc.CommandLineEdit): parts = self.split() log.completion.debug("changing part {} to '{}'".format( self._cursor_part, newtext)) - parts[self._cursor_part] = newtext + try: + parts[self._cursor_part] = newtext + except IndexError: + parts.append(newtext) # We want to place the cursor directly after the part we just changed. cursor_str = self.prefix() + ' '.join(parts[:self._cursor_part + 1]) if immediate: