From 257753841bd9194a4bed0bbe7c088496f2eba905 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Tue, 6 Mar 2018 10:32:38 +0100 Subject: [PATCH] Allow lightweight URL patterns without a scheme See #3622 --- qutebrowser/utils/urlmatch.py | 23 +++++++++++++++++++---- tests/unit/utils/test_urlmatch.py | 29 +++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/qutebrowser/utils/urlmatch.py b/qutebrowser/utils/urlmatch.py index fcddd0fe7..ba9d1d63c 100644 --- a/qutebrowser/utils/urlmatch.py +++ b/qutebrowser/utils/urlmatch.py @@ -43,6 +43,7 @@ class UrlPattern: Class attributes: DEFAULT_PORTS: The default ports used for schemes which support ports. + _SCHEMES_WITHOUT_HOST: Schemes which don't need a host. Attributes: _pattern: The given pattern as string. @@ -59,6 +60,7 @@ class UrlPattern: """ DEFAULT_PORTS = {'https': 443, 'http': 80, 'ftp': 21} + _SCHEMES_WITHOUT_HOST = ['about', 'file', 'data', 'javascript'] def __init__(self, pattern): # Make sure all attributes are initialized if we exit early. @@ -120,6 +122,10 @@ class UrlPattern: if pattern.startswith('*:'): # Any scheme, but *:// is unparseable pattern = 'any:' + pattern[2:] + schemes = tuple(s + ':' for s in self._SCHEMES_WITHOUT_HOST) + if '://' not in pattern and not pattern.startswith(schemes): + pattern = 'any://' + pattern + # Chromium handles file://foo like file:///foo # FIXME This doesn't actually strip the hostname correctly. if (pattern.startswith('file://') and @@ -129,15 +135,24 @@ class UrlPattern: return pattern def _init_scheme(self, parsed): - if not parsed.scheme: - raise ParseError("No scheme given") - elif parsed.scheme == 'any': + """Parse the scheme from the given URL. + + Deviation from Chromium: + - We assume * when no scheme has been given. + """ + assert parsed.scheme, parsed + if parsed.scheme == 'any': self._scheme = None return self._scheme = parsed.scheme def _init_path(self, parsed): + """Parse the path from the given URL. 
+ + Deviation from Chromium: + - We assume * when no path has been given. + """ if self._scheme == 'about' and not parsed.path.strip(): raise ParseError("Pattern without path") @@ -157,7 +172,7 @@ class UrlPattern: - http://:1234/ is not a valid URL because it has no host. """ if parsed.hostname is None or not parsed.hostname.strip(): - if self._scheme not in ['about', 'file', 'data', 'javascript']: + if self._scheme not in self._SCHEMES_WITHOUT_HOST: raise ParseError("Pattern without host") assert self._host is None return diff --git a/tests/unit/utils/test_urlmatch.py b/tests/unit/utils/test_urlmatch.py index 88da166ca..dcd703790 100644 --- a/tests/unit/utils/test_urlmatch.py +++ b/tests/unit/utils/test_urlmatch.py @@ -43,11 +43,11 @@ from qutebrowser.utils import urlmatch @pytest.mark.parametrize('pattern, error', [ # Chromium: PARSE_ERROR_MISSING_SCHEME_SEPARATOR - ("http", "No scheme given"), - ("http:", "Pattern without host"), - ("http:/", "Pattern without host"), + # ("http", "No scheme given"), + ("http:", "Invalid port: Port is empty"), + ("http:/", "Invalid port: Port is empty"), ("about://", "Pattern without path"), - ("http:/bar", "Pattern without host"), + ("http:/bar", "Invalid port: Port is empty"), # Chromium: PARSE_ERROR_EMPTY_HOST ("http://", "Pattern without host"), @@ -114,7 +114,6 @@ def test_port(pattern, port): @pytest.mark.parametrize('pattern, path', [ ("http://foo/", '/'), - ("http://foo", None), ("http://foo/*", None), ]) def test_parse_path(pattern, path): @@ -122,6 +121,24 @@ def test_parse_path(pattern, path): assert up._path == path +@pytest.mark.parametrize('pattern, scheme, host, path', [ + ("http://example.com", 'http', 'example.com', None), # no path + ("example.com/path", None, 'example.com', '/path'), # no scheme + ("example.com", None, 'example.com', None), # no scheme and no path + ("example.com:1234", None, 'example.com', None), # no scheme/path but port + ("data:monkey", 'data', None, 'monkey'), # existing scheme +]) +def 
test_lightweight_patterns(pattern, scheme, host, path): + """Make sure we can leave off parts of a URL. + + This is a deviation from Chromium to make patterns more user-friendly. + """ + up = urlmatch.UrlPattern(pattern) + assert up._scheme == scheme + assert up._host == host + assert up._path == path + + class TestMatchAllPagesForGivenScheme: @pytest.fixture @@ -264,7 +281,7 @@ class TestMatchChromeUrls: class TestMatchAnything: - @pytest.fixture(params=['*://*/*', '*://*:*/*', '']) + @pytest.fixture(params=['*://*/*', '*://*:*/*', '', '*://*']) def up(self, request): return urlmatch.UrlPattern(request.param)