Allow lightweight URL patterns without a scheme

See #3622
This commit is contained in:
Florian Bruhin 2018-03-06 10:32:38 +01:00
parent 7fc53ae78a
commit 257753841b
2 changed files with 42 additions and 10 deletions

View File

@ -43,6 +43,7 @@ class UrlPattern:
Class attributes:
DEFAULT_PORTS: The default ports used for schemes which support ports.
_SCHEMES_WITHOUT_HOST: Schemes which don't need a host.
Attributes:
_pattern: The given pattern as string.
@ -59,6 +60,7 @@ class UrlPattern:
"""
DEFAULT_PORTS = {'https': 443, 'http': 80, 'ftp': 21}
_SCHEMES_WITHOUT_HOST = ['about', 'file', 'data', 'javascript']
def __init__(self, pattern):
# Make sure all attributes are initialized if we exit early.
@ -120,6 +122,10 @@ class UrlPattern:
if pattern.startswith('*:'): # Any scheme, but *:// is unparseable
pattern = 'any:' + pattern[2:]
schemes = tuple(s + ':' for s in self._SCHEMES_WITHOUT_HOST)
if '://' not in pattern and not pattern.startswith(schemes):
pattern = 'any://' + pattern
# Chromium handles file://foo like file:///foo
# FIXME This doesn't actually strip the hostname correctly.
if (pattern.startswith('file://') and
@ -129,15 +135,24 @@ class UrlPattern:
return pattern
def _init_scheme(self, parsed):
if not parsed.scheme:
raise ParseError("No scheme given")
elif parsed.scheme == 'any':
"""Parse the scheme from the given URL.
Deviation from Chromium:
- We assume * when no scheme has been given.
"""
assert parsed.scheme, parsed
if parsed.scheme == 'any':
self._scheme = None
return
self._scheme = parsed.scheme
def _init_path(self, parsed):
"""Parse the path from the given URL.
Deviation from Chromium:
- We assume * when no path has been given.
"""
if self._scheme == 'about' and not parsed.path.strip():
raise ParseError("Pattern without path")
@ -157,7 +172,7 @@ class UrlPattern:
- http://:1234/ is not a valid URL because it has no host.
"""
if parsed.hostname is None or not parsed.hostname.strip():
if self._scheme not in ['about', 'file', 'data', 'javascript']:
if self._scheme not in self._SCHEMES_WITHOUT_HOST:
raise ParseError("Pattern without host")
assert self._host is None
return

View File

@ -43,11 +43,11 @@ from qutebrowser.utils import urlmatch
@pytest.mark.parametrize('pattern, error', [
# Chromium: PARSE_ERROR_MISSING_SCHEME_SEPARATOR
("http", "No scheme given"),
("http:", "Pattern without host"),
("http:/", "Pattern without host"),
# ("http", "No scheme given"),
("http:", "Invalid port: Port is empty"),
("http:/", "Invalid port: Port is empty"),
("about://", "Pattern without path"),
("http:/bar", "Pattern without host"),
("http:/bar", "Invalid port: Port is empty"),
# Chromium: PARSE_ERROR_EMPTY_HOST
("http://", "Pattern without host"),
@ -114,7 +114,6 @@ def test_port(pattern, port):
@pytest.mark.parametrize('pattern, path', [
("http://foo/", '/'),
("http://foo", None),
("http://foo/*", None),
])
def test_parse_path(pattern, path):
@ -122,6 +121,24 @@ def test_parse_path(pattern, path):
assert up._path == path
@pytest.mark.parametrize('pattern, scheme, host, path', [
    # Scheme and host given, path left off.
    ("http://example.com", 'http', 'example.com', None),
    # Host and path given, scheme left off.
    ("example.com/path", None, 'example.com', '/path'),
    # Only a host: neither scheme nor path.
    ("example.com", None, 'example.com', None),
    # Host with a port, but no scheme and no path.
    ("example.com:1234", None, 'example.com', None),
    # A pattern which already carries a scheme.
    ("data:monkey", 'data', None, 'monkey'),
])
def test_lightweight_patterns(pattern, scheme, host, path):
    """Check that patterns with parts left off parse into the expected pieces.

    Accepting such abbreviated patterns is a deliberate deviation from
    Chromium, intended to make patterns more user-friendly.
    """
    parsed_pattern = urlmatch.UrlPattern(pattern)
    assert parsed_pattern._scheme == scheme
    assert parsed_pattern._host == host
    assert parsed_pattern._path == path
class TestMatchAllPagesForGivenScheme:
@pytest.fixture
@ -264,7 +281,7 @@ class TestMatchChromeUrls:
class TestMatchAnything:
@pytest.fixture(params=['*://*/*', '*://*:*/*', '<all_urls>'])
@pytest.fixture(params=['*://*/*', '*://*:*/*', '<all_urls>', '*://*'])
def up(self, request):
return urlmatch.UrlPattern(request.param)