From 462f9d7e4c6a45cc9bd3da65cf21a92010b0844c Mon Sep 17 00:00:00 2001 From: Tarcisio Fedrizzi Date: Thu, 28 Apr 2016 18:01:23 +0200 Subject: [PATCH] Refactors discussed in the review - refactors what was discussed in the review - adds unit tests for schemes without host and path --- qutebrowser/utils/urlutils.py | 23 ++++++++------------ tests/integration/features/yankpaste.feature | 3 ++- tests/unit/utils/test_urlutils.py | 1 + 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/qutebrowser/utils/urlutils.py b/qutebrowser/utils/urlutils.py index 4bc74c58a..c97d66a4c 100644 --- a/qutebrowser/utils/urlutils.py +++ b/qutebrowser/utils/urlutils.py @@ -179,16 +179,16 @@ def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True, if path is not None: url = QUrl.fromLocalFile(path) - elif not force_search and (not do_search or is_url(urlstr)): - # probably an address - log.url.debug("URL is a fuzzy address") - url = qurl_from_user_input(urlstr) - else: # probably a search term + elif force_search or (do_search and not is_url(urlstr)): + # probably a search term log.url.debug("URL is a fuzzy search term") try: url = _get_search_url(urlstr) except ValueError: # invalid search engine url = qurl_from_user_input(urlstr) + else: # probably an address + log.url.debug("URL is a fuzzy address") + url = qurl_from_user_input(urlstr) log.url.debug("Converting fuzzy term {!r} to URL -> {}".format( urlstr, url.toDisplayString())) if do_search and config.get('general', 'auto-search') and urlstr: @@ -199,21 +199,18 @@ def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True, return url -def _has_explicit_scheme(url, allow_only_scheme=True): +def _has_explicit_scheme(url): """Check if an url has an explicit scheme given. Args: url: The URL as QUrl. - allow_only_scheme: if set to True the URL is allowed to contain only - the scheme with an empty path. - """ # Note that generic URI syntax actually would allow a second colon # after the scheme delimiter. 
Since we don't know of any URIs # using this and want to support e.g. searching for scoped C++ # symbols, we treat this as not an URI anyways. return (url.isValid() and url.scheme() and - (allow_only_scheme or len(url.path()) > 0) and + (url.host() or url.path()) and not url.path().startswith(' ') and not url.path().startswith(':')) @@ -230,13 +227,11 @@ def is_special_url(url): return url.scheme() in special_schemes -def is_url(urlstr, allow_only_scheme=True): +def is_url(urlstr): """Check if url seems to be a valid URL. Args: urlstr: The URL as string. - allow_only_scheme: if set to True the URL is allowed to contain only - the scheme with an empty path. Return: True if it is a valid URL, False otherwise. @@ -264,7 +259,7 @@ def is_url(urlstr, allow_only_scheme=True): # This will also catch URLs containing spaces. return False - if _has_explicit_scheme(qurl, allow_only_scheme): + if _has_explicit_scheme(qurl): # URLs with explicit schemes are always URLs log.url.debug("Contains explicit scheme") url = True diff --git a/tests/integration/features/yankpaste.feature b/tests/integration/features/yankpaste.feature index c95aea14e..fe5697ef9 100644 --- a/tests/integration/features/yankpaste.feature +++ b/tests/integration/features/yankpaste.feature @@ -152,7 +152,8 @@ Feature: Yanking and pasting. 
- data/hello.txt?q=this%20url%3A%0Ahttp%3A//qutebrowser.org%0Ashould%20not%20open (active) Scenario: Pasting multiline whose first line looks like an URI - Given I have a fresh instance + Given I open about:blank + When I run :tab-only When I set searchengines -> DEFAULT to http://localhost:(port)/data/hello.txt?q={} And I put the following lines into the clipboard: text: diff --git a/tests/unit/utils/test_urlutils.py b/tests/unit/utils/test_urlutils.py index 99e1eac56..3440282ea 100644 --- a/tests/unit/utils/test_urlutils.py +++ b/tests/unit/utils/test_urlutils.py @@ -312,6 +312,7 @@ def test_get_search_url_invalid(urlutils_config_stub, url): (True, True, False, 'qute::foo'), # Invalid URLs (False, False, False, ''), + (False, True, False, 'onlyscheme:'), (False, True, False, 'http:foo:0'), # Not URLs (False, True, False, 'foo bar'), # no DNS because of space