From 73c200fb149a523ce73a8d87eff06789eb4f14a0 Mon Sep 17 00:00:00 2001 From: Tarcisio Fedrizzi Date: Thu, 7 Apr 2016 18:45:56 +0200 Subject: [PATCH 1/6] Tweaks the multi-line heuristic to handle scheme-like text Adds some options to implement a way to treat multiline text that starts with a scheme-like line as text instead as an URL. --- qutebrowser/browser/commands.py | 8 +++++--- qutebrowser/utils/urlutils.py | 19 ++++++++++++++----- tests/integration/features/yankpaste.feature | 13 +++++++++++++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/qutebrowser/browser/commands.py b/qutebrowser/browser/commands.py index 721603f3e..0db8ffdba 100644 --- a/qutebrowser/browser/commands.py +++ b/qutebrowser/browser/commands.py @@ -841,6 +841,7 @@ class CommandDispatcher: bg: Open in a background tab. window: Open in new window. """ + force_search = False if sel and utils.supports_selection(): target = "Primary selection" else: @@ -852,15 +853,16 @@ class CommandDispatcher: log.misc.debug("{} contained: {!r}".format(target, text)) text_urls = [u for u in text.split('\n') if u.strip()] if (len(text_urls) > 1 and not urlutils.is_url(text_urls[0]) and - urlutils.get_path_if_valid(text_urls[0], - check_exists=True) is None): + urlutils.get_path_if_valid( + text_urls[0], check_exists=True) is None): + force_search = True text_urls = [text] for i, text_url in enumerate(text_urls): if not window and i > 0: tab = False bg = True try: - url = urlutils.fuzzy_url(text_url) + url = urlutils.fuzzy_url(text_url, force_search=force_search) except urlutils.InvalidUrlError as e: raise cmdexc.CommandError(e) self._open(url, tab, bg, window) diff --git a/qutebrowser/utils/urlutils.py b/qutebrowser/utils/urlutils.py index ac9a49c7e..4bc74c58a 100644 --- a/qutebrowser/utils/urlutils.py +++ b/qutebrowser/utils/urlutils.py @@ -158,7 +158,8 @@ def _is_url_dns(urlstr): return not info.error() -def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True): +def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True, + force_search=False): """Get a QUrl based on a user input which is URL or search term. Args: @@ -166,6 +167,8 @@ def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True): cwd: The current working directory, or None. relative: Whether to resolve relative files. do_search: Whether to perform a search on non-URLs. + force_search: Whether to force a search even if the content can be + interpreted as an URL. Return: A target QUrl to a search page or the original URL. @@ -176,7 +179,7 @@ def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True): if path is not None: url = QUrl.fromLocalFile(path) - elif (not do_search) or is_url(urlstr): + elif not force_search and (not do_search or is_url(urlstr)): # probably an address log.url.debug("URL is a fuzzy address") url = qurl_from_user_input(urlstr) @@ -196,17 +199,21 @@ def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True): return url -def _has_explicit_scheme(url): +def _has_explicit_scheme(url, allow_only_scheme=True): """Check if an url has an explicit scheme given. Args: url: The URL as QUrl. + allow_only_scheme: if set to True the URL is allowed to contain only + the scheme with an empty path. + """ # Note that generic URI syntax actually would allow a second colon # after the scheme delimiter. Since we don't know of any URIs # using this and want to support e.g. searching for scoped C++ # symbols, we treat this as not an URI anyways. return (url.isValid() and url.scheme() and + (allow_only_scheme or len(url.path()) > 0) and not url.path().startswith(' ') and not url.path().startswith(':')) @@ -223,11 +230,13 @@ def is_special_url(url): return url.scheme() in special_schemes -def is_url(urlstr): +def is_url(urlstr, allow_only_scheme=True): """Check if url seems to be a valid URL. Args: urlstr: The URL as string. + allow_only_scheme: if set to True the URL is allowed to contain only + the scheme with an empty path. Return: True if it is a valid URL, False otherwise. @@ -255,7 +264,7 @@ def is_url(urlstr): # This will also catch URLs containing spaces. return False - if _has_explicit_scheme(qurl): + if _has_explicit_scheme(qurl, allow_only_scheme): # URLs with explicit schemes are always URLs log.url.debug("Contains explicit scheme") url = True diff --git a/tests/integration/features/yankpaste.feature b/tests/integration/features/yankpaste.feature index 1fcb14fbc..c95aea14e 100644 --- a/tests/integration/features/yankpaste.feature +++ b/tests/integration/features/yankpaste.feature @@ -151,6 +151,19 @@ Feature: Yanking and pasting. - about:blank - data/hello.txt?q=this%20url%3A%0Ahttp%3A//qutebrowser.org%0Ashould%20not%20open (active) + Scenario: Pasting multiline whose first line looks like an URI + Given I have a fresh instance + When I set searchengines -> DEFAULT to http://localhost:(port)/data/hello.txt?q={} + And I put the following lines into the clipboard: + text: + should open + as search + And I run :paste -t + And I wait until data/hello.txt?q=text%3A%0Ashould%20open%0Aas%20search is loaded + Then the following tabs should be open: + - about:blank + - data/hello.txt?q=text%3A%0Ashould%20open%0Aas%20search (active) + Scenario: Pasting multiple urls in a background tab Given I open about:blank When I run :tab-only From 462f9d7e4c6a45cc9bd3da65cf21a92010b0844c Mon Sep 17 00:00:00 2001 From: Tarcisio Fedrizzi Date: Thu, 28 Apr 2016 18:01:23 +0200 Subject: [PATCH 2/6] Refators discussed in the review - refactors what discussed in the review - adds unit tests for schemas without host and path --- qutebrowser/utils/urlutils.py | 23 ++++++++------------ tests/integration/features/yankpaste.feature | 3 ++- tests/unit/utils/test_urlutils.py | 1 + 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/qutebrowser/utils/urlutils.py b/qutebrowser/utils/urlutils.py index 4bc74c58a..c97d66a4c 100644 --- a/qutebrowser/utils/urlutils.py +++ b/qutebrowser/utils/urlutils.py @@ -179,16 +179,16 @@ def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True, if path is not None: url = QUrl.fromLocalFile(path) - elif not force_search and (not do_search or is_url(urlstr)): - # probably an address - log.url.debug("URL is a fuzzy address") - url = qurl_from_user_input(urlstr) - else: # probably a search term + elif force_search or (do_search and not is_url(urlstr)): + # probably a search term log.url.debug("URL is a fuzzy search term") try: url = _get_search_url(urlstr) except ValueError: # invalid search engine url = qurl_from_user_input(urlstr) + else: # probably an address + log.url.debug("URL is a fuzzy address") + url = qurl_from_user_input(urlstr) log.url.debug("Converting fuzzy term {!r} to URL -> {}".format( urlstr, url.toDisplayString())) if do_search and config.get('general', 'auto-search') and urlstr: @@ -199,21 +199,18 @@ def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True, return url -def _has_explicit_scheme(url, allow_only_scheme=True): +def _has_explicit_scheme(url): """Check if an url has an explicit scheme given. Args: url: The URL as QUrl. - allow_only_scheme: if set to True the URL is allowed to contain only - the scheme with an empty path. - """ # Note that generic URI syntax actually would allow a second colon # after the scheme delimiter. Since we don't know of any URIs # using this and want to support e.g. searching for scoped C++ # symbols, we treat this as not an URI anyways. return (url.isValid() and url.scheme() and - (allow_only_scheme or len(url.path()) > 0) and + (url.host() or url.path()) and not url.path().startswith(' ') and not url.path().startswith(':')) @@ -230,13 +227,11 @@ def is_special_url(url): return url.scheme() in special_schemes -def is_url(urlstr, allow_only_scheme=True): +def is_url(urlstr): """Check if url seems to be a valid URL. Args: urlstr: The URL as string. - allow_only_scheme: if set to True the URL is allowed to contain only - the scheme with an empty path. Return: True if it is a valid URL, False otherwise. @@ -264,7 +259,7 @@ def is_url(urlstr, allow_only_scheme=True): # This will also catch URLs containing spaces. return False - if _has_explicit_scheme(qurl, allow_only_scheme): + if _has_explicit_scheme(qurl): # URLs with explicit schemes are always URLs log.url.debug("Contains explicit scheme") url = True diff --git a/tests/integration/features/yankpaste.feature b/tests/integration/features/yankpaste.feature index c95aea14e..fe5697ef9 100644 --- a/tests/integration/features/yankpaste.feature +++ b/tests/integration/features/yankpaste.feature @@ -152,7 +152,8 @@ Feature: Yanking and pasting. - data/hello.txt?q=this%20url%3A%0Ahttp%3A//qutebrowser.org%0Ashould%20not%20open (active) Scenario: Pasting multiline whose first line looks like an URI - Given I have a fresh instance + Given I open about:blank + When I run :tab-only When I set searchengines -> DEFAULT to http://localhost:(port)/data/hello.txt?q={} And I put the following lines into the clipboard: text: diff --git a/tests/unit/utils/test_urlutils.py b/tests/unit/utils/test_urlutils.py index 99e1eac56..3440282ea 100644 --- a/tests/unit/utils/test_urlutils.py +++ b/tests/unit/utils/test_urlutils.py @@ -312,6 +312,7 @@ def test_get_search_url_invalid(urlutils_config_stub, url): (True, True, False, 'qute::foo'), # Invalid URLs (False, False, False, ''), + (False, True, False, 'onlyscheme:'), (False, True, False, 'http:foo:0'), # Not URLs (False, True, False, 'foo bar'), # no DNS because of space From 7da6faa7af7a81c2d20960db9670aa7306828685 Mon Sep 17 00:00:00 2001 From: Tarcisio Fedrizzi Date: Mon, 16 May 2016 18:19:33 +0200 Subject: [PATCH 3/6] Changes fuzzy_url to force search even when paths are passed --- qutebrowser/utils/urlutils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qutebrowser/utils/urlutils.py b/qutebrowser/utils/urlutils.py index c97d66a4c..5ea1c61b3 100644 --- a/qutebrowser/utils/urlutils.py +++ b/qutebrowser/utils/urlutils.py @@ -168,7 +168,7 @@ def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True, relative: Whether to resolve relative files. do_search: Whether to perform a search on non-URLs. force_search: Whether to force a search even if the content can be - interpreted as an URL. + interpreted as an URL or a path. Return: A target QUrl to a search page or the original URL. @@ -177,7 +177,7 @@ def fuzzy_url(urlstr, cwd=None, relative=False, do_search=True, path = get_path_if_valid(urlstr, cwd=cwd, relative=relative, check_exists=True) - if path is not None: + if not force_search and path is not None: url = QUrl.fromLocalFile(path) elif force_search or (do_search and not is_url(urlstr)): # probably a search term From a9e96df5dfcb6088f8958687f22508093d700bb3 Mon Sep 17 00:00:00 2001 From: Tarcisio Fedrizzi Date: Mon, 16 May 2016 18:19:56 +0200 Subject: [PATCH 4/6] Adds unit test when force_search is True --- tests/unit/utils/test_urlutils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/unit/utils/test_urlutils.py b/tests/unit/utils/test_urlutils.py index 3440282ea..7ca569fea 100644 --- a/tests/unit/utils/test_urlutils.py +++ b/tests/unit/utils/test_urlutils.py @@ -238,6 +238,19 @@ class TestFuzzyUrl: with pytest.raises(urlutils.InvalidUrlError): urlutils.fuzzy_url(url, do_search=True) + @pytest.mark.parametrize('urlstring', [ + 'http://www.qutebrowser.org/', + '/foo', + 'test' + ]) + def test_force_search(self, urlstring, get_search_url_mock): + """Test the force search option""" + get_search_url_mock.return_value = QUrl('search_url') + + url = urlutils.fuzzy_url(urlstring, force_search = True) + + assert url == QUrl('search_url') + @pytest.mark.parametrize('path, check_exists', [ ('/foo', False), ('/bar', True), From a3e6761db694778064f38867f645da38030b559d Mon Sep 17 00:00:00 2001 From: Tarcisio Fedrizzi Date: Wed, 25 May 2016 07:57:16 +0200 Subject: [PATCH 5/6] Fixes pylint error --- tests/unit/utils/test_urlutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/utils/test_urlutils.py b/tests/unit/utils/test_urlutils.py index 7ca569fea..a9cd21ba1 100644 --- a/tests/unit/utils/test_urlutils.py +++ b/tests/unit/utils/test_urlutils.py @@ -247,7 +247,7 @@ class TestFuzzyUrl: """Test the force search option""" get_search_url_mock.return_value = QUrl('search_url') - url = urlutils.fuzzy_url(urlstring, force_search = True) + url = urlutils.fuzzy_url(urlstring, force_search=True) assert url == QUrl('search_url') From ec2935fab0020992eafb0543ce334456ce2afe01 Mon Sep 17 00:00:00 2001 From: Tarcisio Fedrizzi Date: Sat, 28 May 2016 17:38:31 +0200 Subject: [PATCH 6/6] Fixes flake8 error --- tests/unit/utils/test_urlutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/utils/test_urlutils.py b/tests/unit/utils/test_urlutils.py index a9cd21ba1..be3e6b2ae 100644 --- a/tests/unit/utils/test_urlutils.py +++ b/tests/unit/utils/test_urlutils.py @@ -244,7 +244,7 @@ class TestFuzzyUrl: 'test' ]) def test_force_search(self, urlstring, get_search_url_mock): - """Test the force search option""" + """Test the force search option.""" get_search_url_mock.return_value = QUrl('search_url') url = urlutils.fuzzy_url(urlstring, force_search=True)