From e112038a8b21a7ea351e23de2865f56cb6d54094 Mon Sep 17 00:00:00 2001 From: Florian Bruhin Date: Tue, 2 Sep 2014 08:20:33 +0200 Subject: [PATCH] Handle IPv6 literals correctly. --- qutebrowser/test/utils/test_url.py | 33 +++++++++++++++++++++ qutebrowser/utils/urlutils.py | 47 +++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 8 deletions(-) diff --git a/qutebrowser/test/utils/test_url.py b/qutebrowser/test/utils/test_url.py index e960b4bc4..7959f217a 100644 --- a/qutebrowser/test/utils/test_url.py +++ b/qutebrowser/test/utils/test_url.py @@ -159,5 +159,38 @@ class IsUrlTests(unittest.TestCase): self.assertFalse(urlutils.is_url(url), url) +class QurlFromUserInputTests(unittest.TestCase): + + """Tests for qurl_from_user_input.""" + + def test_url(self): + """Test a normal URL.""" + self.assertEqual( + urlutils.qurl_from_user_input('qutebrowser.org').toString(), + 'http://qutebrowser.org') + + def test_url_http(self): + """Test a normal URL with http://.""" + self.assertEqual( + urlutils.qurl_from_user_input('http://qutebrowser.org').toString(), + 'http://qutebrowser.org') + + def test_ipv6_bare(self): + """Test an IPv6 without brackets.""" + self.assertEqual(urlutils.qurl_from_user_input('::1/foo').toString(), + 'http://[::1]/foo') + + def test_ipv6(self): + """Test an IPv6 with brackets.""" + self.assertEqual(urlutils.qurl_from_user_input('[::1]/foo').toString(), + 'http://[::1]/foo') + + def test_ipv6_http(self): + """Test an IPv6 with http:// and brackets.""" + self.assertEqual( + urlutils.qurl_from_user_input('http://[::1]').toString(), + 'http://[::1]') + + if __name__ == '__main__': unittest.main() diff --git a/qutebrowser/utils/urlutils.py b/qutebrowser/utils/urlutils.py index b2f2005db..e27c16a8f 100644 --- a/qutebrowser/utils/urlutils.py +++ b/qutebrowser/utils/urlutils.py @@ -64,7 +64,7 @@ def _get_search_url(txt): log.url.debug("engine: default, term '{}'".format(txt)) if not term: raise FuzzyUrlError("No search term given") - url = 
QUrl.fromUserInput(template.format(urllib.parse.quote(term))) + url = qurl_from_user_input(template.format(urllib.parse.quote(term))) qtutils.ensure_valid(url) return url @@ -78,7 +78,7 @@ def _is_url_naive(urlstr): Return: True if the URL really is a URL, False otherwise. """ - url = QUrl.fromUserInput(urlstr) + url = qurl_from_user_input(urlstr) try: ipaddress.ip_address(urlstr) except ValueError: @@ -104,7 +104,7 @@ def _is_url_dns(url): """Check if a URL is really a URL via DNS. Args: - url: The URL to check for as QUrl, ideally via QUrl::fromUserInput. + url: The URL to check for as QUrl, ideally via qurl_from_user_input. Return: True if the URL really is a URL, False otherwise. @@ -143,13 +143,13 @@ def fuzzy_url(urlstr): elif is_url(stripped): # probably an address log.url.debug("URL is a fuzzy address") - url = QUrl.fromUserInput(urlstr) + url = qurl_from_user_input(urlstr) else: # probably a search term log.url.debug("URL is a fuzzy search term") try: url = _get_search_url(urlstr) except ValueError: # invalid search engine - url = QUrl.fromUserInput(stripped) + url = qurl_from_user_input(stripped) log.url.debug("Converting fuzzy term {} to URL -> {}".format( urlstr, url.toDisplayString())) qtutils.ensure_valid(url) @@ -215,9 +215,9 @@ def is_url(urlstr): return True elif autosearch == 'dns': log.url.debug("Checking via DNS") - # We want to use fromUserInput here, as the user might enter "foo.de" - # and that should be treated as URL here. - return _is_url_dns(QUrl.fromUserInput(urlstr)) + # We want to use qurl_from_user_input here, as the user might enter + # "foo.de" and that should be treated as URL here. + return _is_url_dns(qurl_from_user_input(urlstr)) elif autosearch == 'naive': log.url.debug("Checking via naive check") return _is_url_naive(urlstr) @@ -225,6 +225,37 @@ def is_url(urlstr): raise ValueError("Invalid autosearch value") +def qurl_from_user_input(urlstr): + """Get a QUrl based on a user input. Additionally handles IPv6 addresses. 
+ + QUrl.fromUserInput handles something like '::1' as a file URL instead of an + IPv6, so we first try to handle it as a valid IPv6, and if that fails we + use QUrl.fromUserInput. + + Args: + urlstr: The URL as string. + + Return: + The converted QUrl. + """ + # First we try very liberally to separate something like an IPv6 from the + # rest (e.g. path info or parameters) + match = re.match(r'\[?([0-9a-fA-F:.]+)\]?(.*)', urlstr.strip()) + if match: + ipstr, rest = match.groups() + else: + ipstr = urlstr.strip() + rest = '' + # Then we try to parse it as an IPv6, and if we fail use + # QUrl.fromUserInput. + try: + ipaddress.IPv6Address(ipstr) + except ipaddress.AddressValueError: + return QUrl.fromUserInput(urlstr) + else: + return QUrl('http://[{}]{}'.format(ipstr, rest)) + + class FuzzyUrlError(Exception): """Exception raised by fuzzy_url on problems."""