Show Punycode URL for IDN pages in addition to decoded one

This helps when Unicode homographs are used for phishing purposes.
Fixes #2547
This commit is contained in:
Florian Bruhin 2017-04-24 06:58:41 +02:00
parent beb661cdc7
commit 195d0ea207
5 changed files with 88 additions and 41 deletions

View File

@ -32,6 +32,8 @@ Added
Changed
~~~~~~~
- To prevent elaborate phishing attacks, the Punycode version is now shown in
addition to the decoded version for IDN domain names.
- When using QtWebEngine, the underlying Chromium version is now shown in the
version info.
- Improved `qute:history` page with lazy loading

View File

@ -24,7 +24,7 @@ from PyQt5.QtCore import pyqtSlot, pyqtProperty, Qt, QUrl
from qutebrowser.browser import browsertab
from qutebrowser.mainwindow.statusbar import textbase
from qutebrowser.config import style
from qutebrowser.utils import usertypes
from qutebrowser.utils import usertypes, urlutils
# Note this has entries for success/error/warn from widgets.webview:LoadStatus
@ -139,7 +139,7 @@ class UrlText(textbase.TextBase):
if url is None:
self._normal_url = None
else:
self._normal_url = url.toDisplayString()
self._normal_url = urlutils.safe_display_url(url)
self._normal_url_type = UrlType.normal
self._update_url()
@ -156,9 +156,9 @@ class UrlText(textbase.TextBase):
if link:
qurl = QUrl(link)
if qurl.isValid():
self._hover_url = qurl.toDisplayString()
self._hover_url = urlutils.safe_display_url(qurl)
else:
self._hover_url = link
self._hover_url = '(invalid URL!) {}'.format(link)
else:
self._hover_url = None
self._update_url()
@ -167,6 +167,9 @@ class UrlText(textbase.TextBase):
def on_tab_changed(self, tab):
"""Update URL if the tab changed."""
self._hover_url = None
self._normal_url = tab.url().toDisplayString()
if tab.url().isValid():
self._normal_url = urlutils.safe_display_url(tab.url())
else:
self._normal_url = ''
self.on_load_status_changed(tab.load_status().name)
self._update_url()

View File

@ -592,6 +592,30 @@ def data_url(mimetype, data):
return url
def safe_display_url(qurl):
"""Get a IDN-homograph phising safe form of the given QUrl.
If we're dealing with a Punycode-encoded URL, this prepends the hostname in
its encoded form, to make sure those URLs are distinguishable.
See https://github.com/qutebrowser/qutebrowser/issues/2547
and https://bugreports.qt.io/browse/QTBUG-60365
"""
if not qurl.isValid():
raise InvalidUrlError(qurl)
host = qurl.host(QUrl.FullyEncoded)
if '..' in host:
# WORKAROUND for https://bugreports.qt.io/browse/QTBUG-60364
return '(unparseable URL!) {}'.format(qurl.toDisplayString())
for part in host.split('.'):
if part.startswith('xn--') and host != qurl.host(QUrl.FullyDecoded):
return '({}) {}'.format(host, qurl.toDisplayString())
return qurl.toDisplayString()
class InvalidProxyTypeError(Exception):
"""Error raised when proxy_from_url gets an unknown proxy type."""

View File

@ -22,7 +22,7 @@
import pytest
from qutebrowser.utils import usertypes
from qutebrowser.utils import usertypes, urlutils
from qutebrowser.mainwindow.statusbar import url
from PyQt5.QtCore import QUrl
@ -51,49 +51,41 @@ def url_widget(qtbot, monkeypatch, config_stub):
return widget
@pytest.mark.parametrize('qurl', [
QUrl('http://abc123.com/this/awesome/url.html'),
QUrl('https://supersecret.gov/nsa/files.txt'),
None
])
def test_set_url(url_widget, qurl):
"""Test text displayed by the widget."""
url_widget.set_url(qurl)
if qurl is not None:
assert url_widget.text() == qurl.toDisplayString()
else:
assert url_widget.text() == ""
@pytest.mark.parametrize('url_text', [
'http://abc123.com/this/awesome/url.html',
'https://supersecret.gov/nsa/files.txt',
None,
])
def test_set_hover_url(url_widget, url_text):
"""Test text when hovering over a link."""
url_widget.set_hover_url(url_text)
if url_text is not None:
assert url_widget.text() == url_text
assert url_widget._urltype == url.UrlType.hover
else:
assert url_widget.text() == ''
assert url_widget._urltype == url.UrlType.normal
@pytest.mark.parametrize('url_text, expected', [
('http://example.com/foo/bar.html', 'http://example.com/foo/bar.html'),
('http://test.gr/%CE%B1%CE%B2%CE%B3%CE%B4.txt', 'http://test.gr/αβγδ.txt'),
('http://test.ru/%D0%B0%D0%B1%D0%B2%D0%B3.txt', 'http://test.ru/абвг.txt'),
('http://test.com/s%20p%20a%20c%20e.txt', 'http://test.com/s p a c e.txt'),
('http://test.com/%22quotes%22.html', 'http://test.com/%22quotes%22.html'),
('http://username:secret%20password@test.com', 'http://username@test.com'),
('http://example.com%5b/', 'http://example.com%5b/'), # invalid url
('http://example.com%5b/', '(invalid URL!) http://example.com%5b/'),
# https://bugreports.qt.io/browse/QTBUG-60364
('http://www.xn--80ak6aa92e.com',
'(unparseable URL!) http://www.аррӏе.com'),
# IDN URL
('http://www.ä.com', '(www.xn--4ca.com) http://www.ä.com'),
(None, ''),
])
def test_set_hover_url_encoded(url_widget, url_text, expected):
@pytest.mark.parametrize('which', ['normal', 'hover'])
def test_set_url(url_widget, url_text, expected, which):
"""Test text when hovering over a percent encoded link."""
url_widget.set_hover_url(url_text)
qurl = QUrl(url_text)
if which == 'normal':
if not qurl.isValid():
with pytest.raises(urlutils.InvalidUrlError):
url_widget.set_url(qurl)
return
else:
url_widget.set_url(qurl)
else:
url_widget.set_hover_url(url_text)
assert url_widget.text() == expected
assert url_widget._urltype == url.UrlType.hover
if which == 'hover' and expected:
assert url_widget._urltype == url.UrlType.hover
else:
assert url_widget._urltype == url.UrlType.normal
@pytest.mark.parametrize('status, expected', [
@ -114,6 +106,7 @@ def test_on_load_status_changed(url_widget, status, expected):
@pytest.mark.parametrize('load_status, qurl', [
(url.UrlType.success, QUrl('http://abc123.com/this/awesome/url.html')),
(url.UrlType.success, QUrl('http://reddit.com/r/linux')),
(url.UrlType.success, QUrl('http://ä.com/')),
(url.UrlType.success_https, QUrl('www.google.com')),
(url.UrlType.success_https, QUrl('https://supersecret.gov/nsa/files.txt')),
(url.UrlType.warn, QUrl('www.shadysite.org/some/file/with/issues.htm')),
@ -123,7 +116,7 @@ def test_on_tab_changed(url_widget, fake_web_tab, load_status, qurl):
tab_widget = fake_web_tab(load_status=load_status, url=qurl)
url_widget.on_tab_changed(tab_widget)
assert url_widget._urltype == load_status
assert url_widget.text() == qurl.toDisplayString()
assert url_widget.text() == urlutils.safe_display_url(qurl)
@pytest.mark.parametrize('qurl, load_status, expected_status', [

View File

@ -742,6 +742,31 @@ def test_data_url():
assert url == QUrl('data:text/plain;base64,Zm9v')
@pytest.mark.parametrize('url, expected', [
# No IDN
(QUrl('http://www.example.com'), 'http://www.example.com'),
# IDN in domain
(QUrl('http://www.ä.com'), '(www.xn--4ca.com) http://www.ä.com'),
# IDN with non-whitelisted TLD
(QUrl('http://www.ä.foo'), 'http://www.xn--4ca.foo'),
# Unicode only in path
(QUrl('http://www.example.com/ä'), 'http://www.example.com/ä'),
# Unicode only in TLD (looks like Qt shows Punycode with рф...)
(QUrl('http://www.example.xn--p1ai'),
'(www.example.xn--p1ai) http://www.example.рф'),
# https://bugreports.qt.io/browse/QTBUG-60364
(QUrl('http://www.xn--80ak6aa92e.com'),
'(unparseable URL!) http://www.аррӏе.com'),
])
def test_safe_display_url(url, expected):
assert urlutils.safe_display_url(url) == expected
def test_safe_display_url_invalid():
with pytest.raises(urlutils.InvalidUrlError):
urlutils.safe_display_url(QUrl())
class TestProxyFromUrl:
@pytest.mark.parametrize('url, expected', [