diff --git a/qutebrowser/browser/network/networkmanager.py b/qutebrowser/browser/network/networkmanager.py
index 97e88008c..52dc4f017 100644
--- a/qutebrowser/browser/network/networkmanager.py
+++ b/qutebrowser/browser/network/networkmanager.py
@@ -359,18 +359,11 @@ class NetworkManager(QNetworkAccessManager):
         req.setRawHeader('DNT'.encode('ascii'), dnt)
         req.setRawHeader('X-Do-Not-Track'.encode('ascii'), dnt)
 
-        def same_domain():
-            tabbed_browser = objreg.get('tabbed-browser', scope='window', window=self._win_id)
-            view = tabbed_browser.currentWidget()
-            print(req.url().host() == view.url().host(), repr(view.url().host()), repr(req.url().host()))
-            # TODO: We probably want to allow headers if we're on test.com and
-            # doing a request to www.test.com? Or maybe a new settings value for
-            # this?
-            return req.url().host() == view.url().host()
-
+        current_url = objreg.get('tabbed-browser', scope='window',
+                                 window=self._win_id).currentWidget().url()
         if (config.get('network', 'referer-header') == 'never' or
                 (config.get('network', 'referer-header') == 'same-domain' and
-                 same_domain())):
+                 urlutils.same_domain(req.url(), current_url))):
             # Note: using ''.encode('ascii') sends a header with no value,
             # instead of no header at all
             req.setRawHeader('Referer'.encode('ascii'), QByteArray())
diff --git a/qutebrowser/config/configtypes.py b/qutebrowser/config/configtypes.py
index 8e9c27697..cb1bc744a 100644
--- a/qutebrowser/config/configtypes.py
+++ b/qutebrowser/config/configtypes.py
@@ -1471,7 +1471,6 @@ class Referer(BaseType):
                                    " shouldn't break any sites."))
 
 
-
 class UserAgent(BaseType):
 
     """The user agent to use."""
diff --git a/qutebrowser/utils/urlutils.py b/qutebrowser/utils/urlutils.py
index 143e7cfc5..fbe280199 100644
--- a/qutebrowser/utils/urlutils.py
+++ b/qutebrowser/utils/urlutils.py
@@ -392,6 +392,46 @@ def get_errstring(url, base="Invalid URL"):
     return base
 
 
+def same_domain(url1, url2):
+    """Check if url1 and url2 belong to the same website.
+
+    This will use a "public suffix list" to determine what a "top level domain"
+    is. All further domains are ignored.
+
+    For example example.com and www.example.com are considered the same, but
+    example.co.uk and test.co.uk are not.
+
+    Hosts without a valid public suffix are only considered the same if they
+    are identical. Raises ValueError if one of the URLs is invalid.
+
+    Args:
+        url1: The first QUrl.
+        url2: The second QUrl.
+
+    Return:
+        True if the domains are the same, False otherwise.
+    """
+    if not url1.isValid():
+        raise ValueError(get_errstring(url1))
+    if not url2.isValid():
+        raise ValueError(get_errstring(url2))
+
+    suffix1 = url1.topLevelDomain()
+    suffix2 = url2.topLevelDomain()
+    if suffix1 != suffix2:
+        return False
+    if not suffix1:
+        # Without a suffix, host[:-len('')] is host[:0], i.e. always empty,
+        # so the check below would treat all such hosts as the same domain.
+        return url1.host() == url2.host()
+
+    # The suffix starts with a dot; what remains after stripping it ends with
+    # the registered name, e.g. "www.bbc.co.uk" -> "www.bbc" -> "bbc".
+    domain1 = url1.host()[:-len(suffix1)].split('.')[-1]
+    domain2 = url2.host()[:-len(suffix2)].split('.')[-1]
+    return domain1 == domain2
+
+
 class FuzzyUrlError(Exception):
 
     """Exception raised by fuzzy_url on problems.
diff --git a/tests/utils/test_urlutils.py b/tests/utils/test_urlutils.py
index 264de925b..4f027ceff 100644
--- a/tests/utils/test_urlutils.py
+++ b/tests/utils/test_urlutils.py
@@ -227,3 +227,34 @@ class TestFilenameFromUrl:
         """Test with an URL with no path."""
         url = QUrl('http://qutebrowser.org/')
         assert urlutils.filename_from_url(url) == 'qutebrowser.org.html'
+
+
+class TestSameDomain:
+
+    """Tests for same_domain."""
+
+    def test_same_domains(self):
+        """Test for domains that should be considered the same."""
+        hosts = (
+            ('http://example.com', 'http://www.example.com'),
+            ('http://bbc.co.uk', 'https://www.bbc.co.uk'),
+            ('http://many.levels.of.domains.example.com', 'http://www.example.com'),
+            ('http://one.not_a_valid_tld', 'http://one.not_a_valid_tld'),
+        )
+
+        for host1, host2 in hosts:
+            assert urlutils.same_domain(QUrl(host1), QUrl(host2))
+            assert urlutils.same_domain(QUrl(host2), QUrl(host1))
+
+    def test_not_same_domains(self):
+        """Test for domains that should be NOT considered the same."""
+        hosts = (
+            ('http://bbc.co.uk', 'http://example.co.uk'),
+            ('https://example.kids.museum', 'http://example.kunst.museum'),
+            ('http://idn.иком.museum', 'http://idn.ירושלים.museum'),
+            ('http://one.not_a_valid_tld', 'http://two.not_a_valid_tld'),
+        )
+
+        for host1, host2 in hosts:
+            assert not urlutils.same_domain(QUrl(host1), QUrl(host2))
+            assert not urlutils.same_domain(QUrl(host2), QUrl(host1))