Merge branch 'referer-header' of https://github.com/Carpetsmoker/qutebrowser into Carpetsmoker-referer-header

Conflicts:
      tests/utils/test_urlutils.py
This commit is contained in:
Florian Bruhin 2015-08-01 12:44:57 +02:00
commit 27f65be860
5 changed files with 91 additions and 1 deletions

View File

@ -22,7 +22,7 @@
import collections
from PyQt5.QtCore import (pyqtSlot, pyqtSignal, PYQT_VERSION, QCoreApplication,
QUrl)
QUrl, QByteArray)
from PyQt5.QtNetwork import (QNetworkAccessManager, QNetworkReply, QSslError,
QSslSocket)
@ -337,6 +337,18 @@ class NetworkManager(QNetworkAccessManager):
dnt = '0'.encode('ascii')
req.setRawHeader('DNT'.encode('ascii'), dnt)
req.setRawHeader('X-Do-Not-Track'.encode('ascii'), dnt)
current_url = objreg.get('tabbed-browser', scope='window',
window=self._win_id).currentWidget().url()
if config.get('network', 'referer-header') == 'never':
# Note: using ''.encode('ascii') sends a header with no value,
# instead of no header at all
req.setRawHeader('Referer'.encode('ascii'), QByteArray())
elif (config.get('network', 'referer-header') == 'same-domain' and
current_url.isValid() and
not urlutils.same_domain(req.url(), current_url)):
req.setRawHeader('Referer'.encode('ascii'), QByteArray())
accept_language = config.get('network', 'accept-language')
if accept_language is not None:
req.setRawHeader('Accept-Language'.encode('ascii'),

View File

@ -326,6 +326,10 @@ def data(readonly=False):
SettingValue(typ.String(none_ok=True), 'en-US,en'),
"Value to send in the `accept-language` header."),
('referer-header',
SettingValue(typ.Referer(), 'same-domain'),
"Send the Referer header"),
('user-agent',
SettingValue(typ.UserAgent(none_ok=True), ''),
"User agent to send. Empty to send the default."),

View File

@ -1489,6 +1489,18 @@ class DownloadPathSuggestion(BaseType):
('both', "Show download path and filename."))
class Referer(BaseType):

    """Config type listing the allowed policies for sending the Referer header."""

    # Each entry pairs an accepted setting value with its help text.
    valid_values = ValidValues(
        ('always', "Always send."),
        ('never',
         "Never send; this is not recommended, as some sites may break."),
        ('same-domain',
         "Only send for the same domain, this will still protect your "
         "privacy, but shouldn't break any sites."),
    )
class UserAgent(BaseType):
"""The user agent to use."""

View File

@ -392,6 +392,36 @@ def get_errstring(url, base="Invalid URL"):
return base
def same_domain(url1, url2):
    """Check if url1 and url2 belong to the same website.

    This will use a "public suffix list" to determine what a "top level domain"
    is. All further domains are ignored.

    For example example.com and www.example.com are considered the same, but
    example.co.uk and test.co.uk are not.

    Args:
        url1: The first URL to compare (an object offering isValid(),
              topLevelDomain() and host(), e.g. a QUrl).
        url2: The second URL to compare, same kind of object as url1.

    Return:
        True if the domains are the same, False otherwise.

    Raise:
        ValueError if url1 or url2 is not valid; url1 is checked first.
    """
    for url in (url1, url2):
        if not url.isValid():
            raise ValueError(get_errstring(url))

    suffix1 = url1.topLevelDomain()
    suffix2 = url2.topLevelDomain()
    if not suffix1:
        # url1 has an empty suffix, so there is nothing to strip off: the
        # hosts have to match completely.
        return url1.host() == url2.host()

    if suffix1 != suffix2:
        return False

    # Cut the suffix off each host and compare only the label directly in
    # front of it, so any further subdomains are ignored.
    domain1 = url1.host()[:-len(suffix1)].split('.')[-1]
    domain2 = url2.host()[:-len(suffix2)].split('.')[-1]
    return domain1 == domain2
class FuzzyUrlError(Exception):
"""Exception raised by fuzzy_url on problems.

View File

@ -497,3 +497,35 @@ def test_fuzzy_url_error(url, raising, has_err_string):
else:
expected_text = "Error message"
assert str(excinfo.value) == expected_text
class TestSameDomain:

    """Tests for same_domain."""

    def test_same_domains(self):
        """Verify URL pairs which have to count as the same domain."""
        url_pairs = (
            ('http://example.com', 'http://www.example.com'),
            ('http://bbc.co.uk', 'https://www.bbc.co.uk'),
            ('http://many.levels.of.domains.example.com',
             'http://www.example.com'),
            ('http://idn.иком.museum', 'http://idn2.иком.museum'),
            ('http://one.not_a_valid_tld', 'http://one.not_a_valid_tld'),
        )
        for first, second in url_pairs:
            # The result must not depend on the argument order.
            assert urlutils.same_domain(QUrl(first), QUrl(second))
            assert urlutils.same_domain(QUrl(second), QUrl(first))

    def test_not_same_domains(self):
        """Verify URL pairs which must NOT count as the same domain."""
        url_pairs = (
            ('http://bbc.co.uk', 'http://example.co.uk'),
            ('https://example.kids.museum', 'http://example.kunst.museum'),
            ('http://idn.иком.museum', 'http://idn.ירושלים.museum'),
            ('http://one.not_a_valid_tld', 'http://two.not_a_valid_tld'),
        )
        for first, second in url_pairs:
            # The result must not depend on the argument order.
            assert not urlutils.same_domain(QUrl(first), QUrl(second))
            assert not urlutils.same_domain(QUrl(second), QUrl(first))