Improve url/searchterm checking.

This commit is contained in:
Florian Bruhin 2014-02-07 17:14:13 +01:00
parent 01a251382b
commit ccddf3f7e7
3 changed files with 85 additions and 16 deletions

View File

@ -19,10 +19,12 @@
import re import re
import sys import sys
import socket
import os.path import os.path
import platform import platform
import logging import logging
import subprocess import subprocess
import ipaddress
import urllib.parse import urllib.parse
from PyQt5.QtCore import QUrl, QT_VERSION_STR, PYQT_VERSION_STR, qVersion from PyQt5.QtCore import QUrl, QT_VERSION_STR, PYQT_VERSION_STR, qVersion
@ -36,16 +38,24 @@ def qurl(url):
if isinstance(url, QUrl): if isinstance(url, QUrl):
logging.debug("url is already a qurl") logging.debug("url is already a qurl")
return url return url
elif '.' in url or is_about_url(url): # probably an address return QUrl.fromUserInput(url)
def fuzzy_url(url):
    """Return a QUrl for user input that is either an URL or a search term.

    Arguments:
    url -- The user input, as string or QUrl.

    Input accepted by is_url() is converted directly; anything else is
    turned into a search URL via _get_search_url(). If that raises a
    ValueError, the raw input is converted instead.
    """
    if isinstance(url, QUrl):
        text = url.toString()
    else:
        text = url

    if not is_url(text):
        # probably a search term
        logging.debug("url is a fuzzy search term")
        try:
            result = QUrl.fromUserInput(_get_search_url(text))
        except ValueError:
            result = QUrl.fromUserInput(text)
    else:
        # probably an address
        logging.debug("url is a fuzzy address")
        result = QUrl.fromUserInput(text)

    message = 'Converting fuzzy term {} to url -> {}'.format(
        text, result.url())
    logging.debug(message)
    return result
@ -104,9 +114,67 @@ def version():
def is_about_url(url):
    """Return True if url is an about:... URL.

    Arguments:
    url -- The URL to check, as string or QUrl.

    Every 'http://' occurrence is removed before the prefix test, so
    'http://about:foo' counts as an about: URL as well.
    """
    if isinstance(url, QUrl):
        text = url.toString()
    else:
        text = url
    without_http = text.replace('http://', '')
    return without_http.startswith('about:')
def is_url(url):
    """Return True if url seems to be a valid URL.

    Arguments:
    url -- The string to check.

    Input containing a space is never an URL. Otherwise the check is
    delegated to _is_url_dns() when the boolean config option
    general -> addressbar_dns_lookup is set, and to _is_url_naive() when
    it is not.
    """
    # FIXME Importing this here fixes some weird dependency problems.
    import qutebrowser.utils.config as config

    logging.debug('Checking if "{}" is an URL'.format(url))

    if ' ' in url:
        # An URL will never contain a space
        logging.debug('Contains space -> no url')
        return False

    use_dns = config.config.getboolean('general', 'addressbar_dns_lookup')
    if use_dns:
        logging.debug('Checking via DNS')
        return _is_url_dns(url)

    logging.debug('Checking via naive check')
    return _is_url_naive(url)
def _is_url_naive(url):
    """Naive check if given url string is really an url.

    Arguments:
    url -- The string to check.

    Return True if the string starts with http:// or https://, contains
    a dot, is an about: URL, is exactly 'localhost', or has a host part
    which parses as an IP address.
    """
    schemes = ('http://', 'https://')

    # The host is extracted up front, before any of the cheaper checks.
    host = _get_netloc(url)
    looks_like_ip = False
    if host:
        try:
            ipaddress.ip_address(host)
        except ValueError:
            pass
        else:
            looks_like_ip = True

    if url.startswith(schemes):
        return True
    if '.' in url:
        return True
    if is_about_url(url):
        return True
    if url == 'localhost':
        return True
    return looks_like_ip
def _is_url_dns(url):
    """Check if an url string is really an url via DNS.

    Arguments:
    url -- The string to check.

    Return True if the host part of url can be resolved, False if there
    is no host part or the lookup fails.
    """
    # FIXME we could probably solve this in a nicer way by attempting to open
    # the page in the webview, and then open the search if that fails.
    netloc = _get_netloc(url)
    if not netloc:
        return False
    try:
        socket.gethostbyname(netloc)
    except (socket.gaierror, UnicodeError):
        # gaierror: the resolver could not find the name.
        # UnicodeError: the host name could not be IDNA-encoded (e.g. an
        # empty or over-long label), which arbitrary address bar input can
        # easily trigger - treat that as "not an URL" instead of crashing.
        return False
    return True
def _get_netloc(url):
"""Gets the host part of an url."""
# FIXME better way to do this?
if '://' in url:
return urllib.parse.urlsplit(url).netloc
else:
return urllib.parse.urlsplit('http://' + url).netloc
def _git_str(): def _git_str():

View File

@ -39,6 +39,7 @@ default_config = """
ignorecase = true ignorecase = true
wrapsearch = true wrapsearch = true
startpage = http://www.duckduckgo.com/ startpage = http://www.duckduckgo.com/
addressbar_dns_lookup = false
[searchengines] [searchengines]
duckduckgo = https://duckduckgo.com/?q={} duckduckgo = https://duckduckgo.com/?q={}

View File

@ -428,19 +428,19 @@ class BrowserTab(QWebView):
url -- The URL to load, as string or QUrl. url -- The URL to load, as string or QUrl.
""" """
qurl = utils.qurl(url) u = utils.fuzzy_url(url)
logging.debug('New title: {}'.format(qurl.url())) logging.debug('New title: {}'.format(u.url()))
self.titleChanged.emit(qurl.url()) self.titleChanged.emit(u.url())
if utils.is_about_url(qurl): if utils.is_about_url(u):
try: try:
content = about.handle(qurl.toString()) content = about.handle(u.toString())
except AttributeError: except AttributeError:
return self.load(qurl) return self.load(u)
else: else:
self.setUrl(qurl) self.setUrl(u)
self.setContent(content, 'text/html') self.setContent(content, 'text/html')
else: else:
return self.load(qurl) return self.load(u)
def link_handler(self, url): def link_handler(self, url):
"""Handle a link. """Handle a link.