qutebrowser/qutebrowser/utils/url.py
2014-08-08 13:14:48 +02:00

222 lines
6.4 KiB
Python

# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2014 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
"""Utils regarding URL handling."""
import re
import os.path
import urllib.parse
from PyQt5.QtCore import QUrl
from PyQt5.QtNetwork import QHostInfo
import qutebrowser.config.config as config
from qutebrowser.utils.log import url as logger
from qutebrowser.utils.misc import qt_ensure_valid
# FIXME: we probably could raise some exceptions on invalid URLs
def _get_search_url(txt):
"""Get a search engine URL for a text.
Args:
txt: Text to search for.
Return:
The search URL as a QUrl.
Raise:
FuzzyUrlError if there is no template or no search term was found.
"""
logger.debug("Finding search engine for '{}'".format(txt))
r = re.compile(r'(^|\s+)!(\w+)($|\s+)')
m = r.search(txt)
if m:
engine = m.group(2)
try:
template = config.get('searchengines', engine)
except config.NoOptionError:
raise FuzzyUrlError("Search engine {} not found!".format(
engine))
term = r.sub('', txt)
logger.debug("engine {}, term '{}'".format(engine, term))
else:
template = config.get('searchengines', 'DEFAULT')
term = txt
logger.debug("engine: default, term '{}'".format(txt))
if not term:
raise FuzzyUrlError("No search term given")
url = QUrl.fromUserInput(template.format(urllib.parse.quote(term)))
qt_ensure_valid(url)
return url
def _is_url_naive(urlstr):
"""Naive check if given URL is really a URL.
Args:
urlstr: The URL to check for, as string.
Return:
True if the URL really is a URL, False otherwise.
"""
url = QUrl.fromUserInput(urlstr)
# We don't use url here because fromUserInput appends http://
# automatically.
if not url.isValid():
return False
elif '.' in url.host():
return True
elif url.host() == 'localhost':
return True
else:
return False
def _is_url_dns(url):
"""Check if a URL is really a URL via DNS.
Args:
url: The URL to check for as QUrl, ideally via QUrl::fromUserInput.
Return:
True if the URL really is a URL, False otherwise.
"""
if not url.isValid():
return False
host = url.host()
logger.debug("DNS request for {}".format(host))
if not host:
return False
info = QHostInfo.fromName(host)
return not info.error()
def fuzzy_url(urlstr):
"""Get a QUrl based on an user input which is URL or search term.
Args:
urlstr: URL to load as a string.
Return:
A target QUrl to a searchpage or the original URL.
"""
path = os.path.abspath(os.path.expanduser(urlstr))
stripped = urlstr.strip()
if os.path.exists(path):
logger.debug("URL is a local file")
url = QUrl.fromLocalFile(path)
elif (not _has_explicit_scheme(QUrl(urlstr)) and
os.path.exists(os.path.abspath(path))):
# We do this here rather than in the first block because we first want
# to make sure it's not an URL like http://, because os.path.abspath
# would mangle that.
logger.debug("URL is a relative local file")
url = QUrl.fromLocalFile(os.path.abspath(path))
elif is_url(stripped):
# probably an address
logger.debug("URL is a fuzzy address")
url = QUrl.fromUserInput(urlstr)
else: # probably a search term
logger.debug("URL is a fuzzy search term")
try:
url = _get_search_url(urlstr)
except ValueError: # invalid search engine
url = QUrl.fromUserInput(stripped)
logger.debug("Converting fuzzy term {} to URL -> {}".format(
urlstr, url.toDisplayString()))
qt_ensure_valid(url)
return url
def _has_explicit_scheme(url):
"""Check if an url has an explicit scheme given.
Args:
url: The URL as QUrl.
"""
return url.isValid() and url.scheme()
def is_special_url(url):
"""Return True if url is an about:... or other special URL.
Args:
url: The URL as QUrl.
"""
if not url.isValid():
return False
special_schemes = ('about', 'qute', 'file')
return url.scheme() in special_schemes
def is_url(urlstr):
"""Check if url seems to be a valid URL.
Args:
urlstr: The URL as string.
Return:
True if it is a valid URL, False otherwise.
Raise:
ValueError if the autosearch config value is invalid.
"""
autosearch = config.get('general', 'auto-search')
logger.debug("Checking if '{}' is a URL (autosearch={}).".format(
urlstr, autosearch))
qurl = QUrl(urlstr)
if not autosearch:
# no autosearch, so everything is a URL.
return True
if _has_explicit_scheme(qurl):
# URLs with explicit schemes are always URLs
logger.debug("Contains explicit scheme")
return True
elif ' ' in urlstr:
# A URL will never contain a space
logger.debug("Contains space -> no URL")
return False
elif is_special_url(qurl):
# Special URLs are always URLs, even with autosearch=False
logger.debug("Is an special URL.")
return True
elif autosearch == 'dns':
logger.debug("Checking via DNS")
# We want to use fromUserInput here, as the user might enter "foo.de"
# and that should be treated as URL here.
return _is_url_dns(QUrl.fromUserInput(urlstr))
elif autosearch == 'naive':
logger.debug("Checking via naive check")
return _is_url_naive(urlstr)
else:
raise ValueError("Invalid autosearch value")
class FuzzyUrlError(Exception):
"""Exception raised by fuzzy_url on problems."""
pass