dos2unix for url.py, argh

This commit is contained in:
Florian Bruhin 2014-02-07 19:21:41 +01:00
parent b50eba38d3
commit dadfc952d1

View File

@@ -1,110 +1,110 @@
"""Utils regarding URL handling.""" """Utils regarding URL handling."""
import re import re
import socket import socket
import logging import logging
import urllib.parse import urllib.parse
from PyQt5.QtCore import QUrl from PyQt5.QtCore import QUrl
import qutebrowser.utils.config as config import qutebrowser.utils.config as config
def qurl(url):
    """Get a QUrl from an url string.

    url -- URL as string or QUrl. A QUrl is handed back unchanged, anything
           else is passed to the QUrl constructor.
    """
    if isinstance(url, QUrl):
        return url
    return QUrl(url)
def urlstring(url):
    """Return an URL as string.

    url -- URL as string or QUrl. A QUrl is converted via its url() method,
           anything else is returned as-is.
    """
    if isinstance(url, QUrl):
        return url.url()
    return url
def fuzzy_url(url):
    """Return a QUrl based on an user input which is URL or search term.

    url -- URL to load as QUrl or string.

    Input which is_url() accepts is converted with QUrl.fromUserInput.
    Everything else is treated as a search term; if no search URL can be
    built for it (ValueError), QUrl.fromUserInput is used as well.
    """
    text = urlstring(url)
    if is_url(qurl(url)):
        # probably an address
        logging.debug("url is a fuzzy address")
        result = QUrl.fromUserInput(text)
    else:
        # probably a search term
        logging.debug("url is a fuzzy search term")
        try:
            result = _get_search_url(text)
        except ValueError:
            # invalid search engine
            result = QUrl.fromUserInput(text)
    message = 'Converting fuzzy term {} to url -> {}'.format(text,
                                                             result.url())
    logging.debug(message)
    return result
def _get_search_url(txt):
    """Return a search engine URL (QUrl) for a text.

    txt -- Search text. It may contain an '!engine' marker (delimited by
           whitespace or the start/end of the text) which selects the
           template of that name from the 'searchengines' config section;
           without a marker the '__default__' template is used.

    Raise ValueError if no template was found or the search term is empty.
    """
    logging.debug('Finding search engine for "{}"'.format(txt))
    r = re.compile(r'(^|\s+)!(\w+)($|\s+)')
    m = r.search(txt)
    if m:
        engine = m.group(2)
        # FIXME why doesn't fallback work?!
        template = config.config.get('searchengines', engine, fallback=None)
        # The pattern also consumes the whitespace around the marker, so
        # substitute a single space (not '') to keep the neighbouring words
        # apart ("foo !g bar" -> "foo bar"), then trim the ends.
        term = r.sub(' ', txt).strip()
        logging.debug('engine {}, term "{}"'.format(engine, term))
    else:
        template = config.config.get('searchengines', '__default__',
                                     fallback=None)
        term = txt
        logging.debug('engine: default, term "{}"'.format(txt))
    if template is None or not term:
        raise ValueError
    # Percent-encode the term before inserting it into the template.
    return QUrl.fromUserInput(template.format(urllib.parse.quote(term)))
def is_about_url(url):
    """Return True if url is an about:... or other special URL.

    url -- URL to check as QUrl or string.

    One leading 'http://' is ignored before looking for the 'about:' prefix.
    """
    urlstr = urlstring(url)
    scheme = 'http://'
    # Only drop the scheme when it really is a prefix; removing it anywhere
    # in the string could glue unrelated parts together into 'about:'.
    if urlstr.startswith(scheme):
        urlstr = urlstr[len(scheme):]
    return urlstr.startswith('about:')
def is_url(url):
    """Return True if url seems to be a valid URL.

    url -- URL to check as QUrl or string.

    Text containing a space is never considered an URL. Otherwise the
    'addressbar_dns_lookup' option in the 'general' config section decides
    whether the DNS based or the naive check is used.
    """
    # Convert once via urlstring() so strings are handled like QUrls instead
    # of failing on a missing .url() method.
    urlstr = urlstring(url)
    logging.debug('Checking if "{}" is an URL'.format(urlstr))
    if ' ' in urlstr:
        # An URL will never contain a space
        logging.debug('Contains space -> no url')
        return False
    if config.config.getboolean('general', 'addressbar_dns_lookup'):
        logging.debug('Checking via DNS')
        return _is_url_dns(QUrl.fromUserInput(urlstr))
    logging.debug('Checking via naive check')
    return _is_url_naive(url)
def _is_url_naive(url):
    """Naive check if given url is really an url.

    url -- URL to check as QUrl or string.

    The text counts as an URL if it starts with http:// or https://,
    contains a dot, is an about: URL or is exactly 'localhost'.
    """
    text = urlstring(url)
    if text.startswith(('http://', 'https://')):
        return True
    if '.' in text:
        return True
    if is_about_url(url):
        return True
    return text == 'localhost'
def _is_url_dns(url):
    """Check if an url (QUrl) is really an url via DNS.

    url -- URL to check; only its host() is used.

    Return True if the host name could be resolved, False if the host is
    empty, cannot be resolved or is not a valid host name.
    """
    # FIXME we could probably solve this in a nicer way by attempting to open
    # the page in the webview, and then open the search if that fails.
    host = url.host()
    logging.debug("DNS request for {}".format(host))
    if not host:
        return False
    try:
        socket.gethostbyname(host)
    except (socket.gaierror, UnicodeError):
        # gaierror: the lookup itself failed.
        # UnicodeError: the host could not be IDNA-encoded (e.g. an empty or
        # over-long label), so it cannot be a resolvable name either.
        return False
    else:
        return True