Move url utils to url.py

This commit is contained in:
Florian Bruhin 2014-02-07 17:20:55 +01:00
parent ccddf3f7e7
commit a4a2832f8e
4 changed files with 130 additions and 127 deletions

View File

@ -17,71 +17,17 @@
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
import re
import sys
import socket
import os.path
import platform
import logging
import subprocess
import ipaddress
import urllib.parse
from PyQt5.QtCore import QUrl, QT_VERSION_STR, PYQT_VERSION_STR, qVersion
from PyQt5.QtCore import QT_VERSION_STR, PYQT_VERSION_STR, qVersion
from PyQt5.QtWebKit import qWebKitVersion
import qutebrowser
def qurl(url):
    """Return url as a QUrl.

    A QUrl argument is handed back unchanged; anything else is converted
    with QUrl.fromUserInput.
    """
    if not isinstance(url, QUrl):
        return QUrl.fromUserInput(url)
    logging.debug("url is already a qurl")
    return url
def fuzzy_url(url):
    """Build a QUrl from user input which is either an URL or a search term.

    Input accepted by is_url() is converted directly. Anything else is
    turned into a search engine URL; when _get_search_url raises ValueError
    the raw input is converted instead.
    """
    if isinstance(url, QUrl):
        text = url.toString()
    else:
        text = url
    if is_url(text):
        # probably an address
        logging.debug("url is a fuzzy address")
        target = text
    else:
        # probably a search term
        logging.debug("url is a fuzzy search term")
        try:
            target = _get_search_url(text)
        except ValueError:
            target = text
    result = QUrl.fromUserInput(target)
    logging.debug('Converting fuzzy term {} to url -> {}'.format(
        text, result.url()))
    return result
def _get_search_url(txt):
    """Get a search engine URL for a text.

    An "!engine" marker in txt selects that entry of the 'searchengines'
    config section; without a marker the '__default__' entry is used. The
    remaining text is URL-quoted and inserted into the template.

    Raises ValueError if no template is found or no search term is left.
    """
    # FIXME Importing this here fixes some weird dependency problems.
    import qutebrowser.utils.config as config
    logging.debug('Finding search engine for "{}"'.format(txt))
    marker = re.compile(r'(^|\s+)!(\w+)($|\s+)')
    match = marker.search(txt)
    if match:
        engine = match.group(2)
        # FIXME why doesn't fallback work?!
        template = config.config.get('searchengines', engine, fallback=None)
        # The marker match includes the whitespace around it, so put a space
        # back to keep the neighbouring words apart
        # ("foo !g bar" -> "foo bar", not "foobar").
        term = marker.sub(' ', txt).strip()
        logging.debug('engine {}, term "{}"'.format(engine, term))
    else:
        template = config.config.get('searchengines', '__default__',
                                     fallback=None)
        term = txt
        logging.debug('engine: default, term "{}"'.format(txt))
    if template is None or not term:
        raise ValueError
    return template.format(urllib.parse.quote(term))
def version():
"""Return a string with various version informations."""
if sys.platform == 'linux':
@ -112,71 +58,6 @@ def version():
return ''.join(lines)
def is_about_url(url):
    """Return True if url is an about:... or other special URL.

    url may be a string or a QUrl. Every 'http://' occurrence is removed
    before the 'about:' prefix is checked.
    """
    if isinstance(url, QUrl):
        url = url.toString()
    stripped = url.replace('http://', '')
    return stripped.startswith('about:')
def is_url(url):
    """Return True if url seems to be a valid URL.

    Strings containing a space are rejected outright. Otherwise the
    'addressbar_dns_lookup' option of the 'general' config section selects
    between the DNS based check and the naive one.
    """
    # FIXME Importing this here fixes some weird dependency problems.
    import qutebrowser.utils.config as config
    logging.debug('Checking if "{}" is an URL'.format(url))
    if ' ' in url:
        # An URL will never contain a space
        logging.debug('Contains space -> no url')
        return False
    if config.config.getboolean('general', 'addressbar_dns_lookup'):
        logging.debug('Checking via DNS')
        return _is_url_dns(url)
    logging.debug('Checking via naive check')
    return _is_url_naive(url)
def _is_url_naive(url):
    """Naive check if given url string is really an url.

    The string counts as an URL if it starts with http:// or https://,
    contains a dot, is an about: URL, equals 'localhost', or has a network
    location which parses as an IP address.
    """
    protocols = ('http://', 'https://')
    host = _get_netloc(url)
    is_ip = False
    if host:
        try:
            ipaddress.ip_address(host)
        except ValueError:
            pass
        else:
            is_ip = True
    if url.startswith(protocols) or '.' in url:
        return True
    if is_about_url(url) or url == 'localhost':
        return True
    return is_ip
def _is_url_dns(url):
    """Check if an url string is really an url via DNS.

    Returns False if the string has no host part or the host cannot be
    resolved, True if the lookup succeeds.
    """
    # FIXME we could probably solve this in a nicer way by attempting to open
    # the page in the webview, and then open the search if that fails.
    netloc = _get_netloc(url)
    if not netloc:
        return False
    try:
        socket.gethostbyname(netloc)
    except (socket.gaierror, UnicodeError):
        # UnicodeError is raised when the name cannot be IDNA-encoded at
        # all (e.g. "foo..bar" has an empty label), so it is no host either.
        return False
    return True
def _get_netloc(url):
"""Gets the host part of an url."""
# FIXME better way to do this?
if '://' in url:
return urllib.parse.urlsplit(url).netloc
else:
return urllib.parse.urlsplit('http://' + url).netloc
def _git_str():
"""Try to find out git version and return a string if possible.

View File

@ -17,7 +17,8 @@
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
import qutebrowser.utils as utils
from qutebrowser.utils import version
from qutebrowser.utils.url import is_about_url
_html_template = """
@ -42,7 +43,7 @@ def handle(url):
Returns HTML content.
"""
if not utils.is_about_url(url):
if not is_about_url(url):
raise ValueError
handler = getattr(AboutHandlers, _transform_url(url))
return handler()
@ -73,4 +74,4 @@ class AboutHandlers:
@classmethod
def about_version(cls):
"""Handler for about:version."""
return _get_html('Version', '<pre>{}</pre>'.format(utils.version()))
return _get_html('Version', '<pre>{}</pre>'.format(version()))

121
qutebrowser/utils/url.py Normal file
View File

@ -0,0 +1,121 @@
"""Utils regarding URL handling."""
import re
import socket
import logging
import ipaddress
import urllib.parse
from PyQt5.QtCore import QUrl
import qutebrowser.utils.config as config
def qurl(url):
    """Return url as a QUrl.

    A QUrl argument is handed back unchanged; anything else is converted
    with QUrl.fromUserInput.
    """
    if not isinstance(url, QUrl):
        return QUrl.fromUserInput(url)
    logging.debug("url is already a qurl")
    return url
def fuzzy_url(url):
    """Build a QUrl from user input which is either an URL or a search term.

    Input accepted by is_url() is converted directly. Anything else is
    turned into a search engine URL; when _get_search_url raises ValueError
    the raw input is converted instead.
    """
    if isinstance(url, QUrl):
        text = url.toString()
    else:
        text = url
    if is_url(text):
        # probably an address
        logging.debug("url is a fuzzy address")
        target = text
    else:
        # probably a search term
        logging.debug("url is a fuzzy search term")
        try:
            target = _get_search_url(text)
        except ValueError:
            target = text
    result = QUrl.fromUserInput(target)
    logging.debug('Converting fuzzy term {} to url -> {}'.format(
        text, result.url()))
    return result
def _get_search_url(txt):
    """Get a search engine URL for a text.

    An "!engine" marker in txt selects that entry of the 'searchengines'
    config section; without a marker the '__default__' entry is used. The
    remaining text is URL-quoted and inserted into the template.

    Raises ValueError if no template is found or no search term is left.
    """
    logging.debug('Finding search engine for "{}"'.format(txt))
    marker = re.compile(r'(^|\s+)!(\w+)($|\s+)')
    match = marker.search(txt)
    if match:
        engine = match.group(2)
        # FIXME why doesn't fallback work?!
        template = config.config.get('searchengines', engine, fallback=None)
        # The marker match includes the whitespace around it, so put a space
        # back to keep the neighbouring words apart
        # ("foo !g bar" -> "foo bar", not "foobar").
        term = marker.sub(' ', txt).strip()
        logging.debug('engine {}, term "{}"'.format(engine, term))
    else:
        template = config.config.get('searchengines', '__default__',
                                     fallback=None)
        term = txt
        logging.debug('engine: default, term "{}"'.format(txt))
    if template is None or not term:
        raise ValueError
    return template.format(urllib.parse.quote(term))
def is_about_url(url):
    """Return True if url is an about:... or other special URL.

    url may be a string or a QUrl. Every 'http://' occurrence is removed
    before the 'about:' prefix is checked.
    """
    if isinstance(url, QUrl):
        url = url.toString()
    stripped = url.replace('http://', '')
    return stripped.startswith('about:')
def is_url(url):
    """Return True if url seems to be a valid URL.

    Strings containing a space are rejected outright. Otherwise the
    'addressbar_dns_lookup' option of the 'general' config section selects
    between the DNS based check and the naive one.
    """
    logging.debug('Checking if "{}" is an URL'.format(url))
    if ' ' in url:
        # An URL will never contain a space
        logging.debug('Contains space -> no url')
        return False
    if config.config.getboolean('general', 'addressbar_dns_lookup'):
        logging.debug('Checking via DNS')
        return _is_url_dns(url)
    logging.debug('Checking via naive check')
    return _is_url_naive(url)
def _is_url_naive(url):
    """Naive check if given url string is really an url.

    The string counts as an URL if it starts with http:// or https://,
    contains a dot, is an about: URL, equals 'localhost', or has a network
    location which parses as an IP address.
    """
    protocols = ('http://', 'https://')
    host = _get_netloc(url)
    is_ip = False
    if host:
        try:
            ipaddress.ip_address(host)
        except ValueError:
            pass
        else:
            is_ip = True
    if url.startswith(protocols) or '.' in url:
        return True
    if is_about_url(url) or url == 'localhost':
        return True
    return is_ip
def _is_url_dns(url):
    """Check if an url string is really an url via DNS.

    Returns False if the string has no host part or the host cannot be
    resolved, True if the lookup succeeds.
    """
    # FIXME we could probably solve this in a nicer way by attempting to open
    # the page in the webview, and then open the search if that fails.
    netloc = _get_netloc(url)
    if not netloc:
        return False
    try:
        socket.gethostbyname(netloc)
    except (socket.gaierror, UnicodeError):
        # UnicodeError is raised when the name cannot be IDNA-encoded at
        # all (e.g. "foo..bar" has an empty label), so it is no host either.
        return False
    return True
def _get_netloc(url):
"""Gets the host part of an url."""
# FIXME better way to do this?
if '://' in url:
return urllib.parse.urlsplit(url).netloc
else:
return urllib.parse.urlsplit('http://' + url).netloc

View File

@ -31,9 +31,9 @@ from PyQt5.QtGui import QClipboard
from PyQt5.QtPrintSupport import QPrintPreviewDialog
from PyQt5.QtWebKitWidgets import QWebView, QWebPage
import qutebrowser.utils as utils
import qutebrowser.utils.about as about
import qutebrowser.utils.config as config
import qutebrowser.utils.url as urlutils
from qutebrowser.widgets.tabbar import TabWidget
@ -76,7 +76,7 @@ class TabbedBrowser(TabWidget):
Also connect all the signals we need to _filter_signals.
"""
logging.debug("Opening {}".format(url))
url = utils.qurl(url)
url = urlutils.qurl(url)
tab = BrowserTab(self)
tab.openurl(url)
self.addTab(tab, url.url())
@ -428,10 +428,10 @@ class BrowserTab(QWebView):
url -- The URL to load, as string or QUrl.
"""
u = utils.fuzzy_url(url)
u = urlutils.fuzzy_url(url)
logging.debug('New title: {}'.format(u.url()))
self.titleChanged.emit(u.url())
if utils.is_about_url(u):
if urlutils.is_about_url(u):
try:
content = about.handle(u.toString())
except AttributeError: