442 lines
15 KiB
Python
442 lines
15 KiB
Python
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
|
|
|
|
# Copyright 2014-2016 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
|
|
#
|
|
# This file is part of qutebrowser.
|
|
#
|
|
# qutebrowser is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# qutebrowser is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
"""Utilities related to QWebElements.
|
|
|
|
Module attributes:
|
|
Group: Enum for different kinds of groups.
|
|
SELECTORS: CSS selectors for different groups of elements.
|
|
FILTERS: A dictionary of filter functions for the modes.
|
|
The filter for "links" filters javascript:-links and a-tags
|
|
without "href".
|
|
"""
|
|
|
|
import collections.abc
|
|
import functools
|
|
|
|
from PyQt5.QtCore import QRect, QUrl
|
|
from PyQt5.QtWebKit import QWebElement
|
|
|
|
from qutebrowser.config import config
|
|
from qutebrowser.utils import log, usertypes, utils
|
|
|
|
|
|
# The kinds of element groups which can be selected on a page.
Group = usertypes.enum(
    'Group',
    ['all', 'links', 'images', 'url', 'prevnext', 'focus'],
)
|
|
|
|
|
|
# CSS selector used to find the elements belonging to each Group member.
SELECTORS = {
    Group.all: ', '.join([
        'a', 'area', 'textarea', 'select', 'input:not([type=hidden])',
        'button', 'frame', 'iframe', 'link', '[onclick]', '[onmousedown]',
        '[role=link]', '[role=option]', '[role=button]', 'img',
    ]),
    Group.links: ', '.join(['a', 'area', 'link', '[role=link]']),
    Group.images: 'img',
    Group.url: ', '.join(['[src]', '[href]']),
    Group.prevnext: ', '.join(['a', 'area', 'button', 'link',
                               '[role=button]']),
    Group.focus: '*:focus',
}
|
|
|
|
|
|
def filter_links(elem):
    """Decide whether a link-like element should be kept.

    Args:
        elem: A mapping-like element giving access to its attributes.

    Return:
        False if the element has no "href" attribute or if its href uses the
        javascript: scheme, True otherwise.
    """
    if 'href' not in elem:
        return False
    scheme = QUrl(elem['href']).scheme()
    return scheme != 'javascript'
|
|
|
|
|
|
# Extra per-group filter functions; both link-like groups share filter_links.
FILTERS = dict.fromkeys((Group.links, Group.prevnext), filter_links)
|
|
|
|
|
|
class IsNullError(Exception):

    """Raised when an operation is attempted on a null web element.

    WebElementWrapper raises this when the wrapped element is (or became)
    null; rect_on_view and is_visible raise it for null elements as well.
    """
|
|
|
|
|
|
class WebElementWrapper(collections.abc.MutableMapping):

    """A wrapper around QWebElement to make it more intelligent.

    The element's attributes are available via the mapping interface
    (``wrapper['href']``, ``'href' in wrapper``, ``del wrapper['href']``,
    iteration over attribute names), and ``str(wrapper)`` gives the plain
    text of the element. Before touching the wrapped element, the wrapper
    checks whether it is null and raises IsNullError if so.

    Attributes:
        _elem: The wrapped QWebElement.
    """

    # QWebElement members which are re-exposed on every wrapper instance with
    # a null-check in front.
    #
    # We don't wrap some methods for which we have better alternatives:
    #   - Mapping access for attributeNames/hasAttribute/setAttribute/
    #     attribute/removeAttribute.
    #   - isNull is checked automagically.
    #   - str(...) instead of toPlainText
    #
    # NOTE(review): '__eq__' and '__ne__' are set as *instance* attributes.
    # Python looks up special methods on the type for the ==/!= operators, so
    # those operators still use the Mapping implementation (comparison of the
    # attribute dicts); only explicit wrapper.__eq__(...) calls reach the
    # QWebElement. Kept as-is so equality does not change for existing
    # callers.
    _WRAPPED_METHODS = (
        'addClass', 'appendInside', 'appendOutside', 'attributeNS',
        'classes', 'clone', 'document', 'encloseContentsWith', 'encloseWith',
        'evaluateJavaScript', 'findAll', 'findFirst', 'firstChild',
        'geometry', 'hasAttributeNS', 'hasAttributes', 'hasClass',
        'hasFocus', 'lastChild', 'localName', 'namespaceUri', 'nextSibling',
        'parent', 'prefix', 'prependInside', 'prependOutside',
        'previousSibling', 'removeAllChildren', 'removeAttributeNS',
        'removeClass', 'removeFromDocument', 'render', 'replace',
        'setAttributeNS', 'setFocus', 'setInnerXml', 'setOuterXml',
        'setPlainText', 'setStyleProperty', 'styleProperty', 'tagName',
        'takeFromDocument', 'toInnerXml', 'toOuterXml', 'toggleClass',
        'webFrame', '__eq__', '__ne__',
    )

    def __init__(self, elem):
        """Wrap the given element.

        Args:
            elem: The QWebElement to wrap.

        Raises:
            TypeError: If elem already is a wrapper.
            IsNullError: If elem is a null element.
        """
        if isinstance(elem, self.__class__):
            raise TypeError("Trying to wrap a wrapper!")
        if elem.isNull():
            raise IsNullError('{} is a null element!'.format(elem))
        self._elem = elem

        # Defined once instead of once per wrapped name: wrappers get created
        # for every hinted element, so this constructor is a hot path.
        def _wrapper(meth, *args, **kwargs):
            self._check_vanished()
            return meth(*args, **kwargs)

        for name in self._WRAPPED_METHODS:
            method = getattr(elem, name)
            # We used to do functools.update_wrapper here, but for some reason
            # when using hints with many links, this accounted for nearly 50%
            # of the time when profiling, which is unacceptable.
            setattr(self, name, functools.partial(_wrapper, method))

    def __str__(self):
        self._check_vanished()
        return self._elem.toPlainText()

    def __repr__(self):
        try:
            html = self.debug_text()
        except IsNullError:
            # A vanished element still needs a usable repr.
            html = None
        return utils.get_repr(self, html=html)

    def __getitem__(self, key):
        self._check_vanished()
        if key not in self:
            raise KeyError(key)
        return self._elem.attribute(key)

    def __setitem__(self, key, val):
        self._check_vanished()
        self._elem.setAttribute(key, val)

    def __delitem__(self, key):
        self._check_vanished()
        if key not in self:
            raise KeyError(key)
        self._elem.removeAttribute(key)

    def __contains__(self, key):
        self._check_vanished()
        return self._elem.hasAttribute(key)

    def __iter__(self):
        self._check_vanished()
        yield from self._elem.attributeNames()

    def __len__(self):
        self._check_vanished()
        return len(self._elem.attributeNames())

    def _check_vanished(self):
        """Raise an exception if the element vanished (is null)."""
        if self._elem.isNull():
            raise IsNullError('Element {} vanished!'.format(self._elem))

    def is_visible(self, mainframe):
        """Check whether the element is currently visible on the screen.

        Args:
            mainframe: The main QWebFrame.

        Return:
            True if the element is visible, False otherwise.
        """
        # Resolves to the module-level is_visible() function.
        return is_visible(self._elem, mainframe)

    def rect_on_view(self):
        """Get the geometry of the element relative to the webview."""
        # Resolves to the module-level rect_on_view() function.
        return rect_on_view(self._elem)

    def is_writable(self):
        """Check whether an element is writable.

        Return:
            False if the element has a "disabled" or "readonly" attribute,
            True otherwise.
        """
        self._check_vanished()
        return not ('disabled' in self or 'readonly' in self)

    def is_content_editable(self):
        """Check if the element has a contenteditable attribute.

        Return:
            True if the element has a contenteditable attribute whose value
            is neither "false" nor "inherit" (case-insensitive),
            False otherwise.
        """
        self._check_vanished()
        try:
            return self['contenteditable'].lower() not in ('false', 'inherit')
        except KeyError:
            return False

    def _is_editable_object(self):
        """Check if an object-element is editable.

        Return:
            The 'insert-mode-on-plugins' setting for plugin-like objects,
            False for everything else.
        """
        if 'type' not in self:
            log.webview.debug("<object> without type clicked...")
            return False
        objtype = self['type'].lower()
        if objtype.startswith('application/') or 'classid' in self:
            # Let's hope flash/java stuff has an application/* mimetype OR
            # at least a classid attribute. Oh, and let's hope images/...
            # DON'T have a classid attribute. HTML sucks.
            log.webview.debug("<object type='{}'> clicked.".format(objtype))
            return config.get('input', 'insert-mode-on-plugins')
        else:
            # Image/Audio/...
            return False

    def _is_editable_input(self):
        """Check if an input-element is editable.

        Return:
            True if the element is editable, False otherwise.
        """
        try:
            objtype = self['type'].lower()
        except KeyError:
            # No type attribute: only writability decides.
            return self.is_writable()
        else:
            if objtype in ['text', 'email', 'url', 'tel', 'number', 'password',
                           'search']:
                return self.is_writable()
            else:
                return False

    def _is_editable_div(self):
        """Check if a div-element is editable.

        Return:
            True if the element is editable, False otherwise.
        """
        # Beginnings of div-classes which are actually some kind of editor.
        div_classes = ('CodeMirror',  # Javascript editor over a textarea
                       'kix-',  # Google Docs editor
                       'ace_')  # http://ace.c9.io/
        # str.startswith accepts a tuple of prefixes.
        return any(klass.startswith(div_classes)
                   for klass in self._elem.classes())

    def is_editable(self, strict=False):
        """Check whether we should switch to insert mode for this element.

        Args:
            strict: Whether to do stricter checking so only fields where we can
                    get the value match, for use with the :editor command.

        Return:
            True if we should switch to insert mode, False otherwise.
        """
        # pylint: disable=too-many-return-statements
        self._check_vanished()
        roles = ('combobox', 'textbox')
        log.misc.debug("Checking if element is editable: {}".format(
            repr(self)))
        tag = self._elem.tagName().lower()
        if self.is_content_editable() and self.is_writable():
            return True
        elif self.get('role', None) in roles and self.is_writable():
            return True
        elif tag == 'input':
            return self._is_editable_input()
        elif tag == 'textarea':
            return self.is_writable()
        elif tag in ('embed', 'applet'):
            # Flash/Java/...
            return config.get('input', 'insert-mode-on-plugins') and not strict
        elif tag == 'object':
            return self._is_editable_object() and not strict
        elif tag == 'div':
            return self._is_editable_div() and not strict
        else:
            return False

    def is_text_input(self):
        """Check if this element is some kind of text box."""
        self._check_vanished()
        roles = ('combobox', 'textbox')
        tag = self._elem.tagName().lower()
        return self.get('role', None) in roles or tag in ('input', 'textarea')

    def remove_blank_target(self):
        """Remove target from link.

        Starting at this element, at most five elements up the parent chain
        are inspected. On the first <a>/<area> found, a target of "_blank" is
        replaced by "_top"; the search stops at that element either way.
        """
        elem = self._elem
        for _ in range(5):
            # QWebElement.parent() returns a null element (not None) once we
            # are past the root, so the null check is what really ends the
            # walk; the None check is kept for objects which do return None.
            if elem is None or elem.isNull():
                break
            tag = elem.tagName().lower()
            if tag in ('a', 'area'):
                if elem.attribute('target') == '_blank':
                    elem.setAttribute('target', '_top')
                break
            elem = elem.parent()

    def debug_text(self):
        """Get a text based on an element suitable for debug output."""
        self._check_vanished()
        return utils.compact_text(self._elem.toOuterXml(), 500)
|
|
|
|
|
|
def javascript_escape(text):
    """Escape values special to javascript in strings.

    With this we should be able to use something like:
      elem.evaluateJavaScript("this.value='{}'".format(javascript_escape(...)))
    And all values should work.

    Args:
        text: The string to escape.

    Return:
        The escaped string.
    """
    # Every key is a single character, so one translate() pass gives the same
    # result as replacing backslashes first and the other characters
    # afterwards: backslashes added by an escape are never escaped again.
    escapes = str.maketrans({
        '\\': r'\\',
        # Both quote kinds get escaped; escaping the "wrong" one is harmless.
        "'": r"\'",
        '"': r'\"',
        '\n': r'\n',
        '\r': r'\r',
        '\x00': r'\x00',
        '\ufeff': r'\ufeff',
        # http://stackoverflow.com/questions/2965293/
        '\u2028': r'\u2028',
        '\u2029': r'\u2029',
    })
    return text.translate(escapes)
|
|
|
|
|
|
def get_child_frames(startframe):
    """Collect a QWebFrame and all of its descendants.

    Loosely based on http://blog.nextgenetics.net/?e=64

    Args:
        startframe: The QWebFrame to start with.

    Return:
        A list of QWebFrames in breadth-first order. The first entry is
        startframe itself, followed by its children, grandchildren, etc.
    """
    # The result list doubles as the work queue: `pos` points at the next
    # frame whose children still have to be collected.
    found = [startframe]
    pos = 0
    while pos < len(found):
        found.extend(found[pos].childFrames())
        pos += 1
    return found
|
|
|
|
|
|
def focus_elem(frame):
    """Get the focused element in a web frame.

    Args:
        frame: The QWebFrame to search in.

    Return:
        A WebElementWrapper around the first element matching the focus
        selector.

    Raises:
        IsNullError: If the frame returns a null element (WebElementWrapper
                     refuses to wrap null elements).
    """
    selector = SELECTORS[Group.focus]
    return WebElementWrapper(frame.findFirstElement(selector))
|
|
|
|
|
|
def rect_on_view(elem, elem_geometry=None):
    """Get the geometry of the element relative to the webview.

    This is a standalone function (rather than only a WebElementWrapper
    method) so that hinting can run is_visible on unwrapped elements, which is
    cheaper.

    Args:
        elem: The QWebElement to get the rect for.
        elem_geometry: The geometry of the element, or None.
                       Calling QWebElement::geometry is rather expensive so we
                       want to avoid doing it twice.

    Return:
        A new QRect: the element geometry shifted by the position and the
        negated scroll position of its frame and of every parent frame.

    Raises:
        IsNullError: If elem is a null element.
    """
    if elem.isNull():
        raise IsNullError("Got called on a null element!")

    def frame_chain(frame):
        """Yield the given frame and then each of its ancestors."""
        while frame is not None:
            yield frame
            frame = frame.parentFrame()

    geometry = elem.geometry() if elem_geometry is None else elem_geometry
    innermost = elem.webFrame()
    # Work on a copy so that the geometry passed in is left untouched.
    result = QRect(geometry)
    for frame in frame_chain(innermost):
        result.translate(frame.geometry().topLeft())
        result.translate(frame.scrollPosition() * -1)
    return result
|
|
|
|
|
|
def is_visible(elem, mainframe):
    """Check if the given element is visible in the frame.

    This is a standalone function (rather than only a WebElementWrapper
    method) so that hinting can check visibility before wrapping elements,
    which is cheaper.

    Args:
        elem: The QWebElement to check.
        mainframe: The QWebFrame in which the element should be visible.

    Return:
        True if the element is visible both on screen and inside its own
        frame, False otherwise.

    Raises:
        IsNullError: If elem is a null element.
    """
    if elem.isNull():
        raise IsNullError("Got called on a null element!")

    # Computed CSS property values which hide an element.
    hiding_styles = (
        ('visibility', 'hidden'),
        ('display', 'none'),
    )
    for prop, hidden_value in hiding_styles:
        if elem.styleProperty(prop, QWebElement.ComputedStyle) == hidden_value:
            return False

    geometry = elem.geometry()
    if not geometry.isValid() and geometry.x() == 0:
        # Most likely an invisible link
        return False

    # Step 1: is the element inside the visible part of the main frame?
    # An invalid rectangle (width/height 0/0 probably) can still be a valid
    # link, so in that case only its top-left corner is tested.
    view_rect = rect_on_view(elem, elem_geometry=geometry)
    main_geometry = mainframe.geometry()
    on_screen = (main_geometry.intersects(view_rect)
                 if view_rect.isValid()
                 else main_geometry.contains(view_rect.topLeft()))

    # Step 2: if the element lives in a sub-frame, is it inside the currently
    # scrolled-to area of that frame?
    frame = elem.webFrame()
    frame_rect = QRect(frame.geometry())
    if not frame_rect.isValid():
        in_frame = False
    elif frame.parentFrame() is None:
        # Main frame: the on-screen check already covers it.
        in_frame = on_screen
    else:
        frame_rect.moveTo(0, 0)
        frame_rect.translate(frame.scrollPosition())
        # Same fallback as above for invalid (0/0) rectangles.
        in_frame = (frame_rect.intersects(geometry)
                    if geometry.isValid()
                    else frame_rect.contains(geometry.topLeft()))

    return all((on_screen, in_frame))
|