385 lines
13 KiB
Python
385 lines
13 KiB
Python
from functools import cached_property
|
||
from warnings import warn
|
||
|
||
from webencodings import ascii_lower
|
||
|
||
from .compiler import compile_selector_list, split_whitespace
|
||
|
||
|
||
class ElementWrapper:
    """Wrapper of :class:`xml.etree.ElementTree.Element` for Selector matching.

    This class should not be instantiated directly. :meth:`from_xml_root` or
    :meth:`from_html_root` should be used for the root element of a document,
    and other elements should be accessed (and wrappers generated) using
    methods such as :meth:`iter_children` and :meth:`iter_subtree`.

    :class:`ElementWrapper` objects compare equal if their underlying
    :class:`xml.etree.ElementTree.Element` do.

    """
    @classmethod
    def from_xml_root(cls, root, content_language=None):
        """Wrap for selector matching the root of an XML or XHTML document.

        :param root:
            An ElementTree :class:`xml.etree.ElementTree.Element`
            for the root element of a document.
            Any object with a ``getroot()`` method is accepted too,
            in which case the result of ``getroot()`` is wrapped.
            If the given element is not the root,
            selector matching will behave as if it were.
            In other words, selectors will not be `scoped`_
            to the subtree rooted at that element.
        :param content_language:
            Optional language declared outside of the document.
            It is only used by :attr:`lang` as a last-resort fallback,
            when the document itself does not declare a language.
        :returns:
            A new :class:`ElementWrapper`

        .. _scoped: https://drafts.csswg.org/selectors-4/#scoping

        """
        return cls._from_root(root, content_language, in_html_document=False)

    @classmethod
    def from_html_root(cls, root, content_language=None):
        """Same as :meth:`from_xml_root` with case-insensitive attribute names.

        Useful for documents parsed with an HTML parser like html5lib, which
        should be the case of documents with the ``text/html`` MIME type.

        """
        return cls._from_root(root, content_language, in_html_document=True)

    @classmethod
    def _from_root(cls, root, content_language, in_html_document=True):
        """Build the wrapper of a document root.

        Shared implementation of :meth:`from_xml_root` and
        :meth:`from_html_root`.

        """
        # Accept tree-like objects exposing getroot() as well as elements.
        if hasattr(root, 'getroot'):
            root = root.getroot()
        return cls(
            root, parent=None, index=0, previous=None,
            in_html_document=in_html_document, content_language=content_language)

    def __init__(self, etree_element, parent, index, previous,
                 in_html_document, content_language=None):
        #: The underlying ElementTree :class:`xml.etree.ElementTree.Element`
        self.etree_element = etree_element
        #: The parent :class:`ElementWrapper`,
        #: or :obj:`None` for the root element.
        self.parent = parent
        #: The previous sibling :class:`ElementWrapper`,
        #: or :obj:`None` for the root element or for a first child.
        self.previous = previous
        if parent is not None:
            #: The :attr:`parent`'s children
            #: as a list of
            #: ElementTree :class:`xml.etree.ElementTree.Element`\ s.
            #: For the root (which has no parent),
            #: a list containing only the root element itself.
            self.etree_siblings = parent.etree_children
        else:
            self.etree_siblings = [etree_element]
        #: The position within the :attr:`parent`'s children, counting from 0.
        #: ``e.etree_siblings[e.index]`` is always ``e.etree_element``.
        self.index = index
        #: Whether the element was wrapped through :meth:`from_html_root`.
        self.in_html_document = in_html_document
        #: Language given when wrapping the root, used as a fallback
        #: by :attr:`lang`. Only set on the root wrapper.
        self.transport_content_language = content_language

        # Cache
        self._ancestors = None
        self._previous_siblings = None

    def __eq__(self, other):
        return (
            type(self) is type(other) and
            self.etree_element == other.etree_element)

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        # Consistent with __eq__: same type and same underlying element.
        return hash((type(self), self.etree_element))

    def __iter__(self):
        yield from self.iter_children()

    @property
    def ancestors(self):
        """Tuple of existing ancestors.

        Tuple of existing :class:`ElementWrapper` objects for this element's
        ancestors, in reversed tree order, from :attr:`parent` to the root.

        The tuple is empty for the root element. It is computed once and
        then cached on the wrapper.

        """
        if self._ancestors is None:
            # The parent comes first so that the order really is
            # "from parent to the root", as documented.
            self._ancestors = (
                () if self.parent is None
                else (self.parent, *self.parent.ancestors))
        return self._ancestors

    @property
    def previous_siblings(self):
        """Tuple of previous siblings.

        Tuple of existing :class:`ElementWrapper` objects for this element's
        previous siblings, in reversed tree order (closest sibling first).

        The tuple is empty for a first child or the root element. It is
        computed once and then cached on the wrapper.

        """
        if self._previous_siblings is None:
            # The closest sibling comes first: reversed tree order.
            self._previous_siblings = (
                () if self.previous is None
                else (self.previous, *self.previous.previous_siblings))
        return self._previous_siblings

    def iter_ancestors(self):
        """Iterate over ancestors.

        Return an iterator of existing :class:`ElementWrapper` objects for this
        element's ancestors, in reversed tree order (from :attr:`parent` to the
        root).

        The element itself is not included, this is an empty sequence for the
        root element.

        This method is deprecated and will be removed in version 0.7.0. Use
        :attr:`ancestors` instead.

        """
        # stacklevel=2 attributes the warning to the caller's line, and a
        # plain function (not a generator) emits it as soon as it is called.
        warn(
            'This method is deprecated and will be removed in version 0.7.0. '
            'Use the "ancestors" attribute instead.',
            DeprecationWarning, stacklevel=2)
        return iter(self.ancestors)

    def iter_previous_siblings(self):
        """Iterate over previous siblings.

        Return an iterator of existing :class:`ElementWrapper` objects for this
        element's previous siblings, in reversed tree order.

        The element itself is not included, this is an empty sequence for a
        first child or the root element.

        This method is deprecated and will be removed in version 0.7.0. Use
        :attr:`previous_siblings` instead.

        """
        # stacklevel=2 attributes the warning to the caller's line, and a
        # plain function (not a generator) emits it as soon as it is called.
        warn(
            'This method is deprecated and will be removed in version 0.7.0. '
            'Use the "previous_siblings" attribute instead.',
            DeprecationWarning, stacklevel=2)
        return iter(self.previous_siblings)

    def iter_siblings(self):
        """Iterate over siblings.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        this element's siblings, in tree order. The element itself is part of
        the result.

        For the root element, which has no parent, the only item is the
        wrapper itself (not a new object).

        """
        if self.parent is None:
            yield self
        else:
            yield from self.parent.iter_children()

    def iter_next_siblings(self):
        """Iterate over next siblings.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        this element's next siblings, in tree order.

        """
        found = False
        for sibling in self.iter_siblings():
            if found:
                yield sibling
            if sibling == self:
                found = True

    def iter_children(self):
        """Iterate over children.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        this element's child elements, in tree order.

        """
        child = None
        for i, etree_child in enumerate(self.etree_children):
            # Each new wrapper is chained to the one created just before it.
            child = type(self)(
                etree_child, parent=self, index=i, previous=child,
                in_html_document=self.in_html_document)
            yield child

    def iter_subtree(self):
        """Iterate over subtree.

        Return an iterator of newly-created :class:`ElementWrapper` objects for
        the entire subtree rooted at this element, in tree order.

        Unlike in other methods, the element itself *is* included.

        This loops over an entire document:

        .. code-block:: python

            for element in ElementWrapper.from_xml_root(root_etree).iter_subtree():
                ...

        """
        # Depth-first traversal with an explicit stack of child iterators,
        # so that deep documents do not hit the recursion limit.
        stack = [iter([self])]
        while stack:
            element = next(stack[-1], None)
            if element is None:
                stack.pop()
            else:
                yield element
                stack.append(element.iter_children())

    @staticmethod
    def _compile(selectors):
        """Return the list of test functions for the given selectors.

        Objects with a ``test`` attribute are used as they are, anything else
        is given to :func:`compile_selector_list`. Selectors that have a
        pseudo-element or that never match are left out.

        """
        return [
            compiled_selector.test
            for selector in selectors
            for compiled_selector in (
                [selector] if hasattr(selector, 'test')
                else compile_selector_list(selector))
            if compiled_selector.pseudo_element is None and
            not compiled_selector.never_matches]

    def matches(self, *selectors):
        """Return whether this element matches any of the given selectors.

        :param selectors:
            Each given selector is either a :class:`compiler.CompiledSelector`,
            or an argument to :func:`compile_selector_list`.

        """
        return any(test(self) for test in self._compile(selectors))

    def query_all(self, *selectors):
        """Return elements, in tree order, that match any of given selectors.

        Selectors are `scoped`_ to the subtree rooted at this element.

        .. _scoped: https://drafts.csswg.org/selectors-4/#scoping

        :param selectors:
            Each given selector is either a :class:`compiler.CompiledSelector`,
            or an argument to :func:`compile_selector_list`.
        :returns:
            An iterator of newly-created :class:`ElementWrapper` objects.

        """
        tests = self._compile(selectors)
        if len(tests) == 1:
            return filter(tests[0], self.iter_subtree())
        elif tests:
            return (
                element for element in self.iter_subtree()
                if any(test(element) for test in tests))
        else:
            # Nothing can match: do not walk the subtree at all.
            return iter(())

    def query(self, *selectors):
        """Return first element that matches any of given selectors.

        :param selectors:
            Each given selector is either a :class:`compiler.CompiledSelector`,
            or an argument to :func:`compile_selector_list`.
        :returns:
            A newly-created :class:`ElementWrapper` object,
            or :obj:`None` if there is no match.

        """
        return next(self.query_all(*selectors), None)

    @cached_property
    def etree_children(self):
        """Children as a list of :class:`xml.etree.ElementTree.Element`.

        Other ElementTree nodes such as
        :func:`comments <xml.etree.ElementTree.Comment>` and
        :func:`processing instructions
        <xml.etree.ElementTree.ProcessingInstruction>`
        are not included.

        """
        # Only nodes whose tag is a string are kept.
        return [
            element for element in self.etree_element
            if isinstance(element.tag, str)]

    @cached_property
    def local_name(self):
        """The local name of this element, as a string."""
        namespace_url, local_name = _split_etree_tag(self.etree_element.tag)
        # Fill the cache of the sibling property: the tag is split only once.
        self.__dict__['namespace_url'] = namespace_url
        return local_name

    @cached_property
    def namespace_url(self):
        """The namespace URL of this element, as a string."""
        namespace_url, local_name = _split_etree_tag(self.etree_element.tag)
        # Fill the cache of the sibling property: the tag is split only once.
        self.__dict__['local_name'] = local_name
        return namespace_url

    @cached_property
    def id(self):
        """The ID of this element, as a string.

        This is :obj:`None` when the element has no ``id`` attribute.

        """
        return self.etree_element.get('id')

    @cached_property
    def classes(self):
        """The classes of this element, as a :class:`set` of strings."""
        return set(split_whitespace(self.etree_element.get('class', '')))

    @cached_property
    def lang(self):
        """The language of this element, as a lower-cased string.

        The first available source is used, in this order:

        - the ``xml:lang`` attribute of the element,
        - the ``lang`` attribute, for elements of HTML documents or in the
          XHTML namespace,
        - the language of the parent element,
        - for an HTML root element, the last
          ``<meta http-equiv="content-language">`` of the document,
        - the content language given when the root was wrapped.

        The empty string means that the language is unknown.

        """
        # http://whatwg.org/C#language
        xml_lang = self.etree_element.get('{http://www.w3.org/XML/1998/namespace}lang')
        if xml_lang is not None:
            return ascii_lower(xml_lang)
        is_html = (
            self.in_html_document or
            self.namespace_url == 'http://www.w3.org/1999/xhtml')
        if is_html:
            lang = self.etree_element.get('lang')
            if lang is not None:
                return ascii_lower(lang)
        if self.parent is not None:
            return self.parent.lang
        # Root element
        if is_html:
            content_language = None
            iterator = self.etree_element.iter('{http://www.w3.org/1999/xhtml}meta')
            for meta in iterator:
                http_equiv = meta.get('http-equiv', '')
                if ascii_lower(http_equiv) == 'content-language':
                    # No break: the last matching <meta> wins.
                    content_language = _parse_content_language(meta.get('content'))
            if content_language is not None:
                return ascii_lower(content_language)
        # Lower-cased like every other source. Empty string means unknown.
        content_language = _parse_content_language(self.transport_content_language)
        return ascii_lower(content_language) if content_language else ''

    @cached_property
    def in_disabled_fieldset(self):
        """Whether this element is in a disabled XHTML ``fieldset``.

        This is true when the parent is a ``fieldset`` with a ``disabled``
        attribute, unless this element is the first ``legend`` child of that
        fieldset, or when the parent itself is in a disabled fieldset.

        """
        if self.parent is None:
            return False
        fieldset = '{http://www.w3.org/1999/xhtml}fieldset'
        legend = '{http://www.w3.org/1999/xhtml}legend'
        # The "previous_siblings" attribute is used instead of the deprecated
        # iter_previous_siblings(), that would emit a DeprecationWarning.
        disabled_fieldset = (
            self.parent.etree_element.tag == fieldset and
            self.parent.etree_element.get('disabled') is not None and (
                self.etree_element.tag != legend or any(
                    sibling.etree_element.tag == legend
                    for sibling in self.previous_siblings)))
        return disabled_fieldset or self.parent.in_disabled_fieldset
|
||
|
||
|
||
def _split_etree_tag(tag):
|
||
position = tag.rfind('}')
|
||
if position == -1 or tag[0] != '{':
|
||
return '', tag
|
||
else:
|
||
return tag[1:position], tag[position+1:]
|
||
|
||
|
||
def _parse_content_language(value):
|
||
if value is not None and ',' not in value:
|
||
parts = split_whitespace(value)
|
||
if len(parts) == 1:
|
||
return parts[0]
|