Viewing file: etree_lxml.py (6.21 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
from __future__ import absolute_import, division, unicode_literals from pip._vendor.six import text_type
from collections import OrderedDict
from lxml import etree from ..treebuilders.etree import tag_regexp
from . import base
from .. import _ihatexml
def ensure_str(s): if s is None: return None elif isinstance(s, text_type): return s else: return s.decode("ascii", "strict")
class Root(object): def __init__(self, et): self.elementtree = et self.children = []
try: if et.docinfo.internalDTD: self.children.append(Doctype(self, ensure_str(et.docinfo.root_name), ensure_str(et.docinfo.public_id), ensure_str(et.docinfo.system_url))) except AttributeError: pass
try: node = et.getroot() except AttributeError: node = et
while node.getprevious() is not None: node = node.getprevious() while node is not None: self.children.append(node) node = node.getnext()
self.text = None self.tail = None
def __getitem__(self, key): return self.children[key]
def getnext(self): return None
def __len__(self): return 1
class Doctype(object): def __init__(self, root_node, name, public_id, system_id): self.root_node = root_node self.name = name self.public_id = public_id self.system_id = system_id
self.text = None self.tail = None
def getnext(self): return self.root_node.children[1]
class FragmentRoot(Root): def __init__(self, children): self.children = [FragmentWrapper(self, child) for child in children] self.text = self.tail = None
def getnext(self): return None
class FragmentWrapper(object): def __init__(self, fragment_root, obj): self.root_node = fragment_root self.obj = obj if hasattr(self.obj, 'text'): self.text = ensure_str(self.obj.text) else: self.text = None if hasattr(self.obj, 'tail'): self.tail = ensure_str(self.obj.tail) else: self.tail = None
def __getattr__(self, name): return getattr(self.obj, name)
def getnext(self): siblings = self.root_node.children idx = siblings.index(self) if idx < len(siblings) - 1: return siblings[idx + 1] else: return None
def __getitem__(self, key): return self.obj[key]
def __bool__(self): return bool(self.obj)
def getparent(self): return None
def __str__(self): return str(self.obj)
def __unicode__(self): return str(self.obj)
def __len__(self): return len(self.obj)
class TreeWalker(base.NonRecursiveTreeWalker): def __init__(self, tree): # pylint:disable=redefined-variable-type if isinstance(tree, list): self.fragmentChildren = set(tree) tree = FragmentRoot(tree) else: self.fragmentChildren = set() tree = Root(tree) base.NonRecursiveTreeWalker.__init__(self, tree) self.filter = _ihatexml.InfosetFilter()
def getNodeDetails(self, node): if isinstance(node, tuple): # Text node node, key = node assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key return base.TEXT, ensure_str(getattr(node, key))
elif isinstance(node, Root): return (base.DOCUMENT,)
elif isinstance(node, Doctype): return base.DOCTYPE, node.name, node.public_id, node.system_id
elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): return base.TEXT, ensure_str(node.obj)
elif node.tag == etree.Comment: return base.COMMENT, ensure_str(node.text)
elif node.tag == etree.Entity: return base.ENTITY, ensure_str(node.text)[1:-1] # strip &;
else: # This is assumed to be an ordinary element match = tag_regexp.match(ensure_str(node.tag)) if match: namespace, tag = match.groups() else: namespace = None tag = ensure_str(node.tag) attrs = OrderedDict() for name, value in list(node.attrib.items()): name = ensure_str(name) value = ensure_str(value) match = tag_regexp.match(name) if match: attrs[(match.group(1), match.group(2))] = value else: attrs[(None, name)] = value return (base.ELEMENT, namespace, self.filter.fromXmlName(tag), attrs, len(node) > 0 or node.text)
def getFirstChild(self, node): assert not isinstance(node, tuple), "Text nodes have no children"
assert len(node) or node.text, "Node has no children" if node.text: return (node, "text") else: return node[0]
def getNextSibling(self, node): if isinstance(node, tuple): # Text node node, key = node assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": # XXX: we cannot use a "bool(node) and node[0] or None" construct here # because node[0] might evaluate to False if it has no child element if len(node): return node[0] else: return None else: # tail return node.getnext()
return (node, "tail") if node.tail else node.getnext()
def getParentNode(self, node): if isinstance(node, tuple): # Text node node, key = node assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": return node # else: fallback to "normal" processing elif node in self.fragmentChildren: return None
return node.getparent()
|