Module refinery.lib.xml
Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from __future__ import annotations
import uuid
import weakref
import defusedxml.ElementTree as et
from typing import Any, Dict, Iterable, List, Optional
from xml.parsers import expat
from xml.etree.ElementTree import Element, ElementTree
from refinery.lib.structures import MemoryFile
def ForgivingParse(data, entities=None) -> ElementTree:
    """
    Parse the XML document given in `data` using a `ForgivingXMLParser`.

    The input is wrapped in a `MemoryFile` and passed to the `parse` function of the defusedxml
    element tree module. The optional `entities` argument is forwarded unchanged to the
    `ForgivingXMLParser` constructor. A `ParseError` raised while parsing is converted into a
    `ValueError` that carries the original error as its cause.
    """
    try:
        stream = MemoryFile(data)
        parser = ForgivingXMLParser(entities)
        tree = et.parse(stream, parser=parser)
    except et.ParseError as error:
        raise ValueError from error
    else:
        return tree
class ForgivingXMLParser(et.XMLParser):
    """
    An XML parser whose entity table never fails a lookup.

    Every entity name that is not yet known is mapped to a freshly generated UUID string. When a
    mapping is passed as `emap`, it additionally receives the reverse association from each
    generated UUID string back to the entity name it stands for.
    """

    def __init__(self, emap=None):
        class ForgivingEntityResolver(dict):
            """
            Dictionary that invents a UUID placeholder for every missing key, remembers it, and
            records the reverse mapping in `emap` when one was provided.
            """

            def __missing__(self, name):
                placeholder = str(uuid.uuid4())
                self[name] = placeholder
                if emap is not None:
                    emap[placeholder] = name
                return placeholder

        # This has to exist before the base initializer runs: the `entity` setter below updates
        # it in place. NOTE(review): presumably the base class assigns `self.entity` during
        # initialization; that code is not part of this file.
        self.__entity = ForgivingEntityResolver()

        original_factory = expat.ParserCreate

        def forgiving_factory(encoding, _):
            # The second positional argument passed by the caller is discarded; the parser is
            # always created without a namespace separator and with a foreign DTD enabled.
            parser = original_factory(encoding, namespace_separator=None)
            parser.UseForeignDTD(True)
            return parser

        # Swap the module-level factory only for the duration of the base initializer and put
        # the original back regardless of whether initialization succeeded.
        expat.ParserCreate = forgiving_factory
        try:
            super().__init__()
        finally:
            expat.ParserCreate = original_factory

    @property
    def entity(self):
        """
        The forgiving entity table of this parser.
        """
        return self.__entity

    @entity.setter
    def entity(self, value):
        # Assignments never replace the forgiving table; the given entries are merged into it.
        self.__entity.update(value)
class XMLNodeBase:
    """
    A node in a tree of tagged elements.

    Each node stores its `tag`, an optional text `content`, an `empty` flag, a dictionary of
    `attributes`, and a list of `children`. The parent is only held through a weak reference, so
    a node does not keep its parent alive; `parent` evaluates to `None` both for nodes without a
    parent and for nodes whose parent has already been garbage collected.

    Iterating a node yields its direct children, indexing a node looks up an attribute, and
    using a node in a `with` statement binds an iterator over its entire subtree.
    """
    __slots__ = 'tag', 'children', 'empty', 'attributes', 'content', '_parent', '__weakref__'

    attributes: Dict[str, Any]
    children: List['XMLNodeBase']
    content: Optional[str]
    # The property hands out the dereferenced node itself (or None), not a weak proxy.
    parent: Optional['XMLNodeBase']
    subtree: Iterable['XMLNodeBase']
    empty: bool
    tag: Optional[str]

    def __init__(
        self,
        tag: str,
        parent: Optional['XMLNodeBase'] = None,
        content: Optional[str] = None,
        empty: bool = False,
        attributes: Optional[Dict[str, Any]] = None,
    ):
        """
        Create a node without children. When `attributes` is omitted, the node receives a new
        empty dictionary; a dictionary that is passed in is stored as-is, not copied.
        """
        self.tag = tag
        self.content = content
        self.empty = empty
        self.children = []
        self.attributes = {} if attributes is None else attributes
        # Routed through the property setter, which stores a weak reference.
        self.parent = parent

    @property
    def parent(self) -> Optional['XMLNodeBase']:
        """
        The parent node, or `None` if there is none or it no longer exists.
        """
        reference = self._parent
        if reference is None:
            return None
        # Calling a weak reference yields the referent, or None once it was collected.
        return reference()

    @parent.setter
    def parent(self, parent):
        self._parent = None if parent is None else weakref.ref(parent)

    def __iter__(self):
        return iter(self.children)

    def __getitem__(self, key):
        return self.attributes[key]

    def get_attribute(self, key, default=None):
        """
        Return the attribute named `key`, or `default` if the node has no such attribute.
        """
        return self.attributes.get(key, default)

    def child(self, tag: str):
        """
        Return the first direct child whose tag equals `tag`. Raises a `LookupError` carrying
        the requested tag if no such child exists.
        """
        for node in self.children:
            if node.tag == tag:
                return node
        raise LookupError(tag)

    @property
    def subtree(self) -> Iterable['XMLNodeBase']:
        """
        Lazily iterate over this node followed by all of its descendants, depth first and in
        child order.
        """
        yield self
        for node in self.children:
            yield from node.subtree

    def __enter__(self):
        return self.subtree

    def __exit__(self, *a):
        # Exceptions raised inside the with-block are never suppressed.
        return False
class XMLNode(XMLNodeBase):
    """
    An `XMLNodeBase` that additionally remembers the `Element` it was created from in `source`.
    """
    __slots__ = 'source',

    source: Optional[Element]

    def __init__(self, tag: str):
        super().__init__(tag)
        # Give the slot a defined value: without this, reading `source` on a node that was not
        # populated yet raises an AttributeError even though the attribute is declared Optional.
        self.source = None

    def write(self, stream):
        """
        Wrap the `source` element in an `ElementTree` and write it to `stream`. This requires
        `source` to have been assigned an element beforehand.
        """
        return ElementTree(self.source).write(stream)
def parse(data) -> XMLNode:
    """
    Parse `data` with `ForgivingParse` and convert the resulting element tree into a tree of
    `XMLNode` objects, returning the node for the document root.

    Every node receives the tag of its element, a reference to that element in `source`, the
    element's attribute mapping (the same object, not a copy) and, as `content`, the first
    non-empty value among the element's `text`, its `tail`, and the empty string. Children are
    stored in document order and point back to their parent node.

    The tree is converted with an explicit work stack rather than by recursion, so that the
    nesting depth of the document is not limited by the interpreter's recursion limit.
    """
    root = ForgivingParse(data).getroot()
    result = XMLNode(root.tag)
    result.source = root
    pending = [(root, result)]
    while pending:
        element, cursor = pending.pop()
        cursor.attributes = element.attrib
        cursor.content = element.text or element.tail or ''
        for child in element:
            node = XMLNode(child.tag)
            node.parent = cursor
            node.source = child
            # Each child list is filled completely while its parent is processed, so the order
            # in which the stack is drained does not affect the order of the children.
            cursor.children.append(node)
            pending.append((child, node))
    return result
Functions
def ForgivingParse(data, entities=None)
-
Expand source code Browse git
def ForgivingParse(data, entities=None) -> ElementTree: try: return et.parse(MemoryFile(data), parser=ForgivingXMLParser(entities)) except et.ParseError as PE: raise ValueError from PE
def parse(data)
-
Expand source code Browse git
def parse(data) -> XMLNode: def translate(element: Element, cursor: XMLNode, level: int = 0): for child in element: node = XMLNode(child.tag) translate(child, node, level + 1) node.parent = cursor node.source = child cursor.children.append(node) cursor.attributes = element.attrib cursor.content = element.text or element.tail or '' return cursor root = ForgivingParse(data).getroot() rt = translate(root, XMLNode(root.tag)) rt.source = root return rt
Classes
class ForgivingXMLParser (emap=None)
-
Element structure builder for XML source data based on the expat parser.
`html` are predefined HTML entities (deprecated and not supported); `target` is an optional target object, which defaults to an instance of the standard TreeBuilder class; `encoding` is an optional encoding string which, if given, overrides the encoding specified in the XML file (see http://www.iana.org/assignments/character-sets).
Expand source code Browse git
class ForgivingXMLParser(et.XMLParser): def __init__(self, emap=None): class ForgivingEntityResolver(dict): def __getitem__(self, key): if key in self: return dict.__getitem__(self, key) uid = str(uuid.uuid4()) self[key] = uid if emap is not None: emap[uid] = key return uid self.__entity = ForgivingEntityResolver() _ParserCreate = expat.ParserCreate try: def PC(encoding, _): parser = _ParserCreate( encoding, namespace_separator=None) parser.UseForeignDTD(True) return parser expat.ParserCreate = PC super().__init__() finally: expat.ParserCreate = _ParserCreate @property def entity(self): return self.__entity @entity.setter def entity(self, value): self.__entity.update(value)
Ancestors
- defusedxml.ElementTree.DefusedXMLParser
- xml.etree.ElementTree.XMLParser
Instance variables
var entity
-
Expand source code Browse git
@property def entity(self): return self.__entity
class XMLNodeBase (tag, parent=None, content=None, empty=False, attributes=None)
-
Expand source code Browse git
class XMLNodeBase: __slots__ = 'tag', 'children', 'empty', 'attributes', 'content', '_parent', '__weakref__' attributes: Dict[str, Any] children: List[XMLNodeBase] content: Optional[str] parent: Optional[weakref.ProxyType[XMLNodeBase]] subtree: Iterable[XMLNodeBase] empty: bool tag: Optional[str] def __init__( self, tag: str, parent: Optional[XMLNodeBase] = None, content: Optional[str] = None, empty: bool = False, attributes: Optional[Dict[str, Any]] = None, ): if attributes is None: attributes = {} self.tag = tag self.content = content self.empty = empty self.children = [] self.attributes = attributes self.parent = parent @property def parent(self) -> XMLNodeBase: parent = self._parent if parent is not None: parent = parent() return parent @parent.setter def parent(self, parent): if parent is not None: parent = weakref.ref(parent) self._parent = parent def __iter__(self): return iter(self.children) def __getitem__(self, key): return self.attributes[key] def get_attribute(self, key, default=None): return self.attributes.get(key, default) def child(self, tag: str): for child in self.children: if child.tag == tag: return child raise LookupError(tag) @property def subtree(self) -> Iterable[XMLNodeBase]: yield self for child in self.children: yield from child.subtree def __enter__(self): return self.subtree def __exit__(self, *a): return False
Subclasses
Instance variables
var parent
-
Expand source code Browse git
@property def parent(self) -> XMLNodeBase: parent = self._parent if parent is not None: parent = parent() return parent
var subtree
-
Expand source code Browse git
@property def subtree(self) -> Iterable[XMLNodeBase]: yield self for child in self.children: yield from child.subtree
var attributes
-
Return an attribute of instance, which is of type owner.
var children
-
Return an attribute of instance, which is of type owner.
var content
-
Return an attribute of instance, which is of type owner.
var empty
-
Return an attribute of instance, which is of type owner.
var tag
-
Return an attribute of instance, which is of type owner.
Methods
def get_attribute(self, key, default=None)
-
Expand source code Browse git
def get_attribute(self, key, default=None): return self.attributes.get(key, default)
def child(self, tag)
-
Expand source code Browse git
def child(self, tag: str): for child in self.children: if child.tag == tag: return child raise LookupError(tag)
class XMLNode (tag)
-
Expand source code Browse git
class XMLNode(XMLNodeBase): __slots__ = 'source', source: Optional[Element] def __init__(self, tag: str): super().__init__(tag) def write(self, stream): return ElementTree(self.source).write(stream)
Ancestors
Instance variables
var source
-
Return an attribute of instance, which is of type owner.
Methods
def write(self, stream)
-
Expand source code Browse git
def write(self, stream): return ElementTree(self.source).write(stream)
Inherited members