Module refinery.units.sinks.ppxml

Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import io

from refinery.units import Arg, Unit
from refinery.lib.xml import ForgivingParse


class ppxml(Unit):
    """
    Expects XML input data and outputs it in a neatly formatted manner.
    """

    def __init__(self,
        indent: Arg.Number('-i', help=(
            'Controls the amount of space characters used for indentation in the output. Default is 4.')) = 4,
        header: Arg.Switch('-x', help='Add an XML header to the formatted output.') = False
    ):
        super().__init__(indent=indent, header=header)

    def process(self, data):

        pad = self.args.indent * ' '
        etm = {}

        try:
            dom = ForgivingParse(data, etm)
        except Exception:
            from refinery.lib.meta import metavars
            msg = 'error parsing as XML, returning original content'
            path = metavars(data).get('path')
            if path:
                msg = F'{msg}: {path}'
            self.log_warn(msg)
            return data

        def indent(element, level=0, more_sibs=False):
            """
            The credit for this one goes to:
            https://stackoverflow.com/a/12940014
            """
            indentation = '\n'
            if level:
                indentation += (level - 1) * pad
            childcount = len(element)
            if childcount:
                if not element.text or not element.text.strip():
                    element.text = indentation + pad
                    if level:
                        element.text += pad
                for count, child in enumerate(element):
                    indent(child, level + 1, count < childcount - 1)
                if level and (not element.tail or element.tail.isspace()):
                    element.tail = indentation
                    if more_sibs:
                        element.tail += pad
            elif level and (not element.tail or element.tail.isspace()):
                element.tail = indentation
                if more_sibs: element.tail += pad

        indent(dom.getroot())

        with io.BytesIO() as output:
            dom.write(output, encoding=self.codec, xml_declaration=self.args.header)
            result = output.getvalue()

        for uid, key in etm.items():
            entity = F'&{key};'.encode(self.codec)
            needle = uid.encode(self.codec)
            result = result.replace(needle, entity)

        return result

Classes

class ppxml (indent=4, header=False)

Expects XML input data and outputs it in a neatly formatted manner.

Expand source code Browse git
class ppxml(Unit):
    """
    Expects XML input data and outputs it in a neatly formatted manner.
    """

    def __init__(self,
        indent: Arg.Number('-i', help=(
            'Controls the amount of space characters used for indentation in the output. Default is 4.')) = 4,
        header: Arg.Switch('-x', help='Add an XML header to the formatted output.') = False
    ):
        super().__init__(indent=indent, header=header)

    def process(self, data):

        pad = self.args.indent * ' '
        etm = {}

        try:
            dom = ForgivingParse(data, etm)
        except Exception:
            from refinery.lib.meta import metavars
            msg = 'error parsing as XML, returning original content'
            path = metavars(data).get('path')
            if path:
                msg = F'{msg}: {path}'
            self.log_warn(msg)
            return data

        def indent(element, level=0, more_sibs=False):
            """
            The credit for this one goes to:
            https://stackoverflow.com/a/12940014
            """
            indentation = '\n'
            if level:
                indentation += (level - 1) * pad
            childcount = len(element)
            if childcount:
                if not element.text or not element.text.strip():
                    element.text = indentation + pad
                    if level:
                        element.text += pad
                for count, child in enumerate(element):
                    indent(child, level + 1, count < childcount - 1)
                if level and (not element.tail or element.tail.isspace()):
                    element.tail = indentation
                    if more_sibs:
                        element.tail += pad
            elif level and (not element.tail or element.tail.isspace()):
                element.tail = indentation
                if more_sibs: element.tail += pad

        indent(dom.getroot())

        with io.BytesIO() as output:
            dom.write(output, encoding=self.codec, xml_declaration=self.args.header)
            result = output.getvalue()

        for uid, key in etm.items():
            entity = F'&{key};'.encode(self.codec)
            needle = uid.encode(self.codec)
            result = result.replace(needle, entity)

        return result

Ancestors

Class variables

var required_dependencies
var optional_dependencies

Inherited members