Module refinery.lib.decorators

A selection of refinery-specific decorators.

Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
A selection of refinery-specific decorators.
"""
import codecs
import re
import itertools

from functools import wraps, WRAPPER_ASSIGNMENTS
from typing import Callable, Iterable, Optional

from refinery.units import Unit


def wraps_without_annotations(method: Callable) -> Callable:
    """
    This decorator works simila to `wraps` from `functools` but does not update the
    type annotations of the wrapped function. This is used in the other decorators
    in this module because they change the function signature.
    """
    assignments = set(WRAPPER_ASSIGNMENTS)
    assignments.discard('__annotations__')
    return wraps(method, assigned=assignments)


def unicoded(method: Callable[[Unit, str], Optional[str]]) -> Callable[[Unit, bytes], bytes]:
    """
    Can be used to decorate a `refinery.units.Unit.process` routine that takes a
    string argument and also returns one. The resulting routine takes a binary buffer
    as input and attempts to decode it as unicode text. If certain characters cannot
    be decoded, then these ranges are skipped and the decorated routine is called
    once for each string patch that was successfully decoded.
    """
    @wraps_without_annotations(method)
    def method_wrapper(self: Unit, data: bytes) -> bytes:
        input_codec = self.codec if any(data[::2]) else 'UTF-16LE'
        partial = re.split(R'([\uDC80-\uDCFF]+)',  # surrogate escape range
            codecs.decode(data, input_codec, errors='surrogateescape'))
        partial[::2] = (method(self, p) or '' if p else '' for p in itertools.islice(iter(partial), 0, None, 2))
        nones = sum(1 for p in partial if p is None)
        if nones == len(partial):
            return None
        if nones >= 1:
            for k, p in enumerate(partial):
                if p is None:
                    partial[k] = ''
        return codecs.encode(''.join(partial), self.codec, errors='surrogateescape')
    return method_wrapper


def linewise(method: Callable[[Unit, str], str]) -> Callable[[Unit, bytes], Iterable[bytes]]:
    """
    Can be used to decorate a `refinery.units.Unit.process` routine that takes a
    string argument and also returns one. The resulting routine expects a default
    encoded string input buffer and calls the decorated routine once for each
    line in the corresponding decoded string.
    """
    @wraps_without_annotations(method)
    def method_wrapper(self: Unit, data: bytes) -> Iterable[bytes]:
        lines = data.decode(self.codec).splitlines()
        width = len(str(len(lines)))
        for k, line in enumerate(lines):
            try:
                yield method(self, line).encode(self.codec)
            except Exception as E:
                self.log_info(F'error in line {k:0{width}d}: {E}')
    return method_wrapper

Functions

def wraps_without_annotations(method)

This decorator works simila to wraps from functools but does not update the type annotations of the wrapped function. This is used in the other decorators in this module because they change the function signature.

Expand source code Browse git
def wraps_without_annotations(method: Callable) -> Callable:
    """
    This decorator works simila to `wraps` from `functools` but does not update the
    type annotations of the wrapped function. This is used in the other decorators
    in this module because they change the function signature.
    """
    assignments = set(WRAPPER_ASSIGNMENTS)
    assignments.discard('__annotations__')
    return wraps(method, assigned=assignments)
def unicoded(method)

Can be used to decorate a Unit.process() routine that takes a string argument and also returns one. The resulting routine takes a binary buffer as input and attempts to decode it as unicode text. If certain characters cannot be decoded, then these ranges are skipped and the decorated routine is called once for each string patch that was successfully decoded.

Expand source code Browse git
def unicoded(method: Callable[[Unit, str], Optional[str]]) -> Callable[[Unit, bytes], bytes]:
    """
    Can be used to decorate a `refinery.units.Unit.process` routine that takes a
    string argument and also returns one. The resulting routine takes a binary buffer
    as input and attempts to decode it as unicode text. If certain characters cannot
    be decoded, then these ranges are skipped and the decorated routine is called
    once for each string patch that was successfully decoded.
    """
    @wraps_without_annotations(method)
    def method_wrapper(self: Unit, data: bytes) -> bytes:
        input_codec = self.codec if any(data[::2]) else 'UTF-16LE'
        partial = re.split(R'([\uDC80-\uDCFF]+)',  # surrogate escape range
            codecs.decode(data, input_codec, errors='surrogateescape'))
        partial[::2] = (method(self, p) or '' if p else '' for p in itertools.islice(iter(partial), 0, None, 2))
        nones = sum(1 for p in partial if p is None)
        if nones == len(partial):
            return None
        if nones >= 1:
            for k, p in enumerate(partial):
                if p is None:
                    partial[k] = ''
        return codecs.encode(''.join(partial), self.codec, errors='surrogateescape')
    return method_wrapper
def linewise(method)

Can be used to decorate a Unit.process() routine that takes a string argument and also returns one. The resulting routine expects a default encoded string input buffer and calls the decorated routine once for each line in the corresponding decoded string.

Expand source code Browse git
def linewise(method: Callable[[Unit, str], str]) -> Callable[[Unit, bytes], Iterable[bytes]]:
    """
    Can be used to decorate a `refinery.units.Unit.process` routine that takes a
    string argument and also returns one. The resulting routine expects a default
    encoded string input buffer and calls the decorated routine once for each
    line in the corresponding decoded string.
    """
    @wraps_without_annotations(method)
    def method_wrapper(self: Unit, data: bytes) -> Iterable[bytes]:
        lines = data.decode(self.codec).splitlines()
        width = len(str(len(lines)))
        for k, line in enumerate(lines):
            try:
                yield method(self, line).encode(self.codec)
            except Exception as E:
                self.log_info(F'error in line {k:0{width}d}: {E}')
    return method_wrapper