Module refinery.units.strings.pf

Expand source code Browse git
from __future__ import annotations

from refinery.lib.meta import metavars, STRING_FORMAT_HELP
from refinery.lib.types import Param, buf
from refinery.units import Arg, Unit


class pf(Unit):
    """
    Constructs its output from data, variables, and expressions using string format expressions.

    Stands for "Print Format". The positional format string placeholder `{}` will be replaced by
    the incoming data, named placeholders have to exist as meta variables in the current chunk.
    For example, the following pipeline can be used to print all files in a given directory with
    their corresponding SHA-256 hash:

        ef ** [| sha256 -t | pf {} {path} ]]

    By default, all arguments are joined along a space character to form a single format string.
    Backslash escape sequences are unescaped by default. %s
    """
    # NOTE: the docstring above is runtime text. Its trailing ``%s`` is filled in with
    # STRING_FORMAT_HELP by the module-level statement that follows this class, so it
    # must contain exactly that one %-placeholder.

    def __init__(
        self,
        *formats: Param[buf, Arg.Binary(help='Format strings.', metavar='format')],
        variable: Param[str, Arg.String('-n', metavar='N', help='Store the formatted string in a meta variable.')] = '',
        separator: Param[str, Arg.String('-s', group='SEP', metavar='S',
            help='Separator to insert between format strings. The default is a space character.')] = ' ',
        multiplex: Param[bool, Arg.Switch('-m', group='SEP',
            help='Do not join the format strings along the separator, generate one output for each.')] = False,
        raw: Param[bool, Arg.Switch('-r', help='Do not interpret backslash-escape sequences in format strings.')] = False,
    ):
        def as_text(item: bytes | str) -> str:
            # Strings pass through untouched; any other buffer is decoded with the unit codec.
            return item if isinstance(item, str) else bytes(item).decode(self.codec)

        specs = list(map(as_text, formats))
        if not multiplex:
            # Without multiplexing, all format strings collapse into a single one that
            # is joined along the separator, so exactly one output is produced per chunk.
            specs = [as_text(separator).join(specs)]
        # The separator and multiplex options are fully consumed here; only the
        # resulting list of format strings is handed on to the base class.
        super().__init__(formats=specs, variable=variable, raw=raw)

    def process(self, data):
        meta = metavars(data)
        # The incoming chunk is the only positional value offered to the format string.
        positional = [data]
        name = self.args.variable
        unescape = not self.args.raw
        for fmt in self.args.formats:
            formatted = meta.format_bin(
                fmt,
                codec=self.codec,
                args=positional,
                escaped=unescape,
                lenient=True,
            )
            # When a variable name was given, emit the input data labelled with the
            # formatted value under that name; otherwise emit the formatted value itself.
            yield self.labelled(data, **{name: formatted}) if name else formatted


# The docstring of pf ends in a ``%s`` placeholder: substitute the shared format string
# help text into it. Nothing is done when the docstring is missing or empty.
__d = pf.__doc__
if __d:
    pf.__doc__ = __d % STRING_FORMAT_HELP

Classes

class pf (*formats, variable='', separator=' ', multiplex=False, raw=False)

Constructs its output from data, variables, and expressions using string format expressions.

Stands for "Print Format". The positional format string placeholder {} will be replaced by the incoming data; named placeholders have to exist as meta variables in the current chunk. For example, the following pipeline can be used to print all files in a given directory with their corresponding SHA-256 hash:

ef ** [| sha256 -t | pf {} {path} ]]

By default, all arguments are joined along a space character to form a single format string. Backslash escape sequences are unescaped by default. The format definitions use the following syntax:

{field[!modifier]:handlers}

The field can specify an extracted meta variable, or the positional index of an extracted value. The optional multibin suffix `handlers` is used to post-process the value of this field. For example, `{2:hex:zl:b64}` means: Take the second match group, hex-decode it, decompress it using `zl`, and finally decode it using base64. The optional modifier can be one of these:

  • !r: Computes the Python repr() of the field before processing it.
  • !s: Field is a UTF-8 string literal, not a variable.
  • !a: Field is a latin1 string literal.
  • !u: Field is a UTF-16LE string literal.
  • !h: Field is a hex-encoded literal (shortcut for !s:h).
  • !q: Field is a URL-encoded literal (shortcut for !s:q).
  • !n: Field is an escape-sequence literal (shortcut for !s:n).
  • !z: Field evaluates to integer N; returns N zero bytes.
Expand source code Browse git
class pf(Unit):
    """
    Constructs its output from data, variables, and expressions using string format expressions.

    Stands for "Print Format". The positional format string placeholder `{}` will be replaced by
    the incoming data, named placeholders have to exist as meta variables in the current chunk.
    For example, the following pipeline can be used to print all files in a given directory with
    their corresponding SHA-256 hash:

        ef ** [| sha256 -t | pf {} {path} ]]

    By default, all arguments are joined along a space character to form a single format string.
    Backslash escape sequences are unescaped by default. %s
    """
    # NOTE: the docstring above is runtime text. Its trailing ``%s`` is a %-format
    # placeholder that the defining module fills in with STRING_FORMAT_HELP, so it
    # must contain exactly that one %-placeholder.

    def __init__(
        self,
        *formats: Param[buf, Arg.Binary(help='Format strings.', metavar='format')],
        variable: Param[str, Arg.String('-n', metavar='N', help='Store the formatted string in a meta variable.')] = '',
        separator: Param[str, Arg.String('-s', group='SEP', metavar='S',
            help='Separator to insert between format strings. The default is a space character.')] = ' ',
        multiplex: Param[bool, Arg.Switch('-m', group='SEP',
            help='Do not join the format strings along the separator, generate one output for each.')] = False,
        raw: Param[bool, Arg.Switch('-r', help='Do not interpret backslash-escape sequences in format strings.')] = False,
    ):
        def as_text(item: bytes | str) -> str:
            # Strings pass through untouched; any other buffer is decoded with the unit codec.
            return item if isinstance(item, str) else bytes(item).decode(self.codec)

        specs = list(map(as_text, formats))
        if not multiplex:
            # Without multiplexing, all format strings collapse into a single one that
            # is joined along the separator, so exactly one output is produced per chunk.
            specs = [as_text(separator).join(specs)]
        # The separator and multiplex options are fully consumed here; only the
        # resulting list of format strings is handed on to the base class.
        super().__init__(formats=specs, variable=variable, raw=raw)

    def process(self, data):
        meta = metavars(data)
        # The incoming chunk is the only positional value offered to the format string.
        positional = [data]
        name = self.args.variable
        unescape = not self.args.raw
        for fmt in self.args.formats:
            formatted = meta.format_bin(
                fmt,
                codec=self.codec,
                args=positional,
                escaped=unescape,
                lenient=True,
            )
            # When a variable name was given, emit the input data labelled with the
            # formatted value under that name; otherwise emit the formatted value itself.
            yield self.labelled(data, **{name: formatted}) if name else formatted

Ancestors

Subclasses

Class variables

var reverse

The type of the None singleton.

Inherited members