Module refinery


        __     __  High Octane Triage Analysis          __
        ||    _||______ __       __________     _____   ||
        ||    \||___   \__| ____/   ______/___ / ____\  ||
========||=====||  | __/  |/    \  /==|  / __ \   __\===]|
        '======||  |   \  |   |  \_  _| \  ___/|  |     ||
               ||____  /__|___|__/  / |  \____]|  |     ||
===============''====\/=========/  /==|__|=====|__|======'
                               \  /
                                \/

The main package refinery exports all Units which are also of type Entry, i.e. they expose a shell command. The command line interface for each of these units is given below, this is the same text as would be available by executing the command with the -h or --help option. To better understand how the command line parameters are parsed, it is recommended to study the module documentation of the following library modules, as their content is relevant for command line use of the refinery.

  1. refinery.lib.frame
  2. refinery.lib.argformats
  3. refinery.lib.meta

Furthermore, the module documentation of refinery.units contains a brief example of how to write simple units.

Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
R"""
    ----------------------------------------------------------
            __     __  High Octane Triage Analysis          __
            ||    _||______ __       __________     _____   ||
            ||    \||___   \__| ____/   ______/___ / ____\  ||
    ========||=====||  | __/  |/    \  /==|  / __ \   __\===]|
            '======||  |   \  |   |  \_  _| \  ___/|  |     ||
                   ||____  /__|___|__/  / |  \____]|  |     ||
    ===============''====\/=========/  /==|__|=====|__|======'
                                   \  /
                                    \/

The main package `refinery` exports all `refinery.units.Unit`s which are also
of type `refinery.units.Entry`, i.e. they expose a shell command. The command
line interface for each of these units is given below, this is the same text
as would be available by executing the command with the `-h` or `--help`
option. To better understand how the command line parameters are parsed, it is
recommended to study the module documentation of the following library modules,
as their content is relevant for command line use of the `refinery`.

1. `refinery.lib.frame`
2. `refinery.lib.argformats`
3. `refinery.lib.meta`

Furthermore, the module documentation of `refinery.units` contains a brief
example of how to write simple units.
"""
__version__ = '0.4.1'
__pip_pkg__ = 'binary-refinery'

import os
import pickle

from .units import arg, Unit


def _singleton(cls):
    return cls()


@_singleton
class _cache:
    """
    Every unit can be imported from the refinery base module. The actual import
    is performed on demand to reduce import times. On first import of the refinery
    package, it creates a map of units and their corresponding module and stores
    this map as `__init__.pkl` in the package directory; this process can take
    several seconds. Subsequent imports of refinery should be faster, and the
    loading of units from the module is nearly as fast as specifying the full path.
    """
    def __init__(self, filename='__init__.pkl'):
        # The pickled unit map lives next to this module in the package directory.
        self.path = os.path.join(os.path.dirname(__file__), filename)
        self.reloading = False  # re-entrancy guard for reload()
        self.loaded = False     # True once the unit map was successfully read or written
        self.units = {}         # unit class name -> dotted path of its defining module
        self.cache = {}         # unit class name -> resolved class object (memoization)
        self.load()

    def load(self):
        """
        Populate the unit map from the pickled cache file. If the file is
        missing, truncated, or corrupted, rebuild the map from scratch.
        """
        # NOTE: The cache file resides inside the installed package, so unpickling
        # it is as trusted as importing the package itself.
        try:
            with open(self.path, 'rb') as stream:
                self.units = pickle.load(stream)
        except (FileNotFoundError, EOFError, pickle.UnpicklingError):
            # UnpicklingError is included so that a corrupted cache file triggers
            # a rebuild instead of crashing the package import.
            self.reload()
        else:
            self.loaded = True

    def save(self):
        """
        Persist the unit map to disk. Failures (e.g. a read-only installation)
        are deliberately ignored because the cache is only an optimization.
        """
        try:
            with open(self.path, 'wb') as stream:
                pickle.dump(self.units, stream)
        except Exception:
            pass
        else:
            self.loaded = True

    def reload(self):
        """
        Rebuild the unit map by scanning all entry points. The `reloading` flag
        prevents recursion, because collecting entry points imports refinery
        modules which may in turn trigger attribute lookups on this cache.
        """
        if not self.reloading:
            from .lib.loader import get_all_entry_points
            self.reloading = True
            self.units = {e.__qualname__: e.__module__ for e in get_all_entry_points()}
            self.reloading = False
            self.save()

    def _resolve(self, name, retry=False):
        """
        Import the unit called `name` from the module recorded in the unit map
        and memoize the class object. On a cache miss, the map is rebuilt once
        before giving up with an `AttributeError`.
        """
        if retry:
            self.reload()
        try:
            module_path = self.units[name]
            module = __import__(module_path, None, None, [name])
            entry = getattr(module, name)
            self.cache[name] = entry
            return entry
        except (KeyError, ModuleNotFoundError):
            if not retry:
                return self._resolve(name, retry=True)
            # Carry the attribute name so the module-level __getattr__ hook
            # produces a meaningful error message instead of a bare AttributeError.
            raise AttributeError(name)

    def __getitem__(self, name):
        return self._resolve(name)


@_singleton
class __pdoc__(dict):
    """
    A lazily populated dictionary consumed by the pdoc documentation generator.
    Keys are object names (or `name.method` paths); values are either a docstring
    override or `False` to exclude the entry from the generated documentation.
    Population is deferred until `items` is called because building the help
    text requires importing every unit.
    """
    def __init__(self, *a, **kw):
        # Extra positional and keyword arguments are accepted but ignored.
        super().__init__()
        self._loaded = False  # becomes True once _load has populated the dict

    def _strip_globals(self, hlp: str):
        # Remove the "global options:" section from a help string: the heading
        # itself and every following indented (continuation) line, up to the
        # next line that starts at column zero.
        def _strip(lines):
            triggered = False
            for line in lines:
                if triggered:
                    if line.lstrip() != line:
                        continue
                    triggered = False
                if line.lower().startswith('global options:'):
                    triggered = True
                    continue
                yield line
        return ''.join(_strip(hlp.splitlines(keepends=True)))

    def _load(self):
        # Import every cached unit and register its command line help text as
        # its pdoc docstring; also suppress documentation entries for abstract
        # method implementations.
        if self._loaded:
            return
        from .explore import get_help_string
        self['Unit'] = False
        for name in _cache.units:
            unit = _cache[name]
            for base in unit.mro():
                try:
                    abstractmethods = base.__abstractmethods__
                except AttributeError:
                    # NOTE(review): iteration stops at the first base lacking
                    # __abstractmethods__ — presumably the abstract machinery
                    # only exists near the top of the hierarchy; confirm.
                    break
                for method in abstractmethods:
                    at = getattr(unit, method, None)
                    bt = getattr(unit.mro()[1], method, None)
                    # Hide methods that the unit defines itself rather than
                    # inheriting from its direct base class.
                    if at and at is not bt:
                        self[F'{name}.{method}'] = False
            hlp = get_help_string(unit, width=74)
            hlp = hlp.replace('\x60', '')  # drop backticks so pdoc renders no code spans
            hlp = self._strip_globals(hlp).strip()
            hlp = (
                F'This unit is implemented in `{unit.__module__}` and has the following '
                F'commandline Interface:\n```text\n{hlp}\n```'
            )
            self[name] = hlp
        self._loaded = True

    def items(self):
        # Populate on first use; pdoc calls items() while rendering the module.
        self._load()
        return super().items()


# Export every cached unit (sorted by its defining module path), plus the Unit
# base class, the arg helper, and the __pdoc__ table for documentation builds.
__all__ = [x for x, _ in sorted(_cache.units.items(), key=lambda x: x[1])] + [Unit.__name__, arg.__name__, '__pdoc__']


def __getattr__(name):
    """Module-level attribute hook (PEP 562): resolve and import units lazily."""
    return _cache[name]


def __dir__():
    """Expose the lazily loaded unit names to dir() and tab completion."""
    return __all__


def load(name):
    """
    Return the unit class registered under `name`.

    NOTE(review): when the pickled unit map was loaded successfully, this only
    consults the memoization cache of already-resolved units and may return
    None for a unit that was never accessed — confirm this best-effort lookup
    is intended rather than always resolving via `_cache[name]`.
    """
    if _cache.loaded:
        return _cache.cache.get(name)
    return _cache[name]

Sub-modules

refinery.explore

A commandline script to search for binary refinery units based on keywords.

refinery.lib

Library functions used by various refinery units.

refinery.units

This package contains all refinery units. To write an executable refinery unit, it is sufficient to write a class that inherits from …

Classes

class add (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.add and has the following command line interface:

usage: add [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Add the given argument to each block.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class add(BinaryOperation):
    """
    Add the given argument to each block.
    """
    @staticmethod
    def operate(a, b):
        # Left operand is the input block, right operand the unit argument.
        return a + b

    @staticmethod
    def inplace(a, b):
        # In-place variant used by the fast (vectorized) processing path.
        a += b

Ancestors

Inherited members

class bitrev (bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.bitrev and has the following command line interface:

usage: bitrev [-h] [-L] [-Q] [-0] [-v] [-E] [-B N]

Reverse the bits of every block.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class bitrev(UnaryOperation):
    """
    Reverse the bits of every block.
    """
    @staticmethod
    def operate(arg): pass  # placeholder; replaced by a per-instance callable in __init__

    def __init__(self, bigendian=False, blocksize=1):
        """
        Unreadable bit reversal operations due to:
        https://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64BitsDiv
        https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
        """
        super().__init__(bigendian=bigendian, blocksize=blocksize)

        if self.bytestream:
            # Single-byte blocks: 8-bit reversal in constant time using the
            # multiply/mask/modulus trick cited in the docstring above.
            self.operate = lambda v: ((v * 0x202020202) & 0x10884422010) % 1023
        elif self.args.blocksize in (2, 4, 8):
            # Power-of-two block sizes: swap progressively smaller bit groups
            # in parallel, halving the group size each round.
            def operate(v):
                s = self.fbits
                m = self.fmask
                w = v
                while s > 1:
                    s >>= 1
                    m = m ^ (m << s)
                    w = ((w << s) & ~m) | ((w >> s) & m)
                return w
            self.operate = operate
        else:
            # Fallback for arbitrary block sizes: relocate one bit at a time.
            def operate(v):
                w = v & 0  # zero of the same type as v — presumably to support array operands; confirm
                for s in range(self.fbits):
                    w |= ((v >> s) & 1) << (self.fbits - s - 1)
                return w
            self.operate = operate

Ancestors

Inherited members

class blockop (operation, *argument, seed=0, prologue=None, epilogue=None, inc=False, dec=False, cbc=False, bigendian=False, blocksize=1, precision=None)

This unit is implemented in refinery.units.blockwise.blockop and has the following command line interface:

usage: blockop [-h] [-L] [-Q] [-0] [-v] [-s SEED] [-p E]
               [-e E | -I | -D | -X] [-E] [-B N] [-P N]
               operation [argument [argument ...]]

This unit allows you to specify a custom Python expression where the
following variables are allowed.

- the variable A: same as V[0]
- the variable B: current block
- the variable N: number of bytes in the input
- the variable I: current index in the input
- the variable S: an optional seed value for an internal state
- the variable V: the vector of arguments

Each block of the input is replaced by the value of this expression.
Additionally, it is possible to specify prologue and epilogue expressions
which are used to update the state variable S before and after the update
of each block, respectively.

positional arguments:
  operation          A Python expression defining the operation.
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -s, --seed SEED    Optional seed value for the state variable S. The
                     default is zero. This can be an expression involving
                     the variable N.
  -p, --prologue E   Optional expression with which the state variable S
                     is updated before a block is operated on.
  -e, --epilogue E   Optional expression with which the state variable S
                     is updated after a block was operated on.
  -I, --inc          equivalent to --epilogue=S+1
  -D, --dec          equivalent to --epilogue=S-1
  -X, --cbc          equivalent to --epilogue=(B)
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.
  -P, --precision N  The size of the variables used for computing the
                     result. By default, this is equal to the block size.
                     The value may be zero, indicating that arbitrary
                     precision is required.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class blockop(ArithmeticUnit):
    """
    This unit allows you to specify a custom Python expression where the following variables are allowed.

    - the variable `A`: same as `V[0]`
    - the variable `B`: current block
    - the variable `N`: number of bytes in the input
    - the variable `I`: current index in the input
    - the variable `S`: an optional seed value for an internal state
    - the variable `V`: the vector of arguments

    Each block of the input is replaced by the value of this expression. Additionally, it is possible to
    specify prologue and epilogue expressions which are used to update the state variable `S` before and
    after the update of each block, respectively.
    """

    @staticmethod
    def _parse_op(definition):
        """
        An argparse type which uses the `refinery.lib.argformats.PythonExpression` parser to
        parse the expressions that can be passed to `refinery.blockop`. Essentially, these
        are Python expressions which can contain variables `B`, `A`, `S`, and `V`.
        """
        # The wrapper binds the runtime values to the expression's variables;
        # A (the first argument) is only supplied when arguments V are present.
        def wrapper(J, B, S, N, *V):
            return wrapper.parsed(I=J, B=B, A=V[0], N=N, S=S, V=V) if V else wrapper.parsed(I=J, B=B, S=S)
        wrapper.parsed = PythonExpression(definition, *'IBASNV')
        return wrapper

    def __init__(
        self, operation: arg(type=str, help='A Python expression defining the operation.'), *argument,
        seed: arg('-s', type=str, help=(
            'Optional seed value for the state variable S. The default is zero. This can be an expression '
            'involving the variable N.')) = 0,
        prologue: arg('-p', type=str, metavar='E', help=(
            'Optional expression with which the state variable S is updated before a block is operated on.')) = None,
        epilogue: arg('-e', type=str, metavar='E', group='EPI', help=(
            'Optional expression with which the state variable S is updated after a block was operated on.')) = None,
        inc: arg('-I', group='EPI', help='equivalent to --epilogue=S+1') = False,
        dec: arg('-D', group='EPI', help='equivalent to --epilogue=S-1') = False,
        cbc: arg('-X', group='EPI', help='equivalent to --epilogue=(B)') = False,
        bigendian=False, blocksize=1, precision=None
    ):
        # The -I/-D/-X switches are shorthands for common epilogue expressions;
        # combining one with an explicit --epilogue is rejected as ambiguous.
        for flag, flag_is_set, expression in [
            ('--cbc', cbc, '(B)'),
            ('--inc', inc, 'S+1'),
            ('--dec', dec, 'S-1'),
        ]:
            if flag_is_set:
                if epilogue is not None:
                    raise ValueError(
                        F'Ambiguous specification; epilogue was already set to {epilogue} '
                        F'when {flag} was parsed.'
                    )
                epilogue = expression

        self._index = IndexCounter()  # supplies the running block index as a pseudo-argument

        super().__init__(
            self._index,
            *argument,
            bigendian=bigendian,
            blocksize=blocksize,
            precision=precision,
            operation=self._parse_op(operation),
            seed=seed,
            prologue=prologue and self._parse_op(prologue),
            epilogue=epilogue and self._parse_op(epilogue),
        )

    @property
    def _is_ecb(self):
        # "ECB" here means stateless: no prologue or epilogue expression, so
        # blocks can be processed independently.
        return not self.args.epilogue and not self.args.prologue

    def process_ecb_fast(self, data):
        # The vectorized fast path is only valid when processing is stateless.
        if not self._is_ecb:
            raise NoNumpy
        return super().process_ecb_fast(data)

    def process(self, data):
        # A string seed is an expression in N, evaluated with the chunk's meta
        # variables as constants; a numeric seed is used directly.
        seed = self.args.seed
        if isinstance(seed, str):
            seed = PythonExpression(seed, 'N', constants=metavars(data))
        self._index.init(self.fmask)
        self._total = len(data)
        self._state = seed
        if callable(self._state):
            self._state = self._state(N=self._total)
        return super().process(data)

    def operate(self, block, index, *args):
        # Order matters: prologue updates S, then the operation computes the
        # new block (masked to the block size), then the epilogue updates S
        # from the already-transformed block.
        if self.args.prologue:
            self._state = self.args.prologue(index, block, self._state, self._total, *args)
        block = self.args.operation(index, block, self._state, self._total, *args) & self.fmask
        if self.args.epilogue:
            self._state = self.args.epilogue(index, block, self._state, self._total, *args)
        return block

    def inplace(self, block, *args) -> None:
        # NOTE(review): delegates unchanged to the base class — presumably this
        # override exists to pin the signature or disable an optimization; confirm.
        super().inplace(block, *args)

Ancestors

Inherited members

class map (index, image, blocksize=1)

This unit is implemented in refinery.units.blockwise.map and has the following command line interface:

usage: map [-h] [-L] [-Q] [-0] [-v] [-B N] index image

Each block of the input data which occurs as a block of the index argument
is replaced by the corresponding block of the image argument.

positional arguments:
  index              index characters
  image              image characters

optional arguments:
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class map(BlockTransformation):
    """
    Each block of the input data which occurs as a block of the index argument
    is replaced by the corresponding block of the image argument.
    """
    def __init__(
        self,
        index: arg(help='index characters'),
        image: arg(help='image characters'),
        blocksize=1
    ):
        super().__init__(blocksize=blocksize, index=index, image=image)

    def process(self, data):
        # Pair each block of the index argument with the block at the same
        # position in the image argument to form the translation table.
        keys = self.chunk(self.args.index)
        values = self.chunk(self.args.image)
        self._map = dict(zip(keys, values))
        return super().process(data)

    def process_block(self, token):
        # Blocks without a translation entry pass through unchanged.
        try:
            return self._map[token]
        except KeyError:
            return token

Ancestors

Inherited members

class neg (bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.neg and has the following command line interface:

usage: neg [-h] [-L] [-Q] [-0] [-v] [-E] [-B N]

Each block of the input data is negated bitwise. This is sometimes also
called the bitwise complement or inverse.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class neg(UnaryOperation):
    """
    Each block of the input data is negated bitwise. This is sometimes
    also called the bitwise complement or inverse.
    """
    def operate(self, a):
        # Bitwise complement of the block value.
        return ~a

    def inplace(self, a):
        # XOR with the all-ones field mask complements the value in place.
        a ^= self.fmask

Ancestors

Inherited members

class pack (base=0, prefix=False, strict=False, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.pack and has the following command line interface:

usage: pack [-h] [-L] [-Q] [-0] [-v] [-R] [-r] [-s] [-E] [-B N] [base]

Scans the input data for numeric constants and packs them into a binary
format. This is useful to convert the textual representation of an array
of numbers into its binary form. For example, 123,34,256,12,1,234 would be
transformed into the byte sequence 7B22000C01EA, where 256 was wrapped and
packed as a null byte because the default block size is one byte. If the
above sequence would be packed with options -EB2, the result would be
equal to 007B00220100000C000100EA in hexadecimal.

positional arguments:
  base               Find only numbers in given base. Default of 0 means
                     that common expressions for hexadecimal, octal and
                     binary are accepted.

optional arguments:
  -r, --prefix       Add numeric prefixes like 0x, 0b, and 0o in reverse
                     mode.
  -s, --strict       Only parse integers that fit in one block of the
                     given block size.
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class pack(BlockTransformationBase):
    """
    Scans the input data for numeric constants and packs them into a binary
    format. This is useful to convert the textual representation of an array of
    numbers into its binary form. For example, `123,34,256,12,1,234` would be
    transformed into the byte sequence `7B22000C01EA`, where `256` was wrapped
    and packed as a null byte because the default block size is one byte. If
    the above sequence would be packed with options -EB2, the result would be
    equal to `007B00220100000C000100EA` in hexadecimal.
    """

    def __init__(self,
        base: arg(type=number[2:36], help=(
            'Find only numbers in given base. Default of 0 means that '
            'common expressions for hexadecimal, octal and binary are '
            'accepted.')) = 0,
        prefix  : arg.switch('-r', help='Add numeric prefixes like 0x, 0b, and 0o in reverse mode.') = False,
        strict  : arg.switch('-s', help='Only parse integers that fit in one block of the given block size.') = False,
        bigendian=False, blocksize=1
    ):
        super().__init__(
            base=base,
            prefix=prefix,
            strict=strict,
            bigendian=bigendian,
            blocksize=blocksize
        )

    @property
    def bytestream(self):
        # Never allow bytes to be left unchunked.
        return False

    def reverse(self, data):
        """
        Unpack each block of the binary input into its textual representation
        in the requested base; base 0 (auto-detect) falls back to decimal here.
        """
        base = self.args.base or 10
        prefix = B''

        self.log_debug(F'using base {base:d}')

        if self.args.prefix:
            # Only bases with a conventional Python literal prefix receive one.
            prefix = {
                0x02: b'0b',
                0x08: b'0o',
                0x10: b'0x'
            }.get(base, B'')

        converter = base_unit(base, not self.args.bigendian)

        for n in self.chunk(data, raw=True):
            yield prefix + converter.reverse(n)

    def process(self, data):
        """
        Find all numeric tokens in the input, parse them in the configured
        base, wrap each value to the block size, and pack the results.
        """
        def intb(integers):
            for n in integers:
                if self.args.base == 0 and n.startswith(B'0') and n[1:].isdigit():
                    # int(n, 0) rejects legacy octal literals like 0777;
                    # rewrite them with the modern 0o prefix first.
                    n = B'0o' + n
                N = int(n, self.args.base)
                M = N & self.fmask
                self.log_debug(lambda: F'0x{M:0{self.fbits // 4}X}')
                if self.args.strict and M != N:
                    # Strict mode drops values that do not fit in one block.
                    continue
                yield M

        if self.args.base == 0:
            pattern = formats.integer
        elif self.args.base <= 10:
            pattern = re.compile(B'[-+]?[0-%d]{1,64}' % (self.args.base - 1))
        else:
            # The highest valid digit letter for base B is chr(0x56 + B), e.g.
            # 'f' for base 16 and 'z' for base 36. The previous offset of 0x57
            # was off by one: in hexadecimal mode the pattern accepted 'g', and
            # int() then raised ValueError on such tokens.
            pattern = re.compile(B'[-+]?[0-9a-%c]{1,20}' % (0x56 + self.args.base), re.IGNORECASE)

        return self.unchunk(intb(pattern.findall(data)))

Ancestors

Inherited members

class rev (blocksize=1)

This unit is implemented in refinery.units.blockwise.rev and has the following command line interface:

usage: rev [-h] [-L] [-Q] [-0] [-v] [-B N]

The blocks of the input data are output in reverse order. If the length of
the input data is not a multiple of the block size, the data is truncated.

optional arguments:
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class rev(BlockTransformationBase):
    """
    The blocks of the input data are output in reverse order. If the length of
    the input data is not a multiple of the block size, the data is truncated.
    """
    def __init__(self, blocksize=1):
        super().__init__(blocksize=blocksize)

    def process(self, data):
        # For single-byte blocks, a plain slice reversal suffices.
        if self.bytestream:
            return data[::-1]
        blocks = list(self.chunk(data, raw=True))
        blocks.reverse()
        return self.rest(data) + self.unchunk(blocks, raw=True)

Ancestors

Inherited members

class rotl (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.rotl and has the following command line interface:

usage: rotl [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Rotate the bits of each block left.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class rotl(BinaryOperation):
    """
    Rotate the bits of each block left.
    """
    def operate(self, value, shift):
        # Reduce the shift modulo the block width, then recombine the two halves.
        shift %= self.fbits
        wrapped = value >> (self.fbits - shift)
        return (value << shift) | wrapped

    def inplace(self, value, shift):
        # In-place variant: capture the bits that wrap around before shifting.
        shift %= self.fbits
        wrapped = value >> (self.fbits - shift)
        value <<= shift
        value |= wrapped

Ancestors

Inherited members

class rotr (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.rotr and has the following command line interface:

usage: rotr [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Rotate the bits of each block right.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class rotr(BinaryOperation):
    """
    Rotate the bits of each block right.
    """
    def operate(self, value, shift):
        # Reduce the shift modulo the block width, then recombine the two halves.
        shift %= self.fbits
        wrapped = value << (self.fbits - shift)
        return (value >> shift) | wrapped

    def inplace(self, value, shift):
        # In-place variant: capture the bits that fall off the right end first.
        shift %= self.fbits
        wrapped = value >> shift
        value <<= self.fbits - shift
        value |= wrapped

Ancestors

Inherited members

class shl (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.shl and has the following command line interface:

usage: shl [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Shift the bits of each block left, filling with zero bits.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class shl(BinaryOperation):
    """
    Shift the bits of each block left, filling with zero bits.
    """
    @staticmethod
    def operate(a, b):
        # Left operand is the input block, right operand the shift amount.
        return a << b

    @staticmethod
    def inplace(a, b):
        # In-place variant used by the fast (vectorized) processing path.
        a <<= b

Ancestors

Inherited members

class shr (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.shr and has the following command line interface:

usage: shr [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Shift the bits of each block right, filling with zero bits.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class shr(BinaryOperation):
    """
    Shift the bits of each block right, filling with zero bits.
    """
    @staticmethod
    def operate(a, b):
        # Left operand is the input block, right operand the shift amount.
        return a >> b

    @staticmethod
    def inplace(a, b):
        # In-place variant used by the fast (vectorized) processing path.
        a >>= b

Ancestors

Inherited members

class sub (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.sub and has the following commandline Interface:

usage: sub [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Subtract the given argument from each block.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class sub(BinaryOperation):
    """
    Subtract the given argument from each block.
    """
    @staticmethod
    def operate(a, b):
        # Pure variant: subtract the argument b from the block value a.
        return a - b

    @staticmethod
    def inplace(a, b):
        # In-place variant for mutable block containers.
        a -= b

Ancestors

Inherited members

class terminate (sentinel=0, blocksize=1, bigendian=False)

This unit is implemented in refinery.units.blockwise.terminate and has the following commandline Interface:

usage: terminate [-h] [-L] [-Q] [-0] [-v] [-B N] [-E] [sentinel]

The unit reads data from the incoming chunk in blocks of any given size
until the sentinel value is encountered. The output of the unit is all
data that was read, excluding the sentinel. The default block size is one
and the default sentinel value is zero, which corresponds to reading a
null-terminated string from the input. If the sentinel value is not found
anywhere in the incoming data, the complete input is returned as output.

positional arguments:
  sentinel           sentinel value to look for; default is 0

optional arguments:
  -B, --blocksize N  The size of each block in bytes, default is 1.
  -E, --bigendian    Read chunks in big endian.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class terminate(BlockTransformationBase):
    """
    The unit reads data from the incoming chunk in blocks of any given size until the
    sentinel value is encountered. The output of the unit is all data that was read,
    excluding the sentinel. The default block size is one and the default sentinel value
    is zero, which corresponds to reading a null-terminated string from the input.
    If the sentinel value is not found anywhere in the incoming data, the complete input
    is returned as output.
    """
    def __init__(
        self,
        sentinel: arg(type=numseq, help='sentinel value to look for; default is {default}') = 0,
        blocksize=1,
        bigendian=False
    ):
        # A byte-sequence sentinel is converted to the integer value of its
        # first block so it can be compared against unpacked integer blocks.
        if not isinstance(sentinel, int):
            sentinel = next(chunks.unpack(sentinel, blocksize, bigendian))
        super().__init__(blocksize=blocksize, bigendian=bigendian, sentinel=sentinel)

    def process(self, data: bytearray):
        sentinel: int = self.args.sentinel

        self.log_debug(F'using sentinel value: 0x{sentinel:0{self.args.blocksize*2}X}')

        # Fast path: self.bytestream appears to indicate single-byte blocks
        # (TODO confirm in BlockTransformationBase), so a plain find suffices.
        if self.bytestream:
            pos = data.find(sentinel)
            if pos < 0:
                self.log_info(F'the sentinel value {sentinel} was not found')
            else:
                # Truncate from the sentinel onward, excluding the sentinel itself.
                data[pos:] = []
            return data

        def seek(it):
            # Yield unpacked blocks until the sentinel block is encountered;
            # if it never appears, the whole input is passed through.
            for chunk in it:
                if chunk == sentinel:
                    break
                yield chunk

        return self.unchunk(seek(self.chunk(data)))

Ancestors

Inherited members

class transpose (padding=b'', bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.transpose and has the following commandline Interface:

usage: transpose [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] [padding]

Interprets the sequence of blocks as rows of a matrix and returns the
blocks that correspond to the columns of this matrix.

positional arguments:
  padding            Optional byte sequence to use as padding for tail
                     end.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class transpose(BlockTransformationBase):
    """
    Interprets the sequence of blocks as rows of a matrix and returns the
    blocks that correspond to the columns of this matrix.
    """
    def __init__(
        self, padding: arg(help='Optional byte sequence to use as padding for tail end.') = B'',
        bigendian=False, blocksize=1
    ):
        super().__init__(bigendian=bigendian, blocksize=blocksize, padding=padding)

    def process(self, data):
        # rest holds the tail bytes that do not fill a complete block;
        # the full blocks become the rows of the matrix.
        rest = self.rest(data)
        data = list(self.chunk(data, raw=True))

        if self.args.padding:
            # Pad the incomplete tail up to a full block and append it as the
            # final row; without padding, the tail is not part of the matrix.
            while len(rest) < self.args.blocksize:
                rest += self.args.padding
            rest = rest[:self.args.blocksize]
            data.append(rest)
            rest = B''

        # Column i of the matrix is byte i taken from every row, in row order.
        return self.unchunk((
            bytes(data[j][i] for j in range(len(data)))
            for i in range(self.args.blocksize)), raw=True)

Ancestors

Inherited members

class xor (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.xor and has the following commandline Interface:

usage: xor [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Form the exclusive or of the input data with the given argument.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class xor(BinaryOperation):
    """
    Form the exclusive or of the input data with the given argument.
    """
    def process_ecb_fast(self, data):
        # Prefer the base class fast path; when numpy is not available, fall
        # back to PyCryptodome's strxor before giving up entirely.
        try:
            return super().process_ecb_fast(data)
        except NoNumpy as E:
            try:
                from Crypto.Util.strxor import strxor
            except ModuleNotFoundError:
                # Neither numpy nor PyCryptodome present: re-raise the original
                # NoNumpy error so the caller can use the slow path.
                raise E
            else:
                from itertools import islice, cycle
                # Repeat the masked key blocks cyclically until they cover the
                # input, then trim the packed key stream to the input length.
                take = len(data) // self.args.blocksize + 1
                argb = self.unchunk(islice(cycle(x & self.fmask for x in self.args.argument[0]), take))
                return strxor(data, argb[:len(data)])

    @staticmethod
    def operate(a, b): return a ^ b
    @staticmethod
    def inplace(a, b): a ^= b

Ancestors

Inherited members

class aplib

This unit is implemented in refinery.units.compression.ap and has the following commandline Interface:

usage: aplib [-h] [-L] [-Q] [-0] [-v] [-R]

APLib compression and decompression.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class aplib(Unit):
    """
    APLib compression and decompression.
    """
    def process(self, buf):
        # Forward operation: unpack APLib-compressed input.
        return decompressor(buf).decompress()

    def reverse(self, buf):
        # Reverse operation: produce APLib-compressed output.
        return compressor(buf).compress()

Ancestors

Inherited members

class blz

This unit is implemented in refinery.units.compression.blz and has the following commandline Interface:

usage: blz [-h] [-L] [-Q] [-0] [-v] [-R]

BriefLZ compression and decompression. The compression algorithm uses a
pure Python suffix tree implementation: It requires a lot of time &
memory.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class blz(Unit):
    """
    BriefLZ compression and decompression. The compression algorithm uses a pure Python suffix tree
    implementation: It requires a lot of time & memory.
    """
    def _begin(self, data):
        # Prepare a structured reader over the input and a fresh output buffer.
        self._src = StructReader(memoryview(data))
        self._dst = MemoryFile(bytearray())
        return self

    def _reset(self):
        # Rewind the input and clear the output for a second decompression attempt.
        self._src.seek(0)
        self._dst.seek(0)
        self._dst.truncate()
        return self

    def _decompress(self):
        # Decompress data in the standard BriefLZ container format: a 24 byte
        # big-endian header (signature, version, sizes and CRC32 checksums)
        # followed by one compressed chunk. CRC mismatches only warn.
        (
            signature,
            version,
            src_count,
            src_crc32,
            dst_count,
            dst_crc32,
        ) = self._src.read_struct('>6L')
        if signature != 0x626C7A1A:
            raise ValueError(F'Invalid BriefLZ signature: {signature:08X}, should be 626C7A1A.')
        if version > 10:
            raise ValueError(F'Invalid version number {version}, should be less than 10.')
        self.log_debug(F'signature: 0x{signature:08X} V{version}')
        self.log_debug(F'src count: 0x{src_count:08X}')
        self.log_debug(F'src crc32: 0x{src_crc32:08X}')
        self.log_debug(F'dst count: 0x{dst_count:08X}')
        self.log_debug(F'dst crc32: 0x{dst_crc32:08X}')
        src = self._src.getbuffer()
        src = src[24:24 + src_count]
        if len(src) < src_count:
            # Fixed typo in log message: "annoucned" -> "announced".
            self.log_warn(F'Only {len(src)} bytes in buffer, but header announced a length of {src_count}.')
        if src_crc32:
            check = zlib.crc32(src)
            if check != src_crc32:
                self.log_warn(F'Invalid source data CRC {check:08X}, should be {src_crc32:08X}.')
        dst = self._decompress_chunk(dst_count)
        if not dst_crc32:
            return dst
        check = zlib.crc32(dst)
        if check != dst_crc32:
            self.log_warn(F'Invalid result data CRC {check:08X}, should be {dst_crc32:08X}.')
        return dst

    def _decompress_modded(self):
        # Decompress a modified BriefLZ layout: after 8 skipped bytes, two
        # 64-bit values give the total and per-chunk sizes; the payload is a
        # sequence of chunks which all decompress into the shared output.
        self._src.seekrel(8)
        total_size = self._src.u64()
        chunk_size = self._src.u64()
        remaining = total_size
        self.log_debug(F'total size: 0x{total_size:016X}')
        self.log_debug(F'chunk size: 0x{chunk_size:016X}')
        while remaining > chunk_size:
            self._decompress_chunk(chunk_size)
            remaining -= chunk_size
        return self._decompress_chunk(remaining)

    def _decompress_chunk(self, size=None):
        bitcount = 0
        bitstore = 0
        decompressed = 1

        def readbit():
            # Tag bits are consumed most significant first from 16 bit words
            # that are stored little-endian in the input.
            nonlocal bitcount, bitstore
            if not bitcount:
                bitstore = int.from_bytes(self._src.read(2), 'little')
                bitcount = 0xF
            else:
                bitcount = bitcount - 1
            return (bitstore >> bitcount) & 1

        def readint():
            # BriefLZ integer coding: starts at 2, one data bit follows each
            # set continuation bit; see writeint in _compress for the encoder.
            result = 2 + readbit()
            while readbit():
                result <<= 1
                result += readbit()
            return result

        # The first output byte is always stored verbatim.
        self._dst.write(self._src.read(1))

        try:
            while not size or decompressed < size:
                if readbit():
                    # Match token: length, sector and a raw offset byte
                    # combine into a back-reference distance (delta).
                    length = readint() + 2
                    sector = readint() - 2
                    offset = self._src.read(1)[0] + 1
                    delta = offset + 0x100 * sector
                    available = self._dst.tell()
                    if delta not in range(available + 1):
                        raise RefineryPartialResult(
                            F'Requested rewind by 0x{delta:08X} bytes with only 0x{available:08X} bytes in output buffer.',
                            partial=self._dst.getvalue())
                    # Replay previously written output; when the match length
                    # exceeds the distance, the window repeats cyclically.
                    quotient, remainder = divmod(length, delta)
                    replay = memoryview(self._dst.getbuffer())
                    replay = bytes(replay[-delta:] if quotient else replay[-delta:length - delta])
                    replay = quotient * replay + replay[:remainder]
                    self._dst.write(replay)
                    decompressed += length
                else:
                    # Literal token: copy a single byte from input to output.
                    self._dst.write(self._src.read(1))
                    decompressed += 1
        except EOF as E:
            raise RefineryPartialResult(str(E), partial=self._dst.getbuffer())
        dst = self._dst.getbuffer()
        if decompressed < size:
            raise RefineryPartialResult(
                F'Attempted to decompress {size} bytes, got only {len(dst)}.', dst)
        if decompressed > size:
            raise RuntimeError('Decompressed buffer contained more bytes than expected.')
        return dst

    def _compress(self):
        from ...lib.suffixtree import SuffixTree

        # Building the suffix tree is the expensive part of compression; it
        # enables longest-match lookups against already processed data.
        # (Removed a no-op try/except wrapper that only re-raised.)
        self.log_info('computing suffix tree')
        tree = SuffixTree(self._src.getbuffer())

        bitstore = 0  # The bit stream to be written
        bitcount = 0  # The number of bits in the bit stream
        buffer = MemoryFile(bytearray())

        # Write empty header and first byte of source
        self._dst.write(bytearray(24))
        self._dst.write(self._src.read(1))

        def writeint(n: int) -> None:
            """
            Write an integer to the bit stream.
            """
            nonlocal bitstore, bitcount
            nbits = n.bit_length()
            if nbits < 2:
                raise ValueError
            # The highest bit is implicitly assumed:
            n ^= 1 << (nbits - 1)
            remaining = nbits - 2
            while remaining:
                remaining -= 1
                bitstore <<= 2
                bitcount += 2
                # Each pair carries the next data bit (high) and a forced
                # continuation bit (low, via the final | 1).
                bitstore |= ((n >> remaining) & 3) | 1
            bitstore <<= 2
            bitcount += 2
            # Final pair: last data bit with a zero continuation bit.
            bitstore |= (n & 1) << 1

        src = self._src.getbuffer()
        remaining = len(src) - 1
        self.log_info('compressing data')

        while True:
            cursor = len(src) - remaining
            rest = src[cursor:]
            if bitcount >= 0x10:
                # Flush complete 16 bit tag words, interleaving them with the
                # buffered data bytes as the decompressor expects.
                block_count, bitcount = divmod(bitcount, 0x10)
                info_channel = bitstore >> bitcount
                bitstore = info_channel << bitcount ^ bitstore
                # The decompressor will read bits from top to bottom, and each 16 bit block has to be
                # little-endian encoded. The bit stream is encoded top to bottom bit in the bitstore
                # variable, and by encoding it as a big endian integer, the stream is in the correct
                # order. However, we need to swap adjacent bytes to achieve little endian encoding for
                # each of the blocks:
                info_channel = bytearray(info_channel.to_bytes(block_count * 2, 'big'))
                for k in range(block_count):
                    k0 = 2 * k + 0
                    k1 = 2 * k + 1
                    info_channel[k0], info_channel[k1] = info_channel[k1], info_channel[k0]
                info_channel = memoryview(info_channel)
                data_channel = memoryview(buffer.getbuffer())
                self._dst.write(info_channel[:2])
                self._dst.write(data_channel[:-1])
                self._dst.write(info_channel[2:])
                data_channel = bytes(data_channel[-1:])
                buffer.truncate(0)
                store = buffer if bitcount else self._dst
                store.write(data_channel)
            if remaining + bitcount < 0x10:
                # End of input: flush the remaining bits and any leftover data.
                buffer = buffer.getbuffer()
                if rest or buffer:
                    bitstore <<= 0x10 - bitcount
                    self._dst.write(bitstore.to_bytes(2, 'little'))
                    self._dst.write(buffer)
                    self._dst.write(rest)
                elif bitcount:
                    raise RuntimeError('Bitbuffer Overflow')
                break
            # Walk the suffix tree to find the longest match that starts
            # before the current cursor position.
            node = tree.root
            length = 0
            offset = 0
            sector = None
            while node.children and length < len(rest):
                for child in node.children.values():
                    if tree.data[child.start] == rest[length]:
                        node = child
                        break
                if node.start >= cursor:
                    break
                offset = node.start - length
                length = node.end + 1 - offset
            length = min(remaining, length)
            if length >= 4:
                # Matches shorter than 4 bytes are not worth encoding.
                sector, offset = divmod(cursor - offset - 1, 0x100)
            bitcount += 1
            bitstore <<= 1
            if sector is None:
                # Emit a literal: tag bit 0, byte goes to the data channel.
                buffer.write(rest[:1])
                remaining -= 1
                continue
            # Emit a match: tag bit 1, offset byte plus two encoded integers.
            bitstore |= 1
            buffer.write(bytes((offset,)))
            writeint(length - 2)
            writeint(sector + 2)
            remaining -= length

        # Fill in the header now that the compressed size and CRCs are known.
        self._dst.seek(24)
        dst = self._dst.peek()
        self._dst.seek(0)
        self._dst.write(struct.pack('>6L', 0x626C7A1A, 1, len(dst), zlib.crc32(dst), len(src), zlib.crc32(src)))
        return self._dst.getbuffer()

    def process(self, data):
        # Try the standard container first; on failure, retry with the
        # modified layout and prefer reporting a partial result if one exists.
        self._begin(data)
        partial = None
        try:
            return self._decompress()
        except ValueError as error:
            if isinstance(error, RefineryPartialResult):
                partial = error
            self.log_warn(F'Reverting to modified BriefLZ after decompression error: {error!s}')
            self._reset()

        try:
            return self._decompress_modded()
        except RefineryPartialResult:
            raise
        except Exception as error:
            if not partial:
                raise
            raise partial from error

    def reverse(self, data):
        # Compression entry point.
        return self._begin(data)._compress()

Ancestors

Inherited members

class bz2 (level=9)

This unit is implemented in refinery.units.compression.bz2 and has the following commandline Interface:

usage: bz2 [-h] [-L] [-Q] [-0] [-v] [-R] [-l LEVEL]

BZip2 compression and decompression.

optional arguments:
  -l, --level LEVEL  compression level preset between 1 and 9

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class bz2(Unit):
    """
    BZip2 compression and decompression.
    """
    def __init__(self, level: arg('-l', type=number[1:9], help='compression level preset between 1 and 9') = 9):
        super().__init__(level=level)

    def reverse(self, data):
        # Compress with the configured level preset.
        return bz2_.compress(data, self.args.level)

    def process(self, data):
        # Decompression does not require the level option.
        return bz2_.decompress(data)

Ancestors

Inherited members

class decompress (prepend=True, tolerance=12, min_ratio=1)

This unit is implemented in refinery.units.compression.decompress and has the following commandline Interface:

usage: decompress [-h] [-L] [-Q] [-0] [-v] [-P] [-t N] [-r R]

Attempts all available decompression units against the input and returns
the output of the first successful one. If none succeeds, the data is
returned unaltered. The process is heavily biased against LZNT1
decompression due to a large tendency for LZNT1 false positives.

optional arguments:
  -P, --no-prepend   By default, if decompression fails, the unit attempts
                     to prefix the data with all possible values of a
                     single byte and decompress the result. This behavior
                     can be disabled with this flag.
  -t, --tolerance N  Maximum number of bytes to strip from the beginning
                     of the data; The default value is 12.
  -r, --min-ratio R  To determine whether a decompression algorithm was
                     successful, the ratio of compressed size to
                     decompressed size is required to be at least this
                     number, a floating point value R; default value is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class decompress(Unit):
    """
    Attempts all available decompression units against the input and returns
    the output of the first successful one. If none succeeds, the data is
    returned unaltered. The process is heavily biased against LZNT1 decompression
    due to a large tendency for LZNT1 false positives.
    """
    def __init__(
        self,
        prepend: arg.switch('-P', '--no-prepend', off=True, help=(
            'By default, if decompression fails, the unit attempts to prefix '
            'the data with all possible values of a single byte and decompress '
            'the result. This behavior can be disabled with this flag.')
        ) = True,
        tolerance: arg.number('-t', help=(
            'Maximum number of bytes to strip from the beginning of the data; '
            'The default value is 12.')
        ) = 12,
        min_ratio: arg('-r', metavar='R', help=(
            'To determine whether a decompression algorithm was successful, the '
            'ratio of compressed size to decompressed size is required to be at '
            'least this number, a floating point value R; default value is 1.')
        ) = 1,
    ):
        # Corrected error message: the check requires a strictly positive value.
        if min_ratio <= 0:
            raise ValueError('The compression factor must be positive.')
        super().__init__(tolerance=tolerance, prepend=prepend, min_ratio=min_ratio)
        # Candidate engines, instantiated once; order matters only for ties.
        self.engines = [
            engine() for engine in [zl, lzma, aplib, bz2, blz, lz4, lznt1]
        ]

    def process(self, data):
        best = None
        current_ratio = 1

        class result:
            # Reference to the enclosing unit for settings and logging.
            unit = self

            def __init__(self, engine, cutoff=0, prefix=None):
                """
                Attempt decompression of the input with the given engine after
                stripping `cutoff` leading bytes and/or prepending `prefix`.
                """
                feed = data

                self.engine = engine
                self.prefix = prefix
                self.cutoff = cutoff

                if cutoff:
                    feed = data[cutoff:]
                if prefix is not None:
                    # Bug fix: apply the prefix to the (possibly cut) feed,
                    # not the original data, so both modifications compose.
                    feed = prefix + feed

                try:
                    self.result = engine.process(feed)
                except RefineryPartialResult as pr:
                    self.result = pr.partial
                except Exception:
                    self.result = B''

                # Smaller ratios are better; empty output disqualifies.
                if not self.result:
                    self.ratio = INF
                else:
                    self.ratio = len(data) / len(self.result)

            @property
            def unmodified(self):
                return not self.prefix and not self.cutoff

            def schedule(self):
                # Promote this candidate to the current best when its ratio
                # clears the threshold and beats the best by a margin: a new
                # candidate normally must improve by 10% (factor 0.9), except
                # when an unmodified input would replace a modified best; an
                # LZNT1 result must improve by twice as much.
                nonlocal best, current_ratio
                if self.ratio >= self.unit.args.min_ratio:
                    return
                prefix = hex(self.prefix[0]) if self.prefix else None
                r = 1 if self.unmodified and best and not best.unmodified else 0.9
                if self.engine.__class__ is lznt1:
                    r /= 2
                if not best or self.ratio / current_ratio < r:
                    self.unit.log_info(lambda: (
                        F'obtained {self.ratio:.2f} compression ratio with: prefix={prefix}, '
                        F'cutoff={self.cutoff}, engine={self.engine.name}'))
                    best = self
                    current_ratio = self.ratio

        # Try each engine with every cutoff, and optionally with every
        # possible single-byte prefix.
        for engine in self.engines:
            for t in range(self.args.tolerance):
                result(engine, t).schedule()
            if self.args.prepend:
                for p in range(0x100):
                    result(engine, 0, bytes((p,))).schedule()

        if best is None:
            self.log_info('nothing worked, returning original data.')
            return data

        return best.result

Ancestors

Inherited members

class lzma (filter=None, raw=False, alone=False, xz=False, level=9, delta=None)

This unit is implemented in refinery.units.compression.lz and has the following commandline Interface:

usage: lzma [-h] [-L] [-Q] [-0] [-v] [-R] [-r | -a | -x] [-l N] [-d N]
            [FILTER]

LZMA compression and decompression.

positional arguments:
  FILTER         Specifies a bcj filter to be applied. Possible values
                 are: ARM, ARMTHUMB, IA64, LZMA1, LZMA2, POWERPC, SPARC,
                 X86

optional arguments:
  -r, --raw      Use raw (no container) format.
  -a, --alone    Use the lzma container format.
  -x, --xz       Use the default xz format.
  -l, --level N  The compression level preset; between 0 and 9.
  -d, --delta N  Add a delta filter when compressing.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class lzma(Unit):
    """
    LZMA compression and decompression.
    """
    # Collect the FILTER_* constants of the lzma module (DELTA excluded, it
    # has its own --delta option) and build an option parser for them.
    _LZMA_FILTER = extract_options(lzma_, 'FILTER_', 'DELTA')
    _LZMA_PARSER = OptionFactory(_LZMA_FILTER)

    def __init__(
        self, filter: arg.choice(choices=list(_LZMA_FILTER), metavar='FILTER', help=(
            'Specifies a bcj filter to be applied. Possible values are: {choices}')) = None,
        raw   : arg.switch('-r', group='MODE', help='Use raw (no container) format.') = False,
        alone : arg.switch('-a', group='MODE', help='Use the lzma container format.') = False,
        xz    : arg.switch('-x', group='MODE', help='Use the default xz format.') = False,
        level : arg.number('-l', bound=(0, 9), help='The compression level preset; between 0 and 9.') = 9,
        delta : arg.number('-d', help='Add a delta filter when compressing.') = None,
    ):
        filter = filter and self._LZMA_PARSER(filter)
        if (raw, alone, xz).count(True) > 1:
            raise ValueError('Only one container format can be enabled.')
        if level not in range(10):
            raise ValueError('Compression level must be a number between 0 and 9.')
        # PRESET_EXTREME is a flag bit that can be OR-ed onto the level preset.
        super().__init__(filter=filter, raw=raw, alone=alone, xz=xz, delta=delta,
            level=level | lzma_.PRESET_EXTREME)

    def _get_lz_mode_and_filters(self, reverse=False):
        # Translate unit options into an lzma format constant and a filter
        # chain suitable for LZMACompressor/LZMADecompressor.
        mode = lzma_.FORMAT_AUTO
        filters = []
        if self.args.filter is not None:
            filters.append({'id': self.args.filter.value})
        if self.args.delta is not None:
            self.log_debug('adding delta filter')
            filters.append({
                'id': lzma_.FILTER_DELTA,
                'dist': self.args.delta
            })
        if self.args.alone:
            self.log_debug('setting alone format')
            mode = lzma_.FORMAT_ALONE
            filters.append({
                'id': lzma_.FILTER_LZMA1,
                'preset': self.args.level
            })
        elif self.args.raw:
            self.log_debug('setting raw format')
            mode = lzma_.FORMAT_RAW
            filters.append({
                'id': lzma_.FILTER_LZMA2,
                'preset': self.args.level
            })
        elif self.args.xz or reverse:
            # NOTE(review): log_debug doubles as a verbosity check here — when
            # it returns falsy, the info-level message is emitted instead.
            if reverse and not self.log_debug('setting xz container format'):
                self.log_info('choosing default .xz container format for compression.')
            mode = lzma_.FORMAT_XZ
            filters.append({
                'id': lzma_.FILTER_LZMA2,
                'preset': self.args.level
            })
        return mode, filters

    def reverse(self, data):
        # Compress in one shot using the configured mode and filter chain.
        mode, filters = self._get_lz_mode_and_filters(True)
        lz = lzma_.LZMACompressor(mode, filters=filters)
        output = lz.compress(data)
        output += lz.flush()
        return output

    def process(self, data):
        # Decompress in chunks; on an error mid-stream, restart with a fresh
        # decompressor, replay up to the error position, then continue one
        # byte at a time to salvage as much output as possible.
        keywords = {}
        mode, filters = self._get_lz_mode_and_filters(False)
        if self.args.raw:
            keywords['filters'] = filters
        lz = lzma_.LZMADecompressor(mode, **keywords)
        with MemoryFile() as output:
            pos, size = 0, 4096
            with MemoryFile(data) as stream:
                while not stream.eof and not stream.closed:
                    pos = stream.tell()
                    try:
                        chunk = lz.decompress(stream.read(size))
                    except (EOFError, lzma_.LZMAError) as error:
                        if size > 1:
                            lz = lzma_.LZMADecompressor(mode, **keywords)
                            stream.seek(0)
                            output.seek(0)
                            if pos > 0:
                                output.write(lz.decompress(stream.read(pos)))
                            msg = error.args[0] if len(error.args) == 1 else error.__class__.__name__
                            self.log_debug(F'decompression error, reverting to one byte at a time: {msg}')
                            size = 1
                        else:
                            # NOTE(review): message says 'compression' although
                            # this is the decompression path.
                            remaining = len(stream.getbuffer()) - pos
                            raise RefineryPartialResult(F'compression failed with {remaining} bytes remaining', output.getvalue())
                    else:
                        output.write(chunk)
            return output.getvalue()

Ancestors

Inherited members

class lz4

This unit is implemented in refinery.units.compression.lz4 and has the following commandline Interface:

usage: lz4 [-h] [-L] [-Q] [-0] [-v]

LZ4 block decompression. See also:
https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md#compressed-
block-format

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class lz4(Unit):
    """
    LZ4 block decompression. See also:
    https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md#compressed-block-format
    """
    def _read_block(self, reader, output, ubound=None):
        """
        Decode one LZ4 block from `reader`, appending decompressed data to `output`.
        When `ubound` is given, it is the number of compressed bytes this block is
        allowed to consume; consuming more raises a `ValueError`.
        """
        entry = reader.tell()
        lastend = 0  # output position where the most recent match copy started

        def ubound_check():
            # Returns True when exactly `ubound` compressed bytes have been
            # consumed; raises if the block overran its compressed-size budget.
            if ubound is None:
                return False
            consumed = reader.tell() - entry
            if consumed > ubound:
                raise ValueError(F'upper bound {ubound} exceeded by {consumed-ubound} in LZ4 block')
            return consumed == ubound

        while not reader.eof:
            # Each LZ4 sequence starts with a token of two nibbles: one encodes
            # the match length, the other the literal length. read_size extends
            # a nibble value of 15 with additional length bytes (LSIC scheme —
            # presumably implemented by the reader; confirm in LZ4Reader).
            reflen = reader.read_nibble()
            litlen = reader.read_nibble()
            litlen = reader.read_size(litlen)
            literal = reader.read(litlen)
            output.write(literal)
            if ubound_check(): break
            # The match offset is a little-endian 16-bit value; the last
            # sequence of a stream legitimately ends without one.
            try: refpos = reader.u16()
            except EOF: break
            if refpos - 1 not in range(output.tell()):
                # The offset points before the beginning of the output buffer,
                # which is invalid.
                with StreamDetour(output, lastend):
                    if output.read(len(literal)) == literal:
                        # This literal could have been encoded in the last match, but it wasn't.
                        # Therefore, it is very likely that we have reached the end of the stream.
                        break
                position = reader.tell()
                remaining = len(literal) - position
                # NOTE(review): `remaining` is computed from the literal length,
                # not from the input size — presumably the total input length was
                # intended here; confirm against the reader's length.
                raise RefineryPartialResult(
                    F'encountered invalid match offset value {refpos} at position {position} with {remaining} bytes remaining',
                    partial=output.getvalue())
            reflen = reader.read_size(reflen)
            if ubound_check():
                raise ValueError('last sequence in block contained a match')
            reflen += 4  # the minimum match length in LZ4 is 4 bytes
            # The match window may be shorter than the match length (overlapping
            # copy); replicate the available window q times plus r extra bytes.
            available_bytes = min(refpos, reflen)
            q, r = divmod(reflen, available_bytes)
            with StreamDetour(output, -refpos, io.SEEK_CUR):
                match = output.read(available_bytes)
                match = q * match + match[:r]
                assert len(match) == reflen
                lastend = output.tell() - available_bytes + r
            output.write(match)

    def process(self, data):
        """
        Decompress the input either as a full LZ4 frame (when the frame magic is
        present) or as a single raw LZ4 block.
        """
        output = io.BytesIO()
        reader = LZ4Reader(memoryview(data))
        try:
            # 0x184D2204 is the LZ4 frame magic number; without it the input is
            # treated as one raw block.
            magic = reader.u32() == 0x184D2204
        except EOF:
            magic = False
        if not magic:
            reader.seek(0)
            self._read_block(reader, output)
            return output.getbuffer()

        # Frame descriptor: FLG byte flags, read LSB-first by read_bits.
        (dict_id, rsrv1, content_checksummed, content_size,
            blocks_checksummed, blocks_independent, v2, v1) = reader.read_bits(8)
        rsrv2 = reader.read_nibble()
        try:
            # BD byte: 3-bit code selecting the maximum block size.
            block_maximum = {
                7: 0x400000,
                6: 0x100000,
                5: 0x040000,
                4: 0x010000,
            }[reader.read_integer(3)]
        except KeyError:
            raise ValueError('unknown maximum block size value in LZ4 frame header')
        rsrv3 = reader.read_bit()
        if any((rsrv1, rsrv2, rsrv3)):
            self.log_warn('nonzero reserved value in LZ4 frame header')
        if (v1, v2) != (0, 1):
            self.log_warn(F'invalid version ({v1},{v2}) in LZ4 frame header')
        # Optional fields are only present when their flag bits are set.
        content_size = content_size and reader.u64() or None
        dict_id = dict_id and reader.u32() or None
        # Header Checksum: second byte of the xxHash32 of the frame descriptor.
        xxh = xxhash(data[4:reader.tell()]).intdigest() >> 8 & 0xFF
        chk = reader.read_byte()
        if chk != xxh:
            self.log_warn(F'header checksum {chk:02X} does not match computed value {xxh:02X}')

        self.log_debug(lambda: F'dictionary id: {dict_id}')
        self.log_debug(lambda: F'block max: 0x{block_maximum:X}')
        if content_size is not None:
            self.log_debug(lambda: F'chunk max: 0x{content_size:X}')
        self.log_debug(lambda: F'blocks independent: {bool(blocks_independent)}')
        self.log_debug(lambda: F'blocks checksummed: {bool(blocks_checksummed)}')

        blockindex = 0

        # Block loop: each block is prefixed by a 31-bit size and one bit that
        # marks the block as stored (uncompressed). A zero size terminates.
        while True:
            blockindex += 1
            size = reader.read_integer(31)
            uncompressed = reader.read_bit()
            if not size:
                assert not uncompressed
                break
            self.log_info(F'reading block of size 0x{size:06X}')
            assert reader.byte_aligned
            assert size <= block_maximum, 'block size exceeds maximum size'
            if uncompressed:
                output.write(reader.read(size))
            else:
                self._read_block(reader, output, size)
            if blocks_checksummed:
                # Re-read the raw block bytes to verify the per-block xxHash32.
                with StreamDetour(reader, -size, io.SEEK_CUR):
                    xxh = xxhash(reader.read(size)).intdigest()
                chk = reader.u32()
                if chk != xxh:
                    self.log_warn(F'block {blockindex} had checksum {chk:08X} which did not match computed value {xxh:08X}')
        if content_checksummed:
            self.log_info('computing checksum')
            xxh = xxhash(output.getbuffer()).intdigest()
            chk = reader.u32()
            if chk != xxh:
                self.log_warn(F'the given checksum {chk:08X} did not match the computed checksum {xxh:08X}')
        if not reader.eof:
            pos = reader.tell()
            self.log_warn(F'found {len(data)-pos} additional bytes starting at position 0x{pos:X} after compressed data')
        return output.getbuffer()

Ancestors

Inherited members

class lznt1 (chunk_size=4096)

This unit is implemented in refinery.units.compression.lznt1 and has the following commandline Interface:

usage: lznt1 [-h] [-L] [-Q] [-0] [-v] [-R] [-c N]

LZNT1 compression and decompression. This compression algorithm is
expected by the Win32 API routine RtlDecompressBuffer, for example.

optional arguments:
  -c, --chunk-size N  Optionally specify the chunk size for compression,
                      default is 0x1000.

generic options:
  -h, --help          Show this help message and exit.
  -L, --lenient       Allow partial results as output.
  -Q, --quiet         Disables all log output.
  -0, --devnull       Do not produce any output.
  -v, --verbose       Specify up to two times to increase log level.
  -R, --reverse       Use the reverse operation.
Expand source code Browse git
class lznt1(Unit):
    """
    LZNT1 compression and decompression. This compression algorithm is expected
    by the Win32 API routine `RtlDecompressBuffer`, for example.
    """

    def _decompress_chunk(self, chunk):
        """
        Decompress a single LZNT1 chunk. Each flag byte announces, bit by bit,
        whether the next token is a literal byte or a 16-bit back reference
        whose offset/length bit split depends on the current output position.
        """
        out = B''
        while chunk:
            flags = chunk[0]
            chunk = chunk[1:]
            for i in range(8):
                if not (flags >> i & 1):
                    # Flag bit clear: copy one literal byte.
                    out += chunk[:1]
                    chunk = chunk[1:]
                else:
                    # Flag bit set: decode an (offset, length) back reference.
                    # The number of offset bits grows with the output size.
                    flag = struct.unpack('<H', chunk[:2])[0]
                    pos = len(out) - 1
                    l_mask = 0xFFF
                    o_shift = 12
                    while pos >= 0x10:
                        l_mask >>= 1
                        o_shift -= 1
                        pos >>= 1
                    length = (flag & l_mask) + 3
                    offset = (flag >> o_shift) + 1
                    if length >= offset:
                        # Overlapping copy: replicate the trailing window until
                        # it covers the requested length.
                        tmp = out[-offset:] * (0xFFF // len(out[-offset:]) + 1)
                        out += tmp[:length]
                    else:
                        out += out[-offset:length - offset]
                    chunk = chunk[2:]
                if len(chunk) == 0:
                    break
        return out

    def _find(self, src, target, max_len):
        """
        Search `src` for the longest prefix of `target` (up to `max_len` bytes)
        and return its (offset-from-end, length); (0, 0) when the best match is
        shorter than the 3-byte minimum that LZNT1 can encode.
        """
        result_offset = 0
        result_length = 0
        for i in range(1, max_len):
            offset = src.rfind(target[:i])
            if offset == -1:
                break
            tmp_offset = len(src) - offset
            tmp_length = i
            if tmp_offset == tmp_length:
                # Match reaches the end of src: it may extend by repetition
                # (self-overlapping match), so try longer lengths.
                tmp = src[offset:] * (0xFFF // len(src[offset:]) + 1)
                for j in range(i, max_len + 1):
                    offset = tmp.rfind(target[:j])
                    if offset == -1:
                        break
                    tmp_length = j
            if tmp_length > result_length:
                result_offset = tmp_offset
                result_length = tmp_length
        if result_length < 3:
            return 0, 0
        return result_offset, result_length

    def _compress_chunk(self, chunk):
        """
        Compress a single chunk using greedy back-reference search, emitting a
        flag byte for every group of up to eight tokens.
        """
        blob = copy.copy(chunk)
        out = B''
        pow2 = 0x10
        l_mask3 = 0x1002
        o_shift = 12
        while len(blob) > 0:
            bits = 0
            tmp = B''
            for i in range(8):
                bits >>= 1
                # Adjust the offset/length bit split to the amount of output
                # already produced, mirroring _decompress_chunk.
                while pow2 < (len(chunk) - len(blob)):
                    pow2 <<= 1
                    l_mask3 = (l_mask3 >> 1) + 1
                    o_shift -= 1
                if len(blob) < l_mask3:
                    max_len = len(blob)
                else:
                    max_len = l_mask3
                offset1, length1 = self._find(
                    chunk[:len(chunk) - len(blob)], blob, max_len)
                # try to find more compressed pattern
                offset2, length2 = self._find(
                    chunk[:len(chunk) - len(blob) + 1], blob[1:], max_len)
                if length1 < length2:
                    length1 = 0
                if length1 > 0:
                    symbol = ((offset1 - 1) << o_shift) | (length1 - 3)
                    tmp += struct.pack('<H', symbol)
                    bits |= 0x80  # set the highest bit
                    blob = blob[length1:]
                else:
                    tmp += blob[:1]
                    blob = blob[1:]
                if len(blob) == 0:
                    break
            # Align the flag bits in case the group ended early (i < 7).
            out += struct.pack('B', bits >> (7 - i))
            out += tmp
        return out

    def reverse(self, buf):
        """
        Compress the input chunk-wise; each chunk gets a 2-byte header whose
        high bits flag whether the chunk payload is compressed or stored.
        """
        out = B''
        while buf:
            chunk = buf[:self.args.chunk_size]
            compressed = self._compress_chunk(chunk)
            if len(compressed) < len(chunk):  # chunk is compressed
                flags = 0xB000
                header = struct.pack('<H', flags | (len(compressed) - 1))
                out += header + compressed
            else:
                flags = 0x3000
                header = struct.pack('<H', flags | (len(chunk) - 1))
                out += header + chunk
            buf = buf[self.args.chunk_size:]
        return out

    def process(self, data):
        """
        Decompress a sequence of LZNT1 chunks. Raises `RefineryPartialResult`
        with the output so far when the input is truncated.
        """
        out = io.BytesIO()
        offset = 0
        while offset < len(data):
            try:
                header, = struct.unpack('<H', data[offset:offset + 2])
            except struct.error as err:
                raise RefineryPartialResult(str(err), partial=out.getvalue())
            offset += 2
            size = (header & 0xFFF) + 1
            # Bug fix: the previous check (size + 1 >= len(data)) compared the
            # chunk size against the total input length and ignored the current
            # offset, so truncated chunks after the first one slipped through.
            if offset + size > len(data):
                raise RefineryPartialResult(
                    F'chunk header indicates size {size}, but only {len(data) - offset} bytes remain.',
                    partial=out.getvalue()
                )
            chunk = data[offset:offset + size]
            offset += size
            if header & 0x8000:
                # High bit of the header marks a compressed chunk.
                chunk = self._decompress_chunk(chunk)
            out.write(chunk)
        return out.getvalue()

    def __init__(self, chunk_size: arg.number('-c', help='Optionally specify the chunk size for compression, default is 0x1000.') = 0x1000):
        super().__init__(chunk_size=chunk_size)

Ancestors

Inherited members

class zl (level=9, window=15, force=False, zlib_header=False, gzip_header=False)

This unit is implemented in refinery.units.compression.zl and has the following commandline Interface:

usage: zl [-h] [-L] [-Q] [-0] [-v] [-R] [-l N] [-w N] [-f] [-z | -g]

ZLib compression and decompression.

optional arguments:
  -l, --level N      Specify a compression level between 0 and 9.
  -w, --window N     Manually specify the window size between 8 and 15.
  -f, --force        Decompress as far as possible, even if all known
                     methods fail.
  -z, --zlib-header  Use a ZLIB header.
  -g, --gzip-header  Use a GZIP header.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class zl(Unit):
    """
    ZLib compression and decompression.
    """

    def __init__(
        self,
        level  : arg.number('-l', bound=(0, 0X9), help='Specify a compression level between 0 and 9.') = 9,
        window : arg.number('-w', bound=(8, 0XF), help='Manually specify the window size between 8 and 15.') = 15,
        force  : arg.switch('-f', help='Decompress as far as possible, even if all known methods fail.') = False,
        zlib_header: arg.switch('-z', group='MODE', help='Use a ZLIB header.') = False,
        gzip_header: arg.switch('-g', group='MODE', help='Use a GZIP header.') = False
    ):
        if zlib_header and gzip_header:
            raise ValueError('You can only specify one header type (ZLIB or GZIP).')
        # __init__ must not return a value; invoke the base initializer plainly
        # (the previous `return super().__init__(...)` was unidiomatic).
        super().__init__(level=level, window=window, force=force, zlib_header=zlib_header, gzip_header=gzip_header)

    def _force_decompress(self, data, mode):
        """
        Feed the input to a decompressor one byte at a time and collect as much
        output as possible before the first error.
        """
        z = zlib.decompressobj(mode)

        def as_many_as_possible():
            for k in range(len(data)):
                try: yield z.decompress(data[k : k + 1])
                except zlib.error: break

        return B''.join(as_many_as_possible())

    def process(self, data):
        """
        Attempt decompression with several zlib window settings, ordered by how
        likely each is given the input's header bytes.
        """
        # Slicing (data[:1]) instead of indexing (data[0]) so that empty input
        # cannot raise an IndexError.
        if data[:1] == B'\x78' or data[0:2] == B'\x1F\x8B' or self.args.zlib_header or self.args.gzip_header:
            # 0x78 starts a zlib header and 1F 8B a gzip header; wbits | 0x20
            # enables automatic detection of either header.
            mode_candidates = [self.args.window | 0x20, -self.args.window, 0]
        else:
            # A negative wbits value selects a raw deflate stream.
            mode_candidates = [-self.args.window, self.args.window | 0x20, 0]
        for mode in mode_candidates:
            self.log_info(F'using mode {mode:+2d} for decompression')
            try:
                z = zlib.decompressobj(mode)
                return z.decompress(data)
            except zlib.error:
                pass
        if self.args.force:
            return self._force_decompress(data, mode_candidates[0])
        raise ValueError('could not detect any zlib stream.')

    def reverse(self, data):
        """
        Compress the input; the sign of the window value and the 0x10 flag
        select raw deflate, zlib, or gzip framing respectively.
        """
        mode = -self.args.window
        if self.args.zlib_header:
            mode = -mode
        if self.args.gzip_header:
            mode = -mode | 0x10
        self.log_info(F'using mode {mode:+2d} for compression')
        # Local renamed from `zl` to avoid shadowing the class name.
        compressor = zlib.compressobj(self.args.level, zlib.DEFLATED, mode)
        body = compressor.compress(data)
        return body + compressor.flush(zlib.Z_FINISH)

Ancestors

Inherited members

class aes (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.aes and has the following commandline Interface:

usage: aes [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

AES encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CCM, CFB, CTR, EAX, ECB, GCM, OCB, OFB, OPENPGP,
                     SIV. By default, the CBC mode is used when an IV is
                     provided, and ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class aes(StandardBlockCipherUnit, cipher=AES):
    """
    This unit provides AES encryption and decryption.
    """

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class blowfish (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.blowfish and has the following commandline Interface:

usage: blowfish [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE]
                key

Blowfish encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class blowfish(StandardBlockCipherUnit, cipher=Blowfish):
    """
    This unit provides Blowfish encryption and decryption.
    """

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class cast (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.cast and has the following commandline Interface:

usage: cast [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

CAST encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class cast(StandardBlockCipherUnit, cipher=CAST):
    """
    This unit provides CAST encryption and decryption.
    """

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class chacha (key, nonce=b'REFINERY', magic=b'', offset=0, rounds=20)

This unit is implemented in refinery.units.crypto.cipher.chacha and has the following commandline Interface:

usage: chacha [-h] [-L] [-Q] [-0] [-v] [-R] [-m MAGIC] [-x N] [-r N]
              key [nonce]

ChaCha encryption and decryption. The nonce must be 8 bytes long as
currently, only the original Bernstein algorithm is implemented.

positional arguments:
  key                The encryption key.
  nonce              The nonce. Default is the string REFINERY.

optional arguments:
  -m, --magic MAGIC  The magic constant; depends on the key size by
                     default.
  -x, --offset N     Optionally specify the stream index, default is 0.
  -r, --rounds N     The number of rounds. Has to be an even number.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class chacha(LatinCipherUnit):
    """
    ChaCha encryption and decryption. Only the original Bernstein algorithm is
    implemented at this time, so the nonce has to be exactly 8 bytes long.
    """
    def keystream(self) -> Iterable[int]:
        args = self.args
        cipher = ChaChaCipher(
            args.key,
            args.nonce,
            args.magic,
            args.rounds,
            args.offset,
        )
        yield from cipher

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class chacha20 (key, nonce=b'REFINERY')

This unit is implemented in refinery.units.crypto.cipher.chacha and has the following commandline Interface:

usage: chacha20 [-h] [-L] [-Q] [-0] [-v] [-R] key [nonce]

ChaCha20 and XChaCha20 encryption and decryption. For ChaCha20, the IV
(nonce) must be 8 or 12 bytes long; for XChaCha20, choose an IV which is
24 bytes long. Invoking this unit for ChaCha20 is functionally equivalent
to chacha with 20 rounds, but this unit uses the PyCryptodome library C
implementation rather than the pure Python implementation used by chacha.

positional arguments:
  key            The encryption key.
  nonce          The nonce. Default is the string REFINERY.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class chacha20(LatinCipherStandardUnit, cipher=ChaCha20):
    """
    ChaCha20 and XChaCha20 encryption and decryption. The IV (nonce) must be 8
    or 12 bytes long for ChaCha20 and 24 bytes long for XChaCha20. For ChaCha20,
    this unit is functionally equivalent to `refinery.chacha` with 20 rounds,
    except that it is backed by the C implementation from the PyCryptodome
    library rather than the pure Python implementation used by `refinery.chacha`.
    """

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class des (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.des and has the following commandline Interface:

usage: des [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

DES encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class des(StandardBlockCipherUnit, cipher=DES):
    """
    This unit provides DES encryption and decryption.
    """

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class des3 (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.des3 and has the following commandline Interface:

usage: des3 [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

3-DES encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class des3(StandardBlockCipherUnit, cipher=DES3):
    """
    This unit provides 3-DES (Triple DES) encryption and decryption.
    """

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class hc128 (key)

This unit is implemented in refinery.units.crypto.cipher.hc128 and has the following commandline Interface:

usage: hc128 [-h] [-L] [-Q] [-0] [-v] [-R] key

HC-128 encryption and decryption.

positional arguments:
  key            The encryption key.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class hc128(StreamCipherUnit):
    """
    HC-128 encryption and decryption.
    """
    key_sizes = 32

    def keystream(self) -> Iterable[int]:
        # Delegate keystream generation to the HC-128 cipher implementation.
        key = self.args.key
        return hc128cipher(key)

Ancestors

Class variables

var key_sizes
var blocksize

Inherited members

class rabbit (key, iv=b'')

This unit is implemented in refinery.units.crypto.cipher.rabbit and has the following commandline Interface:

usage: rabbit [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] key

RABBIT encryption and decryption.

positional arguments:
  key            The encryption key.

optional arguments:
  -I, --iv IV    Optional initialization vector.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class rabbit(StreamCipherUnit):
    """
    RABBIT encryption and decryption.
    """
    key_sizes = 16

    def __init__(self, key, iv: arg('-I', '--iv', help='Optional initialization vector.') = B''):
        super().__init__(key=key, iv=iv)

    def keystream(self) -> Iterable[int]:
        # The IV is optional, but when given it must be exactly 8 bytes.
        iv = self.args.iv
        if len(iv) in (0, 8):
            return RabbitCipher(self.args.key, iv)
        raise ValueError('The IV length must be exactly 8 bytes.')

Ancestors

Class variables

var key_sizes
var blocksize

Inherited members

class rc2 (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.rc2 and has the following commandline Interface:

usage: rc2 [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

RC2 encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class rc2(StandardBlockCipherUnit, cipher=ARC2):
    """
    This unit provides RC2 encryption and decryption.
    """

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class rc4 (key)

This unit is implemented in refinery.units.crypto.cipher.rc4 and has the following commandline Interface:

usage: rc4 [-h] [-L] [-Q] [-0] [-v] [-R] key

RC4 encryption and decryption.

positional arguments:
  key            The encryption key.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class rc4(StandardCipherUnit, cipher=ARC4):
    """
    RC4 encryption and decryption.
    """
    def __init__(self, key):
        # The RC4 cipher takes no IV or mode; only a key is required.
        super().__init__(key)

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class rc4mod (key, *, size=256)

This unit is implemented in refinery.units.crypto.cipher.rc4mod and has the following commandline Interface:

usage: rc4mod [-h] [-L] [-Q] [-0] [-v] [-R] [-t N] key

Implements a modifiable version of the RC4 stream cipher where the size of
the RC4 table can be altered.

positional arguments:
  key            The encryption key.

optional arguments:
  -t, --size N   Table size, 256 by default.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class rc4mod(StreamCipherUnit):
    """
    Implements a modifiable version of the RC4 stream cipher where the size of
    the RC4 table can be altered.
    """

    def __init__(
        self, key, *,
        size: arg.number('-t', help='Table size, {default} by default.', bound=(1, None)) = 0x100
    ):
        super().__init__(key=key, size=size)

    def keystream(self):
        # Key scheduling (KSA). The table always spans at least 256 entries,
        # even when a smaller size is requested, but the swap index b is
        # reduced modulo the requested size throughout.
        size = self.args.size
        tablerange = range(max(size, 0x100))
        b, table = 0, bytearray(k & 0xFF for k in tablerange)
        for a, keybyte in zip(tablerange, cycle(self.args.key)):
            t = table[a]
            b = (b + keybyte + t) % size
            table[a] = table[b]
            table[b] = t
        # Pseudo-random generation (PRGA) with all indices taken mod size.
        b, a = 0, 0
        while True:
            a = (a + 1) % size
            t = table[a]
            b = (b + t) % size
            table[a] = table[b]
            table[b] = t
            # After the swap above, table[b] == t, so this is the classic RC4
            # output table[(table[a] + table[b]) % size].
            yield table[(table[a] + t) % size]

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class rncrypt (password)

This unit is implemented in refinery.units.crypto.cipher.rncrypt and has the following commandline Interface:

usage: rncrypt [-h] [-L] [-Q] [-0] [-v] [-R] password

Implements encryption and decryption using the RNCryptor specification.
See also: https://github.com/RNCryptor

positional arguments:
  password

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class rncrypt(Unit):
    """
    Implements encryption and decryption using the RNCryptor specification.
    See also: https://github.com/RNCryptor
    """
    def __init__(self, password: bytearray):
        super().__init__(password=password)

    def process(self, data: bytes) -> bytes:
        # Parse the container: 2 header bytes, an 8-byte encryption salt, an
        # 8-byte HMAC salt, a 16-byte IV, the ciphertext, and a trailing
        # 32-byte HMAC-SHA256 signature.
        encryption_salt = data[2:10]
        hmac_salt = data[10:18]
        iv = data[18:34]
        cipher_text = data[34:-32]
        hmac_signature = data[-32:]
        encryption_key = self._pbkdf2(self.args.password, encryption_salt)
        hmac_key = self._pbkdf2(self.args.password, hmac_salt)
        # Authenticate everything except the signature itself before touching
        # the ciphertext; compare_digest avoids a timing side channel.
        if not hmac.compare_digest(self._hmac(hmac_key, data[:-32]), hmac_signature):
            raise ValueError("Failed to verify signature.")
        return unpad(
            self._aes_decrypt(encryption_key, iv, cipher_text),
            block_size=AES.block_size
        )

    def reverse(self, data: bytes) -> bytes:
        # NOTE: the order of the prng.read calls is significant; it fixes the
        # byte layout (salts, then IV) of the emitted container.
        prng = Random.new()
        data = pad(data, block_size=AES.block_size)
        encryption_salt = prng.read(8)
        encryption_key = self._pbkdf2(self.args.password, encryption_salt)
        hmac_salt = prng.read(8)
        hmac_key = self._pbkdf2(self.args.password, hmac_salt)
        iv = prng.read(AES.block_size)
        cipher_text = self._aes_encrypt(encryption_key, iv, data)
        # Header bytes: format version 3, options 1 (password-based key).
        new_data = b'\x03\x01' + encryption_salt + hmac_salt + iv + cipher_text
        return new_data + self._hmac(hmac_key, new_data)

    def _aes_encrypt(self, key, iv, text):
        # AES in CBC mode, as mandated by the RNCryptor specification.
        return AES.new(key, AES.MODE_CBC, iv).encrypt(text)

    def _aes_decrypt(self, key, iv, text):
        return AES.new(key, AES.MODE_CBC, iv).decrypt(text)

    def _hmac(self, key, data):
        return hmac.new(key, data, hashlib.sha256).digest()

    def _prf(self, secret, salt):
        # PBKDF2 pseudo-random function: HMAC-SHA1.
        return hmac.new(secret, salt, hashlib.sha1).digest()

    def _pbkdf2(self, password, salt, iterations=10000, key_length=32):
        return KDF.PBKDF2(password, salt, dkLen=key_length, count=iterations, prf=self._prf)

Ancestors

Inherited members

class rot (amount=13)

This unit is implemented in refinery.units.crypto.cipher.rot and has the following commandline Interface:

usage: rot [-h] [-L] [-Q] [-0] [-v] [amount]

Rotate the characters of the alphabet by the given amount. The default
amount is 13, providing the common (and weak) string obfuscation method.

positional arguments:
  amount         Number of letters to rotate by; Default is 13.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class rot(Unit):
    """
    Rotate the characters of the alphabet by the given amount. The default
    amount is 13, providing the common (and weak) string obfuscation method.
    """

    def __init__(self, amount: arg(help='Number of letters to rotate by; Default is 13.', type=number[1:25]) = 13):
        super().__init__(amount=amount)

    def process(self, data):
        # Build a 256-entry translation table once and let bytes.translate do
        # the per-byte work in C, instead of calling a Python-level function
        # for every input byte. Only A-Z (0x41..0x5A) and a-z (0x61..0x7A)
        # are rotated; all other byte values map to themselves.
        amount = self.args.amount
        table = bytearray(range(0x100))
        for base in (0x41, 0x61):
            for k in range(26):
                table[base + k] = base + (k + amount) % 26
        return bytes(data).translate(bytes(table))

Ancestors

Inherited members

class rsa (key, swapkeys=False, textbook=False, padding=PAD.AUTO, rsautl=False)

This unit is implemented in refinery.units.crypto.cipher.rsa and has the following commandline Interface:

usage: rsa [-h] [-L] [-Q] [-0] [-v] [-R] [-s] [-t | -p PAD | -r] key

Implements single block RSA encryption and decryption. This unit can be
used to encrypt and decrypt blocks generated by openssl's rsautl tool when
using the mode -verify. When it is executed with a public key for
decryption or with a private key for encryption, it will perform a raw RSA
operation. The result of these operations are (un)padded using EMSA-
PKCS1-v1_5.

positional arguments:
  key                RSA key in PEM, DER, or Microsoft BLOB format.

optional arguments:
  -s, --swapkeys     Swap public and private exponent.
  -t, --textbook     Equivalent to --padding=NONE.
  -p, --padding PAD  Choose one of the following padding modes: AUTO,
                     NONE, OAEP, PKCS15, PKCS10. The default is AUTO.
  -r, --rsautl       Act as rsautl from OpenSSL; This is equivalent to
                     --swapkeys --padding=PKCS10

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class rsa(Unit):
    """
    Implements single block RSA encryption and decryption. This unit can be used to encrypt
    and decrypt blocks generated by openssl's `rsautl` tool when using the mode `-verify`.
    When it is executed with a public key for decryption or with a private key for encryption,
    it will perform a raw RSA operation. The result of these operations are (un)padded using
    EMSA-PKCS1-v1_5.
    """
    def __init__(
        self,
        key: arg(help='RSA key in PEM, DER, or Microsoft BLOB format.'),
        swapkeys: arg.switch('-s', help='Swap public and private exponent.') = False,
        textbook: arg.switch('-t', group='PAD', help='Equivalent to --padding=NONE.') = False,
        padding : arg.option('-p', group='PAD', choices=PAD,
            help='Choose one of the following padding modes: {choices}. The default is AUTO.') = PAD.AUTO,
        rsautl  : arg.switch('-r', group='PAD',
            # Bugfix: rsautl is an OpenSSL tool; the help text previously said OpenSSH.
            help='Act as rsautl from OpenSSL; This is equivalent to --swapkeys --padding=PKCS10') = False,
    ):
        padding = arg.as_option(padding, PAD)
        if textbook:
            if padding != PAD.AUTO:
                raise ValueError('Conflicting padding options!')
            padding = padding.NONE
        if rsautl:
            if padding and padding != PAD.PKCS10:
                raise ValueError('Conflicting padding options!')
            swapkeys = True
            padding = PAD.PKCS10

        super().__init__(key=key, textbook=textbook, padding=padding, swapkeys=swapkeys)

        # Cache for the parsed key: the key blob is only re-parsed when its
        # hash changes (see the `key` property below).
        self._key_hash = None
        self._key_data = None

    @property
    def blocksize(self) -> int:
        # Size of one RSA block in bytes, i.e. the size of the modulus.
        return self.key.size_in_bytes()

    @property
    def _blocksize_plain(self) -> int:
        # PKCS#1 v1.5 padding is at least 11 bytes.
        return self.blocksize - 11

    @property
    def pub(self):
        # Public exponent; honors --swapkeys.
        return self.key.d if self.args.swapkeys else self.key.e

    @property
    def prv(self):
        # Private exponent; honors --swapkeys.
        return self.key.e if self.args.swapkeys else self.key.d

    def _get_msg(self, data):
        msg = int.from_bytes(data, byteorder='big')
        # The message representative must be strictly smaller than the
        # modulus; msg == n would wrap to zero under modular exponentiation.
        if msg >= self.key.n:
            raise ValueError(F'This key can only handle messages of size {self.blocksize}.')
        return msg

    def _encrypt_raw(self, data):
        # Textbook RSA with the public exponent.
        return pow(
            self._get_msg(data),
            self.pub,
            self.key.n
        ).to_bytes(self.blocksize, byteorder='big')

    def _decrypt_raw(self, data):
        # Textbook RSA with the private exponent.
        return pow(
            self._get_msg(data),
            self.prv,
            self.key.n
        ).to_bytes(self.blocksize, byteorder='big')

    def _unpad(self, data, head, padbyte=None):
        # Strip an EMSA-PKCS1-v1_5 style padding: a fixed two-byte head,
        # padding bytes, a zero separator, then the payload.
        if len(data) > self.blocksize:
            raise ValueError(F'This key can only handle messages of size {self.blocksize}.')
        if data.startswith(head):
            pos = data.find(B'\0', 2)
            if pos > 0:
                pad = data[2:pos]
                if padbyte is None or all(b == padbyte for b in pad):
                    return data[pos + 1:]
        raise ValueError('Incorrect padding')

    def _pad(self, data, head, padbyte=None):
        # Apply an EMSA-PKCS1-v1_5 style padding; when padbyte is None,
        # nonzero random padding bytes are used (PKCS#1 v1.5 encryption).
        if len(data) > self._blocksize_plain:
            raise ValueError(F'This key can only encrypt messages of size at most {self._blocksize_plain}.')
        pad = self.blocksize - len(data) - len(head) - 1
        if padbyte is not None:
            padding = pad * bytes((padbyte,))
        else:
            # Draw random bytes until none of them are zero; zero would be
            # indistinguishable from the separator byte.
            padding = bytearray(1)
            while not all(padding):
                padding = bytearray(filter(None, padding))
                padding.extend(get_random_bytes(pad - len(padding)))
        return head + padding + B'\0' + data

    def _unpad_pkcs10(self, data):
        return self._unpad(data, B'\x00\x01', 0xFF)

    def _unpad_pkcs15(self, data):
        return self._unpad(data, B'\x00\x02', None)

    def _pad_pkcs10(self, data):
        return self._pad(data, B'\x00\x01', 0xFF)

    def _pad_pkcs15(self, data):
        return self._pad(data, B'\x00\x02', None)

    def _decrypt_block_OAEP(self, data):
        self.log_debug('Attempting decryption with PyCrypto PKCS1 OAEP.')
        result = PKCS1_OAEP.new(self.key).decrypt(data)
        if result is not None:
            return result
        raise ValueError('OAEP decryption was unsuccessful.')

    def _encrypt_block_OAEP(self, data):
        self.log_debug('Attempting encryption with PyCrypto PKCS1 OAEP.')
        result = PKCS1_OAEP.new(self.key).encrypt(data)
        # Bugfix: this check was inverted — it raised on a successful
        # encryption and returned None on failure. Mirror the logic of
        # _decrypt_block_OAEP instead.
        if result is not None:
            return result
        raise ValueError('OAEP encryption was unsuccessful.')

    def _decrypt_block(self, data):
        # Try OAEP first when the padding mode permits it; on failure under
        # AUTO, fall through to raw decryption plus PKCS1 unpadding.
        if self._oaep and self._pads in {PAD.AUTO, PAD.OAEP}:
            try:
                return self._decrypt_block_OAEP(data)
            except ValueError:
                if self._pads: raise
                self.log_debug('PyCrypto primitives failed, no longer attempting OAEP.')
                self._oaep = False

        result = self._decrypt_raw(data)

        if self._pads == PAD.NONE:
            return result
        elif self._pads == PAD.PKCS10:
            return self._unpad_pkcs10(result)
        elif self._pads == PAD.PKCS15:
            return self._unpad_pkcs15(result)
        elif self._pads == PAD.AUTO:
            # Padding auto-detection: remember whichever scheme succeeds so
            # subsequent blocks skip the detection.
            with suppress(ValueError):
                data = self._unpad_pkcs10(result)
                self.log_info('Detected PKCS1.0 padding.')
                self._pads = PAD.PKCS10
                return data
            with suppress(ValueError):
                data = self._unpad_pkcs15(result)
                self.log_info('Detected PKCS1.5 padding.')
                self._pads = PAD.PKCS15
                return data
            self.log_warn('No padding worked, returning raw decrypted blocks.')
            self._pads = PAD.NONE
            return result
        else:
            raise ValueError(F'Invalid padding value: {self._pads!r}')

    def _encrypt_block(self, data):
        if self._pads in {PAD.AUTO, PAD.OAEP}:
            try:
                return self._encrypt_block_OAEP(data)
            except ValueError:
                if self._pads: raise
                self.log_debug('PyCrypto primitives for OAEP failed, falling back to PKCS1.5.')
                self._pads = PAD.PKCS15

        if self._pads == PAD.PKCS15:
            data = self._pad_pkcs15(data)
        elif self._pads == PAD.PKCS10:
            data = self._pad_pkcs10(data)

        return self._encrypt_raw(data)

    @property
    def key(self) -> RSA.RsaKey:
        # Parse (and cache) the key material; re-parse only when the blob changes.
        key_blob = self.args.key
        key_hash = hash(key_blob)
        if key_hash != self._key_hash:
            self._key_hash = key_hash
            self._key_data = normalize_rsa_key(key_blob)
        return self._key_data

    def process(self, data):
        if not self.key.has_private():
            # A public key cannot decrypt; emulate `openssl rsautl -verify`
            # by applying the raw public-key operation and removing the
            # PKCS1.0 signature padding.
            try:
                return self._unpad_pkcs10(self._encrypt_raw(data))
            except Exception as E:
                raise ValueError('A public key was given for decryption and rsautl mode resulted in an error.') from E
        self._oaep = True
        self._pads = self.args.padding
        return B''.join(self._decrypt_block(block) for block in splitchunks(data, self.blocksize))

    def reverse(self, data):
        self._pads = self.args.padding
        return B''.join(self._encrypt_block(block) for block in splitchunks(data, self._blocksize_plain))

Ancestors

Instance variables

var blocksize
Expand source code Browse git
@property
def blocksize(self) -> int:
    return self.key.size_in_bytes()
var pub
Expand source code Browse git
@property
def pub(self):
    return self.key.d if self.args.swapkeys else self.key.e
var prv
Expand source code Browse git
@property
def prv(self):
    return self.key.e if self.args.swapkeys else self.key.d
var key
Expand source code Browse git
@property
def key(self) -> RSA.RsaKey:
    key_blob = self.args.key
    key_hash = hash(key_blob)
    if key_hash != self._key_hash:
        self._key_hash = key_hash
        self._key_data = normalize_rsa_key(key_blob)
    return self._key_data

Inherited members

class rsakey (output=RSAFormat.PEM)

This unit is implemented in refinery.units.crypto.cipher.rsakey and has the following commandline Interface:

usage: rsakey [-h] [-L] [-Q] [-0] [-v] [RSAFormat]

Parse RSA keys in various formats; PEM, DER, Microsoft BLOB, and W3C-XKMS
(XML) format are supported.

positional arguments:
  RSAFormat      Select an output format (PEM, DER, XKMS, TEXT, JSON),
                 default is PEM.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class rsakey(Unit):
    """
    Parse RSA keys in various formats; PEM, DER, Microsoft BLOB, and W3C-XKMS (XML) format are supported.
    """
    def __init__(self, output: arg.option(
        choices=RSAFormat,
        help='Select an output format ({choices}), default is {default}.') = RSAFormat.PEM
    ):
        super().__init__(output=arg.as_option(output, RSAFormat))

    def _xkms_wrap(self, number: int):
        # Base64-encode the integer using the minimum number of big-endian bytes.
        size, r = divmod(number.bit_length(), 8)
        size += int(bool(r))
        return base64.b64encode(number.to_bytes(size, 'big'))

    def process(self, data):
        key = normalize_rsa_key(data)
        out = self.args.output
        # PEM and DER are direct exports; everything else is assembled from
        # the individual key components below.
        if out is RSAFormat.PEM:
            yield key.export_key('PEM')
            return
        if out is RSAFormat.DER:
            yield key.export_key('DER')
            return
        components = {
            'Modulus' : key.n,
            'Exponent': key.e,
        }
        if key.has_private():
            # Read the private components from the DER encoding, skipping the
            # first three fields (version, modulus, public exponent).
            decoded = DerSequence()
            decoded.decode(key.export_key('DER'))
            it = itertools.islice(decoded, 3, None)
            for v in ('D', 'P', 'Q', 'DP', 'DQ', 'InverseQ'):
                try:
                    components[v] = next(it)
                except StopIteration:
                    break
        if out is RSAFormat.XKMS:
            for tag in components:
                components[tag] = base64.b64encode(number.long_to_bytes(components[tag])).decode('ascii')
            tags = '\n'.join(F'\t<{tag}>{value}</{tag}>' for tag, value in components.items())
            yield F'<RSAKeyPair>\n{tags}\n</RSAKeyPair>'.encode(self.codec)
            return
        # TEXT and JSON render each component as an uppercase hex string.
        for tag in components:
            components[tag] = F'{components[tag]:X}'
        if out is RSAFormat.JSON:
            yield json.dumps(components).encode(self.codec)
            return
        if out is RSAFormat.TEXT:
            table = list(flattened(components))
            for key, value in table:
                value = '\n'.join(F'{L}' for L in textwrap.wrap(F'0x{value}', 80))
                yield F'-- {key+" ":-<77}\n{value!s}'.encode(self.codec)

Ancestors

Inherited members

class salsa (key, nonce=b'REFINERY', magic=b'', offset=0, rounds=20)

This unit is implemented in refinery.units.crypto.cipher.salsa and has the following commandline Interface:

usage: salsa [-h] [-L] [-Q] [-0] [-v] [-R] [-m MAGIC] [-x N] [-r N]
             key [nonce]

Salsa encryption and decryption. The nonce must be 8 bytes long.

positional arguments:
  key                The encryption key.
  nonce              The nonce. Default is the string REFINERY.

optional arguments:
  -m, --magic MAGIC  The magic constant; depends on the key size by
                     default.
  -x, --offset N     Optionally specify the stream index, default is 0.
  -r, --rounds N     The number of rounds. Has to be an even number.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class salsa(LatinCipherUnit):
    """
    Salsa encryption and decryption. The nonce must be 8 bytes long.
    """
    def keystream(self) -> Iterable[int]:
        # Delegate keystream generation to the pure Python Salsa implementation.
        args = self.args
        cipher = SalsaCipher(args.key, args.nonce, args.magic, args.rounds, args.offset)
        yield from cipher

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class salsa20 (key, nonce=b'REFINERY')

This unit is implemented in refinery.units.crypto.cipher.salsa and has the following commandline Interface:

usage: salsa20 [-h] [-L] [-Q] [-0] [-v] [-R] key [nonce]

Salsa20 encryption and decryption. This unit is functionally equivalent to
salsa with 20 rounds, but it uses the PyCryptodome library C
implementation rather than the pure Python implementation used by salsa.

positional arguments:
  key            The encryption key.
  nonce          The nonce. Default is the string REFINERY.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class salsa20(LatinCipherStandardUnit, cipher=Salsa20):
    """
    Salsa20 encryption and decryption. This unit is functionally equivalent to `refinery.salsa`
    with 20 rounds, but it uses the PyCryptodome library C implementation rather than the pure
    Python implementation used by `refinery.salsa`.
    """

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class seal (key)

This unit is implemented in refinery.units.crypto.cipher.seal and has the following commandline Interface:

usage: seal [-h] [-L] [-Q] [-0] [-v] [-R] key

SEAL encryption and decryption.

positional arguments:
  key            The encryption key.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class seal(StreamCipherUnit):
    """
    SEAL encryption and decryption.
    """
    key_sizes = 20

    def keystream(self) -> Iterable[bytes]:
        # The SEAL implementation itself is iterable and yields the keystream.
        key = self.args.key
        return SEAL_Cipher(key)

Ancestors

Class variables

var key_sizes
var blocksize

Inherited members

class secstr (key=b'\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10', iv=None)

This unit is implemented in refinery.units.crypto.cipher.secstr and has the following commandline Interface:

usage: secstr [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [key]

Implements the AES-based encryption scheme used by the PowerShell commands
ConvertFrom-SecureString and ConvertTo-SecureString.

positional arguments:
  key            Secure string encryption 16-byte AES key; the default are
                 the bytes from 1 to 16.

optional arguments:
  -I, --iv IV    Optionally specify an IV to use for encryption.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class secstr(Unit):
    """
    Implements the AES-based encryption scheme used by the PowerShell commands
    `ConvertFrom-SecureString` and `ConvertTo-SecureString`.
    """

    # This is a magic header value used for PowerShell secure strings.
    _MAGIC = bytes((
        0xEF, 0xAE, 0x3D, 0xD9, 0xDD, 0x75, 0xD7, 0xAE, 0xF8, 0xDD, 0xFD, 0x38,
        0xDB, 0x7E, 0x35, 0xDD, 0xBD, 0x7A, 0xD3, 0x9D, 0x1A, 0xE7, 0x7E, 0x39))

    # Secure strings include a decimal number formatted as a string directly
    # following the header. Presumably, this is the PowerShell version.
    _PSVER = 2

    # Bugfix: reverse() referenced self._IVERR, but the attribute was never
    # defined anywhere, so that code path would have raised an AttributeError
    # instead of the intended ValueError.
    _IVERR = 'The IV has to be 16 bytes long.'

    def __init__(
        self, key: arg(
            help='Secure string encryption 16-byte AES key; the default are the bytes from 1 to 16.'
        ) = bytes(range(1, 17)),
        iv: arg('-I', help='Optionally specify an IV to use for encryption.') = None
    ):
        super().__init__(key=key, iv=iv)

    @property
    def key(self):
        # Validated 16-byte AES key.
        key = self.args.key
        if len(key) != 0x10:
            raise ValueError('The encryption key has to be 16 bytes long.')
        return key

    @property
    def iv(self):
        # Optional IV; when present it must be exactly 16 bytes.
        iv = self.args.iv
        if iv is not None and len(iv) != 0x10:
            raise ValueError('The IV has to be 16 bytes long.')
        return iv

    def reverse(self, data):
        # Use the provided IV or generate a random one. The length check is
        # defensive; the iv property already validates user-supplied IVs.
        ivec = self.iv or urandom(0x10)
        if len(ivec) != 0x10:
            raise ValueError(self._IVERR)
        cipher = AES.new(self.key, AES.MODE_CBC, ivec)
        # PowerShell stores the plaintext as UTF-16LE before encryption.
        data = data.decode('latin-1').encode('utf-16LE')
        data = cipher.encrypt(pad(data, block_size=0x10))
        data = base64.b16encode(data).lower().decode('ascii')
        ivec = base64.b64encode(ivec).decode('ascii')
        # Layout: "<version>|<b64 IV>|<hex ciphertext>", UTF-16LE encoded,
        # prefixed with the magic header and base64-wrapped as a whole.
        data = '|'.join(('%d' % self._PSVER, ivec, data)).encode('utf-16LE')
        return base64.b64encode(self._MAGIC + data)

    def process(self, data):
        # Split on the UTF-16LE encoded '|' separator; expects exactly the
        # three fields produced by reverse().
        head, ivec, data = base64.b64decode(data).split(b'|\0')
        self.log_info('head:', head.hex())
        ivec = base64.b64decode(ivec.decode('utf-16LE'))
        self.log_info('ivec:', ivec.hex())
        data = base64.b16decode(data.decode('utf-16LE'), casefold=True)
        if len(data) % 0x10 != 0:
            self.log_info('data not block-aligned, padding with zeros')
            data += B'\0' * (0x10 - len(data) % 0x10)
        cipher = AES.new(self.key, AES.MODE_CBC, ivec)
        data = cipher.decrypt(data)
        try:
            data = unpad(data, block_size=0x10)
        except Exception:
            self.log_warn('decrypted data does not have PKCS7 padding')
        # Try progressively shorter suffixes until one decodes as UTF-16LE
        # text that also fits in latin-1; this skips residual padding bytes.
        for p in range(0x10):
            try:
                return data[-p:].decode('utf-16LE').encode('latin-1')
            except UnicodeDecodeError:
                pass
            except UnicodeEncodeError:
                pass
        self.log_warn('result is not a padded unicode string, key is likely wrong')
        return data

Ancestors

Instance variables

var key
Expand source code Browse git
@property
def key(self):
    key = self.args.key
    if len(key) != 0x10:
        raise ValueError('The encryption key has to be 16 bytes long.')
    return key
var iv
Expand source code Browse git
@property
def iv(self):
    iv = self.args.iv
    if iv is not None and len(iv) != 0x10:
        raise ValueError('The IV has to be 16 bytes long.')
    return iv

Inherited members

class vigenere (key, alphabet='abcdefghijklmnopqrstuvwxyz', case_sensitive=False, ignore_unknown=False)

This unit is implemented in refinery.units.crypto.cipher.vigenere and has the following commandline Interface:

usage: vigenere [-h] [-L] [-Q] [-0] [-v] [-R] [-c] [-i] key [alphabet]

Encryption and decryption using the Vigenère-Bellaso polyalphabetic
cipher.

positional arguments:
  key                   The encryption key
  alphabet              The alphabet, by default the Latin one is used:
                        "abcdefghijklmnopqrstuvwxyz"

optional arguments:
  -c, --case-sensitive  Unless this option is set, the key will be case
                        insensitive and the alphabet is assumed to contain
                        only lowercase letters. Any uppercase letter is
                        transformed using the same shift as would be the
                        lowercase variant, but case is retained.
  -i, --ignore-unknown  Unless this option is set, the key stream will be
                        iterated even for letters that are not contained
                        in the alphabet.

generic options:
  -h, --help            Show this help message and exit.
  -L, --lenient         Allow partial results as output.
  -Q, --quiet           Disables all log output.
  -0, --devnull         Do not produce any output.
  -v, --verbose         Specify up to two times to increase log level.
  -R, --reverse         Use the reverse operation.
Expand source code Browse git
class vigenere(Unit):
    """
    Encryption and decryption using the Vigenère-Bellaso polyalphabetic cipher.
    """

    def __init__(
        self,
        key: arg(type=str, help='The encryption key'),
        alphabet: arg(
            help='The alphabet, by default the Latin one is used: "{default}"'
        ) = 'abcdefghijklmnopqrstuvwxyz',
        case_sensitive: arg.switch('-c', help=(
            'Unless this option is set, the key will be case insensitive and '
            'the alphabet is assumed to contain only lowercase letters. Any '
            'uppercase letter is transformed using the same shift as would be '
            'the lowercase variant, but case is retained.'
        )) = False,
        ignore_unknown: arg.switch('-i', help=(
            'Unless this option is set, the key stream will be iterated even '
            'for letters that are not contained in the alphabet.'
        )) = False
    ):
        if not case_sensitive:
            key = key.lower()
            alphabet = alphabet.lower()
            if len(set(alphabet)) != len(alphabet):
                raise ValueError('Duplicate entries detected in alphabet.')
        if not set(key) <= set(alphabet):
            raise ValueError('key contains letters which are not from the given alphabet')
        # superinit forwards all current locals as keyword arguments; do not
        # introduce additional local variables before this call.
        self.superinit(super(), **vars())

    def _tabula_recta(self, data, reverse=True):
        # Yields the transformed letters one at a time. reverse=True shifts
        # backwards (used by process), reverse=False forwards (used by reverse).
        keystream = cycle(self.args.key)
        alphabet_size = len(self.args.alphabet)
        for letter in data:
            uppercase = not self.args.case_sensitive and letter.isupper()
            if uppercase:
                letter = letter.lower()
            try:
                position = self.args.alphabet.index(letter)
            except ValueError:
                # Letter outside the alphabet passes through unchanged; the
                # keystream still advances unless --ignore-unknown is set.
                yield letter
                if not self.args.ignore_unknown:
                    next(keystream)
                continue
            shift = self.args.alphabet.index(next(keystream))
            if reverse:
                position -= shift
            else:
                position += shift
            result = self.args.alphabet[position % alphabet_size]
            yield result.upper() if uppercase else result

    @unicoded
    def process(self, data):
        return ''.join(self._tabula_recta(data, True))

    @unicoded
    def reverse(self, data):
        return ''.join(self._tabula_recta(data, False))

Ancestors

Inherited members

class xtea (key, padding=None)

This unit is implemented in refinery.units.crypto.cipher.xtea and has the following commandline Interface:

usage: xtea [-h] [-L] [-Q] [-0] [-v] [-R] [-P ALG] key

XTEA encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class xtea(BlockCipherUnitBase):
    """
    XTEA encryption and decryption.
    """
    # NOTE(review): standard XTEA uses an 8-byte block; this unit declares 16
    # and processes two 32-bit words (8 bytes) per round-function call while
    # packing the result into 64-bit values — confirm this deviation from the
    # reference cipher is intentional.
    blocksize = 16
    key_sizes = 16

    def __init__(self, key, padding=None):
        super().__init__(key=key, padding=padding)

    @property
    def key(self):
        # Interpret the 16-byte key as four 32-bit words (native byte order).
        return struct.unpack('4I', self.args.key)

    def encrypt(self, data):
        # Consume the 32-bit words pairwise; each pair becomes one 64-bit
        # output block.
        it = iter(self._load32(data))
        return self._stor64(self._encrypt_block(y, z, *self.key) for y, z in zip(it, it))

    def decrypt(self, data):
        it = iter(self._load32(data))
        return self._stor64(self._decrypt_block(y, z, *self.key) for y, z in zip(it, it))

    @staticmethod
    def _encrypt_block(y, z, k1, k2, k3, k4):
        # 32 Feistel rounds; delta is the golden-ratio derived constant.
        # NOTE(review): with Python operator precedence this round function is
        # ((z<<4)+k1) ^ (z+sum) ^ ((z>>5)+k2), which matches TEA rather than
        # XTEA — verify against the intended reference implementation.
        sum_t = 0
        delta = 0x9E3779B9
        for _ in range(32, 0, -1):
            sum_t = (sum_t + delta) & 0xFFFFFFFF
            y = y + ((z << 4) + k1 ^ z + sum_t ^ (z >> 5) + k2) & 0xFFFFFFFF
            z = z + ((y << 4) + k3 ^ y + sum_t ^ (y >> 5) + k4) & 0xFFFFFFFF
        return y + (z << 0x20)

    @staticmethod
    def _decrypt_block(y, z, k1, k2, k3, k4):
        # Inverse of _encrypt_block, starting from the final round sum
        # 32 * delta mod 2**32 and counting down.
        sum_t = 0xC6EF3720
        delta = 0x9E3779B9
        for _ in range(32, 0, -1):
            z = z - ((y << 4) + k3 ^ y + sum_t ^ (y >> 5) + k4) & 0xFFFFFFFF
            y = y - ((z << 4) + k1 ^ z + sum_t ^ (z >> 5) + k2) & 0xFFFFFFFF
            sum_t = (sum_t - delta) & 0xFFFFFFFF
        return y + (z << 0x20)

    @staticmethod
    def _load32(vector):
        # NOTE(review): the error message claims a 16-byte boundary but the
        # check only enforces a multiple of 4; the base class presumably pads
        # to blocksize first — confirm.
        Q, R = divmod(len(vector), 4)
        if R > 0:
            raise ValueError('Data not padded to a 16 byte boundary.')
        yield from struct.unpack(F'{Q}I', vector)

    @staticmethod
    def _stor64(vector):
        # Pack each combined 64-bit block value back into bytes.
        vector = tuple(vector)
        return struct.pack(F'{len(vector)}Q', *vector)

Ancestors

Class variables

var blocksize
var key_sizes

Instance variables

var key
Expand source code Browse git
@property
def key(self):
    return struct.unpack('4I', self.args.key)

Inherited members

class adler32 (text=False)

This unit is implemented in refinery.units.crypto.hash.checksums and has the following commandline Interface:

usage: adler32 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the Adler32 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class adler32(HashUnit):
    """
    Returns the Adler32 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data: bytes) -> bytes:
        # Compute the Adler-32 checksum and serialize it big-endian.
        checksum = zlib.adler32(data)
        return struct.pack('>I', checksum)

Ancestors

Inherited members

class crc32 (text=False)

This unit is implemented in refinery.units.crypto.hash.checksums and has the following commandline Interface:

usage: crc32 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the CRC32 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class crc32(HashUnit):
    """
    Returns the CRC32 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data: bytes) -> bytes:
        # Compute the CRC-32 checksum and serialize it big-endian.
        checksum = zlib.crc32(data)
        return struct.pack('>I', checksum)

Ancestors

Inherited members

class blk224 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: blk224 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the BLK224 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class blk224(HashUnit):
    """
    Returns the BLK224 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # BLAKE2b with a 224-bit (28 byte) digest.
        return hashlib.blake2b(data, digest_size=28)

Ancestors

Inherited members

class blk256 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: blk256 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the BLK256 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class blk256(HashUnit):
    """
    Returns the BLK256 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # BLAKE2b with a 256-bit (32 byte) digest.
        return hashlib.blake2b(data, digest_size=32)

Ancestors

Inherited members

class blk384 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: blk384 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the BLK384 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class blk384(HashUnit):
    """
    Returns the BLK384 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # BLAKE2b with a 384-bit (48 byte) digest.
        return hashlib.blake2b(data, digest_size=48)

Ancestors

Inherited members

class blk512 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: blk512 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the BLK512 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class blk512(HashUnit):
    """
    Returns the BLK512 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # BLAKE2b with a 512-bit (64 byte) digest.
        return hashlib.blake2b(data, digest_size=64)

Ancestors

Inherited members

class md2 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: md2 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the MD2 hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class md2(HashUnit):
    """
    Returns the MD2 hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # MD2 is not available in hashlib; use the PyCryptodome implementation.
        # Imported lazily so the unit only requires PyCryptodome when used.
        from Crypto.Hash import MD2
        return MD2.new(data)

Ancestors

Inherited members

class md4 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: md4 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the MD4 hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class md4(HashUnit):
    """
    Returns the MD4 hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # MD4 is not reliably available in hashlib; use the PyCryptodome
        # implementation. Imported lazily so the dependency is only required
        # when the unit is used.
        from Crypto.Hash import MD4
        return MD4.new(data)

Ancestors

Inherited members

class md5 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: md5 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the MD5 hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class md5(HashUnit):
    """
    Returns the MD5 hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # Delegate to the stdlib MD5 implementation.
        return hashlib.md5(data)

Ancestors

Inherited members

class sha1 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha1 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA1 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha1(HashUnit):
    """
    Returns the SHA1 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # Delegate to the stdlib SHA-1 implementation.
        return hashlib.sha1(data)

Ancestors

Inherited members

class sha224 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha224 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA224 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha224(HashUnit):
    """
    Returns the SHA224 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # Delegate to the stdlib SHA-224 implementation.
        return hashlib.sha224(data)

Ancestors

Inherited members

class sha256 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha256 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA256 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha256(HashUnit):
    """
    Returns the SHA256 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # Delegate to the stdlib SHA-256 implementation.
        return hashlib.sha256(data)

Ancestors

Inherited members

class sha384 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha384 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA384 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha384(HashUnit):
    """
    Returns the SHA384 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # Delegate to the stdlib SHA-384 implementation.
        return hashlib.sha384(data)

Ancestors

Inherited members

class sha512 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha512 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA512 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha512(HashUnit):
    """
    Returns the SHA512 Hash of the input data.
    """
    @classmethod
    def _algorithm(cls, data):
        # Delegate to the stdlib SHA-512 implementation.
        return hashlib.sha512(data)

Ancestors

Inherited members

class imphash (text=False)

This unit is implemented in refinery.units.crypto.hash.imphash and has the following commandline Interface:

usage: imphash [-h] [-L] [-Q] [-0] [-v] [-t]

Implements the import hash for PE files.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class imphash(HashUnit):
    """
    Implements the import hash for PE files.
    """

    def process(self, data):
        # Fast-load the PE and parse only the import directory; a full parse
        # is not required to compute the import hash.
        pe = PE(data=data, fast_load=True)
        pe.parse_data_directories(directories=[IMAGE_DIRECTORY_ENTRY_IMPORT])
        th = pe.get_imphash()
        if not th:
            # get_imphash returned an empty value; the file has no imports.
            raise ValueError('no import directory.')
        # With -t/--text, emit the hex string; otherwise the raw digest bytes.
        return th.encode(self.codec) if self.args.text else bytes.fromhex(th)

Ancestors

Inherited members

class xxh (text=False)

This unit is implemented in refinery.units.crypto.hash.xxhash and has the following commandline Interface:

usage: xxh [-h] [-L] [-Q] [-0] [-v] [-t]

Implements the xxHash hashing algorithm.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class xxh(HashUnit):
    """
    Implements the xxHash hashing algorithm.
    """
    @classmethod
    def _algorithm(cls, data):
        # Delegate to the xxhash implementation imported by this module.
        return xxhash(data)

Ancestors

Inherited members

class CryptDeriveKey (size, hash='MD5')

This unit is implemented in refinery.units.crypto.keyderive.CryptDeriveKey and has the following commandline Interface:

usage: CryptDeriveKey [-h] [-L] [-Q] [-0] [-v] size [hash]

An implementation of the CryptDeriveKey routine available from the Win32
API.

positional arguments:
  size           The number of bytes to generate.
  hash           Specify one of these algorithms (default is MD5): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class CryptDeriveKey(KeyDerivation):
    """
    An implementation of the CryptDeriveKey routine available from the Win32 API.
    """

    def __init__(self, size, hash='MD5'):
        # The unit refuses to generate more than 21 bytes.
        # NOTE(review): the legacy expansion below can provide up to twice the
        # digest size; confirm the 21-byte cap against the Win32 spec.
        if size > 21:
            raise ValueError('The CryptDeriveKey specification only provides keys up to length 21.')
        super().__init__(size=size, salt=None, hash=hash)

    def process(self, data):
        def digest(x):
            # Shorthand: one-shot digest with the configured hash algorithm.
            return self.hash.new(x).digest()
        # For the SHA-2 family the derived key is simply a truncated digest.
        if self.args.hash in (HASH.SHA224, HASH.SHA256, HASH.SHA384, HASH.SHA512):
            return digest(data)[:self.args.size]
        # Legacy path: expand the password digest through two 64-byte buffers
        # XORed with 0x36/0x5C (the HMAC ipad/opad constants), yielding at
        # most twice the digest size of key material.
        max_size = 2 * self.hash.digest_size
        value = digest(data)
        del data
        buffer1 = bytearray([0x36] * 64)
        buffer2 = bytearray([0x5C] * 64)
        for k, b in enumerate(value):
            buffer1[k] ^= b
            buffer2[k] ^= b
        buffer = digest(buffer1) + digest(buffer2)
        if self.args.size > max_size:
            # More bytes requested than the construction provides; surface the
            # available bytes as a partial result.
            raise RefineryPartialResult(
                F'too many bytes requested, can only provide {max_size}',
                partial=buffer
            )
        return buffer[:self.args.size]

Ancestors

Inherited members

class DESDerive (size=8)

This unit is implemented in refinery.units.crypto.keyderive.DESDerive and has the following commandline Interface:

usage: DESDerive [-h] [-L] [-Q] [-0] [-v] [size]

Implements the same functionality as DES_string_to_key in OpenSSL. It
converts a string to an 8 byte DES key with odd byte parity, per FIPS
specification. This is not a modern key derivation function.

positional arguments:
  size           The number of bytes to generate, default is the maximum
                 of 8.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class DESDerive(KeyDerivation):
    """
    Implements the same functionality as `DES_string_to_key` in OpenSSL. It
    converts a string to an 8 byte DES key with odd byte parity, per FIPS
    specification. This is not a modern key derivation function.
    """
    def __init__(self, size: arg(help='The number of bytes to generate, default is the maximum of 8.') = 8):
        super().__init__(size=size, salt=None)

    def process(self, password):
        key = bytearray(8)

        # Fold the password into the 8-byte key: bytes from even groups of 8
        # are shifted left one bit and XORed in order; bytes from odd groups
        # are bit-reversed and XORed in from the opposite end.
        for i, j in enumerate(password):
            if ((i % 16) < 8):
                key[i % 8] ^= (j << 1) & 0xFF
            else:
                # Reverse the bit order of j: swap nibbles, then bit pairs,
                # then adjacent bits.
                j = (((j << 4) & 0xf0) | ((j >> 4) & 0x0f))
                j = (((j << 2) & 0xcc) | ((j >> 2) & 0x33))
                j = (((j << 1) & 0xaa) | ((j >> 1) & 0x55))
                key[7 - (i % 8)] ^= j

        des_set_odd_parity(key)

        if password:
            # Chain DES-ECB encryptions of the zero-padded password over the
            # running key, re-applying odd parity at the end, as
            # DES_string_to_key does.
            n = len(password)
            password = password.ljust(n + 7 - ((n - 1) % 8), b'\0')
            des = DES.new(key, DES.MODE_ECB)
            for k in range(0, n, 8):
                key[:] = des.encrypt(strxor(password[k:k + 8], key))
            des_set_odd_parity(key)

        if self.args.size > 8:
            # The algorithm can only ever yield 8 bytes; emit them as partial.
            raise RefineryPartialResult('DESDerive can provide at most 8 bytes.', partial=key)

        return key[:self.args.size]

Ancestors

Inherited members

class PasswordDeriveBytes (size, salt, iter=100, hash='SHA1')

This unit is implemented in refinery.units.crypto.keyderive.PasswordDeriveBytes and has the following commandline Interface:

usage: PasswordDeriveBytes [-h] [-L] [-Q] [-0] [-v]
                           size salt [iter] [hash]

An implementation of the PasswordDeriveBytes routine available from the
.NET standard library. According to documentation, it is an extension of
PBKDF1.

positional arguments:
  size           The number of bytes to generate.
  salt           Salt for the derivation.
  iter           Number of iterations; default is 100.
  hash           Specify one of these algorithms (default is SHA1): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class PasswordDeriveBytes(KeyDerivation):
    """
    An implementation of the PasswordDeriveBytes routine available from the .NET
    standard library. According to documentation, it is an extension of PBKDF1.
    """
    def __init__(self, size, salt, iter=100, hash='SHA1'):
        # vars() captures the constructor arguments by name and forwards them
        # to the KeyDerivation base class.
        self.superinit(super(), **vars())

    def process(self, data):
        # .NET hashes the UTF-8 encoding of the password; transcode if the
        # unit is configured with a different codec.
        if self.codec != 'UTF8':
            data = data.decode(self.codec).encode('UTF8')
        data += self.args.salt
        # Hash iter-1 times to obtain the seed value ...
        for _ in range(self.args.iter - 1):
            data = self.hash.new(data).digest()
        counter, seedhash = 1, data
        data = self.hash.new(data).digest()
        # ... then extend the output by hashing the decimal counter prepended
        # to the seed until enough bytes are available.
        while len(data) < self.args.size:
            data += self.hash.new(B'%d%s' % (counter, seedhash)).digest()
            counter += 1
        return data[:self.args.size]

Ancestors

Inherited members

class HKDF (size, salt, hash='SHA512')

This unit is implemented in refinery.units.crypto.keyderive.hkdf and has the following commandline Interface:

usage: HKDF [-h] [-L] [-Q] [-0] [-v] size salt [hash]

HKDF Key derivation

positional arguments:
  size           The number of bytes to generate.
  salt           Salt for the derivation.
  hash           Specify one of these algorithms (default is SHA512): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class HKDF(KeyDerivation):
    """
    HKDF Key derivation
    """

    def __init__(self, size, salt, hash='SHA512'):
        super().__init__(size=size, salt=salt, hash=hash)

    def process(self, data):
        # Delegate to the HKDF implementation imported by this module,
        # using the configured output size, salt, and hash algorithm.
        size = self.args.size
        salt = self.args.salt
        return HKDF_(data, size, salt, self.hash)

Ancestors

Inherited members

class hmac (salt, hash='SHA1', size=None)

This unit is implemented in refinery.units.crypto.keyderive.hmac and has the following commandline Interface:

usage: hmac [-h] [-L] [-Q] [-0] [-v] salt [hash] [size]

HMAC based key derivation

positional arguments:
  salt           Salt for the derivation.
  hash           Specify one of these algorithms (default is SHA1): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384
  size           The number of bytes to generate.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class hmac(KeyDerivation):
    """
    HMAC based key derivation
    """

    def __init__(self, salt, hash='SHA1', size=None):
        super().__init__(salt=salt, size=size, hash=hash)

    def process(self, data):
        # Key the HMAC with the input data and digest the configured salt.
        mac = HMAC.new(data, self.args.salt, digestmod=self.hash)
        return mac.digest()

Ancestors

Inherited members

class kblob

This unit is implemented in refinery.units.crypto.keyderive.kblob and has the following commandline Interface:

usage: kblob [-h] [-L] [-Q] [-0] [-v]

Extracts a key from a Microsoft Crypto API BLOB structure.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class kblob(Unit):
    """
    Extracts a key from a Microsoft Crypto API BLOB structure.
    """

    def process(self, data):
        # Parse the BLOB header and log its type and algorithm identifiers.
        blob = CRYPTOKEY(data)
        header = blob.header
        self.log_info(F'BLOB Type: {header.type!s}')
        self.log_info(F'Algorithm: {header.algorithm!s}')
        try:
            return bytes(blob.key)
        except AttributeError as A:
            # This BLOB type does not carry extractable key material.
            raise ValueError(F'unable to derive key from {header.type!s}') from A

Ancestors

Inherited members

class PBKDF1 (size, salt=b'\x00\x00\x00\x00\x00\x00\x00\x00', iter=1000, hash='SHA1')

This unit is implemented in refinery.units.crypto.keyderive.pbkdf1 and has the following commandline Interface:

usage: PBKDF1 [-h] [-L] [-Q] [-0] [-v] size [salt] [iter] [hash]

PBKDF1 Key derivation

positional arguments:
  size           The number of bytes to generate.
  salt           Salt for the derivation; default are 8 null bytes.
  iter           Number of iterations; default is 1000.
  hash           Specify one of these algorithms (default is SHA1): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class PBKDF1(KeyDerivation):
    """PBKDF1 Key derivation"""

    @arg('salt', help='Salt for the derivation; default are 8 null bytes.')
    def __init__(self, size, salt=bytes(8), iter=1000, hash='SHA1'):
        # vars() captures the constructor arguments by name and forwards them
        # to the KeyDerivation base class.
        self.superinit(super(), **vars())

    def process(self, data):
        # Delegate to the PBKDF1 implementation imported by this module; the
        # password is the input data decoded with the unit codec.
        return PBKDF1_(
            data.decode(self.codec),
            self.args.salt,
            dkLen=self.args.size,
            count=self.args.iter,
            hashAlgo=self.hash
        )

Ancestors

Inherited members

class PBKDF2 (size, salt, iter=1000, hash='SHA1')

This unit is implemented in refinery.units.crypto.keyderive.pbkdf2 and has the following commandline Interface:

usage: PBKDF2 [-h] [-L] [-Q] [-0] [-v] size salt [iter] [hash]

PBKDF2 Key derivation

positional arguments:
  size           The number of bytes to generate.
  salt           Salt for the derivation.
  iter           Number of iterations; default is 1000.
  hash           Specify one of these algorithms (default is SHA1): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class PBKDF2(KeyDerivation):
    """PBKDF2 Key derivation"""

    def __init__(self, size, salt, iter=1000, hash='SHA1'):
        # vars() captures the constructor arguments by name and forwards them
        # to the KeyDerivation base class.
        self.superinit(super(), **vars())

    def process(self, data):
        # Delegate to the PBKDF2 implementation imported by this module; the
        # password is the input data decoded with the unit codec.
        return PBKDF2_(
            data.decode(self.codec),
            self.args.salt,
            dkLen=self.args.size,
            hmac_hash_module=self.hash,
            count=self.args.iter
        )

Ancestors

Inherited members

class ucrypt (size=13, salt=b'AA')

This unit is implemented in refinery.units.crypto.keyderive.unixcrypt and has the following commandline Interface:

usage: ucrypt [-h] [-L] [-Q] [-0] [-v] [size] [salt]

Implements the classic Unix crypt algorithm.

positional arguments:
  size           The number of bytes to generate, default is 13.
  salt           Salt for the derivation, the default is "AA".

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class ucrypt(KeyDerivation):
    """
    Implements the classic Unix crypt algorithm.
    """
    def __init__(
        self,
        size: arg(help='The number of bytes to generate, default is 13.') = 13,
        salt: arg(help='Salt for the derivation, the default is "AA".') = B'AA'
    ):
        super().__init__(size=size, salt=salt)

    def process(self, data):
        requested = self.args.size
        crypted = bytes(UnixCrypt(data, salt=self.args.salt))
        if len(crypted) >= requested:
            # Truncate to the requested number of bytes.
            return crypted[:requested]
        # Not enough output available; surface what we have as partial.
        raise RefineryPartialResult(
            F'unix crypt only provided {len(crypted)} bytes, but {requested} '
            F'were requested.', partial=crypted
        )

Ancestors

Inherited members

class b32

This unit is implemented in refinery.units.encoding.b32 and has the following commandline Interface:

usage: b32 [-h] [-L] [-Q] [-0] [-v] [-R]

Base32 encoding and decoding.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class b32(Unit):
    """
    Base32 encoding and decoding.
    """
    def reverse(self, data):
        # Encode the binary input as Base32 text.
        return base64.b32encode(data)

    def process(self, data):
        # Decode Base32; casefold makes lowercase input acceptable.
        return base64.b32decode(data, casefold=True)

Ancestors

Inherited members

class b64 (urlsafe=False)

This unit is implemented in refinery.units.encoding.b64 and has the following commandline Interface:

usage: b64 [-h] [-L] [-Q] [-0] [-v] [-R] [-u]

Base64 encoding and decoding.

optional arguments:
  -u, --urlsafe  use URL-safe alphabet

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class b64(Unit):
    """
    Base64 encoding and decoding.
    """
    def __init__(self, urlsafe: arg.switch('-u', help='use URL-safe alphabet') = False):
        super().__init__(urlsafe=urlsafe)

    @property
    def altchars(self):
        # Replacement characters for '+' and '/' when -u is set; None selects
        # the standard alphabet.
        return B'-_' if self.args.urlsafe else None

    def reverse(self, data):
        # Encode to Base64, honoring the configured alphabet.
        return base64.b64encode(data, altchars=self.altchars)

    def process(self, data: bytearray):
        if len(data) < 2:
            raise ValueError('single byte can not be base64-decoded.')
        # Append surplus padding so inputs with stripped padding still decode;
        # the decoder tolerates the extra characters.
        data.extend(B'===')
        return base64.b64decode(data, altchars=self.altchars)

Ancestors

Instance variables

var altchars
Expand source code Browse git
@property
def altchars(self):
    if self.args.urlsafe:
        return B'-_'

Inherited members

class b85

This unit is implemented in refinery.units.encoding.b85 and has the following commandline Interface:

usage: b85 [-h] [-L] [-Q] [-0] [-v] [-R]

Base85 encoding and decoding.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class b85(Unit):
    """
    Base85 encoding and decoding.
    """
    def reverse(self, data):
        # Encode the binary input as Base85 text.
        return base64.b85encode(data)

    def process(self, data):
        # Decode Base85 text back to binary.
        return base64.b85decode(data)

Ancestors

Inherited members

class base (base=0, little_endian=False, alphabet=b'')

This unit is implemented in refinery.units.encoding.base and has the following commandline Interface:

usage: base [-h] [-L] [-Q] [-0] [-v] [-R] [-e] [-a STR] [base]

Encodes and decodes integers in arbitrary base.

positional arguments:
  base                 Base to be used for conversion; The value defaults
                       to the length of the alphabet if given, or 0
                       otherwise. Base 0 treats the input as a Python
                       integer literal.

optional arguments:
  -e, --little-endian  Use little endian instead of big endian byte order.
  -a, --alphabet STR   The alphabet of digits. Has to have length at least
                       equal to the chosen base. The default is:
                       0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ.

generic options:
  -h, --help           Show this help message and exit.
  -L, --lenient        Allow partial results as output.
  -Q, --quiet          Disables all log output.
  -0, --devnull        Do not produce any output.
  -v, --verbose        Specify up to two times to increase log level.
  -R, --reverse        Use the reverse operation.
Expand source code Browse git
class base(Unit):
    """
    Encodes and decodes integers in arbitrary base.
    """

    _DEFAULT_ALPHABET = B'0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # Alias preserving the historically misspelled attribute name so any
    # external references keep working.
    _DEFAULT_APHABET = _DEFAULT_ALPHABET

    def __init__(
        self,
        base: arg.number(bound=(2, None), metavar='base', help=(
            'Base to be used for conversion; The value defaults to the length of the alphabet '
            'if given, or 0 otherwise. Base 0 treats the input as a Python integer literal.')) = 0,
        little_endian: arg('-e', help='Use little endian instead of big endian byte order.') = False,
        alphabet: arg('-a', metavar='STR', help=(
            'The alphabet of digits. Has to have length at least equal to the chosen base. '
            'The default is: 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ.')) = B'',
    ):
        # A custom alphabet implies its length as the default base.
        if alphabet:
            if len(alphabet) < 2:
                raise ValueError('an alphabet with at least two digits is required')
            if not base:
                base = len(alphabet)
        else:
            alphabet = self._DEFAULT_ALPHABET
        # base == 0 is allowed and means "Python integer literal" mode.
        if base and base not in range(2, len(alphabet) + 1):
            raise ValueError(F'base may only be an integer between 2 and {len(alphabet)}')
        super().__init__(base=base, little_endian=little_endian, alphabet=alphabet)

    @property
    def byteorder(self):
        # Byte order used when converting between integers and byte strings.
        return 'little' if self.args.little_endian else 'big'

    def reverse(self, data):
        """
        Interpret the input bytes as one integer and render it as a string of
        digits in the configured base (or as a hex literal for base 0).
        """
        self.log_info('using byte order', self.byteorder)
        number = int.from_bytes(data, byteorder=self.byteorder)

        if number == 0:
            return B'0'
        if self.args.base == 0:
            # Base 0 emits a Python-style hexadecimal integer literal.
            return B'0x%X' % number
        if self.args.base > len(self.args.alphabet):
            raise ValueError(
                F'Only {len(self.args.alphabet)} digits available; not enough to '
                F'encode base {self.args.base}'
            )

        def digits_reversed(number):
            # Yield digits from least to most significant.
            while number:
                yield self.args.alphabet[number % self.args.base]
                number //= self.args.base

        return bytes(reversed(tuple(digits_reversed(number))))

    def process(self, data):
        """
        Parse the input as an integer in the configured base and return its
        byte representation in the configured byte order.
        """
        data = data.strip()
        base = self.args.base
        defaults = self._DEFAULT_ALPHABET[:base]
        alphabet = self.args.alphabet[:base]
        if len(alphabet) == len(defaults):
            # Bases up to 36 (and base 0 literals) can use the int() builtin;
            # custom alphabets are first translated to the default digits.
            if alphabet != defaults:
                self.log_info('translating input data to a default alphabet for faster conversion')
                data = data.translate(bytes.maketrans(alphabet, defaults))
            result = int(data, self.args.base)
        else:
            self.log_warn('very long alphabet, unable to use built-ins; reverting to (slow) fallback.')
            result = 0
            digit_values = {digit: k for k, digit in enumerate(alphabet)}
            for digit in data:
                result *= base
                result += digit_values[digit]
        # Smallest number of bytes that can hold the resulting integer.
        size, rest = divmod(result.bit_length(), 8)
        size += int(bool(rest))
        return result.to_bytes(size, byteorder=self.byteorder)

Ancestors

Instance variables

var byteorder
Expand source code Browse git
@property
def byteorder(self):
    return 'little' if self.args.little_endian else 'big'

Inherited members

class cp1252

This unit is implemented in refinery.units.encoding.cp1252 and has the following commandline Interface:

usage: cp1252 [-h] [-L] [-Q] [-0] [-v] [-R]

Encodes and decodes Windows CP 1252 (aka Latin1) encoded string data.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class cp1252(Unit):
    """
    Encodes and decodes Windows CP 1252 (aka Latin1) encoded string data.
    """

    def process(self, data):
        # Interpret input in the unit's default codec, emit CP-1252 bytes.
        text = data.decode(self.codec)
        return text.encode('cp1252')

    def reverse(self, data):
        # Interpret input as CP-1252, emit bytes in the unit's default codec.
        text = data.decode('cp1252')
        return text.encode(self.codec)

Ancestors

Inherited members

class esc (hex=False, unicode=False, greedy=False, quoted=False, expand=False)

This unit is implemented in refinery.units.encoding.esc and has the following commandline Interface:

usage: esc [-h] [-L] [-Q] [-0] [-v] [-R] [-x] [-u] [-g] [-q] [-p]

Encodes and decodes common ASCII escape sequences.

optional arguments:
  -x, --hex      Hex encode everything, do not use C escape sequences.
  -u, --unicode  Use unicode escape sequences and UTF-8 encoding.
  -g, --greedy   Replace \x by x and \u by u when not followed by two or
                 four hex digits, respectively.
  -q, --quoted   Remove enclosing quotes while decoding and add them for
                 encoding.
  -p, --expand   Decode sequences of the form \uHHLL as two bytes when the
                 upper byte is nonzero.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class esc(Unit):
    """
    Encodes and decodes common ASCII escape sequences.
    """
    # Byte value to C-style escape sequence, used when encoding (reverse).
    _ESCAPE = {
        0x00: BR'\0',
        0x07: BR'\a',
        0x08: BR'\b',
        0x0C: BR'\f',
        0x0A: BR'\n',
        0x0D: BR'\r',
        0x09: BR'\t',
        0x0B: BR'\v',
        0x5C: BR'\\',
        0x27: BR'\'',
        0x22: BR'\"'
    }
    # Character following a backslash to the byte it decodes to (process).
    _UNESCAPE = {
        BR'0': B'\x00',
        BR'a': B'\x07',
        BR'b': B'\x08',
        BR'f': B'\x0C',
        BR'n': B'\x0A',
        BR'r': B'\x0D',
        BR't': B'\x09',
        BR'v': B'\x0B',
        B'\\': B'\x5C',
        BR"'": B'\x27',
        BR'"': B'\x22'
    }

    def __init__(self,
        hex     : arg.switch('-x', help='Hex encode everything, do not use C escape sequences.') = False,
        unicode : arg.switch('-u', help='Use unicode escape sequences and UTF-8 encoding.') = False,
        greedy  : arg.switch('-g', help='Replace \\x by x and \\u by u when not followed by two or four hex digits, respectively.') = False,
        quoted  : arg.switch('-q', help='Remove enclosing quotes while decoding and add them for encoding.') = False,
        expand  : arg.switch('-p', help='Decode sequences of the form \\uHHLL as two bytes when the upper byte is nonzero.') = False,
    ) -> Unit: pass  # noqa

    def process(self, data):
        """
        Decode escape sequences in the input data.
        """
        if self.args.quoted:
            # First and last byte must be the same quote character; data[~0]
            # is the last byte of the buffer.
            quote = data[0]
            if data[~0] != quote:
                raise ValueError('string is not correctly quoted')
            data = data[1:~0]

        if self.args.unicode:
            return data.decode('UNICODE_ESCAPE').encode(self.codec)

        def unescape(match):
            # match[1] is either a full \uHHHH / \xHH payload or one char.
            c = match[1]
            if len(c) > 1:
                if c[0] in B'u':  # unicode
                    upper = int(c[1:3], 16)
                    lower = int(c[3:5], 16)
                    if self.args.expand:
                        return bytes((upper, lower))
                    return bytes((lower,))
                if c[0] in B'x':  # hexadecimal
                    return bytes((int(c[1:3], 16),))
            elif c in B'ux':
                # \u or \x without enough hex digits: drop the backslash only
                # in greedy mode, otherwise keep the sequence untouched.
                return c if self.args.greedy else match[0]
            return self._UNESCAPE.get(c, c)
        data = re.sub(
            RB'\\(u[a-fA-F0-9]{4}|x[a-fA-F0-9]{2}|.)', unescape, data)
        return data

    def reverse(self, data):
        """
        Encode the input data using escape sequences.
        """
        if self.args.unicode:
            string = data.decode(self.codec).encode('UNICODE_ESCAPE')
        else:
            if not self.args.hex:
                def escape(match):
                    c = match[0][0]
                    # Fall back to \xHH for bytes without a named escape.
                    return self._ESCAPE.get(c, RB'\x%02x' % c)
                string = re.sub(RB'[\x00-\x1F\x22\x27\x5C\x7F-\xFF]', escape, data)
            else:
                # Hex mode: every input byte becomes exactly 4 output bytes.
                string = bytearray(4 * len(data))
                for k in range(len(data)):
                    a = k * 4
                    b = k * 4 + 4
                    string[a:b] = RB'\x%02x' % data[k]
        if self.args.quoted:
            string = B'"%s"' % string
        return string

Ancestors

Inherited members

class hex

This unit is implemented in refinery.units.encoding.hex and has the following commandline Interface:

usage: hex [-h] [-L] [-Q] [-0] [-v] [-R]

Hex-decodes and encodes binary data. Non hex characters are removed from
the input. For decoding, any odd trailing hex digits are stripped as two
hex digits are required to represent a byte.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class hex(Unit):
    """
    Hex-decodes and encodes binary data. Non hex characters are removed from
    the input. For decoding, any odd trailing hex digits are stripped as two
    hex digits are required to represent a byte.
    """

    def reverse(self, data):
        import base64
        return base64.b16encode(data)

    def process(self, data):
        import base64
        import re
        # Strip everything that is not a hexadecimal digit.
        data = re.sub(B'[^A-Fa-f0-9]+', B'', data)
        # A dangling odd digit cannot form a full byte; discard it.
        if len(data) & 1:
            data = data[:-1]
        return base64.b16decode(data, casefold=True)

Ancestors

Inherited members

class html

This unit is implemented in refinery.units.encoding.htm and has the following commandline Interface:

usage: html [-h] [-L] [-Q] [-0] [-v] [-R]

Encodes and decodes HTML escape sequences.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class html(Unit):
    """
    Encodes and decodes HTML escape sequences.
    """
    # NOTE: The previous docstring ("common ASCII escape sequences") was
    # copied from the esc unit and did not describe this unit.

    @unicoded
    def process(self, data: str) -> str:
        # Replace HTML character references by the characters they encode.
        return html_.unescape(data)

    @unicoded
    def reverse(self, data: str) -> str:
        # Escape characters that are special in HTML.
        return html_.escape(data)

Ancestors

Inherited members

class netbios (key=b'A')

This unit is implemented in refinery.units.encoding.netbios and has the following commandline Interface:

usage: netbios [-h] [-L] [-Q] [-0] [-v] [-R] [key]

Encodes and decodes strings using the same algorithm that is used for
NetBIOS labels. Each byte 0xUL is encoded as two bytes, which are the sum
of 0xU and 0xL with an offset character, respectively. The default offset
is the capital letter A.

positional arguments:
  key            Provide a single letter to use as the offset.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class netbios(Unit):
    """
    Encodes and decodes strings using the same algorithm that is used for NetBIOS
    labels. Each byte 0xUL is encoded as two bytes, which are the sum of 0xU and
    0xL with an offset character, respectively. The default offset is the capital
    letter A.
    """

    def __init__(self, key: arg(help="Provide a single letter to use as the offset.") = B'A'):
        if len(key) != 1:
            raise ValueError("The key must be a binary string of length exactly 1")
        super().__init__(key=key[0])

    def reverse(self, data):
        # Each input byte expands into two offset-shifted nibble characters.
        offset = self.args.key
        encoded = bytearray(2 * len(data))
        position = 0
        for byte in data:
            encoded[position] = (byte >> 4) + offset
            encoded[position + 1] = (byte & 15) + offset
            position += 2
        return encoded

    def process(self, data):
        # Consume the input two characters at a time; a dangling half byte
        # at the end of the input is silently discarded.
        offset = self.args.key
        decoded = bytearray()
        stream = iter(data)
        for first in stream:
            try:
                second = next(stream)
            except StopIteration:
                break
            hi = first - offset
            lo = second - offset
            if hi not in range(16) or lo not in range(16):
                raise ValueError(F'Invalid character encoding detected: hi={hi:X}, lo={lo:X}.')
            decoded.append((hi << 4) | lo)
        return decoded

Ancestors

Inherited members

class ps1str

This unit is implemented in refinery.units.encoding.ps1str and has the following commandline Interface:

usage: ps1str [-h] [-L] [-Q] [-0] [-v] [-R]

Escapes and unescapes PowerShell strings.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class ps1str(Unit):
    """
    Escapes and unescapes PowerShell strings.
    """
    # Backtick escape sequences mapped to the characters they represent.
    UNESCAPE = {
        '`0': '\0',
        '`a': '\a',
        '`b': '\b',
        '`f': '\f',
        '`n': '\n',
        '`r': '\r',
        '`t': '\t',
        '`v': '\v',
        '``': '`',
        "`'": '\'',
        '`"': '\"',
    }
    # Characters mapped to their escaped form for double-quoted strings.
    ESCAPE = {
        '`' : '``',
        '$' : '`$',
        '\0': '`0',
        '\a': '`a',
        '\b': '`b',
        '\f': '`f',
        '\n': '`n',
        '\r': '`r',
        '\t': '`t',
        '\v': '`v',
        '\'': "`'",
        '\"': '""',
    }

    def __init__(self): pass

    @unicoded
    def process(self, data):
        """
        Unescape a quoted PowerShell string (the quotes must be included).
        """
        # Here-strings of the form @"..."@ or @'...'@ are returned verbatim.
        match = re.fullmatch(R'''@(['"])\s*\n(.*?)\n\s*\1@''', data)
        if match:
            return match.group(2)
        if data[0] not in ''''"''' or data[-1] != data[0]:
            raise ValueError(
                'No quotes found at beginning of input. To escape a PowerShell string, the '
                'quotes must be included because quote escaping depends on whether a single '
                'or double quote was used.')

        quote, data = data[0], data[1:-1]

        def unescape(match):
            # Unknown backtick sequences simply drop the backtick.
            string = match[0]
            return self.UNESCAPE.get(string, string[1:])

        if quote == '"':
            # Backtick escapes are only interpreted inside double quotes.
            if re.search(R'(?<!`)\$(?=[\w\(\{\$\?\^:])', data):
                self.log_warn('Loss of information: double quoted string contains variable substitutions.')
            data = re.sub('`.', unescape, data)

        # A doubled quote character stands for one literal quote.
        return data.replace(quote + quote, quote)

    @unicoded
    def reverse(self, data):
        """
        Escape the input as a double-quoted PowerShell string.
        """
        def escaper(match):
            char = match[0]
            return ps1str.ESCAPE.get(char, char)
        return '"{}"'.format(re.sub(R'''[\x00\x07-\x0D`$'"]''', escaper, data))

Ancestors

Class variables

var UNESCAPE
var ESCAPE

Inherited members

class recode (decode=None, encode='UTF8', decerr=None, encerr=None, errors=None)

This unit is implemented in refinery.units.encoding.recode and has the following commandline Interface:

usage: recode [-h] [-L] [-Q] [-0] [-v] [-R] [-d Handler] [-e Handler]
              [-E Handler]
              [decode-as] [encode-as]

Expects input string data encoded in the from encoding and encodes it in
the to encoding, then outputs the result.

positional arguments:
  decode-as             Input encoding; Guess encoding by default.
  encode-as             Output encoding; The default is UTF8.

optional arguments:
  -d, --decerr Handler  Specify an error handler for decoding.
  -e, --encerr Handler  Specify an error handler for encoding.
  -E, --errors Handler  Specify an error handler for both encoding and
                        decoding. The possible choices are the following:
                        STRICT, IGNORE, REPLACE, XMLREF, BACKSLASH,
                        SURROGATE

generic options:
  -h, --help            Show this help message and exit.
  -L, --lenient         Allow partial results as output.
  -Q, --quiet           Disables all log output.
  -0, --devnull         Do not produce any output.
  -v, --verbose         Specify up to two times to increase log level.
  -R, --reverse         Use the reverse operation.
Expand source code Browse git
class recode(Unit):
    """
    Expects input string data encoded in the `from` encoding and encodes it in
    the `to` encoding, then outputs the result.
    """

    def __init__(
        self,
        decode: arg(metavar='decode-as', type=str, help='Input encoding; Guess encoding by default.') = None,
        encode: arg(metavar='encode-as', type=str, help=F'Output encoding; The default is {Unit.codec}.') = Unit.codec,
        decerr: arg.option('-d', choices=Handler,
            help='Specify an error handler for decoding.') = None,
        encerr: arg.option('-e', choices=Handler,
            help='Specify an error handler for encoding.') = None,
        errors: arg.option('-E', choices=Handler, help=(
            'Specify an error handler for both encoding and decoding. '
            'The possible choices are the following: {choices}')) = None,
    ):
        # -E acts as a shared fallback for both directions; the specific
        # -d/-e handlers take precedence, and STRICT is the final default.
        super().__init__(
            decode=decode,
            encode=encode,
            decerr=arg.as_option(decerr or errors or 'STRICT', Handler).value,
            encerr=arg.as_option(encerr or errors or 'STRICT', Handler).value
        )

    def _detect(self, data):
        # Guess the input encoding: a zero byte in every odd (resp. even)
        # position indicates UTF-16LE (resp. UTF-16BE); otherwise defer to
        # the chardet library.
        mv = memoryview(data)
        if not any(mv[1::2]): return 'utf-16le'
        if not any(mv[0::2]): return 'utf-16be'
        import chardet
        detection = chardet.detect(data)
        codec = detection['encoding']
        self.log_info(lambda: F'Using input encoding: {codec}, detected with {int(detection["confidence"]*100)}% confidence.')
        return codec

    def _recode(self, enc, dec, encerr, decerr, data):
        # Decode from `dec` (auto-detected when not given), then encode to
        # `enc`, applying the respective error handlers.
        dec = dec or self._detect(data)
        return codecs.encode(codecs.decode(data, dec, errors=decerr), enc, errors=encerr)

    def reverse(self, data):
        # Reverse operation swaps the roles of the two encodings (and their
        # error handlers accordingly).
        return self._recode(self.args.decode, self.args.encode, self.args.decerr, self.args.encerr, data)

    def process(self, data):
        return self._recode(self.args.encode, self.args.decode, self.args.encerr, self.args.decerr, data)

Ancestors

Inherited members

class u16

This unit is implemented in refinery.units.encoding.u16 and has the following commandline Interface:

usage: u16 [-h] [-L] [-Q] [-0] [-v] [-R]

Encodes and decodes UTF-16LE encoded string data.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class u16(Unit):
    """
    Encodes and decodes UTF-16LE encoded string data.
    """

    def reverse(self, data):
        # From the unit's default codec to UTF-16LE.
        text = data.decode(self.codec)
        return text.encode('utf-16LE')

    def process(self, data):
        # From UTF-16LE back to the unit's default codec.
        text = data.decode('utf-16LE')
        return text.encode(self.codec)

Ancestors

Inherited members

class url (plus=False, hex=False)

This unit is implemented in refinery.units.encoding.url and has the following commandline Interface:

usage: url [-h] [-L] [-Q] [-0] [-v] [-R] [-p] [-x]

Decodes and encodes URL-Encoding, which preserves only alphanumeric
characters and the symbols _, ., -, ~, \, and /. Every other character is
escaped by hex-encoding it and prefixing it with a percent symbol.

optional arguments:
  -p, --plus     also replace plus signs by spaces
  -x, --hex      hex encode every character in reverse mode

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class url(Unit):
    """
    Decodes and encodes URL-Encoding, which preserves only alphanumeric
    characters and the symbols `_`, `.`, `-`, `~`, `\\`, and `/`.
    Every other character is escaped by hex-encoding it and prefixing it
    with a percent symbol.
    """

    def __init__(
        self,
        plus: arg.switch('-p', help='also replace plus signs by spaces') = False,
        hex : arg.switch('-x', help='hex encode every character in reverse mode') = False
    ):
        super().__init__(plus=plus, hex=hex)

    def process(self, data):
        """
        URL-decode the input: replace every %HH escape by its byte value and
        optionally convert plus signs to spaces.
        """
        data = re.sub(
            B'\\%([0-9a-fA-F]{2})',
            lambda m: bytes((int(m[1], 16),)),
            data
        )
        if self.args.plus:
            data = data.replace(B'+', B' ')
        return data

    def reverse(self, data):
        """
        URL-encode the input. With the hex option, every byte is escaped;
        otherwise only characters outside the documented safe set are.
        """
        if self.args.plus:
            data = data.replace(B' ', B'+')
        if not self.args.hex:
            # The dash must be the last character in the class so it is taken
            # literally: the previous pattern contained `.-~`, which formed a
            # character RANGE (0x2E-0x7E) and wrongly preserved most printable
            # ASCII characters instead of only the documented safe set.
            return re.sub(B'[^a-zA-Z0-9_.~\\/-]', lambda m: B'%%%02X' % ord(m[0]), data)
        # Full hex mode: output is exactly three bytes per input byte.
        result = bytearray(len(data) * 3)
        offset = 0
        for byte in data:
            result[offset] = B'%'[0]
            offset += 1
            result[offset:offset + 2] = B'%02X' % byte
            offset += 2
        return result

Ancestors

Inherited members

class wshenc (marker=True)

This unit is implemented in refinery.units.encoding.wshenc and has the following commandline Interface:

usage: wshenc [-h] [-L] [-Q] [-0] [-v] [-R] [-m]

Windows Scripting Host encoding and decoding of VBScript (VBS/VBE) and
JScript (JS/JSE).

optional arguments:
  -m, --no-marker  Do not require magic marker when encoding and do not
                   search for marker when decoding.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
  -R, --reverse    Use the reverse operation.
Expand source code Browse git
class wshenc(Unit):
    """
    Windows Scripting Host encoding and decoding of VBScript (VBS/VBE) and JScript (JS/JSE).
    """

    # Magic markers that delimit the encoded script body.
    _MARKER_INIT = RB'#@~^BINREF=='
    _MARKER_STOP = RB'BINREF==^#~@'

    # Decoding table: three candidate plaintext bytes per encodable byte
    # value (starting at 0x09); _OFFSETS selects one of the three based on
    # the running character index.
    _CHUNKS = (
        0x57, 0x6E, 0x7B, 0x4A, 0x4C, 0x41, 0x0B, 0x0B, 0x0B, 0x0C, 0x0C, 0x0C, 0x4A, 0x4C, 0x41,
        0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x12, 0x12, 0x12,
        0x13, 0x13, 0x13, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17,
        0x18, 0x18, 0x18, 0x19, 0x19, 0x19, 0x1A, 0x1A, 0x1A, 0x1B, 0x1B, 0x1B, 0x1C, 0x1C, 0x1C,
        0x1D, 0x1D, 0x1D, 0x1E, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F, 0x2E, 0x2D, 0x32, 0x47, 0x75, 0x30,
        0x7A, 0x52, 0x21, 0x56, 0x60, 0x29, 0x42, 0x71, 0x5B, 0x6A, 0x5E, 0x38, 0x2F, 0x49, 0x33,
        0x26, 0x5C, 0x3D, 0x49, 0x62, 0x58, 0x41, 0x7D, 0x3A, 0x34, 0x29, 0x35, 0x32, 0x36, 0x65,
        0x5B, 0x20, 0x39, 0x76, 0x7C, 0x5C, 0x72, 0x7A, 0x56, 0x43, 0x7F, 0x73, 0x38, 0x6B, 0x66,
        0x39, 0x63, 0x4E, 0x70, 0x33, 0x45, 0x45, 0x2B, 0x6B, 0x68, 0x68, 0x62, 0x71, 0x51, 0x59,
        0x4F, 0x66, 0x78, 0x09, 0x76, 0x5E, 0x62, 0x31, 0x7D, 0x44, 0x64, 0x4A, 0x23, 0x54, 0x6D,
        0x75, 0x43, 0x71, 0x4A, 0x4C, 0x41, 0x7E, 0x3A, 0x60, 0x4A, 0x4C, 0x41, 0x5E, 0x7E, 0x53,
        0x40, 0x4C, 0x40, 0x77, 0x45, 0x42, 0x4A, 0x2C, 0x27, 0x61, 0x2A, 0x48, 0x5D, 0x74, 0x72,
        0x22, 0x27, 0x75, 0x4B, 0x37, 0x31, 0x6F, 0x44, 0x37, 0x4E, 0x79, 0x4D, 0x3B, 0x59, 0x52,
        0x4C, 0x2F, 0x22, 0x50, 0x6F, 0x54, 0x67, 0x26, 0x6A, 0x2A, 0x72, 0x47, 0x7D, 0x6A, 0x64,
        0x74, 0x39, 0x2D, 0x54, 0x7B, 0x20, 0x2B, 0x3F, 0x7F, 0x2D, 0x38, 0x2E, 0x2C, 0x77, 0x4C,
        0x30, 0x67, 0x5D, 0x6E, 0x53, 0x7E, 0x6B, 0x47, 0x6C, 0x66, 0x34, 0x6F, 0x35, 0x78, 0x79,
        0x25, 0x5D, 0x74, 0x21, 0x30, 0x43, 0x64, 0x23, 0x26, 0x4D, 0x5A, 0x76, 0x52, 0x5B, 0x25,
        0x63, 0x6C, 0x24, 0x3F, 0x48, 0x2B, 0x7B, 0x55, 0x28, 0x78, 0x70, 0x23, 0x29, 0x69, 0x41,
        0x28, 0x2E, 0x34, 0x73, 0x4C, 0x09, 0x59, 0x21, 0x2A, 0x33, 0x24, 0x44, 0x7F, 0x4E, 0x3F,
        0x6D, 0x50, 0x77, 0x55, 0x09, 0x3B, 0x53, 0x56, 0x55, 0x7C, 0x73, 0x69, 0x3A, 0x35, 0x61,
        0x5F, 0x61, 0x63, 0x65, 0x4B, 0x50, 0x46, 0x58, 0x67, 0x58, 0x3B, 0x51, 0x31, 0x57, 0x49,
        0x69, 0x22, 0x4F, 0x6C, 0x6D, 0x46, 0x5A, 0x4D, 0x68, 0x48, 0x25, 0x7C, 0x27, 0x28, 0x36,
        0x5C, 0x46, 0x70, 0x3D, 0x4A, 0x6E, 0x24, 0x32, 0x7A, 0x79, 0x41, 0x2F, 0x37, 0x3D, 0x5F,
        0x60, 0x5F, 0x4B, 0x51, 0x4F, 0x5A, 0x20, 0x42, 0x2C, 0x36, 0x65, 0x57)
    # Selects which of the three table entries to use, cycling with a
    # period of 64 positions.
    _OFFSETS = (
        0, 1, 2, 0, 1, 2, 1, 2, 2, 1, 2, 1, 0, 2, 1, 2, 0, 2, 1, 2, 0, 0, 1, 2, 2, 1, 0, 2, 1, 2, 2, 1,
        0, 0, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 2, 0, 2, 1, 0, 2, 1, 2, 0, 0, 1, 2, 2, 0, 0, 1, 2, 0, 2, 1)
    # Encoding table: maps a plaintext byte to its three possible encoded
    # byte values (again selected by _OFFSETS).
    _ENCODER = {
        0x09 : [0x37, 0x69, 0x64], 0x0B : [0x0B, 0x0B, 0x0B], 0x0C : [0x0C, 0x0C, 0x0C],
        0x0E : [0x0E, 0x0E, 0x0E], 0x0F : [0x0F, 0x0F, 0x0F], 0x10 : [0x10, 0x10, 0x10],
        0x11 : [0x11, 0x11, 0x11], 0x12 : [0x12, 0x12, 0x12], 0x13 : [0x13, 0x13, 0x13],
        0x14 : [0x14, 0x14, 0x14], 0x15 : [0x15, 0x15, 0x15], 0x16 : [0x16, 0x16, 0x16],
        0x17 : [0x17, 0x17, 0x17], 0x18 : [0x18, 0x18, 0x18], 0x19 : [0x19, 0x19, 0x19],
        0x1A : [0x1A, 0x1A, 0x1A], 0x1B : [0x1B, 0x1B, 0x1B], 0x1C : [0x1C, 0x1C, 0x1C],
        0x1D : [0x1D, 0x1D, 0x1D], 0x1E : [0x1E, 0x1E, 0x1E], 0x1F : [0x1F, 0x1F, 0x1F],
        0x20 : [0x7E, 0x2C, 0x50], 0x21 : [0x5A, 0x65, 0x22], 0x22 : [0x45, 0x72, 0x4A],
        0x23 : [0x3A, 0x5B, 0x61], 0x24 : [0x79, 0x66, 0x5E], 0x25 : [0x59, 0x75, 0x5D],
        0x26 : [0x27, 0x4C, 0x5B], 0x27 : [0x76, 0x45, 0x42], 0x28 : [0x63, 0x76, 0x60],
        0x29 : [0x62, 0x2A, 0x23], 0x2A : [0x4D, 0x43, 0x65], 0x2B : [0x51, 0x33, 0x5F],
        0x2C : [0x53, 0x42, 0x7E], 0x2D : [0x52, 0x20, 0x4F], 0x2E : [0x20, 0x63, 0x52],
        0x2F : [0x26, 0x4A, 0x7A], 0x30 : [0x54, 0x5A, 0x21], 0x31 : [0x71, 0x38, 0x46],
        0x32 : [0x2B, 0x79, 0x20], 0x33 : [0x66, 0x32, 0x26], 0x34 : [0x2A, 0x57, 0x63],
        0x35 : [0x58, 0x6C, 0x2A], 0x36 : [0x7F, 0x2B, 0x76], 0x37 : [0x7B, 0x46, 0x47],
        0x38 : [0x30, 0x52, 0x25], 0x39 : [0x31, 0x4F, 0x2C], 0x3A : [0x6C, 0x3D, 0x29],
        0x3B : [0x49, 0x70, 0x69], 0x3D : [0x78, 0x7B, 0x27], 0x3F : [0x5F, 0x51, 0x67],
        0x40 : [0x40, None, 0x40], 0x41 : [0x29, 0x7A, 0x62], 0x42 : [0x24, 0x7E, 0x41],
        0x43 : [0x2F, 0x3B, 0x5A], 0x44 : [0x39, 0x47, 0x66], 0x45 : [0x33, 0x41, 0x32],
        0x46 : [0x6F, 0x77, 0x73], 0x47 : [0x21, 0x56, 0x4D], 0x48 : [0x75, 0x5F, 0x43],
        0x49 : [0x28, 0x26, 0x71], 0x4A : [0x42, 0x78, 0x39], 0x4B : [0x46, 0x6E, 0x7C],
        0x4C : [0x4A, 0x64, 0x53], 0x4D : [0x5C, 0x74, 0x48], 0x4E : [0x48, 0x67, 0x31],
        0x4F : [0x36, 0x7D, 0x72], 0x50 : [0x4B, 0x68, 0x6E], 0x51 : [0x7D, 0x35, 0x70],
        0x52 : [0x5D, 0x22, 0x49], 0x53 : [0x6A, 0x55, 0x3F], 0x54 : [0x50, 0x3A, 0x4B],
        0x55 : [0x69, 0x60, 0x6A], 0x56 : [0x23, 0x6A, 0x2E], 0x57 : [0x09, 0x71, 0x7F],
        0x58 : [0x70, 0x6F, 0x28], 0x59 : [0x65, 0x49, 0x35], 0x5A : [0x74, 0x5C, 0x7D],
        0x5B : [0x2C, 0x5D, 0x24], 0x5C : [0x77, 0x27, 0x2D], 0x5D : [0x44, 0x59, 0x54],
        0x5E : [0x3F, 0x25, 0x37], 0x5F : [0x6D, 0x7C, 0x7B], 0x60 : [0x7C, 0x23, 0x3D],
        0x61 : [0x43, 0x6D, 0x6C], 0x62 : [0x38, 0x28, 0x34], 0x63 : [0x5E, 0x31, 0x6D],
        0x64 : [0x5B, 0x39, 0x4E], 0x65 : [0x6E, 0x7F, 0x2B], 0x66 : [0x57, 0x36, 0x30],
        0x67 : [0x4C, 0x54, 0x6F], 0x68 : [0x34, 0x34, 0x74], 0x69 : [0x72, 0x62, 0x6B],
        0x6A : [0x25, 0x4E, 0x4C], 0x6B : [0x56, 0x30, 0x33], 0x6C : [0x73, 0x5E, 0x56],
        0x6D : [0x68, 0x73, 0x3A], 0x6E : [0x55, 0x09, 0x78], 0x6F : [0x47, 0x4B, 0x57],
        0x70 : [0x32, 0x61, 0x77], 0x71 : [0x35, 0x24, 0x3B], 0x72 : [0x2E, 0x4D, 0x44],
        0x73 : [0x64, 0x6B, 0x2F], 0x74 : [0x4F, 0x44, 0x59], 0x75 : [0x3B, 0x21, 0x45],
        0x76 : [0x2D, 0x37, 0x5C], 0x77 : [0x41, 0x53, 0x68], 0x78 : [0x61, 0x58, 0x36],
        0x79 : [0x7A, 0x48, 0x58], 0x7A : [0x22, 0x2E, 0x79], 0x7B : [0x60, 0x50, 0x09],
        0x7C : [0x6B, 0x2D, 0x75], 0x7D : [0x4E, 0x29, 0x38], 0x7E : [0x3D, 0x3F, 0x55],
        0x7F : [0x67, 0x2F, 0x51]
    }

    # Characters that must be represented by two-byte @-sequences in the
    # encoded output.
    _ESCAPE = {
        0x40: B'@$',
        0x3C: B'@!',
        0x3E: B'@*',
        0x0D: B'@#',
        0x0A: B'@&',
    }

    # Inverse of _ESCAPE, applied before decoding.
    _UNESCAPE = {
        B'@$': B'@',
        B'@!': B'<',
        B'@*': B'>',
        B'@#': B'\r',
        B'@&': B'\n',
    }

    def __init__(
        self,
        marker: arg.switch('-m', '--no-marker', off=True, help=(
            'Do not require magic marker when encoding and do not search for '
            'marker when decoding.')
        ) = True
    ):
        super().__init__(marker=marker)

    @classmethod
    def _chunk(cls, byte, index):
        # Look up the plaintext byte for an encoded byte at a given running
        # index; the table starts at byte value 9 with 3 entries per value.
        k = byte - 9
        c = cls._CHUNKS[k * 3 : k * 3 + 3]
        return c[cls._OFFSETS[index % 64]]

    def _escape(self, iterable):
        # bytes() of the _ESCAPE dict yields its integer keys, i.e. the set
        # of byte values that require escaping.
        escapes = bytes(self._ESCAPE)
        if self.args.marker:
            yield from self._MARKER_INIT
        for byte in iterable:
            if byte in escapes:
                yield from self._ESCAPE[byte]
            else:
                yield byte
        if self.args.marker:
            yield from self._MARKER_STOP

    def _unescape(self, data):
        # Replace every two-byte @-sequence with its original character.
        def unescaper(m): return self._UNESCAPE[m[0]]
        return re.sub(RB'@[$!*#&]', unescaper, data)

    @classmethod
    def _decoded(cls, data):
        # The running index only advances for 7-bit bytes; bytes outside the
        # encodable range (and <, >, @) pass through unchanged.
        index = -1
        for byte in data:
            if byte < 128:
                index += 1
            if (byte == 9 or 31 < byte < 128) and byte != 60 and byte != 62 and byte != 64:
                byte = cls._chunk(byte, index)
            yield byte

    @classmethod
    def _encoded(cls, data):
        # Encode each byte via the _ENCODER table; bytes without an entry
        # are emitted unchanged.
        for i, byte in enumerate(data):
            try:
                sequence = cls._ENCODER[byte]
            except KeyError:
                yield byte
            else:
                offset = cls._OFFSETS[i % 0x40]
                yield sequence[offset]

    def reverse(self, data):
        # Encoding direction: encode, then escape and add markers.
        return bytearray(self._escape(self._encoded(data)))

    def process(self, data):
        # Decoding direction: locate the marked region (unless disabled),
        # strip the 12-byte markers, unescape, then decode.
        if self.args.marker:
            match = formats.vbe.search(data)
            if not match:
                raise ValueError('Encoded script marker was not found.')
            data = match[0][12:-12]
        return bytearray(self._decoded(self._unescape(data)))

Ancestors

Inherited members

class xt7z (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', pwd=b'')

This unit is implemented in refinery.units.formats.archive.xt7z and has the following commandline Interface:

usage: xt7z [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
            [-p PWD]
            [path [path ...]]

Extract files from a 7zip archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -p, --pwd PWD    Optionally specify an extraction password.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xt7z(ArchiveUnit):
    """
    Extract files from a 7zip archive.
    """
    def unpack(self, data):
        # Construct a fresh SevenZipFile over the archive body starting at the
        # 7z signature; a new object is built per password attempt because the
        # reader is stateful.
        def mk7z(**keywords):
            return py7zr.SevenZipFile(MemoryFile(mv[zp:]), **keywords)

        pwd = self.args.pwd
        mv = memoryview(data)
        # Skip any leading garbage before the 7z magic bytes; find() returns -1
        # when the signature is absent, in which case parsing starts at offset 0.
        zp = max(0, data.find(B'7z\xBC\xAF\x27\x1C'))

        if pwd:
            # A user-supplied password is used as-is, without verification here.
            archive = mk7z(password=pwd.decode(self.codec))
        else:
            # No password given: test the passwordless archive first, then walk
            # the list of common passwords. NOTE(review): each candidate is only
            # tested by testzip() on the *following* loop iteration, so the very
            # last candidate in _COMMON_PASSWORDS is never verified before use;
            # if all candidates fail, extraction proceeds with the last one and
            # errors surface later during read().
            archive = mk7z()
            for pwd in self._COMMON_PASSWORDS:
                try:
                    problem = archive.testzip()
                except py7zr.PasswordRequired:
                    problem = True
                if not problem:
                    break
                self.log_debug(F'trying password: {pwd}')
                archive = mk7z(password=pwd)

        for info in archive.list():
            # Deferred extraction: archive and info are bound as defaults to
            # avoid the late-binding closure pitfall inside the loop.
            def extract(archive: py7zr.SevenZipFile = archive, info: py7zr.FileInfo = info):
                archive.reset()
                return archive.read(info.filename).get(info.filename).read()
            if info.is_directory:
                continue
            yield self._pack(info.filename, info.creationtime, extract)

Ancestors

Inherited members

class xtace (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', pwd=b'')

This unit is implemented in refinery.units.formats.archive.xtace and has the following commandline Interface:

usage: xtace [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
             [-p PWD]
             [path [path ...]]

Extract files from an ACE archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -p, --pwd PWD    Optionally specify an extraction password.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtace(ArchiveUnit):
    """
    Extract files from an ACE archive.
    """
    def unpack(self, data):
        # Wrap the input in a byte-backed stream and hand it to acefile.
        archive = acefile.open(MemoryFile(data, read_as_bytes=True))
        for item in archive.getmembers():
            item: acefile.AceMember
            # Deferred extraction: bind the archive and member as defaults so
            # each chunk reads its own entry when it is accessed.
            def read(a=archive, m=item):
                return a.read(m, pwd=self.args.pwd)
            extra = {'comment': item.comment} if item.comment else {}
            yield self._pack(item.filename, item.datetime, read, **extra)

Ancestors

Inherited members

class xtcpio (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', pwd=b'')

This unit is implemented in refinery.units.formats.archive.xtcpio and has the following commandline Interface:

usage: xtcpio [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
              [-p PWD]
              [path [path ...]]

Extract files from a CPIO archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -p, --pwd PWD    Optionally specify an extraction password.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtcpio(ArchiveUnit):
    """
    Extract files from a CPIO archive.
    """
    def unpack(self, data):
        reader = StructReader(memoryview(data))

        def next_entry():
            # Falls through to an implicit None once the reader is exhausted
            # and CPIOEntry construction raises EOF.
            with suppress(EOF):
                return CPIOEntry(reader)

        while True:
            entry = next_entry()
            if entry is None or entry.name == 'TRAILER!!!':
                break
            yield self._pack(entry.name, entry.mtime, entry.data)

Ancestors

Inherited members

class xtiso (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', fs='auto')

This unit is implemented in refinery.units.formats.archive.xtiso and has the following commandline Interface:

usage: xtiso [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
             [-s TYPE]
             [path [path ...]]

Extract files from an ISO archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -s, --fs TYPE    Specify a file system (udf, joliet, rr, iso, auto)
                   extension to use. The default setting auto will
                   automatically detect the first of the other available
                   options and use it.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtiso(ArchiveUnit):
    """
    Extract files from an ISO archive.
    """
    def __init__(self, *paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date',
        fs: arg.choice('-s', metavar='TYPE', choices=_ISO_FILE_SYSTEMS, help=(
            'Specify a file system ({choices}) extension to use. The default setting {default} will automatically '
            'detect the first of the other available options and use it.')) = 'auto'
    ):
        if fs not in _ISO_FILE_SYSTEMS:
            raise ValueError(F'invalid file system {fs}: must be udf, joliet, rr, iso, or auto.')
        super().__init__(*paths, list=list, join_path=join_path, drop_path=drop_path, path=path, date=date, fs=fs)

    def unpack(self, data):
        with MemoryFile(data) as stream:
            iso = pycdlib.PyCdlib()
            iso.open_fp(stream)
            fs = self.args.fs
            # Select the requested file system facade, or auto-detect by
            # preferring UDF, then Joliet, then Rock Ridge, then plain ISO9660.
            if fs != 'auto':
                mkfacade = {
                    'iso'    : iso.get_iso9660_facade,
                    'udf'    : iso.get_udf_facade,
                    'joliet' : iso.get_joliet_facade,
                    'rr'     : iso.get_rock_ridge_facade,
                }
                facade = mkfacade[fs]()
            elif iso.has_udf():
                facade = iso.get_udf_facade()
            elif iso.has_joliet():
                facade = iso.get_joliet_facade()
            elif iso.has_rock_ridge():
                facade = iso.get_rock_ridge_facade()
            else:
                facade = iso.get_iso9660_facade()

            for root, _, files in facade.walk('/'):
                root = root.rstrip('/')
                for name in files:
                    name = name.lstrip('/')
                    path = F'{root}/{name}'
                    try:
                        info = facade.get_record(path)
                    except Exception:
                        info = None
                        date = None
                    else:
                        # ISO timestamps store the GMT offset in 15-minute units.
                        date = datetime.datetime(
                            info.date.years_since_1900 + 1900,
                            info.date.month,
                            info.date.day_of_month,
                            info.date.hour,
                            info.date.minute,
                            info.date.second,
                            tzinfo=datetime.timezone(datetime.timedelta(minutes=15 * info.date.gmtoffset))
                        )

                    # BUGFIX: bind path (like info) as a default argument. The
                    # closure previously captured the loop variable `path` by
                    # reference, so deferred extraction would always read the
                    # file from the *last* loop iteration.
                    def extract(info=info, path=path):
                        if info:
                            # Pre-size the buffer when the record is available.
                            buffer = MemoryFile(bytearray(info.data_length))
                        else:
                            buffer = MemoryFile(bytearray())
                        facade.get_file_from_iso_fp(buffer, path)
                        return buffer.getvalue()

                    yield self._pack(path, date, extract)

Ancestors

Inherited members

class xttar (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date')

This unit is implemented in refinery.units.formats.archive.xttar and has the following commandline Interface:

usage: xttar [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
             [path [path ...]]

Extract files from a Tar archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xttar(ArchiveUnit):
    """
    Extract files from a Tar archive.
    """
    def __init__(self, *paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date'):
        # Narrowed signature: tar archives have no password option.
        super().__init__(*paths, list=list, join_path=join_path, drop_path=drop_path, path=path, date=date)

    def unpack(self, data):
        # Open the archive from an in-memory stream and emit one chunk per
        # regular file member.
        tf = tarfile.open(fileobj=MemoryFile(data))
        for member in tf.getmembers():
            if not member.isfile():
                continue
            handle = tf.extractfile(member)
            if handle is None:
                continue
            timestamp = datetime.datetime.fromtimestamp(member.mtime)
            yield self._pack(member.name, timestamp, lambda h=handle: h.read())

Ancestors

Inherited members

class xtzip (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', pwd=b'')

This unit is implemented in refinery.units.formats.archive.xtzip and has the following commandline Interface:

usage: xtzip [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
             [-p PWD]
             [path [path ...]]

Extract files from a Zip archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -p, --pwd PWD    Optionally specify an extraction password.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtzip(ArchiveUnit):
    """
    Extract files from a Zip archive.
    """
    def unpack(self, data):
        password = self.args.pwd.decode(self.codec)
        archive = ZipFile(MemoryFile(data))

        if password:
            archive.setpassword(self.args.pwd)
        else:
            # No password supplied: try the archive without one first, then a
            # list of common passwords. A RuntimeError whose message mentions
            # "password" signals a wrong or missing password; anything else is
            # re-raised unchanged.
            def accepted(candidate: Optional[str]) -> bool:
                if candidate is not None:
                    archive.setpassword(candidate.encode(self.codec))
                try:
                    archive.testzip()
                except RuntimeError as error:
                    if 'password' not in str(error):
                        raise
                    return False
                self.log_debug(candidate)
                return True
            # any() short-circuits on the first accepted candidate.
            if not any(accepted(c) for c in [None, *self._COMMON_PASSWORDS]):
                raise RuntimeError('Archive is password-protected.')

        for info in archive.infolist():
            if info.is_dir():
                continue
            # Deferred extraction with archive and info bound as defaults.
            def xt(archive: ZipFile = archive, info: ZipInfo = info):
                try:
                    return archive.read(info.filename)
                except RuntimeError as error:
                    if 'password' not in str(error):
                        raise
                    if not password:
                        raise RuntimeError('archive is password-protected')
                    raise RuntimeError(F'invalid password: {password}') from error
            try:
                date = datetime(*info.date_time)
            except Exception:
                date = None
            yield self._pack(info.filename, date, xt)

Ancestors

Inherited members

class dsphp

This unit is implemented in refinery.units.formats.deserialize_php and has the following commandline Interface:

usage: dsphp [-h] [-L] [-Q] [-0] [-v]

Deserialize PHP serialized data and re-serialize as JSON.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class dsphp(Unit):
    """
    Deserialize PHP serialized data and re-serialize as JSON.
    """

    class _encoder(json.JSONEncoder):
        # JSON encoder that additionally understands byte strings and PHP
        # objects as produced by the PHP deserializer.
        def default(self, obj):
            try:
                return super().default(obj)
            except TypeError:
                pass
            if isinstance(obj, (bytes, bytearray)):
                return obj.decode('utf8')
            if isinstance(obj, php.phpobject):
                return obj._asdict()
            # NOTE(review): deliberately falls through to an implicit None for
            # any other unserializable type, which renders as JSON null rather
            # than raising; this preserves the unit's lenient behavior.

    def process(self, data):
        """
        Deserialize the PHP-serialized input and return a 4-space-indented
        JSON rendering, encoded with the unit's codec.
        """
        return json.dumps(
            php.loads(
                data,
                object_hook=php.phpobject,
                decode_strings=True
            ),
            indent=4,
            cls=self._encoder
        ).encode(self.codec)

Ancestors

Inherited members

class xtmail (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.email and has the following commandline Interface:

usage: xtmail [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
              [path [path ...]]

Extract files and body from EMail messages. The unit supports both the
Outlook message format and regular MIME documents.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtmail(PathExtractorUnit):
    """
    Extract files and body from EMail messages. The unit supports both the Outlook message format
    and regular MIME documents.
    """
    def _get_headparts(self, head):
        # Emit the message headers twice: as plain text (headers.txt) and as
        # JSON (headers.json), with MIME encoded-words decoded via the
        # mimewords unit's unwrapped process function.
        mw = mimewords()
        mw = partial(mw.process.__wrapped__.__wrapped__, mw)
        jh = defaultdict(list)
        for key, value in head:
            # Unfold multi-line header values into a single line before
            # decoding any encoded words.
            jh[key].append(mw(''.join(t.lstrip() for t in value.splitlines(False))))
        # Collapse single-valued headers to a scalar; for repeated headers,
        # keep a list of the non-empty decoded values.
        jh = {k: v[0] if len(v) == 1 else [t for t in v if t] for k, v in jh.items()}
        yield UnpackResult('headers.txt',
            lambda h=head: '\n'.join(F'{k}: {v}' for k, v in h).encode(self.codec))
        yield UnpackResult('headers.json',
            lambda jsn=jh: json.dumps(jsn, indent=4).encode(self.codec))

    def _get_parts_outlook(self, data):
        # Unpack an Outlook (.msg) message: headers, body parts, and
        # attachments; attached messages contribute their own body parts.
        def ensure_bytes(data):
            return data if isinstance(data, bytes) else data.encode(self.codec)

        def make_message(name, msg):
            if msg.body:
                yield UnpackResult(F'{name}.txt', ensure_bytes(msg.body))
            if msg.htmlBody:
                yield UnpackResult(F'{name}.htm', ensure_bytes(msg.htmlBody))

        msgcount = 0

        with NoLogging:
            with Message(bytes(data)) as msg:
                yield from self._get_headparts(msg.header.items())
                yield from make_message('body', msg)
                for attachment in msg.attachments:
                    if attachment.type == 'msg':
                        msgcount += 1
                        yield from make_message(F'attachments/msg_{msgcount:d}', attachment.data)
                        continue
                    if not isbuffer(attachment.data):
                        self.log_warn(F'unknown attachment of type {attachment.type}, please report this!')
                        continue
                    path = attachment.longFilename or attachment.shortFilename
                    yield UnpackResult(F'attachments/{path}', attachment.data)

    def _get_parts_regular(self, data):
        # Unpack a regular MIME message using the standard-library parser.
        msg = BytesParser().parsebytes(data)

        yield from self._get_headparts(msg.items())

        for k, part in enumerate(msg.walk()):
            path = part.get_filename()
            elog = None
            if path is None:
                # Unnamed parts become body.<ext> based on their content type.
                extension = file_extension(part.get_content_type(), 'txt')
                path = F'body.{extension}'
            else:
                path = F'attachments/{path}'
            try:
                data = part.get_payload(decode=True)
            except Exception as E:
                # Transfer decoding failed; fall back to the raw payload and
                # attempt a manual base64 carve.
                # NOTE(review): the inner handler below rebinds E; in the else
                # branch the inner handler did not run, so the reference to E
                # there resolves to this outer exception — fragile but correct.
                try:
                    data = part.get_payload(decode=False)
                except Exception as E:
                    elog = str(E)
                    data = None
                else:
                    from refinery import carve
                    self.log_warn(F'manually decoding part {k}, data might be corrupted: {path}')
                    if isinstance(data, str):
                        data = data.encode('latin1')
                    if isbuffer(data):
                        data = next(data | carve('b64', stripspace=True, single=True, decode=True))
                    else:
                        elog = str(E)
                        data = None
            if not data:
                if elog is not None:
                    self.log_warn(F'could not get content of message part {k}: {elog!s}')
                continue
            yield UnpackResult(path, data)

    def unpack(self, data):
        # Try the Outlook format first; fall back to MIME parsing on failure.
        try:
            yield from self._get_parts_outlook(data)
        except Exception:
            self.log_debug('failed parsing input as Outlook message')
            yield from self._get_parts_regular(data)

Ancestors

Inherited members

class vsect (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.exe.vsect and has the following commandline Interface:

usage: vsect [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
             [path [path ...]]

Extract sections/segments from PE, ELF, and MachO executables.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class vsect(PathExtractorUnit):
    """
    Extract sections/segments from PE, ELF, and MachO executables.
    """
    def unpack(self, data):
        # Route to the format-specific enumerator and slice each region out of
        # a zero-copy view of the input buffer.
        view = memoryview(data)
        regions = exeroute(
            data,
            self._unpack_elf,
            self._unpack_macho,
            self._unpack_pe
        )
        for name, offset, size in regions:
            yield UnpackResult(name, view[offset:offset + size])

    @staticmethod
    def _ascii(string: bytes) -> str:
        # Truncate at the first NUL byte (if any) and decode as Latin-1.
        terminator = string.find(0)
        if terminator < 0:
            return string.decode('latin-1')
        return string[:terminator].decode('latin-1')

    def _unpack_pe(self, pe):
        # Yield (name, raw data offset, raw data size) per PE section.
        for section in pe.sections:
            yield (
                self._ascii(section.Name),
                section.PointerToRawData,
                section.SizeOfRawData,
            )

    def _unpack_elf(self, elf):
        # Yield (name, file offset, size) for every non-null ELF section.
        for section in elf.iter_sections():
            if not section.is_null():
                yield section.name, section['sh_offset'], section.data_size

    def _unpack_macho(self, macho):
        # Yield each LC_SEGMENT as a whole, followed by each of its sections
        # under a "segment/section" path.
        for macho_header in macho.headers:
            for load_command in macho_header.commands:
                cmd_header, segment, sections = load_command
                if not cmd_header.get_cmd_name().startswith('LC_SEGMENT'):
                    continue
                segname = self._ascii(segment.segname)
                yield segname, segment.fileoff, segment.filesize
                for section in sections:
                    yield F'{segname}/{self._ascii(section.sectname)}', section.offset, section.size

Ancestors

Inherited members

class vsnip (addresses, ascii=False, utf16=False, until=b'', base=None)

This unit is implemented in refinery.units.formats.exe.vsnip and has the following commandline Interface:

usage: vsnip [-h] [-L] [-Q] [-0] [-v] [-a | -u | -t B] [-b ADDR]
             start:count:align [start:count:align ...]

Extract data from PE, ELF, and MachO files based on virtual offsets.

positional arguments:
  start:count:align  Use Python slice syntax to describe an area of
                     virtual memory to read. If a chunksize is specified,
                     then the unit will always read a multiple of that
                     number of bytes

optional arguments:
  -a, --ascii        Read ASCII strings; equivalent to -th:00
  -u, --utf16        Read UTF16 strings; equivalent to -th:0000 (also sets
                     chunksize to 2)
  -t, --until B      Read until sequence B is read.
  -b, --base ADDR    Optionally specify a custom base address B.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class vsnip(Unit):
    """
    Extract data from PE, ELF, and MachO files based on virtual offsets.
    """

    def __init__(
        self, addresses: arg(type=sliceobj, nargs='+', metavar='start:count:align', help=(
            'Use Python slice syntax to describe an area of virtual memory to read. If a chunksize is '
            'specified, then the unit will always read a multiple of that number of bytes')),
        ascii: arg.switch('-a', group='END', help='Read ASCII strings; equivalent to -th:00') = False,
        utf16: arg.switch('-u', group='END', help='Read UTF16 strings; equivalent to -th:0000 (also sets chunksize to 2)') = False,
        until: arg.binary('-t', group='END', help='Read until sequence {varname} is read.') = B'',
        base : arg.number('-b', metavar='ADDR', help='Optionally specify a custom base address B.') = None,
    ):
        # The terminator options are mutually exclusive; ascii and utf16 are
        # shorthands that translate into an `until` sequence in process().
        if sum(1 for t in (until, utf16, ascii) if t) > 1:
            raise ValueError('Only one of utf16, ascii, and until can be specified.')
        return super().__init__(addresses=addresses, utf16=utf16, ascii=ascii, until=until, base=base)

    def process(self, data):
        # Translate the ascii/utf16 shorthands into terminator sequences;
        # utf16 additionally forces a 2-byte alignment on each address slice.
        until = self.args.until
        addrs = self.args.addresses
        if self.args.ascii:
            until = B'\0'
        if self.args.utf16:
            until = B'\0\0'
            addrs = (slice(a.start, a.stop, 2) for a in addrs)

        for addr in addrs:
            area = MemoryArea(addr)
            # Map the virtual start address to a file offset plus the end of
            # the containing compartment (segment or section) in the file.
            offset, lbound = exeroute(
                data,
                self._get_buffer_range_elf,
                self._get_buffer_range_macho,
                self._get_buffer_range_pe,
                area.start
            )

            # A zero/None bound means the compartment extends to end of input.
            lbound = lbound or len(data)

            if not until:
                end = lbound
            else:
                # Scan for the terminator, accepting only hits whose distance
                # from the start offset is a multiple of the alignment.
                end = offset - 1
                align = area.align
                while True:
                    end = data.find(until, end + 1)
                    if end not in range(offset, lbound):
                        raise EndOfStringNotFound
                    if (end - offset) % align == 0:
                        break

            # A requested count caps the size of the extracted region.
            if area.count:
                end = min(end, offset + area.count)

            yield data[offset:end]

    def _rebase(self, addr, truebase):
        # Translate addr from the user-specified base (if any) to the image's
        # true base address; without --base the address is used unchanged.
        self.log_info(F'using base address: 0x{truebase:X}')
        if self.args.base is None:
            return addr
        rebased = addr - self.args.base + truebase
        self.log_info(F'rebased to address: 0x{rebased:X}')
        return rebased

    def _get_buffer_range_elf(self, elf, address):
        # Find the loadable segment containing the address; return the file
        # offset of the address and the end of the segment's file-backed data.
        addr = self._rebase(
            address,
            min(s.header.p_vaddr for s in elf.iter_segments() if s.header.p_type == 'PT_LOAD')
        )
        for segment in elf.iter_segments():
            begin = segment.header.p_vaddr
            size = segment.header.p_memsz
            delta = addr - begin
            if delta in range(size + 1):
                offset = segment.header.p_offset
                return offset + delta, offset + segment.header.p_filesz
        raise CompartmentNotFound(addr)

    def _get_buffer_range_macho(self, macho, address):
        # Find the LC_SEGMENT containing the address across all Mach-O headers;
        # segments without file data are skipped.
        for header in macho.headers:
            segments = [segment for header, segment, sections in header.commands
                if header.get_cmd_name().startswith('LC_SEGMENT') and segment.filesize > 0]
            addr = self._rebase(address, min(segment.vmaddr for segment in segments))
            for segment in segments:
                if addr in range(segment.vmaddr, segment.vmaddr + segment.vmsize):
                    offset = addr - segment.vmaddr
                    return offset + segment.fileoff, segment.fileoff + segment.filesize
        raise CompartmentNotFound(address)

    def _get_buffer_range_pe(self, pe, address):
        # Convert the virtual address to an RVA and find the section whose raw
        # data range contains the corresponding file offset.
        base = pe.OPTIONAL_HEADER.ImageBase
        addr = self._rebase(address, base) - base
        offset = pe.get_offset_from_rva(addr)
        for section in pe.sections:
            if offset in range(section.PointerToRawData, section.PointerToRawData + section.SizeOfRawData):
                return offset, section.PointerToRawData + section.SizeOfRawData
        raise CompartmentNotFound(addr, 'section')

Ancestors

Inherited members

class hexdmp (hexaddr=True, width=0, expand=False)

This unit is implemented in refinery.units.formats.hexdmp and has the following commandline Interface:

usage: hexdmp [-h] [-L] [-Q] [-0] [-v] [-R] [-A] [-W N] [-E]

Convert hex dumps back to the original data and vice versa. All options of
this unit apply to its reverse operation where binary data is converted to
a readable hexdump format. The default mode of the unit expects the input
data to contain a readable hexdump and converts it back to binary.

optional arguments:
  -A, --no-addr  Do not show addresses in hexdump
  -W, --width N  Specify the number of hexadecimal characters to use in
                 preview.
  -E, --expand   Do not compress sequences of identical lines in hexdump

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class hexdmp(HexViewer):
    """
    Convert hex dumps back to the original data and vice versa. All options of this unit apply
    to its reverse operation where binary data is converted to a readable hexdump format.
    The default mode of the unit expects the input data to contain a readable hexdump and
    converts it back to binary.
    """
    _ENCODED_BYTES = re.compile(
        '(?:\\s|^)'               # encoded byte patches must be prefixed by white space
        '[A-Fa-f0-9]{2}(\\s+)'    # encoded byte followed by whitespace
        '[A-Fa-f0-9]{2}'          # at least one more encoded byte
        '(?:\\1[A-Fa-f0-9]{2})*'  # more encoded bytes using the same spacing
    )

    def __init__(self, hexaddr=True, width=0, expand=False):
        super().__init__(hexaddr=hexaddr, width=width, expand=expand)
        self._hexline_pattern = re.compile(F'{make_hexline_pattern(1)}(?:[\r\n]|$)', flags=re.MULTILINE)

    def process(self, data):
        """
        Decode a hexdump back to binary. Lines without any encoded bytes act as
        separators: the bytes collected so far are emitted as one output chunk.
        """
        lines = data.decode(self.codec).splitlines(keepends=False)
        decoded_bytes = bytearray()
        for line in lines:
            matches = {}
            for match in self._ENCODED_BYTES.finditer(line):
                encoded_bytes = match[0]
                matches[len(encoded_bytes)] = match
            if not matches:
                if decoded_bytes:
                    yield decoded_bytes
                    decoded_bytes.clear()
                continue
            # BUGFIX: use the longest match both for decoding and for locating the
            # preview text. Previously the preview offset was taken from whatever
            # match finditer yielded last, which disagrees with the decoded data
            # whenever the longest run of encoded bytes is not the last one.
            best = matches[max(matches)]
            encoded_line = best[0]
            self.log_debug(F'decoding: {encoded_line.strip()}')
            decoded_line = bytes.fromhex(encoded_line)
            decoded_bytes.extend(decoded_line)
            txt = line[best.end():]
            txt_stripped = txt.strip()
            if not txt_stripped:
                continue
            # Sanity check: the ASCII preview column should have one character per
            # decoded byte (allowing for surrounding whitespace).
            if len(decoded_line) not in range(len(txt_stripped), len(txt) + 1):
                self.log_warn(F'preview size {len(txt_stripped)} does not match decoding: {line}')
        if decoded_bytes:
            yield decoded_bytes

    def reverse(self, data):
        """Render binary data as a hexdump, one output line at a time."""
        for line in self.hexdump(data):
            yield line.encode(self.codec)

Ancestors

Inherited members

class httpresponse

This unit is implemented in refinery.units.formats.httpresponse and has the following commandline Interface:

usage: httpresponse [-h] [-L] [-Q] [-0] [-v]

Parses HTTP response text, as you would obtain from a packet dump. This
can be useful if chunked or compressed transfer encoding was used.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class httpresponse(Unit):
    """
    Parses HTTP response text, as you would obtain from a packet dump. This can be
    useful if chunked or compressed transfer encoding was used.
    """
    def process(self, data):
        # Wrap the raw bytes in a socket-like object so the standard library
        # HTTP parser can consume them; begin() parses the headers, read()
        # then yields the (de-chunked, decompressed) body.
        with SockWrapper(data) as mock:
            mock.seek(0)
            response = HTTPResponse(mock)
            response.begin()
            return response.read()

Ancestors

Inherited members

class dsjava

This unit is implemented in refinery.units.formats.java.deserialize and has the following commandline Interface:

usage: dsjava [-h] [-L] [-Q] [-0] [-v]

Deserialize Java serialized data and re-serialize as JSON.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class dsjava(Unit):
    """
    Deserialize Java serialized data and re-serialize as JSON.
    """
    def process(self, data):
        # Parse the Java serialization stream first, then render it as JSON
        # using the project's JavaEncoder context manager.
        deserialized = java.loads(data)
        with JavaEncoder as encoder:
            return encoder.dumps(deserialized).encode(self.codec)

Ancestors

Inherited members

class jvdasm (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.java.jvdasm and has the following commandline Interface:

usage: jvdasm [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
              [path [path ...]]

Disassembles the JVM bytecode instructions of methods of classes defined
in Java class files. The unit is implemented as a PathExtractorUnit and
each path name corresponds to the name of one method defined in the class
file.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class jvdasm(PathExtractorUnit):
    """
    Disassembles the JVM bytecode instructions of methods of classes defined in Java class
    files. The unit is implemented as a `refinery.units.formats.PathExtractorUnit` and each
    path name corresponds to the name of one method defined in the class file.
    """
    # Width of the longest opcode mnemonic; used to align disassembly columns.
    _OPC_STRLEN = max(len(op.name) for op in opc)

    def _hex(self, bytestring, sep=''):
        # Render a byte string as lowercase hex pairs joined by `sep`.
        return sep.join(F'{x:02x}' for x in bytestring)

    def unpack(self, data):
        """
        Yield one UnpackResult per method, containing its textual disassembly.
        """
        jc = JvClassFile(data)
        tt = '  '
        opcw = self._OPC_STRLEN
        for method in jc.methods:
            # Locate the "Code" attribute, which holds the method's bytecode;
            # skip abstract/native methods that have none.
            for attribute in method.attributes:
                if attribute.name == 'Code': break
            else:
                self.log_warn(F'no code found for method: {method.name}')
                continue
            code: JvCode = attribute.parse(JvCode)
            with io.StringIO() as display:
                # NOTE(review): args/retval parsed from the descriptor here are
                # never used as such — args is overwritten in the loop below and
                # retval is unused; the match also raises AttributeError on a
                # malformed descriptor.
                args, retval = re.match(R'^\((.*?)\)(.*?)$', method.descriptor).groups()
                print(F'{jc.this!s}::{method!s}{method.descriptor}', file=display)
                for op in code.disassembly:
                    olen = len(op.raw)
                    if op.table is None:
                        args = ', '.join(repr(a) for a in op.arguments)
                    else:
                        # Switch instructions: tableswitch entries are 4 bytes
                        # wide, lookupswitch entries are 8; olen is shrunk to
                        # cover only the opcode plus the default jump.
                        ow = 4 if op.code is opc.tableswitch else 8
                        olen = olen - (len(op.table) - 1) * ow
                        args = F'defaultjmp => {op.table[None]:#010x}'
                        jmps = []
                        for k, (key, jmp) in enumerate(op.table.items()):
                            if key is None:
                                continue
                            raw = self._hex(op.raw[olen + k * ow: olen + k * ow + ow], ' ')
                            jmps.append(F'{tt}{raw!s:<{opcw+15}} {key:#010x} => {jmp:#010x}')
                        args = '\n'.join((args, *jmps))
                    opch = self._hex(op.raw[:olen], ' ')
                    if len(opch) > 14:
                        # Opcode bytes too wide for the column; wrap to a new line.
                        opch += F'\n{tt}{tt:<15}'
                    print(F'{tt}{opch:<15}{op.code!r:<{opcw}} {args}', file=display)
                name = method.name
                if name.startswith('<'):
                    # Special names like <init>/<clinit> become ClassName$init.
                    this = jc.this.value.split('/')
                    this = this[-1]
                    name = F'{this}${name[1:-1]}'
                yield UnpackResult(F'{name}.jd', display.getvalue().encode(self.codec))

Ancestors

Inherited members

class jvstr

This unit is implemented in refinery.units.formats.java.jvstr and has the following commandline Interface:

usage: jvstr [-h] [-L] [-Q] [-0] [-v]

Extract string constants from Java class files.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class jvstr(Unit):
    """
    Extract string constants from Java class files.
    """
    def process(self, data):
        # Parse the class file and emit each constant-pool string as one chunk.
        parsed = JvClassFile(data)
        codec = self.codec
        for constant in parsed.strings:
            yield constant.encode(codec)

Ancestors

Inherited members

class xtjson (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.json and has the following commandline Interface:

usage: xtjson [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
              [path [path ...]]

Extract values from a JSON document.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtjson(PathExtractorUnit):
    """
    Extract values from a JSON document.
    """
    _STRICT_PATH_MATCHING = True

    def unpack(self, data):

        def walk(prefix, node):
            # Recurse into containers first; every non-root node is then
            # yielded itself, so containers appear after their children.
            if isinstance(node, (dict, list)) and prefix:
                prefix = F'{prefix}/'
            if isinstance(node, dict):
                for name, child in node.items():
                    yield from walk(F'{prefix}{name}', child)
            elif isinstance(node, list):
                # Zero-pad list indices to the width of the largest index.
                digits = len(F'{len(node)-1:d}')
                for index, child in enumerate(node):
                    yield from walk(F'{prefix}#{index:0{digits}d}', child)
            if prefix:
                yield prefix, node, node.__class__.__name__

        for path, item, typename in walk('', json.loads(data)):
            # Bind item as a default argument to avoid the late-binding
            # closure pitfall; extraction is deferred until requested.
            def extract(item=item):
                if isinstance(item, (list, dict)):
                    serialized = json.dumps(item, indent=4)
                else:
                    serialized = str(item)
                return serialized.encode(self.codec)
            yield UnpackResult(path, extract, type=typename)

Ancestors

Inherited members

class msgpack

This unit is implemented in refinery.units.formats.msgpack and has the following commandline Interface:

usage: msgpack [-h] [-L] [-Q] [-0] [-v] [-R]

Converts a message-pack (msgpack) buffer to JSON and vice-versa.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class msgpack(Unit):
    """
    Converts a message-pack (msgpack) buffer to JSON and vice-versa.
    """
    def reverse(self, data):
        # JSON text in, msgpack buffer out.
        return mp.dumps(json.loads(data))

    def process(self, data):
        """
        Unpack consecutive msgpack items from the input and yield each one as a
        JSON-encoded chunk. Raises RefineryPartialResult with the undecoded tail
        when decoding fails after at least one byte was consumed.
        """
        unpacker = mp.Unpacker(MemoryFile(data, read_as_bytes=True))
        while True:
            try:
                item = unpacker.unpack()
            except mp.exceptions.OutOfData:
                position = unpacker.tell()
                if position < len(data):
                    # BUGFIX: replace the placeholder "oops" warning with an
                    # actionable message about the trailing undecoded bytes.
                    self.log_warn(
                        F'truncated msgpack item at offset {position}; '
                        F'{len(data) - position} bytes were not decoded')
                break
            except Exception as E:
                position = unpacker.tell()
                if not position:
                    # Nothing was decoded at all; propagate the original error.
                    raise
                view = memoryview(data)
                raise RefineryPartialResult(str(E), view[position:])
            else:
                yield json.dumps(item).encode(self.codec)

Ancestors

Inherited members

class officecrypt (password)

This unit is implemented in refinery.units.formats.office.officecrypt and has the following commandline Interface:

usage: officecrypt [-h] [-L] [-Q] [-0] [-v] password

A simple proxy for the msoffcrypto package to decrypt office documents.

positional arguments:
  password       The document password.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class officecrypt(Unit):
    """
    A simple proxy for the `msoffcrypto` package to decrypt office documents.
    """

    def __init__(self, password: arg(help='The document password.', type=str)):
        super().__init__(password=password)

    def process(self, data):
        # Load the encrypted document, derive the key from the configured
        # password, and decrypt into an in-memory buffer.
        source = MemoryFile(data)
        with source:
            document = msoffcrypto.OfficeFile(source)
            document.load_key(password=self.args.password)
            sink = MemoryFile(bytearray())
            with sink:
                document.decrypt(sink)
                return sink.getvalue()

Ancestors

Inherited members

class xlxtr (*references)

This unit is implemented in refinery.units.formats.office.xlxtr and has the following commandline Interface:

usage: xlxtr [-h] [-L] [-Q] [-0] [-v] [reference [reference ...]]

Extract data from Microsoft Excel documents, both Legacy and new XML type
documents. A sheet reference is of the form B1 or 1.2, both specifying the
first cell of the second column. A cell range can be specified as B1:C12,
or 1.2:C12, or 1.2:12.3. Finally, the unit will always refer to the first
sheet in the document and to change this, specify the sheet name or index
separated by a hashtag, i.e. sheet#B1:C12 or 1#B1:C12. Note that indices
are 1-based. To get all elements of one sheet, use sheet#. If parsing
a sheet reference fails, the unit will assume that the given reference
specifies a sheet.

positional arguments:
  reference      A sheet reference to be extracted. If no sheet references
                 are given, the unit lists all sheet names.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class xlxtr(Unit):
    """
    Extract data from Microsoft Excel documents, both Legacy and new XML type documents. A sheet reference is of the form `B1` or `1.2`,
    both specifying the first cell of the second column. A cell range can be specified as `B1:C12`, or `1.2:C12`, or `1.2:12.3`. Finally,
    the unit will always refer to the first sheet in the document and to change this, specify the sheet name or index separated by a
    hashtag, i.e. `sheet#B1:C12` or `1#B1:C12`. Note that indices are 1-based. To get all elements of one sheet, use `sheet#`.
    If parsing a sheet reference fails, the unit will assume that the given reference specifies a sheet.
    """
    def __init__(self, *references: arg(metavar='reference', type=SheetReference, help=(
        'A sheet reference to be extracted. '
        'If no sheet references are given, the unit lists all sheet names.'
    ))):
        # With no explicit references, fall back to a wildcard that matches
        # every sheet.
        if not references:
            references = [SheetReference('*')]
        super().__init__(references=references)

    def _rcmatch(self, sheet_index, sheet_name, row, col):
        # True if the 1-based cell (row, col) on the given sheet is covered by
        # any configured reference.
        assert row > 0
        assert col > 0
        if not self.args.references:
            return True
        for ref in self.args.references:
            ref: SheetReference
            if not ref.match(sheet_index, sheet_name):
                continue
            if (row, col) in ref:
                return True
        else:
            return False

    def _get_value(self, sheet_index, sheet, callable, row, col):
        # Generator: yields at most one labelled chunk for the cell value
        # obtained via `callable(row-1, col-1)`; empty/unmatched/out-of-range
        # cells produce nothing. Whole-number floats are normalized to int.
        if col <= 0 or row <= 0:
            raise ValueError(F'invalid cell reference ({row}, {col}) - indices must be positive numbers')
        if not self._rcmatch(sheet_index, sheet, row, col):
            return
        try:
            value = callable(row - 1, col - 1)
        except IndexError:
            return
        if not value:
            return
        if isinstance(value, float):
            if float(int(value)) == value:
                value = int(value)
        yield self.labelled(
            str(value).encode(self.codec),
            row=row,
            col=col,
            ref=_rc2ref(row, col),
            sheet=sheet
        )

    def _process_old(self, data):
        # Legacy (.xls) path using xlrd; workbook log output is captured and
        # forwarded to the unit's logger.
        with io.StringIO() as logfile:
            wb = xlrd.open_workbook(file_contents=data, logfile=logfile, verbosity=self.args.verbose - 1, on_demand=True)
            logfile.seek(0)
            for entry in logfile:
                entry = entry.strip()
                if re.search(R'^[A-Z]+:', entry) or '***' in entry:
                    self.log_info(entry)
        for ref in self.args.references:
            ref: SheetReference
            for k, name in enumerate(wb.sheet_names()):
                if not ref.match(k, name):
                    continue
                sheet = wb.sheet_by_name(name)
                self.log_info(F'iterating {sheet.ncols} columns and {sheet.nrows} rows')
                for row, col in ref.cells(sheet.nrows, sheet.ncols):
                    yield from self._get_value(k, name, sheet.cell_value, row, col)

    def _process_new(self, data):
        # Modern (.xlsx) path using openpyxl; the sheet is materialized into a
        # list of row tuples so cells can be addressed by index.
        workbook = openpyxl.load_workbook(MemoryFile(data), read_only=True)
        for ref in self.args.references:
            ref: SheetReference
            for k, name in enumerate(workbook.sheetnames):
                if not ref.match(k, name):
                    continue
                sheet = workbook[name]
                cells = [row for row in sheet.iter_rows(values_only=True)]
                nrows = len(cells)
                ncols = max(len(row) for row in cells)
                for row, col in ref.cells(nrows, ncols):
                    yield from self._get_value(k, name, lambda r, c: cells[r][c], row, col)

    def process(self, data):
        # Try the XML-based format first and fall back to the legacy parser on
        # any failure.
        try:
            yield from self._process_new(data)
        except Exception as e:
            self.log_info(F'reverting to xlrd module due to exception: {e!s}')
            yield from self._process_old(data)

Ancestors

Inherited members

class xtdoc (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.office.xtdoc and has the following commandline Interface:

usage: xtdoc [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
             [path [path ...]]

Extract files from an OLE document such as a Microsoft Word DOCX file.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtdoc(PathExtractorUnit):
    """
    Extract files from an OLE document such as a Microsoft Word DOCX file.
    """

    def unpack(self, data):
        with MemoryFile(data) as stream:
            try:
                oledoc = olefile.OleFileIO(stream)
            except OSError as error:
                # Not an OLE compound file; modern Office documents are zip
                # archives, so defer to the zip extractor instead.
                self.log_info(F'error, {error}, treating input as zip file')
                from ..archive.xtzip import xtzip
                yield from xtzip().unpack(data)
                return
            for entry in oledoc.listdir():
                if not entry or not entry[-1]:
                    continue
                path = '/'.join(entry)
                olestream = oledoc.openstream(path)
                # Stream names may start with a control character; make it
                # visible by rewriting it as a bracketed decimal prefix.
                leading = ord(entry[-1][:1])
                if leading < 20:
                    entry[-1] = F'[{leading:d}]{entry[-1][1:]}'
                    path = '/'.join(entry)
                self.log_debug('exploring:', path)
                yield UnpackResult(path, olestream.read())

Ancestors

Inherited members

class xtrtf (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.office.xtrtf and has the following commandline Interface:

usage: xtrtf [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
             [path [path ...]]

Extract embedded objects in RTF documents.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtrtf(PathExtractorUnit):
    """
    Extract embedded objects in RTF documents.
    """

    def unpack(self, data):
        parser = RtfObjParser(data)
        parser.parse()
        # Zero-pad carve indices to the width of the object count.
        digits = len(str(len(parser.objects)))
        for index, obj in enumerate(parser.objects):
            obj: RtfObject
            path = obj.filename or F'carve{index:0{digits}}.bin'
            payload = obj.rawdata
            meta = {}
            if obj.is_ole:
                if obj.format_id == OleObject.TYPE_EMBEDDED:
                    meta['ole_type'] = 'EMBEDDED'
                elif obj.format_id == OleObject.TYPE_LINKED:
                    meta['ole_type'] = 'LINKED'
                if obj.is_package:
                    meta['src_path'] = obj.src_path
                    meta['tmp_path'] = obj.temp_path
                if obj.clsid is not None:
                    meta['ole_info'] = obj.clsid_desc
                    meta['ole_guid'] = obj.clsid
                meta['ole_name'] = obj.class_name
            if obj.oledata:
                # Prefer the OLE payload; anything preceding it in the raw
                # data is preserved as a header attribute.
                payload = obj.oledata
                cut = obj.rawdata.find(payload)
                if cut > 0:
                    meta['raw_header'] = obj.rawdata[:cut]
                if obj.olepkgdata:
                    # An OLE package nests one level deeper still.
                    payload = obj.olepkgdata
                    cut = obj.oledata.find(payload)
                    if cut >= 0:
                        meta['ole_header'] = obj.oledata[:cut]
            yield UnpackResult(path, payload, **meta)

Ancestors

Inherited members

class dnblob

This unit is implemented in refinery.units.formats.pe.dotnet.dnblob and has the following commandline Interface:

usage: dnblob [-h] [-L] [-Q] [-0] [-v]

Extracts all blobs defined in the #Blob stream of .NET executables.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class dnblob(Unit):
    """
    Extracts all blobs defined in the `#Blob` stream of .NET executables.
    """
    def process(self, data):
        # Resource parsing is skipped since only the #Blob stream is needed.
        streams = DotNetHeader(data, parse_resources=False).meta.Streams
        yield from streams.Blob.values()

Ancestors

Inherited members

class dncfx

This unit is implemented in refinery.units.formats.pe.dotnet.dncfx and has the following commandline Interface:

usage: dncfx [-h] [-L] [-Q] [-0] [-v]

Extracts the encrypted strings from ConfuserX protected .NET executables.
Each decrypted string is returned as a single output.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class dncfx(Unit):
    """
    Extracts the encrypted strings from ConfuserX protected .NET executables.
    Each decrypted string is returned as a single output.
    """
    # IL byte pattern for the array initialization sequence that loads the
    # encrypted string blob; %s is substituted with the field RVA token bytes.
    _PATTERN_ARRAY_INIT = (
        BR'(\x1F.|\x20....)'      # load size of a chunk
        BR'\x8D.\x00\x00\x01'     # create a UInt32 array
        BR'\x25'                  # dup
        BR'\xD0%s\x04'            # ldtoken: RVA of array data
        BR'\x28.\x00\x00.'        # call to InitializeArray
    )

    def process(self, data):
        header = DotNetHeader(data, parse_resources=False)
        decompressor = lzma()

        class IntegerAssignment:
            # Records the position and value of a 32-bit constant load (0x20).
            def __init__(self, match):
                self.offset = match.start()
                self.value, = struct.unpack('<I', match[1])

        def get_size(match):
            # Decode the chunk size from either the short (0x1F, one byte) or
            # long (0x20, four bytes) load instruction captured by the pattern.
            ins = match[1]
            fmt = '<B' if ins[0] == 0x1F else '<I'
            result, = struct.unpack(fmt, ins[-struct.calcsize(fmt):])
            return result

        # Every 32-bit constant load in the binary is a candidate PRNG seed.
        potential_seeds = [
            IntegerAssignment(m)
            for m in re.finditer(br'\x20(....)', data, re.DOTALL)
        ]

        for entry in header.meta.RVAs:
            offset = header.pe.get_offset_from_rva(entry.RVA)
            index = struct.pack('<I', entry.Field.Index)
            strings_found = 0
            for match in re.finditer(self._PATTERN_ARRAY_INIT % re.escape(index[:3]), data, flags=re.DOTALL):
                ms = match.start()

                def sortkey(t):
                    weight = abs(t.offset - ms)
                    if t.offset < ms:
                        # this weights assignments after the array initialization down, but still
                        # prefers them over assignments that are further away than 2kb
                        weight += 2000
                    return weight

                size = get_size(match)

                # Heuristic filter: blob sizes are 16-aligned and bounded.
                if size % 0x10 or size > 10000:
                    continue

                self.log_debug(F'found RVA {entry.Field.Index} initialized with length {size}.')
                potential_seeds.sort(key=sortkey)

                for seed in potential_seeds[1:400]:
                    # the first potential_seed will always be the assignment of the size variable
                    ciphertext = data[offset:offset + size * 4]
                    # Key stream: 16 xorshift words followed by the ciphertext
                    # itself shifted by 0x40 bytes (a rolling XOR).
                    key = self._xs64star(seed.value)
                    key = chunks.pack(key, 4) + ciphertext[:-0x40]
                    decrypted = strxor(key, ciphertext)
                    try:
                        decompressed = decompressor(decrypted)
                    except Exception as e:
                        self.log_debug(
                            F'decompression failed for seed {seed.value:08X} at offset {seed.offset:08X}: {e}')
                        continue
                    else:
                        self.log_info(
                            F'decompression worked for seed {seed.value:08X} at offset {seed.offset:08X}.')
                    if len(decompressed) < 0x100:
                        continue
                    for string in self._extract_strings(decompressed):
                        strings_found += 1
                        yield string
                    if strings_found > 10:
                        break

    def _xs64star(self, state):
        # xorshift-style PRNG truncated to 32 bits; yields 16 key words.
        for i in range(16):
            state ^= (state >> 12) & 0xFFFFFFFF
            state ^= (state << 25) & 0xFFFFFFFF
            state ^= (state >> 27) & 0xFFFFFFFF
            yield state & 0xFFFFFFFF

    def _extract_strings(self, blob):
        # The decompressed blob is a sequence of length-prefixed, 4-byte
        # aligned UTF-8 strings; stop cleanly at end of stream.
        reader = StreamReader(blob)
        while reader.tell() < len(blob):
            try:
                size = reader.expect(UInt32)
                string = reader.expect(StringPrimitive, size=size, codec='UTF8', align=4)
            except ParserEOF:
                return
            if string:
                yield string.encode(self.codec)

Ancestors

Inherited members

class dnds (dereference=True, encode=None, digest=None)

This unit is implemented in refinery.units.formats.pe.dotnet.dnds and has the following commandline Interface:

usage: dnds [-h] [-L] [-Q] [-0] [-v] [-r] [-e UNIT | -d HASH]

Expects data that has been formatted with the .NET class BinaryFormatter.
The output is a representation of the deserialized data in JSON format.

optional arguments:
  -r, --keep-references  Do not resolve Object references in serialized
                         data.
  -e, --encode UNIT      Select an encoder unit used to represent binary
                         data in the JSON output. Available are: HEX, ESC,
                         URL, B64.
  -d, --digest HASH      Select a hashing algorithm to digest binary data;
                         instead of the data, only the hash will be
                         displayed. The available algorithms are: MD5,
                         CRC32, SHA1, SHA256, SHA512.

generic options:
  -h, --help             Show this help message and exit.
  -L, --lenient          Allow partial results as output.
  -Q, --quiet            Disables all log output.
  -0, --devnull          Do not produce any output.
  -v, --verbose          Specify up to two times to increase log level.
Expand source code Browse git
class dnds(JSONEncoderUnit):
    """
    Expects data that has been formatted with the .NET class `BinaryFormatter`.
    The output is a representation of the deserialized data in JSON format.
    """

    def __init__(
        self, dereference: arg.switch('-r', '--keep-references', off=True,
            help='Do not resolve Object references in serialized data.') = True,
        encode=None, digest=None
    ):
        super().__init__(encode=encode, digest=digest, dereference=dereference)

    def process(self, data):
        self.log_debug('initializing parser, will fail on malformed stream')
        # Parse errors are only fatal when debug logging is enabled; otherwise
        # the parser is asked to skip over malformed records.
        parser = BinaryFormatterParser(
            data,
            keep_meta=True,
            dereference=self.args.dereference,
            ignore_errors=not self.log_debug(),
        )
        records = []
        for record in parser:
            records.append({'Type': repr(record), 'Data': record})
        return self.to_json(records)

Ancestors

Inherited members

class dnfields (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.pe.dotnet.dnfields and has the following commandline Interface:

usage: dnfields [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
                [path [path ...]]

This unit can extract data from constant field variables in classes of
.NET executables. Since the .NET header stores only the offset and not the
size of constant fields, heuristics are used to search for opcode
sequences that load the data and additional heuristics are used to guess
the size of the data type.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class dnfields(PathExtractorUnit):
    """
    This unit can extract data from constant field variables in classes of .NET
    executables. Since the .NET header stores only the offset and not the size of
    constant fields, heuristics are used to search for opcode sequences that load
    the data and additional heuristics are used to guess the size of the data
    type.
    """
    # Maps regular expressions matching .NET primitive type names to the size
    # of that element type in bytes.
    _SIZEMAP = {
        '^s?byte$'       : 1,
        '^s?char$'       : 2,
        '^[us]?int.?16$' : 2,
        '^[us]?int.?32$' : 4,
        '^[us]?int.?64$' : 8,
    }

    def _guess_field_info(self, tables, data, t) -> FieldInfo:
        """
        Scan the raw image for an IL opcode sequence that allocates an array and
        initializes it from the field with metadata token index `t`. From a match,
        recover the element count, the element type, and - when a stsfld opcode
        follows - the name of the static field the array is stored to.
        """
        pattern = (
            BR'(\x20....|\x1F.)'                # ldc.i4  count
            BR'\x8D(...)([\x01\x02])'           # newarr  col|row
            BR'\x25'                            # dup
            BR'\xD0\x%02x\x%02x\x%02x\x04'      # ldtoken t
            BR'(?:.{0,12}'                      # ...
            BR'\x80(...)\x04)?' % (             # stsfld variable
                (t >> 0x00) & 0xFF,
                (t >> 0x08) & 0xFF,
                (t >> 0x10) & 0xFF
            )
        )
        for match in re.finditer(pattern, data, flags=re.DOTALL):
            count, j, r, name = match.groups()
            # The count operand is 1 byte (ldc.i4.s) or 4 bytes (ldc.i4) after
            # the opcode byte; pad it to 4 bytes. j/r are the newarr token.
            count, j, r = struct.unpack('<LLB', B'%s%s\0%s' % (count[1:].ljust(4, B'\0'), j, r))
            if name:
                try:
                    # The stsfld operand is a 3-byte row index into table 4
                    # (the Field table); resolve it to the field's name.
                    name = struct.unpack('<L', B'%s\0' % name)
                    name = name[0]
                    name = tables[4][name - 1].Name
                except Exception as E:
                    self.log_info(F'attempt to parse field name failed: {E!s}')
                    name = None
            element = tables[r][j - 1]
            # NOTE(review): `pattern` is re-bound here, shadowing the outer
            # search pattern; harmless since the outer one is no longer used.
            for pattern, size in self._SIZEMAP.items():
                if re.match(pattern, element.TypeName, flags=re.IGNORECASE):
                    return FieldInfo(element.TypeName, count, size, name)

    def unpack(self, data):
        """Yield one UnpackResult per FieldRVA entry whose type could be guessed."""
        header = DotNetHeader(data, parse_resources=False)
        tables = header.meta.Streams.Tables
        fields = tables.FieldRVA
        if not fields:
            return
        # Widths are only used for zero-padded log message formatting.
        iwidth = len(str(len(fields)))
        rwidth = max(len(F'{field.RVA:X}') for field in fields)
        rwidth = max(rwidth, 4)

        for k, rv in enumerate(fields):
            index = rv.Field.Index
            field = tables.Field[index - 1]
            fname = field.Name
            if len(field.Signature) == 2:
                # Crude signature parser for non-array case. Reference:
                # https://www.codeproject.com/Articles/42649/NET-File-Format-Signatures-Under-the-Hood-Part-1
                # https://www.codeproject.com/Articles/42655/NET-file-format-Signatures-under-the-hood-Part-2
                guess = {
                    0x03: ('Char',   1, 1),  # noqa
                    0x04: ('SByte',  1, 1),  # noqa
                    0x05: ('Byte',   1, 1),  # noqa
                    0x06: ('Int16',  1, 2),  # noqa
                    0x07: ('UInt16', 1, 2),  # noqa
                    0x08: ('Int32',  1, 4),  # noqa
                    0x09: ('UInt32', 1, 4),  # noqa
                    0x0A: ('Int64',  1, 8),  # noqa
                    0x0B: ('UInt64', 1, 8),  # noqa
                    0x0C: ('Single', 1, 4),  # noqa
                    0x0D: ('Double', 1, 8),  # noqa
                }.get(field.Signature[1], None)
            else:
                # Array or otherwise complex type: fall back to scanning the
                # image for the IL initialization sequence.
                guess = self._guess_field_info(tables, data, index)
            if guess is None:
                self.log_debug(lambda: F'field {k:0{iwidth}d} name {field.Signature}: unable to guess type information')
                continue
            totalsize = guess.count * guess.size
            if guess.name is not None:
                fname = guess.name
            if not fname.isprintable():
                # Fall back to a name derived from the RVA.
                fname = F'F{rv.RVA:0{rwidth}X}'
            name = F'{fname}.{guess.type}[{guess.count}]'
            self.log_info(lambda: F'field {k:0{iwidth}d} at RVA 0x{rv.RVA:04X} of type {guess.type}, count: {guess.count}, name: {fname}')
            offset = header.pe.get_offset_from_rva(rv.RVA)
            # Bind offset/size as defaults so the lazy extraction closure does
            # not capture the loop variables by reference.
            yield UnpackResult(name, lambda t=offset, s=totalsize: data[t:t + s])

Ancestors

Inherited members

class dnhdr (resources=False, encode=None, digest=None)

This unit is implemented in refinery.units.formats.pe.dotnet.dnhdr and has the following commandline Interface:

usage: dnhdr [-h] [-L] [-Q] [-0] [-v] [-r] [-e UNIT | -d HASH]

Parses the .NET header of the input executable. The output is a
representation of the header data in JSON format.

optional arguments:
  -r, --resources    Also parse .NET resources.
  -e, --encode UNIT  Select an encoder unit used to represent binary data
                     in the JSON output. Available are: HEX, ESC, URL,
                     B64.
  -d, --digest HASH  Select a hashing algorithm to digest binary data;
                     instead of the data, only the hash will be displayed.
                     The available algorithms are: MD5, CRC32, SHA1,
                     SHA256, SHA512.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class dnhdr(JSONEncoderUnit):
    """
    Parses the .NET header of the input executable. The output is a
    representation of the header data in JSON format.
    """
    def __init__(
        self,
        resources: arg.switch('-r', '--resources', help='Also parse .NET resources.') = False,
        encode=None, digest=None
    ):
        super().__init__(encode=encode, digest=digest, resources=resources)

    def process(self, data):
        # Keep the parsed header in its own variable: the previous code
        # re-bound `dn` to the result dictionary and then read `dn.resources`
        # from that dictionary, raising AttributeError whenever -r was given.
        header = DotNetHeader(data, parse_resources=self.args.resources)
        result = {
            'Head': header.head,
            'Meta': header.meta,
        }
        if self.args.resources:
            result['RSRC'] = header.resources
        return self.to_json(result)

Ancestors

Inherited members

class dnmr (*paths, list=False, join_path=False, drop_path=False, path=b'name', raw=False)

This unit is implemented in refinery.units.formats.pe.dotnet.dnmr and has the following commandline Interface:

usage: dnmr [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-r]
            [path [path ...]]

Extracts subfiles from .NET managed resources.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "name".
  -r, --raw        Do not deserialize the managed resource entry data.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class dnmr(PathExtractorUnit):
    """
    Extracts subfiles from .NET managed resources.
    """
    def __init__(
        self, *paths, list=False, join_path=False, drop_path=False, path=b'name',
        raw: arg.switch('-r', help='Do not deserialize the managed resource entry data.') = False
    ):
        super().__init__(*paths, list=list, join_path=join_path, drop_path=drop_path, path=path, raw=raw)

    def unpack(self, data):
        # A parser failure is treated the same as an empty result set.
        try:
            entries = NetStructuredResources(data)
        except NoManagedResource:
            entries = None
        if not entries:
            raise RefineryPartialResult('no managed resources found', partial=data)
        deserialize = not self.args.raw
        for item in entries:
            if item.Error:
                self.log_warn(F'entry {item.Name} carried error message: {item.Error}')
            payload = item.Data
            if deserialize:
                # Prefer the deserialized value when it is a string or buffer.
                value = item.Value
                if isinstance(value, str):
                    payload = value.encode('utf-16le')
                elif isbuffer(value):
                    payload = value
            yield UnpackResult(item.Name, payload)

Ancestors

Inherited members

class dnrc (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.pe.dotnet.dnrc and has the following commandline Interface:

usage: dnrc [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
            [path [path ...]]

Extracts all .NET resources whose name matches any of the given patterns
and outputs them. Use the dnmr unit to extract subfiles from managed .NET
resources.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class dnrc(PathExtractorUnit):
    """
    Extracts all .NET resources whose name matches any of the given patterns
    and outputs them. Use the `refinery.units.formats.pe.dotnet.dnmr` unit to
    extract subfiles from managed .NET resources.
    """
    def unpack(self, data):
        resources = DotNetHeader(data).resources
        if not resources:
            # In listing mode an empty result is fine; otherwise it is an error.
            if self.args.list:
                return
            raise ValueError('This file contains no resources.')
        for rsrc in resources:
            yield UnpackResult(rsrc.Name, rsrc.Data)

Ancestors

Inherited members

class dnstr (user=True, meta=True)

This unit is implemented in refinery.units.formats.pe.dotnet.dnstr and has the following commandline Interface:

usage: dnstr [-h] [-L] [-Q] [-0] [-v] [-m | -u]

Extracts all strings defined in the #Strings and #US streams of .NET
executables.

optional arguments:
  -m, --meta     Only extract from #Strings.
  -u, --user     Only extract from #US.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class dnstr(Unit):
    """
    Extracts all strings defined in the `#Strings` and `#US` streams of .NET
    executables.
    """

    def __init__(
        self,
        # The switches are deliberately cross-wired: -m/--meta turns OFF the
        # `user` parameter (so only #Strings is extracted), and -u/--user turns
        # OFF the `meta` parameter (so only #US is extracted). Both switches
        # share the mutually exclusive group 'HEAP'.
        user: arg.switch('-m', '--meta', off=True, group='HEAP', help='Only extract from #Strings.') = True,
        meta: arg.switch('-u', '--user', off=True, group='HEAP', help='Only extract from #US.') = True,
    ):
        if not meta and not user:
            raise ValueError('Either ascii or utf16 strings must be enabled.')
        super().__init__(meta=meta, user=user)

    def process(self, data):
        # Resources are not needed to read the string heaps.
        header = DotNetHeader(data, parse_resources=False)
        if self.args.meta:
            # Identifier/metadata strings from the #Strings heap.
            for string in header.meta.Streams.Strings.values():
                yield string.encode(self.codec)
        if self.args.user:
            # User string literals from the #US heap.
            for string in header.meta.Streams.US.values():
                yield string.encode(self.codec)

Ancestors

Inherited members

class pemeta (all=True, debug=False, dotnet=False, signatures=False, timestamps=False, version=False, header=False, exports=False, imports=False, tabular=False, timeraw=False)

This unit is implemented in refinery.units.formats.pe.pemeta and has the following commandline Interface:

usage: pemeta [-h] [-L] [-Q] [-0] [-v] [-c] [-D] [-N] [-S] [-T] [-V] [-H]
              [-E] [-I] [-t] [-r]

Extract metadata from PE files. By default, all information except for
imports and exports are extracted.

optional arguments:
  -c, --custom      Unless enabled, all default categories will be
                    extracted.
  -D, --debug       Parse the PDB path from the debug directory.
  -N, --dotnet      Parse the .NET header.
  -S, --signatures  Parse digital signatures.
  -T, --timestamps  Extract time stamps.
  -V, --version     Parse the VERSION resource.
  -H, --header      Parse data from the PE header.
  -E, --exports     List all exported functions.
  -I, --imports     List all imported functions.
  -t, --tabular     Print information in a table rather than as JSON
  -r, --timeraw     Extract time stamps as numbers instead of human-
                    readable format.

generic options:
  -h, --help        Show this help message and exit.
  -L, --lenient     Allow partial results as output.
  -Q, --quiet       Disables all log output.
  -0, --devnull     Do not produce any output.
  -v, --verbose     Specify up to two times to increase log level.
Expand source code Browse git
class pemeta(Unit):
    """
    Extract metadata from PE files. By default, all information except for imports and exports are
    extracted.
    """
    def __init__(
        self, all : arg('-c', '--custom',
            help='Unless enabled, all default categories will be extracted.') = True,
        debug      : arg('-D', help='Parse the PDB path from the debug directory.') = False,
        dotnet     : arg('-N', help='Parse the .NET header.') = False,
        signatures : arg('-S', help='Parse digital signatures.') = False,
        timestamps : arg('-T', help='Extract time stamps.') = False,
        version    : arg('-V', help='Parse the VERSION resource.') = False,
        header     : arg('-H', help='Parse data from the PE header.') = False,
        exports    : arg('-E', help='List all exported functions.') = False,
        imports    : arg('-I', help='List all imported functions.') = False,
        tabular    : arg('-t', help='Print information in a table rather than as JSON') = False,
        timeraw    : arg('-r', help='Extract time stamps as numbers instead of human-readable format.') = False,
    ):
        # Each default category is OR-ed with `all`, so by default everything
        # except imports and exports is enabled. The -c switch presumably sets
        # `all` to False (confirm against refinery.lib.argformats), restricting
        # output to the explicitly selected categories.
        super().__init__(
            debug=all or debug,
            dotnet=all or dotnet,
            signatures=all or signatures,
            timestamps=all or timestamps,
            version=all or version,
            header=all or header,
            imports=imports,
            exports=exports,
            timeraw=timeraw,
            tabular=tabular,
        )

    @classmethod
    def _ensure_string(cls, x):
        # Coerce x to str: decode bytes with the unit codec, repr anything else.
        if isinstance(x, str):
            return x
        if isinstance(x, bytes):
            return x.decode(cls.codec, 'backslashreplace')
        return repr(x)

    @classmethod
    def _parse_pedict(cls, bin):
        # Convert a pefile string dictionary into a plain str-to-str mapping,
        # dropping entries whose value is falsy.
        return {
            cls._ensure_string(key): cls._ensure_string(val)
            for key, val in bin.items() if val
        }

    @classmethod
    def parse_signature(cls, data: bytearray) -> dict:
        """
        Extracts a JSON-serializable and human readable dictionary with information about
        time stamp and code signing certificates that are attached to the input PE file.
        """
        from refinery.units.formats.pkcs7 import pkcs7
        from refinery.units.formats.pe.pesig import pesig

        try:
            # Extract the authenticode blob with pesig, parse it as PKCS7, and
            # load the resulting JSON document.
            signature = data | pesig | pkcs7 | json.loads
        except Exception as E:
            raise ValueError(F'PKCS7 parser failed with error: {E!s}')

        info = {}

        def find_timestamps(entry):
            # Depth-first search of the parsed signature for a signing_time
            # attribute; on the way back up, attach the issuer of the
            # enclosing counter-signature when it is available.
            if isinstance(entry, dict):
                if set(entry.keys()) == {'type', 'value'}:
                    if entry['type'] == 'signing_time':
                        return {'Timestamp': entry['value']}
                for value in entry.values():
                    result = find_timestamps(value)
                    if result is None:
                        continue
                    with suppress(KeyError):
                        result.setdefault('TimestampIssuer', entry['sid']['issuer']['common_name'])
                    return result
            elif isinstance(entry, list):
                for value in entry:
                    result = find_timestamps(value)
                    if result is None:
                        continue
                    return result

        timestamp_info = find_timestamps(signature)
        if timestamp_info is not None:
            info.update(timestamp_info)

        try:
            certificates = signature['content']['certificates']
        except KeyError:
            return info

        if len(certificates) == 1:
            main_certificate = certificates[0]['tbs_certificate']
        else:
            # Heuristically select the code signing certificate: skip CA
            # certificates (key_cert_sign usage), prefer one whose extended
            # key usage includes code_signing, and fall back to the only
            # certificate with a non-timestamping extended key usage.
            certificates_with_extended_use = []
            main_certificate = None
            for certificate in certificates:
                with suppress(Exception):
                    crt = certificate['tbs_certificate']
                    ext = [e for e in crt['extensions'] if e['extn_id'] == 'extended_key_usage' and e['extn_value'] != ['time_stamping']]
                    key = [e for e in crt['extensions'] if e['extn_id'] == 'key_usage']
                    if ext:
                        certificates_with_extended_use.append(crt)
                    if any('key_cert_sign' in e['extn_value'] for e in key):
                        continue
                    if any('code_signing' in e['extn_value'] for e in ext):
                        main_certificate = crt
                        break
            if main_certificate is None and len(certificates_with_extended_use) == 1:
                main_certificate = certificates_with_extended_use[0]
        if main_certificate:
            serial = main_certificate['serial_number']
            if not isinstance(serial, int):
                serial = int(serial, 0)
            # Render the serial as lowercase hex with an even digit count.
            serial = F'{serial:x}'
            if len(serial) % 2:
                serial = '0' + serial
            subject = main_certificate['subject']
            location = [subject.get(t, '') for t in ('locality_name', 'state_or_province_name', 'country_name')]
            info.update(Subject=subject['common_name'])
            if any(location):
                info.update(SubjectLocation=', '.join(filter(None, location)))
            info.update(Issuer=main_certificate['issuer']['common_name'], Serial=serial)
            return info
        return info

    @classmethod
    def parse_version(cls, pe: PE, data=None) -> dict:
        """
        Extracts a JSON-serializable and human readable dictionary with information about
        the version resource of an input PE file, if available.
        """
        pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']])
        for FileInfo in pe.FileInfo:
            for FileInfoEntry in FileInfo:
                with suppress(AttributeError):
                    for StringTableEntry in FileInfoEntry.StringTable:
                        StringTableEntryParsed = cls._parse_pedict(StringTableEntry.entries)
                        with suppress(AttributeError):
                            # LangID may live in the entries dict or as an
                            # attribute, and may be a hex string or an integer.
                            LangID = StringTableEntry.entries.get('LangID', None) or StringTableEntry.LangID
                            LangID = int(LangID, 0x10) if not isinstance(LangID, int) else LangID
                            # High word selects the language, low word the charset.
                            LangHi = LangID >> 0x10
                            LangLo = LangID & 0xFFFF
                            Language = cls._LCID.get(LangHi, 'Language Neutral')
                            Charset = cls._CHARSET.get(LangLo, 'Unknown Charset')
                            StringTableEntryParsed.update(
                                LangID=F'{LangID:08X}',
                                Charset=Charset,
                                Language=Language
                            )
                        # Only the first string table entry is returned; any
                        # further tables are ignored.
                        return StringTableEntryParsed

    @classmethod
    def parse_exports(cls, pe: PE, data=None) -> list:
        # Return the list of exported symbol names; exports without a name
        # (exports by ordinal) are rendered as @index.
        pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT']])
        names = []
        for index, symbol in enumerate(pe.DIRECTORY_ENTRY_EXPORT.symbols):
            label = symbol.name.decode('ascii') if symbol.name else F'@{index}'
            names.append(label)
        return names

    @classmethod
    def parse_imports(cls, pe: PE, data=None) -> dict:
        """
        Returns a mapping from imported DLL name (without a trailing .dll
        suffix) to the list of function names imported from that DLL.
        """
        pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']])
        # Bugfix: the return annotation previously said `list` although a dict
        # has always been returned.
        info = {}
        for idd in pe.DIRECTORY_ENTRY_IMPORT:
            dll = idd.dll.decode('ascii')
            if dll.lower().endswith('.dll'):
                dll = dll[:-4]
            imports = info.setdefault(dll, [])
            for imp in idd.imports:
                # Imports by ordinal have no name (imp.name is None); skip
                # them instead of crashing on None.decode.
                if imp.name:
                    imports.append(imp.name.decode('ascii'))
        return info

    @classmethod
    def parse_header(cls, pe: PE, data=None) -> dict:
        """
        Extracts a JSON-serializable and human readable dictionary with
        information from the PE header: machine type, subsystem, minimum OS
        version, RICH header entries, image type, bitness, and image base.
        """
        # Bugfix: the @classmethod decorator was missing; the method only
        # worked because instance calls filled the `cls` slot with `self`.
        # All sibling parse_* methods are classmethods.
        def format_macro_name(name: str, prefix, convert=True):
            # Drop the first `prefix` underscore-separated components of a
            # pefile macro name and prettify the remainder.
            name = name.split('_')[prefix:]
            if convert:
                for k, part in enumerate(name):
                    name[k] = part.upper() if len(part) <= 3 else part.capitalize()
            return ' '.join(name)

        major = pe.OPTIONAL_HEADER.MajorOperatingSystemVersion
        minor = pe.OPTIONAL_HEADER.MinorOperatingSystemVersion
        version = cls._WINVER.get(major, {0: 'Unknown'})

        try:
            MinimumOS = version[minor]
        except LookupError:
            MinimumOS = version[0]
        header_information = {
            'Machine': format_macro_name(MACHINE_TYPE[pe.FILE_HEADER.Machine], 3, False),
            'Subsystem': format_macro_name(SUBSYSTEM_TYPE[pe.OPTIONAL_HEADER.Subsystem], 2),
            'MinimumOS': MinimumOS,
        }

        rich_header = pe.parse_rich_header()
        rich = []
        if rich_header:
            # RICH header values alternate between tool id and use count;
            # only the ids at even offsets are looked up here.
            it = rich_header.get('values', [])
            for idv in it[0::2]:
                info = cls._RICH_HEADER.get(idv, None)
                if info is None:
                    info = guess_version(idv)
                if not info:
                    continue
                rich.append(str(info))
            header_information['RICH'] = rich

        characteristics = [
            name for name, mask in image_characteristics
            if pe.FILE_HEADER.Characteristics & mask
        ]
        for typespec, flag in {
            'EXE': 'IMAGE_FILE_EXECUTABLE_IMAGE',
            'DLL': 'IMAGE_FILE_DLL',
            'SYS': 'IMAGE_FILE_SYSTEM'
        }.items():
            if flag in characteristics:
                header_information['Type'] = typespec
        # address_width is the number of hex digits used to render addresses;
        # 4 * address_width is the corresponding bitness.
        address_width = None
        if 'IMAGE_FILE_16BIT_MACHINE' in characteristics:
            address_width = 4
        elif pe.FILE_HEADER.Machine == MACHINE_TYPE['IMAGE_FILE_MACHINE_I386']:
            address_width = 8
        elif pe.FILE_HEADER.Machine == MACHINE_TYPE['IMAGE_FILE_MACHINE_AMD64']:
            address_width = 16
        if address_width:
            header_information['Bits'] = 4 * address_width
        else:
            address_width = 16
        # Bugfix: the format string lacked the X conversion, so the image base
        # was rendered as zero-padded decimal digits behind a 0x prefix.
        header_information['ImageBase'] = F'0x{pe.OPTIONAL_HEADER.ImageBase:0{address_width}X}'
        return header_information

    @classmethod
    def parse_time_stamps(cls, pe: PE, raw_time_stamps: bool) -> dict:
        """
        Extracts time stamps from the PE header (link time), as well as from the imports,
        exports, debug, and resource directory. The resource time stamp is also parsed as
        a DOS time stamp and returned as the "Delphi" time stamp.
        """
        if raw_time_stamps:
            # Leave values as raw epoch integers.
            def dt(ts): return ts
        else:
            def dt(ts):
                # parse as UTC but then forget time zone information
                return datetime.fromtimestamp(
                    ts,
                    tz=timezone.utc
                ).replace(tzinfo=None)

        pe.parse_data_directories(directories=[
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']
        ])

        info = {}

        # Each directory may be absent; AttributeError is suppressed so that
        # a missing directory simply contributes no time stamp.
        with suppress(AttributeError):
            info.update(Linker=dt(pe.FILE_HEADER.TimeDateStamp))

        with suppress(AttributeError):
            # The last import descriptor's time stamp wins.
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                info.update(Import=dt(entry.TimeDateStamp()))

        with suppress(AttributeError):
            # The last debug directory entry's time stamp wins.
            for entry in pe.DIRECTORY_ENTRY_DEBUG:
                info.update(DbgDir=dt(entry.struct.TimeDateStamp))

        with suppress(AttributeError):
            Export = pe.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp
            if Export: info.update(Export=dt(Export))

        with suppress(AttributeError):
            res_timestamp = pe.DIRECTORY_ENTRY_RESOURCE.struct.TimeDateStamp
            if res_timestamp:
                with suppress(ValueError):
                    # Delphi binaries store a DOS time stamp in the resource
                    # directory; report both interpretations.
                    from ...misc.datefix import datefix
                    dos = datefix.dostime(res_timestamp)
                    info.update(Delphi=dos)
                    info.update(RsrcTS=dt(res_timestamp))

        def norm(value):
            # Keep raw integers as-is; render datetime objects as strings.
            if isinstance(value, int):
                return value
            return str(value)

        return {key: norm(value) for key, value in info.items()}

    @classmethod
    def parse_dotnet(cls, pe: PE, data):
        """
        Extracts a JSON-serializable and human readable dictionary with information about
        the .NET metadata of an input PE file.
        """
        header = DotNetHeader(data, pe=pe)
        tables = header.meta.Streams.Tables
        head = header.head
        meta = header.meta
        info = {
            'RuntimeVersion': F'{head.MajorRuntimeVersion}.{head.MinorRuntimeVersion}',
            'Version': F'{meta.MajorVersion}.{meta.MinorVersion}',
            'VersionString': meta.VersionString,
        }

        info['Flags'] = [name for name, check in head.KnownFlags.items() if check]

        if len(tables.Assembly) == 1:
            assembly = tables.Assembly[0]
            info['AssemblyName'] = assembly.Name
            info['Release'] = '{}.{}.{}.{}'.format(
                assembly.MajorVersion,
                assembly.MinorVersion,
                assembly.BuildNumber,
                assembly.RevisionNumber
            )

        try:
            entry = head.EntryPointToken + pe.OPTIONAL_HEADER.ImageBase
        except AttributeError:
            pass
        else:
            info['EntryPoint'] = F'0x{entry:08X}'

        if len(tables.Module) == 1:
            info['ModuleName'] = tables.Module[0].Name

        return info

    @classmethod
    def parse_debug(cls, pe: PE, data=None):
        # Extract the PDB path and age from CodeView debug directory entries.
        result = {}
        pe.parse_data_directories(directories=[
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG']])
        for dbg in pe.DIRECTORY_ENTRY_DEBUG:
            if DEBUG_TYPE.get(dbg.struct.Type, None) != 'IMAGE_DEBUG_TYPE_CODEVIEW':
                continue
            with suppress(Exception):
                path = dbg.entry.PdbFileName
                # Truncate at the first NUL byte if one is present.
                terminator = path.find(0)
                if terminator >= 0:
                    path = path[:terminator]
                result.update(
                    PdbPath=path.decode(cls.codec),
                    PdbAge=dbg.entry.Age
                )
        return result

    def process(self, data):
        # Run every enabled category parser, collecting results into a single
        # dictionary; a failing parser is logged and skipped.
        result = {}
        pe = PE(data=data, fast_load=True)

        for switch, resolver, name in [
            (self.args.debug,   self.parse_debug,    'Debug'),    # noqa
            (self.args.dotnet,  self.parse_dotnet,   'DotNet'),   # noqa
            (self.args.header,  self.parse_header,   'Header'),   # noqa
            (self.args.version, self.parse_version,  'Version'),  # noqa
            (self.args.imports, self.parse_imports,  'Imports'),  # noqa
            (self.args.exports, self.parse_exports,  'Exports'),  # noqa
        ]:
            if not switch:
                continue
            self.log_debug(F'parsing: {name}')
            try:
                info = resolver(pe, data)
            except Exception as E:
                self.log_info(F'failed to obtain {name}: {E!s}')
                continue
            if info:
                result[name] = info

        signature = {}

        # The signature is needed both for the Signature category and for the
        # authenticode signing time stamp; best-effort, failures are ignored.
        if self.args.timestamps or self.args.signatures:
            with suppress(Exception):
                signature = self.parse_signature(data)

        if self.args.timestamps:
            ts = self.parse_time_stamps(pe, self.args.timeraw)
            with suppress(KeyError):
                ts.update(Signed=signature['Timestamp'])
            result.update(TimeStamp=ts)

        if signature and self.args.signatures:
            result['Signature'] = signature

        if result:
            # Delegate rendering (JSON or tabular) to the ppjson unit.
            yield from ppjson(tabular=self.args.tabular)._pretty_output(result, indent=4, ensure_ascii=False)

    _LCID = {
        0x0436: 'Afrikaans-South Africa',
        0x041c: 'Albanian-Albania',
        0x045e: 'Amharic-Ethiopia',
        0x0401: 'Arabic (Saudi Arabia)',
        0x1401: 'Arabic (Algeria)',
        0x3c01: 'Arabic (Bahrain)',
        0x0c01: 'Arabic (Egypt)',
        0x0801: 'Arabic (Iraq)',
        0x2c01: 'Arabic (Jordan)',
        0x3401: 'Arabic (Kuwait)',
        0x3001: 'Arabic (Lebanon)',
        0x1001: 'Arabic (Libya)',
        0x1801: 'Arabic (Morocco)',
        0x2001: 'Arabic (Oman)',
        0x4001: 'Arabic (Qatar)',
        0x2801: 'Arabic (Syria)',
        0x1c01: 'Arabic (Tunisia)',
        0x3801: 'Arabic (U.A.E.)',
        0x2401: 'Arabic (Yemen)',
        0x042b: 'Armenian-Armenia',
        0x044d: 'Assamese',
        0x082c: 'Azeri (Cyrillic)',
        0x042c: 'Azeri (Latin)',
        0x042d: 'Basque',
        0x0423: 'Belarusian',
        0x0445: 'Bengali (India)',
        0x0845: 'Bengali (Bangladesh)',
        0x141A: 'Bosnian (Bosnia/Herzegovina)',
        0x0402: 'Bulgarian',
        0x0455: 'Burmese',
        0x0403: 'Catalan',
        0x045c: 'Cherokee-United States',
        0x0804: 'Chinese (People\'s Republic of China)',
        0x1004: 'Chinese (Singapore)',
        0x0404: 'Chinese (Taiwan)',
        0x0c04: 'Chinese (Hong Kong SAR)',
        0x1404: 'Chinese (Macao SAR)',
        0x041a: 'Croatian',
        0x101a: 'Croatian (Bosnia/Herzegovina)',
        0x0405: 'Czech',
        0x0406: 'Danish',
        0x0465: 'Divehi',
        0x0413: 'Dutch-Netherlands',
        0x0813: 'Dutch-Belgium',
        0x0466: 'Edo',
        0x0409: 'English (United States)',
        0x0809: 'English (United Kingdom)',
        0x0c09: 'English (Australia)',
        0x2809: 'English (Belize)',
        0x1009: 'English (Canada)',
        0x2409: 'English (Caribbean)',
        0x3c09: 'English (Hong Kong SAR)',
        0x4009: 'English (India)',
        0x3809: 'English (Indonesia)',
        0x1809: 'English (Ireland)',
        0x2009: 'English (Jamaica)',
        0x4409: 'English (Malaysia)',
        0x1409: 'English (New Zealand)',
        0x3409: 'English (Philippines)',
        0x4809: 'English (Singapore)',
        0x1c09: 'English (South Africa)',
        0x2c09: 'English (Trinidad)',
        0x3009: 'English (Zimbabwe)',
        0x0425: 'Estonian',
        0x0438: 'Faroese',
        0x0429: 'Farsi',
        0x0464: 'Filipino',
        0x040b: 'Finnish',
        0x040c: 'French (France)',
        0x080c: 'French (Belgium)',
        0x2c0c: 'French (Cameroon)',
        0x0c0c: 'French (Canada)',
        0x240c: 'French (Democratic Rep. of Congo)',
        0x300c: 'French (Cote d\'Ivoire)',
        0x3c0c: 'French (Haiti)',
        0x140c: 'French (Luxembourg)',
        0x340c: 'French (Mali)',
        0x180c: 'French (Monaco)',
        0x380c: 'French (Morocco)',
        0xe40c: 'French (North Africa)',
        0x200c: 'French (Reunion)',
        0x280c: 'French (Senegal)',
        0x100c: 'French (Switzerland)',
        0x1c0c: 'French (West Indies)',
        0x0462: 'Frisian-Netherlands',
        0x0467: 'Fulfulde-Nigeria',
        0x042f: 'FYRO Macedonian',
        0x083c: 'Gaelic (Ireland)',
        0x043c: 'Gaelic (Scotland)',
        0x0456: 'Galician',
        0x0437: 'Georgian',
        0x0407: 'German (Germany)',
        0x0c07: 'German (Austria)',
        0x1407: 'German (Liechtenstein)',
        0x1007: 'German (Luxembourg)',
        0x0807: 'German (Switzerland)',
        0x0408: 'Greek',
        0x0474: 'Guarani-Paraguay',
        0x0447: 'Gujarati',
        0x0468: 'Hausa-Nigeria',
        0x0475: 'Hawaiian (United States)',
        0x040d: 'Hebrew',
        0x0439: 'Hindi',
        0x040e: 'Hungarian',
        0x0469: 'Ibibio-Nigeria',
        0x040f: 'Icelandic',
        0x0470: 'Igbo-Nigeria',
        0x0421: 'Indonesian',
        0x045d: 'Inuktitut',
        0x0410: 'Italian (Italy)',
        0x0810: 'Italian (Switzerland)',
        0x0411: 'Japanese',
        0x044b: 'Kannada',
        0x0471: 'Kanuri-Nigeria',
        0x0860: 'Kashmiri',
        0x0460: 'Kashmiri (Arabic)',
        0x043f: 'Kazakh',
        0x0453: 'Khmer',
        0x0457: 'Konkani',
        0x0412: 'Korean',
        0x0440: 'Kyrgyz (Cyrillic)',
        0x0454: 'Lao',
        0x0476: 'Latin',
        0x0426: 'Latvian',
        0x0427: 'Lithuanian',
        0x043e: 'Malay-Malaysia',
        0x083e: 'Malay-Brunei Darussalam',
        0x044c: 'Malayalam',
        0x043a: 'Maltese',
        0x0458: 'Manipuri',
        0x0481: 'Maori-New Zealand',
        0x044e: 'Marathi',
        0x0450: 'Mongolian (Cyrillic)',
        0x0850: 'Mongolian (Mongolian)',
        0x0461: 'Nepali',
        0x0861: 'Nepali-India',
        0x0414: 'Norwegian (Bokmål)',
        0x0814: 'Norwegian (Nynorsk)',
        0x0448: 'Oriya',
        0x0472: 'Oromo',
        0x0479: 'Papiamentu',
        0x0463: 'Pashto',
        0x0415: 'Polish',
        0x0416: 'Portuguese-Brazil',
        0x0816: 'Portuguese-Portugal',
        0x0446: 'Punjabi',
        0x0846: 'Punjabi (Pakistan)',
        0x046B: 'Quecha (Bolivia)',
        0x086B: 'Quecha (Ecuador)',
        0x0C6B: 'Quecha (Peru)',
        0x0417: 'Rhaeto-Romanic',
        0x0418: 'Romanian',
        0x0818: 'Romanian (Moldava)',
        0x0419: 'Russian',
        0x0819: 'Russian (Moldava)',
        0x043b: 'Sami (Lappish)',
        0x044f: 'Sanskrit',
        0x046c: 'Sepedi',
        0x0c1a: 'Serbian (Cyrillic)',
        0x081a: 'Serbian (Latin)',
        0x0459: 'Sindhi (India)',
        0x0859: 'Sindhi (Pakistan)',
        0x045b: 'Sinhalese-Sri Lanka',
        0x041b: 'Slovak',
        0x0424: 'Slovenian',
        0x0477: 'Somali',
        0x042e: 'Sorbian',
        0x0c0a: 'Spanish (Modern Sort)',
        0x040a: 'Spanish (Traditional Sort)',
        0x2c0a: 'Spanish (Argentina)',
        0x400a: 'Spanish (Bolivia)',
        0x340a: 'Spanish (Chile)',
        0x240a: 'Spanish (Colombia)',
        0x140a: 'Spanish (Costa Rica)',
        0x1c0a: 'Spanish (Dominican Republic)',
        0x300a: 'Spanish (Ecuador)',
        0x440a: 'Spanish (El Salvador)',
        0x100a: 'Spanish (Guatemala)',
        0x480a: 'Spanish (Honduras)',
        0x580a: 'Spanish (Latin America)',
        0x080a: 'Spanish (Mexico)',
        0x4c0a: 'Spanish (Nicaragua)',
        0x180a: 'Spanish (Panama)',
        0x3c0a: 'Spanish (Paraguay)',
        0x280a: 'Spanish (Peru)',
        0x500a: 'Spanish (Puerto Rico)',
        0x540a: 'Spanish (United States)',
        0x380a: 'Spanish (Uruguay)',
        0x200a: 'Spanish (Venezuela)',
        0x0430: 'Sutu',
        0x0441: 'Swahili',
        0x041d: 'Swedish',
        0x081d: 'Swedish-Finland',
        0x045a: 'Syriac',
        0x0428: 'Tajik',
        0x045f: 'Tamazight (Arabic)',
        0x085f: 'Tamazight (Latin)',
        0x0449: 'Tamil',
        0x0444: 'Tatar',
        0x044a: 'Telugu',
        0x041e: 'Thai',
        0x0851: 'Tibetan (Bhutan)',
        0x0451: 'Tibetan (People\'s Republic of China)',
        0x0873: 'Tigrigna (Eritrea)',
        0x0473: 'Tigrigna (Ethiopia)',
        0x0431: 'Tsonga',
        0x0432: 'Tswana',
        0x041f: 'Turkish',
        0x0442: 'Turkmen',
        0x0480: 'Uighur-China',
        0x0422: 'Ukrainian',
        0x0420: 'Urdu',
        0x0820: 'Urdu-India',
        0x0843: 'Uzbek (Cyrillic)',
        0x0443: 'Uzbek (Latin)',
        0x0433: 'Venda',
        0x042a: 'Vietnamese',
        0x0452: 'Welsh',
        0x0434: 'Xhosa',
        0x0478: 'Yi',
        0x043d: 'Yiddish',
        0x046a: 'Yoruba',
        0x0435: 'Zulu',
        0x04ff: 'HID (Human Interface DeVITe)'
    }

    # Maps VERSIONINFO character-set identifiers to human-readable names;
    # the values are emitted verbatim in the unit's version-info output.
    # NOTE(review): the '?' in the two 'Shift ?' entries below looks like a
    # mis-encoded dash inherited from the scraped Microsoft VERSIONINFO
    # table — confirm the intended character before changing these strings.
    _CHARSET = {
        0x0000: '7-bit ASCII',
        0x03A4: 'Japan (Shift ? JIS X-0208)',
        0x03B5: 'Korea (Shift ? KSC 5601)',
        0x03B6: 'Taiwan (Big5)',
        0x04B0: 'Unicode',
        0x04E2: 'Latin-2 (Eastern European)',
        0x04E3: 'Cyrillic',
        0x04E4: 'Multilingual',
        0x04E5: 'Greek',
        0x04E6: 'Turkish',
        0x04E7: 'Hebrew',
        0x04E8: 'Arabic',
    }

    # Maps Windows version numbers to product names: the outer key is the
    # major version, the inner key the minor version.
    _WINVER = {
        3: {
            0x00: 'Windows NT 3',
            0x0A: 'Windows NT 3.1',
            0x32: 'Windows NT 3.5',
            0x33: 'Windows NT 3.51',
        },
        4: {
            0x00: 'Windows 95',
            0x0A: 'Windows 98',
        },
        5: {
            0x00: 'Windows 2000',
            # NOTE(review): Windows Me reports version 4.90, so this entry
            # (minor 0x5A == 90) would be expected under major version 4;
            # verify against the code that consumes this table before moving it.
            0x5A: 'Windows Me',
            0x01: 'Windows XP',
            0x02: 'Windows Server 2003',
        },
        6: {
            0x00: 'Windows Vista',
            0x01: 'Windows 7',
            0x02: 'Windows 8',
            0x03: 'Windows 8.1',
        },
        10: {
            0x00: 'Windows 10',
        }
    }

    # copy of https://raw.githubusercontent.com/dishather/richprint/master/comp_id.txt
    _RICH_HEADER = {
        # Objects without @comp.id are collected under this record
        0x00010000: VersionInfo(VIT.ERR, 'Unmarked objects'),
        0x00000000: VersionInfo(VIT.ERR, 'Unmarked objects (old)'),

        # MSVS2019 v16.9.2
        0x010474d9: VersionInfo(VIT.OBJ, 'VS2019 v16.9.2 build 29913'),
        0x010374d9: VersionInfo(VIT.ASM, 'VS2019 v16.9.2 build 29913'),
        0x010574d9: VersionInfo(VIT.CPP, 'VS2019 v16.9.2 build 29913'),
        0x00ff74d9: VersionInfo(VIT.RES, 'VS2019 v16.9.2 build 29913'),
        0x010274d9: VersionInfo(VIT.LNK, 'VS2019 v16.9.2 build 29913'),
        0x010074d9: VersionInfo(VIT.EXP, 'VS2019 v16.9.2 build 29913'),
        0x010174d9: VersionInfo(VIT.IMP, 'VS2019 v16.9.2 build 29913'),

        # MSVS2019 v16.9.2
        # from https://walbourn.github.io/vs-2019-update-9/
        0x010474d6: VersionInfo(VIT.OBJ, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010374d6: VersionInfo(VIT.ASM, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010574d6: VersionInfo(VIT.CPP, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x00ff74d6: VersionInfo(VIT.RES, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010274d6: VersionInfo(VIT.LNK, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010074d6: VersionInfo(VIT.EXP, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010174d6: VersionInfo(VIT.IMP, 'VS2019 v16.9.0 build 29910', interpolated=True),

        # MSVS2019 v16.8.5
        0x01047299: VersionInfo(VIT.OBJ, 'VS2019 v16.8.5 build 29337'),
        0x01037299: VersionInfo(VIT.ASM, 'VS2019 v16.8.5 build 29337'),
        0x01057299: VersionInfo(VIT.CPP, 'VS2019 v16.8.5 build 29337'),
        0x00ff7299: VersionInfo(VIT.RES, 'VS2019 v16.8.5 build 29337'),
        0x01027299: VersionInfo(VIT.LNK, 'VS2019 v16.8.5 build 29337'),
        0x01007299: VersionInfo(VIT.EXP, 'VS2019 v16.8.5 build 29337'),
        0x01017299: VersionInfo(VIT.IMP, 'VS2019 v16.8.5 build 29337'),

        # MSVS2019 v16.8.4
        0x01047298: VersionInfo(VIT.OBJ, 'VS2019 v16.8.4 build 29336'),
        0x01037298: VersionInfo(VIT.ASM, 'VS2019 v16.8.4 build 29336'),
        0x01057298: VersionInfo(VIT.CPP, 'VS2019 v16.8.4 build 29336'),
        0x00ff7298: VersionInfo(VIT.RES, 'VS2019 v16.8.4 build 29336'),
        0x01027298: VersionInfo(VIT.LNK, 'VS2019 v16.8.4 build 29336'),
        0x01007298: VersionInfo(VIT.EXP, 'VS2019 v16.8.4 build 29336'),
        0x01017298: VersionInfo(VIT.IMP, 'VS2019 v16.8.4 build 29336'),

        # MSVS2019 v16.8.3
        0x01047297: VersionInfo(VIT.OBJ, 'VS2019 v16.8.3 build 29335'),
        0x01037297: VersionInfo(VIT.ASM, 'VS2019 v16.8.3 build 29335'),
        0x01057297: VersionInfo(VIT.CPP, 'VS2019 v16.8.3 build 29335'),
        0x00ff7297: VersionInfo(VIT.RES, 'VS2019 v16.8.3 build 29335'),
        0x01027297: VersionInfo(VIT.LNK, 'VS2019 v16.8.3 build 29335'),
        0x01007297: VersionInfo(VIT.EXP, 'VS2019 v16.8.3 build 29335'),
        0x01017297: VersionInfo(VIT.IMP, 'VS2019 v16.8.3 build 29335'),

        # MSVS2019 v16.8.2
        0x01047296: VersionInfo(VIT.OBJ, 'VS2019 v16.8.2 build 29334'),
        0x01037296: VersionInfo(VIT.ASM, 'VS2019 v16.8.2 build 29334'),
        0x01057296: VersionInfo(VIT.CPP, 'VS2019 v16.8.2 build 29334'),
        0x00ff7296: VersionInfo(VIT.RES, 'VS2019 v16.8.2 build 29334'),
        0x01027296: VersionInfo(VIT.LNK, 'VS2019 v16.8.2 build 29334'),
        0x01007296: VersionInfo(VIT.EXP, 'VS2019 v16.8.2 build 29334'),
        0x01017296: VersionInfo(VIT.IMP, 'VS2019 v16.8.2 build 29334'),

        # MSVS2019 v16.8.0
        # from https://walbourn.github.io/vs-2019-update-8/
        0x01047295: VersionInfo(VIT.OBJ, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01037295: VersionInfo(VIT.ASM, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01057295: VersionInfo(VIT.CPP, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x00ff7295: VersionInfo(VIT.RES, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01027295: VersionInfo(VIT.LNK, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01007295: VersionInfo(VIT.EXP, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01017295: VersionInfo(VIT.IMP, 'VS2019 v16.8.0 build 29333', interpolated=True),

        # MSVS2019 v16.7.5
        0x010471b8: VersionInfo(VIT.OBJ, 'VS2019 v16.7.5 build 29112'),
        0x010371b8: VersionInfo(VIT.ASM, 'VS2019 v16.7.5 build 29112'),
        0x010571b8: VersionInfo(VIT.CPP, 'VS2019 v16.7.5 build 29112'),
        0x00ff71b8: VersionInfo(VIT.RES, 'VS2019 v16.7.5 build 29112'),
        0x010271b8: VersionInfo(VIT.LNK, 'VS2019 v16.7.5 build 29112'),
        0x010071b8: VersionInfo(VIT.EXP, 'VS2019 v16.7.5 build 29112'),
        0x010171b8: VersionInfo(VIT.IMP, 'VS2019 v16.7.5 build 29112'),

        # MSVS2019 v16.7.1 .. 16.7.4
        0x010471b7: VersionInfo(VIT.OBJ, 'VS2019 v16.7.1 build 29111'),
        0x010371b7: VersionInfo(VIT.ASM, 'VS2019 v16.7.1 build 29111'),
        0x010571b7: VersionInfo(VIT.CPP, 'VS2019 v16.7.1 build 29111'),
        0x00ff71b7: VersionInfo(VIT.RES, 'VS2019 v16.7.1 build 29111'),
        0x010271b7: VersionInfo(VIT.LNK, 'VS2019 v16.7.1 build 29111'),
        0x010071b7: VersionInfo(VIT.EXP, 'VS2019 v16.7.1 build 29111'),
        0x010171b7: VersionInfo(VIT.IMP, 'VS2019 v16.7.1 build 29111'),

        # MSVS2019 v16.7.0
        0x010471b6: VersionInfo(VIT.OBJ, 'VS2019 v16.7.0 build 29110'),
        0x010371b6: VersionInfo(VIT.ASM, 'VS2019 v16.7.0 build 29110'),
        0x010571b6: VersionInfo(VIT.CPP, 'VS2019 v16.7.0 build 29110'),
        0x00ff71b6: VersionInfo(VIT.RES, 'VS2019 v16.7.0 build 29110'),
        0x010271b6: VersionInfo(VIT.LNK, 'VS2019 v16.7.0 build 29110'),
        0x010071b6: VersionInfo(VIT.EXP, 'VS2019 v16.7.0 build 29110'),
        0x010171b6: VersionInfo(VIT.IMP, 'VS2019 v16.7.0 build 29110'),

        # MSVS2019 v16.6.2 ... 16.6.5
        0x01047086: VersionInfo(VIT.OBJ, 'VS2019 v16.6.2 build 28806'),
        0x01037086: VersionInfo(VIT.ASM, 'VS2019 v16.6.2 build 28806'),
        0x01057086: VersionInfo(VIT.CPP, 'VS2019 v16.6.2 build 28806'),
        0x00ff7086: VersionInfo(VIT.RES, 'VS2019 v16.6.2 build 28806'),
        0x01027086: VersionInfo(VIT.LNK, 'VS2019 v16.6.2 build 28806'),
        0x01007086: VersionInfo(VIT.EXP, 'VS2019 v16.6.2 build 28806'),
        0x01017086: VersionInfo(VIT.IMP, 'VS2019 v16.6.2 build 28806'),

        # MSVS2019 v16.6.0
        0x01047085: VersionInfo(VIT.OBJ, 'VS2019 v16.6.0 build 28805'),
        0x01037085: VersionInfo(VIT.ASM, 'VS2019 v16.6.0 build 28805'),
        0x01057085: VersionInfo(VIT.CPP, 'VS2019 v16.6.0 build 28805'),
        0x00ff7085: VersionInfo(VIT.RES, 'VS2019 v16.6.0 build 28805'),
        0x01027085: VersionInfo(VIT.LNK, 'VS2019 v16.6.0 build 28805'),
        0x01007085: VersionInfo(VIT.EXP, 'VS2019 v16.6.0 build 28805'),
        0x01017085: VersionInfo(VIT.IMP, 'VS2019 v16.6.0 build 28805'),

        # MSVS2019 v16.5.5 (also 16.5.4)
        0x01046fc6: VersionInfo(VIT.OBJ, 'VS2019 v16.5.5 build 28614'),
        0x01036fc6: VersionInfo(VIT.ASM, 'VS2019 v16.5.5 build 28614'),
        0x01056fc6: VersionInfo(VIT.CPP, 'VS2019 v16.5.5 build 28614'),
        0x00ff6fc6: VersionInfo(VIT.RES, 'VS2019 v16.5.5 build 28614'),
        0x01026fc6: VersionInfo(VIT.LNK, 'VS2019 v16.5.5 build 28614'),
        0x01006fc6: VersionInfo(VIT.EXP, 'VS2019 v16.5.5 build 28614'),
        0x01016fc6: VersionInfo(VIT.IMP, 'VS2019 v16.5.5 build 28614'),

        # Visual Studio 2019 version 16.5.2 (values are interpolated)
        # source: https://walbourn.github.io/vs-2019-update-5/
        0x01046fc4: VersionInfo(VIT.OBJ, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01036fc4: VersionInfo(VIT.ASM, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01056fc4: VersionInfo(VIT.CPP, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x00ff6fc4: VersionInfo(VIT.RES, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01026fc4: VersionInfo(VIT.LNK, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01016fc4: VersionInfo(VIT.IMP, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01006fc4: VersionInfo(VIT.EXP, 'VS2019 v16.5.2 build 28612', interpolated=True),

        # Visual Studio 2019 version 16.5.1 (values are interpolated)
        0x01046fc3: VersionInfo(VIT.OBJ, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01036fc3: VersionInfo(VIT.ASM, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01056fc3: VersionInfo(VIT.CPP, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x00ff6fc3: VersionInfo(VIT.RES, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01026fc3: VersionInfo(VIT.LNK, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01016fc3: VersionInfo(VIT.IMP, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01006fc3: VersionInfo(VIT.EXP, 'VS2019 v16.5.1 build 28611', interpolated=True),

        # Visual Studio 2019 version 16.5.0 (values are interpolated)
        # source: https://walbourn.github.io/vs-2019-update-5/
        0x01046fc2: VersionInfo(VIT.OBJ, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01036fc2: VersionInfo(VIT.ASM, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01056fc2: VersionInfo(VIT.CPP, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x00ff6fc2: VersionInfo(VIT.RES, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01026fc2: VersionInfo(VIT.LNK, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01016fc2: VersionInfo(VIT.IMP, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01006fc2: VersionInfo(VIT.EXP, 'VS2019 v16.5.0 build 28610', interpolated=True),

        # MSVS2019 v16.4.6 (values are interpolated)
        # source: https://walbourn.github.io/vs-2019-update-4/
        0x01046e9f: VersionInfo(VIT.OBJ, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01036e9f: VersionInfo(VIT.ASM, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01056e9f: VersionInfo(VIT.CPP, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x00ff6e9f: VersionInfo(VIT.RES, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01026e9f: VersionInfo(VIT.LNK, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01006e9f: VersionInfo(VIT.EXP, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01016e9f: VersionInfo(VIT.IMP, 'VS2019 v16.4.6 build 28319', interpolated=True),

        # MSVS2019 v16.4.4 (values are interpolated)
        # source: https://walbourn.github.io/vs-2019-update-4/
        0x01046e9c: VersionInfo(VIT.OBJ, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01036e9c: VersionInfo(VIT.ASM, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01056e9c: VersionInfo(VIT.CPP, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x00ff6e9c: VersionInfo(VIT.RES, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01026e9c: VersionInfo(VIT.LNK, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01006e9c: VersionInfo(VIT.EXP, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01016e9c: VersionInfo(VIT.IMP, 'VS2019 v16.4.4 build 28316', interpolated=True),

        # MSVS2019 v16.4.3
        0x01046e9b: VersionInfo(VIT.OBJ, 'VS2019 v16.4.3 build 28315'),
        0x01036e9b: VersionInfo(VIT.ASM, 'VS2019 v16.4.3 build 28315'),
        0x01056e9b: VersionInfo(VIT.CPP, 'VS2019 v16.4.3 build 28315'),
        0x00ff6e9b: VersionInfo(VIT.RES, 'VS2019 v16.4.3 build 28315'),
        0x01026e9b: VersionInfo(VIT.LNK, 'VS2019 v16.4.3 build 28315'),
        0x01006e9b: VersionInfo(VIT.EXP, 'VS2019 v16.4.3 build 28315'),
        0x01016e9b: VersionInfo(VIT.IMP, 'VS2019 v16.4.3 build 28315'),

        # Visual Studio 2019 version 16.4.0 (values are interpolated)
        0x01046e9a: VersionInfo(VIT.OBJ, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01036e9a: VersionInfo(VIT.ASM, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01056e9a: VersionInfo(VIT.CPP, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x00ff6e9a: VersionInfo(VIT.RES, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01026e9a: VersionInfo(VIT.LNK, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01016e9a: VersionInfo(VIT.IMP, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01006e9a: VersionInfo(VIT.EXP, 'VS2019 v16.4.0 build 28314', interpolated=True),

        # Visual Studio 2019 version 16.3.2 (values are interpolated)
        0x01046dc9: VersionInfo(VIT.OBJ, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01036dc9: VersionInfo(VIT.ASM, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01056dc9: VersionInfo(VIT.CPP, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x00ff6dc9: VersionInfo(VIT.RES, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01026dc9: VersionInfo(VIT.LNK, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01016dc9: VersionInfo(VIT.IMP, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01006dc9: VersionInfo(VIT.EXP, 'VS2019 v16.3.2 build 28105', interpolated=True),

        # Visual Studio 2019 version 16.2.3 (values are interpolated)
        0x01046d01: VersionInfo(VIT.OBJ, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01036d01: VersionInfo(VIT.ASM, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01056d01: VersionInfo(VIT.CPP, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x00ff6d01: VersionInfo(VIT.RES, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01026d01: VersionInfo(VIT.LNK, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01016d01: VersionInfo(VIT.IMP, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01006d01: VersionInfo(VIT.EXP, 'VS2019 v16.2.3 build 27905', interpolated=True),

        # Visual Studio 2019 version 16.1.2 (values are interpolated)
        0x01046c36: VersionInfo(VIT.OBJ, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01036c36: VersionInfo(VIT.ASM, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01056c36: VersionInfo(VIT.CPP, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x00ff6c36: VersionInfo(VIT.RES, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01026c36: VersionInfo(VIT.LNK, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01016c36: VersionInfo(VIT.IMP, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01006c36: VersionInfo(VIT.EXP, 'VS2019 v16.1.2 build 27702', interpolated=True),

        # MSVS2019 v16.0.0
        0x01046b74: VersionInfo(VIT.OBJ, 'VS2019 v16.0.0 build 27508'),
        0x01036b74: VersionInfo(VIT.ASM, 'VS2019 v16.0.0 build 27508'),
        0x01056b74: VersionInfo(VIT.CPP, 'VS2019 v16.0.0 build 27508'),
        0x00ff6b74: VersionInfo(VIT.RES, 'VS2019 v16.0.0 build 27508'),
        0x01026b74: VersionInfo(VIT.LNK, 'VS2019 v16.0.0 build 27508'),
        0x01006b74: VersionInfo(VIT.EXP, 'VS2019 v16.0.0 build 27508'),
        0x01016b74: VersionInfo(VIT.IMP, 'VS2019 v16.0.0 build 27508'),

        # Visual Studio 2017 version 15.9.11 (values are interpolated)
        0x01046996: VersionInfo(VIT.OBJ, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01036996: VersionInfo(VIT.ASM, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01056996: VersionInfo(VIT.CPP, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x00ff6996: VersionInfo(VIT.RES, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01026996: VersionInfo(VIT.LNK, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01016996: VersionInfo(VIT.IMP, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01006996: VersionInfo(VIT.EXP, 'VS2017 v15.9.11 build 27030', interpolated=True),

        # Visual Studio 2017 version 15.9.7 (values are interpolated)
        0x01046993: VersionInfo(VIT.OBJ, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01036993: VersionInfo(VIT.ASM, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01056993: VersionInfo(VIT.CPP, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x00ff6993: VersionInfo(VIT.RES, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01026993: VersionInfo(VIT.LNK, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01016993: VersionInfo(VIT.IMP, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01006993: VersionInfo(VIT.EXP, 'VS2017 v15.9.7 build 27027', interpolated=True),

        # Visual Studio 2017 version 15.9.5 (values are interpolated)
        0x01046992: VersionInfo(VIT.OBJ, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01036992: VersionInfo(VIT.ASM, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01056992: VersionInfo(VIT.CPP, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x00ff6992: VersionInfo(VIT.RES, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01026992: VersionInfo(VIT.LNK, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01016992: VersionInfo(VIT.IMP, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01006992: VersionInfo(VIT.EXP, 'VS2017 v15.9.5 build 27026', interpolated=True),

        # Visual Studio 2017 version 15.9.4 (values are interpolated)
        0x01046991: VersionInfo(VIT.OBJ, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01036991: VersionInfo(VIT.ASM, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01056991: VersionInfo(VIT.CPP, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x00ff6991: VersionInfo(VIT.RES, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01026991: VersionInfo(VIT.LNK, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01016991: VersionInfo(VIT.IMP, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01006991: VersionInfo(VIT.EXP, 'VS2017 v15.9.4 build 27025', interpolated=True),

        # Visual Studio 2017 version 15.9.1 (values are interpolated)
        0x0104698f: VersionInfo(VIT.OBJ, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0103698f: VersionInfo(VIT.ASM, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0105698f: VersionInfo(VIT.CPP, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x00ff698f: VersionInfo(VIT.RES, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0102698f: VersionInfo(VIT.LNK, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0101698f: VersionInfo(VIT.IMP, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0100698f: VersionInfo(VIT.EXP, 'VS2017 v15.9.1 build 27023', interpolated=True),

        # Visual Studio 2017 version 15.8.5 (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-8-update/
        0x0104686c: VersionInfo(VIT.OBJ, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0103686c: VersionInfo(VIT.ASM, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0105686c: VersionInfo(VIT.CPP, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x00ff686c: VersionInfo(VIT.RES, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0102686c: VersionInfo(VIT.LNK, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0101686c: VersionInfo(VIT.IMP, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0100686c: VersionInfo(VIT.EXP, 'VS2017 v15.8.5 build 26732', interpolated=True),

        # Visual Studio 2017 version 15.8.9 (sic!) (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-8-update/
        0x0104686a: VersionInfo(VIT.OBJ, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0103686a: VersionInfo(VIT.ASM, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0105686a: VersionInfo(VIT.CPP, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x00ff686a: VersionInfo(VIT.RES, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0102686a: VersionInfo(VIT.LNK, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0101686a: VersionInfo(VIT.IMP, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0100686a: VersionInfo(VIT.EXP, 'VS2017 v15.8.9? build 26730', interpolated=True),

        # Visual Studio 2017 version 15.8.4 (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-8-update/
        0x01046869: VersionInfo(VIT.OBJ, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01036869: VersionInfo(VIT.ASM, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01056869: VersionInfo(VIT.CPP, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x00ff6869: VersionInfo(VIT.RES, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01026869: VersionInfo(VIT.LNK, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01016869: VersionInfo(VIT.IMP, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01006869: VersionInfo(VIT.EXP, 'VS2017 v15.8.4 build 26729', interpolated=True),

        # Visual Studio 2017 version 15.8.0 (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-8-update/
        0x01046866: VersionInfo(VIT.OBJ, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01036866: VersionInfo(VIT.ASM, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01056866: VersionInfo(VIT.CPP, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x00ff6866: VersionInfo(VIT.RES, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01026866: VersionInfo(VIT.LNK, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01016866: VersionInfo(VIT.IMP, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01006866: VersionInfo(VIT.EXP, 'VS2017 v15.8.0 build 26726', interpolated=True),

        # Visual Studio 2017 version 15.7.5 (values are interpolated)
        0x01046741: VersionInfo(VIT.OBJ, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01036741: VersionInfo(VIT.ASM, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01056741: VersionInfo(VIT.CPP, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x00ff6741: VersionInfo(VIT.RES, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01026741: VersionInfo(VIT.LNK, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01016741: VersionInfo(VIT.IMP, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01006741: VersionInfo(VIT.EXP, 'VS2017 v15.7.5 build 26433', interpolated=True),

        # Visual Studio 2017 version 15.7.4 (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-7-update/
        0x0104673f: VersionInfo(VIT.OBJ, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0103673f: VersionInfo(VIT.ASM, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0105673f: VersionInfo(VIT.CPP, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x00ff673f: VersionInfo(VIT.RES, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0102673f: VersionInfo(VIT.LNK, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0101673f: VersionInfo(VIT.IMP, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0100673f: VersionInfo(VIT.EXP, 'VS2017 v15.7.4 build 26431', interpolated=True),

        # Visual Studio 2017 version 15.7.3 (values are interpolated)
        0x0104673e: VersionInfo(VIT.OBJ, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0103673e: VersionInfo(VIT.ASM, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0105673e: VersionInfo(VIT.CPP, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x00ff673e: VersionInfo(VIT.RES, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0102673e: VersionInfo(VIT.LNK, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0101673e: VersionInfo(VIT.IMP, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0100673e: VersionInfo(VIT.EXP, 'VS2017 v15.7.3 build 26430', interpolated=True),

        # Visual Studio 2017 version 15.7.2 (values are interpolated)
        0x0104673d: VersionInfo(VIT.OBJ, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0103673d: VersionInfo(VIT.ASM, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0105673d: VersionInfo(VIT.CPP, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x00ff673d: VersionInfo(VIT.RES, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0102673d: VersionInfo(VIT.LNK, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0101673d: VersionInfo(VIT.IMP, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0100673d: VersionInfo(VIT.EXP, 'VS2017 v15.7.2 build 26429', interpolated=True),

        # Visual Studio 2017 version 15.7.1 (values are interpolated)
        0x0104673c: VersionInfo(VIT.OBJ, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0103673c: VersionInfo(VIT.ASM, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0105673c: VersionInfo(VIT.CPP, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x00ff673c: VersionInfo(VIT.RES, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0102673c: VersionInfo(VIT.LNK, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0101673c: VersionInfo(VIT.IMP, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0100673c: VersionInfo(VIT.EXP, 'VS2017 v15.7.1 build 26428', interpolated=True),

        # Visual Studio 2017 version 15.6.7 (values are interpolated)
        0x01046614: VersionInfo(VIT.OBJ, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01036614: VersionInfo(VIT.ASM, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01056614: VersionInfo(VIT.CPP, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x00ff6614: VersionInfo(VIT.RES, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01026614: VersionInfo(VIT.LNK, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01016614: VersionInfo(VIT.IMP, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01006614: VersionInfo(VIT.EXP, 'VS2017 v15.6.7 build 26132', interpolated=True),

        # Visual Studio 2017 version 15.6.6 (values are interpolated)
        0x01046613: VersionInfo(VIT.OBJ, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01036613: VersionInfo(VIT.ASM, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01056613: VersionInfo(VIT.CPP, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x00ff6613: VersionInfo(VIT.RES, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01026613: VersionInfo(VIT.LNK, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01016613: VersionInfo(VIT.IMP, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01006613: VersionInfo(VIT.EXP, 'VS2017 v15.6.6 build 26131', interpolated=True),

        # Visual Studio 2017 version 15.6.4 has the same build number
        # Visual Studio 2017 version 15.6.3 (values are interpolated)
        0x01046611: VersionInfo(VIT.OBJ, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01036611: VersionInfo(VIT.ASM, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01056611: VersionInfo(VIT.CPP, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x00ff6611: VersionInfo(VIT.RES, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01026611: VersionInfo(VIT.LNK, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01016611: VersionInfo(VIT.IMP, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01006611: VersionInfo(VIT.EXP, 'VS2017 v15.6.3 build 26129', interpolated=True),

        # Visual Studio 2017 version 15.6.2 has the same build number
        # Visual Studio 2017 version 15.6.1 has the same build number
        # Visual Studio 2017 version 15.6.0 (values are interpolated)
        0x01046610: VersionInfo(VIT.OBJ, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01036610: VersionInfo(VIT.ASM, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01056610: VersionInfo(VIT.CPP, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x00ff6610: VersionInfo(VIT.RES, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01026610: VersionInfo(VIT.LNK, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01016610: VersionInfo(VIT.IMP, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01006610: VersionInfo(VIT.EXP, 'VS2017 v15.6.0 build 26128', interpolated=True),

        # Visual Studio 2017 version 15.5.7 has the same build number
        # Visual Studio 2017 version 15.5.6 (values are interpolated)
        0x010464eb: VersionInfo(VIT.OBJ, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010364eb: VersionInfo(VIT.ASM, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010564eb: VersionInfo(VIT.CPP, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x00ff64eb: VersionInfo(VIT.RES, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010264eb: VersionInfo(VIT.LNK, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010164eb: VersionInfo(VIT.IMP, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010064eb: VersionInfo(VIT.EXP, 'VS2017 v15.5.6 build 25835', interpolated=True),

        # MSVS2017 v15.5.4 (15.5.3 has the same build number)
        0x010464ea: VersionInfo(VIT.OBJ, 'VS2017 v15.5.4 build 25834'),
        0x010364ea: VersionInfo(VIT.ASM, 'VS2017 v15.5.4 build 25834'),
        0x010564ea: VersionInfo(VIT.CPP, 'VS2017 v15.5.4 build 25834'),
        0x00ff64ea: VersionInfo(VIT.RES, 'VS2017 v15.5.4 build 25834'),
        0x010264ea: VersionInfo(VIT.LNK, 'VS2017 v15.5.4 build 25834'),
        0x010064ea: VersionInfo(VIT.EXP, 'VS2017 v15.5.4 build 25834'),
        0x010164ea: VersionInfo(VIT.IMP, 'VS2017 v15.5.4 build 25834'),

        # Visual Studio 2017 version 15.5.2 (values are interpolated)
        0x010464e7: VersionInfo(VIT.OBJ, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010364e7: VersionInfo(VIT.ASM, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010564e7: VersionInfo(VIT.CPP, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x00ff64e7: VersionInfo(VIT.RES, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010264e7: VersionInfo(VIT.LNK, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010164e7: VersionInfo(VIT.IMP, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010064e7: VersionInfo(VIT.EXP, 'VS2017 v15.5.2 build 25831', interpolated=True),

        # Visual Studio 2017 version 15.4.5 (values are interpolated)
        0x010463cb: VersionInfo(VIT.OBJ, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010363cb: VersionInfo(VIT.ASM, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010563cb: VersionInfo(VIT.CPP, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x00ff63cb: VersionInfo(VIT.RES, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010263cb: VersionInfo(VIT.LNK, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010163cb: VersionInfo(VIT.IMP, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010063cb: VersionInfo(VIT.EXP, 'VS2017 v15.4.5 build 25547', interpolated=True),

        # Visual Studio 2017 version 15.4.4 (values are interpolated)
        0x010463c6: VersionInfo(VIT.OBJ, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010363c6: VersionInfo(VIT.ASM, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010563c6: VersionInfo(VIT.CPP, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x00ff63c6: VersionInfo(VIT.RES, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010263c6: VersionInfo(VIT.LNK, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010163c6: VersionInfo(VIT.IMP, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010063c6: VersionInfo(VIT.EXP, 'VS2017 v15.4.4 build 25542', interpolated=True),

        # Visual Studio 2017 version 15.3.3 (values are interpolated)
        0x010463a3: VersionInfo(VIT.OBJ, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010363a3: VersionInfo(VIT.ASM, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010563a3: VersionInfo(VIT.CPP, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x00ff63a3: VersionInfo(VIT.RES, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010263a3: VersionInfo(VIT.LNK, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010163a3: VersionInfo(VIT.IMP, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010063a3: VersionInfo(VIT.EXP, 'VS2017 v15.3.3 build 25507', interpolated=True),

        # Visual Studio 2017 version 15.3 (values are interpolated)
        # source: https://twitter.com/visualc/status/897853176002433024
        0x010463a2: VersionInfo(VIT.OBJ, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010363a2: VersionInfo(VIT.ASM, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010563a2: VersionInfo(VIT.CPP, 'VS2017 v15.3 build 25506', interpolated=True),
        0x00ff63a2: VersionInfo(VIT.RES, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010263a2: VersionInfo(VIT.LNK, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010163a2: VersionInfo(VIT.IMP, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010063a2: VersionInfo(VIT.EXP, 'VS2017 v15.3 build 25506', interpolated=True),

        # Visual Studio 2017 version 15.2 has the same build number
        # Visual Studio 2017 version 15.1 has the same build number
        # Visual Studio 2017 version 15.0 (values are interpolated)
        0x010461b9: VersionInfo(VIT.OBJ, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010361b9: VersionInfo(VIT.ASM, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010561b9: VersionInfo(VIT.CPP, 'VS2017 v15.0 build 25017', interpolated=True),
        0x00ff61b9: VersionInfo(VIT.RES, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010261b9: VersionInfo(VIT.LNK, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010161b9: VersionInfo(VIT.IMP, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010061b9: VersionInfo(VIT.EXP, 'VS2017 v15.0 build 25017', interpolated=True),

        # MSVS Community 2015 UPD3.1 (cl version 19.00.24215.1) - some IDs are interpolated
        # [ASM] is the same as in UPD3 build 24213
        0x01045e97: VersionInfo(VIT.OBJ, 'VS2015 UPD3.1 build 24215'),
        0x01055e97: VersionInfo(VIT.CPP, 'VS2015 UPD3.1 build 24215'),
        0x01025e97: VersionInfo(VIT.LNK, 'VS2015 UPD3.1 build 24215'),
        0x01005e97: VersionInfo(VIT.EXP, 'VS2015 UPD3.1 build 24215'),
        0x01015e97: VersionInfo(VIT.IMP, 'VS2015 UPD3.1 build 24215'),

        # MSVS Community 2015 UPD3 (cl version 19.00.24213.1)
        0x01045e95: VersionInfo(VIT.OBJ, 'VS2015 UPD3 build 24213'),
        0x01035e92: VersionInfo(VIT.ASM, 'VS2015 UPD3 build 24210'),
        0x01055e95: VersionInfo(VIT.CPP, 'VS2015 UPD3 build 24213'),
        0x00ff5e92: VersionInfo(VIT.RES, 'VS2015 UPD3 build 24210'),
        0x01025e95: VersionInfo(VIT.LNK, 'VS2015 UPD3 build 24213'),
        0x01005e95: VersionInfo(VIT.EXP, 'VS2015 UPD3 build 24213'),
        0x01015e95: VersionInfo(VIT.IMP, 'VS2015 UPD3 build 24213'),

        # Visual Studio 2015 Update 3 [14.0] (values are interpolated)
        0x01045e92: VersionInfo(VIT.OBJ, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),
        # 01035e92 [ASM] VS2015 Update 3 [14.0] build 24210 (*)
        0x01055e92: VersionInfo(VIT.CPP, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),
        # 00ff5e92 [RES] VS2015 Update 3 [14.0] build 24210 (*)
        0x01025e92: VersionInfo(VIT.LNK, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),
        0x01015e92: VersionInfo(VIT.IMP, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),
        0x01005e92: VersionInfo(VIT.EXP, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),

        # MSVS Community 2015 UPD2 (14.0.25123.0?)
        0x01045d6e: VersionInfo(VIT.OBJ, 'VS2015 UPD2 build 23918'),
        0x01035d6e: VersionInfo(VIT.ASM, 'VS2015 UPD2 build 23918'),
        0x01055d6e: VersionInfo(VIT.CPP, 'VS2015 UPD2 build 23918'),
        0x00ff5d6e: VersionInfo(VIT.RES, 'VS2015 UPD2 build 23918'),
        0x01025d6e: VersionInfo(VIT.LNK, 'VS2015 UPD2 build 23918'),
        0x01005d6e: VersionInfo(VIT.EXP, 'VS2015 UPD2 build 23918'),
        0x01015d6e: VersionInfo(VIT.IMP, 'VS2015 UPD2 build 23918'),

        # MSVS Community 2015 14.0.24728.2 (UPD 1) 14.0.24720.0 D14REL
        0x01045bd2: VersionInfo(VIT.OBJ, 'VS2015 UPD1 build 23506'),
        0x01035bd2: VersionInfo(VIT.ASM, 'VS2015 UPD1 build 23506'),
        0x01055bd2: VersionInfo(VIT.CPP, 'VS2015 UPD1 build 23506'),
        0x00ff5bd2: VersionInfo(VIT.RES, 'VS2015 UPD1 build 23506'),
        0x01025bd2: VersionInfo(VIT.LNK, 'VS2015 UPD1 build 23506'),
        0x01005bd2: VersionInfo(VIT.EXP, 'VS2015 UPD1 build 23506'),
        0x01015bd2: VersionInfo(VIT.IMP, 'VS2015 UPD1 build 23506'),

        # MSVS Community 2015 [14.0]
        0x010459f2: VersionInfo(VIT.OBJ, 'VS2015 [14.0] build 23026'),
        0x010359f2: VersionInfo(VIT.ASM, 'VS2015 [14.0] build 23026'),
        0x010559f2: VersionInfo(VIT.CPP, 'VS2015 [14.0] build 23026'),
        0x00ff59f2: VersionInfo(VIT.RES, 'VS2015 [14.0] build 23026'),
        0x010259f2: VersionInfo(VIT.LNK, 'VS2015 [14.0] build 23026'),
        0x010059f2: VersionInfo(VIT.EXP, 'VS2015 [14.0] build 23026'),
        0x010159f2: VersionInfo(VIT.IMP, 'VS2015 [14.0] build 23026'),

        # Visual Studio 2013 November CTP [12.0] (values are interpolated)
        # NOTE: the value strings below keep the historical "Nobemver" spelling
        # as recorded in the upstream comp.id database; do not "fix" them.
        0x00e0527a: VersionInfo(VIT.OBJ, 'VS2013 Nobemver CTP [12.0] build 21114', interpolated=True),
        0x00df527a: VersionInfo(VIT.ASM, 'VS2013 Nobemver CTP [12.0] build 21114', interpolated=True),
        0x00e1527a: VersionInfo(VIT.CPP, 'VS2013 Nobemver CTP [12.0] build 21114', interpolated=True),
        0x00db527a: VersionInfo(VIT.RES, 'VS2013 Nobemver CTP [12.0] build 21114', interpolated=True),
        0x00de527a: VersionInfo(VIT.LNK, 'VS2013 Nobemver CTP [12.0] build 21114', interpolated=True),
        0x00dd527a: VersionInfo(VIT.IMP, 'VS2013 Nobemver CTP [12.0] build 21114', interpolated=True),
        0x00dc527a: VersionInfo(VIT.EXP, 'VS2013 Nobemver CTP [12.0] build 21114', interpolated=True),

        # MSVS2013 12.0.40629.00 Update 5
        0x00e09eb5: VersionInfo(VIT.OBJ, 'VS2013 UPD5 build 40629'),
        0x00e19eb5: VersionInfo(VIT.CPP, 'VS2013 UPD5 build 40629'),
        # cvtres not updated since RTM version, so add interpolated one
        0x00db9eb5: VersionInfo(VIT.RES, 'VS2013 Update 5 [12.0] build 40629', interpolated=True),
        0x00de9eb5: VersionInfo(VIT.LNK, 'VS2013 UPD5 build 40629'),
        0x00dc9eb5: VersionInfo(VIT.EXP, 'VS2013 UPD5 build 40629'),
        0x00dd9eb5: VersionInfo(VIT.IMP, 'VS2013 UPD5 build 40629'),
        0x00df9eb5: VersionInfo(VIT.ASM, 'VS2013 UPD5 build 40629'),

        # MSVS2013 12.0.31101.00 Update 4 - not attested in real world, @comp.id is
        # calculated.
        0x00e0797d: VersionInfo(VIT.OBJ, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00e1797d: VersionInfo(VIT.CPP, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00db797d: VersionInfo(VIT.RES, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00de797d: VersionInfo(VIT.LNK, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00dc797d: VersionInfo(VIT.EXP, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00dd797d: VersionInfo(VIT.IMP, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00df797d: VersionInfo(VIT.ASM, 'VS2013 UPD4 build 31101', interpolated=True),

        # MSVS2013 12.0.30723.00 Update 3 - not attested in real world, @comp.id is
        # calculated.
        0x00e07803: VersionInfo(VIT.OBJ, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00e17803: VersionInfo(VIT.CPP, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00db7803: VersionInfo(VIT.RES, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00de7803: VersionInfo(VIT.LNK, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00dc7803: VersionInfo(VIT.EXP, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00dd7803: VersionInfo(VIT.IMP, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00df7803: VersionInfo(VIT.ASM, 'VS2013 UPD3 build 30723', interpolated=True),

        # MSVS2013 12.0.30501.00 Update 2 - not attested in real world, @comp.id is
        # calculated.
        0x00e07725: VersionInfo(VIT.OBJ, 'VS2013 UPD2 build 30501'),
        0x00e17725: VersionInfo(VIT.CPP, 'VS2013 UPD2 build 30501'),
        # cvtres not updated since RTM version, so add interpolated one
        0x00db7725: VersionInfo(VIT.RES, 'VS2013 Update 2 [12.0] build 30501', interpolated=True),
        0x00de7725: VersionInfo(VIT.LNK, 'VS2013 UPD2 build 30501'),
        0x00dc7725: VersionInfo(VIT.EXP, 'VS2013 UPD2 build 30501'),
        0x00dd7725: VersionInfo(VIT.IMP, 'VS2013 UPD2 build 30501'),
        0x00df7725: VersionInfo(VIT.ASM, 'VS2013 UPD2 build 30501'),

        # Visual Studio 2013 Update2 RC [12.0] (values are interpolated)
        0x00e07674: VersionInfo(VIT.OBJ, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00df7674: VersionInfo(VIT.ASM, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00e17674: VersionInfo(VIT.CPP, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00db7674: VersionInfo(VIT.RES, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00de7674: VersionInfo(VIT.LNK, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00dd7674: VersionInfo(VIT.IMP, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00dc7674: VersionInfo(VIT.EXP, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),

        # MSVS2013 RTM
        # Looks like it doesn't always dump linker's comp.id
        # Visual Studio 2013 Update 1 [12.0] also has this build number
        0x00e0520d: VersionInfo(VIT.OBJ, 'VS2013 build 21005'),
        0x00e1520d: VersionInfo(VIT.CPP, 'VS2013 build 21005'),
        0x00db520d: VersionInfo(VIT.RES, 'VS2013 build 21005'),
        0x00de520d: VersionInfo(VIT.LNK, 'VS2013 build 21005'),
        0x00dc520d: VersionInfo(VIT.EXP, 'VS2013 build 21005'),
        0x00dd520d: VersionInfo(VIT.IMP, 'VS2013 build 21005'),
        0x00df520d: VersionInfo(VIT.ASM, 'VS2013 build 21005'),

        # Visual Studio 2013 RC [12.0] (values are interpolated)
        0x00e0515b: VersionInfo(VIT.OBJ, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00df515b: VersionInfo(VIT.ASM, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00e1515b: VersionInfo(VIT.CPP, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00db515b: VersionInfo(VIT.RES, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00de515b: VersionInfo(VIT.LNK, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00dd515b: VersionInfo(VIT.IMP, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00dc515b: VersionInfo(VIT.EXP, 'VS2013 RC [12.0] build 20827', interpolated=True),

        # Visual Studio 2013 Preview [12.0] (values are interpolated)
        0x00e05089: VersionInfo(VIT.OBJ, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00df5089: VersionInfo(VIT.ASM, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00e15089: VersionInfo(VIT.CPP, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00db5089: VersionInfo(VIT.RES, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00de5089: VersionInfo(VIT.LNK, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00dd5089: VersionInfo(VIT.IMP, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00dc5089: VersionInfo(VIT.EXP, 'VS2013 Preview [12.0] build 20617', interpolated=True),

        # MSVS2012 Premium Update 4 (11.0.61030.00 Update 4)
        0x00ceee66: VersionInfo(VIT.OBJ, 'VS2012 UPD4 build 61030'),
        0x00cfee66: VersionInfo(VIT.CPP, 'VS2012 UPD4 build 61030'),
        0x00cdee66: VersionInfo(VIT.ASM, 'VS2012 UPD4 build 61030'),
        0x00c9ee66: VersionInfo(VIT.RES, 'VS2012 UPD4 build 61030'),
        0x00ccee66: VersionInfo(VIT.LNK, 'VS2012 UPD4 build 61030'),
        0x00caee66: VersionInfo(VIT.EXP, 'VS2012 UPD4 build 61030'),
        0x00cbee66: VersionInfo(VIT.IMP, 'VS2012 UPD4 build 61030'),

        # MSVS2012 Update 3 (17.00.60610.1 Update 3) - not attested in real world,
        # @comp.id is calculated.
        0x00ceecc2: VersionInfo(VIT.OBJ, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00cfecc2: VersionInfo(VIT.CPP, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00cdecc2: VersionInfo(VIT.ASM, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00c9ecc2: VersionInfo(VIT.RES, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00ccecc2: VersionInfo(VIT.LNK, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00caecc2: VersionInfo(VIT.EXP, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00cbecc2: VersionInfo(VIT.IMP, 'VS2012 UPD3 build 60610', interpolated=True),

        # MSVS2012 Update 2 (17.00.60315.1 Update 2) - not attested in real world,
        # @comp.id is calculated.
        0x00ceeb9b: VersionInfo(VIT.OBJ, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00cfeb9b: VersionInfo(VIT.CPP, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00cdeb9b: VersionInfo(VIT.ASM, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00c9eb9b: VersionInfo(VIT.RES, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00cceb9b: VersionInfo(VIT.LNK, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00caeb9b: VersionInfo(VIT.EXP, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00cbeb9b: VersionInfo(VIT.IMP, 'VS2012 UPD2 build 60315', interpolated=True),

        # MSVS2012 Update 1 (17.00.51106.1 Update 1) - not attested in real world,
        # @comp.id is calculated.
        0x00cec7a2: VersionInfo(VIT.OBJ, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00cfc7a2: VersionInfo(VIT.CPP, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00cdc7a2: VersionInfo(VIT.ASM, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00c9c7a2: VersionInfo(VIT.RES, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00ccc7a2: VersionInfo(VIT.LNK, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00cac7a2: VersionInfo(VIT.EXP, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00cbc7a2: VersionInfo(VIT.IMP, 'VS2012 UPD1 build 51106', interpolated=True),

        # Visual Studio 2012 November CTP [11.0] (values are interpolated)
        0x00cec751: VersionInfo(VIT.OBJ, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00cdc751: VersionInfo(VIT.ASM, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00cfc751: VersionInfo(VIT.CPP, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00c9c751: VersionInfo(VIT.RES, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00ccc751: VersionInfo(VIT.LNK, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00cbc751: VersionInfo(VIT.IMP, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00cac751: VersionInfo(VIT.EXP, 'VS2012 November CTP [11.0] build 51025', interpolated=True),

        # MSVS2012 Premium (11.0.50727.1 RTMREL)
        0x00cec627: VersionInfo(VIT.OBJ, 'VS2012 build 50727'),
        0x00cfc627: VersionInfo(VIT.CPP, 'VS2012 build 50727'),
        0x00c9c627: VersionInfo(VIT.RES, 'VS2012 build 50727'),
        0x00cdc627: VersionInfo(VIT.ASM, 'VS2012 build 50727'),
        0x00cac627: VersionInfo(VIT.EXP, 'VS2012 build 50727'),
        0x00cbc627: VersionInfo(VIT.IMP, 'VS2012 build 50727'),
        0x00ccc627: VersionInfo(VIT.LNK, 'VS2012 build 50727'),

        # MSVS2010 SP1 kb 983509 (10.0.40219.1 SP1Rel)
        0x00aa9d1b: VersionInfo(VIT.OBJ, 'VS2010 SP1 build 40219'),
        0x00ab9d1b: VersionInfo(VIT.CPP, 'VS2010 SP1 build 40219'),
        0x009d9d1b: VersionInfo(VIT.LNK, 'VS2010 SP1 build 40219'),
        0x009a9d1b: VersionInfo(VIT.RES, 'VS2010 SP1 build 40219'),
        0x009b9d1b: VersionInfo(VIT.EXP, 'VS2010 SP1 build 40219'),
        0x009c9d1b: VersionInfo(VIT.IMP, 'VS2010 SP1 build 40219'),
        0x009e9d1b: VersionInfo(VIT.ASM, 'VS2010 SP1 build 40219'),

        # MSVS2010 (10.0.30319.1 RTMRel)
        0x00aa766f: VersionInfo(VIT.OBJ, 'VS2010 build 30319'),
        0x00ab766f: VersionInfo(VIT.CPP, 'VS2010 build 30319'),
        0x009d766f: VersionInfo(VIT.LNK, 'VS2010 build 30319'),
        0x009a766f: VersionInfo(VIT.RES, 'VS2010 build 30319'),
        0x009b766f: VersionInfo(VIT.EXP, 'VS2010 build 30319'),
        0x009c766f: VersionInfo(VIT.IMP, 'VS2010 build 30319'),
        0x009e766f: VersionInfo(VIT.ASM, 'VS2010 build 30319'),

        # Visual Studio 2010 Beta 2 [10.0] (values are interpolated)
        0x00aa520b: VersionInfo(VIT.OBJ, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009e520b: VersionInfo(VIT.ASM, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x00ab520b: VersionInfo(VIT.CPP, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009a520b: VersionInfo(VIT.RES, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009d520b: VersionInfo(VIT.LNK, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009c520b: VersionInfo(VIT.IMP, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009b520b: VersionInfo(VIT.EXP, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),

        # Visual Studio 2010 Beta 1 [10.0] (values are interpolated)
        0x00aa501a: VersionInfo(VIT.OBJ, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009e501a: VersionInfo(VIT.ASM, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x00ab501a: VersionInfo(VIT.CPP, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009a501a: VersionInfo(VIT.RES, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009d501a: VersionInfo(VIT.LNK, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009c501a: VersionInfo(VIT.IMP, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009b501a: VersionInfo(VIT.EXP, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),

        # MSVS2008 SP1 (9.0.30729.1 SP)
        0x00837809: VersionInfo(VIT.OBJ, 'VS2008 SP1 build 30729'),
        0x00847809: VersionInfo(VIT.CPP, 'VS2008 SP1 build 30729'),
        # cvtres is the same as in VS2008, so add interpolated
        0x00947809: VersionInfo(VIT.RES, 'VS2008 SP1 [9.0] build 30729', interpolated=True),
        0x00957809: VersionInfo(VIT.ASM, 'VS2008 SP1 build 30729'),
        0x00927809: VersionInfo(VIT.EXP, 'VS2008 SP1 build 30729'),
        0x00937809: VersionInfo(VIT.IMP, 'VS2008 SP1 build 30729'),
        0x00917809: VersionInfo(VIT.LNK, 'VS2008 SP1 build 30729'),

        # MSVS2008 (9.0.21022.8 RTM)
        0x0083521e: VersionInfo(VIT.OBJ, 'VS2008 build 21022'),
        0x0084521e: VersionInfo(VIT.CPP, 'VS2008 build 21022'),
        0x0091521e: VersionInfo(VIT.LNK, 'VS2008 build 21022'),
        0x0094521e: VersionInfo(VIT.RES, 'VS2008 build 21022'),
        0x0092521e: VersionInfo(VIT.EXP, 'VS2008 build 21022'),
        0x0093521e: VersionInfo(VIT.IMP, 'VS2008 build 21022'),
        0x0095521e: VersionInfo(VIT.ASM, 'VS2008 build 21022'),

        # Visual Studio 2008 Beta 2 [9.0] (values are interpolated)
        0x008350e2: VersionInfo(VIT.OBJ, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009550e2: VersionInfo(VIT.ASM, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x008450e2: VersionInfo(VIT.CPP, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009450e2: VersionInfo(VIT.RES, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009150e2: VersionInfo(VIT.LNK, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009350e2: VersionInfo(VIT.IMP, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009250e2: VersionInfo(VIT.EXP, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),

        # MSVS2005 (RTM.50727-4200) cl version: 14.00.50727.42
        # MSVS2005-SP1 dumps the same comp.id's.
        # It is strange, but there exists VS2012 with the same build number:
        # 11 Build 50727.1
        0x006dc627: VersionInfo(VIT.OBJ, 'VS2005 build 50727'),
        0x006ec627: VersionInfo(VIT.CPP, 'VS2005 build 50727'),
        0x0078c627: VersionInfo(VIT.LNK, 'VS2005 build 50727'),
        0x007cc627: VersionInfo(VIT.RES, 'VS2005 build 50727'),
        0x007ac627: VersionInfo(VIT.EXP, 'VS2005 build 50727'),
        0x007bc627: VersionInfo(VIT.IMP, 'VS2005 build 50727'),
        0x007dc627: VersionInfo(VIT.ASM, 'VS2005 build 50727'),

        # Visual Studio 2005 [8.0] (values are interpolated)
        0x006dc490: VersionInfo(VIT.OBJ, 'VS2005 [8.0] build 50320', interpolated=True),
        0x007dc490: VersionInfo(VIT.ASM, 'VS2005 [8.0] build 50320', interpolated=True),
        0x006ec490: VersionInfo(VIT.CPP, 'VS2005 [8.0] build 50320', interpolated=True),
        0x007cc490: VersionInfo(VIT.RES, 'VS2005 [8.0] build 50320', interpolated=True),
        0x0078c490: VersionInfo(VIT.LNK, 'VS2005 [8.0] build 50320', interpolated=True),
        0x007bc490: VersionInfo(VIT.IMP, 'VS2005 [8.0] build 50320', interpolated=True),
        0x007ac490: VersionInfo(VIT.EXP, 'VS2005 [8.0] build 50320', interpolated=True),

        # Visual Studio 2005 Beta 2 [8.0] (values are interpolated)
        0x006dc427: VersionInfo(VIT.OBJ, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x007dc427: VersionInfo(VIT.ASM, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x006ec427: VersionInfo(VIT.CPP, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x007cc427: VersionInfo(VIT.RES, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x0078c427: VersionInfo(VIT.LNK, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x007bc427: VersionInfo(VIT.IMP, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x007ac427: VersionInfo(VIT.EXP, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),

        # Visual Studio 2005 Beta 1 [8.0] (values are interpolated)
        0x006d9e9f: VersionInfo(VIT.OBJ, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x007d9e9f: VersionInfo(VIT.ASM, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x006e9e9f: VersionInfo(VIT.CPP, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x007c9e9f: VersionInfo(VIT.RES, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x00789e9f: VersionInfo(VIT.LNK, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x007b9e9f: VersionInfo(VIT.IMP, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x007a9e9f: VersionInfo(VIT.EXP, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),

        # Windows Server 2003 SP1 DDK (for AMD64) (values are interpolated)
        0x006d9d76: VersionInfo(VIT.OBJ, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x007d9d76: VersionInfo(VIT.ASM, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x006e9d76: VersionInfo(VIT.CPP, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x007c9d76: VersionInfo(VIT.RES, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x00789d76: VersionInfo(VIT.LNK, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x007b9d76: VersionInfo(VIT.IMP, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x007a9d76: VersionInfo(VIT.EXP, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),

        # MSVS2003 (.NET) SP1 (kb918007)
        0x005f178e: VersionInfo(VIT.OBJ, 'VS2003 (.NET) SP1 build 6030'),
        0x0060178e: VersionInfo(VIT.CPP, 'VS2003 (.NET) SP1 build 6030'),
        0x005a178e: VersionInfo(VIT.LNK, 'VS2003 (.NET) SP1 build 6030'),
        0x000f178e: VersionInfo(VIT.ASM, 'VS2003 (.NET) SP1 build 6030'),
        # cvtres is the same version as without SP1
        0x005e178e: VersionInfo(VIT.RES, 'VS.NET 2003 SP1 [7.1] build 6030', interpolated=True),
        0x005c178e: VersionInfo(VIT.EXP, 'VS2003 (.NET) SP1 build 6030'),
        0x005d178e: VersionInfo(VIT.IMP, 'VS2003 (.NET) SP1 build 6030'),

        # Windows Server 2003 SP1 DDK (values are interpolated)
        0x005f0fc3: VersionInfo(VIT.OBJ, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x000f0fc3: VersionInfo(VIT.ASM, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x00600fc3: VersionInfo(VIT.CPP, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x005e0fc3: VersionInfo(VIT.RES, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x005a0fc3: VersionInfo(VIT.LNK, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x005d0fc3: VersionInfo(VIT.IMP, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x005c0fc3: VersionInfo(VIT.EXP, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),

        # MSVS2003 (.NET) 7.0.1.3088
        0x005f0c05: VersionInfo(VIT.OBJ, 'VS2003 (.NET) build 3077'),
        0x00600c05: VersionInfo(VIT.CPP, 'VS2003 (.NET) build 3077'),
        0x000f0c05: VersionInfo(VIT.ASM, 'VS2003 (.NET) build 3077'),
        0x005e0bec: VersionInfo(VIT.RES, 'VS2003 (.NET) build 3052'),
        0x005c0c05: VersionInfo(VIT.EXP, 'VS2003 (.NET) build 3077'),
        0x005d0c05: VersionInfo(VIT.IMP, 'VS2003 (.NET) build 3077'),
        0x005a0c05: VersionInfo(VIT.LNK, 'VS2003 (.NET) build 3077'),
        # Visual Studio .NET 2003 [7.1] (values are interpolated)
        0x005e0c05: VersionInfo(VIT.RES, 'VS.NET 2003 [7.1] build 3077', interpolated=True),

        # MSVS2002 (.NET) 7.0.9466
        0x001c24fa: VersionInfo(VIT.OBJ, 'VS2002 (.NET) build 9466'),
        0x001d24fa: VersionInfo(VIT.CPP, 'VS2002 (.NET) build 9466'),
        0x004024fa: VersionInfo(VIT.ASM, 'VS2002 (.NET) build 9466'),
        0x003d24fa: VersionInfo(VIT.LNK, 'VS2002 (.NET) build 9466'),
        0x004524fa: VersionInfo(VIT.RES, 'VS2002 (.NET) build 9466'),
        0x003f24fa: VersionInfo(VIT.EXP, 'VS2002 (.NET) build 9466'),
        0x001924fa: VersionInfo(VIT.IMP, 'VS2002 (.NET) build 9466'),

        # Windows XP SP1 DDK (values are interpolated)
        0x001c23d8: VersionInfo(VIT.OBJ, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x004023d8: VersionInfo(VIT.ASM, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x001d23d8: VersionInfo(VIT.CPP, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x004523d8: VersionInfo(VIT.RES, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x003d23d8: VersionInfo(VIT.LNK, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x001923d8: VersionInfo(VIT.IMP, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x003f23d8: VersionInfo(VIT.EXP, 'Windows XP SP1 DDK build 9176', interpolated=True),

        # MSVS98 6.0 SP6 (Enterprise edition)
        # Looks like linker may mix compids for C and C++ objects (why?)
        0x000a2636: VersionInfo(VIT.OBJ, 'VS98 (6.0) SP6 build 8804'),
        0x000b2636: VersionInfo(VIT.CPP, 'VS98 (6.0) SP6 build 8804'),

        # MSVC++ 6.0 SP5 (Enterprise edition)
        0x00152306: VersionInfo(VIT.OBJ, 'VC++ 6.0 SP5 build 8804'),
        0x00162306: VersionInfo(VIT.CPP, 'VC++ 6.0 SP5 build 8804'),
        0x000420ff: VersionInfo(VIT.LNK, 'VC++ 6.0 SP5 imp/exp build 8447'),
        0x000606c7: VersionInfo(VIT.RES, 'VS98 (6.0) SP6 cvtres build 1736'),

        # MSVS6.0 (no service packs)
        0x000a1fe8: VersionInfo(VIT.OBJ, 'VS98 (6.0) build 8168'),
        0x000b1fe8: VersionInfo(VIT.CPP, 'VS98 (6.0) build 8168'),
        0x000606b8: VersionInfo(VIT.RES, 'VS98 (6.0) cvtres build 1720'),
        0x00041fe8: VersionInfo(VIT.LNK, 'VS98 (6.0) imp/exp build 8168'),

        # MSVS97 5.0 Enterprise Edition (cl 11.00.7022, link 5.00.7022)
        # Does NOT generate any @comp.id records, nor Rich headers.
        # SP3 added Rich-generating linker (albeit it doesn't identify itself),
        # and CVTRES and LIB(?) utilities that generate @comp.id records. There is no
        # distinction between import and export records yet. I marked the records as
        # [IMP] because VS98 linker seems to omit export records from the header; VS97
        # linker might do the same.
        0x00060684: VersionInfo(VIT.RES, 'VS97 (5.0) SP3 cvtres 5.00.1668'),
        0x00021c87: VersionInfo(VIT.IMP, 'VS97 (5.0) SP3 link 5.10.7303'),
    }

Ancestors

Static methods

def parse_signature(data)

Extracts a JSON-serializable and human readable dictionary with information about time stamp and code signing certificates that are attached to the input PE file.

Expand source code Browse git
@classmethod
def parse_signature(cls, data: bytearray) -> dict:
    """
    Extracts a JSON-serializable and human readable dictionary with information about
    time stamp and code signing certificates that are attached to the input PE file.
    """
    from refinery.units.formats.pkcs7 import pkcs7
    from refinery.units.formats.pe.pesig import pesig

    try:
        # Pipe the input through the pesig and pkcs7 units to obtain a JSON
        # representation of the embedded PKCS7 signature blob.
        signature = data | pesig | pkcs7 | json.loads
    except Exception as E:
        # Chain the original exception so the root cause stays visible in the
        # traceback instead of being masked by the ValueError.
        raise ValueError(F'PKCS7 parser failed with error: {E!s}') from E

    info = {}

    def find_timestamps(entry):
        # Recursively search the parsed signature for a signing_time attribute.
        # When one is found inside a dict that also carries signer information,
        # the issuer of the timestamping certificate is recorded as well.
        if isinstance(entry, dict):
            if set(entry.keys()) == {'type', 'value'}:
                if entry['type'] == 'signing_time':
                    return {'Timestamp': entry['value']}
            for value in entry.values():
                result = find_timestamps(value)
                if result is None:
                    continue
                with suppress(KeyError):
                    result.setdefault('TimestampIssuer', entry['sid']['issuer']['common_name'])
                return result
        elif isinstance(entry, list):
            for value in entry:
                result = find_timestamps(value)
                if result is None:
                    continue
                return result

    timestamp_info = find_timestamps(signature)
    if timestamp_info is not None:
        info.update(timestamp_info)

    try:
        certificates = signature['content']['certificates']
    except KeyError:
        return info

    if len(certificates) == 1:
        main_certificate = certificates[0]['tbs_certificate']
    else:
        # Heuristic for picking the signer among several certificates: prefer
        # one with the code_signing extended key usage, skipping certificates
        # that are used for key signing or only for timestamping.
        certificates_with_extended_use = []
        main_certificate = None
        for certificate in certificates:
            with suppress(Exception):
                crt = certificate['tbs_certificate']
                ext = [e for e in crt['extensions'] if e['extn_id'] == 'extended_key_usage' and e['extn_value'] != ['time_stamping']]
                key = [e for e in crt['extensions'] if e['extn_id'] == 'key_usage']
                if ext:
                    certificates_with_extended_use.append(crt)
                if any('key_cert_sign' in e['extn_value'] for e in key):
                    continue
                if any('code_signing' in e['extn_value'] for e in ext):
                    main_certificate = crt
                    break
        if main_certificate is None and len(certificates_with_extended_use) == 1:
            main_certificate = certificates_with_extended_use[0]
    if main_certificate:
        serial = main_certificate['serial_number']
        if not isinstance(serial, int):
            serial = int(serial, 0)
        # Render the serial number as a hex string with an even digit count.
        serial = F'{serial:x}'
        if len(serial) % 2:
            serial = '0' + serial
        subject = main_certificate['subject']
        location = [subject.get(t, '') for t in ('locality_name', 'state_or_province_name', 'country_name')]
        info.update(Subject=subject['common_name'])
        if any(location):
            info.update(SubjectLocation=', '.join(filter(None, location)))
        info.update(Issuer=main_certificate['issuer']['common_name'], Serial=serial)
        return info
    return info
def parse_version(pe, data=None)

Extracts a JSON-serializable and human readable dictionary with information about the version resource of an input PE file, if available.

Expand source code Browse git
@classmethod
def parse_version(cls, pe: PE, data=None) -> dict:
    """
    Extracts a JSON-serializable and human readable dictionary with information about
    the version resource of an input PE file, if available.
    """
    # The resource directory has to be parsed before pe.FileInfo is populated.
    pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']])
    # NOTE(review): pe.FileInfo only exists when a version resource was found;
    # otherwise the attribute access below raises AttributeError — confirm that
    # callers handle this.
    for FileInfo in pe.FileInfo:
        for FileInfoEntry in FileInfo:
            with suppress(AttributeError):
                for StringTableEntry in FileInfoEntry.StringTable:
                    StringTableEntryParsed = cls._parse_pedict(StringTableEntry.entries)
                    with suppress(AttributeError):
                        # The language id may live in the string table entries or
                        # as an attribute, and may be a hex string or an integer.
                        LangID = StringTableEntry.entries.get('LangID', None) or StringTableEntry.LangID
                        LangID = int(LangID, 0x10) if not isinstance(LangID, int) else LangID
                        # High word selects the language, low word the charset.
                        LangHi = LangID >> 0x10
                        LangLo = LangID & 0xFFFF
                        Language = cls._LCID.get(LangHi, 'Language Neutral')
                        Charset = cls._CHARSET.get(LangLo, 'Unknown Charset')
                        StringTableEntryParsed.update(
                            LangID=F'{LangID:08X}',
                            Charset=Charset,
                            Language=Language
                        )
                    # Only the first string table entry is returned.
                    return StringTableEntryParsed
def parse_exports(pe, data=None)
Expand source code Browse git
@classmethod
def parse_exports(cls, pe: PE, data=None) -> list:
    """
    Returns the list of exported symbol names of the input PE file. Exports
    without a name are represented by their index, prefixed with an @ sign.
    """
    pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT']])
    return [
        symbol.name.decode('ascii') if symbol.name else F'@{index}'
        for index, symbol in enumerate(pe.DIRECTORY_ENTRY_EXPORT.symbols)
    ]
def parse_imports(pe, data=None)
Expand source code Browse git
@classmethod
def parse_imports(cls, pe: PE, data=None) -> dict:
    """
    Returns a dictionary mapping each imported library (with any .dll suffix
    removed) to the list of symbols imported from it. Imports by ordinal are
    represented as @ followed by the ordinal number.
    """
    pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']])
    # Bugfix: the annotation previously claimed a list return value, but the
    # function has always built a dictionary.
    info = {}
    for idd in pe.DIRECTORY_ENTRY_IMPORT:
        dll = idd.dll.decode('ascii')
        if dll.lower().endswith('.dll'):
            dll = dll[:-4]
        imports = info.setdefault(dll, [])
        for imp in idd.imports:
            if imp.name is None:
                # Imports by ordinal carry no name; previously this crashed
                # with an AttributeError on imp.name.decode.
                imports.append(F'@{imp.ordinal}')
            else:
                imports.append(imp.name.decode('ascii'))
    return info
def parse_time_stamps(pe, raw_time_stamps)

Extracts time stamps from the PE header (link time), as well as from the imports, exports, debug, and resource directory. The resource time stamp is also parsed as a DOS time stamp and returned as the "Delphi" time stamp.

Expand source code Browse git
@classmethod
def parse_time_stamps(cls, pe: PE, raw_time_stamps: bool) -> dict:
    """
    Extracts time stamps from the PE header (link time), as well as from the imports,
    exports, debug, and resource directory. The resource time stamp is also parsed as
    a DOS time stamp and returned as the "Delphi" time stamp.

    When raw_time_stamps is true, the numeric values are returned unmodified;
    otherwise they are converted to naive datetime objects interpreted as UTC.
    """
    if raw_time_stamps:
        def dt(ts): return ts
    else:
        def dt(ts):
            # parse as UTC but then forget time zone information
            return datetime.fromtimestamp(
                ts,
                tz=timezone.utc
            ).replace(tzinfo=None)

    pe.parse_data_directories(directories=[
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG'],
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']
    ])

    info = {}

    # Each block below is best-effort: a missing directory simply leaves the
    # corresponding key out of the result.
    with suppress(AttributeError):
        info.update(Linker=dt(pe.FILE_HEADER.TimeDateStamp))

    with suppress(AttributeError):
        # Note: only the time stamp of the last import descriptor is kept.
        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            info.update(Import=dt(entry.TimeDateStamp()))

    with suppress(AttributeError):
        # Likewise, only the last debug directory entry survives.
        for entry in pe.DIRECTORY_ENTRY_DEBUG:
            info.update(DbgDir=dt(entry.struct.TimeDateStamp))

    with suppress(AttributeError):
        Export = pe.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp
        if Export: info.update(Export=dt(Export))

    with suppress(AttributeError):
        res_timestamp = pe.DIRECTORY_ENTRY_RESOURCE.struct.TimeDateStamp
        if res_timestamp:
            with suppress(ValueError):
                # Delphi stores a DOS time stamp in the resource directory.
                from ...misc.datefix import datefix
                dos = datefix.dostime(res_timestamp)
                info.update(Delphi=dos)
                info.update(RsrcTS=dt(res_timestamp))

    def norm(value):
        # Integers (raw mode) pass through; datetime objects become strings.
        if isinstance(value, int):
            return value
        return str(value)

    return {key: norm(value) for key, value in info.items()}
def parse_dotnet(pe, data)

Extracts a JSON-serializable and human readable dictionary with information about the .NET metadata of an input PE file.

Expand source code Browse git
@classmethod
def parse_dotnet(cls, pe: PE, data):
    """
    Extracts a JSON-serializable and human readable dictionary with information about
    the .NET metadata of an input PE file.
    """
    header = DotNetHeader(data, pe=pe)
    tables = header.meta.Streams.Tables
    head = header.head
    meta = header.meta

    info = {
        'RuntimeVersion': F'{head.MajorRuntimeVersion}.{head.MinorRuntimeVersion}',
        'Version': F'{meta.MajorVersion}.{meta.MinorVersion}',
        'VersionString': meta.VersionString,
        'Flags': [flag for flag, present in head.KnownFlags.items() if present],
    }

    if len(tables.Assembly) == 1:
        assembly, = tables.Assembly
        info['AssemblyName'] = assembly.Name
        info['Release'] = '{}.{}.{}.{}'.format(
            assembly.MajorVersion,
            assembly.MinorVersion,
            assembly.BuildNumber,
            assembly.RevisionNumber,
        )

    try:
        entry_point = head.EntryPointToken + pe.OPTIONAL_HEADER.ImageBase
    except AttributeError:
        # No optional header image base available; omit the entry point.
        pass
    else:
        info['EntryPoint'] = F'0x{entry_point:08X}'

    if len(tables.Module) == 1:
        info['ModuleName'] = tables.Module[0].Name

    return info
def parse_debug(pe, data=None)
Expand source code Browse git
@classmethod
def parse_debug(cls, pe: PE, data=None):
    """
    Extracts the PDB path and age from the CodeView entry of the debug
    directory, when the input PE file contains one.
    """
    result = {}
    pe.parse_data_directories(directories=[
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG']])
    for entry in pe.DIRECTORY_ENTRY_DEBUG:
        if DEBUG_TYPE.get(entry.struct.Type, None) != 'IMAGE_DEBUG_TYPE_CODEVIEW':
            continue
        with suppress(Exception):
            path = entry.entry.PdbFileName
            # The path buffer is NUL-terminated; keep everything before the
            # first NUL byte (or the whole buffer if none is present).
            path = path.partition(b'\0')[0]
            result.update(
                PdbPath=path.decode(cls.codec),
                PdbAge=entry.entry.Age
            )
    return result

Methods

def parse_header(cls, pe, data=None)
Expand source code Browse git
def parse_header(cls, pe: PE, data=None) -> dict:
    """
    Extracts a JSON-serializable and human readable dictionary with information
    about the machine type, subsystem, minimum OS version, rich header entries,
    file type, bitness, and image base of the input PE file.
    """
    def format_macro_name(name: str, prefix, convert=True):
        # Drop the first `prefix` underscore-separated components of the macro
        # name and optionally prettify the rest, e.g.
        # IMAGE_SUBSYSTEM_WINDOWS_GUI -> "Windows GUI".
        name = name.split('_')[prefix:]
        if convert:
            for k, part in enumerate(name):
                name[k] = part.upper() if len(part) <= 3 else part.capitalize()
        return ' '.join(name)

    major = pe.OPTIONAL_HEADER.MajorOperatingSystemVersion
    minor = pe.OPTIONAL_HEADER.MinorOperatingSystemVersion
    version = cls._WINVER.get(major, {0: 'Unknown'})

    try:
        MinimumOS = version[minor]
    except LookupError:
        MinimumOS = version[0]
    header_information = {
        'Machine': format_macro_name(MACHINE_TYPE[pe.FILE_HEADER.Machine], 3, False),
        'Subsystem': format_macro_name(SUBSYSTEM_TYPE[pe.OPTIONAL_HEADER.Subsystem], 2),
        'MinimumOS': MinimumOS,
    }

    rich_header = pe.parse_rich_header()
    rich = []
    if rich_header:
        # The values list alternates between comp.id and usage count; only the
        # comp.id entries at even indices are looked up here.
        it = rich_header.get('values', [])
        for idv in it[0::2]:
            info = cls._RICH_HEADER.get(idv, None)
            if info is None:
                info = guess_version(idv)
            if not info:
                continue
            rich.append(str(info))
        header_information['RICH'] = rich

    characteristics = [
        name for name, mask in image_characteristics
        if pe.FILE_HEADER.Characteristics & mask
    ]
    for typespec, flag in {
        'EXE': 'IMAGE_FILE_EXECUTABLE_IMAGE',
        'DLL': 'IMAGE_FILE_DLL',
        'SYS': 'IMAGE_FILE_SYSTEM'
    }.items():
        if flag in characteristics:
            header_information['Type'] = typespec
    # address_width counts hexadecimal digits, so the bitness is 4x this value.
    address_width = None
    if 'IMAGE_FILE_16BIT_MACHINE' in characteristics:
        address_width = 4
    elif pe.FILE_HEADER.Machine == MACHINE_TYPE['IMAGE_FILE_MACHINE_I386']:
        address_width = 8
    elif pe.FILE_HEADER.Machine == MACHINE_TYPE['IMAGE_FILE_MACHINE_AMD64']:
        address_width = 16
    if address_width:
        header_information['Bits'] = 4 * address_width
    else:
        address_width = 16
    # Bugfix: the format spec was missing the X presentation type, so the image
    # base was rendered as zero-padded decimal behind a misleading 0x prefix.
    header_information['ImageBase'] = F'0x{pe.OPTIONAL_HEADER.ImageBase:0{address_width}X}'
    return header_information

Inherited members

class peoverlay (certificate=True, directories=True, memdump=False)

This unit is implemented in refinery.units.formats.pe.peoverlay and has the following commandline Interface:

usage: peoverlay [-h] [-L] [-Q] [-0] [-v] [-c] [-d] [-m]

Returns the overlay of a PE file, i.e. anything that may have been
appended to the file. This does not include digital signatures. Use
pestrip to obtain only the body of the PE file after removing the overlay.

optional arguments:
  -c, --no-cert  Do not include digital signatures for the size
                 computation.
  -d, --no-dirs  Do not include any data directories for size computation
                 (implies --no-cert).
  -m, --memdump  Assume that the file data was a memory-mapped PE file.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class peoverlay(OverlayUnit):
    """
    Returns the overlay of a PE file, i.e. anything that may have been appended to the file.
    This does not include digital signatures. Use `refinery.pestrip` to obtain only the body
    of the PE file after removing the overlay.
    """
    def process(self, data: bytearray) -> bytearray:
        offset = self._get_size(data)
        if not isinstance(data, bytearray):
            # Immutable input: return a copy of the trailing bytes.
            return data[offset:]
        # Mutable input: drop the PE body in place and return the same buffer.
        del data[:offset]
        return data

Ancestors

Inherited members

class perc (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.pe.perc and has the following commandline Interface:

usage: perc [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
            [path [path ...]]

Extract PE file resources.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class perc(PathExtractorUnit):
    """
    Extract PE file resources.
    """
    def __init__(self, *paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path'):
        def fixpath(p: str):
            # Bare identifiers are promoted to a substring match; explicit
            # regular expressions and non-identifier patterns pass through.
            if regex or not p.isidentifier():
                return p
            return re.compile(FR'^.*?{re.escape(p)}.*$')
        patterns = [fixpath(p) for p in paths]
        super().__init__(*patterns,
            list=list, join_path=join_path, drop_path=drop_path, path=path)

    def _search(self, pe, directory, level=0, *parts):
        # Resource trees are at most three levels deep (type/name/language).
        if level >= 3:
            self.log_warn(F'unexpected resource tree level {level + 1:d}')
        for entry in directory.entries:
            label = None
            if entry.name:
                label = str(entry.name)
            elif entry.id is not None:
                if level == 0 and entry.id in iter(RSRC):
                    # Well-known top-level resource types get symbolic names.
                    label = RSRC(entry.id).name
                else:
                    label = str(entry.id)
            if label is None:
                self.log_warn(F'resource entry has name {entry.name} and id {entry.id} at level {level + 1:d}')
                continue
            if not entry.struct.DataIsDirectory:
                # Bind pe and entry as defaults so the data is read lazily but
                # from the correct entry.
                def read(p=pe, e=entry):
                    return p.get_data(e.data.struct.OffsetToData, e.data.struct.Size)
                yield UnpackResult('/'.join((*parts, label)), data=read)
            else:
                yield from self._search(pe, entry.directory, level + 1, *parts, label)

    def unpack(self, data):
        pe = pefile.PE(data=data)
        try:
            yield from self._search(pe, pe.DIRECTORY_ENTRY_RESOURCE)
        except AttributeError:
            # No resource directory present.
            pass

Ancestors

Inherited members

class pesig

This unit is implemented in refinery.units.formats.pe.pesig and has the following commandline Interface:

usage: pesig [-h] [-L] [-Q] [-0] [-v]

Extracts the contents of the IMAGE_DIRECTORY_ENTRY_SECURITY entry of a PE
file, i.e. the digital signatures in DER format.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class pesig(Unit):
    """
    Extracts the contents of the IMAGE_DIRECTORY_ENTRY_SECURITY entry of a PE file,
    i.e. the digital signatures in DER format.
    """

    _SECDIRID = DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_SECURITY']

    def __init__(self): pass

    def process(self, data: bytearray) -> bytearray:
        index = self._SECDIRID
        pe = PE(data=data, fast_load=True)
        pe.parse_data_directories(directories=[index])
        security = pe.OPTIONAL_HEADER.DATA_DIRECTORY[index]
        self.log_info(F'signature offset: 0x{security.VirtualAddress:08X}')
        self.log_info(F'signature length: 0x{security.Size:08X}')
        if not security.VirtualAddress or not security.Size:
            raise ValueError(F'IMAGE_DIRECTORY_ENTRY_SECURITY ({index}) is corrupt.')
        # The directory entry starts with an 8-byte WIN_CERTIFICATE header
        # (length, revision, type) followed by the actual signature data.
        start = security.VirtualAddress + 8
        end = start + security.Size
        length, _revision, _certtype = unpack('<IHH', data[start - 8:start])
        signature = data[start:end]

        if len(signature) + 8 != length:
            raise RefineryPartialResult(
                F'Found {len(signature) + 8} bytes of signature, but length should be {length}.',
                partial=signature)

        return signature

Ancestors

Inherited members

class pestrip (certificate=True, directories=True, memdump=False)

This unit is implemented in refinery.units.formats.pe.pestrip and has the following commandline Interface:

usage: pestrip [-h] [-L] [-Q] [-0] [-v] [-c] [-d] [-m]

Removes the overlay of a PE file and returns the stripped executable. Use
peoverlay to extract the overlay.

optional arguments:
  -c, --no-cert  Do not include digital signatures for the size
                 computation.
  -d, --no-dirs  Do not include any data directories for size computation
                 (implies --no-cert).
  -m, --memdump  Assume that the file data was a memory-mapped PE file.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class pestrip(OverlayUnit):
    """
    Removes the overlay of a PE file and returns the stripped executable. Use `refinery.peoverlay`
    to extract the overlay.
    """
    def process(self, data: bytearray) -> bytearray:
        # Size of the PE body; everything past this offset is overlay.
        size = self._get_size(data)
        if isinstance(data, bytearray):
            # Truncate mutable buffers in place to avoid a copy.
            data[size:] = []
            return data
        return data[:size]

Ancestors

Inherited members

class pkcs7

This unit is implemented in refinery.units.formats.pkcs7 and has the following commandline Interface:

usage: pkcs7 [-h] [-L] [-Q] [-0] [-v]

Converts PKCS7 encoded data to a JSON representation.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class pkcs7(Unit):
    """
    Converts PKCS7 encoded data to a JSON representation.
    """
    def process(self, data: bytes):
        # Parse the DER-encoded PKCS7 blob and serialize the resulting
        # structure through the custom ASN.1-to-JSON encoder.
        parsed = asn1crypto.cms.ContentInfo.load(data)
        with ParsedASN1ToJSON as encoder:
            return encoder.dumps(parsed).encode(self.codec)

Ancestors

Inherited members

class stego (parts='RGB')

This unit is implemented in refinery.units.formats.stego and has the following commandline Interface:

usage: stego [-h] [-L] [-Q] [-0] [-v] [parts]

Decodes the RGBA (red/green/blue/alpha) values of the pixels of a given
image file and outputs these values as bytes. Each row of the image is
transformed and output as an individual chunk. To obtain the data in
columns, the transpose unit can be used.

positional arguments:
  parts          A string containing any ordering of the letters R, G, B,
                 and A (case-insensitive). These pixel components will be
                 extracted from every pixel in the given order. The
                 default value is RGB.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class stego(Unit):
    """
    Decodes the RGBA (red/green/blue/alpha) values of the pixels of a given image file and
    outputs these values as bytes. Each row of the image is transformed and output as an
    individual chunk. To obtain the data in columns, the `refinery.transpose` unit can be
    used.
    """
    def __init__(
        self,
        parts: arg('parts', nargs='?', type=str, help=(
            'A string containing any ordering of the letters R, G, B, and A (case-insensitive). '
            'These pixel components will be extracted from every pixel in the given order. The '
            'default value is {default}.'
        )) = 'RGB'
    ):
        super().__init__(
            parts=tuple(arg.as_option(p, PIXEL_PART) for p in parts)
        )

    def process(self, data):
        # Fix: removed an unused inner helper (coordinates) that was dead code.
        image = PIL.Image.open(MemoryFile(data))
        width, height = image.size
        # One chunk per image row; each pixel contributes the requested
        # components in the order given by the parts argument.
        for y in range(height):
            yield bytearray(
                image.getpixel((x, y))[p]
                for x in range(width)
                for p in self.args.parts
            )

Ancestors

Inherited members

class winreg (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.winreg and has the following commandline Interface:

usage: winreg [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
              [path [path ...]]

Extract values from a Windows registry hive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class winreg(PathExtractorUnit):
    """
    Extract values from a Windows registry hive.
    """
    def _walk(self, key, *path):
        # Build the current location string and prune the recursion early
        # when this subtree cannot contain any requested path.
        location = '/'.join(path)
        if not self._check_reachable(location):
            self.log_debug(F'pruning search at {location}')
            return
        # Emit every value stored directly under this key. The lambda binds
        # the current value object as a default argument so that extraction
        # of the raw data is deferred until the result is actually used.
        for item in key.values():
            yield UnpackResult(F'{location}/{item.name()}', lambda v=item: v.raw_data())
        # Descend into each subkey, extending the path with its name.
        for child in key.subkeys():
            yield from self._walk(child, *path, child.name())

    def unpack(self, data):
        # Parse the hive from an in-memory stream and walk it from the root.
        with MemoryFile(data) as stream:
            hive_root = Registry(stream).root()
            yield from self._walk(hive_root, hive_root.name())

Ancestors

Inherited members

class xtxml (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.xml and has the following commandline Interface:

usage: xtxml [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
             [path [path ...]]

Extract values from an XML document.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtxml(PathExtractorUnit):
    """
    Extract values from an XML document.
    """
    _STRICT_PATH_MATCHING = True

    def unpack(self, data):
        def visit(node: xml.XMLNode, *path: str):
            def dump(node: xml.XMLNode = node):
                # Nodes with children are re-serialized and pretty-printed;
                # leaf nodes yield their encoded text content directly.
                if node.children:
                    with MemoryFile() as buffer:
                        node.write(buffer)
                        return bytes(buffer.getbuffer() | ppxml)
                return node.content.encode(self.codec)
            # Group children by tag so that siblings sharing a tag can be
            # disambiguated with an index suffix in their path.
            grouped = defaultdict(list)
            for child in node.children:
                grouped[child.tag].append(child)
            yield UnpackResult('/'.join(path), dump, **node.attributes)
            for tag, siblings in grouped.items():
                if len(siblings) == 1:
                    yield from visit(siblings[0], *path, tag)
                else:
                    # Zero-pad the hexadecimal index to the width required
                    # by the largest index in this sibling group.
                    digits = len(F'{len(siblings):X}')
                    for index, sibling in enumerate(siblings):
                        yield from visit(sibling, *path, F'{tag}[0x{index:0{digits}X}]')
        root = xml.parse(data)
        yield from visit(root, root.tag or 'xml')

Ancestors

Inherited members

class chop (size, truncate=False, into=False)

This unit is implemented in refinery.units.meta.chop and has the following commandline Interface:

usage: chop [-h] [-L] [-Q] [-0] [-v] [-t] [-i] N

Reinterprets the input as a sequence of equally sized chunks and outputs
this sequence.

positional arguments:
  N               Chop data into chunks of this size.

optional arguments:
  -t, --truncate  Truncate possible excess bytes at the end of the input,
                  by default they are appended as a single chunk.
  -i, --into      If this flag is specified, the size parameter determines
                  the number of blocks to be produced rather than the size
                  of each block. In this case, truncation is performed
                  before the data is split.

generic options:
  -h, --help      Show this help message and exit.
  -L, --lenient   Allow partial results as output.
  -Q, --quiet     Disables all log output.
  -0, --devnull   Do not produce any output.
  -v, --verbose   Specify up to two times to increase log level.
Expand source code Browse git
class chop(Unit):
    """
    Reinterprets the input as a sequence of equally sized chunks and outputs this sequence.
    """

    def __init__(
        self, size: arg.number('size', help='Chop data into chunks of this size.'),
        truncate: arg.switch('-t', help=(
            'Truncate possible excess bytes at the end of the input, by default they are appended as a single chunk.')) = False,
        into: arg.switch('-i', help=(
            'If this flag is specified, the size parameter determines the number of blocks to be produced rather than the size '
            'of each block. In this case, truncation is performed before the data is split.')) = False
    ):
        # An initializer should not return a value; the previous revision
        # returned the (None) result of the base class initializer.
        super().__init__(size=size, into=into, truncate=truncate)

    def process(self, data):
        size = self.args.size
        if size < 1:
            raise ValueError('The chunk size has to be a positive integer value.')
        if self.args.into:
            # In --into mode, `size` is the desired number of chunks; compute
            # the per-chunk byte count. Unless truncation was requested, any
            # remainder is distributed over the leading chunks, which then
            # are one byte larger than the trailing ones.
            size, remainder = divmod(len(data), size)
            if remainder and not self.args.truncate:
                partition = remainder * (size + 1)
                part1, part2 = data[:partition], data[partition:]
                yield from splitchunks(part1, size + 1)
                yield from splitchunks(part2, size)
                return

        yield from splitchunks(data, size, self.args.truncate)

Ancestors

Inherited members

class cm (invert=False, all=False, reset=False, size=False, index=False, ext=False, entropy=False, ic=False, magic=False, sha1=False, sha256=False, crc32=False, md5=False, hashes=False, *names)

This unit is implemented in refinery.units.meta.cm and has the following commandline Interface:

usage: cm [-h] [-L] [-Q] [-0] [-v] [-x | -a] [-r] [-S] [-I] [-F] [-E] [-C]
          [-M] [-1] [-2] [-3] [-5] [-H]
          [name [name ...]]

The Common Meta variables unit populates the set of meta variables of the
current chunk with commonly used metadata. The unit has no effect outside
a frame.

positional arguments:
  name           A variable name that can include the common properties:
                 mime, ext, magic, size, entropy, ic, crc32, sha1, sha256,
                 md5, index. If none is given, the variables index and
                 size are populated. For most of these, an optional
                 argument is available that can be used as a shorthand:

optional arguments:
  -x, --invert   populate only options that have not been specified
  -a, --all      populate all options
  -r, --reset    discard all meta variables that were not explicitly
                 specified
  -S, --size     size of the chunk
  -I, --index    index of the chunk in the current frame
  -F, --ext      guess file extension
  -E, --entropy  compute data entropy
  -C, --ic       compute the index of coincidence
  -M, --magic    compute file magic
  -1, --sha1     compute hash: SHA-1
  -2, --sha256   compute hash: SHA-256
  -3, --crc32    compute hash: CRC32
  -5, --md5      compute hash: MD5
  -H, --hashes   compute all common hashes

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class cm(Unit):
    """
    The Common Meta variables unit populates the set of meta variables of the current chunk with commonly
    used metadata. The unit has no effect outside a frame.
    """
    def __init__(
        self,
        invert  : arg.switch('-x', group='ALL', help='populate only options that have not been specified') = False,
        all     : arg.switch('-a', group='ALL', help='populate all options') = False,
        reset   : arg.switch('-r', help='discard all meta variables that were not explicitly specified') = False,
        size    : arg.switch('-S', help='size of the chunk') = False,
        index   : arg.switch('-I', help='index of the chunk in the current frame') = False,
        ext     : arg.switch('-F', help='guess file extension') = False,
        entropy : arg.switch('-E', help='compute data entropy') = False,
        ic      : arg.switch('-C', help='compute the index of coincidence') = False,
        magic   : arg.switch('-M', help='compute file magic') = False,
        sha1    : arg.switch('-1', help='compute hash: SHA-1') = False,
        sha256  : arg.switch('-2', help='compute hash: SHA-256') = False,
        crc32   : arg.switch('-3', help='compute hash: CRC32') = False,
        md5     : arg.switch('-5', help='compute hash: MD5') = False,
        hashes  : arg.switch('-H', help='compute all common hashes') = False,
        *names  : arg(metavar='name', help=(
            F'A variable name that can include the common properties: {_COMMON_PROPERTIES_LIST}.'
            R' If none is given, the variables index and size are populated. For most of these,'
            R' an optional argument is available that can be used as a shorthand:'))
    ):
        def as_text(name):
            # Positional names may arrive as bytes; normalize them to str.
            return name if isinstance(name, str) else name.decode(self.codec)

        selection = {as_text(name) for name in names}
        if hashes:
            # The -H shorthand switches on every individual hash option.
            md5 = sha256 = sha1 = crc32 = True
        # Map each boolean shorthand to the meta variable name it selects.
        toggles = dict(
            size=size, index=index, ext=ext, entropy=entropy, ic=ic,
            magic=magic, sha1=sha1, sha256=sha256, crc32=crc32, md5=md5)
        for key, enabled in toggles.items():
            if enabled:
                selection.add(key)
        if not selection and not reset:
            # Default selection when nothing was requested explicitly.
            selection.update(('index', 'size'))
        if all:
            if invert:
                raise ValueError('invert and all are both enabled, resulting in empty configuration.')
            selection = set(COMMON_PROPERTIES)
        elif invert:
            selection = set(COMMON_PROPERTIES) - selection
        super().__init__(names=selection, reset=reset)

    def process(self, data):
        # All work happens in filter; chunks pass through unmodified.
        return data

    def filter(self, chunks):
        names = self.args.names
        reset = self.args.reset
        for position, chunk in enumerate(chunks):
            chunk: Chunk
            # NOTE(review): invisible chunks are dropped here rather than
            # passed through — confirm this is intentional.
            if not chunk.visible:
                continue
            meta = metavars(chunk)
            if reset:
                chunk.meta.clear()
            if 'index' in names:
                meta['index'] = position
            for name in names:
                chunk[name] = meta[name]
            yield chunk

Ancestors

Inherited members

class cull

This unit is implemented in refinery.units.meta.cull and has the following commandline Interface:

usage: cull [-h] [-L] [-Q] [-0] [-v]

Remove all chunks from the current frame if they are not visible. Chunks
can become invisible by exclusion through iff, iffp, iffs, iffx, or scope.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class cull(Unit):
    """
    Remove all chunks from the current `refinery.lib.frame` if they are not visible. Chunks can become invisible
    by exclusion through `refinery.iff`, `refinery.iffp`, `refinery.iffs`, `refinery.iffx`, or `refinery.scope`.
    """
    def filter(self, chunks: Iterable[Chunk]):
        # Forward only the chunks that are still visible within the frame.
        yield from (chunk for chunk in chunks if chunk.visible)

Ancestors

Inherited members

class dedup

This unit is implemented in refinery.units.meta.dedup and has the following commandline Interface:

usage: dedup [-h] [-L] [-Q] [-0] [-v]

Deduplicates a sequence of multiple inputs. The deduplication is limited
to the current frame.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class dedup(Unit):
    """
    Deduplicates a sequence of multiple inputs. The deduplication is limited to the current `refinery.lib.frame`.
    """

    def filter(self, chunks):
        # Hashes of all visible chunks forwarded so far in this frame.
        seen = set()
        for chunk in chunks:
            if not chunk.visible:
                # Invisible chunks bypass deduplication entirely.
                yield chunk
                continue
            digest = hash(chunk)
            if digest in seen:
                continue
            seen.add(digest)
            yield chunk

Ancestors

Inherited members

class ef (*filenames, list, size=0, linewise=False)

This unit is implemented in refinery.units.meta.ef and has the following commandline Interface:

usage: ef [-h] [-L] [-Q] [-0] [-v] [-l] [-s N] [-w]
          FILEMASK [FILEMASK ...]

Short for "emit file". The unit reads files from disk and outputs them
individually. Has the ability to read large files in chunks.

positional arguments:
  FILEMASK        A list of file masks (with wildcard patterns). Each
                  matching file will be read from disk and emitted. In
                  addition to glob patterns, the file mask can include
                  format string expressions which will be substituted from
                  the current meta variables.

optional arguments:
  -l, --list      Only lists files with metadata.
  -s, --size N    If specified, files will be read in chunks of size N and
                  each chunk is emitted as one element in the output list.
  -w, --linewise  Read the file linewise. By default, one line is read at
                  a time. In line mode, the --size argument can be used to
                  read the given number of lines in each chunk.

generic options:
  -h, --help      Show this help message and exit.
  -L, --lenient   Allow partial results as output.
  -Q, --quiet     Disables all log output.
  -0, --devnull   Do not produce any output.
  -v, --verbose   Specify up to two times to increase log level.
Expand source code Browse git
class ef(Unit):
    """
    Short for "emit file". The unit reads files from disk and outputs them individually. Has the ability to
    read large files in chunks.
    """

    def __init__(self,
        *filenames: arg(metavar='FILEMASK', nargs='+', type=str, help=(
            'A list of file masks (with wildcard patterns). Each matching '
            'file will be read from disk and emitted. In addition to glob '
            'patterns, the file mask can include format string expressions '
            'which will be substituted from the current meta variables.'
        )),
        list: arg.switch('-l', help='Only lists files with metadata.'),
        size: arg.number('-s', help=(
            'If specified, files will be read in chunks of size N and each '
            'chunk is emitted as one element in the output list.'
        )) = 0,
        linewise: arg.switch('-w', help=(
            'Read the file linewise. By default, one line is read at a time. '
            'In line mode, the --size argument can be used to read the given '
            'number of lines in each chunk.'
        )) = False
    ):
        super().__init__(size=size, list=list, linewise=linewise, filenames=filenames)

    def _read_chunks(self, fd):
        # Emit fixed-size blocks until the file is exhausted.
        while True:
            buffer = fd.read(self.args.size)
            if not buffer:
                break
            yield buffer

    def _read_lines(self, fd):
        # Emit one line at a time, or batches of --size lines when a count
        # greater than one was specified.
        count = self.args.size or 1
        if count == 1:
            while True:
                buffer = fd.readline()
                if not buffer:
                    break
                yield buffer
            return
        with MemoryFile() as out:
            while True:
                for _ in range(count):
                    buffer = fd.readline()
                    if not buffer:
                        break
                    out.write(buffer)
                if not out.tell():
                    break
                yield out.getvalue()
                out.seek(0)
                out.truncate()

    def process(self, data):
        meta = metavars(data, ghost=True)
        for mask in self.args.filenames:
            mask = meta.format_str(mask, self.codec, data)
            self.log_debug('scanning for mask:', mask)
            for filename in iglob(mask, recursive=True):
                if not isfile(filename):
                    continue
                if self.args.list:
                    try:
                        yield self.labelled(
                            filename.encode(self.codec),
                            size=getsize(filename),
                            atime=datetime.fromtimestamp(getatime(filename)).isoformat(' ', 'seconds'),
                            ctime=datetime.fromtimestamp(getctime(filename)).isoformat(' ', 'seconds'),
                            mtime=datetime.fromtimestamp(getmtime(filename)).isoformat(' ', 'seconds'),
                        )
                    except OSError:
                        # Include the offending file name so the failure can
                        # be attributed; the previous revision logged a
                        # placeholder-free f-string here.
                        self.log_warn(F'os error while scanning: {filename}')
                    continue
                try:
                    with open(filename, 'rb') as stream:
                        if self.args.linewise:
                            yield from self._read_lines(stream)
                        elif self.args.size:
                            yield from self._read_chunks(stream)
                        else:
                            data = stream.read()
                            self.log_info(lambda: F'reading: {filename} ({len(data)} bytes)')
                            yield self.labelled(data, path='/'.join(filename.split('\\')))
                except PermissionError:
                    self.log_warn('permission denied:', filename)
                except FileNotFoundError:
                    self.log_warn('file is missing:', filename)
                except Exception:
                    self.log_warn('unknown error while reading:', filename)

Ancestors

Inherited members

class emit (*data)

This unit is implemented in refinery.units.meta.emit and has the following commandline Interface:

usage: emit [-h] [-L] [-Q] [-0] [-v] [data [data ...]]

positional arguments:
  data           Data to be emitted. If no argument is specified, data is
                 retrieved from the clipboard. Multiple arguments are
                 output in framed format.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class emit(Unit):

    def __init__(self, *data: arg(help=(
        'Data to be emitted. If no argument is specified, data '
        'is retrieved from the clipboard. Multiple arguments are '
        'output in framed format.'
    ))):
        super().__init__(data=data)

    def process(self, data):
        arguments = self.args.data
        if arguments:
            yield from arguments
            return
        # Without arguments, fall back to the clipboard; an empty clipboard
        # produces a single empty chunk.
        import pyperclip
        text = pyperclip.paste()
        yield text.encode(self.codec, 'replace') if text else B''

    @classmethod
    def run(cls, argv=None, stream=None):
        # The unit does not consume standard input; substitute the null
        # device unless a stream was supplied explicitly.
        super(emit, cls).run(
            argv=argv,
            stream=stream or open(__import__('os').devnull, 'rb')
        )

Ancestors

Inherited members

class group (size)

This unit is implemented in refinery.units.meta.group and has the following commandline Interface:

usage: group [-h] [-L] [-Q] [-0] [-v] N

Group incoming chunks into frames of the given size.

positional arguments:
  N              Size of each group; must be at least 2.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class group(Unit):
    """
    Group incoming chunks into frames of the given size.
    """
    def __init__(self, size: arg.number(help='Size of each group; must be at least 2.', bound=(2, None))):
        super().__init__(size=size)

    def process(self, data):
        # The header chunk carries its fellow group members in its temp
        # attribute (attached by filter below); emit the header first and
        # then the remaining members.
        members = data.temp or ()
        if len(members) >= self.args.size:
            # Sanity check: a group may never exceed the configured size.
            raise RuntimeError(F'received {len(members) + 1} items in group')
        yield data
        yield from members

    def filter(self, chunks):
        # Collect visible chunks into groups. The first chunk of each group
        # becomes the "header" and receives the list of the group's other
        # members via its temp attribute. NOTE(review): the very same list
        # object is reused for every group and cleared when a new header is
        # chosen; this appears to rely on each yielded header being fully
        # processed before filter resumes and mutates the list again —
        # confirm before reordering any statements here.
        members = []
        header = None
        for chunk in chunks:
            if not chunk.visible:
                # Invisible chunks bypass grouping entirely.
                yield chunk
                continue
            if len(members) > self.args.size - 2:
                # The current group is full: emit its header and start over.
                yield header
                header = None
            if header is None:
                chunk.temp = members
                header = chunk
                members.clear()
            else:
                members.append(chunk)
        if header is not None:
            # Emit the final, possibly incomplete group.
            yield header

Ancestors

Inherited members

class iff (*expression, negate=False)

This unit is implemented in refinery.units.meta.iff and has the following commandline Interface:

usage: iff [-h] [-L] [-Q] [-0] [-v] [-n] [token [token ...]]

Filter incoming chunks depending on whether a given Python expression
evaluates to true.

positional arguments:
  token          All "token" arguments to this unit are joined with spaces
                 to produce the expression to be evaluated. This is done
                 so that unnecessary shell quoting is avoided.

optional arguments:
  -n, --negate   invert the logic of this filter; drop all matching chunks
                 instead of keeping them

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class iff(ConditionalUnit):
    """
    Filter incoming chunks depending on whether a given Python expression evaluates
    to true.
    """
    def __init__(
        self,
        *expression: arg(metavar='token', type=str, help=(
            'All "token" arguments to this unit are joined with spaces to produce the expression to be '
            'evaluated. This is done so that unnecessary shell quoting is avoided.')),
        negate=False
    ):
        # All tokens are joined into one expression string before storage.
        joined = ' '.join(expression)
        super().__init__(negate=negate, expression=joined)

    def match(self, chunk):
        # Evaluate the expression against the chunk's meta variables and
        # coerce the result to a boolean.
        result = PythonExpression.evaluate(self.args.expression, metavars(chunk))
        return bool(result)

Ancestors

Inherited members

class iffp (*patterns, negate=False)

This unit is implemented in refinery.units.meta.iffp and has the following commandline Interface:

usage: iffp [-h] [-L] [-Q] [-0] [-v] [-n] [pattern [pattern ...]]

positional arguments:
  pattern

optional arguments:
  -n, --negate   invert the logic of this filter; drop all matching chunks
                 instead of keeping them

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class iffp(ConditionalUnit):
    # An f-string literal is not a constant string literal and is therefore
    # never assigned to __doc__ by the compiler; the previous revision used
    # F"""...""" as the first class statement, leaving the unit without any
    # help text. Assign the documentation explicitly instead.
    __doc__ = F"""
    Filter incoming chunks depending on whether it matches any of a given set of patterns. The available
    patterns are the following: {", ".join(_PATTERNS)}.
    """

    def __init__(self, *patterns: arg.choice(metavar='pattern', choices=_PATTERNS), negate=False):
        super().__init__(negate=negate, patterns=patterns)

    def match(self, chunk):
        # A chunk matches when any of the selected patterns matches it in
        # full (not merely as a substring).
        for name in self.args.patterns:
            p: pattern = _PATTERNS[name]
            if p.compiled.fullmatch(chunk):
                return True
        return False

Ancestors

Inherited members

class iffs (needle, negate=False)

This unit is implemented in refinery.units.meta.iffs and has the following commandline Interface:

usage: iffs [-h] [-L] [-Q] [-0] [-v] [-n] needle

Filter incoming chunks depending on whether they contain a given binary
substring.

positional arguments:
  needle         the string to search for

optional arguments:
  -n, --negate   invert the logic of this filter; drop all matching chunks
                 instead of keeping them

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class iffs(ConditionalUnit):
    """
    Filter incoming chunks depending on whether they contain a given binary substring.
    """
    def __init__(self, needle: arg(help='the string to search for'), negate=False):
        super().__init__(negate=negate, needle=needle)

    def match(self, chunk):
        # A chunk matches when the configured needle occurs anywhere in it.
        needle = self.args.needle
        return needle in chunk

Ancestors

Inherited members

class iffx (regex, multiline=False, ignorecase=False, negate=False, match=False)

This unit is implemented in refinery.units.meta.iffx and has the following commandline Interface:

usage: iffx [-h] [-L] [-Q] [-0] [-v] [-M] [-I] [-n] [-m] regex

Filter incoming chunks by discarding those that do not match the given
regular expression.

positional arguments:
  regex             Regular expression to match.

optional arguments:
  -M, --multiline   Caret and dollar match the beginning and end of a
                    line, a dot does not match line breaks.
  -I, --ignorecase  Ignore capitalization for alphabetic characters.
  -n, --negate      invert the logic of this filter; drop all matching
                    chunks instead of keeping them
  -m, --match       Perform a full match rather than matching anywhere in
                    the chunk.

generic options:
  -h, --help        Show this help message and exit.
  -L, --lenient     Allow partial results as output.
  -Q, --quiet       Disables all log output.
  -0, --devnull     Do not produce any output.
  -v, --verbose     Specify up to two times to increase log level.
Expand source code Browse git
class iffx(RegexUnit, ConditionalUnit):
    """
    Filter incoming chunks by discarding those that do not match the given
    regular expression.
    """
    def __init__(
        self, regex, multiline=False, ignorecase=False, negate=False,
        match: arg.switch('-m',
            help='Perform a full match rather than matching anywhere in the chunk.') = False
    ):
        super().__init__(regex=regex, negate=negate, multiline=multiline, ignorecase=ignorecase, match=match)

    def match(self, chunk):
        return bool(self._matcher(chunk))

    def filter(self, chunks):
        # Select the matching strategy once per frame: either a full match
        # of the entire chunk, or a search anywhere within it.
        if self.args.match:
            self._matcher = self.regex.fullmatch
        else:
            self._matcher = self.regex.search
        yield from super().filter(chunks)

Ancestors

Inherited members

class pad (padding=b'\x00', absolute=0, blocksize=0, left=False)

This unit is implemented in refinery.units.meta.pad and has the following commandline Interface:

usage: pad [-h] [-L] [-Q] [-0] [-v] [-a N | -b N] [-l] [padding]

Allows padding of the input data. By default, multiple inputs are padded
to all have length equal to the size of the longest input. Other optional
size specifications override this behaviour.

positional arguments:
  padding            This custom binary sequence is used (repeatedly, if
                     necessary) to pad the input. The default is a zero
                     byte.

optional arguments:
  -a, --absolute N   Pad inputs to be at least N bytes in size.
  -b, --blocksize N  Pad inputs to any even multiple of N.
  -l, --left         Pad on the left instead of the right.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class pad(Unit):
    """
    Allows padding of the input data. By default, multiple inputs are padded
    to all have length equal to the size of the longest input. Other optional
    size specifications override this behaviour.
    """

    def __init__(
        self, padding: arg('padding', help=(
            'This custom binary sequence is used (repeatedly, if necessary) '
            'to pad the input. The default is a zero byte.')) = B'\0',
        absolute : arg.number('-a', group='HOW', help='Pad inputs to be at least N bytes in size.') = 0,
        blocksize: arg.number('-b', group='HOW', help='Pad inputs to any even multiple of N.') = 0,
        left: arg.switch('-l', help='Pad on the left instead of the right.') = False
    ):
        if absolute and blocksize:
            raise ValueError('Cannot pad simultaneously to a given block size and absolutely.')
        self.superinit(super(), **vars())
        # Set by filter when padding relative to the longest input in the frame.
        self._maxlen = None

    @property
    def relative(self):
        # Padding is relative (to the longest input) only when neither an
        # absolute target size nor a block size was specified.
        if self.args.blocksize:
            return False
        if self.args.absolute:
            return False
        return True

    def _pad(self, data, size):
        # Pad data up to the given size; inputs that are already long
        # enough pass through unchanged.
        missing = (size - len(data))
        if missing <= 0:
            return data
        pad = self.args.padding
        if missing > len(pad):
            # Round the repetition count UP so the repeated sequence always
            # covers the missing byte count. The previous revision used
            # floor division here, which under-padded whenever the padding
            # length did not divide the missing length evenly (e.g. a
            # 3-byte padding for 10 missing bytes produced only 9 bytes).
            pad *= (missing // len(pad) + 1)
        if self.args.left:
            return pad[:missing] + data
        else:
            data += pad[:missing]
            return data

    def filter(self, chunks):
        if self.relative:
            self.log_info('padding up to longest input')
            # Materialize the frame to determine the longest input first.
            if not isinstance(chunks, list):
                chunks = list(chunks)
            self._maxlen = max(len(d) for d in chunks)
        else:
            self._maxlen = None
        yield from chunks

    def process(self, data):
        if self._maxlen is not None:
            return self._pad(data, self._maxlen)
        if self.args.blocksize:
            # Round the length up to the next multiple of the block size.
            q, r = divmod(len(data), self.args.blocksize)
            size = (q + bool(r)) * self.args.blocksize
        else:
            size = self.args.absolute
        return self._pad(data, size)

Ancestors

Instance variables

var relative
Expand source code Browse git
@property
def relative(self):
    if self.args.blocksize:
        return False
    if self.args.absolute:
        return False
    return True

Inherited members

class pick (*slice)

This unit is implemented in refinery.units.meta.pick and has the following commandline Interface:

usage: pick [-h] [-L] [-Q] [-0] [-v] [slice [slice ...]]

Picks sequences from the array of multiple inputs. For example, pick 0 2:
will return all but the second ingested input (which has index 1).

positional arguments:
  slice          Specify start:stop:step in Python slice syntax.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class pick(Unit):
    """
    Picks sequences from the array of multiple inputs. For example, `pick 0 2:`
    will return all but the second ingested input (which has index `1`).
    """
    def __init__(self, *slice: arg(
        type=sliceobj, nargs='*', default=[slice(None, None)],
        help='Specify start:stop:step in Python slice syntax.'
    )):
        super().__init__(slice=slice)

    def filter(self, chunks):
        # The unit tries to stay lazy: as long as a slice can be served by
        # streaming forward through the input, chunks are taken via islice and
        # the consumed prefix is merely counted. Only when a slice needs random
        # access (unbounded or negative stop, negative step, or a later slice
        # reaching back before its stop) is the remaining input materialized.
        slices = deque(self.args.slice)
        discards = 0   # number of chunks already streamed past and dropped
        consumed = False  # True once the input was buffered into a list

        def discardable(s):
            # A slice is "discardable" (streamable) if it has a finite,
            # non-negative stop, a positive step, and no pending slice starts
            # before its stop.
            # NOTE(review): this assumes every pending slice has a non-None
            # start — a later `:`-style slice would raise TypeError here;
            # confirm against upstream usage.
            return s.stop and s.stop >= 0 and (s.step or 1) > 0 \
                and all(t.start >= s.stop for t in slices)

        while slices:
            s = slices.popleft()

            if not consumed:
                if not discardable(s):
                    self.log_debug(F'consumed input into buffer after {discards} skips')
                    # Pad with placeholders for already-discarded chunks so
                    # that absolute indices remain valid on the buffered list.
                    chunks = [None] * discards + list(chunks)
                    consumed = True

            if consumed:
                yield from chunks[s]
            else:
                yield from islice(chain(repeat(None, discards), chunks), s.start, s.stop, s.step)
                discards = s.stop

        return

Ancestors

Inherited members

class pop (*names)

This unit is implemented in refinery.units.meta.pop and has the following commandline Interface:

usage: pop [-h] [-L] [-Q] [-0] [-v] [[name|count|@] [[name|count|@] ...]]

In processing order, remove visible chunks from the current frame and
store their contents in the given meta variables. All chunks in the input
stream are consequently made visible again.

positional arguments:
  [name|count|@]  Specify either the name of a single variable to receive
                  the contents of an input chunk, or an integer expression
                  that specifies a number of values to be removed from the
                  input without storing them. Additionally, it is possible
                  to specify the symbol "@" to remove a single chunk from
                  the input and merge its meta data into the following
                  ones. By default, a single merge is performed.

generic options:
  -h, --help      Show this help message and exit.
  -L, --lenient   Allow partial results as output.
  -Q, --quiet     Disables all log output.
  -0, --devnull   Do not produce any output.
  -v, --verbose   Specify up to two times to increase log level.
Expand source code Browse git
class pop(Unit):
    """
    In processing order, remove visible chunks from the current frame and store their contents in the given
    meta variables. All chunks in the input stream are consequently made visible again.
    """
    def __init__(
        self,
        *names: arg(type=str, metavar=F'[name|count|{_popcount._MERGE_SYMBOL}]', help=(
            R'Specify either the name of a single variable to receive the contents of an input chunk, or '
            R'an integer expression that specifies a number of values to be removed from the input without '
            F'storing them. Additionally, it is possible to specify the symbol "{_popcount._MERGE_SYMBOL}" '
            R'to remove a single chunk from the input and merge its meta data into the following ones. '
            F'By default, a single merge is performed.'
        ))
    ):
        # Without arguments, a single merge operation is performed.
        if not names:
            names = _popcount._MERGE_SYMBOL,
        super().__init__(names=[_popcount(n) for n in names])

    def process(self, data):
        # Chunks that survive filtering pass through unmodified; all the work
        # happens in filter().
        return data

    def filter(self, chunks):
        invisible = []   # out-of-scope chunks buffered until popping is done
        variables = {}   # variable assignments harvested from popped chunks
        remaining = iter(self.args.names)

        it = iter(chunks)
        pop: _popcount = next(remaining).reset()

        for chunk in it:
            if not chunk.visible:
                self.log_debug('buffering invisible chunk')
                invisible.append(chunk)
                continue
            try:
                # Feed the chunk to the current pop instruction; once it is
                # satisfied, advance to the next one.
                while not pop.into(variables, chunk):
                    pop = next(remaining).reset()
            except StopIteration:
                # All instructions satisfied; this chunk is kept, not popped.
                invisible.append(chunk)
                break
        try:
            next(remaining)
        except StopIteration:
            pass
        else:
            # The input ran out before all requested variables were assigned.
            raise ValueError('Not all variables could be assigned.')

        # Re-emit buffered and remaining chunks, all made visible again and
        # enriched with the collected variables.
        for chunk in itertools.chain(invisible, it):
            chunk.meta.update(variables)
            chunk.visible = True
            yield chunk

Ancestors

Inherited members

class push (data=b'')

This unit is implemented in refinery.units.meta.push and has the following commandline Interface:

usage: push [-h] [-L] [-Q] [-0] [-v] [data]

The unit inserts an additional chunk before each input chunk and moves the
original data out of scope. This chunk is considered the "original" data,
while the one inserted in front of it is used as an intermediate result.
By default, this intermediate data is a copy of the input data. For
example:

    emit key=value | push [[| rex =(.*)$ $1 | pop v ]| repl var:v censored ]

will output key=censored. The application of rex turns the (duplicated)
data into just the value, which is then stored in the variable v. The
application of repl replaces this value with the hard-coded string
censored.

positional arguments:
  data           The data to be pushed, by default a copy of the input.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class push(Unit):
    """
    Inserts an additional chunk in front of every input chunk and moves the original data
    out of scope. The chunk that follows is considered the "original" data, while the one
    inserted before it serves as an intermediate result. Unless explicit data is given,
    the intermediate chunk is simply a copy of the input. For example:

        emit key=value | push [[| rex =(.*)$ $1 | pop v ]| repl var:v censored ]

    will output `key=censored`. Applying `refinery.rex` reduces the (duplicated) data to
    just the value, which is then stored in the variable `v`. Applying `refinery.repl`
    replaces this value with the hard-coded string `censored`.
    """
    def __init__(self, data: arg(help='The data to be pushed, by default a copy of the input.') = B''):
        super().__init__(data=data)

    def process(self, data):
        # The intermediate chunk is emitted first; an empty argument means a
        # copy of the input is pushed instead.
        pushed = self.args.data or data
        yield pushed
        if self.args.nesting > 0:
            # Inside a nested frame: hide the original from the next layer only.
            data.set_next_scope(False)
        else:
            try:
                data.visible = False
            except AttributeError:
                # Outside a frame there is no visibility to toggle.
                self.log_warn('application has no effect outside frame.')
        yield data

Ancestors

Inherited members

class put (name, value)

This unit is implemented in refinery.units.meta.put and has the following commandline Interface:

usage: put [-h] [-L] [-Q] [-0] [-v] name value

Can be used to add a meta variable to the processed chunk. Note that meta
variables cease to exist outside a frame.

positional arguments:
  name           The name of the variable to be used.
  value          The value for the variable.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class put(Unit):
    """
    Can be used to add a meta variable to the processed chunk. Note that meta variables
    cease to exist outside a frame.
    """
    def __init__(
        self,
        name : arg(help='The name of the variable to be used.', type=str),
        value: arg(help='The value for the variable.', type=functools.partial(numseq, typecheck=False))
    ):
        super().__init__(name=check_variable_name(name), value=value)

    def _normalize(self, value):
        # Sized iterables are stored as lists; an itertools.repeat is unwrapped
        # to its repeated element; anything else that is not numeric is
        # rejected as unsupported.
        try:
            len(value)
        except TypeError:
            if isinstance(value, itertools.repeat):
                value = next(value)
            if isinstance(value, (int, float)):
                return value
            raise NotImplementedError(F'put does not support {value.__class__.__name__} values.')
        else:
            return value if isinstance(value, list) else list(value)

    def process(self, data):
        value = self.args.value
        # Numbers and binary buffers are stored as-is; everything else is
        # normalized into a storable representation first.
        if not isinstance(value, (int, float)) and not isbuffer(value):
            value = self._normalize(value)
        self.log_debug(F'storing {type(value).__name__}:', value)
        return self.labelled(data, **{self.args.name: value})

Ancestors

Inherited members

class scope (*slice, visible=True)

This unit is implemented in refinery.units.meta.scope and has the following commandline Interface:

usage: scope [-h] [-L] [-Q] [-0] [-v] [-n] [slice [slice ...]]

After using scope within a frame, all the following operations will be
applied only to the selected indices. All remaining chunks still exist,
they are just not operated on. When the frame closes or the frame is being
rescoped by a second application of this unit, they become visible again.

positional arguments:
  slice          Specify start:stop:step in Python slice syntax.

optional arguments:
  -n, --not      Hide the given chunks instead of making them the only
                 ones visible.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class scope(FrameSlicer):
    """
    After using `refinery.scope` within a `refinery.lib.frame`, all the
    following operations will be applied only to the selected indices. All
    remaining chunks still exist, they are just not operated on. When the
    frame closes or the frame is being rescoped by a second application of
    this unit, they become visible again.
    """
    def __init__(self, *slice, visible: arg.switch('-n', '--not', off=True, help=(
        'Hide the given chunks instead of making them the only ones visible.')) = True
    ):
        super().__init__(*slice, visible=visible)
        # Sort any slices with negative arguments to the back so we check
        # them last. This delays potential consumption of the chunks iterator
        # as much as possible.
        self.args.slice.sort(
            key=lambda s: (s.start or 0, s.stop or 0), reverse=True)

    def filter(self, chunks):
        it = iter(chunks)
        consumed = None  # deque of chunks drained from `it`, if draining occurred
        size = None      # total chunk count, known only after draining

        def buffered():
            # Serve chunks straight from the source iterator first; once the
            # iterator was drained to resolve a negative offset, serve the
            # remainder from the consumed deque.
            yield from it
            while consumed:
                yield consumed.popleft()

        def shift(offset, default):
            # Translate a slice bound into a non-negative index. A negative
            # bound forces the rest of the input to be consumed so the total
            # length becomes known.
            nonlocal consumed, it, size
            if offset is None:
                return default
            if offset >= 0:
                return offset
            if consumed is None:
                from collections import deque
                self.log_info(F'consuming iterator to compute negative offset {offset}.')
                consumed = deque(it)
                # k chunks were already emitted; the current one counts too.
                size = len(consumed) + k + 1
            return max(0, offset + size)

        for k, chunk in enumerate(buffered()):
            for s in self.args.slice:
                if k in range(shift(s.start, 0), shift(s.stop, k + 1), s.step or 1):
                    chunk.visible = self.args.visible
                    break
            else:
                # No slice matched: this chunk receives the opposite visibility.
                chunk.visible = not self.args.visible
            self.log_debug(chunk)
            yield chunk

Ancestors

Inherited members

class sep (separator=b'\n', scoped=False)

This unit is implemented in refinery.units.meta.sep and has the following commandline Interface:

usage: sep [-h] [-L] [-Q] [-0] [-v] [-s] [separator]

Multiple inputs are joined along a specified separator. If any of the
input Chunks is currently out of scope, sep makes them visible by
default. This can be prevented by using the -s flag.

positional arguments:
  separator      Separator; the default is a line break.

optional arguments:
  -s, --scoped   Maintain chunk scope; i.e. do not turn all input chunks
                 visible.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sep(Unit):
    """
    Joins multiple inputs along a configurable separator. Any input
    `refinery.lib.frame.Chunk` that is currently out of scope is made visible
    again by default; the `-s` flag keeps chunk scopes untouched.
    """

    def __init__(
        self, separator: arg(help='Separator; the default is a line break.') = B'\n',
        scoped: arg.switch('-s', help=(
            'Maintain chunk scope; i.e. do not turn all input chunks visible.')) = False
    ):
        super().__init__(separator=separator, scoped=scoped)
        self.separate = False

    def filter(self, chunks):
        iterator = iter(chunks)
        try:
            current = next(iterator)
        except StopIteration:
            return
        # While at least one more chunk follows, process() appends the
        # separator after each emitted chunk.
        self.separate = True
        for upcoming in iterator:
            if not self.args.scoped:
                current.visible = True
            yield current
            current = upcoming
        # The final chunk is not followed by a separator.
        self.separate = False
        yield current

    def process(self, data):
        yield data
        if self.separate:
            yield self.args.separator

Ancestors

Inherited members

class sorted (key=None, descending=False)

This unit is implemented in refinery.units.meta.sorted and has the following commandline Interface:

usage: sorted [-h] [-L] [-Q] [-0] [-v] [-d] [key]

Sorts all elements of the input frame lexicographically. This unit is a
nop on single inputs.

positional arguments:
  key               A meta variable expression to sort by instead of
                    sorting the content.

optional arguments:
  -d, --descending  Sort in descending order, the default is ascending.

generic options:
  -h, --help        Show this help message and exit.
  -L, --lenient     Allow partial results as output.
  -Q, --quiet       Disables all log output.
  -0, --devnull     Do not produce any output.
  -v, --verbose     Specify up to two times to increase log level.
Expand source code Browse git
class sorted(Unit):
    """
    Sorts all elements of the input `refinery.lib.frame` lexicographically.
    This unit is a `refinery.nop` on single inputs.
    """

    def __init__(
        self,
        key: arg('key', type=str, help='A meta variable expression to sort by instead of sorting the content.') = None,
        descending: arg.switch('-d', help='Sort in descending order, the default is ascending.') = False
    ):
        super().__init__(key=key, descending=descending)

    def filter(self, chunks):
        # Visible chunks are collected and sorted; invisible chunks must not
        # move, so they are grouped by the visible position they precede and
        # re-inserted at that position after sorting.
        sortbuffer = []
        invisibles = {}
        hidden = 0  # total number of invisible chunks buffered so far
        key = self.args.key

        if key is not None:
            def _key(chunk):
                # Sort by the evaluated expression, with the chunk itself as a
                # tie breaker.
                return expression(metavars(chunk)), chunk
            expression = PythonExpression(key, all_variables_allowed=True)
            key = _key

        for k, chunk in enumerate(chunks):
            if not chunk.visible:
                # r is the index of the next visible chunk; every invisible
                # chunk ahead of it is stored under that index. The previous
                # computation `k - len(invisibles)` counted distinct keys
                # rather than buffered chunks and broke for runs of three or
                # more consecutive invisible chunks.
                r = k - hidden
                invisibles.setdefault(r, []).append(chunk)
                hidden += 1
            else:
                sortbuffer.append(chunk)

        sortbuffer.sort(key=key, reverse=self.args.descending)

        if not invisibles:
            yield from sortbuffer
            return

        for r, chunk in enumerate(sortbuffer):
            yield from invisibles.pop(r, ())
            yield chunk

        # Invisible chunks that trailed the input are stored under the index
        # one past the end of the sorted buffer. The previous code indexed
        # with the last loop variable, which raised KeyError for trailing
        # invisible chunks and NameError when no chunk was visible at all.
        yield from invisibles.pop(len(sortbuffer), ())

        if invisibles:
            raise RefineryCriticalException(
                'for unknown reasons, invisible chunks were lost during '
                'the sorting process.'
            )

Ancestors

Inherited members

class swap (name)

This unit is implemented in refinery.units.meta.swap and has the following commandline Interface:

usage: swap [-h] [-L] [-Q] [-0] [-v] name

Swap the contents of an existing variable with the contents of the chunk.
The variable has to contain a binary string.

positional arguments:
  name           The meta variable name.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class swap(Unit):
    """
    Swap the contents of an existing variable with the contents of the chunk. The variable
    has to contain a binary string.
    """
    def __init__(self, name: arg(type=str, metavar='name', help='The meta variable name.')):
        super().__init__(name=check_variable_name(name))

    def process(self, data):
        name = self.args.name
        variables = metavars(data)
        try:
            stored = variables[name]
        except KeyError:
            # A missing variable swaps in as an empty buffer.
            stored = bytearray()
        if isinstance(stored, str):
            stored = stored.encode(self.codec)
        elif not isbuffer(stored):
            raise ValueError(F'Unable to swap data with variable {name} because it has type {type(stored).__name__}.')
        # The previous variable value becomes the chunk body; the chunk body
        # becomes the new value of the variable.
        return self.labelled(stored, **{name: data})

Ancestors

Inherited members

class xfcc (variable='count', relative=False)

This unit is implemented in refinery.units.meta.xfcc and has the following commandline Interface:

usage: xfcc [-h] [-L] [-Q] [-0] [-v] [-r] [variable]

The cross frame chunk count unit! It computes the number of times a chunk
occurs across several frames of input. It consumes all frames in the
current frame and counts the number of times each item occurs. It converts a
frame tree of depth 2 into a new frame tree of depth 2 where the parent of
every leaf has this leaf as its only child. The leaves of this tree have
been enriched with a meta variable containing the number of times the
corresponding chunk has occurred in the input frame tree.

positional arguments:
  variable        The variable which is used as the accumulator

optional arguments:
  -r, --relative  Normalize the accumulator to a number between 0 and 1.

generic options:
  -h, --help      Show this help message and exit.
  -L, --lenient   Allow partial results as output.
  -Q, --quiet     Disables all log output.
  -0, --devnull   Do not produce any output.
  -v, --verbose   Specify up to two times to increase log level.
Expand source code Browse git
class xfcc(Unit):
    """
    The cross frame chunk count unit! It computes the number of times a chunk occurs across several frames
    of input. It consumes all frames in the current frame tree and counts the number of times each item
    occurs. It converts a frame tree of depth 2 into a new frame tree of depth 2 where the parent of every
    leaf has this leaf as its only child. The leaves of this tree have been enriched with a meta variable
    containing the number of times the corresponding chunk has occurred in the input frame tree.
    """
    def __init__(
        self,
        variable: arg(help='The variable which is used as the accumulator') = 'count',
        relative: arg.switch('-r', help='Normalize the accumulator to a number between 0 and 1.') = False
    ):
        super().__init__(variable=variable, relative=relative)
        # Path prefix (all but the last two components) of the frame tree that
        # is currently being accumulated; None until the first chunk arrives.
        self._trunk = None
        # Maps each unique chunk to its accumulated occurrence count.
        self._store = collections.defaultdict(int)

    def finish(self):
        # Emit every accumulated chunk labelled with its count; paths are
        # rewritten so each chunk becomes the only child of its parent.
        if self.args.relative and self._store:
            maximum = max(self._store.values())
        for k, (chunk, count) in enumerate(self._store.items()):
            if self.args.relative:
                count /= maximum
            chunk._meta[self.args.variable] = count
            chunk._path = chunk.path[:-2] + (0, k)
            yield chunk
        self._store.clear()

    def _getcount(self, chunk):
        # A chunk already carrying the accumulator variable contributes its
        # stored count; any other chunk counts as one occurrence.
        try:
            count = int(chunk.meta[self.args.variable])
        except (AttributeError, KeyError, TypeError):
            return 1
        else:
            return count

    def filter(self, chunks):
        it = iter(chunks)
        try:
            head = next(it)
        except StopIteration:
            return
        if len(head.path) < 2:
            # Counting across frames requires a nesting depth of at least two;
            # shallower input is passed through unmodified.
            self.log_warn(F'the current frame is nested {len(head.path)} layers deep, at least two layers are required.')
            yield head
            for item in it:
                self.log_debug(repr(item))
                yield item
            return
        trunk = head.path[:-2]
        store = self._store
        if trunk != self._trunk:
            # A new frame tree begins: flush the counts of the previous one.
            yield from self.finish()
            self._trunk = trunk
        store[head] += self._getcount(head)
        for chunk in it:
            store[chunk] += self._getcount(chunk)

Ancestors

Inherited members

class couple (*commandline, buffer=False, noerror=False, cmdline=False, timeout=0.0)

This unit is implemented in refinery.units.misc.couple and has the following commandline Interface:

usage: couple [-h] [-L] [-Q] [-0] [-v] [-b] [-e] [-c] [-t T] ...

Turns any command into a refinery unit. Data is processed by feeding it to
the standard input of a process spawned from the given command line, and
then reading the standard output of that process as the result of the
operation. The main purpose of this unit is to allow using the syntax from
frame with other command line tools. By default, couple streams the output
from the executed command as individual outputs, but the buffer option can
be set to buffer all output of a single execution.

positional arguments:
  (all remaining)  All remaining command line tokens form an arbitrary
                   command line to be executed. Use format string syntax
                   to insert meta variables and incoming data chunks.

optional arguments:
  -b, --buffer     Buffer the command output for one execution rather than
                   streaming it.
  -e, --noerror    do not merge stdin and stderr; stderr will only be
                   output if -v is also specified.
  -c, --cmdline    pass incoming data as a commandline argument to the
                   process, not via stdin.
  -t, --timeout T  Set an execution timeout as a floating point number in
                   seconds, there is none by default.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class couple(Unit):
    """
    Turns any command into a refinery unit. Data is processed by feeding it to the standard input of a process spawned from
    the given command line, and then reading the standard output of that process as the result of the operation. The main
    purpose of this unit is to allow using the syntax from `refinery.lib.frame` with other command line tools. By default,
    `refinery.couple` streams the output from the executed command as individual outputs, but the `buffer` option can be set
    to buffer all output of a single execution.
    """

    # Grace period in seconds for joining the receiver threads.
    _JOIN_TIME = 0.1

    def __init__(
        self, *commandline : arg(nargs='...', metavar='(all remaining)', help=(
            'All remaining command line tokens form an arbitrary command line to be executed. Use format string syntax '
            'to insert meta variables and incoming data chunks.')),
        buffer: arg.switch('-b', help='Buffer the command output for one execution rather than streaming it.') = False,
        noerror: arg('-e', help='do not merge stdin and stderr; stderr will only be output if -v is also specified.') = False,
        cmdline: arg('-c', help='pass incoming data as a commandline argument to the process, not via stdin.') = False,
        timeout: arg('-t', metavar='T',
            help='Set an execution timeout as a floating point number in seconds, there is none by default.') = 0.0
    ):
        if not commandline:
            raise ValueError('you need to provide a command line.')
        super().__init__(commandline=commandline, cmdline=cmdline, noerror=noerror, buffer=buffer, timeout=timeout)

    def process(self, data):
        def shlexjoin():
            # Lazily build a shell-quoted command line string for debug output.
            import shlex
            return ' '.join(shlex.quote(cmd) for cmd in commandline)

        # Expand meta variable references inside every command line token.
        meta = metavars(data, ghost=True)
        commandline = [
            meta.format_str(cmd.decode(self.codec), self.codec, data)
            for cmd in self.args.commandline
        ]

        if self.args.cmdline:
            # Pass the input as the final command line argument instead of via
            # standard input.
            commandline.append(data.decode(self.codec))
            data = None

        self.log_debug(shlexjoin)

        posix = 'posix' in sys.builtin_module_names
        process = Popen(commandline,
            stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=False, close_fds=posix)

        if self.args.buffer and not self.args.timeout:
            # Simple case: buffered output and no timeout; communicate() does
            # all the work.
            out, err = process.communicate(data)
            for line in err.splitlines():
                self.log_debug(line)
            yield out
            return

        import io
        from threading import Thread, Event
        from queue import Queue, Empty
        from time import process_time, sleep

        start = 0
        result = None

        qerr = Queue()
        qout = Queue()
        done = Event()

        def adapter(stream, queue: Queue, event: Event):
            # Receiver thread body: forward stream data into the queue until
            # EOF or until the done event is set.
            while not event.is_set():
                out = stream.read1()
                if out: queue.put(out)
                else: break
            stream.close()

        recvout = Thread(target=adapter, args=(process.stdout, qout, done), daemon=True)
        recverr = Thread(target=adapter, args=(process.stderr, qerr, done), daemon=True)

        recvout.start()
        recverr.start()

        if data:
            process.stdin.write(data)
        process.stdin.close()
        # NOTE(review): process_time() measures this process's CPU time, not
        # wall clock time; while blocked waiting on the child it advances very
        # slowly, so the timeout can fire much later than configured — confirm
        # whether time.monotonic() was intended here.
        start = process_time()

        if self.args.buffer or self.args.timeout:
            result = io.BytesIO()

        def queue_read(q: Queue):
            # Non-blocking queue read; None when the queue is empty.
            try: return q.get_nowait()
            except Empty: return None

        errbuf = io.BytesIO()

        while True:
            out = queue_read(qout)
            err = None

            if self.args.noerror:
                err = queue_read(qerr)
            else:
                # Merge stderr into the regular output stream.
                out = out or queue_read(qerr)

            if err and self.log_info():
                # Accumulate stderr and log it line by line; a trailing partial
                # line is kept in the buffer for the next iteration.
                errbuf.write(err)
                errbuf.seek(0)
                lines = errbuf.readlines()
                errbuf.seek(0)
                errbuf.truncate()
                if lines:
                    if not (done.is_set() or lines[~0].endswith(B'\n')):
                        errbuf.write(lines.pop())
                    for line in lines:
                        msg = line.rstrip(B'\n')
                        if msg: self.log_info(msg)
            if out:
                if self.args.buffer or self.args.timeout:
                    result.write(out)
                if not self.args.buffer:
                    yield out

            if done.is_set():
                if recverr.is_alive():
                    self.log_warn('stderr receiver thread zombied')
                if recvout.is_alive():
                    self.log_warn('stdout receiver thread zombied')
                break
            elif not err and not out and process.poll() is not None:
                # The process exited and both queues are drained: wind down.
                recverr.join(self._JOIN_TIME)
                recvout.join(self._JOIN_TIME)
                done.set()
            elif self.args.timeout:
                if process_time() - start > self.args.timeout:
                    self.log_info('terminating process after timeout expired')
                    done.set()
                    process.terminate()
                    # Give the process a few grace periods to terminate.
                    for wait in range(4):
                        if process.poll() is not None:
                            break
                        sleep(self._JOIN_TIME)
                    else:
                        self.log_warn('process termination may have failed')
                    recverr.join(self._JOIN_TIME)
                    recvout.join(self._JOIN_TIME)
                    if not len(result.getbuffer()):
                        result = RuntimeError('timeout reached, process had no output')
                    else:
                        result = RefineryPartialResult(
                            'timeout reached, returning all collected output',
                            partial=result.getvalue())

        if isinstance(result, Exception):
            raise result
        elif self.args.buffer:
            yield result.getvalue()

Ancestors

Inherited members

class datefix (format='%Y-%m-%d %H:%M:%S', dos=False)

This unit is implemented in refinery.units.misc.datefix and has the following commandline Interface:

usage: datefix [-h] [-L] [-Q] [-0] [-v] [-d] [format]

Parses all kinds of date formats and unifies them into the same format.

positional arguments:
  format         Specify the output format as a strftime-like string,
                 using ISO by default.

optional arguments:
  -d, --dos      Parse timestamps in DOS rather than Unix format.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class datefix(Unit):
    """
    Parses all kinds of date formats and unifies them into the same format.
    """

    # Known date formats, tried in order until one parses successfully.
    _FORMATS = [
        '%B %dth %Y %H:%M:%S (UTC)',  # November 27th 2019 17:37:02 (UTC)
        '%Y-%m-%dT%H:%M:%S',          # 2010-03-15T06:27:50
        '%Y-%m-%d %H:%M:%S',          # iso (2010-03-15 06:27:50.000000)
        '%Y-%m-%d %H:%M:%SZ%f',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M:%SZ%f',
        '%a %b %d %Y %H:%M:%S',       # Thu Apr 24 2014 12:32:21
    ]

    # Patterns that match a trailing time zone specification.
    _TIMEZONE_REGEXES = [re_compile(p) for p in [
        R'([+-])(\d{2})(\d{2})$',           # Thu Apr 24 2014 12:32:21 GMT-0700
        R'([+-])(\d{2}):(\d{2})$',          # 2017:09:11 23:47:22+02:00
        R'GMT([+-])(\d{2})(\d{2}) \(.+\)$'  # Thu Apr 24 2014 12:32:21 GMT-0700 (PDT)
    ]]

    def __init__(
        self,
        format: arg(help='Specify the output format as a strftime-like string, using ISO by default.') = '%Y-%m-%d %H:%M:%S',
        dos: arg('-d', help='Parse timestamps in DOS rather than Unix format.') = False
    ):
        super().__init__(format=format, dos=dos)

    @staticmethod
    def dostime(stamp: int) -> datetime:
        """
        Parses a given DOS timestamp into a datetime object.
        """
        # The DOS format packs the date into the high word and the time into
        # the low word; seconds are stored at two-second resolution.
        d, t = stamp >> 16, stamp & 0xFFFF
        s = (t & 0x1F) << 1

        return datetime(
            year   = ((d & 0xFE00) >> 0x9) + 1980,  # noqa
            month  = ((d & 0x01E0) >> 0x5),         # noqa
            day    = ((d & 0x001F) >> 0x0),         # noqa
            hour   = ((t & 0xF800) >> 0xB),         # noqa
            minute = ((t & 0x07E0) >> 0x5),         # noqa
            second = 59 if s == 60 else s,          # noqa
        )

    def _format(self, dt: datetime) -> str:
        # Render a datetime using the configured output format.
        return dt.strftime(self.args.format)

    def _extract_timezone(self, data):
        # Split a trailing UTC offset off the input string; returns the
        # remaining string and the offset as a timedelta, or None if no
        # offset was present.
        for r in self._TIMEZONE_REGEXES:
            m = r.search(data)
            if not m:
                continue
            pm = m[1]
            td = timedelta(
                hours=int(m[2]), minutes=int(m[3]))
            if pm == '-':
                td = -td
            return data[:-len(m[0])].strip(), td

        return data, None

    @linewise
    def process(self, data: str) -> str:
        data = data.strip()

        # replace colons (i.e. for exiftool dates: 2017:01:01)
        if len(data) > 10 and data[4] == ':' and data[7] == ':':
            data = F'{data[0:4]}-{data[5:7]}-{data[8:]}'

        # strips Z at end (i.e. 20171022055144Z)
        if data.endswith('Z'):
            data = data[:-1]

        # parses timestamps and dates without much format
        if data.isdigit():
            time_stamp = int(data)
            if len(data) > 14:
                raise Exception('cannot parse all-numeric string as date: %s' % data)
            elif len(data) == 14:
                # i.e. 20111020193727
                return self._format(datetime.strptime(data, '%Y%m%d%H%M%S'))
            elif len(data) == 13:
                # i.e. 1458016535000 (epoch milliseconds)
                time_stamp //= 1000
                data = data[:-3]
            if self.args.dos:
                return self._format(self.dostime(time_stamp))
            else:
                # NOTE(review): datetime.utcfromtimestamp is deprecated since
                # Python 3.12; datetime.fromtimestamp(ts, timezone.utc) is the
                # modern equivalent — confirm minimum supported version.
                return self._format(datetime.utcfromtimestamp(time_stamp))

        data, time_delta = self._extract_timezone(data)

        for f in self._FORMATS:
            try:
                dt = datetime.strptime(data, f)
            except ValueError:
                continue
            # Normalize to UTC by subtracting the extracted offset.
            return self._format(dt if time_delta is None else dt - time_delta)

        return data

Ancestors

Static methods

def dostime(stamp)

Parses a given DOS timestamp into a datetime object.

Expand source code Browse git
@staticmethod
def dostime(stamp: int) -> datetime:
    """
    Parses a given DOS timestamp into a datetime object.
    """
    d, t = stamp >> 16, stamp & 0xFFFF
    s = (t & 0x1F) << 1

    return datetime(
        year   = ((d & 0xFE00) >> 0x9) + 1980,  # noqa
        month  = ((d & 0x01E0) >> 0x5),         # noqa
        day    = ((d & 0x001F) >> 0x0),         # noqa
        hour   = ((t & 0xF800) >> 0xB),         # noqa
        minute = ((t & 0x07E0) >> 0x5),         # noqa
        second = 59 if s == 60 else s,          # noqa
    )

Inherited members

class drp (consecutive=False, min=1, max=∞, len=None, all=False, threshold=20, weight=0, buffer=1024, chug=False)

This unit is implemented in refinery.units.misc.drp and has the following command-line interface:

usage: drp [-h] [-L] [-Q] [-0] [-v] [-c] [-n N] [-N N] [-l N] [-a] [-t N]
           [-w N] [-b N | -g]

Detect Repeating Patterns - detects the most prevalent repeating byte
pattern in a chunk of data. The unit computes a suffix tree which may
require a lot of memory for large buffers.

optional arguments:
  -c, --consecutive  Assume that the repeating pattern is consecutive when
                     observable.
  -n, --min N        Minimum size of the pattern to search for. Default is
                     1.
  -N, --max N        Maximum size of the pattern to search for. Default is
                     ∞.
  -l, --len N        Set the exact size of the pattern. This is equivalent
                     to --min=N --max=N.
  -a, --all          Produce one output for each repeating pattern that
                     was detected.
  -t, --threshold N  Patterns must match this performance threshold in
                     percent, lest they be discarded.
  -w, --weight N     Specifies how much longer patterns are favored over
                     small ones. Default is 0.
  -b, --buffer N     Maximum number of bytes to inspect at once. The
                     default is 1024.
  -g, --chug         Compute the prefix tree for the entire buffer instead
                     of chunking it.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class drp(Unit):
    """
    Detect Repeating Patterns - detects the most prevalent repeating byte pattern
    in a chunk of data. The unit computes a suffix tree which may require a lot of
    memory for large buffers.
    """
    def __init__(
        self,
        consecutive: arg.switch('-c', help='Assume that the repeating pattern is consecutive when observable.') = False,
        min: arg.number('-n', help='Minimum size of the pattern to search for. Default is {default}.') = 1,
        max: arg.number('-N', help='Maximum size of the pattern to search for. Default is {default}.') = INF,
        len: arg.number('-l', help='Set the exact size of the pattern. This is equivalent to --min=N --max=N.') = None,
        all: arg.switch('-a', help='Produce one output for each repeating pattern that was detected.') = False,
        threshold: arg.number('-t', help='Patterns must match this performance threshold in percent, lest they be discarded.') = 20,
        weight: arg.number('-w', help='Specifies how much longer patterns are favored over small ones. Default is {default}.') = 0,
        buffer: arg.number('-b', group='BFR', help='Maximum number of bytes to inspect at once. The default is {default}.') = 1024,
        chug  : arg.switch('-g', group='BFR', help='Compute the prefix tree for the entire buffer instead of chunking it.') = False
    ):
        # A fixed --len overrides both the lower and the upper size bound.
        if len is not None:
            min = max = len
        super().__init__(
            min=min,
            max=max,
            all=all,
            consecutive=consecutive,
            weight=weight,
            buffer=buffer,
            chug=chug,
            threshold=threshold
        )

    def _get_patterns(self, data):
        """
        Collect candidate repeating byte patterns from *data* (a memoryview) by
        walking a suffix tree of the buffer, returning them as a set of bytes.
        Only patterns whose length lies within the configured min/max bounds are
        kept.
        """
        # NOTE(review): stackdepth presumably adjusts the recursion limit for the
        # suffix tree construction based on the input size — confirm against its
        # definition in the library.
        with stackdepth(len(data)):
            tree = SuffixTree(data)
        min_size = self.args.min
        max_size = self.args.max
        patterns = set()
        cursor = 0
        while cursor < len(data):
            # Walk the tree from the root along the suffix starting at cursor,
            # tracking the longest match that begins strictly before cursor,
            # i.e. a repetition of earlier data.
            node = tree.root
            rest = data[cursor:]
            remaining = len(rest)
            length = 0
            offset = None
            while node.children and length < remaining:
                for child in node.children.values():
                    if tree.data[child.start] == rest[length]:
                        node = child
                        break
                # stop once the matched node starts at or beyond the cursor:
                # it would no longer witness an *earlier* occurrence
                if node.start >= cursor:
                    break
                offset = node.start - length
                length = node.end + 1 - offset
            if offset is None:
                # no earlier repetition starts here; advance by a single byte
                cursor += 1
                continue
            length = min(remaining, length)
            if max_size >= length >= min_size:
                # rest is a memoryview slice; materialize the pattern as bytes
                pattern = rest[:length].tobytes()
                patterns.add(pattern)
            cursor += length
        # the suffix tree can be very large; release it before returning
        del tree
        return patterns

    @staticmethod
    def _consecutive_count(data, pattern):
        """
        Count occurrences of *pattern* in *data* at a fixed stride of
        len(pattern), trying every possible phase offset and returning the
        maximum. For single-byte patterns this is a plain count.
        """
        length = len(pattern)
        if length == 1:
            return data.count(pattern)
        view = memoryview(data)
        return max(sum(1 for i in range(k, len(view), length) if view[i:i + length] == pattern)
            for k in range(len(pattern)))

    @staticmethod
    def _truncate_pattern(pattern):
        """
        Strip a trailing partial repetition of the pattern's own prefix
        (e.g. b'abcab' becomes b'abc'), so that consecutive-repetition counting
        operates on the fundamental pattern.
        """
        # offset tracks how far the tail currently mirrors the prefix
        offset = 0
        for byte in pattern[1:]:
            if byte == pattern[offset]:
                offset += 1
            else:
                offset = 0
        if offset > 0:
            pattern = pattern[:-offset]
        return pattern

    def process(self, data):
        """
        Detect repeating byte patterns in *data*: gather candidates (whole-buffer
        or chunked), deduplicate, score each pattern by count weighted by length,
        filter by the performance threshold, and emit either all surviving
        patterns (--all) or the single best one.
        """
        memview = memoryview(data)
        # exponent applied to pattern length when scoring; --weight favors
        # longer patterns
        weight = 1 + (self.args.weight / 10)

        if self.args.chug:
            patterns = self._get_patterns(memview)
        else:
            # chunked mode bounds suffix tree memory usage at the cost of
            # missing patterns that straddle chunk boundaries
            patterns = set()
            chunksize = self.args.buffer
            for k in range(0, len(memview), chunksize):
                patterns |= self._get_patterns(memview[k:k + chunksize])
        if not patterns:
            raise RuntimeError('unexpected state: no repeating sequences found')

        self.log_debug('removing duplicate pattern detections')
        # a pattern that is an exact k-fold repetition of a shorter detected
        # pattern is redundant; drop it
        duplicates = set()
        maxlen = max(len(p) for p in patterns)
        for pattern in sorted(patterns, key=len):
            for k in range(2, maxlen // len(pattern) + 1):
                repeated = pattern * k
                if repeated in patterns:
                    duplicates.add(repeated)
        patterns -= duplicates

        self.log_debug(F'counting coverage of {len(patterns)} patterns')
        pattern_count = {p: data.count(p) for p in patterns}
        pattern_performance = dict(pattern_count)

        # second iteration re-scores patterns by their best consecutive
        # (fixed-stride) repetition count; it only runs with --consecutive
        for consecutive in (False, True):
            if consecutive:
                self.log_debug(F're-counting coverage of {len(patterns)} patterns')
                patterns = {self._truncate_pattern(p) for p in patterns}
                pattern_performance = {p: self._consecutive_count(data, p) for p in patterns}

            self.log_debug('evaluating pattern performance')
            # score = count * length^weight, then normalize to the best score
            for pattern, count in pattern_performance.items():
                pattern_performance[pattern] = count * (len(pattern) ** weight)
            best_performance = max(pattern_performance.values())
            for pattern, performance in pattern_performance.items():
                pattern_performance[pattern] = performance / best_performance

            self.log_debug('removing patterns below performance threshold')
            threshold = self.args.threshold
            patterns = {p for p in patterns if pattern_performance[p] * 100 >= threshold}

            if not self.args.consecutive:
                break

        if self.args.all:
            # emit every surviving pattern, best first, labelled with its count
            for pattern in sorted(patterns, key=pattern_performance.get, reverse=True):
                yield self.labelled(pattern, count=pattern_count[pattern])
            return

        best_patterns = [p for p in patterns if pattern_performance[p] == 1.0]

        if len(best_patterns) > 1:
            self.log_warn('could not determine unique best repeating pattern, returning the first of these:')
            for k, pattern in enumerate(best_patterns):
                self.log_warn(F'{k:02d}.: {pattern.hex()}')

        yield best_patterns[0]

Ancestors

Inherited members

class nop

This unit is implemented in refinery.units.misc.nop and has the following command-line interface:

usage: nop [-h] [-L] [-Q] [-0] [-v]

Does not change the data.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class nop(Unit):
    """
    Does not change the data.
    """
    # Identity unit: no overrides; the inherited default behavior passes the
    # input through unchanged.

Ancestors

Inherited members

class deob_js_arrays

This unit is implemented in refinery.units.obfuscation.js.arrays and has the following command-line interface:

usage: deob-js-arrays [-h] [-L] [-Q] [-0] [-v]

JavaScript deobfuscator to turn ["Z", "t", "s", "e"][0] into "Z".

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class deob_js_arrays(Deobfuscator):
    """
    JavaScript deobfuscator to turn `["Z", "t", "s", "e"][0]` into `"Z"`.
    """

    def deobfuscate(self, data):

        def litpick(match):
            # Replace a literal-array subscript with the element it selects;
            # if the index cannot be resolved, keep the original text.
            try:
                items = match[1].split(',')
                lpick = items[int(match[2])].strip()
                self.log_debug(lambda: F'{lpick} = {match[0]}')
            except (TypeError, IndexError):
                lpick = match[0]
            return lpick

        # assemble a pattern matching `[lit, lit, ...][int]` with up to five
        # whitespace characters between tokens; the doubled braces survive the
        # .format call below as the {0,5} quantifier
        separator = R'\s{{0,5}}'
        tokens = [
            '\\[', '((?:{i}|{s})', '(?:,', '(?:{i}|{s})', ')*)', '\\]', '\\[', '({i})', '\\]'
        ]
        pattern = separator.join(tokens).format(i=formats.integer, s=formats.string)
        return re.sub(pattern, litpick, data)

Ancestors

Inherited members

class deob_js_getattr

This unit is implemented in refinery.units.obfuscation.js.getattr and has the following command-line interface:

usage: deob-js-getattr [-h] [-L] [-Q] [-0] [-v]

JavaScript deobfuscator to turn WScript["CreateObject"] into
WScript.CreateObject.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --qui