Module refinery


        __     __  High Octane Triage Analysis          __
        ||    _||______ __       __________     _____   ||
        ||    \||___   \__| ____/   ______/___ / ____\  ||
========||=====||  | __/  |/    \  /==|  / __ \   __\===]|
        '======||  |   \  |   |  \_  _| \  ___/|  |     ||
               ||____  /__|___|__/  / |  \____]|  |     ||
===============''====\/=========/  /==|__|=====|__|======'
                               \  /
                                \/

The main package refinery exports all Units which are also of type Entry, i.e. units that expose a shell command. The command line interface for each of these units is given below; it is the same text that would be shown when executing the command with the -h or --help option. To better understand how the command line parameters are parsed, it is recommended to study the module documentation of the following library modules, as their content is relevant for command line use of refinery.

  1. refinery.lib.frame
  2. refinery.lib.argformats
  3. refinery.lib.meta

Furthermore, the module documentation of refinery.units contains a brief example of how to write simple units.
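
Every unit can also be used directly from Python. The following is a minimal sketch of such a pipeline (the input bytes are chosen arbitrarily for illustration); it mirrors the drain example further below:

from refinery import xor, drain

data = b'hello'                     # arbitrary example input
output = data | xor(0x20) | drain   # pipe the data through the xor unit
assert output == bytearray(b'HELLO')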

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
R"""
    ----------------------------------------------------------
            __     __  High Octane Triage Analysis          __
            ||    _||______ __       __________     _____   ||
            ||    \||___   \__| ____/   ______/___ / ____\  ||
    ========||=====||  | __/  |/    \  /==|  / __ \   __\===]|
            '======||  |   \  |   |  \_  _| \  ___/|  |     ||
                   ||____  /__|___|__/  / |  \____]|  |     ||
    ===============''====\/=========/  /==|__|=====|__|======'
                                   \  /
                                    \/

The main package `refinery` exports all `refinery.units.Unit`s which are also
of type `refinery.units.Entry`, i.e. they expose a shell command. The command
line interface for each of these units is given below, this is the same text
as would be available by executing the command with the `-h` or `--help`
option. To better understand how the command line parameters are parsed, it is
recommended to study the module documentation of the following library modules,
as their content is relevant for command line use of the `refinery`.

1. `refinery.lib.frame`
2. `refinery.lib.argformats`
3. `refinery.lib.meta`

Furthermore, the module documentation of `refinery.units` contains a brief
example of how to write simple units.
"""
__version__ = '0.4.6'
__pip_pkg__ = 'binary-refinery'

from typing import Dict, Type

import pickle
import pkg_resources

from refinery.units import arg, Unit


def _singleton(cls):
    return cls()


UNIT_CACHE_PATH = pkg_resources.resource_filename(__name__, '__init__.pkl')


@_singleton
class _cache:
    """
    Every unit can be imported from the refinery base module. The actual import
    is performed on demand to reduce import times. On first import of the refinery
    package, it creates a map of units and their corresponding module and stores
    this map as `__init__.pkl` in the package directory; this process can take
    several seconds. Subsequent imports of refinery should be faster, and the
    loading of units from the module is nearly as fast as specifying the full path.
    """
    units: Dict[str, str]
    cache: Dict[str, Type[Unit]]

    def __init__(self):
        self.reloading = False
        self.loaded = False
        self.units = {}
        self.cache = {}
        self.load()

    def load(self):
        try:
            with open(UNIT_CACHE_PATH, 'rb') as stream:
                self.units = pickle.load(stream)
        except (FileNotFoundError, EOFError):
            self.reload()
        else:
            self.loaded = True

    def save(self):
        try:
            with open(UNIT_CACHE_PATH, 'wb') as stream:
                pickle.dump(self.units, stream)
        except Exception:
            pass
        else:
            self.loaded = True

    def reload(self):
        if not self.reloading:
            from refinery.lib.loader import get_all_entry_points
            self.reloading = True
            self.units.clear()
            self.cache.clear()
            for executable in get_all_entry_points():
                name = executable.__qualname__
                self.units[name] = executable.__module__
                self.cache[name] = executable
            self.reloading = False
            self.save()

    def _resolve(self, name, retry=False):
        if retry:
            self.reload()
        try:
            module_path = self.units[name]
            module = __import__(module_path, None, None, [name])
            entry = getattr(module, name)
            self.cache[name] = entry
            return entry
        except (KeyError, ModuleNotFoundError):
            if not retry:
                return self._resolve(name, retry=True)
            raise AttributeError

    def __getitem__(self, name):
        return self._resolve(name)


@_singleton
class __pdoc__(dict):
    def __init__(self, *a, **kw):
        super().__init__()
        self._loaded = False

    def _strip_globals(self, hlp: str):
        def _strip(lines):
            triggered = False
            for line in lines:
                if triggered:
                    if line.lstrip() != line:
                        continue
                    triggered = False
                if line.lower().startswith('global options:'):
                    triggered = True
                    continue
                yield line
        return ''.join(_strip(hlp.splitlines(keepends=True)))

    def _load(self):
        if self._loaded:
            return
        from .explore import get_help_string
        self['Unit'] = False
        for name in _cache.units:
            unit = _cache[name]
            for base in unit.mro():
                try:
                    abstractmethods = base.__abstractmethods__
                except AttributeError:
                    break
                for method in abstractmethods:
                    at = getattr(unit, method, None)
                    bt = getattr(unit.mro()[1], method, None)
                    if at and at is not bt:
                        self[F'{name}.{method}'] = False
            hlp = get_help_string(unit, width=74)
            hlp = hlp.replace('\x60', '')
            hlp = self._strip_globals(hlp).strip()
            hlp = (
                F'This unit is implemented in `{unit.__module__}` and has the following '
                F'commandline Interface:\n```text\n{hlp}\n```'
            )
            self[name] = hlp
        self._loaded = True

    def items(self):
        self._load()
        return super().items()


def drain(stream):
    """
    A function wrapper around the `bytearray` data type. Can be used as the final sink in
    a refinery pipeline in Python code, i.e.:

        from refinery import *
        # ...
        output = data | carve('b64', single=True) | b64 | zl | drain
        assert isinstance(output, bytearray)
    """
    return bytearray(stream)


__all__ = [x for x, _ in sorted(_cache.units.items(), key=lambda x: x[1])] + [
    Unit.__name__, arg.__name__, '__pdoc__', 'drain', 'UNIT_CACHE_PATH']


def __getattr__(name):
    return _cache[name]


def __dir__():
    return __all__


def load(name):
    if _cache.loaded:
        return _cache.cache.get(name)
    return _cache[name]

Sub-modules

refinery.explore

A commandline script to search for binary refinery units based on keywords.

refinery.lib

Library functions used by various refinery units.

refinery.units

This package contains all refinery units. To write an executable refinery unit, it is sufficient to write a class that inherits from …

Functions

def drain(stream)

A function wrapper around the bytearray data type. Can be used as the final sink in a refinery pipeline in Python code, i.e.:

from refinery import *
# ...
output = data | carve('b64', single=True) | b64 | zl | drain
assert isinstance(output, bytearray)
def drain(stream):
    """
    A function wrapper around the `bytearray` data type. Can be used as the final sink in
    a refinery pipeline in Python code, i.e.:

        from refinery import *
        # ...
        output = data | carve('b64', single=True) | b64 | zl | drain
        assert isinstance(output, bytearray)
    """
    return bytearray(stream)

Classes

class add (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.add and has the following command line interface:

usage: add [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Add the given argument to each block.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class add(BinaryOperation):
    """
    Add the given argument to each block.
    """
    @staticmethod
    def operate(a, b): return a + b
    @staticmethod
    def inplace(a, b): a += b

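A minimal Python sketch (input bytes invented): the argument may be a single number or, as described above, a byte sequence that is applied cyclically:

from refinery import add, drain

data = bytes([0x10, 0x20, 0x30, 0x40])
out1 = data | add(0x01) | drain          # add 1 to every byte
out2 = data | add(b'\x01\x02') | drain   # two-byte argument, repeated cyclically
assert out1 == bytearray([0x11, 0x21, 0x31, 0x41])
assert out2 == bytearray([0x11, 0x22, 0x31, 0x42])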

class bitrev (bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.bitrev and has the following command line interface:

usage: bitrev [-h] [-L] [-Q] [-0] [-v] [-E] [-B N]

Reverse the bits of every block.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class bitrev(UnaryOperation):
    """
    Reverse the bits of every block.
    """
    @staticmethod
    def operate(arg):
        raise RuntimeError('operate was called before the unit was initialized')

    def __init__(self, bigendian=False, blocksize=1):
        """
        Unreadable bit reversal operations due to:
        https://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64BitsDiv
        https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
        """
        super().__init__(bigendian=bigendian, blocksize=blocksize)

        if self.bytestream:
            def operate(v):
                return ((v * 0x202020202) & 0x10884422010) % 1023
        elif self.args.blocksize in (2, 4, 8):
            def operate(v):
                s = self.fbits
                m = self.fmask
                w = v
                while s > 1:
                    s >>= 1
                    m = m ^ (m << s)
                    w = ((w << s) & ~m) | ((w >> s) & m)
                return w
        else:
            def operate(v):
                w = v & 0
                for s in range(self.fbits):
                    w |= ((v >> s) & 1) << (self.fbits - s - 1)
                return w
        self.operate = operate

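For illustration (input bytes invented), reversing the bits of 0x01 yields 0x80 and reversing 0x03 yields 0xC0:

from refinery import bitrev, drain

out = b'\x01\x03' | bitrev | drain
assert out == bytearray(b'\x80\xc0')   # 00000001 -> 10000000, 00000011 -> 11000000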

class blockop (operation, *argument, seed=0, prologue=None, epilogue=None, inc=False, dec=False, cbc=False, bigendian=False, blocksize=1, precision=None)

This unit is implemented in refinery.units.blockwise.blockop and has the following command line interface:

usage: blockop [-h] [-L] [-Q] [-0] [-v] [-s SEED] [-p E]
               [-e E | -I | -D | -X] [-E] [-B N] [-P N]
               operation [argument [argument ...]]

This unit allows you to specify a custom Python expression where the
following variables are allowed.

- the variable A: same as V[0]
- the variable B: current block
- the variable N: number of bytes in the input
- the variable I: current index in the input
- the variable S: an optional seed value for an internal state
- the variable V: the vector of arguments

Each block of the input is replaced by the value of this expression.
Additionally, it is possible to specify prologue and epilogue expressions
which are used to update the state variable S before and after the update
of each block, respectively.

positional arguments:
  operation          A Python expression defining the operation.
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -s, --seed SEED    Optional seed value for the state variable S. The
                     default is zero. This can be an expression involving
                     the variable N.
  -p, --prologue E   Optional expression with which the state variable S
                     is updated before a block is operated on.
  -e, --epilogue E   Optional expression with which the state variable S
                     is updated after a block was operated on.
  -I, --inc          equivalent to --epilogue=S+1
  -D, --dec          equivalent to --epilogue=S-1
  -X, --cbc          equivalent to --epilogue=(B)
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.
  -P, --precision N  The size of the variables used for computing the
                     result. By default, this is equal to the block size.
                     The value may be zero, indicating that arbitrary
                     precision is required.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class blockop(ArithmeticUnit):
    """
    This unit allows you to specify a custom Python expression where the following variables are allowed.

    - the variable `A`: same as `V[0]`
    - the variable `B`: current block
    - the variable `N`: number of bytes in the input
    - the variable `I`: current index in the input
    - the variable `S`: an optional seed value for an internal state
    - the variable `V`: the vector of arguments

    Each block of the input is replaced by the value of this expression. Additionally, it is possible to
    specify prologue and epilogue expressions which are used to update the state variable `S` before and
    after the update of each block, respectively.
    """

    @staticmethod
    def _parse_op(definition):
        """
        An argparse type which uses the `refinery.lib.argformats.PythonExpression` parser to
        parse the expressions that can be passed to `refinery.blockop`. Essentially, these
        are Python expressions which can contain variables `B`, `A`, `S`, and `V`.
        """
        parsed = PythonExpression(definition, *'IBASNV', all_variables_allowed=True)

        def wrapper(index, block, state, argvector, total, meta):
            args = {'I': index, 'B': block, 'S': state, 'N': total}
            if argvector:
                args.update(A=argvector[0], V=argvector)
            return parsed(meta, **args)

        return wrapper

    def __init__(
        self, operation: arg(type=str, help='A Python expression defining the operation.'), *argument,
        seed: arg('-s', type=str, help=(
            'Optional seed value for the state variable S. The default is zero. This can be an expression '
            'involving the variable N.')) = 0,
        prologue: arg('-p', type=str, metavar='E', help=(
            'Optional expression with which the state variable S is updated before a block is operated on.')) = None,
        epilogue: arg('-e', type=str, metavar='E', group='EPI', help=(
            'Optional expression with which the state variable S is updated after a block was operated on.')) = None,
        inc: arg('-I', group='EPI', help='equivalent to --epilogue=S+1') = False,
        dec: arg('-D', group='EPI', help='equivalent to --epilogue=S-1') = False,
        cbc: arg('-X', group='EPI', help='equivalent to --epilogue=(B)') = False,
        bigendian=False, blocksize=1, precision=None
    ):
        for flag, flag_is_set, expression in [
            ('--cbc', cbc, '(B)'),
            ('--inc', inc, 'S+1'),
            ('--dec', dec, 'S-1'),
        ]:
            if flag_is_set:
                if epilogue is not None:
                    raise ValueError(
                        F'Ambiguous specification; epilogue was already set to {epilogue} '
                        F'when {flag} was parsed.'
                    )
                epilogue = expression

        self._index = IndexCounter()

        super().__init__(
            self._index,
            *argument,
            bigendian=bigendian,
            blocksize=blocksize,
            precision=precision,
            operation=self._parse_op(operation),
            seed=seed,
            prologue=prologue and self._parse_op(prologue),
            epilogue=epilogue and self._parse_op(epilogue),
        )

    @property
    def _is_ecb(self):
        return not self.args.epilogue and not self.args.prologue

    def process_ecb_fast(self, data):
        if not self._is_ecb:
            raise NoNumpy
        return super().process_ecb_fast(data)

    def process(self, data):
        seed = self.args.seed
        meta = metavars(data)
        if isinstance(seed, str):
            seed = PythonExpression(seed, 'N', constants=metavars(data))
        self._index.init(self.fmask)
        self._state = seed
        self._evaluation_arguments = [0, 0, 0, (), len(data), meta]
        if callable(self._state):
            self._state = self._state(meta, N=len(data))
        return super().process(data)

    def operate(self, block, index, *args):
        arguments = self._evaluation_arguments
        arguments[:4] = index, block, self._state, args
        if self.args.prologue:
            arguments[2] = self._state = self.args.prologue(*arguments)
        block = self.args.operation(*arguments) & self.fmask
        if self.args.epilogue:
            arguments[1] = block
            self._state = self.args.epilogue(*arguments)
        return block

    def inplace(self, block, *args) -> None:
        super().inplace(block, *args)

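To make the state mechanics concrete, here is a small Python sketch with invented values: with seed 0x24 and the --inc epilogue, the operation B ^ S XORs the byte at index I with 0x24 + I, i.e. a counter-based XOR:

from refinery import blockop, drain

data = bytes([0x41, 0x41, 0x41, 0x41])
# S starts at 0x24 and the epilogue S+1 increments it after every block,
# so the bytes are XORed with 0x24, 0x25, 0x26 and 0x27 in turn.
out = data | blockop('B ^ S', seed=0x24, inc=True) | drain
assert out == bytearray([0x65, 0x64, 0x67, 0x66])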

class map (index, image, blocksize=1)

This unit is implemented in refinery.units.blockwise.map and has the following command line interface:

usage: map [-h] [-L] [-Q] [-0] [-v] [-B N] index image

Each block of the input data which occurs as a block of the index argument
is replaced by the corresponding block of the image argument.

positional arguments:
  index              index characters
  image              image characters

optional arguments:
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class map(BlockTransformation):
    """
    Each block of the input data which occurs as a block of the index argument
    is replaced by the corresponding block of the image argument.
    """
    def __init__(
        self,
        index: arg(help='index characters'),
        image: arg(help='image characters'),
        blocksize=1
    ):
        super().__init__(blocksize=blocksize, index=index, image=image)

    def process(self, data):
        self._map = dict(zip(
            self.chunk(self.args.index),
            self.chunk(self.args.image)))
        return super().process(data)

    def process_block(self, token):
        return self._map.get(token, token)

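A short Python sketch with invented inputs; every occurrence of an index byte is replaced by the image byte at the same position:

from refinery import map, drain   # note: the unit is named map and shadows the builtin here

out = b'abracadabra' | map(b'abc', b'xyz') | drain
assert out == bytearray(b'xyrxzxdxyrx')   # a -> x, b -> y, c -> z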

class neg (bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.neg and has the following command line interface:

usage: neg [-h] [-L] [-Q] [-0] [-v] [-E] [-B N]

Each block of the input data is negated bitwise. This is sometimes also
called the bitwise complement or inverse.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class neg(UnaryOperation):
    """
    Each block of the input data is negated bitwise. This is sometimes
    also called the bitwise complement or inverse.
    """
    def operate(self, a): return ~a
    def inplace(self, a): a ^= self.fmask


class pack (base=0, prefix=False, strict=False, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.pack and has the following command line interface:

usage: pack [-h] [-L] [-Q] [-0] [-v] [-R] [-r] [-s] [-E] [-B N] [base]

Scans the input data for numeric constants and packs them into a binary
format. This is useful to convert the textual representation of an array
of numbers into its binary form. For example, 123,34,256,12,1,234 would be
transformed into the byte sequence 7B22000C01EA, where 256 was wrapped and
packed as a null byte because the default block size is one byte. If the
above sequence would be packed with options -EB2, the result would be
equal to 007B00220100000C000100EA in hexadecimal.

positional arguments:
  base               Find only numbers in given base. Default of 0 means
                     that common expressions for hexadecimal, octal and
                     binary are accepted.

optional arguments:
  -r, --prefix       Add numeric prefixes like 0x, 0b, and 0o in reverse
                     mode.
  -s, --strict       Only parse integers that fit in one block of the
                     given block size.
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class pack(BlockTransformationBase):
    """
    Scans the input data for numeric constants and packs them into a binary
    format. This is useful to convert the textual representation of an array of
    numbers into its binary form. For example, `123,34,256,12,1,234` would be
    transformed into the byte sequence `7B22000C01EA`, where `256` was wrapped
    and packed as a null byte because the default block size is one byte. If
    the above sequence would be packed with options -EB2, the result would be
    equal to `007B00220100000C000100EA` in hexadecimal.
    """

    def __init__(self,
        base: arg(type=number[2:36], help=(
            'Find only numbers in given base. Default of 0 means that '
            'common expressions for hexadecimal, octal and binary are '
            'accepted.')) = 0,
        prefix  : arg.switch('-r', help='Add numeric prefixes like 0x, 0b, and 0o in reverse mode.') = False,
        strict  : arg.switch('-s', help='Only parse integers that fit in one block of the given block size.') = False,
        bigendian=False, blocksize=1
    ):
        super().__init__(
            base=base,
            prefix=prefix,
            strict=strict,
            bigendian=bigendian,
            blocksize=blocksize
        )

    @property
    def bytestream(self):
        # never allow bytes to be left unchunked
        return False

    def reverse(self, data):
        base = self.args.base or 10
        prefix = B''

        self.log_debug(F'using base {base:d}')

        if self.args.prefix:
            prefix = {
                0x02: b'0b',
                0x08: b'0o',
                0x10: b'0x'
            }.get(base, B'')

        converter = BaseUnit(base, not self.args.bigendian)

        for n in self.chunk(data, raw=True):
            yield prefix + converter.reverse(n)

    def process(self, data):
        def intb(integers):
            for n in integers:
                if self.args.base == 0 and n.startswith(B'0') and n[1:].isdigit():
                    n = B'0o' + n
                N = int(n, self.args.base)
                M = N & self.fmask
                self.log_debug(lambda: F'0x{M:0{self.fbits // 4}X}')
                if self.args.strict and M != N:
                    continue
                yield M

        if self.args.base == 0:
            pattern = formats.integer
        elif self.args.base <= 10:
            pattern = re.compile(B'[-+]?[0-%d]{1,64}' % (self.args.base - 1))
        else:
            pattern = re.compile(B'[-+]?[0-9a-%c]{1,20}' % (0x57 + self.args.base), re.IGNORECASE)

        return self.unchunk(intb(pattern.findall(data)))

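The example from the description above, written out as a Python sketch:

from refinery import pack, drain

out = b'123,34,256,12,1,234' | pack | drain
assert out == bytearray.fromhex('7B22000C01EA')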

class rev (blocksize=1)

This unit is implemented in refinery.units.blockwise.rev and has the following command line interface:

usage: rev [-h] [-L] [-Q] [-0] [-v] [-B N]

The blocks of the input data are output in reverse order. If the length of
the input data is not a multiple of the block size, the data is truncated.

optional arguments:
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class rev(BlockTransformationBase):
    """
    The blocks of the input data are output in reverse order. If the length of
    the input data is not a multiple of the block size, the data is truncated.
    """
    def __init__(self, blocksize=1):
        super().__init__(blocksize=blocksize)

    def process(self, data):
        if self.bytestream:
            return data[::-1]
        else:
            rv = list(self.chunk(data, raw=True))[::-1]
            return self.rest(data) + self.unchunk(rv, raw=True)

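For example (input invented), with a block size of two bytes the blocks AB, CD, EF are emitted in reverse order:

from refinery import rev, drain

out = b'ABCDEF' | rev(blocksize=2) | drain
assert out == bytearray(b'EFCDAB')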

class rotl (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.rotl and has the following command line interface:

usage: rotl [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Rotate the bits of each block left.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class rotl(BinaryOperation):
    """
    Rotate the bits of each block left.
    """
    def operate(self, value, shift):
        shift %= self.fbits
        return (value << shift) | (value >> (self.fbits - shift))

    def inplace(self, value, shift):
        shift %= self.fbits
        lower = value >> (self.fbits - shift)
        value <<= shift
        value |= lower

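For illustration with single-byte blocks (values invented), rotating left by four bits swaps the nibbles of each byte:

from refinery import rotl, drain

out = b'\x0f\x01' | rotl(4) | drain
assert out == bytearray(b'\xf0\x10')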

class rotr (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.rotr and has the following command line interface:

usage: rotr [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Rotate the bits of each block right.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class rotr(BinaryOperation):
    """
    Rotate the bits of each block right.
    """
    def operate(self, value, shift):
        shift %= self.fbits
        return (value >> shift) | (value << (self.fbits - shift))

    def inplace(self, value, shift):
        shift %= self.fbits
        lower = value >> shift
        value <<= self.fbits - shift
        value |= lower


class shl (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.shl and has the following command line interface:

usage: shl [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Shift the bits of each block left, filling with zero bits.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class shl(BinaryOperation):
    """
    Shift the bits of each block left, filling with zero bits.
    """
    @staticmethod
    def operate(a, b): return a << b
    @staticmethod
    def inplace(a, b): a <<= b


class shr (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.shr and has the following command line interface:

usage: shr [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Shift the bits of each block right, filling with zero bits.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class shr(BinaryOperation):
    """
    Shift the bits of each block right, filling with zero bits.
    """
    @staticmethod
    def operate(a, b): return a >> b
    @staticmethod
    def inplace(a, b): a >>= b


class sub (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.sub and has the following command line interface:

usage: sub [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Subtract the given argument from each block.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class sub(BinaryOperation):
    """
    Subtract the given argument from each block.
    """
    @staticmethod
    def operate(a, b): return a - b
    @staticmethod
    def inplace(a, b): a -= b


class terminate (sentinel=0, blocksize=1, bigendian=False)

This unit is implemented in refinery.units.blockwise.terminate and has the following command line interface:

usage: terminate [-h] [-L] [-Q] [-0] [-v] [-B N] [-E] [sentinel]

The unit reads data from the incoming chunk in blocks of any given size
until the sentinel value is encountered. The output of the unit is all
data that was read, excluding the sentinel. The default block size is one
and the default sentinel value is zero, which corresponds to reading a
null-terminated string from the input. If the sentinel value is not found
anywhere in the incoming data, the complete input is returned as output.

positional arguments:
  sentinel           sentinel value to look for; default is 0

optional arguments:
  -B, --blocksize N  The size of each block in bytes, default is 1.
  -E, --bigendian    Read chunks in big endian.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class terminate(BlockTransformationBase):
    """
    The unit reads data from the incoming chunk in blocks of any given size until the
    sentinel value is encountered. The output of the unit is all data that was read,
    excluding the sentinel. The default block size is one and the default sentinel value
    is zero, which corresponds to reading a null-terminated string from the input.
    If the sentinel value is not found anywhere in the incoming data, the complete input
    is returned as output.
    """
    def __init__(
        self,
        sentinel: arg(type=numseq, help='sentinel value to look for; default is {default}') = 0,
        blocksize=1,
        bigendian=False
    ):
        if not isinstance(sentinel, int):
            sentinel = next(chunks.unpack(sentinel, blocksize, bigendian))
        super().__init__(blocksize=blocksize, bigendian=bigendian, sentinel=sentinel)

    def process(self, data: bytearray):
        sentinel: int = self.args.sentinel

        self.log_debug(F'using sentinel value: 0x{sentinel:0{self.args.blocksize*2}X}')

        if self.bytestream:
            pos = data.find(sentinel)
            if pos < 0:
                self.log_info(F'the sentinel value {sentinel} was not found')
            else:
                data[pos:] = []
            return data

        def seek(it):
            for chunk in it:
                if chunk == sentinel:
                    break
                yield chunk

        return self.unchunk(seek(self.chunk(data)))

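A Python sketch of the default behaviour, i.e. reading a null-terminated string (input invented):

from refinery import terminate, drain

out = b'hello\x00world' | terminate | drain
assert out == bytearray(b'hello')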

class transpose (padding=b'', blocksize=1)

This unit is implemented in refinery.units.blockwise.transpose and has the following command line interface:

usage: transpose [-h] [-L] [-Q] [-0] [-v] [-B N] [padding]

Interprets the sequence of blocks as rows of a matrix and returns the
blocks that correspond to the columns of this matrix.

positional arguments:
  padding            Optional byte sequence to use as padding for tail
                     end.

optional arguments:
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class transpose(BlockTransformationBase):
    """
    Interprets the sequence of blocks as rows of a matrix and returns the
    blocks that correspond to the columns of this matrix.
    """
    def __init__(
        self, padding: arg(help='Optional byte sequence to use as padding for tail end.') = B'',
        blocksize=1
    ):
        super().__init__(bigendian=False, blocksize=blocksize, padding=padding)

    def process(self, data):
        rest = self.rest(data)
        data = list(self.chunk(data, raw=True))

        if self.args.padding:
            while len(rest) < self.args.blocksize:
                rest += self.args.padding
            rest = rest[:self.args.blocksize]
            data.append(rest)
            rest = B''

        return self.unchunk((
            bytes(data[j][i] for j in range(len(data)))
            for i in range(self.args.blocksize)), raw=True)

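For example (input invented): with two-byte rows, the blocks AB, CD, EF form a 3x2 matrix whose columns are ACE and BDF:

from refinery import transpose, drain

out = b'ABCDEF' | transpose(blocksize=2) | drain
assert out == bytearray(b'ACEBDF')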

class xor (argument, bigendian=False, blocksize=1)

This unit is implemented in refinery.units.blockwise.xor and has the following command line interface:

usage: xor [-h] [-L] [-Q] [-0] [-v] [-E] [-B N] argument

Form the exclusive or of the input data with the given argument.

positional arguments:
  argument           A single numeric expression which provides the right
                     argument to the operation, where the left argument is
                     each block in the input data. This argument can also
                     contain a sequence of bytes which is then split into
                     blocks of the same size as the input data and used
                     cyclically.

optional arguments:
  -E, --bigendian    Read chunks in big endian.
  -B, --blocksize N  The size of each block in bytes, default is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class xor(BinaryOperation):
    """
    Form the exclusive or of the input data with the given argument.
    """
    def process_ecb_fast(self, data):
        try:
            return super().process_ecb_fast(data)
        except NoNumpy as E:
            try:
                from Crypto.Util.strxor import strxor
            except ModuleNotFoundError:
                raise E
            else:
                from itertools import islice, cycle
                take = len(data) // self.args.blocksize + 1
                argb = self.unchunk(islice(cycle(x & self.fmask for x in self.args.argument[0]), take))
                return strxor(data, argb[:len(data)])

    @staticmethod
    def operate(a, b): return a ^ b
    @staticmethod
    def inplace(a, b): a ^= b

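A Python sketch with an invented key: because a byte-sequence argument is applied cyclically, this is the classic repeating-key XOR, and applying it twice restores the input:

from refinery import xor, drain

plain = b'attack at dawn'
key = b'key'
encrypted = plain | xor(key) | drain
decrypted = encrypted | xor(key) | drain
assert decrypted == plain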

class aplib

This unit is implemented in refinery.units.compression.ap and has the following command line interface:

usage: aplib [-h] [-L] [-Q] [-0] [-v] [-R]

APLib compression and decompression.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class aplib(Unit):
    """
    APLib compression and decompression.
    """

    def reverse(self, buf):
        return compressor(buf).compress()

    def process(self, buf):
        return decompressor(buf).decompress()


class blz

This unit is implemented in refinery.units.compression.blz and has the following command line interface:

usage: blz [-h] [-L] [-Q] [-0] [-v] [-R]

BriefLZ compression and decompression. The compression algorithm uses a
pure Python suffix tree implementation: It requires a lot of time &
memory.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class blz(Unit):
    """
    BriefLZ compression and decompression. The compression algorithm uses a pure Python suffix tree
    implementation: It requires a lot of time & memory.
    """
    def _begin(self, data):
        self._src = StructReader(memoryview(data))
        self._dst = MemoryFile(bytearray())
        return self

    def _reset(self):
        self._src.seek(0)
        self._dst.seek(0)
        self._dst.truncate()
        return self

    def _decompress(self):
        (
            signature,
            version,
            src_count,
            src_crc32,
            dst_count,
            dst_crc32,
        ) = self._src.read_struct('>6L')
        if signature != 0x626C7A1A:
            raise ValueError(F'Invalid BriefLZ signature: {signature:08X}, should be 626C7A1A.')
        if version > 10:
            raise ValueError(F'Invalid version number {version}, should be less than 10.')
        self.log_debug(F'signature: 0x{signature:08X} V{version}')
        self.log_debug(F'src count: 0x{src_count:08X}')
        self.log_debug(F'src crc32: 0x{src_crc32:08X}')
        self.log_debug(F'dst count: 0x{dst_count:08X}')
        self.log_debug(F'dst crc32: 0x{dst_crc32:08X}')
        src = self._src.getbuffer()
        src = src[24:24 + src_count]
        if len(src) < src_count:
            self.log_warn(F'Only {len(src)} bytes in buffer, but header announced a length of {src_count}.')
        if src_crc32:
            check = zlib.crc32(src)
            if check != src_crc32:
                self.log_warn(F'Invalid source data CRC {check:08X}, should be {src_crc32:08X}.')
        dst = self._decompress_chunk(dst_count)
        if not dst_crc32:
            return dst
        check = zlib.crc32(dst)
        if check != dst_crc32:
            self.log_warn(F'Invalid result data CRC {check:08X}, should be {dst_crc32:08X}.')
        return dst

    def _decompress_modded(self):
        self._src.seekrel(8)
        total_size = self._src.u64()
        chunk_size = self._src.u64()
        remaining = total_size
        self.log_debug(F'total size: 0x{total_size:016X}')
        self.log_debug(F'chunk size: 0x{chunk_size:016X}')
        while remaining > chunk_size:
            self._decompress_chunk(chunk_size)
            remaining -= chunk_size
        return self._decompress_chunk(remaining)

    def _decompress_chunk(self, size=None):
        bitcount = 0
        bitstore = 0
        decompressed = 1

        def readbit():
            nonlocal bitcount, bitstore
            if not bitcount:
                bitstore = int.from_bytes(self._src.read(2), 'little')
                bitcount = 0xF
            else:
                bitcount = bitcount - 1
            return (bitstore >> bitcount) & 1

        def readint():
            result = 2 + readbit()
            while readbit():
                result <<= 1
                result += readbit()
            return result

        self._dst.write(self._src.read(1))

        try:
            while not size or decompressed < size:
                if readbit():
                    length = readint() + 2
                    sector = readint() - 2
                    offset = self._src.read(1)[0] + 1
                    delta = offset + 0x100 * sector
                    available = self._dst.tell()
                    if delta not in range(available + 1):
                        raise RefineryPartialResult(
                            F'Requested rewind by 0x{delta:08X} bytes with only 0x{available:08X} bytes in output buffer.',
                            partial=self._dst.getvalue())
                    quotient, remainder = divmod(length, delta)
                    replay = memoryview(self._dst.getbuffer())
                    replay = bytes(replay[-delta:] if quotient else replay[-delta:length - delta])
                    replay = quotient * replay + replay[:remainder]
                    self._dst.write(replay)
                    decompressed += length
                else:
                    self._dst.write(self._src.read(1))
                    decompressed += 1
        except EOF as E:
            raise RefineryPartialResult(str(E), partial=self._dst.getbuffer())
        dst = self._dst.getbuffer()
        if decompressed < size:
            raise RefineryPartialResult(
                F'Attempted to decompress {size} bytes, got only {len(dst)}.', dst)
        if decompressed > size:
            raise RuntimeError('Decompressed buffer contained more bytes than expected.')
        return dst

    def _compress(self):
        from refinery.lib.suffixtree import SuffixTree

        try:
            self.log_info('computing suffix tree')
            tree = SuffixTree(self._src.getbuffer())
        except Exception:
            raise

        bitstore = 0  # The bit stream to be written
        bitcount = 0  # The number of bits in the bit stream
        buffer = MemoryFile(bytearray())

        # Write empty header and first byte of source
        self._dst.write(bytearray(24))
        self._dst.write(self._src.read(1))

        def writeint(n: int) -> None:
            """
            Write an integer to the bit stream.
            """
            nonlocal bitstore, bitcount
            nbits = n.bit_length()
            if nbits < 2:
                raise ValueError
            # The highest bit is implicitly assumed:
            n ^= 1 << (nbits - 1)
            remaining = nbits - 2
            while remaining:
                remaining -= 1
                bitstore <<= 2
                bitcount += 2
                bitstore |= ((n >> remaining) & 3) | 1
            bitstore <<= 2
            bitcount += 2
            bitstore |= (n & 1) << 1

        src = self._src.getbuffer()
        remaining = len(src) - 1
        self.log_info('compressing data')

        while True:
            cursor = len(src) - remaining
            rest = src[cursor:]
            if bitcount >= 0x10:
                block_count, bitcount = divmod(bitcount, 0x10)
                info_channel = bitstore >> bitcount
                bitstore = info_channel << bitcount ^ bitstore
                # The decompressor will read bits from top to bottom, and each 16 bit block has to be
                # little-endian encoded. The bit stream is encoded top to bottom bit in the bitstore
                # variable, and by encoding it as a big endian integer, the stream is in the correct
                # order. However, we need to swap adjacent bytes to achieve little endian encoding for
                # each of the blocks:
                info_channel = bytearray(info_channel.to_bytes(block_count * 2, 'big'))
                for k in range(block_count):
                    k0 = 2 * k + 0
                    k1 = 2 * k + 1
                    info_channel[k0], info_channel[k1] = info_channel[k1], info_channel[k0]
                info_channel = memoryview(info_channel)
                data_channel = memoryview(buffer.getbuffer())
                self._dst.write(info_channel[:2])
                self._dst.write(data_channel[:-1])
                self._dst.write(info_channel[2:])
                data_channel = bytes(data_channel[-1:])
                buffer.truncate(0)
                store = buffer if bitcount else self._dst
                store.write(data_channel)
            if remaining + bitcount < 0x10:
                buffer = buffer.getbuffer()
                if rest or buffer:
                    bitstore <<= 0x10 - bitcount
                    self._dst.write(bitstore.to_bytes(2, 'little'))
                    self._dst.write(buffer)
                    self._dst.write(rest)
                elif bitcount:
                    raise RuntimeError('Bitbuffer Overflow')
                break
            node = tree.root
            length = 0
            offset = 0
            sector = None
            while node.children and length < len(rest):
                for child in node.children.values():
                    if tree.data[child.start] == rest[length]:
                        node = child
                        break
                if node.start >= cursor:
                    break
                offset = node.start - length
                length = node.end + 1 - offset
            length = min(remaining, length)
            if length >= 4:
                sector, offset = divmod(cursor - offset - 1, 0x100)
            bitcount += 1
            bitstore <<= 1
            if sector is None:
                buffer.write(rest[:1])
                remaining -= 1
                continue
            bitstore |= 1
            buffer.write(bytes((offset,)))
            writeint(length - 2)
            writeint(sector + 2)
            remaining -= length

        self._dst.seek(24)
        dst = self._dst.peek()
        self._dst.seek(0)
        self._dst.write(struct.pack('>6L', 0x626C7A1A, 1, len(dst), zlib.crc32(dst), len(src), zlib.crc32(src)))
        return self._dst.getbuffer()

    def process(self, data):
        self._begin(data)
        partial = None
        try:
            return self._decompress()
        except ValueError as error:
            if isinstance(error, RefineryPartialResult):
                partial = error
            self.log_warn(F'Reverting to modified BriefLZ after decompression error: {error!s}')
            self._reset()

        try:
            return self._decompress_modded()
        except RefineryPartialResult:
            raise
        except Exception as error:
            if not partial:
                raise
            raise partial from error

    def reverse(self, data):
        return self._begin(data)._compress()


class bz2 (level=9)

This unit is implemented in refinery.units.compression.bz2 and has the following command line interface:

usage: bz2 [-h] [-L] [-Q] [-0] [-v] [-R] [-l LEVEL]

BZip2 compression and decompression.

optional arguments:
  -l, --level LEVEL  compression level preset between 1 and 9

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class bz2(Unit):
    """
    BZip2 compression and decompression.
    """
    def __init__(self, level: arg('-l', type=number[1:9], help='compression level preset between 1 and 9') = 9):
        super().__init__(level=level)

    def process(self, data):
        return bz2_.decompress(data)

    def reverse(self, data):
        return bz2_.compress(data, self.args.level)


class decompress (prepend=True, tolerance=12, min_ratio=1)

This unit is implemented in refinery.units.compression.decompress and has the following command line interface:

usage: decompress [-h] [-L] [-Q] [-0] [-v] [-P] [-t N] [-r R]

Attempts all available decompression units against the input and returns
the output of the first successful one. If none succeeds, the data is
returned unaltered. The process is heavily biased against LZNT1
decompression due to a large tendency for LZNT1 false positives.

optional arguments:
  -P, --no-prepend   By default, if decompression fails, the unit attempts
                     to prefix the data with all possible values of a
                     single byte and decompress the result. This behavior
                     can be disabled with this flag.
  -t, --tolerance N  Maximum number of bytes to strip from the beginning
                     of the data; The default value is 12.
  -r, --min-ratio R  To determine whether a decompression algorithm was
                     successful, the ratio of compressed size to
                     decompressed size is required to be at least this
                     number, a floating point value R; default value is 1.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
class decompress(Unit):
    """
    Attempts all available decompression units against the input and returns
    the output of the first successful one. If none succeeds, the data is
    returned unaltered. The process is heavily biased against LZNT1 decompression
    due to a large tendency for LZNT1 false positives.
    """
    def __init__(
        self,
        prepend: arg.switch('-P', '--no-prepend', off=True, help=(
            'By default, if decompression fails, the unit attempts to prefix '
            'the data with all possible values of a single byte and decompress '
            'the result. This behavior can be disabled with this flag.')
        ) = True,
        tolerance: arg.number('-t', help=(
            'Maximum number of bytes to strip from the beginning of the data; '
            'The default value is 12.')
        ) = 12,
        min_ratio: arg('-r', metavar='R', help=(
            'To determine whether a decompression algorithm was successful, the '
            'ratio of compressed size to decompressed size is required to be at '
            'least this number, a floating point value R; default value is 1.')
        ) = 1,
    ):
        if min_ratio <= 0:
            raise ValueError('The compression factor must be positive.')
        super().__init__(tolerance=tolerance, prepend=prepend, min_ratio=min_ratio)
        self.engines = [
            engine() for engine in [zl, lzma, aplib, bz2, blz, lz4, lznt1]
        ]

    def process(self, data):
        best = None
        current_ratio = 1

        class result:
            unit = self

            def __init__(self, engine, cutoff=0, prefix=None):
                feed = data

                self.engine = engine
                self.prefix = prefix
                self.cutoff = cutoff

                if cutoff:
                    feed = data[cutoff:]
                if prefix is not None:
                    feed = prefix + data

                try:
                    self.result = engine.process(feed)
                except RefineryPartialResult as pr:
                    self.result = pr.partial
                except Exception:
                    self.result = B''

                if not self.result:
                    self.ratio = INF
                else:
                    self.ratio = len(data) / len(self.result)

            @property
            def unmodified(self):
                return not self.prefix and not self.cutoff

            def schedule(self):
                nonlocal best, current_ratio
                if self.ratio >= self.unit.args.min_ratio:
                    return
                prefix = hex(self.prefix[0]) if self.prefix else None
                r = 1 if self.unmodified and best and not best.unmodified else 0.9
                if self.engine.__class__ is lznt1:
                    r /= 2
                if not best or self.ratio / current_ratio < r:
                    self.unit.log_info(lambda: (
                        F'obtained {self.ratio:.2f} compression ratio with: prefix={prefix}, '
                        F'cutoff={self.cutoff}, engine={self.engine.name}'))
                    best = self
                    current_ratio = self.ratio

        for engine in self.engines:
            self.log_debug(F'attempting engine: {engine.name}')
            for t in range(self.args.tolerance):
                result(engine, t).schedule()
            if self.args.prepend:
                for p in range(0x100):
                    result(engine, 0, bytes((p,))).schedule()

        if best is None:
            self.log_warn('no compression engine worked, returning original data.')
            return data

        return best.result
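
The selection heuristic can be sketched without any refinery internals: run several decompressors over the input and keep the candidate whose ratio of input size to output size is smallest, accepting it only while that ratio stays below the configured threshold (mirroring the schedule check in the source above). The helper below is a hypothetical illustration using only standard library codecs, not the unit's actual engine list:

import bz2, lzma, zlib

def guess_decompress(data: bytes, min_ratio: float = 1.0) -> bytes:
    # Keep the candidate that expands the input the most, provided the
    # ratio of compressed to decompressed size stays below min_ratio.
    best, best_ratio = data, min_ratio
    for decompress in (zlib.decompress, lzma.decompress, bz2.decompress):
        try:
            output = decompress(data)
        except Exception:
            continue
        if output and len(data) / len(output) < best_ratio:
            best, best_ratio = output, len(data) / len(output)
    return best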

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class lzma (filter=None, raw=False, alone=False, xz=False, level=9, delta=None)

This unit is implemented in refinery.units.compression.lz and has the following command line interface:

usage: lzma [-h] [-L] [-Q] [-0] [-v] [-R] [-r | -a | -x] [-l N] [-d N]
            [FILTER]

LZMA compression and decompression.

positional arguments:
  FILTER         Specifies a bcj filter to be applied. Possible values
                 are: ARM, ARMTHUMB, IA64, LZMA1, LZMA2, POWERPC, SPARC,
                 X86

optional arguments:
  -r, --raw      Use raw (no container) format.
  -a, --alone    Use the lzma container format.
  -x, --xz       Use the default xz format.
  -l, --level N  The compression level preset; between 0 and 9.
  -d, --delta N  Add a delta filter when compressing.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class lzma(Unit):
    """
    LZMA compression and decompression.
    """
    _LZMA_FILTER = extract_options(lzma_, 'FILTER_', 'DELTA')
    _LZMA_PARSER = OptionFactory(_LZMA_FILTER)

    def __init__(
        self, filter: arg.choice(choices=list(_LZMA_FILTER), metavar='FILTER', help=(
            'Specifies a bcj filter to be applied. Possible values are: {choices}')) = None,
        raw   : arg.switch('-r', group='MODE', help='Use raw (no container) format.') = False,
        alone : arg.switch('-a', group='MODE', help='Use the lzma container format.') = False,
        xz    : arg.switch('-x', group='MODE', help='Use the default xz format.') = False,
        level : arg.number('-l', bound=(0, 9), help='The compression level preset; between 0 and 9.') = 9,
        delta : arg.number('-d', help='Add a delta filter when compressing.') = None,
    ):
        filter = filter and self._LZMA_PARSER(filter)
        if (raw, alone, xz).count(True) > 1:
            raise ValueError('Only one container format can be enabled.')
        if level not in range(10):
            raise ValueError('Compression level must be a number between 0 and 9.')
        super().__init__(filter=filter, raw=raw, alone=alone, xz=xz, delta=delta,
            level=level | lzma_.PRESET_EXTREME)

    def _get_lz_mode_and_filters(self, reverse=False):
        mode = lzma_.FORMAT_AUTO
        filters = []
        if self.args.filter is not None:
            filters.append({'id': self.args.filter.value})
        if self.args.delta is not None:
            self.log_debug('adding delta filter')
            filters.append({
                'id': lzma_.FILTER_DELTA,
                'dist': self.args.delta
            })
        if self.args.alone:
            self.log_debug('setting alone format')
            mode = lzma_.FORMAT_ALONE
            filters.append({
                'id': lzma_.FILTER_LZMA1,
                'preset': self.args.level
            })
        elif self.args.raw:
            self.log_debug('setting raw format')
            mode = lzma_.FORMAT_RAW
            filters.append({
                'id': lzma_.FILTER_LZMA2,
                'preset': self.args.level
            })
        elif self.args.xz or reverse:
            if reverse and not self.log_debug('setting xz container format'):
                self.log_info('choosing default .xz container format for compression.')
            mode = lzma_.FORMAT_XZ
            filters.append({
                'id': lzma_.FILTER_LZMA2,
                'preset': self.args.level
            })
        return mode, filters

    def reverse(self, data):
        mode, filters = self._get_lz_mode_and_filters(True)
        lz = lzma_.LZMACompressor(mode, filters=filters)
        output = lz.compress(data)
        output += lz.flush()
        return output

    def process(self, data):
        keywords = {}
        mode, filters = self._get_lz_mode_and_filters(False)
        if self.args.raw:
            keywords['filters'] = filters
        lz = lzma_.LZMADecompressor(mode, **keywords)
        with MemoryFile() as output:
            pos, size = 0, 4096
            with MemoryFile(data) as stream:
                while not stream.eof and not stream.closed:
                    pos = stream.tell()
                    try:
                        chunk = lz.decompress(stream.read(size))
                    except (EOFError, lzma_.LZMAError) as error:
                        if size > 1:
                            lz = lzma_.LZMADecompressor(mode, **keywords)
                            stream.seek(0)
                            output.seek(0)
                            if pos > 0:
                                output.write(lz.decompress(stream.read(pos)))
                            msg = error.args[0] if len(error.args) == 1 else error.__class__.__name__
                            self.log_debug(F'decompression error, reverting to one byte at a time: {msg}')
                            size = 1
                        else:
                            remaining = len(stream.getbuffer()) - pos
                            raise RefineryPartialResult(F'decompression failed with {remaining} bytes remaining', output.getvalue())
                    else:
                        output.write(chunk)
            return output.getvalue()
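
The lzma_ reference in the source appears to be the lzma module of the Python standard library, and the three container switches of the unit map to its FORMAT_XZ, FORMAT_ALONE and FORMAT_RAW constants. A minimal sketch of the corresponding standard library calls:

import lzma

data = b'binary refinery' * 32
xz = lzma.compress(data, format=lzma.FORMAT_XZ)
alone = lzma.compress(data, format=lzma.FORMAT_ALONE)

# FORMAT_AUTO detects both the .xz and the legacy .lzma (alone) container.
assert lzma.decompress(xz, format=lzma.FORMAT_AUTO) == data
assert lzma.decompress(alone, format=lzma.FORMAT_AUTO) == data

# Raw streams carry no header, so the filter chain has to be supplied.
filters = [{'id': lzma.FILTER_LZMA2, 'preset': 9}]
raw = lzma.compress(data, format=lzma.FORMAT_RAW, filters=filters)
assert lzma.decompress(raw, format=lzma.FORMAT_RAW, filters=filters) == data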

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class lz4

This unit is implemented in refinery.units.compression.lz4 and has the following command line interface:

usage: lz4 [-h] [-L] [-Q] [-0] [-v]

LZ4 block decompression. See also:
https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md#compressed-
block-format

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
class lz4(Unit):
    """
    LZ4 block decompression. See also:
    https://github.com/lz4/lz4/blob/master/doc/lz4_Block_format.md#compressed-block-format
    """
    def _read_block(self, reader, output, ubound=None):
        entry = reader.tell()
        lastend = 0

        def ubound_check():
            if ubound is None:
                return False
            consumed = reader.tell() - entry
            if consumed > ubound:
                raise ValueError(F'upper bound {ubound} exceeded by {consumed-ubound} in LZ4 block')
            return consumed == ubound

        while not reader.eof:
            reflen = reader.read_nibble()
            litlen = reader.read_nibble()
            litlen = reader.read_size(litlen)
            literal = reader.read(litlen)
            output.write(literal)
            if ubound_check(): break
            try: refpos = reader.u16()
            except EOF: break
            if refpos - 1 not in range(output.tell()):
                with StreamDetour(output, lastend):
                    if output.read(len(literal)) == literal:
                        # This literal could have been encoded in the last match, but it wasn't.
                        # Therefore, it is very likely that we have reached the end of the stream.
                        break
                position = reader.tell()
                remaining = len(literal) - position
                raise RefineryPartialResult(
                    F'encountered invalid match offset value {refpos} at position {position} with {remaining} bytes remaining',
                    partial=output.getvalue())
            reflen = reader.read_size(reflen)
            if ubound_check():
                raise ValueError('last sequence in block contained a match')
            reflen += 4
            available_bytes = min(refpos, reflen)
            q, r = divmod(reflen, available_bytes)
            with StreamDetour(output, -refpos, io.SEEK_CUR):
                match = output.read(available_bytes)
                match = q * match + match[:r]
                assert len(match) == reflen
                lastend = output.tell() - available_bytes + r
            output.write(match)

    def process(self, data):
        output = io.BytesIO()
        reader = LZ4Reader(memoryview(data))
        try:
            magic = reader.u32() == 0x184D2204
        except EOF:
            magic = False
        if not magic:
            reader.seek(0)
            self._read_block(reader, output)
            return output.getbuffer()

        (dict_id, rsrv1, content_checksummed, content_size,
            blocks_checksummed, blocks_independent, v2, v1) = reader.read_bits(8)
        rsrv2 = reader.read_nibble()
        try:
            block_maximum = {
                7: 0x400000,
                6: 0x100000,
                5: 0x040000,
                4: 0x010000,
            }[reader.read_integer(3)]
        except KeyError:
            raise ValueError('unknown maximum block size value in LZ4 frame header')
        rsrv3 = reader.read_bit()
        if any((rsrv1, rsrv2, rsrv3)):
            self.log_warn('nonzero reserved value in LZ4 frame header')
        if (v1, v2) != (0, 1):
            self.log_warn(F'invalid version ({v1},{v2}) in LZ4 frame header')
        content_size = content_size and reader.u64() or None
        dict_id = dict_id and reader.u32() or None
        # Header Checksum
        xxh = xxhash(data[4:reader.tell()]).intdigest() >> 8 & 0xFF
        chk = reader.read_byte()
        if chk != xxh:
            self.log_warn(F'header checksum {chk:02X} does not match computed value {xxh:02X}')

        self.log_debug(lambda: F'dictionary id: {dict_id}')
        self.log_debug(lambda: F'block max: 0x{block_maximum:X}')
        if content_size is not None:
            self.log_debug(lambda: F'chunk max: 0x{content_size:X}')
        self.log_debug(lambda: F'blocks independent: {bool(blocks_independent)}')
        self.log_debug(lambda: F'blocks checksummed: {bool(blocks_checksummed)}')

        blockindex = 0

        while True:
            blockindex += 1
            size = reader.read_integer(31)
            uncompressed = reader.read_bit()
            if not size:
                assert not uncompressed
                break
            self.log_info(F'reading block of size 0x{size:06X}')
            assert reader.byte_aligned
            assert size <= block_maximum, 'block size exceeds maximum size'
            if uncompressed:
                output.write(reader.read(size))
            else:
                self._read_block(reader, output, size)
            if blocks_checksummed:
                with StreamDetour(reader, -size, io.SEEK_CUR):
                    xxh = xxhash(reader.read(size)).intdigest()
                chk = reader.u32()
                if chk != xxh:
                    self.log_warn(F'block {blockindex} had checksum {chk:08X} which did not match computed value {xxh:08X}')
        if content_checksummed:
            self.log_info('computing checksum')
            xxh = xxhash(output.getbuffer()).intdigest()
            chk = reader.u32()
            if chk != xxh:
                self.log_warn(F'the given checksum {chk:08X} did not match the computed checksum {xxh:08X}')
        if not reader.eof:
            pos = reader.tell()
            self.log_warn(F'found {len(data)-pos} additional bytes starting at position 0x{pos:X} after compressed data')
        return output.getbuffer()
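
For reference, the raw block format parsed by _read_block can also be decoded in a few lines of plain Python. The sketch below handles a single block without frame header, checksums or error recovery and assumes well-formed input:

def lz4_block_decompress(block: bytes) -> bytes:
    out = bytearray()
    i = 0
    while i < len(block):
        token = block[i]; i += 1
        litlen = token >> 4                  # literal length in the high nibble
        if litlen == 15:                     # extended by runs of 0xFF bytes
            while True:
                b = block[i]; i += 1
                litlen += b
                if b != 0xFF:
                    break
        out += block[i:i + litlen]
        i += litlen
        if i >= len(block):                  # last sequence carries only literals
            break
        offset = int.from_bytes(block[i:i + 2], 'little'); i += 2
        matchlen = token & 15                # match length in the low nibble
        if matchlen == 15:
            while True:
                b = block[i]; i += 1
                matchlen += b
                if b != 0xFF:
                    break
        matchlen += 4                        # minimum match length is 4
        for _ in range(matchlen):            # byte-wise copy allows overlapping matches
            out.append(out[-offset])
    return bytes(out)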

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class lznt1 (chunk_size=4096)

This unit is implemented in refinery.units.compression.lznt1 and has the following command line interface:

usage: lznt1 [-h] [-L] [-Q] [-0] [-v] [-R] [-c N]

LZNT1 compression and decompression. This compression algorithm is
expected by the Win32 API routine RtlDecompressBuffer, for example.

optional arguments:
  -c, --chunk-size N  Optionally specify the chunk size for compression,
                      default is 0x1000.

generic options:
  -h, --help          Show this help message and exit.
  -L, --lenient       Allow partial results as output.
  -Q, --quiet         Disables all log output.
  -0, --devnull       Do not produce any output.
  -v, --verbose       Specify up to two times to increase log level.
  -R, --reverse       Use the reverse operation.
class lznt1(Unit):
    """
    LZNT1 compression and decompression. This compression algorithm is expected
    by the Win32 API routine `RtlDecompressBuffer`, for example.
    """

    def _decompress_chunk(self, chunk):
        out = B''
        while chunk:
            flags = chunk[0]
            chunk = chunk[1:]
            for i in range(8):
                if not (flags >> i & 1):
                    out += chunk[:1]
                    chunk = chunk[1:]
                else:
                    flag = struct.unpack('<H', chunk[:2])[0]
                    pos = len(out) - 1
                    l_mask = 0xFFF
                    o_shift = 12
                    while pos >= 0x10:
                        l_mask >>= 1
                        o_shift -= 1
                        pos >>= 1
                    length = (flag & l_mask) + 3
                    offset = (flag >> o_shift) + 1
                    if length >= offset:
                        tmp = out[-offset:] * (0xFFF // len(out[-offset:]) + 1)
                        out += tmp[:length]
                    else:
                        out += out[-offset:length - offset]
                    chunk = chunk[2:]
                if len(chunk) == 0:
                    break
        return out

    def _find(self, src, target, max_len):
        result_offset = 0
        result_length = 0
        for i in range(1, max_len):
            offset = src.rfind(target[:i])
            if offset == -1:
                break
            tmp_offset = len(src) - offset
            tmp_length = i
            if tmp_offset == tmp_length:
                tmp = src[offset:] * (0xFFF // len(src[offset:]) + 1)
                for j in range(i, max_len + 1):
                    offset = tmp.rfind(target[:j])
                    if offset == -1:
                        break
                    tmp_length = j
            if tmp_length > result_length:
                result_offset = tmp_offset
                result_length = tmp_length
        if result_length < 3:
            return 0, 0
        return result_offset, result_length

    def _compress_chunk(self, chunk):
        blob = copy.copy(chunk)
        out = B''
        pow2 = 0x10
        l_mask3 = 0x1002
        o_shift = 12
        while len(blob) > 0:
            bits = 0
            tmp = B''
            for i in range(8):
                bits >>= 1
                while pow2 < (len(chunk) - len(blob)):
                    pow2 <<= 1
                    l_mask3 = (l_mask3 >> 1) + 1
                    o_shift -= 1
                if len(blob) < l_mask3:
                    max_len = len(blob)
                else:
                    max_len = l_mask3
                offset1, length1 = self._find(
                    chunk[:len(chunk) - len(blob)], blob, max_len)
                # try to find more compressed pattern
                offset2, length2 = self._find(
                    chunk[:len(chunk) - len(blob) + 1], blob[1:], max_len)
                if length1 < length2:
                    length1 = 0
                if length1 > 0:
                    symbol = ((offset1 - 1) << o_shift) | (length1 - 3)
                    tmp += struct.pack('<H', symbol)
                    bits |= 0x80  # set the highest bit
                    blob = blob[length1:]
                else:
                    tmp += blob[:1]
                    blob = blob[1:]
                if len(blob) == 0:
                    break
            out += struct.pack('B', bits >> (7 - i))
            out += tmp
        return out

    def reverse(self, buf):
        out = B''
        while buf:
            chunk = buf[:self.args.chunk_size]
            compressed = self._compress_chunk(chunk)
            if len(compressed) < len(chunk):  # chunk is compressed
                flags = 0xB000
                header = struct.pack('<H', flags | (len(compressed) - 1))
                out += header + compressed
            else:
                flags = 0x3000
                header = struct.pack('<H', flags | (len(chunk) - 1))
                out += header + chunk
            buf = buf[self.args.chunk_size:]
        return out

    def process(self, data):
        out = io.BytesIO()
        offset = 0
        while offset < len(data):
            try:
                header, = struct.unpack('<H', data[offset:offset + 2])
            except struct.error as err:
                raise RefineryPartialResult(str(err), partial=out.getvalue())
            offset += 2
            size = (header & 0xFFF) + 1
            if size + 1 >= len(data):
                raise RefineryPartialResult(
                    F'chunk header indicates size {size}, but only {len(data)} bytes remain.',
                    partial=out.getvalue()
                )
            chunk = data[offset:offset + size]
            offset += size
            if header & 0x8000:
                chunk = self._decompress_chunk(chunk)
            out.write(chunk)
        return out.getvalue()

    def __init__(self, chunk_size: arg.number('-c', help='Optionally specify the chunk size for compression, default is 0x1000.') = 0x1000):
        super().__init__(chunk_size=chunk_size)
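
Each chunk is preceded by a 16 bit little-endian header in which the low twelve bits store the chunk size minus one and bit 15 marks the chunk as compressed, as can be seen in both process and reverse above. The following hypothetical helper mirrors that header handling:

import struct

def parse_lznt1_chunk_header(word: bytes):
    header, = struct.unpack('<H', word)
    size = (header & 0x0FFF) + 1         # stored as (size - 1)
    compressed = bool(header & 0x8000)   # bit 15: chunk data is compressed
    return size, compressed

# A compressed chunk of 0x10 bytes, as emitted by the reverse operation:
header = struct.pack('<H', 0xB000 | (0x10 - 1))
assert parse_lznt1_chunk_header(header) == (0x10, True)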

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class zl (level=9, window=15, force=False, zlib_header=False, gzip_header=False)

This unit is implemented in refinery.units.compression.zl and has the following command line interface:

usage: zl [-h] [-L] [-Q] [-0] [-v] [-R] [-l N] [-w N] [-f] [-z | -g]

ZLib compression and decompression.

optional arguments:
  -l, --level N      Specify a compression level between 0 and 9.
  -w, --window N     Manually specify the window size between 8 and 15.
  -f, --force        Decompress as far as possible, even if all known
                     methods fail.
  -z, --zlib-header  Use a ZLIB header.
  -g, --gzip-header  Use a GZIP header.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class zl(Unit):
    """
    ZLib compression and decompression.
    """

    def __init__(
        self,
        level  : arg.number('-l', bound=(0, 0X9), help='Specify a compression level between 0 and 9.') = 9,
        window : arg.number('-w', bound=(8, 0XF), help='Manually specify the window size between 8 and 15.') = 15,
        force  : arg.switch('-f', help='Decompress as far as possible, even if all known methods fail.') = False,
        zlib_header: arg.switch('-z', group='MODE', help='Use a ZLIB header.') = False,
        gzip_header: arg.switch('-g', group='MODE', help='Use a GZIP header.') = False
    ):
        if zlib_header and gzip_header:
            raise ValueError('You can only specify one header type (ZLIB or GZIP).')
        return super().__init__(level=level, window=window, force=force, zlib_header=zlib_header, gzip_header=gzip_header)

    def _force_decompress(self, data, mode):
        z = zlib.decompressobj(mode)

        def as_many_as_possible():
            for k in range(len(data)):
                try: yield z.decompress(data[k : k + 1])
                except zlib.error: break

        return B''.join(as_many_as_possible())

    def process(self, data):
        if data[0] == 0x78 or data[0:2] == B'\x1F\x8B' or self.args.zlib_header or self.args.gzip_header:
            mode_candidates = [self.args.window | 0x20, -self.args.window, 0]
        else:
            mode_candidates = [-self.args.window, self.args.window | 0x20, 0]
        for mode in mode_candidates:
            self.log_info(F'using mode {mode:+2d} for decompression')
            try:
                z = zlib.decompressobj(mode)
                return z.decompress(data)
            except zlib.error:
                pass
        if self.args.force:
            return self._force_decompress(data, mode_candidates[0])
        raise ValueError('could not detect any zlib stream.')

    def reverse(self, data):
        mode = -self.args.window
        if self.args.zlib_header:
            mode = -mode
        if self.args.gzip_header:
            mode = -mode | 0x10
        self.log_info(F'using mode {mode:+2d} for compression')
        zl = zlib.compressobj(self.args.level, zlib.DEFLATED, mode)
        zz = zl.compress(data)
        return zz + zl.flush(zlib.Z_FINISH)
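
The mode values used above are the wbits parameter of the zlib module: a negative value selects a raw deflate stream, 8 to 15 expect a ZLIB header, adding 16 selects a GZIP header, and adding 32 auto-detects ZLIB and GZIP during decompression. A minimal sketch using only the standard library (the deflate helper is just for brevity):

import zlib

def deflate(data: bytes, wbits: int) -> bytes:
    z = zlib.compressobj(9, zlib.DEFLATED, wbits)
    return z.compress(data) + z.flush()

data = b'binary refinery' * 32
raw_stream  = deflate(data, -15)      # no header (raw deflate)
zlib_stream = deflate(data, 15)       # ZLIB header, first byte 0x78
gzip_stream = deflate(data, 15 | 16)  # GZIP header, first bytes 1F 8B

assert zlib.decompressobj(15 | 32).decompress(zlib_stream) == data
assert zlib.decompressobj(15 | 32).decompress(gzip_stream) == data
assert zlib.decompressobj(-15).decompress(raw_stream) == data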

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class aes (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.aes and has the following command line interface:

usage: aes [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

AES encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CCM, CFB, CTR, EAX, ECB, GCM, OCB, OFB, OPENPGP,
                     SIV. By default, the CBC mode is used when an IV is
                     provided, and ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class aes(StandardBlockCipherUnit, cipher=AES):
    """
    AES encryption and decryption.
    """
    pass
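
In PyCryptodome terms, the default behaviour described above (CBC when an IV is given, padding removed after decryption) roughly corresponds to the following calls; this is a sketch of the underlying primitives, not the unit's actual code path, and the key and IV are placeholders:

from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad

key = bytes(16)   # 16, 24 or 32 bytes select AES-128, AES-192 or AES-256
iv  = bytes(16)   # the unit likewise defaults to a block of zero bytes

ct = AES.new(key, AES.MODE_CBC, iv).encrypt(pad(b'attack at dawn', AES.block_size))
pt = unpad(AES.new(key, AES.MODE_CBC, iv).decrypt(ct), AES.block_size)
assert pt == b'attack at dawn'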

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class blowfish (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.blowfish and has the following command line interface:

usage: blowfish [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE]
                key

Blowfish encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class blowfish(StandardBlockCipherUnit, cipher=Blowfish):
    """
    Blowfish encryption and decryption.
    """
    pass

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class cast (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.cast and has the following command line interface:

usage: cast [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

CAST encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class cast(StandardBlockCipherUnit, cipher=CAST):
    """
    CAST encryption and decryption.
    """
    pass

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class chacha (key, nonce=b'REFINERY', magic=b'', offset=0, rounds=20)

This unit is implemented in refinery.units.crypto.cipher.chacha and has the following command line interface:

usage: chacha [-h] [-L] [-Q] [-0] [-v] [-R] [-m MAGIC] [-x N] [-r N]
              key [nonce]

ChaCha encryption and decryption. The nonce must be 8 bytes long, as
currently only the original Bernstein algorithm is implemented.

positional arguments:
  key                The encryption key.
  nonce              The nonce. Default is the string REFINERY.

optional arguments:
  -m, --magic MAGIC  The magic constant; depends on the key size by
                     default.
  -x, --offset N     Optionally specify the stream index, default is 0.
  -r, --rounds N     The number of rounds. Has to be an even number.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class chacha(LatinCipherUnit):
    """
    ChaCha encryption and decryption. The nonce must be 8 bytes long, as currently only
    the original Bernstein algorithm is implemented.
    """
    def keystream(self) -> Iterable[int]:
        yield from ChaChaCipher(
            self.args.key,
            self.args.nonce,
            self.args.magic,
            self.args.rounds,
            self.args.offset
        )

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var blocksize
var key_sizes

Inherited members

class chacha20 (key, nonce=b'REFINERY')

This unit is implemented in refinery.units.crypto.cipher.chacha and has the following command line interface:

usage: chacha20 [-h] [-L] [-Q] [-0] [-v] [-R] key [nonce]

ChaCha20 and XChaCha20 encryption and decryption. For ChaCha20, the IV
(nonce) must be 8 or 12 bytes long; for XChaCha20, choose an IV which is
24 bytes long. Invoking this unit for ChaCha20 is functionally equivalent
to chacha with 20 rounds, but this unit uses the PyCryptodome library C
implementation rather than the pure Python implementation used by chacha.

positional arguments:
  key            The encryption key.
  nonce          The nonce. Default is the string REFINERY.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class chacha20(LatinCipherStandardUnit, cipher=ChaCha20):
    """
    ChaCha20 and XChaCha20 encryption and decryption. For ChaCha20, the IV (nonce) must
    be 8 or 12 bytes long; for XChaCha20, choose an IV which is 24 bytes long. Invoking
    this unit for ChaCha20 is functionally equivalent to `refinery.chacha` with 20 rounds,
    but this unit uses the PyCryptodome library C implementation rather than the pure
    Python implementation used by `refinery.chacha`.
    """
    pass
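
Since the unit defers to the PyCryptodome ChaCha20 implementation, the equivalent library calls look roughly as follows; the key and nonce values are placeholders:

from Crypto.Cipher import ChaCha20

key = bytes(32)    # ChaCha20 requires a 256 bit key
nonce = bytes(8)   # 8 or 12 bytes for ChaCha20, 24 bytes selects XChaCha20

ct = ChaCha20.new(key=key, nonce=nonce).encrypt(b'attack at dawn')
pt = ChaCha20.new(key=key, nonce=nonce).decrypt(ct)
assert pt == b'attack at dawn'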

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var blocksize
var key_sizes

Inherited members

class des (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.des and has the following command line interface:

usage: des [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

DES encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class des(StandardBlockCipherUnit, cipher=DES):
    """
    DES encryption and decryption.
    """
    pass

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class des3 (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.des3 and has the following command line interface:

usage: des3 [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

3-DES encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class des3(StandardBlockCipherUnit, cipher=DES3):
    """
    3-DES encryption and decryption.
    """
    pass

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class hc128 (key, stateful=False)

This unit is implemented in refinery.units.crypto.cipher.hc128 and has the following command line interface:

usage: hc128 [-h] [-L] [-Q] [-0] [-v] [-R] [-s] key

HC-128 encryption and decryption.

positional arguments:
  key             The encryption key.

optional arguments:
  -s, --stateful  Do not reset the key stream while processing the chunks
                  of one frame.

generic options:
  -h, --help      Show this help message and exit.
  -L, --lenient   Allow partial results as output.
  -Q, --quiet     Disables all log output.
  -0, --devnull   Do not produce any output.
  -v, --verbose   Specify up to two times to increase log level.
  -R, --reverse   Use the reverse operation.
class hc128(StreamCipherUnit):
    """
    HC-128 encryption and decryption.
    """
    key_sizes = 32

    def keystream(self) -> Iterable[int]:
        return hc128cipher(self.args.key)

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var key_sizes
var blocksize

Inherited members

class rabbit (key, stateful=False, iv=b'')

This unit is implemented in refinery.units.crypto.cipher.rabbit and has the following command line interface:

usage: rabbit [-h] [-L] [-Q] [-0] [-v] [-R] [-s] [-I IV] key

RABBIT encryption and decryption.

positional arguments:
  key             The encryption key.

optional arguments:
  -s, --stateful  Do not reset the key stream while processing the chunks
                  of one frame.
  -I, --iv IV     Optional initialization vector.

generic options:
  -h, --help      Show this help message and exit.
  -L, --lenient   Allow partial results as output.
  -Q, --quiet     Disables all log output.
  -0, --devnull   Do not produce any output.
  -v, --verbose   Specify up to two times to increase log level.
  -R, --reverse   Use the reverse operation.
class rabbit(StreamCipherUnit):
    """
    RABBIT encryption and decryption.
    """
    key_sizes = 16

    def __init__(self, key, stateful=False, iv: arg('-I', '--iv', help='Optional initialization vector.') = B''):
        super().__init__(key=key, iv=iv, stateful=stateful)

    def keystream(self) -> Iterable[int]:
        if len(self.args.iv) not in (0, 8):
            raise ValueError('The IV length must be exactly 8 bytes.')
        return RabbitCipher(self.args.key, self.args.iv)

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var key_sizes
var blocksize

Inherited members

class rc2 (key, iv=b'', padding=None, mode=None)

This unit is implemented in refinery.units.crypto.cipher.rc2 and has the following command line interface:

usage: rc2 [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [-P ALG] [-M MODE] key

RC2 encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -I, --iv IV        Specifies the initialization vector. If none is
                     specified, then a block of zero bytes is used.
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.
  -M, --mode MODE    Choose cipher mode to be used. Possible values are:
                     CBC, CFB, CTR, EAX, ECB, OFB, OPENPGP. By default,
                     the CBC mode is used when an IV is provided, and
                     ECB otherwise.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class rc2(StandardBlockCipherUnit, cipher=ARC2):
    """
    RC2 encryption and decryption.
    """
    pass

Ancestors

Class variables

var blocksize
var key_sizes

Inherited members

class rc4 (key)

This unit is implemented in refinery.units.crypto.cipher.rc4 and has the following command line interface:

usage: rc4 [-h] [-L] [-Q] [-0] [-v] [-R] key

RC4 encryption and decryption.

positional arguments:
  key            The encryption key.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class rc4(StandardCipherUnit, cipher=ARC4):
    """
    RC4 encryption and decryption.
    """
    def __init__(self, key): super().__init__(key)

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var blocksize
var key_sizes

Inherited members

class rc4mod (key, stateful=False, *, size=256)

This unit is implemented in refinery.units.crypto.cipher.rc4mod and has the following command line interface:

usage: rc4mod [-h] [-L] [-Q] [-0] [-v] [-R] [-s] [-t N] key

Implements a modified version of the RC4 stream cipher where the size of
the RC4 SBox can be altered.

positional arguments:
  key             The encryption key.

optional arguments:
  -s, --stateful  Do not reset the key stream while processing the chunks
                  of one frame.
  -t, --size N    Table size, 256 by default.

generic options:
  -h, --help      Show this help message and exit.
  -L, --lenient   Allow partial results as output.
  -Q, --quiet     Disables all log output.
  -0, --devnull   Do not produce any output.
  -v, --verbose   Specify up to two times to increase log level.
  -R, --reverse   Use the reverse operation.
class rc4mod(StreamCipherUnit):
    """
    Implements a modified version of the RC4 stream cipher where the size of the RC4 SBox can be altered.
    """

    def __init__(
        self, key, stateful=False, *,
        size: arg.number('-t', help='Table size, {default} by default.', bound=(1, None)) = 0x100
    ):
        super().__init__(key=key, stateful=stateful, size=size)

    def keystream(self):
        size = self.args.size
        tablerange = range(max(size, 0x100))
        b, table = 0, bytearray(k & 0xFF for k in tablerange)
        for a, keybyte in zip(tablerange, cycle(self.args.key)):
            t = table[a]
            b = (b + keybyte + t) % size
            table[a] = table[b]
            table[b] = t
        self.log_debug(lambda: F'SBOX = {table.hex(" ").upper()}', clip=True)
        b, a = 0, 0
        while True:
            a = (a + 1) % size
            t = table[a]
            b = (b + t) % size
            table[a] = table[b]
            table[b] = t
            yield table[(table[a] + t) % size]
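
For comparison, the textbook RC4 keystream that this unit generalizes (table size fixed at 256, all arithmetic modulo 256) can be written as follows; the assertion uses the well-known 'Key'/'Plaintext' test vector:

from itertools import cycle

def rc4_keystream(key: bytes):
    table = bytearray(range(256))        # key scheduling (KSA)
    j = 0
    for i, k in zip(range(256), cycle(key)):
        j = (j + table[i] + k) & 0xFF
        table[i], table[j] = table[j], table[i]
    i = j = 0
    while True:                          # keystream generation (PRGA)
        i = (i + 1) & 0xFF
        j = (j + table[i]) & 0xFF
        table[i], table[j] = table[j], table[i]
        yield table[(table[i] + table[j]) & 0xFF]

def rc4_crypt(key: bytes, data: bytes) -> bytes:
    return bytes(b ^ k for b, k in zip(data, rc4_keystream(key)))

assert rc4_crypt(b'Key', b'Plaintext').hex() == 'bbf316e8d940af0ad3'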

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var blocksize
var key_sizes

Inherited members

class rncrypt (password)

This unit is implemented in refinery.units.crypto.cipher.rncrypt and has the following command line interface:

usage: rncrypt [-h] [-L] [-Q] [-0] [-v] [-R] password

Implements encryption and decryption using the RNCryptor specification.
See also: https://github.com/RNCryptor

positional arguments:
  password

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class rncrypt(Unit):
    """
    Implements encryption and decryption using the RNCryptor specification.
    See also: https://github.com/RNCryptor
    """
    def __init__(self, password: bytearray):
        super().__init__(password=password)

    def process(self, data: bytes) -> bytes:
        encryption_salt = data[2:10]
        hmac_salt = data[10:18]
        iv = data[18:34]
        cipher_text = data[34:-32]
        hmac_signature = data[-32:]
        encryption_key = self._pbkdf2(self.args.password, encryption_salt)
        hmac_key = self._pbkdf2(self.args.password, hmac_salt)
        if not hmac.compare_digest(self._hmac(hmac_key, data[:-32]), hmac_signature):
            raise ValueError("Failed to verify signature.")
        return unpad(
            self._aes_decrypt(encryption_key, iv, cipher_text),
            block_size=AES.block_size
        )

    def reverse(self, data: bytes) -> bytes:
        prng = Random.new()
        data = pad(data, block_size=AES.block_size)
        encryption_salt = prng.read(8)
        encryption_key = self._pbkdf2(self.args.password, encryption_salt)
        hmac_salt = prng.read(8)
        hmac_key = self._pbkdf2(self.args.password, hmac_salt)
        iv = prng.read(AES.block_size)
        cipher_text = self._aes_encrypt(encryption_key, iv, data)
        new_data = b'\x03\x01' + encryption_salt + hmac_salt + iv + cipher_text
        return new_data + self._hmac(hmac_key, new_data)

    def _aes_encrypt(self, key, iv, text):
        return AES.new(key, AES.MODE_CBC, iv).encrypt(text)

    def _aes_decrypt(self, key, iv, text):
        return AES.new(key, AES.MODE_CBC, iv).decrypt(text)

    def _hmac(self, key, data):
        return hmac.new(key, data, hashlib.sha256).digest()

    def _prf(self, secret, salt):
        return hmac.new(secret, salt, hashlib.sha1).digest()

    def _pbkdf2(self, password, salt, iterations=10000, key_length=32):
        return KDF.PBKDF2(password, salt, dkLen=key_length, count=iterations, prf=self._prf)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class rot (amount=13)

This unit is implemented in refinery.units.crypto.cipher.rot and has the following command line interface:

usage: rot [-h] [-L] [-Q] [-0] [-v] [N]

Rotate the characters of the alphabet by the given amount. The default
amount is 13, providing the common (and weak) string obfuscation method.

positional arguments:
  N              Number of letters to rotate by; Default is 13.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
class rot(Unit):
    """
    Rotate the characters of the alphabet by the given amount. The default
    amount is 13, providing the common (and weak) string obfuscation method.
    """

    def __init__(self, amount: arg.number(help='Number of letters to rotate by; Default is 13.') = 13):
        super().__init__(amount=amount)

    def process(self, data: bytearray):
        rot = self.args.amount % 26
        for index, byte in enumerate(data):
            for alphabet in _LCASE, _UCASE:
                if byte in alphabet:
                    zero = alphabet[0]
                    data[index] = zero + (byte - zero + rot) % 26
                    break
        return data
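
For the default rotation of 13, the rot13 text codec from the standard library produces the same result as this unit on ASCII letters:

import codecs

assert codecs.encode('Binary Refinery', 'rot13') == 'Ovanel Ersvarel'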

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class rsa (key, swapkeys=False, textbook=False, padding=PAD.AUTO, rsautl=False)

This unit is implemented in refinery.units.crypto.cipher.rsa and has the following command line interface:

usage: rsa [-h] [-L] [-Q] [-0] [-v] [-R] [-s] [-t | -p PAD | -r] key

Implements single block RSA encryption and decryption. This unit can be
used to encrypt and decrypt blocks generated by openssl's rsautl tool when
using the mode -verify. When it is executed with a public key for
decryption or with a private key for encryption, it will perform a raw RSA
operation. The results of these operations are (un)padded using EMSA-
PKCS1-v1_5.

positional arguments:
  key                RSA key in PEM, DER, or Microsoft BLOB format.

optional arguments:
  -s, --swapkeys     Swap public and private exponent.
  -t, --textbook     Equivalent to --padding=NONE.
  -p, --padding PAD  Choose one of the following padding modes: AUTO,
                     NONE, OAEP, PKCS15, PKCS10. The default is AUTO.
  -r, --rsautl       Act as rsautl from OpenSSL; this is equivalent to
                     --swapkeys --padding=PKCS10

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
class rsa(Unit):
    """
    Implements single block RSA encryption and decryption. This unit can be used to encrypt
    and decrypt blocks generated by openssl's `rsautl` tool when using the mode `-verify`.
    When it is executed with a public key for decryption or with a private key for encryption,
    it will perform a raw RSA operation. The results of these operations are (un)padded using
    EMSA-PKCS1-v1_5.
    """
    def __init__(
        self,
        key: arg(help='RSA key in PEM, DER, or Microsoft BLOB format.'),
        swapkeys: arg.switch('-s', help='Swap public and private exponent.') = False,
        textbook: arg.switch('-t', group='PAD', help='Equivalent to --padding=NONE.') = False,
        padding : arg.option('-p', group='PAD', choices=PAD,
            help='Choose one of the following padding modes: {choices}. The default is AUTO.') = PAD.AUTO,
        rsautl  : arg.switch('-r', group='PAD',
            help='Act as rsautl from OpenSSL; this is equivalent to --swapkeys --padding=PKCS10') = False,
    ):
        padding = arg.as_option(padding, PAD)
        if textbook:
            if padding != PAD.AUTO:
                raise ValueError('Conflicting padding options!')
            padding = padding.NONE
        if rsautl:
            if padding and padding != PAD.PKCS10:
                raise ValueError('Conflicting padding options!')
            swapkeys = True
            padding = PAD.PKCS10

        super().__init__(key=key, textbook=textbook, padding=padding, swapkeys=swapkeys)

        self._key_hash = None
        self._key_data = None

    @property
    def blocksize(self) -> int:
        return self.key.size_in_bytes()

    @property
    def _blocksize_plain(self) -> int:
        # PKCS#1 v1.5 padding is at least 11 bytes.
        return self.blocksize - 11

    @property
    def pub(self):
        return self.key.d if self.args.swapkeys else self.key.e

    @property
    def prv(self):
        return self.key.e if self.args.swapkeys else self.key.d

    def _get_msg(self, data):
        msg = int.from_bytes(data, byteorder='big')
        if msg > self.key.n:
            raise ValueError(F'This key can only handle messages of size {self.blocksize}.')
        return msg

    def _encrypt_raw(self, data):
        return pow(
            self._get_msg(data),
            self.pub,
            self.key.n
        ).to_bytes(self.blocksize, byteorder='big')

    def _decrypt_raw(self, data):
        return pow(
            self._get_msg(data),
            self.prv,
            self.key.n
        ).to_bytes(self.blocksize, byteorder='big')

    def _unpad(self, data, head, padbyte=None):
        if len(data) > self.blocksize:
            raise ValueError(F'This key can only handle messages of size {self.blocksize}.')
        if data.startswith(head):
            pos = data.find(B'\0', 2)
            if pos > 0:
                pad = data[2:pos]
                if padbyte is None or all(b == padbyte for b in pad):
                    return data[pos + 1:]
        raise ValueError('Incorrect padding')

    def _pad(self, data, head, padbyte=None):
        if len(data) > self._blocksize_plain:
            raise ValueError(F'This key can only encrypt messages of size at most {self._blocksize_plain}.')
        pad = self.blocksize - len(data) - len(head) - 1
        if padbyte is not None:
            padding = pad * bytes((padbyte,))
        else:
            padding = bytearray(1)
            while not all(padding):
                padding = bytearray(filter(None, padding))
                padding.extend(get_random_bytes(pad - len(padding)))
        return head + padding + B'\0' + data

    def _unpad_pkcs10(self, data):
        return self._unpad(data, B'\x00\x01', 0xFF)

    def _unpad_pkcs15(self, data):
        return self._unpad(data, B'\x00\x02', None)

    def _pad_pkcs10(self, data):
        return self._pad(data, B'\x00\x01', 0xFF)

    def _pad_pkcs15(self, data):
        return self._pad(data, B'\x00\x02', None)

    def _decrypt_block_OAEP(self, data):
        self.log_debug('Attempting decryption with PyCrypto PKCS1 OAEP.')
        result = PKCS1_OAEP.new(self.key).decrypt(data)
        if result is not None:
            return result
        raise ValueError('OAEP decryption was unsuccessful.')

    def _encrypt_block_OAEP(self, data):
        self.log_debug('Attempting encryption with PyCrypto PKCS1 OAEP.')
        result = PKCS1_OAEP.new(self.key).encrypt(data)
        if result is not None:
            return result
        raise ValueError('OAEP encryption was unsuccessful.')

    def _decrypt_block(self, data):
        if self._oaep and self._pads in {PAD.AUTO, PAD.OAEP}:
            try:
                return self._decrypt_block_OAEP(data)
            except ValueError:
                if self._pads: raise
                self.log_debug('PyCrypto primitives failed, no longer attempting OAEP.')
                self._oaep = False

        result = self._decrypt_raw(data)

        if self._pads == PAD.NONE:
            return result
        elif self._pads == PAD.PKCS10:
            return self._unpad_pkcs10(result)
        elif self._pads == PAD.PKCS15:
            return self._unpad_pkcs15(result)
        elif self._pads == PAD.AUTO:
            with suppress(ValueError):
                data = self._unpad_pkcs10(result)
                self.log_info('Detected PKCS1.0 padding.')
                self._pads = PAD.PKCS10
                return data
            with suppress(ValueError):
                data = self._unpad_pkcs15(result)
                self.log_info('Detected PKCS1.5 padding.')
                self._pads = PAD.PKCS15
                return data
            self.log_warn('No padding worked, returning raw decrypted blocks.')
            self._pads = PAD.NONE
            return result
        else:
            raise ValueError(F'Invalid padding value: {self._pads!r}')

    def _encrypt_block(self, data):
        if self._pads in {PAD.AUTO, PAD.OAEP}:
            try:
                return self._encrypt_block_OAEP(data)
            except ValueError:
                if self._pads: raise
                self.log_debug('PyCrypto primitives for OAEP failed, falling back to PKCS1.5.')
                self._pads = PAD.PKCS15

        if self._pads == PAD.PKCS15:
            data = self._pad_pkcs15(data)
        elif self._pads == PAD.PKCS10:
            data = self._pad_pkcs10(data)

        return self._encrypt_raw(data)

    @property
    def key(self) -> RSA.RsaKey:
        key_blob = self.args.key
        key_hash = hash(key_blob)
        if key_hash != self._key_hash:
            self._key_hash = key_hash
            self._key_data = normalize_rsa_key(key_blob)
        return self._key_data

    def process(self, data):
        if not self.key.has_private():
            try:
                return self._unpad_pkcs10(self._encrypt_raw(data))
            except Exception as E:
                raise ValueError('A public key was given for decryption and rsautl mode resulted in an error.') from E
        self._oaep = True
        self._pads = self.args.padding
        return B''.join(self._decrypt_block(block) for block in splitchunks(data, self.blocksize))

    def reverse(self, data):
        self._pads = self.args.padding
        return B''.join(self._encrypt_block(block) for block in splitchunks(data, self._blocksize_plain))
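
The raw operation in _encrypt_raw and _decrypt_raw is a single modular exponentiation over the big-endian integer encoding of a block, and the padding handled by _pad_pkcs10 and _unpad_pkcs10 is the PKCS#1 v1.5 type 1 block 00 01 FF .. FF 00 followed by the message. The helpers below are hypothetical illustrations using a toy key; real keys are 2048 bits or more:

def rsa_raw(data: bytes, exponent: int, modulus: int) -> bytes:
    # Textbook RSA on a single block, as in _encrypt_raw/_decrypt_raw above.
    size = (modulus.bit_length() + 7) // 8
    message = int.from_bytes(data, 'big')
    assert message < modulus
    return pow(message, exponent, modulus).to_bytes(size, 'big')

n, e, d = 3233, 17, 2753                 # toy key: p=61, q=53
block = rsa_raw(b'\x41', e, n)
assert rsa_raw(block, d, n).lstrip(b'\x00') == b'\x41'

def pad_pkcs1_v15_type1(data: bytes, blocksize: int) -> bytes:
    # Block layout 00 01 FF .. FF 00 data; the format demands at least eight
    # padding bytes, which is why the unit reserves 11 bytes per block.
    return b'\x00\x01' + b'\xFF' * (blocksize - len(data) - 3) + b'\x00' + data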

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Instance variables

var blocksize
@property
def blocksize(self) -> int:
    return self.key.size_in_bytes()
var pub
@property
def pub(self):
    return self.key.d if self.args.swapkeys else self.key.e
var prv
@property
def prv(self):
    return self.key.e if self.args.swapkeys else self.key.d
var key
@property
def key(self) -> RSA.RsaKey:
    key_blob = self.args.key
    key_hash = hash(key_blob)
    if key_hash != self._key_hash:
        self._key_hash = key_hash
        self._key_data = normalize_rsa_key(key_blob)
    return self._key_data

Inherited members

class rsakey (output=RSAFormat.PEM)

This unit is implemented in refinery.units.crypto.cipher.rsakey and has the following commandline Interface:

usage: rsakey [-h] [-L] [-Q] [-0] [-v] [RSAFormat]

Parse RSA keys in various formats; PEM, DER, Microsoft BLOB, and W3C-XKMS
(XML) format are supported.

positional arguments:
  RSAFormat      Select an output format (PEM, DER, XKMS, TEXT, JSON),
                 default is PEM.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class rsakey(Unit):
    """
    Parse RSA keys in various formats; PEM, DER, Microsoft BLOB, and W3C-XKMS (XML) format are supported.
    """
    def __init__(self, output: arg.option(
        choices=RSAFormat,
        help='Select an output format ({choices}), default is {default}.') = RSAFormat.PEM
    ):
        super().__init__(output=arg.as_option(output, RSAFormat))

    def _xkms_wrap(self, number: int):
        size, r = divmod(number.bit_length(), 8)
        size += int(bool(r))
        return base64.b64encode(number.to_bytes(size, 'big'))

    def process(self, data):
        key = normalize_rsa_key(data)
        out = self.args.output
        if out is RSAFormat.PEM:
            yield key.export_key('PEM')
            return
        if out is RSAFormat.DER:
            yield key.export_key('DER')
            return
        components = {
            'Modulus' : key.n,
            'Exponent': key.e,
        }
        if key.has_private():
            decoded = DerSequence()
            decoded.decode(key.export_key('DER'))
            it = itertools.islice(decoded, 3, None)
            for v in ('D', 'P', 'Q', 'DP', 'DQ', 'InverseQ'):
                try:
                    components[v] = next(it)
                except StopIteration:
                    break
        if out is RSAFormat.XKMS:
            for tag in components:
                components[tag] = base64.b64encode(number.long_to_bytes(components[tag])).decode('ascii')
            tags = '\n'.join(F'\t<{tag}>{value}</{tag}>' for tag, value in components.items())
            yield F'<RSAKeyPair>\n{tags}\n</RSAKeyPair>'.encode(self.codec)
            return
        for tag in components:
            components[tag] = F'{components[tag]:X}'
        if out is RSAFormat.JSON:
            yield json.dumps(components).encode(self.codec)
            return
        if out is RSAFormat.TEXT:
            table = list(flattened(components))
            for key, value in table:
                value = '\n'.join(F'{L}' for L in textwrap.wrap(F'0x{value}', 80))
                yield F'-- {key+" ":-<77}\n{value!s}'.encode(self.codec)
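
The private key components above are taken from the PKCS#1 RSAPrivateKey sequence, which stores version, n, e, d, p, q, d mod (p-1), d mod (q-1) and q^-1 mod p in that order; the unit skips the first three fields. A small cross-check of that ordering with PyCryptodome (independent of the unit):

from Crypto.PublicKey import RSA
from Crypto.Util.asn1 import DerSequence

key = RSA.generate(2048)
seq = DerSequence()
seq.decode(key.export_key('DER'))  # PKCS#1 RSAPrivateKey
assert (seq[1], seq[2], seq[3]) == (key.n, key.e, key.d)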

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class salsa (key, nonce=b'REFINERY', magic=b'', offset=0, rounds=20)

This unit is implemented in refinery.units.crypto.cipher.salsa and has the following commandline Interface:

usage: salsa [-h] [-L] [-Q] [-0] [-v] [-R] [-m MAGIC] [-x N] [-r N]
             key [nonce]

Salsa encryption and decryption. The nonce must be 8 bytes long.

positional arguments:
  key                The encryption key.
  nonce              The nonce. Default is the string REFINERY.

optional arguments:
  -m, --magic MAGIC  The magic constant; depends on the key size by
                     default.
  -x, --offset N     Optionally specify the stream index, default is 0.
  -r, --rounds N     The number of rounds. Has to be an even number.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class salsa(LatinCipherUnit):
    """
    Salsa encryption and decryption. The nonce must be 8 bytes long.
    """
    def keystream(self) -> Iterable[int]:
        yield from SalsaCipher(
            self.args.key,
            self.args.nonce,
            self.args.magic,
            self.args.rounds,
            self.args.offset
        )

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var blocksize
var key_sizes

Inherited members

class salsa20 (key, nonce=b'REFINERY')

This unit is implemented in refinery.units.crypto.cipher.salsa and has the following commandline Interface:

usage: salsa20 [-h] [-L] [-Q] [-0] [-v] [-R] key [nonce]

Salsa20 encryption and decryption. This unit is functionally equivalent to
salsa with 20 rounds, but it uses the PyCryptodome library C
implementation rather than the pure Python implementation used by salsa.

positional arguments:
  key            The encryption key.
  nonce          The nonce. Default is the string REFINERY.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class salsa20(LatinCipherStandardUnit, cipher=Salsa20):
    """
    Salsa20 encryption and decryption. This unit is functionally equivalent to `refinery.salsa`
    with 20 rounds, but it uses the PyCryptodome library C implementation rather than the pure
    Python implementation used by `refinery.salsa`.
    """
    pass
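
Since the unit is a thin wrapper around the PyCryptodome Salsa20 primitive, the same key stream can also be produced directly. A minimal sketch (the all-zero 32-byte key is an arbitrary example):

from Crypto.Cipher import Salsa20

key = bytes(32)  # Salsa20 accepts 16 or 32 byte keys
ct = Salsa20.new(key=key, nonce=b'REFINERY').encrypt(b'attack at dawn')
assert Salsa20.new(key=key, nonce=b'REFINERY').decrypt(ct) == b'attack at dawn'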

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var blocksize
var key_sizes

Inherited members

class seal (key, stateful=False)

This unit is implemented in refinery.units.crypto.cipher.seal and has the following commandline Interface:

usage: seal [-h] [-L] [-Q] [-0] [-v] [-R] [-s] key

SEAL encryption and decryption.

positional arguments:
  key             The encryption key.

optional arguments:
  -s, --stateful  Do not reset the key stream while processing the chunks
                  of one frame.

generic options:
  -h, --help      Show this help message and exit.
  -L, --lenient   Allow partial results as output.
  -Q, --quiet     Disables all log output.
  -0, --devnull   Do not produce any output.
  -v, --verbose   Specify up to two times to increase log level.
  -R, --reverse   Use the reverse operation.
Expand source code Browse git
class seal(StreamCipherUnit):
    """
    SEAL encryption and decryption.
    """
    key_sizes = 20

    def keystream(self) -> Iterable[bytes]:
        return SEAL_Cipher(self.args.key)

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var key_sizes
var blocksize

Inherited members

class secstr (key=b'\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10', iv=None)

This unit is implemented in refinery.units.crypto.cipher.secstr and has the following commandline Interface:

usage: secstr [-h] [-L] [-Q] [-0] [-v] [-R] [-I IV] [key]

Implements the AES-based encryption scheme used by the PowerShell commands
ConvertFrom-SecureString and ConvertTo-SecureString.

positional arguments:
  key            Secure string encryption 16-byte AES key; the default is
                 the bytes from 1 to 16.

optional arguments:
  -I, --iv IV    Optionally specify an IV to use for encryption.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class secstr(Unit):
    """
    Implements the AES-based encryption scheme used by the PowerShell commands
    `ConvertFrom-SecureString` and `ConvertTo-SecureString`.
    """

    # This is a magic header value used for PowerShell secure strings.
    _MAGIC = bytes((
        0xEF, 0xAE, 0x3D, 0xD9, 0xDD, 0x75, 0xD7, 0xAE, 0xF8, 0xDD, 0xFD, 0x38,
        0xDB, 0x7E, 0x35, 0xDD, 0xBD, 0x7A, 0xD3, 0x9D, 0x1A, 0xE7, 0x7E, 0x39))

    # Secure strings include a decimal number formatted as a string directly
    # following the header. Presumably, this is the PowerShell version.
    _PSVER = 2

    def __init__(
        self, key: arg(
            help='Secure string encryption 16-byte AES key; the default is the bytes from 1 to 16.'
        ) = bytes(range(1, 17)),
        iv: arg('-I', help='Optionally specify an IV to use for encryption.') = None
    ):
        super().__init__(key=key, iv=iv)

    @property
    def key(self):
        key = self.args.key
        if len(key) != 0x10:
            raise ValueError('The encryption key has to be 16 bytes long.')
        return key

    @property
    def iv(self):
        iv = self.args.iv
        if iv is not None and len(iv) != 0x10:
            raise ValueError('The IV has to be 16 bytes long.')
        return iv

    def reverse(self, data):
        ivec = self.iv or urandom(0x10)
        if len(ivec) != 0x10:
            raise ValueError(self._IVERR)
        cipher = AES.new(self.key, AES.MODE_CBC, ivec)
        data = data.decode('latin-1').encode('utf-16LE')
        data = cipher.encrypt(pad(data, block_size=0x10))
        data = base64.b16encode(data).lower().decode('ascii')
        ivec = base64.b64encode(ivec).decode('ascii')
        data = '|'.join(('%d' % self._PSVER, ivec, data)).encode('utf-16LE')
        return base64.b64encode(self._MAGIC + data)

    def process(self, data):
        head, ivec, data = base64.b64decode(data).split(b'|\0')
        self.log_info('head:', head.hex())
        ivec = base64.b64decode(ivec.decode('utf-16LE'))
        self.log_info('ivec:', ivec.hex())
        data = base64.b16decode(data.decode('utf-16LE'), casefold=True)
        if len(data) % 0x10 != 0:
            self.log_info('data not block-aligned, padding with zeros')
            data += B'\0' * (0x10 - len(data) % 0x10)
        cipher = AES.new(self.key, AES.MODE_CBC, ivec)
        data = cipher.decrypt(data)
        try:
            data = unpad(data, block_size=0x10)
        except Exception:
            self.log_warn('decrypted data does not have PKCS7 padding')
        for p in range(0x10):
            try:
                return data[-p:].decode('utf-16LE').encode('latin-1')
            except UnicodeDecodeError:
                pass
            except UnicodeEncodeError:
                pass
        self.log_warn('result is not a padded unicode string, key is likely wrong')
        return data
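
For reference, the blob handled above has the layout base64(MAGIC + UTF-16LE("<version>|<base64 IV>|<hex ciphertext>")), where the ciphertext is AES-CBC over the UTF-16LE plaintext. A standalone sketch that builds such a blob with PyCryptodome (independent of the unit; plaintext and IV are arbitrary examples):

import base64
from os import urandom
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad

MAGIC = bytes.fromhex('EFAE3DD9DD75D7AEF8DDFD38DB7E35DDBD7AD39D1AE77E39')

key = bytes(range(1, 17))  # the unit's default key
iv = urandom(16)
body = AES.new(key, AES.MODE_CBC, iv).encrypt(pad('Hello'.encode('utf-16le'), 16))
text = '|'.join(('2', base64.b64encode(iv).decode(), body.hex()))
blob = base64.b64encode(MAGIC + text.encode('utf-16le'))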

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Instance variables

var key
Expand source code Browse git
@property
def key(self):
    key = self.args.key
    if len(key) != 0x10:
        raise ValueError('The encryption key has to be 16 bytes long.')
    return key
var iv
Expand source code Browse git
@property
def iv(self):
    iv = self.args.iv
    if iv is not None and len(iv) != 0x10:
        raise ValueError('The IV has to be 16 bytes long.')
    return iv

Inherited members

class vigenere (key, alphabet='abcdefghijklmnopqrstuvwxyz', operator='add', case_sensitive=False, ignore_unknown=False)

This unit is implemented in refinery.units.crypto.cipher.vigenere and has the following commandline Interface:

usage: vigenere [-h] [-L] [-Q] [-0] [-v] [-R] [-: OP] [-c] [-i]
                key [alphabet]

Encryption and decryption using the Vigenère-Bellaso polyalphabetic
cipher.

positional arguments:
  key                   The encryption key
  alphabet              The alphabet, by default the Latin one is used:
                        "abcdefghijklmnopqrstuvwxyz"

optional arguments:
  -:, --operator OP     Choose the vigenere block operation. The default
                        is add, and the available options are: add, sub,
                        xor
  -c, --case-sensitive  Unless this option is set, the key will be case
                        insensitive. Uppercase letters from the input are
                        transformed using the same shift as would be the
                        lowercase variant, but case is retained.
  -i, --ignore-unknown  Unless this option is set, the key stream will be
                        iterated even for letters that are not contained
                        in the alphabet.

generic options:
  -h, --help            Show this help message and exit.
  -L, --lenient         Allow partial results as output.
  -Q, --quiet           Disables all log output.
  -0, --devnull         Do not produce any output.
  -v, --verbose         Specify up to two times to increase log level.
  -R, --reverse         Use the reverse operation.
Expand source code Browse git
class vigenere(Unit):
    """
    Encryption and decryption using the Vigenère-Bellaso polyalphabetic cipher.
    """

    def __init__(
        self,
        key: arg(type=str, help='The encryption key'),
        alphabet: arg(
            help='The alphabet, by default the Latin one is used: "{default}"'
        ) = 'abcdefghijklmnopqrstuvwxyz',
        operator: arg.choice('-:', choices=['add', 'sub', 'xor'], metavar='OP', help=(
            'Choose the vigenere block operation. The default is {default}, and the available options are: {choices}')) = 'add',
        case_sensitive: arg.switch('-c', help=(
            'Unless this option is set, the key will be case insensitive. Uppercase letters from the input are transformed '
            'using the same shift as would be the lowercase variant, but case is retained.')) = False,
        ignore_unknown: arg.switch('-i', help=(
            'Unless this option is set, the key stream will be iterated even '
            'for letters that are not contained in the alphabet.'
        )) = False
    ):
        if not callable(operator):
            operator = {
                'add': __add__,
                'sub': __sub__,
                'xor': __xor__,
            }.get(operator.lower(), None)
            if operator is None:
                raise ValueError(F'The value {operator!r} is not valid as an operator.')
        if not case_sensitive:
            key = key.lower()
            alphabet = alphabet.lower()
            if len(set(alphabet)) != len(alphabet):
                raise ValueError('Duplicate entries detected in alphabet.')
        if not set(key) <= set(alphabet):
            raise ValueError('key contains letters which are not from the given alphabet')
        self.superinit(super(), **vars())

    def _tabula_recta(self, data, reverse=True):
        keystream = cycle(self.args.key)
        alphabet_size = len(self.args.alphabet)
        op = self.args.operator
        if reverse:
            op = _opeator_inverse[op]
        for letter in data:
            uppercase = not self.args.case_sensitive and letter.isupper()
            if uppercase:
                letter = letter.lower()
            try:
                position = self.args.alphabet.index(letter)
            except ValueError:
                yield letter
                if not self.args.ignore_unknown:
                    next(keystream)
                continue
            shift = self.args.alphabet.index(next(keystream))
            result = self.args.alphabet[op(position, shift) % alphabet_size]
            yield result.upper() if uppercase else result

    @unicoded
    def process(self, data):
        return ''.join(self._tabula_recta(data, True))

    @unicoded
    def reverse(self, data):
        return ''.join(self._tabula_recta(data, False))
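
With the default add operator, the unit's reverse operation amounts to the textbook tabula recta encryption. A standalone sketch using the classic test vector (independent of the unit):

from itertools import cycle

ALPHABET = 'abcdefghijklmnopqrstuvwxyz'

def vigenere_add(text: str, key: str) -> str:
    # shift each letter by the alphabet position of the corresponding key letter
    return ''.join(
        ALPHABET[(ALPHABET.index(c) + ALPHABET.index(k)) % len(ALPHABET)]
        for c, k in zip(text, cycle(key)))

assert vigenere_add('attackatdawn', 'lemon') == 'lxfopvefrnhr'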

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xtea (key, padding=None)

This unit is implemented in refinery.units.crypto.cipher.xtea and has the following commandline Interface:

usage: xtea [-h] [-L] [-Q] [-0] [-v] [-R] [-P ALG] key

XTEA encryption and decryption.

positional arguments:
  key                The encryption key.

optional arguments:
  -P, --padding ALG  Choose a padding algorithm (PKCS7, ISO7816, X923,
                     RAW). The RAW algorithm does nothing. By default, all
                     other algorithms are attempted. In most cases, the
                     data was not correctly decrypted if none of these
                     work.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
  -R, --reverse      Use the reverse operation.
Expand source code Browse git
class xtea(BlockCipherUnitBase):
    """
    XTEA encryption and decryption.
    """
    blocksize = 16
    key_sizes = 16

    def __init__(self, key, padding=None):
        super().__init__(key=key, padding=padding)

    @property
    def key(self):
        return struct.unpack('4I', self.args.key)

    def encrypt(self, data):
        it = iter(self._load32(data))
        return self._stor64(self._encrypt_block(y, z, *self.key) for y, z in zip(it, it))

    def decrypt(self, data):
        it = iter(self._load32(data))
        return self._stor64(self._decrypt_block(y, z, *self.key) for y, z in zip(it, it))

    @staticmethod
    def _encrypt_block(y, z, k1, k2, k3, k4):
        sum_t = 0
        delta = 0x9E3779B9
        for _ in range(32, 0, -1):
            sum_t = (sum_t + delta) & 0xFFFFFFFF
            y = y + ((z << 4) + k1 ^ z + sum_t ^ (z >> 5) + k2) & 0xFFFFFFFF
            z = z + ((y << 4) + k3 ^ y + sum_t ^ (y >> 5) + k4) & 0xFFFFFFFF
        return y + (z << 0x20)

    @staticmethod
    def _decrypt_block(y, z, k1, k2, k3, k4):
        sum_t = 0xC6EF3720
        delta = 0x9E3779B9
        for _ in range(32, 0, -1):
            z = z - ((y << 4) + k3 ^ y + sum_t ^ (y >> 5) + k4) & 0xFFFFFFFF
            y = y - ((z << 4) + k1 ^ z + sum_t ^ (z >> 5) + k2) & 0xFFFFFFFF
            sum_t = (sum_t - delta) & 0xFFFFFFFF
        return y + (z << 0x20)

    @staticmethod
    def _load32(vector):
        Q, R = divmod(len(vector), 4)
        if R > 0:
            raise ValueError('Data not padded to a 16 byte boundary.')
        yield from struct.unpack(F'{Q}I', vector)

    @staticmethod
    def _stor64(vector):
        vector = tuple(vector)
        return struct.pack(F'{len(vector)}Q', *vector)

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var blocksize
var key_sizes

Instance variables

var key
Expand source code Browse git
@property
def key(self):
    return struct.unpack('4I', self.args.key)

Inherited members

class adler32 (text=False)

This unit is implemented in refinery.units.crypto.hash.checksums and has the following commandline Interface:

usage: adler32 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the Adler32 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class adler32(HashUnit):
    """
    Returns the Adler32 Hash of the input data.
    """
    def _algorithm(self, data: bytes) -> bytes:
        return struct.pack('>I', zlib.adler32(data))

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class crc32 (text=False)

This unit is implemented in refinery.units.crypto.hash.checksums and has the following commandline Interface:

usage: crc32 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the CRC32 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class crc32(HashUnit):
    """
    Returns the CRC32 Hash of the input data.
    """
    def _algorithm(self, data: bytes) -> bytes:
        return struct.pack('>I', zlib.crc32(data))

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class blk224 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: blk224 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the BLK224 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class blk224(HashUnit):
    """
    Returns the BLK224 Hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.blake2b(data, digest_size=28)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class blk256 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: blk256 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the BLK256 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class blk256(HashUnit):
    """
    Returns the BLK256 Hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.blake2b(data, digest_size=32)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class blk384 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: blk384 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the BLK384 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class blk384(HashUnit):
    """
    Returns the BLK384 Hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.blake2b(data, digest_size=48)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class blk512 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: blk512 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the BLK512 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class blk512(HashUnit):
    """
    Returns the BLK512 Hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.blake2b(data, digest_size=64)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class md2 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: md2 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the MD2 hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class md2(HashUnit):
    """
    Returns the MD2 hash of the input data.
    """
    def _algorithm(self, data):
        from Crypto.Hash import MD2
        return MD2.new(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class md4 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: md4 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the MD4 hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class md4(HashUnit):
    """
    Returns the MD4 hash of the input data.
    """
    def _algorithm(self, data):
        from Crypto.Hash import MD4
        return MD4.new(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class md5 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: md5 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the MD5 hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class md5(HashUnit):
    """
    Returns the MD5 hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.md5(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class sha1 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha1 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA1 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha1(HashUnit):
    """
    Returns the SHA1 Hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.sha1(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class sha224 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha224 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA224 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha224(HashUnit):
    """
    Returns the SHA224 Hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.sha224(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class sha256 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha256 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA256 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha256(HashUnit):
    """
    Returns the SHA256 Hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.sha256(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class sha384 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha384 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA384 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha384(HashUnit):
    """
    Returns the SHA384 Hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.sha384(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class sha512 (text=False)

This unit is implemented in refinery.units.crypto.hash.cryptographic and has the following commandline Interface:

usage: sha512 [-h] [-L] [-Q] [-0] [-v] [-t]

Returns the SHA512 Hash of the input data.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class sha512(HashUnit):
    """
    Returns the SHA512 Hash of the input data.
    """
    def _algorithm(self, data):
        return hashlib.sha512(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class imphash (text=False)

This unit is implemented in refinery.units.crypto.hash.imphash and has the following commandline Interface:

usage: imphash [-h] [-L] [-Q] [-0] [-v] [-t]

Implements the import hash for PE files.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class imphash(HashUnit):
    """
    Implements the import hash for PE files.
    """

    def _algorithm(self, data):
        pe = PE(data=data, fast_load=True)
        pe.parse_data_directories(directories=[IMAGE_DIRECTORY_ENTRY_IMPORT])
        th = pe.get_imphash()
        if not th:
            raise ValueError('no import directory.')
        return bytes.fromhex(th)
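
The unit defers the computation to the pefile library; the same value can be obtained directly. A minimal sketch (sample.exe is a hypothetical input file):

import pefile

pe = pefile.PE('sample.exe', fast_load=True)
pe.parse_data_directories(
    directories=[pefile.DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']])
print(pe.get_imphash())  # lowercase hex digest; empty string if there are no imports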

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class mmh128x32 (seed=0, text=False)

This unit is implemented in refinery.units.crypto.hash.murmur and has the following commandline Interface:

usage: mmh128x32 [-h] [-L] [-Q] [-0] [-v] [-t] [N]

Returns the 128bit Murmur Hash of the input data, 32bit variant.

positional arguments:
  N              optional seed value

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class mmh128x32(MurMurHash):
    """
    Returns the 128bit Murmur Hash of the input data, 32bit variant.
    """
    def _algorithm(self, data: bytes) -> bytes:
        return mmh128digest32(data, self.args.seed)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class mmh128x64 (seed=0, text=False)

This unit is implemented in refinery.units.crypto.hash.murmur and has the following commandline Interface:

usage: mmh128x64 [-h] [-L] [-Q] [-0] [-v] [-t] [N]

Returns the 128bit Murmur Hash of the input data, 64bit variant.

positional arguments:
  N              optional seed value

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class mmh128x64(MurMurHash):
    """
    Returns the 128bit Murmur Hash of the input data, 64bit variant.
    """
    def _algorithm(self, data: bytes) -> bytes:
        return mmh128digest64(data, self.args.seed)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class mmh32 (seed=0, text=False)

This unit is implemented in refinery.units.crypto.hash.murmur and has the following commandline Interface:

usage: mmh32 [-h] [-L] [-Q] [-0] [-v] [-t] [N]

Returns the 32bit Murmur Hash of the input data.

positional arguments:
  N              optional seed value

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class mmh32(MurMurHash):
    """
    Returns the 32bit Murmur Hash of the input data.
    """
    def _algorithm(self, data: bytes) -> bytes:
        return mmh32digest(data, self.args.seed)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xxh (text=False)

This unit is implemented in refinery.units.crypto.hash.xxhash and has the following commandline Interface:

usage: xxh [-h] [-L] [-Q] [-0] [-v] [-t]

Implements the xxHash hashing algorithm.

optional arguments:
  -t, --text     Output a hexadecimal representation of the hash.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class xxh(HashUnit):
    """
    Implements the xxHash hashing algorithm.
    """
    def _algorithm(self, data):
        return xxhash(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class CryptDeriveKey (size, hash='MD5')

This unit is implemented in refinery.units.crypto.keyderive.CryptDeriveKey and has the following commandline Interface:

usage: CryptDeriveKey [-h] [-L] [-Q] [-0] [-v] size [hash]

An implementation of the CryptDeriveKey routine available from the Win32
API.

positional arguments:
  size           The number of bytes to generate.
  hash           Specify one of these algorithms (default is MD5): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class CryptDeriveKey(KeyDerivation):
    """
    An implementation of the CryptDeriveKey routine available from the Win32 API.
    """

    def __init__(self, size, hash='MD5'):
        if size > 21:
            raise ValueError('The CryptDeriveKey specification only provides keys up to length 21.')
        super().__init__(size=size, salt=None, hash=hash)

    def process(self, data):
        def digest(x):
            return self.hash.new(x).digest()
        if self.args.hash in (HASH.SHA224, HASH.SHA256, HASH.SHA384, HASH.SHA512):
            return digest(data)[:self.args.size]
        max_size = 2 * self.hash.digest_size
        value = digest(data)
        del data
        buffer1 = bytearray([0x36] * 64)
        buffer2 = bytearray([0x5C] * 64)
        for k, b in enumerate(value):
            buffer1[k] ^= b
            buffer2[k] ^= b
        buffer = digest(buffer1) + digest(buffer2)
        if self.args.size > max_size:
            raise RefineryPartialResult(
                F'too many bytes requested, can only provide {max_size}',
                partial=buffer
            )
        return buffer[:self.args.size]
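
For digests outside the SHA-2 family, the routine above expands the password hash with the 0x36/0x5C constants familiar from HMAC. A stdlib-only restatement of that expansion, for illustration only:

import hashlib

def crypt_derive_key(password: bytes, size: int, algorithm: str = 'md5') -> bytes:
    digest = hashlib.new(algorithm, password).digest()
    inner = bytearray([0x36] * 64)
    outer = bytearray([0x5C] * 64)
    for k, b in enumerate(digest):
        inner[k] ^= b
        outer[k] ^= b
    # derived key material is hash(inner) + hash(outer), truncated to the requested size
    material = hashlib.new(algorithm, inner).digest() + hashlib.new(algorithm, outer).digest()
    return material[:size]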

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class DESDerive (size=8)

This unit is implemented in refinery.units.crypto.keyderive.DESDerive and has the following commandline Interface:

usage: DESDerive [-h] [-L] [-Q] [-0] [-v] [size]

Implements the same functionality as DES_string_to_key in OpenSSL. It
converts a string to an 8 byte DES key with odd byte parity, per FIPS
specification. This is not a modern key derivation function.

positional arguments:
  size           The number of bytes to generate, default is the maximum
                 of 8.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class DESDerive(KeyDerivation):
    """
    Implements the same functionality as `DES_string_to_key` in OpenSSL. It
    converts a string to an 8 byte DES key with odd byte parity, per FIPS
    specification. This is not a modern key derivation function.
    """
    def __init__(self, size: arg(help='The number of bytes to generate, default is the maximum of 8.') = 8):
        super().__init__(size=size, salt=None)

    def process(self, password):
        from Crypto.Cipher import DES
        from Crypto.Util.strxor import strxor

        key = bytearray(8)

        for i, j in enumerate(password):
            if ((i % 16) < 8):
                key[i % 8] ^= (j << 1) & 0xFF
            else:
                j = (((j << 4) & 0xf0) | ((j >> 4) & 0x0f))
                j = (((j << 2) & 0xcc) | ((j >> 2) & 0x33))
                j = (((j << 1) & 0xaa) | ((j >> 1) & 0x55))
                key[7 - (i % 8)] ^= j

        des_set_odd_parity(key)

        if password:
            n = len(password)
            password = password.ljust(n + 7 - ((n - 1) % 8), b'\0')
            des = DES.new(key, DES.MODE_ECB)
            for k in range(0, n, 8):
                key[:] = des.encrypt(strxor(password[k:k + 8], key))
            des_set_odd_parity(key)

        if self.args.size > 8:
            raise RefineryPartialResult('DESDerive can provide at most 8 bytes.', partial=key)

        return key[:self.args.size]

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class PasswordDeriveBytes (size, salt, iter=100, hash='SHA1')

This unit is implemented in refinery.units.crypto.keyderive.PasswordDeriveBytes and has the following commandline Interface:

usage: PasswordDeriveBytes [-h] [-L] [-Q] [-0] [-v]
                           size salt [iter] [hash]

An implementation of the PasswordDeriveBytes routine available from the
.NET standard library. According to documentation, it is an extension of
PBKDF1.

positional arguments:
  size           The number of bytes to generate.
  salt           Salt for the derivation.
  iter           Number of iterations; default is 100.
  hash           Specify one of these algorithms (default is SHA1): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class PasswordDeriveBytes(KeyDerivation):
    """
    An implementation of the PasswordDeriveBytes routine available from the .NET
    standard library. According to documentation, it is an extension of PBKDF1.
    """
    def __init__(self, size, salt, iter=100, hash='SHA1'):
        self.superinit(super(), **vars())

    def process(self, data):
        if self.codec != 'UTF8':
            data = data.decode(self.codec).encode('UTF8')
        data += self.args.salt
        for _ in range(self.args.iter - 1):
            data = self.hash.new(data).digest()
        counter, seedhash = 1, data
        data = self.hash.new(data).digest()
        while len(data) < self.args.size:
            data += self.hash.new(B'%d%s' % (counter, seedhash)).digest()
            counter += 1
        return data[:self.args.size]

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class HKDF (size, salt, hash='SHA512')

This unit is implemented in refinery.units.crypto.keyderive.hkdf and has the following commandline Interface:

usage: HKDF [-h] [-L] [-Q] [-0] [-v] size salt [hash]

HKDF Key derivation

positional arguments:
  size           The number of bytes to generate.
  salt           Salt for the derivation.
  hash           Specify one of these algorithms (default is SHA512): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class HKDF(KeyDerivation):
    """HKDF Key derivation"""

    def __init__(self, size, salt, hash='SHA512'):
        super().__init__(size=size, salt=salt, hash=hash)

    def process(self, data):
        from Crypto.Protocol.KDF import HKDF
        return HKDF(data, self.args.size, self.args.salt, self.hash)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class hmac (salt, hash='SHA1', size=None)

This unit is implemented in refinery.units.crypto.keyderive.hmac and has the following commandline Interface:

usage: hmac [-h] [-L] [-Q] [-0] [-v] salt [hash] [size]

HMAC based key derivation

positional arguments:
  salt           Salt for the derivation.
  hash           Specify one of these algorithms (default is SHA1): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384
  size           The number of bytes to generate.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class hmac(KeyDerivation):
    """
    HMAC based key derivation
    """

    def __init__(self, salt, hash='SHA1', size=None):
        super().__init__(salt=salt, size=size, hash=hash)

    def process(self, data):
        from Crypto.Hash import HMAC
        return HMAC.new(data, self.args.salt, digestmod=self.hash).digest()

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class kblob

This unit is implemented in refinery.units.crypto.keyderive.kblob and has the following commandline Interface:

usage: kblob [-h] [-L] [-Q] [-0] [-v]

Extracts a key from a Microsoft Crypto API BLOB structure.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class kblob(Unit):
    """
    Extracts a key from a Microsoft Crypto API BLOB structure.
    """

    def process(self, data):
        blob = CRYPTOKEY(data)
        try:
            return self.labelled(
                bytes(blob.key),
                type=blob.header.type.name,
                algorithm=blob.header.algorithm.name
            )
        except AttributeError as A:
            raise ValueError(F'unable to derive key from {blob.header.type!s}') from A

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class PBKDF1 (size, salt=b'\x00\x00\x00\x00\x00\x00\x00\x00', iter=1000, hash='SHA1')

This unit is implemented in refinery.units.crypto.keyderive.pbkdf1 and has the following commandline Interface:

usage: PBKDF1 [-h] [-L] [-Q] [-0] [-v] size [salt] [iter] [hash]

PBKDF1 Key derivation

positional arguments:
  size           The number of bytes to generate.
  salt           Salt for the derivation; default are 8 null bytes.
  iter           Number of iterations; default is 1000.
  hash           Specify one of these algorithms (default is SHA1): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class PBKDF1(KeyDerivation):
    """PBKDF1 Key derivation"""

    @arg('salt', help='Salt for the derivation; default are 8 null bytes.')
    def __init__(self, size, salt=bytes(8), iter=1000, hash='SHA1'):
        self.superinit(super(), **vars())

    def process(self, data):
        from Crypto.Protocol.KDF import PBKDF1
        return PBKDF1(
            data.decode(self.codec),
            self.args.salt,
            dkLen=self.args.size,
            count=self.args.iter,
            hashAlgo=self.hash
        )

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class PBKDF2 (size, salt, iter=1000, hash='SHA1')

This unit is implemented in refinery.units.crypto.keyderive.pbkdf2 and has the following commandline Interface:

usage: PBKDF2 [-h] [-L] [-Q] [-0] [-v] size salt [iter] [hash]

PBKDF2 Key derivation

positional arguments:
  size           The number of bytes to generate.
  salt           Salt for the derivation.
  iter           Number of iterations; default is 1000.
  hash           Specify one of these algorithms (default is SHA1): MD2,
                 MD4, MD5, SHA1, SHA256, SHA512, SHA224, SHA384

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class PBKDF2(KeyDerivation):
    """PBKDF2 Key derivation"""

    def __init__(self, size, salt, iter=1000, hash='SHA1'):
        self.superinit(super(), **vars())

    def process(self, data):
        from Crypto.Protocol.KDF import PBKDF2
        return PBKDF2(
            data.decode(self.codec),
            self.args.salt,
            dkLen=self.args.size,
            hmac_hash_module=self.hash,
            count=self.args.iter
        )
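
The same derivation is available from the Python standard library, which can serve as a cross-check. A sketch with arbitrary example inputs, matching the unit's SHA1/1000-iteration defaults:

import hashlib

key = hashlib.pbkdf2_hmac('sha1', b'password', b'salt', 1000, dklen=32)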

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class ucrypt (size=13, salt=b'AA')

This unit is implemented in refinery.units.crypto.keyderive.unixcrypt and has the following commandline Interface:

usage: ucrypt [-h] [-L] [-Q] [-0] [-v] [size] [salt]

Implements the classic Unix crypt algorithm.

positional arguments:
  size           The number of bytes to generate, default is 13.
  salt           Salt for the derivation, the default is "AA".

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class ucrypt(KeyDerivation):
    """
    Implements the classic Unix crypt algorithm.
    """
    def __init__(
        self,
        size: arg(help='The number of bytes to generate, default is 13.') = 13,
        salt: arg(help='Salt for the derivation, the default is "AA".') = B'AA'
    ):
        super().__init__(size=size, salt=salt)

    def process(self, data):
        crypted = bytes(UnixCrypt(data, salt=self.args.salt))
        if len(crypted) < self.args.size:
            raise RefineryPartialResult(
                F'unix crypt only provided {len(crypted)} bytes, but {self.args.size} '
                F'were requested.', partial=crypted
            )
        return crypted[:self.args.size]
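
On POSIX systems, the Python standard library exposes the same classic DES-based crypt and can serve as a cross-check. A sketch (the crypt module is Unix-only and deprecated in recent Python versions):

import crypt

# a traditional two-character salt yields the classic 13-character result
print(crypt.crypt('password', 'AA'))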

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class atbash

This unit is implemented in refinery.units.encoding.atbash and has the following commandline Interface:

usage: atbash [-h] [-L] [-Q] [-0] [-v] [-R]

https://en.wikipedia.org/wiki/Atbash Atbash encoding and decoding. Fairly
useless in the 21st century, except for picking out crypto nerds.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class atbash(Unit):
    """
    https://en.wikipedia.org/wiki/Atbash
    Atbash encoding and decoding. Fairly useless in the 21st century, except
    for picking out crypto nerds.
    """

    def process(self, data: bytearray):
        uc = range(B'A'[0], B'Z'[0] + 1)
        lc = range(B'a'[0], B'z'[0] + 1)
        for k, letter in enumerate(data):
            if letter in uc:
                data[k] = uc[~uc.index(letter)]
                continue
            if letter in lc:
                data[k] = lc[~lc.index(letter)]
                continue
        return data

    reverse = process
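
Atbash simply mirrors the alphabet, so the same mapping can be expressed as a translation table. A standalone sketch; the transform is its own inverse, which is why reverse is an alias for process above:

import string

TABLE = str.maketrans(
    string.ascii_lowercase + string.ascii_uppercase,
    string.ascii_lowercase[::-1] + string.ascii_uppercase[::-1])

assert 'Attack'.translate(TABLE) == 'Zggzxp'
assert 'Zggzxp'.translate(TABLE) == 'Attack'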

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class b32

This unit is implemented in refinery.units.encoding.b32 and has the following commandline Interface:

usage: b32 [-h] [-L] [-Q] [-0] [-v] [-R]

Base32 encoding and decoding.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class b32(Unit):
    """
    Base32 encoding and decoding.
    """
    def reverse(self, data):
        return base64.b32encode(data)

    def process(self, data):
        return base64.b32decode(data, casefold=True)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class b64 (urlsafe=False)

This unit is implemented in refinery.units.encoding.b64 and has the following commandline Interface:

usage: b64 [-h] [-L] [-Q] [-0] [-v] [-R] [-u]

Base64 encoding and decoding.

optional arguments:
  -u, --urlsafe  use URL-safe alphabet

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class b64(Unit):
    """
    Base64 encoding and decoding.
    """
    def __init__(self, urlsafe: arg.switch('-u', help='use URL-safe alphabet') = False):
        super().__init__(urlsafe=urlsafe)

    @property
    def altchars(self):
        if self.args.urlsafe:
            return B'-_'

    def reverse(self, data):
        return base64.b64encode(data, altchars=self.altchars)

    def process(self, data: bytearray):
        if not data:
            return data
        if len(data) == 1:
            raise ValueError('single byte can not be base64-decoded.')
        data.extend(B'===')
        return base64.b64decode(data, altchars=self.altchars)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Instance variables

var altchars
Expand source code Browse git
@property
def altchars(self):
    if self.args.urlsafe:
        return B'-_'

Inherited members

class b85

This unit is implemented in refinery.units.encoding.b85 and has the following commandline Interface:

usage: b85 [-h] [-L] [-Q] [-0] [-v] [-R]

Base85 encoding and decoding.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class b85(Unit):
    """
    Base85 encoding and decoding.
    """
    def reverse(self, data):
        return base64.b85encode(data)

    def process(self, data):
        return base64.b85decode(data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class base (base=0, little_endian=False, alphabet=b'')

This unit is implemented in refinery.units.encoding.base and has the following commandline Interface:

usage: base [-h] [-L] [-Q] [-0] [-v] [-R] [-e] [-a STR] [base]

Encodes and decodes integers in arbitrary base.

positional arguments:
  base                 Base to be used for conversion; The value defaults
                       to the length of the alphabet if given, or 0
                       otherwise. Base 0 treats the input as a Python
                       integer literal.

optional arguments:
  -e, --little-endian  Use little endian instead of big endian byte order.
  -a, --alphabet STR   The alphabet of digits. Has to have length at least
                       equal to the chosen base. The default is:
                       0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ.

generic options:
  -h, --help           Show this help message and exit.
  -L, --lenient        Allow partial results as output.
  -Q, --quiet          Disables all log output.
  -0, --devnull        Do not produce any output.
  -v, --verbose        Specify up to two times to increase log level.
  -R, --reverse        Use the reverse operation.
Expand source code Browse git
class base(Unit):
    """
    Encodes and decodes integers in arbitrary base.
    """

    _DEFAULT_APHABET = B'0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def __init__(
        self,
        base: arg.number(bound=(2, None), metavar='base', help=(
            'Base to be used for conversion; The value defaults to the length of the alphabet '
            'if given, or 0 otherwise. Base 0 treats the input as a Python integer literal.')) = 0,
        little_endian: arg('-e', help='Use little endian instead of big endian byte order.') = False,
        alphabet: arg('-a', metavar='STR', help=(
            'The alphabet of digits. Has to have length at least equal to the chosen base. '
            'The default is: 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ.')) = B'',
    ):
        if alphabet:
            if len(alphabet) < 2:
                raise ValueError('an alphabet with at least two digits is required')
            if not base:
                base = len(alphabet)
        else:
            alphabet = self._DEFAULT_APHABET
        if base and base not in range(2, len(alphabet) + 1):
            raise ValueError(F'base may only be an integer between 2 and {len(alphabet)}')
        super().__init__(base=base, little_endian=little_endian, alphabet=alphabet)

    @property
    def byteorder(self):
        return 'little' if self.args.little_endian else 'big'

    def reverse(self, data):
        self.log_info('using byte order', self.byteorder)
        number = int.from_bytes(data, byteorder=self.byteorder)

        if number == 0:
            return B'0'
        if self.args.base == 0:
            return B'0x%X' % number
        if self.args.base > len(self.args.alphabet):
            raise ValueError(
                F'Only {len(self.args.alphabet)} digits available; not enough to '
                F'encode base {self.args.base}'
            )

        def reverse_result(number):
            while number:
                yield self.args.alphabet[number % self.args.base]
                number //= self.args.base

        return bytes(reversed(tuple(reverse_result(number))))

    def process(self, data):
        data = data.strip()
        base = self.args.base
        defaults = self._DEFAULT_APHABET[:base]
        alphabet = self.args.alphabet[:base]
        if len(alphabet) == len(defaults):
            if alphabet != defaults:
                self.log_info('translating input data to a default alphabet for faster conversion')
                data = data.translate(bytes.maketrans(alphabet, defaults))
            result = int(data, self.args.base)
        else:
            self.log_warn('very long alphabet, unable to use built-ins; reverting to (slow) fallback.')
            result = 0
            alphabet = {digit: k for k, digit in enumerate(alphabet)}
            for digit in data:
                result *= base
                result += alphabet[digit]
        size, rest = divmod(result.bit_length(), 8)
        size += int(bool(rest))
        return result.to_bytes(size, byteorder=self.byteorder)
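The conversion performed by reverse and process boils down to integer arithmetic. A small, refinery-independent sketch for base 16 with the default alphabet (the function names are ours):

ALPHABET = b'0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

def encode_base(data: bytes, base: int = 16, byteorder: str = 'big') -> bytes:
    # convert the input to an integer, then emit digits from least to most significant
    number = int.from_bytes(data, byteorder)
    digits = bytearray()
    while number:
        number, remainder = divmod(number, base)
        digits.append(ALPHABET[remainder])
    return bytes(reversed(digits)) or b'0'

def decode_base(data: bytes, base: int = 16, byteorder: str = 'big') -> bytes:
    number = int(data, base)
    size = (number.bit_length() + 7) // 8
    return number.to_bytes(size, byteorder)

assert encode_base(b'\x01\x00') == b'100'
assert decode_base(b'100') == b'\x01\x00'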

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Instance variables

var byteorder
@property
def byteorder(self):
    return 'little' if self.args.little_endian else 'big'

Inherited members

class cp1252

This unit is implemented in refinery.units.encoding.cp1252 and has the following commandline Interface:

usage: cp1252 [-h] [-L] [-Q] [-0] [-v] [-R]

Encodes and decodes Windows CP 1252 (aka Latin1) encoded string data.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class cp1252(Unit):
    """
    Encodes and decodes Windows CP 1252 (aka Latin1) encoded string data.
    """

    def process(self, data):
        return data.decode(self.codec).encode('cp1252')

    def reverse(self, data):
        return data.decode('cp1252').encode(self.codec)
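A short round trip illustrating the difference between the two encodings the unit translates between, using plain standard-library calls:

assert 'café'.encode('cp1252') == b'caf\xe9'
assert b'caf\xe9'.decode('cp1252').encode('utf8') == b'caf\xc3\xa9'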

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class esc (hex=False, unicode=False, greedy=False, quoted=False, bare=False, expand=False)

This unit is implemented in refinery.units.encoding.esc and has the following commandline Interface:

usage: esc [-h] [-L] [-Q] [-0] [-v] [-R] [-x] [-u] [-g] [-q] [-b] [-p]

Encodes and decodes common ASCII escape sequences.

optional arguments:
  -x, --hex      Hex encode everything, do not use C escape sequences.
  -u, --unicode  Use unicode escape sequences and UTF-8 encoding.
  -g, --greedy   Replace \x by x and \u by u when not followed by two or
                 four hex digits, respectively.
  -q, --quoted   Remove enclosing quotes while decoding and add them for
                 encoding.
  -b, --bare     Do not escape quote characters.
  -p, --expand   Decode sequences of the form \uHHLL as two bytes when the
                 upper byte is nonzero.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class esc(Unit):
    """
    Encodes and decodes common ASCII escape sequences.
    """
    _ESCAPE = {
        0x00: BR'\0',
        0x07: BR'\a',
        0x08: BR'\b',
        0x0C: BR'\f',
        0x0A: BR'\n',
        0x0D: BR'\r',
        0x09: BR'\t',
        0x0B: BR'\v',
        0x5C: BR'\\',
        0x27: BR'\'',
        0x22: BR'\"'
    }
    _UNESCAPE = {
        BR'0': B'\x00',
        BR'a': B'\x07',
        BR'b': B'\x08',
        BR'f': B'\x0C',
        BR'n': B'\x0A',
        BR'r': B'\x0D',
        BR't': B'\x09',
        BR'v': B'\x0B',
        B'\\': B'\x5C',
        BR"'": B'\x27',
        BR'"': B'\x22'
    }

    def __init__(self,
        hex     : arg.switch('-x', help='Hex encode everything, do not use C escape sequences.') = False,
        unicode : arg.switch('-u', help='Use unicode escape sequences and UTF-8 encoding.') = False,
        greedy  : arg.switch('-g', help='Replace \\x by x and \\u by u when not followed by two or four hex digits, respectively.') = False,
        quoted  : arg.switch('-q', help='Remove enclosing quotes while decoding and add them for encoding.') = False,
        bare    : arg.switch('-b', help='Do not escape quote characters.') = False,
        expand  : arg.switch('-p', help='Decode sequences of the form \\uHHLL as two bytes when the upper byte is nonzero.') = False,
    ) -> Unit: pass  # noqa

    def process(self, data):
        if self.args.quoted:
            quote = data[0]
            if data[~0] != quote:
                raise ValueError('string is not correctly quoted')
            data = data[1:~0]

        if self.args.unicode:
            return data.decode('UNICODE_ESCAPE').encode(self.codec)

        def unescape(match):
            c = match[1]
            if len(c) > 1:
                if c[0] == 0x75:
                    # unicode
                    upper = int(c[1:3], 16)
                    lower = int(c[3:5], 16)
                    if self.args.expand:
                        return bytes((upper, lower))
                    return bytes((lower,))
                elif c[0] == 0x78:
                    # hexadecimal
                    return bytes((int(c[1:3], 16),))
                else:
                    # octal escape sequence
                    return bytes((int(c, 8) & 0xFF,))
            elif c in B'ux':
                return c if self.args.greedy else match[0]
            return self._UNESCAPE.get(c, c)
        data = re.sub(
            RB'\\(u[a-fA-F0-9]{4}|x[a-fA-F0-9]{2}|[0-7]{3}|.)', unescape, data)
        return data

    def reverse(self, data):
        if self.args.unicode:
            string = data.decode(self.codec).encode('UNICODE_ESCAPE')
        else:
            if not self.args.hex:
                def escape(match):
                    c = match[0][0]
                    return self._ESCAPE.get(c, RB'\x%02x' % c)
                pattern = RB'[\x00-\x1F\x22\x27\x5C\x7F-\xFF]'
                if self.args.bare:
                    pattern = RB'[\x00-\x1F\x5C\x7F-\xFF]'
                string = re.sub(pattern, escape, data)
            else:
                string = bytearray(4 * len(data))
                for k in range(len(data)):
                    a = k * 4
                    b = k * 4 + 4
                    string[a:b] = RB'\x%02x' % data[k]
        if self.args.quoted:
            string = B'"%s"' % string
        return string
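The -u switch corresponds to Python's built-in unicode_escape codec, while the default mode handles the C-style sequences from the tables above. A brief standard-library illustration of the former:

assert 'tab:\there'.encode('unicode_escape') == b'tab:\\there'
assert b'tab:\\there'.decode('unicode_escape') == 'tab:\there'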

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class hex

This unit is implemented in refinery.units.encoding.hex and has the following commandline Interface:

usage: hex [-h] [-L] [-Q] [-0] [-v] [-R]

Hex-decodes and encodes binary data. Non-hex characters are removed from
the input. For decoding, an odd trailing hex digit is stripped, since two
hex digits are required to represent a byte.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class hex(Unit):
    """
    Hex-decodes and encodes binary data. Non-hex characters are removed from
    the input. For decoding, an odd trailing hex digit is stripped, since two
    hex digits are required to represent a byte.
    """

    def reverse(self, data):
        import base64
        return base64.b16encode(data)

    def process(self, data):
        import re
        import base64
        data = re.sub(B'[^A-Fa-f0-9]+', B'', data)
        if len(data) % 2:
            data = data[:-1]
        return base64.b16decode(data, casefold=True)
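The same clean-then-decode steps with nothing but the standard library, for illustration only:

import base64
import re

dirty = b'4a:4b:4c'
clean = re.sub(b'[^A-Fa-f0-9]+', b'', dirty)            # b'4a4b4c'
assert base64.b16decode(clean, casefold=True) == b'JKL'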

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class htmlesc

This unit is implemented in refinery.units.encoding.htmlesc and has the following commandline Interface:

usage: htmlesc [-h] [-L] [-Q] [-0] [-v] [-R]

Encodes and decodes HTML entities.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class htmlesc(Unit):
    """
    Encodes and decodes HTML entities.
    """

    @unicoded
    def process(self, data: str) -> str:
        return html_entities.unescape(data)

    @unicoded
    def reverse(self, data: str) -> str:
        return html_entities.escape(data)
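The escaping itself is available from the html module of the standard library, which is presumably what html_entities refers to in this module's imports; for example:

import html
assert html.unescape('a &lt; b &amp;&amp; c') == 'a < b && c'
assert html.escape('a < b') == 'a &lt; b'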

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class netbios (key=b'A')

This unit is implemented in refinery.units.encoding.netbios and has the following commandline Interface:

usage: netbios [-h] [-L] [-Q] [-0] [-v] [-R] [key]

Encodes and decodes strings using the same algorithm that is used for
NetBIOS labels. Each byte 0xUL is encoded as two bytes: the high nibble
0xU and the low nibble 0xL are each added to an offset character. The
default offset is the capital letter A.

positional arguments:
  key            Provide a single letter to use as the offset.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class netbios(Unit):
    """
    Encodes and decodes strings using the same algorithm that is used for NetBIOS
    labels. Each byte 0xUL is encoded as two bytes: the high nibble 0xU and the low
    nibble 0xL are each added to an offset character. The default offset is the
    capital letter A.
    """

    def __init__(self, key: arg(help="Provide a single letter to use as the offset.") = B'A'):
        if len(key) != 1:
            raise ValueError("The key must be a binary string of length exactly 1")
        super().__init__(key=key[0])

    def reverse(self, data):
        result = bytearray(2 * len(data))
        for k, byte in enumerate(data):
            hi, lo = byte >> 4, byte & 15
            result[2 * k + 0] = hi + self.args.key
            result[2 * k + 1] = lo + self.args.key
        return result

    def process(self, data):
        def merge(it):
            while True:
                try:
                    hi = next(it) - self.args.key
                    lo = next(it) - self.args.key
                    if hi not in range(16) or lo not in range(16):
                        raise ValueError(F'Invalid character encoding detected: hi={hi:X}, lo={lo:X}.')
                    yield (hi << 4) | lo
                except StopIteration:
                    break
        return bytearray(merge(iter(data)))
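A worked example of the encoding with the default offset A: the byte 0x41 ('A') splits into the nibbles 0x4 and 0x1, which become 'E' and 'B'. A refinery-independent sketch (the function name is ours):

def netbios_encode(data: bytes, offset: int = ord('A')) -> bytes:
    out = bytearray()
    for byte in data:
        out.append((byte >> 4) + offset)  # high nibble
        out.append((byte & 15) + offset)  # low nibble
    return bytes(out)

assert netbios_encode(b'AB') == b'EBEC'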

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class ps1str

This unit is implemented in refinery.units.encoding.ps1str and has the following commandline Interface:

usage: ps1str [-h] [-L] [-Q] [-0] [-v] [-R]

Escapes and unescapes PowerShell strings.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class ps1str(Unit):
    """
    Escapes and unescapes PowerShell strings.
    """
    UNESCAPE = {
        '`0': '\0',
        '`a': '\a',
        '`b': '\b',
        '`f': '\f',
        '`n': '\n',
        '`r': '\r',
        '`t': '\t',
        '`v': '\v',
        '``': '`',
        "`'": '\'',
        '`"': '\"',
    }
    ESCAPE = {
        '`' : '``',
        '$' : '`$',
        '\0': '`0',
        '\a': '`a',
        '\b': '`b',
        '\f': '`f',
        '\n': '`n',
        '\r': '`r',
        '\t': '`t',
        '\v': '`v',
        '\'': "`'",
        '\"': '""',
    }

    def __init__(self): pass

    @unicoded
    def process(self, data):
        match = re.fullmatch(R'''@(['"])\s*\n(.*?)\n\s*\1@''', data)
        if match:
            return match.group(2)
        if data[0] not in '\'"' or data[-1] != data[0]:
            raise ValueError(
                'No quotes found at beginning of input. To escape a PowerShell string, the '
                'quotes must be included because quote escaping depends on whether a single '
                'or double quote was used.')

        quote, data = data[0], data[1:-1]

        def unescape(match):
            string = match[0]
            return self.UNESCAPE.get(string, string[1:])

        if quote == '"':
            if re.search(R'(?<!`)\$(?=[\w\(\{\$\?\^:])', data):
                self.log_warn('Loss of information: double quoted string contains variable substitutions.')
            data = re.sub('`.', unescape, data)

        return data.replace(quote + quote, quote)

    @unicoded
    def reverse(self, data):
        def escaper(match):
            char = match[0]
            return ps1str.ESCAPE.get(char, char)
        return '"{}"'.format(re.sub(R'''[\x00\x07-\x0D`$'"]''', escaper, data))

Ancestors

Class variables

var optional_dependencies
var required_dependencies
var UNESCAPE
var ESCAPE

Inherited members

class recode (decode=None, encode='UTF8', decerr=None, encerr=None, errors=None)

This unit is implemented in refinery.units.encoding.recode and has the following commandline Interface:

usage: recode [-h] [-L] [-Q] [-0] [-v] [-R] [-d Handler] [-e Handler]
              [-E Handler]
              [decode-as] [encode-as]

Expects input string data encoded in the from encoding and encodes it in
the to encoding, then outputs the result.

positional arguments:
  decode-as             Input encoding; Guess encoding by default.
  encode-as             Output encoding; The default is UTF8.

optional arguments:
  -d, --decerr Handler  Specify an error handler for decoding.
  -e, --encerr Handler  Specify an error handler for encoding.
  -E, --errors Handler  Specify an error handler for both encoding and
                        decoding. The possible choices are the following:
                        STRICT, IGNORE, REPLACE, XMLREF, BACKSLASH,
                        SURROGATE

generic options:
  -h, --help            Show this help message and exit.
  -L, --lenient         Allow partial results as output.
  -Q, --quiet           Disables all log output.
  -0, --devnull         Do not produce any output.
  -v, --verbose         Specify up to two times to increase log level.
  -R, --reverse         Use the reverse operation.
class recode(Unit):
    """
    Expects input string data encoded in the `from` encoding and encodes it in
    the `to` encoding, then outputs the result.
    """

    def __init__(
        self,
        decode: arg(metavar='decode-as', type=str, help='Input encoding; Guess encoding by default.') = None,
        encode: arg(metavar='encode-as', type=str, help=F'Output encoding; The default is {Unit.codec}.') = Unit.codec,
        decerr: arg.option('-d', choices=Handler,
            help='Specify an error handler for decoding.') = None,
        encerr: arg.option('-e', choices=Handler,
            help='Specify an error handler for encoding.') = None,
        errors: arg.option('-E', choices=Handler, help=(
            'Specify an error handler for both encoding and decoding. '
            'The possible choices are the following: {choices}')) = None,
    ):
        super().__init__(
            decode=decode,
            encode=encode,
            decerr=arg.as_option(decerr or errors or 'STRICT', Handler).value,
            encerr=arg.as_option(encerr or errors or 'STRICT', Handler).value
        )

    @Unit.Requires('chardet', optional=False)
    def _chardet():
        import chardet
        return chardet

    def _detect(self, data):
        mv = memoryview(data)
        if not any(mv[1::2]): return 'utf-16le'
        if not any(mv[0::2]): return 'utf-16be'
        detection = self._chardet.detect(data)
        codec = detection['encoding']
        self.log_info(lambda: F'Using input encoding: {codec}, detected with {int(detection["confidence"]*100)}% confidence.')
        return codec

    def _recode(self, enc, dec, encerr, decerr, data):
        dec = dec or self._detect(data)
        return codecs.encode(codecs.decode(data, dec, errors=decerr), enc, errors=encerr)

    def reverse(self, data):
        return self._recode(self.args.decode, self.args.encode, self.args.decerr, self.args.encerr, data)

    def process(self, data):
        return self._recode(self.args.encode, self.args.decode, self.args.encerr, self.args.decerr, data)
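At its core this is a decode followed by an encode via the codecs module; a minimal illustration without the detection logic:

import codecs

data = 'Grüße'.encode('cp1252')
assert codecs.encode(codecs.decode(data, 'cp1252'), 'utf8') == 'Grüße'.encode('utf8')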

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class u16

This unit is implemented in refinery.units.encoding.u16 and has the following commandline Interface:

usage: u16 [-h] [-L] [-Q] [-0] [-v] [-R]

Encodes and decodes UTF-16LE encoded string data.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class u16(Unit):
    """
    Encodes and decodes UTF-16LE encoded string data.
    """

    def reverse(self, data):
        return data.decode(self.codec).encode('utf-16LE')

    def process(self, data):
        return data.decode('utf-16LE').encode(self.codec)
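For reference, UTF-16LE simply interleaves zero bytes for ASCII input:

assert 'AB'.encode('utf-16le') == b'A\x00B\x00'
assert b'A\x00B\x00'.decode('utf-16le') == 'AB'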

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class url (plus=False, hex=False)

This unit is implemented in refinery.units.encoding.url and has the following commandline Interface:

usage: url [-h] [-L] [-Q] [-0] [-v] [-R] [-p] [-x]

Decodes and encodes URL-Encoding, which preserves only alphanumeric
characters and the symbols _, ., -, ~, \, and /. Every other character is
escaped by hex-encoding it and prefixing it with a percent symbol.

optional arguments:
  -p, --plus     also replace plus signs by spaces
  -x, --hex      hex encode every character in reverse mode

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class url(Unit):
    """
    Decodes and encodes URL-Encoding, which preserves only alphanumeric characters and the symbols `_`, `.`, `-`, `~`, `\\`, and `/`.
    Every other character is escaped by hex-encoding it and prefixing it with a percent symbol.
    """

    def __init__(
        self,
        plus: arg.switch('-p', help='also replace plus signs by spaces') = False,
        hex : arg.switch('-x', help='hex encode every character in reverse mode') = False
    ):
        super().__init__(plus=plus, hex=hex)

    def process(self, data):
        data = re.sub(
            B'\\%([0-9a-fA-F]{2})',
            lambda m: bytes((int(m[1], 16),)),
            data
        )
        if self.args.plus:
            data = data.replace(B'+', B' ')
        return data

    def reverse(self, data):
        if self.args.plus:
            data = data.replace(B' ', B'+')
        if not self.args.hex:
            # the dash is escaped so it is not parsed as a character range
            return re.sub(B'[^a-zA-Z0-9_.\\-~\\\\/]', lambda m: B'%%%02X' % ord(m[0]), data)
        result = bytearray(len(data) * 3)
        offset = 0
        for byte in data:
            result[offset] = B'%'[0]
            offset += 1
            result[offset:offset + 2] = B'%02X' % byte
            offset += 2
        return result
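The standard library offers a similar (though not byte-for-byte identical) transformation in urllib.parse, which can be useful for comparison; note that quote and unquote operate on strings and use a slightly different set of safe characters:

from urllib.parse import quote, unquote

assert unquote('%48ello%20World') == 'Hello World'
assert quote('Hello World') == 'Hello%20World'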

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class uuenc

This unit is implemented in refinery.units.encoding.uuenc and has the following commandline Interface:

usage: uuenc [-h] [-L] [-Q] [-0] [-v] [-R]

Encodes and decodes uuencoded data.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
class uuenc(Unit):
    """
    Encodes and decodes uuencoded data.
    """
    def process(self, data):
        with MemoryFile(data) as stream:
            with MemoryFile() as output:
                uu.decode(stream, output, quiet=True)
                return output.getvalue()

    def reverse(self, data):
        meta = metavars(data)
        path = meta.get('path', None)
        name = path and pathlib.Path(path).name
        with MemoryFile(data) as stream:
            with MemoryFile() as output:
                uu.encode(stream, output, name, backtick=True)
                return output.getvalue()
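The unit wraps the standard uu module (deprecated in recent Python releases); an in-memory round trip using io.BytesIO in place of refinery's MemoryFile:

import io
import uu

source, encoded = io.BytesIO(b'hello'), io.BytesIO()
uu.encode(source, encoded, name='greeting.txt')

decoded = io.BytesIO()
uu.decode(io.BytesIO(encoded.getvalue()), decoded, quiet=True)
assert decoded.getvalue() == b'hello'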

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class wshenc (marker=True)

This unit is implemented in refinery.units.encoding.wshenc and has the following commandline Interface:

usage: wshenc [-h] [-L] [-Q] [-0] [-v] [-R] [-m]

Windows Scripting Host encoding and decoding of VBScript (VBS/VBE) and
JScript (JS/JSE).

optional arguments:
  -m, --no-marker  Do not require magic marker when encoding and do not
                   search for marker when decoding.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
  -R, --reverse    Use the reverse operation.
class wshenc(Unit):
    """
    Windows Scripting Host encoding and decoding of VBScript (VBS/VBE) and JScript (JS/JSE).
    """

    _MARKER_INIT = RB'#@~^BINREF=='
    _MARKER_STOP = RB'BINREF==^#~@'

    _CHUNKS = (
        0x57, 0x6E, 0x7B, 0x4A, 0x4C, 0x41, 0x0B, 0x0B, 0x0B, 0x0C, 0x0C, 0x0C, 0x4A, 0x4C, 0x41,
        0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x12, 0x12, 0x12,
        0x13, 0x13, 0x13, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17,
        0x18, 0x18, 0x18, 0x19, 0x19, 0x19, 0x1A, 0x1A, 0x1A, 0x1B, 0x1B, 0x1B, 0x1C, 0x1C, 0x1C,
        0x1D, 0x1D, 0x1D, 0x1E, 0x1E, 0x1E, 0x1F, 0x1F, 0x1F, 0x2E, 0x2D, 0x32, 0x47, 0x75, 0x30,
        0x7A, 0x52, 0x21, 0x56, 0x60, 0x29, 0x42, 0x71, 0x5B, 0x6A, 0x5E, 0x38, 0x2F, 0x49, 0x33,
        0x26, 0x5C, 0x3D, 0x49, 0x62, 0x58, 0x41, 0x7D, 0x3A, 0x34, 0x29, 0x35, 0x32, 0x36, 0x65,
        0x5B, 0x20, 0x39, 0x76, 0x7C, 0x5C, 0x72, 0x7A, 0x56, 0x43, 0x7F, 0x73, 0x38, 0x6B, 0x66,
        0x39, 0x63, 0x4E, 0x70, 0x33, 0x45, 0x45, 0x2B, 0x6B, 0x68, 0x68, 0x62, 0x71, 0x51, 0x59,
        0x4F, 0x66, 0x78, 0x09, 0x76, 0x5E, 0x62, 0x31, 0x7D, 0x44, 0x64, 0x4A, 0x23, 0x54, 0x6D,
        0x75, 0x43, 0x71, 0x4A, 0x4C, 0x41, 0x7E, 0x3A, 0x60, 0x4A, 0x4C, 0x41, 0x5E, 0x7E, 0x53,
        0x40, 0x4C, 0x40, 0x77, 0x45, 0x42, 0x4A, 0x2C, 0x27, 0x61, 0x2A, 0x48, 0x5D, 0x74, 0x72,
        0x22, 0x27, 0x75, 0x4B, 0x37, 0x31, 0x6F, 0x44, 0x37, 0x4E, 0x79, 0x4D, 0x3B, 0x59, 0x52,
        0x4C, 0x2F, 0x22, 0x50, 0x6F, 0x54, 0x67, 0x26, 0x6A, 0x2A, 0x72, 0x47, 0x7D, 0x6A, 0x64,
        0x74, 0x39, 0x2D, 0x54, 0x7B, 0x20, 0x2B, 0x3F, 0x7F, 0x2D, 0x38, 0x2E, 0x2C, 0x77, 0x4C,
        0x30, 0x67, 0x5D, 0x6E, 0x53, 0x7E, 0x6B, 0x47, 0x6C, 0x66, 0x34, 0x6F, 0x35, 0x78, 0x79,
        0x25, 0x5D, 0x74, 0x21, 0x30, 0x43, 0x64, 0x23, 0x26, 0x4D, 0x5A, 0x76, 0x52, 0x5B, 0x25,
        0x63, 0x6C, 0x24, 0x3F, 0x48, 0x2B, 0x7B, 0x55, 0x28, 0x78, 0x70, 0x23, 0x29, 0x69, 0x41,
        0x28, 0x2E, 0x34, 0x73, 0x4C, 0x09, 0x59, 0x21, 0x2A, 0x33, 0x24, 0x44, 0x7F, 0x4E, 0x3F,
        0x6D, 0x50, 0x77, 0x55, 0x09, 0x3B, 0x53, 0x56, 0x55, 0x7C, 0x73, 0x69, 0x3A, 0x35, 0x61,
        0x5F, 0x61, 0x63, 0x65, 0x4B, 0x50, 0x46, 0x58, 0x67, 0x58, 0x3B, 0x51, 0x31, 0x57, 0x49,
        0x69, 0x22, 0x4F, 0x6C, 0x6D, 0x46, 0x5A, 0x4D, 0x68, 0x48, 0x25, 0x7C, 0x27, 0x28, 0x36,
        0x5C, 0x46, 0x70, 0x3D, 0x4A, 0x6E, 0x24, 0x32, 0x7A, 0x79, 0x41, 0x2F, 0x37, 0x3D, 0x5F,
        0x60, 0x5F, 0x4B, 0x51, 0x4F, 0x5A, 0x20, 0x42, 0x2C, 0x36, 0x65, 0x57)
    _OFFSETS = (
        0, 1, 2, 0, 1, 2, 1, 2, 2, 1, 2, 1, 0, 2, 1, 2, 0, 2, 1, 2, 0, 0, 1, 2, 2, 1, 0, 2, 1, 2, 2, 1,
        0, 0, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 2, 0, 2, 1, 0, 2, 1, 2, 0, 0, 1, 2, 2, 0, 0, 1, 2, 0, 2, 1)
    _ENCODER = {
        0x09 : [0x37, 0x69, 0x64], 0x0B : [0x0B, 0x0B, 0x0B], 0x0C : [0x0C, 0x0C, 0x0C],
        0x0E : [0x0E, 0x0E, 0x0E], 0x0F : [0x0F, 0x0F, 0x0F], 0x10 : [0x10, 0x10, 0x10],
        0x11 : [0x11, 0x11, 0x11], 0x12 : [0x12, 0x12, 0x12], 0x13 : [0x13, 0x13, 0x13],
        0x14 : [0x14, 0x14, 0x14], 0x15 : [0x15, 0x15, 0x15], 0x16 : [0x16, 0x16, 0x16],
        0x17 : [0x17, 0x17, 0x17], 0x18 : [0x18, 0x18, 0x18], 0x19 : [0x19, 0x19, 0x19],
        0x1A : [0x1A, 0x1A, 0x1A], 0x1B : [0x1B, 0x1B, 0x1B], 0x1C : [0x1C, 0x1C, 0x1C],
        0x1D : [0x1D, 0x1D, 0x1D], 0x1E : [0x1E, 0x1E, 0x1E], 0x1F : [0x1F, 0x1F, 0x1F],
        0x20 : [0x7E, 0x2C, 0x50], 0x21 : [0x5A, 0x65, 0x22], 0x22 : [0x45, 0x72, 0x4A],
        0x23 : [0x3A, 0x5B, 0x61], 0x24 : [0x79, 0x66, 0x5E], 0x25 : [0x59, 0x75, 0x5D],
        0x26 : [0x27, 0x4C, 0x5B], 0x27 : [0x76, 0x45, 0x42], 0x28 : [0x63, 0x76, 0x60],
        0x29 : [0x62, 0x2A, 0x23], 0x2A : [0x4D, 0x43, 0x65], 0x2B : [0x51, 0x33, 0x5F],
        0x2C : [0x53, 0x42, 0x7E], 0x2D : [0x52, 0x20, 0x4F], 0x2E : [0x20, 0x63, 0x52],
        0x2F : [0x26, 0x4A, 0x7A], 0x30 : [0x54, 0x5A, 0x21], 0x31 : [0x71, 0x38, 0x46],
        0x32 : [0x2B, 0x79, 0x20], 0x33 : [0x66, 0x32, 0x26], 0x34 : [0x2A, 0x57, 0x63],
        0x35 : [0x58, 0x6C, 0x2A], 0x36 : [0x7F, 0x2B, 0x76], 0x37 : [0x7B, 0x46, 0x47],
        0x38 : [0x30, 0x52, 0x25], 0x39 : [0x31, 0x4F, 0x2C], 0x3A : [0x6C, 0x3D, 0x29],
        0x3B : [0x49, 0x70, 0x69], 0x3D : [0x78, 0x7B, 0x27], 0x3F : [0x5F, 0x51, 0x67],
        0x40 : [0x40, None, 0x40], 0x41 : [0x29, 0x7A, 0x62], 0x42 : [0x24, 0x7E, 0x41],
        0x43 : [0x2F, 0x3B, 0x5A], 0x44 : [0x39, 0x47, 0x66], 0x45 : [0x33, 0x41, 0x32],
        0x46 : [0x6F, 0x77, 0x73], 0x47 : [0x21, 0x56, 0x4D], 0x48 : [0x75, 0x5F, 0x43],
        0x49 : [0x28, 0x26, 0x71], 0x4A : [0x42, 0x78, 0x39], 0x4B : [0x46, 0x6E, 0x7C],
        0x4C : [0x4A, 0x64, 0x53], 0x4D : [0x5C, 0x74, 0x48], 0x4E : [0x48, 0x67, 0x31],
        0x4F : [0x36, 0x7D, 0x72], 0x50 : [0x4B, 0x68, 0x6E], 0x51 : [0x7D, 0x35, 0x70],
        0x52 : [0x5D, 0x22, 0x49], 0x53 : [0x6A, 0x55, 0x3F], 0x54 : [0x50, 0x3A, 0x4B],
        0x55 : [0x69, 0x60, 0x6A], 0x56 : [0x23, 0x6A, 0x2E], 0x57 : [0x09, 0x71, 0x7F],
        0x58 : [0x70, 0x6F, 0x28], 0x59 : [0x65, 0x49, 0x35], 0x5A : [0x74, 0x5C, 0x7D],
        0x5B : [0x2C, 0x5D, 0x24], 0x5C : [0x77, 0x27, 0x2D], 0x5D : [0x44, 0x59, 0x54],
        0x5E : [0x3F, 0x25, 0x37], 0x5F : [0x6D, 0x7C, 0x7B], 0x60 : [0x7C, 0x23, 0x3D],
        0x61 : [0x43, 0x6D, 0x6C], 0x62 : [0x38, 0x28, 0x34], 0x63 : [0x5E, 0x31, 0x6D],
        0x64 : [0x5B, 0x39, 0x4E], 0x65 : [0x6E, 0x7F, 0x2B], 0x66 : [0x57, 0x36, 0x30],
        0x67 : [0x4C, 0x54, 0x6F], 0x68 : [0x34, 0x34, 0x74], 0x69 : [0x72, 0x62, 0x6B],
        0x6A : [0x25, 0x4E, 0x4C], 0x6B : [0x56, 0x30, 0x33], 0x6C : [0x73, 0x5E, 0x56],
        0x6D : [0x68, 0x73, 0x3A], 0x6E : [0x55, 0x09, 0x78], 0x6F : [0x47, 0x4B, 0x57],
        0x70 : [0x32, 0x61, 0x77], 0x71 : [0x35, 0x24, 0x3B], 0x72 : [0x2E, 0x4D, 0x44],
        0x73 : [0x64, 0x6B, 0x2F], 0x74 : [0x4F, 0x44, 0x59], 0x75 : [0x3B, 0x21, 0x45],
        0x76 : [0x2D, 0x37, 0x5C], 0x77 : [0x41, 0x53, 0x68], 0x78 : [0x61, 0x58, 0x36],
        0x79 : [0x7A, 0x48, 0x58], 0x7A : [0x22, 0x2E, 0x79], 0x7B : [0x60, 0x50, 0x09],
        0x7C : [0x6B, 0x2D, 0x75], 0x7D : [0x4E, 0x29, 0x38], 0x7E : [0x3D, 0x3F, 0x55],
        0x7F : [0x67, 0x2F, 0x51]
    }

    _ESCAPE = {
        0x40: B'@$',
        0x3C: B'@!',
        0x3E: B'@*',
        0x0D: B'@#',
        0x0A: B'@&',
    }

    _UNESCAPE = {
        B'@$': B'@',
        B'@!': B'<',
        B'@*': B'>',
        B'@#': B'\r',
        B'@&': B'\n',
    }

    def __init__(
        self,
        marker: arg.switch('-m', '--no-marker', off=True, help=(
            'Do not require magic marker when encoding and do not search for '
            'marker when decoding.')
        ) = True
    ):
        super().__init__(marker=marker)

    @classmethod
    def _chunk(cls, byte, index):
        k = byte - 9
        c = cls._CHUNKS[k * 3 : k * 3 + 3]
        return c[cls._OFFSETS[index % 64]]

    def _escape(self, iterable):
        escapes = bytes(self._ESCAPE)
        if self.args.marker:
            yield from self._MARKER_INIT
        for byte in iterable:
            if byte in escapes:
                yield from self._ESCAPE[byte]
            else:
                yield byte
        if self.args.marker:
            yield from self._MARKER_STOP

    def _unescape(self, data):
        def unescaper(m): return self._UNESCAPE[m[0]]
        return re.sub(RB'@[$!*#&]', unescaper, data)

    @classmethod
    def _decoded(cls, data):
        index = -1
        for byte in data:
            if byte < 128:
                index += 1
            if (byte == 9 or 31 < byte < 128) and byte != 60 and byte != 62 and byte != 64:
                byte = cls._chunk(byte, index)
            yield byte

    @classmethod
    def _encoded(cls, data):
        for i, byte in enumerate(data):
            try:
                sequence = cls._ENCODER[byte]
            except KeyError:
                yield byte
            else:
                offset = cls._OFFSETS[i % 0x40]
                yield sequence[offset]

    def reverse(self, data):
        return bytearray(self._escape(self._encoded(data)))

    def process(self, data):
        if self.args.marker:
            match = formats.wshenc.search(data)
            if not match:
                raise ValueError('Encoded script marker was not found.')
            data = match[0][12:-12]
        return bytearray(self._decoded(self._unescape(data)))

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xt7z (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', pwd=b'')

This unit is implemented in refinery.units.formats.archive.xt7z and has the following commandline Interface:

usage: xt7z [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
            [-p PWD]
            [path [path ...]]

Extract files from a 7zip archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -p, --pwd PWD    Optionally specify an extraction password.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
class xt7z(ArchiveUnit):
    """
    Extract files from a 7zip archive.
    """
    @ArchiveUnit.Requires('py7zr', optional=False)
    def _py7zr():
        import py7zr
        return py7zr

    def unpack(self, data):

        def mk7z(**keywords):
            return self._py7zr.SevenZipFile(MemoryFile(mv[zp:]), **keywords)

        pwd = self.args.pwd
        mv = memoryview(data)
        zp = max(0, data.find(B'7z\xBC\xAF\x27\x1C'))

        if pwd:
            archive = mk7z(password=pwd.decode(self.codec))
        else:
            archive = mk7z()
            for pwd in self._COMMON_PASSWORDS:
                try:
                    problem = archive.testzip()
                except self._py7zr.PasswordRequired:
                    problem = True
                if not problem:
                    break
                self.log_debug(F'trying password: {pwd}')
                archive = mk7z(password=pwd)

        for info in archive.list():
            def extract(archive: SevenZipFile = archive, info: FileInfo = info):
                archive.reset()
                return archive.read(info.filename).get(info.filename).read()
            if info.is_directory:
                continue
            yield self._pack(info.filename, info.creationtime, extract)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xtace (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', pwd=b'')

This unit is implemented in refinery.units.formats.archive.xtace and has the following commandline Interface:

usage: xtace [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
             [-p PWD]
             [path [path ...]]

Extract files from an ACE archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -p, --pwd PWD    Optionally specify an extraction password.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
class xtace(ArchiveUnit):
    """
    Extract files from an ACE archive.
    """
    def unpack(self, data):
        ace = acefile.open(MemoryFile(data, read_as_bytes=True))
        for member in ace.getmembers():
            member: acefile.AceMember
            comment = {} if not member.comment else {'comment': member.comment}
            yield self._pack(
                member.filename,
                member.datetime,
                lambda a=ace, m=member: a.read(m, pwd=self.args.pwd),
                **comment
            )

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xtcpio (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', pwd=b'')

This unit is implemented in refinery.units.formats.archive.xtcpio and has the following commandline Interface:

usage: xtcpio [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
              [-p PWD]
              [path [path ...]]

Extract files from a CPIO archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -p, --pwd PWD    Optionally specify an extraction password.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
class xtcpio(ArchiveUnit):
    """
    Extract files from a CPIO archive.
    """
    def unpack(self, data):
        def cpio():
            with suppress(EOF): return CPIOEntry(reader)
        reader = StructReader(memoryview(data))
        for entry in iter(cpio, None):
            if entry.name == 'TRAILER!!!':
                break
            yield self._pack(entry.name, entry.mtime, entry.data)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xtiso (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', fs='auto')

This unit is implemented in refinery.units.formats.archive.xtiso and has the following commandline Interface:

usage: xtiso [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
             [-s TYPE]
             [path [path ...]]

Extract files from an ISO archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -s, --fs TYPE    Specify a file system (udf, joliet, rr, iso, auto)
                   extension to use. The default setting auto will
                   automatically detect the first of the other available
                   options and use it.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
class xtiso(ArchiveUnit):
    """
    Extract files from an ISO archive.
    """
    def __init__(self, *paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date',
        fs: arg.choice('-s', metavar='TYPE', choices=_ISO_FILE_SYSTEMS, help=(
            'Specify a file system ({choices}) extension to use. The default setting {default} will automatically '
            'detect the first of the other available options and use it.')) = 'auto'
    ):
        if fs not in _ISO_FILE_SYSTEMS:
            raise ValueError(F'invalid file system {fs}: must be udf, joliet, rr, iso, or auto.')
        super().__init__(*paths, list=list, join_path=join_path, drop_path=drop_path, path=path, date=date, fs=fs)

    @ArchiveUnit.Requires('pycdlib', optional=False)
    def _pycdlib():
        import pycdlib
        import pycdlib.dates

        def fixed_parse(self, datestr):
            datestr = datestr[:-3] + b'00\0'
            return original_parse(self, datestr)

        original_parse = pycdlib.dates.VolumeDescriptorDate.parse
        pycdlib.dates.VolumeDescriptorDate.parse = fixed_parse
        return pycdlib

    def unpack(self, data):
        with MemoryFile(data) as stream:
            iso = self._pycdlib.PyCdlib()
            iso.open_fp(stream)
            fs = self.args.fs
            if fs != 'auto':
                mkfacade = {
                    'iso'    : iso.get_iso9660_facade,
                    'udf'    : iso.get_udf_facade,
                    'joliet' : iso.get_joliet_facade,
                    'rr'     : iso.get_rock_ridge_facade,
                }
                facade = mkfacade[fs]()
            elif iso.has_udf():
                facade = iso.get_udf_facade()
            elif iso.has_joliet():
                facade = iso.get_joliet_facade()
            elif iso.has_rock_ridge():
                facade = iso.get_rock_ridge_facade()
            else:
                facade = iso.get_iso9660_facade()

            for root, _, files in facade.walk('/'):
                root = root.rstrip('/')
                for name in files:
                    name = name.lstrip('/')
                    path = F'{root}/{name}'
                    try:
                        info = facade.get_record(path)
                    except Exception:
                        info = None
                        date = None
                    else:
                        date = datetime.datetime(
                            info.date.years_since_1900 + 1900,
                            info.date.month,
                            info.date.day_of_month,
                            info.date.hour,
                            info.date.minute,
                            info.date.second,
                            tzinfo=datetime.timezone(datetime.timedelta(minutes=15 * info.date.gmtoffset))
                        )

                    def extract(info=info):
                        if info:
                            buffer = MemoryFile(bytearray(info.data_length))
                        else:
                            buffer = MemoryFile(bytearray())
                        facade.get_file_from_iso_fp(buffer, path)
                        return buffer.getvalue()

                    yield self._pack(path, date, extract)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xtpyi (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', user_code=False, unmarshal=0)

This unit is implemented in refinery.units.formats.archive.xtpyi and has the following commandline Interface:

usage: xtpyi [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
             [-u | -y]
             [path [path ...]]

Extracts and decompiles files from a Python Installer (aka PyInstaller)
archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -u, --user-code  Extract only source code files from the root of the
                   archive. These usually implement the actual domain
                   logic.
  -y, --unmarshal  (DANGEROUS) Unmarshal embedded PYZ archives. Warning:
                   Maliciously crafted packages can potentially exploit
                   this to execute code. It is advised to only use this
                   option inside an isolated environment. Specify twice to
                   decompile unmarshalled Python bytecode.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
class xtpyi(ArchiveUnit):
    """
    Extracts and decompiles files from a Python Installer (aka PyInstaller) archive.
    """
    def __init__(
        self, *paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date',
        user_code: arg.switch('-u', group='FILTER', help=(
            'Extract only source code files from the root of the archive. These usually implement '
            'the actual domain logic.')) = False,
        unmarshal: arg('-y', action='count', group='FILTER', help=(
            '(DANGEROUS) Unmarshal embedded PYZ archives. Warning: Maliciously crafted packages can '
            'potentially exploit this to execute code. It is advised to only use this option inside '
            'an isolated environment. Specify twice to decompile unmarshalled Python bytecode.'
        )) = 0
    ):
        super().__init__(
            *paths,
            list=list, join_path=join_path, drop_path=drop_path, path=path, date=date,
            unmarshal=unmarshal, user_code=user_code
        )

    @ArchiveUnit.Requires('xdis', optional=False)
    def _xdis():
        import xdis.load
        import xdis.magics
        import xdis.marsh
        import xdis
        A, B, C, *_ = sys.version_info
        V = F'{A}.{B}.{C}'
        if V not in xdis.magics.canonic_python_version:
            xdis.magics.add_canonic_versions(V, F'{A}.{B}')
        del A, B, C, V
        return xdis

    @ArchiveUnit.Requires('uncompyle6', optional=False)
    def _uncompyle6():
        import uncompyle6
        import uncompyle6.main
        return uncompyle6

    def unpack(self, data):
        view = memoryview(data)
        positions = [m.start() for m in re.finditer(re.escape(PyInstallerArchiveEpilogue.MagicSignature), view)]
        mode = Unmarshal(min(2, int(self.args.unmarshal)))
        self.log_debug(F'unmarshal mode: {mode.name}')
        if not positions:
            raise LookupError('unable to find PyInstaller signature')
        if len(positions) > 2:
            # first position is expected to be the sentinel value in the unpacker stub
            width = max(len(F'{p:X}') for p in positions)
            for position in positions:
                self.log_info(F'magic signature found at offset 0x{position:0{width}X}')
            self.log_warn(F'found {len(positions)-1} potential PyInstaller epilogue markers; using last one.')
        archive = PyInstallerArchiveEpilogue(view, positions[-1], mode)
        for name, file in archive.files.items():
            if self.args.user_code:
                if file.type != PiType.USERCODE:
                    continue
                if name.startswith('pyiboot'):
                    continue
            yield self._pack(name, None, file.data, type=file.type.name)

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xttar (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date')

This unit is implemented in refinery.units.formats.archive.xttar and has the following commandline Interface:

usage: xttar [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
             [path [path ...]]

Extract files from a Tar archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
class xttar(ArchiveUnit):
    """
    Extract files from a Tar archive.
    """
    def __init__(self, *paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date'):
        super().__init__(*paths, list=list, join_path=join_path, drop_path=drop_path, path=path, date=date)

    def unpack(self, data):
        archive = tarfile.open(fileobj=MemoryFile(data))
        for info in archive.getmembers():
            if not info.isfile():
                continue
            extractor = archive.extractfile(info)
            if extractor is None:
                continue
            date = datetime.datetime.fromtimestamp(info.mtime)
            yield self._pack(info.name, date, lambda e=extractor: e.read())
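The extraction loop above maps directly onto the tarfile API. A self-contained round trip that first builds a small archive in memory (file name and payload are made up for illustration):

import io
import tarfile

buffer = io.BytesIO()
with tarfile.open(fileobj=buffer, mode='w') as tar:
    payload = b'hello'
    info = tarfile.TarInfo('greeting.txt')
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))

with tarfile.open(fileobj=io.BytesIO(buffer.getvalue())) as tar:
    member = tar.getmember('greeting.txt')
    assert member.isfile()
    assert tar.extractfile(member).read() == b'hello'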

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xtzip (*paths, list=False, join_path=False, drop_path=False, path=b'path', date=b'date', pwd=b'')

This unit is implemented in refinery.units.formats.archive.xtzip and has the following commandline Interface:

usage: xtzip [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-D NAME]
             [-p PWD]
             [path [path ...]]

Extract files from a Zip archive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".
  -D, --date NAME  Name of the meta variable to receive the extracted file
                   date. The default value is "date".
  -p, --pwd PWD    Optionally specify an extraction password.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
class xtzip(ArchiveUnit):
    """
    Extract files from a Zip archive.
    """
    @ArchiveUnit.Requires('chardet', optional=True)
    def _chardet():
        import chardet
        return chardet

    def unpack(self, data):
        password = self.args.pwd.decode(self.codec)
        archive = ZipFile(MemoryFile(data))

        if password:
            archive.setpassword(self.args.pwd)
        else:
            def password_invalid(pwd: Optional[str]):
                if pwd is not None:
                    archive.setpassword(pwd.encode(self.codec))
                try:
                    archive.testzip()
                except RuntimeError as E:
                    if 'password' not in str(E):
                        raise
                    return True
                else:
                    self.log_debug(pwd)
                    return False
            for pwd in [None, *self._COMMON_PASSWORDS]:
                if not password_invalid(pwd):
                    break
            else:
                raise RuntimeError('Archive is password-protected.')

        for info in archive.infolist():
            def xt(archive: ZipFile = archive, info: ZipInfo = info):
                try:
                    return archive.read(info.filename)
                except RuntimeError as E:
                    if 'password' not in str(E):
                        raise
                    if not password:
                        raise RuntimeError('archive is password-protected')
                    else:
                        raise RuntimeError(F'invalid password: {password}') from E
            if info.is_dir():
                continue
            try:
                date = datetime(*info.date_time)
            except Exception:
                date = None

            # courtesy of https://stackoverflow.com/a/37773438/9130824
            filename = info.filename
            if info.flag_bits & ZIP_FILENAME_UTF8_FLAG == 0:
                filename_bytes = filename.encode('437')
                try:
                    guessed_encoding = self._chardet.detect(filename_bytes)['encoding']
                except ImportError:
                    guessed_encoding = None
                guessed_encoding = guessed_encoding or 'cp1252'
                filename = filename_bytes.decode(guessed_encoding, 'replace')

            yield self._pack(filename, date, xt)
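The same pattern with the zipfile module, omitting the password handling and filename-encoding fallback shown above:

import io
import zipfile

buffer = io.BytesIO()
with zipfile.ZipFile(buffer, 'w') as archive:
    archive.writestr('greeting.txt', b'hello')

with zipfile.ZipFile(io.BytesIO(buffer.getvalue())) as archive:
    for info in archive.infolist():
        if not info.is_dir():
            assert archive.read(info.filename) == b'hello'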

Ancestors

Class variables

var required_dependencies
var optional_dependencies

Inherited members

class csv (quote=b'"', delim=b',')

This unit is implemented in refinery.units.formats.csv and has the following commandline Interface:

usage: csv [-h] [-L] [-Q] [-0] [-v] [-q QUOTE] [-d DELIM]

Extracts the rows of a CSV document with header and converts them into
JSON chunks.

optional arguments:
  -q, --quote QUOTE  Specify the quote character, the default is a double
                     quote.
  -d, --delim DELIM  Specify the delimiter, the default is a single comma.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class csv(Unit):
    """
    Extracts the rows of a CSV document with header and converts them into JSON chunks.
    """
    def __init__(
        self,
        quote: Unit.Arg('-q', help='Specify the quote character, the default is a double quote.') = B'"',
        delim: Unit.Arg('-d', help='Specify the delimiter, the default is a single comma.') = B','
    ):
        super().__init__(quote=quote, delim=delim)

    def process(self, data):
        import csv
        import io
        import json

        quote = self.args.quote.decode(self.codec)
        delim = self.args.delim.decode(self.codec)

        def convert(field: str):
            if field.isdigit() and not field.startswith('0'):
                return int(field)
            date = isodate(field)
            if date is not None:
                return date.isoformat(' ', 'seconds')
            return field

        with io.TextIOWrapper(MemoryFile(data), self.codec) as stream:
            rows = csv.reader(stream, quotechar=quote, delimiter=delim, skipinitialspace=True)
            keys = next(rows)
            for row in rows:
                out = {key: convert(value) for key, value in zip(keys, row)}
                yield json.dumps(out, indent=4).encode(self.codec)
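
For reference, the transformation can be reproduced with nothing but the standard library. The sketch below mirrors the conversion above for a made-up two-row document; the ISO date handling (isodate) is omitted:

import csv, io, json

document = io.StringIO('name,size,created\nsample.bin,2048,2021-01-01\n')

def convert(field: str):
    # digit-only fields that do not start with 0 become integers
    if field.isdigit() and not field.startswith('0'):
        return int(field)
    return field

rows = csv.reader(document, quotechar='"', delimiter=',', skipinitialspace=True)
keys = next(rows)
for row in rows:
    print(json.dumps({key: convert(value) for key, value in zip(keys, row)}, indent=4))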

class dsphp

This unit is implemented in refinery.units.formats.deserialize_php and has the following commandline Interface:

usage: dsphp [-h] [-L] [-Q] [-0] [-v] [-R]

Deserialize PHP serialized data and re-serialize as JSON.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class dsphp(Unit):
    """
    Deserialize PHP serialized data and re-serialize as JSON.
    """
    @Unit.Requires('phpserialize', optional=False)
    def _php():
        import phpserialize
        return phpserialize

    def reverse(self, data):
        return self._php.dumps(json.loads(data))

    def process(self, data):
        phpobject = self._php.phpobject

        class encoder(json.JSONEncoder):
            def default(self, obj):
                try:
                    return super().default(obj)
                except TypeError:
                    pass
                if isinstance(obj, bytes) or isinstance(obj, bytearray):
                    return obj.decode('utf8')
                if isinstance(obj, phpobject):
                    return obj._asdict()

        return json.dumps(
            self._php.loads(
                data,
                object_hook=phpobject,
                decode_strings=True
            ),
            indent=4,
            cls=encoder
        ).encode(self.codec)
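
To illustrate the mapping, here is a round trip over a hand-made PHP array using the phpserialize package directly (the same required dependency as above); the sample input is arbitrary:

import json
import phpserialize

# a serialized PHP array with an integer member and a string member
sample = b'a:2:{s:3:"foo";i:1;s:3:"bar";s:3:"baz";}'

decoded = phpserialize.loads(sample, decode_strings=True)
print(json.dumps(decoded))  # {"foo": 1, "bar": "baz"}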

class xtmail (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.email and has the following commandline Interface:

usage: xtmail [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
              [path [path ...]]

Extract files and body from EMail messages. The unit supports both the
Outlook message format and regular MIME documents.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtmail(PathExtractorUnit):
    """
    Extract files and body from EMail messages. The unit supports both the Outlook message format
    and regular MIME documents.
    """
    def _get_headparts(self, head):
        mw = mimewords()
        mw = partial(mw.process.__wrapped__.__wrapped__, mw)
        jh = defaultdict(list)
        for key, value in head:
            jh[key].append(mw(''.join(t.lstrip() for t in value.splitlines(False))))
        jh = {k: v[0] if len(v) == 1 else [t for t in v if t] for k, v in jh.items()}
        yield UnpackResult('headers.txt',
            lambda h=head: '\n'.join(F'{k}: {v}' for k, v in h).encode(self.codec))
        yield UnpackResult('headers.json',
            lambda jsn=jh: json.dumps(jsn, indent=4).encode(self.codec))

    @PathExtractorUnit.Requires('extract_msg', optional=False)
    def _extract_msg():
        from extract_msg.message import Message
        return Message

    def _get_parts_outlook(self, data):
        def ensure_bytes(data):
            return data if isinstance(data, bytes) else data.encode(self.codec)

        def make_message(name, msg):
            with NoLogging:
                htm = msg.htmlBody
                txt = msg.body
            if txt:
                yield UnpackResult(F'{name}.txt', ensure_bytes(txt))
            if htm:
                yield UnpackResult(F'{name}.htm', ensure_bytes(htm))

        msgcount = 0

        with NoLogging:
            msg = self._extract_msg(bytes(data))

        yield from self._get_headparts(msg.header.items())
        yield from make_message('body', msg)

        def attachments(msg):
            for attachment in getattr(msg, 'attachments', ()):
                yield attachment
                if attachment.type == 'data':
                    continue
                yield from attachments(attachment.data)

        for attachment in attachments(msg):
            self.log_debug(attachment)
            if attachment.type == 'msg':
                msgcount += 1
                yield from make_message(F'attachments/msg_{msgcount:d}', attachment.data)
                continue
            if not isbuffer(attachment.data):
                self.log_warn(F'unknown attachment of type {attachment.type}, please report this!')
                continue
            path = attachment.longFilename or attachment.shortFilename
            yield UnpackResult(F'attachments/{path}', attachment.data)

    def _get_parts_regular(self, data):
        msg = BytesParser().parsebytes(data)

        yield from self._get_headparts(msg.items())

        for k, part in enumerate(msg.walk()):
            path = part.get_filename()
            elog = None
            if path is None:
                extension = file_extension(part.get_content_type(), 'txt')
                path = F'body.{extension}'
            else:
                path = F'attachments/{path}'
            try:
                data = part.get_payload(decode=True)
            except Exception as E:
                try:
                    data = part.get_payload(decode=False)
                except Exception as E:
                    elog = str(E)
                    data = None
                else:
                    from refinery import carve
                    self.log_warn(F'manually decoding part {k}, data might be corrupted: {path}')
                    if isinstance(data, str):
                        data = data.encode('latin1')
                    if isbuffer(data):
                        data = next(data | carve('b64', stripspace=True, single=True, decode=True))
                    else:
                        elog = str(E)
                        data = None
            if not data:
                if elog is not None:
                    self.log_warn(F'could not get content of message part {k}: {elog!s}')
                continue
            yield UnpackResult(path, data)

    def unpack(self, data):
        try:
            yield from self._get_parts_outlook(data)
        except Exception:
            self.log_debug('failed parsing input as Outlook message')
            yield from self._get_parts_regular(data)

class evtx (raw=False)

This unit is implemented in refinery.units.formats.evtx and has the following commandline Interface:

usage: evtx [-h] [-L] [-Q] [-0] [-v] [-r]

Extracts data from Windows Event Log files (EVTX). Each extracted log
entry is returned as a single output chunk in XML format.

optional arguments:
  -r, --raw      Extract raw event data rather than XML.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class evtx(Unit):
    """
    Extracts data from Windows Event Log files (EVTX). Each extracted log entry is returned as a single
    output chunk in XML format.
    """

    def __init__(self, raw: Unit.Arg.switch('-r', help='Extract raw event data rather than XML.') = False):
        super().__init__(raw=raw)

    @Unit.Requires('python-evtx')
    def _evtx():
        from Evtx.Evtx import Evtx
        return Evtx

    def process(self, data):
        with VirtualFileSystem() as vfs:
            raw = self.args.raw
            with self._evtx(VirtualFile(vfs, data)) as log:
                for record in log.records():
                    yield record.data() if raw else record.xml().encode(self.codec)

class vsect (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.exe.vsect and has the following commandline Interface:

usage: vsect [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
             [path [path ...]]

Extract sections/segments from PE, ELF, and MachO executables.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class vsect(PathExtractorUnit):
    """
    Extract sections/segments from PE, ELF, and MachO executables.
    """
    def unpack(self, data):
        mv = memoryview(data)
        for name, start, size in exeroute(
            data,
            self._unpack_elf,
            self._unpack_macho,
            self._unpack_pe
        ):
            end = start + size
            yield UnpackResult(name, mv[start:end])

    @staticmethod
    def _ascii(string: bytes) -> str:
        term = string.find(0)
        if term >= 0:
            string = string[:term]
        return string.decode('latin-1')

    def _unpack_pe(self, pe):
        for section in pe.sections:
            yield self._ascii(section.Name), section.PointerToRawData, section.SizeOfRawData

    def _unpack_elf(self, elf):
        for section in elf.iter_sections():
            if section.is_null():
                continue
            yield section.name, section['sh_offset'], section.data_size

    def _unpack_macho(self, macho):
        for header in macho.headers:
            for command in header.commands:
                header, segment, sections = command
                if not header.get_cmd_name().startswith('LC_SEGMENT'):
                    continue
                segname = self._ascii(segment.segname)
                yield segname, segment.fileoff, segment.filesize
                for section in sections:
                    secname = F'{segname}/{self._ascii(section.sectname)}'
                    yield secname, section.offset, section.size
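
The PE branch above corresponds to the plain pefile view of the section table. The following self-contained sketch lists the same name/offset/size triples; the input path is hypothetical:

import pefile

pe = pefile.PE('sample.exe')  # hypothetical input file
for section in pe.sections:
    name = section.Name.rstrip(b'\0').decode('latin-1')
    start = section.PointerToRawData
    size = section.SizeOfRawData
    print(F'{name:<10} offset={start:#010x} size={size:#010x}')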

class vsnip (addresses, ascii=False, utf16=False, until=b'', base=None)

This unit is implemented in refinery.units.formats.exe.vsnip and has the following commandline Interface:

usage: vsnip [-h] [-L] [-Q] [-0] [-v] [-a | -u | -t B] [-b ADDR]
             start:count:align [start:count:align ...]

Extract data from PE, ELF, and MachO files based on virtual offsets.

positional arguments:
  start:count:align  Use Python slice syntax to describe an area of
                     virtual memory to read. If a chunksize is specified,
                     then the unit will always read a multiple of that
                     number of bytes

optional arguments:
  -a, --ascii        Read ASCII strings; equivalent to -th:00
  -u, --utf16        Read UTF16 strings; equivalent to -th:0000 (also sets
                     chunksize to 2)
  -t, --until B      Read until sequence B is read.
  -b, --base ADDR    Optionally specify a custom base address B.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
Expand source code Browse git
class vsnip(Unit):
    """
    Extract data from PE, ELF, and MachO files based on virtual offsets.
    """

    def __init__(
        self, addresses: arg(type=sliceobj, nargs='+', metavar='start:count:align', help=(
            'Use Python slice syntax to describe an area of virtual memory to read. If a chunksize is '
            'specified, then the unit will always read a multiple of that number of bytes')),
        ascii: arg.switch('-a', group='END', help='Read ASCII strings; equivalent to -th:00') = False,
        utf16: arg.switch('-u', group='END', help='Read UTF16 strings; equivalent to -th:0000 (also sets chunksize to 2)') = False,
        until: arg.binary('-t', group='END', help='Read until sequence {varname} is read.') = B'',
        base : arg.number('-b', metavar='ADDR', help='Optionally specify a custom base address B.') = None,
    ):
        if sum(1 for t in (until, utf16, ascii) if t) > 1:
            raise ValueError('Only one of utf16, ascii, and until can be specified.')
        return super().__init__(addresses=addresses, utf16=utf16, ascii=ascii, until=until, base=base)

    def process(self, data):
        until = self.args.until
        addrs = self.args.addresses
        if self.args.ascii:
            until = B'\0'
        if self.args.utf16:
            until = B'\0\0'
            addrs = (slice(a.start, a.stop, 2) for a in addrs)

        for addr in addrs:
            area = MemoryArea(addr)
            offset, lbound = exeroute(
                data,
                self._get_buffer_range_elf,
                self._get_buffer_range_macho,
                self._get_buffer_range_pe,
                area.start
            )

            lbound = lbound or len(data)

            if not until:
                end = lbound
            else:
                end = offset - 1
                align = area.align
                while True:
                    end = data.find(until, end + 1)
                    if end not in range(offset, lbound):
                        raise EndOfStringNotFound
                    if (end - offset) % align == 0:
                        break

            if area.count:
                end = min(end, offset + area.count)

            yield data[offset:end]

    def _rebase(self, addr, truebase):
        self.log_info(F'using base address: 0x{truebase:X}')
        if self.args.base is None:
            return addr
        rebased = addr - self.args.base + truebase
        self.log_info(F'rebased to address: 0x{rebased:X}')
        return rebased

    def _get_buffer_range_elf(self, elf: ELFFile, address: int):
        PT_LOAD = {}
        if not elf.num_segments():
            raise LookupError('The elftools parser did not find any segments in this file.')
        for segment in elf.iter_segments():
            if segment.header.p_type == 'PT_LOAD':
                PT_LOAD[segment.header.p_vaddr] = segment
                self.log_info(F'Found PT_LOAD segment with base address 0x{segment.header.p_vaddr:x}')
        if not PT_LOAD:
            raise LookupError(F'Could not find any PT_LOAD segment containing 0x{address:x}.')
        addr = self._rebase(address, min(PT_LOAD))
        for segment in elf.iter_segments():
            begin = segment.header.p_vaddr
            size = segment.header.p_memsz
            delta = addr - begin
            if delta in range(size + 1):
                offset = segment.header.p_offset
                return offset + delta, offset + segment.header.p_filesz
        raise CompartmentNotFound(addr)

    def _get_buffer_range_macho(self, macho: MachO, address: int):
        for header in macho.headers:
            segments = [segment for header, segment, sections in header.commands
                if header.get_cmd_name().startswith('LC_SEGMENT') and segment.filesize > 0]
            addr = self._rebase(address, min(segment.vmaddr for segment in segments))
            for segment in segments:
                if addr in range(segment.vmaddr, segment.vmaddr + segment.vmsize):
                    offset = addr - segment.vmaddr
                    return offset + segment.fileoff, segment.fileoff + segment.filesize
        raise CompartmentNotFound(address)

    def _get_buffer_range_pe(self, pe: PEFile, address: int):
        base = pe.OPTIONAL_HEADER.ImageBase
        addr = self._rebase(address, base) - base
        offset = pe.get_offset_from_rva(addr)
        for section in pe.sections:
            if offset in range(section.PointerToRawData, section.PointerToRawData + section.SizeOfRawData):
                return offset, section.PointerToRawData + section.SizeOfRawData
        raise CompartmentNotFound(addr, 'section')
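
For the PE case, the translation above amounts to subtracting the image base and asking pefile for the file offset of the resulting RVA; reading then stops at the end of the raw data of the containing section. A minimal sketch of that arithmetic, with a made-up sample path and virtual address:

import pefile

path = 'sample.exe'        # hypothetical input file
address = 0x140001000      # hypothetical virtual address

pe = pefile.PE(path)
rva = address - pe.OPTIONAL_HEADER.ImageBase
offset = pe.get_offset_from_rva(rva)

for section in pe.sections:
    lower = section.PointerToRawData
    upper = lower + section.SizeOfRawData
    if lower <= offset < upper:
        with open(path, 'rb') as stream:
            stream.seek(offset)
            data = stream.read(upper - offset)  # the range vsnip would slice
        break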

class hexload (hexaddr=True, width=0, expand=False)

This unit is implemented in refinery.units.formats.hexload and has the following commandline Interface:

usage: hexload [-h] [-L] [-Q] [-0] [-v] [-R] [-A] [-W N] [-E]

Convert hex dumps back to the original data and vice versa. All options of
this unit apply to its reverse operation where binary data is converted to
a readable hexdump format. The default mode of the unit expects the input
data to contain a readable hexdump and converts it back to binary.

optional arguments:
  -A, --no-addr  Do not show addresses in hexdump
  -W, --width N  Specify the number of hexadecimal characters to use in
                 preview.
  -E, --expand   Do not compress sequences of identical lines in hexdump

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class hexload(HexViewer):
    """
    Convert hex dumps back to the original data and vice versa. All options of this unit apply
    to its reverse operation where binary data is converted to a readable hexdump format.
    The default mode of the unit expects the input data to contain a readable hexdump and
    converts it back to binary.
    """
    @regex
    class _ENCODED_BYTES:
        R"""
        (?ix)(?:^|(?<=\s))                      # encoded byte patches must be prefixed by white space
        (?:
            (?:                                 # separated chunks of hex data
                [a-f0-9]{2}                     # hexadecimal chunk; single byte (two hexadecimal letters)
                \s{1,2}                         # encoded byte followed by whitespace
                (?:                             # at least one more encoded byte
                    [a-f0-9]{2}                 # followed by more encoded bytes
                    (?:\s{1,2}[a-f0-9]{2})*     # unless it was just a single byte
                )?
            )
            | (?:[a-f0-9]{4}\s{1,2}(?:[a-f0-9]{4}(?:\s{1,2}[a-f0-9]{4})*)?)   # 2-byte chunks
            | (?:[a-f0-9]{8}\s{1,2}(?:[a-f0-9]{8}(?:\s{1,2}[a-f0-9]{8})*)?)   # 4-byte chunks
            | (?:(?:[a-f0-9]{2})+)              # continuous line of hexadecimal characters
        )(?=\s|$)                               # terminated by a whitespace or line end
        """

    def __init__(self, hexaddr=True, width=0, expand=False):
        super().__init__(hexaddr=hexaddr, width=width, expand=expand)
        self._hexline_pattern = re.compile(F'{make_hexline_pattern(1)}(?:[\r\n]|$)', flags=re.MULTILINE)

    def process(self, data: bytearray):
        lines = data.decode(self.codec).splitlines(keepends=False)

        if not lines:
            return None

        decoded_bytes = bytearray()
        encoded_byte_matches: List[Dict[int, int]] = []

        for line in lines:
            matches: Dict[int, int] = {}
            encoded_byte_matches.append(matches)
            for match in self._ENCODED_BYTES.finditer(line):
                a, b = match.span()
                matches[a] = b - a

        it = iter(encoded_byte_matches)
        offsets = set(next(it).keys())
        for matches in it:
            offsets.intersection_update(matches.keys())
        if not offsets:
            raise ValueError('unable to determine the position of the hex bytes in this dump')
        lengths: Dict[int, List[int]] = {offset: [] for offset in offsets}
        del offsets
        for matches in encoded_byte_matches:
            for offset in lengths:
                lengths[offset].append(matches[offset])
        for offset in lengths:
            lengths[offset].sort()
        midpoint = len(encoded_byte_matches) // 2
        offset, length = max(((offset, lengths[offset][midpoint]) for offset in lengths),
            key=operator.itemgetter(1))
        end = offset + length
        del lengths
        for line in lines:
            encoded_line = line[offset:end]
            self.log_debug(F'decoding: {encoded_line.strip()}')
            decoded_line = bytes.fromhex(encoded_line)
            decoded_bytes.extend(decoded_line)
            txt = line[end:]
            txt_stripped = txt.strip()
            if not txt_stripped:
                continue
            if len(decoded_line) not in range(len(txt_stripped), len(txt) + 1):
                self.log_warn(F'preview size {len(txt_stripped)} does not match decoding: {line}')
        if decoded_bytes:
            yield decoded_bytes

    def reverse(self, data):
        for line in self.hexdump(data):
            yield line.encode(self.codec)
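
The column detection above keeps only those start offsets that occur on every line and picks the one with the largest median match length; everything outside that column (addresses, ASCII previews) is ignored. Once the column is known, recovery is just bytes.fromhex, as in this single-line illustration:

# For this layout, the detected hex column starts at offset 10 and is 41
# characters wide; the address and the ASCII preview are discarded.
line = '00000000: 48 65 6c 6c 6f 2c 20 57 6f 72 6c 64 21 0a        Hello, World!.'
print(bytes.fromhex(line[10:51]))  # b'Hello, World!\n'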

class xthtml (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.html and has the following commandline Interface:

usage: xthtml [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
              [path [path ...]]

The unit processes an HTML document and extracts the contents of all
elements in the DOM of the given tag. The main purpose is to extract
scripts from HTML documents.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xthtml(PathExtractorUnit):
    """
    The unit processes an HTML document and extracts the contents of all elements in the DOM of the
    given tag. The main purpose is to extract scripts from HTML documents.
    """
    def unpack(self, data):
        def tree(root: HTMLNode, *path):

            def outer(root: HTMLNode = root):
                return root.recover(inner=False).encode(self.codec)

            def inner(root: HTMLNode = root):
                return root.recover().encode(self.codec)

            tagpath = '/'.join(path)

            if root.root:
                yield UnpackResult(tagpath, inner)
            else:
                yield UnpackResult(F'{tagpath}.outer', outer)
                yield UnpackResult(F'{tagpath}.inner', inner)

            tag_count = collections.defaultdict(int)
            tag_index = collections.defaultdict(int)
            for node in root.children:
                tag_count[node.tag] += 1
            for node in root.children:
                node: HTMLNode
                name: str = node.tag
                if node.textual:
                    continue
                if tag_count[node.tag] > 1:
                    tag_index[node.tag] = index = tag_index[node.tag] + 1
                    name = F'{name}({index})'
                yield from tree(node, *path, name)

        parser = HTMLTreeParser()
        parser.feed(data.decode(self.codec))
        root = parser.tos
        while root.parent:
            self.log_info(F'tag was not closed: {root.tag}')
            root = root.parent

        yield from tree(root, root.tag)

class httpresponse

This unit is implemented in refinery.units.formats.httpresponse and has the following commandline Interface:

usage: httpresponse [-h] [-L] [-Q] [-0] [-v]

Parses HTTP response text, as you would obtain from a packet dump. This
can be useful if chunked or compressed transfer encoding was used.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class httpresponse(Unit):
    """
    Parses HTTP response text, as you would obtain from a packet dump. This can be
    useful if chunked or compressed transfer encoding was used.
    """
    def process(self, data):
        with SockWrapper(data) as mock:
            mock.seek(0)
            parser = HTTPResponse(mock)
            parser.begin()
            return parser.read()
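
SockWrapper plays the role of the classic fake-socket trick: http.client.HTTPResponse only needs an object with a makefile method. A standalone sketch of the same idea, with a made-up response dump:

import io
from http.client import HTTPResponse

class FakeSocket:
    # just enough of the socket interface for HTTPResponse to parse a dump
    def __init__(self, data: bytes):
        self._data = data

    def makefile(self, *args, **kwargs):
        return io.BytesIO(self._data)

raw = (b'HTTP/1.1 200 OK\r\n'
       b'Content-Type: text/plain\r\n'
       b'Content-Length: 5\r\n'
       b'\r\n'
       b'hello')

response = HTTPResponse(FakeSocket(raw))
response.begin()
print(response.read())  # b'hello'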

class dsjava

This unit is implemented in refinery.units.formats.java.deserialize and has the following commandline Interface:

usage: dsjava [-h] [-L] [-Q] [-0] [-v]

Deserialize Java serialized data and re-serialize as JSON.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class dsjava(Unit):
    """
    Deserialize Java serialized data and re-serialize as JSON.
    """
    @Unit.Requires('javaobj-py3>=0.4.0.1', optional=False)
    def _javaobj():
        import javaobj.v2
        return javaobj.v2

    def process(self, data):
        with JavaEncoder as encoder:
            return encoder.dumps(self._javaobj.loads(data)).encode(self.codec)

class jvdasm (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.java.jvdasm and has the following commandline Interface:

usage: jvdasm [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
              [path [path ...]]

Disassembles the JVM bytecode instructions of methods of classes defined
in Java class files. The unit is implemented as a PathExtractorUnit and
each path name corresponds to the name of one method defined in the class
file.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class jvdasm(PathExtractorUnit):
    """
    Disassembles the JVM bytecode instructions of methods of classes defined in Java class
    files. The unit is implemented as a `refinery.units.formats.PathExtractorUnit` and each
    path name corresponds to the name of one method defined in the class file.
    """
    _OPC_STRLEN = max(len(op.name) for op in opc)

    def _hex(self, bytestring, sep=''):
        return sep.join(F'{x:02x}' for x in bytestring)

    def unpack(self, data):
        jc = JvClassFile(data)
        tt = '  '
        opcw = self._OPC_STRLEN
        for method in jc.methods:
            for attribute in method.attributes:
                if attribute.name == 'Code': break
            else:
                self.log_warn(F'no code found for method: {method.name}')
                continue
            code: JvCode = attribute.parse(JvCode)
            with io.StringIO() as display:
                args, retval = re.match(R'^\((.*?)\)(.*?)$', method.descriptor).groups()
                print(F'{jc.this!s}::{method!s}{method.descriptor}', file=display)
                for op in code.disassembly:
                    olen = len(op.raw)
                    if op.table is None:
                        args = ', '.join(repr(a) for a in op.arguments)
                    else:
                        ow = 4 if op.code is opc.tableswitch else 8
                        olen = olen - (len(op.table) - 1) * ow
                        args = F'defaultjmp => {op.table[None]:#010x}'
                        jmps = []
                        for k, (key, jmp) in enumerate(op.table.items()):
                            if key is None:
                                continue
                            raw = self._hex(op.raw[olen + k * ow: olen + k * ow + ow], ' ')
                            jmps.append(F'{tt}{raw!s:<{opcw+15}} {key:#010x} => {jmp:#010x}')
                        args = '\n'.join((args, *jmps))
                    opch = self._hex(op.raw[:olen], ' ')
                    if len(opch) > 14:
                        opch += F'\n{tt}{tt:<15}'
                    print(F'{tt}{opch:<15}{op.code!r:<{opcw}} {args}', file=display)
                name = method.name
                if name.startswith('<'):
                    this = jc.this.value.split('/')
                    this = this[-1]
                    name = F'{this}${name[1:-1]}'
                yield UnpackResult(F'{name}.jd', display.getvalue().encode(self.codec))

class jvstr

This unit is implemented in refinery.units.formats.java.jvstr and has the following commandline Interface:

usage: jvstr [-h] [-L] [-Q] [-0] [-v]

Extract string constants from Java class files.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class jvstr(Unit):
    """
    Extract string constants from Java class files.
    """
    def process(self, data):
        jc = JvClassFile(data)
        for string in jc.strings:
            yield string.encode(self.codec)

class xj0 (key, raw=False, all=False)

This unit is implemented in refinery.units.formats.json and has the following commandline Interface:

usage: xj0 [-h] [-L] [-Q] [-0] [-v] [-r | -a] key

Extracts a single field from a JSON document at depth 0. By default, the
unit applies a heuristic to extract remaining fields as metadata: String
values are extracted only if they do not exceed 80 characters in length
and do not contain any line breaks. Floating-point, integer, boolean
values, and lists of the latter are also extracted.

positional arguments:
  key            The key of the value to be extracted as the main body of
                 the chunk.

optional arguments:
  -r, --raw      Do not extract any other fields as metadata.
  -a, --all      Extract all other fields as metadata.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class xj0(Unit):
    """
    Extracts a single field from a JSON document at depth 0. By default, the unit applies a heuristic to
    extract remaining fields as metadata: String values are extracted only if they do not exceed 80
    characters in length and do not contain any line breaks. Floating-point, integer, boolean values, and
    lists of the latter are also extracted.
    """
    def __init__(
        self,
        key: Unit.Arg(help='The key of the value to be extracted as the main body of the chunk.'),
        raw: Unit.Arg('-r', group='META', help='Do not extract any other fields as metadata.') = False,
        all: Unit.Arg('-a', group='META', help='Extract all other fields as metadata.') = False
    ):
        super().__init__(key=key, raw=raw, all=all)

    def process(self, data):

        def acceptable(key, value, inside_list=False):
            if not is_valid_variable_name(key):
                return False
            if isinstance(value, dict):
                return False
            if isinstance(value, (float, int, bool)):
                return True
            if inside_list:
                return False
            if isinstance(value, list):
                return all(acceptable(key, t, True) for t in value)
            if isinstance(value, str):
                if self.args.all:
                    return True
                return len(value) in range(1, 80) and '\n' not in value

        doc: dict = json.loads(data)
        if not isinstance(doc, dict):
            raise ValueError('The input must be a JSON dictionary.')
        result = doc.pop(self.args.key.decode(self.codec), '').encode(self.codec)
        if self.args.raw:
            return result
        else:
            return self.labelled(result, **{
                key: value for key, value in doc.items() if acceptable(key, value)
            })
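
The sketch below walks through the heuristic for a made-up document: invoked as xj0 data, the value of data becomes the chunk body, short scalars become metadata, and nested objects or long strings are dropped unless --all is given:

doc = {
    'data'  : 'hello world',     # selected key: becomes the chunk body
    'family': 'example',         # short string  -> kept as metadata
    'size'  : 11,                # integer       -> kept as metadata
    'extra' : {'nested': True},  # dictionary    -> always dropped
    'blob'  : 'A' * 200,         # too long      -> dropped unless --all
}

body = doc.pop('data').encode()
meta = {
    key: value for key, value in doc.items()
    if isinstance(value, (int, float, bool))
    or isinstance(value, str) and len(value) < 80 and '\n' not in value
}
print(body)  # b'hello world'
print(meta)  # {'family': 'example', 'size': 11}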

class xtjson (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.json and has the following commandline Interface:

usage: xtjson [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
              [path [path ...]]

Extract values from a JSON document.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtjson(PathExtractorUnit):
    """
    Extract values from a JSON document.
    """
    _STRICT_PATH_MATCHING = True

    def unpack(self, data):

        def crawl(path, cursor):
            if isinstance(cursor, (dict, list)) and path:
                path = F'{path}/'
            if isinstance(cursor, dict):
                for key, value in cursor.items():
                    yield from crawl(F'{path}{key}', value)
            elif isinstance(cursor, list):
                width = len(F'{len(cursor)-1:d}')
                for key, value in enumerate(cursor):
                    yield from crawl(F'{path}#{key:0{width}d}', value)
            if path:
                yield path, cursor, cursor.__class__.__name__

        for path, item, typename in crawl('', json.loads(data)):
            def extract(item=item):
                if isinstance(item, (list, dict)):
                    dumped = json.dumps(item, indent=4)
                else:
                    dumped = str(item)
                return dumped.encode(self.codec)
            yield UnpackResult(path, extract, type=typename)
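
To make the path layout concrete, the simplified traversal below (mirroring crawl above, minus the zero-padding of list indices) shows which paths a small, made-up document produces; note that container nodes are emitted with a trailing slash:

document = {'config': {'hosts': ['10.0.0.1', '10.0.0.2'], 'port': 443}}

def crawl(path, cursor):
    if isinstance(cursor, (dict, list)) and path:
        path = F'{path}/'
    if isinstance(cursor, dict):
        for key, value in cursor.items():
            yield from crawl(F'{path}{key}', value)
    elif isinstance(cursor, list):
        for key, value in enumerate(cursor):
            yield from crawl(F'{path}#{key}', value)
    if path:
        yield path

print(list(crawl('', document)))
# ['config/hosts/#0', 'config/hosts/#1', 'config/hosts/', 'config/port', 'config/']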

class msgpack

This unit is implemented in refinery.units.formats.msgpack and has the following commandline Interface:

usage: msgpack [-h] [-L] [-Q] [-0] [-v] [-R]

Converts a message-pack (msgpack) buffer to JSON and vice-versa.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
  -R, --reverse  Use the reverse operation.
Expand source code Browse git
class msgpack(Unit):
    """
    Converts a message-pack (msgpack) buffer to JSON and vice-versa.
    """
    def reverse(self, data):
        return mp.dumps(json.loads(data))

    def process(self, data):
        unpacker = mp.Unpacker(MemoryFile(data, read_as_bytes=True))
        while True:
            try:
                item = unpacker.unpack()
            except mp.exceptions.OutOfData:
                position = unpacker.tell()
                if position < len(data):
                    self.log_warn("oops")
                break
            except Exception as E:
                position = unpacker.tell()
                if not position:
                    raise
                view = memoryview(data)
                raise RefineryPartialResult(str(E), view[position:])
            else:
                yield json.dumps(item).encode(self.codec)
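
A quick round trip with the msgpack package (imported as mp above), assuming its dumps/loads aliases, illustrates both directions of this unit; the sample object is arbitrary:

import json
import msgpack

packed = msgpack.dumps({'tool': 'refinery', 'answer': 42})  # reverse direction
print(packed.hex())
print(json.dumps(msgpack.loads(packed)))                    # forward direction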

class officecrypt (password=b'VelvetSweatshop')

This unit is implemented in refinery.units.formats.office.officecrypt and has the following commandline Interface:

usage: officecrypt [-h] [-L] [-Q] [-0] [-v] [B]

A simple proxy for the msoffcrypto package to decrypt office documents.

positional arguments:
  B              The document password. By default, the Excel default
                 password "VelvetSweatshop" is used.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class officecrypt(Unit):
    """
    A simple proxy for the `msoffcrypto` package to decrypt office documents.
    """

    def __init__(self, password: arg.binary(help=(
        'The document password. By default, the Excel default password "{default}" is used.'
    )) = b'VelvetSweatshop'):
        super().__init__(password=password)

    @Unit.Requires('msoffcrypto-tool', optional=False)
    def _msoffcrypto():
        import msoffcrypto
        return msoffcrypto

    def process(self, data):
        password: bytes = self.args.password
        with MemoryFile(data) as stream:
            doc = self._msoffcrypto.OfficeFile(stream)
            if not doc.is_encrypted():
                self.log_warn('the document is not encrypted; returning input')
                return data
            if password:
                doc.load_key(password=password.decode(self.codec))
            with MemoryFile(bytearray()) as output:
                doc.decrypt(output)
                return output.getvalue()

class xlxtr (*references)

This unit is implemented in refinery.units.formats.office.xlxtr and has the following commandline Interface:

usage: xlxtr [-h] [-L] [-Q] [-0] [-v] [reference [reference ...]]

Extract data from Microsoft Excel documents, both Legacy and new XML type
documents. A sheet reference is of the form B1 or 1.2, both specifying the
first cell of the second column. A cell range can be specified as B1:C12,
or 1.2:C12, or 1.2:12.3. Finally, the unit will always refer to the first
sheet in the document and to change this, specify the sheet name or index
separated by a hashtag, i.e. sheet#B1:C12 or 1#B1:C12. Note that indices
are 1-based. To get all elements of one sheet, use sheet#. If parsing a
sheet reference fails, the unit will assume that the given reference
specifies a sheet.

positional arguments:
  reference      A sheet reference to be extracted. If no sheet references
                 are given, the unit lists all sheet names.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class xlxtr(Unit):
    """
    Extract data from Microsoft Excel documents, both Legacy and new XML type documents. A sheet reference is of the form `B1` or `1.2`,
    both specifying the first cell of the second column. A cell range can be specified as `B1:C12`, or `1.2:C12`, or `1.2:12.3`. Finally,
    the unit will always refer to the first sheet in the document and to change this, specify the sheet name or index separated by a
    hashtag, i.e. `sheet#B1:C12` or `1#B1:C12`. Note that indices are 1-based. To get all elements of one sheet, use `sheet#`.
    If parsing a sheet reference fails, the unit will assume that the given reference specifies a sheet.
    """
    def __init__(self, *references: arg(metavar='reference', type=SheetReference, help=(
        'A sheet reference to be extracted. '
        'If no sheet references are given, the unit lists all sheet names.'
    ))):
        if not references:
            references = [SheetReference('*')]
        super().__init__(references=references)

    @Unit.Requires('xlrd2', optional=False)
    def _xlrd():
        import xlrd2
        return xlrd2

    @Unit.Requires('openpyxl', optional=False)
    def _openpyxl():
        import openpyxl
        return openpyxl

    def _rcmatch(self, sheet_index, sheet_name, row, col):
        assert row > 0
        assert col > 0
        if not self.args.references:
            return True
        for ref in self.args.references:
            ref: SheetReference
            if not ref.match(sheet_index, sheet_name):
                continue
            if (row, col) in ref:
                return True
        else:
            return False

    def _get_value(self, sheet_index, sheet, callable, row, col):
        if col <= 0 or row <= 0:
            raise ValueError(F'invalid cell reference ({row}, {col}) - indices must be positive numbers')
        if not self._rcmatch(sheet_index, sheet, row, col):
            return
        try:
            value = callable(row - 1, col - 1)
        except IndexError:
            return
        if not value:
            return
        if isinstance(value, float):
            if float(int(value)) == value:
                value = int(value)
        yield self.labelled(
            str(value).encode(self.codec),
            row=row,
            col=col,
            ref=_rc2ref(row, col),
            sheet=sheet
        )

    def _process_old(self, data):
        with io.StringIO() as logfile:
            wb = self._xlrd.open_workbook(file_contents=data, logfile=logfile, verbosity=self.args.verbose - 1, on_demand=True)
            logfile.seek(0)
            for entry in logfile:
                entry = entry.strip()
                if re.search(R'^[A-Z]+:', entry) or '***' in entry:
                    self.log_info(entry)
        for ref in self.args.references:
            ref: SheetReference
            for k, name in enumerate(wb.sheet_names()):
                if not ref.match(k, name):
                    continue
                sheet = wb.sheet_by_name(name)
                self.log_info(F'iterating {sheet.ncols} columns and {sheet.nrows} rows')
                for row, col in ref.cells(sheet.nrows, sheet.ncols):
                    yield from self._get_value(k, name, sheet.cell_value, row, col)

    def _process_new(self, data):
        workbook = self._openpyxl.load_workbook(MemoryFile(data), read_only=True)
        for ref in self.args.references:
            ref: SheetReference
            for k, name in enumerate(workbook.sheetnames):
                if not ref.match(k, name):
                    continue
                sheet = workbook[name]
                cells = [row for row in sheet.iter_rows(values_only=True)]
                nrows = len(cells)
                ncols = max(len(row) for row in cells)
                for row, col in ref.cells(nrows, ncols):
                    yield from self._get_value(k, name, lambda r, c: cells[r][c], row, col)

    def process(self, data):
        try:
            yield from self._process_new(data)
        except Exception as e:
            self.log_info(F'reverting to xlrd module due to exception: {e!s}')
            yield from self._process_old(data)

class xtdoc (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.office.xtdoc and has the following commandline Interface:

usage: xtdoc [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
             [path [path ...]]

Extract files from an OLE document such as a Microsoft Word DOCX file.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtdoc(PathExtractorUnit):
    """
    Extract files from an OLE document such as a Microsoft Word DOCX file.
    """

    @PathExtractorUnit.Requires('olefile', optional=False)
    def _olefile():
        import olefile
        return olefile

    def unpack(self, data):
        with MemoryFile(data) as stream:
            try:
                oledoc = self._olefile.OleFileIO(stream)
            except OSError as error:
                self.log_info(F'error, {error}, treating input as zip file')
                yield from xtzip().unpack(data)
                return
            for item in oledoc.listdir():
                if not item or not item[-1]:
                    continue
                path = '/'.join(item)
                olestream = oledoc.openstream(path)
                c0 = ord(item[-1][:1])
                if c0 < 20:
                    item[-1] = F'[{c0:d}]{item[-1][1:]}'
                    path = '/'.join(item)
                self.log_debug('exploring:', path)
                yield UnpackResult(path, olestream.read())

class xtrtf (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.office.xtrtf and has the following commandline Interface:

usage: xtrtf [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
             [path [path ...]]

Extract embedded objects in RTF documents.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtrtf(PathExtractorUnit):
    """
    Extract embedded objects in RTF documents.
    """
    @PathExtractorUnit.Requires('oletools')
    def _oletools():
        import oletools
        import oletools.rtfobj
        import oletools.oleobj
        return oletools

    def unpack(self, data):
        parser = self._oletools.rtfobj.RtfObjParser(data)
        parser.parse()
        width = len(str(len(parser.objects)))
        for k, item in enumerate(parser.objects):
            item: RtfObject
            path = item.filename or F'carve{k:0{width}}.bin'
            data = item.rawdata
            meta = {}
            if item.is_ole:
                if item.format_id == self._oletools.oleobj.OleObject.TYPE_EMBEDDED:
                    meta['ole_type'] = 'EMBEDDED'
                elif item.format_id == self._oletools.oleobj.OleObject.TYPE_LINKED:
                    meta['ole_type'] = 'LINKED'
                if item.is_package:
                    meta['src_path'] = item.src_path
                    meta['tmp_path'] = item.temp_path
                if item.clsid is not None:
                    meta['ole_info'] = item.clsid_desc
                    meta['ole_guid'] = item.clsid
                meta['ole_name'] = item.class_name
            if item.oledata:
                data = item.oledata
                pos = item.rawdata.find(data)
                if pos > 0:
                    meta['raw_header'] = item.rawdata[:pos]
                if item.olepkgdata:
                    data = item.olepkgdata
                    pos = item.oledata.find(data)
                    if pos >= 0:
                        meta['ole_header'] = item.oledata[:pos]
            yield UnpackResult(path, data, **meta)

class xtvba (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.office.xtvba and has the following commandline Interface:

usage: xtvba [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
             [path [path ...]]

Extract VBA macro code from Office documents.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
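
For example (the input file name is hypothetical), the following prints
all VBA macro source code found in an Office document to standard output:

    xtvba < invoice.docm
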
class xtvba(PathExtractorUnit):
    """
    Extract VBA macro code from Office documents.
    """
    @PathExtractorUnit.Requires('oletools')
    def _olevba():
        from oletools import olevba
        return olevba

    def unpack(self, data):
        parser = self._olevba.VBA_Parser(
            metavars(data).get('path', None), data=bytes(data), relaxed=True)
        for _, path, _, code in parser.extract_all_macros():
            yield UnpackResult(path, code.encode(self.codec))


class pcap (merge=False)

This unit is implemented in refinery.units.formats.pcap and has the following commandline Interface:

usage: pcap [-h] [-L] [-Q] [-0] [-v] [-m]

Performs TCP stream reassembly from packet capture (PCAP) files. By
default, the unit emits the parts of each TCP conversation, attaching
several pieces of metadata to each such output: included are the source
and destination socket address as well as the variable stream, which
identifies the conversation that each part belongs to. The chunks are
returned in the order that the bytes were exchanged between source and
destination. When the --merge parameter is specified, the unit instead
collects all bytes going forward and backward, respectively, and emits
these as two chunks for each TCP conversation that took place.

optional arguments:
  -m, --merge    Merge both parts of each TCP conversation into one chunk.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
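
As a usage sketch (file names are hypothetical), the first command emits
the individual parts of every TCP conversation, while the second emits
two merged chunks per conversation, one for each direction:

    pcap < capture.pcap > parts.bin
    pcap --merge < capture.pcap > merged.bin
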
class pcap(Unit):
    """
    Performs TCP stream reassembly from packet capture (PCAP) files. By default, the unit emits the parts of
    each TCP conversation, attaching several pieces of metadata to each such output: included are the source
    and destination socket address as well as the variable `stream`, which identifies the conversation that
    each part belongs to. The chunks are returned in the order that the bytes were exchanged between source
    and destination. When the `--merge` parameter is specified, the unit instead collects all bytes going
    forward and backward, respectively, and emits these as two chunks for each TCP conversation that took place.
    """

    def __init__(self, merge: arg.switch('-m', help='Merge both parts of each TCP conversation into one chunk.') = False):
        super().__init__(merge=merge)

    @Unit.Requires('pypcapkit[scapy]')
    def _pcapkit():
        import pcapkit
        return pcapkit

    def process(self, data):
        pcapkit = self._pcapkit
        logging.getLogger('pcapkit').disabled = True
        merge = self.args.merge

        with VirtualFileSystem() as fs:
            vf = VirtualFile(fs, data, 'pcap')
            extraction = pcapkit.extract(
                fin=vf.path, engine='scapy', store=False, nofile=True, extension=False, tcp=True, strict=True)
            tcp: list = list(extraction.reassembly.tcp)

        count, convo = 0, None
        src_buffer = MemoryFile()
        dst_buffer = MemoryFile()
        for stream in tcp:
            this_convo = Conversation.FromID(stream.id)
            if this_convo != convo:
                if count and merge:
                    if src_buffer.tell():
                        yield self.labelled(src_buffer.getvalue(), **convo.src_to_dst())
                        src_buffer.truncate(0)
                    if dst_buffer.tell():
                        yield self.labelled(dst_buffer.getvalue(), **convo.dst_to_src())
                        dst_buffer.truncate(0)
                count = count + 1
                convo = this_convo
            for packet in stream.packets:
                if not merge:
                    yield self.labelled(packet.data, **this_convo.src_to_dst(), stream=count)
                elif this_convo.src == convo.src:
                    src_buffer.write(packet.data)
                elif this_convo.dst == convo.src:
                    dst_buffer.write(packet.data)
                else:
                    raise RuntimeError(F'direction of packet {convo!s} in conversation {count} is unknown')


class dnblob

This unit is implemented in refinery.units.formats.pe.dotnet.dnblob and has the following commandline Interface:

usage: dnblob [-h] [-L] [-Q] [-0] [-v]

Extracts all blobs defined in the #Blob stream of .NET executables.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
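
For example (the file name is hypothetical), the following writes all
blobs from the #Blob stream of a .NET executable to standard output:

    dnblob < sample.exe
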
class dnblob(Unit):
    """
    Extracts all blobs defined in the `#Blob` stream of .NET executables.
    """
    def process(self, data):
        header = DotNetHeader(data, parse_resources=False)
        for blob in header.meta.Streams.Blob.values():
            yield blob


class dncfx

This unit is implemented in refinery.units.formats.pe.dotnet.dncfx and has the following commandline Interface:

usage: dncfx [-h] [-L] [-Q] [-0] [-v]

Extracts the encrypted strings from ConfuserEx-protected .NET executables.
Each decrypted string is returned as a single output.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
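
A usage sketch (the file name is hypothetical) that attempts to recover
and print the decrypted strings of a protected sample:

    dncfx < protected.exe
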
class dncfx(Unit):
    """
    Extracts the encrypted strings from ConfuserEx-protected .NET executables.
    Each decrypted string is returned as a single output.
    """
    _PATTERN_ARRAY_INIT = (
        BR'(\x1F.|\x20....)'      # load size of a chunk
        BR'\x8D.\x00\x00\x01'     # create a UInt32 array
        BR'\x25'                  # dup
        BR'\xD0%s\x04'            # ldtoken: RVA of array data
        BR'\x28.\x00\x00.'        # call to InitializeArray
    )

    def process(self, data):
        header = DotNetHeader(data, parse_resources=False)
        decompressor = lzma()

        class IntegerAssignment:
            def __init__(self, match):
                self.offset = match.start()
                self.value, = struct.unpack('<I', match[1])

        def get_size(match):
            ins = match[1]
            fmt = '<B' if ins[0] == 0x1F else '<I'
            result, = struct.unpack(fmt, ins[-struct.calcsize(fmt):])
            return result

        potential_seeds = [
            IntegerAssignment(m)
            for m in re.finditer(br'\x20(....)', data, re.DOTALL)
        ]

        for entry in header.meta.RVAs:
            offset = header.pe.get_offset_from_rva(entry.RVA)
            index = struct.pack('<I', entry.Field.Index)
            strings_found = 0
            for match in re.finditer(self._PATTERN_ARRAY_INIT % re.escape(index[:3]), data, flags=re.DOTALL):
                ms = match.start()

                def sortkey(t):
                    weight = abs(t.offset - ms)
                    if t.offset < ms:
                        # this weights assignments after the array initialization down, but still
                        # prefers them over assignments that are further away than 2kb
                        weight += 2000
                    return weight

                size = get_size(match)

                if size % 0x10 or size > 10000:
                    continue

                self.log_debug(F'found RVA {entry.Field.Index} initialized with length {size}.')
                potential_seeds.sort(key=sortkey)

                for seed in potential_seeds[1:400]:
                    # the first potential_seed will always be the assignment of the size variable
                    ciphertext = data[offset:offset + size * 4]
                    key = self._xs64star(seed.value)
                    key = chunks.pack(key, 4) + ciphertext[:-0x40]
                    decrypted = strxor(key, ciphertext)
                    try:
                        decompressed = decompressor(decrypted)
                    except Exception as e:
                        self.log_debug(
                            F'decompression failed for seed {seed.value:08X} at offset {seed.offset:08X}: {e}')
                        continue
                    else:
                        self.log_info(
                            F'decompression worked for seed {seed.value:08X} at offset {seed.offset:08X}.')
                    if len(decompressed) < 0x100:
                        continue
                    for string in self._extract_strings(decompressed):
                        strings_found += 1
                        yield string
                    if strings_found > 10:
                        break

    def _xs64star(self, state):
        for i in range(16):
            state ^= (state >> 12) & 0xFFFFFFFF
            state ^= (state << 25) & 0xFFFFFFFF
            state ^= (state >> 27) & 0xFFFFFFFF
            yield state & 0xFFFFFFFF

    def _extract_strings(self, blob):
        reader = StreamReader(blob)
        while reader.tell() < len(blob):
            try:
                size = reader.expect(UInt32)
                string = reader.expect(StringPrimitive, size=size, codec='UTF8', align=4)
            except ParserEOF:
                return
            if string:
                yield string.encode(self.codec)


class dnds (dereference=True, encode=None, digest=None)

This unit is implemented in refinery.units.formats.pe.dotnet.dnds and has the following commandline Interface:

usage: dnds [-h] [-L] [-Q] [-0] [-v] [-r] [-e UNIT | -d HASH]

Expects data that has been formatted with the .NET class BinaryFormatter.
The output is a representation of the deserialized data in JSON format.

optional arguments:
  -r, --keep-references  Do not resolve Object references in serialized
                         data.
  -e, --encode UNIT      Select an encoder unit used to represent binary
                         data in the JSON output. Available are: HEX, ESC,
                         URL, B64.
  -d, --digest HASH      Select a hashing algorithm to digest binary data;
                         instead of the data, only the hash will be
                         displayed. The available algorithms are: MD5,
                         CRC32, SHA1, SHA256, SHA512.

generic options:
  -h, --help             Show this help message and exit.
  -L, --lenient          Allow partial results as output.
  -Q, --quiet            Disables all log output.
  -0, --devnull          Do not produce any output.
  -v, --verbose          Specify up to two times to increase log level.
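
For example (the file name is hypothetical), the first command prints the
deserialized stream as JSON, and the second keeps Object references
unresolved:

    dnds < serialized.bin
    dnds -r < serialized.bin
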
class dnds(JSONEncoderUnit):
    """
    Expects data that has been formatted with the .NET class `BinaryFormatter`.
    The output is a representation of the deserialized data in JSON format.
    """

    def __init__(
        self, dereference: arg.switch('-r', '--keep-references', off=True,
            help='Do not resolve Object references in serialized data.') = True,
        encode=None, digest=None
    ):
        super().__init__(encode=encode, digest=digest, dereference=dereference)

    def process(self, data):
        self.log_debug('initializing parser, will fail on malformed stream')
        bf = BinaryFormatterParser(
            data,
            keep_meta=True,
            dereference=self.args.dereference,
            ignore_errors=not self.log_debug(),
        )

        return self.to_json([
            {
                'Type': repr(record),
                'Data': record
            } for record in bf
        ])


class dnfields (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.pe.dotnet.dnfields and has the following commandline Interface:

usage: dnfields [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
                [path [path ...]]

This unit can extract data from constant field variables in classes of
.NET executables. Since the .NET header stores only the offset and not the
size of constant fields, heuristics are used to search for opcode
sequences that load the data and additional heuristics are used to guess
the size of the data type.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
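
As a sketch (the file name is hypothetical), the following lists the
names of all constant fields that the unit was able to recover:

    dnfields --list < sample.exe
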
class dnfields(PathExtractorUnit):
    """
    This unit can extract data from constant field variables in classes of .NET
    executables. Since the .NET header stores only the offset and not the size of
    constant fields, heuristics are used to search for opcode sequences that load
    the data and additional heuristics are used to guess the size of the data
    type.
    """
    _SIZEMAP = {
        '^s?byte$'       : 1,
        '^s?char$'       : 2,
        '^[us]?int.?16$' : 2,
        '^[us]?int.?32$' : 4,
        '^[us]?int.?64$' : 8,
    }

    def _guess_field_info(self, tables, data, t) -> FieldInfo:
        pattern = (
            BR'(\x20....|\x1F.)'                # ldc.i4  count
            BR'\x8D(...)([\x01\x02])'           # newarr  col|row
            BR'\x25'                            # dup
            BR'\xD0\x%02x\x%02x\x%02x\x04'      # ldtoken t
            BR'(?:.{0,12}'                      # ...
            BR'\x80(...)\x04)?' % (             # stsfld variable
                (t >> 0x00) & 0xFF,
                (t >> 0x08) & 0xFF,
                (t >> 0x10) & 0xFF
            )
        )
        for match in re.finditer(pattern, data, flags=re.DOTALL):
            count, j, r, name = match.groups()
            count, j, r = struct.unpack('<LLB', B'%s%s\0%s' % (count[1:].ljust(4, B'\0'), j, r))
            if name:
                try:
                    name = struct.unpack('<L', B'%s\0' % name)
                    name = name[0]
                    name = tables[4][name - 1].Name
                except Exception as E:
                    self.log_info(F'attempt to parse field name failed: {E!s}')
                    name = None
            element = tables[r][j - 1]
            for pattern, size in self._SIZEMAP.items():
                if re.match(pattern, element.TypeName, flags=re.IGNORECASE):
                    return FieldInfo(element.TypeName, count, size, name)

    def unpack(self, data):
        header = DotNetHeader(data, parse_resources=False)
        tables = header.meta.Streams.Tables
        fields = tables.FieldRVA
        if not fields:
            return
        iwidth = len(str(len(fields)))
        rwidth = max(len(F'{field.RVA:X}') for field in fields)
        rwidth = max(rwidth, 4)

        for k, rv in enumerate(fields):
            index = rv.Field.Index
            field = tables.Field[index - 1]
            fname = field.Name
            if len(field.Signature) == 2:
                # Crude signature parser for non-array case. Reference:
                # https://www.codeproject.com/Articles/42649/NET-File-Format-Signatures-Under-the-Hood-Part-1
                # https://www.codeproject.com/Articles/42655/NET-file-format-Signatures-under-the-hood-Part-2
                guess = {
                    0x03: FieldInfo('Char',   1, 1, None),  # noqa
                    0x04: FieldInfo('SByte',  1, 1, None),  # noqa
                    0x05: FieldInfo('Byte',   1, 1, None),  # noqa
                    0x06: FieldInfo('Int16',  1, 2, None),  # noqa
                    0x07: FieldInfo('UInt16', 1, 2, None),  # noqa
                    0x08: FieldInfo('Int32',  1, 4, None),  # noqa
                    0x09: FieldInfo('UInt32', 1, 4, None),  # noqa
                    0x0A: FieldInfo('Int64',  1, 8, None),  # noqa
                    0x0B: FieldInfo('UInt64', 1, 8, None),  # noqa
                    0x0C: FieldInfo('Single', 1, 4, None),  # noqa
                    0x0D: FieldInfo('Double', 1, 8, None),  # noqa
                }.get(field.Signature[1], None)
            else:
                guess = self._guess_field_info(tables, data, index)
            if guess is None:
                self.log_debug(lambda: F'field {k:0{iwidth}d} name {field.Signature}: unable to guess type information')
                continue
            totalsize = guess.count * guess.size
            if guess.name is not None:
                fname = guess.name
            if not fname.isprintable():
                fname = F'F{rv.RVA:0{rwidth}X}'
            name = F'{fname}.{guess.type}'
            if guess.count > 1:
                name += F'[{guess.count}]'
            self.log_info(lambda: F'field {k:0{iwidth}d} at RVA 0x{rv.RVA:04X} of type {guess.type}, count: {guess.count}, name: {fname}')
            offset = header.pe.get_offset_from_rva(rv.RVA)
            yield UnpackResult(name, lambda t=offset, s=totalsize: data[t:t + s])


class dnhdr (resources=False, encode=None, digest=None)

This unit is implemented in refinery.units.formats.pe.dotnet.dnhdr and has the following commandline Interface:

usage: dnhdr [-h] [-L] [-Q] [-0] [-v] [-r] [-e UNIT | -d HASH]

Expects a .NET executable and outputs a JSON representation of the data
contained in its .NET header.

optional arguments:
  -r, --resources    Also parse .NET resources.
  -e, --encode UNIT  Select an encoder unit used to represent binary data
                     in the JSON output. Available are: HEX, ESC, URL,
                     B64.
  -d, --digest HASH  Select a hashing algorithm to digest binary data;
                     instead of the data, only the hash will be displayed.
                     The available algorithms are: MD5, CRC32, SHA1,
                     SHA256, SHA512.

generic options:
  -h, --help         Show this help message and exit.
  -L, --lenient      Allow partial results as output.
  -Q, --quiet        Disables all log output.
  -0, --devnull      Do not produce any output.
  -v, --verbose      Specify up to two times to increase log level.
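
For example (the file name is hypothetical), the following prints the
.NET header of an executable as JSON, including parsed resources:

    dnhdr -r < sample.exe
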
class dnhdr(JSONEncoderUnit):
    """
    Expects a .NET executable and outputs a JSON representation of the data
    contained in its .NET header.
    """
    def __init__(
        self,
        resources: arg.switch('-r', '--resources', help='Also parse .NET resources.') = False,
        encode=None, digest=None
    ):
        super().__init__(encode=encode, digest=digest, resources=resources)

    def process(self, data):
        dn = DotNetHeader(data, parse_resources=self.args.resources)
        result = {
            'Head': dn.head,
            'Meta': dn.meta
        }

        if self.args.resources:
            result['RSRC'] = dn.resources

        return self.to_json(result)


class dnmr (*paths, list=False, join_path=False, drop_path=False, path=b'name', raw=False)

This unit is implemented in refinery.units.formats.pe.dotnet.dnmr and has the following commandline Interface:

usage: dnmr [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-P NAME] [-r]
            [path [path ...]]

Extracts subfiles from .NET managed resources.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "name".
  -r, --raw        Do not deserialize the managed resource entry data.

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
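
A usage sketch (file and entry names are hypothetical): the first command
lists the entries of a managed resource, the second extracts one entry
without deserializing it:

    dnmr --list < managed.resources
    dnmr --raw SomeEntry < managed.resources > SomeEntry.bin
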
class dnmr(PathExtractorUnit):
    """
    Extracts subfiles from .NET managed resources.
    """
    def __init__(
        self, *paths, list=False, join_path=False, drop_path=False, path=b'name',
        raw: arg.switch('-r', help='Do not deserialize the managed resource entry data.') = False
    ):
        super().__init__(*paths, list=list, join_path=join_path, drop_path=drop_path, path=path, raw=raw)

    def unpack(self, data):
        try:
            managed = NetStructuredResources(data)
        except NoManagedResource:
            managed = None
        if not managed:
            raise RefineryPartialResult('no managed resources found', partial=data)
        for entry in managed:
            if entry.Error:
                self.log_warn(F'entry {entry.Name} carried error message: {entry.Error}')
            data = entry.Data
            if not self.args.raw:
                if isinstance(entry.Value, str):
                    data = entry.Value.encode('utf-16le')
                elif isbuffer(entry.Value):
                    data = entry.Value
            yield UnpackResult(entry.Name, data)


class dnrc (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.pe.dotnet.dnrc and has the following commandline Interface:

usage: dnrc [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
            [path [path ...]]

Extracts all .NET resources whose name matches any of the given patterns
and outputs them. Use the dnmr unit to extract subfiles from managed .NET
resources.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
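
For example (file and resource names are hypothetical), the first command
lists all resource names; the second extracts a single resource and pipes
it into dnmr to list its managed entries:

    dnrc --list < sample.exe
    dnrc MyApp.Properties.Resources.resources < sample.exe | dnmr --list
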
class dnrc(PathExtractorUnit):
    """
    Extracts all .NET resources whose name matches any of the given patterns
    and outputs them. Use the `refinery.units.formats.pe.dotnet.dnmr` unit to
    extract subfiles from managed .NET resources.
    """
    def unpack(self, data):
        header = DotNetHeader(data)

        if not header.resources:
            if self.args.list:
                return
            raise ValueError('This file contains no resources.')

        for resource in header.resources:
            yield UnpackResult(resource.Name, resource.Data)


class dnstr (user=True, meta=True)

This unit is implemented in refinery.units.formats.pe.dotnet.dnstr and has the following commandline Interface:

usage: dnstr [-h] [-L] [-Q] [-0] [-v] [-m | -u]

Extracts all strings defined in the #Strings and #US streams of .NET
executables.

optional arguments:
  -m, --meta     Only extract from #Strings.
  -u, --user     Only extract from #US.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
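
For example (the file name is hypothetical), the first command extracts
strings from both heaps, while the second restricts the output to the
#Strings heap:

    dnstr < sample.exe
    dnstr -m < sample.exe
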
class dnstr(Unit):
    """
    Extracts all strings defined in the `#Strings` and `#US` streams of .NET
    executables.
    """

    def __init__(
        self,
        user: arg.switch('-m', '--meta', off=True, group='HEAP', help='Only extract from #Strings.') = True,
        meta: arg.switch('-u', '--user', off=True, group='HEAP', help='Only extract from #US.') = True,
    ):
        if not meta and not user:
            raise ValueError('Either ascii or utf16 strings must be enabled.')
        super().__init__(meta=meta, user=user)

    def process(self, data):
        header = DotNetHeader(data, parse_resources=False)
        if self.args.meta:
            for string in header.meta.Streams.Strings.values():
                yield string.encode(self.codec)
        if self.args.user:
            for string in header.meta.Streams.US.values():
                yield string.encode(self.codec)


class pemeta (all=True, debug=False, dotnet=False, signatures=False, timestamps=False, version=False, header=False, exports=False, imports=False, tabular=False, timeraw=False)

This unit is implemented in refinery.units.formats.pe.pemeta and has the following commandline Interface:

usage: pemeta [-h] [-L] [-Q] [-0] [-v] [-c] [-D] [-N] [-S] [-T] [-V] [-H]
              [-E] [-I] [-t] [-r]

Extract metadata from PE files. By default, all information except for
imports and exports is extracted.

optional arguments:
  -c, --custom      Unless enabled, all default categories will be
                    extracted.
  -D, --debug       Parse the PDB path from the debug directory.
  -N, --dotnet      Parse the .NET header.
  -S, --signatures  Parse digital signatures.
  -T, --timestamps  Extract time stamps.
  -V, --version     Parse the VERSION resource.
  -H, --header      Parse data from the PE header.
  -E, --exports     List all exported functions.
  -I, --imports     List all imported functions.
  -t, --tabular     Print information in a table rather than as JSON
  -r, --timeraw     Extract time stamps as numbers instead of human-
                    readable format.

generic options:
  -h, --help        Show this help message and exit.
  -L, --lenient     Allow partial results as output.
  -Q, --quiet       Disables all log output.
  -0, --devnull     Do not produce any output.
  -v, --verbose     Specify up to two times to increase log level.
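
For example (the file name is hypothetical), the first command prints the
default metadata categories as JSON; the second switches off the default
categories via -c and renders only header and version information as a
table:

    pemeta < sample.exe
    pemeta -c -H -V -t < sample.exe
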
class pemeta(Unit):
    """
    Extract metadata from PE files. By default, all information except for imports and exports is
    extracted.
    """
    def __init__(
        self, all : arg('-c', '--custom',
            help='Unless enabled, all default categories will be extracted.') = True,
        debug      : arg('-D', help='Parse the PDB path from the debug directory.') = False,
        dotnet     : arg('-N', help='Parse the .NET header.') = False,
        signatures : arg('-S', help='Parse digital signatures.') = False,
        timestamps : arg('-T', help='Extract time stamps.') = False,
        version    : arg('-V', help='Parse the VERSION resource.') = False,
        header     : arg('-H', help='Parse data from the PE header.') = False,
        exports    : arg('-E', help='List all exported functions.') = False,
        imports    : arg('-I', help='List all imported functions.') = False,
        tabular    : arg('-t', help='Print information in a table rather than as JSON') = False,
        timeraw    : arg('-r', help='Extract time stamps as numbers instead of human-readable format.') = False,
    ):
        super().__init__(
            debug=all or debug,
            dotnet=all or dotnet,
            signatures=all or signatures,
            timestamps=all or timestamps,
            version=all or version,
            header=all or header,
            imports=imports,
            exports=exports,
            timeraw=timeraw,
            tabular=tabular,
        )

    @classmethod
    def _ensure_string(cls, x):
        if not isinstance(x, str):
            x = repr(x) if not isinstance(x, bytes) else x.decode(cls.codec, 'backslashreplace')
        return x

    @classmethod
    def _parse_pedict(cls, bin):
        return dict((
            cls._ensure_string(key),
            cls._ensure_string(val)
        ) for key, val in bin.items() if val)

    @classmethod
    def parse_signature(cls, data: bytearray) -> dict:
        """
        Extracts a JSON-serializable and human readable dictionary with information about
        time stamp and code signing certificates that are attached to the input PE file.
        """
        from refinery.units.formats.pkcs7 import pkcs7
        from refinery.units.formats.pe.pesig import pesig

        try:
            signature = data | pesig | pkcs7 | json.loads
        except Exception as E:
            raise ValueError(F'PKCS7 parser failed with error: {E!s}')

        info = {}

        def find_timestamps(entry):
            if isinstance(entry, dict):
                if set(entry.keys()) == {'type', 'value'}:
                    if entry['type'] == 'signing_time':
                        return {'Timestamp': entry['value']}
                for value in entry.values():
                    result = find_timestamps(value)
                    if result is None:
                        continue
                    with suppress(KeyError):
                        result.setdefault('TimestampIssuer', entry['sid']['issuer']['common_name'])
                    return result
            elif isinstance(entry, list):
                for value in entry:
                    result = find_timestamps(value)
                    if result is None:
                        continue
                    return result

        timestamp_info = find_timestamps(signature)
        if timestamp_info is not None:
            info.update(timestamp_info)

        try:
            certificates = signature['content']['certificates']
        except KeyError:
            return info

        if len(certificates) == 1:
            main_certificate = certificates[0]['tbs_certificate']
        else:
            certificates_with_extended_use = []
            main_certificate = None
            for certificate in certificates:
                with suppress(Exception):
                    crt = certificate['tbs_certificate']
                    ext = [e for e in crt['extensions'] if e['extn_id'] == 'extended_key_usage' and e['extn_value'] != ['time_stamping']]
                    key = [e for e in crt['extensions'] if e['extn_id'] == 'key_usage']
                    if ext:
                        certificates_with_extended_use.append(certificate)
                    if any('key_cert_sign' in e['extn_value'] for e in key):
                        continue
                    if any('code_signing' in e['extn_value'] for e in ext):
                        main_certificate = certificate
                        break
            if main_certificate is None and len(certificates_with_extended_use) == 1:
                main_certificate = certificates_with_extended_use[0]
        if main_certificate:
            crt = main_certificate['tbs_certificate']
            serial = crt['serial_number']
            if not isinstance(serial, int):
                serial = int(serial, 0)
            serial = F'{serial:x}'
            if len(serial) % 2:
                serial = '0' + serial
            subject = crt['subject']
            location = [subject.get(t, '') for t in ('locality_name', 'state_or_province_name', 'country_name')]
            info.update(Subject=subject['common_name'])
            if any(location):
                info.update(SubjectLocation=', '.join(filter(None, location)))
            info.update(
                Issuer=crt['issuer']['common_name'], Fingerprint=main_certificate['fingerprint'], Serial=serial)
            return info
        return info

    @classmethod
    def parse_version(cls, pe: PE, data=None) -> dict:
        """
        Extracts a JSON-serializable and human readable dictionary with information about
        the version resource of an input PE file, if available.
        """
        pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']])
        for FileInfo in pe.FileInfo:
            for FileInfoEntry in FileInfo:
                with suppress(AttributeError):
                    for StringTableEntry in FileInfoEntry.StringTable:
                        StringTableEntryParsed = cls._parse_pedict(StringTableEntry.entries)
                        with suppress(AttributeError):
                            LangID = StringTableEntry.entries.get('LangID', None) or StringTableEntry.LangID
                            LangID = int(LangID, 0x10) if not isinstance(LangID, int) else LangID
                            LangHi = LangID >> 0x10
                            LangLo = LangID & 0xFFFF
                            Language = cls._LCID.get(LangHi, 'Language Neutral')
                            Charset = cls._CHARSET.get(LangLo, 'Unknown Charset')
                            StringTableEntryParsed.update(
                                LangID=F'{LangID:08X}',
                                Charset=Charset,
                                Language=Language
                            )
                        return StringTableEntryParsed

    @classmethod
    def parse_exports(cls, pe: PE, data=None) -> list:
        pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT']])
        info = []
        for k, exp in enumerate(pe.DIRECTORY_ENTRY_EXPORT.symbols):
            if not exp.name:
                info.append(F'@{k}')
            else:
                info.append(exp.name.decode('ascii'))
        return info

    @classmethod
    def parse_imports(cls, pe: PE, data=None) -> list:
        pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']])
        info = {}
        for idd in pe.DIRECTORY_ENTRY_IMPORT:
            dll = idd.dll.decode('ascii')
            if dll.lower().endswith('.dll'):
                dll = dll[:-4]
            imports = info.setdefault(dll, [])
            for imp in idd.imports:
                name = imp.name and imp.name.decode('ascii') or F'@{imp.ordinal}'
                imports.append(name)
        return info

    @classmethod
    def parse_header(cls, pe: PE, data=None) -> dict:
        def format_macro_name(name: str, prefix, convert=True):
            name = name.split('_')[prefix:]
            if convert:
                for k, part in enumerate(name):
                    name[k] = part.upper() if len(part) <= 3 else part.capitalize()
            return ' '.join(name)

        major = pe.OPTIONAL_HEADER.MajorOperatingSystemVersion
        minor = pe.OPTIONAL_HEADER.MinorOperatingSystemVersion
        version = cls._WINVER.get(major, {0: 'Unknown'})

        try:
            MinimumOS = version[minor]
        except LookupError:
            MinimumOS = version[0]
        header_information = {
            'Machine': format_macro_name(MACHINE_TYPE[pe.FILE_HEADER.Machine], 3, False),
            'Subsystem': format_macro_name(SUBSYSTEM_TYPE[pe.OPTIONAL_HEADER.Subsystem], 2),
            'MinimumOS': MinimumOS,
        }

        rich_header = pe.parse_rich_header()
        rich = []
        if rich_header:
            it = rich_header.get('values', [])
            for idv in it[0::2]:
                info = cls._RICH_HEADER.get(idv, None)
                if info is None:
                    info = guess_version(idv)
                if not info:
                    continue
                rich.append(F'[{idv:08x}] {info}')
            header_information['RICH'] = rich

        characteristics = [
            name for name, mask in image_characteristics
            if pe.FILE_HEADER.Characteristics & mask
        ]
        for typespec, flag in {
            'EXE': 'IMAGE_FILE_EXECUTABLE_IMAGE',
            'DLL': 'IMAGE_FILE_DLL',
            'SYS': 'IMAGE_FILE_SYSTEM'
        }.items():
            if flag in characteristics:
                header_information['Type'] = typespec
        address_width = None
        if 'IMAGE_FILE_16BIT_MACHINE' in characteristics:
            address_width = 4
        elif pe.FILE_HEADER.Machine == MACHINE_TYPE['IMAGE_FILE_MACHINE_I386']:
            address_width = 8
        elif pe.FILE_HEADER.Machine == MACHINE_TYPE['IMAGE_FILE_MACHINE_AMD64']:
            address_width = 16
        if address_width:
            header_information['Bits'] = 4 * address_width
        else:
            address_width = 16
        header_information['ImageBase'] = F'0x{pe.OPTIONAL_HEADER.ImageBase:0{address_width}X}'
        return header_information

    @classmethod
    def parse_time_stamps(cls, pe: PE, raw_time_stamps: bool) -> dict:
        """
        Extracts time stamps from the PE header (link time), as well as from the imports,
        exports, debug, and resource directory. The resource time stamp is also parsed as
        a DOS time stamp and returned as the "Delphi" time stamp.
        """
        if raw_time_stamps:
            def dt(ts): return ts
        else:
            def dt(ts):
                # parse as UTC but then forget time zone information
                return datetime.fromtimestamp(
                    ts,
                    tz=timezone.utc
                ).replace(tzinfo=None)

        pe.parse_data_directories(directories=[
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']
        ])

        info = {}

        with suppress(AttributeError):
            info.update(Linker=dt(pe.FILE_HEADER.TimeDateStamp))

        with suppress(AttributeError):
            for entry in pe.DIRECTORY_ENTRY_IMPORT:
                info.update(Import=dt(entry.TimeDateStamp()))

        with suppress(AttributeError):
            for entry in pe.DIRECTORY_ENTRY_DEBUG:
                info.update(DbgDir=dt(entry.struct.TimeDateStamp))

        with suppress(AttributeError):
            Export = pe.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp
            if Export: info.update(Export=dt(Export))

        with suppress(AttributeError):
            res_timestamp = pe.DIRECTORY_ENTRY_RESOURCE.struct.TimeDateStamp
            if res_timestamp:
                with suppress(ValueError):
                    from refinery.units.misc.datefix import datefix
                    dos = datefix.dostime(res_timestamp)
                    info.update(Delphi=dos)
                    info.update(RsrcTS=dt(res_timestamp))

        def norm(value):
            if isinstance(value, int):
                return value
            return str(value)

        return {key: norm(value) for key, value in info.items()}

    @classmethod
    def parse_dotnet(cls, pe: PE, data):
        """
        Extracts a JSON-serializable and human readable dictionary with information about
        the .NET metadata of an input PE file.
        """
        header = DotNetHeader(data, pe=pe)
        tables = header.meta.Streams.Tables
        info = dict(
            RuntimeVersion=F'{header.head.MajorRuntimeVersion}.{header.head.MinorRuntimeVersion}',
            Version=F'{header.meta.MajorVersion}.{header.meta.MinorVersion}',
            VersionString=header.meta.VersionString
        )

        info['Flags'] = [name for name, check in header.head.KnownFlags.items() if check]

        if len(tables.Assembly) == 1:
            assembly = tables.Assembly[0]
            info.update(
                AssemblyName=assembly.Name,
                Release='{}.{}.{}.{}'.format(
                    assembly.MajorVersion,
                    assembly.MinorVersion,
                    assembly.BuildNumber,
                    assembly.RevisionNumber
                )
            )

        try:
            entry = header.head.EntryPointToken + pe.OPTIONAL_HEADER.ImageBase
            info.update(EntryPoint=F'0x{entry:08X}')
        except AttributeError:
            pass

        if len(tables.Module) == 1:
            module = tables.Module[0]
            info.update(ModuleName=module.Name)

        return info

    @classmethod
    def parse_debug(cls, pe: PE, data=None):
        result = {}
        pe.parse_data_directories(directories=[
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG']])
        for dbg in pe.DIRECTORY_ENTRY_DEBUG:
            if DEBUG_TYPE.get(dbg.struct.Type, None) != 'IMAGE_DEBUG_TYPE_CODEVIEW':
                continue
            with suppress(Exception):
                pdb = dbg.entry.PdbFileName
                if 0 in pdb:
                    pdb = pdb[:pdb.index(0)]
                result.update(
                    PdbPath=pdb.decode(cls.codec),
                    PdbAge=dbg.entry.Age
                )
        return result

    def process(self, data):
        result = {}
        pe = PE(data=data, fast_load=True)

        for switch, resolver, name in [
            (self.args.debug,   self.parse_debug,    'Debug'),    # noqa
            (self.args.dotnet,  self.parse_dotnet,   'DotNet'),   # noqa
            (self.args.header,  self.parse_header,   'Header'),   # noqa
            (self.args.version, self.parse_version,  'Version'),  # noqa
            (self.args.imports, self.parse_imports,  'Imports'),  # noqa
            (self.args.exports, self.parse_exports,  'Exports'),  # noqa
        ]:
            if not switch:
                continue
            self.log_debug(F'parsing: {name}')
            try:
                info = resolver(pe, data)
            except Exception as E:
                self.log_info(F'failed to obtain {name}: {E!s}')
                continue
            if info:
                result[name] = info

        signature = {}

        if self.args.timestamps or self.args.signatures:
            with suppress(Exception):
                signature = self.parse_signature(data)

        if self.args.timestamps:
            ts = self.parse_time_stamps(pe, self.args.timeraw)
            with suppress(KeyError):
                ts.update(Signed=signature['Timestamp'])
            result.update(TimeStamp=ts)

        if signature and self.args.signatures:
            result['Signature'] = signature

        if result:
            yield from ppjson(tabular=self.args.tabular)._pretty_output(result, indent=4, ensure_ascii=False)

    _LCID = {
        0x0436: 'Afrikaans-South Africa',
        0x041c: 'Albanian-Albania',
        0x045e: 'Amharic-Ethiopia',
        0x0401: 'Arabic (Saudi Arabia)',
        0x1401: 'Arabic (Algeria)',
        0x3c01: 'Arabic (Bahrain)',
        0x0c01: 'Arabic (Egypt)',
        0x0801: 'Arabic (Iraq)',
        0x2c01: 'Arabic (Jordan)',
        0x3401: 'Arabic (Kuwait)',
        0x3001: 'Arabic (Lebanon)',
        0x1001: 'Arabic (Libya)',
        0x1801: 'Arabic (Morocco)',
        0x2001: 'Arabic (Oman)',
        0x4001: 'Arabic (Qatar)',
        0x2801: 'Arabic (Syria)',
        0x1c01: 'Arabic (Tunisia)',
        0x3801: 'Arabic (U.A.E.)',
        0x2401: 'Arabic (Yemen)',
        0x042b: 'Armenian-Armenia',
        0x044d: 'Assamese',
        0x082c: 'Azeri (Cyrillic)',
        0x042c: 'Azeri (Latin)',
        0x042d: 'Basque',
        0x0423: 'Belarusian',
        0x0445: 'Bengali (India)',
        0x0845: 'Bengali (Bangladesh)',
        0x141A: 'Bosnian (Bosnia/Herzegovina)',
        0x0402: 'Bulgarian',
        0x0455: 'Burmese',
        0x0403: 'Catalan',
        0x045c: 'Cherokee-United States',
        0x0804: 'Chinese (People\'s Republic of China)',
        0x1004: 'Chinese (Singapore)',
        0x0404: 'Chinese (Taiwan)',
        0x0c04: 'Chinese (Hong Kong SAR)',
        0x1404: 'Chinese (Macao SAR)',
        0x041a: 'Croatian',
        0x101a: 'Croatian (Bosnia/Herzegovina)',
        0x0405: 'Czech',
        0x0406: 'Danish',
        0x0465: 'Divehi',
        0x0413: 'Dutch-Netherlands',
        0x0813: 'Dutch-Belgium',
        0x0466: 'Edo',
        0x0409: 'English (United States)',
        0x0809: 'English (United Kingdom)',
        0x0c09: 'English (Australia)',
        0x2809: 'English (Belize)',
        0x1009: 'English (Canada)',
        0x2409: 'English (Caribbean)',
        0x3c09: 'English (Hong Kong SAR)',
        0x4009: 'English (India)',
        0x3809: 'English (Indonesia)',
        0x1809: 'English (Ireland)',
        0x2009: 'English (Jamaica)',
        0x4409: 'English (Malaysia)',
        0x1409: 'English (New Zealand)',
        0x3409: 'English (Philippines)',
        0x4809: 'English (Singapore)',
        0x1c09: 'English (South Africa)',
        0x2c09: 'English (Trinidad)',
        0x3009: 'English (Zimbabwe)',
        0x0425: 'Estonian',
        0x0438: 'Faroese',
        0x0429: 'Farsi',
        0x0464: 'Filipino',
        0x040b: 'Finnish',
        0x040c: 'French (France)',
        0x080c: 'French (Belgium)',
        0x2c0c: 'French (Cameroon)',
        0x0c0c: 'French (Canada)',
        0x240c: 'French (Democratic Rep. of Congo)',
        0x300c: 'French (Cote d\'Ivoire)',
        0x3c0c: 'French (Haiti)',
        0x140c: 'French (Luxembourg)',
        0x340c: 'French (Mali)',
        0x180c: 'French (Monaco)',
        0x380c: 'French (Morocco)',
        0xe40c: 'French (North Africa)',
        0x200c: 'French (Reunion)',
        0x280c: 'French (Senegal)',
        0x100c: 'French (Switzerland)',
        0x1c0c: 'French (West Indies)',
        0x0462: 'Frisian-Netherlands',
        0x0467: 'Fulfulde-Nigeria',
        0x042f: 'FYRO Macedonian',
        0x083c: 'Gaelic (Ireland)',
        0x043c: 'Gaelic (Scotland)',
        0x0456: 'Galician',
        0x0437: 'Georgian',
        0x0407: 'German (Germany)',
        0x0c07: 'German (Austria)',
        0x1407: 'German (Liechtenstein)',
        0x1007: 'German (Luxembourg)',
        0x0807: 'German (Switzerland)',
        0x0408: 'Greek',
        0x0474: 'Guarani-Paraguay',
        0x0447: 'Gujarati',
        0x0468: 'Hausa-Nigeria',
        0x0475: 'Hawaiian (United States)',
        0x040d: 'Hebrew',
        0x0439: 'Hindi',
        0x040e: 'Hungarian',
        0x0469: 'Ibibio-Nigeria',
        0x040f: 'Icelandic',
        0x0470: 'Igbo-Nigeria',
        0x0421: 'Indonesian',
        0x045d: 'Inuktitut',
        0x0410: 'Italian (Italy)',
        0x0810: 'Italian (Switzerland)',
        0x0411: 'Japanese',
        0x044b: 'Kannada',
        0x0471: 'Kanuri-Nigeria',
        0x0860: 'Kashmiri',
        0x0460: 'Kashmiri (Arabic)',
        0x043f: 'Kazakh',
        0x0453: 'Khmer',
        0x0457: 'Konkani',
        0x0412: 'Korean',
        0x0440: 'Kyrgyz (Cyrillic)',
        0x0454: 'Lao',
        0x0476: 'Latin',
        0x0426: 'Latvian',
        0x0427: 'Lithuanian',
        0x043e: 'Malay-Malaysia',
        0x083e: 'Malay-Brunei Darussalam',
        0x044c: 'Malayalam',
        0x043a: 'Maltese',
        0x0458: 'Manipuri',
        0x0481: 'Maori-New Zealand',
        0x044e: 'Marathi',
        0x0450: 'Mongolian (Cyrillic)',
        0x0850: 'Mongolian (Mongolian)',
        0x0461: 'Nepali',
        0x0861: 'Nepali-India',
        0x0414: 'Norwegian (Bokmål)',
        0x0814: 'Norwegian (Nynorsk)',
        0x0448: 'Oriya',
        0x0472: 'Oromo',
        0x0479: 'Papiamentu',
        0x0463: 'Pashto',
        0x0415: 'Polish',
        0x0416: 'Portuguese-Brazil',
        0x0816: 'Portuguese-Portugal',
        0x0446: 'Punjabi',
        0x0846: 'Punjabi (Pakistan)',
        0x046B: 'Quecha (Bolivia)',
        0x086B: 'Quecha (Ecuador)',
        0x0C6B: 'Quecha (Peru)',
        0x0417: 'Rhaeto-Romanic',
        0x0418: 'Romanian',
        0x0818: 'Romanian (Moldava)',
        0x0419: 'Russian',
        0x0819: 'Russian (Moldava)',
        0x043b: 'Sami (Lappish)',
        0x044f: 'Sanskrit',
        0x046c: 'Sepedi',
        0x0c1a: 'Serbian (Cyrillic)',
        0x081a: 'Serbian (Latin)',
        0x0459: 'Sindhi (India)',
        0x0859: 'Sindhi (Pakistan)',
        0x045b: 'Sinhalese-Sri Lanka',
        0x041b: 'Slovak',
        0x0424: 'Slovenian',
        0x0477: 'Somali',
        0x042e: 'Sorbian',
        0x0c0a: 'Spanish (Modern Sort)',
        0x040a: 'Spanish (Traditional Sort)',
        0x2c0a: 'Spanish (Argentina)',
        0x400a: 'Spanish (Bolivia)',
        0x340a: 'Spanish (Chile)',
        0x240a: 'Spanish (Colombia)',
        0x140a: 'Spanish (Costa Rica)',
        0x1c0a: 'Spanish (Dominican Republic)',
        0x300a: 'Spanish (Ecuador)',
        0x440a: 'Spanish (El Salvador)',
        0x100a: 'Spanish (Guatemala)',
        0x480a: 'Spanish (Honduras)',
        0x580a: 'Spanish (Latin America)',
        0x080a: 'Spanish (Mexico)',
        0x4c0a: 'Spanish (Nicaragua)',
        0x180a: 'Spanish (Panama)',
        0x3c0a: 'Spanish (Paraguay)',
        0x280a: 'Spanish (Peru)',
        0x500a: 'Spanish (Puerto Rico)',
        0x540a: 'Spanish (United States)',
        0x380a: 'Spanish (Uruguay)',
        0x200a: 'Spanish (Venezuela)',
        0x0430: 'Sutu',
        0x0441: 'Swahili',
        0x041d: 'Swedish',
        0x081d: 'Swedish-Finland',
        0x045a: 'Syriac',
        0x0428: 'Tajik',
        0x045f: 'Tamazight (Arabic)',
        0x085f: 'Tamazight (Latin)',
        0x0449: 'Tamil',
        0x0444: 'Tatar',
        0x044a: 'Telugu',
        0x041e: 'Thai',
        0x0851: 'Tibetan (Bhutan)',
        0x0451: 'Tibetan (People\'s Republic of China)',
        0x0873: 'Tigrigna (Eritrea)',
        0x0473: 'Tigrigna (Ethiopia)',
        0x0431: 'Tsonga',
        0x0432: 'Tswana',
        0x041f: 'Turkish',
        0x0442: 'Turkmen',
        0x0480: 'Uighur-China',
        0x0422: 'Ukrainian',
        0x0420: 'Urdu',
        0x0820: 'Urdu-India',
        0x0843: 'Uzbek (Cyrillic)',
        0x0443: 'Uzbek (Latin)',
        0x0433: 'Venda',
        0x042a: 'Vietnamese',
        0x0452: 'Welsh',
        0x0434: 'Xhosa',
        0x0478: 'Yi',
        0x043d: 'Yiddish',
        0x046a: 'Yoruba',
        0x0435: 'Zulu',
        0x04ff: 'HID (Human Interface Device)'
    }

    _CHARSET = {
        0x0000: '7-bit ASCII',
        0x03A4: 'Japan (Shift - JIS X-0208)',
        0x03B5: 'Korea (Shift - KSC 5601)',
        0x03B6: 'Taiwan (Big5)',
        0x04B0: 'Unicode',
        0x04E2: 'Latin-2 (Eastern European)',
        0x04E3: 'Cyrillic',
        0x04E4: 'Multilingual',
        0x04E5: 'Greek',
        0x04E6: 'Turkish',
        0x04E7: 'Hebrew',
        0x04E8: 'Arabic',
    }

    _WINVER = {
        3: {
            0x00: 'Windows NT 3',
            0x0A: 'Windows NT 3.1',
            0x32: 'Windows NT 3.5',
            0x33: 'Windows NT 3.51',
        },
        4: {
            0x00: 'Windows 95',
            0x0A: 'Windows 98',
        },
        5: {
            0x00: 'Windows 2000',
            0x5A: 'Windows Me',
            0x01: 'Windows XP',
            0x02: 'Windows Server 2003',
        },
        6: {
            0x00: 'Windows Vista',
            0x01: 'Windows 7',
            0x02: 'Windows 8',
            0x03: 'Windows 8.1',
        },
        10: {
            0x00: 'Windows 10',
        }
    }

    # copy of https://raw.githubusercontent.com/dishather/richprint/master/comp_id.txt
    _RICH_HEADER = {
        # Objects without @comp.id are collected under this record
        0x00010000: VersionInfo(VIT.ERR, 'Unmarked objects'),
        0x00000000: VersionInfo(VIT.ERR, 'Unmarked objects (old)'),

        # MSVS2019 v16.9.2
        0x010474d9: VersionInfo(VIT.OBJ, 'VS2019 v16.9.2 build 29913'),
        0x010374d9: VersionInfo(VIT.ASM, 'VS2019 v16.9.2 build 29913'),
        0x010574d9: VersionInfo(VIT.CPP, 'VS2019 v16.9.2 build 29913'),
        0x00ff74d9: VersionInfo(VIT.RES, 'VS2019 v16.9.2 build 29913'),
        0x010274d9: VersionInfo(VIT.LNK, 'VS2019 v16.9.2 build 29913'),
        0x010074d9: VersionInfo(VIT.EXP, 'VS2019 v16.9.2 build 29913'),
        0x010174d9: VersionInfo(VIT.IMP, 'VS2019 v16.9.2 build 29913'),

        # MSVS2019 v16.9.2
        # from https://walbourn.github.io/vs-2019-update-9/
        0x010474d6: VersionInfo(VIT.OBJ, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010374d6: VersionInfo(VIT.ASM, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010574d6: VersionInfo(VIT.CPP, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x00ff74d6: VersionInfo(VIT.RES, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010274d6: VersionInfo(VIT.LNK, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010074d6: VersionInfo(VIT.EXP, 'VS2019 v16.9.0 build 29910', interpolated=True),
        0x010174d6: VersionInfo(VIT.IMP, 'VS2019 v16.9.0 build 29910', interpolated=True),

        # MSVS2019 v16.8.5
        0x01047299: VersionInfo(VIT.OBJ, 'VS2019 v16.8.5 build 29337'),
        0x01037299: VersionInfo(VIT.ASM, 'VS2019 v16.8.5 build 29337'),
        0x01057299: VersionInfo(VIT.CPP, 'VS2019 v16.8.5 build 29337'),
        0x00ff7299: VersionInfo(VIT.RES, 'VS2019 v16.8.5 build 29337'),
        0x01027299: VersionInfo(VIT.LNK, 'VS2019 v16.8.5 build 29337'),
        0x01007299: VersionInfo(VIT.EXP, 'VS2019 v16.8.5 build 29337'),
        0x01017299: VersionInfo(VIT.IMP, 'VS2019 v16.8.5 build 29337'),

        # MSVS2019 v16.8.4
        0x01047298: VersionInfo(VIT.OBJ, 'VS2019 v16.8.4 build 29336'),
        0x01037298: VersionInfo(VIT.ASM, 'VS2019 v16.8.4 build 29336'),
        0x01057298: VersionInfo(VIT.CPP, 'VS2019 v16.8.4 build 29336'),
        0x00ff7298: VersionInfo(VIT.RES, 'VS2019 v16.8.4 build 29336'),
        0x01027298: VersionInfo(VIT.LNK, 'VS2019 v16.8.4 build 29336'),
        0x01007298: VersionInfo(VIT.EXP, 'VS2019 v16.8.4 build 29336'),
        0x01017298: VersionInfo(VIT.IMP, 'VS2019 v16.8.4 build 29336'),

        # MSVS2019 v16.8.3
        0x01047297: VersionInfo(VIT.OBJ, 'VS2019 v16.8.3 build 29335'),
        0x01037297: VersionInfo(VIT.ASM, 'VS2019 v16.8.3 build 29335'),
        0x01057297: VersionInfo(VIT.CPP, 'VS2019 v16.8.3 build 29335'),
        0x00ff7297: VersionInfo(VIT.RES, 'VS2019 v16.8.3 build 29335'),
        0x01027297: VersionInfo(VIT.LNK, 'VS2019 v16.8.3 build 29335'),
        0x01007297: VersionInfo(VIT.EXP, 'VS2019 v16.8.3 build 29335'),
        0x01017297: VersionInfo(VIT.IMP, 'VS2019 v16.8.3 build 29335'),

        # MSVS2019 v16.8.2
        0x01047296: VersionInfo(VIT.OBJ, 'VS2019 v16.8.2 build 29334'),
        0x01037296: VersionInfo(VIT.ASM, 'VS2019 v16.8.2 build 29334'),
        0x01057296: VersionInfo(VIT.CPP, 'VS2019 v16.8.2 build 29334'),
        0x00ff7296: VersionInfo(VIT.RES, 'VS2019 v16.8.2 build 29334'),
        0x01027296: VersionInfo(VIT.LNK, 'VS2019 v16.8.2 build 29334'),
        0x01007296: VersionInfo(VIT.EXP, 'VS2019 v16.8.2 build 29334'),
        0x01017296: VersionInfo(VIT.IMP, 'VS2019 v16.8.2 build 29334'),

        # MSVS2019 v16.8.0
        # from https://walbourn.github.io/vs-2019-update-8/
        0x01047295: VersionInfo(VIT.OBJ, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01037295: VersionInfo(VIT.ASM, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01057295: VersionInfo(VIT.CPP, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x00ff7295: VersionInfo(VIT.RES, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01027295: VersionInfo(VIT.LNK, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01007295: VersionInfo(VIT.EXP, 'VS2019 v16.8.0 build 29333', interpolated=True),
        0x01017295: VersionInfo(VIT.IMP, 'VS2019 v16.8.0 build 29333', interpolated=True),

        # MSVS2019 v16.7.5
        0x010471b8: VersionInfo(VIT.OBJ, 'VS2019 v16.7.5 build 29112'),
        0x010371b8: VersionInfo(VIT.ASM, 'VS2019 v16.7.5 build 29112'),
        0x010571b8: VersionInfo(VIT.CPP, 'VS2019 v16.7.5 build 29112'),
        0x00ff71b8: VersionInfo(VIT.RES, 'VS2019 v16.7.5 build 29112'),
        0x010271b8: VersionInfo(VIT.LNK, 'VS2019 v16.7.5 build 29112'),
        0x010071b8: VersionInfo(VIT.EXP, 'VS2019 v16.7.5 build 29112'),
        0x010171b8: VersionInfo(VIT.IMP, 'VS2019 v16.7.5 build 29112'),

        # MSVS2019 v16.7.1 .. 16.7.4
        0x010471b7: VersionInfo(VIT.OBJ, 'VS2019 v16.7.1 build 29111'),
        0x010371b7: VersionInfo(VIT.ASM, 'VS2019 v16.7.1 build 29111'),
        0x010571b7: VersionInfo(VIT.CPP, 'VS2019 v16.7.1 build 29111'),
        0x00ff71b7: VersionInfo(VIT.RES, 'VS2019 v16.7.1 build 29111'),
        0x010271b7: VersionInfo(VIT.LNK, 'VS2019 v16.7.1 build 29111'),
        0x010071b7: VersionInfo(VIT.EXP, 'VS2019 v16.7.1 build 29111'),
        0x010171b7: VersionInfo(VIT.IMP, 'VS2019 v16.7.1 build 29111'),

        # MSVS2019 v16.7.0
        0x010471b6: VersionInfo(VIT.OBJ, 'VS2019 v16.7.0 build 29110'),
        0x010371b6: VersionInfo(VIT.ASM, 'VS2019 v16.7.0 build 29110'),
        0x010571b6: VersionInfo(VIT.CPP, 'VS2019 v16.7.0 build 29110'),
        0x00ff71b6: VersionInfo(VIT.RES, 'VS2019 v16.7.0 build 29110'),
        0x010271b6: VersionInfo(VIT.LNK, 'VS2019 v16.7.0 build 29110'),
        0x010071b6: VersionInfo(VIT.EXP, 'VS2019 v16.7.0 build 29110'),
        0x010171b6: VersionInfo(VIT.IMP, 'VS2019 v16.7.0 build 29110'),

        # MSVS2019 v16.6.2 ... 16.6.5
        0x01047086: VersionInfo(VIT.OBJ, 'VS2019 v16.6.2 build 28806'),
        0x01037086: VersionInfo(VIT.ASM, 'VS2019 v16.6.2 build 28806'),
        0x01057086: VersionInfo(VIT.CPP, 'VS2019 v16.6.2 build 28806'),
        0x00ff7086: VersionInfo(VIT.RES, 'VS2019 v16.6.2 build 28806'),
        0x01027086: VersionInfo(VIT.LNK, 'VS2019 v16.6.2 build 28806'),
        0x01007086: VersionInfo(VIT.EXP, 'VS2019 v16.6.2 build 28806'),
        0x01017086: VersionInfo(VIT.IMP, 'VS2019 v16.6.2 build 28806'),

        # MSVS2019 v16.6.0
        0x01047085: VersionInfo(VIT.OBJ, 'VS2019 v16.6.0 build 28805'),
        0x01037085: VersionInfo(VIT.ASM, 'VS2019 v16.6.0 build 28805'),
        0x01057085: VersionInfo(VIT.CPP, 'VS2019 v16.6.0 build 28805'),
        0x00ff7085: VersionInfo(VIT.RES, 'VS2019 v16.6.0 build 28805'),
        0x01027085: VersionInfo(VIT.LNK, 'VS2019 v16.6.0 build 28805'),
        0x01007085: VersionInfo(VIT.EXP, 'VS2019 v16.6.0 build 28805'),
        0x01017085: VersionInfo(VIT.IMP, 'VS2019 v16.6.0 build 28805'),

        # MSVS2019 v16.5.5 (also 16.5.4)
        0x01046fc6: VersionInfo(VIT.OBJ, 'VS2019 v16.5.5 build 28614'),
        0x01036fc6: VersionInfo(VIT.ASM, 'VS2019 v16.5.5 build 28614'),
        0x01056fc6: VersionInfo(VIT.CPP, 'VS2019 v16.5.5 build 28614'),
        0x00ff6fc6: VersionInfo(VIT.RES, 'VS2019 v16.5.5 build 28614'),
        0x01026fc6: VersionInfo(VIT.LNK, 'VS2019 v16.5.5 build 28614'),
        0x01006fc6: VersionInfo(VIT.EXP, 'VS2019 v16.5.5 build 28614'),
        0x01016fc6: VersionInfo(VIT.IMP, 'VS2019 v16.5.5 build 28614'),

        # Visual Studio 2019 version 16.5.2 (values are interpolated)
        # source: https://walbourn.github.io/vs-2019-update-5/
        0x01046fc4: VersionInfo(VIT.OBJ, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01036fc4: VersionInfo(VIT.ASM, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01056fc4: VersionInfo(VIT.CPP, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x00ff6fc4: VersionInfo(VIT.RES, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01026fc4: VersionInfo(VIT.LNK, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01016fc4: VersionInfo(VIT.IMP, 'VS2019 v16.5.2 build 28612', interpolated=True),
        0x01006fc4: VersionInfo(VIT.EXP, 'VS2019 v16.5.2 build 28612', interpolated=True),

        # Visual Studio 2019 version 16.5.1 (values are interpolated)
        0x01046fc3: VersionInfo(VIT.OBJ, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01036fc3: VersionInfo(VIT.ASM, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01056fc3: VersionInfo(VIT.CPP, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x00ff6fc3: VersionInfo(VIT.RES, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01026fc3: VersionInfo(VIT.LNK, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01016fc3: VersionInfo(VIT.IMP, 'VS2019 v16.5.1 build 28611', interpolated=True),
        0x01006fc3: VersionInfo(VIT.EXP, 'VS2019 v16.5.1 build 28611', interpolated=True),

        # Visual Studio 2019 version 16.5.0 (values are interpolated)
        # source: https://walbourn.github.io/vs-2019-update-5/
        0x01046fc2: VersionInfo(VIT.OBJ, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01036fc2: VersionInfo(VIT.ASM, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01056fc2: VersionInfo(VIT.CPP, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x00ff6fc2: VersionInfo(VIT.RES, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01026fc2: VersionInfo(VIT.LNK, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01016fc2: VersionInfo(VIT.IMP, 'VS2019 v16.5.0 build 28610', interpolated=True),
        0x01006fc2: VersionInfo(VIT.EXP, 'VS2019 v16.5.0 build 28610', interpolated=True),

        # MSVS2019 v16.4.6 (values are interpolated)
        # source: https://walbourn.github.io/vs-2019-update-4/
        0x01046e9f: VersionInfo(VIT.OBJ, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01036e9f: VersionInfo(VIT.ASM, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01056e9f: VersionInfo(VIT.CPP, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x00ff6e9f: VersionInfo(VIT.RES, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01026e9f: VersionInfo(VIT.LNK, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01006e9f: VersionInfo(VIT.EXP, 'VS2019 v16.4.6 build 28319', interpolated=True),
        0x01016e9f: VersionInfo(VIT.IMP, 'VS2019 v16.4.6 build 28319', interpolated=True),

        # MSVS2019 v16.4.4 (values are interpolated)
        # source: https://walbourn.github.io/vs-2019-update-4/
        0x01046e9c: VersionInfo(VIT.OBJ, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01036e9c: VersionInfo(VIT.ASM, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01056e9c: VersionInfo(VIT.CPP, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x00ff6e9c: VersionInfo(VIT.RES, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01026e9c: VersionInfo(VIT.LNK, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01006e9c: VersionInfo(VIT.EXP, 'VS2019 v16.4.4 build 28316', interpolated=True),
        0x01016e9c: VersionInfo(VIT.IMP, 'VS2019 v16.4.4 build 28316', interpolated=True),

        # MSVS2019 v16.4.3
        0x01046e9b: VersionInfo(VIT.OBJ, 'VS2019 v16.4.3 build 28315'),
        0x01036e9b: VersionInfo(VIT.ASM, 'VS2019 v16.4.3 build 28315'),
        0x01056e9b: VersionInfo(VIT.CPP, 'VS2019 v16.4.3 build 28315'),
        0x00ff6e9b: VersionInfo(VIT.RES, 'VS2019 v16.4.3 build 28315'),
        0x01026e9b: VersionInfo(VIT.LNK, 'VS2019 v16.4.3 build 28315'),
        0x01006e9b: VersionInfo(VIT.EXP, 'VS2019 v16.4.3 build 28315'),
        0x01016e9b: VersionInfo(VIT.IMP, 'VS2019 v16.4.3 build 28315'),

        # Visual Studio 2019 version 16.4.0 (values are interpolated)
        0x01046e9a: VersionInfo(VIT.OBJ, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01036e9a: VersionInfo(VIT.ASM, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01056e9a: VersionInfo(VIT.CPP, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x00ff6e9a: VersionInfo(VIT.RES, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01026e9a: VersionInfo(VIT.LNK, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01016e9a: VersionInfo(VIT.IMP, 'VS2019 v16.4.0 build 28314', interpolated=True),
        0x01006e9a: VersionInfo(VIT.EXP, 'VS2019 v16.4.0 build 28314', interpolated=True),

        # Visual Studio 2019 version 16.3.2 (values are interpolated)
        0x01046dc9: VersionInfo(VIT.OBJ, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01036dc9: VersionInfo(VIT.ASM, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01056dc9: VersionInfo(VIT.CPP, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x00ff6dc9: VersionInfo(VIT.RES, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01026dc9: VersionInfo(VIT.LNK, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01016dc9: VersionInfo(VIT.IMP, 'VS2019 v16.3.2 build 28105', interpolated=True),
        0x01006dc9: VersionInfo(VIT.EXP, 'VS2019 v16.3.2 build 28105', interpolated=True),

        # Visual Studio 2019 version 16.2.3 (values are interpolated)
        0x01046d01: VersionInfo(VIT.OBJ, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01036d01: VersionInfo(VIT.ASM, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01056d01: VersionInfo(VIT.CPP, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x00ff6d01: VersionInfo(VIT.RES, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01026d01: VersionInfo(VIT.LNK, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01016d01: VersionInfo(VIT.IMP, 'VS2019 v16.2.3 build 27905', interpolated=True),
        0x01006d01: VersionInfo(VIT.EXP, 'VS2019 v16.2.3 build 27905', interpolated=True),

        # Visual Studio 2019 version 16.1.2 (values are interpolated)
        0x01046c36: VersionInfo(VIT.OBJ, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01036c36: VersionInfo(VIT.ASM, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01056c36: VersionInfo(VIT.CPP, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x00ff6c36: VersionInfo(VIT.RES, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01026c36: VersionInfo(VIT.LNK, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01016c36: VersionInfo(VIT.IMP, 'VS2019 v16.1.2 build 27702', interpolated=True),
        0x01006c36: VersionInfo(VIT.EXP, 'VS2019 v16.1.2 build 27702', interpolated=True),

        # MSVS2019 v16.0.0
        0x01046b74: VersionInfo(VIT.OBJ, 'VS2019 v16.0.0 build 27508'),
        0x01036b74: VersionInfo(VIT.ASM, 'VS2019 v16.0.0 build 27508'),
        0x01056b74: VersionInfo(VIT.CPP, 'VS2019 v16.0.0 build 27508'),
        0x00ff6b74: VersionInfo(VIT.RES, 'VS2019 v16.0.0 build 27508'),
        0x01026b74: VersionInfo(VIT.LNK, 'VS2019 v16.0.0 build 27508'),
        0x01006b74: VersionInfo(VIT.EXP, 'VS2019 v16.0.0 build 27508'),
        0x01016b74: VersionInfo(VIT.IMP, 'VS2019 v16.0.0 build 27508'),

        # Visual Studio 2017 version 15.9.11 (values are interpolated)
        0x01046996: VersionInfo(VIT.OBJ, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01036996: VersionInfo(VIT.ASM, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01056996: VersionInfo(VIT.CPP, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x00ff6996: VersionInfo(VIT.RES, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01026996: VersionInfo(VIT.LNK, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01016996: VersionInfo(VIT.IMP, 'VS2017 v15.9.11 build 27030', interpolated=True),
        0x01006996: VersionInfo(VIT.EXP, 'VS2017 v15.9.11 build 27030', interpolated=True),

        # Visual Studio 2017 version 15.9.7 (values are interpolated)
        0x01046993: VersionInfo(VIT.OBJ, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01036993: VersionInfo(VIT.ASM, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01056993: VersionInfo(VIT.CPP, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x00ff6993: VersionInfo(VIT.RES, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01026993: VersionInfo(VIT.LNK, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01016993: VersionInfo(VIT.IMP, 'VS2017 v15.9.7 build 27027', interpolated=True),
        0x01006993: VersionInfo(VIT.EXP, 'VS2017 v15.9.7 build 27027', interpolated=True),

        # Visual Studio 2017 version 15.9.5 (values are interpolated)
        0x01046992: VersionInfo(VIT.OBJ, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01036992: VersionInfo(VIT.ASM, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01056992: VersionInfo(VIT.CPP, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x00ff6992: VersionInfo(VIT.RES, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01026992: VersionInfo(VIT.LNK, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01016992: VersionInfo(VIT.IMP, 'VS2017 v15.9.5 build 27026', interpolated=True),
        0x01006992: VersionInfo(VIT.EXP, 'VS2017 v15.9.5 build 27026', interpolated=True),

        # Visual Studio 2017 version 15.9.4 (values are interpolated)
        0x01046991: VersionInfo(VIT.OBJ, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01036991: VersionInfo(VIT.ASM, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01056991: VersionInfo(VIT.CPP, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x00ff6991: VersionInfo(VIT.RES, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01026991: VersionInfo(VIT.LNK, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01016991: VersionInfo(VIT.IMP, 'VS2017 v15.9.4 build 27025', interpolated=True),
        0x01006991: VersionInfo(VIT.EXP, 'VS2017 v15.9.4 build 27025', interpolated=True),

        # Visual Studio 2017 version 15.9.1 (values are interpolated)
        0x0104698f: VersionInfo(VIT.OBJ, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0103698f: VersionInfo(VIT.ASM, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0105698f: VersionInfo(VIT.CPP, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x00ff698f: VersionInfo(VIT.RES, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0102698f: VersionInfo(VIT.LNK, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0101698f: VersionInfo(VIT.IMP, 'VS2017 v15.9.1 build 27023', interpolated=True),
        0x0100698f: VersionInfo(VIT.EXP, 'VS2017 v15.9.1 build 27023', interpolated=True),

        # Visual Studio 2017 version 15.8.5 (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-8-update/
        0x0104686c: VersionInfo(VIT.OBJ, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0103686c: VersionInfo(VIT.ASM, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0105686c: VersionInfo(VIT.CPP, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x00ff686c: VersionInfo(VIT.RES, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0102686c: VersionInfo(VIT.LNK, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0101686c: VersionInfo(VIT.IMP, 'VS2017 v15.8.5 build 26732', interpolated=True),
        0x0100686c: VersionInfo(VIT.EXP, 'VS2017 v15.8.5 build 26732', interpolated=True),

        # Visual Studio 2017 version 15.8.9 (sic!) (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-8-update/
        0x0104686a: VersionInfo(VIT.OBJ, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0103686a: VersionInfo(VIT.ASM, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0105686a: VersionInfo(VIT.CPP, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x00ff686a: VersionInfo(VIT.RES, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0102686a: VersionInfo(VIT.LNK, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0101686a: VersionInfo(VIT.IMP, 'VS2017 v15.8.9? build 26730', interpolated=True),
        0x0100686a: VersionInfo(VIT.EXP, 'VS2017 v15.8.9? build 26730', interpolated=True),

        # Visual Studio 2017 version 15.8.4 (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-8-update/
        0x01046869: VersionInfo(VIT.OBJ, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01036869: VersionInfo(VIT.ASM, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01056869: VersionInfo(VIT.CPP, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x00ff6869: VersionInfo(VIT.RES, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01026869: VersionInfo(VIT.LNK, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01016869: VersionInfo(VIT.IMP, 'VS2017 v15.8.4 build 26729', interpolated=True),
        0x01006869: VersionInfo(VIT.EXP, 'VS2017 v15.8.4 build 26729', interpolated=True),

        # Visual Studio 2017 version 15.8.0 (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-8-update/
        0x01046866: VersionInfo(VIT.OBJ, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01036866: VersionInfo(VIT.ASM, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01056866: VersionInfo(VIT.CPP, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x00ff6866: VersionInfo(VIT.RES, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01026866: VersionInfo(VIT.LNK, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01016866: VersionInfo(VIT.IMP, 'VS2017 v15.8.0 build 26726', interpolated=True),
        0x01006866: VersionInfo(VIT.EXP, 'VS2017 v15.8.0 build 26726', interpolated=True),

        # Visual Studio 2017 version 15.7.5 (values are interpolated)
        0x01046741: VersionInfo(VIT.OBJ, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01036741: VersionInfo(VIT.ASM, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01056741: VersionInfo(VIT.CPP, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x00ff6741: VersionInfo(VIT.RES, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01026741: VersionInfo(VIT.LNK, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01016741: VersionInfo(VIT.IMP, 'VS2017 v15.7.5 build 26433', interpolated=True),
        0x01006741: VersionInfo(VIT.EXP, 'VS2017 v15.7.5 build 26433', interpolated=True),

        # Visual Studio 2017 version 15.7.4 (values are interpolated)
        # source: https://walbourn.github.io/vs-2017-15-7-update/
        0x0104673f: VersionInfo(VIT.OBJ, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0103673f: VersionInfo(VIT.ASM, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0105673f: VersionInfo(VIT.CPP, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x00ff673f: VersionInfo(VIT.RES, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0102673f: VersionInfo(VIT.LNK, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0101673f: VersionInfo(VIT.IMP, 'VS2017 v15.7.4 build 26431', interpolated=True),
        0x0100673f: VersionInfo(VIT.EXP, 'VS2017 v15.7.4 build 26431', interpolated=True),

        # Visual Studio 2017 version 15.7.3 (values are interpolated)
        0x0104673e: VersionInfo(VIT.OBJ, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0103673e: VersionInfo(VIT.ASM, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0105673e: VersionInfo(VIT.CPP, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x00ff673e: VersionInfo(VIT.RES, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0102673e: VersionInfo(VIT.LNK, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0101673e: VersionInfo(VIT.IMP, 'VS2017 v15.7.3 build 26430', interpolated=True),
        0x0100673e: VersionInfo(VIT.EXP, 'VS2017 v15.7.3 build 26430', interpolated=True),

        # Visual Studio 2017 version 15.7.2 (values are interpolated)
        0x0104673d: VersionInfo(VIT.OBJ, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0103673d: VersionInfo(VIT.ASM, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0105673d: VersionInfo(VIT.CPP, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x00ff673d: VersionInfo(VIT.RES, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0102673d: VersionInfo(VIT.LNK, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0101673d: VersionInfo(VIT.IMP, 'VS2017 v15.7.2 build 26429', interpolated=True),
        0x0100673d: VersionInfo(VIT.EXP, 'VS2017 v15.7.2 build 26429', interpolated=True),

        # Visual Studio 2017 version 15.7.1 (values are interpolated)
        0x0104673c: VersionInfo(VIT.OBJ, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0103673c: VersionInfo(VIT.ASM, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0105673c: VersionInfo(VIT.CPP, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x00ff673c: VersionInfo(VIT.RES, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0102673c: VersionInfo(VIT.LNK, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0101673c: VersionInfo(VIT.IMP, 'VS2017 v15.7.1 build 26428', interpolated=True),
        0x0100673c: VersionInfo(VIT.EXP, 'VS2017 v15.7.1 build 26428', interpolated=True),

        # Visual Studio 2017 version 15.6.7 (values are interpolated)
        0x01046614: VersionInfo(VIT.OBJ, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01036614: VersionInfo(VIT.ASM, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01056614: VersionInfo(VIT.CPP, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x00ff6614: VersionInfo(VIT.RES, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01026614: VersionInfo(VIT.LNK, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01016614: VersionInfo(VIT.IMP, 'VS2017 v15.6.7 build 26132', interpolated=True),
        0x01006614: VersionInfo(VIT.EXP, 'VS2017 v15.6.7 build 26132', interpolated=True),

        # Visual Studio 2017 version 15.6.6 (values are interpolated)
        0x01046613: VersionInfo(VIT.OBJ, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01036613: VersionInfo(VIT.ASM, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01056613: VersionInfo(VIT.CPP, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x00ff6613: VersionInfo(VIT.RES, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01026613: VersionInfo(VIT.LNK, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01016613: VersionInfo(VIT.IMP, 'VS2017 v15.6.6 build 26131', interpolated=True),
        0x01006613: VersionInfo(VIT.EXP, 'VS2017 v15.6.6 build 26131', interpolated=True),

        # Visual Studio 2017 version 15.6.4 has the same build number
        # Visual Studio 2017 version 15.6.3 (values are interpolated)
        0x01046611: VersionInfo(VIT.OBJ, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01036611: VersionInfo(VIT.ASM, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01056611: VersionInfo(VIT.CPP, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x00ff6611: VersionInfo(VIT.RES, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01026611: VersionInfo(VIT.LNK, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01016611: VersionInfo(VIT.IMP, 'VS2017 v15.6.3 build 26129', interpolated=True),
        0x01006611: VersionInfo(VIT.EXP, 'VS2017 v15.6.3 build 26129', interpolated=True),

        # Visual Studio 2017 version 15.6.2 has the same build number
        # Visual Studio 2017 version 15.6.1 has the same build number
        # Visual Studio 2017 version 15.6.0 (values are interpolated)
        0x01046610: VersionInfo(VIT.OBJ, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01036610: VersionInfo(VIT.ASM, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01056610: VersionInfo(VIT.CPP, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x00ff6610: VersionInfo(VIT.RES, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01026610: VersionInfo(VIT.LNK, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01016610: VersionInfo(VIT.IMP, 'VS2017 v15.6.0 build 26128', interpolated=True),
        0x01006610: VersionInfo(VIT.EXP, 'VS2017 v15.6.0 build 26128', interpolated=True),

        # Visual Studio 2017 version 15.5.7 has the same build number
        # Visual Studio 2017 version 15.5.6 (values are interpolated)
        0x010464eb: VersionInfo(VIT.OBJ, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010364eb: VersionInfo(VIT.ASM, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010564eb: VersionInfo(VIT.CPP, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x00ff64eb: VersionInfo(VIT.RES, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010264eb: VersionInfo(VIT.LNK, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010164eb: VersionInfo(VIT.IMP, 'VS2017 v15.5.6 build 25835', interpolated=True),
        0x010064eb: VersionInfo(VIT.EXP, 'VS2017 v15.5.6 build 25835', interpolated=True),

        # MSVS2017 v15.5.4 (15.5.3 has the same build number)
        0x010464ea: VersionInfo(VIT.OBJ, 'VS2017 v15.5.4 build 25834'),
        0x010364ea: VersionInfo(VIT.ASM, 'VS2017 v15.5.4 build 25834'),
        0x010564ea: VersionInfo(VIT.CPP, 'VS2017 v15.5.4 build 25834'),
        0x00ff64ea: VersionInfo(VIT.RES, 'VS2017 v15.5.4 build 25834'),
        0x010264ea: VersionInfo(VIT.LNK, 'VS2017 v15.5.4 build 25834'),
        0x010064ea: VersionInfo(VIT.EXP, 'VS2017 v15.5.4 build 25834'),
        0x010164ea: VersionInfo(VIT.IMP, 'VS2017 v15.5.4 build 25834'),

        # Visual Studio 2017 version 15.5.2 (values are interpolated)
        0x010464e7: VersionInfo(VIT.OBJ, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010364e7: VersionInfo(VIT.ASM, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010564e7: VersionInfo(VIT.CPP, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x00ff64e7: VersionInfo(VIT.RES, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010264e7: VersionInfo(VIT.LNK, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010164e7: VersionInfo(VIT.IMP, 'VS2017 v15.5.2 build 25831', interpolated=True),
        0x010064e7: VersionInfo(VIT.EXP, 'VS2017 v15.5.2 build 25831', interpolated=True),

        # Visual Studio 2017 version 15.4.5 (values are interpolated)
        0x010463cb: VersionInfo(VIT.OBJ, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010363cb: VersionInfo(VIT.ASM, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010563cb: VersionInfo(VIT.CPP, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x00ff63cb: VersionInfo(VIT.RES, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010263cb: VersionInfo(VIT.LNK, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010163cb: VersionInfo(VIT.IMP, 'VS2017 v15.4.5 build 25547', interpolated=True),
        0x010063cb: VersionInfo(VIT.EXP, 'VS2017 v15.4.5 build 25547', interpolated=True),

        # Visual Studio 2017 version 15.4.4 (values are interpolated)
        0x010463c6: VersionInfo(VIT.OBJ, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010363c6: VersionInfo(VIT.ASM, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010563c6: VersionInfo(VIT.CPP, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x00ff63c6: VersionInfo(VIT.RES, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010263c6: VersionInfo(VIT.LNK, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010163c6: VersionInfo(VIT.IMP, 'VS2017 v15.4.4 build 25542', interpolated=True),
        0x010063c6: VersionInfo(VIT.EXP, 'VS2017 v15.4.4 build 25542', interpolated=True),

        # Visual Studio 2017 version 15.3.3 (values are interpolated)
        0x010463a3: VersionInfo(VIT.OBJ, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010363a3: VersionInfo(VIT.ASM, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010563a3: VersionInfo(VIT.CPP, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x00ff63a3: VersionInfo(VIT.RES, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010263a3: VersionInfo(VIT.LNK, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010163a3: VersionInfo(VIT.IMP, 'VS2017 v15.3.3 build 25507', interpolated=True),
        0x010063a3: VersionInfo(VIT.EXP, 'VS2017 v15.3.3 build 25507', interpolated=True),

        # Visual Studio 2017 version 15.3 (values are interpolated)
        # source: https://twitter.com/visualc/status/897853176002433024
        0x010463a2: VersionInfo(VIT.OBJ, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010363a2: VersionInfo(VIT.ASM, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010563a2: VersionInfo(VIT.CPP, 'VS2017 v15.3 build 25506', interpolated=True),
        0x00ff63a2: VersionInfo(VIT.RES, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010263a2: VersionInfo(VIT.LNK, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010163a2: VersionInfo(VIT.IMP, 'VS2017 v15.3 build 25506', interpolated=True),
        0x010063a2: VersionInfo(VIT.EXP, 'VS2017 v15.3 build 25506', interpolated=True),

        # Visual Studio 2017 version 15.2 has the same build number
        # Visual Studio 2017 version 15.1 has the same build number
        # Visual Studio 2017 version 15.0 (values are interpolated)
        0x010461b9: VersionInfo(VIT.OBJ, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010361b9: VersionInfo(VIT.ASM, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010561b9: VersionInfo(VIT.CPP, 'VS2017 v15.0 build 25017', interpolated=True),
        0x00ff61b9: VersionInfo(VIT.RES, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010261b9: VersionInfo(VIT.LNK, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010161b9: VersionInfo(VIT.IMP, 'VS2017 v15.0 build 25017', interpolated=True),
        0x010061b9: VersionInfo(VIT.EXP, 'VS2017 v15.0 build 25017', interpolated=True),

        # MSVS Community 2015 UPD3.1 (cl version 19.00.24215.1) - some IDs are interpolated
        # [ASM] is the same as in UPD3 build 24213
        0x01045e97: VersionInfo(VIT.OBJ, 'VS2015 UPD3.1 build 24215'),
        0x01055e97: VersionInfo(VIT.CPP, 'VS2015 UPD3.1 build 24215'),
        0x01025e97: VersionInfo(VIT.LNK, 'VS2015 UPD3.1 build 24215'),
        0x01005e97: VersionInfo(VIT.EXP, 'VS2015 UPD3.1 build 24215'),
        0x01015e97: VersionInfo(VIT.IMP, 'VS2015 UPD3.1 build 24215'),

        # MSVS Community 2015 UPD3 (cl version 19.00.24213.1)
        0x01045e95: VersionInfo(VIT.OBJ, 'VS2015 UPD3 build 24213'),
        0x01035e92: VersionInfo(VIT.ASM, 'VS2015 UPD3 build 24210'),
        0x01055e95: VersionInfo(VIT.CPP, 'VS2015 UPD3 build 24213'),
        0x00ff5e92: VersionInfo(VIT.RES, 'VS2015 UPD3 build 24210'),
        0x01025e95: VersionInfo(VIT.LNK, 'VS2015 UPD3 build 24213'),
        0x01005e95: VersionInfo(VIT.EXP, 'VS2015 UPD3 build 24213'),
        0x01015e95: VersionInfo(VIT.IMP, 'VS2015 UPD3 build 24213'),

        # Visual Studio 2015 Update 3 [14.0] (values are interpolated)
        0x01045e92: VersionInfo(VIT.OBJ, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),
        # 01035e92 [ASM] VS2015 Update 3 [14.0] build 24210 (*)
        0x01055e92: VersionInfo(VIT.CPP, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),
        # 00ff5e92 [RES] VS2015 Update 3 [14.0] build 24210 (*)
        0x01025e92: VersionInfo(VIT.LNK, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),
        0x01015e92: VersionInfo(VIT.IMP, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),
        0x01005e92: VersionInfo(VIT.EXP, 'VS2015 Update 3 [14.0] build 24210', interpolated=True),

        # MSVS Community 2015 UPD2 (14.0.25123.0?)
        0x01045d6e: VersionInfo(VIT.OBJ, 'VS2015 UPD2 build 23918'),
        0x01035d6e: VersionInfo(VIT.ASM, 'VS2015 UPD2 build 23918'),
        0x01055d6e: VersionInfo(VIT.CPP, 'VS2015 UPD2 build 23918'),
        0x00ff5d6e: VersionInfo(VIT.RES, 'VS2015 UPD2 build 23918'),
        0x01025d6e: VersionInfo(VIT.LNK, 'VS2015 UPD2 build 23918'),
        0x01005d6e: VersionInfo(VIT.EXP, 'VS2015 UPD2 build 23918'),
        0x01015d6e: VersionInfo(VIT.IMP, 'VS2015 UPD2 build 23918'),

        # MSVS Community 2015 14.0.24728.2 (UPD 1) 14.0.24720.0 D14REL
        0x01045bd2: VersionInfo(VIT.OBJ, 'VS2015 UPD1 build 23506'),
        0x01035bd2: VersionInfo(VIT.ASM, 'VS2015 UPD1 build 23506'),
        0x01055bd2: VersionInfo(VIT.CPP, 'VS2015 UPD1 build 23506'),
        0x00ff5bd2: VersionInfo(VIT.RES, 'VS2015 UPD1 build 23506'),
        0x01025bd2: VersionInfo(VIT.LNK, 'VS2015 UPD1 build 23506'),
        0x01005bd2: VersionInfo(VIT.EXP, 'VS2015 UPD1 build 23506'),
        0x01015bd2: VersionInfo(VIT.IMP, 'VS2015 UPD1 build 23506'),

        # MSVS Community 2015 [14.0]
        0x010459f2: VersionInfo(VIT.OBJ, 'VS2015 [14.0] build 23026'),
        0x010359f2: VersionInfo(VIT.ASM, 'VS2015 [14.0] build 23026'),
        0x010559f2: VersionInfo(VIT.CPP, 'VS2015 [14.0] build 23026'),
        0x00ff59f2: VersionInfo(VIT.RES, 'VS2015 [14.0] build 23026'),
        0x010259f2: VersionInfo(VIT.LNK, 'VS2015 [14.0] build 23026'),
        0x010059f2: VersionInfo(VIT.EXP, 'VS2015 [14.0] build 23026'),
        0x010159f2: VersionInfo(VIT.IMP, 'VS2015 [14.0] build 23026'),

        # Visual Studio 2013 November CTP [12.0] (values are interpolated)
        0x00e0527a: VersionInfo(VIT.OBJ, 'VS2013 November CTP [12.0] build 21114', interpolated=True),
        0x00df527a: VersionInfo(VIT.ASM, 'VS2013 November CTP [12.0] build 21114', interpolated=True),
        0x00e1527a: VersionInfo(VIT.CPP, 'VS2013 November CTP [12.0] build 21114', interpolated=True),
        0x00db527a: VersionInfo(VIT.RES, 'VS2013 November CTP [12.0] build 21114', interpolated=True),
        0x00de527a: VersionInfo(VIT.LNK, 'VS2013 November CTP [12.0] build 21114', interpolated=True),
        0x00dd527a: VersionInfo(VIT.IMP, 'VS2013 November CTP [12.0] build 21114', interpolated=True),
        0x00dc527a: VersionInfo(VIT.EXP, 'VS2013 November CTP [12.0] build 21114', interpolated=True),

        # MSVS2013 12.0.40629.00 Update 5
        0x00e09eb5: VersionInfo(VIT.OBJ, 'VS2013 UPD5 build 40629'),
        0x00e19eb5: VersionInfo(VIT.CPP, 'VS2013 UPD5 build 40629'),
        # cvtres not updated since RTM version, so add interpolated one
        0x00db9eb5: VersionInfo(VIT.RES, 'VS2013 Update 5 [12.0] build 40629', interpolated=True),
        0x00de9eb5: VersionInfo(VIT.LNK, 'VS2013 UPD5 build 40629'),
        0x00dc9eb5: VersionInfo(VIT.EXP, 'VS2013 UPD5 build 40629'),
        0x00dd9eb5: VersionInfo(VIT.IMP, 'VS2013 UPD5 build 40629'),
        0x00df9eb5: VersionInfo(VIT.ASM, 'VS2013 UPD5 build 40629'),

        # MSVS2013 12.0.31101.00 Update 4 - not attested in real world, @comp.id is
        # calculated.
        0x00e0797d: VersionInfo(VIT.OBJ, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00e1797d: VersionInfo(VIT.CPP, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00db797d: VersionInfo(VIT.RES, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00de797d: VersionInfo(VIT.LNK, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00dc797d: VersionInfo(VIT.EXP, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00dd797d: VersionInfo(VIT.IMP, 'VS2013 UPD4 build 31101', interpolated=True),
        0x00df797d: VersionInfo(VIT.ASM, 'VS2013 UPD4 build 31101', interpolated=True),

        # MSVS2013 12.0.30723.00 Update 3 - not attested in real world, @comp.id is
        # calculated.
        0x00e07803: VersionInfo(VIT.OBJ, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00e17803: VersionInfo(VIT.CPP, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00db7803: VersionInfo(VIT.RES, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00de7803: VersionInfo(VIT.LNK, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00dc7803: VersionInfo(VIT.EXP, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00dd7803: VersionInfo(VIT.IMP, 'VS2013 UPD3 build 30723', interpolated=True),
        0x00df7803: VersionInfo(VIT.ASM, 'VS2013 UPD3 build 30723', interpolated=True),

        # MSVS2013 12.0.30501.00 Update 2 - not attested in real world, @comp.id is
        # calculated.
        0x00e07725: VersionInfo(VIT.OBJ, 'VS2013 UPD2 build 30501'),
        0x00e17725: VersionInfo(VIT.CPP, 'VS2013 UPD2 build 30501'),
        # cvtres not updated since RTM version, so add interpolated one
        0x00db7725: VersionInfo(VIT.RES, 'VS2013 Update 2 [12.0] build 30501', interpolated=True),
        0x00de7725: VersionInfo(VIT.LNK, 'VS2013 UPD2 build 30501'),
        0x00dc7725: VersionInfo(VIT.EXP, 'VS2013 UPD2 build 30501'),
        0x00dd7725: VersionInfo(VIT.IMP, 'VS2013 UPD2 build 30501'),
        0x00df7725: VersionInfo(VIT.ASM, 'VS2013 UPD2 build 30501'),

        # Visual Studio 2013 Update2 RC [12.0] (values are interpolated)
        0x00e07674: VersionInfo(VIT.OBJ, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00df7674: VersionInfo(VIT.ASM, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00e17674: VersionInfo(VIT.CPP, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00db7674: VersionInfo(VIT.RES, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00de7674: VersionInfo(VIT.LNK, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00dd7674: VersionInfo(VIT.IMP, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),
        0x00dc7674: VersionInfo(VIT.EXP, 'VS2013 Update2 RC [12.0] build 30324', interpolated=True),

        # MSVS2013 RTM
        # Looks like it doesn't always dump linker's comp.id
        # Visual Studio 2013 Update 1 [12.0] also has this build number
        0x00e0520d: VersionInfo(VIT.OBJ, 'VS2013 build 21005'),
        0x00e1520d: VersionInfo(VIT.CPP, 'VS2013 build 21005'),
        0x00db520d: VersionInfo(VIT.RES, 'VS2013 build 21005'),
        0x00de520d: VersionInfo(VIT.LNK, 'VS2013 build 21005'),
        0x00dc520d: VersionInfo(VIT.EXP, 'VS2013 build 21005'),
        0x00dd520d: VersionInfo(VIT.IMP, 'VS2013 build 21005'),
        0x00df520d: VersionInfo(VIT.ASM, 'VS2013 build 21005'),

        # Visual Studio 2013 RC [12.0] (values are interpolated)
        0x00e0515b: VersionInfo(VIT.OBJ, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00df515b: VersionInfo(VIT.ASM, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00e1515b: VersionInfo(VIT.CPP, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00db515b: VersionInfo(VIT.RES, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00de515b: VersionInfo(VIT.LNK, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00dd515b: VersionInfo(VIT.IMP, 'VS2013 RC [12.0] build 20827', interpolated=True),
        0x00dc515b: VersionInfo(VIT.EXP, 'VS2013 RC [12.0] build 20827', interpolated=True),

        # Visual Studio 2013 Preview [12.0] (values are interpolated)
        0x00e05089: VersionInfo(VIT.OBJ, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00df5089: VersionInfo(VIT.ASM, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00e15089: VersionInfo(VIT.CPP, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00db5089: VersionInfo(VIT.RES, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00de5089: VersionInfo(VIT.LNK, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00dd5089: VersionInfo(VIT.IMP, 'VS2013 Preview [12.0] build 20617', interpolated=True),
        0x00dc5089: VersionInfo(VIT.EXP, 'VS2013 Preview [12.0] build 20617', interpolated=True),

        # MSVS2012 Premium Update 4 (11.0.61030.00 Update 4)
        0x00ceee66: VersionInfo(VIT.OBJ, 'VS2012 UPD4 build 61030'),
        0x00cfee66: VersionInfo(VIT.CPP, 'VS2012 UPD4 build 61030'),
        0x00cdee66: VersionInfo(VIT.ASM, 'VS2012 UPD4 build 61030'),
        0x00c9ee66: VersionInfo(VIT.RES, 'VS2012 UPD4 build 61030'),
        0x00ccee66: VersionInfo(VIT.LNK, 'VS2012 UPD4 build 61030'),
        0x00caee66: VersionInfo(VIT.EXP, 'VS2012 UPD4 build 61030'),
        0x00cbee66: VersionInfo(VIT.IMP, 'VS2012 UPD4 build 61030'),

        # MSVS2012 Update 3 (17.00.60610.1 Update 3) - not attested in real world,
        # @comp.id is calculated.
        0x00ceecc2: VersionInfo(VIT.OBJ, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00cfecc2: VersionInfo(VIT.CPP, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00cdecc2: VersionInfo(VIT.ASM, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00c9ecc2: VersionInfo(VIT.RES, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00ccecc2: VersionInfo(VIT.LNK, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00caecc2: VersionInfo(VIT.EXP, 'VS2012 UPD3 build 60610', interpolated=True),
        0x00cbecc2: VersionInfo(VIT.IMP, 'VS2012 UPD3 build 60610', interpolated=True),

        # MSVS2012 Update 2 (17.00.60315.1 Update 2) - not attested in real world,
        # @comp.id is calculated.
        0x00ceeb9b: VersionInfo(VIT.OBJ, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00cfeb9b: VersionInfo(VIT.CPP, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00cdeb9b: VersionInfo(VIT.ASM, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00c9eb9b: VersionInfo(VIT.RES, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00cceb9b: VersionInfo(VIT.LNK, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00caeb9b: VersionInfo(VIT.EXP, 'VS2012 UPD2 build 60315', interpolated=True),
        0x00cbeb9b: VersionInfo(VIT.IMP, 'VS2012 UPD2 build 60315', interpolated=True),

        # MSVS2012 Update 1 (17.00.51106.1 Update 1) - not attested in real world,
        # @comp.id is calculated.
        0x00cec7a2: VersionInfo(VIT.OBJ, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00cfc7a2: VersionInfo(VIT.CPP, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00cdc7a2: VersionInfo(VIT.ASM, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00c9c7a2: VersionInfo(VIT.RES, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00ccc7a2: VersionInfo(VIT.LNK, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00cac7a2: VersionInfo(VIT.EXP, 'VS2012 UPD1 build 51106', interpolated=True),
        0x00cbc7a2: VersionInfo(VIT.IMP, 'VS2012 UPD1 build 51106', interpolated=True),

        # Visual Studio 2012 November CTP [11.0] (values are interpolated)
        0x00cec751: VersionInfo(VIT.OBJ, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00cdc751: VersionInfo(VIT.ASM, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00cfc751: VersionInfo(VIT.CPP, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00c9c751: VersionInfo(VIT.RES, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00ccc751: VersionInfo(VIT.LNK, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00cbc751: VersionInfo(VIT.IMP, 'VS2012 November CTP [11.0] build 51025', interpolated=True),
        0x00cac751: VersionInfo(VIT.EXP, 'VS2012 November CTP [11.0] build 51025', interpolated=True),

        # MSVS2012 Premium (11.0.50727.1 RTMREL)
        0x00cec627: VersionInfo(VIT.OBJ, 'VS2012 build 50727'),
        0x00cfc627: VersionInfo(VIT.CPP, 'VS2012 build 50727'),
        0x00c9c627: VersionInfo(VIT.RES, 'VS2012 build 50727'),
        0x00cdc627: VersionInfo(VIT.ASM, 'VS2012 build 50727'),
        0x00cac627: VersionInfo(VIT.EXP, 'VS2012 build 50727'),
        0x00cbc627: VersionInfo(VIT.IMP, 'VS2012 build 50727'),
        0x00ccc627: VersionInfo(VIT.LNK, 'VS2012 build 50727'),

        # MSVS2010 SP1 kb 983509 (10.0.40219.1 SP1Rel)
        0x00aa9d1b: VersionInfo(VIT.OBJ, 'VS2010 SP1 build 40219'),
        0x00ab9d1b: VersionInfo(VIT.CPP, 'VS2010 SP1 build 40219'),
        0x009d9d1b: VersionInfo(VIT.LNK, 'VS2010 SP1 build 40219'),
        0x009a9d1b: VersionInfo(VIT.RES, 'VS2010 SP1 build 40219'),
        0x009b9d1b: VersionInfo(VIT.EXP, 'VS2010 SP1 build 40219'),
        0x009c9d1b: VersionInfo(VIT.IMP, 'VS2010 SP1 build 40219'),
        0x009e9d1b: VersionInfo(VIT.ASM, 'VS2010 SP1 build 40219'),

        # MSVS2010 (10.0.30319.1 RTMRel)
        0x00aa766f: VersionInfo(VIT.OBJ, 'VS2010 build 30319'),
        0x00ab766f: VersionInfo(VIT.CPP, 'VS2010 build 30319'),
        0x009d766f: VersionInfo(VIT.LNK, 'VS2010 build 30319'),
        0x009a766f: VersionInfo(VIT.RES, 'VS2010 build 30319'),
        0x009b766f: VersionInfo(VIT.EXP, 'VS2010 build 30319'),
        0x009c766f: VersionInfo(VIT.IMP, 'VS2010 build 30319'),
        0x009e766f: VersionInfo(VIT.ASM, 'VS2010 build 30319'),

        # Visual Studio 2010 Beta 2 [10.0] (values are interpolated)
        0x00aa520b: VersionInfo(VIT.OBJ, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009e520b: VersionInfo(VIT.ASM, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x00ab520b: VersionInfo(VIT.CPP, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009a520b: VersionInfo(VIT.RES, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009d520b: VersionInfo(VIT.LNK, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009c520b: VersionInfo(VIT.IMP, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),
        0x009b520b: VersionInfo(VIT.EXP, 'VS2010 Beta 2 [10.0] build 21003', interpolated=True),

        # Visual Studio 2010 Beta 1 [10.0] (values are interpolated)
        0x00aa501a: VersionInfo(VIT.OBJ, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009e501a: VersionInfo(VIT.ASM, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x00ab501a: VersionInfo(VIT.CPP, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009a501a: VersionInfo(VIT.RES, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009d501a: VersionInfo(VIT.LNK, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009c501a: VersionInfo(VIT.IMP, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),
        0x009b501a: VersionInfo(VIT.EXP, 'VS2010 Beta 1 [10.0] build 20506', interpolated=True),

        # MSVS2008 SP1 (9.0.30729.1 SP)
        0x00837809: VersionInfo(VIT.OBJ, 'VS2008 SP1 build 30729'),
        0x00847809: VersionInfo(VIT.CPP, 'VS2008 SP1 build 30729'),
        # cvtres is the same as in VS2008, so add interpolated
        0x00947809: VersionInfo(VIT.RES, 'VS2008 SP1 [9.0] build 30729', interpolated=True),
        0x00957809: VersionInfo(VIT.ASM, 'VS2008 SP1 build 30729'),
        0x00927809: VersionInfo(VIT.EXP, 'VS2008 SP1 build 30729'),
        0x00937809: VersionInfo(VIT.IMP, 'VS2008 SP1 build 30729'),
        0x00917809: VersionInfo(VIT.LNK, 'VS2008 SP1 build 30729'),

        # MSVS2008 (9.0.21022.8 RTM)
        0x0083521e: VersionInfo(VIT.OBJ, 'VS2008 build 21022'),
        0x0084521e: VersionInfo(VIT.CPP, 'VS2008 build 21022'),
        0x0091521e: VersionInfo(VIT.LNK, 'VS2008 build 21022'),
        0x0094521e: VersionInfo(VIT.RES, 'VS2008 build 21022'),
        0x0092521e: VersionInfo(VIT.EXP, 'VS2008 build 21022'),
        0x0093521e: VersionInfo(VIT.IMP, 'VS2008 build 21022'),
        0x0095521e: VersionInfo(VIT.ASM, 'VS2008 build 21022'),

        # Visual Studio 2008 Beta 2 [9.0] (values are interpolated)
        0x008350e2: VersionInfo(VIT.OBJ, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009550e2: VersionInfo(VIT.ASM, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x008450e2: VersionInfo(VIT.CPP, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009450e2: VersionInfo(VIT.RES, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009150e2: VersionInfo(VIT.LNK, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009350e2: VersionInfo(VIT.IMP, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),
        0x009250e2: VersionInfo(VIT.EXP, 'VS2008 Beta 2 [9.0] build 20706', interpolated=True),

        # MSVS2005 (RTM.50727-4200) cl version: 14.00.50727.42
        # MSVS2005-SP1 dumps the same comp.id's.
        # Strangely, there is also a VS2012 release with the same build number:
        # 11.0 build 50727.1
        0x006dc627: VersionInfo(VIT.OBJ, 'VS2005 build 50727'),
        0x006ec627: VersionInfo(VIT.CPP, 'VS2005 build 50727'),
        0x0078c627: VersionInfo(VIT.LNK, 'VS2005 build 50727'),
        0x007cc627: VersionInfo(VIT.RES, 'VS2005 build 50727'),
        0x007ac627: VersionInfo(VIT.EXP, 'VS2005 build 50727'),
        0x007bc627: VersionInfo(VIT.IMP, 'VS2005 build 50727'),
        0x007dc627: VersionInfo(VIT.ASM, 'VS2005 build 50727'),

        # Visual Studio 2005 [8.0] (values are interpolated)
        0x006dc490: VersionInfo(VIT.OBJ, 'VS2005 [8.0] build 50320', interpolated=True),
        0x007dc490: VersionInfo(VIT.ASM, 'VS2005 [8.0] build 50320', interpolated=True),
        0x006ec490: VersionInfo(VIT.CPP, 'VS2005 [8.0] build 50320', interpolated=True),
        0x007cc490: VersionInfo(VIT.RES, 'VS2005 [8.0] build 50320', interpolated=True),
        0x0078c490: VersionInfo(VIT.LNK, 'VS2005 [8.0] build 50320', interpolated=True),
        0x007bc490: VersionInfo(VIT.IMP, 'VS2005 [8.0] build 50320', interpolated=True),
        0x007ac490: VersionInfo(VIT.EXP, 'VS2005 [8.0] build 50320', interpolated=True),

        # Visual Studio 2005 Beta 2 [8.0] (values are interpolated)
        0x006dc427: VersionInfo(VIT.OBJ, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x007dc427: VersionInfo(VIT.ASM, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x006ec427: VersionInfo(VIT.CPP, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x007cc427: VersionInfo(VIT.RES, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x0078c427: VersionInfo(VIT.LNK, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x007bc427: VersionInfo(VIT.IMP, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),
        0x007ac427: VersionInfo(VIT.EXP, 'VS2005 Beta 2 [8.0] build 50215', interpolated=True),

        # Visual Studio 2005 Beta 1 [8.0] (values are interpolated)
        0x006d9e9f: VersionInfo(VIT.OBJ, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x007d9e9f: VersionInfo(VIT.ASM, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x006e9e9f: VersionInfo(VIT.CPP, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x007c9e9f: VersionInfo(VIT.RES, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x00789e9f: VersionInfo(VIT.LNK, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x007b9e9f: VersionInfo(VIT.IMP, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),
        0x007a9e9f: VersionInfo(VIT.EXP, 'VS2005 Beta 1 [8.0] build 40607', interpolated=True),

        # Windows Server 2003 SP1 DDK (for AMD64) (values are interpolated)
        0x006d9d76: VersionInfo(VIT.OBJ, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x007d9d76: VersionInfo(VIT.ASM, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x006e9d76: VersionInfo(VIT.CPP, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x007c9d76: VersionInfo(VIT.RES, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x00789d76: VersionInfo(VIT.LNK, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x007b9d76: VersionInfo(VIT.IMP, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),
        0x007a9d76: VersionInfo(VIT.EXP, 'Windows Server 2003 SP1 DDK (for AMD64) build 40310', interpolated=True),

        # MSVS2003 (.NET) SP1 (kb918007)
        0x005f178e: VersionInfo(VIT.OBJ, 'VS2003 (.NET) SP1 build 6030'),
        0x0060178e: VersionInfo(VIT.CPP, 'VS2003 (.NET) SP1 build 6030'),
        0x005a178e: VersionInfo(VIT.LNK, 'VS2003 (.NET) SP1 build 6030'),
        0x000f178e: VersionInfo(VIT.ASM, 'VS2003 (.NET) SP1 build 6030'),
        # cvtres is the same version as without SP1
        0x005e178e: VersionInfo(VIT.RES, 'VS.NET 2003 SP1 [7.1] build 6030', interpolated=True),
        0x005c178e: VersionInfo(VIT.EXP, 'VS2003 (.NET) SP1 build 6030'),
        0x005d178e: VersionInfo(VIT.IMP, 'VS2003 (.NET) SP1 build 6030'),

        # Windows Server 2003 SP1 DDK (values are interpolated)
        0x005f0fc3: VersionInfo(VIT.OBJ, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x000f0fc3: VersionInfo(VIT.ASM, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x00600fc3: VersionInfo(VIT.CPP, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x005e0fc3: VersionInfo(VIT.RES, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x005a0fc3: VersionInfo(VIT.LNK, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x005d0fc3: VersionInfo(VIT.IMP, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),
        0x005c0fc3: VersionInfo(VIT.EXP, 'Windows Server 2003 SP1 DDK build 4035', interpolated=True),

        # MSVS2003 (.NET) 7.0.1.3088
        0x005f0c05: VersionInfo(VIT.OBJ, 'VS2003 (.NET) build 3077'),
        0x00600c05: VersionInfo(VIT.CPP, 'VS2003 (.NET) build 3077'),
        0x000f0c05: VersionInfo(VIT.ASM, 'VS2003 (.NET) build 3077'),
        0x005e0bec: VersionInfo(VIT.RES, 'VS2003 (.NET) build 3052'),
        0x005c0c05: VersionInfo(VIT.EXP, 'VS2003 (.NET) build 3077'),
        0x005d0c05: VersionInfo(VIT.IMP, 'VS2003 (.NET) build 3077'),
        0x005a0c05: VersionInfo(VIT.LNK, 'VS2003 (.NET) build 3077'),
        # Visual Studio .NET 2003 [7.1] (values are interpolated)
        0x005e0c05: VersionInfo(VIT.RES, 'VS.NET 2003 [7.1] build 3077', interpolated=True),

        # MSVS2002 (.NET) 7.0.9466
        0x001c24fa: VersionInfo(VIT.OBJ, 'VS2002 (.NET) build 9466'),
        0x001d24fa: VersionInfo(VIT.CPP, 'VS2002 (.NET) build 9466'),
        0x004024fa: VersionInfo(VIT.ASM, 'VS2002 (.NET) build 9466'),
        0x003d24fa: VersionInfo(VIT.LNK, 'VS2002 (.NET) build 9466'),
        0x004524fa: VersionInfo(VIT.RES, 'VS2002 (.NET) build 9466'),
        0x003f24fa: VersionInfo(VIT.EXP, 'VS2002 (.NET) build 9466'),
        0x001924fa: VersionInfo(VIT.IMP, 'VS2002 (.NET) build 9466'),

        # Windows XP SP1 DDK (values are interpolated)
        0x001c23d8: VersionInfo(VIT.OBJ, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x004023d8: VersionInfo(VIT.ASM, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x001d23d8: VersionInfo(VIT.CPP, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x004523d8: VersionInfo(VIT.RES, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x003d23d8: VersionInfo(VIT.LNK, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x001923d8: VersionInfo(VIT.IMP, 'Windows XP SP1 DDK build 9176', interpolated=True),
        0x003f23d8: VersionInfo(VIT.EXP, 'Windows XP SP1 DDK build 9176', interpolated=True),

        # MSVS98 6.0 SP6 (Enterprise edition)
        # Looks like linker may mix compids for C and C++ objects (why?)
        0x000a2636: VersionInfo(VIT.OBJ, 'VS98 (6.0) SP6 build 8804'),
        0x000b2636: VersionInfo(VIT.CPP, 'VS98 (6.0) SP6 build 8804'),

        # MSVC++ 6.0 SP5 (Enterprise edition)
        0x00152306: VersionInfo(VIT.OBJ, 'VC++ 6.0 SP5 build 8804'),
        0x00162306: VersionInfo(VIT.CPP, 'VC++ 6.0 SP5 build 8804'),
        0x000420ff: VersionInfo(VIT.LNK, 'VC++ 6.0 SP5 imp/exp build 8447'),
        0x000606c7: VersionInfo(VIT.RES, 'VS98 (6.0) SP6 cvtres build 1736'),

        # MSVS6.0 (no service packs)
        0x000a1fe8: VersionInfo(VIT.OBJ, 'VS98 (6.0) build 8168'),
        0x000b1fe8: VersionInfo(VIT.CPP, 'VS98 (6.0) build 8168'),
        0x000606b8: VersionInfo(VIT.RES, 'VS98 (6.0) cvtres build 1720'),
        0x00041fe8: VersionInfo(VIT.LNK, 'VS98 (6.0) imp/exp build 8168'),

        # MSVS97 5.0 Enterprise Edition (cl 11.00.7022, link 5.00.7022)
        # Does NOT generate any @comp.id records, nor Rich headers.
        # SP3 added Rich-generating linker (albeit it doesn't identify itself),
        # and CVTRES and LIB(?) utilities that generate @comp.id records. There is no
        # distinction between import and export records yet. I marked the records as
        # [IMP] because VS98 linker seems to omit export records from the header; VS97
        # linker might do the same.
        0x00060684: VersionInfo(VIT.RES, 'VS97 (5.0) SP3 cvtres 5.00.1668'),
        0x00021c87: VersionInfo(VIT.IMP, 'VS97 (5.0) SP3 link 5.10.7303'),
    }

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Static methods

def parse_signature(data)

Extracts a JSON-serializable and human readable dictionary with information about time stamp and code signing certificates that are attached to the input PE file.

Expand source code Browse git
@classmethod
def parse_signature(cls, data: bytearray) -> dict:
    """
    Extracts a JSON-serializable and human readable dictionary with information about
    time stamp and code signing certificates that are attached to the input PE file.
    """
    from refinery.units.formats.pkcs7 import pkcs7
    from refinery.units.formats.pe.pesig import pesig

    try:
        signature = data | pesig | pkcs7 | json.loads
    except Exception as E:
        raise ValueError(F'PKCS7 parser failed with error: {E!s}')

    info = {}

    def find_timestamps(entry):
        if isinstance(entry, dict):
            if set(entry.keys()) == {'type', 'value'}:
                if entry['type'] == 'signing_time':
                    return {'Timestamp': entry['value']}
            for value in entry.values():
                result = find_timestamps(value)
                if result is None:
                    continue
                with suppress(KeyError):
                    result.setdefault('TimestampIssuer', entry['sid']['issuer']['common_name'])
                return result
        elif isinstance(entry, list):
            for value in entry:
                result = find_timestamps(value)
                if result is None:
                    continue
                return result

    timestamp_info = find_timestamps(signature)
    if timestamp_info is not None:
        info.update(timestamp_info)

    try:
        certificates = signature['content']['certificates']
    except KeyError:
        return info

    if len(certificates) == 1:
        main_certificate = certificates[0]['tbs_certificate']
    else:
        certificates_with_extended_use = []
        main_certificate = None
        for certificate in certificates:
            with suppress(Exception):
                crt = certificate['tbs_certificate']
                ext = [e for e in crt['extensions'] if e['extn_id'] == 'extended_key_usage' and e['extn_value'] != ['time_stamping']]
                key = [e for e in crt['extensions'] if e['extn_id'] == 'key_usage']
                if ext:
                    certificates_with_extended_use.append(certificate)
                if any('key_cert_sign' in e['extn_value'] for e in key):
                    continue
                if any('code_signing' in e['extn_value'] for e in ext):
                    main_certificate = certificate
                    break
        if main_certificate is None and len(certificates_with_extended_use) == 1:
            main_certificate = certificates_with_extended_use[0]
    if main_certificate:
        crt = main_certificate['tbs_certificate']
        serial = crt['serial_number']
        if not isinstance(serial, int):
            serial = int(serial, 0)
        serial = F'{serial:x}'
        if len(serial) % 2:
            serial = '0' + serial
        subject = crt['subject']
        location = [subject.get(t, '') for t in ('locality_name', 'state_or_province_name', 'country_name')]
        info.update(Subject=subject['common_name'])
        if any(location):
            info.update(SubjectLocation=', '.join(filter(None, location)))
        info.update(
            Issuer=crt['issuer']['common_name'], Fingerprint=main_certificate['fingerprint'], Serial=serial)
        return info
    return info
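
The method can also be called directly from Python. The following is a minimal sketch; it assumes that the methods listed here belong to the pemeta unit (importable from the refinery base module) and that sample.exe is a signed PE file. The keys queried below mirror the ones assigned in the code above.

from refinery import pemeta

with open('sample.exe', 'rb') as stream:
    data = bytearray(stream.read())

# returns a flat dictionary; which keys are present depends on the input
info = pemeta.parse_signature(data)
for key in ('Subject', 'SubjectLocation', 'Issuer', 'Serial', 'Timestamp'):
    if key in info:
        print(F'{key}: {info[key]}')
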
def parse_version(pe, data=None)

Extracts a JSON-serializable and human readable dictionary with information about the version resource of an input PE file, if available.

Expand source code Browse git
@classmethod
def parse_version(cls, pe: PE, data=None) -> dict:
    """
    Extracts a JSON-serializable and human readable dictionary with information about
    the version resource of an input PE file, if available.
    """
    pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']])
    for FileInfo in pe.FileInfo:
        for FileInfoEntry in FileInfo:
            with suppress(AttributeError):
                for StringTableEntry in FileInfoEntry.StringTable:
                    StringTableEntryParsed = cls._parse_pedict(StringTableEntry.entries)
                    with suppress(AttributeError):
                        LangID = StringTableEntry.entries.get('LangID', None) or StringTableEntry.LangID
                        LangID = int(LangID, 0x10) if not isinstance(LangID, int) else LangID
                        LangHi = LangID >> 0x10
                        LangLo = LangID & 0xFFFF
                        Language = cls._LCID.get(LangHi, 'Language Neutral')
                        Charset = cls._CHARSET.get(LangLo, 'Unknown Charset')
                        StringTableEntryParsed.update(
                            LangID=F'{LangID:08X}',
                            Charset=Charset,
                            Language=Language
                        )
                    return StringTableEntryParsed
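
A hedged usage sketch, again assuming the enclosing class is the pemeta unit and that pefile is available: the method expects an already constructed PE object and returns whichever string table entries the version resource contains (for example FileVersion or ProductName). The sketch assumes the input actually carries a version resource.

from pefile import PE
from refinery import pemeta

with open('sample.exe', 'rb') as stream:
    pe = PE(data=stream.read(), fast_load=True)

# parse_version triggers the resource directory parsing itself
for key, value in (pemeta.parse_version(pe) or {}).items():
    print(F'{key}: {value}')
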
def parse_exports(pe, data=None)
Expand source code Browse git
@classmethod
def parse_exports(cls, pe: PE, data=None) -> list:
    pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT']])
    info = []
    for k, exp in enumerate(pe.DIRECTORY_ENTRY_EXPORT.symbols):
        if not exp.name:
            info.append(F'@{k}')
        else:
            info.append(exp.name.decode('ascii'))
    return info
def parse_imports(pe, data=None)
Expand source code Browse git
@classmethod
def parse_imports(cls, pe: PE, data=None) -> list:
    pe.parse_data_directories(directories=[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT']])
    info = {}
    for idd in pe.DIRECTORY_ENTRY_IMPORT:
        dll = idd.dll.decode('ascii')
        if dll.lower().endswith('.dll'):
            dll = dll[:-4]
        imports = info.setdefault(dll, [])
        for imp in idd.imports:
            name = imp.name and imp.name.decode('ascii') or F'@{imp.ordinal}'
            imports.append(name)
    return info
def parse_time_stamps(pe, raw_time_stamps)

Extracts time stamps from the PE header (link time), as well as from the imports, exports, debug, and resource directory. The resource time stamp is also parsed as a DOS time stamp and returned as the "Delphi" time stamp.

Expand source code Browse git
@classmethod
def parse_time_stamps(cls, pe: PE, raw_time_stamps: bool) -> dict:
    """
    Extracts time stamps from the PE header (link time), as well as from the imports,
    exports, debug, and resource directory. The resource time stamp is also parsed as
    a DOS time stamp and returned as the "Delphi" time stamp.
    """
    if raw_time_stamps:
        def dt(ts): return ts
    else:
        def dt(ts):
            # parse as UTC but then forget time zone information
            return datetime.fromtimestamp(
                ts,
                tz=timezone.utc
            ).replace(tzinfo=None)

    pe.parse_data_directories(directories=[
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG'],
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_RESOURCE']
    ])

    info = {}

    with suppress(AttributeError):
        info.update(Linker=dt(pe.FILE_HEADER.TimeDateStamp))

    with suppress(AttributeError):
        for entry in pe.DIRECTORY_ENTRY_IMPORT:
            info.update(Import=dt(entry.TimeDateStamp()))

    with suppress(AttributeError):
        for entry in pe.DIRECTORY_ENTRY_DEBUG:
            info.update(DbgDir=dt(entry.struct.TimeDateStamp))

    with suppress(AttributeError):
        Export = pe.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp
        if Export: info.update(Export=dt(Export))

    with suppress(AttributeError):
        res_timestamp = pe.DIRECTORY_ENTRY_RESOURCE.struct.TimeDateStamp
        if res_timestamp:
            with suppress(ValueError):
                from refinery.units.misc.datefix import datefix
                dos = datefix.dostime(res_timestamp)
                info.update(Delphi=dos)
                info.update(RsrcTS=dt(res_timestamp))

    def norm(value):
        if isinstance(value, int):
            return value
        return str(value)

    return {key: norm(value) for key, value in info.items()}
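
A short sketch of direct use, under the same assumption that the enclosing class is the pemeta unit: with raw_time_stamps=False the epoch values are rendered as date strings, with raw_time_stamps=True the raw integers are returned unchanged.

from pefile import PE
from refinery import pemeta

with open('sample.exe', 'rb') as stream:
    pe = PE(data=stream.read(), fast_load=True)

# possible keys: Linker, Import, DbgDir, Export, Delphi, RsrcTS
for name, value in pemeta.parse_time_stamps(pe, raw_time_stamps=False).items():
    print(F'{name}: {value}')
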
def parse_dotnet(pe, data)

Extracts a JSON-serializable and human readable dictionary with information about the .NET metadata of an input PE file.

Expand source code Browse git
@classmethod
def parse_dotnet(cls, pe: PE, data):
    """
    Extracts a JSON-serializable and human readable dictionary with information about
    the .NET metadata of an input PE file.
    """
    header = DotNetHeader(data, pe=pe)
    tables = header.meta.Streams.Tables
    info = dict(
        RuntimeVersion=F'{header.head.MajorRuntimeVersion}.{header.head.MinorRuntimeVersion}',
        Version=F'{header.meta.MajorVersion}.{header.meta.MinorVersion}',
        VersionString=header.meta.VersionString
    )

    info['Flags'] = [name for name, check in header.head.KnownFlags.items() if check]

    if len(tables.Assembly) == 1:
        assembly = tables.Assembly[0]
        info.update(
            AssemblyName=assembly.Name,
            Release='{}.{}.{}.{}'.format(
                assembly.MajorVersion,
                assembly.MinorVersion,
                assembly.BuildNumber,
                assembly.RevisionNumber
            )
        )

    try:
        entry = header.head.EntryPointToken + pe.OPTIONAL_HEADER.ImageBase
        info.update(EntryPoint=F'0x{entry:08X}')
    except AttributeError:
        pass

    if len(tables.Module) == 1:
        module = tables.Module[0]
        info.update(ModuleName=module.Name)

    return info
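
A hedged sketch, assuming the enclosing class is the pemeta unit: the method requires both the parsed PE object and the raw input data, and it will fail for inputs that are not .NET images.

from pefile import PE
from refinery import pemeta

with open('managed.exe', 'rb') as stream:
    data = stream.read()

info = pemeta.parse_dotnet(PE(data=data), data)
print(info['RuntimeVersion'], info['VersionString'], info.get('AssemblyName'))
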
def parse_debug(pe, data=None)
Expand source code Browse git
@classmethod
def parse_debug(cls, pe: PE, data=None):
    result = {}
    pe.parse_data_directories(directories=[
        DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DEBUG']])
    for dbg in pe.DIRECTORY_ENTRY_DEBUG:
        if DEBUG_TYPE.get(dbg.struct.Type, None) != 'IMAGE_DEBUG_TYPE_CODEVIEW':
            continue
        with suppress(Exception):
            pdb = dbg.entry.PdbFileName
            if 0 in pdb:
                pdb = pdb[:pdb.index(0)]
            result.update(
                PdbPath=pdb.decode(cls.codec),
                PdbAge=dbg.entry.Age
            )
    return result

Methods

def parse_header(cls, pe, data=None)
Expand source code Browse git
def parse_header(cls, pe: PE, data=None) -> dict:
    def format_macro_name(name: str, prefix, convert=True):
        name = name.split('_')[prefix:]
        if convert:
            for k, part in enumerate(name):
                name[k] = part.upper() if len(part) <= 3 else part.capitalize()
        return ' '.join(name)

    major = pe.OPTIONAL_HEADER.MajorOperatingSystemVersion
    minor = pe.OPTIONAL_HEADER.MinorOperatingSystemVersion
    version = cls._WINVER.get(major, {0: 'Unknown'})

    try:
        MinimumOS = version[minor]
    except LookupError:
        MinimumOS = version[0]
    header_information = {
        'Machine': format_macro_name(MACHINE_TYPE[pe.FILE_HEADER.Machine], 3, False),
        'Subsystem': format_macro_name(SUBSYSTEM_TYPE[pe.OPTIONAL_HEADER.Subsystem], 2),
        'MinimumOS': MinimumOS,
    }

    rich_header = pe.parse_rich_header()
    rich = []
    if rich_header:
        it = rich_header.get('values', [])
        for idv in it[0::2]:
            info = cls._RICH_HEADER.get(idv, None)
            if info is None:
                info = guess_version(idv)
            if not info:
                continue
            rich.append(F'[{idv:08x}] {info}')
        header_information['RICH'] = rich

    characteristics = [
        name for name, mask in image_characteristics
        if pe.FILE_HEADER.Characteristics & mask
    ]
    for typespec, flag in {
        'EXE': 'IMAGE_FILE_EXECUTABLE_IMAGE',
        'DLL': 'IMAGE_FILE_DLL',
        'SYS': 'IMAGE_FILE_SYSTEM'
    }.items():
        if flag in characteristics:
            header_information['Type'] = typespec
    address_width = None
    if 'IMAGE_FILE_16BIT_MACHINE' in characteristics:
        address_width = 4
    elif pe.FILE_HEADER.Machine == MACHINE_TYPE['IMAGE_FILE_MACHINE_I386']:
        address_width = 8
    elif pe.FILE_HEADER.Machine == MACHINE_TYPE['IMAGE_FILE_MACHINE_AMD64']:
        address_width = 16
    if address_width:
        header_information['Bits'] = 4 * address_width
    else:
        address_width = 16
    header_information['ImageBase'] = F'0x{pe.OPTIONAL_HEADER.ImageBase:0{address_width}X}'
    return header_information

Inherited members

class peoverlay (certificate=True, directories=True, memdump=False)

This unit is implemented in refinery.units.formats.pe.peoverlay and has the following command line interface:

usage: peoverlay [-h] [-L] [-Q] [-0] [-v] [-c] [-d] [-m]

Returns the overlay of a PE file, i.e. anything that may have been
appended to the file. This does not include digital signatures. Use
pestrip to obtain only the body of the PE file after removing the overlay.

optional arguments:
  -c, --no-cert  Do not include digital signatures for the size
                 computation.
  -d, --no-dirs  Do not include any data directories for size computation
                 (implies --no-cert).
  -m, --memdump  Assume that the file data was a memory-mapped PE file.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class peoverlay(OverlayUnit):
    """
    Returns the overlay of a PE file, i.e. anything that may have been appended to the file.
    This does not include digital signatures. Use `refinery.pestrip` to obtain only the body
    of the PE file after removing the overlay.
    """
    def process(self, data: bytearray) -> bytearray:
        size = self._get_size(data)
        if isinstance(data, bytearray):
            data[:size] = []
            return data
        return data[size:]
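
Like all units, peoverlay can be used from Python with the pipe syntax demonstrated in pemeta.parse_signature above (data | pesig | pkcs7 | json.loads). The sketch below assumes that piping into a plain callable such as bytes hands it the unit output, just as json.loads receives it in that example; options would be passed by instantiating the unit instead of using the bare class.

from refinery import peoverlay

with open('sample.exe', 'rb') as stream:
    data = stream.read()

# the final callable receives the output of the unit
overlay = data | peoverlay | bytes
print(F'overlay size: {len(overlay)} bytes')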

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class perc (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.pe.perc and has the following command line interface:

usage: perc [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
            [path [path ...]]

Extract PE file resources.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class perc(PathExtractorUnit):
    """
    Extract PE file resources.
    """
    def __init__(self, *paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path'):
        def fixpath(p: str):
            if regex or not p.isidentifier():
                return p
            return re.compile(FR'^.*?{re.escape(p)}.*$')
        super().__init__(*(fixpath(p) for p in paths),
            list=list, join_path=join_path, drop_path=drop_path, path=path)

    def _search(self, pe, directory, level=0, *parts):
        if level >= 3:
            self.log_warn(F'unexpected resource tree level {level + 1:d}')
        for entry in directory.entries:
            if entry.name:
                identifier = str(entry.name)
            elif level == 0 and entry.id in iter(RSRC):
                identifier = RSRC(entry.id).name
            elif entry.id is not None:
                identifier = str(entry.id)
            else:
                self.log_warn(F'resource entry has name {entry.name} and id {entry.id} at level {level + 1:d}')
                continue
            if entry.struct.DataIsDirectory:
                yield from self._search(pe, entry.directory, level + 1, *parts, identifier)
            else:
                def extracted(p=pe, e=entry):
                    return p.get_data(e.data.struct.OffsetToData, e.data.struct.Size)
                path = '/'.join((*parts, identifier))
                yield UnpackResult(path, extracted)

    def unpack(self, data):
        pe = pefile.PE(data=data)
        try:
            yield from self._search(pe, pe.DIRECTORY_ENTRY_RESOURCE)
        except AttributeError:
            pass
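
A hedged Python sketch under the same assumptions as the peoverlay example above. Since perc emits one chunk per matching resource, the pattern below is chosen so that ideally only a single resource matches and the terminal callable receives exactly that chunk; the pattern string itself is hypothetical and depends on the resource tree of the input file.

from refinery import perc

with open('sample.exe', 'rb') as stream:
    data = stream.read()

# hypothetical pattern: the application manifest resource, if present
manifest = data | perc('MANIFEST*') | bytes
print(manifest.decode('utf8', errors='replace'))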

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class pesig

This unit is implemented in refinery.units.formats.pe.pesig and has the following command line interface:

usage: pesig [-h] [-L] [-Q] [-0] [-v]

Extracts the contents of the IMAGE_DIRECTORY_ENTRY_SECURITY entry of a PE
file, i.e. the digital signatures in DER format.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class pesig(Unit):
    """
    Extracts the contents of the IMAGE_DIRECTORY_ENTRY_SECURITY entry of a PE file,
    i.e. the digital signatures in DER format.
    """

    _SECDIRID = DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_SECURITY']

    def __init__(self): pass

    def process(self, data: bytearray) -> bytearray:
        pe = PE(data=data, fast_load=True)
        pe.parse_data_directories(directories=[self._SECDIRID])
        security = pe.OPTIONAL_HEADER.DATA_DIRECTORY[self._SECDIRID]
        self.log_info(F'signature offset: 0x{security.VirtualAddress:08X}')
        self.log_info(F'signature length: 0x{security.Size:08X}')
        if security.VirtualAddress == 0 or security.Size == 0:
            raise ValueError(F'IMAGE_DIRECTORY_ENTRY_SECURITY ({self._SECDIRID}) is corrupt.')
        sgnoff = security.VirtualAddress + 8
        sgnend = sgnoff + security.Size
        length, revision, certtype = unpack('<IHH', data[sgnoff - 8:sgnoff])
        signature = data[sgnoff:sgnend]

        if len(signature) + 8 != length:
            raise RefineryPartialResult(
                F'Found {len(signature) + 8} bytes of signature, but length should be {length}.',
                partial=signature)

        return signature
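
In Python, this unit appears in the pipeline used by pemeta.parse_signature above. The following sketch, under the same assumptions about the pipe syntax, simply writes the extracted DER blob to disk.

from refinery import pesig

with open('sample.exe', 'rb') as stream:
    der = stream.read() | pesig | bytes

with open('sample.der', 'wb') as stream:
    stream.write(der)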

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class pestrip (certificate=True, directories=True, memdump=False)

This unit is implemented in refinery.units.formats.pe.pestrip and has the following command line interface:

usage: pestrip [-h] [-L] [-Q] [-0] [-v] [-c] [-d] [-m]

Removes the overlay of a PE file and returns the stripped executable. Use
peoverlay to extract the overlay.

optional arguments:
  -c, --no-cert  Do not include digital signatures for the size
                 computation.
  -d, --no-dirs  Do not include any data directories for size computation
                 (implies --no-cert).
  -m, --memdump  Assume that the file data was a memory-mapped PE file.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class pestrip(OverlayUnit):
    """
    Removes the overlay of a PE file and returns the stripped executable. Use `refinery.peoverlay`
    to extract the overlay.
    """
    def process(self, data: bytearray) -> bytearray:
        size = self._get_size(data)
        if isinstance(data, bytearray):
            data[size:] = []
            return data
        return data[:size]
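
A sketch of the complementary operation to peoverlay, under the same assumptions about the Python pipe syntax. Both units compute the split point the same way, so with identical options the two outputs together cover the whole input file.

from refinery import pestrip, peoverlay

with open('sample.exe', 'rb') as stream:
    data = stream.read()

body    = data | pestrip   | bytes
overlay = data | peoverlay | bytes
print(F'body: {len(body)} bytes, overlay: {len(overlay)} bytes')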

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class pkcs7

This unit is implemented in refinery.units.formats.pkcs7 and has the following command line interface:

usage: pkcs7 [-h] [-L] [-Q] [-0] [-v]

Converts PKCS7 encoded data to a JSON representation.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class pkcs7(Unit):
    """
    Converts PKCS7 encoded data to a JSON representation.
    """
    @Unit.Requires('asn1crypto', optional=False)
    def _asn1crypto():
        import asn1crypto
        import asn1crypto.cms
        import asn1crypto.core
        import asn1crypto.x509
        return asn1crypto

    def process(self, data: bytes):
        signature = self._asn1crypto.cms.ContentInfo.load(data)
        with ParsedASN1ToJSON as encoder:
            return encoder.dumps(signature).encode(self.codec)
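
This is the unit that appears in the data | pesig | pkcs7 | json.loads pipeline of pemeta.parse_signature above. The same pattern works for a standalone DER blob, for example one previously written out by pesig:

import json
from refinery import pkcs7

with open('sample.der', 'rb') as stream:
    parsed = stream.read() | pkcs7 | json.loads

print(json.dumps(parsed, indent=2))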

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class stego (parts='RGB')

This unit is implemented in refinery.units.formats.stego and has the following command line interface:

usage: stego [-h] [-L] [-Q] [-0] [-v] [parts]

Decodes the RGBA (red/green/blue/alpha) values of the pixels of a given
image file and outputs these values as bytes. Each row of the image is
transformed and output as an individual chunk. To obtain the data in
columns, the transpose unit can be used.

positional arguments:
  parts          A string containing any ordering of the letters R, G, B,
                 and A (case-insensitive). These pixel components will be
                 extracted from every pixel in the given order. The
                 default value is RGB.

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class stego(Unit):
    """
    Decodes the RGBA (red/green/blue/alpha) values of the pixels of a given image file and
    outputs these values as bytes. Each row of the image is transformed and output as an
    individual chunk. To obtain the data in columns, the `refinery.transpose` unit can be
    used.
    """
    def __init__(
        self,
        parts: arg('parts', nargs='?', type=str, help=(
            'A string containing any ordering of the letters R, G, B, and A (case-insensitive). '
            'These pixel components will be extracted from every pixel in the given order. The '
            'default value is {default}.'
        )) = 'RGB'
    ):
        super().__init__(
            parts=tuple(arg.as_option(p, PIXEL_PART) for p in parts)
        )

    @Unit.Requires('Pillow', optional=False)
    def _image():
        from PIL import Image
        return Image

    def process(self, data):
        image = self._image.open(MemoryFile(data))
        width, height = image.size
        for y in range(height):
            yield bytearray(
                image.getpixel((x, y))[p]
                for x in range(width)
                for p in self.args.parts
            )
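
A hedged sketch under the same pipe-syntax assumptions as above; since every image row is emitted as its own chunk, this additionally assumes that the terminal callable receives the concatenated row data. How chunks are consumed in Python is ultimately governed by refinery.lib.frame.

from refinery import stego

with open('image.png', 'rb') as stream:
    data = stream.read()

# extract only the red channel of every pixel, row by row
red = data | stego('R') | bytes
print(F'{len(red)} red-channel bytes extracted')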

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class winreg (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.winreg and has the following command line interface:

usage: winreg [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
              [path [path ...]]

Extract values from a Windows registry hive.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class winreg(PathExtractorUnit):
    """
    Extract values from a Windows registry hive.
    """
    @PathExtractorUnit.Requires('python-registry', optional=False)
    def _registry():
        from Registry.Registry import Registry
        return Registry

    def _walk(self, key, *path):
        here = '/'.join(path)
        if not self._check_reachable(here):
            self.log_debug(F'pruning search at {here}')
            return
        for value in key.values():
            vpath = F'{here}/{value.name()}'
            yield UnpackResult(vpath, lambda v=value: v.raw_data())
        for subkey in key.subkeys():
            yield from self._walk(subkey, *path, subkey.name())

    def unpack(self, data):
        with MemoryFile(data) as stream:
            root = self._registry(stream).root()
            yield from self._walk(root, root.name())
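
A hedged sketch under the same assumptions as the perc example above: restricting the wildcard pattern keeps the number of output chunks small, and the value path used here is hypothetical.

from refinery import winreg

with open('NTUSER.DAT', 'rb') as stream:
    hive = stream.read()

# hypothetical pattern: a single value somewhere below a key named Run
value = hive | winreg('*/Run/*') | bytes
print(value)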

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class xtxml (*paths, list=False, join_path=False, drop_path=False, regex=False, path=b'path')

This unit is implemented in refinery.units.formats.xml and has the following command line interface:

usage: xtxml [-h] [-L] [-Q] [-0] [-v] [-l] [-j | -d] [-r] [-P NAME]
             [path [path ...]]

Extract values from an XML document.

positional arguments:
  path             Wildcard pattern for the name of the item to be
                   extracted. Each item is returned as a separate output
                   of this unit. Paths may contain wildcards. The default
                   is a single wildcard, which means that every item will
                   be extracted.

optional arguments:
  -l, --list       Return all matching paths as UTF8-encoded output
                   chunks.
  -j, --join-path  Join path names from container with previous path
                   names.
  -d, --drop-path  Do not modify the path variable for output chunks.
  -r, --regex      Use regular expressions instead of wildcard patterns.
  -P, --path NAME  Name of the meta variable to receive the extracted
                   path. The default value is "path".

generic options:
  -h, --help       Show this help message and exit.
  -L, --lenient    Allow partial results as output.
  -Q, --quiet      Disables all log output.
  -0, --devnull    Do not produce any output.
  -v, --verbose    Specify up to two times to increase log level.
Expand source code Browse git
class xtxml(PathExtractorUnit):
    """
    Extract values from an XML document.
    """
    _STRICT_PATH_MATCHING = True

    def unpack(self, data):
        def walk(node: xml.XMLNode, *path: str):
            def extract(node: xml.XMLNode = node):
                if not node.children:
                    return node.content.encode(self.codec)
                with MemoryFile() as stream:
                    node.write(stream)
                    return bytes(stream.getbuffer() | ppxml)
            children_by_tag = defaultdict(list)
            for child in node.children:
                children_by_tag[child.tag].append(child)
            yield UnpackResult('/'.join(path), extract, **node.attributes)
            for tag, children in children_by_tag.items():
                if len(children) == 1:
                    yield from walk(children[0], *path, tag)
                    continue
                width = len(F'{len(children):X}')
                for k, child in enumerate(children):
                    yield from walk(child, *path, F'{tag}[0x{k:0{width}X}]')
        root = xml.parse(data)
        name = root.tag or 'xml'
        yield from walk(root, name)
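
A hedged sketch under the same pipe-syntax assumptions as above. Paths are built from the tag names as shown in the unpack method, starting at the root tag, so a pattern without wildcards selects exactly one node.

from refinery import xtxml

document = b'<config><server>example.com</server><port>8080</port></config>'

# select the text content of the port node; expected output: b'8080'
port = document | xtxml('config/port') | bytes
print(port)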

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class chop (size, truncate=False, into=False)

This unit is implemented in refinery.units.meta.chop and has the following command line interface:

usage: chop [-h] [-L] [-Q] [-0] [-v] [-t] [-i] N

Reinterprets the input as a sequence of equally sized chunks and outputs
this sequence.

positional arguments:
  N               Chop data into chunks of this size.

optional arguments:
  -t, --truncate  Truncate possible excess bytes at the end of the input,
                  by default they are appended as a single chunk.
  -i, --into      If this flag is specified, the size parameter determines
                  the number of blocks to be produced rather than the size
                  of each block. In this case, truncation is performed
                  before the data is split.

generic options:
  -h, --help      Show this help message and exit.
  -L, --lenient   Allow partial results as output.
  -Q, --quiet     Disables all log output.
  -0, --devnull   Do not produce any output.
  -v, --verbose   Specify up to two times to increase log level.
Expand source code Browse git
class chop(Unit):
    """
    Reinterprets the input as a sequence of equally sized chunks and outputs this sequence.
    """

    def __init__(
        self, size: arg.number('size', help='Chop data into chunks of this size.'),
        truncate: arg.switch('-t', help=(
            'Truncate possible excess bytes at the end of the input, by default they are appended as a single chunk.')) = False,
        into: arg.switch('-i', help=(
            'If this flag is specified, the size parameter determines the number of blocks to be produced rather than the size '
            'of each block. In this case, truncation is performed before the data is split.')) = False
    ):
        return super().__init__(size=size, into=into, truncate=truncate)

    def process(self, data):
        size = self.args.size
        if size < 1:
            raise ValueError('The chunk size has to be a positive integer value.')
        if self.args.into:
            size, remainder = divmod(len(data), size)
            if remainder and not self.args.truncate:
                partition = remainder * (size + 1)
                part1, part2 = data[:partition], data[partition:]
                yield from splitchunks(part1, size + 1)
                yield from splitchunks(part2, size)
                return

        yield from splitchunks(data, size, self.args.truncate)
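
The --into arithmetic in the code above distributes any remainder over the leading blocks: divmod(len(data), N) gives the base block size, and the first remainder-many blocks receive one extra byte unless --truncate is set. A plain Python illustration of the same split, independent of refinery:

data = b'ABCDEFGHIJ'                          # 10 bytes, to be chopped into 3 blocks
size, remainder = divmod(len(data), 3)        # size = 3, remainder = 1
partition = remainder * (size + 1)            # the first 4 bytes form the oversized block
head, tail = data[:partition], data[partition:]
blocks = [head[i:i + size + 1] for i in range(0, len(head), size + 1)]
blocks += [tail[i:i + size] for i in range(0, len(tail), size)]
print(blocks)                                 # [b'ABCD', b'EFG', b'HIJ']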

Ancestors

Class variables

var optional_dependencies
var required_dependencies

Inherited members

class cm (invert=False, all=False, reset=False, size=False, index=False, ext=False, entropy=False, ic=False, magic=False, sha1=False, sha256=False, crc32=False, md5=False, hashes=False, *names)

This unit is implemented in refinery.units.meta.cm and has the following command line interface:

usage: cm [-h] [-L] [-Q] [-0] [-v] [-x | -a] [-r] [-S] [-I] [-F] [-E] [-C]
          [-M] [-1] [-2] [-3] [-5] [-H]
          [name [name ...]]

The Common Meta variables unit populates the set of meta variables of the
current chunk with commonly used metadata. The unit has no effect outside
a frame.

positional arguments:
  name           A variable name that can include the common properties:
                 mime, ext, magic, size, entropy, ic, crc32, sha1, sha256,
                 md5, index. If none is given, the variables index and
                 size are populated. For most of these, an optional
                 argument is available that can be used as a shorthand:

optional arguments:
  -x, --invert   populate only options that have not been specified
  -a, --all      populate all options
  -r, --reset    discard all meta variables that were not explicitly
                 specified
  -S, --size     size of the chunk
  -I, --index    index of the chunk in the current frame
  -F, --ext      guess file extension
  -E, --entropy  compute data entropy
  -C, --ic       compute the index of coincidence
  -M, --magic    compute file magic
  -1, --sha1     compute hash: SHA-1
  -2, --sha256   compute hash: SHA-256
  -3, --crc32    compute hash: CRC32
  -5, --md5      compute hash: MD5
  -H, --hashes   compute all common hashes

generic options:
  -h, --help     Show this help message and exit.
  -L, --lenient  Allow partial results as output.
  -Q, --quiet    Disables all log output.
  -0, --devnull  Do not produce any output.
  -v, --verbose  Specify up to two times to increase log level.
Expand source code Browse git
class cm(Unit):
    """
    The Common Meta variables unit populates the set of meta variables of the current chunk with commonly
    used metadata. The unit has no effect outside a frame.
    """
    def __init__(
        self,
        invert  : arg.switch('-x', group='ALL', help='populate only options that have not been specified') = False,
        all     : arg.switch('-a', group='ALL', help='populate all options') = False,
        reset   : arg.switch('-r', help='discard all meta variables that were not explicitly specified') = False,
        size    : arg.switch('-S', help='size of the chunk') = False,
        index   : arg.switch('-I', help='index of the chunk in the current frame') = False,
        ext     : arg.switch('-F', help='guess file extension') = False,
        entropy : arg.switch('-E', help='compute data entropy') = False,
        ic      : arg.switch('-C', help='compute the index of coincidence') = False,
        magic   : arg.switch('-M', help='compute file magic') = False,
        sha1    : arg.switch('-1', help='compute hash: SHA-1') = False,
        sha256  : arg.switch('-2', help='compute hash: SHA-256') = False,
        crc32   : arg.switch('-3', help='compute hash: CRC32') = False,
        md5     : arg.switch('-5', help='compute hash: MD5') = False,
        hashes  : arg.switch('-H', help='compute all common hashes') = False,
        *names  : arg(metavar='name', help=(
            F'A variable name that can include the common properties: {_COMMON_PROPERTIES_LIST}.'
            R' If none is given, the variables index and size are populated. For most of these,'
            R' an optional argument is available that can be used as a shorthand:'))
    ):
        def stringify(name):
            if isinstance(name, str):
                return name
            return name.decode(self.codec)

        names = {stringify(name) for name in names}
        if hashes:
            md5 = sha256 = sha1 = crc32 = True
        if size:
            names.add('size')
        if index:
            names.add('index')
        if ext:
            names.add('ext')
        if entropy:
            names.add('entropy')
        if ic:
            names.add('ic')
        if magic:
            names.add('magic')
        if sha1:
            names.add('sha1')
        if sha256:
            names.add('sha256')
        if crc32:
            names.add('crc32')
        if md5:
            names.add('md5')
        if not names and not reset:
            names.update(('index', 'size'))
        if all:
            if invert:
                raise ValueError('invert and all are both enabled, resulting in empty configuration.')
            names = set(COMMON_PROPERTIES)
        elif invert:
            names = set(COMMON_PROPERTIES) - names
        super().__init__(names=names, reset=reset)

    def process(self, data):
        return data

    def filter(self, chunks):
        names = self.args.names
        reset = self.args.reset
        f