Module refinery.units.formats.office.xlmdeobf

Expand source code Browse git
from __future__ import annotations

from refinery.lib.types import Param
from refinery.units.formats import Arg, Unit


class xlmdeobf(Unit):
    """
    Deobfuscates Excel v4.0 (XLM) macros from XLS, XLSM, and XLSB documents. Uses an inlined port
    of XLMMacroDeobfuscator to emulate XLM macro formulas.
    """
    @classmethod
    def handles(cls, data) -> bool | None:
        """
        Report whether the given data looks like a document this unit can work on. The result is
        `True` when the data is identified as XLS by `get_microsoft_format` or as XLSX by
        `get_office_xml_type`, and `None` in every other case.
        """
        from refinery.lib.id import Fmt, get_microsoft_format, get_office_xml_type
        # The Office XML check is only reached when the data was not identified as XLS.
        if get_microsoft_format(data) == Fmt.XLS or get_office_xml_type(data) == Fmt.XLSX:
            return True
        return None

    def __init__(
        self,
        extract_only: Param[bool, Arg.Switch(
            '-x', help='Only extract cells without any emulation.'
        )] = False,
        sort_formulas: Param[bool, Arg.Switch(
            '-s', '--sort-formulas',
            help='Sort extracted formulas based on their cell address (implies -x).',
        )] = False,
        day: Param[int, Arg.Number(
            '-d',
            '--day',
            help='Specify the day of month',
        )] = -1,
        output_formula_format: Param[str, Arg.String(
            '-O', '--output-format',
            metavar='FMT',
            help='Specify the format for output formulas (using [[CELL-ADDR]], [[INT-FORMULA]], and [[STATUS]])',
        )] = 'CELL:[[CELL-ADDR]], [[STATUS]], [[INT-FORMULA]]',
        extract_formula_format: Param[str, Arg.String(
            '-E', '--extract-format',
            metavar='FMT',
            help='Specify the format for extracted formulas (using [[CELL-ADDR]], [[CELL-FORMULA]], and [[CELL-VALUE]])',
        )] = 'CELL:[[CELL-ADDR]], [[CELL-FORMULA]], [[CELL-VALUE]]',
        no_indent: Param[bool, Arg.Switch(
            '-I', '--no-indent',
            help='Do not show indent before formulas',
        )] = False,
        start_point: Param[str, Arg.String(
            '-c', '--start-point',
            help='Start interpretation from a specific cell address',
            metavar='CELL',
        )] = '',
        output_level: Param[int, Arg.Number(
            '-o',
            '--output-level',
            help=(
                'Set the level of details to be shown (0:all commands, 1: commands no jump 2:important '
                'commands 3:strings in important commands).'
            ),
        )] = 0,
        timeout: Param[int, Arg.Number(
            '-t',
            '--timeout',
            help='Stop emulation after N seconds (0: not interruption N>0: stop emulation after N seconds)',
        )] = 0,
    ):
        # Requesting sorted formulas also turns on extract-only mode; the help text of the sort
        # switch documents this as "implies -x".
        extract_only = sort_formulas or extract_only
        # vars() is the mapping of all local names at this point, which includes every parameter
        # and the adjusted extract_only value. Any additional local variable introduced above this
        # line would be forwarded as well.
        # NOTE(review): how superinit consumes these keyword arguments is not visible here.
        self.superinit(super(), **vars())

    @staticmethod
    def _show_cells(excel_doc, sorted_formulas=False):
        """
        Enumerate the cells of every macro sheet in the given document without any emulation.

        For each macro sheet returned by `excel_doc.get_macrosheets()`, the generator first yields
        the pair `(sheet.name, sheet.type)`. It then yields one 5-tuple of the shape
        `(cell, 'EXTRACTED', formula, '', value)` per cell: cells that have a formula come first,
        followed by the cells that have none. For the latter, the formula entry is the string
        `'None'`.

        When `sorted_formulas` is set, the formula cells of each sheet are ordered by column and
        then by row; otherwise they are produced in the iteration order of `sheet.cells`.
        """
        from refinery.lib.thirdparty.xlm.model import EvalResult

        def address_order(entry):
            cell = entry[0]
            row = cell.row
            # Rows that qualify as integers are compared numerically. The rank in the middle of
            # the key ensures that an integer row is never compared directly to a row of another
            # kind within the same column, which would raise a TypeError.
            if EvalResult.is_int(row):
                return cell.column, 0, int(row)
            return cell.column, 1, row

        macrosheets = excel_doc.get_macrosheets()
        for name in macrosheets:
            sheet = macrosheets[name]
            yield sheet.name, sheet.type
            with_formula = (
                (info, 'EXTRACTED', info.formula, '', info.value)
                for info in sheet.cells.values()
                if info.formula is not None
            )
            if sorted_formulas:
                yield from sorted(with_formula, key=address_order)
            else:
                yield from with_formula
            # Cells without a formula always follow the formula cells; str(None) is what ends up
            # in the formula slot of these entries.
            for info in sheet.cells.values():
                if info.formula is None:
                    yield info, 'EXTRACTED', str(info.formula), '', info.value

    @staticmethod
    def _format_output(step, format_str: str, with_indent=True):
        cell_addr = step[0].get_local_address()
        status = step[1]
        formula = step[2]
        indent = '\t' * step[3]
        result = format_str
        result = result.replace('[[CELL-ADDR]]', f'{cell_addr:10}')
        result = result.replace('[[STATUS]]', f'{status.name:20}')
        if with_indent:
            formula = indent + formula
        result = result.replace('[[INT-FORMULA]]', formula)
        return result

    def process(self, data: bytearray):
        """
        Extract or emulate the XLM macros contained in the input document.

        The first two bytes of the input select the parser: `D0 CF` is passed to the XLS wrapper,
        and `50 4B` is passed to the XLSB wrapper when the byte string `workbook.bin` occurs
        anywhere in the data, or to the XLSM wrapper otherwise. A `ValueError` is raised for any
        other input.

        In extract-only mode, every macro sheet contributes a `SHEET:` header line followed by one
        line per cell, rendered from the extract format. Otherwise, the document is passed to the
        XLM interpreter and every step it produces is rendered from the output format. The lines
        are joined with line breaks and returned encoded with the codec of this unit.
        """
        magic = data[:2]
        if magic == B'\xD0\xCF':
            from refinery.lib.thirdparty.xlm.wrappers import XLSWrapper
            excel_doc = XLSWrapper(data)
        elif magic == B'\x50\x4B':
            if b'workbook.bin' in data:
                from refinery.lib.thirdparty.xlm.wrappers import XLSBWrapper
                excel_doc = XLSBWrapper(data)
            else:
                from refinery.lib.thirdparty.xlm.wrappers import XLSMWrapper
                excel_doc = XLSMWrapper(data)
        else:
            raise ValueError('Input file type is not supported (expected XLS, XLSM, or XLSB).')

        lines: list[str] = []

        if self.args.extract_only:
            import re
            # All placeholders are substituted in a single pass over the format string. Text that
            # was inserted for one placeholder is therefore never scanned again, and a formula
            # that happens to contain the text of another placeholder is emitted unchanged.
            placeholder = re.compile(r'\[\[(CELL-ADDR|CELL-FORMULA|CELL-VALUE)\]\]')
            fmt: str = self.args.extract_formula_format
            for item in self._show_cells(excel_doc, self.args.sort_formulas):
                if len(item) == 2:
                    # Pairs announce the name and type of the sheet whose cells follow.
                    lines.append(f'SHEET: {item[0]}, {item[1]}')
                elif len(item) == 5:
                    fields = {
                        'CELL-ADDR': item[0].get_local_address(),
                        'CELL-FORMULA': item[2],
                        'CELL-VALUE': str(item[4]),
                    }
                    # A callable replacement inserts the field text verbatim; a replacement
                    # string would be subject to backslash escape processing.
                    lines.append(placeholder.sub(lambda match: fields[match[1]], fmt))
        else:
            from refinery.lib.thirdparty.xlm.interpreter import XLMInterpreter
            interpreter = XLMInterpreter(excel_doc, output_level=self.args.output_level)
            # The default of -1 (and any other non-positive value) leaves the interpreter's own
            # day of month untouched.
            if self.args.day > 0:
                interpreter.day_of_month = self.args.day

            fmt = self.args.output_formula_format
            with_indent = not self.args.no_indent
            for step in interpreter.deobfuscate_macro(
                interactive=False,
                start_point=self.args.start_point,
                timeout=self.args.timeout,
                silent_mode=True,
            ):
                lines.append(self._format_output(step, fmt, with_indent))

        return '\n'.join(lines).encode(self.codec)

Classes

class xlmdeobf (extract_only=False, sort_formulas=False, day=-1, output_formula_format='CELL:[[CELL-ADDR]], [[STATUS]], [[INT-FORMULA]]', extract_formula_format='CELL:[[CELL-ADDR]], [[CELL-FORMULA]], [[CELL-VALUE]]', no_indent=False, start_point='', output_level=0, timeout=0)

Deobfuscates Excel v4.0 (XLM) macros from XLS, XLSM, and XLSB documents. Uses an inlined port of XLMMacroDeobfuscator to emulate XLM macro formulas.

Expand source code Browse git
class xlmdeobf(Unit):
    """
    Deobfuscates Excel v4.0 (XLM) macros from XLS, XLSM, and XLSB documents. Uses an inlined port
    of XLMMacroDeobfuscator to emulate XLM macro formulas.
    """
    @classmethod
    def handles(cls, data) -> bool | None:
        """
        Report whether the given data looks like a document this unit can work on. The result is
        `True` when the data is identified as XLS by `get_microsoft_format` or as XLSX by
        `get_office_xml_type`, and `None` in every other case.
        """
        from refinery.lib.id import Fmt, get_microsoft_format, get_office_xml_type
        # The Office XML check is only reached when the data was not identified as XLS.
        if get_microsoft_format(data) == Fmt.XLS or get_office_xml_type(data) == Fmt.XLSX:
            return True
        return None

    def __init__(
        self,
        extract_only: Param[bool, Arg.Switch(
            '-x', help='Only extract cells without any emulation.'
        )] = False,
        sort_formulas: Param[bool, Arg.Switch(
            '-s', '--sort-formulas',
            help='Sort extracted formulas based on their cell address (implies -x).',
        )] = False,
        day: Param[int, Arg.Number(
            '-d',
            '--day',
            help='Specify the day of month',
        )] = -1,
        output_formula_format: Param[str, Arg.String(
            '-O', '--output-format',
            metavar='FMT',
            help='Specify the format for output formulas (using [[CELL-ADDR]], [[INT-FORMULA]], and [[STATUS]])',
        )] = 'CELL:[[CELL-ADDR]], [[STATUS]], [[INT-FORMULA]]',
        extract_formula_format: Param[str, Arg.String(
            '-E', '--extract-format',
            metavar='FMT',
            help='Specify the format for extracted formulas (using [[CELL-ADDR]], [[CELL-FORMULA]], and [[CELL-VALUE]])',
        )] = 'CELL:[[CELL-ADDR]], [[CELL-FORMULA]], [[CELL-VALUE]]',
        no_indent: Param[bool, Arg.Switch(
            '-I', '--no-indent',
            help='Do not show indent before formulas',
        )] = False,
        start_point: Param[str, Arg.String(
            '-c', '--start-point',
            help='Start interpretation from a specific cell address',
            metavar='CELL',
        )] = '',
        output_level: Param[int, Arg.Number(
            '-o',
            '--output-level',
            help=(
                'Set the level of details to be shown (0:all commands, 1: commands no jump 2:important '
                'commands 3:strings in important commands).'
            ),
        )] = 0,
        timeout: Param[int, Arg.Number(
            '-t',
            '--timeout',
            help='Stop emulation after N seconds (0: not interruption N>0: stop emulation after N seconds)',
        )] = 0,
    ):
        # Requesting sorted formulas also turns on extract-only mode; the help text of the sort
        # switch documents this as "implies -x".
        extract_only = sort_formulas or extract_only
        # vars() is the mapping of all local names at this point, which includes every parameter
        # and the adjusted extract_only value. Any additional local variable introduced above this
        # line would be forwarded as well.
        # NOTE(review): how superinit consumes these keyword arguments is not visible here.
        self.superinit(super(), **vars())

    @staticmethod
    def _show_cells(excel_doc, sorted_formulas=False):
        """
        Enumerate the cells of every macro sheet in the given document without any emulation.

        For each macro sheet returned by `excel_doc.get_macrosheets()`, the generator first yields
        the pair `(sheet.name, sheet.type)`. It then yields one 5-tuple of the shape
        `(cell, 'EXTRACTED', formula, '', value)` per cell: cells that have a formula come first,
        followed by the cells that have none. For the latter, the formula entry is the string
        `'None'`.

        When `sorted_formulas` is set, the formula cells of each sheet are ordered by column and
        then by row; otherwise they are produced in the iteration order of `sheet.cells`.
        """
        from refinery.lib.thirdparty.xlm.model import EvalResult

        def address_order(entry):
            cell = entry[0]
            row = cell.row
            # Rows that qualify as integers are compared numerically. The rank in the middle of
            # the key ensures that an integer row is never compared directly to a row of another
            # kind within the same column, which would raise a TypeError.
            if EvalResult.is_int(row):
                return cell.column, 0, int(row)
            return cell.column, 1, row

        macrosheets = excel_doc.get_macrosheets()
        for name in macrosheets:
            sheet = macrosheets[name]
            yield sheet.name, sheet.type
            with_formula = (
                (info, 'EXTRACTED', info.formula, '', info.value)
                for info in sheet.cells.values()
                if info.formula is not None
            )
            if sorted_formulas:
                yield from sorted(with_formula, key=address_order)
            else:
                yield from with_formula
            # Cells without a formula always follow the formula cells; str(None) is what ends up
            # in the formula slot of these entries.
            for info in sheet.cells.values():
                if info.formula is None:
                    yield info, 'EXTRACTED', str(info.formula), '', info.value

    @staticmethod
    def _format_output(step, format_str: str, with_indent=True):
        cell_addr = step[0].get_local_address()
        status = step[1]
        formula = step[2]
        indent = '\t' * step[3]
        result = format_str
        result = result.replace('[[CELL-ADDR]]', f'{cell_addr:10}')
        result = result.replace('[[STATUS]]', f'{status.name:20}')
        if with_indent:
            formula = indent + formula
        result = result.replace('[[INT-FORMULA]]', formula)
        return result

    def process(self, data: bytearray):
        """
        Extract or emulate the XLM macros contained in the input document.

        The first two bytes of the input select the parser: `D0 CF` is passed to the XLS wrapper,
        and `50 4B` is passed to the XLSB wrapper when the byte string `workbook.bin` occurs
        anywhere in the data, or to the XLSM wrapper otherwise. A `ValueError` is raised for any
        other input.

        In extract-only mode, every macro sheet contributes a `SHEET:` header line followed by one
        line per cell, rendered from the extract format. Otherwise, the document is passed to the
        XLM interpreter and every step it produces is rendered from the output format. The lines
        are joined with line breaks and returned encoded with the codec of this unit.
        """
        magic = data[:2]
        if magic == B'\xD0\xCF':
            from refinery.lib.thirdparty.xlm.wrappers import XLSWrapper
            excel_doc = XLSWrapper(data)
        elif magic == B'\x50\x4B':
            if b'workbook.bin' in data:
                from refinery.lib.thirdparty.xlm.wrappers import XLSBWrapper
                excel_doc = XLSBWrapper(data)
            else:
                from refinery.lib.thirdparty.xlm.wrappers import XLSMWrapper
                excel_doc = XLSMWrapper(data)
        else:
            raise ValueError('Input file type is not supported (expected XLS, XLSM, or XLSB).')

        lines: list[str] = []

        if self.args.extract_only:
            import re
            # All placeholders are substituted in a single pass over the format string. Text that
            # was inserted for one placeholder is therefore never scanned again, and a formula
            # that happens to contain the text of another placeholder is emitted unchanged.
            placeholder = re.compile(r'\[\[(CELL-ADDR|CELL-FORMULA|CELL-VALUE)\]\]')
            fmt: str = self.args.extract_formula_format
            for item in self._show_cells(excel_doc, self.args.sort_formulas):
                if len(item) == 2:
                    # Pairs announce the name and type of the sheet whose cells follow.
                    lines.append(f'SHEET: {item[0]}, {item[1]}')
                elif len(item) == 5:
                    fields = {
                        'CELL-ADDR': item[0].get_local_address(),
                        'CELL-FORMULA': item[2],
                        'CELL-VALUE': str(item[4]),
                    }
                    # A callable replacement inserts the field text verbatim; a replacement
                    # string would be subject to backslash escape processing.
                    lines.append(placeholder.sub(lambda match: fields[match[1]], fmt))
        else:
            from refinery.lib.thirdparty.xlm.interpreter import XLMInterpreter
            interpreter = XLMInterpreter(excel_doc, output_level=self.args.output_level)
            # The default of -1 (and any other non-positive value) leaves the interpreter's own
            # day of month untouched.
            if self.args.day > 0:
                interpreter.day_of_month = self.args.day

            fmt = self.args.output_formula_format
            with_indent = not self.args.no_indent
            for step in interpreter.deobfuscate_macro(
                interactive=False,
                start_point=self.args.start_point,
                timeout=self.args.timeout,
                silent_mode=True,
            ):
                lines.append(self._format_output(step, fmt, with_indent))

        return '\n'.join(lines).encode(self.codec)

Ancestors

Subclasses

Class variables

var reverse

The type of the None singleton.

Inherited members