Module refinery.units.formats.office.vbastr

Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Union

from refinery.lib.tools import isbuffer
from refinery.units.formats import PathExtractorUnit, UnpackResult


def _txt(value: Union[bytes, str]):
    if value is None:
        return None
    if not isinstance(value, str):
        value = value.decode(vbastr.codec)
    return value


def _bin(value):
    if value is None:
        return None
    if not isbuffer(value):
        if not isinstance(value, str):
            value = str(value)
        value = value.encode(vbastr.codec)
    return value


class vbastr(PathExtractorUnit):
    """
    Extract VBA macro variables from Office documents. The items are extracted in a directory
    hierarchy that specifies their corresponding OLE stream. The stem of their file name is the
    same as the variable's name. The variable can define a caption, a control tip text, and a
    value; the unit extracts these with the synthesized file extension "cap", "tip", and "val",
    respectively.
    """
    @PathExtractorUnit.Requires('oletools', 'formats', 'office')
    def _olevba():
        from oletools import olevba
        return olevba

    def unpack(self, value):
        try:
            parser = self._olevba.VBA_Parser('.', data=bytes(value), relaxed=True)
        except self._olevba.FileOpenError:
            raise ValueError('Input data not recognized by VBA parser')
        try:
            for path, name, vars in parser.extract_form_strings_extended():
                if not vars:
                    continue
                name = _txt(vars['name'])
                for ext, key in {
                    'cap': 'caption',
                    'tip': 'control_tip_text',
                    'val': 'value',
                }.items():
                    value = _bin(vars.get(key))
                    if not value:
                        continue
                    yield UnpackResult(F'{path!s}/{name!s}/{name}.{ext}', value)
        except self._olevba.oleform.OleFormParsingError as error:
            from collections import Counter
            self.log_debug(str(error))
            self.log_info('extended form extraction failed with error; falling back to simple method')
            form_strings = list(parser.extract_form_strings())
            name_counter = Counter(name for _, name, _ in form_strings)
            dedup = Counter()
            for path, name, string in form_strings:
                if string is None:
                    continue
                if name_counter[name] > 1:
                    dedup[name] += 1
                    name = F'{name!s}.v{dedup[name]}'
                yield UnpackResult(F'{path!s}/{name!s}.val', _bin(string))

Classes

class vbastr (*paths, list=False, join_path=False, drop_path=False, fuzzy=0, exact=False, regex=False, path=b'path')

Extract VBA macro variables from Office documents. The items are extracted in a directory hierarchy that specifies their corresponding OLE stream. The stem of their file name is the same as the variable's name. The variable can define a caption, a control tip text, and a value; the unit extracts these with the synthesized file extension "cap", "tip", and "val", respectively.

Expand source code Browse git
class vbastr(PathExtractorUnit):
    """
    Extract VBA macro variables from Office documents. The items are extracted in a directory
    hierarchy that specifies their corresponding OLE stream. The stem of their file name is the
    same as the variable's name. The variable can define a caption, a control tip text, and a
    value; the unit extracts these with the synthesized file extension "cap", "tip", and "val",
    respectively.
    """
    @PathExtractorUnit.Requires('oletools', 'formats', 'office')
    def _olevba():
        from oletools import olevba
        return olevba

    def unpack(self, value):
        try:
            parser = self._olevba.VBA_Parser('.', data=bytes(value), relaxed=True)
        except self._olevba.FileOpenError:
            raise ValueError('Input data not recognized by VBA parser')
        try:
            for path, name, vars in parser.extract_form_strings_extended():
                if not vars:
                    continue
                name = _txt(vars['name'])
                for ext, key in {
                    'cap': 'caption',
                    'tip': 'control_tip_text',
                    'val': 'value',
                }.items():
                    value = _bin(vars.get(key))
                    if not value:
                        continue
                    yield UnpackResult(F'{path!s}/{name!s}/{name}.{ext}', value)
        except self._olevba.oleform.OleFormParsingError as error:
            from collections import Counter
            self.log_debug(str(error))
            self.log_info('extended form extraction failed with error; falling back to simple method')
            form_strings = list(parser.extract_form_strings())
            name_counter = Counter(name for _, name, _ in form_strings)
            dedup = Counter()
            for path, name, string in form_strings:
                if string is None:
                    continue
                if name_counter[name] > 1:
                    dedup[name] += 1
                    name = F'{name!s}.v{dedup[name]}'
                yield UnpackResult(F'{path!s}/{name!s}.val', _bin(string))

Ancestors

Class variables

var required_dependencies
var optional_dependencies

Methods

def unpack(self, value)
Expand source code Browse git
def unpack(self, value):
    try:
        parser = self._olevba.VBA_Parser('.', data=bytes(value), relaxed=True)
    except self._olevba.FileOpenError:
        raise ValueError('Input data not recognized by VBA parser')
    try:
        for path, name, vars in parser.extract_form_strings_extended():
            if not vars:
                continue
            name = _txt(vars['name'])
            for ext, key in {
                'cap': 'caption',
                'tip': 'control_tip_text',
                'val': 'value',
            }.items():
                value = _bin(vars.get(key))
                if not value:
                    continue
                yield UnpackResult(F'{path!s}/{name!s}/{name}.{ext}', value)
    except self._olevba.oleform.OleFormParsingError as error:
        from collections import Counter
        self.log_debug(str(error))
        self.log_info('extended form extraction failed with error; falling back to simple method')
        form_strings = list(parser.extract_form_strings())
        name_counter = Counter(name for _, name, _ in form_strings)
        dedup = Counter()
        for path, name, string in form_strings:
            if string is None:
                continue
            if name_counter[name] > 1:
                dedup[name] += 1
                name = F'{name!s}.v{dedup[name]}'
            yield UnpackResult(F'{path!s}/{name!s}.val', _bin(string))

Inherited members