Module refinery.units.formats.office.vbastr
Expand source code Browse git
from __future__ import annotations
import codecs
from refinery.lib.ole.forms import OleFormParsingError
from refinery.lib.ole.vba import FileOpenError, VBAParser
from refinery.lib.types import isbuffer
from refinery.units.formats import PathExtractorUnit, UnpackResult
def _txt(value: bytes | str):
if value is None:
return None
if not isinstance(value, str):
value = codecs.decode(value, vbastr.codec)
return value
def _bin(value):
if value is None:
return None
if not isbuffer(value):
if not isinstance(value, str):
value = str(value)
value = value.encode(vbastr.codec)
return value
class vbastr(PathExtractorUnit):
"""
Extract VBA macro variables from Office documents.
The items are extracted in a directory hierarchy that specifies their corresponding OLE
stream. The stem of their file name is the same as the variable's name. The variable
can define a caption, a control tip text, and a value; the unit extracts these with the
synthesized file extension "cap", "tip", and "val", respectively.
"""
@classmethod
def handles(cls, data) -> bool:
return data[:4] == B'\xD0\xCF\x11\xE0'
def unpack(self, value):
try:
parser = VBAParser(bytes(value))
except FileOpenError:
raise ValueError('Input data not recognized by VBA parser')
try:
for fv in parser.extract_form_strings_extended():
if not fv.variable:
continue
name = _txt(fv.variable['name'])
for ext, key in {
'cap': 'caption',
'tip': 'control_tip_text',
'val': 'value',
}.items():
value = _bin(fv.variable.get(key))
if not value:
continue
yield UnpackResult(F'{fv.filename!s}/{name!s}/{name}.{ext}', value)
except OleFormParsingError as error:
from collections import Counter
self.log_debug(str(error))
self.log_info('extended form extraction failed with error; falling back to simple method')
form_strings = list(parser.extract_form_strings())
name_counter = Counter(fs.stream_path for fs in form_strings)
dedup = Counter()
for fs in form_strings:
if fs.value is None:
continue
if name_counter[fs.stream_path] > 1:
dedup[fs.stream_path] += 1
stream_name = F'{fs.stream_path!s}.v{dedup[fs.stream_path]}'
else:
stream_name = fs.stream_path
yield UnpackResult(F'{fs.filename!s}/{stream_name!s}.val', _bin(fs.value))
Classes
class vbastr (*paths, list=False, join_path=False, drop_path=False, fuzzy=0, exact=False, regex=False, path=b'path')-
Extract VBA macro variables from Office documents.
The items are extracted in a directory hierarchy that specifies their corresponding OLE stream. The stem of their file name is the same as the variable's name. The variable can define a caption, a control tip text, and a value; the unit extracts these with the synthesized file extension "cap", "tip", and "val", respectively.
Expand source code Browse git
class vbastr(PathExtractorUnit): """ Extract VBA macro variables from Office documents. The items are extracted in a directory hierarchy that specifies their corresponding OLE stream. The stem of their file name is the same as the variable's name. The variable can define a caption, a control tip text, and a value; the unit extracts these with the synthesized file extension "cap", "tip", and "val", respectively. """ @classmethod def handles(cls, data) -> bool: return data[:4] == B'\xD0\xCF\x11\xE0' def unpack(self, value): try: parser = VBAParser(bytes(value)) except FileOpenError: raise ValueError('Input data not recognized by VBA parser') try: for fv in parser.extract_form_strings_extended(): if not fv.variable: continue name = _txt(fv.variable['name']) for ext, key in { 'cap': 'caption', 'tip': 'control_tip_text', 'val': 'value', }.items(): value = _bin(fv.variable.get(key)) if not value: continue yield UnpackResult(F'{fv.filename!s}/{name!s}/{name}.{ext}', value) except OleFormParsingError as error: from collections import Counter self.log_debug(str(error)) self.log_info('extended form extraction failed with error; falling back to simple method') form_strings = list(parser.extract_form_strings()) name_counter = Counter(fs.stream_path for fs in form_strings) dedup = Counter() for fs in form_strings: if fs.value is None: continue if name_counter[fs.stream_path] > 1: dedup[fs.stream_path] += 1 stream_name = F'{fs.stream_path!s}.v{dedup[fs.stream_path]}' else: stream_name = fs.stream_path yield UnpackResult(F'{fs.filename!s}/{stream_name!s}.val', _bin(fs.value))Ancestors
Subclasses
Class variables
var reverse-
The type of the None singleton.
Methods
def unpack(self, value)-
Expand source code Browse git
def unpack(self, value): try: parser = VBAParser(bytes(value)) except FileOpenError: raise ValueError('Input data not recognized by VBA parser') try: for fv in parser.extract_form_strings_extended(): if not fv.variable: continue name = _txt(fv.variable['name']) for ext, key in { 'cap': 'caption', 'tip': 'control_tip_text', 'val': 'value', }.items(): value = _bin(fv.variable.get(key)) if not value: continue yield UnpackResult(F'{fv.filename!s}/{name!s}/{name}.{ext}', value) except OleFormParsingError as error: from collections import Counter self.log_debug(str(error)) self.log_info('extended form extraction failed with error; falling back to simple method') form_strings = list(parser.extract_form_strings()) name_counter = Counter(fs.stream_path for fs in form_strings) dedup = Counter() for fs in form_strings: if fs.value is None: continue if name_counter[fs.stream_path] > 1: dedup[fs.stream_path] += 1 stream_name = F'{fs.stream_path!s}.v{dedup[fs.stream_path]}' else: stream_name = fs.stream_path yield UnpackResult(F'{fs.filename!s}/{stream_name!s}.val', _bin(fs.value))
Inherited members
PathExtractorUnit:CustomJoinBehaviourCustomPathSeparatorFilterEverythingRequiresactassemblecodecconsolefilterfinishhandlesis_quietis_reversibleisattylabelledleniencylog_alwayslog_debuglog_detachlog_faillog_infolog_levellog_warnloggernamenozzleoptional_dependenciesreadread1required_dependenciesresetrunsourcesuperinit
UnitBase: