Module refinery.units.formats.office.xtrtf
Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import TYPE_CHECKING
from refinery.units.formats import PathExtractorUnit, UnpackResult
if TYPE_CHECKING:
from oletools.rtfobj import RtfObject
class xtrtf(PathExtractorUnit):
    """
    Extract embedded objects in RTF documents.
    """

    # Dependency hook: imports oletools together with the rtfobj and oleobj
    # submodules and returns the top-level package, so that the other methods
    # can reach both submodules through `self._oletools`.
    @PathExtractorUnit.Requires('oletools', 'formats', 'office', 'extended')
    def _oletools():
        import oletools
        import oletools.rtfobj
        import oletools.oleobj
        return oletools

    def unpack(self, data):
        """
        Run the oletools RTF object parser over the input and yield one `UnpackResult` for
        every object it reports.

        The result path is the object's file name; when that is empty, a name of the form
        `carve<index>.bin` is used instead, with the index zero-padded to the number of digits
        in the total object count. The result body is the raw object data, replaced by the OLE
        data when the object has any, and replaced again by the OLE package data when that is
        present as well.

        For OLE objects, the following metadata is attached when available: `ole_type`,
        `src_path`, `tmp_path`, `ole_info`, `ole_guid`, `ole_name`, `raw_header` (raw bytes that
        precede the OLE data) and `ole_header` (OLE bytes that precede the package data).
        """
        rtf_parser = self._oletools.rtfobj.RtfObjParser(data)
        rtf_parser.parse()
        embedded = rtf_parser.objects
        width = len(str(len(embedded)))
        for k, entry in enumerate(embedded):
            entry: RtfObject
            name = entry.filename
            if not name:
                name = F'carve{k:0{width}}.bin'
            payload = entry.rawdata
            info = {}
            if entry.is_ole:
                ole_object = self._oletools.oleobj.OleObject
                kind = entry.format_id
                if kind == ole_object.TYPE_EMBEDDED:
                    info['ole_type'] = 'EMBEDDED'
                elif kind == ole_object.TYPE_LINKED:
                    info['ole_type'] = 'LINKED'
                if entry.is_package:
                    info.update({
                        'src_path': entry.src_path,
                        'tmp_path': entry.temp_path,
                    })
                if entry.clsid is not None:
                    info.update({
                        'ole_info': entry.clsid_desc,
                        'ole_guid': entry.clsid,
                        'ole_name': entry.class_name,
                    })
                ole_data = entry.oledata
                if ole_data:
                    payload = ole_data
                    offset = entry.rawdata.find(ole_data)
                    # Only a non-empty prefix is recorded as the raw header.
                    if offset > 0:
                        info['raw_header'] = entry.rawdata[:offset]
                    package = entry.olepkgdata
                    if package:
                        payload = package
                        offset = ole_data.find(package)
                        # Unlike the raw header, this is recorded even when it is empty.
                        if offset >= 0:
                            info['ole_header'] = ole_data[:offset]
            yield UnpackResult(name, payload, **info)

    @classmethod
    def handles(self, data: bytearray) -> bool:
        """
        Return whether the first 500 bytes of the input, after lowercasing and removing leading
        whitespace, start with the byte string `{\\rtf`.
        """
        head = data[:500].lower()
        return head.lstrip().startswith(b'{\\rtf')
Classes
class xtrtf (*paths, list=False, join_path=False, drop_path=False, fuzzy=0, exact=False, regex=False, path=b'path')
-
Extract embedded objects in RTF documents.
Expand source code Browse git
class xtrtf(PathExtractorUnit):
    """
    Extract embedded objects in RTF documents.
    """

    # Dependency hook: imports oletools together with the rtfobj and oleobj
    # submodules and returns the top-level package, so that the other methods
    # can reach both submodules through `self._oletools`.
    @PathExtractorUnit.Requires('oletools', 'formats', 'office', 'extended')
    def _oletools():
        import oletools
        import oletools.rtfobj
        import oletools.oleobj
        return oletools

    def unpack(self, data):
        """
        Run the oletools RTF object parser over the input and yield one `UnpackResult` for
        every object it reports.

        The result path is the object's file name; when that is empty, a name of the form
        `carve<index>.bin` is used instead, with the index zero-padded to the number of digits
        in the total object count. The result body is the raw object data, replaced by the OLE
        data when the object has any, and replaced again by the OLE package data when that is
        present as well.

        For OLE objects, the following metadata is attached when available: `ole_type`,
        `src_path`, `tmp_path`, `ole_info`, `ole_guid`, `ole_name`, `raw_header` (raw bytes that
        precede the OLE data) and `ole_header` (OLE bytes that precede the package data).
        """
        rtf_parser = self._oletools.rtfobj.RtfObjParser(data)
        rtf_parser.parse()
        embedded = rtf_parser.objects
        width = len(str(len(embedded)))
        for k, entry in enumerate(embedded):
            entry: RtfObject
            name = entry.filename
            if not name:
                name = F'carve{k:0{width}}.bin'
            payload = entry.rawdata
            info = {}
            if entry.is_ole:
                ole_object = self._oletools.oleobj.OleObject
                kind = entry.format_id
                if kind == ole_object.TYPE_EMBEDDED:
                    info['ole_type'] = 'EMBEDDED'
                elif kind == ole_object.TYPE_LINKED:
                    info['ole_type'] = 'LINKED'
                if entry.is_package:
                    info.update({
                        'src_path': entry.src_path,
                        'tmp_path': entry.temp_path,
                    })
                if entry.clsid is not None:
                    info.update({
                        'ole_info': entry.clsid_desc,
                        'ole_guid': entry.clsid,
                        'ole_name': entry.class_name,
                    })
                ole_data = entry.oledata
                if ole_data:
                    payload = ole_data
                    offset = entry.rawdata.find(ole_data)
                    # Only a non-empty prefix is recorded as the raw header.
                    if offset > 0:
                        info['raw_header'] = entry.rawdata[:offset]
                    package = entry.olepkgdata
                    if package:
                        payload = package
                        offset = ole_data.find(package)
                        # Unlike the raw header, this is recorded even when it is empty.
                        if offset >= 0:
                            info['ole_header'] = ole_data[:offset]
            yield UnpackResult(name, payload, **info)

    @classmethod
    def handles(self, data: bytearray) -> bool:
        """
        Return whether the first 500 bytes of the input, after lowercasing and removing leading
        whitespace, start with the byte string `{\\rtf`.
        """
        head = data[:500].lower()
        return head.lstrip().startswith(b'{\\rtf')
Ancestors
Class variables
var required_dependencies
var optional_dependencies
Methods
def unpack(self, data)
-
Expand source code Browse git
def unpack(self, data):
    """
    Run the oletools RTF object parser over the input and yield one `UnpackResult` for
    every object it reports.

    The result path is the object's file name; when that is empty, a name of the form
    `carve<index>.bin` is used instead, with the index zero-padded to the number of digits
    in the total object count. The result body is the raw object data, replaced by the OLE
    data when the object has any, and replaced again by the OLE package data when that is
    present as well.

    For OLE objects, the following metadata is attached when available: `ole_type`,
    `src_path`, `tmp_path`, `ole_info`, `ole_guid`, `ole_name`, `raw_header` (raw bytes that
    precede the OLE data) and `ole_header` (OLE bytes that precede the package data).
    """
    rtf_parser = self._oletools.rtfobj.RtfObjParser(data)
    rtf_parser.parse()
    embedded = rtf_parser.objects
    width = len(str(len(embedded)))
    for k, entry in enumerate(embedded):
        entry: RtfObject
        name = entry.filename
        if not name:
            name = F'carve{k:0{width}}.bin'
        payload = entry.rawdata
        info = {}
        if entry.is_ole:
            ole_object = self._oletools.oleobj.OleObject
            kind = entry.format_id
            if kind == ole_object.TYPE_EMBEDDED:
                info['ole_type'] = 'EMBEDDED'
            elif kind == ole_object.TYPE_LINKED:
                info['ole_type'] = 'LINKED'
            if entry.is_package:
                info.update({
                    'src_path': entry.src_path,
                    'tmp_path': entry.temp_path,
                })
            if entry.clsid is not None:
                info.update({
                    'ole_info': entry.clsid_desc,
                    'ole_guid': entry.clsid,
                    'ole_name': entry.class_name,
                })
            ole_data = entry.oledata
            if ole_data:
                payload = ole_data
                offset = entry.rawdata.find(ole_data)
                # Only a non-empty prefix is recorded as the raw header.
                if offset > 0:
                    info['raw_header'] = entry.rawdata[:offset]
                package = entry.olepkgdata
                if package:
                    payload = package
                    offset = ole_data.find(package)
                    # Unlike the raw header, this is recorded even when it is empty.
                    if offset >= 0:
                        info['ole_header'] = ole_data[:offset]
        yield UnpackResult(name, payload, **info)
Inherited members