Module refinery.units.formats.archive.xtchm
Expand source code Browse git
from __future__ import annotations
from refinery.units.formats import PathExtractorUnit, UnpackResult
from refinery.lib.chm import CHM, ChmHeader
class xtchm(PathExtractorUnit, docs='{0}{p}{PathExtractorUnit}'):
    """
    Extract files from CHM (Windows Help) files.
    """
    # NOTE: the class docstring above is kept verbatim; the `docs` template
    # passed to the base class appears to splice it into the generated help.

    def unpack(self, data):
        # Parse the container once; the per-entry readers below all share it.
        archive = CHM(memoryview(data))
        self.log_info(F'language: {archive.header.language_name}')
        self.log_info(F'codepage: {archive.header.codepage}')

        for path, entry in archive.filesystem.items():
            # Skip entries without content as well as anything located under
            # the '::DataSpace' prefix; neither is emitted as an item.
            if entry.length <= 0 or path.startswith('::DataSpace'):
                continue

            # Bind the current archive and entry as default arguments so that
            # each deferred reader keeps its own entry rather than the value
            # the loop variable holds when the reader is finally invoked.
            def read_entry(archive=archive, entry=entry):
                return archive.read(entry)

            yield UnpackResult(path, read_entry)

    @classmethod
    def handles(cls, data):
        # The input is accepted when its first four bytes equal the magic
        # value declared on ChmHeader.
        signature = data[:4]
        return signature == ChmHeader.Magic
Classes
class xtchm (*paths, list=False, join_path=False, drop_path=False, fuzzy=0, exact=False, regex=False, path=b'path')
-
Extract files from CHM (Windows Help) files.
This unit is a path extractor, which extracts data from a hierarchical structure. Each extracted item is emitted as a separate chunk with an attached meta variable that contains its path within the source structure. The positional arguments to the command are patterns that can be used to filter the extracted items by their path. To view only the paths of all chunks, use the listing switch:
emit something | xtchm --list
Otherwise, extracted items are written to the standard output port and usually require a frame to properly process. In order to dump all extracted data to disk, the following pipeline can be used:
emit something | xtchm [| dump {path} ]
Expand source code Browse git
class xtchm(PathExtractorUnit, docs='{0}{p}{PathExtractorUnit}'):
    """
    Extract files from CHM (Windows Help) files.
    """
    # NOTE: the class docstring above is kept verbatim; the `docs` template
    # passed to the base class appears to splice it into the generated help.

    def unpack(self, data):
        # Parse the container once; the per-entry readers below all share it.
        archive = CHM(memoryview(data))
        self.log_info(F'language: {archive.header.language_name}')
        self.log_info(F'codepage: {archive.header.codepage}')

        for path, entry in archive.filesystem.items():
            # Skip entries without content as well as anything located under
            # the '::DataSpace' prefix; neither is emitted as an item.
            if entry.length <= 0 or path.startswith('::DataSpace'):
                continue

            # Bind the current archive and entry as default arguments so that
            # each deferred reader keeps its own entry rather than the value
            # the loop variable holds when the reader is finally invoked.
            def read_entry(archive=archive, entry=entry):
                return archive.read(entry)

            yield UnpackResult(path, read_entry)

    @classmethod
    def handles(cls, data):
        # The input is accepted when its first four bytes equal the magic
        # value declared on ChmHeader.
        signature = data[:4]
        return signature == ChmHeader.Magic
Ancestors
Subclasses
Class variables
var required_dependencies
var optional_dependencies
var console
var reverse
Methods
def unpack(self, data)
-
Expand source code Browse git
def unpack(self, data):
    # Parse the container once; the per-entry readers below all share it.
    archive = CHM(memoryview(data))
    self.log_info(F'language: {archive.header.language_name}')
    self.log_info(F'codepage: {archive.header.codepage}')

    for path, entry in archive.filesystem.items():
        # Skip entries without content as well as anything located under the
        # '::DataSpace' prefix; neither is emitted as an item.
        if entry.length <= 0 or path.startswith('::DataSpace'):
            continue

        # Bind the current archive and entry as default arguments so that each
        # deferred reader keeps its own entry rather than the value the loop
        # variable holds when the reader is finally invoked.
        def read_entry(archive=archive, entry=entry):
            return archive.read(entry)

        yield UnpackResult(path, read_entry)
Inherited members