Module refinery.units.formats.archive.xtcab

Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from refinery.units.formats.archive import ArchiveUnit
from refinery.units import Chunk
from refinery.lib.cab import Cabinet, CabDisk


class xtcab(ArchiveUnit, docs='{0}{p}{PathExtractorUnit}'):
    """
    Extract files from CAB (cabinet) archives. Multi-volume archives can be extracted if all
    required disks are present as chunks within the current frame.
    """
    def unpack(self, data: Chunk):
        """
        Yield one packed item for every file of the cabinet attached to the input chunk.

        The cabinet object is read from `data.temp`, which is where `filter` stores it. It is
        checked and processed before any item is produced. When the cabinet's `files` mapping
        has more than one entry, each path is prefixed with `CAB<key as 4 hex digits>/` so that
        files from different entries do not share a path.
        """
        arc: Cabinet = data.temp
        arc.check()
        arc.process()
        one = len(arc.files) == 1
        self.log_info(F'processing CAB with {len(arc)} disks')
        # NOTE(review): the keys of arc.files are presumably cabinet set IDs; confirm in lib.cab.
        for cab_id, files in arc.files.items():
            for file in files:
                path = file.name
                if not one:
                    path = F'CAB{cab_id:04X}/{path}'
                # Binding the file as a default argument gives each deferred callback its own
                # file instead of whatever the loop variable holds when it is finally invoked.
                yield self._pack(path, file.timestamp, lambda f=file: f.decompress())

    def filter(self, inputs):
        """
        Group the incoming chunks into cabinets and yield one carrier chunk per cabinet.

        The first chunk of every group becomes the carrier: a new `Cabinet` is stored in its
        `temp` attribute and the chunk itself is appended to that cabinet. Following chunks are
        appended to the same cabinet for as long as `needs_more_disks()` reports true. Once it
        no longer does, the carrier is yielded and the next chunk starts a new group.
        """
        box = None
        cab = None
        for chunk in inputs:
            if cab is not None and not cab.needs_more_disks():
                # The current cabinet is complete; emit it and let this chunk start a new one.
                yield box
                cab = None
            if cab is None:
                box = chunk
                cab = box.temp = Cabinet()
            # Every chunk has to be added to exactly one cabinet. Previously, the chunk that
            # started a new group after a completed cabinet was never appended, so its disk
            # was lost and the following chunk ended up attached to the wrong carrier.
            cab.append(chunk)
        # Compare against None rather than testing truthiness: the loop yields carriers
        # unconditionally, so the final one must not be dropped for merely being falsy.
        if box is not None:
            yield box

    @classmethod
    def handles(cls, data: bytearray):
        """
        Return whether the given data starts with the `CabDisk.MAGIC` signature.
        """
        return data.startswith(CabDisk.MAGIC)

Classes

class xtcab (*paths, list=False, join_path=False, drop_path=False, fuzzy=0, exact=False, regex=False, path=b'path', date=b'date', pwd=b'')

Extract files from CAB (cabinet) archives. Multi-volume archives can be extracted if all required disks are present as chunks within the current frame.

This unit is a path extractor which extracts data from a hierarchical structure. Each extracted item is emitted as a separate chunk and has attached to it a meta variable that contains its path within the source structure. The positional arguments to the command are patterns that can be used to filter the extracted items by their path. To view only the paths of all chunks, use the listing switch:

emit something | xtcab --list

Otherwise, extracted items are written to the standard output port and usually require a frame to properly process. In order to dump all extracted data to disk, the following pipeline can be used:

emit something | xtcab [| dump {path} ]
Expand source code Browse git
class xtcab(ArchiveUnit, docs='{0}{p}{PathExtractorUnit}'):
    """
    Extract files from CAB (cabinet) archives. Multi-volume archives can be extracted if all
    required disks are present as chunks within the current frame.
    """
    def unpack(self, data: Chunk):
        """
        Yield one packed item for every file of the cabinet attached to the input chunk.

        The cabinet object is read from `data.temp`, which is where `filter` stores it. It is
        checked and processed before any item is produced. When the cabinet's `files` mapping
        has more than one entry, each path is prefixed with `CAB<key as 4 hex digits>/` so that
        files from different entries do not share a path.
        """
        arc: Cabinet = data.temp
        arc.check()
        arc.process()
        one = len(arc.files) == 1
        self.log_info(F'processing CAB with {len(arc)} disks')
        # NOTE(review): the keys of arc.files are presumably cabinet set IDs; confirm in lib.cab.
        for cab_id, files in arc.files.items():
            for file in files:
                path = file.name
                if not one:
                    path = F'CAB{cab_id:04X}/{path}'
                # Binding the file as a default argument gives each deferred callback its own
                # file instead of whatever the loop variable holds when it is finally invoked.
                yield self._pack(path, file.timestamp, lambda f=file: f.decompress())

    def filter(self, inputs):
        """
        Group the incoming chunks into cabinets and yield one carrier chunk per cabinet.

        The first chunk of every group becomes the carrier: a new `Cabinet` is stored in its
        `temp` attribute and the chunk itself is appended to that cabinet. Following chunks are
        appended to the same cabinet for as long as `needs_more_disks()` reports true. Once it
        no longer does, the carrier is yielded and the next chunk starts a new group.
        """
        box = None
        cab = None
        for chunk in inputs:
            if cab is not None and not cab.needs_more_disks():
                # The current cabinet is complete; emit it and let this chunk start a new one.
                yield box
                cab = None
            if cab is None:
                box = chunk
                cab = box.temp = Cabinet()
            # Every chunk has to be added to exactly one cabinet. Previously, the chunk that
            # started a new group after a completed cabinet was never appended, so its disk
            # was lost and the following chunk ended up attached to the wrong carrier.
            cab.append(chunk)
        # Compare against None rather than testing truthiness: the loop yields carriers
        # unconditionally, so the final one must not be dropped for merely being falsy.
        if box is not None:
            yield box

    @classmethod
    def handles(cls, data: bytearray):
        """
        Return whether the given data starts with the `CabDisk.MAGIC` signature.
        """
        return data.startswith(CabDisk.MAGIC)

Ancestors

Class variables

var required_dependencies
var optional_dependencies

Methods

def unpack(self, data)
Expand source code Browse git
def unpack(self, data: Chunk):
    """
    Yield one packed item for every file of the cabinet stored in `data.temp`.

    The cabinet is checked and processed first. If its `files` mapping contains more than one
    entry, every path is prefixed with `CAB<key as 4 hex digits>/`; with exactly one entry the
    plain file names are used. Decompression is deferred: each item receives a callback that
    decompresses its file only when invoked.
    """
    cabinet: Cabinet = data.temp
    cabinet.check()
    cabinet.process()
    needs_prefix = len(cabinet.files) != 1
    self.log_info(F'processing CAB with {len(cabinet)} disks')
    for key, members in cabinet.files.items():
        for member in members:
            name = member.name
            target = F'CAB{key:04X}/{name}' if needs_prefix else name
            # The default argument pins the current member to this callback.
            yield self._pack(target, member.timestamp, lambda m=member: m.decompress())

Inherited members