Module refinery.units.formats.archive.xtiss

Expand source code Browse git
from __future__ import annotations

import zlib

from itertools import cycle, islice

from refinery.lib.id import buffer_contains
from refinery.lib.structures import StructReader
from refinery.units.formats.archive import ArchiveUnit


class ISSReader(StructReader[bytearray]):
    """
    A structured reader for the file stream of Install Shield Setup (ISS) archives. It parses the
    archive header and the per-file headers, and produces a lazy decoder for each stored file.
    """

    # Maps each recognized 14-byte signature to the header layout version it selects.
    MAGIC = {
        b'InstallShield\0': 1,
        b'ISSetupStream\0': 2,
    }

    def iss_archive_header(self):
        """
        Read the archive header and return the file count it declares. The layout version that
        belongs to the signature is remembered for subsequent calls to `iss_file_header`. A
        `ValueError` is raised when the signature is not one of the keys in `MAGIC`.
        """
        signature = bytes(self.read(14))
        try:
            self.__version = self.MAGIC[signature]
        except KeyError:
            raise ValueError('invalid signature for ISS archive')
        file_count = self.u16()
        # The remaining 30 header bytes are not interpreted by this reader.
        self.seekrel(0x04)
        self.seekrel(0x08)
        self.seekrel(0x02)
        self.seekrel(0x10)
        return file_count

    def iss_file_header(self):
        """
        Read one file header and return the tuple `(name, size, flags, is_unicode)`. Layout
        version 1 begins with a fixed 260-byte, zero-padded UTF-8 name field; every other version
        starts with the length of a UTF-16LE name that follows the fixed fields.
        """
        if self.__version == 1:
            name = self.read(260).rstrip(B'\0').decode('utf8')
            flags = self.u32()
            self.seekrel(4)
            size = self.u32()
            self.seekrel(8)
            is_unicode = self.u16()
            self.seekrel(30)
        else:
            name_length = self.u32()
            flags = self.u32()
            self.seekrel(2)
            size = self.u32()
            self.seekrel(8)
            is_unicode = self.u16()
            name = self.read(name_length).decode('utf-16le')
        return name, size, flags, is_unicode

    def iss_file(self):
        """
        Read the next file entry and return the pair `(name, decoder)`. The raw file body is
        consumed from the reader immediately, but it is only decoded when `decoder()` is called.
        The decoder can be called repeatedly and returns the same contents every time.
        """
        name, size, flags, is_unicode = self.iss_file_header()

        def _data(
            data: bytearray = self.read(size),
            seed: bytes = name.encode('utf8'),
            _is4: bool = flags & 4 == 4,
            _isu: bool = is_unicode
        ):
            # The default arguments are evaluated once, when this entry is parsed: This is what
            # advances the reader past the file body and binds the values of this entry.
            key = bytes(x ^ k for x, k in zip(seed, cycle(B'\x13\x35\x86\x07')))
            if _is4:
                key = bytes(islice(cycle(key), 0, 1024))
            if key:
                # Decode into a fresh buffer: Transforming the captured default in place would
                # corrupt the result of any later call to this function. With an empty key, the
                # data is passed through unchanged.
                data = bytearray(
                    ~(k ^ (b << 4 | b >> 4)) & 0xFF for b, k in zip(data, cycle(key)))
            # NOTE(review): the header field called is_unicode is what enables decompression
            # here; confirm that this name matches the meaning of the field.
            if _isu:
                data = zlib.decompress(data)
            return data
        return name, _data


class xtiss(ArchiveUnit, docs='{0}{p}{PathExtractorUnit}'):
    """
    Extract files from Install Shield Setup (ISS) files.
    """
    def unpack(self, data: bytearray):
        # The archive is taken to begin at the last occurrence of any known signature; all
        # bytes in front of that position are removed from the input buffer in place.
        start = max(data.rfind(signature) for signature in ISSReader.MAGIC)
        if start < 0:
            raise ValueError('ISS magic not found.')
        data[:start] = []

        archive = ISSReader(data)
        total = archive.iss_archive_header()
        self.log_info(F'archive contains {total} files according to header')

        # Entries are parsed one at a time; each is emitted with a callable that decodes it.
        for path, decode in (archive.iss_file() for _ in range(total)):
            yield self._pack(path, None, decode)

    @classmethod
    def handles(cls, data) -> bool | None:
        # Only inputs that start with the bytes MZ are considered at all.
        if data[:2] != B'MZ':
            return False
        for signature in ISSReader.MAGIC:
            if buffer_contains(data, signature):
                return True
        return False

Classes

class ISSReader (data, bigendian=None)

An extension of a MemoryFile which provides methods to read structured data.

Expand source code Browse git
class ISSReader(StructReader[bytearray]):
    """
    A structured reader for the file stream of Install Shield Setup (ISS) archives. It parses the
    archive header and the per-file headers, and produces a lazy decoder for each stored file.
    """

    # Maps each recognized 14-byte signature to the header layout version it selects.
    MAGIC = {
        b'InstallShield\0': 1,
        b'ISSetupStream\0': 2,
    }

    def iss_archive_header(self):
        """
        Read the archive header and return the file count it declares. The layout version that
        belongs to the signature is remembered for subsequent calls to `iss_file_header`. A
        `ValueError` is raised when the signature is not one of the keys in `MAGIC`.
        """
        signature = bytes(self.read(14))
        try:
            self.__version = self.MAGIC[signature]
        except KeyError:
            raise ValueError('invalid signature for ISS archive')
        file_count = self.u16()
        # The remaining 30 header bytes are not interpreted by this reader.
        self.seekrel(0x04)
        self.seekrel(0x08)
        self.seekrel(0x02)
        self.seekrel(0x10)
        return file_count

    def iss_file_header(self):
        """
        Read one file header and return the tuple `(name, size, flags, is_unicode)`. Layout
        version 1 begins with a fixed 260-byte, zero-padded UTF-8 name field; every other version
        starts with the length of a UTF-16LE name that follows the fixed fields.
        """
        if self.__version == 1:
            name = self.read(260).rstrip(B'\0').decode('utf8')
            flags = self.u32()
            self.seekrel(4)
            size = self.u32()
            self.seekrel(8)
            is_unicode = self.u16()
            self.seekrel(30)
        else:
            name_length = self.u32()
            flags = self.u32()
            self.seekrel(2)
            size = self.u32()
            self.seekrel(8)
            is_unicode = self.u16()
            name = self.read(name_length).decode('utf-16le')
        return name, size, flags, is_unicode

    def iss_file(self):
        """
        Read the next file entry and return the pair `(name, decoder)`. The raw file body is
        consumed from the reader immediately, but it is only decoded when `decoder()` is called.
        The decoder can be called repeatedly and returns the same contents every time.
        """
        name, size, flags, is_unicode = self.iss_file_header()

        def _data(
            data: bytearray = self.read(size),
            seed: bytes = name.encode('utf8'),
            _is4: bool = flags & 4 == 4,
            _isu: bool = is_unicode
        ):
            # The default arguments are evaluated once, when this entry is parsed: This is what
            # advances the reader past the file body and binds the values of this entry.
            key = bytes(x ^ k for x, k in zip(seed, cycle(B'\x13\x35\x86\x07')))
            if _is4:
                key = bytes(islice(cycle(key), 0, 1024))
            if key:
                # Decode into a fresh buffer: Transforming the captured default in place would
                # corrupt the result of any later call to this function. With an empty key, the
                # data is passed through unchanged.
                data = bytearray(
                    ~(k ^ (b << 4 | b >> 4)) & 0xFF for b, k in zip(data, cycle(key)))
            # NOTE(review): the header field called is_unicode is what enables decompression
            # here; confirm that this name matches the meaning of the field.
            if _isu:
                data = zlib.decompress(data)
            return data
        return name, _data

Ancestors

Class variables

var MAGIC

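Maps each recognized 14-byte archive signature (b'InstallShield\0' and b'ISSetupStream\0') to the header layout version it selects (1 and 2, respectively).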

Methods

def iss_archive_header(self)
Expand source code Browse git
def iss_archive_header(self):
    """
    Read the archive header and return the number of files it declares. The layout version
    selected by the 14-byte signature is stored on the reader; a `ValueError` is raised when
    the signature is not a key of `MAGIC`.
    """
    magic = bytes(self.read(14))
    try:
        version = self.MAGIC[magic]
    except KeyError:
        raise ValueError('invalid signature for ISS archive')
    self.__version = version
    total = self.u16()
    # Four header fields follow that are skipped without being interpreted.
    for width in (0x04, 0x08, 0x02, 0x10):
        self.seekrel(width)
    return total
def iss_file_header(self)
Expand source code Browse git
def iss_file_header(self):
    """
    Read one file header and return the tuple `(name, size, flags, is_unicode)`. Layout
    version 1 begins with a fixed 260-byte, zero-padded UTF-8 name field; every other version
    starts with the length of a UTF-16LE name that follows the fixed fields.
    """
    if self.__version != 1:
        # Length-prefixed layout: the name is stored after the fixed fields.
        name_size = self.u32()
        attributes = self.u32()
        self.seekrel(2)
        length = self.u32()
        self.seekrel(8)
        unicode_flag = self.u16()
        raw_name = self.read(name_size)
        return raw_name.decode('utf-16le'), length, attributes, unicode_flag

    # Fixed-width layout: the name comes first and is padded with zero bytes.
    raw_name = self.read(260)
    label = raw_name.rstrip(B'\0').decode('utf8')
    attributes = self.u32()
    self.seekrel(4)
    length = self.u32()
    self.seekrel(8)
    unicode_flag = self.u16()
    self.seekrel(30)
    return label, length, attributes, unicode_flag
def iss_file(self)
Expand source code Browse git
def iss_file(self):
    """
    Read the next file entry and return the pair `(name, decoder)`. The raw file body is
    consumed from the reader immediately, but it is only decoded when `decoder()` is called.
    The decoder can be called repeatedly and returns the same contents every time.
    """
    name, size, flags, is_unicode = self.iss_file_header()

    def _data(
        data: bytearray = self.read(size),
        seed: bytes = name.encode('utf8'),
        _is4: bool = flags & 4 == 4,
        _isu: bool = is_unicode
    ):
        # The default arguments are evaluated once, when this entry is parsed: This is what
        # advances the reader past the file body and binds the values of this entry.
        key = bytes(x ^ k for x, k in zip(seed, cycle(B'\x13\x35\x86\x07')))
        if _is4:
            key = bytes(islice(cycle(key), 0, 1024))
        if key:
            # Decode into a fresh buffer: Transforming the captured default in place would
            # corrupt the result of any later call to this function. With an empty key, the
            # data is passed through unchanged.
            data = bytearray(
                ~(k ^ (b << 4 | b >> 4)) & 0xFF for b, k in zip(data, cycle(key)))
        # NOTE(review): the header field called is_unicode is what enables decompression
        # here; confirm that this name matches the meaning of the field.
        if _isu:
            data = zlib.decompress(data)
        return data
    return name, _data

Inherited members

class xtiss (*paths, list=False, join_path=False, drop_path=False, fuzzy=0, exact=False, regex=False, path=b'path', exclude=None, date=b'date', pwd=b'')

Extract files from Install Shield Setup (ISS) files.

This unit extracts items with an associated virtual path from a container; each extracted item is emitted as a separate chunk with a corresponding meta variable named "path".

Positional arguments to xtiss are patterns to filter the extracted items. Use the -x flag to add an exclusion pattern. To extract all files with a foo or bar extension, but none that has the word "temp" in its path:

xtiss .foo .bar -x temp

To view only the paths of all chunks, use the listing switch:

emit data | ... | xtiss -l

Otherwise, extracted items are written to the standard output port and usually require a frame to properly process. In order to dump all extracted data to disk, the following pipeline can be used:

emit data | ... | xtiss [| dump extracted/{path} ]

The value {path} is a placeholder which is substituted by the virtual path of the extracted item. When using xtiss to unpack a file on disk, the following pattern can be useful:

ef pack.bin [| xtiss -j | d2p ]

The unit ef is also a path extractor. By specifying -j (or --join), the paths of extracted items are combined. Here, d2p is a shortcut for dump {path}. It deconflicts the joined paths with the local file system: If pack.bin contains items one.txt and two.txt, the following local file tree would be the result:

pack.bin
pack/one.txt
pack/two.txt

Finally, the -d (or --drop) switch can be used to not create (or alter) the path metadata at all, which is useful in cases where path metadata from a previous unit should be preserved.

Expand source code Browse git
class xtiss(ArchiveUnit, docs='{0}{p}{PathExtractorUnit}'):
    """
    Extract files from Install Shield Setup (ISS) files.
    """
    def unpack(self, data: bytearray):
        # The archive is taken to begin at the last occurrence of any known signature; all
        # bytes in front of that position are removed from the input buffer in place.
        start = max(data.rfind(signature) for signature in ISSReader.MAGIC)
        if start < 0:
            raise ValueError('ISS magic not found.')
        data[:start] = []

        archive = ISSReader(data)
        total = archive.iss_archive_header()
        self.log_info(F'archive contains {total} files according to header')

        # Entries are parsed one at a time; each is emitted with a callable that decodes it.
        for path, decode in (archive.iss_file() for _ in range(total)):
            yield self._pack(path, None, decode)

    @classmethod
    def handles(cls, data) -> bool | None:
        # Only inputs that start with the bytes MZ are considered at all.
        if data[:2] != B'MZ':
            return False
        for signature in ISSReader.MAGIC:
            if buffer_contains(data, signature):
                return True
        return False

Ancestors

Subclasses

Class variables

var reverse

The type of the None singleton.

Methods

def unpack(self, data)
Expand source code Browse git
def unpack(self, data: bytearray):
    # The archive is taken to begin at the last occurrence of any known signature; all
    # bytes in front of that position are removed from the input buffer in place.
    start = max(data.rfind(signature) for signature in ISSReader.MAGIC)
    if start < 0:
        raise ValueError('ISS magic not found.')
    data[:start] = []

    archive = ISSReader(data)
    total = archive.iss_archive_header()
    self.log_info(F'archive contains {total} files according to header')

    # Entries are parsed one at a time; each is emitted with a callable that decodes it.
    for path, decode in (archive.iss_file() for _ in range(total)):
        yield self._pack(path, None, decode)

Inherited members