Module refinery.units.formats.archive.xtnsis

Expand source code Browse git
from __future__ import annotations

import itertools
import re

from refinery.lib.id import buffer_contains
from refinery.lib.nsis.archive import NSArchive
from refinery.units.formats.archive import ArchiveUnit


class xtnsis(ArchiveUnit, docs='{0}{p}{PathExtractorUnit}'):
    """
    Extract files from NSIS archives. Nullsoft Scriptable Install System is a Windows installer
    framework often used for software distribution.
    """

    @classmethod
    def _find_archive_offset(cls, data: bytearray, before: int = -1, flawmax=2):
        """
        Search `data` for an NSIS archive signature and return the candidate archive offset.

        Each magic in `NSArchive.MAGICS` is tried verbatim first, then with 1 up to `flawmax`
        bytes replaced by a wildcard. The reported offset is the match position minus 4.
        For each signature, the highest offset that is a multiple of 0x200 is returned at once.
        If no signature yields such an offset, the lowest offset of the first signature that
        matched at all is returned as a fallback. If `before` is non-negative, only offsets
        strictly below it are considered. Returns `None` when nothing was found.
        """
        def signatures(*magics):
            # Yields (wildcard_count, pattern) pairs; all exact patterns come first, then all
            # patterns with one wildcard, and so on.
            for changes in range(flawmax + 1):
                for magic in magics:
                    if not changes:
                        yield 0, magic
                        continue
                    # The order in which positions are blanked does not matter, so combinations
                    # produce every distinct signature exactly once. Permutations would repeat
                    # each of them and cause redundant scans of the buffer.
                    for positions in itertools.combinations(range(len(magic)), r=changes):
                        signature = bytearray(magic)
                        for p in positions:
                            # 0x2E is the regex metacharacter "." which, combined with
                            # re.DOTALL below, matches any single byte.
                            signature[p] = 0x2E
                        yield changes, bytes(signature)
        best_guess = None
        search_space = memoryview(data)
        for flaws, sig in signatures(*NSArchive.MAGICS):
            if flaws > 1:
                # Signatures with more than one wildcard are only searched for in the first
                # 0x20000 bytes. NOTE(review): presumably to bound cost and false positives.
                search_space = search_space[:0x20_000]
            # NOTE(review): the signature is used as a regular expression verbatim; this assumes
            # that no magic contains regex metacharacters - confirm against NSArchive.MAGICS.
            # A match in the first 4 bytes would give a negative offset, which cannot be the
            # start of an archive and would make callers slice from the end of the buffer.
            matches = [
                m.start() - 4
                for m in re.finditer(sig, search_space, flags=re.DOTALL)
                if m.start() >= 4
            ]
            if before >= 0:
                matches = [match for match in matches if match < before]
            # Highest offsets first: the last aligned occurrence wins.
            matches.reverse()
            archive = None
            for match in matches:
                if match % 0x200 == 0:
                    archive = match
                    break
            # Compare against None explicitly; offset 0 is a valid, aligned archive position.
            if archive is None:
                if matches and best_guess is None:
                    # After the reversal, the last entry is the lowest offset.
                    best_guess = matches[-1]
            else:
                msg = F'Archive signature was found at offset 0x{archive:X}'
                if flaws > 0:
                    msg = F'{msg}; it has {flaws} imperfections and was likely modified'
                cls.log_info(F'{msg}.')
                return archive
        if best_guess is not None:
            cls.log_info(F'A signature was found at offset 0x{best_guess:08X}; it is not properly aligned.')
            return best_guess
        return None

    def unpack(self, data):
        """
        Parse the NSIS archive contained in `data` and yield one packed entry per archive item,
        followed by the entries `setup.bin` (the archive header data) and `setup.nsis` (the
        script, encoded with `self.codec`).

        Raises the last parser error, or a `ValueError` if no archive marker was found at all.
        """
        memory = memoryview(data)
        before = -1
        _error = None
        while True:
            offset = self._find_archive_offset(data, before)
            if offset is None:
                # No candidates left: surface the most recent parser failure if there was one.
                _error = _error or ValueError('Unable to find an NSIS archive marker.')
                raise _error
            try:
                arc = NSArchive.Parse(memory[offset:], log=self.log_debug)
            except Exception as e:
                # Remember the failure and retry with candidates located before this offset.
                _error = e
                before = offset
            else:
                break

        def info():
            # Pieces of the one-line archive summary that is logged below.
            yield F'{arc.header.type.name} archive'
            yield F'compression type {arc.method.value}'
            yield F'mystery value 0x{arc.header.unknown_value:X}'
            yield 'solid archive' if arc.solid else 'fragmented archive'
            yield '64-bit header' if arc.header.is64bit else '32-bit header'
            yield 'unicode' if arc.header.unicode else 'ascii'

        self.log_info(', '.join(info()))

        for item in arc.header.items:
            # Items without a path are named after their offset.
            path = item.path or F'item-0x{item.offset:X}'
            # The item is bound as a default argument so that each callable extracts its own
            # item rather than whatever the loop variable refers to when it is invoked.
            yield self._pack(path, item.mtime, lambda i=item: arc._extract_item(i).data)

        yield self._pack('setup.bin', None, arc.header_data)
        yield self._pack('setup.nsis', None, arc.script.encode(self.codec))

    @classmethod
    def handles(cls, data) -> bool:
        """
        Return whether `buffer_contains` reports any of the magics in `NSArchive.MAGICS` for `data`.
        """
        return any(buffer_contains(data, magic) for magic in NSArchive.MAGICS)

Classes

class xtnsis (*paths, list=False, join_path=False, drop_path=False, fuzzy=0, exact=False, regex=False, path=b'path', exclude=None, date=b'date', pwd=b'')

Extract files from NSIS archives. Nullsoft Scriptable Install System is a Windows installer framework often used for software distribution.

This unit extracts items with an associated virtual path from a container; each extracted item is emitted as a separate chunk with a corresponding meta variable named "path".

Positional arguments to xtnsis are patterns to filter the extracted items. Use the -x flag to add an exclusion pattern. To extract all files with a foo or bar extension, but none that have the word "temp" in their path:

xtnsis .foo .bar -x temp

To view only the paths of all chunks, use the listing switch:

emit data | ... | xtnsis -l

Otherwise, extracted items are written to the standard output port and usually require a frame to properly process. In order to dump all extracted data to disk, the following pipeline can be used:

emit data | ... | xtnsis [| dump extracted/{path} ]

The value {path} is a placeholder which is substituted by the virtual path of the extracted item. When using xtnsis to unpack a file on disk, the following pattern can be useful:

ef pack.bin [| xtnsis -j | d2p ]

The unit ef is also a path extractor. By specifying -j (or --join), the paths of extracted items are combined. Here, d2p is a shortcut for dump {path}. It deconflicts the joined paths with the local file system: If pack.bin contains items one.txt and two.txt, the following local file tree would be the result:

pack.bin
pack/one.txt
pack/two.txt

Finally, the -d (or --drop) switch can be used to not create (or alter) the path metadata at all, which is useful in cases where path metadata from a previous unit should be preserved.

Expand source code Browse git
class xtnsis(ArchiveUnit, docs='{0}{p}{PathExtractorUnit}'):
    """
    Extract files from NSIS archives. Nullsoft Scriptable Install System is a Windows installer
    framework often used for software distribution.
    """

    @classmethod
    def _find_archive_offset(cls, data: bytearray, before: int = -1, flawmax=2):
        """
        Search `data` for an NSIS archive signature and return the candidate archive offset.

        Each magic in `NSArchive.MAGICS` is tried verbatim first, then with 1 up to `flawmax`
        bytes replaced by a wildcard. The reported offset is the match position minus 4.
        For each signature, the highest offset that is a multiple of 0x200 is returned at once.
        If no signature yields such an offset, the lowest offset of the first signature that
        matched at all is returned as a fallback. If `before` is non-negative, only offsets
        strictly below it are considered. Returns `None` when nothing was found.
        """
        def signatures(*magics):
            # Yields (wildcard_count, pattern) pairs; all exact patterns come first, then all
            # patterns with one wildcard, and so on.
            for changes in range(flawmax + 1):
                for magic in magics:
                    if not changes:
                        yield 0, magic
                        continue
                    # The order in which positions are blanked does not matter, so combinations
                    # produce every distinct signature exactly once. Permutations would repeat
                    # each of them and cause redundant scans of the buffer.
                    for positions in itertools.combinations(range(len(magic)), r=changes):
                        signature = bytearray(magic)
                        for p in positions:
                            # 0x2E is the regex metacharacter "." which, combined with
                            # re.DOTALL below, matches any single byte.
                            signature[p] = 0x2E
                        yield changes, bytes(signature)
        best_guess = None
        search_space = memoryview(data)
        for flaws, sig in signatures(*NSArchive.MAGICS):
            if flaws > 1:
                # Signatures with more than one wildcard are only searched for in the first
                # 0x20000 bytes. NOTE(review): presumably to bound cost and false positives.
                search_space = search_space[:0x20_000]
            # NOTE(review): the signature is used as a regular expression verbatim; this assumes
            # that no magic contains regex metacharacters - confirm against NSArchive.MAGICS.
            # A match in the first 4 bytes would give a negative offset, which cannot be the
            # start of an archive and would make callers slice from the end of the buffer.
            matches = [
                m.start() - 4
                for m in re.finditer(sig, search_space, flags=re.DOTALL)
                if m.start() >= 4
            ]
            if before >= 0:
                matches = [match for match in matches if match < before]
            # Highest offsets first: the last aligned occurrence wins.
            matches.reverse()
            archive = None
            for match in matches:
                if match % 0x200 == 0:
                    archive = match
                    break
            # Compare against None explicitly; offset 0 is a valid, aligned archive position.
            if archive is None:
                if matches and best_guess is None:
                    # After the reversal, the last entry is the lowest offset.
                    best_guess = matches[-1]
            else:
                msg = F'Archive signature was found at offset 0x{archive:X}'
                if flaws > 0:
                    msg = F'{msg}; it has {flaws} imperfections and was likely modified'
                cls.log_info(F'{msg}.')
                return archive
        if best_guess is not None:
            cls.log_info(F'A signature was found at offset 0x{best_guess:08X}; it is not properly aligned.')
            return best_guess
        return None

    def unpack(self, data):
        """
        Parse the NSIS archive contained in `data` and yield one packed entry per archive item,
        followed by the entries `setup.bin` (the archive header data) and `setup.nsis` (the
        script, encoded with `self.codec`).

        Raises the last parser error, or a `ValueError` if no archive marker was found at all.
        """
        memory = memoryview(data)
        before = -1
        _error = None
        while True:
            offset = self._find_archive_offset(data, before)
            if offset is None:
                # No candidates left: surface the most recent parser failure if there was one.
                _error = _error or ValueError('Unable to find an NSIS archive marker.')
                raise _error
            try:
                arc = NSArchive.Parse(memory[offset:], log=self.log_debug)
            except Exception as e:
                # Remember the failure and retry with candidates located before this offset.
                _error = e
                before = offset
            else:
                break

        def info():
            # Pieces of the one-line archive summary that is logged below.
            yield F'{arc.header.type.name} archive'
            yield F'compression type {arc.method.value}'
            yield F'mystery value 0x{arc.header.unknown_value:X}'
            yield 'solid archive' if arc.solid else 'fragmented archive'
            yield '64-bit header' if arc.header.is64bit else '32-bit header'
            yield 'unicode' if arc.header.unicode else 'ascii'

        self.log_info(', '.join(info()))

        for item in arc.header.items:
            # Items without a path are named after their offset.
            path = item.path or F'item-0x{item.offset:X}'
            # The item is bound as a default argument so that each callable extracts its own
            # item rather than whatever the loop variable refers to when it is invoked.
            yield self._pack(path, item.mtime, lambda i=item: arc._extract_item(i).data)

        yield self._pack('setup.bin', None, arc.header_data)
        yield self._pack('setup.nsis', None, arc.script.encode(self.codec))

    @classmethod
    def handles(cls, data) -> bool:
        """
        Return whether `buffer_contains` reports any of the magics in `NSArchive.MAGICS` for `data`.
        """
        return any(buffer_contains(data, magic) for magic in NSArchive.MAGICS)

Ancestors

Subclasses

Class variables

var reverse

The type of the None singleton.

Methods

def unpack(self, data)
Expand source code Browse git
def unpack(self, data):
    """
    Locate and parse the NSIS archive in `data`, then yield one packed entry for every item
    of the archive, followed by `setup.bin` (the header data) and `setup.nsis` (the script,
    encoded with `self.codec`).

    If parsing fails at a candidate offset, the search is repeated for candidates before that
    offset. Once no candidate remains, the most recent parser error is raised; if there was
    none, a `ValueError` is raised instead.
    """
    view = memoryview(data)
    limit = -1
    failure = None
    while True:
        start = self._find_archive_offset(data, limit)
        if start is None:
            raise failure or ValueError('Unable to find an NSIS archive marker.')
        try:
            archive = NSArchive.Parse(view[start:], log=self.log_debug)
        except Exception as exc:
            # Keep the error for later and look for a marker further towards the front.
            failure, limit = exc, start
            continue
        break

    # One-line summary of the archive properties for the log.
    summary = [
        F'{archive.header.type.name} archive',
        F'compression type {archive.method.value}',
        F'mystery value 0x{archive.header.unknown_value:X}',
        'solid archive' if archive.solid else 'fragmented archive',
        '64-bit header' if archive.header.is64bit else '32-bit header',
        'unicode' if archive.header.unicode else 'ascii',
    ]
    self.log_info(', '.join(summary))

    for entry in archive.header.items:
        # Entries that carry no path are named after their offset.
        name = entry.path or F'item-0x{entry.offset:X}'
        # Binding the entry as a default argument pins it to this iteration.
        yield self._pack(name, entry.mtime, lambda i=entry: archive._extract_item(i).data)

    yield self._pack('setup.bin', None, archive.header_data)
    yield self._pack('setup.nsis', None, archive.script.encode(self.codec))

Inherited members