Module refinery.units.formats.swf

Expand source code Browse git
from __future__ import annotations

import codecs
import enum
import lzma
import struct
import zlib

from refinery.lib.id import Fmt, get_media_format
from refinery.lib.structures import Struct, StructReader, StructReaderBits
from refinery.units.formats import PathExtractorUnit, UnpackResult


class SWFCompression(bytes, enum.Enum):
    """
    The three-byte signatures accepted at the start of an SWF file. The header parser maps the
    signature to one of these members, and the body is then used as-is (NONE), inflated with zlib
    (ZLIB), or decoded with LZMA (LZMA).
    """
    NONE = b'FWS'
    ZLIB = b'CWS'
    LZMA = b'ZWS'


class SWFTagType(int, enum.Enum):
    """
    Numeric codes of the SWF tags that this module knows by name. The code is taken from the upper
    bits of each tag header; codes that are not listed here are kept as plain integers by the tag
    parser.
    """
    End                 = 0   # noqa
    DefineBits          = 6   # noqa
    JPEGTables          = 8   # noqa
    DefineFont          = 10  # noqa
    DoAction            = 12  # noqa
    DefineSound         = 14  # noqa
    DefineBitsLossless  = 20  # noqa
    DefineBitsJPEG2     = 21  # noqa
    DefineBitsJPEG3     = 35  # noqa
    DefineBitsLossless2 = 36  # noqa
    DefineFont2         = 48  # noqa
    DoInitAction        = 59  # noqa
    DefineVideoStream   = 60  # noqa
    VideoFrame          = 61  # noqa
    DoABC               = 72  # noqa
    DefineFont3         = 75  # noqa
    SymbolClass         = 76  # noqa
    DoABC2              = 82  # noqa
    DefineBinaryData    = 87  # noqa
    DefineBitsJPEG4     = 90  # noqa
    DefineFont4         = 91  # noqa


class SWFSoundCodec(int, enum.Enum):
    """
    Sound codec identifiers; the extractor reads them from the upper four bits of the flags byte
    of a DefineSound tag.
    """
    UncompressedNE = 0   # noqa
    ADPCM          = 1   # noqa
    MP3            = 2   # noqa
    UncompressedLE = 3   # noqa
    Nellymoser16k  = 4   # noqa
    Nellymoser8k   = 5   # noqa
    Nellymoser     = 6   # noqa
    Speex          = 11  # noqa


# File extension used for extracted sound data, keyed by codec. The sound payload is emitted
# without any container being added, so the extension is only a naming hint.
_SOUND_EXTENSIONS = {
    SWFSoundCodec.UncompressedNE : 'wav',
    SWFSoundCodec.ADPCM          : 'wav',
    SWFSoundCodec.MP3            : 'mp3',
    SWFSoundCodec.UncompressedLE : 'wav',
    SWFSoundCodec.Nellymoser16k  : 'pcm',
    SWFSoundCodec.Nellymoser8k   : 'pcm',
    SWFSoundCodec.Nellymoser     : 'pcm',
    SWFSoundCodec.Speex          : 'spx',
}


def sound_extension(codec: int) -> str:
    """
    Return the file extension to use for sound data with the given numeric codec identifier.
    Identifiers that are not a known codec, or that have no registered extension, yield 'bin'.
    """
    try:
        known = SWFSoundCodec(codec)
    except ValueError:
        # not one of the codec identifiers this module knows about
        return 'bin'
    return _SOUND_EXTENSIONS.get(known, 'bin')


class SWFHeader(Struct):
    """
    The first eight bytes of an SWF file: a three-byte signature that selects the compression, a
    one-byte version, and a 32-bit file length field.
    """
    def __init__(self, reader: StructReader):
        magic = bytes(reader.read_exactly(3))
        try:
            compression = SWFCompression(magic)
        except ValueError:
            # anything other than FWS, CWS or ZWS is not an SWF file
            raise ValueError(F'Invalid SWF signature: {magic!r}')
        self.compression = compression
        self.version = reader.u8()
        self.file_length = reader.u32()


class SWFRect(Struct):
    """
    A bit-packed rectangle: a 5-bit field width followed by four signed integers of that width,
    stored as xmin, xmax, ymin, ymax. The reader is switched to big-endian bit order while the
    fields are read, realigned to the next byte boundary, and left in little-endian mode.
    """
    def __init__(self, reader: StructReaderBits):
        reader.bigendian = True
        field_width = reader.read_integer(5)
        # the four bounds are read back to back, in storage order
        bounds = [reader.read_integer(field_width, signed=True) for _ in range(4)]
        self.xmin, self.xmax, self.ymin, self.ymax = bounds
        reader.byte_align()
        reader.bigendian = False


class SWFTag(Struct):
    """
    A single SWF tag. The 16-bit tag header carries the tag code in its upper ten bits and a
    length in its lower six bits; a length of 0x3F signals that the actual length follows as a
    32-bit integer. The `type` attribute is a `SWFTagType` member when the code is known and the
    plain integer code otherwise.
    """
    def __init__(self, reader: StructReader):
        code, size = divmod(reader.u16(), 0x40)
        if size == 0x3F:
            # long form: the six-bit field is saturated and the real length follows
            size = reader.u32()
        try:
            kind = SWFTagType(code)
        except ValueError:
            kind = code
        self.type = kind
        self.length = size
        self.data = reader.read_exactly(size)


class SWF(Struct):
    """
    A parsed SWF file. After the eight-byte header, the remainder of the file is decompressed
    according to the signature and then parsed into the stage rectangle, the frame rate, the frame
    count, and the list of tags. Tag parsing stops at the first End tag or when the data runs out.

    Raises `ValueError` for an unknown signature; errors from `zlib` and `lzma` propagate when the
    compressed body is damaged.
    """
    def __init__(self, reader: StructReader):
        self.header = SWFHeader(reader)
        rest = reader.read()
        if self.header.compression == SWFCompression.ZLIB:
            rest = zlib.decompress(rest)
        elif self.header.compression == SWFCompression.LZMA:
            # Layout after the eight header bytes: a 32-bit compressed length, five LZMA property
            # bytes, and then the LZMA stream. The compressed length counts only the stream, not
            # the property bytes, so the stream spans offsets 9 to 9 + compressed_length.
            compressed_length = int.from_bytes(bytes(rest[:4]), 'little')
            lzma_props = bytes(rest[4:9])
            lzma_data = bytes(rest[9:9 + compressed_length])
            # The file length field covers the eight uncompressed header bytes as well.
            uncompressed_size = self.header.file_length - 8
            # Rebuild the 13-byte header of the legacy .lzma container: properties plus a 64-bit
            # uncompressed size.
            header = lzma_props + struct.pack('<Q', uncompressed_size)
            rest = lzma.decompress(header + lzma_data, format=lzma.FORMAT_ALONE)
        body = StructReaderBits(memoryview(bytearray(rest)))
        self.rect = SWFRect(body)
        body.bigendian = False
        self.frame_rate = body.u16()
        self.frame_count = body.u16()
        self.tags: list[SWFTag] = []
        while not body.eof:
            tag = SWFTag(body)
            self.tags.append(tag)
            if isinstance(tag.type, SWFTagType) and tag.type == SWFTagType.End:
                break


def _png_chunk(chunk_type: bytes, data: bytes | bytearray) -> bytearray:
    chunk = bytearray()
    chunk.extend(struct.pack('>I', len(data)))
    chunk.extend(chunk_type)
    chunk.extend(data)
    crc = zlib.crc32(chunk_type)
    crc = zlib.crc32(data, crc)
    chunk.extend(struct.pack('>I', crc & 0xFFFFFFFF))
    return chunk


def reconstruct_jpeg(
    data: bytes | bytearray | memoryview,
    jpeg_tables: bytes | bytearray | memoryview | None = None,
) -> bytearray:
    """
    Turn the JPEG payload of an SWF image tag into a stand-alone JPEG stream.

    When `jpeg_tables` is given, the shared tables are placed between the start-of-image marker
    and the image data; the markers that frame the tables and the start-of-image marker of the
    image are dropped so that a single stream remains. Every `FF D9 FF D8` sequence, i.e. an
    end-of-image marker directly followed by a start-of-image marker, is removed: this covers both
    the erroneous header found in old SWF files and payloads that consist of two concatenated
    streams. The result always begins with `FF D8` and ends with `FF D9`.
    """
    soi = b'\xFF\xD8'
    eoi = b'\xFF\xD9'
    seam = eoi + soi
    image = bytes(data)
    if jpeg_tables is None:
        stream = bytearray(image)
    else:
        tables = bytes(jpeg_tables)
        if tables[:2] == soi:
            tables = tables[2:]
        if tables[-2:] == eoi:
            tables = tables[:-2]
        # The erroneous header has to go before the start-of-image test; otherwise the marker
        # behind it would survive and end up in the middle of the merged stream.
        if image[:4] == seam:
            image = image[4:]
        if image[:2] == soi:
            image = image[2:]
        stream = bytearray(soi)
        stream += tables
        stream += image
    # single left-to-right pass over non-overlapping occurrences
    stream = stream.replace(seam, b'')
    if stream[:2] != soi:
        stream[0:0] = soi
    if stream[-2:] != eoi:
        stream.extend(eoi)
    return stream


def reconstruct_png(
    width: int,
    height: int,
    has_alpha: bool,
    format_code: int,
    color_table_size: int,
    zlib_data: bytes | bytearray | memoryview,
) -> bytearray:
    """
    Convert the zlib-compressed pixel data of an SWF lossless bitmap into a PNG file.

    The `format_code` selects the pixel layout:

    - 3: one palette index per pixel, preceded by a color table with `color_table_size` entries.
      Entries are RGB, or RGBA when `has_alpha` is set; the alpha values are written to a tRNS
      chunk. The result is a palette PNG.
    - 4: 15-bit color; each pixel is a 16-bit big-endian value with five bits per channel below
      one unused top bit. The result is an 8-bit RGB PNG.
    - 5: four bytes per pixel; the first byte is alpha when `has_alpha` is set and is ignored
      otherwise. Colors with alpha are stored premultiplied and are divided by alpha again. The
      result is an 8-bit RGBA or RGB PNG.

    Rows of formats 3 and 4 are padded to a multiple of four bytes in the SWF data; the padding is
    skipped. Raises `ValueError` for any other format code and `zlib.error` when the data cannot
    be decompressed.
    """
    raw = zlib.decompress(bytes(zlib_data))
    palette = None
    transparency = None
    rows = bytearray()

    if format_code == 3:
        color_type = 3
        entry_size = 4 if has_alpha else 3
        table_bytes = color_table_size * entry_size
        table = raw[:table_bytes]
        pixels = raw[table_bytes:]
        palette = bytearray()
        transparency = bytearray()
        for offset in range(0, table_bytes, entry_size):
            # Entries are stored as R, G, B with the alpha value last.
            # NOTE(review): palette colors are copied verbatim; if they are premultiplied like
            # the 32-bit pixels, they would need the same division by alpha. Confirm.
            palette.extend(table[offset:offset + 3])
            if has_alpha:
                transparency.extend(table[offset + 3:offset + 4])
        # each row occupies a multiple of four bytes, only the first `width` carry pixels
        stride = (width + 3) & ~3
        for y in range(height):
            start = y * stride
            rows.append(0)
            rows.extend(pixels[start:start + width])
    elif format_code == 4:
        color_type = 2
        # two bytes per pixel, rows padded to a multiple of four bytes
        stride = (width * 2 + 3) & ~3
        for y in range(height):
            start = y * stride
            rows.append(0)
            for x in range(width):
                offset = start + x * 2
                # the channels form an MSB-first bit field, i.e. a big-endian 16-bit value
                pixel = int.from_bytes(raw[offset:offset + 2], 'big')
                rows.extend((
                    ((pixel >> 10) & 0x1F) * 255 // 31,
                    ((pixel >> 5) & 0x1F) * 255 // 31,
                    (pixel & 0x1F) * 255 // 31,
                ))
    elif format_code == 5:
        color_type = 6 if has_alpha else 2
        stride = width * 4
        for y in range(height):
            start = y * stride
            rows.append(0)
            for x in range(width):
                offset = start + x * 4
                if not has_alpha:
                    # skip the unused leading byte
                    rows.extend(raw[offset + 1:offset + 4])
                    continue
                a = raw[offset]
                r = raw[offset + 1]
                g = raw[offset + 2]
                b = raw[offset + 3]
                if 0 < a < 255:
                    # undo the premultiplication, clamping against rounding overshoot
                    r = min(r * 255 // a, 255)
                    g = min(g * 255 // a, 255)
                    b = min(b * 255 // a, 255)
                rows.extend((r, g, b, a))
    else:
        raise ValueError(F'Unknown lossless bitmap format code: {format_code}')

    png = bytearray(b'\x89PNG\r\n\x1A\n')
    # bit depth 8; default compression, filter and interlace methods
    ihdr = struct.pack('>IIBBBBB', width, height, 8, color_type, 0, 0, 0)
    png.extend(_png_chunk(b'IHDR', ihdr))
    if palette is not None:
        png.extend(_png_chunk(b'PLTE', bytes(palette)))
        if has_alpha:
            png.extend(_png_chunk(b'tRNS', bytes(transparency)))
    # every scanline was prefixed with filter type 0 above
    png.extend(_png_chunk(b'IDAT', zlib.compress(bytes(rows))))
    png.extend(_png_chunk(b'IEND', b''))
    return png


class swf(PathExtractorUnit):
    """
    Extract content from Shockwave Flash (SWF) files; multimedia data and ActionScript bytecode.
    """

    @classmethod
    def handles(cls, data) -> bool | None:
        # claim the input only when it is identified as SWF
        return get_media_format(data) == Fmt.SWF

    def unpack(self, data):
        # The file is walked twice. The first pass collects the shared JPEG tables and the names
        # that SymbolClass tags assign to character ids; the second pass yields one result per
        # extractable tag, using those names for the paths where available.
        parsed = SWF.Parse(data)
        jpeg_tables: bytes | bytearray | memoryview | None = None
        symbol_names: dict[int, str] = {}
        counters: dict[str, int] = {}

        def _text(raw) -> str:
            # names are expected to be UTF-8; latin-1 never fails and serves as the fallback
            try:
                return codecs.decode(raw, 'utf-8')
            except UnicodeDecodeError:
                return codecs.decode(raw, 'latin-1')

        for tag in parsed.tags:
            if tag.type == SWFTagType.JPEGTables:
                if len(tag.data) > 0:
                    jpeg_tables = tag.data
            elif tag.type == SWFTagType.SymbolClass:
                reader = StructReader(memoryview(tag.data))
                for _ in range(reader.u16()):
                    char_id = reader.u16()
                    name = _text(reader.read_c_string())
                    if name:
                        symbol_names[char_id] = name

        def _unique(base_path: str) -> str:
            # the first use of a path is returned unchanged, repetitions get a numeric suffix
            count = counters.get(base_path, 0)
            counters[base_path] = count + 1
            if count == 0:
                return base_path
            return F'{base_path}.{count}'

        def _name(char_id: int, fallback: str) -> str:
            # use the last path component of the symbol name, if the id has a usable one
            name = symbol_names.get(char_id)
            if name is not None:
                name = name.replace('\\', '/').rsplit('/', 1)[-1]
                name = name.replace('\0', '')
                if name:
                    return name
            return fallback

        def _image(payload: bytes):
            # The JPEG tags other than DefineBits may carry PNG or GIF data instead of JPEG; such
            # payloads are complete files already and must not be framed with JPEG markers.
            if payload[:8] == b'\x89PNG\r\n\x1A\n':
                return 'png', payload
            if payload[:6] in (b'GIF87a', b'GIF89a'):
                return 'gif', payload
            return 'jpg', reconstruct_jpeg(payload)

        for tag in parsed.tags:
            tt = tag.type
            td = memoryview(tag.data)

            if tt == SWFTagType.DefineBits:
                reader = StructReader(td)
                char_id = reader.u16()
                image_data = reader.read()
                name = _name(char_id, F'{char_id}')
                path = _unique(F'images/{name}.jpg')
                yield UnpackResult(path, reconstruct_jpeg(image_data, jpeg_tables))

            elif tt in (
                SWFTagType.DefineBitsJPEG2,
                SWFTagType.DefineBitsJPEG3,
                SWFTagType.DefineBitsJPEG4,
            ):
                reader = StructReader(td)
                char_id = reader.u16()
                if tt == SWFTagType.DefineBitsJPEG2:
                    image_data = reader.read()
                else:
                    # the image is followed by alpha data, which is not extracted
                    alpha_offset = reader.u32()
                    if tt == SWFTagType.DefineBitsJPEG4:
                        # skip the 16-bit field that precedes the image data in this tag
                        reader.u16()
                    image_data = reader.read_exactly(alpha_offset)
                ext, payload = _image(bytes(image_data))
                name = _name(char_id, F'{char_id}')
                path = _unique(F'images/{name}.{ext}')
                yield UnpackResult(path, payload)

            elif tt in (
                SWFTagType.DefineBitsLossless,
                SWFTagType.DefineBitsLossless2,
            ):
                reader = StructReader(td)
                char_id = reader.u16()
                fmt = reader.u8()
                w = reader.u16()
                h = reader.u16()
                if fmt == 3:
                    # only palette bitmaps carry a table size; it is stored minus one
                    color_table_size = reader.u8() + 1
                else:
                    color_table_size = 0
                zdata = reader.read()
                # the second variant of the tag is the one with an alpha channel
                has_alpha = tt == SWFTagType.DefineBitsLossless2
                name = _name(char_id, F'{char_id}')
                path = _unique(F'images/{name}.png')
                yield UnpackResult(path, reconstruct_png(
                    w, h, has_alpha, fmt, color_table_size, zdata))

            elif tt == SWFTagType.DefineSound:
                reader = StructReader(td)
                char_id = reader.u16()
                flags = reader.u8()
                codec_id = (flags >> 4) & 0xF
                # skip the 32-bit field between the flags and the sound data
                reader.u32()
                sound_data = bytes(reader.read())
                ext = sound_extension(codec_id)
                name = _name(char_id, F'{char_id}')
                path = _unique(F'sounds/{name}.{ext}')
                yield UnpackResult(path, sound_data)

            elif tt == SWFTagType.VideoFrame:
                reader = StructReader(td)
                stream_id = reader.u16()
                frame_num = reader.u16()
                video_data = bytes(reader.read())
                path = _unique(F'video/{stream_id}/{frame_num}.bin')
                yield UnpackResult(path, video_data)

            elif tt == SWFTagType.DoAction:
                path = _unique('scripts/doaction.as')
                yield UnpackResult(path, bytes(td))

            elif tt == SWFTagType.DoInitAction:
                reader = StructReader(td)
                sprite_id = reader.u16()
                bytecode = bytes(reader.read())
                path = _unique(F'scripts/initaction_{sprite_id}.as')
                yield UnpackResult(path, bytecode)

            elif tt == SWFTagType.DoABC:
                path = _unique('scripts/abc.abc')
                yield UnpackResult(path, bytes(td))

            elif tt == SWFTagType.DoABC2:
                reader = StructReader(td)
                # skip the 32-bit field in front of the name
                reader.u32()
                abc_name = _text(reader.read_c_string())
                if not abc_name:
                    abc_name = 'abc'
                abc_data = bytes(reader.read())
                path = _unique(F'scripts/{abc_name}.abc')
                yield UnpackResult(path, abc_data)

            elif tt == SWFTagType.DefineBinaryData:
                reader = StructReader(td)
                tag_id = reader.u16()
                # skip the 32-bit field between the id and the data
                reader.u32()
                binary_data = bytes(reader.read())
                name = _name(tag_id, F'{tag_id}')
                path = _unique(F'binary/{name}.bin')
                yield UnpackResult(path, binary_data)

            elif tt in (
                SWFTagType.DefineFont,
                SWFTagType.DefineFont2,
                SWFTagType.DefineFont3,
                SWFTagType.DefineFont4,
            ):
                # fonts are not converted; the entire tag body is emitted, id included
                reader = StructReader(td)
                char_id = reader.u16()
                path = _unique(F'fonts/{char_id}.font')
                yield UnpackResult(path, bytes(td))

Functions

def sound_extension(codec)
Expand source code Browse git
def sound_extension(codec: int) -> str:
    """
    Return the file extension to use for sound data with the given numeric codec identifier.
    Identifiers that are not a known codec, or that have no registered extension, yield 'bin'.
    """
    try:
        known = SWFSoundCodec(codec)
    except ValueError:
        # not one of the codec identifiers this module knows about
        return 'bin'
    return _SOUND_EXTENSIONS.get(known, 'bin')
def reconstruct_jpeg(data, jpeg_tables=None)
Expand source code Browse git
def reconstruct_jpeg(
    data: bytes | bytearray | memoryview,
    jpeg_tables: bytes | bytearray | memoryview | None = None,
) -> bytearray:
    """
    Turn the JPEG payload of an SWF image tag into a stand-alone JPEG stream.

    When `jpeg_tables` is given, the shared tables are placed between the start-of-image marker
    and the image data; the markers that frame the tables and the start-of-image marker of the
    image are dropped so that a single stream remains. Every `FF D9 FF D8` sequence, i.e. an
    end-of-image marker directly followed by a start-of-image marker, is removed: this covers both
    the erroneous header found in old SWF files and payloads that consist of two concatenated
    streams. The result always begins with `FF D8` and ends with `FF D9`.
    """
    soi = b'\xFF\xD8'
    eoi = b'\xFF\xD9'
    seam = eoi + soi
    image = bytes(data)
    if jpeg_tables is None:
        stream = bytearray(image)
    else:
        tables = bytes(jpeg_tables)
        if tables[:2] == soi:
            tables = tables[2:]
        if tables[-2:] == eoi:
            tables = tables[:-2]
        # The erroneous header has to go before the start-of-image test; otherwise the marker
        # behind it would survive and end up in the middle of the merged stream.
        if image[:4] == seam:
            image = image[4:]
        if image[:2] == soi:
            image = image[2:]
        stream = bytearray(soi)
        stream += tables
        stream += image
    # single left-to-right pass over non-overlapping occurrences
    stream = stream.replace(seam, b'')
    if stream[:2] != soi:
        stream[0:0] = soi
    if stream[-2:] != eoi:
        stream.extend(eoi)
    return stream
def reconstruct_png(width, height, has_alpha, format_code, color_table_size, zlib_data)
Expand source code Browse git
def reconstruct_png(
    width: int,
    height: int,
    has_alpha: bool,
    format_code: int,
    color_table_size: int,
    zlib_data: bytes | bytearray | memoryview,
) -> bytearray:
    """
    Convert the zlib-compressed pixel data of an SWF lossless bitmap into a PNG file.

    The `format_code` selects the pixel layout:

    - 3: one palette index per pixel, preceded by a color table with `color_table_size` entries.
      Entries are RGB, or RGBA when `has_alpha` is set; the alpha values are written to a tRNS
      chunk. The result is a palette PNG.
    - 4: 15-bit color; each pixel is a 16-bit big-endian value with five bits per channel below
      one unused top bit. The result is an 8-bit RGB PNG.
    - 5: four bytes per pixel; the first byte is alpha when `has_alpha` is set and is ignored
      otherwise. Colors with alpha are stored premultiplied and are divided by alpha again. The
      result is an 8-bit RGBA or RGB PNG.

    Rows of formats 3 and 4 are padded to a multiple of four bytes in the SWF data; the padding is
    skipped. Raises `ValueError` for any other format code and `zlib.error` when the data cannot
    be decompressed.
    """
    raw = zlib.decompress(bytes(zlib_data))
    palette = None
    transparency = None
    rows = bytearray()

    if format_code == 3:
        color_type = 3
        entry_size = 4 if has_alpha else 3
        table_bytes = color_table_size * entry_size
        table = raw[:table_bytes]
        pixels = raw[table_bytes:]
        palette = bytearray()
        transparency = bytearray()
        for offset in range(0, table_bytes, entry_size):
            # Entries are stored as R, G, B with the alpha value last.
            # NOTE(review): palette colors are copied verbatim; if they are premultiplied like
            # the 32-bit pixels, they would need the same division by alpha. Confirm.
            palette.extend(table[offset:offset + 3])
            if has_alpha:
                transparency.extend(table[offset + 3:offset + 4])
        # each row occupies a multiple of four bytes, only the first `width` carry pixels
        stride = (width + 3) & ~3
        for y in range(height):
            start = y * stride
            rows.append(0)
            rows.extend(pixels[start:start + width])
    elif format_code == 4:
        color_type = 2
        # two bytes per pixel, rows padded to a multiple of four bytes
        stride = (width * 2 + 3) & ~3
        for y in range(height):
            start = y * stride
            rows.append(0)
            for x in range(width):
                offset = start + x * 2
                # the channels form an MSB-first bit field, i.e. a big-endian 16-bit value
                pixel = int.from_bytes(raw[offset:offset + 2], 'big')
                rows.extend((
                    ((pixel >> 10) & 0x1F) * 255 // 31,
                    ((pixel >> 5) & 0x1F) * 255 // 31,
                    (pixel & 0x1F) * 255 // 31,
                ))
    elif format_code == 5:
        color_type = 6 if has_alpha else 2
        stride = width * 4
        for y in range(height):
            start = y * stride
            rows.append(0)
            for x in range(width):
                offset = start + x * 4
                if not has_alpha:
                    # skip the unused leading byte
                    rows.extend(raw[offset + 1:offset + 4])
                    continue
                a = raw[offset]
                r = raw[offset + 1]
                g = raw[offset + 2]
                b = raw[offset + 3]
                if 0 < a < 255:
                    # undo the premultiplication, clamping against rounding overshoot
                    r = min(r * 255 // a, 255)
                    g = min(g * 255 // a, 255)
                    b = min(b * 255 // a, 255)
                rows.extend((r, g, b, a))
    else:
        raise ValueError(F'Unknown lossless bitmap format code: {format_code}')

    png = bytearray(b'\x89PNG\r\n\x1A\n')
    # bit depth 8; default compression, filter and interlace methods
    ihdr = struct.pack('>IIBBBBB', width, height, 8, color_type, 0, 0, 0)
    png.extend(_png_chunk(b'IHDR', ihdr))
    if palette is not None:
        png.extend(_png_chunk(b'PLTE', bytes(palette)))
        if has_alpha:
            png.extend(_png_chunk(b'tRNS', bytes(transparency)))
    # every scanline was prefixed with filter type 0 above
    png.extend(_png_chunk(b'IDAT', zlib.compress(bytes(rows))))
    png.extend(_png_chunk(b'IEND', b''))
    return png

Classes

class SWFCompression (*args, **kwds)

bytes(iterable_of_ints) -> bytes
bytes(string, encoding[, errors]) -> bytes
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer
bytes(int) -> bytes object of size given by the parameter initialized with null bytes
bytes() -> empty bytes object

Construct an immutable array of bytes from:
- an iterable yielding integers in range(256)
- a text string encoded using the specified encoding
- any object implementing the buffer API
- an integer

Expand source code Browse git
class SWFCompression(bytes, enum.Enum):
    """
    The three-byte signatures accepted at the start of an SWF file. The header parser maps the
    signature to one of these members, and the body is then used as-is (NONE), inflated with zlib
    (ZLIB), or decoded with LZMA (LZMA).
    """
    NONE = b'FWS'
    ZLIB = b'CWS'
    LZMA = b'ZWS'

Ancestors

  • builtins.bytes
  • enum.Enum

Class variables

var NONE

The type of the None singleton.

var ZLIB

The type of the None singleton.

var LZMA

The type of the None singleton.

class SWFTagType (*args, **kwds)

int([x]) -> integer
int(x, base=10) -> integer

Convert a number or string to an integer, or return 0 if no arguments are given. If x is a number, return x.__int__(). For floating-point numbers, this truncates towards zero.

If x is not a number or if base is given, then x must be a string, bytes, or bytearray instance representing an integer literal in the given base. The literal can be preceded by '+' or '-' and be surrounded by whitespace. The base defaults to 10. Valid bases are 0 and 2-36. Base 0 means to interpret the base from the string as an integer literal.

>>> int('0b100', base=0)
4
Expand source code Browse git
class SWFTagType(int, enum.Enum):
    """
    Numeric codes of the SWF tags that this module knows by name. The code is taken from the upper
    bits of each tag header; codes that are not listed here are kept as plain integers by the tag
    parser.
    """
    End                 = 0   # noqa
    DefineBits          = 6   # noqa
    JPEGTables          = 8   # noqa
    DefineFont          = 10  # noqa
    DoAction            = 12  # noqa
    DefineSound         = 14  # noqa
    DefineBitsLossless  = 20  # noqa
    DefineBitsJPEG2     = 21  # noqa
    DefineBitsJPEG3     = 35  # noqa
    DefineBitsLossless2 = 36  # noqa
    DefineFont2         = 48  # noqa
    DoInitAction        = 59  # noqa
    DefineVideoStream   = 60  # noqa
    VideoFrame          = 61  # noqa
    DoABC               = 72  # noqa
    DefineFont3         = 75  # noqa
    SymbolClass         = 76  # noqa
    DoABC2              = 82  # noqa
    DefineBinaryData    = 87  # noqa
    DefineBitsJPEG4     = 90  # noqa
    DefineFont4         = 91  # noqa

Ancestors

  • builtins.int
  • enum.Enum

Class variables

var End

The type of the None singleton.

var DefineBits

The type of the None singleton.

var JPEGTables

The type of the None singleton.

var DefineFont

The type of the None singleton.

var DoAction

The type of the None singleton.

var DefineSound

The type of the None singleton.

var DefineBitsLossless

The type of the None singleton.

var DefineBitsJPEG2

The type of the None singleton.

var DefineBitsJPEG3

The type of the None singleton.

var DefineBitsLossless2

The type of the None singleton.

var DefineFont2

The type of the None singleton.

var DoInitAction

The type of the None singleton.

var DefineVideoStream

The type of the None singleton.

var VideoFrame

The type of the None singleton.

var DoABC

The type of the None singleton.

var DefineFont3

The type of the None singleton.

var SymbolClass

The type of the None singleton.

var DoABC2

The type of the None singleton.

var DefineBinaryData

The type of the None singleton.

var DefineBitsJPEG4

The type of the None singleton.

var DefineFont4

The type of the None singleton.

class SWFSoundCodec (*args, **kwds)

int([x]) -> integer
int(x, base=10) -> integer

Convert a number or string to an integer, or return 0 if no arguments are given. If x is a number, return x.__int__(). For floating-point numbers, this truncates towards zero.

If x is not a number or if base is given, then x must be a string, bytes, or bytearray instance representing an integer literal in the given base. The literal can be preceded by '+' or '-' and be surrounded by whitespace. The base defaults to 10. Valid bases are 0 and 2-36. Base 0 means to interpret the base from the string as an integer literal.

>>> int('0b100', base=0)
4
Expand source code Browse git
class SWFSoundCodec(int, enum.Enum):
    """
    Sound codec identifiers; the extractor reads them from the upper four bits of the flags byte
    of a DefineSound tag.
    """
    UncompressedNE = 0   # noqa
    ADPCM          = 1   # noqa
    MP3            = 2   # noqa
    UncompressedLE = 3   # noqa
    Nellymoser16k  = 4   # noqa
    Nellymoser8k   = 5   # noqa
    Nellymoser     = 6   # noqa
    Speex          = 11  # noqa

Ancestors

  • builtins.int
  • enum.Enum

Class variables

var UncompressedNE

The type of the None singleton.

var ADPCM

The type of the None singleton.

var MP3

The type of the None singleton.

var UncompressedLE

The type of the None singleton.

var Nellymoser16k

The type of the None singleton.

var Nellymoser8k

The type of the None singleton.

var Nellymoser

The type of the None singleton.

var Speex

The type of the None singleton.

class SWFHeader (reader)

A class to parse structured data. A Struct class can be instantiated as follows:

foo = Struct(data, bar=29)

The initialization routine of the structure will be called with a single argument reader. If the object data is already a StructReader, then it will be passed as reader. Otherwise, the argument will be wrapped in a StructReader. Additional arguments to the struct are passed through.

Expand source code Browse git
class SWFHeader(Struct):
    """
    The first eight bytes of an SWF file: a three-byte signature that selects the compression, a
    one-byte version, and a 32-bit file length field.
    """
    def __init__(self, reader: StructReader):
        magic = bytes(reader.read_exactly(3))
        try:
            compression = SWFCompression(magic)
        except ValueError:
            # anything other than FWS, CWS or ZWS is not an SWF file
            raise ValueError(F'Invalid SWF signature: {magic!r}')
        self.compression = compression
        self.version = reader.u8()
        self.file_length = reader.u32()

Ancestors

  • Struct
  • typing.Generic
  • collections.abc.Buffer

Static methods

def Parse(reader, *args, **kwargs)
class SWFRect (reader)

A class to parse structured data. A Struct class can be instantiated as follows:

foo = Struct(data, bar=29)

The initialization routine of the structure will be called with a single argument reader. If the object data is already a StructReader, then it will be passed as reader. Otherwise, the argument will be wrapped in a StructReader. Additional arguments to the struct are passed through.

Expand source code Browse git
class SWFRect(Struct):
    """
    A bit-packed rectangle: a 5-bit field width followed by four signed integers of that width,
    stored as xmin, xmax, ymin, ymax. The reader is switched to big-endian bit order while the
    fields are read, realigned to the next byte boundary, and left in little-endian mode.
    """
    def __init__(self, reader: StructReaderBits):
        reader.bigendian = True
        field_width = reader.read_integer(5)
        # the four bounds are read back to back, in storage order
        bounds = [reader.read_integer(field_width, signed=True) for _ in range(4)]
        self.xmin, self.xmax, self.ymin, self.ymax = bounds
        reader.byte_align()
        reader.bigendian = False

Ancestors

  • Struct
  • typing.Generic
  • collections.abc.Buffer

Static methods

def Parse(reader, *args, **kwargs)
class SWFTag (reader)

A class to parse structured data. A Struct class can be instantiated as follows:

foo = Struct(data, bar=29)

The initialization routine of the structure will be called with a single argument reader. If the object data is already a StructReader, then it will be passed as reader. Otherwise, the argument will be wrapped in a StructReader. Additional arguments to the struct are passed through.

Expand source code Browse git
class SWFTag(Struct):
    """
    A single SWF tag. The 16-bit tag header carries the tag code in its upper ten bits and a
    length in its lower six bits; a length of 0x3F signals that the actual length follows as a
    32-bit integer. The `type` attribute is a `SWFTagType` member when the code is known and the
    plain integer code otherwise.
    """
    def __init__(self, reader: StructReader):
        code, size = divmod(reader.u16(), 0x40)
        if size == 0x3F:
            # long form: the six-bit field is saturated and the real length follows
            size = reader.u32()
        try:
            kind = SWFTagType(code)
        except ValueError:
            kind = code
        self.type = kind
        self.length = size
        self.data = reader.read_exactly(size)

Ancestors

  • Struct
  • typing.Generic
  • collections.abc.Buffer

Static methods

def Parse(reader, *args, **kwargs)
class SWF (reader)

A class to parse structured data. A Struct class can be instantiated as follows:

foo = Struct(data, bar=29)

The initialization routine of the structure will be called with a single argument reader. If the object data is already a StructReader, then it will be passed as reader. Otherwise, the argument will be wrapped in a StructReader. Additional arguments to the struct are passed through.

Expand source code Browse git
class SWF(Struct):
    """
    A parsed SWF file: the header, the frame rectangle, the raw 16-bit frame rate and frame count
    fields, and the list of tags up to and including the first End tag.
    """
    def __init__(self, reader: StructReader):
        self.header = SWFHeader(reader)
        payload = reader.read()
        scheme = self.header.compression

        if scheme == SWFCompression.ZLIB:
            payload = zlib.decompress(payload)
        elif scheme == SWFCompression.LZMA:
            # The compressed body starts with a 4-byte little-endian length and 5 LZMA property
            # bytes. An LZMA "alone" header is rebuilt from the property bytes and the expected
            # output size so that the standard library decoder can process the stream.
            stored_length = int.from_bytes(bytes(payload[:4]), 'little')
            stream_end = 4 + stored_length
            properties = bytes(payload[4:9])
            stream = bytes(payload[9:stream_end])
            expected_size = self.header.file_length - 8
            alone_header = properties + struct.pack('<Q', expected_size)
            payload = lzma.decompress(alone_header + stream, format=lzma.FORMAT_ALONE)

        body = StructReaderBits(memoryview(bytearray(payload)))
        self.rect = SWFRect(body)
        body.bigendian = False
        self.frame_rate = body.u16()
        self.frame_count = body.u16()
        self.tags: list[SWFTag] = []

        # Collect tags until the input is exhausted or an End tag has been stored.
        while not body.eof:
            tag = SWFTag(body)
            self.tags.append(tag)
            if not isinstance(tag.type, SWFTagType):
                continue
            if tag.type == SWFTagType.End:
                break

Ancestors

  • Struct
  • typing.Generic
  • collections.abc.Buffer

Static methods

def Parse(reader, *args, **kwargs)
class swf (*paths, list=False, join_path=False, drop_path=False, fuzzy=0, exact=False, regex=False, path=b'path')

Extract content from Shockwave Flash (SWF) files; multimedia data and ActionScript bytecode.

Expand source code Browse git
class swf(PathExtractorUnit):
    """
    Extract content from Shockwave Flash (SWF) files; multimedia data and ActionScript bytecode.
    """

    @classmethod
    def handles(cls, data) -> bool | None:
        # The unit applies whenever the media format detector identifies the input as SWF.
        detected = get_media_format(data)
        return detected == Fmt.SWF

    def unpack(self, data):
        # The tag list is walked twice: the first pass collects the shared JPEG tables and the
        # symbol names, the second pass emits one result per extractable tag.
        parsed = SWF.Parse(data)
        jpeg_tables: bytes | bytearray | memoryview | None = None
        symbol_names: dict[int, str] = {}
        counters: dict[str, int] = {}

        def decode(raw) -> str:
            # Strings are decoded as UTF-8 where possible, with Latin-1 as the fallback.
            try:
                return codecs.decode(raw, 'utf-8')
            except Exception:
                return codecs.decode(raw, 'latin-1')

        def claim(candidate: str) -> str:
            # The first use of a path is returned unchanged; each repeated use is suffixed with
            # the number of times the path was requested before.
            seen = counters.get(candidate, 0)
            counters[candidate] = seen + 1
            return candidate if seen == 0 else F'{candidate}.{seen}'

        def label(char_id: int) -> str:
            # Prefer the last path component of the exported symbol name, with null characters
            # removed; fall back to the numeric character ID.
            symbol = symbol_names.get(char_id)
            if symbol is not None:
                leaf = symbol.replace('\\', '/').rsplit('/', 1)[-1].replace('\0', '')
                if leaf:
                    return leaf
            return F'{char_id}'

        for tag in parsed.tags:
            if tag.type == SWFTagType.JPEGTables:
                # Empty table tags do not replace previously seen tables.
                if len(tag.data):
                    jpeg_tables = tag.data
                continue
            if tag.type != SWFTagType.SymbolClass:
                continue
            reader = StructReader(memoryview(tag.data))
            for _ in range(reader.u16()):
                char_id = reader.u16()
                symbol = decode(reader.read_c_string())
                if symbol:
                    symbol_names[char_id] = symbol

        for tag in parsed.tags:
            kind = tag.type
            view = memoryview(tag.data)

            if kind == SWFTagType.DefineBits:
                reader = StructReader(view)
                char_id = reader.u16()
                encoded = reader.read()
                path = claim(F'images/{label(char_id)}.jpg')
                yield UnpackResult(path, reconstruct_jpeg(encoded, jpeg_tables))

            elif kind == SWFTagType.DefineBitsJPEG2:
                reader = StructReader(view)
                char_id = reader.u16()
                encoded = bytes(reader.read())
                stem = label(char_id)
                # The payload may be a PNG or GIF; those are passed through untouched.
                if encoded.startswith(b'\x89PNG\r\n\x1A\n'):
                    ext, payload = 'png', encoded
                elif encoded.startswith((b'GIF87a', b'GIF89a')):
                    ext, payload = 'gif', encoded
                else:
                    ext, payload = 'jpg', reconstruct_jpeg(encoded)
                path = claim(F'images/{stem}.{ext}')
                yield UnpackResult(path, payload)

            elif kind in (SWFTagType.DefineBitsJPEG3, SWFTagType.DefineBitsJPEG4):
                reader = StructReader(view)
                char_id = reader.u16()
                alpha_offset = reader.u32()
                if kind == SWFTagType.DefineBitsJPEG4:
                    # This variant has one more 16-bit field ahead of the image; it is skipped.
                    reader.u16()
                # Only the first alpha_offset bytes are extracted as the image.
                encoded = reader.read_exactly(alpha_offset)
                path = claim(F'images/{label(char_id)}.jpg')
                yield UnpackResult(path, reconstruct_jpeg(encoded))

            elif kind in (SWFTagType.DefineBitsLossless, SWFTagType.DefineBitsLossless2):
                reader = StructReader(view)
                char_id = reader.u16()
                fmt = reader.u8()
                width = reader.u16()
                height = reader.u16()
                # Only format 3 carries a color table size byte, stored as the size minus one.
                palette_size = (reader.u8() + 1) if fmt == 3 else 0
                zdata = reader.read()
                path = claim(F'images/{label(char_id)}.png')
                second_variant = kind == SWFTagType.DefineBitsLossless2
                yield UnpackResult(path, reconstruct_png(
                    width, height, second_variant, fmt, palette_size, zdata))

            elif kind == SWFTagType.DefineSound:
                reader = StructReader(view)
                char_id = reader.u16()
                # The codec identifier occupies the upper four bits of the flags byte.
                codec_id = (reader.u8() >> 4) & 0xF
                # A 32-bit field precedes the sound data and is skipped.
                reader.u32()
                samples = bytes(reader.read())
                ext = sound_extension(codec_id)
                path = claim(F'sounds/{label(char_id)}.{ext}')
                yield UnpackResult(path, samples)

            elif kind == SWFTagType.VideoFrame:
                reader = StructReader(view)
                stream_id = reader.u16()
                frame_num = reader.u16()
                frame = bytes(reader.read())
                path = claim(F'video/{stream_id}/{frame_num}.bin')
                yield UnpackResult(path, frame)

            elif kind == SWFTagType.DoAction:
                path = claim('scripts/doaction.as')
                yield UnpackResult(path, bytes(view))

            elif kind == SWFTagType.DoInitAction:
                reader = StructReader(view)
                sprite_id = reader.u16()
                bytecode = bytes(reader.read())
                path = claim(F'scripts/initaction_{sprite_id}.as')
                yield UnpackResult(path, bytecode)

            elif kind == SWFTagType.DoABC:
                path = claim('scripts/abc.abc')
                yield UnpackResult(path, bytes(view))

            elif kind == SWFTagType.DoABC2:
                reader = StructReader(view)
                # A 32-bit field precedes the null-terminated name and is skipped.
                reader.u32()
                abc_name = decode(reader.read_c_string()) or 'abc'
                abc_data = bytes(reader.read())
                path = claim(F'scripts/{abc_name}.abc')
                yield UnpackResult(path, abc_data)

            elif kind == SWFTagType.DefineBinaryData:
                reader = StructReader(view)
                tag_id = reader.u16()
                # A 32-bit field precedes the binary payload and is skipped.
                reader.u32()
                blob = bytes(reader.read())
                path = claim(F'binary/{label(tag_id)}.bin')
                yield UnpackResult(path, blob)

            elif kind in (
                SWFTagType.DefineFont,
                SWFTagType.DefineFont2,
                SWFTagType.DefineFont3,
                SWFTagType.DefineFont4,
            ):
                # Fonts are emitted as the complete tag body, including the leading ID.
                char_id = StructReader(view).u16()
                path = claim(F'fonts/{char_id}.font')
                yield UnpackResult(path, bytes(view))

Ancestors

Subclasses

Class variables

var reverse

The type of the None singleton.

Methods

def unpack(self, data)
Expand source code Browse git
def unpack(self, data):
    # The tag list is walked twice: the first pass collects the shared JPEG tables and the
    # symbol names, the second pass emits one result per extractable tag.
    parsed = SWF.Parse(data)
    jpeg_tables: bytes | bytearray | memoryview | None = None
    symbol_names: dict[int, str] = {}
    counters: dict[str, int] = {}

    def decode(raw) -> str:
        # Strings are decoded as UTF-8 where possible, with Latin-1 as the fallback.
        try:
            return codecs.decode(raw, 'utf-8')
        except Exception:
            return codecs.decode(raw, 'latin-1')

    def claim(candidate: str) -> str:
        # The first use of a path is returned unchanged; each repeated use is suffixed with
        # the number of times the path was requested before.
        seen = counters.get(candidate, 0)
        counters[candidate] = seen + 1
        return candidate if seen == 0 else F'{candidate}.{seen}'

    def label(char_id: int) -> str:
        # Prefer the last path component of the exported symbol name, with null characters
        # removed; fall back to the numeric character ID.
        symbol = symbol_names.get(char_id)
        if symbol is not None:
            leaf = symbol.replace('\\', '/').rsplit('/', 1)[-1].replace('\0', '')
            if leaf:
                return leaf
        return F'{char_id}'

    for tag in parsed.tags:
        if tag.type == SWFTagType.JPEGTables:
            # Empty table tags do not replace previously seen tables.
            if len(tag.data):
                jpeg_tables = tag.data
            continue
        if tag.type != SWFTagType.SymbolClass:
            continue
        reader = StructReader(memoryview(tag.data))
        for _ in range(reader.u16()):
            char_id = reader.u16()
            symbol = decode(reader.read_c_string())
            if symbol:
                symbol_names[char_id] = symbol

    for tag in parsed.tags:
        kind = tag.type
        view = memoryview(tag.data)

        if kind == SWFTagType.DefineBits:
            reader = StructReader(view)
            char_id = reader.u16()
            encoded = reader.read()
            path = claim(F'images/{label(char_id)}.jpg')
            yield UnpackResult(path, reconstruct_jpeg(encoded, jpeg_tables))

        elif kind == SWFTagType.DefineBitsJPEG2:
            reader = StructReader(view)
            char_id = reader.u16()
            encoded = bytes(reader.read())
            stem = label(char_id)
            # The payload may be a PNG or GIF; those are passed through untouched.
            if encoded.startswith(b'\x89PNG\r\n\x1A\n'):
                ext, payload = 'png', encoded
            elif encoded.startswith((b'GIF87a', b'GIF89a')):
                ext, payload = 'gif', encoded
            else:
                ext, payload = 'jpg', reconstruct_jpeg(encoded)
            path = claim(F'images/{stem}.{ext}')
            yield UnpackResult(path, payload)

        elif kind in (SWFTagType.DefineBitsJPEG3, SWFTagType.DefineBitsJPEG4):
            reader = StructReader(view)
            char_id = reader.u16()
            alpha_offset = reader.u32()
            if kind == SWFTagType.DefineBitsJPEG4:
                # This variant has one more 16-bit field ahead of the image; it is skipped.
                reader.u16()
            # Only the first alpha_offset bytes are extracted as the image.
            encoded = reader.read_exactly(alpha_offset)
            path = claim(F'images/{label(char_id)}.jpg')
            yield UnpackResult(path, reconstruct_jpeg(encoded))

        elif kind in (SWFTagType.DefineBitsLossless, SWFTagType.DefineBitsLossless2):
            reader = StructReader(view)
            char_id = reader.u16()
            fmt = reader.u8()
            width = reader.u16()
            height = reader.u16()
            # Only format 3 carries a color table size byte, stored as the size minus one.
            palette_size = (reader.u8() + 1) if fmt == 3 else 0
            zdata = reader.read()
            path = claim(F'images/{label(char_id)}.png')
            second_variant = kind == SWFTagType.DefineBitsLossless2
            yield UnpackResult(path, reconstruct_png(
                width, height, second_variant, fmt, palette_size, zdata))

        elif kind == SWFTagType.DefineSound:
            reader = StructReader(view)
            char_id = reader.u16()
            # The codec identifier occupies the upper four bits of the flags byte.
            codec_id = (reader.u8() >> 4) & 0xF
            # A 32-bit field precedes the sound data and is skipped.
            reader.u32()
            samples = bytes(reader.read())
            ext = sound_extension(codec_id)
            path = claim(F'sounds/{label(char_id)}.{ext}')
            yield UnpackResult(path, samples)

        elif kind == SWFTagType.VideoFrame:
            reader = StructReader(view)
            stream_id = reader.u16()
            frame_num = reader.u16()
            frame = bytes(reader.read())
            path = claim(F'video/{stream_id}/{frame_num}.bin')
            yield UnpackResult(path, frame)

        elif kind == SWFTagType.DoAction:
            path = claim('scripts/doaction.as')
            yield UnpackResult(path, bytes(view))

        elif kind == SWFTagType.DoInitAction:
            reader = StructReader(view)
            sprite_id = reader.u16()
            bytecode = bytes(reader.read())
            path = claim(F'scripts/initaction_{sprite_id}.as')
            yield UnpackResult(path, bytecode)

        elif kind == SWFTagType.DoABC:
            path = claim('scripts/abc.abc')
            yield UnpackResult(path, bytes(view))

        elif kind == SWFTagType.DoABC2:
            reader = StructReader(view)
            # A 32-bit field precedes the null-terminated name and is skipped.
            reader.u32()
            abc_name = decode(reader.read_c_string()) or 'abc'
            abc_data = bytes(reader.read())
            path = claim(F'scripts/{abc_name}.abc')
            yield UnpackResult(path, abc_data)

        elif kind == SWFTagType.DefineBinaryData:
            reader = StructReader(view)
            tag_id = reader.u16()
            # A 32-bit field precedes the binary payload and is skipped.
            reader.u32()
            blob = bytes(reader.read())
            path = claim(F'binary/{label(tag_id)}.bin')
            yield UnpackResult(path, blob)

        elif kind in (
            SWFTagType.DefineFont,
            SWFTagType.DefineFont2,
            SWFTagType.DefineFont3,
            SWFTagType.DefineFont4,
        ):
            # Fonts are emitted as the complete tag body, including the leading ID.
            char_id = StructReader(view).u16()
            path = claim(F'fonts/{char_id}.font')
            yield UnpackResult(path, bytes(view))

Inherited members