Module `refinery.lib.dotnet.resources`

Parsing of managed .NET resources, which are .NET resource directories which begin with the magic sequence 0xBEEFCACE. These resources can contain several entries of serialized data. The main reference used for this parser was the dnSpy source code.

Expand source code Browse git

"""
Parsing of managed .NET resources, which are .NET resource directories which
begin with the magic sequence `0xBEEFCACE`. These resources can contain several
entries of serialized data. The main reference used for this parser was the
dnSpy source code.
"""
from __future__ import annotations

import datetime
import enum
import re

from refinery.lib.dotnet.deserialize import (
    BinaryFormatterParser,
    DotNetRsrcReader,
    DotNetStruct,
)
from refinery.lib.dotnet.header import DotNetStructReader
from refinery.lib.id import buffer_contains


class NoManagedResource(AssertionError):
    pass


def stream(reader: DotNetStructReader):
    return BinaryFormatterParser(reader.read_length_prefixed())


class RsrcPrimitive(enum.IntEnum):
    Null      = 0x00 # noqa
    String    = 0x01 # noqa
    Boolean   = 0x02 # noqa
    Char      = 0x03 # noqa
    Byte      = 0x04 # noqa
    SByte     = 0x05 # noqa
    Int16     = 0x06 # noqa
    UInt16    = 0x07 # noqa
    Int32     = 0x08 # noqa
    UInt32    = 0x09 # noqa
    Int64     = 0x0A # noqa
    UInt64    = 0x0B # noqa
    Single    = 0x0C # noqa
    Double    = 0x0D # noqa
    Decimal   = 0x0E # noqa
    DateTime  = 0x0F # noqa
    TimeSpan  = 0x10 # noqa
    ByteArray = 0x20 # noqa
    Stream    = 0x21 # noqa


RsrcUserTypeBase = 0x40


RsrcPrimitiveDispatch = {
    RsrcPrimitive.Null      : DotNetStructReader.read_dn_null,
    RsrcPrimitive.Boolean   : DotNetStructReader.read_bool_byte,
    RsrcPrimitive.Byte      : DotNetStructReader.read_byte,
    RsrcPrimitive.Char      : DotNetStructReader.read_char,
    RsrcPrimitive.Decimal   : DotNetStructReader.read_dn_decimal,
    RsrcPrimitive.Single    : DotNetStructReader.f32,
    RsrcPrimitive.Double    : DotNetStructReader.f64,
    RsrcPrimitive.Int16     : DotNetStructReader.i16,
    RsrcPrimitive.Int32     : DotNetStructReader.i32,
    RsrcPrimitive.Int64     : DotNetStructReader.i64,
    RsrcPrimitive.SByte     : DotNetStructReader.i8,
    RsrcPrimitive.TimeSpan  : DotNetStructReader.read_dn_time_span,
    RsrcPrimitive.DateTime  : DotNetStructReader.read_dn_date_time,
    RsrcPrimitive.UInt16    : DotNetStructReader.u16,
    RsrcPrimitive.UInt32    : DotNetStructReader.u32,
    RsrcPrimitive.UInt64    : DotNetStructReader.u64,
    RsrcPrimitive.String    : DotNetStructReader.read_dn_length_prefixed_string,
    RsrcPrimitive.ByteArray : DotNetStructReader.read_length_prefixed,
    RsrcPrimitive.Stream    : stream,
}


class NetResource(DotNetStruct):
    Value: int | str | bool | list | memoryview | datetime.datetime | datetime.timedelta | None
    Data: memoryview
    TypeName: str
    Error: str | None

    def __init__(self, reader: DotNetStructReader, base: int):
        self.Name = reader.read_dn_length_prefixed_string(codec='utf-16le')
        self.Offset = reader.u32() + base
        self.Size = 0
        self.Error = None


class NetManifestResource(DotNetStruct):

    def __init__(self, reader: DotNetStructReader):
        self.Signature = reader.u32()
        if self.Signature != 0xBEEFCACE:
            raise NoManagedResource
        self.ReaderCount = reader.u32()
        self.ReaderTypeLength = reader.u32()
        tr = DotNetRsrcReader(reader.read_exactly(self.ReaderTypeLength))
        self.ReaderType = rt = tr.read_dn_string_primitive()
        self.ResourceSetType = tr.read_dn_string_primitive()

        if not re.match(r"^System\.Resources\.ResourceReader,\s*mscorlib", rt):
            raise AssertionError('unknown resource reader')

        self.Version = reader.u32()
        ResourceCount = reader.u32()
        RsrcTypeCount = reader.u32()

        ResourceTypes = [reader.read_dn_length_prefixed_string()
            for _ in range(RsrcTypeCount)]

        reader.byte_align(8)
        self.ResourceHashes = [reader.u32() for _ in range(ResourceCount)]
        ResourceNameOffsets = [reader.u32() for _ in range(ResourceCount)]
        self.DataSectionOffset = base = reader.u32()
        rsrc: list[NetResource] = []
        self.Resources = rsrc

        for k in range(ResourceCount):
            with reader.detour():
                reader.skip(ResourceNameOffsets[k])
                rsrc.append(NetResource(reader, base))

        if rsrc:
            rsrc.sort(key=lambda r: r.Offset)
            it = iter(rsrc)
            next(it)
            ends = [r.Offset for r in it]
            ends.append(len(reader))
            for r, end in zip(rsrc, ends):
                r.Size = end - r.Offset - 1

        for Entry in rsrc:

            reader.seek(Entry.Offset)
            TypeCode = reader.read_dn_encoded_integer()
            Entry.Error = None
            Entry.Value = Entry.Data = reader.read_exactly(Entry.Size)

            if TypeCode >= RsrcUserTypeBase:
                Entry.TypeName = ResourceTypes[TypeCode - RsrcUserTypeBase]
                try:
                    Deserialized = BinaryFormatterParser(
                        Entry.Data,
                        ignore_errors=False,
                        dereference=False,
                        keep_meta=False
                    )
                except Exception as error:
                    Entry.Error = F'Failed to deserialize entry data: {error}'
                    continue
                try:
                    _, _, _, Data = Deserialized
                except ValueError:
                    Entry.Error = F'Deserialized entry has {len(Deserialized)} records, 4 were expected.'
                    continue
                if not buffer_contains(Entry.Data, Data):
                    Entry.Error = 'The computed entry value is not a substring of the entry data.'
                    Entry.Value = Entry.Data
                else:
                    Entry.Value = Data
            else:
                try:
                    Type = RsrcPrimitive(TypeCode)
                except ValueError:
                    Entry.TypeName = F'UnknownType[{TypeCode:#x}]'
                else:
                    Entry.TypeName = Type.name
                    package = DotNetStructReader(Entry.Value)
                    Entry.Value = RsrcPrimitiveDispatch[Type](package)


def NetStructuredResources(data):
    return NetManifestResource(DotNetStructReader(memoryview(data))).Resources

Functions

def stream(reader)

Expand source code Browse git

def stream(reader: DotNetStructReader):
    return BinaryFormatterParser(reader.read_length_prefixed())

def NetStructuredResources(data)

Expand source code Browse git

def NetStructuredResources(data):
    return NetManifestResource(DotNetStructReader(memoryview(data))).Resources

Classes

class NoManagedResource (*args, **kwargs)

Assertion failed.

Expand source code Browse git

class NoManagedResource(AssertionError):
    pass

Ancestors

builtins.AssertionError
builtins.Exception
builtins.BaseException

class RsrcPrimitive (*args, **kwds)

Enum where members are also (and must be) ints

Expand source code Browse git

class RsrcPrimitive(enum.IntEnum):
    Null      = 0x00 # noqa
    String    = 0x01 # noqa
    Boolean   = 0x02 # noqa
    Char      = 0x03 # noqa
    Byte      = 0x04 # noqa
    SByte     = 0x05 # noqa
    Int16     = 0x06 # noqa
    UInt16    = 0x07 # noqa
    Int32     = 0x08 # noqa
    UInt32    = 0x09 # noqa
    Int64     = 0x0A # noqa
    UInt64    = 0x0B # noqa
    Single    = 0x0C # noqa
    Double    = 0x0D # noqa
    Decimal   = 0x0E # noqa
    DateTime  = 0x0F # noqa
    TimeSpan  = 0x10 # noqa
    ByteArray = 0x20 # noqa
    Stream    = 0x21 # noqa

Ancestors

enum.IntEnum
builtins.int
enum.ReprEnum
enum.Enum

Class variables

var Null: The type of the None singleton.
var String: The type of the None singleton.
var Boolean: The type of the None singleton.
var Char: The type of the None singleton.
var Byte: The type of the None singleton.
var SByte: The type of the None singleton.
var Int16: The type of the None singleton.
var UInt16: The type of the None singleton.
var Int32: The type of the None singleton.
var UInt32: The type of the None singleton.
var Int64: The type of the None singleton.
var UInt64: The type of the None singleton.
var Single: The type of the None singleton.
var Double: The type of the None singleton.
var Decimal: The type of the None singleton.
var DateTime: The type of the None singleton.
var TimeSpan: The type of the None singleton.
var ByteArray: The type of the None singleton.
var Stream: The type of the None singleton.

class NetResource (reader, base)

A class to parse structured data. A Struct class can be instantiated as follows:

foo = Struct(data, bar=29)

The initialization routine of the structure will be called with a single argument reader. If the object data is already a StructReader, then it will be passed as reader. Otherwise, the argument will be wrapped in a StructReader. Additional arguments to the struct are passed through.

Expand source code Browse git

class NetResource(DotNetStruct):
    Value: int | str | bool | list | memoryview | datetime.datetime | datetime.timedelta | None
    Data: memoryview
    TypeName: str
    Error: str | None

    def __init__(self, reader: DotNetStructReader, base: int):
        self.Name = reader.read_dn_length_prefixed_string(codec='utf-16le')
        self.Offset = reader.u32() + base
        self.Size = 0
        self.Error = None

Ancestors

DotNetStruct
Struct
typing.Generic
collections.abc.Buffer

Class variables

var Value: The type of the None singleton.
var Data: The type of the None singleton.
var TypeName: The type of the None singleton.
var Error: The type of the None singleton.

Static methods

def Parse(reader, *args, **kwargs)

class NetManifestResource (reader)

A class to parse structured data. A Struct class can be instantiated as follows:

foo = Struct(data, bar=29)

Expand source code Browse git

class NetManifestResource(DotNetStruct):

    def __init__(self, reader: DotNetStructReader):
        self.Signature = reader.u32()
        if self.Signature != 0xBEEFCACE:
            raise NoManagedResource
        self.ReaderCount = reader.u32()
        self.ReaderTypeLength = reader.u32()
        tr = DotNetRsrcReader(reader.read_exactly(self.ReaderTypeLength))
        self.ReaderType = rt = tr.read_dn_string_primitive()
        self.ResourceSetType = tr.read_dn_string_primitive()

        if not re.match(r"^System\.Resources\.ResourceReader,\s*mscorlib", rt):
            raise AssertionError('unknown resource reader')

        self.Version = reader.u32()
        ResourceCount = reader.u32()
        RsrcTypeCount = reader.u32()

        ResourceTypes = [reader.read_dn_length_prefixed_string()
            for _ in range(RsrcTypeCount)]

        reader.byte_align(8)
        self.ResourceHashes = [reader.u32() for _ in range(ResourceCount)]
        ResourceNameOffsets = [reader.u32() for _ in range(ResourceCount)]
        self.DataSectionOffset = base = reader.u32()
        rsrc: list[NetResource] = []
        self.Resources = rsrc

        for k in range(ResourceCount):
            with reader.detour():
                reader.skip(ResourceNameOffsets[k])
                rsrc.append(NetResource(reader, base))

        if rsrc:
            rsrc.sort(key=lambda r: r.Offset)
            it = iter(rsrc)
            next(it)
            ends = [r.Offset for r in it]
            ends.append(len(reader))
            for r, end in zip(rsrc, ends):
                r.Size = end - r.Offset - 1

        for Entry in rsrc:

            reader.seek(Entry.Offset)
            TypeCode = reader.read_dn_encoded_integer()
            Entry.Error = None
            Entry.Value = Entry.Data = reader.read_exactly(Entry.Size)

            if TypeCode >= RsrcUserTypeBase:
                Entry.TypeName = ResourceTypes[TypeCode - RsrcUserTypeBase]
                try:
                    Deserialized = BinaryFormatterParser(
                        Entry.Data,
                        ignore_errors=False,
                        dereference=False,
                        keep_meta=False
                    )
                except Exception as error:
                    Entry.Error = F'Failed to deserialize entry data: {error}'
                    continue
                try:
                    _, _, _, Data = Deserialized
                except ValueError:
                    Entry.Error = F'Deserialized entry has {len(Deserialized)} records, 4 were expected.'
                    continue
                if not buffer_contains(Entry.Data, Data):
                    Entry.Error = 'The computed entry value is not a substring of the entry data.'
                    Entry.Value = Entry.Data
                else:
                    Entry.Value = Data
            else:
                try:
                    Type = RsrcPrimitive(TypeCode)
                except ValueError:
                    Entry.TypeName = F'UnknownType[{TypeCode:#x}]'
                else:
                    Entry.TypeName = Type.name
                    package = DotNetStructReader(Entry.Value)
                    Entry.Value = RsrcPrimitiveDispatch[Type](package)

Ancestors

DotNetStruct
Struct
typing.Generic
collections.abc.Buffer

Static methods

def Parse(reader, *args, **kwargs)