Module refinery.units.formats.pe.pefix

Expand source code Browse git
from __future__ import annotations

from enum import Enum

from refinery.lib.executable import align
from refinery.lib.shared import pefile
from refinery.lib.structures import StructReader
from refinery.lib.types import Param
from refinery.units import Arg, Unit


class ImgState(bytes, Enum):
    """
    Two-byte image state markers of a PE optional header. Every member holds the raw bytes in
    file order, i.e. the little-endian encoding of the 16-bit magic value.
    """
    x32 = b'\x0b\x01'  # 0x010B
    x64 = b'\x0b\x02'  # 0x020B
    ROM = b'\x07\x01'  # 0x0107


class MachineType(int, Enum):
    """
    Known values of the 16-bit machine field in the COFF file header of a PE image. Members
    are listed in ascending numeric order.
    """
    UNKNOWN = 0x0000
    I386 = 0x014C
    R3000 = 0x0162
    R4000 = 0x0166
    R10000 = 0x0168
    WCEMIPSV2 = 0x0169
    ALPHA = 0x0184
    SH3 = 0x01A2
    SH3DSP = 0x01A3
    SH3E = 0x01A4
    SH4 = 0x01A6
    SH5 = 0x01A8
    ARM = 0x01C0
    THUMB = 0x01C2
    ARMNT = 0x01C4
    AM33 = 0x01D3
    POWERPC = 0x01F0
    POWERPCFP = 0x01F1
    IA64 = 0x0200
    MIPS16 = 0x0266
    ALPHA64 = 0x0284
    # Same value as ALPHA64, which makes this name an alias of that member.
    AXP64 = 0x0284
    MIPSFPU = 0x0366
    MIPSFPU16 = 0x0466
    TRICORE = 0x0520
    CEF = 0x0CEF
    EBC = 0x0EBC
    RISCV32 = 0x5032
    RISCV64 = 0x5064
    RISCV128 = 0x5128
    LOONGARCH32 = 0x6232
    LOONGARCH64 = 0x6264
    AMD64 = 0x8664
    M32R = 0x9041
    ARM64 = 0xAA64
    CEE = 0xC0EE


class pefix(Unit):
    """
    Take as input a buffer that represents a stripped PE file, i.e. magic numbers and other
    relevant parts of the header have been stripped. The unit attempts to repair the damage
    and return something that can be parsed.
    """
    def __init__(self, unmap: Param[bool, Arg.Switch('-u', help=(
        'Overwrite all section file start offsets with the virtual offset.'
    ))]):
        super().__init__(unmap=unmap)

    def process(self, data):
        """
        Repair the DOS magic, the NT signature, the machine type and the optional header magic
        of the input, then normalize the file alignment and return the rebuilt image as
        produced by `pefile`.

        The header fixes are written through a `StructReader` before the same buffer is given
        to `pefile`; this presumably requires the input to be a mutable buffer (to be
        confirmed against the framework).

        Raises a `ValueError` when the file alignment is invalid and no power of two between
        0x200 and 0x8000 is consistent with the header padding.
        """
        sr = StructReader(data)
        sr.write(B'MZ')
        # Offset 0x3C of the DOS header holds the file offset of the NT headers; it is read
        # as a 16-bit value here.
        sr.seekset(0x3C)
        nt = sr.u16()
        # 4 bytes of signature plus 0x14 bytes of file header precede the optional header.
        oh = nt + 0x18
        sr.seekset(nt)
        # The NT signature is four bytes long; restore the two trailing null bytes as well
        # because a parser comparing the full 32-bit signature rejects the file otherwise.
        # The cursor is positioned at the machine field after this write.
        sr.write(B'PE\0\0')
        machine = sr.u16()

        try:
            mt = MachineType(machine)
        except ValueError:
            mt = None
        if mt is MachineType.UNKNOWN:
            # A zeroed machine field is the expected result of header stripping: treat it
            # as undetermined so that it can be recovered from the image state below.
            mt = None

        sr.seekset(oh)
        magic = bytes(sr.peek(2))

        try:
            ms = ImgState(magic)
        except ValueError:
            # The magic is not usable: derive it from the machine type when possible.
            ms = {
                MachineType.I386  : ImgState.x32,
                MachineType.IA64  : ImgState.x64,
                MachineType.AMD64 : ImgState.x64,
            }.get(mt)

        if ms is None:
            self.log_warn('could not determine image state; nulling field')
            sr.write(B'\0\0')
        else:
            sr.write(ms.value)

        if mt is None:
            # The machine type is not usable: derive it from the image state when possible.
            mt = {
                ImgState.x32: MachineType.I386,
                ImgState.x64: MachineType.AMD64,
            }.get(ms)
            if mt is not None:
                sr.seekset(nt + 4)
                sr.write(mt.value.to_bytes(2, 'little'))

        pe = pefile.PE(data=data, fast_load=True)

        # Accepted file alignments are the powers of two from 0x200 to 0x8000.
        valid_alignments = tuple(1 << k for k in range(9, 16))
        alignment = pe.OPTIONAL_HEADER.FileAlignment

        if alignment not in valid_alignments:
            # End of the section table: 0x28 bytes per section header after the optional
            # header. NOTE(review): 0xF0 looks like the optional header size of a 64-bit
            # image; confirm whether 32-bit images need a different constant.
            size_of_headers = 0x28 * len(pe.sections) + oh + 0xF0
            # Pick the smallest alignment for which the padding after the headers is all
            # zero and nonzero data starts directly at the aligned offset.
            for alignment in valid_alignments:
                soh = align(alignment, size_of_headers)
                if any(data[size_of_headers:soh]):
                    raise ValueError('nonzero bytes in what must be header padding')
                if any(data[soh:soh + 8]):
                    pe.OPTIONAL_HEADER.SizeOfHeaders = soh
                    break
            else:
                raise ValueError('unable to find a valid file alignment')

        pe.OPTIONAL_HEADER.FileAlignment = alignment
        pe.OPTIONAL_HEADER.SectionAlignment = max(pe.OPTIONAL_HEADER.SectionAlignment, alignment)

        if self.args.unmap:
            section_ends = []
            for section in pe.sections:
                section.PointerToRawData = section.VirtualAddress
                section.SizeOfRawData = section.Misc_VirtualSize
                section_ends.append(section.VirtualAddress + section.Misc_VirtualSize)
            if section_ends:
                # The image has to cover every section, so use the furthest section end
                # regardless of the order of the section table.
                pe.OPTIONAL_HEADER.SizeOfImage = max(section_ends)

        return pe.write()

Classes

class ImgState (*args, **kwds)

bytes(iterable_of_ints) -> bytes; bytes(string, encoding[, errors]) -> bytes; bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer; bytes(int) -> bytes object of size given by the parameter initialized with null bytes; bytes() -> empty bytes object

Construct an immutable array of bytes from: an iterable yielding integers in range(256); a text string encoded using the specified encoding; any object implementing the buffer API; or an integer.

Expand source code Browse git
class ImgState(bytes, Enum):
    """
    Two-byte image state markers of a PE optional header. Every member holds the raw bytes in
    file order, i.e. the little-endian encoding of the 16-bit magic value.
    """
    x32 = b'\x0b\x01'  # 0x010B
    x64 = b'\x0b\x02'  # 0x020B
    ROM = b'\x07\x01'  # 0x0107

Ancestors

  • builtins.bytes
  • enum.Enum

Class variables

var x32

The type of the None singleton.

var x64

The type of the None singleton.

var ROM

The type of the None singleton.

class MachineType (*args, **kwds)

int([x]) -> integer; int(x, base=10) -> integer

Convert a number or string to an integer, or return 0 if no arguments are given. If x is a number, return x.__int__(). For floating-point numbers, this truncates towards zero.

If x is not a number or if base is given, then x must be a string, bytes, or bytearray instance representing an integer literal in the given base. The literal can be preceded by '+' or '-' and be surrounded by whitespace. The base defaults to 10. Valid bases are 0 and 2-36. Base 0 means to interpret the base from the string as an integer literal.

>>> int('0b100', base=0)
4
Expand source code Browse git
class MachineType(int, Enum):
    """
    Known values of the 16-bit machine field in the COFF file header of a PE image. Members
    are listed in ascending numeric order.
    """
    UNKNOWN = 0x0000
    I386 = 0x014C
    R3000 = 0x0162
    R4000 = 0x0166
    R10000 = 0x0168
    WCEMIPSV2 = 0x0169
    ALPHA = 0x0184
    SH3 = 0x01A2
    SH3DSP = 0x01A3
    SH3E = 0x01A4
    SH4 = 0x01A6
    SH5 = 0x01A8
    ARM = 0x01C0
    THUMB = 0x01C2
    ARMNT = 0x01C4
    AM33 = 0x01D3
    POWERPC = 0x01F0
    POWERPCFP = 0x01F1
    IA64 = 0x0200
    MIPS16 = 0x0266
    ALPHA64 = 0x0284
    # Same value as ALPHA64, which makes this name an alias of that member.
    AXP64 = 0x0284
    MIPSFPU = 0x0366
    MIPSFPU16 = 0x0466
    TRICORE = 0x0520
    CEF = 0x0CEF
    EBC = 0x0EBC
    RISCV32 = 0x5032
    RISCV64 = 0x5064
    RISCV128 = 0x5128
    LOONGARCH32 = 0x6232
    LOONGARCH64 = 0x6264
    AMD64 = 0x8664
    M32R = 0x9041
    ARM64 = 0xAA64
    CEE = 0xC0EE

Ancestors

  • builtins.int
  • enum.Enum

Class variables

var UNKNOWN

The type of the None singleton.

var I386

The type of the None singleton.

var R3000

The type of the None singleton.

var R4000

The type of the None singleton.

var R10000

The type of the None singleton.

var WCEMIPSV2

The type of the None singleton.

var ALPHA

The type of the None singleton.

var SH3

The type of the None singleton.

var SH3DSP

The type of the None singleton.

var SH3E

The type of the None singleton.

var SH4

The type of the None singleton.

var SH5

The type of the None singleton.

var ARM

The type of the None singleton.

var THUMB

The type of the None singleton.

var ARMNT

The type of the None singleton.

var AM33

The type of the None singleton.

var POWERPC

The type of the None singleton.

var POWERPCFP

The type of the None singleton.

var IA64

The type of the None singleton.

var MIPS16

The type of the None singleton.

var ALPHA64

The type of the None singleton.

var AXP64

The type of the None singleton.

var MIPSFPU

The type of the None singleton.

var MIPSFPU16

The type of the None singleton.

var TRICORE

The type of the None singleton.

var CEF

The type of the None singleton.

var EBC

The type of the None singleton.

var RISCV32

The type of the None singleton.

var RISCV64

The type of the None singleton.

var RISCV128

The type of the None singleton.

var LOONGARCH32

The type of the None singleton.

var LOONGARCH64

The type of the None singleton.

var AMD64

The type of the None singleton.

var M32R

The type of the None singleton.

var ARM64

The type of the None singleton.

var CEE

The type of the None singleton.

class pefix (unmap)

Take as input a buffer that represents a stripped PE file, i.e. magic numbers and other relevant parts of the header have been stripped. The unit attempts to repair the damage and return something that can be parsed.

Expand source code Browse git
class pefix(Unit):
    """
    Take as input a buffer that represents a stripped PE file, i.e. magic numbers and other
    relevant parts of the header have been stripped. The unit attempts to repair the damage
    and return something that can be parsed.
    """
    def __init__(self, unmap: Param[bool, Arg.Switch('-u', help=(
        'Overwrite all section file start offsets with the virtual offset.'
    ))]):
        super().__init__(unmap=unmap)

    def process(self, data):
        """
        Repair the DOS magic, the NT signature, the machine type and the optional header magic
        of the input, then normalize the file alignment and return the rebuilt image as
        produced by `pefile`.

        The header fixes are written through a `StructReader` before the same buffer is given
        to `pefile`; this presumably requires the input to be a mutable buffer (to be
        confirmed against the framework).

        Raises a `ValueError` when the file alignment is invalid and no power of two between
        0x200 and 0x8000 is consistent with the header padding.
        """
        sr = StructReader(data)
        sr.write(B'MZ')
        # Offset 0x3C of the DOS header holds the file offset of the NT headers; it is read
        # as a 16-bit value here.
        sr.seekset(0x3C)
        nt = sr.u16()
        # 4 bytes of signature plus 0x14 bytes of file header precede the optional header.
        oh = nt + 0x18
        sr.seekset(nt)
        # The NT signature is four bytes long; restore the two trailing null bytes as well
        # because a parser comparing the full 32-bit signature rejects the file otherwise.
        # The cursor is positioned at the machine field after this write.
        sr.write(B'PE\0\0')
        machine = sr.u16()

        try:
            mt = MachineType(machine)
        except ValueError:
            mt = None
        if mt is MachineType.UNKNOWN:
            # A zeroed machine field is the expected result of header stripping: treat it
            # as undetermined so that it can be recovered from the image state below.
            mt = None

        sr.seekset(oh)
        magic = bytes(sr.peek(2))

        try:
            ms = ImgState(magic)
        except ValueError:
            # The magic is not usable: derive it from the machine type when possible.
            ms = {
                MachineType.I386  : ImgState.x32,
                MachineType.IA64  : ImgState.x64,
                MachineType.AMD64 : ImgState.x64,
            }.get(mt)

        if ms is None:
            self.log_warn('could not determine image state; nulling field')
            sr.write(B'\0\0')
        else:
            sr.write(ms.value)

        if mt is None:
            # The machine type is not usable: derive it from the image state when possible.
            mt = {
                ImgState.x32: MachineType.I386,
                ImgState.x64: MachineType.AMD64,
            }.get(ms)
            if mt is not None:
                sr.seekset(nt + 4)
                sr.write(mt.value.to_bytes(2, 'little'))

        pe = pefile.PE(data=data, fast_load=True)

        # Accepted file alignments are the powers of two from 0x200 to 0x8000.
        valid_alignments = tuple(1 << k for k in range(9, 16))
        alignment = pe.OPTIONAL_HEADER.FileAlignment

        if alignment not in valid_alignments:
            # End of the section table: 0x28 bytes per section header after the optional
            # header. NOTE(review): 0xF0 looks like the optional header size of a 64-bit
            # image; confirm whether 32-bit images need a different constant.
            size_of_headers = 0x28 * len(pe.sections) + oh + 0xF0
            # Pick the smallest alignment for which the padding after the headers is all
            # zero and nonzero data starts directly at the aligned offset.
            for alignment in valid_alignments:
                soh = align(alignment, size_of_headers)
                if any(data[size_of_headers:soh]):
                    raise ValueError('nonzero bytes in what must be header padding')
                if any(data[soh:soh + 8]):
                    pe.OPTIONAL_HEADER.SizeOfHeaders = soh
                    break
            else:
                raise ValueError('unable to find a valid file alignment')

        pe.OPTIONAL_HEADER.FileAlignment = alignment
        pe.OPTIONAL_HEADER.SectionAlignment = max(pe.OPTIONAL_HEADER.SectionAlignment, alignment)

        if self.args.unmap:
            section_ends = []
            for section in pe.sections:
                section.PointerToRawData = section.VirtualAddress
                section.SizeOfRawData = section.Misc_VirtualSize
                section_ends.append(section.VirtualAddress + section.Misc_VirtualSize)
            if section_ends:
                # The image has to cover every section, so use the furthest section end
                # regardless of the order of the section table.
                pe.OPTIONAL_HEADER.SizeOfImage = max(section_ends)

        return pe.write()

Ancestors

Subclasses

Inherited members