Module refinery.lib.executable
This module implements an abstraction layer executable loader for PE, ELF, and MachO files. The provided interface is the same for all executables. It powers the following units:
Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This module implements an abstraction layer executable loader for PE, ELF, and MachO files.
The provided interface is the same for all executables. It powers the following units:
- `refinery.vsnip`
- `refinery.vsect`
- `refinery.vaddr`
- `refinery.vmemref`
"""
from __future__ import annotations
import sys
import re
import itertools
from typing import TYPE_CHECKING, ClassVar, NamedTuple
from os import devnull as DEVNULL
from abc import ABC, abstractmethod
from enum import Enum
from functools import lru_cache
from uuid import uuid4
from macholib.MachO import load_command, MachO, MachOHeader
from pefile import PE as PEFile, SectionStructure, MACHINE_TYPE, DIRECTORY_ENTRY
from elftools.elf.elffile import ELFFile, SymbolTableSection
from refinery.lib.structures import MemoryFile
from refinery.lib.types import INF, ByteStr
if TYPE_CHECKING:
from typing import Type, Callable, ParamSpec, TypeVar, Generator, Optional, Union, Iterable, List
_T = TypeVar('_T')
_P = ParamSpec('_P')
class ParsingFailure(ValueError):
    """
    Raised when an input `refinery.lib.executable.Executable` carries the signature of a known
    format but the corresponding parser rejects it.
    """

    def __init__(self, kind):
        # The format name (e.g. PE, ELF, MachO) is interpolated verbatim into the message.
        message = F'unable to parse input as {kind} file'
        super().__init__(message)
_MACHO_ARCHS = {
1 : 'VAX',
6 : 'MC680x0',
7 : 'X86',
16777223 : 'X86_64',
10 : 'MC98000',
11 : 'HPPA',
12 : 'ARM',
13 : 'MC88000',
14 : 'SPARC',
15 : 'I860',
18 : 'POWERPC',
16777234 : 'POWERPC64',
}
def align(alignment: int, value: int, down=False) -> int:
    """
    Round `value` to a multiple of `alignment`. The result is the next higher multiple unless
    `down` is set to `True`, in which case the next lower multiple is returned. Values that are
    already aligned are returned unchanged, and so is every value when `alignment` is less
    than 2.
    """
    if alignment < 2:
        return value
    remainder = value % alignment
    if not remainder > 0:
        return value
    if down:
        return value - remainder
    return value + (alignment - remainder)
def exeroute(
    data          : bytearray,
    handler_elf   : Callable[_P, _T],
    handler_macho : Callable[_P, _T],
    handler_pe    : Callable[_P, _T],
    *args,
    **kwargs
) -> _T:
    """
    Given some input `data` representing the raw bytes of an `refinery.lib.executable.Executable`,
    route this data to one of three handlers for the ELF, MachO, or PE format. All additional
    (keyword) arguments are forwarded to the handler. The function checks for well-known signature
    bytes and magic numbers to route the data.

    The handler receives the parsed header object as its first argument and its return value is
    returned unchanged. A `refinery.lib.executable.ParsingFailure` is raised when the signature
    matches but the parser fails; a plain `ValueError` is raised when no signature matches.
    """
    if data[:2] == B'MZ':
        try:
            parsed = PEFile(data=data, fast_load=True)
        except Exception as E:
            raise ParsingFailure('PE') from E
        else:
            return handler_pe(parsed, *args, **kwargs)
    if data[:4] == B'\x7FELF':
        try:
            parsed = ELFFile(MemoryFile(data))
        except Exception as E:
            raise ParsingFailure('ELF') from E
        else:
            return handler_elf(parsed, *args, **kwargs)
    # Compare against the four thin Mach-O magic numbers (32 and 64 bit, in both byte orders).
    # A subset test on the individual bytes would also accept empty or truncated input.
    if bytes(data[:4]) in (
        B'\xFE\xED\xFA\xCE',
        B'\xCE\xFA\xED\xFE',
        B'\xFE\xED\xFA\xCF',
        B'\xCF\xFA\xED\xFE',
    ):
        class InMemoryMachO(MachO):
            # The parent constructor wants a file name; it is handed the null device and the
            # load step is redirected to read from the in-memory buffer instead.
            def __init__(self):
                super().__init__(DEVNULL)

            def load(self, _):
                return super().load(MemoryFile(data))

        try:
            parsed = InMemoryMachO()
            # Explicit check rather than an assert statement, which is removed under -O.
            if not parsed.headers:
                raise ValueError('no MachO headers were parsed')
        except Exception as E:
            raise ParsingFailure('MachO') from E
        else:
            return handler_macho(parsed, *args, **kwargs)
    raise ValueError('Unknown executable format')
class Range(NamedTuple):
    """
    A half-open interval of bytes given by a lower (inclusive) and an upper (exclusive) bound.
    Subtracting one `refinery.lib.executable.Range` from another yields the list of ranges that
    remain after cutting the former out of the latter. The builtin `range` object does not
    offer this operation, which is the only reason for this custom class.
    """
    lower: int
    upper: int

    def range(self):
        """
        Conversion to a `range` object.
        """
        start, stop = self.lower, self.upper
        return range(start, stop)

    def slice(self):
        """
        Conversion to a `slice` object.
        """
        start, stop = self.lower, self.upper
        return slice(start, stop)

    def __len__(self):
        # Number of bytes covered, not the number of tuple fields.
        return self.upper - self.lower

    def __contains__(self, addr: int):
        # Membership means lying inside the interval, not being one of the two bounds.
        return self.lower <= addr and addr < self.upper

    def __str__(self):
        start, stop = self.lower, self.upper
        return F'0x{start:X}:0x{stop:X}'

    def __repr__(self):
        return F'<{type(self).__name__}:{self!s}>'

    def __sub__(self, them: Range) -> List[Range]:
        remainder = []
        # Whatever lies in front of the removed range survives, clipped to our own end.
        if them.lower > self.lower:
            remainder.append(Range(self.lower, min(them.lower, self.upper)))
        # Whatever lies behind the removed range survives, clipped to our own start.
        if self.upper > them.upper:
            remainder.append(Range(max(self.lower, them.upper), self.upper))
        return remainder
class BoxedOffset(NamedTuple):
    """
    A position paired with the range (the box) of bytes that is available around it.
    """
    box: Range
    position: int

    def __str__(self):
        where, within = self.position, self.box
        return F'0x{where:X} in {within!s}'

    def __repr__(self):
        return F'<{type(self).__name__}:{self!s}>'
class Location(NamedTuple):
    """
    A place inside an `refinery.lib.executable.Executable`, expressed twice: once as a
    `refinery.lib.executable.BoxedOffset` into the file (physical) and once as a
    `refinery.lib.executable.BoxedOffset` into the address space (virtual).
    """
    physical: BoxedOffset
    virtual: BoxedOffset

    def __str__(self):
        in_memory, on_disk = self.virtual, self.physical
        return F'V={in_memory!s}; P={on_disk!s}'

    def __repr__(self):
        return F'<{type(self).__name__}:{self!s}>'
class ArchItem(NamedTuple):
    """
    The value type behind the members of the `refinery.lib.executable.Arch` enumeration. It
    carries the register size in bits for a given architecture.
    """
    # NOTE: the New constructor below stores a uuid4() value in this field.
    id: int
    pointer_size: int

    @classmethod
    def New(cls, pointer_size: int):
        # A fresh random identifier makes two items with equal pointer size compare unequal.
        unique = uuid4()
        return cls(unique, pointer_size)
class Arch(ArchItem, Enum):
    """
    An enumeration of supported architectures and their register sizes.
    """
    # Each member value is built by ArchItem.New from the pointer size in bits. New pairs the
    # size with a fresh uuid4, so members that share a pointer size still have distinct values;
    # an Enum would otherwise treat members with equal values as aliases of one another.
    X32 = ArchItem.New(32)
    X64 = ArchItem.New(64)
    ARM32 = ArchItem.New(32)
    ARM64 = ArchItem.New(64)
    MIPS16 = ArchItem.New(16)
    MIPS32 = ArchItem.New(32)
    MIPS64 = ArchItem.New(64)
    PPC32 = ArchItem.New(32)
    PPC64 = ArchItem.New(64)
    SPARC32 = ArchItem.New(32)
    SPARC64 = ArchItem.New(64)
class LT(str, Enum):
    """
    An enumeration to distinguish between physical and virtual address types.
    """
    # The string values are interpolated into the CompartmentNotFound error message.
    PHYSICAL = 'offset'   # a position in the file on disk
    VIRTUAL = 'address'   # a position in the mapped address space
class ET(str, Enum):
    """
    An enumeration to distinguish various executable types.
    """
    ELF = 'ELF'       # used by ExecutableELF
    MachO = 'MachO'   # used by ExecutableMachO
    PE = 'PE'         # used by ExecutablePE
    BLOB = 'BLOB'     # used by ExecutableCodeBlob, i.e. raw code without a file header
class BO(str, Enum):
    """
    An enumeration to distinguish big and little endian.
    """
    BE = 'big'      # big endian
    LE = 'little'   # little endian
class Section(NamedTuple):
    """
    An abstract representation of a section inside an `refinery.lib.executable.Executable`. The
    `synthetic` flag tells sections that were made up to cover otherwise unmapped file regions
    apart from sections that are declared by the file itself.
    """
    name: str
    physical: Range
    virtual: Range
    synthetic: bool

    def as_segment(self, populate_sections=False) -> Segment:
        """
        Convert this section to a `refinery.lib.executable.Segment` of the same name and extent.
        With `populate_sections` set, the segment lists this section as its only member;
        otherwise its section list is `None`.
        """
        if populate_sections:
            members = [self]
        else:
            members = None
        return Segment(
            physical=self.physical,
            virtual=self.virtual,
            sections=members,
            name=self.name,
        )

    def __str__(self):
        # Sections and segments share one textual representation.
        segment = self.as_segment()
        return str(segment)

    def __repr__(self):
        return F'<{type(self).__name__}:{self!s}>'
class Symbol(NamedTuple):
    """
    A named address inside an `refinery.lib.executable.Executable`. Only the address is
    mandatory; by default, a symbol is assumed to be exported code without a name.
    """
    address: int
    name: Optional[str] = None
    code: bool = True
    exported: bool = True
    meta: Optional[dict] = None

    def get_name(self, default: str = 'entry'):
        """
        Return the symbol name, or `default` when the name is missing or empty.
        """
        if self.name:
            return self.name
        return default

    def get_meta(self):
        """
        Return the meta information, or an empty dictionary when there is none.
        """
        if self.meta:
            return self.meta
        return {}
class Segment(NamedTuple):
    """
    An abstract representation of a segment inside an `refinery.lib.executable.Executable`. A
    segment may optionally carry a name and the list of sections that it contains.
    """
    physical: Range
    virtual: Range
    sections: Optional[List[Section]]
    name: Optional[str] = None

    def as_section(self) -> Section:
        """
        Convert this segment to a non-synthetic `refinery.lib.executable.Section` with the same
        name and extent. Raises a `ValueError` for segments that have no name.
        """
        label = self.name
        if label is None:
            raise ValueError('Unable to convert nameless segment to section.')
        return Section(label, self.physical, self.virtual, False)

    def __str__(self):
        extent = F'P=[{self.physical!s}];V=[{self.virtual!s}]'
        if self.name is None:
            return extent
        return F'{self.name}:{extent}'

    def __repr__(self):
        return F'<{type(self).__name__}:{self!s}>'
class CompartmentNotFound(LookupError):
    """
    Raised by `refinery.lib.executable.Executable.lookup_location` when no
    `refinery.lib.executable.Segment` contains the requested location. The location and its
    type remain available as the attributes `location` and `location_type`.
    """

    def __init__(self, lt: LT, location: int):
        self.location_type = lt
        self.location = location
        message = F'Unable to find a segment that contains the {lt.value} 0x{location:X}.'
        super().__init__(message)
class Executable(ABC):
    """
    An abstract representation of a parsed executable in memory. Format specific subclasses
    implement the abstract methods; this base class provides address translation, reading
    memory by virtual address, and the synthesis of sections for unmapped file regions.
    """

    _data: ByteStr
    _head: Union[PEFile, ELFFile, MachO]
    _base: Optional[int]
    _type: ET

    blob: ClassVar[bool] = False

    @classmethod
    def Load(cls: Type[_T], data: ByteStr, base: Optional[int] = None) -> _T:
        """
        Uses the `refinery.lib.executable.exeroute` function to parse the input data with one of
        the following specializations of this class:

        - `refinery.lib.executable.ExecutableELF`
        - `refinery.lib.executable.ExecutableMachO`
        - `refinery.lib.executable.ExecutablePE`
        """
        return exeroute(
            data,
            ExecutableELF,
            ExecutableMachO,
            ExecutablePE,
            data,
            base,
        )

    def __init__(self, head: Union[PEFile, ELFFile, MachO], data: ByteStr, base: Optional[int] = None):
        self._data = data
        self._head = head
        self._base = base

    @property
    def head(self):
        """
        Return the internal object representing the parsed file format header.
        """
        return self._head

    @property
    def type(self):
        """
        Returns the `refinery.lib.executable.ET` instance that identifies the executable type.
        """
        return self._type

    def __getitem__(self, key: Union[int, slice, Range]):
        return self.read(key)

    def __contains__(self, key: Union[int, slice, Range]):
        # An address (range) is contained if and only if it can be read.
        try:
            self.read(key)
        except LookupError:
            return False
        else:
            return True

    def read(self, key: Union[int, slice, Range]) -> memoryview:
        """
        Read data from the binary based on a given address. If the input `key` is a single integer,
        the function reads a single byte from the given address. Slices and ranges are given in
        virtual addresses; a slice without a stop reads until the physical end of the containing
        section. A `LookupError` is raised when the slice has no start, ends before it starts,
        extends beyond the containing section, or starts at an unmapped address.
        """
        if isinstance(key, Range):
            key = slice(key.lower, key.upper)
        elif isinstance(key, int):
            key = slice(key, key + 1, 1)
        if key.start is None:
            raise LookupError(R'Slice indices with unspecified start are not supported.')
        if key.stop is not None and key.stop < key.start:
            raise LookupError(R'The slice end must lie after the slice start.')
        box = self.location_from_address(key.start)
        if key.stop is None:
            end = box.physical.box.upper
        elif key.stop <= box.virtual.box.upper:
            end = box.physical.position + (key.stop - key.start)
        else:
            raise LookupError(F'The end address 0x{key.stop:X} is beyond the section end 0x{box.virtual.box.upper:X}.')
        return self.data[box.physical.position:end]

    @staticmethod
    def ascii(string: Union[str, ByteStr]) -> str:
        """
        If the input `string` is a `str` instance, the function returns the input value. Byte
        strings are truncated to the first occurrence of a null byte and then decoded using
        the `latin-1` codec.
        """
        if isinstance(string, str):
            return string
        # Converting to bytes first makes this work for every buffer type; a memoryview, for
        # example, has no decode method of its own.
        head, _, _ = bytes(string).partition(B'\0')
        return head.decode('latin-1')

    def rebase_usr_to_img(self, addr: int) -> int:
        """
        Translate an address relative to the user defined base to the image defined base.
        """
        return addr - self.base + self.image_defined_base()

    def rebase_img_to_usr(self, addr: int) -> int:
        """
        Translate an address relative to the image defined base to the user defined base.
        """
        return addr - self.image_defined_base() + self.base

    @property
    def base(self) -> int:
        """
        Return the base address when mapped to memory. This is either the value passed to the
        constructor, or `refinery.lib.executable.Executable.image_defined_base`.
        """
        if self._base is None:
            return self.image_defined_base()
        return self._base

    @base.setter
    def base(self, value: int):
        self._base = value

    @property
    def data(self) -> memoryview:
        """
        Return a (readonly) view to the raw bytes of the executable image.
        """
        view = memoryview(self._data)
        # memoryview.toreadonly is only available starting with Python 3.8.
        if sys.version_info >= (3, 8):
            view = view.toreadonly()
        return view

    @property
    def pointer_size(self) -> int:
        """
        Return the size of a pointer in bits. Depends on `refinery.lib.executable.Executable.arch`.
        """
        return self.arch().pointer_size

    def location_from_address(self, address: int) -> Location:
        """
        Return a `refinery.lib.executable.Location` from the given address.
        """
        return self.lookup_location(address, LT.VIRTUAL)

    def location_from_offset(self, offset: int) -> Location:
        """
        Return a `refinery.lib.executable.Location` from the given file offset.
        """
        return self.lookup_location(offset, LT.PHYSICAL)

    def image_defined_size(self) -> int:
        """
        Returns the size of the executable on disk, computed as the largest physical upper bound
        of any segment or section.
        """
        size = 0
        for segment in self.segments():
            size = max(size, segment.physical.upper)
        for section in self.sections():
            size = max(size, section.physical.upper)
        return size

    def image_defined_address_space(self) -> Range:
        """
        Returns the `refinery.lib.executable.Range` of virtual addresses that is spanned by all
        segments and sections of the executable in memory. A `RuntimeError` is raised when the
        computed upper bound is less than the computed lower bound.
        """
        upper = 0
        lower = INF
        for segment in self.segments():
            upper = max(upper, segment.virtual.upper)
            lower = min(lower, segment.virtual.lower)
        for section in self.sections():
            upper = max(upper, section.virtual.upper)
            lower = min(lower, section.virtual.lower)
        if upper < lower:
            raise RuntimeError(F'The computed address space upper bound 0x{upper:X} is less than the computed lower bound 0x{lower:X}.')
        return Range(lower, upper)

    def lookup_location(self, location: int, lt: LT) -> Location:
        """
        For an address or file offset, compute the corresponding `refinery.lib.executable.Location`.
        Sections are searched before segments and the first match wins. Raises
        `refinery.lib.executable.CompartmentNotFound` when nothing contains the location.
        """
        for part in itertools.chain(self.sections(), self.segments()):
            phys = part.physical
            virt = part.virtual
            if lt is LT.PHYSICAL and location in phys:
                return Location(
                    BoxedOffset(phys, location),
                    BoxedOffset(virt, virt.lower + location - phys.lower)
                )
            if lt is LT.VIRTUAL and location in virt:
                return Location(
                    BoxedOffset(phys, phys.lower + location - virt.lower),
                    BoxedOffset(virt, location)
                )
        raise CompartmentNotFound(lt, location)

    @abstractmethod
    def symbols(self) -> Generator[Symbol, None, None]:
        """
        Generates a list of symbols in the executable.
        """
        ...

    @abstractmethod
    def byte_order(self) -> BO:
        """
        The byte order used by the architecture of this executable.
        """
        ...

    @abstractmethod
    def image_defined_base(self) -> int:
        """
        The image defined base address when mapped to memory.
        """
        ...

    @abstractmethod
    def arch(self) -> Arch:
        """
        The architecture for which this executable was built.
        """
        ...

    @abstractmethod
    def _sections(self) -> Generator[Section, None, None]:
        # Format specific: generate the sections that the file itself declares.
        ...

    @abstractmethod
    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        # Format specific: generate the segments that the file itself declares.
        ...

    def segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        """
        An iterable of all `refinery.lib.executable.Segment`s in this executable.
        """
        yield from self._segments(populate_sections=populate_sections)

    def sections(self) -> Generator[Section, None, None]:
        """
        An iterable of all `refinery.lib.executable.Section`s in this executable. After the
        sections declared by the file, one synthetic section is generated for every region of
        the file that no declared section covers. These are named `synthesized/.header`,
        `synthesized/.overlay`, `synthesized/.gap-...`, or `synthesized/.zeros-...`.
        """
        ib = self.image_defined_base()
        missing = [Range(0, len(self._data))]
        # Maps the physical start of each declared section to its virtual start.
        offsets = {}
        for section in self._sections():
            missing = [piece for patch in missing for piece in patch - section.physical]
            offsets[section.physical.lower] = section.virtual.lower
            yield section
        if not missing:
            return
        # File offset zero serves as the anchor for gaps that precede every declared section.
        # NOTE(review): this anchor is the image defined base, not the user defined base that
        # declared sections are rebased to - confirm that this is intended.
        offsets.setdefault(0, ib)
        for gap in missing:
            # Place each gap relative to the nearest known section start at or before it. The
            # generator is never empty because zero is a key and no gap starts below zero.
            p_floor = max(k for k in offsets if k <= gap.lower)
            v_floor = offsets[p_floor]
            v_lower = v_floor + (gap.lower - p_floor)
            v_upper = v_lower + len(gap)
            if gap.lower == 0:
                name = R'synthesized/.header'
            elif gap.upper == len(self._data):
                name = R'synthesized/.overlay'
            elif any(self._data[gap.slice()]):
                name = F'synthesized/.gap-{gap.lower:08X}-{gap.upper:08X}'
            else:
                name = F'synthesized/.zeros-{gap.lower:08X}'
            yield Section(name, gap, Range(v_lower, v_upper), True)
class ExecutableCodeBlob(Executable):
    """
    A stand-in specialization of `refinery.lib.executable.Executable` for an unstructured blob
    of (shell)code. There is no file header to consult, so everything that a header would
    normally provide has to be passed to the constructor.
    """

    _head: Type[None] = None
    _type = ET.BLOB
    _byte_order: BO
    _arch: Arch

    blob = True

    def __init__(self, data, base=None, arch: Arch = Arch.X32, byte_order: BO = BO.LE):
        super().__init__(None, data, base)
        self._arch = arch
        self._byte_order = byte_order

    def image_defined_base(self) -> int:
        # Without a header, the image does not define any base address.
        return 0

    def byte_order(self) -> BO:
        return self._byte_order

    def arch(self) -> Arch:
        return self._arch

    def symbols(self) -> Generator[Symbol, None, None]:
        # The only known symbol is the unnamed one at address zero.
        yield Symbol(0)

    def _sections(self) -> Generator[Section, None, None]:
        # The entire blob is one section that is mapped at the base address.
        size = len(self.data)
        start = self.base
        physical = Range(0, size)
        virtual = Range(start, start + size)
        yield Section('blob', physical, virtual, False)

    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        # Each section doubles as a segment of its own.
        yield from (
            section.as_segment(populate_sections=populate_sections)
            for section in self.sections()
        )
class ExecutablePE(Executable):
    """
    A Windows Portable Executable (PE) file.
    """

    _head: PEFile
    _type = ET.PE

    def image_defined_base(self) -> int:
        return self._head.OPTIONAL_HEADER.ImageBase

    def image_defined_size(self, overlay=True, sections=True, directories=True, certificate=True, memdump=False) -> int:
        """
        This function determines the size of a PE file, optionally taking into account the
        pefile module overlay computation, section information, data directory information,
        and certificate entries. The result is the maximum of all enabled estimates. The
        `certificate` option only has an effect when `directories` is enabled. With `memdump`
        enabled, the upper bound of the virtual address space is considered as well.
        """
        pe = self._head
        # The overlay start offset may be None when the file has no overlay.
        overlay_value = (pe.get_overlay_data_start_offset() or 0) if overlay else 0
        sections_value = super().image_defined_size() if sections else 0
        memdump_value = self.image_defined_address_space().upper if memdump else 0
        directories_value = 0
        if directories:
            # The security entry is identified by name rather than by index so that a truncated
            # data directory table does not cause an IndexError.
            for d in pe.OPTIONAL_HEADER.DATA_DIRECTORY:
                if d.name == 'IMAGE_DIRECTORY_ENTRY_SECURITY':
                    if not certificate:
                        continue
                    # The certificate overlay is given as a file offset
                    # rather than a virtual address.
                    end = d.VirtualAddress + d.Size
                else:
                    end = pe.get_offset_from_rva(d.VirtualAddress) + d.Size
                directories_value = max(directories_value, end)
        return max(
            overlay_value,
            sections_value,
            directories_value,
            memdump_value
        )

    def _sections(self) -> Generator[Section, None, None]:
        sections: Iterable[SectionStructure] = iter(self._head.sections)
        ib = self.image_defined_base()
        for section in sections:
            p_lower = section.PointerToRawData
            p_upper = p_lower + section.SizeOfRawData
            # Section addresses are relative to the image base; they are made absolute first and
            # then translated to the user defined base.
            v_lower = section.VirtualAddress + ib
            v_lower = self.rebase_img_to_usr(v_lower)
            v_upper = v_lower + section.Misc_VirtualSize
            p = Range(p_lower, p_upper)
            v = Range(v_lower, v_upper)
            yield Section(self.ascii(section.Name), p, v, False)

    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        # PE files have no separate notion of segments: every section is its own segment.
        for section in self.sections():
            yield section.as_segment(populate_sections)

    def arch(self) -> Arch:
        """
        Map the machine type from the file header to an `refinery.lib.executable.Arch`. Raises
        a `LookupError` for machine types that are unknown or not supported.
        """
        arch = self._head.FILE_HEADER.Machine
        arch = MACHINE_TYPE[arch]
        try:
            return {
                'IMAGE_FILE_MACHINE_I386'   : Arch.X32,
                'IMAGE_FILE_MACHINE_AMD64'  : Arch.X64,
                'IMAGE_FILE_MACHINE_ARM'    : Arch.ARM32,
                'IMAGE_FILE_MACHINE_THUMB'  : Arch.ARM32,
                # ARMNT denotes 32-bit ARM Thumb-2; 64-bit ARM has its own machine type.
                'IMAGE_FILE_MACHINE_ARMNT'  : Arch.ARM32,
                'IMAGE_FILE_MACHINE_ARM64'  : Arch.ARM64,
                'IMAGE_FILE_MACHINE_MIPS16' : Arch.MIPS16,
            }[arch]
        except KeyError:
            raise LookupError(F'Unsupported architecture: {arch}')

    def byte_order(self) -> BO:
        return BO.LE

    def symbols(self) -> Generator[Symbol, None, None]:
        """
        Generates the entry point as an unnamed symbol, followed by all named exports and then
        all regular and delay-loaded imports. Entry point and export addresses are relative to
        the image defined base. Imports are not marked as exported; imports by ordinal have
        no name.
        """
        base = self.image_defined_base()
        head = self._head
        yield Symbol(head.OPTIONAL_HEADER.AddressOfEntryPoint + base)
        # Only the directories that are required here are parsed.
        head.parse_data_directories(directories=[
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT'],
        ])
        # A missing export directory must not prevent imports from being listed.
        try:
            exports = head.DIRECTORY_ENTRY_EXPORT.symbols
        except AttributeError:
            exports = ()
        for exp in exports:
            name = exp.name
            if not name:
                continue
            # Decoding via latin-1 cannot fail on malformed names the way strict ASCII does.
            yield Symbol(exp.address + base, self.ascii(name))
        for itype in ('IMPORT', 'DELAY_IMPORT'):
            # The parsed directory attribute is itself the list of per-library descriptors;
            # each descriptor then lists the imports from that library.
            try:
                descriptors = getattr(head, F'DIRECTORY_ENTRY_{itype}')
            except AttributeError:
                continue
            for idd in descriptors:
                for imp in idd.imports:
                    if name := imp.name:
                        name = self.ascii(name)
                    yield Symbol(imp.address, name, exported=False)
class ExecutableELF(Executable):
    """
    A file in Executable and Linkable Format (ELF).
    """

    _head: ELFFile
    _type = ET.ELF

    def image_defined_base(self) -> int:
        # The lowest virtual address of any loadable segment.
        return min(self._pt_load(), default=0)

    def _pt_load(self):
        """
        Return a dictionary that maps the virtual address of every PT_LOAD segment to that
        segment. Raises a `LookupError` when the file has no segments, or none of type PT_LOAD.
        """
        # The result is cached on the instance. A shared cache on the method would keep the
        # most recent instance alive and be evicted whenever a second instance is queried.
        try:
            return self._pt_load_cache
        except AttributeError:
            pass
        PT_LOAD = {}
        if not self._head.num_segments():
            raise LookupError('The elftools parser did not find any segments in this file.')
        for segment in self._head.iter_segments():
            if segment.header.p_type == 'PT_LOAD':
                PT_LOAD[segment.header.p_vaddr] = segment
        if not PT_LOAD:
            raise LookupError('Could not find any PT_LOAD segment.')
        self._pt_load_cache = PT_LOAD
        return PT_LOAD

    def _convert_section(self, section) -> Section:
        p_lower = section['sh_offset']
        v_lower = section['sh_addr']
        v_lower = self.rebase_img_to_usr(v_lower)
        # In memory, the section size is rounded up to the section alignment.
        v_upper = v_lower + align(section['sh_addralign'], section.data_size)
        p_upper = p_lower + section.data_size
        return Section(self.ascii(section.name), Range(p_lower, p_upper), Range(v_lower, v_upper), False)

    def _sections(self) -> Generator[Section, None, None]:
        for section in self._head.iter_sections():
            if section.is_null():
                continue
            yield self._convert_section(section)

    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        for segment in self._head.iter_segments():
            header = segment.header
            p_lower = header.p_offset
            v_lower = header.p_vaddr
            v_lower = self.rebase_img_to_usr(v_lower)
            p_upper = p_lower + header.p_filesz
            v_upper = v_lower + header.p_memsz
            if not populate_sections:
                sections = None
            else:
                sections = [
                    self._convert_section(section)
                    for section in self._head.iter_sections()
                    if segment.section_in_segment(section)
                ]
            yield Segment(Range(p_lower, p_upper), Range(v_lower, v_upper), sections)

    def arch(self) -> Arch:
        """
        Map the machine type from the ELF header to an `refinery.lib.executable.Arch`. Raises a
        `LookupError` for machine types that are not supported.
        """
        arch = self._head.header['e_machine']
        try:
            return {
                'EM_SPARC'   : Arch.SPARC32,
                'EM_SPARCV9' : Arch.SPARC64,
                'EM_386'     : Arch.X32,
                'EM_X86_64'  : Arch.X64,
                'EM_MIPS'    : Arch.MIPS32,
                'EM_PPC'     : Arch.PPC32,
                'EM_PPC64'   : Arch.PPC64,
                'EM_ARM'     : Arch.ARM32,
                'EM_AARCH64' : Arch.ARM64,
            }[arch]
        except KeyError:
            raise LookupError(F'Unsupported architecture: {arch}')

    def byte_order(self) -> BO:
        return BO.LE if self.head.little_endian else BO.BE

    def symbols(self) -> Generator[Symbol, None, None]:
        """
        Generates the entry point followed by the contents of all symbol tables, sorted by
        address. There is at most one symbol per address: when several symbols share an address,
        the one with the longest name is kept. The symbol type, binding, and size are provided
        as meta information.
        """
        ee = self._head.header['e_entry']
        symbols = {ee: Symbol(ee)}
        # Best effort: when the section table cannot be parsed, the entry point is still
        # reported rather than nothing at all.
        try:
            sections = list(self._head.iter_sections())
        except Exception:
            sections = []
        for section in sections:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                continue
            for sym in section.iter_symbols():
                st_name = sym.name
                st_type = sym['st_info']['type']
                st_shndx = sym['st_shndx']
                # Unnamed section symbols inherit the name of the section they refer to. The
                # index is only usable when it is an integer; the parser may also produce a
                # symbolic name for reserved indices.
                if (
                    st_type == 'STT_SECTION'
                    and isinstance(st_shndx, int)
                    and st_shndx < len(sections)
                    and sym['st_name'] == 0
                ):
                    try:
                        st_name = self._head.get_section(st_shndx).name
                    except Exception:
                        pass
                st_addr = sym['st_value']
                # Remove control characters from the name.
                st_name = re.sub('[\x01-\x1f]+', '', st_name)
                st_bind = sym['st_info']['bind']
                st_size = sym['st_size']
                try:
                    prev = symbols[st_addr]
                except KeyError:
                    insert = True
                else:
                    insert = prev.name is None or len(prev.name) < len(st_name)
                if insert:
                    symbols[st_addr] = Symbol(
                        st_addr,
                        st_name,
                        st_type == 'STT_FUNC',
                        st_bind == 'STB_GLOBAL',
                        dict(
                            st_type=st_type,
                            st_bind=st_bind,
                            st_size=st_size,
                        )
                    )
        for addr in sorted(symbols):
            yield symbols[addr]
class ExecutableMachO(Executable):
    """
    A MachO-executable.
    """

    _head: MachO
    _type = ET.MachO

    def symbols(self) -> Generator[Symbol, None, None]:
        # Symbol extraction is not implemented for this format.
        raise NotImplementedError

    def image_defined_base(self) -> int:
        """
        The lowest nonzero virtual address of any segment with file contents, or zero when
        there is no such segment.
        """
        # The result is cached on the instance. A shared cache on the method would keep the
        # most recent instance alive and be evicted whenever a second instance is queried.
        try:
            return self._image_defined_base
        except AttributeError:
            pass
        base = min(
            (seg.vmaddr for seg, _ in self._macho_segments() if seg.vmaddr > 0),
            default=0,
        )
        self._image_defined_base = base
        return base

    def _macho_segments(self):
        # Generates pairs of a raw segment command and its raw sections, for every segment of
        # every header that occupies space in the file.
        headers: List[MachOHeader] = self._head.headers
        for header in headers:
            for cmd, segment, sections in header.commands:
                cmd: load_command
                if not cmd.get_cmd_name().startswith('LC_SEGMENT'):
                    continue
                if segment.filesize <= 0:
                    continue
                yield segment, sections

    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        for segment, sections in self._macho_segments():
            v_lower = segment.vmaddr
            v_lower = self.rebase_img_to_usr(v_lower)
            p_lower = segment.fileoff
            v_upper = v_lower + segment.vmsize
            p_upper = p_lower + segment.filesize
            segment_name = self.ascii(segment.segname)
            if not populate_sections:
                sections = None
            else:
                sections = [
                    self._convert_section(section, segment_name)
                    for section in sections
                ]
            yield Segment(
                Range(p_lower, p_upper),
                Range(v_lower, v_upper),
                sections,
                segment_name
            )

    def _sections(self) -> Generator[Section, None, None]:
        # Every segment is reported as a section of its own, followed by its actual sections.
        for segment in self.segments(populate_sections=True):
            yield segment.as_section()
            yield from segment.sections

    def _convert_section(self, section, segment: str) -> Section:
        name = self.ascii(section.sectname)
        p_lower = section.offset
        v_lower = section.addr
        v_lower = self.rebase_img_to_usr(v_lower)
        p_upper = p_lower + section.size
        # The alignment field holds the exponent of a power of two, not a byte count. The
        # exponent is capped so that a malformed header cannot force a gigantic integer.
        alignment = 1 << min(section.align, 63)
        v_upper = v_lower + align(alignment, section.size)
        return Section(F'{segment}/{name}', Range(p_lower, p_upper), Range(v_lower, v_upper), False)

    def arch(self) -> Arch:
        """
        Map the CPU type of the first header to an `refinery.lib.executable.Arch`. Raises a
        `LookupError` for CPU types that are unknown or not supported.
        """
        cputype = self._head.headers[0].header.cputype
        try:
            arch = _MACHO_ARCHS[cputype]
        except KeyError:
            arch = F'UNKNOWN(0x{cputype:X})'
        try:
            return {
                'X86'       : Arch.X32,
                'X86_64'    : Arch.X64,
                'ARM'       : Arch.ARM32,
                'ARM64'     : Arch.ARM64,
                'SPARC'     : Arch.SPARC32,
                'POWERPC'   : Arch.PPC32,
                'POWERPC64' : Arch.PPC64,
            }[arch]
        except KeyError:
            raise LookupError(F'Unsupported architecture: {arch}')

    def byte_order(self) -> BO:
        # The byte order is taken from the first header.
        headers: List[MachOHeader] = self._head.headers
        return {
            '<': BO.LE,
            '>': BO.BE,
        }[headers[0].endian]
Functions
def align(alignment, value, down=False)
-
Given an alignment size and an integer value, compute the byte boundary to where this value would be aligned. By default, the next higher address that satisfies the alignment is computed; the optional parameter `down` can be set to `True` to instead return the next lower one.
Expand source code Browse git
def align(alignment: int, value: int, down=False) -> int: """ Given an alignment size and an integer value, compute the byte boundary to where this value would be aligned. By default, the next higher address that satisfies the alignment is computed; The optional parameter `down` can be set to `True` to instead return the next lower one. """ if alignment >= 2: incomplete_chunk_count = value % alignment if incomplete_chunk_count > 0: if not down: value += alignment - incomplete_chunk_count else: value -= incomplete_chunk_count return value
def exeroute(data, handler_elf, handler_macho, handler_pe, *args, **kwargs)
-
Given some input `data` representing the raw bytes of an `Executable`, route this data to one of three handlers for the ELF, MachO, or PE format. All additional (keyword) arguments are forwarded to the handler. The function checks for well-known signature bytes and magic numbers to route the data.
Expand source code Browse git
def exeroute( data : bytearray, handler_elf : Callable[_P, _T], handler_macho : Callable[_P, _T], handler_pe : Callable[_P, _T], *args, **kwargs ) -> _T: """ Given some input `data` representing the raw bytes of an `refinery.lib.executable.Executable`, route this data to one of three handlers for the ELF, MachO, or PE format. All additional (keyword) arguments are forwarded to the handler. The function checks for well-known signature bytes and magic numbers to route the data. """ if data[:2] == B'MZ': try: parsed = PEFile(data=data, fast_load=True) except Exception as E: raise ParsingFailure('PE') from E else: return handler_pe(parsed, *args, **kwargs) if data[:4] == B'\x7FELF': try: parsed = ELFFile(MemoryFile(data)) except Exception as E: raise ParsingFailure('ELF') from E else: return handler_elf(parsed, *args, **kwargs) if set(data[:4]) <= {0xFE, 0xED, 0xFA, 0xCE, 0xCF}: class InMemoryMachO(MachO): def __init__(self): super().__init__(DEVNULL) def load(self, _): return super().load(MemoryFile(data)) try: parsed = InMemoryMachO() assert parsed.headers except Exception as E: raise ParsingFailure('MachO') from E else: return handler_macho(parsed, *args, **kwargs) raise ValueError('Unknown executable format')
Classes
class ParsingFailure (kind)
-
Exception generated for parsing errors of an input `Executable`.
Expand source code Browse git
class ParsingFailure(ValueError): """ Exception generated for parsing errors of an input `refinery.lib.executable.Executable`. """ def __init__(self, kind): super().__init__(F'unable to parse input as {kind} file')
Ancestors
- builtins.ValueError
- builtins.Exception
- builtins.BaseException
class Range (lower, upper)
-
A range of bytes specified by a lower and an upper bound. A `Range` can be subtracted from another one to return a list of ranges that are the result of removing the former from the latter. This operation is the only reason for using a custom class over the builtin `range` object, which does not support this.
Expand source code Browse git
class Range(NamedTuple): """ A range of bytes specified by a lower and an upper bound. A `refinery.lib.executable.Range` can be subtracted from another one to return a list of ranges that are the result of removing the former from the latter. This operation is the only reason for using a custom class over the builtin `range` object, which does not support this. """ lower: int upper: int def range(self): """ Convertsion to a `range` object. """ return range(self.lower, self.upper) def slice(self): """ Conversion to a `slice` object. """ return slice(self.lower, self.upper) def __len__(self): return self.upper - self.lower def __contains__(self, addr: int): return self.lower <= addr < self.upper def __str__(self): return F'0x{self.lower:X}:0x{self.upper:X}' def __repr__(self): return F'<{self.__class__.__name__}:{self!s}>' def __sub__(self, them: Range) -> List[Range]: pieces = [] if self.lower < them.lower: pieces.append(Range(self.lower, min(them.lower, self.upper))) if them.upper < self.upper: pieces.append(Range(max(self.lower, them.upper), self.upper)) return pieces
Ancestors
- builtins.tuple
Instance variables
var lower
-
Alias for field number 0
var upper
-
Alias for field number 1
Methods
def range(self)
-
Conversion to a `range` object.
Expand source code Browse git
def range(self): """ Convertsion to a `range` object. """ return range(self.lower, self.upper)
def slice(self)
-
Conversion to a `slice` object.
Expand source code Browse git
def slice(self): """ Conversion to a `slice` object. """ return slice(self.lower, self.upper)
class BoxedOffset (box, position)
-
An offset together with a range of available bytes at that location.
Expand source code Browse git
class BoxedOffset(NamedTuple): """ An offset together with a range of available bytes at that location. """ box: Range position: int def __str__(self): return F'0x{self.position:X} in {self.box!s}' def __repr__(self): return F'<{self.__class__.__name__}:{self!s}>'
Ancestors
- builtins.tuple
Instance variables
var box
-
Alias for field number 0
var position
-
Alias for field number 1
class Location (physical, virtual)
-
A location in an `Executable`. Contains a `BoxedOffset` for both its physical and virtual range of bytes.
Expand source code Browse git
class Location(NamedTuple): """ A location in an `refinery.lib.executable.Executable`. Contains `refinery.lib.executable.BoxedOffset` for both its physical and virtual range of bytes. """ physical: BoxedOffset virtual: BoxedOffset def __str__(self): return F'V={self.virtual!s}; P={self.physical!s}' def __repr__(self): return F'<{self.__class__.__name__}:{self!s}>'
Ancestors
- builtins.tuple
Instance variables
var physical
-
Alias for field number 0
var virtual
-
Alias for field number 1
class ArchItem (id, pointer_size)
-
An item of the `Arch` enumeration. It is used to store the register size in bits for a given architecture.
Expand source code Browse git
class ArchItem(NamedTuple): """ An item of the `refinery.lib.executable.Arch` enumeration. It is used to store the register size in bits for a given architecture. """ id: int pointer_size: int @classmethod def New(cls, pointer_size: int): return cls(uuid4(), pointer_size)
Ancestors
- builtins.tuple
Subclasses
Static methods
def New(pointer_size)
-
Expand source code Browse git
@classmethod def New(cls, pointer_size: int): return cls(uuid4(), pointer_size)
Instance variables
var id
-
Alias for field number 0
var pointer_size
-
Alias for field number 1
class Arch (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration of supported architectures and their register sizes.
Expand source code Browse git
class Arch(ArchItem, Enum):
    """
    Enumerates the supported architectures. Every member is an
    `refinery.lib.executable.ArchItem` and therefore exposes its register size in bits via the
    `pointer_size` field.
    """
    # Intel x86 family
    X32 = ArchItem.New(32)
    X64 = ArchItem.New(64)
    # ARM family
    ARM32 = ArchItem.New(32)
    ARM64 = ArchItem.New(64)
    # MIPS family
    MIPS16 = ArchItem.New(16)
    MIPS32 = ArchItem.New(32)
    MIPS64 = ArchItem.New(64)
    # PowerPC family
    PPC32 = ArchItem.New(32)
    PPC64 = ArchItem.New(64)
    # SPARC family
    SPARC32 = ArchItem.New(32)
    SPARC64 = ArchItem.New(64)
Ancestors
- ArchItem
- builtins.tuple
- enum.Enum
Class variables
var X32
var X64
var ARM32
var ARM64
var MIPS16
var MIPS32
var MIPS64
var PPC32
var PPC64
var SPARC32
var SPARC64
Inherited members
class LT (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration to distinguish between physical and virtual address types.
Expand source code Browse git
class LT(str, Enum):
    """
    Location type: tells a physical file offset apart from a virtual memory address.
    """
    PHYSICAL = 'offset'
    VIRTUAL = 'address'
Ancestors
- builtins.str
- enum.Enum
Class variables
var PHYSICAL
var VIRTUAL
class ET (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration to distinguish various executable types.
Expand source code Browse git
class ET(str, Enum):
    """
    Executable type: identifies the file format of an executable.
    """
    ELF = 'ELF'
    MachO = 'MachO'
    PE = 'PE'
    BLOB = 'BLOB'
Ancestors
- builtins.str
- enum.Enum
Class variables
var ELF
var MachO
var PE
var BLOB
class BO (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration to distinguish big and little endian.
Expand source code Browse git
class BO(str, Enum):
    """
    Byte order: tells big endian apart from little endian.
    """
    BE = 'big'
    LE = 'little'
Ancestors
- builtins.str
- enum.Enum
Class variables
var BE
var LE
class Section (name, physical, virtual, synthetic)
-
An abstract representation of a section inside an
Executable
.Expand source code Browse git
class Section(NamedTuple):
    """
    Format-independent description of one section of an `refinery.lib.executable.Executable`.
    """
    name: str
    physical: Range
    virtual: Range
    synthetic: bool

    def as_segment(self, populate_sections=False) -> Segment:
        """
        Convert this section to a `refinery.lib.executable.Segment` with the same name and
        ranges. When `populate_sections` is set, the segment lists this section as its only
        member; otherwise its section list is `None`.
        """
        members = None
        if populate_sections:
            members = [self]
        return Segment(self.physical, self.virtual, members, self.name)

    def __str__(self):
        segment = self.as_segment()
        return str(segment)

    def __repr__(self):
        kind = type(self).__name__
        return F'<{kind}:{self!s}>'
Ancestors
- builtins.tuple
Instance variables
var name
-
Alias for field number 0
var physical
-
Alias for field number 1
var virtual
-
Alias for field number 2
var synthetic
-
Alias for field number 3
Methods
def as_segment(self, populate_sections=False)
-
Expand source code Browse git
def as_segment(self, populate_sections=False) -> Segment:
    """
    Convert this section to a `refinery.lib.executable.Segment` with the same name and ranges.
    When `populate_sections` is set, the segment lists this section as its only member;
    otherwise its section list is `None`.
    """
    members = None
    if populate_sections:
        members = [self]
    return Segment(self.physical, self.virtual, members, self.name)
class Symbol (address, name=None, code=True, exported=True, meta=None)
-
Symbol(address, name, code, exported, meta)
Expand source code Browse git
class Symbol(NamedTuple):
    """
    A named address inside an executable. All fields except the address are optional.
    """
    address: int
    name: Optional[str] = None
    code: bool = True
    exported: bool = True
    meta: Optional[dict] = None

    def get_name(self, default: str = 'entry'):
        """
        Return the symbol name, or `default` when the name is missing or empty.
        """
        return self.name if self.name else default

    def get_meta(self):
        """
        Return the metadata dictionary, or a new empty one when none (or an empty one) is stored.
        """
        return self.meta if self.meta else {}
Ancestors
- builtins.tuple
Instance variables
var address
-
Alias for field number 0
var name
-
Alias for field number 1
var code
-
Alias for field number 2
var exported
-
Alias for field number 3
var meta
-
Alias for field number 4
Methods
def get_name(self, default='entry')
-
Expand source code Browse git
def get_name(self, default: str = 'entry'):
    """
    Return the symbol name, or `default` when the name is missing or empty.
    """
    return self.name if self.name else default
def get_meta(self)
-
Expand source code Browse git
def get_meta(self):
    """
    Return the metadata dictionary, or a new empty one when none (or an empty one) is stored.
    """
    return self.meta if self.meta else {}
class Segment (physical, virtual, sections, name=None)
-
An abstract representation of a segment inside an
Executable
.Expand source code Browse git
class Segment(NamedTuple):
    """
    Format-independent description of one segment of an `refinery.lib.executable.Executable`.
    """
    physical: Range
    virtual: Range
    sections: Optional[List[Section]]
    name: Optional[str] = None

    def as_section(self) -> Section:
        """
        Convert this segment to a non-synthetic `refinery.lib.executable.Section` with the same
        name and ranges. Raises a `ValueError` when the segment has no name.
        """
        label = self.name
        if label is not None:
            return Section(label, self.physical, self.virtual, False)
        raise ValueError('Unable to convert nameless segment to section.')

    def __str__(self):
        ranges = F'P=[{self.physical!s}];V=[{self.virtual!s}]'
        if self.name is None:
            return ranges
        return F'{self.name}:{ranges}'

    def __repr__(self):
        kind = type(self).__name__
        return F'<{kind}:{self!s}>'
Ancestors
- builtins.tuple
Instance variables
var physical
-
Alias for field number 0
var virtual
-
Alias for field number 1
var sections
-
Alias for field number 2
var name
-
Alias for field number 3
Methods
def as_section(self)
-
Expand source code Browse git
def as_section(self) -> Section:
    """
    Convert this segment to a non-synthetic `refinery.lib.executable.Section` with the same name
    and ranges. Raises a `ValueError` when the segment has no name.
    """
    label = self.name
    if label is not None:
        return Section(label, self.physical, self.virtual, False)
    raise ValueError('Unable to convert nameless segment to section.')
class CompartmentNotFound (lt, location)
-
This exception is raised when
Executable.lookup_location()
fails to find a Segment
that contains the given location.Expand source code Browse git
class CompartmentNotFound(LookupError):
    """
    Raised by `refinery.lib.executable.Executable.lookup_location` when no
    `refinery.lib.executable.Segment` contains the requested location. The location and its type
    are kept on the exception as `location` and `location_type`.
    """
    def __init__(self, lt: LT, location: int):
        message = F'Unable to find a segment that contains the {lt.value} 0x{location:X}.'
        super().__init__(message)
        self.location = location
        self.location_type = lt
Ancestors
- builtins.LookupError
- builtins.Exception
- builtins.BaseException
class Executable (head, data, base=None)
-
An abstract representation of a parsed executable in memory.
Expand source code Browse git
class Executable(ABC):
    """
    An abstract representation of a parsed executable in memory.
    """

    _data: ByteStr
    _head: Union[PEFile, ELFFile, MachO]
    _base: Optional[int]
    _type: ET

    blob: ClassVar[bool] = False

    @classmethod
    def Load(cls: Type[_T], data: ByteStr, base: Optional[int] = None) -> _T:
        """
        Uses the `refinery.lib.executable.exeroute` function to parse the input data with one of
        the following specializations of this class:

        - `refinery.lib.executable.ExecutableELF`
        - `refinery.lib.executable.ExecutableMachO`
        - `refinery.lib.executable.ExecutablePE`
        """
        return exeroute(
            data,
            ExecutableELF,
            ExecutableMachO,
            ExecutablePE,
            data,
            base,
        )

    def __init__(self, head: Union[PEFile, ELFFile, MachO], data: ByteStr, base: Optional[int] = None):
        self._data = data
        self._head = head
        self._base = base

    @property
    def head(self):
        """
        Return the internal object representing the parsed file format header.
        """
        return self._head

    @property
    def type(self):
        """
        Returns the `refinery.lib.executable.ET` instance that identifies the executable type.
        """
        return self._type

    def __getitem__(self, key: Union[int, slice, Range]):
        return self.read(key)

    def __contains__(self, key: Union[int, slice, Range]):
        # A key is contained if and only if it can be read without a lookup failure.
        try:
            self.read(key)
        except LookupError:
            return False
        else:
            return True

    def read(self, key: Union[int, slice, Range]) -> memoryview:
        """
        Read data from the binary based on a given address. If the input `key` is a single
        integer, the function reads a single byte from the given address. A `slice` or `Range`
        reads from its start address up to its end; a slice without an end reads until the end of
        the physical range that contains the start address. A `LookupError` is raised when the
        start is unspecified, when the end lies before the start, when the start address is not
        mapped, or when the end lies beyond the virtual range that contains the start.
        """
        if isinstance(key, Range):
            key = slice(key.lower, key.upper)
        elif isinstance(key, int):
            key = slice(key, key + 1, 1)
        if key.start is None:
            raise LookupError(R'Slice indices with unspecified start are not supported.')
        if key.stop is not None and key.stop < key.start:
            raise LookupError(R'The slice end must lie after the slice start.')
        box = self.location_from_address(key.start)
        if key.stop is None:
            end = box.physical.box.upper
        elif key.stop <= box.virtual.box.upper:
            end = box.physical.position + (key.stop - key.start)
        else:
            raise LookupError(F'The end address 0x{key.stop:X} is beyond the section end 0x{box.virtual.box.upper:X}.')
        return self.data[box.physical.position:end]

    @staticmethod
    def ascii(string: Union[str, ByteStr]) -> str:
        """
        If the input `string` is a `str` instance, the function returns the input value. Byte
        strings are truncated to the first occurrence of a null byte and then decoded using the
        `latin-1` codec. Any bytes-like input is accepted, including `memoryview` objects.
        """
        if isinstance(string, str):
            return string
        for k, b in enumerate(string):
            if b == 0:
                string = string[:k]
                break
        # Convert to bytes first because memoryview objects have no decode method.
        return bytes(string).decode('latin-1')

    def rebase_usr_to_img(self, addr: int) -> int:
        """
        Translate an address relative to `base` into one relative to the image defined base.
        """
        return addr - self.base + self.image_defined_base()

    def rebase_img_to_usr(self, addr: int) -> int:
        """
        Translate an address relative to the image defined base into one relative to `base`.
        """
        return addr - self.image_defined_base() + self.base

    @property
    def base(self) -> int:
        """
        Return the base address when mapped to memory. This is either the value passed to the
        constructor, or `refinery.lib.executable.Executable.image_defined_base`.
        """
        if self._base is None:
            return self.image_defined_base()
        return self._base

    @base.setter
    def base(self, value: int):
        self._base = value

    @property
    def data(self) -> memoryview:
        """
        Return a (readonly) view to the raw bytes of the executable image.
        """
        view = memoryview(self._data)
        if sys.version_info >= (3, 8):
            view = view.toreadonly()
        return view

    @property
    def pointer_size(self) -> int:
        """
        Return the size of a pointer in bits. Depends on `refinery.lib.executable.Executable.arch`.
        """
        return self.arch().pointer_size

    def location_from_address(self, address: int) -> Location:
        """
        Return a `refinery.lib.executable.Location` from the given address.
        """
        return self.lookup_location(address, LT.VIRTUAL)

    def location_from_offset(self, offset: int) -> Location:
        """
        Return a `refinery.lib.executable.Location` from the given file offset.
        """
        return self.lookup_location(offset, LT.PHYSICAL)

    def image_defined_size(self) -> int:
        """
        Returns the size of the executable on disk, computed as the largest physical upper bound
        of any segment or section.
        """
        size = 0
        for segment in self.segments():
            size = max(size, segment.physical.upper)
        for section in self.sections():
            size = max(size, section.physical.upper)
        return size

    def image_defined_address_space(self) -> Range:
        """
        Returns the range of virtual addresses that is spanned by all segments and sections of
        the executable in memory.
        """
        upper = 0
        lower = INF
        for segment in self.segments():
            upper = max(upper, segment.virtual.upper)
            lower = min(lower, segment.virtual.lower)
        for section in self.sections():
            upper = max(upper, section.virtual.upper)
            lower = min(lower, section.virtual.lower)
        if upper < lower:
            raise RuntimeError(F'The computed address space upper bound 0x{upper:X} is less than the computed lower bound 0x{lower:X}.')
        return Range(lower, upper)

    def lookup_location(self, location: int, lt: LT) -> Location:
        """
        For an address or file offset, compute the corresponding `refinery.lib.executable.Location`.
        Sections are searched before segments. Raises `refinery.lib.executable.CompartmentNotFound`
        when nothing contains the location.
        """
        for part in itertools.chain(self.sections(), self.segments()):
            phys = part.physical
            virt = part.virtual
            if lt is LT.PHYSICAL and location in phys:
                return Location(
                    BoxedOffset(phys, location),
                    BoxedOffset(virt, virt.lower + location - phys.lower)
                )
            if lt is LT.VIRTUAL and location in virt:
                return Location(
                    BoxedOffset(phys, phys.lower + location - virt.lower),
                    BoxedOffset(virt, location)
                )
        raise CompartmentNotFound(lt, location)

    @abstractmethod
    def symbols(self) -> Generator[Symbol, None, None]:
        """
        Generates a list of symbols in the executable.
        """
        ...

    @abstractmethod
    def byte_order(self) -> BO:
        """
        The byte order used by the architecture of this executable.
        """
        ...

    @abstractmethod
    def image_defined_base(self) -> int:
        """
        The image defined base address when mapped to memory.
        """
        ...

    @abstractmethod
    def arch(self) -> Arch:
        """
        The architecture for which this executable was built.
        """
        ...

    @abstractmethod
    def _sections(self) -> Generator[Section, None, None]:
        ...

    @abstractmethod
    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        ...

    def segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        """
        An iterable of all `refinery.lib.executable.Segment`s in this executable.
        """
        yield from self._segments(populate_sections=populate_sections)

    def sections(self) -> Generator[Section, None, None]:
        """
        An iterable of all `refinery.lib.executable.Section`s in this executable. After the
        sections reported by the format parser, one synthetic section is generated for every
        range of the file that none of them covers.
        """
        ib = self.image_defined_base()
        missing = [Range(0, len(self._data))]
        offsets = {}
        for section in self._sections():
            # Remove the physical range of this section from everything that is still uncovered.
            missing = [piece for patch in missing for piece in patch - section.physical]
            offsets[section.physical.lower] = section.virtual.lower
            yield section
        if not missing:
            return
        # File offset zero maps to the image base unless a section already starts there.
        offsets.setdefault(0, ib)
        for gap in missing:
            # The virtual address of a gap is extrapolated from the closest known file offset
            # that is not located after the gap.
            p_floor = max(k for k in offsets if k <= gap.lower)
            v_floor = offsets[p_floor]
            v_lower = v_floor + (gap.lower - p_floor)
            v_upper = v_lower + len(gap)
            if gap.lower == 0:
                name = R'synthesized/.header'
            elif gap.upper == len(self._data):
                name = R'synthesized/.overlay'
            elif any(self._data[gap.slice()]):
                name = F'synthesized/.gap-{gap.lower:08X}-{gap.upper:08X}'
            else:
                name = F'synthesized/.zeros-{gap.lower:08X}'
            yield Section(name, gap, Range(v_lower, v_upper), True)
Ancestors
- abc.ABC
Subclasses
Class variables
var blob
Static methods
def Load(data, base=None)
-
Uses the
exeroute()
function to parse the input data with one of the following specializations of this class:Expand source code Browse git
@classmethod
def Load(cls: Type[_T], data: ByteStr, base: Optional[int] = None) -> _T:
    """
    Parse `data` by handing it to `refinery.lib.executable.exeroute` together with the three
    format specializations of this class:

    - `refinery.lib.executable.ExecutableELF`
    - `refinery.lib.executable.ExecutableMachO`
    - `refinery.lib.executable.ExecutablePE`

    The `data` and the optional `base` address are forwarded as additional arguments.
    """
    parsers = (ExecutableELF, ExecutableMachO, ExecutablePE)
    return exeroute(data, *parsers, data, base)
def ascii(string)
-
If the input
string
is a str
instance, the function returns the input value. Byte strings are truncated to the first occurrence of a null byte and then decoded using the latin-1
codec.Expand source code Browse git
@staticmethod
def ascii(string: Union[str, ByteStr]) -> str:
    """
    If the input `string` is a `str` instance, the function returns the input value. Byte strings
    are truncated to the first occurrence of a null byte and then decoded using the `latin-1`
    codec. Any bytes-like input is accepted, including `memoryview` objects.
    """
    if isinstance(string, str):
        return string
    for k, b in enumerate(string):
        if b == 0:
            string = string[:k]
            break
    # Convert to bytes first because memoryview objects have no decode method.
    return bytes(string).decode('latin-1')
Instance variables
var head
-
Return the internal object representing the parsed file format header.
Expand source code Browse git
@property
def head(self):
    """
    The parser object for the file format header that was handed to the constructor.
    """
    parsed = self._head
    return parsed
var type
-
Returns the
ET
instance that identifies the executable type.Expand source code Browse git
@property
def type(self):
    """
    The `refinery.lib.executable.ET` value that identifies the executable type.
    """
    kind = self._type
    return kind
var base
-
Return the base address when mapped to memory. This is either the value passed to the constructor, or
refinery.lib.executable.Executable.image_defined_base
.Expand source code Browse git
@property
def base(self) -> int:
    """
    The base address of the executable when mapped to memory: the explicitly configured value if
    there is one, and `refinery.lib.executable.Executable.image_defined_base` otherwise.
    """
    explicit = self._base
    return self.image_defined_base() if explicit is None else explicit
var data
-
Return a (readonly) view to the raw bytes of the executable image.
Expand source code Browse git
@property
def data(self) -> memoryview:
    """
    A view to the raw bytes of the executable image; the view is made readonly on Python 3.8
    and later.
    """
    view = memoryview(self._data)
    if sys.version_info < (3, 8):
        return view
    return view.toreadonly()
var pointer_size
-
Return the size of a pointer in bits. Depends on
Executable.arch()
.Expand source code Browse git
@property
def pointer_size(self) -> int:
    """
    The size of a pointer in bits, as stored on the architecture that is returned by
    `refinery.lib.executable.Executable.arch`.
    """
    architecture = self.arch()
    return architecture.pointer_size
Methods
def read(self, key)
-
Read data from the binary based on a given address. If the input
key
is a single integer, the function reads a single byte from the given address.Expand source code Browse git
def read(self, key: Union[int, slice, Range]) -> memoryview:
    """
    Read data from the binary, addressed by virtual address. An integer `key` reads the single
    byte at that address. A `slice` or `Range` reads from its start up to its end; a slice
    without an end reads until the end of the physical range that contains the start address.
    A `LookupError` is raised when the start is unspecified, when the end lies before the start,
    when the start address is not mapped, or when the end lies beyond the virtual range that
    contains the start.
    """
    if isinstance(key, Range):
        start, stop = key.lower, key.upper
    elif isinstance(key, int):
        start, stop = key, key + 1
    else:
        start, stop = key.start, key.stop
    if start is None:
        raise LookupError(R'Slice indices with unspecified start are not supported.')
    if stop is not None and stop < start:
        raise LookupError(R'The slice end must lie after the slice start.')
    location = self.location_from_address(start)
    physical = location.physical
    virtual = location.virtual
    if stop is None:
        end = physical.box.upper
    else:
        if stop > virtual.box.upper:
            raise LookupError(F'The end address 0x{stop:X} is beyond the section end 0x{virtual.box.upper:X}.')
        end = physical.position + (stop - start)
    return self.data[physical.position:end]
def rebase_usr_to_img(self, addr)
-
Expand source code Browse git
def rebase_usr_to_img(self, addr: int) -> int:
    """
    Translate an address relative to `base` into one relative to the image defined base.
    """
    offset = addr - self.base
    return offset + self.image_defined_base()
def rebase_img_to_usr(self, addr)
-
Expand source code Browse git
def rebase_img_to_usr(self, addr: int) -> int:
    """
    Translate an address relative to the image defined base into one relative to `base`.
    """
    offset = addr - self.image_defined_base()
    return offset + self.base
def location_from_address(self, address)
-
Return a
Location
from the given address.Expand source code Browse git
def location_from_address(self, address: int) -> Location:
    """
    Look up the `refinery.lib.executable.Location` that belongs to a virtual address.
    """
    kind = LT.VIRTUAL
    return self.lookup_location(address, kind)
def location_from_offset(self, offset)
-
Return a
Location
from the given file offset.Expand source code Browse git
def location_from_offset(self, offset: int) -> Location:
    """
    Look up the `refinery.lib.executable.Location` that belongs to a physical file offset.
    """
    kind = LT.PHYSICAL
    return self.lookup_location(offset, kind)
def image_defined_size(self)
-
Returns the size of the executable on disk.
Expand source code Browse git
def image_defined_size(self) -> int:
    """
    Compute the size of the executable on disk as the largest physical upper bound of any
    segment or section; the result is never less than zero.
    """
    parts = itertools.chain(self.segments(), self.sections())
    bounds = (part.physical.upper for part in parts)
    # The leading zero reproduces the initial value of the running maximum.
    return max(itertools.chain((0,), bounds))
def image_defined_address_space(self)
-
Returns the range of virtual addresses that the executable occupies in memory.
Expand source code Browse git
def image_defined_address_space(self) -> Range:
    """
    Compute the range of virtual addresses that is spanned by all segments and sections of the
    executable. A `RuntimeError` is raised when the computed upper bound is below the computed
    lower bound.
    """
    upper, lower = 0, INF
    for part in itertools.chain(self.segments(), self.sections()):
        span = part.virtual
        upper = max(upper, span.upper)
        lower = min(lower, span.lower)
    if upper < lower:
        raise RuntimeError(F'The computed address space upper bound 0x{upper:X} is less than the computed lower bound 0x{lower:X}.')
    return Range(lower, upper)
def lookup_location(self, location, lt)
-
For an address or file offset, compute the corresponding
Location
.Expand source code Browse git
def lookup_location(self, location: int, lt: LT) -> Location:
    """
    Translate a virtual address or a physical file offset (as selected by `lt`) into a
    `refinery.lib.executable.Location`. Sections are searched before segments and the first
    one that contains the location is used. When nothing contains the location, a
    `refinery.lib.executable.CompartmentNotFound` exception is raised.
    """
    for part in itertools.chain(self.sections(), self.segments()):
        phys, virt = part.physical, part.virtual
        if lt is LT.PHYSICAL:
            if location in phys:
                mapped = virt.lower + location - phys.lower
                return Location(BoxedOffset(phys, location), BoxedOffset(virt, mapped))
        elif lt is LT.VIRTUAL and location in virt:
            mapped = phys.lower + location - virt.lower
            return Location(BoxedOffset(phys, mapped), BoxedOffset(virt, location))
    raise CompartmentNotFound(lt, location)
def symbols(self)
-
Generates a list of symbols in the executable.
Expand source code Browse git
@abstractmethod
def symbols(self) -> Generator[Symbol, None, None]:
    """
    Abstract: implementations generate the `refinery.lib.executable.Symbol`s of the executable.
    """
def byte_order(self)
-
The byte order used by the architecture of this executable.
Expand source code Browse git
@abstractmethod
def byte_order(self) -> BO:
    """
    Abstract: implementations return the byte order used by the architecture of this executable.
    """
def image_defined_base(self)
-
The image defined base address when mapped to memory.
Expand source code Browse git
@abstractmethod
def image_defined_base(self) -> int:
    """
    Abstract: implementations return the base address that the image itself defines for being
    mapped to memory.
    """
def arch(self)
-
The architecture for which this executable was built.
Expand source code Browse git
@abstractmethod
def arch(self) -> Arch:
    """
    Abstract: implementations return the architecture for which this executable was built.
    """
def segments(self, populate_sections=False)
-
An iterable of all
Segment
s in this executable.Expand source code Browse git
def segments(self, populate_sections=False) -> Generator[Segment, None, None]:
    """
    Generate every `refinery.lib.executable.Segment` of this executable by forwarding to the
    format specific `_segments` implementation.
    """
    for segment in self._segments(populate_sections=populate_sections):
        yield segment
def sections(self)
-
An iterable of all
Section
s in this executable.Expand source code Browse git
def sections(self) -> Generator[Section, None, None]:
    """
    An iterable of all `refinery.lib.executable.Section`s in this executable. After the sections
    reported by the format parser, one synthetic section is generated for every range of the
    file that none of them covers. Synthetic sections are named `synthesized/.header` when they
    start at offset zero, `synthesized/.overlay` when they end at the end of the file, and
    otherwise `synthesized/.gap-...` or `synthesized/.zeros-...` depending on whether they
    contain any nonzero byte.
    """
    ib = self.image_defined_base()
    missing = [Range(0, len(self._data))]
    offsets = {}
    for section in self._sections():
        # Remove the physical range of this section from everything that is still uncovered.
        missing = [piece for patch in missing for piece in patch - section.physical]
        offsets[section.physical.lower] = section.virtual.lower
        yield section
    if not missing:
        return
    # File offset zero maps to the image base unless a section already starts there.
    offsets.setdefault(0, ib)
    for gap in missing:
        # The virtual address of a gap is extrapolated from the closest known file offset that
        # is not located after the gap. The zero offset registered above guarantees that there
        # is always at least one candidate.
        p_floor = max(k for k in offsets if k <= gap.lower)
        v_floor = offsets[p_floor]
        v_lower = v_floor + (gap.lower - p_floor)
        v_upper = v_lower + len(gap)
        if gap.lower == 0:
            name = R'synthesized/.header'
        elif gap.upper == len(self._data):
            name = R'synthesized/.overlay'
        elif any(self._data[gap.slice()]):
            name = F'synthesized/.gap-{gap.lower:08X}-{gap.upper:08X}'
        else:
            name = F'synthesized/.zeros-{gap.lower:08X}'
        yield Section(name, gap, Range(v_lower, v_upper), True)
class ExecutableCodeBlob (data, base=None, arch=Arch.X32, byte_order=BO.LE)
-
A dummy specialization of
Executable
that represents an unstructured blob of (shell)code. All information that would usually be obtained from a file header must be provided in the constructor for this object.Expand source code Browse git
class ExecutableCodeBlob(Executable):
    """
    A placeholder specialization of `refinery.lib.executable.Executable` for an unstructured
    blob of (shell)code. There is no file header to parse, so the base address, architecture,
    and byte order are taken from the constructor arguments.
    """

    _head: Type[None] = None
    _type = ET.BLOB
    _byte_order: BO
    _arch: Arch

    blob = True

    def __init__(self, data, base=None, arch: Arch = Arch.X32, byte_order: BO = BO.LE):
        super().__init__(None, data, base)
        self._arch = arch
        self._byte_order = byte_order

    def image_defined_base(self) -> int:
        # A blob has no header that could define a base address.
        return 0

    def byte_order(self) -> BO:
        return self._byte_order

    def arch(self) -> Arch:
        return self._arch

    def symbols(self) -> Generator[Symbol, None, None]:
        # The only symbol is a nameless one at address zero.
        yield Symbol(0)

    def _sections(self) -> Generator[Section, None, None]:
        # The entire blob forms a single section that is mapped at the base address.
        size = len(self.data)
        start = self.base
        yield Section('blob', Range(0, size), Range(start, start + size), False)

    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        yield from (
            section.as_segment(populate_sections=populate_sections)
            for section in self.sections()
        )
Ancestors
- Executable
- abc.ABC
Class variables
var blob
Inherited members
class ExecutablePE (head, data, base=None)
-
A Windows Portable Executable (PE) file.
Expand source code Browse git
class ExecutablePE(Executable):
    """
    A Windows Portable Executable (PE) file.
    """

    _head: PEFile
    _type = ET.PE

    def image_defined_base(self) -> int:
        return self._head.OPTIONAL_HEADER.ImageBase

    def image_defined_size(self, overlay=True, sections=True, directories=True, certificate=True, memdump=False) -> int:
        """
        This function determines the size of a PE file, optionally taking into account the pefile
        module overlay computation, section information, data directory information, and
        certificate entries. With `memdump` enabled, the upper bound of the virtual address space
        is considered as well. The result is the maximum of all enabled values.
        """
        pe = self._head
        overlay_value = overlay and pe.get_overlay_data_start_offset() or 0
        sections_value = sections and super().image_defined_size() or 0
        memdump_value = memdump and self.image_defined_address_space().upper or 0
        cert_entry = pe.OPTIONAL_HEADER.DATA_DIRECTORY[DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_SECURITY']]
        if directories:
            directories_value = max((
                pe.get_offset_from_rva(d.VirtualAddress) + d.Size
                for d in pe.OPTIONAL_HEADER.DATA_DIRECTORY
                if d.name != 'IMAGE_DIRECTORY_ENTRY_SECURITY'
            ), default=0)
            if certificate:
                # The certificate overlay is given as a file offset
                # rather than a virtual address.
                cert_value = cert_entry.VirtualAddress + cert_entry.Size
            else:
                cert_value = 0
            directories_value = max(directories_value, cert_value)
        else:
            directories_value = 0
        return max(
            overlay_value,
            sections_value,
            directories_value,
            memdump_value
        )

    def _sections(self) -> Generator[Section, None, None]:
        sections: Iterable[SectionStructure] = iter(self._head.sections)
        ib = self.image_defined_base()
        for section in sections:
            p_lower = section.PointerToRawData
            p_upper = p_lower + section.SizeOfRawData
            # Section addresses are relative to the image base; make them absolute first and
            # then translate them to the base address that is currently configured.
            v_lower = section.VirtualAddress + ib
            v_lower = self.rebase_img_to_usr(v_lower)
            v_upper = v_lower + section.Misc_VirtualSize
            p = Range(p_lower, p_upper)
            v = Range(v_lower, v_upper)
            yield Section(self.ascii(section.Name), p, v, False)

    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        # PE files have no separate segments: every section doubles as a segment.
        for section in self.sections():
            yield section.as_segment(populate_sections)

    def arch(self) -> Arch:
        """
        Map the machine type from the file header to an `refinery.lib.executable.Arch`. Raises a
        `LookupError` for machine types without a mapping.
        """
        arch = self._head.FILE_HEADER.Machine
        arch = MACHINE_TYPE[arch]
        try:
            return {
                'IMAGE_FILE_MACHINE_I386'   : Arch.X32,
                'IMAGE_FILE_MACHINE_AMD64'  : Arch.X64,
                'IMAGE_FILE_MACHINE_ARM'    : Arch.ARM32,
                'IMAGE_FILE_MACHINE_THUMB'  : Arch.ARM32,
                # ARMNT denotes 32-bit ARM Thumb-2; 64-bit ARM has its own machine type.
                'IMAGE_FILE_MACHINE_ARMNT'  : Arch.ARM32,
                'IMAGE_FILE_MACHINE_ARM64'  : Arch.ARM64,
                'IMAGE_FILE_MACHINE_MIPS16' : Arch.MIPS16,
            }[arch]
        except KeyError:
            raise LookupError(F'Unsupported architecture: {arch}')

    def byte_order(self) -> BO:
        return BO.LE

    def symbols(self) -> Generator[Symbol, None, None]:
        """
        Generates the entry point as a nameless symbol, followed by all named exports, followed
        by all regular and delay-loaded imports. Imports are marked as not exported; imports
        without a name are generated with the name `None`.
        """
        base = self.image_defined_base()
        head = self._head
        yield Symbol(head.OPTIONAL_HEADER.AddressOfEntryPoint + base)
        head.parse_data_directories(directories=[
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_EXPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_IMPORT'],
            DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT'],
        ])
        try:
            exports = head.DIRECTORY_ENTRY_EXPORT.symbols
        except AttributeError:
            # The absence of an export directory must not prevent the imports from being listed.
            exports = ()
        for exp in exports:
            name = exp.name
            if not name:
                continue
            yield Symbol(exp.address + base, name.decode('ascii'))
        for itype in ['IMPORT', 'DELAY_IMPORT']:
            # The directory attribute is only present when the corresponding directory exists;
            # it is a list with one descriptor per imported library.
            descriptors = getattr(head, F'DIRECTORY_ENTRY_{itype}', None)
            if descriptors is None:
                continue
            for idd in descriptors:
                for imp in idd.imports:
                    if name := imp.name:
                        name = name.decode('ascii')
                    yield Symbol(imp.address, name, exported=False)
Ancestors
- Executable
- abc.ABC
Methods
def image_defined_size(self, overlay=True, sections=True, directories=True, certificate=True, memdump=False)
-
This function determines the size of a PE file, optionally taking into account the pefile module overlay computation, section information, data directory information, and certificate entries.
Expand source code Browse git
def image_defined_size(self, overlay=True, sections=True, directories=True, certificate=True, memdump=False) -> int:
    """
    Determine the size of a PE file as the maximum of several candidates, each of which can be
    switched off: the overlay start offset computed by pefile (`overlay`), the size derived from
    section information (`sections`), the end of the data directories (`directories`), which
    includes the certificate table if `certificate` is set, and the upper bound of the virtual
    address space (`memdump`).
    """
    pe = self._head
    overlay_value = 0
    if overlay:
        overlay_value = pe.get_overlay_data_start_offset() or 0
    sections_value = 0
    if sections:
        sections_value = super().image_defined_size() or 0
    memdump_value = 0
    if memdump:
        memdump_value = self.image_defined_address_space().upper or 0
    cert_index = DIRECTORY_ENTRY['IMAGE_DIRECTORY_ENTRY_SECURITY']
    cert_entry = pe.OPTIONAL_HEADER.DATA_DIRECTORY[cert_index]
    directories_value = 0
    if directories:
        ends = (
            pe.get_offset_from_rva(d.VirtualAddress) + d.Size
            for d in pe.OPTIONAL_HEADER.DATA_DIRECTORY
            if d.name != 'IMAGE_DIRECTORY_ENTRY_SECURITY'
        )
        directories_value = max(ends, default=0)
        # The certificate table entry stores a file offset rather than a virtual address and
        # is therefore not translated.
        cert_value = cert_entry.VirtualAddress + cert_entry.Size if certificate else 0
        directories_value = max(directories_value, cert_value)
    candidates = (overlay_value, sections_value, directories_value, memdump_value)
    return max(candidates)
Inherited members
class ExecutableELF (head, data, base=None)
-
A file in Executable and Linkable Format (ELF).
Expand source code Browse git
class ExecutableELF(Executable):
    """
    A file in Executable and Linkable Format (ELF).
    """

    _head: ELFFile
    _type = ET.ELF

    def image_defined_base(self) -> int:
        """
        The lowest virtual address of any PT_LOAD segment.
        """
        return min(self._pt_load(), default=0)

    def _pt_load(self):
        """
        Return a dictionary that maps the virtual address of each PT_LOAD segment to the segment.
        Raises a `LookupError` when the file has no segments or no PT_LOAD segment. The result is
        cached on the instance: a cache shared by all instances would keep the most recently
        used executable alive and would be evicted whenever two executables are used in turns.
        """
        try:
            return self._pt_load_cache
        except AttributeError:
            pass
        if not self._head.num_segments():
            raise LookupError('The elftools parser did not find any segments in this file.')
        PT_LOAD = {}
        for segment in self._head.iter_segments():
            if segment.header.p_type == 'PT_LOAD':
                PT_LOAD[segment.header.p_vaddr] = segment
        if not PT_LOAD:
            raise LookupError('Could not find any PT_LOAD segment.')
        self._pt_load_cache = PT_LOAD
        return PT_LOAD

    def _convert_section(self, section) -> Section:
        p_lower = section['sh_offset']
        v_lower = section['sh_addr']
        v_lower = self.rebase_img_to_usr(v_lower)
        # The virtual size is the data size rounded up to the section alignment.
        v_upper = v_lower + align(section['sh_addralign'], section.data_size)
        p_upper = p_lower + section.data_size
        return Section(self.ascii(section.name), Range(p_lower, p_upper), Range(v_lower, v_upper), False)

    def _sections(self) -> Generator[Section, None, None]:
        for section in self._head.iter_sections():
            if section.is_null():
                continue
            yield self._convert_section(section)

    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        for segment in self._head.iter_segments():
            header = segment.header
            p_lower = header.p_offset
            v_lower = header.p_vaddr
            v_lower = self.rebase_img_to_usr(v_lower)
            p_upper = p_lower + header.p_filesz
            v_upper = v_lower + header.p_memsz
            if not populate_sections:
                sections = None
            else:
                sections = [
                    self._convert_section(section)
                    for section in self._head.iter_sections()
                    if segment.section_in_segment(section)
                ]
            yield Segment(Range(p_lower, p_upper), Range(v_lower, v_upper), sections)

    def arch(self) -> Arch:
        """
        Map the `e_machine` header field to an `refinery.lib.executable.Arch`. Raises a
        `LookupError` for machine types without a mapping.
        """
        arch = self._head.header['e_machine']
        try:
            return {
                'EM_SPARC'   : Arch.SPARC32,
                'EM_SPARCV9' : Arch.SPARC64,
                'EM_386'     : Arch.X32,
                'EM_X86_64'  : Arch.X64,
                'EM_MIPS'    : Arch.MIPS32,
                'EM_PPC'     : Arch.PPC32,
                'EM_PPC64'   : Arch.PPC64,
                'EM_ARM'     : Arch.ARM32,
                'EM_AARCH64' : Arch.ARM64,
            }[arch]
        except KeyError:
            raise LookupError(F'Unsupported architecture: {arch}')

    def byte_order(self) -> BO:
        return BO.LE if self.head.little_endian else BO.BE

    def symbols(self) -> Generator[Symbol, None, None]:
        """
        Generates one symbol per address, sorted by address. The entry point is registered as a
        nameless symbol first; when several symbols share an address, the one with the longest
        name is kept. Nothing is generated when the section list cannot be parsed.
        """
        ee = self._head.header['e_entry']
        symbols = {ee: Symbol(ee)}
        try:
            sections = list(self._head.iter_sections())
        except Exception:
            return
        for section in sections:
            if not isinstance(section, SymbolTableSection):
                continue
            if section['sh_entsize'] == 0:
                continue
            for sym in section.iter_symbols():
                st_name = sym.name
                if sym['st_info']['type'] == 'STT_SECTION' and sym['st_shndx'] < len(sections) and sym['st_name'] == 0:
                    # Nameless section symbols are named after the section they refer to; this
                    # is best effort and the original name is kept when the lookup fails.
                    try:
                        st_name = self._head.get_section(sym['st_shndx']).name
                    except Exception:
                        pass
                st_addr = sym['st_value']
                # Strip control characters from the name.
                st_name = re.sub('[\x01-\x1f]+', '', st_name)
                st_type = sym['st_info']['type']
                st_bind = sym['st_info']['bind']
                st_size = sym['st_size']
                insert = False
                try:
                    prev = symbols[st_addr]
                except KeyError:
                    insert = True
                else:
                    insert = prev.name is None or len(prev.name) < len(st_name)
                if insert:
                    symbols[st_addr] = Symbol(
                        st_addr,
                        st_name,
                        st_type == 'STT_FUNC',
                        st_bind == 'STB_GLOBAL',
                        dict(
                            st_type=st_type,
                            st_bind=st_bind,
                            st_size=st_size,
                        )
                    )
        for addr in sorted(symbols):
            yield symbols[addr]
Ancestors
- Executable
- abc.ABC
Inherited members
class ExecutableMachO (head, data, base=None)
-
A MachO-executable.
Expand source code Browse git
class ExecutableMachO(Executable):
    """
    A MachO-executable.
    """

    _head: MachO
    _type = ET.MachO

    def symbols(self) -> Generator[Symbol, None, None]:
        """
        Symbol extraction is not implemented for MachO files; calling this method raises a
        `NotImplementedError`.
        """
        raise NotImplementedError

    @lru_cache(maxsize=1)
    def image_defined_base(self) -> int:
        """
        Return the lowest non-zero virtual address of any file-backed segment. If there is no
        such segment, the base address is reported as zero.
        """
        # Without the default, min() raises an opaque ValueError when no file-backed segment
        # has a non-zero vmaddr.
        return min(
            (seg.vmaddr for seg, _ in self._macho_segments() if seg.vmaddr > 0),
            default=0,
        )

    def _macho_segments(self):
        """
        Generate pairs of (segment, sections) for every load command in every header whose name
        starts with `LC_SEGMENT`, skipping segments whose file size is not positive.
        """
        headers: List[MachOHeader] = self._head.headers
        for header in headers:
            for cmd, segment, sections in header.commands:
                cmd: load_command
                if not cmd.get_cmd_name().startswith('LC_SEGMENT'):
                    continue
                if segment.filesize <= 0:
                    continue
                yield segment, sections

    def _segments(self, populate_sections=False) -> Generator[Segment, None, None]:
        """
        Generate a named `Segment` for every file-backed MachO segment. When `populate_sections`
        is set, each segment also carries its converted sections; otherwise the section list is
        `None`.
        """
        for segment, sections in self._macho_segments():
            v_lower = segment.vmaddr
            v_lower = self.rebase_img_to_usr(v_lower)
            p_lower = segment.fileoff
            v_upper = v_lower + segment.vmsize
            p_upper = p_lower + segment.filesize
            segment_name = self.ascii(segment.segname)
            if not populate_sections:
                sections = None
            else:
                sections = [
                    self._convert_section(section, segment_name)
                    for section in sections
                ]
            yield Segment(
                Range(p_lower, p_upper),
                Range(v_lower, v_upper),
                sections,
                segment_name
            )

    def _sections(self) -> Generator[Section, None, None]:
        """
        Generate, for every segment, first the segment itself represented as a section and then
        each of the sections that it contains.
        """
        for segment in self.segments(populate_sections=True):
            yield segment.as_section()
            yield from segment.sections

    def _convert_section(self, section, segment: str) -> Section:
        """
        Convert a macholib section to a `Section` named `segment/section`. The physical range is
        derived from the file offset and size; the virtual range starts at the rebased address
        and its length is the section size rounded up to the section's alignment.
        """
        name = self.ascii(section.sectname)
        p_lower = section.offset
        v_lower = section.addr
        v_lower = self.rebase_img_to_usr(v_lower)
        p_upper = p_lower + section.size
        # MachO stores the section alignment as a power-of-two exponent rather than as a byte
        # count. Exponents outside of a sane range can only come from malformed input; they are
        # treated as "no alignment" to avoid computing an enormous shift.
        exponent = section.align
        boundary = 1 << exponent if 0 <= exponent < 32 else 1
        v_upper = v_lower + align(boundary, section.size)
        return Section(F'{segment}/{name}', Range(p_lower, p_upper), Range(v_lower, v_upper), False)

    def arch(self) -> Arch:
        """
        Map the CPU type of the first MachO header to an `Arch` value. Raises a `LookupError`
        for CPU types that are unknown or have no corresponding `Arch` value.
        """
        cputype = self._head.headers[0].header.cputype
        try:
            arch = _MACHO_ARCHS[cputype]
        except KeyError:
            # The placeholder never matches the mapping below and ends up in the error message.
            arch = F'UNKNOWN(0x{cputype:X})'
        try:
            return {
                'X86'       : Arch.X32,
                'X86_64'    : Arch.X64,
                'ARM'       : Arch.ARM32,
                'SPARC'     : Arch.SPARC32,
                'POWERPC'   : Arch.PPC32,
                'POWERPC64' : Arch.PPC64,
            }[arch]
        except KeyError:
            raise LookupError(F'Unsupported architecture: {arch}')

    def byte_order(self) -> BO:
        """
        Return the byte order of the first MachO header, based on its `endian` format character.
        """
        headers: List[MachOHeader] = self._head.headers
        return {
            '<': BO.LE,
            '>': BO.BE,
        }[headers[0].endian]
Ancestors
- Executable
- abc.ABC
Inherited members