Module refinery.units.formats.pe.dotnet.dnarrays
Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import collections
import functools
import re
import itertools
import json
from refinery.units import Unit
from refinery.lib.dotnet.header import DotNetHeader, NetMetaDataTables
from refinery.lib.structures import StructReader
from refinery.lib.types import ByteStr
class dnarrays(Unit):
    """
    Extracts arrays of strings or integers that are encoded in the .NET binary as IL opcodes.
    The data is exported as JSON.
    """

    @staticmethod
    def _read_int(reader: StructReader):
        """
        Decode a single integer-push instruction at the reader's cursor and return its value.

        The opcode byte is interpreted relative to 0x16:

        - 0x16 through 0x1E (ldc.i4.0 .. ldc.i4.8) encode the values 0 through 8 directly,
        - 0x1F (ldc.i4.s) is followed by a one-byte operand, returned as read by `read_byte`,
        - 0x20 (ldc.i4) is followed by a 32-bit operand, returned as read by `u32`.

        Raises `ValueError` for every other opcode byte.
        """
        value = reader.read_byte() - 0x16
        if value < 0:
            raise ValueError
        elif value <= 8:
            return value
        elif value == 9:
            return reader.read_byte()
        elif value == 10:
            return reader.u32()
        else:
            raise ValueError

    @staticmethod
    def _read_str(reader: StructReader, header: DotNetHeader):
        """
        Decode a single ldstr instruction (0x72) at the reader's cursor and return the string
        stored in the user string stream (`header.meta.Streams.US`) under the 24-bit value that
        follows the opcode. The fourth operand byte must be 0x70, the table byte of a user
        string token.

        Raises `ValueError` when the opcode or the table byte do not match. A lookup error from
        the user string stream is propagated to the caller.
        """
        if reader.read_byte() != 0x72:
            raise ValueError
        token: int = reader.read_integer(24)
        value: str = header.meta.Streams.US[token]
        if reader.read_byte() != 0x70:
            raise ValueError
        return value

    # Byte pattern of a string array that is populated element by element on the stack. At least
    # four consecutive element assignments are required for a match.
    _STACK_ARRAY_PATTERN_STR = re.compile(
        BR'''(?x)
        (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # load array length
        (?: \x8D...\x01 ) # newarr System.String
        (?:
            (?: \x25 ) # dup
            (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # load integer index
            (?: \x72...\x70 ) # load the string
            (?: \xA2 ) # stelem.ref
        ){4,}
        ''', flags=re.DOTALL)

    def _str_arrays(self, data: ByteStr, header: DotNetHeader, tables: NetMetaDataTables):
        """
        Scan `data` for string array initializers and generate `(offset, strings)` pairs, where
        `offset` is the position of the match within `data`.

        The pattern is matched against raw bytes and can therefore produce false positives. A
        candidate is dropped without producing output when:

        - the newarr operand does not reference a `TypeRef` row named `String`,
        - the element indices are not the consecutive sequence 0, 1, 2, ...,
        - fewer elements were matched than the decoded array length,
        - a string operand cannot be resolved in the user string stream.

        Raises `RuntimeError` when the matched bytes contradict the pattern that matched them.
        """
        for match in self._STACK_ARRAY_PATTERN_STR.finditer(data):
            reader = StructReader(match[0])
            result: list[str] = []
            size = self._read_int(reader)
            if reader.read_byte() != 0x8D:
                raise RuntimeError
            stt = reader.read_integer(24)
            if reader.read_byte() != 0x01:
                raise RuntimeError
            if stt < 1:
                continue
            try:
                type_name = tables.TypeRef[stt - 1].TypeName
            except LookupError:
                # The row number came from an arbitrary byte match and need not exist.
                continue
            if type_name != 'String':
                continue
            self.log_info(F'str array pattern at 0x{match.start():X}, size {size}')
            for k in range(size):
                if reader.eof:
                    # The pattern only covers the leading run of plain string assignments; the
                    # decoded length may exceed the number of elements that were matched.
                    self.log_info(
                        F'str array at 0x{match.start():X} has only {k} matched elements; skipping')
                    break
                if reader.read_byte() != 0x25:
                    raise RuntimeError
                if self._read_int(reader) != k:
                    break
                try:
                    result.append(self._read_str(reader, header))
                except LookupError:
                    self.log_info(
                        F'str array at 0x{match.start():X} references an unknown string; skipping')
                    break
                if reader.read_byte() != 0xA2:
                    raise RuntimeError
            else:
                # Only reached when all elements were decoded without leaving the loop early.
                yield match.start(), result

    # Byte pattern of an integer array that is written through the address of a local variable:
    # the first value is stored at the base address, every following value at base + index. The
    # backreference ensures that each store uses the same variable. At least four indexed stores
    # are required after the initial one.
    _STACK_ARRAY_PATTERN_INT = re.compile(
        BR'''(?x)
        ( \x12.|\xFE\x0D.. ) # load array variable
        (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # push integer value
        (?: \x52 ) # store value into array
        (?:
            (?: \1 ) # load same array variable
            (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # load integer index
            (?: \x58 ) # add; compute offset
            (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # push integer value
            (?: \x52 ) # store value into array
        ){4,}
        ''', flags=re.DOTALL)

    def _int_arrays(self, data: ByteStr, header: DotNetHeader, tables: NetMetaDataTables):
        """
        Scan `data` for integer array initializers and generate `(offset, integers)` pairs, where
        `offset` is the position of the match within `data`.

        A candidate is dropped when the store offsets are not the consecutive sequence 1, 2, 3, ...
        following the initial store at offset zero. The parameters `header` and `tables` are not
        used; they keep the signature parallel to `_str_arrays`.

        Raises `RuntimeError` when the matched bytes contradict the pattern that matched them.
        """
        for match in self._STACK_ARRAY_PATTERN_INT.finditer(data):
            self.log_info(F'int array pattern at 0x{match.start():X}')
            reader = StructReader(match[0])
            result: list[int] = []
            opc, = reader.peek(1)
            # Size of the instruction that loads the variable address: the short form (0x12) has
            # a one-byte operand, the long form (0xFE 0x0D) has a two-byte operand.
            skip = {0x12: 2, 0xFE: 4}[opc]
            reader.seekrel(skip)
            for index in itertools.count(1):
                result.append(self._read_int(reader))
                # These reads advance the cursor and must therefore not be placed inside assert
                # statements, which are removed when Python runs with optimizations enabled.
                if reader.read_byte() != 0x52:
                    raise RuntimeError
                if reader.eof:
                    yield match.start(), result
                    break
                reader.seekrel(skip)
                if self._read_int(reader) != index:
                    self.log_info('index inconsistency; aborting')
                    break
                if reader.read_byte() != 0x58:
                    raise RuntimeError

    def process(self, data):
        """
        Parse `data` as a .NET executable, collect all integer and string arrays, and return a
        JSON document as bytes encoded with `self.codec`.

        The document maps method names to objects whose keys `v1`, `v2`, ... hold the arrays
        attributed to that method, ordered by file offset. When an integer and a string array are
        reported at the same offset, the string array is kept.
        """
        header = DotNetHeader(data)
        tables = header.meta.Streams.Tables

        @functools.lru_cache(maxsize=None)
        def method(offset: int):
            # Attribute the offset to the method whose RVA is closest below (or equal to) the
            # RVA of the array; methods that start after the array sort behind all others.
            rva = header.pe.get_rva_from_offset(offset)
            owner = min(tables.MethodDef, key=lambda m: (m.RVA > rva, rva - m.RVA))
            return owner.Name

        arrays = dict(itertools.chain(
            self._int_arrays(data, header, tables),
            self._str_arrays(data, header, tables),
        ))
        result = collections.defaultdict(list)
        for offset in sorted(arrays):
            result[method(offset)].append(arrays[offset])
        result = {m: {F'v{k}': v for k, v in enumerate(t, 1)} for m, t in result.items()}
        return json.dumps(result, indent=4).encode(self.codec)
Classes
class dnarrays
-
Extracts arrays of strings or integers that are encoded in the .NET binary as IL opcodes. The data is exported as JSON.
Expand source code Browse git
class dnarrays(Unit):
    """
    Extracts arrays of strings or integers that are encoded in the .NET binary as IL opcodes.
    The data is exported as JSON.
    """

    @staticmethod
    def _read_int(reader: StructReader):
        """
        Decode a single integer-push instruction at the reader's cursor and return its value.

        The opcode byte is interpreted relative to 0x16:

        - 0x16 through 0x1E (ldc.i4.0 .. ldc.i4.8) encode the values 0 through 8 directly,
        - 0x1F (ldc.i4.s) is followed by a one-byte operand, returned as read by `read_byte`,
        - 0x20 (ldc.i4) is followed by a 32-bit operand, returned as read by `u32`.

        Raises `ValueError` for every other opcode byte.
        """
        value = reader.read_byte() - 0x16
        if value < 0:
            raise ValueError
        elif value <= 8:
            return value
        elif value == 9:
            return reader.read_byte()
        elif value == 10:
            return reader.u32()
        else:
            raise ValueError

    @staticmethod
    def _read_str(reader: StructReader, header: DotNetHeader):
        """
        Decode a single ldstr instruction (0x72) at the reader's cursor and return the string
        stored in the user string stream (`header.meta.Streams.US`) under the 24-bit value that
        follows the opcode. The fourth operand byte must be 0x70, the table byte of a user
        string token.

        Raises `ValueError` when the opcode or the table byte do not match. A lookup error from
        the user string stream is propagated to the caller.
        """
        if reader.read_byte() != 0x72:
            raise ValueError
        token: int = reader.read_integer(24)
        value: str = header.meta.Streams.US[token]
        if reader.read_byte() != 0x70:
            raise ValueError
        return value

    # Byte pattern of a string array that is populated element by element on the stack. At least
    # four consecutive element assignments are required for a match. The pattern uses verbose
    # mode, so every trailing comment must end at a line break to keep it out of the expression.
    _STACK_ARRAY_PATTERN_STR = re.compile(
        BR'''(?x)
        (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # load array length
        (?: \x8D...\x01 ) # newarr System.String
        (?:
            (?: \x25 ) # dup
            (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # load integer index
            (?: \x72...\x70 ) # load the string
            (?: \xA2 ) # stelem.ref
        ){4,}
        ''', flags=re.DOTALL)

    def _str_arrays(self, data: ByteStr, header: DotNetHeader, tables: NetMetaDataTables):
        """
        Scan `data` for string array initializers and generate `(offset, strings)` pairs, where
        `offset` is the position of the match within `data`.

        The pattern is matched against raw bytes and can therefore produce false positives. A
        candidate is dropped without producing output when:

        - the newarr operand does not reference a `TypeRef` row named `String`,
        - the element indices are not the consecutive sequence 0, 1, 2, ...,
        - fewer elements were matched than the decoded array length,
        - a string operand cannot be resolved in the user string stream.

        Raises `RuntimeError` when the matched bytes contradict the pattern that matched them.
        """
        for match in self._STACK_ARRAY_PATTERN_STR.finditer(data):
            reader = StructReader(match[0])
            result: list[str] = []
            size = self._read_int(reader)
            if reader.read_byte() != 0x8D:
                raise RuntimeError
            stt = reader.read_integer(24)
            if reader.read_byte() != 0x01:
                raise RuntimeError
            if stt < 1:
                continue
            try:
                type_name = tables.TypeRef[stt - 1].TypeName
            except LookupError:
                # The row number came from an arbitrary byte match and need not exist.
                continue
            if type_name != 'String':
                continue
            self.log_info(F'str array pattern at 0x{match.start():X}, size {size}')
            for k in range(size):
                if reader.eof:
                    # The pattern only covers the leading run of plain string assignments; the
                    # decoded length may exceed the number of elements that were matched.
                    self.log_info(
                        F'str array at 0x{match.start():X} has only {k} matched elements; skipping')
                    break
                if reader.read_byte() != 0x25:
                    raise RuntimeError
                if self._read_int(reader) != k:
                    break
                try:
                    result.append(self._read_str(reader, header))
                except LookupError:
                    self.log_info(
                        F'str array at 0x{match.start():X} references an unknown string; skipping')
                    break
                if reader.read_byte() != 0xA2:
                    raise RuntimeError
            else:
                # Only reached when all elements were decoded without leaving the loop early.
                yield match.start(), result

    # Byte pattern of an integer array that is written through the address of a local variable:
    # the first value is stored at the base address, every following value at base + index. The
    # backreference ensures that each store uses the same variable. At least four indexed stores
    # are required after the initial one.
    _STACK_ARRAY_PATTERN_INT = re.compile(
        BR'''(?x)
        ( \x12.|\xFE\x0D.. ) # load array variable
        (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # push integer value
        (?: \x52 ) # store value into array
        (?:
            (?: \1 ) # load same array variable
            (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # load integer index
            (?: \x58 ) # add; compute offset
            (?: [\x16-\x1E]|\x1F.|\x20.{4} ) # push integer value
            (?: \x52 ) # store value into array
        ){4,}
        ''', flags=re.DOTALL)

    def _int_arrays(self, data: ByteStr, header: DotNetHeader, tables: NetMetaDataTables):
        """
        Scan `data` for integer array initializers and generate `(offset, integers)` pairs, where
        `offset` is the position of the match within `data`.

        A candidate is dropped when the store offsets are not the consecutive sequence 1, 2, 3, ...
        following the initial store at offset zero. The parameters `header` and `tables` are not
        used; they keep the signature parallel to `_str_arrays`.

        Raises `RuntimeError` when the matched bytes contradict the pattern that matched them.
        """
        for match in self._STACK_ARRAY_PATTERN_INT.finditer(data):
            self.log_info(F'int array pattern at 0x{match.start():X}')
            reader = StructReader(match[0])
            result: list[int] = []
            opc, = reader.peek(1)
            # Size of the instruction that loads the variable address: the short form (0x12) has
            # a one-byte operand, the long form (0xFE 0x0D) has a two-byte operand.
            skip = {0x12: 2, 0xFE: 4}[opc]
            reader.seekrel(skip)
            for index in itertools.count(1):
                result.append(self._read_int(reader))
                # These reads advance the cursor and must therefore not be placed inside assert
                # statements, which are removed when Python runs with optimizations enabled.
                if reader.read_byte() != 0x52:
                    raise RuntimeError
                if reader.eof:
                    yield match.start(), result
                    break
                reader.seekrel(skip)
                if self._read_int(reader) != index:
                    self.log_info('index inconsistency; aborting')
                    break
                if reader.read_byte() != 0x58:
                    raise RuntimeError

    def process(self, data):
        """
        Parse `data` as a .NET executable, collect all integer and string arrays, and return a
        JSON document as bytes encoded with `self.codec`.

        The document maps method names to objects whose keys `v1`, `v2`, ... hold the arrays
        attributed to that method, ordered by file offset. When an integer and a string array are
        reported at the same offset, the string array is kept.
        """
        header = DotNetHeader(data)
        tables = header.meta.Streams.Tables

        @functools.lru_cache(maxsize=None)
        def method(offset: int):
            # Attribute the offset to the method whose RVA is closest below (or equal to) the
            # RVA of the array; methods that start after the array sort behind all others.
            rva = header.pe.get_rva_from_offset(offset)
            owner = min(tables.MethodDef, key=lambda m: (m.RVA > rva, rva - m.RVA))
            return owner.Name

        arrays = dict(itertools.chain(
            self._int_arrays(data, header, tables),
            self._str_arrays(data, header, tables),
        ))
        result = collections.defaultdict(list)
        for offset in sorted(arrays):
            result[method(offset)].append(arrays[offset])
        result = {m: {F'v{k}': v for k, v in enumerate(t, 1)} for m, t in result.items()}
        return json.dumps(result, indent=4).encode(self.codec)
Ancestors
Class variables
var required_dependencies
var optional_dependencies
Inherited members