Module refinery.lib.scripts.js.deobfuscation.reflection
Inline reflectively executed JavaScript code: eval, Function constructor, constructor chains, and setTimeout/setInterval with string arguments. An obfuscator which wraps the entire program in
Function(param, code)(proxyObject)
is handled as a special case with automatic proxy object resolution.
Expand source code Browse git
"""
Inline reflectively executed JavaScript code: eval, Function constructor, constructor chains, and
setTimeout/setInterval with string arguments. An obfuscator which wraps the entire program in
Function(param, code)(proxyObject)
is handled as a special case with automatic proxy object resolution.
"""
from __future__ import annotations
from typing import NamedTuple
from refinery.lib.scripts import Expression, Node, _clone_node, _replace_in_parent
from refinery.lib.scripts.js.deobfuscation.helpers import (
ScriptLevelTransformer,
access_key,
get_body,
is_side_effect_free,
property_key,
string_value,
)
from refinery.lib.scripts.js.model import (
JsAssignmentExpression,
JsBlockStatement,
JsCallExpression,
JsExpressionStatement,
JsFunctionExpression,
JsIdentifier,
JsMemberExpression,
JsNewExpression,
JsObjectExpression,
JsParenthesizedExpression,
JsProperty,
JsPropertyKind,
JsReturnStatement,
JsScript,
JsSequenceExpression,
JsStringLiteral,
JsUnaryExpression,
Statement,
)
_GLOBAL_OBJECTS = frozenset({'window', 'globalThis', 'self', 'global'})
_TIMER_FUNCTIONS = frozenset({'setTimeout', 'setInterval'})
def _unwrap_parens(node: Expression) -> Expression:
while isinstance(node, JsParenthesizedExpression) and node.expression is not None:
node = node.expression
return node
def _try_parse(code: str) -> JsScript | None:
try:
from refinery.lib.scripts.js.parser import JsParser
parsed = JsParser(code).parse()
except Exception:
return None
if not parsed.body:
return None
return parsed
def _is_identifier(node: Node, name: str) -> bool:
return isinstance(node, JsIdentifier) and node.name == name
def _is_function_identifier(node: Expression) -> bool:
return _is_identifier(_unwrap_parens(node), 'Function')
def _extract_eval_code(node: JsCallExpression) -> str | None:
"""
Extract the code string from `eval("code")` or `(eval)("code")`.
"""
if node.callee is None:
return None
callee = _unwrap_parens(node.callee)
if not _is_identifier(callee, 'eval'):
return None
if len(node.arguments) != 1:
return None
return string_value(node.arguments[0])
def _extract_indirect_eval_code(node: JsCallExpression) -> str | None:
"""
Extract the code string from indirect eval patterns:
- `(0, eval)("code")`
- `window.eval("code")` / `globalThis.eval("code")`
"""
if len(node.arguments) != 1:
return None
callee = _unwrap_parens(node.callee) if node.callee is not None else None
if isinstance(callee, JsSequenceExpression):
exprs = callee.expressions
if len(exprs) >= 2 and _is_identifier(exprs[-1], 'eval'):
if all(is_side_effect_free(e) for e in exprs[:-1]):
return string_value(node.arguments[0])
if (
isinstance(callee, JsMemberExpression)
and isinstance(callee.object, JsIdentifier)
and callee.object.name in _GLOBAL_OBJECTS
and isinstance(callee.property, JsIdentifier)
and callee.property.name == 'eval'
and not callee.computed
):
return string_value(node.arguments[0])
return None
def _extract_timer_code(node: JsCallExpression) -> str | None:
"""
Extract the code string from `setTimeout("code", ...)` or `setInterval("code", ...)`.
"""
if node.callee is None:
return None
callee = _unwrap_parens(node.callee)
if not isinstance(callee, JsIdentifier) or callee.name not in _TIMER_FUNCTIONS:
return None
if not node.arguments:
return None
return string_value(node.arguments[0])
def _extract_function_body_code(
constructor_call: JsCallExpression | JsNewExpression,
) -> str | None:
"""
Extract the body code string from Function constructor calls:
Function("code")
Function("a", "b", "code")
new Function("code")
The last string argument is the function body; preceding string arguments are parameter
names (ignored for now).
"""
callee = constructor_call.callee
if callee is None:
return None
if not _is_function_identifier(callee):
return None
args = constructor_call.arguments
if not args:
return None
last = args[-1]
if not isinstance(last, JsStringLiteral):
return None
if not all(isinstance(a, JsStringLiteral) for a in args):
return None
return last.value
def _is_constructor_chain(node: JsCallExpression | JsNewExpression) -> bool:
"""
Detect a constructor chain callee pattern equivalent to `Function`:
<literal>.constructor.constructor
"""
callee = node.callee
if not isinstance(callee, JsMemberExpression):
return False
if access_key(callee) != 'constructor':
return False
inner = callee.object
if not isinstance(inner, JsMemberExpression):
return False
if access_key(inner) != 'constructor':
return False
base = inner.object
if base is None:
return False
return isinstance(base, (JsStringLiteral, JsIdentifier)) or is_side_effect_free(base)
def _extract_constructor_chain_code(node: JsCallExpression) -> str | None:
"""
Extract code from constructor chain IIFE patterns:
"".constructor.constructor("code")()
[].constructor.constructor("code")()
"""
inner_call = node.callee
if not isinstance(inner_call, JsCallExpression):
return None
if not _is_constructor_chain(inner_call):
return None
if len(inner_call.arguments) != 1:
return None
return string_value(inner_call.arguments[0])
def _extract_invoked_function_body(node: JsCallExpression) -> str | None:
"""
Extract code from immediately-invoked Function constructors:
Function("code")()
new Function("code")()
Function("a", "b", "code")(args)
"""
inner = node.callee
if not isinstance(inner, (JsCallExpression, JsNewExpression)):
return None
return _extract_function_body_code(inner)
def _extract_getter_target(func: Expression | None) -> str | JsUnaryExpression | None:
"""
Extract the value returned by a getter. Expected patterns:
- `{ return <identifier>; }` -> returns the identifier name as `str`
- a `typeof` expression -> returns a `JsUnaryExpression` clone
"""
if not isinstance(func, JsFunctionExpression):
return None
if func.body is None or not isinstance(func.body, JsBlockStatement):
return None
body = func.body.body
if len(body) != 1:
return None
stmt = body[0]
if not isinstance(stmt, JsReturnStatement) or stmt.argument is None:
return None
arg = stmt.argument
if isinstance(arg, JsIdentifier):
return arg.name
if (
isinstance(arg, JsUnaryExpression)
and arg.operator == 'typeof'
and isinstance(arg.operand, JsIdentifier)
):
return arg
return None
def _extract_setter_target(func: Expression | None) -> str | None:
"""
Extract the global assigned in a setter. Expected pattern:
{ return <global> = <param>; }
where the function has exactly one parameter.
"""
if not isinstance(func, JsFunctionExpression):
return None
if len(func.params) != 1 or not isinstance(func.params[0], JsIdentifier):
return None
param_name = func.params[0].name
if func.body is None or not isinstance(func.body, JsBlockStatement):
return None
body = func.body.body
if len(body) != 1:
return None
stmt = body[0]
if isinstance(stmt, JsReturnStatement):
expr = stmt.argument
elif isinstance(stmt, JsExpressionStatement):
expr = stmt.expression
else:
return None
if not isinstance(expr, JsAssignmentExpression) or expr.operator != '=':
return None
if not isinstance(expr.left, JsIdentifier):
return None
if not isinstance(expr.right, JsIdentifier) or expr.right.name != param_name:
return None
return expr.left.name
class _ProxyMapping(NamedTuple):
getters: dict[str, str | JsUnaryExpression]
setters: dict[str, str]
def _build_proxy_mapping(
obj: JsObjectExpression,
) -> _ProxyMapping | None:
"""
Build getter and setter mappings from a pack proxy object. Returns `(getters, setters)` or
`None` if any property is malformed.
"""
getters: dict[str, str | JsUnaryExpression] = {}
setters: dict[str, str] = {}
for prop in obj.properties:
if not isinstance(prop, JsProperty):
return None
key = property_key(prop)
if key is None:
return None
if prop.kind == JsPropertyKind.GET:
target = _extract_getter_target(prop.value)
if target is None:
return None
getters[key] = target
elif prop.kind == JsPropertyKind.SET:
target = _extract_setter_target(prop.value)
if target is None:
return None
setters[key] = target
else:
return None
return _ProxyMapping(getters, setters)
def _substitute_proxy_accesses(
parsed: JsScript,
param_name: str,
getters: dict[str, str | JsUnaryExpression],
setters: dict[str, str],
) -> bool:
"""
Replace all `param[key]` accesses in the parsed code with the resolved globals from the proxy
mapping. Returns `True` if every access was resolved successfully.
"""
for node in list(parsed.walk()):
if not isinstance(node, JsMemberExpression):
continue
if not isinstance(node.object, JsIdentifier) or node.object.name != param_name:
continue
key = access_key(node)
if key is None:
return False
parent = node.parent
if (
isinstance(parent, JsAssignmentExpression)
and parent.left is node
and parent.operator == '='
):
if key not in setters:
return False
_replace_in_parent(node, JsIdentifier(name=setters[key]))
else:
if key not in getters:
return False
target = getters[key]
if isinstance(target, str):
_replace_in_parent(node, JsIdentifier(name=target))
else:
_replace_in_parent(node, _clone_node(target))
return True
def _try_unpack_function_constructor(
node: JsCallExpression,
) -> list[Statement] | None:
"""
Unpack an immediately-invoked `Function` constructor whose single argument is a proxy object
with getter/setter properties that redirect to global variables:
Function("p", "p.abc = p.def(p.ghi)")(
{get abc() { return x }, set abc(v) { x = v }, get def() { return y }, ...}
)
Parses the code string, resolves all `p.key` accesses through the proxy mapping back to their
original global identifiers, and returns the recovered statement list. Returns `None` if the
node does not match or if any proxy access cannot be resolved.
"""
inner = node.callee
if not isinstance(inner, JsCallExpression):
return None
if inner.callee is None or not _is_function_identifier(inner.callee):
return None
if len(node.arguments) != 1 or not isinstance(node.arguments[0], JsObjectExpression):
return None
proxy_obj = node.arguments[0]
inner_args = inner.arguments
if len(inner_args) == 1:
param_name = ''
code = string_value(inner_args[0])
elif len(inner_args) == 2:
param_name = string_value(inner_args[0])
code = string_value(inner_args[1])
if param_name is None:
return None
else:
return None
if code is None:
return None
mapping = _build_proxy_mapping(proxy_obj)
if mapping is None:
return None
getters, setters = mapping
parsed = _try_parse(code)
if parsed is None:
return None
if param_name and not _substitute_proxy_accesses(parsed, param_name, getters, setters):
return None
return list(parsed.body)
def _is_pack_shaped(node: JsCallExpression) -> bool:
"""
Return `True` when the call has the shape of a pack pattern: the callee is a `Function()`
call and the outer argument is an object expression. When this shape is detected, the generic
function-body extraction should be skipped to avoid inlining code with unresolved proxy
references.
"""
inner = node.callee
if not isinstance(inner, JsCallExpression) or inner.callee is None:
return False
if not _is_function_identifier(inner.callee):
return False
return len(node.arguments) == 1 and isinstance(node.arguments[0], JsObjectExpression)
class JsReflectionInlining(ScriptLevelTransformer):
"""
Inline reflective code execution: `eval`, `Function` constructor, constructor chains, and
indirect invocation via `setTimeout` and `setInterval`.
"""
def _process_script(self, node: JsScript) -> None:
self._inline_statements(node)
self._inline_expressions(node)
def _inline_statements(self, root: JsScript) -> None:
for container in list(root.walk()):
body = get_body(container)
if body is None:
continue
is_script = isinstance(container, JsScript)
i = 0
while i < len(body):
parsed = self._try_resolve_statement(body[i])
if parsed is None:
i += 1
continue
if is_script:
parsed = self._sanitize_for_script_scope(parsed)
if parsed is None:
i += 1
continue
for stmt in parsed:
stmt.parent = container
body[i:i + 1] = parsed
self.mark_changed()
i += len(parsed)
@staticmethod
def _sanitize_for_script_scope(stmts: list[Statement]) -> list[Statement] | None:
for stmt in stmts[:-1] if stmts else ():
if isinstance(stmt, JsReturnStatement):
return None
if stmts and isinstance(stmts[-1], JsReturnStatement):
last = stmts[-1]
if last.argument is not None:
stmts = stmts[:-1] + [JsExpressionStatement(expression=last.argument)]
else:
stmts = stmts[:-1]
return stmts
def _inline_expressions(self, root: JsScript) -> None:
for node in list(root.walk()):
if not isinstance(node, JsCallExpression):
continue
if isinstance(node.parent, JsExpressionStatement):
continue
replacement = self._try_resolve_expression(node)
if replacement is None:
continue
_replace_in_parent(node, replacement)
self.mark_changed()
def _try_resolve_statement(self, stmt: Statement) -> list[Statement] | None:
if not isinstance(stmt, JsExpressionStatement) or stmt.expression is None:
return None
node = stmt.expression
if not isinstance(node, JsCallExpression):
return None
pack_result = _try_unpack_function_constructor(node)
if pack_result is not None:
return pack_result
if _is_pack_shaped(node):
return None
code = (
_extract_eval_code(node)
or _extract_indirect_eval_code(node)
or _extract_invoked_function_body(node)
or _extract_constructor_chain_code(node)
or _extract_timer_code(node)
)
if code is None:
return None
parsed = _try_parse(code)
if parsed is None:
return None
return list(parsed.body)
@staticmethod
def _try_resolve_expression(node: JsCallExpression) -> Expression | None:
code = (
_extract_eval_code(node)
or _extract_indirect_eval_code(node)
or _extract_invoked_function_body(node)
or _extract_constructor_chain_code(node)
)
if code is None:
return None
parsed = _try_parse(code)
if parsed is None:
return None
body = parsed.body
if len(body) == 1:
stmt = body[0]
if isinstance(stmt, JsExpressionStatement) and stmt.expression is not None:
return stmt.expression
if isinstance(stmt, JsReturnStatement) and stmt.argument is not None:
return stmt.argument
return None
Classes
class JsReflectionInlining-
Inline reflective code execution:
eval,Functionconstructor, constructor chains, and indirect invocation viasetTimeoutandsetInterval.Expand source code Browse git
class JsReflectionInlining(ScriptLevelTransformer): """ Inline reflective code execution: `eval`, `Function` constructor, constructor chains, and indirect invocation via `setTimeout` and `setInterval`. """ def _process_script(self, node: JsScript) -> None: self._inline_statements(node) self._inline_expressions(node) def _inline_statements(self, root: JsScript) -> None: for container in list(root.walk()): body = get_body(container) if body is None: continue is_script = isinstance(container, JsScript) i = 0 while i < len(body): parsed = self._try_resolve_statement(body[i]) if parsed is None: i += 1 continue if is_script: parsed = self._sanitize_for_script_scope(parsed) if parsed is None: i += 1 continue for stmt in parsed: stmt.parent = container body[i:i + 1] = parsed self.mark_changed() i += len(parsed) @staticmethod def _sanitize_for_script_scope(stmts: list[Statement]) -> list[Statement] | None: for stmt in stmts[:-1] if stmts else (): if isinstance(stmt, JsReturnStatement): return None if stmts and isinstance(stmts[-1], JsReturnStatement): last = stmts[-1] if last.argument is not None: stmts = stmts[:-1] + [JsExpressionStatement(expression=last.argument)] else: stmts = stmts[:-1] return stmts def _inline_expressions(self, root: JsScript) -> None: for node in list(root.walk()): if not isinstance(node, JsCallExpression): continue if isinstance(node.parent, JsExpressionStatement): continue replacement = self._try_resolve_expression(node) if replacement is None: continue _replace_in_parent(node, replacement) self.mark_changed() def _try_resolve_statement(self, stmt: Statement) -> list[Statement] | None: if not isinstance(stmt, JsExpressionStatement) or stmt.expression is None: return None node = stmt.expression if not isinstance(node, JsCallExpression): return None pack_result = _try_unpack_function_constructor(node) if pack_result is not None: return pack_result if _is_pack_shaped(node): return None code = ( _extract_eval_code(node) or _extract_indirect_eval_code(node) or _extract_invoked_function_body(node) or _extract_constructor_chain_code(node) or _extract_timer_code(node) ) if code is None: return None parsed = _try_parse(code) if parsed is None: return None return list(parsed.body) @staticmethod def _try_resolve_expression(node: JsCallExpression) -> Expression | None: code = ( _extract_eval_code(node) or _extract_indirect_eval_code(node) or _extract_invoked_function_body(node) or _extract_constructor_chain_code(node) ) if code is None: return None parsed = _try_parse(code) if parsed is None: return None body = parsed.body if len(body) == 1: stmt = body[0] if isinstance(stmt, JsExpressionStatement) and stmt.expression is not None: return stmt.expression if isinstance(stmt, JsReturnStatement) and stmt.argument is not None: return stmt.argument return NoneAncestors