Module refinery.lib.scripts.js.deobfuscation.stringarray
Resolve the string-array rotation pattern produced by popular JavaScript obfuscators.
The obfuscator extracts all string literals into a single array, wraps access through an accessor function and scrambles the array via a rotation IIFE that push/shifts until a checksum computed from parseInt of the array's own elements matches a target constant. This transformer detects that three-part pattern, simulates the rotation in Python, resolves every accessor call to its string literal, and removes the dead definitions.
Expand source code Browse git
"""
Resolve the string-array rotation pattern produced by popular JavaScript obfuscators.
The obfuscator extracts all string literals into a single array, wraps access through an accessor
function and scrambles the array via a rotation IIFE that push/shifts until a checksum computed
from parseInt of the array's own elements matches a target constant. This transformer detects that
three-part pattern, simulates the rotation in Python, resolves every accessor call to its string
literal, and removes the dead definitions.
"""
from __future__ import annotations
import base64
import enum
import functools
from refinery.lib.scripts import (
Node,
Transformer,
_remove_from_parent,
_replace_in_parent,
)
from refinery.lib.scripts.js.deobfuscation.helpers import (
BINARY_OPS,
js_parse_int,
make_string_literal,
property_key,
remove_declarator,
string_value,
)
from refinery.lib.scripts.js.model import (
JsArrayExpression,
JsAssignmentExpression,
JsBinaryExpression,
JsCallExpression,
JsExpressionStatement,
JsFunctionDeclaration,
JsFunctionExpression,
JsIdentifier,
JsIfStatement,
JsMemberExpression,
JsNumericLiteral,
JsObjectExpression,
JsParenthesizedExpression,
JsProperty,
JsReturnStatement,
JsScript,
JsStringLiteral,
JsUnaryExpression,
JsVariableDeclaration,
JsVariableDeclarator,
JsWhileStatement,
)
from typing import NamedTuple, Sequence
class Encoding(enum.Enum):
    """
    Encoding applied by the obfuscator to the entries of the string array.
    """
    # Entries are stored verbatim and returned without any decoding.
    NONE = 'none'
    # Entries are base64-encoded with the obfuscator's custom alphabet.
    B64 = 'base64'
    # Entries are base64-encoded and RC4-encrypted; a key is required to decode them.
    RC4 = 'rc4'
class ArrayFunction(NamedTuple):
    """
    Result of detecting the array-holder function pattern.
    """
    # The function declaration that holds the string array.
    node: JsFunctionDeclaration
    # Name of that function declaration.
    name: str
    # String values of the array literal, in source order (i.e. before any rotation).
    strings: list[str]
class AccessorFunction(NamedTuple):
    """
    Result of detecting the accessor function pattern.
    """
    # The function declaration of the accessor.
    node: JsFunctionDeclaration
    # Name of the accessor function.
    name: str
    # Constant that the accessor subtracts from its index parameter before the array lookup.
    base_offset: int
class RotationIIFE(NamedTuple):
    """
    Result of detecting the rotation IIFE pattern.
    """
    # The top-level expression statement that contains the immediately invoked function.
    node: JsExpressionStatement
    # Checksum value passed as the second call argument, evaluated to an integer.
    target: int
    # Statements of the invoked function's body.
    body: Sequence[Node]
class ChecksumInfo(NamedTuple):
    """
    Result of extracting the checksum expression from the rotation IIFE.
    """
    # Initializer expression of the first variable declarator found inside the while loop.
    node: Node
    # The accessor name plus every IIFE-local variable initialized directly from it.
    local_accessors: frozenset[str]
    # Wrapper functions declared inside the IIFE, keyed by function name.
    wrappers: dict[str, 'AccessorWrapperInfo']
    # Object-literal property maps declared inside the IIFE, keyed by variable name.
    prop_maps: dict[str, dict[str, int | str]]
def _find_array_function(body: Sequence[Node]) -> ArrayFunction | None:
    """
    Search the given statements for the function that holds the string array:

        function NAME() {
            var x = ['str0', 'str1', ...];
            NAME = function() { return x; };
            return NAME();
        }

    A function declaration matches when its body has at least two statements, declares a
    non-empty array consisting only of string values, and assigns to its own name. The first match
    is returned as an `ArrayFunction`; None is returned when nothing matches.
    """
    for candidate in body:
        if not isinstance(candidate, JsFunctionDeclaration):
            continue
        if candidate.id is None or candidate.body is None:
            continue
        fn_name = candidate.id.name
        inner = candidate.body.body
        if len(inner) < 2:
            continue
        strings: list[str] | None = None
        reassigned = False
        for stmt in inner:
            if isinstance(stmt, JsVariableDeclaration):
                for declarator in stmt.declarations:
                    if not isinstance(declarator, JsVariableDeclarator):
                        continue
                    if not isinstance(declarator.init, JsArrayExpression):
                        continue
                    values: list[str] | None = []
                    for element in declarator.init.elements:
                        text = string_value(element)
                        if text is None:
                            # A single non-string element disqualifies the whole array.
                            values = None
                            break
                        values.append(text)
                    if values:
                        strings = values
            elif isinstance(stmt, JsExpressionStatement):
                expression = stmt.expression
                if (
                    isinstance(expression, JsAssignmentExpression)
                    and isinstance(expression.left, JsIdentifier)
                    and expression.left.name == fn_name
                ):
                    reassigned = True
        if reassigned and strings is not None:
            return ArrayFunction(candidate, fn_name, strings)
    return None
def _find_accessor_function(
    body: Sequence[Node],
    array_fn_name: str,
) -> AccessorFunction | None:
    """
    Search the given statements for the accessor function:

        function NAME(param, _unused) {
            param = param - BASE_OFFSET;
            var v = ARRAY_FN();
            var r = v[param];
            return r;
        }

    A two-parameter function declaration matches when its body both rebinds the first parameter to
    itself minus a statically computable constant and declares a variable initialized by a call
    to *array_fn_name*. The first match is returned as an `AccessorFunction`, otherwise None.
    """
    for fn in body:
        if not isinstance(fn, JsFunctionDeclaration):
            continue
        if fn.id is None or fn.body is None:
            continue
        if len(fn.params) != 2:
            continue
        head = fn.params[0]
        if not isinstance(head, JsIdentifier):
            continue
        index_name = head.name
        offset: int | None = None
        uses_array = False
        for inner in fn.body.body:
            if isinstance(inner, JsVariableDeclaration):
                for declarator in inner.declarations:
                    if not isinstance(declarator, JsVariableDeclarator):
                        continue
                    init = declarator.init
                    if (
                        isinstance(init, JsCallExpression)
                        and isinstance(init.callee, JsIdentifier)
                        and init.callee.name == array_fn_name
                    ):
                        uses_array = True
                continue
            if not isinstance(inner, JsExpressionStatement):
                continue
            assignment = inner.expression
            if not isinstance(assignment, JsAssignmentExpression):
                continue
            if not isinstance(assignment.left, JsIdentifier) or assignment.left.name != index_name:
                continue
            rhs = assignment.right
            if not isinstance(rhs, JsBinaryExpression) or rhs.operator != '-':
                continue
            if not isinstance(rhs.left, JsIdentifier) or rhs.left.name != index_name:
                continue
            if rhs.right is None:
                continue
            try:
                offset = int(_eval_arithmetic(rhs.right))
            except _EvalError:
                # Not a constant subtraction; keep any offset found earlier.
                pass
        if uses_array and offset is not None:
            return AccessorFunction(fn, fn.id.name, offset)
    return None
def _find_rotation_iife(
    body: Sequence[Node],
    array_fn_name: str,
) -> RotationIIFE | None:
    """
    Search the given statements for the immediately invoked function that rotates the array:

        (function(getArray, target) {
            var arr = getArray();
            while (true) { try { ... parseInt ... push(shift) } catch { push(shift) } }
        })(ARRAY_FN, TARGET_NUMBER);

    An expression statement matches when it is a (possibly parenthesized) call of a function
    expression with exactly two arguments, the first being the identifier *array_fn_name* and the
    second a statically computable number, and the function body contains a while statement at its
    top level. Returns the first match as a `RotationIIFE`, otherwise None.
    """
    for candidate in body:
        if not isinstance(candidate, JsExpressionStatement):
            continue
        invocation = candidate.expression
        if isinstance(invocation, JsParenthesizedExpression):
            invocation = invocation.expression
        if not isinstance(invocation, JsCallExpression):
            continue
        callee = invocation.callee
        if not isinstance(callee, JsFunctionExpression):
            continue
        if len(invocation.arguments) != 2:
            continue
        getter, checksum_target = invocation.arguments[0], invocation.arguments[1]
        if not isinstance(getter, JsIdentifier) or getter.name != array_fn_name:
            continue
        try:
            target = int(_eval_arithmetic(checksum_target))
        except _EvalError:
            continue
        block = callee.body
        if block is None:
            continue
        if any(isinstance(inner, JsWhileStatement) for inner in block.body):
            return RotationIIFE(candidate, target, block.body)
    return None
class _EvalError(Exception):
    """
    Raised by the static evaluation and decoding helpers in this module when an expression or
    string cannot be resolved; callers catch it to skip the affected candidate.
    """
    pass
class AccessorWrapperInfo(NamedTuple):
    """
    Describes an inner wrapper function inside the rotation IIFE that forwards to the main
    accessor with reordered arguments and an additional offset subtraction.
    """
    # Name of the accessor function the wrapper forwards to.
    target: str
    # Constant subtracted from the call-site index argument to obtain the accessor index.
    offset: int
    # Position of the wrapper parameter that carries the index.
    idx_param_pos: int
    # Position of the wrapper parameter that is forwarded as the accessor's second (key) argument.
    key_param_pos: int
def _resolve_member_access(
    node: JsMemberExpression,
    prop_maps: dict[str, dict[str, int | str]],
) -> int | str | None:
    """
    Look up `OBJ.KEY` or `OBJ['KEY']` in the known property maps. Returns the stored value, or
    None when the object is not a known identifier, the property is neither an identifier nor a
    string literal, or the key is not present in the object's map.
    """
    owner = node.object
    if not isinstance(owner, JsIdentifier):
        return None
    table = prop_maps.get(owner.name)
    if table is None:
        return None
    member = node.property
    if isinstance(member, JsIdentifier):
        return table.get(member.name)
    if isinstance(member, JsStringLiteral):
        return table.get(member.value)
    return None
def _resolve_constant(
    node: Node,
    prop_maps: dict[str, dict[str, int | str]],
) -> int | None:
    """
    Reduce *node* to an integer constant. Parentheses are stripped first; numeric literals are
    converted directly, member accesses are looked up in *prop_maps* (only integer values count),
    unary negation is applied to the resolved operand, and anything else is handed to
    `_eval_arithmetic`. Returns None when the node cannot be resolved.
    """
    # Peel off any number of enclosing parentheses before classifying the node.
    while isinstance(node, JsParenthesizedExpression) and node.expression:
        node = node.expression
    if isinstance(node, JsNumericLiteral):
        return int(node.value)
    if isinstance(node, JsMemberExpression):
        looked_up = _resolve_member_access(node, prop_maps)
        if isinstance(looked_up, int):
            return looked_up
        return None
    if isinstance(node, JsUnaryExpression) and node.operator == '-' and node.operand:
        operand = _resolve_constant(node.operand, prop_maps)
        if operand is None:
            return None
        return -operand
    try:
        number = _eval_arithmetic(node)
    except _EvalError:
        return None
    return int(number)
def _extract_wrapper_offset(
    idx_arg: Node,
    prop_maps: dict[str, dict[str, int | str]],
) -> tuple[str, int] | None:
    """
    Split the index argument of a wrapper's forwarding call into `(param_name, offset)` such that
    the effective accessor index equals `call_arg - offset`:

        param            => (param, 0)
        param - CONST    => (param, CONST)
        param - -CONST   => (param, -CONST)
        param - (-CONST) => (param, -CONST)
        param + CONST    => (param, -CONST)

    Returns None for any other shape or when the constant cannot be resolved.
    """
    if isinstance(idx_arg, JsIdentifier):
        return idx_arg.name, 0
    if not isinstance(idx_arg, JsBinaryExpression):
        return None
    base = idx_arg.left
    if not isinstance(base, JsIdentifier):
        return None
    constant = _resolve_constant(idx_arg.right, prop_maps)
    if constant is None:
        return None
    operator = idx_arg.operator
    if operator == '-':
        return base.name, constant
    if operator == '+':
        # Adding a constant is the same as subtracting its negation.
        return base.name, -constant
    return None
def _parse_object_props(
    obj: JsObjectExpression,
    allow_strings: bool = False,
) -> dict[str, int | str]:
    """
    Collect the statically resolvable `{key: value}` pairs of an object expression. Keys are
    obtained through `property_key`. Values are integers computed by `_eval_arithmetic`; when
    *allow_strings* is True, values for which `string_value` yields a string are stored as strings
    instead. Properties whose key or value cannot be resolved are skipped.
    """
    result: dict[str, int | str] = {}
    for entry in obj.properties:
        if not isinstance(entry, JsProperty):
            continue
        value_node = entry.value
        if value_node is None:
            continue
        name = property_key(entry)
        if name is None:
            continue
        text = string_value(value_node) if allow_strings else None
        if text is not None:
            result[name] = text
            continue
        try:
            number = _eval_arithmetic(value_node)
        except _EvalError:
            continue
        result[name] = int(number)
    return result
def _collect_local_prop_maps(
    body: Sequence[Node],
) -> dict[str, dict[str, int | str]]:
    """
    Gather integer property maps from the object literals declared at the start of a function
    body. Scanning ends at the first statement that is not a variable declaration. The result maps
    each variable name to its non-empty `{key: int}` dictionary.
    """
    found: dict[str, dict[str, int | str]] = {}
    for stmt in body:
        if not isinstance(stmt, JsVariableDeclaration):
            break
        for declarator in stmt.declarations:
            if not isinstance(declarator, JsVariableDeclarator):
                continue
            if not isinstance(declarator.id, JsIdentifier):
                continue
            if not isinstance(declarator.init, JsObjectExpression):
                continue
            parsed = _parse_object_props(declarator.init)
            if parsed:
                found[declarator.id.name] = parsed
    return found
def _match_wrapper(
    fn: JsFunctionDeclaration,
    accessor_name: str,
    prop_maps: dict[str, dict[str, int | str]],
) -> AccessorWrapperInfo | None:
    """
    Check whether a function declaration is a wrapper that forwards to *accessor_name* with
    reordered arguments and a constant offset. The function must have at least two parameters and
    end in `return ACCESSOR(index_expr, key_param)`, where `index_expr` is a parameter optionally
    combined with a constant via `+` or `-`. Object literals declared at the start of the wrapper
    body take precedence over the given *prop_maps* when resolving that constant.

    Returns an `AccessorWrapperInfo` on match, else None. The recorded parameter positions are
    positions in the wrapper's actual parameter list, so they can be used directly to index the
    arguments of a call to the wrapper.
    """
    if fn.id is None or fn.body is None:
        return None
    statements = fn.body.body
    if len(statements) < 1 or len(fn.params) < 2:
        return None
    ret = statements[-1]
    if not isinstance(ret, JsReturnStatement) or ret.argument is None:
        return None
    call = ret.argument
    if not isinstance(call, JsCallExpression) or not isinstance(call.callee, JsIdentifier):
        return None
    if call.callee.name != accessor_name:
        return None
    if len(call.arguments) != 2:
        return None
    local_props = dict(prop_maps)
    local_props.update(_collect_local_prop_maps(statements[:-1]))
    idx_arg = call.arguments[0]
    key_arg = call.arguments[1]
    if not isinstance(key_arg, JsIdentifier):
        return None
    offset = _extract_wrapper_offset(idx_arg, local_props)
    if offset is None:
        return None
    idx_param_name, offset_value = offset
    # Record each identifier parameter's real position. Filtering non-identifier parameters into a
    # separate list would shift the positions of every parameter that follows one. For duplicate
    # names the last occurrence wins, matching the binding rules of non-strict JavaScript.
    positions = {
        param.name: position
        for position, param in enumerate(fn.params)
        if isinstance(param, JsIdentifier)
    }
    idx_position = positions.get(idx_param_name)
    key_position = positions.get(key_arg.name)
    if idx_position is None or key_position is None:
        return None
    return AccessorWrapperInfo(
        target=accessor_name,
        offset=offset_value,
        idx_param_pos=idx_position,
        key_param_pos=key_position,
    )
def _collect_iife_wrappers(
    iife_body: Sequence[Node],
    accessor_name: str,
) -> tuple[dict[str, AccessorWrapperInfo], dict[str, dict[str, int | str]]]:
    """
    Collect the wrapper functions and offset objects declared at the top level of the rotation
    IIFE body. The result is `(wrappers, prop_maps)`: `wrappers` maps each wrapper function name
    to its `AccessorWrapperInfo`, and `prop_maps` maps each object variable name to its
    `{key: value}` dictionary (integer and string values).

    A wrapper typically looks as follows; it swaps the argument order and subtracts a constant
    from the index parameter::

        var a = { p: 0x4 };
        function f(x, y) {
            return g(y - a.p, x);
        }

    Statements are processed in order, so a wrapper only sees objects declared before it.
    """
    objects: dict[str, dict[str, int | str]] = {}
    found: dict[str, AccessorWrapperInfo] = {}
    for stmt in iife_body:
        if isinstance(stmt, JsVariableDeclaration):
            for declarator in stmt.declarations:
                if not isinstance(declarator, JsVariableDeclarator):
                    continue
                if not isinstance(declarator.id, JsIdentifier):
                    continue
                if not isinstance(declarator.init, JsObjectExpression):
                    continue
                parsed = _parse_object_props(declarator.init, allow_strings=True)
                if parsed:
                    objects[declarator.id.name] = parsed
        elif isinstance(stmt, JsFunctionDeclaration):
            match = _match_wrapper(stmt, accessor_name, objects)
            if match is not None and stmt.id is not None:
                found[stmt.id.name] = match
    return found, objects
def _collect_all_wrappers(
    root: Node,
    accessor_name: str,
) -> dict[str, AccessorWrapperInfo]:
    """
    Find every function declaration below *root*, at any nesting depth, that forwards to the
    accessor with a constant offset. No outer property maps are supplied, so only offset objects
    declared inside the wrapper body itself are resolved. Returns a mapping from wrapper name to
    its `AccessorWrapperInfo`.
    """
    found: dict[str, AccessorWrapperInfo] = {}
    for candidate in root.walk():
        if not isinstance(candidate, JsFunctionDeclaration):
            continue
        match = _match_wrapper(candidate, accessor_name, {})
        if match is None or candidate.id is None:
            continue
        found[candidate.id.name] = match
    return found
def _extract_checksum_expression(
    iife_body: Sequence[Node],
    accessor_name: str,
) -> ChecksumInfo | None:
    """
    Gather everything needed to evaluate the rotation checksum without touching the AST: the
    checksum expression itself, the names that refer to the accessor inside the IIFE, and the
    inner wrappers with their property maps.

    The checksum expression is the initializer of the first variable declarator encountered while
    walking the first while statement (with a body) of the IIFE. Returns None when no such
    expression exists.
    """
    accessor_names: set[str] = {accessor_name}
    for stmt in iife_body:
        if not isinstance(stmt, JsVariableDeclaration):
            continue
        for declarator in stmt.declarations:
            if (
                isinstance(declarator, JsVariableDeclarator)
                and isinstance(declarator.id, JsIdentifier)
                and isinstance(declarator.init, JsIdentifier)
                and declarator.init.name == accessor_name
            ):
                accessor_names.add(declarator.id.name)
    # Only the first while statement that has a body is inspected.
    loop = next(
        (s for s in iife_body if isinstance(s, JsWhileStatement) and s.body is not None),
        None,
    )
    expression: Node | None = None
    if loop is not None:
        for inner in loop.walk():
            if not isinstance(inner, JsVariableDeclaration):
                continue
            expression = next(
                (
                    d.init for d in inner.declarations
                    if isinstance(d, JsVariableDeclarator) and d.init is not None
                ),
                None,
            )
            if expression is not None:
                break
    if expression is None:
        return None
    wrappers, prop_maps = _collect_iife_wrappers(iife_body, accessor_name)
    return ChecksumInfo(expression, frozenset(accessor_names), wrappers, prop_maps)
def _decode_string(raw: str, encoding: Encoding, key: str | None = None) -> str:
    """
    Decode a raw string from the array according to the encoding mode.

    - `Encoding.NONE` returns *raw* unchanged.
    - `Encoding.B64` decodes it with `_decode_base64`.
    - `Encoding.RC4` decrypts it with `_decrypt_rc4`, which requires a non-empty *key*.

    Raises `_EvalError` when decoding is not possible: the RC4 key is missing or empty, the data is
    not valid base64 or UTF-8, or the encoding is not one of the above.
    """
    if encoding == Encoding.NONE:
        return raw
    try:
        if encoding == Encoding.B64:
            return _decode_base64(raw)
        if encoding == Encoding.RC4:
            # An empty key is as unusable as a missing one: the RC4 key schedule indexes the key
            # modulo its length and would otherwise fail with a ZeroDivisionError.
            if not key:
                raise _EvalError
            return _decrypt_rc4(raw, key)
    except (UnicodeDecodeError, ValueError) as error:
        raise _EvalError from error
    raise _EvalError
def _eval_arithmetic(node: Node) -> float:
    """
    Evaluate a pure arithmetic expression AST to a float. Handles numeric literals, unary `+`/`-`,
    binary operators listed in `BINARY_OPS`, and parenthesized expressions.

    Raises `_EvalError` on any node that is not statically computable. This includes division by
    zero and any arithmetic failure (e.g. overflow or a zero divisor) raised while applying a
    binary operator, so callers only ever need to handle `_EvalError`.
    """
    if isinstance(node, JsNumericLiteral):
        return float(node.value)
    if isinstance(node, JsParenthesizedExpression) and node.expression:
        return _eval_arithmetic(node.expression)
    if isinstance(node, JsUnaryExpression) and node.operand:
        if node.operator == '-':
            return -_eval_arithmetic(node.operand)
        if node.operator == '+':
            return _eval_arithmetic(node.operand)
    if isinstance(node, JsBinaryExpression) and node.left and node.right:
        left = _eval_arithmetic(node.left)
        right = _eval_arithmetic(node.right)
        fn = BINARY_OPS.get(node.operator)
        if fn is not None:
            if node.operator == '/' and right == 0:
                raise _EvalError
            try:
                return fn(left, right)
            except ArithmeticError as error:
                # Keep the documented contract: an expression that cannot be computed surfaces
                # as _EvalError rather than leaking ZeroDivisionError or OverflowError.
                raise _EvalError from error
    raise _EvalError
def _eval_checksum(
    node: Node,
    local_accessors: frozenset[str],
    strings: list[str],
    base_offset: int,
    encoding: Encoding = Encoding.NONE,
    wrappers: dict[str, AccessorWrapperInfo] | None = None,
    prop_maps: dict[str, dict[str, int | str]] | None = None,
) -> float:
    """
    Evaluate a checksum expression against the current array state. Handles numeric literals,
    parentheses, unary `+`/`-`, the binary operators listed in `BINARY_OPS`, and `parseInt` calls
    whose argument is either a string literal or an accessor/wrapper call. Accessor and wrapper
    calls are resolved to an array index on the fly without modifying the AST; the entry at
    `index - base_offset` in *strings* is decoded according to *encoding* and parsed as an integer.

    Raises `_EvalError` on any unrecognized pattern, on an out-of-range index, when a string does
    not parse as an integer, and when an arithmetic operation fails (division by zero, overflow).
    """
    recurse = functools.partial(
        _eval_checksum,
        local_accessors=local_accessors,
        strings=strings,
        base_offset=base_offset,
        encoding=encoding,
        wrappers=wrappers,
        prop_maps=prop_maps,
    )

    def parse_number(text: str) -> float:
        # Mirror parseInt: an unparsable string makes the checksum invalid for this rotation.
        result = js_parse_int(text)
        if result is None:
            raise _EvalError
        try:
            return float(result)
        except OverflowError as error:
            # Integers too large for a float cannot take part in the checksum arithmetic.
            raise _EvalError from error

    if isinstance(node, JsNumericLiteral):
        return float(node.value)
    if isinstance(node, JsParenthesizedExpression) and node.expression:
        return recurse(node.expression)
    if isinstance(node, JsUnaryExpression) and node.operand:
        if node.operator == '-':
            return -recurse(node.operand)
        if node.operator == '+':
            return recurse(node.operand)
    if isinstance(node, JsBinaryExpression) and node.left and node.right:
        lhs = recurse(node.left)
        rhs = recurse(node.right)
        fn = BINARY_OPS.get(node.operator)
        if fn is not None:
            if node.operator == '/' and rhs == 0:
                raise _EvalError
            try:
                return fn(lhs, rhs)
            except ArithmeticError as error:
                # Surface operator failures through the documented exception type.
                raise _EvalError from error
    if isinstance(node, JsCallExpression) and isinstance(node.callee, JsIdentifier):
        if node.callee.name == 'parseInt' and len(node.arguments) >= 1:
            inner = node.arguments[0]
            if isinstance(inner, JsStringLiteral):
                return parse_number(inner.value)
            if isinstance(inner, JsCallExpression) and isinstance(inner.callee, JsIdentifier):
                idx, key = _resolve_accessor_call(
                    inner, local_accessors, wrappers, prop_maps,
                )
                if 0 <= (i := idx - base_offset) < len(strings):
                    return parse_number(_decode_string(strings[i], encoding, key))
                raise _EvalError
    raise _EvalError
def _resolve_accessor_call(
    call: JsCallExpression,
    local_accessors: frozenset[str],
    wrappers: dict[str, AccessorWrapperInfo] | None,
    prop_maps: dict[str, dict[str, int | str]] | None,
) -> tuple[int, str | None]:
    """
    Translate a call into `(index, rc4_key)`.

    For a direct accessor call (`accessor(idx, key)`), the index is the arithmetic value of the
    first argument and the key is the second argument if it is a string literal. For a wrapper
    call (`wrapper(obj.key, obj.idx)`), the arguments are picked by the wrapper's recorded
    parameter positions, resolved against *prop_maps*, and the wrapper offset is subtracted from
    the index. Raises `_EvalError` when the call cannot be resolved.
    """
    callee = call.callee
    if not isinstance(callee, JsIdentifier):
        raise _EvalError
    name = callee.name
    if name is None:
        raise _EvalError
    args = call.arguments
    if name in local_accessors and len(args) >= 1:
        index = int(_eval_arithmetic(args[0]))
        rc4_key: str | None = None
        if len(args) >= 2 and isinstance(args[1], JsStringLiteral):
            rc4_key = args[1].value
        return index, rc4_key
    info = wrappers.get(name) if wrappers else None
    if info is None:
        raise _EvalError
    # The call must supply every argument position that the wrapper forwards.
    required = max(info.idx_param_pos, info.key_param_pos) + 1
    if len(args) < required:
        raise _EvalError
    index_node = args[info.idx_param_pos]
    key_node = args[info.key_param_pos]
    known = prop_maps or {}
    resolved = _resolve_constant(index_node, known)
    if resolved is None:
        raise _EvalError
    return resolved - info.offset, _resolve_string_arg(key_node, known)
def _resolve_string_arg(
    node: Node,
    prop_maps: dict[str, dict[str, int | str]],
) -> str | None:
    """
    Reduce an argument node to a string. String literals yield their value; member accesses are
    looked up in *prop_maps* and count only when the stored value is a string. Anything else gives
    None, which is the expected outcome when no RC4 key is in play.
    """
    if isinstance(node, JsStringLiteral):
        return node.value
    if not isinstance(node, JsMemberExpression):
        return None
    looked_up = _resolve_member_access(node, prop_maps)
    return looked_up if isinstance(looked_up, str) else None
def _simulate_rotation(
    strings: list[str],
    base_offset: int,
    checksum_node: Node,
    local_accessors: frozenset[str],
    target: int,
    encoding: Encoding = Encoding.NONE,
    wrappers: dict[str, AccessorWrapperInfo] | None = None,
    prop_maps: dict[str, dict[str, int | str]] | None = None,
) -> list[str] | None:
    """
    Simulate the array rotation loop. For each rotation position, evaluate the checksum
    expression against the current array state. Stop when the checksum equals the target, or bail
    after len(strings) attempts.

    Returns the rotated copy of *strings* for which the checksum matches, or None when no rotation
    matches. The input list is never modified.

    The checksum is compared to *target* without truncation, like the strict equality test in the
    obfuscated loop; truncating first would let a fractional checksum from a wrong rotation match
    by accident. Any evaluation failure is treated like the `catch` branch of the original loop:
    the array is rotated and the next position is tried.
    """
    array = list(strings)
    for _ in range(len(array)):
        try:
            checksum = _eval_checksum(
                checksum_node, local_accessors, array, base_offset, encoding, wrappers, prop_maps,
            )
        except (_EvalError, ArithmeticError, ValueError):
            # ArithmeticError/ValueError cover int() conversions of non-finite intermediate
            # values; none of these failures may abort the simulation.
            checksum = None
        # inf and nan compare unequal to every integer target, so no extra guard is needed.
        if checksum is not None and checksum == target:
            return array
        array.append(array.pop(0))
    return None
def _collect_accessor_aliases(body: Sequence[Node], accessor_name: str) -> set[str]:
    """
    Return the names of all variables whose declarator is initialized with the bare accessor
    identifier. Every statement in *body* is walked in full, so aliases are found both at the top
    level (e.g. var _0xcbb5cc = _0x4914) and inside function bodies (e.g. var _0x4bad70 = _0x4914).
    """
    return {
        declarator.id.name
        for statement in body
        for declarator in statement.walk()
        if isinstance(declarator, JsVariableDeclarator)
        and isinstance(declarator.id, JsIdentifier)
        and isinstance(declarator.init, JsIdentifier)
        and declarator.init.name == accessor_name
    }
# Base64 alphabet used by the obfuscator: lowercase letters come before uppercase letters.
_B64_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/='
# Standard base64 alphabet in the same positional order, with '=' as the final padding symbol.
_B64_STANDARD = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
# Character-wise translation table from the obfuscator's alphabet to the standard one.
_B64_TRANSLATE = str.maketrans(_B64_ALPHABET, _B64_STANDARD)
def _detect_encoding(accessor_node: JsFunctionDeclaration) -> Encoding:
    """
    Determine how the array strings are encoded by looking at the accessor function body. The
    base64 and RC4 variants inject an `if (NAME['...'] === undefined)` init guard that contains the
    base64 alphabet string and one (base64) or two (RC4) inner function definitions.

    The first top-level if statement containing the alphabet decides the result: two or more
    variables initialized with function expressions mean RC4, fewer mean base64. Without such a
    statement the encoding is `Encoding.NONE`.
    """
    fn_body = accessor_node.body
    if fn_body is None:
        return Encoding.NONE
    for guard in fn_body.body:
        if not isinstance(guard, JsIfStatement):
            continue
        descendants = list(guard.walk())
        contains_alphabet = any(
            isinstance(d, JsStringLiteral) and d.value == _B64_ALPHABET
            for d in descendants
        )
        if not contains_alphabet:
            continue
        helper_count = sum(
            1 for d in descendants
            if isinstance(d, JsVariableDeclarator) and isinstance(d.init, JsFunctionExpression)
        )
        if helper_count >= 2:
            return Encoding.RC4
        return Encoding.B64
    return Encoding.NONE
def _custom_b64decode(s: str) -> bytes:
    """
    Decode *s* from the obfuscator's base64 variant (lowercase letters first) to bytes. The input
    is first mapped onto the standard alphabet, and `=` padding is appended whenever the length is
    not a multiple of four.
    """
    standard = s.translate(_B64_TRANSLATE)
    # -len % 4 is the number of characters missing to reach the next multiple of four.
    missing = -len(standard) % 4
    return base64.b64decode(standard + '=' * missing)
def _decode_base64(s: str) -> str:
    """
    Decode a string from the obfuscator's base64 variant and interpret the bytes as UTF-8 text.
    """
    payload = _custom_b64decode(s)
    return str(payload, 'utf-8')
def _decrypt_rc4(s: str, key: str) -> str:
"""
Base64-decode, UTF-8-decode, then RC4-decrypt a string using the given key. The obfuscator's
RC4 operates on Unicode character codes (after UTF-8 decode), not raw bytes, because its
inline atob uses `decodeURIComponent` which interprets the base64 output as UTF-8.
"""
data = _decode_base64(s)
sbox = list(range(256))
j = 0
for i in range(256):
j = (j + sbox[i] + ord(key[i % len(key)])) % 256
sbox[i], sbox[j] = sbox[j], sbox[i]
i = j = 0
out: list[str] = []
for ch in data:
i = (i + 1) % 256
j = (j + sbox[i]) % 256
sbox[i], sbox[j] = sbox[j], sbox[i]
out.append(chr(ord(ch) ^ sbox[(sbox[i] + sbox[j]) % 256]))
return ''.join(out)
def _replace_accessor_calls(
    root: Node,
    aliases: set[str],
    raw_lookup: dict[int, str],
    encoding: Encoding,
    wrappers: dict[str, AccessorWrapperInfo] | None = None,
    prop_maps: dict[str, dict[str, int | str]] | None = None,
) -> int:
    """
    Substitute decoded string literals for accessor calls throughout the AST below *root*.

    Both direct calls (through any name in *aliases*) and wrapper calls (any name in *wrappers*)
    are considered. A call is replaced only if its index resolves statically, the index is present
    in *raw_lookup*, and the raw string decodes under *encoding*; for RC4 the key comes from the
    call's key argument. Calls that fail any of these steps are left untouched. Returns the number
    of replaced calls.
    """
    replaced = 0
    accessor_names = frozenset(aliases)
    wrapper_names = set(wrappers) if wrappers else set()
    # Snapshot the traversal because nodes are swapped out while iterating.
    for candidate in list(root.walk()):
        if not isinstance(candidate, JsCallExpression):
            continue
        callee = candidate.callee
        if not isinstance(callee, JsIdentifier):
            continue
        if not (callee.name in aliases or callee.name in wrapper_names):
            continue
        try:
            index, rc4_key = _resolve_accessor_call(candidate, accessor_names, wrappers, prop_maps)
        except _EvalError:
            continue
        raw = raw_lookup.get(index)
        if raw is None:
            continue
        try:
            decoded = _decode_string(raw, encoding, rc4_key)
        except _EvalError:
            continue
        _replace_in_parent(candidate, make_string_literal(decoded))
        replaced += 1
    return replaced
def _find_remaining_calls(
    root: JsScript,
    aliases: set[str],
    wrapper_names: set[str],
) -> tuple[bool, set[int]]:
    """
    Check whether any accessor or wrapper call is still present outside of wrapper function
    declarations. Calls inside a wrapper declaration do not count because those declarations are
    considered dead and will be removed.

    Returns `(has_remaining, dead_node_ids)`, where the second element holds the `id()` of every
    wrapper declaration seen so far; it is complete whenever `has_remaining` is False, which is the
    only case in which cleanup uses it.
    """
    dead: set[int] = set()
    for current in root.walk():
        if (
            isinstance(current, JsFunctionDeclaration)
            and current.id is not None
            and current.id.name in wrapper_names
        ):
            dead.add(id(current))
            continue
        if not isinstance(current, JsCallExpression):
            continue
        if not isinstance(current.callee, JsIdentifier):
            continue
        if not (current.callee.name in aliases or current.callee.name in wrapper_names):
            continue
        # Climb towards the root until a dead wrapper declaration is reached.
        ancestor = current.parent
        while ancestor is not None and id(ancestor) not in dead:
            ancestor = ancestor.parent
        if ancestor is None:
            # Reached the root without passing through a dead wrapper: the call is live.
            return True, dead
    return False, dead
def _cleanup_infrastructure(
    root: JsScript,
    body: Sequence[Node],
    array: ArrayFunction,
    accessor: AccessorFunction,
    dead_nodes: set[int],
    aliases: set[str],
) -> None:
    """
    Delete everything that only existed to serve the string array: the array function, the
    accessor function, the rotation IIFE (if one is still found in *body*), every node whose
    `id()` is in *dead_nodes*, and every variable declarator that binds one alias name to another.
    Intended to run once no accessor calls remain.
    """
    for definition in (array.node, accessor.node):
        _remove_from_parent(definition)
    iife = _find_rotation_iife(body, array.name)
    if iife is not None:
        _remove_from_parent(iife.node)
    # Snapshot the traversal because nodes are detached while iterating.
    for current in list(root.walk()):
        if id(current) in dead_nodes:
            _remove_from_parent(current)
            continue
        if not isinstance(current, JsVariableDeclarator):
            continue
        if not isinstance(current.id, JsIdentifier) or current.id.name not in aliases:
            continue
        if not isinstance(current.init, JsIdentifier) or current.init.name not in aliases:
            continue
        remove_declarator(current)
class _CachedResolution(NamedTuple):
    """
    Cached result of a successful array rotation simulation, stored on the JsScript node to survive
    across pipeline iterations. This prevents re-simulation failures when the simplifier modifies the
    checksum expression in the rotation IIFE between string array passes.
    """
    # The string array in its final, correctly rotated order.
    resolved: list[str]
    # Accessor base offset in effect when the simulation ran; used to validate the cache.
    base_offset: int
    # Encoding detected when the simulation ran; used to validate the cache.
    encoding: Encoding
# Name of the attribute under which a _CachedResolution is stored on the JsScript node.
_CACHE_ATTR = '_stringarray_cache'
class JsStringArrayResolver(Transformer):
    """
    Transformer that detects the string-array pattern (array function, accessor function, rotation
    IIFE) in a script, replaces accessor and wrapper calls with the strings they resolve to, and
    removes the supporting definitions once no such calls remain.
    """

    def visit_JsScript(self, node: JsScript):
        """
        Run the resolution on a whole script. Returns None in every case; the AST is modified in
        place and `mark_changed` is called when at least one call was replaced.
        """
        body = node.body
        array = _find_array_function(body)
        if array is None:
            return None
        accessor = _find_accessor_function(body, array.name)
        if accessor is None:
            return None
        encoding = _detect_encoding(accessor.node)
        # Reuse an earlier simulation result if it was computed for the same offset and encoding.
        cache: _CachedResolution | None = getattr(node, _CACHE_ATTR, None)
        if (
            cache is not None
            and cache.base_offset == accessor.base_offset
            and cache.encoding == encoding
        ):
            resolved = cache.resolved
        else:
            rotation = _find_rotation_iife(body, array.name)
            if rotation is None:
                return None
            checksum = _extract_checksum_expression(rotation.body, accessor.name)
            if checksum is None:
                return None
            resolved = _simulate_rotation(
                array.strings,
                accessor.base_offset,
                checksum.node,
                checksum.local_accessors,
                rotation.target,
                encoding,
                checksum.wrappers or None,
                checksum.prop_maps or None,
            )
            if resolved is None:
                return None
            setattr(node, _CACHE_ATTR, _CachedResolution(resolved, accessor.base_offset, encoding))
        aliases = _collect_accessor_aliases(body, accessor.name)
        aliases.add(accessor.name)
        # Map each index as written at call sites (array position plus base offset) to its string.
        raw_lookup = {i + accessor.base_offset: s for i, s in enumerate(resolved)}
        all_wrappers = _collect_all_wrappers(node, accessor.name)
        if _replace_accessor_calls(
            node, aliases, raw_lookup, encoding, all_wrappers,
        ) == 0:
            return None
        wrapper_names = set(all_wrappers)
        has_remaining, dead_nodes = _find_remaining_calls(node, aliases, wrapper_names)
        # Only remove the infrastructure when nothing outside dead wrappers still depends on it.
        if not has_remaining:
            _cleanup_infrastructure(node, body, array, accessor, dead_nodes, aliases)
        self.mark_changed()
        return None

    def generic_visit(self, node: Node):
        """
        Do nothing for any node type without a dedicated visit method.
        """
        pass
Classes
class Encoding (*args, **kwds)-
Create a collection of name/value pairs.
Example enumeration:
>>> class Color(Enum): ... RED = 1 ... BLUE = 2 ... GREEN = 3Access them by:
- attribute access:
Color.RED
- value lookup:
Color(1)
- name lookup:
Color['RED']
Enumerations can be iterated over, and know how many members they have:
>>> len(Color) 3>>> list(Color) [<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]Methods can be added to enumerations, and members can have their own attributes – see the documentation for details.
Expand source code Browse git
class Encoding(enum.Enum):
    NONE = 'none'
    B64 = 'base64'
    RC4 = 'rc4'
Ancestors
- enum.Enum
Class variables
var NONE-
No encoding: array entries are stored verbatim and returned unchanged.
var B64-
Array entries are base64-encoded with the obfuscator's custom alphabet (lowercase letters first).
var RC4-
Array entries are base64-encoded and RC4-encrypted; a key is required to decode them.
class ArrayFunction (node, name, strings)-
Result of detecting the array-holder function pattern.
Expand source code Browse git
class ArrayFunction(NamedTuple): """ Result of detecting the array-holder function pattern. """ node: JsFunctionDeclaration name: str strings: list[str]Ancestors
- builtins.tuple
Instance variables
var node-
Alias for field number 0
Expand source code Browse git
class ArrayFunction(NamedTuple): """ Result of detecting the array-holder function pattern. """ node: JsFunctionDeclaration name: str strings: list[str] var name-
Alias for field number 1
Expand source code Browse git
class ArrayFunction(NamedTuple): """ Result of detecting the array-holder function pattern. """ node: JsFunctionDeclaration name: str strings: list[str] var strings-
Alias for field number 2
Expand source code Browse git
class ArrayFunction(NamedTuple): """ Result of detecting the array-holder function pattern. """ node: JsFunctionDeclaration name: str strings: list[str]
class AccessorFunction (node, name, base_offset)-
Result of detecting the accessor function pattern.
Expand source code Browse git
class AccessorFunction(NamedTuple): """ Result of detecting the accessor function pattern. """ node: JsFunctionDeclaration name: str base_offset: intAncestors
- builtins.tuple
Instance variables
var node-
Alias for field number 0
Expand source code Browse git
class AccessorFunction(NamedTuple): """ Result of detecting the accessor function pattern. """ node: JsFunctionDeclaration name: str base_offset: int var name-
Alias for field number 1
Expand source code Browse git
class AccessorFunction(NamedTuple): """ Result of detecting the accessor function pattern. """ node: JsFunctionDeclaration name: str base_offset: int var base_offset-
Alias for field number 2
Expand source code Browse git
class AccessorFunction(NamedTuple): """ Result of detecting the accessor function pattern. """ node: JsFunctionDeclaration name: str base_offset: int
class RotationIIFE (node, target, body)-
Result of detecting the rotation IIFE pattern.
Expand source code Browse git
class RotationIIFE(NamedTuple): """ Result of detecting the rotation IIFE pattern. """ node: JsExpressionStatement target: int body: Sequence[Node]Ancestors
- builtins.tuple
Instance variables
var node-
Alias for field number 0
Expand source code Browse git
class RotationIIFE(NamedTuple): """ Result of detecting the rotation IIFE pattern. """ node: JsExpressionStatement target: int body: Sequence[Node] var target-
Alias for field number 1
Expand source code Browse git
class RotationIIFE(NamedTuple): """ Result of detecting the rotation IIFE pattern. """ node: JsExpressionStatement target: int body: Sequence[Node] var body-
Alias for field number 2
Expand source code Browse git
class RotationIIFE(NamedTuple):
    """
    Result of detecting the rotation IIFE pattern.

    The resolver searches the body for the checksum expression and passes the target to the
    rotation simulation.
    """
    # The expression statement that was matched as the rotation IIFE.
    node: JsExpressionStatement
    # Integer passed to the rotation simulation; presumably the constant that the checksum has to
    # equal for the rotation to stop (confirm against the detection helper).
    target: int
    # Nodes that the checksum expression is extracted from; presumably the statements of the
    # IIFE's function body.
    body: Sequence[Node]
class ChecksumInfo (node, local_accessors, wrappers, prop_maps)-
Result of extracting the checksum expression from the rotation IIFE.
Expand source code Browse git
class ChecksumInfo(NamedTuple): """ Result of extracting the checksum expression from the rotation IIFE. """ node: Node local_accessors: frozenset[str] wrappers: dict[str, 'AccessorWrapperInfo'] prop_maps: dict[str, dict[str, int | str]]Ancestors
- builtins.tuple
Instance variables
var node-
Alias for field number 0
Expand source code Browse git
class ChecksumInfo(NamedTuple): """ Result of extracting the checksum expression from the rotation IIFE. """ node: Node local_accessors: frozenset[str] wrappers: dict[str, 'AccessorWrapperInfo'] prop_maps: dict[str, dict[str, int | str]] var local_accessors-
Alias for field number 1
Expand source code Browse git
class ChecksumInfo(NamedTuple): """ Result of extracting the checksum expression from the rotation IIFE. """ node: Node local_accessors: frozenset[str] wrappers: dict[str, 'AccessorWrapperInfo'] prop_maps: dict[str, dict[str, int | str]] var wrappers-
Alias for field number 2
Expand source code Browse git
class ChecksumInfo(NamedTuple): """ Result of extracting the checksum expression from the rotation IIFE. """ node: Node local_accessors: frozenset[str] wrappers: dict[str, 'AccessorWrapperInfo'] prop_maps: dict[str, dict[str, int | str]] var prop_maps-
Alias for field number 3
Expand source code Browse git
class ChecksumInfo(NamedTuple):
    """
    Result of extracting the checksum expression from the rotation IIFE.

    All four fields are forwarded to the rotation simulation by the resolver; empty wrapper and
    property maps are forwarded as None.
    """
    # The extracted checksum expression.
    node: Node
    # A set of names; presumably identifiers inside the IIFE that refer to the accessor
    # (TODO confirm against the extraction helper).
    local_accessors: frozenset[str]
    # Wrapper descriptions keyed by a string, presumably the wrapper function's name.
    wrappers: dict[str, 'AccessorWrapperInfo']
    # Two-level string-keyed mapping to int or str values; presumably the properties of object
    # literals that the checksum expression refers to (TODO confirm).
    prop_maps: dict[str, dict[str, int | str]]
class AccessorWrapperInfo (target, offset, idx_param_pos, key_param_pos)-
Describes an inner wrapper function inside the rotation IIFE that forwards to the main accessor with reordered arguments and an additional offset subtraction.
Expand source code Browse git
class AccessorWrapperInfo(NamedTuple): """ Describes an inner wrapper function inside the rotation IIFE that forwards to the main accessor with reordered arguments and an additional offset subtraction. """ target: str offset: int idx_param_pos: int key_param_pos: intAncestors
- builtins.tuple
Instance variables
var target-
Alias for field number 0
Expand source code Browse git
class AccessorWrapperInfo(NamedTuple): """ Describes an inner wrapper function inside the rotation IIFE that forwards to the main accessor with reordered arguments and an additional offset subtraction. """ target: str offset: int idx_param_pos: int key_param_pos: int var offset-
Alias for field number 1
Expand source code Browse git
class AccessorWrapperInfo(NamedTuple): """ Describes an inner wrapper function inside the rotation IIFE that forwards to the main accessor with reordered arguments and an additional offset subtraction. """ target: str offset: int idx_param_pos: int key_param_pos: int var idx_param_pos-
Alias for field number 2
Expand source code Browse git
class AccessorWrapperInfo(NamedTuple): """ Describes an inner wrapper function inside the rotation IIFE that forwards to the main accessor with reordered arguments and an additional offset subtraction. """ target: str offset: int idx_param_pos: int key_param_pos: int var key_param_pos-
Alias for field number 3
Expand source code Browse git
class AccessorWrapperInfo(NamedTuple):
    """
    Describes an inner wrapper function inside the rotation IIFE that forwards to the main accessor
    with reordered arguments and an additional offset subtraction.
    """
    # A name; presumably that of the function which the wrapper forwards its call to.
    target: str
    # Presumably the value that the wrapper subtracts from its index argument before forwarding
    # (sign convention to be confirmed against the code that consumes it).
    offset: int
    # Presumably the position of the index argument in the wrapper's parameter list.
    idx_param_pos: int
    # Presumably the position of the key argument in the wrapper's parameter list.
    key_param_pos: int
class JsStringArrayResolver-
In-place tree rewriter. Each visit method may return a replacement node or
`None` to keep the original. Tracks whether any transformation was applied via the `changed` flag.
Expand source code Browse git
class JsStringArrayResolver(Transformer):
    """
    Script-level transformer for the string-array rotation pattern. All work happens when the
    script root is visited; every other node type falls through to a no-op visitor.
    """

    def visit_JsScript(self, node: JsScript):
        """
        Detect the array function and the accessor function in the script body, obtain the rotated
        string table, replace accessor calls, and remove the supporting definitions once no calls
        remain. The table is stored on the script node and reused as long as the accessor's base
        offset and the detected encoding are unchanged. Always returns None; a successful
        replacement is reported through `mark_changed`.
        """
        statements = node.body

        holder = _find_array_function(statements)
        if holder is None:
            return None
        getter = _find_accessor_function(statements, holder.name)
        if getter is None:
            return None

        offset = getter.base_offset
        codec = _detect_encoding(getter.node)

        cached: _CachedResolution | None = getattr(node, _CACHE_ATTR, None)
        reusable = (
            cached is not None
            and cached.base_offset == offset
            and cached.encoding == codec
        )
        if not reusable:
            # No usable cache entry: locate the rotation and simulate it from scratch.
            iife = _find_rotation_iife(statements, holder.name)
            if iife is None:
                return None
            info = _extract_checksum_expression(iife.body, getter.name)
            if info is None:
                return None
            table = _simulate_rotation(
                holder.strings,
                offset,
                info.node,
                info.local_accessors,
                iife.target,
                codec,
                info.wrappers or None,
                info.prop_maps or None,
            )
            if table is None:
                return None
            setattr(node, _CACHE_ATTR, _CachedResolution(table, offset, codec))
        else:
            table = cached.resolved

        names = _collect_accessor_aliases(statements, getter.name)
        names.add(getter.name)
        # Key every string by the index value that selects it: position plus base offset.
        by_index = dict(enumerate(table, offset))
        wrappers = _collect_all_wrappers(node, getter.name)

        replaced = _replace_accessor_calls(node, names, by_index, codec, wrappers)
        if replaced == 0:
            return None

        leftover, dead = _find_remaining_calls(node, names, set(wrappers))
        if not leftover:
            # Nothing refers to the infrastructure any longer, so it can be removed.
            _cleanup_infrastructure(node, statements, holder, getter, dead, names)
        self.mark_changed()
        return None

    def generic_visit(self, node: Node):
        """
        No-op visitor for all node types without a dedicated visit method.
        """
        return None
Methods
def visit_JsScript(self, node)-
Expand source code Browse git
def visit_JsScript(self, node: JsScript): body = node.body array = _find_array_function(body) if array is None: return None accessor = _find_accessor_function(body, array.name) if accessor is None: return None encoding = _detect_encoding(accessor.node) cache: _CachedResolution | None = getattr(node, _CACHE_ATTR, None) if ( cache is not None and cache.base_offset == accessor.base_offset and cache.encoding == encoding ): resolved = cache.resolved else: rotation = _find_rotation_iife(body, array.name) if rotation is None: return None checksum = _extract_checksum_expression(rotation.body, accessor.name) if checksum is None: return None resolved = _simulate_rotation( array.strings, accessor.base_offset, checksum.node, checksum.local_accessors, rotation.target, encoding, checksum.wrappers or None, checksum.prop_maps or None, ) if resolved is None: return None setattr(node, _CACHE_ATTR, _CachedResolution(resolved, accessor.base_offset, encoding)) aliases = _collect_accessor_aliases(body, accessor.name) aliases.add(accessor.name) raw_lookup = {i + accessor.base_offset: s for i, s in enumerate(resolved)} all_wrappers = _collect_all_wrappers(node, accessor.name) if _replace_accessor_calls( node, aliases, raw_lookup, encoding, all_wrappers, ) == 0: return None wrapper_names = set(all_wrappers) has_remaining, dead_nodes = _find_remaining_calls(node, aliases, wrapper_names) if not has_remaining: _cleanup_infrastructure(node, body, array, accessor, dead_nodes, aliases) self.mark_changed() return None def generic_visit(self, node)-
Expand source code Browse git
def generic_visit(self, node: Node):
    """
    No-op visitor: performs no work for the given node and returns None.
    """
    return None