Module refinery.lib.scripts.js.deobfuscation.helpers
Shared utilities for JavaScript deobfuscation transforms.
Expand source code Browse git
"""
Shared utilities for JavaScript deobfuscation transforms.
"""
from __future__ import annotations
import math
import operator
import re
from typing import TYPE_CHECKING, Callable, Iterator, Sequence
if TYPE_CHECKING:
from typing import TypeAlias
LiteralValue: TypeAlias = str | int | float | bool | list | dict | None
from refinery.lib.scripts import (
Expression,
Node,
Statement,
Transformer,
_clone_node,
_remove_from_parent,
_replace_in_parent,
)
from refinery.lib.scripts.js.model import (
JsArrayExpression,
JsArrowFunctionExpression,
JsAssignmentExpression,
JsBinaryExpression,
JsBlockStatement,
JsBooleanLiteral,
JsCallExpression,
JsConditionalExpression,
JsForInStatement,
JsForOfStatement,
JsFunctionDeclaration,
JsFunctionExpression,
JsIdentifier,
JsLogicalExpression,
JsMemberExpression,
JsNullLiteral,
JsNumericLiteral,
JsObjectExpression,
JsProperty,
JsReturnStatement,
JsScript,
JsSequenceExpression,
JsStringLiteral,
JsUnaryExpression,
JsVariableDeclaration,
JsVariableDeclarator,
JsVarKind,
JsWhileStatement,
)
from refinery.lib.scripts.js.token import FUTURE_RESERVED, KEYWORDS
SIMPLE_IDENTIFIER = re.compile(r'^[a-zA-Z_$][a-zA-Z_$0-9]*$')
JS_RESERVED = frozenset(set(KEYWORDS) | FUTURE_RESERVED | {'undefined'})
FUNCTION_NODE_TYPES = (JsFunctionDeclaration, JsFunctionExpression, JsArrowFunctionExpression)
def _to_int32(v: int | float) -> int:
v = int(v) & 0xFFFFFFFF
return v - 0x100000000 if v >= 0x80000000 else v
BINARY_OPS: dict[str, Callable] = {
'+' : operator.add,
'-' : operator.sub,
'*' : operator.mul,
'/' : operator.truediv,
'%' : math.fmod,
'**' : operator.pow,
'|' : lambda a, b: _to_int32(int(a) | int(b)),
'&' : lambda a, b: _to_int32(int(a) & int(b)),
'^' : lambda a, b: _to_int32(int(a) ^ int(b)),
'<<' : lambda a, b: _to_int32(int(a) << (int(b) & 0x1F)),
'>>' : lambda a, b: _to_int32(_to_int32(int(a)) >> (int(b) & 0x1F)),
}
RELATIONAL_OPS: dict[str, Callable] = {
'<' : operator.lt,
'>' : operator.gt,
'<=': operator.le,
'>=': operator.ge,
}
def eval_binary_op(op: str, left: int | float, right: int | float) -> int | float | bool | None:
"""
Evaluate a JavaScript binary operator on two numeric operands. Returns the result value, or
`None` when the operator is unknown or the computation overflows/divides by zero. Handles
arithmetic, bitwise, relational, equality, and the unsigned right shift `>>>`.
"""
if op in ('===', '=='):
return left == right
if op in ('!==', '!='):
return left != right
rel = RELATIONAL_OPS.get(op)
if rel is not None:
return rel(left, right)
if op == '>>>':
a = int(left) & 0xFFFFFFFF
b = int(right) & 0x1F
return (a >> b) & 0xFFFFFFFF
fn = BINARY_OPS.get(op)
if fn is None:
return None
try:
return fn(left, right)
except (ZeroDivisionError, OverflowError, ValueError):
return None
def escape_js_string(value: str, quote: str = "'") -> str:
"""
Escape a string for use in a JavaScript string literal. Returns the escaped body without
surrounding quotes. Backslash is escaped first to avoid double-escaping. Control characters
not covered by named escapes are emitted as `\\xHH`; surrogates as `\\uXXXX`.
"""
def _residue(m: re.Match[str]):
cp = ord(m.group())
if cp > 0xFF:
return F'\\u{cp:04X}'
return F'\\x{cp:02x}'
value = value.replace('\\', r'\\')
value = value.replace('\n', r'\n')
value = value.replace('\r', r'\r')
value = value.replace('\t', r'\t')
value = value.replace('\0', r'\0')
value = value.replace(quote, F'\\{quote}')
return re.sub(r'[\x01-\x1f\ud800-\udfff]', _residue, value)
def string_value(node: Expression | None) -> str | None:
if isinstance(node, JsStringLiteral):
return node.value
return None
def property_key(prop: JsProperty) -> str | None:
"""
Extract the string key from a property node. Handles both string-literal keys and plain
identifier keys. Returns `None` for computed keys.
"""
if prop.computed:
return None
if isinstance(prop.key, JsStringLiteral):
return prop.key.value
if isinstance(prop.key, JsIdentifier):
return prop.key.name
return None
def access_key(node: JsMemberExpression) -> str | None:
"""
Extract the string key from a member-access expression. Handles both computed (`obj['key']`)
and dot (`obj.key`) accesses.
"""
if node.computed:
return string_value(node.property)
if isinstance(node.property, JsIdentifier):
return node.property.name
return None
def make_string_literal(value: str) -> JsStringLiteral:
escaped = escape_js_string(value)
raw = F"'{escaped}'"
return JsStringLiteral(value=value, raw=raw)
def numeric_value(node: Expression) -> int | float | None:
if isinstance(node, JsNumericLiteral):
return node.value
return None
def make_numeric_literal(value: int | float) -> JsNumericLiteral:
if isinstance(value, float):
if value == 0.0 and str(value).startswith('-'):
raw = '-0'
elif value == int(value):
raw = str(int(value))
else:
raw = str(value)
else:
raw = str(value)
return JsNumericLiteral(value=value, raw=raw)
def extract_literal_value(node: Node) -> tuple[bool, LiteralValue]:
"""
Extract a Python value from a literal AST node. Returns `(True, value)` on success or
`(False, None)` when the node is not a recognized literal form. Handles string, numeric,
boolean, null literals, `void expr`, negative numerics, `!0`/`!1`, and array expressions
where all elements are themselves literals.
"""
if isinstance(node, JsStringLiteral):
return True, node.value
if isinstance(node, JsNumericLiteral):
return True, node.value
if isinstance(node, JsBooleanLiteral):
return True, node.value
if isinstance(node, JsNullLiteral):
return True, None
if isinstance(node, JsUnaryExpression):
if node.operator == 'void' and isinstance(node.operand, (JsNumericLiteral, JsStringLiteral)):
return True, None
if node.operator == '-' and isinstance(node.operand, JsNumericLiteral):
return True, -node.operand.value
if node.operator == '+' and isinstance(node.operand, JsNumericLiteral):
return True, node.operand.value
if node.operator == '!' and isinstance(node.operand, JsNumericLiteral):
return True, not bool(node.operand.value)
if isinstance(node, JsArrayExpression):
items: list[LiteralValue] = []
for el in node.elements:
if el is None:
return False, None
ok, val = extract_literal_value(el)
if not ok:
return False, None
items.append(val)
return True, items
return False, None
def value_to_node(value: object) -> Expression | None:
"""
Convert a Python value to the corresponding AST literal node. Returns `None` when the value
type is not representable as a literal expression.
"""
if isinstance(value, str):
return make_string_literal(value)
if isinstance(value, bool):
return JsBooleanLiteral(value=value)
if isinstance(value, int):
if value < 0:
return JsUnaryExpression(operator='-', operand=make_numeric_literal(-value))
return make_numeric_literal(value)
if isinstance(value, float):
if value != value:
return JsIdentifier(name='NaN')
if value == float('inf'):
return JsIdentifier(name='Infinity')
if value == float('-inf'):
return JsUnaryExpression(operator='-', operand=JsIdentifier(name='Infinity'))
if value < 0:
return JsUnaryExpression(operator='-', operand=make_numeric_literal(-value))
return make_numeric_literal(value)
if isinstance(value, list):
elements: list[Expression | None] = []
for item in value:
el = value_to_node(item)
if el is None:
return None
elements.append(el)
return JsArrayExpression(elements=elements)
if value is None:
return JsUnaryExpression(
operator='void',
operand=JsNumericLiteral(value=0, raw='0'),
)
return None
def is_literal(node: Node) -> bool:
if isinstance(node, (JsStringLiteral, JsNumericLiteral, JsBooleanLiteral, JsNullLiteral)):
return True
if isinstance(node, JsUnaryExpression):
if node.operator == 'void' and isinstance(node.operand, (JsNumericLiteral, JsStringLiteral)):
return True
if node.operator == '-' and isinstance(node.operand, JsNumericLiteral):
return True
return False
def member_key(node: JsMemberExpression) -> str | None:
"""
Flatten a chain of property accesses into a dot-separated key string. Handles both dot
notation and computed access with string-literal keys. Returns `None` if the chain contains
a dynamic computed access that cannot be resolved to a static key.
"""
parts: list[str] = []
cursor: Expression | None = node
while isinstance(cursor, JsMemberExpression):
key = access_key(cursor)
if key is None:
return None
parts.append(key)
cursor = cursor.object
if not isinstance(cursor, JsIdentifier):
return None
parts.append(cursor.name)
parts.reverse()
return '.'.join(parts)
def is_while_true(node: JsWhileStatement) -> bool:
"""
Check whether the while-loop condition is `true`, `!![]`, or `!0` — the forms the
obfuscator uses for infinite loops.
"""
test = node.test
if isinstance(test, JsBooleanLiteral) and test.value is True:
return True
if not isinstance(test, JsUnaryExpression) or test.operator != '!':
return False
inner = test.operand
if isinstance(inner, JsNumericLiteral) and inner.value == 0:
return True
if isinstance(inner, JsUnaryExpression) and inner.operator == '!':
return True
return False
def is_valid_identifier(name: str) -> bool:
return bool(SIMPLE_IDENTIFIER.match(name)) and name not in JS_RESERVED
def is_simple_expression(node: Node) -> bool:
"""
Check whether a node is a side-effect-free leaf expression: a literal value, an identifier, or
a unary operator applied to a literal (e.g. `-42`).
"""
if is_literal(node) or isinstance(node, JsIdentifier):
return True
if isinstance(node, JsUnaryExpression) and node.operand is not None:
return is_literal(node.operand)
return False
def is_write_target(node: JsIdentifier) -> bool:
"""
Return whether this identifier is a write target: the left-hand side of an assignment
expression, or the iteration variable of a `for-in` / `for-of` statement.
"""
p = node.parent
if isinstance(p, JsAssignmentExpression) and p.left is node:
return True
if isinstance(p, (JsForInStatement, JsForOfStatement)) and p.left is node:
return True
return False
def is_binding_site(node: JsIdentifier) -> bool:
"""
Return whether this identifier is in a binding position (variable declarator id or function
declaration name) rather than a reference/read position.
"""
p = node.parent
if isinstance(p, JsVariableDeclarator) and p.id is node:
return True
if isinstance(p, JsFunctionDeclaration) and p.id is node:
return True
return False
def is_reference(node: JsIdentifier) -> bool:
"""
Return whether this identifier is in a true variable reference position: not a binding site,
not a non-computed member property, and not a non-computed object-literal key.
"""
p = node.parent
if p is None:
return False
if isinstance(p, JsVariableDeclarator) and p.id is node:
return False
if isinstance(p, JsFunctionDeclaration) and p.id is node:
return False
if isinstance(p, JsMemberExpression) and p.property is node and not p.computed:
return False
if isinstance(p, JsProperty) and p.key is node and not p.computed:
return False
return True
def is_truthy(node: Node) -> bool | None:
"""
Return the JavaScript truthiness of a literal node, or `None` when the value cannot be
determined statically.
"""
if isinstance(node, JsBooleanLiteral):
return node.value
if isinstance(node, JsNumericLiteral):
# return correct value for NaN
return (v := node.value) != 0 and v == v
if isinstance(node, JsStringLiteral):
return bool(node.value)
if isinstance(node, JsNullLiteral):
return False
if isinstance(node, JsIdentifier) and node.name == 'undefined':
return False
if isinstance(node, JsArrayExpression):
return True
return None
def is_statically_evaluable(node: Node) -> bool:
"""
Return whether the node can be evaluated to a known truthiness at transform time. This
includes all literal types and the `undefined` identifier.
"""
return (
is_literal(node)
or (isinstance(node, JsIdentifier) and node.name == 'undefined')
or isinstance(node, JsArrayExpression)
)
def is_nullish(node: Node) -> bool:
"""
Return whether the node is statically known to be `null` or `undefined`.
"""
if isinstance(node, JsNullLiteral):
return True
if isinstance(node, JsIdentifier) and node.name == 'undefined':
return True
return False
def _is_safe_property_base(node: Node, defunct: set[str] | None = None) -> bool:
"""
Check whether property access on *node* is guaranteed to be side-effect-free. Returns `True`
when the object is a value that cannot have custom getters: literals, fresh
object/array/function expressions, or identifiers in the *defunct* set (being removed, so
their getters are irrelevant to live code). Chained member expressions are safe when their
root base is safe.
"""
if isinstance(node, (JsStringLiteral, JsNumericLiteral, JsBooleanLiteral, JsNullLiteral)):
return True
if isinstance(node, (JsObjectExpression, JsArrayExpression, JsFunctionExpression)):
return True
if isinstance(node, JsIdentifier):
return bool(defunct) and node.name in defunct
if isinstance(node, JsMemberExpression) and node.object is not None:
return _is_safe_property_base(node.object, defunct)
return False
def is_side_effect_free(node: Node, defunct: set[str] | None = None) -> bool:
"""
Conservative check for whether an expression can be removed without observable side effects.
When *defunct* is provided, calls to identifiers in that set are treated as side-effect-free
(the function no longer exists in scope).
"""
if isinstance(node, (JsStringLiteral, JsNumericLiteral, JsBooleanLiteral, JsNullLiteral)):
return True
if isinstance(node, JsIdentifier):
return True
if isinstance(node, JsFunctionExpression):
return True
if isinstance(node, JsUnaryExpression):
if node.operator == 'delete':
return False
return node.operand is not None and is_side_effect_free(node.operand, defunct)
if isinstance(node, JsMemberExpression):
if node.object is None:
return False
if not is_side_effect_free(node.object, defunct):
return False
if node.property is not None and not is_side_effect_free(node.property, defunct):
return False
return _is_safe_property_base(node.object, defunct)
if isinstance(node, (JsBinaryExpression, JsLogicalExpression)):
return (
node.left is not None
and is_side_effect_free(node.left, defunct)
and node.right is not None
and is_side_effect_free(node.right, defunct)
)
if isinstance(node, JsConditionalExpression):
return (
node.test is not None
and is_side_effect_free(node.test, defunct)
and node.consequent is not None
and is_side_effect_free(node.consequent, defunct)
and node.alternate is not None
and is_side_effect_free(node.alternate, defunct)
)
if isinstance(node, JsObjectExpression):
for prop in node.properties:
if not isinstance(prop, JsProperty):
return False
if prop.value is not None and not is_side_effect_free(prop.value, defunct):
return False
return True
if isinstance(node, JsArrayExpression):
return all(
elem is None or is_side_effect_free(elem, defunct) for elem in node.elements
)
if isinstance(node, JsSequenceExpression):
return all(is_side_effect_free(e, defunct) for e in node.expressions)
if isinstance(node, JsCallExpression):
if defunct and isinstance(node.callee, JsIdentifier) and node.callee.name in defunct:
return all(is_side_effect_free(arg, defunct) for arg in node.arguments)
if isinstance(node.callee, JsFunctionExpression):
return all(is_side_effect_free(arg, defunct) for arg in node.arguments)
return False
def js_parse_int(s: str, radix: int = 10) -> int | None:
"""
Replicate the semantics of JavaScript's `parseInt(string, radix)`. Strips leading whitespace,
handles an optional `+`/`-` sign, and for radix 16 skips a leading `0x`/`0X` prefix. Parses
leading characters valid for the given radix (2-36) and stops at the first invalid one. Returns
`None` when no valid digits are found (JS would return `NaN`).
"""
if radix == 0:
radix = 10
if not (2 <= radix <= 36):
return None
s = s.strip()
if not s:
return None
sign = 1
if s[0] in '+-':
if s[0] == '-':
sign = -1
s = s[1:]
if radix == 16 and len(s) >= 2 and s[0] == '0' and s[1] in 'xX':
s = s[2:]
digits: list[str] = []
for ch in s:
if '0' <= ch <= '9':
if ord(ch) - ord('0') >= radix:
break
digits.append(ch)
elif 'a' <= ch <= 'z' or 'A' <= ch <= 'Z':
if ord(ch.lower()) - ord('a') + 10 >= radix:
break
digits.append(ch)
else:
break
if not digits:
return None
return sign * int(''.join(digits), radix)
def get_body(node: Node) -> list[Statement] | None:
"""
Return the statement body list of a node if it has one (JsScript or JsBlockStatement).
"""
if isinstance(node, (JsScript, JsBlockStatement)):
return node.body
return None
def remove_declarator(declarator: JsVariableDeclarator) -> None:
"""
Remove a `JsVariableDeclarator` from its parent `JsVariableDeclaration`. If the declaration
has no remaining declarators afterward, remove it from the body as well.
"""
var_decl = declarator.parent
_remove_from_parent(declarator)
if isinstance(var_decl, JsVariableDeclaration) and not var_decl.declarations:
_remove_from_parent(var_decl)
def extract_identifier_params(params: list) -> list[str] | None:
"""
Extract plain identifier names from a function's parameter list. Returns `None` if any parameter
is not a simple `JsIdentifier` (e.g. destructuring or rest patterns).
"""
names: list[str] = []
for p in params:
if not isinstance(p, JsIdentifier):
return None
names.append(p.name)
return names
def is_closed_expression(node: Node, allowed_names: set[str]) -> bool:
"""
Check whether every leaf in the expression tree is either a literal or an identifier whose
name is in *allowed_names*. This ensures the expression has no free variables.
"""
children = list(node.children())
if not children:
if isinstance(node, JsIdentifier):
return node.name in allowed_names
return is_simple_expression(node)
return all(is_closed_expression(child, allowed_names) for child in children)
def substitute_params(
expression: Node,
param_names: Sequence[str],
arguments: Sequence[Node],
) -> Node:
"""
Deep-clone *expression* and replace every `JsIdentifier` whose name appears in *param_names*
with a clone of the positionally corresponding node from *arguments*.
"""
cloned = _clone_node(expression)
mapping = dict(zip(param_names, arguments))
for node in list(cloned.walk()):
if isinstance(node, JsIdentifier) and node.name in mapping:
_replace_in_parent(node, _clone_node(mapping[node.name]))
return cloned
def try_inline_trivial_function(
func: JsFunctionExpression,
call_args: list,
*,
relaxed: bool = False,
) -> Node | None:
"""
If *func* is a trivial wrapper (single return whose expression uses only the function's
parameters), substitute call-site arguments into a clone of the return expression. Returns the
inlined expression or `None` if the function is not a simple wrapper.
When *relaxed* is False (default), all arguments must be side-effect-free simple expressions.
When *relaxed* is True, only arguments used more than once in the return expression need to be
simple (prevents duplicating side effects while allowing complex single-use arguments).
"""
if func.body is None or not isinstance(func.body, JsBlockStatement):
return None
body = func.body.body
if len(body) != 1:
return None
stmt = body[0]
if not isinstance(stmt, JsReturnStatement) or stmt.argument is None:
return None
param_names = extract_identifier_params(func.params)
if param_names is None:
return None
if len(call_args) != len(param_names):
return None
expr = stmt.argument
if not is_closed_expression(expr, set(param_names)):
return None
if relaxed:
for i, name in enumerate(param_names):
uses = sum(1 for n in expr.walk() if isinstance(n, JsIdentifier) and n.name == name)
if uses > 1 and not is_simple_expression(call_args[i]):
return None
return substitute_params(expr, param_names, call_args)
def walk_scope(root: Node, *, include_root_body: bool = False) -> Iterator[Node]:
"""
Walk the AST under *root* without descending into nested function bodies. Function boundary
nodes are yielded (so their identifiers can be inspected) but their subtrees are suppressed.
Children are visited in source order.
When *include_root_body* is True and *root* is itself a function, its body IS traversed (only
inner functions are skipped). This is useful when *root* represents the scope being analyzed.
"""
stack: list[Node] = [root]
while stack:
node = stack.pop()
yield node
if isinstance(node, (JsFunctionDeclaration, JsFunctionExpression, JsArrowFunctionExpression)):
if not (include_root_body and node is root):
continue
children = list(node.children())
children.reverse()
for child in children:
stack.append(child)
def collect_identifier_names(node: Node) -> set[str]:
"""
Collect the names of all `JsIdentifier` nodes in the subtree rooted at *node*.
"""
return {n.name for n in node.walk() if isinstance(n, JsIdentifier)}
def find_enclosing_body(node: Node) -> list[Statement] | None:
"""
Walk up parent pointers from *node* to find the body list that directly contains it. Returns
the `body` attribute of the nearest `JsBlockStatement` or `JsScript` ancestor whose body
list includes *node* (or an ancestor of *node*).
"""
child = node
parent = node.parent
while parent is not None:
if isinstance(parent, (JsBlockStatement, JsScript)):
if child in parent.body:
return parent.body
child = parent
parent = parent.parent
return None
def _body_declares_var(body: list, name: str) -> bool:
"""
Check whether a function body's statement list contains a `var` declaration that includes a
declarator with the given *name*.
"""
for stmt in body:
if not isinstance(stmt, JsVariableDeclaration):
continue
if stmt.kind != JsVarKind.VAR:
continue
for decl in stmt.declarations:
if isinstance(decl, JsVariableDeclarator) and isinstance(decl.id, JsIdentifier):
if decl.id.name == name:
return True
return False
def function_binds_name(func: Node, name: str) -> bool:
"""
Check if a function creates a local binding for `name` (parameter, function name, or var
declaration anywhere in its body — excluding nested functions).
"""
if isinstance(func, JsFunctionDeclaration) and func.id is not None and func.id.name == name:
return True
for p in (getattr(func, 'params', None) or []):
if isinstance(p, JsIdentifier) and p.name == name:
return True
body = getattr(func, 'body', None)
if not isinstance(body, JsBlockStatement):
return False
stack: list[Node] = [body]
while stack:
node = stack.pop()
if isinstance(node, FUNCTION_NODE_TYPES):
continue
if isinstance(node, JsVariableDeclaration) and node.kind == JsVarKind.VAR:
for decl in node.declarations:
if isinstance(decl, JsVariableDeclarator) and isinstance(decl.id, JsIdentifier):
if decl.id.name == name:
return True
for child in node.children():
stack.append(child)
return False
def _is_shadowed(node: Node, name: str) -> bool:
"""
Walk up from *node* through all enclosing function boundaries and check whether any of them
shadows *name* via a `var` declaration or a function parameter.
This intentionally checks only function boundaries, NOT block/script-level declarations.
`has_remaining_references` relies on this: a script-level `var x = expr` must not be considered
"shadowed" at its own declaration site, or the function will incorrectly conclude no references
remain.
"""
parent = node.parent
while parent is not None:
if isinstance(parent, FUNCTION_NODE_TYPES):
for param in getattr(parent, 'params', ()):
if isinstance(param, JsIdentifier) and param.name == name:
return True
body = getattr(parent, 'body', None)
if isinstance(body, JsBlockStatement):
if _body_declares_var(body.body, name):
return True
parent = parent.parent
return False
def has_remaining_references(
root: Node,
name: str,
exclude: Node | None = None,
exclude_ids: set[int] | None = None,
check_shadowing: bool = False,
) -> bool:
"""
Check whether *name* is referenced anywhere in the subtree of *root*, excluding nodes that
belong to *exclude* (by identity) or whose `id()` is in *exclude_ids*. When *check_shadowing*
is True, identifiers inside function bodies that shadow *name* via `var`/param are skipped.
Bare hoisted declarations (`var NAME;` with no initializer) are never counted.
"""
if exclude is not None:
if exclude_ids is None:
exclude_ids = set()
exclude_ids = exclude_ids | {id(n) for n in exclude.walk()}
for node in root.walk():
if exclude_ids and id(node) in exclude_ids:
continue
parent = node.parent
if exclude_ids and parent is not None and id(parent) in exclude_ids:
continue
if not isinstance(node, JsIdentifier) or node.name != name:
continue
if (
isinstance(parent, JsVariableDeclarator)
and parent.id is node
and parent.init is None
):
continue
if check_shadowing and _is_shadowed(node, name):
continue
return True
return False
class BodyProcessingTransformer(Transformer):
"""
Intermediate base for JS deobfuscation transformers that process the statement list (body) of
`JsScript` and `JsBlockStatement` nodes after visiting children. Subclasses override
`_process_body`.
"""
def visit_JsScript(self, node: JsScript):
self.generic_visit(node)
self._process_body(node, node.body)
return None
def visit_JsBlockStatement(self, node: JsBlockStatement):
self.generic_visit(node)
self._process_body(node, node.body)
return None
def _process_body(self, parent: Node, body: list[Statement]) -> None:
raise NotImplementedError
def _replace_body(
self,
parent: Node,
body: list[Statement],
replacement: list[Statement],
) -> None:
"""
Replace the contents of *body* with *replacement*, fix parent pointers, and mark the
transformer as changed.
"""
body.clear()
body.extend(replacement)
for stmt in body:
stmt.parent = parent
self.mark_changed()
class ScopeProcessingTransformer(Transformer):
"""
Base for transforms that process at function-scope boundaries. Visits `JsScript` and each
function body (`JsFunctionDeclaration`, `JsFunctionExpression`, `JsArrowFunctionExpression`).
Subclasses may override either `_process_scope` or `_process_scope_body`.
"""
def visit_JsScript(self, node: JsScript):
self.generic_visit(node)
self._process_scope(node)
return None
def visit_JsFunctionDeclaration(self, node: JsFunctionDeclaration):
self.generic_visit(node)
if isinstance(node.body, JsBlockStatement):
self._process_scope(node.body)
return None
def visit_JsFunctionExpression(self, node: JsFunctionExpression):
self.generic_visit(node)
if isinstance(node.body, JsBlockStatement):
self._process_scope(node.body)
return None
def visit_JsArrowFunctionExpression(self, node: JsArrowFunctionExpression):
self.generic_visit(node)
if isinstance(node.body, JsBlockStatement):
self._process_scope(node.body)
return None
def _process_scope(self, scope: Node) -> None:
"""
Receives the raw scope node (`JsScript` or `JsBlockStatement`).
"""
body = get_body(scope)
if body is not None:
self._process_scope_body(scope, body)
def _process_scope_body(self, scope: Node, body: list) -> None:
"""
Receives the scope node and its `body` list. The `_process_scope` method extracts the body
and delegates here.
"""
raise NotImplementedError
class ScriptLevelTransformer(Transformer):
"""
Base for transforms that process the entire script manually rather than using the recursive
visitor. Subclasses override `_process_script`.
"""
def visit_JsScript(self, node: JsScript):
self._process_script(node)
return None
def generic_visit(self, node: Node):
pass
def _process_script(self, node: JsScript) -> None:
raise NotImplementedError
Functions
def eval_binary_op(op, left, right)-
Evaluate a JavaScript binary operator on two numeric operands. Returns the result value, or
Nonewhen the operator is unknown or the computation overflows/divides by zero. Handles arithmetic, bitwise, relational, equality, and the unsigned right shift>>>.Expand source code Browse git
def eval_binary_op(op: str, left: int | float, right: int | float) -> int | float | bool | None: """ Evaluate a JavaScript binary operator on two numeric operands. Returns the result value, or `None` when the operator is unknown or the computation overflows/divides by zero. Handles arithmetic, bitwise, relational, equality, and the unsigned right shift `>>>`. """ if op in ('===', '=='): return left == right if op in ('!==', '!='): return left != right rel = RELATIONAL_OPS.get(op) if rel is not None: return rel(left, right) if op == '>>>': a = int(left) & 0xFFFFFFFF b = int(right) & 0x1F return (a >> b) & 0xFFFFFFFF fn = BINARY_OPS.get(op) if fn is None: return None try: return fn(left, right) except (ZeroDivisionError, OverflowError, ValueError): return None def escape_js_string(value, quote="'")-
Escape a string for use in a JavaScript string literal. Returns the escaped body without surrounding quotes. Backslash is escaped first to avoid double-escaping. Control characters not covered by named escapes are emitted as
\xHH; surrogates as\uXXXX.Expand source code Browse git
def escape_js_string(value: str, quote: str = "'") -> str: """ Escape a string for use in a JavaScript string literal. Returns the escaped body without surrounding quotes. Backslash is escaped first to avoid double-escaping. Control characters not covered by named escapes are emitted as `\\xHH`; surrogates as `\\uXXXX`. """ def _residue(m: re.Match[str]): cp = ord(m.group()) if cp > 0xFF: return F'\\u{cp:04X}' return F'\\x{cp:02x}' value = value.replace('\\', r'\\') value = value.replace('\n', r'\n') value = value.replace('\r', r'\r') value = value.replace('\t', r'\t') value = value.replace('\0', r'\0') value = value.replace(quote, F'\\{quote}') return re.sub(r'[\x01-\x1f\ud800-\udfff]', _residue, value) def string_value(node)-
Expand source code Browse git
def string_value(node: Expression | None) -> str | None: if isinstance(node, JsStringLiteral): return node.value return None def property_key(prop)-
Extract the string key from a property node. Handles both string-literal keys and plain identifier keys. Returns
Nonefor computed keys.Expand source code Browse git
def property_key(prop: JsProperty) -> str | None: """ Extract the string key from a property node. Handles both string-literal keys and plain identifier keys. Returns `None` for computed keys. """ if prop.computed: return None if isinstance(prop.key, JsStringLiteral): return prop.key.value if isinstance(prop.key, JsIdentifier): return prop.key.name return None def access_key(node)-
Extract the string key from a member-access expression. Handles both computed (
obj['key']) and dot (obj.key) accesses.Expand source code Browse git
def access_key(node: JsMemberExpression) -> str | None: """ Extract the string key from a member-access expression. Handles both computed (`obj['key']`) and dot (`obj.key`) accesses. """ if node.computed: return string_value(node.property) if isinstance(node.property, JsIdentifier): return node.property.name return None def make_string_literal(value)-
Expand source code Browse git
def make_string_literal(value: str) -> JsStringLiteral: escaped = escape_js_string(value) raw = F"'{escaped}'" return JsStringLiteral(value=value, raw=raw) def numeric_value(node)-
Expand source code Browse git
def numeric_value(node: Expression) -> int | float | None: if isinstance(node, JsNumericLiteral): return node.value return None def make_numeric_literal(value)-
Expand source code Browse git
def make_numeric_literal(value: int | float) -> JsNumericLiteral: if isinstance(value, float): if value == 0.0 and str(value).startswith('-'): raw = '-0' elif value == int(value): raw = str(int(value)) else: raw = str(value) else: raw = str(value) return JsNumericLiteral(value=value, raw=raw) def extract_literal_value(node)-
Extract a Python value from a literal AST node. Returns
(True, value)on success or(False, None)when the node is not a recognized literal form. Handles string, numeric, boolean, null literals,void expr, negative numerics,!0/!1, and array expressions where all elements are themselves literals.Expand source code Browse git
def extract_literal_value(node: Node) -> tuple[bool, LiteralValue]: """ Extract a Python value from a literal AST node. Returns `(True, value)` on success or `(False, None)` when the node is not a recognized literal form. Handles string, numeric, boolean, null literals, `void expr`, negative numerics, `!0`/`!1`, and array expressions where all elements are themselves literals. """ if isinstance(node, JsStringLiteral): return True, node.value if isinstance(node, JsNumericLiteral): return True, node.value if isinstance(node, JsBooleanLiteral): return True, node.value if isinstance(node, JsNullLiteral): return True, None if isinstance(node, JsUnaryExpression): if node.operator == 'void' and isinstance(node.operand, (JsNumericLiteral, JsStringLiteral)): return True, None if node.operator == '-' and isinstance(node.operand, JsNumericLiteral): return True, -node.operand.value if node.operator == '+' and isinstance(node.operand, JsNumericLiteral): return True, node.operand.value if node.operator == '!' and isinstance(node.operand, JsNumericLiteral): return True, not bool(node.operand.value) if isinstance(node, JsArrayExpression): items: list[LiteralValue] = [] for el in node.elements: if el is None: return False, None ok, val = extract_literal_value(el) if not ok: return False, None items.append(val) return True, items return False, None def value_to_node(value)-
Convert a Python value to the corresponding AST literal node. Returns
Nonewhen the value type is not representable as a literal expression.Expand source code Browse git
def value_to_node(value: object) -> Expression | None: """ Convert a Python value to the corresponding AST literal node. Returns `None` when the value type is not representable as a literal expression. """ if isinstance(value, str): return make_string_literal(value) if isinstance(value, bool): return JsBooleanLiteral(value=value) if isinstance(value, int): if value < 0: return JsUnaryExpression(operator='-', operand=make_numeric_literal(-value)) return make_numeric_literal(value) if isinstance(value, float): if value != value: return JsIdentifier(name='NaN') if value == float('inf'): return JsIdentifier(name='Infinity') if value == float('-inf'): return JsUnaryExpression(operator='-', operand=JsIdentifier(name='Infinity')) if value < 0: return JsUnaryExpression(operator='-', operand=make_numeric_literal(-value)) return make_numeric_literal(value) if isinstance(value, list): elements: list[Expression | None] = [] for item in value: el = value_to_node(item) if el is None: return None elements.append(el) return JsArrayExpression(elements=elements) if value is None: return JsUnaryExpression( operator='void', operand=JsNumericLiteral(value=0, raw='0'), ) return None def is_literal(node)-
Expand source code Browse git
def is_literal(node: Node) -> bool: if isinstance(node, (JsStringLiteral, JsNumericLiteral, JsBooleanLiteral, JsNullLiteral)): return True if isinstance(node, JsUnaryExpression): if node.operator == 'void' and isinstance(node.operand, (JsNumericLiteral, JsStringLiteral)): return True if node.operator == '-' and isinstance(node.operand, JsNumericLiteral): return True return False def member_key(node)-
Flatten a chain of property accesses into a dot-separated key string. Handles both dot notation and computed access with string-literal keys. Returns
Noneif the chain contains a dynamic computed access that cannot be resolved to a static key.Expand source code Browse git
def member_key(node: JsMemberExpression) -> str | None: """ Flatten a chain of property accesses into a dot-separated key string. Handles both dot notation and computed access with string-literal keys. Returns `None` if the chain contains a dynamic computed access that cannot be resolved to a static key. """ parts: list[str] = [] cursor: Expression | None = node while isinstance(cursor, JsMemberExpression): key = access_key(cursor) if key is None: return None parts.append(key) cursor = cursor.object if not isinstance(cursor, JsIdentifier): return None parts.append(cursor.name) parts.reverse() return '.'.join(parts) def is_while_true(node)-
Check whether the while-loop condition is
true,!![], or!0— the forms the obfuscator uses for infinite loops.Expand source code Browse git
def is_while_true(node: JsWhileStatement) -> bool: """ Check whether the while-loop condition is `true`, `!![]`, or `!0` — the forms the obfuscator uses for infinite loops. """ test = node.test if isinstance(test, JsBooleanLiteral) and test.value is True: return True if not isinstance(test, JsUnaryExpression) or test.operator != '!': return False inner = test.operand if isinstance(inner, JsNumericLiteral) and inner.value == 0: return True if isinstance(inner, JsUnaryExpression) and inner.operator == '!': return True return False def is_valid_identifier(name)-
Expand source code Browse git
def is_valid_identifier(name: str) -> bool: return bool(SIMPLE_IDENTIFIER.match(name)) and name not in JS_RESERVED def is_simple_expression(node)-
Check whether a node is a side-effect-free leaf expression: a literal value, an identifier, or a unary operator applied to a literal (e.g.
-42).Expand source code Browse git
def is_simple_expression(node: Node) -> bool: """ Check whether a node is a side-effect-free leaf expression: a literal value, an identifier, or a unary operator applied to a literal (e.g. `-42`). """ if is_literal(node) or isinstance(node, JsIdentifier): return True if isinstance(node, JsUnaryExpression) and node.operand is not None: return is_literal(node.operand) return False def is_write_target(node)-
Return whether this identifier is a write target: the left-hand side of an assignment expression, or the iteration variable of a
for-in/for-ofstatement.Expand source code Browse git
def is_write_target(node: JsIdentifier) -> bool: """ Return whether this identifier is a write target: the left-hand side of an assignment expression, or the iteration variable of a `for-in` / `for-of` statement. """ p = node.parent if isinstance(p, JsAssignmentExpression) and p.left is node: return True if isinstance(p, (JsForInStatement, JsForOfStatement)) and p.left is node: return True return False def is_binding_site(node)-
Return whether this identifier is in a binding position (variable declarator id or function declaration name) rather than a reference/read position.
Expand source code Browse git
def is_binding_site(node: JsIdentifier) -> bool: """ Return whether this identifier is in a binding position (variable declarator id or function declaration name) rather than a reference/read position. """ p = node.parent if isinstance(p, JsVariableDeclarator) and p.id is node: return True if isinstance(p, JsFunctionDeclaration) and p.id is node: return True return False def is_reference(node)-
Return whether this identifier is in a true variable reference position: not a binding site, not a non-computed member property, and not a non-computed object-literal key.
Expand source code Browse git
def is_reference(node: JsIdentifier) -> bool: """ Return whether this identifier is in a true variable reference position: not a binding site, not a non-computed member property, and not a non-computed object-literal key. """ p = node.parent if p is None: return False if isinstance(p, JsVariableDeclarator) and p.id is node: return False if isinstance(p, JsFunctionDeclaration) and p.id is node: return False if isinstance(p, JsMemberExpression) and p.property is node and not p.computed: return False if isinstance(p, JsProperty) and p.key is node and not p.computed: return False return True def is_truthy(node)-
Return the JavaScript truthiness of a literal node, or
Nonewhen the value cannot be determined statically.Expand source code Browse git
def is_truthy(node: Node) -> bool | None: """ Return the JavaScript truthiness of a literal node, or `None` when the value cannot be determined statically. """ if isinstance(node, JsBooleanLiteral): return node.value if isinstance(node, JsNumericLiteral): # return correct value for NaN return (v := node.value) != 0 and v == v if isinstance(node, JsStringLiteral): return bool(node.value) if isinstance(node, JsNullLiteral): return False if isinstance(node, JsIdentifier) and node.name == 'undefined': return False if isinstance(node, JsArrayExpression): return True return None def is_statically_evaluable(node)-
Return whether the node can be evaluated to a known truthiness at transform time. This includes all literal types and the
undefinedidentifier.Expand source code Browse git
def is_statically_evaluable(node: Node) -> bool: """ Return whether the node can be evaluated to a known truthiness at transform time. This includes all literal types and the `undefined` identifier. """ return ( is_literal(node) or (isinstance(node, JsIdentifier) and node.name == 'undefined') or isinstance(node, JsArrayExpression) ) def is_nullish(node)-
Return whether the node is statically known to be
nullorundefined.Expand source code Browse git
def is_nullish(node: Node) -> bool: """ Return whether the node is statically known to be `null` or `undefined`. """ if isinstance(node, JsNullLiteral): return True if isinstance(node, JsIdentifier) and node.name == 'undefined': return True return False def is_side_effect_free(node, defunct=None)-
Conservative check for whether an expression can be removed without observable side effects. When defunct is provided, calls to identifiers in that set are treated as side-effect-free (the function no longer exists in scope).
Expand source code Browse git
def is_side_effect_free(node: Node, defunct: set[str] | None = None) -> bool: """ Conservative check for whether an expression can be removed without observable side effects. When *defunct* is provided, calls to identifiers in that set are treated as side-effect-free (the function no longer exists in scope). """ if isinstance(node, (JsStringLiteral, JsNumericLiteral, JsBooleanLiteral, JsNullLiteral)): return True if isinstance(node, JsIdentifier): return True if isinstance(node, JsFunctionExpression): return True if isinstance(node, JsUnaryExpression): if node.operator == 'delete': return False return node.operand is not None and is_side_effect_free(node.operand, defunct) if isinstance(node, JsMemberExpression): if node.object is None: return False if not is_side_effect_free(node.object, defunct): return False if node.property is not None and not is_side_effect_free(node.property, defunct): return False return _is_safe_property_base(node.object, defunct) if isinstance(node, (JsBinaryExpression, JsLogicalExpression)): return ( node.left is not None and is_side_effect_free(node.left, defunct) and node.right is not None and is_side_effect_free(node.right, defunct) ) if isinstance(node, JsConditionalExpression): return ( node.test is not None and is_side_effect_free(node.test, defunct) and node.consequent is not None and is_side_effect_free(node.consequent, defunct) and node.alternate is not None and is_side_effect_free(node.alternate, defunct) ) if isinstance(node, JsObjectExpression): for prop in node.properties: if not isinstance(prop, JsProperty): return False if prop.value is not None and not is_side_effect_free(prop.value, defunct): return False return True if isinstance(node, JsArrayExpression): return all( elem is None or is_side_effect_free(elem, defunct) for elem in node.elements ) if isinstance(node, JsSequenceExpression): return all(is_side_effect_free(e, defunct) for e in node.expressions) if isinstance(node, JsCallExpression): if defunct and isinstance(node.callee, JsIdentifier) and node.callee.name in defunct: return all(is_side_effect_free(arg, defunct) for arg in node.arguments) if isinstance(node.callee, JsFunctionExpression): return all(is_side_effect_free(arg, defunct) for arg in node.arguments) return False def js_parse_int(s, radix=10)-
Replicate the semantics of JavaScript's
parseInt(string, radix). Strips leading whitespace, handles an optional+/-sign, and for radix 16 skips a leading0x/0Xprefix. Parses leading characters valid for the given radix (2-36) and stops at the first invalid one. ReturnsNonewhen no valid digits are found (JS would returnNaN).Expand source code Browse git
def js_parse_int(s: str, radix: int = 10) -> int | None: """ Replicate the semantics of JavaScript's `parseInt(string, radix)`. Strips leading whitespace, handles an optional `+`/`-` sign, and for radix 16 skips a leading `0x`/`0X` prefix. Parses leading characters valid for the given radix (2-36) and stops at the first invalid one. Returns `None` when no valid digits are found (JS would return `NaN`). """ if radix == 0: radix = 10 if not (2 <= radix <= 36): return None s = s.strip() if not s: return None sign = 1 if s[0] in '+-': if s[0] == '-': sign = -1 s = s[1:] if radix == 16 and len(s) >= 2 and s[0] == '0' and s[1] in 'xX': s = s[2:] digits: list[str] = [] for ch in s: if '0' <= ch <= '9': if ord(ch) - ord('0') >= radix: break digits.append(ch) elif 'a' <= ch <= 'z' or 'A' <= ch <= 'Z': if ord(ch.lower()) - ord('a') + 10 >= radix: break digits.append(ch) else: break if not digits: return None return sign * int(''.join(digits), radix) def get_body(node)-
Return the statement body list of a node if it has one (JsScript or JsBlockStatement).
Expand source code Browse git
def get_body(node: Node) -> list[Statement] | None: """ Return the statement body list of a node if it has one (JsScript or JsBlockStatement). """ if isinstance(node, (JsScript, JsBlockStatement)): return node.body return None def remove_declarator(declarator)-
Remove a
JsVariableDeclaratorfrom its parentJsVariableDeclaration. If the declaration has no remaining declarators afterward, remove it from the body as well.Expand source code Browse git
def remove_declarator(declarator: JsVariableDeclarator) -> None: """ Remove a `JsVariableDeclarator` from its parent `JsVariableDeclaration`. If the declaration has no remaining declarators afterward, remove it from the body as well. """ var_decl = declarator.parent _remove_from_parent(declarator) if isinstance(var_decl, JsVariableDeclaration) and not var_decl.declarations: _remove_from_parent(var_decl) def extract_identifier_params(params)-
Extract plain identifier names from a function's parameter list. Returns
Noneif any parameter is not a simpleJsIdentifier(e.g. destructuring or rest patterns).Expand source code Browse git
def extract_identifier_params(params: list) -> list[str] | None: """ Extract plain identifier names from a function's parameter list. Returns `None` if any parameter is not a simple `JsIdentifier` (e.g. destructuring or rest patterns). """ names: list[str] = [] for p in params: if not isinstance(p, JsIdentifier): return None names.append(p.name) return names def is_closed_expression(node, allowed_names)-
Check whether every leaf in the expression tree is either a literal or an identifier whose name is in allowed_names. This ensures the expression has no free variables.
Expand source code Browse git
def is_closed_expression(node: Node, allowed_names: set[str]) -> bool: """ Check whether every leaf in the expression tree is either a literal or an identifier whose name is in *allowed_names*. This ensures the expression has no free variables. """ children = list(node.children()) if not children: if isinstance(node, JsIdentifier): return node.name in allowed_names return is_simple_expression(node) return all(is_closed_expression(child, allowed_names) for child in children) def substitute_params(expression, param_names, arguments)-
Deep-clone expression and replace every
JsIdentifierwhose name appears in param_names with a clone of the positionally corresponding node from arguments.Expand source code Browse git
def substitute_params( expression: Node, param_names: Sequence[str], arguments: Sequence[Node], ) -> Node: """ Deep-clone *expression* and replace every `JsIdentifier` whose name appears in *param_names* with a clone of the positionally corresponding node from *arguments*. """ cloned = _clone_node(expression) mapping = dict(zip(param_names, arguments)) for node in list(cloned.walk()): if isinstance(node, JsIdentifier) and node.name in mapping: _replace_in_parent(node, _clone_node(mapping[node.name])) return cloned def try_inline_trivial_function(func, call_args, *, relaxed=False)-
If func is a trivial wrapper (single return whose expression uses only the function's parameters), substitute call-site arguments into a clone of the return expression. Returns the inlined expression or
Noneif the function is not a simple wrapper.When relaxed is False (default), all arguments must be side-effect-free simple expressions. When relaxed is True, only arguments used more than once in the return expression need to be simple (prevents duplicating side effects while allowing complex single-use arguments).
Expand source code Browse git
def try_inline_trivial_function( func: JsFunctionExpression, call_args: list, *, relaxed: bool = False, ) -> Node | None: """ If *func* is a trivial wrapper (single return whose expression uses only the function's parameters), substitute call-site arguments into a clone of the return expression. Returns the inlined expression or `None` if the function is not a simple wrapper. When *relaxed* is False (default), all arguments must be side-effect-free simple expressions. When *relaxed* is True, only arguments used more than once in the return expression need to be simple (prevents duplicating side effects while allowing complex single-use arguments). """ if func.body is None or not isinstance(func.body, JsBlockStatement): return None body = func.body.body if len(body) != 1: return None stmt = body[0] if not isinstance(stmt, JsReturnStatement) or stmt.argument is None: return None param_names = extract_identifier_params(func.params) if param_names is None: return None if len(call_args) != len(param_names): return None expr = stmt.argument if not is_closed_expression(expr, set(param_names)): return None if relaxed: for i, name in enumerate(param_names): uses = sum(1 for n in expr.walk() if isinstance(n, JsIdentifier) and n.name == name) if uses > 1 and not is_simple_expression(call_args[i]): return None return substitute_params(expr, param_names, call_args) def walk_scope(root, *, include_root_body=False)-
Walk the AST under root without descending into nested function bodies. Function boundary nodes are yielded (so their identifiers can be inspected) but their subtrees are suppressed. Children are visited in source order.
When include_root_body is True and root is itself a function, its body IS traversed (only inner functions are skipped). This is useful when root represents the scope being analyzed.
Expand source code Browse git
def walk_scope(root: Node, *, include_root_body: bool = False) -> Iterator[Node]: """ Walk the AST under *root* without descending into nested function bodies. Function boundary nodes are yielded (so their identifiers can be inspected) but their subtrees are suppressed. Children are visited in source order. When *include_root_body* is True and *root* is itself a function, its body IS traversed (only inner functions are skipped). This is useful when *root* represents the scope being analyzed. """ stack: list[Node] = [root] while stack: node = stack.pop() yield node if isinstance(node, (JsFunctionDeclaration, JsFunctionExpression, JsArrowFunctionExpression)): if not (include_root_body and node is root): continue children = list(node.children()) children.reverse() for child in children: stack.append(child) def collect_identifier_names(node)-
Collect the names of all
JsIdentifiernodes in the subtree rooted at node.Expand source code Browse git
def collect_identifier_names(node: Node) -> set[str]: """ Collect the names of all `JsIdentifier` nodes in the subtree rooted at *node*. """ return {n.name for n in node.walk() if isinstance(n, JsIdentifier)} def find_enclosing_body(node)-
Walk up parent pointers from node to find the body list that directly contains it. Returns the
bodyattribute of the nearestJsBlockStatementorJsScriptancestor whose body list includes node (or an ancestor of node).Expand source code Browse git
def find_enclosing_body(node: Node) -> list[Statement] | None: """ Walk up parent pointers from *node* to find the body list that directly contains it. Returns the `body` attribute of the nearest `JsBlockStatement` or `JsScript` ancestor whose body list includes *node* (or an ancestor of *node*). """ child = node parent = node.parent while parent is not None: if isinstance(parent, (JsBlockStatement, JsScript)): if child in parent.body: return parent.body child = parent parent = parent.parent return None def function_binds_name(func, name)-
Check if a function creates a local binding for
name(parameter, function name, or var declaration anywhere in its body — excluding nested functions).Expand source code Browse git
def function_binds_name(func: Node, name: str) -> bool: """ Check if a function creates a local binding for `name` (parameter, function name, or var declaration anywhere in its body — excluding nested functions). """ if isinstance(func, JsFunctionDeclaration) and func.id is not None and func.id.name == name: return True for p in (getattr(func, 'params', None) or []): if isinstance(p, JsIdentifier) and p.name == name: return True body = getattr(func, 'body', None) if not isinstance(body, JsBlockStatement): return False stack: list[Node] = [body] while stack: node = stack.pop() if isinstance(node, FUNCTION_NODE_TYPES): continue if isinstance(node, JsVariableDeclaration) and node.kind == JsVarKind.VAR: for decl in node.declarations: if isinstance(decl, JsVariableDeclarator) and isinstance(decl.id, JsIdentifier): if decl.id.name == name: return True for child in node.children(): stack.append(child) return False def has_remaining_references(root, name, exclude=None, exclude_ids=None, check_shadowing=False)-
Check whether name is referenced anywhere in the subtree of root, excluding nodes that belong to exclude (by identity) or whose
id()is in exclude_ids. When check_shadowing is True, identifiers inside function bodies that shadow name viavar/param are skipped. Bare hoisted declarations (var NAME;with no initializer) are never counted.Expand source code Browse git
def has_remaining_references( root: Node, name: str, exclude: Node | None = None, exclude_ids: set[int] | None = None, check_shadowing: bool = False, ) -> bool: """ Check whether *name* is referenced anywhere in the subtree of *root*, excluding nodes that belong to *exclude* (by identity) or whose `id()` is in *exclude_ids*. When *check_shadowing* is True, identifiers inside function bodies that shadow *name* via `var`/param are skipped. Bare hoisted declarations (`var NAME;` with no initializer) are never counted. """ if exclude is not None: if exclude_ids is None: exclude_ids = set() exclude_ids = exclude_ids | {id(n) for n in exclude.walk()} for node in root.walk(): if exclude_ids and id(node) in exclude_ids: continue parent = node.parent if exclude_ids and parent is not None and id(parent) in exclude_ids: continue if not isinstance(node, JsIdentifier) or node.name != name: continue if ( isinstance(parent, JsVariableDeclarator) and parent.id is node and parent.init is None ): continue if check_shadowing and _is_shadowed(node, name): continue return True return False
Classes
class BodyProcessingTransformer-
Intermediate base for JS deobfuscation transformers that process the statement list (body) of
JsScriptandJsBlockStatementnodes after visiting children. Subclasses override_process_body.Expand source code Browse git
class BodyProcessingTransformer(Transformer): """ Intermediate base for JS deobfuscation transformers that process the statement list (body) of `JsScript` and `JsBlockStatement` nodes after visiting children. Subclasses override `_process_body`. """ def visit_JsScript(self, node: JsScript): self.generic_visit(node) self._process_body(node, node.body) return None def visit_JsBlockStatement(self, node: JsBlockStatement): self.generic_visit(node) self._process_body(node, node.body) return None def _process_body(self, parent: Node, body: list[Statement]) -> None: raise NotImplementedError def _replace_body( self, parent: Node, body: list[Statement], replacement: list[Statement], ) -> None: """ Replace the contents of *body* with *replacement*, fix parent pointers, and mark the transformer as changed. """ body.clear() body.extend(replacement) for stmt in body: stmt.parent = parent self.mark_changed()Ancestors
Subclasses
Methods
def visit_JsScript(self, node)-
Expand source code Browse git
def visit_JsScript(self, node: JsScript): self.generic_visit(node) self._process_body(node, node.body) return None def visit_JsBlockStatement(self, node)-
Expand source code Browse git
def visit_JsBlockStatement(self, node: JsBlockStatement): self.generic_visit(node) self._process_body(node, node.body) return None
class ScopeProcessingTransformer-
Base for transforms that process at function-scope boundaries. Visits
JsScriptand each function body (JsFunctionDeclaration,JsFunctionExpression,JsArrowFunctionExpression). Subclasses may override either_process_scopeor_process_scope_body.Expand source code Browse git
class ScopeProcessingTransformer(Transformer): """ Base for transforms that process at function-scope boundaries. Visits `JsScript` and each function body (`JsFunctionDeclaration`, `JsFunctionExpression`, `JsArrowFunctionExpression`). Subclasses may override either `_process_scope` or `_process_scope_body`. """ def visit_JsScript(self, node: JsScript): self.generic_visit(node) self._process_scope(node) return None def visit_JsFunctionDeclaration(self, node: JsFunctionDeclaration): self.generic_visit(node) if isinstance(node.body, JsBlockStatement): self._process_scope(node.body) return None def visit_JsFunctionExpression(self, node: JsFunctionExpression): self.generic_visit(node) if isinstance(node.body, JsBlockStatement): self._process_scope(node.body) return None def visit_JsArrowFunctionExpression(self, node: JsArrowFunctionExpression): self.generic_visit(node) if isinstance(node.body, JsBlockStatement): self._process_scope(node.body) return None def _process_scope(self, scope: Node) -> None: """ Receives the raw scope node (`JsScript` or `JsBlockStatement`). """ body = get_body(scope) if body is not None: self._process_scope_body(scope, body) def _process_scope_body(self, scope: Node, body: list) -> None: """ Receives the scope node and its `body` list. The `_process_scope` method extracts the body and delegates here. """ raise NotImplementedErrorAncestors
Subclasses
Methods
def visit_JsScript(self, node)-
Expand source code Browse git
def visit_JsScript(self, node: JsScript): self.generic_visit(node) self._process_scope(node) return None def visit_JsFunctionDeclaration(self, node)-
Expand source code Browse git
def visit_JsFunctionDeclaration(self, node: JsFunctionDeclaration): self.generic_visit(node) if isinstance(node.body, JsBlockStatement): self._process_scope(node.body) return None def visit_JsFunctionExpression(self, node)-
Expand source code Browse git
def visit_JsFunctionExpression(self, node: JsFunctionExpression): self.generic_visit(node) if isinstance(node.body, JsBlockStatement): self._process_scope(node.body) return None def visit_JsArrowFunctionExpression(self, node)-
Expand source code Browse git
def visit_JsArrowFunctionExpression(self, node: JsArrowFunctionExpression): self.generic_visit(node) if isinstance(node.body, JsBlockStatement): self._process_scope(node.body) return None
class ScriptLevelTransformer-
Base for transforms that process the entire script manually rather than using the recursive visitor. Subclasses override
_process_script.Expand source code Browse git
class ScriptLevelTransformer(Transformer): """ Base for transforms that process the entire script manually rather than using the recursive visitor. Subclasses override `_process_script`. """ def visit_JsScript(self, node: JsScript): self._process_script(node) return None def generic_visit(self, node: Node): pass def _process_script(self, node: JsScript) -> None: raise NotImplementedErrorAncestors
Subclasses
- JsRemoveReDoS
- JsAssignmentsAsFunctionArgs
- JsBase91StringDecoder
- JsFunctionEvaluator
- JsReflectionInlining
- JsRestArrayUnpacking
- JsStringArrayResolver
- JsArrayUnshuffle
- JsCallWrapperInliner
Methods
def visit_JsScript(self, node)-
Expand source code Browse git
def visit_JsScript(self, node: JsScript): self._process_script(node) return None def generic_visit(self, node)-
Expand source code Browse git
def generic_visit(self, node: Node): pass