Module refinery.units.scripting.deobfuscate
Expand source code Browse git
from __future__ import annotations
from typing import TYPE_CHECKING, Callable, NamedTuple
if TYPE_CHECKING:
from refinery.lib.scripts import Node
from refinery.lib.scripts.js.model import JsErrorNode
from refinery.lib.scripts.js.parser import JsParser
from refinery.lib.scripts.js.synth import JsSynthesizer
from refinery.lib.scripts.ps1.model import Ps1ErrorNode
from refinery.lib.scripts.ps1.parser import Ps1Parser
from refinery.lib.scripts.ps1.synth import Ps1Synthesizer
from refinery.lib.scripts.vba.model import VbaErrorNode
from refinery.lib.scripts.vba.parser import VbaParser
from refinery.lib.scripts.vba.synth import VbaSynthesizer
from refinery.lib.types import INF
from refinery.units.scripting import IterativeDeobfuscator
class _Backend(NamedTuple):
name: str
parser: type[JsParser] | type[Ps1Parser] | type[VbaParser]
deobfuscate: Callable[..., bool]
synthesizer: type[JsSynthesizer] | type[Ps1Synthesizer] | type[VbaSynthesizer]
error: type[JsErrorNode] | type[Ps1ErrorNode] | type[VbaErrorNode]
class defu(IterativeDeobfuscator):
"""
Universal script deobfuscator supporting JavaScript, PowerShell, and VBA.
Attempts to parse the input as JavaScript, PowerShell, and VBA, then selects the language
whose parser produces the fewest error nodes and applies the corresponding deobfuscation
pipeline. The deobfuscation is executed iteratively until the output does not change any
more; running the unit twice does not change the output.
"""
_backend: _Backend
@classmethod
def _backends(cls):
from refinery.lib.scripts.js.deobfuscation import deobfuscate as js_deobfuscate
from refinery.lib.scripts.js.model import JsErrorNode
from refinery.lib.scripts.js.parser import JsParser
from refinery.lib.scripts.js.synth import JsSynthesizer
yield _Backend('js', JsParser, js_deobfuscate, JsSynthesizer, JsErrorNode)
from refinery.lib.scripts.ps1.deobfuscation import deobfuscate as ps1_deobfuscate
from refinery.lib.scripts.ps1.model import Ps1ErrorNode
from refinery.lib.scripts.ps1.parser import Ps1Parser
from refinery.lib.scripts.ps1.synth import Ps1Synthesizer
yield _Backend('ps1', Ps1Parser, ps1_deobfuscate, Ps1Synthesizer, Ps1ErrorNode)
from refinery.lib.scripts.vba.deobfuscation import deobfuscate as vba_deobfuscate
from refinery.lib.scripts.vba.model import VbaErrorNode
from refinery.lib.scripts.vba.parser import VbaParser
from refinery.lib.scripts.vba.synth import VbaSynthesizer
yield _Backend('vba', VbaParser, vba_deobfuscate, VbaSynthesizer, VbaErrorNode)
def parse(self, data: str) -> Node:
best_ast: Node | None = None
best_errors = INF()
best_backend = None
for backend in self._backends():
try:
ast = backend.parser(data).parse()
errors = sum(
len(n.text) for n in ast.walk() if isinstance(n, backend.error))
except Exception:
continue
if errors < best_errors:
best_errors = errors
best_ast = ast
best_backend = backend
if errors == 0:
break
if best_backend is None or best_ast is None or best_errors * 2 > len(data):
raise ValueError('none of the available parsers was able to parse the input')
self._backend = best_backend
self.log_info(F'using {best_backend.name} with {best_errors / len(data) * 100:.2f}% errors')
return best_ast
def transform(self, ast: Node) -> bool:
return self._backend.deobfuscate(ast)
def synthesize(self, ast: Node) -> str:
return self._backend.synthesizer().convert(ast)
Classes
class defu (timeout=100)-
Universal script deobfuscator supporting JavaScript, PowerShell, and VBA.
Attempts to parse the input as JavaScript, PowerShell, and VBA, then selects the language whose parser produces the fewest error nodes and applies the corresponding deobfuscation pipeline. The deobfuscation is executed iteratively until the output does not change any more; running the unit twice does not change the output.
Expand source code Browse git
class defu(IterativeDeobfuscator): """ Universal script deobfuscator supporting JavaScript, PowerShell, and VBA. Attempts to parse the input as JavaScript, PowerShell, and VBA, then selects the language whose parser produces the fewest error nodes and applies the corresponding deobfuscation pipeline. The deobfuscation is executed iteratively until the output does not change any more; running the unit twice does not change the output. """ _backend: _Backend @classmethod def _backends(cls): from refinery.lib.scripts.js.deobfuscation import deobfuscate as js_deobfuscate from refinery.lib.scripts.js.model import JsErrorNode from refinery.lib.scripts.js.parser import JsParser from refinery.lib.scripts.js.synth import JsSynthesizer yield _Backend('js', JsParser, js_deobfuscate, JsSynthesizer, JsErrorNode) from refinery.lib.scripts.ps1.deobfuscation import deobfuscate as ps1_deobfuscate from refinery.lib.scripts.ps1.model import Ps1ErrorNode from refinery.lib.scripts.ps1.parser import Ps1Parser from refinery.lib.scripts.ps1.synth import Ps1Synthesizer yield _Backend('ps1', Ps1Parser, ps1_deobfuscate, Ps1Synthesizer, Ps1ErrorNode) from refinery.lib.scripts.vba.deobfuscation import deobfuscate as vba_deobfuscate from refinery.lib.scripts.vba.model import VbaErrorNode from refinery.lib.scripts.vba.parser import VbaParser from refinery.lib.scripts.vba.synth import VbaSynthesizer yield _Backend('vba', VbaParser, vba_deobfuscate, VbaSynthesizer, VbaErrorNode) def parse(self, data: str) -> Node: best_ast: Node | None = None best_errors = INF() best_backend = None for backend in self._backends(): try: ast = backend.parser(data).parse() errors = sum( len(n.text) for n in ast.walk() if isinstance(n, backend.error)) except Exception: continue if errors < best_errors: best_errors = errors best_ast = ast best_backend = backend if errors == 0: break if best_backend is None or best_ast is None or best_errors * 2 > len(data): raise ValueError('none of the available parsers was able to parse the input') self._backend = best_backend self.log_info(F'using {best_backend.name} with {best_errors / len(data) * 100:.2f}% errors') return best_ast def transform(self, ast: Node) -> bool: return self._backend.deobfuscate(ast) def synthesize(self, ast: Node) -> str: return self._backend.synthesizer().convert(ast)Ancestors
Subclasses
Class variables
var reverse-
The type of the None singleton.
Methods
def parse(self, data)-
Expand source code Browse git
def parse(self, data: str) -> Node: best_ast: Node | None = None best_errors = INF() best_backend = None for backend in self._backends(): try: ast = backend.parser(data).parse() errors = sum( len(n.text) for n in ast.walk() if isinstance(n, backend.error)) except Exception: continue if errors < best_errors: best_errors = errors best_ast = ast best_backend = backend if errors == 0: break if best_backend is None or best_ast is None or best_errors * 2 > len(data): raise ValueError('none of the available parsers was able to parse the input') self._backend = best_backend self.log_info(F'using {best_backend.name} with {best_errors / len(data) * 100:.2f}% errors') return best_ast def transform(self, ast)-
Expand source code Browse git
def transform(self, ast: Node) -> bool: return self._backend.deobfuscate(ast) def synthesize(self, ast)-
Expand source code Browse git
def synthesize(self, ast: Node) -> str: return self._backend.synthesizer().convert(ast)
Inherited members