Module refinery.lib.thirdparty.batch_interpreter
Expand source code Browse git
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Based on source code from: https://github.com/DissectMalware/batch_deobfuscator
# Original License: MIT License
# Copyright (c) 2018 Malwrologist
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# NOTE: The code was refactored and the following modifications were made:
# - allow stripping used variable definitions
# - allow stripping echo commands
# - allow stripping comments
# - allow processing of strings rather than files
from __future__ import annotations
import re
import os
import io
from dataclasses import dataclass, field
from enum import Enum, IntFlag, auto
from typing import Dict, Generator, List, NamedTuple, Optional
from collections import defaultdict
class _T(str, Enum):
    """
    Characters that carry special meaning while scanning a batch script.

    Every member is also a plain `str`, so it can be compared directly with a
    single character taken from the script text.
    """
    OPEN    = '('  # noqa  opening parenthesis of a command group
    CLOSE   = ')'  # noqa  closing parenthesis of a command group
    ESCAPE  = '^'  # noqa  caret: escapes the following character
    CONNECT = '&'  # noqa  command separator
    PIPE    = '|'  # noqa  command separator (pipe)
    QUOTE   = '"'  # noqa  double quote: toggles string mode
    V1      = '%'  # noqa  delimiter of %variable% references
    V2      = '!'  # noqa  delimiter of !variable! references
    SPACE   = ' '  # noqa
class _S(int, Enum):
    """
    States of the character-level scanners used to split and normalize commands.
    """
    # Outside of any quoted string, escape sequence, or variable reference.
    INIT = auto()
    # Inside a double-quoted string.
    STRING = auto()
    # Directly after an escape character; the next character is taken literally.
    ESCAPE = auto()
    # Inside a reference that was opened with a percent sign.
    VARIABLE_TYPE1 = auto()
    # Inside a reference that was opened with an exclamation mark.
    VARIABLE_TYPE2 = auto()
class STRIP(IntFlag):
    """
    Bit flags selecting which kinds of lines are removed from deobfuscated output.
    """
    # Remove definitions of variables that were evaluated at least once.
    DEFINITION = 0b001  # noqa
    # Remove echo commands.
    ECHO = 0b010  # noqa
    # Remove comments (lines starting with a double colon, and rem commands).
    COMMENT = 0b100  # noqa
    # All of the above combined.
    ALL = DEFINITION | ECHO | COMMENT  # noqa
    # Keep every line.
    NONE = 0b000  # noqa
@dataclass
class ScriptVariable:
    """
    Bookkeeping record for a single batch variable.
    """
    # Maps the number of the line that assigned the variable to the assigned value.
    definitions: Dict[int, str] = field(default_factory=dict)
    # Numbers of the lines on which the variable was expanded.
    evaluations: List[int] = field(default_factory=list)

    @property
    def value(self):
        """
        The value assigned on the highest-numbered defining line.
        """
        return self.definitions[max(self.definitions)]
class DeobfuscatedLine(NamedTuple):
    """
    One normalized command together with its sub-command nesting level.
    """
    # Nesting level: 0 for the script itself, incremented for each nested sub-command.
    depth: int
    # The command text after normalization.
    value: str
class BatchDeobfuscator:
    """
    A small interpreter for batch scripts. It expands variable references,
    records the variable definitions made by `set` commands, and descends into
    commands that are handed to `cmd` via `/c` or `/r`, so that an obfuscated
    script can be printed in a readable form.
    """

    # Maps lower-cased variable names to their recorded definitions and uses.
    variables: Dict[str, ScriptVariable]
    # Command line of a sub-command discovered by `interpret` that still has to
    # be processed by `_interpret`; None when there is none.
    pending_subcommand: Optional[str]

    def __init__(self):
        """
        Seed the variable table: the process environment is used when running
        on Windows, a fixed set of typical Windows values otherwise. Seeded
        values are recorded under line number -1.
        """
        if os.name == 'nt':
            variables = {name.lower(): value for name, value in os.environ.items()}
        else:
            variables = {
                'allusersprofile': 'C:\\ProgramData',
                'appdata': 'C:\\Users\\puncher\\AppData\\Roaming',
                'commonprogramfiles': 'C:\\Program Files\\Common Files',
                # The original key carried a stray trailing double quote, which
                # made this variable impossible to look up.
                'commonprogramfiles(x86)': 'C:\\Program Files (x86)\\Common Files',
                'commonprogramw6432': 'C:\\Program Files\\Common Files',
                'computername': 'MISCREANTTEARS',
                'comspec': 'C:\\WINDOWS\\system32\\cmd.exe',
                'driverdata': 'C:\\Windows\\System32\\Drivers\\DriverData',
                'fps_browser_app_profile_string': 'Internet Explorer',
                'fps_browser_user_profile_string': 'Default',
                'homedrive': 'C:',
                'homepath': '\\Users\\puncher',
                'java_home': 'C:\\Program Files\\Amazon Corretto\\jdk11.0.7_10',
                'localappdata': 'C:\\Users\\puncher\\AppData\\Local',
                'logonserver': '\\\\MISCREANTTEARS',
                'number_of_processors': '4',
                'onedrive': 'C:\\Users\\puncher\\OneDrive',
                'os': 'Windows_NT',
                'path': (
                    'C:\\Program Files\\Amazon Corretto\\jdk11.0.7_10\\bin;C:\\WINDOWS\\system32;'
                    'C:\\WINDOWS;C:\\WINDOWS\\System32\\Wbem;C:\\WINDOWS\\System32\\WindowsPowerS'
                    'hell\\v1.0\\;C:\\Program Files\\dotnet\\;C:\\Program Files\\Microsoft SQL Se'
                    'rver\\130\\Tools\\Binn\\;C:\\Users\\puncher\\AppData\\Local\\Microsoft\\Wind'
                    'owsApps;%USERPROFILE%\\AppData\\Local\\Microsoft\\WindowsApps;'
                ),
                'pathext': '.COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC',
                'processor_architecture': 'AMD64',
                'processor_identifier': 'Intel Core Ti-83 Family 6 Model 158 Stepping 10, GenuineIntel',
                'processor_level': '6',
                'processor_revision': '9e0a',
                'programdata': 'C:\\ProgramData',
                'programfiles': 'C:\\Program Files',
                # Same stray-quote fix as for commonprogramfiles(x86).
                'programfiles(x86)': 'C:\\Program Files (x86)',
                'programw6432': 'C:\\Program Files',
                'psmodulepath': 'C:\\WINDOWS\\system32\\WindowsPowerShell\\v1.0\\Modules\\',
                'public': 'C:\\Users\\Public',
                'sessionname': 'Console',
                'systemdrive': 'C:',
                'systemroot': 'C:\\WINDOWS',
                'temp': 'C:\\Users\\puncher\\AppData\\Local\\Temp',
                'tmp': 'C:\\Users\\puncher\\AppData\\Local\\Temp',
                'userdomain': 'MISCREANTTEARS',
                'userdomain_roamingprofile': 'MISCREANTTEARS',
                'username': 'puncher',
                'userprofile': 'C:\\Users\\puncher',
                'windir': 'C:\\WINDOWS',
                '__compat_layer': 'DetectorsMessageBoxErrors',
            }
        self.pending_subcommand = None
        self.variables = defaultdict(ScriptVariable)
        for name, value in variables.items():
            self.variables[name].definitions[-1] = value

    def read_logical_lines(self, text: str):
        """
        Yield the logical lines of `text`. A physical line that ends with the
        escape character is joined with the following line(s); the escape
        character and a line feed are kept in the joined string.
        """
        logical_line = io.StringIO()
        for line in text.splitlines(False):
            logical_line.write(line)
            if line.endswith(_T.ESCAPE):
                logical_line.write('\n')
                continue
            yield logical_line.getvalue()
            logical_line.seek(0)
            logical_line.truncate(0)
        # When the very last physical line ends with the escape character there
        # is nothing left to join it with; emit what was collected instead of
        # silently discarding it.
        pending = logical_line.getvalue()
        if pending:
            yield pending

    def commands(self, logical_line: str) -> Generator[str, None, None]:
        """
        Split a logical line into individual commands at every `&` and `|`
        that is neither inside a double-quoted string nor escaped. Each yielded
        command is stripped of surrounding whitespace; a trailing command is
        only yielded when it is not empty.
        """
        state = _S.INIT
        start = 0
        for offset, token in enumerate(logical_line):
            if state is _S.INIT:
                if token == _T.QUOTE:
                    state = _S.STRING
                elif token == _T.ESCAPE:
                    state = _S.ESCAPE
                elif token == _T.CONNECT or token == _T.PIPE:
                    yield logical_line[start:offset].strip()
                    start = offset + 1
            elif state is _S.STRING:
                if token == _T.QUOTE:
                    state = _S.INIT
            elif state is _S.ESCAPE:
                # The escaped character is skipped, whatever it is.
                state = _S.INIT
        last_command = logical_line[start:].strip()
        if last_command:
            yield last_command

    def evaluate_variable(self, lno, variable):
        """
        Resolve a single variable reference such as `%name%` or
        `%name:~index,length%`.

        The reference is returned unchanged when it does not have that shape or
        when the variable is unknown. Otherwise, the evaluation is recorded for
        line `lno` and the (possibly sliced) value is returned.
        """
        pattern = (
            r'''(?P<delim>%|!)\s*(?P<name>[\w#$'()*+,-.?@\[\]`{}~ ]+)'''
            r'''(:~\s*(?P<index>[+-]?\d+)\s*,\s*(?P<length>[+-]?\d+)\s*)?(?P=delim)''')
        match = re.fullmatch(pattern, variable)
        if match is None:
            return variable
        var = self.variables.get(match.group('name').lower())
        if var is None:
            return variable
        value = var.value
        var.evaluations.append(lno)
        if match.group('index'):
            index = int(match.group('index'))
            length = int(match.group('length'))
            if index < 0:
                # Convert a start offset that counts from the end into an
                # absolute one. Without this, a slice like [-3:0] that is meant
                # to reach the end of the value would come out empty.
                index = max(len(value) + index, 0)
            if length >= 0:
                value = value[index:index + length]
            else:
                # A negative length drops that many characters from the end.
                value = value[index:length]
        return value

    def interpret(self, lno: int, command: str) -> None:
        """
        Record the side effects of a normalized command.

        A `cmd` invocation with `/c` or `/r` stores the command line that
        follows in `pending_subcommand`. A `set NAME=VALUE` command records
        VALUE as the definition of NAME made on line `lno`; `set /p NAME=...`
        records the placeholder `__input__` instead.
        """
        command = command.strip()
        # Strip leading spaces and opening parentheses; for every opening
        # parenthesis, also strip trailing spaces up to and including one
        # closing parenthesis.
        index = 0
        last = len(command) - 1
        while index < last and (command[index] == _T.SPACE or command[index] == _T.OPEN):
            if command[index] == _T.OPEN:
                while last > index and (command[last] == _T.SPACE or command[last] == _T.CLOSE):
                    if command[last] == _T.CLOSE:
                        last -= 1
                        break
                    last -= 1
            index += 1
        command = command[index:last + 1]
        if command.lower().startswith('cmd'):
            pattern = r"\s*(call)?cmd(.exe)?\s*((\/A|\/U|\/Q|\/D)\s+|((\/E|\/F|\/V):(ON|OFF))\s*)*(\/c|\/r)\s*(?P<cmd>.*)"
            match = re.search(pattern, command, re.IGNORECASE)
            if match and match.group('cmd'):
                cmd = match.group('cmd').strip(_T.QUOTE)
                self.pending_subcommand = cmd
        else:
            pattern = (
                r"(\s*(call)?\s*set\s+\"?(?P<var>[\w#$'()*+,-.?@\[\]`{}~ ]+)=\s*(?P<val>[^\"\n]*)\"?)|"
                r"(\s*(call)?\s*set\s+/p\s+\"?(?P<input>[\w#$'()*+,-.?@\[\]`{}~ ]+)=[^\"\n]*\"?)"
            )
            match = re.search(pattern, command, re.IGNORECASE)
            if match is None:
                return
            var = match.group('input')
            if var is not None:
                # The value would be typed in by a user; use a placeholder.
                var = var.lower()
                val = '__input__'
            else:
                var = match.group('var').lower()
                val = match.group('val')
            self.variables[var].definitions[lno] = val

    def normalize(self, lno: int, command: str):
        """
        Return `command` with escape characters removed, known variable
        references replaced by their values, and unquoted commas, semicolons
        and tabs replaced by spaces. The result is stripped of surrounding
        whitespace. Variable evaluations are recorded for line `lno`.
        """
        result = ''
        state = _S.INIT
        # States to return to once an escape or a variable reference ends.
        stack = []
        # Offset in result at which the variable reference being read begins.
        variable_start = 0
        for token in command:
            if state == _S.INIT:
                if token == _T.QUOTE:
                    state = _S.STRING
                    result += token
                elif token in ',;\t':
                    # commas (",") are replaced by spaces, unless they are part of a string in doublequotes
                    # semicolons (";") are replaced by spaces, unless they are part of a string in doublequotes
                    # tabs are replaced by a single space
                    # http://www.robvanderwoude.com/parameters.php
                    result += _T.SPACE
                elif token == _T.ESCAPE:
                    state = _S.ESCAPE
                    stack.append(_S.INIT)
                elif token == _T.V1:
                    variable_start = len(result)
                    result += _T.V1
                    stack.append(_S.INIT)
                    state = _S.VARIABLE_TYPE1
                elif token == _T.V2:
                    # The opening delimiter is stored as a percent sign so that
                    # both kinds of reference are evaluated the same way.
                    variable_start = len(result)
                    result += _T.V1
                    stack.append(_S.INIT)
                    state = _S.VARIABLE_TYPE2
                else:
                    result += token
            elif state == _S.STRING:
                if token == _T.QUOTE:
                    state = _S.INIT
                    result += token
                elif token == _T.V1:
                    variable_start = len(result)
                    result += _T.V1
                    stack.append(_S.STRING)
                    state = _S.VARIABLE_TYPE1
                elif token == _T.V2:
                    variable_start = len(result)
                    result += _T.V1
                    stack.append(_S.STRING)
                    state = _S.VARIABLE_TYPE2
                elif token == _T.ESCAPE:
                    state = _S.ESCAPE
                    stack.append(_S.STRING)
                else:
                    result += token
            elif state == _S.VARIABLE_TYPE1:
                if token.isdigit() and result[-1] == _T.V1:
                    # A percent sign directly followed by a digit is kept as is
                    # and ends the reference right away.
                    result += token
                    state = stack.pop()
                elif token == _T.V1 and result[-1] != _T.V1:
                    # Closing delimiter: replace the reference by its value.
                    result += _T.V1
                    value = self.evaluate_variable(lno, result[variable_start:].lower())
                    result = result[:variable_start]
                    result += value
                    state = stack.pop()
                elif token == _T.V1:
                    # Two delimiters in a row: restart the reference here.
                    variable_start = len(result)
                    result += token
                elif token == _T.QUOTE:
                    if stack[-1] == _S.STRING:
                        # The enclosing string ends before the reference does.
                        result += token
                        stack.pop()
                        state = _S.INIT
                    else:
                        result += token
                elif token == _T.ESCAPE:
                    state = _S.ESCAPE
                    stack.append(_S.VARIABLE_TYPE1)
                else:
                    result += token
            elif state == _S.VARIABLE_TYPE2:
                if token == _T.V2 and result[-1] != _T.V1:
                    # Closing delimiter: replace the reference by its value.
                    result += _T.V1
                    value = self.evaluate_variable(lno, result[variable_start:].lower())
                    result = result[:variable_start]
                    result += value
                    state = stack.pop()
                elif token == _T.V2:
                    # Two delimiters in a row: restart the reference here.
                    variable_start = len(result)
                    result += token
                elif token == _T.QUOTE:
                    if stack[-1] == _S.STRING:
                        # The enclosing string ends before the reference does.
                        result += token
                        stack.pop()
                        state = _S.INIT
                    else:
                        result += token
                elif token == _T.ESCAPE:
                    state = _S.ESCAPE
                    # Return to this state after the escaped character. The
                    # original pushed VARIABLE_TYPE1 here, after which the
                    # closing exclamation mark was no longer recognized.
                    stack.append(_S.VARIABLE_TYPE2)
                else:
                    result += token
            elif state == _S.ESCAPE:
                # The escaped character is copied literally.
                result += token
                state = stack.pop()
        return result.strip()

    def _interpret(self, text: str, lno: int = 0, depth: int = 0) -> Generator[DeobfuscatedLine, None, None]:
        """
        Normalize and interpret every command in `text`, yielding one
        `DeobfuscatedLine` per command. Commands are numbered consecutively
        starting at `lno`. When a command launches a sub-command, that
        sub-command is processed right away at `depth + 1` by a fresh
        interpreter that starts out with the current variable values, and its
        lines are yielded directly after the launching command.
        """
        for line in self.read_logical_lines(text):
            for command in self.commands(line):
                normalized = self.normalize(lno, command)
                self.interpret(lno, normalized)
                yield DeobfuscatedLine(depth, normalized)
                lno += 1
                if self.pending_subcommand is not None:
                    subcommand = BatchDeobfuscator()
                    for name, var in self.variables.items():
                        subcommand.variables[name].definitions[-1] = var.value
                    for result in subcommand._interpret(self.pending_subcommand, lno, depth + 1):
                        yield result
                        # Keep the numbering aligned with the yielded lines.
                        lno += 1
                    self.pending_subcommand = None

    def deobfuscate(
        self,
        text: str,
        mode: STRIP = STRIP.NONE
    ) -> str:
        """
        Deobfuscate the batch script `text` and return the result as a single
        string with one command per line. Sub-commands are indented by three
        spaces per nesting level and announced by a `:: SUBCOMMAND` line.
        `mode` selects which kinds of lines are omitted; empty commands are
        always omitted.
        """
        lines = list(self._interpret(text))
        # Line numbers of all definitions of variables that were evaluated.
        used = set()
        if mode & STRIP.DEFINITION:
            for variable in self.variables.values():
                if variable.evaluations:
                    used.update(variable.definitions)

        def tab(depth):
            return ' ' * 3 * depth

        def output():
            depth = 0
            for lno, line in enumerate(lines):
                if lno in used:
                    continue
                if not line.value:
                    continue
                # The command name is the first word, up to any slash.
                cmd = line.value.split()[0].lower()
                cmd, _, _ = cmd.partition('/')
                if mode & STRIP.ECHO and cmd == 'echo':
                    continue
                if mode & STRIP.COMMENT and (line.value.startswith('::') or cmd == 'rem'):
                    continue
                if line.depth > depth:
                    # Announce the descent at the indentation of the parent.
                    yield F'{tab(depth)}:: SUBCOMMAND'
                depth = line.depth
                yield F'{tab(depth)}{line.value}'

        return '\n'.join(output())
Classes
class STRIP (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Expand source code Browse git
class STRIP(IntFlag): DEFINITION = 0b001 # noqa ECHO = 0b010 # noqa COMMENT = 0b100 # noqa ALL = 0b111 # noqa NONE = 0b000 # noqa
Ancestors
- enum.IntFlag
- builtins.int
- enum.Flag
- enum.Enum
Class variables
var DEFINITION
var ECHO
var COMMENT
var ALL
var NONE
class ScriptVariable (definitions=<factory>, evaluations=<factory>)
-
ScriptVariable(definitions: 'Dict[int, str]' = <factory>, evaluations: 'List[int]' = <factory>)
Expand source code Browse git
class ScriptVariable: definitions: Dict[int, str] = field(default_factory=dict) evaluations: List[int] = field(default_factory=list) @property def value(self): latest = max(self.definitions) return self.definitions[latest]
Class variables
var definitions
var evaluations
Instance variables
var value
-
Expand source code Browse git
@property def value(self): latest = max(self.definitions) return self.definitions[latest]
class DeobfuscatedLine (depth, value)
-
DeobfuscatedLine(depth, value)
Expand source code Browse git
class DeobfuscatedLine(NamedTuple): depth: int value: str
Ancestors
- builtins.tuple
Instance variables
var depth
-
Alias for field number 0
var value
-
Alias for field number 1
class BatchDeobfuscator
-
Expand source code Browse git
class BatchDeobfuscator: variables: Dict[str, ScriptVariable] pending_subcommand: Optional[str] def __init__(self): if os.name == 'nt': variables = {name.lower(): value for name, value in os.environ.items()} else: variables = { 'allusersprofile' : 'C:\\ProgramData', 'appdata' : 'C:\\Users\\puncher\\AppData\\Roaming', 'commonprogramfiles' : 'C:\\Program Files\\Common Files', 'commonprogramfiles(x86)"' : 'C:\\Program Files (x86)\\Common Files', 'commonprogramw6432' : 'C:\\Program Files\\Common Files', 'computername' : 'MISCREANTTEARS', 'comspec' : 'C:\\WINDOWS\\system32\\cmd.exe', 'driverdata' : 'C:\\Windows\\System32\\Drivers\\DriverData', 'fps_browser_app_profile_string' : 'Internet Explorer', 'fps_browser_user_profile_string' : 'Default', 'homedrive' : 'C:', 'homepath' : '\\Users\\puncher', 'java_home' : 'C:\\Program Files\\Amazon Corretto\\jdk11.0.7_10', 'localappdata' : 'C:\\Users\\puncher\\AppData\\Local', 'logonserver' : '\\\\MISCREANTTEARS', 'number_of_processors' : '4', 'onedrive' : 'C:\\Users\\puncher\\OneDrive', 'os' : 'Windows_NT', 'path' : 'C:\\Program Files\\Amazon Corretto\\jdk11.0.7_10\\bin;C:\\WINDOWS\\system32;' 'C:\\WINDOWS;C:\\WINDOWS\\System32\\Wbem;C:\\WINDOWS\\System32\\WindowsPowerS' 'hell\\v1.0\\;C:\\Program Files\\dotnet\\;C:\\Program Files\\Microsoft SQL Se' 'rver\\130\\Tools\\Binn\\;C:\\Users\\puncher\\AppData\\Local\\Microsoft\\Wind' 'owsApps;%USERPROFILE%\\AppData\\Local\\Microsoft\\WindowsApps;', 'pathext' : '.COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC', 'processor_architecture' : 'AMD64', 'processor_identifier' : 'Intel Core Ti-83 Family 6 Model 158 Stepping 10, GenuineIntel', 'processor_level' : '6', 'processor_revision' : '9e0a', 'programdata' : 'C:\\ProgramData', 'programfiles' : 'C:\\Program Files', 'programfiles(x86)"' : 'C:\\Program Files (x86)', 'programw6432' : 'C:\\Program Files', 'psmodulepath' : 'C:\\WINDOWS\\system32\\WindowsPowerShell\\v1.0\\Modules\\', 'public' : 'C:\\Users\\Public', 'sessionname' : 'Console', 
'systemdrive' : 'C:', 'systemroot' : 'C:\\WINDOWS', 'temp' : 'C:\\Users\\puncher\\AppData\\Local\\Temp', 'tmp' : 'C:\\Users\\puncher\\AppData\\Local\\Temp', 'userdomain' : 'MISCREANTTEARS', 'userdomain_roamingprofile' : 'MISCREANTTEARS', 'username' : 'puncher', 'userprofile' : 'C:\\Users\\puncher', 'windir' : 'C:\\WINDOWS', '__compat_layer' : 'DetectorsMessageBoxErrors', } self.pending_subcommand = None self.variables = defaultdict(ScriptVariable) for name, value in variables.items(): self.variables[name].definitions[-1] = value def read_logical_lines(self, text: str): logical_line = io.StringIO() for line in text.splitlines(False): if not line.endswith(_T.ESCAPE): logical_line.write(line) yield logical_line.getvalue() logical_line.seek(0) logical_line.truncate(0) else: logical_line.write(line) logical_line.write('\n') def commands(self, logical_line: str) -> Generator[str, None, None]: state = _S.INIT start = 0 for offset, token in enumerate(logical_line): if state is _S.INIT: if token == _T.QUOTE: state = _S.STRING elif token == _T.ESCAPE: state = _S.ESCAPE elif token == _T.CONNECT or token == _T.PIPE: yield logical_line[start:offset].strip() start = offset + 1 elif state is _S.STRING: if token == _T.QUOTE: state = state.INIT elif state is _S.ESCAPE: state = _S.INIT last_command = logical_line[start:].strip() if last_command: yield last_command def evaluate_variable(self, lno, variable): pattern = ( r'''(?P<delim>%|!)\s*(?P<name>[\w#$'()*+,-.?@\[\]`{}~ ]+)''' r'''(:~\s*(?P<index>[+-]?\d+)\s*,\s*(?P<length>[+-]?\d+)\s*)?(?P=delim)''') match = re.fullmatch(pattern, variable) if match is None: return variable var = self.variables.get(match.group('name').lower()) if var is None: return variable value = var.value var.evaluations.append(lno) if match.group('index'): index = int(match.group('index')) length = int(match.group('length')) if length >= 0: value = value[index : length + index] else: value = value[index : length] return value return value def interpret(self, 
lno: int, command: str) -> None: command = command.strip() index = 0 last = len(command) - 1 while index < last and (command[index] == _T.SPACE or command[index] == _T.OPEN): if command[index] == _T.OPEN: while last > index and (command[last] == _T.SPACE or command[last] == _T.CLOSE): if command[last] == _T.CLOSE: last -= 1 break last -= 1 index += 1 command = command[index : last + 1] if command.lower().startswith('cmd'): pattern = r"\s*(call)?cmd(.exe)?\s*((\/A|\/U|\/Q|\/D)\s+|((\/E|\/F|\/V):(ON|OFF))\s*)*(\/c|\/r)\s*(?P<cmd>.*)" match = re.search(pattern, command, re.IGNORECASE) if match and match.group('cmd'): cmd = match.group('cmd').strip(_T.QUOTE) self.pending_subcommand = cmd else: pattern = ( r"(\s*(call)?\s*set\s+\"?(?P<var>[\w#$'()*+,-.?@\[\]`{}~ ]+)=\s*(?P<val>[^\"\n]*)\"?)|" r"(\s*(call)?\s*set\s+/p\s+\"?(?P<input>[\w#$'()*+,-.?@\[\]`{}~ ]+)=[^\"\n]*\"?)" ) match = re.search(pattern, command, re.IGNORECASE) if match is None: return var = match.group('input') if var is not None: var = var.lower() val = '__input__' else: var = match.group('var').lower() val = match.group('val') self.variables[var].definitions[lno] = val def normalize(self, lno: int, command: str): result = '' state = _S.INIT stack = [] for token in command: if state == _S.INIT: if token == _T.QUOTE: state = _S.STRING result += token elif token in ',;\t': # commas (",") are replaced by spaces, unless they are part of a string in doublequotes # semicolons (";") are replaced by spaces, unless they are part of a string in doublequotes # tabs are replaced by a single space # http://www.robvanderwoude.com/parameters.php result += _T.SPACE elif token == _T.ESCAPE: state = _S.ESCAPE stack.append(_S.INIT) elif token == _T.V1: variable_start = len(result) result += _T.V1 stack.append(_S.INIT) state = _S.VARIABLE_TYPE1 elif token == _T.V2: variable_start = len(result) result += _T.V1 stack.append(_S.INIT) state = _S.VARIABLE_TYPE2 else: result += token elif state == _S.STRING: if token == _T.QUOTE: 
state = _S.INIT result += token elif token == _T.V1: variable_start = len(result) result += _T.V1 stack.append(_S.STRING) state = _S.VARIABLE_TYPE1 elif token == _T.V2: variable_start = len(result) result += _T.V1 stack.append(_S.STRING) state = _S.VARIABLE_TYPE2 elif token == _T.ESCAPE: state = _S.ESCAPE stack.append(_S.STRING) else: result += token elif state == _S.VARIABLE_TYPE1: if token.isdigit() and result[-1] == _T.V1: result += token state = stack.pop() elif token == _T.V1 and result[-1] != _T.V1: result += _T.V1 value = self.evaluate_variable(lno, result[variable_start:].lower()) result = result[:variable_start] result += value state = stack.pop() elif token == _T.V1: variable_start = len(result) result += token elif token == _T.QUOTE: if stack[-1] == _S.STRING: result += token stack.pop() state = _S.INIT else: result += token elif token == _T.ESCAPE: state = _S.ESCAPE stack.append(_S.VARIABLE_TYPE1) else: result += token elif state == _S.VARIABLE_TYPE2: if token == _T.V2 and result[-1] != _T.V1: result += _T.V1 value = self.evaluate_variable(lno, result[variable_start:].lower()) result = result[:variable_start] result += value state = stack.pop() elif token == _T.V2: variable_start = len(result) result += token elif token == _T.QUOTE: if stack[-1] == _S.STRING: result += token stack.pop() state = _S.INIT else: result += token elif token == _T.ESCAPE: state = _S.ESCAPE stack.append(_S.VARIABLE_TYPE1) else: result += token elif state == _S.ESCAPE: result += token state = stack.pop() return result.strip() def _interpret(self, text: str, lno: int = 0, depth: int = 0) -> Generator[DeobfuscatedLine, None, None]: for line in self.read_logical_lines(text): for command in self.commands(line): normalized = self.normalize(lno, command) self.interpret(lno, normalized) yield DeobfuscatedLine(depth, normalized) lno += 1 if self.pending_subcommand is not None: subcommand = BatchDeobfuscator() for name, var in self.variables.items(): 
subcommand.variables[name].definitions[-1] = var.value for result in subcommand._interpret(self.pending_subcommand, lno, depth + 1): yield result lno += 1 self.pending_subcommand = None def deobfuscate( self, text: str, mode: STRIP = STRIP.NONE ) -> str: lines = list(self._interpret(text)) used = set() if mode & STRIP.DEFINITION: for variable in self.variables.values(): if variable.evaluations: used.update(variable.definitions.keys()) def tab(depth): return ' ' * 3 * depth def output(): depth = 0 for lno, line in enumerate(lines): if lno in used: continue if not line.value: continue cmd = line.value.split()[0].lower() cmd, _, _ = cmd.partition('/') if mode & STRIP.ECHO and cmd == 'echo': continue if mode & STRIP.COMMENT and (line.value.startswith('::') or cmd == 'rem'): continue if line.depth > depth: yield F'{tab(depth)}:: SUBCOMMAND' depth = line.depth yield F'{tab(depth)}{line.value}' return '\n'.join(output())
Class variables
var variables
var pending_subcommand
Methods
def read_logical_lines(self, text)
-
Expand source code Browse git
def read_logical_lines(self, text: str): logical_line = io.StringIO() for line in text.splitlines(False): if not line.endswith(_T.ESCAPE): logical_line.write(line) yield logical_line.getvalue() logical_line.seek(0) logical_line.truncate(0) else: logical_line.write(line) logical_line.write('\n')
def commands(self, logical_line)
-
Expand source code Browse git
def commands(self, logical_line: str) -> Generator[str, None, None]: state = _S.INIT start = 0 for offset, token in enumerate(logical_line): if state is _S.INIT: if token == _T.QUOTE: state = _S.STRING elif token == _T.ESCAPE: state = _S.ESCAPE elif token == _T.CONNECT or token == _T.PIPE: yield logical_line[start:offset].strip() start = offset + 1 elif state is _S.STRING: if token == _T.QUOTE: state = state.INIT elif state is _S.ESCAPE: state = _S.INIT last_command = logical_line[start:].strip() if last_command: yield last_command
def evaluate_variable(self, lno, variable)
-
Expand source code Browse git
def evaluate_variable(self, lno, variable): pattern = ( r'''(?P<delim>%|!)\s*(?P<name>[\w#$'()*+,-.?@\[\]`{}~ ]+)''' r'''(:~\s*(?P<index>[+-]?\d+)\s*,\s*(?P<length>[+-]?\d+)\s*)?(?P=delim)''') match = re.fullmatch(pattern, variable) if match is None: return variable var = self.variables.get(match.group('name').lower()) if var is None: return variable value = var.value var.evaluations.append(lno) if match.group('index'): index = int(match.group('index')) length = int(match.group('length')) if length >= 0: value = value[index : length + index] else: value = value[index : length] return value return value
def interpret(self, lno, command)
-
Expand source code Browse git
def interpret(self, lno: int, command: str) -> None: command = command.strip() index = 0 last = len(command) - 1 while index < last and (command[index] == _T.SPACE or command[index] == _T.OPEN): if command[index] == _T.OPEN: while last > index and (command[last] == _T.SPACE or command[last] == _T.CLOSE): if command[last] == _T.CLOSE: last -= 1 break last -= 1 index += 1 command = command[index : last + 1] if command.lower().startswith('cmd'): pattern = r"\s*(call)?cmd(.exe)?\s*((\/A|\/U|\/Q|\/D)\s+|((\/E|\/F|\/V):(ON|OFF))\s*)*(\/c|\/r)\s*(?P<cmd>.*)" match = re.search(pattern, command, re.IGNORECASE) if match and match.group('cmd'): cmd = match.group('cmd').strip(_T.QUOTE) self.pending_subcommand = cmd else: pattern = ( r"(\s*(call)?\s*set\s+\"?(?P<var>[\w#$'()*+,-.?@\[\]`{}~ ]+)=\s*(?P<val>[^\"\n]*)\"?)|" r"(\s*(call)?\s*set\s+/p\s+\"?(?P<input>[\w#$'()*+,-.?@\[\]`{}~ ]+)=[^\"\n]*\"?)" ) match = re.search(pattern, command, re.IGNORECASE) if match is None: return var = match.group('input') if var is not None: var = var.lower() val = '__input__' else: var = match.group('var').lower() val = match.group('val') self.variables[var].definitions[lno] = val
def normalize(self, lno, command)
-
Expand source code Browse git
def normalize(self, lno: int, command: str): result = '' state = _S.INIT stack = [] for token in command: if state == _S.INIT: if token == _T.QUOTE: state = _S.STRING result += token elif token in ',;\t': # commas (",") are replaced by spaces, unless they are part of a string in doublequotes # semicolons (";") are replaced by spaces, unless they are part of a string in doublequotes # tabs are replaced by a single space # http://www.robvanderwoude.com/parameters.php result += _T.SPACE elif token == _T.ESCAPE: state = _S.ESCAPE stack.append(_S.INIT) elif token == _T.V1: variable_start = len(result) result += _T.V1 stack.append(_S.INIT) state = _S.VARIABLE_TYPE1 elif token == _T.V2: variable_start = len(result) result += _T.V1 stack.append(_S.INIT) state = _S.VARIABLE_TYPE2 else: result += token elif state == _S.STRING: if token == _T.QUOTE: state = _S.INIT result += token elif token == _T.V1: variable_start = len(result) result += _T.V1 stack.append(_S.STRING) state = _S.VARIABLE_TYPE1 elif token == _T.V2: variable_start = len(result) result += _T.V1 stack.append(_S.STRING) state = _S.VARIABLE_TYPE2 elif token == _T.ESCAPE: state = _S.ESCAPE stack.append(_S.STRING) else: result += token elif state == _S.VARIABLE_TYPE1: if token.isdigit() and result[-1] == _T.V1: result += token state = stack.pop() elif token == _T.V1 and result[-1] != _T.V1: result += _T.V1 value = self.evaluate_variable(lno, result[variable_start:].lower()) result = result[:variable_start] result += value state = stack.pop() elif token == _T.V1: variable_start = len(result) result += token elif token == _T.QUOTE: if stack[-1] == _S.STRING: result += token stack.pop() state = _S.INIT else: result += token elif token == _T.ESCAPE: state = _S.ESCAPE stack.append(_S.VARIABLE_TYPE1) else: result += token elif state == _S.VARIABLE_TYPE2: if token == _T.V2 and result[-1] != _T.V1: result += _T.V1 value = self.evaluate_variable(lno, result[variable_start:].lower()) result = result[:variable_start] result += 
value state = stack.pop() elif token == _T.V2: variable_start = len(result) result += token elif token == _T.QUOTE: if stack[-1] == _S.STRING: result += token stack.pop() state = _S.INIT else: result += token elif token == _T.ESCAPE: state = _S.ESCAPE stack.append(_S.VARIABLE_TYPE1) else: result += token elif state == _S.ESCAPE: result += token state = stack.pop() return result.strip()
def deobfuscate(self, text, mode=STRIP.NONE)
-
Expand source code Browse git
def deobfuscate( self, text: str, mode: STRIP = STRIP.NONE ) -> str: lines = list(self._interpret(text)) used = set() if mode & STRIP.DEFINITION: for variable in self.variables.values(): if variable.evaluations: used.update(variable.definitions.keys()) def tab(depth): return ' ' * 3 * depth def output(): depth = 0 for lno, line in enumerate(lines): if lno in used: continue if not line.value: continue cmd = line.value.split()[0].lower() cmd, _, _ = cmd.partition('/') if mode & STRIP.ECHO and cmd == 'echo': continue if mode & STRIP.COMMENT and (line.value.startswith('::') or cmd == 'rem'): continue if line.depth > depth: yield F'{tab(depth)}:: SUBCOMMAND' depth = line.depth yield F'{tab(depth)}{line.value}' return '\n'.join(output())