Module refinery.units.scripting.ps1arg
Expand source code Browse git
from __future__ import annotations
import base64
import codecs
import io
import os
import re
from refinery.units import Unit
def _split_argv(cmdline: str) -> list[str]:
"""
Split a command line into argv using MSVC CRT rules.
The algorithm follows the MSVC C runtime documentation:
- Whitespace outside quotes separates arguments.
- A double quote toggles quote state.
- Backslashes are literal unless immediately preceding a double quote.
- 2N backslashes before a quote produce N backslashes and the quote toggles state.
- 2N+1 backslashes before a quote produce N backslashes and a literal quote.
"""
args: list[str] = []
buf = io.StringIO()
i = 0
n = len(cmdline)
quoted = False
while i < n:
c = cmdline[i]
if c == '\\':
bs = 0
while i < n and cmdline[i] == '\\':
bs += 1
i += 1
if i < n and cmdline[i] == '"':
buf.write('\\' * (bs // 2))
if bs % 2 == 1:
buf.write('"')
else:
quoted = not quoted
i += 1
else:
buf.write('\\' * bs)
elif c == '"':
quoted = not quoted
i += 1
elif c in (' ', '\t') and not quoted:
if b := buf.getvalue():
args.append(b)
buf.seek(0)
buf.truncate(0)
i += 1
else:
buf.write(c)
i += 1
if b := buf.getvalue():
args.append(b)
return args
_PS_EXECUTABLE_NAMES = frozenset({
'powershell',
'powershell.exe',
'pwsh',
'pwsh.exe',
})
_PARAM_WITH_NEXT_ARG = {
'executionpolicy',
'ep',
'windowstyle',
'configurationname',
'custompipename',
'settingsfile',
'workingdirectory',
'encodedarguments',
'encodedargument',
'outputformat',
'inputformat',
}
_KNOWN_SWITCHES = {
'command' : 'c',
'encodedcommand' : 'e',
'file' : 'f',
'executionpolicy' : 'ex',
'ep' : 'ep',
'windowstyle' : 'w',
'noprofile' : 'nop',
'noninteractive' : 'noni',
'nologo' : 'nol',
'noexit' : 'noe',
'sta' : 's',
'mta' : 'm',
'outputformat' : 'o',
'inputformat' : 'inp',
'configurationname' : 'con',
'custompipename' : 'cu',
'settingsfile' : 'se',
'workingdirectory' : 'wo',
'encodedarguments' : 'encodeda',
'encodedargument' : 'encodeda',
'version' : 'v',
'login' : 'l',
'help' : 'h',
}
def _match_switch(key: str) -> str | None:
"""
Match a PowerShell command-line switch using prefix matching. Strips the leading dash or
slash, then does a case-insensitive prefix match against known parameters, respecting the
minimum unique prefix length.
"""
key = key.lstrip('-/')
if key.startswith('-'):
key = key[1:]
key = key.lower()
if not key:
return None
for param, min_prefix in _KNOWN_SWITCHES.items():
if param.startswith(key) and len(key) >= len(min_prefix):
return param
return None
class ps1arg(Unit):
    """
    Extracts PowerShell code from a powershell.exe command line.
    Parses command lines like the following and extracts the actual PowerShell code:
    - powershell.exe -nop -w 1 -enc BASE64
    - powershell.exe -command "& { ... }"
    The unit handles CRT argument-level escaping (backslash-double-quote for literal quotes) and
    base64-encoded commands. This is useful for analyzing malware samples that contain the full
    command line for powershell.exe, including CRT-level quote escaping that is not valid inside
    PowerShell itself.
    """

    def process(self, data: bytearray):
        """
        Decode the input as a command line, split it into arguments and return the PowerShell
        code it carries, encoded with the unit's codec.

        Raises `ValueError` when the command line is empty, when `-Command` or
        `-EncodedCommand` is the last argument, when `-File` is used, or when no code is found.
        """
        text = codecs.decode(data, self.codec, errors='surrogateescape')
        argv = _split_argv(text)
        if not argv:
            raise ValueError('empty command line')
        i = 0
        # Skip the first argument when it names the PowerShell executable, with or without a
        # directory prefix using either path separator.
        name = os.path.basename(argv[0]).lower()
        if name in _PS_EXECUTABLE_NAMES or re.fullmatch(
            r'(?i)(?:.*[\\/])?(?:powershell|pwsh)(?:\.exe)?', argv[0]
        ):
            i = 1
        # Arguments that are neither recognised switches nor switch values; they are used as the
        # code when no -Command or -EncodedCommand switch is present.
        command_args: list[str] = []
        while i < len(argv):
            arg = argv[i]
            if not arg.startswith(('-', '/')):
                command_args.append(arg)
                i += 1
                continue
            switch = _match_switch(arg)
            self.log_info(switch)
            if switch == 'command':
                i += 1
                if i >= len(argv):
                    # Mirror the -EncodedCommand handling instead of emitting empty output.
                    raise ValueError('-Command requires an argument')
                # Everything after -Command belongs to the command text.
                result = ' '.join(argv[i:])
                return codecs.encode(
                    result, self.codec, errors='surrogateescape')
            if switch == 'encodedcommand':
                i += 1
                if i >= len(argv):
                    raise ValueError('-EncodedCommand requires an argument')
                blob = argv[i]
                # Restore base64 padding that may have been stripped.
                blob += '=' * (-len(blob) % 4)
                raw = base64.b64decode(blob)
                return codecs.decode(raw, 'utf-16-le').encode(self.codec)
            if switch == 'file':
                raise ValueError('-File parameter found; code is in a file, not in the command line')
            if switch is not None and switch in _PARAM_WITH_NEXT_ARG:
                # Skip the switch together with its value.
                i += 2
                continue
            if switch is not None:
                i += 1
                continue
            # Unrecognised dash/slash argument: keep it as part of the code.
            command_args.append(arg)
            i += 1
        if command_args:
            result = ' '.join(command_args)
            return codecs.encode(
                result, self.codec, errors='surrogateescape')
        raise ValueError(
            'no PowerShell code found in command line')
Classes
class ps1arg-
Extracts PowerShell code from a powershell.exe command line.
Parses command lines like the following and extracts the actual PowerShell code:
- powershell.exe -nop -w 1 -enc BASE64
- powershell.exe -command "& { … }"
The unit handles CRT argument-level escaping (backslash-double-quote for literal quotes) and base64-encoded commands. This is useful for analyzing malware samples that contain the full command line for powershell.exe, including CRT-level quote escaping that is not valid inside PowerShell itself.
Expand source code Browse git
class ps1arg(Unit): """ Extracts PowerShell code from a powershell.exe command line. Parses command lines like the following and extracts the actual PowerShell code: - powershell.exe -nop -w 1 -enc BASE64 - powershell.exe -command "& { ... }" The unit handles CRT argument-level escaping (backslash-double-quote for literal quotes) and base64-encoded commands. This is useful for analyzing malware samples that contain the full command line for powershell.exe, including CRT-level quote escaping that is not valid inside PowerShell itself. """ def process(self, data: bytearray): text = codecs.decode(data, self.codec, errors='surrogateescape') argv = _split_argv(text) if not argv: raise ValueError('empty command line') i = 0 name = os.path.basename(argv[0]).lower() if name in _PS_EXECUTABLE_NAMES or re.fullmatch( r'(?i)(?:.*[\\/])?(?:powershell|pwsh)(?:\.exe)?', argv[0] ): i = 1 command_args: list[str] = [] while i < len(argv): arg = argv[i] if not arg.startswith(('-', '/')): command_args.append(arg) i += 1 continue switch = _match_switch(arg) self.log_info(switch) if switch == 'command': i += 1 result = ' '.join(argv[i:]) return codecs.encode( result, self.codec, errors='surrogateescape') if switch == 'encodedcommand': i += 1 if i >= len(argv): raise ValueError('-EncodedCommand requires an argument') blob = argv[i] blob += '=' * (-len(blob) % 4) raw = base64.b64decode(blob) return codecs.decode(raw, 'utf-16-le').encode(self.codec) if switch == 'file': raise ValueError('-File parameter found; code is in a file, not in the command line') if switch is not None and switch in _PARAM_WITH_NEXT_ARG: i += 2 continue if switch is not None: i += 1 continue command_args.append(arg) i += 1 if command_args: result = ' '.join(command_args) return codecs.encode( result, self.codec, errors='surrogateescape') raise ValueError( 'no PowerShell code found in command line')Ancestors
Subclasses
Class variables
var reverse-
The type of the None singleton.
Inherited members