Module refinery.lib.patterns
Library of regular expression patterns.
Expand source code Browse git
"""
Library of regular expression patterns.
"""
from __future__ import annotations
import enum
import functools
import re
from typing import TYPE_CHECKING, Callable, Iterator, overload
from refinery.lib.patterns.tlds import tlds
from refinery.lib.tools import normalize_to_display, normalize_to_identifier
from refinery.lib.types import buf
if TYPE_CHECKING:
from re import Match
class PatternMethods:
@overload
def split(self, string: buf, maxsplit: int = 0) -> list[bytes]:
...
@overload
def split(self, string: str, maxsplit: int = 0) -> list[str]:
...
def split(self, string, maxsplit=0) -> list:
...
@overload
def fullmatch(self, string: buf, pos: int = 0, endpos: int | None = None) -> Match[bytes]:
...
@overload
def fullmatch(self, string: str, pos: int = 0, endpos: int | None = None) -> Match[str]:
...
def fullmatch(self, string, pos=0, endpos=None) -> Match:
...
@overload
def search(self, string: buf, pos: int = 0, endpos: int | None = None) -> Match[bytes]:
...
@overload
def search(self, string: str, pos: int = 0, endpos: int | None = None) -> Match[str]:
...
def search(self, string, pos=0, endpos=None) -> Match:
...
@overload
def sub(self, repl: buf | Callable[[Match[bytes]], buf], string: buf, count: int = 0) -> bytes:
...
@overload
def sub(self, repl: str | Callable[[Match[str]], str], string: str, count: int = 0) -> str:
...
def sub(self, repl, string, count=0) -> str | bytes:
...
@overload
def finditer(self, string: buf, pos: int = 0, endpos: int | None = None) -> Iterator[Match[bytes]]:
...
@overload
def finditer(self, string: str, pos: int = 0, endpos: int | None = None) -> Iterator[Match[str]]:
...
def finditer(self, string, pos=0, endpos=None) -> Iterator[Match]:
...
else:
PatternMethods = object
def _sized_suffix(lower: int, upper: int):
if lower <= 0:
if upper <= 0:
return '*'
else:
return F'{{1,{upper}}}'
elif upper <= 0:
if lower == 1:
return '+'
else:
return F'{{{lower},}}'
else:
return F'{{{lower},{upper}}}'
class pattern(PatternMethods):
"""
A wrapper for regular expression pattern objects created from re.compile,
allowing combination of several patterns into one via overloaded
operators.
"""
str_pattern: str
bin_pattern: bytes
def __init__(self, pattern: str, flags: int = 0):
self.str_pattern = pattern
self.bin_pattern = pattern.encode('ascii')
self.flags = flags
def __bytes__(self):
return self.bin_pattern
@functools.cached_property
def bin(self):
return re.compile(B'(%s)' % self.bin_pattern, flags=self.flags)
@functools.cached_property
def str(self):
return re.compile(self.str_pattern, flags=self.flags)
def __hash__(self):
return hash((self.str_pattern, self.flags))
def __eq__(self, other):
if isinstance(other, str):
return self.str_pattern == other and self.flags == 0
if isinstance(other, pattern):
return self.str_pattern == other.str_pattern and self.flags == other.flags
return False
def __str__(self):
return self.str_pattern
def __getattr__(self, verb):
if not hasattr(re.Pattern, verb):
raise AttributeError(verb)
bin_attr = getattr(self.bin, verb)
if not callable(bin_attr):
return bin_attr
str_attr = getattr(self.str, verb)
def wrapper(*args, **kwargs):
for argument in args:
if isinstance(argument, str):
return str_attr(*args, **kwargs)
else:
return bin_attr(*args, **kwargs)
functools.update_wrapper(wrapper, bin_attr)
return wrapper
class alphabet(pattern):
"""
A pattern object representing strings of letters from a given alphabet, with
an optional prefix and suffix.
"""
def __init__(
self,
repeat: str,
prefix: str = '',
suffix: str = '',
lower: int = 1,
upper: int = 0,
prefix_min: int = 0,
prefix_max: int = 0,
suffix_min: int = 0,
suffix_max: int = 0,
token_size: int = 1,
flags: int = 0,
**kwargs
):
self.repeat = repeat
self.prefix = prefix
self.suffix = suffix
self.suffix_min = suffix_min
self.suffix_max = suffix_max
self.prefix_min = prefix_min
self.prefix_max = prefix_max
self.token_size = token_size
lower = lower - suffix_max - prefix_max
upper = upper - suffix_min - prefix_min
if token_size > 1:
lower, _r = divmod(lower, token_size)
if _r and lower == 0:
lower = _r
upper, _r = divmod(upper, token_size)
if _r and upper >= 0:
upper += 1
self.lower = lower
self.upper = upper
count = _sized_suffix(lower, upper)
pattern.__init__(self,
R'{b}(?:{r}){c}{a}'.format(
r=repeat,
b=prefix,
c=count,
a=suffix
),
flags,
**kwargs
)
class tokenize(pattern):
"""
A pattern representing a sequence of tokens matching the `token` pattern, separated
by sequences matching the pattern `sep`. The optional parameter `bound` is required
before and after each token, its default value is the regular expression zero length
match for a word boundary.
"""
def __init__(self, token, sep, bound='\\b', unique_sep=False, sep_ignores_whitespace=True, **kwargs):
if unique_sep:
if sep_ignores_whitespace:
p = (
R'(?:{b}{t}{b}\s{{0,50}}(?P<__sep__>{s})\s{{0,50}})'
R'(?:(?:{b}{t}{b}\s{{0,50}}(?P=__sep__)\s{{0,50}})+{b}{t}{b}|{b}{t}{b})'
)
else:
p = R'(?:{b}{t}{b}(?P<__sep__>{s}))(?:(?:{b}{t}{b}(?P=__sep__))+{b}{t}{b}|{b}{t}{b})'
else:
p = R'(?:{b}{t}{b}{s})+(?:{b}{t}{b})'
pattern.__init__(self, p.format(s=sep, b=bound, t=token), **kwargs)
class _PatternEnum(enum.Enum):
@classmethod
def get(cls, name, default=None):
try:
return cls[name]
except KeyError:
return default
def __str__(self):
return str(self.value)
def __bytes__(self):
return bytes(self.value)
def __repr__(self):
return F'<pattern {self.name}: {self.value}>'
def __getattr__(self, name):
if name in dir(re.Pattern):
return getattr(self.value, name)
raise AttributeError
@property
def display(self):
return normalize_to_display(self.name)
_TLDS = R'(?i:{possible_tld})(?!(?:{dealbreakers}))'.format(
possible_tld='|'.join(tlds),
dealbreakers='|'.join([
R'[a-z]',
R'[A-Za-z]{3}',
R'\.\w\w',
R'\([\'"\w)]'
])
)
# see https://tools.ietf.org/html/rfc2181#section-11
_format_serrated_domain = (
R'(?:\w[a-zA-Z0-9\-\_]{{0,62}}?\.){repeat}'
R'\w[a-zA-Z0-9\-\_]{{0,62}}\.{tlds}'
)
_format_defanged_domain = (
R'(?:\w[a-zA-Z0-9\-\_]{{0,62}}?(?:\[\.\]|\.)){repeat}'
R'\w[a-zA-Z0-9\-\_]{{0,62}}(?:\[\.\]|\.){tlds}'
)
_pattern_utf8 = R'(?:[\x00-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})+'
_pattern_b92 = R'~|(?:[!-_a-}]{2})+[!-_a-}]?'
_pattern_serrated_domain = _format_serrated_domain.format(repeat='{0,20}', tlds=_TLDS)
_pattern_defanged_domain = _format_defanged_domain.format(repeat='{0,20}', tlds=_TLDS)
_pattern_subdomain = _format_serrated_domain.format(repeat='{1,20}', tlds=_TLDS)
_pattern_octet = R'(?:1\d\d|2[0-4]\d|25[0-5]|[1-9]?\d)'
_pattern_serrated_ipv4 = R'(?<![0-9])(?:{o}\.){{3}}{o}(?![0-9])'.format(o=_pattern_octet)
_pattern_defanged_ipv4 = R'(?:{o}{d}){{3}}{o}'.format(o=_pattern_octet, d=R'(?:\[\.\]|\.)')
# Taken from: https://stackoverflow.com/a/17871737/9130824
_pattern_ipv6 = (
R'('
R'([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|' # 1:2:3:4:5:6:7:8
R'[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|' # 1::3:4:5:6:7:8 1::3:4:5:6:7:8 1::8
R'([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|' # 1::4:5:6:7:8 1:2::4:5:6:7:8 1:2::8
R'([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|' # 1::5:6:7:8 1:2:3::5:6:7:8 1:2:3::8
R'([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|' # 1::6:7:8 1:2:3:4::6:7:8 1:2:3:4::8
R'([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|' # 1::7:8 1:2:3:4:5::7:8 1:2:3:4:5::8
R'([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|' # 1::8 1:2:3:4:5:6::8 1:2:3:4:5:6::8
R'([0-9a-fA-F]{1,4}:){1,7}:|' # 1:: 1:2:3:4:5:6:7::
R':((:[0-9a-fA-F]{1,4}){1,7}|:)|' # ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::
R'fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|' # fe80::7:8%eth0 fe80::7:8%1 (link-local IPv6 addresses with zone index)
R'::(ffff(:0{1,4}){0,1}:){0,1}' #
R'((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}' #
R'(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|' # ::255.255.255.255 ::ffff:255.255.255.255 ::ffff:0:255.255.255.255
R'([0-9a-fA-F]{1,4}:){1,4}:' # (IPv4-mapped IPv6 addresses and IPv4-translated addresses)
R'((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}' #
R'(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])' # 2001:db8:3:4::192.0.2.33 64:ff9b::192.0.2.33
R')' # (IPv4-Embedded IPv6 Address)
)
_pattern_serrated_socket = f'(?:{_pattern_serrated_ipv4}|{_pattern_serrated_domain})(?::\\d{{2,5}})'
_pattern_defanged_socket = f'(?:{_pattern_defanged_ipv4}|{_pattern_defanged_domain})(?::\\d{{2,5}})'
_pattern_serrated_hostname = _pattern_serrated_socket + '?'
_pattern_defanged_hostname = _pattern_defanged_socket + '?'
def _sized_pattern_integer(lower: int = 0, upper: int = 0):
x = _sized_suffix(max(1, lower - 3), upper - 2)
o = _sized_suffix(max(0, lower - 3), upper - 2)
d = _sized_suffix(max(0, lower - 2), upper - 1)
return (
F'[-+]?(?:0[bB][01]{x}|0[xX][0-9a-fA-F]{x}|0[1-7][0-7]{o}|[1-9][0-9]{d}|0)'
R'(?=[uU]?[iI]\d{1,2}|[LlHh]|[^a-zA-Z0-9]|$)'
)
_pattern_integer = _sized_pattern_integer()
_pattern_float = R'[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?'
_pattern_number = F'(?:(?:{_pattern_integer})|(?:{_pattern_float}))'
_pattern_number = (
'[-+]?(?:0[bB][01]+|0[xX][0-9a-fA-F]+|0[1-7][0-7]*|(?:[1-9][0-9]*|0)(?P<__fp1>\\.[0-9]*)?|(?P<__fp2>\\.[0-9]+))'
'(?(__fp1)(?:[eE][-+]?[0-9]+)?|(?(__fp2)(?:[eE][-+]?[0-9]+)?|(?=[uU]?[iI]\\d{1,2}|[LlHh]|[^a-zA-Z0-9]|$)))'
)
_pattern_date_elements = {
'A': '(?:{})'.format('|'.join([
'[sS]un(?:day)?',
'[mM]on(?:day)?',
'[tT]ue(?:sday)?',
'[wW]ed(?:nesday)?',
'[tT]hu(?:rsday)?',
'[fF]ri(?:day)?',
'[sS]at(?:urday)?',
])),
'B': '(?:{})'.format('|'.join([
'[jJ]an(?:uary)?',
'[fF]eb(?:ruary)?',
'[mM]ar(?:ch)?',
'[aA]pr(?:il)?',
'[mM]ay',
'[jJ]un(?:e)?',
'[jJ]ul(?:y)?',
'[aA]ug(?:ust)?',
'[sS]ep(?:tember)?',
'[oO]ct(?:ober)?',
'[nN]ov(?:ember)?',
'[dD]ec(?:ember)?',
])),
'D': '(?:[23]?(?:1st|2nd|3rd|[4-9]th)|20th|30th)',
'd': '(?:0?[1-9]|[12][0-9]|3[01])',
'm': '(?:0[1-9]|1[012])',
'I': '(?:0[1-9]|1[0-2])',
'p': '(?:[ap]m|[AP]M)',
'H': '(?:[01][0-9]|2[0-3])',
'M': '(?:[0-5][0-9])',
'S': '(?:[0-5][0-9])',
'z': '(?:[-+](?:[0-9]{2}){1,3}(?:\\.[0-9]{6})?)',
'y': '(?:[0-9]{2})',
'Y': '(?:[0-9]{4})',
'c': '(?:[,;]|\\s|[,;]\\s)',
'gap': '\\s{1,3}'
}
_pattern_time = r'(?:{H}:{M}(?::{S})?|{I}:{M}(?::{S})?{c}?\(?{p}\)?)'.format_map(_pattern_date_elements)
_pattern_date_elements['T'] = _pattern_time
_pattern_date_list = [
R'{A}{c}(?:{d}|{D}){gap}{B}{c}{Y}(?:\s{T})?',
R'{B}\s(?:{d}|{D}){c}{Y}(?:\s{T})?',
R'{Y}[-:]{m}[-:]{d}(?:[T\x20]{H}:{M}(?::{S})?(?:[Z.][0-9]{{6}})?{z}?)',
R'{m}/{d}/{Y}(?:{c}{T})?',
R'{A}{c}{B}{c}(?:{d}|{D}){c}{T}(?:\s\(?UTC\)?)?\s{Y}',
]
_pattern_date = '|'.join(
_p.format_map(_pattern_date_elements) for _p in _pattern_date_list)
def _sized_pattern_string(lower: int = 0, upper: int = 0):
ml = _sized_suffix((lower - 6) // 1, (upper - 6))
sl = _sized_suffix((lower - 2) // 2, (upper - 2))
str_dq = FR'"(?:[^"\\\r\n]|\\[^\r\n]){sl}"'
str_sq = FR"'(?:[^'\\\r\n]|\\[^\r\n]){sl}'"
str_js = FR'`(?:[^`\\]|\\.){sl}`'
str_mul_dq = FR'"""(?:.(?!""")){ml}"""'
str_mul_sq = FR"'''(?:.(?!''')){ml}'''"
return '(?:{})'.format('|'.join((
str_dq,
str_sq,
str_js,
str_mul_dq,
str_mul_sq,
)))
def _sized_pattern_cmdstr(lower: int = 0, upper: int = 0):
n = _sized_suffix((lower - 2) // 2, upper - 2)
return FR'''(?:"(?:""|[^"]){n}"|'(?:''|[^']){n}')'''
_pattern_cmdstr = _sized_pattern_cmdstr()
_pattern_ps1str = R'''(?:(?:@"\s*?[\r\n].*?[\r\n]"@)|(?:@'\s*?[\r\n].*?[\r\n]'@)|(?:"(?:`.|""|[^"\n])*")|(?:'(?:''|[^'\n])*'))'''
_pattern_vbastr = R'''"(?:""|[^"])*"'''
_pattern_vbaint = R'(?:&[bB][01]+|&[hH][0-9a-fA-F]+|&[oO][0-7]*|[-+]?(?:[1-9][0-9]*|0))(?=\b|$)'
_pattern_string = _sized_pattern_string()
_pattern_urlenc = R'''(?:%[0-9a-fA-F]{2}|[0-9a-zA-Z\-\._~\?!$&=])+'''
_pattern_urlhex = R'''(?:%[0-9a-fA-F]{2})+'''
_pattern_json = (
R'''\s{0,20}[\[\{](?:"(?:[^"\\\r\n]|\\[^\r\n])*'''
R'''"(?:\s*[:,])?|(?:none|true|false|%s|%s|\]|\})(?:\s*,)?|[,\]\}\[\{\s]+)*[\]\}]'''
) % (_pattern_integer, _pattern_float)
_pattern_wshenc = R'''#@~\^[ -~]{6}==(?:.*?)[ -~]{6}==\^#~@'''
_part_url_credentials = (
R'(?:([^"\'\s\x00-\x20\x7E-\xFF]{1,256})?'
R'(?::([^"\'\s\x00-\x20\x7E-\xFF]{0,256})?)?@)?'
)
_prefix_serrated_url = R'(([a-zA-Z]{2,20}:)?\/\/)' + _part_url_credentials
_prefix_defanged_url = R'(([a-zA-Z]{2,20}(?:\[:\]|:))?\/\/)' + _part_url_credentials
_suffix_combined_url = R'([/?#](?:[#/=:;$!?&.,\w\+\%\-\*\'~@()](?![a-zA-Z]{2,20}://))*)?'
_pattern_serrated_url = F'{_prefix_serrated_url}({_pattern_serrated_hostname}){_suffix_combined_url}'
_pattern_defanged_url = F'{_prefix_defanged_url}({_pattern_defanged_hostname}){_suffix_combined_url}'
_pattern_email = fR'(?:[a-zA-Z0-9_\.\+\-]{{1,256}}?)@(?:{_pattern_serrated_domain})'
_pattern_guid = R'(?:\b|\{)[0-9A-Fa-f]{8}(?:\-[0-9A-Fa-f]{4}){3}\-[0-9A-Fa-f]{12}(?:\}|\b)'
_pattern_pathpart_nospace = R'[-\w+,.;@\]\[{}^`~]+' # R'[^/\\:"<>|\s\x7E-\xFF\x00-\x1F\xAD]+'
_pattern_win_path_element = R'(?:{n} ){{0,4}}{n}'.format(n=_pattern_pathpart_nospace)
_pattern_nix_path_element = R'(?:{n} ){{0,1}}{n}'.format(n=_pattern_pathpart_nospace)
_pattern_win_env_variable = R'%[a-zA-Z][a-zA-Z0-9_\-\(\)]*%'
_pattern_win_path_template_abs = R'(?:{s})(?P<__pathsep__>[\\\/])(?:{p}(?P=__pathsep__))*{p}(?:(?P=__pathsep__)|\b)'
_pattern_win_path_template_rel = R'(?:{p}|)\\(?:{p}\\)*{p}(?:\\|\b)'
_pattern_win_path_template = F'(?:{_pattern_win_path_template_abs}|{_pattern_win_path_template_rel})'
_pattern_win_root = '|'.join([
_pattern_win_env_variable, # environment variable
R'[A-Za-z]:', # drive letter with colon
R'\\\\[a-zA-Z0-9_.$@]{1,50}', # UNC path
R'HK[A-Z_]{1,30}', # registry root key
])
_pattern_win_path = _pattern_win_path_template.format(
s=_pattern_win_root,
p=_pattern_win_path_element
)
_pattern_win_path_terse = _pattern_win_path_template.format(
s=_pattern_win_root,
p=_pattern_pathpart_nospace
)
_pattern_nix_path_template = R'(?:/(?:{n}/)+|(?:{n}/){{2,}}){n}'
_pattern_nix_path = _pattern_nix_path_template.format(
n=_pattern_nix_path_element)
_pattern_nix_path_terse = _pattern_nix_path_template.format(
n=_pattern_pathpart_nospace)
_pattern_any_path = R'(?:{nix})|(?:{win})'.format(
nix=_pattern_nix_path,
win=_pattern_win_path,
)
_pattern_any_path_terse = R'(?:{nix})|(?:{win})'.format(
nix=_pattern_nix_path_terse,
win=_pattern_win_path_terse,
)
_pattern_uuencode = R'begin\s+\d{3}\s+[\x20!-~]+?\r?\n(?:M[\x20-\x60]{60}\r?\n)*(?:.*?\r?\n)?`\r?\nend'
def make_hexline_pattern(blocksize: int) -> str:
return R'(?:{s}+\s+)?\s*({h})(?:[ \t]+(.+?))?'.format(
h=tokenize(
RF'(?:0[xX])?[0-9a-fA-F]{{{2 * blocksize}}}h?',
sep=R'[- \t\/:;,\\]{1,3}'
).str_pattern,
s=R'[-\w:;,#\.\$\?!\/\\=\(\)\[\]\{\}]'
)
_pattern_hexline = make_hexline_pattern(1)
_pattern_pem = (
R'-----BEGIN(?:\s[A-Z0-9]+)+-----{n}'
R'(?:{b}{{40,100}}{n})*{b}{{1,100}}={{0,3}}{n}'
R'-----END(?:\s[A-Z0-9]+)+-----'
).format(n=R'(?:\r\n|\n\r|\n)', b=R'[0-9a-zA-Z\+\/]')
AnsiColor = pattern(R'\x1b\[(?:22|[34]\d|(?:9|10)[0-8]|[0-2])(?:;\d+)*m')
class checks(_PatternEnum):
json = pattern(_pattern_json)
"Data that consists of JSON-like tokens; cannot detect actual JSON data."
class formats(_PatternEnum):
"""
An enumeration of patterns for certain formats.
"""
int = pattern(_pattern_integer)
"Integer expressions"
flt = pattern(_pattern_float)
"Floating point number expressions"
num = pattern(_pattern_number)
"Either an integer or a float"
str = pattern(_pattern_string)
"C syntax string literal"
cmdstr = pattern(_pattern_cmdstr)
"Windows command line escaped string literal"
ps1str = pattern(_pattern_ps1str, flags=re.DOTALL)
"PowerShell escaped string literal"
vbastr = pattern(_pattern_vbastr)
"VBS/VBA string literal"
vbaint = pattern(_pattern_vbaint)
"VBS/VBA integer literal"
printable = alphabet(R'[\s!-~]')
"Any sequence of printable characters"
urlquote = pattern(_pattern_urlenc)
"Any sequence of url-encoded characters, default char set"
urlhex = pattern(_pattern_urlhex)
"A hex-encoded buffer using URL escape sequences"
intarray = tokenize(_pattern_integer, sep=R'[;,]', bound='', unique_sep=True)
"Sequences of integers, separated by commas or semicolons"
strarray = tokenize(_pattern_string, sep=R'[;,]', bound='', unique_sep=True)
"Sequences of strings, separated by commas or semicolons"
numarray = tokenize(_pattern_number, sep=R'[;,]', bound='', unique_sep=True)
"Sequences of numbers, separated by commas or semicolons"
hexarray = tokenize(R'[0-9A-Fa-f]{2}', sep=R'[;,]', bound='', unique_sep=True)
"Arrays of hexadecimal strings, separated by commas or semicolons"
word = alphabet(R'\\w')
"Sequences of word characters"
letters = alphabet(R'[a-zA-Z]')
"Sequences of alphabetic characters"
wshenc = pattern(_pattern_wshenc)
"Encoded Windows Scripting Host Scripts (JS/VBS)"
alnum = alphabet(R'[a-zA-Z0-9]')
"Sequences of alpha-numeric characters"
b32 = pattern('[A-Z2-7]+|[a-z2-7+]')
"Base32 encoded strings"
b58 = alphabet(R'(?:[1-9A-HJ-NP-Za-km-z]')
"Base58 encoded strings"
b62 = alphabet(R'(?:[0-9A-Za-z]')
"Base62 encoded strings"
b64 = alphabet(R'(?:[0-9a-zA-Z\+/]{4})', suffix=R'(?:(?:[0-9a-zA-Z\+/]{2,3})={0,3})?', suffix_max=6, token_size=4)
"Base64 encoded strings"
b85 = alphabet(R'[-!+*()#-&^-~0-9;-Z]')
"Base85 encoded strings"
a85 = alphabet(R'[!-u]')
"Ascii85 encoded strings"
z85 = alphabet(R'[-0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]')
"Z85 encoded strings"
b92 = pattern(_pattern_b92)
"Base92 encoded strings"
b64u = alphabet(R'[-\w]{4}', suffix=R'(?:[-\w]{2,3}={0,3})?', suffix_max=6)
"Base64 encoded strings using URL-safe alphabet"
hex = alphabet(R'[0-9a-fA-F]{2}', token_size=2)
"Hexadecimal strings"
b16 = alphabet(R'[0-9A-F]{2}', token_size=2)
"Uppercase hexadecimal strings"
b16s = tokenize(R'[0-9a-fA-F]+', R'\s*', bound='')
"Hexadecimal strings"
b64s = alphabet(R'[-\s\w\+/]', suffix=R'(?:={0,3})?', suffix_max=3)
"Base64 encoded strings, separated by whitespace"
b85s = alphabet(R'[-!+*()#-&^-~0-9;-Z\s]')
"Base85 encoded string, separated by whitespace"
a85s = alphabet(R'[!-u\s]')
"Ascii85 encoded string, separated by whitespace"
z85s = alphabet(R'[-\s0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]')
"Z85 encoded string, separated by whitespace"
utf8 = pattern(_pattern_utf8)
"A sequence of bytes that can be decoded as UTF8."
hexdump = tokenize(_pattern_hexline, bound='', sep=R'\s*\n')
"""
This pattern matches a typical hexdump output where hexadecimally encoded
bytes are followed by a string which contains dots or printable characters
from the dump. For example:
46 4F 4F 0A 42 41 52 0A FOO.BAR.
F0 0B AA BA F0 0B ......
"""
uuenc = pattern(_pattern_uuencode)
"UUEncoded data"
# shortcuts
float = flt
integer = int
number = num
string = str
@classmethod
def from_dashname(cls, key: str):
if key.startswith('[') and key.endswith(']'):
key = key[1:-1] + 'array'
return getattr(cls, normalize_to_identifier(key))
class wallets(_PatternEnum):
# https://gist.github.com/etherx-dev/76559d9e6d916917a960e33ceea91481
ADA = pattern("addr1[a-z0-9]+")
ATOM = pattern("cosmos[-\\w\\.]{10,}")
BCH = pattern("(bitcoincash:)?(q|p)[a-z0-9]{41}|(BITCOINCASH:)?(Q|P)[A-Z0-9]{41}")
BTC = pattern("(?:[13][a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-z0-9]{25,39})")
BTCP = pattern("5[HJK][1-9A-Za-z][^A-HJ-NP-Za-km-z0-9]{48}")
DASH = pattern("X[1-9A-HJ-NP-Za-km-z]{33}")
DOGE = pattern("D{1}[5-9A-HJ-NP-U]{1}[1-9A-HJ-NP-Za-km-z]{32}")
DOT = pattern("1[0-9a-zA-Z]{47}")
ETH = pattern("0x[a-fA-F0-9]{40}")
IOTA = pattern("iota[a-z0-9]{10,}")
LSK = pattern("[0-9]{19}L")
LTC = pattern("[LM3][a-km-zA-HJ-NP-Z1-9]{26,33}")
NEO = pattern("N[0-9a-zA-Z]{33}")
ONE = pattern("(?:bnb|one)1[a-z0-9]{38}")
ONT = pattern("A[0-9a-zA-Z]{33}")
RONIN = pattern("ronin:[a-fA-F0-9]{40}")
TERRA = pattern("terra1[a-z0-9]{38}")
XEM = pattern("N[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}")
XLM = pattern("G[A-D][A-Z2-7]{54}")
XMR = pattern("4[0-9AB][1-9A-HJ-NP-Za-km-z]{90,120}")
XRP = pattern("r[0-9a-zA-Z]{24,34}")
class indicators(_PatternEnum):
"""
An enumeration of patterns for indicators.
"""
domain = pattern(_pattern_serrated_domain)
"Domain names"
email = pattern(_pattern_email)
"Email addresses"
guid = pattern(_pattern_guid)
"Windows GUID strings"
date = pattern(_pattern_date)
"A date or timestamp value in a common format"
ipv4 = pattern(_pattern_serrated_ipv4)
"String representations of IPv4 addresses"
ipv6 = pattern(_pattern_ipv6)
"String representations of IPv6 addresses"
md5 = alphabet('[0-9A-Fa-f]', lower=32, upper=32)
"Hexadecimal strings of length 32"
sha1 = alphabet('[0-9A-Fa-f]', lower=40, upper=40)
"Hexadecimal strings of length 40"
sha256 = alphabet('[0-9A-Fa-f]', lower=64, upper=64)
"Hexadecimal strings of length 64"
hostname = pattern(_pattern_serrated_hostname)
"Any domain name or IPv4 address, optionally followed by a colon and a port number."
socket = pattern(_pattern_serrated_socket)
"Any domain name or IPv4 address followed by a colon and a (port) number"
subdomain = pattern(_pattern_subdomain)
"A domain which contains at least three parts, including the top level"
url = pattern(_pattern_serrated_url)
"Uniform resource locator addresses"
pem = pattern(_pattern_pem)
"A pattern matching PEM encoded cryptographic parameters"
path = pattern(_pattern_any_path)
"Windows and Linux path names"
winpath = pattern(_pattern_win_path)
"Windows path names"
nixpath = pattern(_pattern_nix_path)
"Posix path names"
evar = pattern(_pattern_win_env_variable)
"Windows environment variables, i.e. something like `%APPDATA%`"
@classmethod
def from_dashname(cls, key):
return getattr(cls, normalize_to_identifier(key))
class defanged(_PatternEnum):
"""
An enumeration of patterns for defanged indicators. Used only by the reverse
operation of `refinery.defang`.
"""
hostname = pattern(_pattern_defanged_hostname)
"A defanged `refinery.lib.patterns.indicators.hostname`."
url = pattern(_pattern_defanged_url)
"A defanged `refinery.lib.patterns.indicators.url`."
def pattern_with_size_limits(p: pattern, lower: int | None, upper: int | None) -> pattern:
"""
This attempts to construct a pattern from a given format that includes the given lower and
upper bounds on total match size. This is not always possible.
"""
lower = max(0, lower or 0)
upper = max(0, upper or 0)
handlers = {
formats.int.value : _sized_pattern_integer,
formats.cmdstr.value : _sized_pattern_cmdstr,
formats.string.value : _sized_pattern_string,
}
if isinstance(p, alphabet):
return alphabet(
p.repeat,
p.prefix,
p.suffix,
lower,
upper,
p.prefix_min,
p.prefix_max,
p.suffix_min,
p.suffix_max,
p.token_size,
flags=p.flags,
)
elif h := handlers.get(p):
return pattern(h(lower, upper), formats.int.value.flags)
return p
Sub-modules
refinery.lib.patterns.tlds
Functions
def make_hexline_pattern(blocksize)-
Expand source code Browse git
def make_hexline_pattern(blocksize: int) -> str: return R'(?:{s}+\s+)?\s*({h})(?:[ \t]+(.+?))?'.format( h=tokenize( RF'(?:0[xX])?[0-9a-fA-F]{{{2 * blocksize}}}h?', sep=R'[- \t\/:;,\\]{1,3}' ).str_pattern, s=R'[-\w:;,#\.\$\?!\/\\=\(\)\[\]\{\}]' ) def pattern_with_size_limits(p, lower, upper)-
This attempts to construct a pattern from a given format that includes the given lower and upper bounds on total match size. This is not always possible.
Expand source code Browse git
def pattern_with_size_limits(p: pattern, lower: int | None, upper: int | None) -> pattern: """ This attempts to construct a pattern from a given format that includes the given lower and upper bounds on total match size. This is not always possible. """ lower = max(0, lower or 0) upper = max(0, upper or 0) handlers = { formats.int.value : _sized_pattern_integer, formats.cmdstr.value : _sized_pattern_cmdstr, formats.string.value : _sized_pattern_string, } if isinstance(p, alphabet): return alphabet( p.repeat, p.prefix, p.suffix, lower, upper, p.prefix_min, p.prefix_max, p.suffix_min, p.suffix_max, p.token_size, flags=p.flags, ) elif h := handlers.get(p): return pattern(h(lower, upper), formats.int.value.flags) return p
Classes
class pattern (pattern, flags=0)-
A wrapper for regular expression pattern objects created from re.compile, allowing combination of several patterns into one via overloaded operators.
Expand source code Browse git
class pattern(PatternMethods): """ A wrapper for regular expression pattern objects created from re.compile, allowing combination of several patterns into one via overloaded operators. """ str_pattern: str bin_pattern: bytes def __init__(self, pattern: str, flags: int = 0): self.str_pattern = pattern self.bin_pattern = pattern.encode('ascii') self.flags = flags def __bytes__(self): return self.bin_pattern @functools.cached_property def bin(self): return re.compile(B'(%s)' % self.bin_pattern, flags=self.flags) @functools.cached_property def str(self): return re.compile(self.str_pattern, flags=self.flags) def __hash__(self): return hash((self.str_pattern, self.flags)) def __eq__(self, other): if isinstance(other, str): return self.str_pattern == other and self.flags == 0 if isinstance(other, pattern): return self.str_pattern == other.str_pattern and self.flags == other.flags return False def __str__(self): return self.str_pattern def __getattr__(self, verb): if not hasattr(re.Pattern, verb): raise AttributeError(verb) bin_attr = getattr(self.bin, verb) if not callable(bin_attr): return bin_attr str_attr = getattr(self.str, verb) def wrapper(*args, **kwargs): for argument in args: if isinstance(argument, str): return str_attr(*args, **kwargs) else: return bin_attr(*args, **kwargs) functools.update_wrapper(wrapper, bin_attr) return wrapperSubclasses
Class variables
var str_pattern-
The type of the None singleton.
var bin_pattern-
The type of the None singleton.
Instance variables
var bin-
Expand source code Browse git
@functools.cached_property def bin(self): return re.compile(B'(%s)' % self.bin_pattern, flags=self.flags) var str-
Expand source code Browse git
@functools.cached_property def str(self): return re.compile(self.str_pattern, flags=self.flags)
class alphabet (repeat, prefix='', suffix='', lower=1, upper=0, prefix_min=0, prefix_max=0, suffix_min=0, suffix_max=0, token_size=1, flags=0, **kwargs)-
A pattern object representing strings of letters from a given alphabet, with an optional prefix and suffix.
Expand source code Browse git
class alphabet(pattern): """ A pattern object representing strings of letters from a given alphabet, with an optional prefix and suffix. """ def __init__( self, repeat: str, prefix: str = '', suffix: str = '', lower: int = 1, upper: int = 0, prefix_min: int = 0, prefix_max: int = 0, suffix_min: int = 0, suffix_max: int = 0, token_size: int = 1, flags: int = 0, **kwargs ): self.repeat = repeat self.prefix = prefix self.suffix = suffix self.suffix_min = suffix_min self.suffix_max = suffix_max self.prefix_min = prefix_min self.prefix_max = prefix_max self.token_size = token_size lower = lower - suffix_max - prefix_max upper = upper - suffix_min - prefix_min if token_size > 1: lower, _r = divmod(lower, token_size) if _r and lower == 0: lower = _r upper, _r = divmod(upper, token_size) if _r and upper >= 0: upper += 1 self.lower = lower self.upper = upper count = _sized_suffix(lower, upper) pattern.__init__(self, R'{b}(?:{r}){c}{a}'.format( r=repeat, b=prefix, c=count, a=suffix ), flags, **kwargs )Ancestors
Inherited members
class tokenize (token, sep, bound='\\b', unique_sep=False, sep_ignores_whitespace=True, **kwargs)-
A pattern representing a sequence of tokens matching the
tokenpattern, separated by sequences matching the patternsep. The optional parameterboundis required before and after each token, its default value is the regular expression zero length match for a word boundary.Expand source code Browse git
class tokenize(pattern): """ A pattern representing a sequence of tokens matching the `token` pattern, separated by sequences matching the pattern `sep`. The optional parameter `bound` is required before and after each token, its default value is the regular expression zero length match for a word boundary. """ def __init__(self, token, sep, bound='\\b', unique_sep=False, sep_ignores_whitespace=True, **kwargs): if unique_sep: if sep_ignores_whitespace: p = ( R'(?:{b}{t}{b}\s{{0,50}}(?P<__sep__>{s})\s{{0,50}})' R'(?:(?:{b}{t}{b}\s{{0,50}}(?P=__sep__)\s{{0,50}})+{b}{t}{b}|{b}{t}{b})' ) else: p = R'(?:{b}{t}{b}(?P<__sep__>{s}))(?:(?:{b}{t}{b}(?P=__sep__))+{b}{t}{b}|{b}{t}{b})' else: p = R'(?:{b}{t}{b}{s})+(?:{b}{t}{b})' pattern.__init__(self, p.format(s=sep, b=bound, t=token), **kwargs)Ancestors
Inherited members
class checks (*args, **kwds)-
Create a collection of name/value pairs.
Example enumeration:
>>> class Color(Enum): ... RED = 1 ... BLUE = 2 ... GREEN = 3Access them by:
- attribute access:
Color.RED
- value lookup:
Color(1)
- name lookup:
Color['RED']
Enumerations can be iterated over, and know how many members they have:
>>> len(Color) 3>>> list(Color) [<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]Methods can be added to enumerations, and members can have their own attributes – see the documentation for details.
Expand source code Browse git
class checks(_PatternEnum): json = pattern(_pattern_json) "Data that consists of JSON-like tokens; cannot detect actual JSON data."Ancestors
- refinery.lib.patterns._PatternEnum
- enum.Enum
Class variables
var json-
Data that consists of JSON-like tokens; cannot detect actual JSON data.
class formats (*args, **kwds)-
An enumeration of patterns for certain formats.
Expand source code Browse git
class formats(_PatternEnum): """ An enumeration of patterns for certain formats. """ int = pattern(_pattern_integer) "Integer expressions" flt = pattern(_pattern_float) "Floating point number expressions" num = pattern(_pattern_number) "Either an integer or a float" str = pattern(_pattern_string) "C syntax string literal" cmdstr = pattern(_pattern_cmdstr) "Windows command line escaped string literal" ps1str = pattern(_pattern_ps1str, flags=re.DOTALL) "PowerShell escaped string literal" vbastr = pattern(_pattern_vbastr) "VBS/VBA string literal" vbaint = pattern(_pattern_vbaint) "VBS/VBA integer literal" printable = alphabet(R'[\s!-~]') "Any sequence of printable characters" urlquote = pattern(_pattern_urlenc) "Any sequence of url-encoded characters, default char set" urlhex = pattern(_pattern_urlhex) "A hex-encoded buffer using URL escape sequences" intarray = tokenize(_pattern_integer, sep=R'[;,]', bound='', unique_sep=True) "Sequences of integers, separated by commas or semicolons" strarray = tokenize(_pattern_string, sep=R'[;,]', bound='', unique_sep=True) "Sequences of strings, separated by commas or semicolons" numarray = tokenize(_pattern_number, sep=R'[;,]', bound='', unique_sep=True) "Sequences of numbers, separated by commas or semicolons" hexarray = tokenize(R'[0-9A-Fa-f]{2}', sep=R'[;,]', bound='', unique_sep=True) "Arrays of hexadecimal strings, separated by commas or semicolons" word = alphabet(R'\\w') "Sequences of word characters" letters = alphabet(R'[a-zA-Z]') "Sequences of alphabetic characters" wshenc = pattern(_pattern_wshenc) "Encoded Windows Scripting Host Scripts (JS/VBS)" alnum = alphabet(R'[a-zA-Z0-9]') "Sequences of alpha-numeric characters" b32 = pattern('[A-Z2-7]+|[a-z2-7+]') "Base32 encoded strings" b58 = alphabet(R'(?:[1-9A-HJ-NP-Za-km-z]') "Base58 encoded strings" b62 = alphabet(R'(?:[0-9A-Za-z]') "Base62 encoded strings" b64 = alphabet(R'(?:[0-9a-zA-Z\+/]{4})', suffix=R'(?:(?:[0-9a-zA-Z\+/]{2,3})={0,3})?', suffix_max=6, token_size=4) "Base64 encoded strings" b85 = alphabet(R'[-!+*()#-&^-~0-9;-Z]') "Base85 encoded strings" a85 = alphabet(R'[!-u]') "Ascii85 encoded strings" z85 = alphabet(R'[-0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]') "Z85 encoded strings" b92 = pattern(_pattern_b92) "Base92 encoded strings" b64u = alphabet(R'[-\w]{4}', suffix=R'(?:[-\w]{2,3}={0,3})?', suffix_max=6) "Base64 encoded strings using URL-safe alphabet" hex = alphabet(R'[0-9a-fA-F]{2}', token_size=2) "Hexadecimal strings" b16 = alphabet(R'[0-9A-F]{2}', token_size=2) "Uppercase hexadecimal strings" b16s = tokenize(R'[0-9a-fA-F]+', R'\s*', bound='') "Hexadecimal strings" b64s = alphabet(R'[-\s\w\+/]', suffix=R'(?:={0,3})?', suffix_max=3) "Base64 encoded strings, separated by whitespace" b85s = alphabet(R'[-!+*()#-&^-~0-9;-Z\s]') "Base85 encoded string, separated by whitespace" a85s = alphabet(R'[!-u\s]') "Ascii85 encoded string, separated by whitespace" z85s = alphabet(R'[-\s0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]') "Z85 encoded string, separated by whitespace" utf8 = pattern(_pattern_utf8) "A sequence of bytes that can be decoded as UTF8." hexdump = tokenize(_pattern_hexline, bound='', sep=R'\s*\n') """ This pattern matches a typical hexdump output where hexadecimally encoded bytes are followed by a string which contains dots or printable characters from the dump. For example: 46 4F 4F 0A 42 41 52 0A FOO.BAR. F0 0B AA BA F0 0B ...... """ uuenc = pattern(_pattern_uuencode) "UUEncoded data" # shortcuts float = flt integer = int number = num string = str @classmethod def from_dashname(cls, key: str): if key.startswith('[') and key.endswith(']'): key = key[1:-1] + 'array' return getattr(cls, normalize_to_identifier(key))Ancestors
- refinery.lib.patterns._PatternEnum
- enum.Enum
Class variables
var int-
Integer expressions
var flt-
Floating point number expressions
var num-
Either an integer or a float
var str-
C syntax string literal
var cmdstr-
Windows command line escaped string literal
var ps1str-
PowerShell escaped string literal
var vbastr-
VBS/VBA string literal
var vbaint-
VBS/VBA integer literal
var printable-
Any sequence of printable characters
var urlquote-
Any sequence of url-encoded characters, default char set
var urlhex-
A hex-encoded buffer using URL escape sequences
var intarray-
Sequences of integers, separated by commas or semicolons
var strarray-
Sequences of strings, separated by commas or semicolons
var numarray-
Sequences of numbers, separated by commas or semicolons
var hexarray-
Arrays of hexadecimal strings, separated by commas or semicolons
var word-
Sequences of word characters
var letters-
Sequences of alphabetic characters
var wshenc-
Encoded Windows Scripting Host Scripts (JS/VBS)
var alnum-
Sequences of alpha-numeric characters
var b32-
Base32 encoded strings
var b58-
Base58 encoded strings
var b62-
Base62 encoded strings
var b64-
Base64 encoded strings
var b85-
Base85 encoded strings
var a85-
Ascii85 encoded strings
var z85-
Z85 encoded strings
var b92-
Base92 encoded strings
var b64u-
Base64 encoded strings using URL-safe alphabet
var hex-
Hexadecimal strings
var b16-
Uppercase hexadecimal strings
var b16s-
Hexadecimal strings
var b64s-
Base64 encoded strings, separated by whitespace
var b85s-
Base85 encoded string, separated by whitespace
var a85s-
Ascii85 encoded string, separated by whitespace
var z85s-
Z85 encoded string, separated by whitespace
var utf8-
A sequence of bytes that can be decoded as UTF8.
var hexdump-
This pattern matches a typical hexdump output where hexadecimally encoded bytes are followed by a string which contains dots or printable characters from the dump. For example:
46 4F 4F 0A 42 41 52 0A FOO.BAR. F0 0B AA BA F0 0B ...... var uuenc-
UUEncoded data
var float-
The type of the None singleton.
var integer-
The type of the None singleton.
var number-
The type of the None singleton.
var string-
The type of the None singleton.
Static methods
def from_dashname(key)
class wallets (*args, **kwds)-
Create a collection of name/value pairs.
Example enumeration:
>>> class Color(Enum): ... RED = 1 ... BLUE = 2 ... GREEN = 3Access them by:
- attribute access:
Color.RED
- value lookup:
Color(1)
- name lookup:
Color['RED']
Enumerations can be iterated over, and know how many members they have:
>>> len(Color) 3>>> list(Color) [<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]Methods can be added to enumerations, and members can have their own attributes – see the documentation for details.
Expand source code Browse git
class wallets(_PatternEnum): # https://gist.github.com/etherx-dev/76559d9e6d916917a960e33ceea91481 ADA = pattern("addr1[a-z0-9]+") ATOM = pattern("cosmos[-\\w\\.]{10,}") BCH = pattern("(bitcoincash:)?(q|p)[a-z0-9]{41}|(BITCOINCASH:)?(Q|P)[A-Z0-9]{41}") BTC = pattern("(?:[13][a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-z0-9]{25,39})") BTCP = pattern("5[HJK][1-9A-Za-z][^A-HJ-NP-Za-km-z0-9]{48}") DASH = pattern("X[1-9A-HJ-NP-Za-km-z]{33}") DOGE = pattern("D{1}[5-9A-HJ-NP-U]{1}[1-9A-HJ-NP-Za-km-z]{32}") DOT = pattern("1[0-9a-zA-Z]{47}") ETH = pattern("0x[a-fA-F0-9]{40}") IOTA = pattern("iota[a-z0-9]{10,}") LSK = pattern("[0-9]{19}L") LTC = pattern("[LM3][a-km-zA-HJ-NP-Z1-9]{26,33}") NEO = pattern("N[0-9a-zA-Z]{33}") ONE = pattern("(?:bnb|one)1[a-z0-9]{38}") ONT = pattern("A[0-9a-zA-Z]{33}") RONIN = pattern("ronin:[a-fA-F0-9]{40}") TERRA = pattern("terra1[a-z0-9]{38}") XEM = pattern("N[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}") XLM = pattern("G[A-D][A-Z2-7]{54}") XMR = pattern("4[0-9AB][1-9A-HJ-NP-Za-km-z]{90,120}") XRP = pattern("r[0-9a-zA-Z]{24,34}")Ancestors
- refinery.lib.patterns._PatternEnum
- enum.Enum
Class variables
var ADA-
The type of the None singleton.
var ATOM-
The type of the None singleton.
var BCH-
The type of the None singleton.
var BTC-
The type of the None singleton.
var BTCP-
The type of the None singleton.
var DASH-
The type of the None singleton.
var DOGE-
The type of the None singleton.
var DOT-
The type of the None singleton.
var ETH-
The type of the None singleton.
var IOTA-
The type of the None singleton.
var LSK-
The type of the None singleton.
var LTC-
The type of the None singleton.
var NEO-
The type of the None singleton.
var ONE-
The type of the None singleton.
var ONT-
The type of the None singleton.
var RONIN-
The type of the None singleton.
var TERRA-
The type of the None singleton.
var XEM-
The type of the None singleton.
var XLM-
The type of the None singleton.
var XMR-
The type of the None singleton.
var XRP-
The type of the None singleton.
class indicators (*args, **kwds)-
An enumeration of patterns for indicators.
Expand source code Browse git
class indicators(_PatternEnum): """ An enumeration of patterns for indicators. """ domain = pattern(_pattern_serrated_domain) "Domain names" email = pattern(_pattern_email) "Email addresses" guid = pattern(_pattern_guid) "Windows GUID strings" date = pattern(_pattern_date) "A date or timestamp value in a common format" ipv4 = pattern(_pattern_serrated_ipv4) "String representations of IPv4 addresses" ipv6 = pattern(_pattern_ipv6) "String representations of IPv6 addresses" md5 = alphabet('[0-9A-Fa-f]', lower=32, upper=32) "Hexadecimal strings of length 32" sha1 = alphabet('[0-9A-Fa-f]', lower=40, upper=40) "Hexadecimal strings of length 40" sha256 = alphabet('[0-9A-Fa-f]', lower=64, upper=64) "Hexadecimal strings of length 64" hostname = pattern(_pattern_serrated_hostname) "Any domain name or IPv4 address, optionally followed by a colon and a port number." socket = pattern(_pattern_serrated_socket) "Any domain name or IPv4 address followed by a colon and a (port) number" subdomain = pattern(_pattern_subdomain) "A domain which contains at least three parts, including the top level" url = pattern(_pattern_serrated_url) "Uniform resource locator addresses" pem = pattern(_pattern_pem) "A pattern matching PEM encoded cryptographic parameters" path = pattern(_pattern_any_path) "Windows and Linux path names" winpath = pattern(_pattern_win_path) "Windows path names" nixpath = pattern(_pattern_nix_path) "Posix path names" evar = pattern(_pattern_win_env_variable) "Windows environment variables, i.e. something like `%APPDATA%`" @classmethod def from_dashname(cls, key): return getattr(cls, normalize_to_identifier(key))Ancestors
- refinery.lib.patterns._PatternEnum
- enum.Enum
Class variables
var domain-
Domain names
var email-
Email addresses
var guid-
Windows GUID strings
var date-
A date or timestamp value in a common format
var ipv4-
String representations of IPv4 addresses
var ipv6-
String representations of IPv6 addresses
var md5-
Hexadecimal strings of length 32
var sha1-
Hexadecimal strings of length 40
var sha256-
Hexadecimal strings of length 64
var hostname-
Any domain name or IPv4 address, optionally followed by a colon and a port number.
var socket-
Any domain name or IPv4 address followed by a colon and a (port) number
var subdomain-
A domain which contains at least three parts, including the top level
var url-
Uniform resource locator addresses
var pem-
A pattern matching PEM encoded cryptographic parameters
var path-
Windows and Linux path names
var winpath-
Windows path names
var nixpath-
Posix path names
var evar-
Windows environment variables, i.e. something like
%APPDATA%
Static methods
def from_dashname(key)
class defanged (*args, **kwds)-
An enumeration of patterns for defanged indicators. Used only by the reverse operation of
defang.Expand source code Browse git
class defanged(_PatternEnum): """ An enumeration of patterns for defanged indicators. Used only by the reverse operation of `refinery.defang`. """ hostname = pattern(_pattern_defanged_hostname) "A defanged `refinery.lib.patterns.indicators.hostname`." url = pattern(_pattern_defanged_url) "A defanged `refinery.lib.patterns.indicators.url`."Ancestors
- refinery.lib.patterns._PatternEnum
- enum.Enum
Class variables
var hostname-
A defanged
indicators.hostname. var url-
A defanged
indicators.url.