Module refinery.lib.patterns

Library of regular expression patterns.

Expand source code Browse git
"""
Library of regular expression patterns.
"""
from __future__ import annotations

import enum
import functools
import re

from typing import TYPE_CHECKING, Callable, Iterator, overload

from refinery.lib.patterns.tlds import tlds
from refinery.lib.tools import normalize_to_identifier
from refinery.lib.types import buf

if TYPE_CHECKING:
    from re import Match

    class PatternMethods:
        """
        Typing-only declaration of the `re.Pattern` methods that the `pattern` class forwards
        at runtime. Every method is overloaded: binary input produces binary results, string
        input produces string results. Outside of type checking, the name `PatternMethods` is
        bound to `object` instead.
        """
        @overload
        def split(self, string: buf, maxsplit: int = 0) -> list[bytes]:
            ...

        @overload
        def split(self, string: str, maxsplit: int = 0) -> list[str]:
            ...

        def split(self, string, maxsplit=0) -> list:
            ...

        # NOTE(review): re.Pattern.fullmatch and re.Pattern.search return None when there is no
        # match; the return annotations below do not include that case.
        @overload
        def fullmatch(self, string: buf, pos: int = 0, endpos: int | None = None) -> Match[bytes]:
            ...

        @overload
        def fullmatch(self, string: str, pos: int = 0, endpos: int | None = None) -> Match[str]:
            ...

        def fullmatch(self, string, pos=0, endpos=None) -> Match:
            ...

        @overload
        def search(self, string: buf, pos: int = 0, endpos: int | None = None) -> Match[bytes]:
            ...

        @overload
        def search(self, string: str, pos: int = 0, endpos: int | None = None) -> Match[str]:
            ...

        def search(self, string, pos=0, endpos=None) -> Match:
            ...

        # The replacement is either a literal or a callback that receives the match object.
        @overload
        def sub(self, repl: buf | Callable[[Match[bytes]], buf], string: buf, count: int = 0) -> bytes:
            ...

        @overload
        def sub(self, repl: str | Callable[[Match[str]], str], string: str, count: int = 0) -> str:
            ...

        def sub(self, repl, string, count=0) -> str | bytes:
            ...

        @overload
        def finditer(self, string: buf, pos: int = 0, endpos: int | None = None) -> Iterator[Match[bytes]]:
            ...

        @overload
        def finditer(self, string: str, pos: int = 0, endpos: int | None = None) -> Iterator[Match[str]]:
            ...

        def finditer(self, string, pos=0, endpos=None) -> Iterator[Match]:
            ...
else:
    PatternMethods = object


def _sized_suffix(lower: int, upper: int):
    if lower <= 0:
        if upper <= 0:
            return '*'
        else:
            return F'{{1,{upper}}}'
    elif upper <= 0:
        if lower == 1:
            return '+'
        else:
            return F'{{{lower},}}'
    else:
        return F'{{{lower},{upper}}}'


class pattern(PatternMethods):
    """
    A wrapper for a regular expression that can be applied to text as well as binary input.
    The expression is kept as a string and as its ASCII-encoded byte string; each variant is
    compiled on first use and then cached. Attributes of `re.Pattern` can be accessed directly
    on this object: Methods are routed to the string variant whenever one of the arguments is a
    string, and to the binary variant otherwise.
    """
    str_pattern: str
    bin_pattern: bytes

    def __init__(self, pattern: str, flags: int = 0, description: str = ''):
        """
        Store the expression `pattern`, the `re` module `flags` to compile it with, and a human
        readable `description`. Encoding the expression raises `UnicodeEncodeError` when it
        contains non-ASCII characters.
        """
        self.str_pattern = pattern
        self.bin_pattern = pattern.encode('ascii')
        self.flags = flags
        self.description = description

    def __bytes__(self):
        return self.bin_pattern

    @functools.cached_property
    def bin(self):
        """
        The compiled binary expression. The whole expression is wrapped in an additional
        capture group, so group indices are shifted by one compared to `pattern.str`.
        """
        return re.compile(B'(%s)' % self.bin_pattern, flags=self.flags)

    @functools.cached_property
    def str(self):
        """
        The compiled string expression.
        """
        return re.compile(self.str_pattern, flags=self.flags)

    def __hash__(self):
        # kept consistent with __eq__: the description is neither hashed nor compared
        return hash((self.str_pattern, self.flags))

    def __eq__(self, other):
        if isinstance(other, str):
            # a plain string only equals a pattern that has no flags
            return self.str_pattern == other and self.flags == 0
        if isinstance(other, pattern):
            return self.str_pattern == other.str_pattern and self.flags == other.flags
        return False

    def __str__(self):
        return self.str_pattern

    def __getattr__(self, verb):
        """
        Forward access to attributes of `re.Pattern`. Attributes that are not callable are read
        from the binary variant. For methods, a wrapper is returned that selects the string or
        the binary variant per call. Raises `AttributeError` for any other name.
        """
        if not hasattr(re.Pattern, verb):
            raise AttributeError(verb)
        bin_attr = getattr(self.bin, verb)
        if not callable(bin_attr):
            return bin_attr
        str_attr = getattr(self.str, verb)

        def wrapper(*args, **kwargs):
            # Keyword arguments have to be inspected as well: A call like search(string='...')
            # must not be routed to the binary expression.
            for argument in (*args, *kwargs.values()):
                if isinstance(argument, str):
                    return str_attr(*args, **kwargs)
            return bin_attr(*args, **kwargs)

        functools.update_wrapper(wrapper, bin_attr)
        return wrapper


class alphabet(pattern):
    """
    A pattern for runs of tokens that all match the expression `repeat`, optionally enclosed by
    a `prefix` and a `suffix` expression. The resulting expression has the following shape,
    where the quantifier is computed from the size bounds:

        prefix(?:repeat)QUANTIFIERsuffix
    """
    def __init__(
        self,
        repeat: str,
        prefix: str = '',
        suffix: str = '',
        lower: int = 1,
        upper: int = 0,
        prefix_min: int = 0,
        prefix_max: int = 0,
        suffix_min: int = 0,
        suffix_max: int = 0,
        token_size: int = 1,
        flags: int = 0,
        **kwargs
    ):
        """
        The values `lower` and `upper` bound the size of the whole match, `token_size` is the
        size of one match of `repeat`, and the remaining size arguments bound the size of the
        prefix and suffix. The attributes `lower` and `upper` of the new object hold the
        derived repetition counts, not the values that were passed in. Additional keyword
        arguments are forwarded to the constructor of `pattern`.
        """
        self.repeat, self.prefix, self.suffix = repeat, prefix, suffix
        self.prefix_min, self.prefix_max = prefix_min, prefix_max
        self.suffix_min, self.suffix_max = suffix_min, suffix_max
        self.token_size = token_size
        # prefix and suffix use up a part of the size budget
        min_count = lower - suffix_max - prefix_max
        max_count = upper - suffix_min - prefix_min
        if token_size > 1:
            # convert sizes to token counts
            min_count, rest = divmod(min_count, token_size)
            if min_count == 0 and rest:
                min_count = rest
            max_count, rest = divmod(max_count, token_size)
            if max_count >= 0 and rest:
                max_count += 1
        self.lower = min_count
        self.upper = max_count
        quantifier = _sized_suffix(min_count, max_count)
        pattern.__init__(
            self, RF'{prefix}(?:{repeat}){quantifier}{suffix}', flags, **kwargs)


class tokenize(pattern):
    """
    A pattern for at least two tokens that each match the expression `token` and are separated
    by matches of the expression `sep`. The expression `bound` is inserted before and after
    every token; by default it is the zero length match for a word boundary.

    When `unique_sep` is set, the first separator is captured in the named group `__sep__` and
    every following separator has to be identical to it. In this mode, each separator may
    additionally be surrounded by up to 50 whitespace characters unless the option
    `sep_ignores_whitespace` is disabled. Other keyword arguments are forwarded to `pattern`.
    """
    def __init__(self, token, sep, bound='\\b', unique_sep=False, sep_ignores_whitespace=True, **kwargs):
        if not unique_sep:
            template = R'(?:{b}{t}{b}{s})+(?:{b}{t}{b})'
        elif sep_ignores_whitespace:
            template = (
                R'(?:{b}{t}{b}\s{{0,50}}(?P<__sep__>{s})\s{{0,50}})'
                R'(?:(?:{b}{t}{b}\s{{0,50}}(?P=__sep__)\s{{0,50}})+{b}{t}{b}|{b}{t}{b})'
            )
        else:
            template = R'(?:{b}{t}{b}(?P<__sep__>{s}))(?:(?:{b}{t}{b}(?P=__sep__))+{b}{t}{b}|{b}{t}{b})'
        expression = template.format(s=sep, b=bound, t=token)
        pattern.__init__(self, expression, **kwargs)


class _PatternEnum(enum.Enum):
    @classmethod
    def get(cls, name, default=None):
        try:
            return cls[name]
        except KeyError:
            return default

    def __str__(self):
        return str(self.value)

    def __bytes__(self):
        return bytes(self.value)

    def __repr__(self):
        return F'<pattern {self.name}: {self.value}>'

    def __getattr__(self, name):
        if name in dir(re.Pattern):
            return getattr(self.value, name)
        raise AttributeError

    @property
    def description(self):
        return self.value.description

    @classmethod
    def make_table_with_shorts(cls, name: str):
        alias = {p.name: name for name, p in cls.__members__.items() if name != p.name}
        for p in cls:
            if p.name.endswith('array'):
                alias[p.name] = F'[{p.name[:-5]}]'
        width = max(len(p.name) for p in cls) + 4
        table = [
            (name.upper(), 'SHORT', 'DESCRIPTION'), *(
                (p.name, alias.get(p.name, ''), p.description) for p in cls)]
        return '\n'.join((
            F'{"":>4}{n:>{width}} {s:<5} {d}' for n, s, d in table))

    @classmethod
    def make_table(cls, name: str):
        width = max(len(p.name) for p in cls) + 4
        table = [(name.upper(), 'DESCRIPTION'), *((p.name, p.description) for p in cls)]
        return '\n'.join((F'{"":>4}{n:>{width}} {d}' for n, d in table))


# Any of the known top level domains, matched case-insensitively, provided that it is not
# directly followed by one of the "dealbreaker" expressions listed below.
_TLDS = R'(?i:{possible_tld})(?!(?:{dealbreakers}))'.format(
    possible_tld='|'.join(tlds),
    dealbreakers='|'.join([
        R'[a-z]',
        R'[A-Za-z]{3}',
        R'\.\w\w',
        R'\([\'"\w)]'
    ])
)

# see https://tools.ietf.org/html/rfc2181#section-11
# The following two values are format strings: Doubled braces are literal regular expression
# quantifiers, the fields {repeat} and {tlds} are filled in further below.
_format_serrated_domain = (
    R'(?:\w[a-zA-Z0-9\-\_]{{0,62}}?\.){repeat}'
    R'\w[a-zA-Z0-9\-\_]{{0,62}}\.{tlds}'
)
# Same as above, but the dot between labels may also be written as "[.]" or in parentheses.
# NOTE(review): the dot in \(.\) is not escaped, so it matches any single character between
# the parentheses - confirm whether \(\.\) was intended.
_format_defanged_domain = (
    R'(?:\w[a-zA-Z0-9\-\_]{{0,62}}?(?:\[\.\]|\(.\)|\.)){repeat}'
    R'\w[a-zA-Z0-9\-\_]{{0,62}}(?:\[\.\]|\(.\)|\.){tlds}'
)

# Sequences of one-byte to four-byte units, distinguished by the value of the leading byte.
_pattern_utf8 = R'(?:[\x00-\x7F]|[\xC0-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF7][\x80-\xBF]{3})+'
# Either a single tilde, or pairs of alphabet characters with an optional single trailing one.
_pattern_b92 = R'~|(?:[!-_a-}]{2})+[!-_a-}]?'

# Up to 20 labels may precede the label that is followed by the top level domain.
_pattern_serrated_domain = _format_serrated_domain.format(repeat='{0,20}', tlds=_TLDS)
_pattern_defanged_domain = _format_defanged_domain.format(repeat='{0,20}', tlds=_TLDS)

# Like the domain pattern, but at least one additional leading label is required.
_pattern_subdomain = _format_serrated_domain.format(repeat='{1,20}', tlds=_TLDS)

# A decimal number between 0 and 255 without leading zeros.
_pattern_octet = R'(?:1\d\d|2[0-4]\d|25[0-5]|[1-9]?\d)'
# Four octets separated by dots; the lookarounds reject matches that are adjacent to a digit.
_pattern_serrated_ipv4 = R'(?<![0-9])(?:{o}\.){{3}}{o}(?![0-9])'.format(o=_pattern_octet)
# Four octets separated by either "." or "[.]"; this variant has no digit lookarounds.
_pattern_defanged_ipv4 = R'(?:{o}{d}){{3}}{o}'.format(o=_pattern_octet, d=R'(?:\[\.\]|\.)')

# Taken from: https://stackoverflow.com/a/17871737/9130824
_pattern_ipv6 = (
    R'('
    R'([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|'          # 1:2:3:4:5:6:7:8
    R'[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|'       # 1::3:4:5:6:7:8   1::3:4:5:6:7:8  1::8
    R'([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|'  # 1::4:5:6:7:8     1:2::4:5:6:7:8  1:2::8
    R'([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|'  # 1::5:6:7:8       1:2:3::5:6:7:8  1:2:3::8
    R'([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|'  # 1::6:7:8         1:2:3:4::6:7:8  1:2:3:4::8
    R'([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|'  # 1::7:8           1:2:3:4:5::7:8  1:2:3:4:5::8
    R'([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|'         # 1::8             1:2:3:4:5:6::8  1:2:3:4:5:6::8
    R'([0-9a-fA-F]{1,4}:){1,7}:|'                         # 1::                              1:2:3:4:5:6:7::
    R':((:[0-9a-fA-F]{1,4}){1,7}|:)|'                     # ::2:3:4:5:6:7:8  ::2:3:4:5:6:7:8 ::8       ::
    R'fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|'     # fe80::7:8%eth0   fe80::7:8%1     (link-local IPv6 addresses with zone index)
    R'::(ffff(:0{1,4}){0,1}:){0,1}'                       #
    R'((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}'  #
    R'(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|'          # ::255.255.255.255   ::ffff:255.255.255.255  ::ffff:0:255.255.255.255
    R'([0-9a-fA-F]{1,4}:){1,4}:'                          # (IPv4-mapped IPv6 addresses and IPv4-translated addresses)
    R'((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}'  #
    R'(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])'           # 2001:db8:3:4::192.0.2.33  64:ff9b::192.0.2.33
    R')'                                                  # (IPv4-Embedded IPv6 Address)
)

# A socket is an IPv4 address or a domain, followed by a colon and a port of 2 to 5 digits.
_pattern_serrated_socket = f'(?:{_pattern_serrated_ipv4}|{_pattern_serrated_domain})(?::\\d{{2,5}})'
_pattern_defanged_socket = f'(?:{_pattern_defanged_ipv4}|{_pattern_defanged_domain})(?::\\d{{2,5}})'

# A host is a socket whose port group is optional: The appended question mark applies to the
# final group of the socket pattern.
_pattern_serrated_host = _pattern_serrated_socket + '?'
_pattern_defanged_host = _pattern_defanged_socket + '?'


def _sized_pattern_integer(lower: int = 0, upper: int = 0):
    """
    Return an expression for integer literals with an optional sign: binary with the prefix
    `0b`, hexadecimal with the prefix `0x`, octal with a leading zero, or decimal. The bounds
    `lower` and `upper` are converted to digit counts after subtracting a few characters for
    the sign and the prefix. A trailing lookahead requires that the literal is followed by an
    integer type suffix, by a character that is not alphanumeric, or by the end of the input.
    """
    prefixed_digits = _sized_suffix(max(1, lower - 3), upper - 2)
    octal_digits = _sized_suffix(max(0, lower - 3), upper - 2)
    decimal_digits = _sized_suffix(max(0, lower - 2), upper - 1)
    alternatives = '|'.join((
        F'0[bB][01]{prefixed_digits}',
        F'0[xX][0-9a-fA-F]{prefixed_digits}',
        F'0[1-7][0-7]{octal_digits}',
        F'[1-9][0-9]{decimal_digits}',
        '0',
    ))
    lookahead = R'(?=[uU]?[iI]\d{1,2}|[LlHh]|[^a-zA-Z0-9]|$)'
    return F'[-+]?(?:{alternatives})' + lookahead


# Integer literals without any size restriction.
_pattern_integer = _sized_pattern_integer()
# Decimal floating point literals with an optional sign, fractional part, and exponent.
_pattern_float = R'[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?'
# NOTE(review): this first definition of _pattern_number has no effect, it is replaced by the
# assignment that follows immediately.
_pattern_number = F'(?:(?:{_pattern_integer})|(?:{_pattern_float}))'
# The groups __fp1__ and __fp2__ record whether a fractional part was matched. If so, an
# exponent may follow; otherwise, the same lookahead as for integer literals is applied.
_pattern_number = (
    '[-+]?(?:0[bB][01]+|0[xX][0-9a-fA-F]+|0[1-7][0-7]*|(?:[1-9][0-9]*|0)(?P<__fp1__>\\.[0-9]*)?|(?P<__fp2__>\\.[0-9]+))'
    '(?(__fp1__)(?:[eE][-+]?[0-9]+)?|(?(__fp2__)(?:[eE][-+]?[0-9]+)?|(?=[uU]?[iI]\\d{1,2}|[LlHh]|[^a-zA-Z0-9]|$)))'
)


# Building blocks for date and time expressions. The keys are used as format fields in the
# templates below. Most single-letter keys appear to follow the strftime directive letters
# (assumption based on the expressions, e.g. 'A' for weekday names and 'Y' for 4-digit years).
_pattern_date_elements = {
    # weekday names, abbreviated or in full
    'A': '(?:{})'.format('|'.join([
        '[sS]un(?:day)?',
        '[mM]on(?:day)?',
        '[tT]ue(?:sday)?',
        '[wW]ed(?:nesday)?',
        '[tT]hu(?:rsday)?',
        '[fF]ri(?:day)?',
        '[sS]at(?:urday)?',
    ])),
    # month names, abbreviated or in full
    'B': '(?:{})'.format('|'.join([
        '[jJ]an(?:uary)?',
        '[fF]eb(?:ruary)?',
        '[mM]ar(?:ch)?',
        '[aA]pr(?:il)?',
        '[mM]ay',
        '[jJ]un(?:e)?',
        '[jJ]ul(?:y)?',
        '[aA]ug(?:ust)?',
        '[sS]ep(?:tember)?',
        '[oO]ct(?:ober)?',
        '[nN]ov(?:ember)?',
        '[dD]ec(?:ember)?',
    ])),
    # numbers with an ordinal suffix such as 1st, 22nd, or 30th
    'D': '(?:[23]?(?:1st|2nd|3rd|[4-9]th)|20th|30th)',
    'd': '(?:0?[1-9]|[12][0-9]|3[01])',
    'm': '(?:0[1-9]|1[012])',
    'I': '(?:0[1-9]|1[0-2])',
    'p': '(?:[ap]m|[AP]M)',
    'H': '(?:[01][0-9]|2[0-3])',
    'M': '(?:[0-5][0-9])',
    'S': '(?:[0-5][0-9])',
    # a sign followed by one to three pairs of digits and optionally six fractional digits
    'z': '(?:[-+](?:[0-9]{2}){1,3}(?:\\.[0-9]{6})?)',
    'y': '(?:[0-9]{2})',
    'Y': '(?:[0-9]{4})',
    # a separator: comma or semicolon, a whitespace character, or both in that order
    'c': '(?:[,;]|\\s|[,;]\\s)',
    'gap': '\\s{1,3}'
}

# Time of day: either hours from 00 to 23 and minutes with optional seconds, or hours from 01
# to 12 followed by an am/pm marker.
_pattern_time = r'(?:{H}:{M}(?::{S})?|{I}:{M}(?::{S})?{c}?\(?{p}\)?)'.format_map(_pattern_date_elements)
# The time expression is added as the element 'T' so that the date templates can use it.
_pattern_date_elements['T'] = _pattern_time

# Date templates; doubled braces are literal regular expression quantifiers.
_pattern_date_list = [
    R'{A}{c}(?:{d}|{D}){gap}{B}{c}{Y}(?:\s{T})?',
    R'{B}\s(?:{d}|{D}){c}{Y}(?:\s{T})?',
    R'{Y}[-:]{m}[-:]{d}(?:[T\x20]{H}:{M}(?::{S})?(?:[Z.][0-9]{{6}})?{z}?)',
    R'{m}/{d}/{Y}(?:{c}{T})?',
    R'{A}{c}{B}{c}(?:{d}|{D}){c}{T}(?:\s\(?UTC\)?)?\s{Y}',
]

# The date pattern is the alternation of all templates, tried in the above order.
_pattern_date = '|'.join(
    _p.format_map(_pattern_date_elements) for _p in _pattern_date_list)


def _sized_pattern_string(lower: int = 0, upper: int = 0):
    """
    Return an expression for quoted string literals. The alternatives are tried in this order:
    triple-quoted literals (first with an `r` prefix, then with backslash escapes), followed by
    single-line literals with an `r` prefix, single-line literals with backslash escapes, and
    finally backtick-quoted literals. The bounds `lower` and `upper` are converted to
    repetition counts for the string body after subtracting the length of the quotes.
    """
    long_count = _sized_suffix((lower - 6) // 1, (upper - 6))
    line_count = _sized_suffix((lower - 2) // 2, (upper - 2))
    alternatives = [
        # triple-quoted literals; the body is matched lazily
        FR'[rR]""".{long_count}?"""',
        FR"[rR]'''.{long_count}?'''",
        FR'"""(?:[^\\]|\\.){long_count}?"""',
        FR"'''(?:[^\\]|\\.){long_count}?'''",
        # literals with an r prefix that do not contain line breaks
        FR'[rR]"[^\r\n]{line_count}"',
        FR"[rR]'[^\r\n]{line_count}'",
        # literals with backslash escapes that do not contain line breaks
        FR'"(?:[^"\\\r\n]|\\[^\r\n]){line_count}"',
        FR"'(?:[^'\\\r\n]|\\[^\r\n]){line_count}'",
        # backtick-quoted literals
        FR'`(?:[^`\\]|\\.){line_count}`',
    ]
    return '(?:{})'.format('|'.join(alternatives))


def _sized_pattern_cmdstr(lower: int = 0, upper: int = 0):
    """
    Return an expression for string literals in single or double quotes where a quote character
    inside the literal is escaped by doubling it. The bounds `lower` and `upper` are converted
    to a repetition count for the string body after subtracting the two enclosing quotes.
    """
    count = _sized_suffix((lower - 2) // 2, upper - 2)
    double_quoted = FR'"(?:""|[^"]){count}"'
    single_quoted = FR"'(?:''|[^']){count}'"
    return F'(?:{double_quoted}|{single_quoted})'


# String literals where quotes are escaped by doubling them, without size restriction.
_pattern_cmdstr = _sized_pattern_cmdstr()
# Four alternatives: @"..."@ and @'...'@ blocks that span line breaks, double-quoted strings
# with backtick escapes and doubled quotes, and single-quoted strings with doubled quotes.
_pattern_ps1str = R'''(?:(?:@"\s*?[\r\n].*?[\r\n]"@)|(?:@'\s*?[\r\n].*?[\r\n]'@)|(?:"(?:`.|""|[^"\n])*")|(?:'(?:''|[^'\n])*'))'''
# Double-quoted strings where a quote inside the string is written as two quotes.
_pattern_vbastr = R'''"(?:""|[^"])*"'''
# Integers with one of the prefixes &b, &h, &o, or signed decimal integers.
_pattern_vbaint = R'(?:&[bB][01]+|&[hH][0-9a-fA-F]+|&[oO][0-7]*|[-+]?(?:[1-9][0-9]*|0))(?=\b|$)'
# Quoted string literals without size restriction.
_pattern_string = _sized_pattern_string()

# Percent-escaped bytes mixed with a set of characters that are accepted without escaping.
_pattern_urlenc = R'''(?:%[0-9a-fA-F]{2}|[0-9a-zA-Z\-\._~\?!$&=])+'''
# Sequences that consist exclusively of percent-escaped bytes.
_pattern_urlhex = R'''(?:%[0-9a-fA-F]{2})+'''

# Sequences of HTML entities: decimal, hexadecimal, or named; the semicolon is optional.
_pattern_htmlesc = R'''(?:&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z]{2,32}[0-9]{0,2});?)+'''

# An opening bracket or brace, followed by any sequence of string literals, keywords, numbers,
# and punctuation, and closed by a bracket or brace. Nesting is not verified.
# NOTE(review): the keyword list contains "none" but not "null" - confirm this is intended.
_pattern_json = (
    R'''\s{0,20}[\[\{](?:"(?:[^"\\\r\n]|\\[^\r\n])*'''
    R'''"(?:\s*[:,])?|(?:none|true|false|%s|%s|\]|\})(?:\s*,)?|[,\]\}\[\{\s]+)*[\]\}]'''
) % (_pattern_integer, _pattern_float)

# Data between the markers #@~^ and ^#~@, each accompanied by 6 printable characters and "==".
_pattern_wshenc = R'''#@~\^[ -~]{6}==(?:.*?)[ -~]{6}==\^#~@'''

# Optional credentials in front of the host: a user name of up to 256 characters, optionally
# followed by a colon and a password, and terminated by "@".
_part_url_credentials = (
    R'(?:([^"\'\s\x00-\x20\x7E-\xFF]{1,256})?'
    R'(?::([^"\'\s\x00-\x20\x7E-\xFF]{0,256})?)?@)?'
)
# An optional scheme of 2 to 20 letters followed by "//"; the defanged variant also accepts
# "[:]" in place of the colon.
_prefix_serrated_url = R'(([a-zA-Z]{2,20}:)?\/\/)' + _part_url_credentials
_prefix_defanged_url = R'(([a-zA-Z]{2,20}(?:\[:\]|:))?\/\/)' + _part_url_credentials
# Optional path, query, and fragment. The negative lookahead stops the match before something
# that looks like the scheme of another URL.
_suffix_combined_url = R'([/?#](?:[#/=:;$!?&.,\w\+\%\-\*\'~@()](?![a-zA-Z]{2,20}://))*)?'

_pattern_serrated_url = F'{_prefix_serrated_url}({_pattern_serrated_host}){_suffix_combined_url}'
_pattern_defanged_url = F'{_prefix_defanged_url}({_pattern_defanged_host}){_suffix_combined_url}'

# A local part of up to 256 characters, matched lazily, followed by "@" and a domain.
_pattern_email = fR'(?:[a-zA-Z0-9_\.\+\-]{{1,256}}?)@(?:{_pattern_serrated_domain})'
# Hex digits in groups of 8-4-4-4-12, optionally enclosed in braces.
_pattern_guid = R'(?:\b|\{)[0-9A-Fa-f]{8}(?:\-[0-9A-Fa-f]{4}){3}\-[0-9A-Fa-f]{12}(?:\}|\b)'

# A path component of 1 to 256 characters that contains no spaces.
_pattern_pathpart_nospace = R'[-\w+,.;@\]\[{}^`~#=]{1,256}'  # R'[^/\\:"<>|\s\x7E-\xFF\x00-\x1F\xAD]+'
# Path components that may contain single spaces: up to 4 for Windows and at most 1 for Linux.
_pattern_win_path_element = R'(?:{n} ){{0,4}}{n}'.format(n=_pattern_pathpart_nospace)
_pattern_nix_path_element = R'(?:{n} ){{0,1}}{n}'.format(n=_pattern_pathpart_nospace)
_pattern_win_env_variable = R'%[a-zA-Z][a-zA-Z0-9_\-\(\)]*%'

# Templates with the fields {s} for the path root and {p} for one path component. The absolute
# variant remembers the first separator in the group __pathsep__ and requires all following
# separators to be the same character. The relative variant only accepts backslashes.
_pattern_win_path_template_abs = R'(?:{s})(?P<__pathsep__>[\\\/])(?:{p}(?P=__pathsep__)){{0,256}}{p}(?:(?P=__pathsep__)|\b)'
_pattern_win_path_template_rel = R'(?:{p}|)\\(?:{p}\\){{0,256}}{p}(?:\\|\b)'
# The interpolated templates are inserted verbatim, so the result is still a format string.
_pattern_win_path_template = F'(?:{_pattern_win_path_template_abs}|{_pattern_win_path_template_rel})'

_pattern_win_root = '|'.join([
    _pattern_win_env_variable,    # environment variable
    R'[A-Za-z]:',                 # drive letter with colon
    R'\\\\[a-zA-Z0-9_.$@]{1,50}', # UNC path
    R'HK[A-Z_]{1,30}',            # registry root key
])
_pattern_win_path = _pattern_win_path_template.format(
    s=_pattern_win_root,
    p=_pattern_win_path_element
)
# The terse variant only accepts a drive letter as root and no spaces in path components.
_pattern_win_path_terse = _pattern_win_path_template.format(
    s="[A-Za-z]:",
    p=_pattern_pathpart_nospace
)

# Either a path that starts with a slash and has at least two components, or a path without
# leading slash that has at least three components.
_pattern_nix_path_template = R'(?:/(?:{n}/)+|(?:{n}/){{2,}}){n}'
_pattern_nix_path = _pattern_nix_path_template.format(
    n=_pattern_nix_path_element)
_pattern_nix_path_terse = _pattern_nix_path_template.format(
    n=_pattern_pathpart_nospace)

# In the combined patterns, the Linux alternative is tried before the Windows alternative.
_pattern_any_path = R'(?:{nix})|(?:{win})'.format(
    nix=_pattern_nix_path,
    win=_pattern_win_path,
)
_pattern_any_path_terse = R'(?:{nix})|(?:{win})'.format(
    nix=_pattern_nix_path_terse,
    win=_pattern_win_path_terse,
)

# A "begin" line with a three-digit number and a name, followed by lines that start with "M"
# and have 60 more characters, one optional line of arbitrary length, a line that consists of a
# single backtick, and the closing "end".
_pattern_uuencode = R'begin\s+\d{3}\s+[\x20!-~]+?\r?\n(?:M[\x20-\x60]{60}\r?\n)*(?:.*?\r?\n)?`\r?\nend'


def make_hexline_pattern(blocksize: int) -> str:
    """
    Return an expression that matches one line of a hex dump whose hexadecimal tokens encode
    `blocksize` bytes each, i.e. consist of twice as many hex digits. A token may carry the
    prefix `0x` and the suffix `h`. The matched line consists of three parts:

    - an optional leading column that is followed by whitespace; it is not captured
      (presumably the offset column - the code does not say),
    - the sequence of hexadecimal tokens, captured as a group,
    - optionally, after spaces or tabs, the remaining text, captured as another group.
    """
    digits = 2 * blocksize
    hex_token = RF'(?:0[xX])?[0-9a-fA-F]{{{digits}}}h?'
    hex_tokens = tokenize(hex_token, sep=R'[- \t\/:;,\\]{1,3}').str_pattern
    lead_char = R'[-\w:;,#\.\$\?!\/\\=\(\)\[\]\{\}]'
    return RF'(?:{lead_char}+\s+)?\s*({hex_tokens})(?:[ \t]+(.+?))?'


# One line of a hex dump where every token represents a single byte.
_pattern_hexline = make_hexline_pattern(1)

# A BEGIN line, any number of lines with 40 to 100 base64 characters, a final line with 1 to
# 100 base64 characters and up to three padding characters, and an END line. The field {n} is
# a line break, the field {b} is one base64 character.
_pattern_pem = (
    R'-----BEGIN(?:\s[A-Z0-9]+)+-----{n}'
    R'(?:{b}{{40,100}}{n})*{b}{{1,100}}={{0,3}}{n}'
    R'-----END(?:\s[A-Z0-9]+)+-----'
).format(n=R'(?:\r\n|\n\r|\n)', b=R'[0-9a-zA-Z\+\/]')


# Escape sequences that start with ESC and "[", followed by a numeric code, up to 20 further
# numeric parameters separated by semicolons, and the letter "m".
AnsiColor = pattern(R'\x1b\[(?:22|[34]\d|(?:9|10)[0-8]|0?[0-2])(?:;\d+){0,20}m')


class checks(_PatternEnum):
    """
    An enumeration of auxiliary patterns: sequences of JSON-like tokens, and path components
    that contain no spaces.
    """
    json = pattern(_pattern_json,
        description="Data that consists of JSON-like tokens; cannot detect actual JSON data.")
    path_element_nospace = pattern(_pattern_pathpart_nospace,
        description="A string that can be a valid file system path component and contains no spaces.")


class formats(_PatternEnum):
    """
    An enumeration of patterns for certain formats. The member order is the order in which the
    formats are listed in generated tables. The assignments at the end of the class body define
    short aliases for some of the members.
    """
    integer = pattern(_pattern_integer,
        description="any integer literal expression")
    float = pattern(_pattern_float,
        description="floating point literals")
    number = pattern(_pattern_number,
        description="either an integer or a float")
    string = pattern(_pattern_string,
        description="c-syntax string literal")
    cmdstr = pattern(_pattern_cmdstr,
        description="Windows command line escaped string literal")
    # the flag lets the dot match line breaks inside of multi-line strings
    ps1str = pattern(_pattern_ps1str, flags=re.DOTALL,
        description="PowerShell escaped string literal")
    vbastr = pattern(_pattern_vbastr,
        description="VBS/VBA string literal")
    vbaint = pattern(_pattern_vbaint,
        description="VBS/VBA integer literal")
    printable = alphabet(R'[\s!-~]',
        description="printable strings (includes whitespace)")
    urlquote = pattern(_pattern_urlenc,
        description="url-encoded characters, default char set")
    urlhex = pattern(_pattern_urlhex,
        description="hex-encoded buffer using URL escape sequences")
    htmlesc = pattern(_pattern_htmlesc,
        description="sequence of HTML-escape characters")
    # The array formats require the same separator character between all of their elements.
    intarray = tokenize(_pattern_integer, sep=R'[;,]', bound='', unique_sep=True,
        description="integers separated by commas or semicolons")
    strarray = tokenize(_pattern_string, sep=R'[;,]', bound='', unique_sep=True,
        description="strings separated by commas or semicolons")
    numarray = tokenize(_pattern_number, sep=R'[;,]', bound='', unique_sep=True,
        description="numbers separated by commas or semicolons")
    hexarray = tokenize(R'[0-9A-Fa-f]{2}', sep=R'[;,]', bound='', unique_sep=True,
        description="hex sequences separated by commas or semicolons")
    letters = alphabet(R'[a-zA-Z]',
        description="alphabetic characters")
    wshenc = pattern(_pattern_wshenc,
        description="encoded Windows Scripting Host Scripts (JS/VBS)")
    alnum = alphabet(R'[a-zA-Z0-9]',
        description="alphanumeric characters")
    # Either all upper case or all lower case. The quantifier of the second alternative has to
    # be outside of the character class; inside of it, only a single character or a literal
    # plus sign would be matched.
    base32 = pattern('[A-Z2-7]+|[a-z2-7]+',
        description="Base32 encoded strings")
    # The alphabet class wraps the expression in a group of its own; passing an expression that
    # opens a group without closing it results in a pattern that cannot be compiled.
    base58 = alphabet(R'[1-9A-HJ-NP-Za-km-z]',
        description="Base58 encoded strings")
    base62 = alphabet(R'[0-9A-Za-z]',
        description="Base62 encoded strings")
    base64 = alphabet(R'(?:[0-9a-zA-Z\+/]{4})', suffix=R'(?:(?:[0-9a-zA-Z\+/]{2,3})={0,3})?', suffix_max=6, token_size=4,
        description="Base64 encoded strings")
    base85 = alphabet(R'[-!+*()#-&^-~0-9;-Z]',
        description="Base85 encoded strings")
    ascii85 = alphabet(R'[!-u]',
        description="Ascii85 encoded strings")
    z85 = alphabet(R'[-0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]',
        description="Z85 encoded strings")
    base92 = pattern(_pattern_b92,
        description="Base92 encoded strings")
    # One repetition consumes 4 characters, which has to be declared as the token size in order
    # to obtain correct repetition counts when size limits are applied (as for base64 above).
    base64u = alphabet(R'[-\w]{4}', suffix=R'(?:[-\w]{2,3}={0,3})?', suffix_max=6, token_size=4,
        description="Base64 encoded strings using URL-safe alphabet")
    hex = alphabet(R'[0-9a-fA-F]{2}', token_size=2,
        description="hexadecimal strings")
    base16 = alphabet(R'[0-9A-F]{2}', token_size=2,
        description="uppercase hexadecimal strings")
    base16s = tokenize(R'[0-9a-fA-F]+', R'\s*', bound='',
        description="hexadecimal strings")
    base64s = alphabet(R'[-\s\w\+/]', suffix=R'(?:={0,3})?', suffix_max=3,
        description="Base64 encoded strings, separated by whitespace")
    base85s = alphabet(R'[-!+*()#-&^-~0-9;-Z\s]',
        description="Base85 encoded string, separated by whitespace")
    a85s = alphabet(R'[!-u\s]',
        description="Ascii85 encoded string, separated by whitespace")
    z85s = alphabet(R'[-\s0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]',
        description="Z85 encoded string, separated by whitespace")
    utf8 = pattern(_pattern_utf8,
        description="sequences of bytes that can be decoded as UTF8")
    hexdump = tokenize(_pattern_hexline, bound='', sep=R'\s*\n',
        description="typical hexdump output")
    uuenc = pattern(_pattern_uuencode,
        description="UUEncoded data")

    # shortcuts
    flt = float
    int = integer
    num = number
    str = string
    b32 = base32
    b58 = base58
    b62 = base62
    b64 = base64
    b85 = base85
    b92 = base92
    a85 = ascii85
    b16 = base16
    b64u = base64u
    b16s = base16s
    b64s = base64s
    b85s = base85s
    ps = printable
    hd = hexdump
    uq = urlquote
    uh = urlhex

    @classmethod
    def from_dashname(cls, key: str):
        """
        Return the member for the given `key`. A key in square brackets refers to the array
        variant of a format, i.e. `[int]` is looked up as `intarray`. The key is passed through
        `normalize_to_identifier` before the lookup. Raises `AttributeError` when the resulting
        name is not an attribute of this class.
        """
        if key.startswith('[') and key.endswith(']'):
            key = key[1:-1] + 'array'
        return getattr(cls, normalize_to_identifier(key))


class wallets(_PatternEnum):
    """
    An enumeration of patterns for crypto currency wallet addresses, named by ticker symbol.
    No descriptions are given, so the description of every member is the empty string.
    """
    # https://gist.github.com/etherx-dev/76559d9e6d916917a960e33ceea91481
    # NOTE(review): BNB and ETH are defined with the same expression and flags. Pattern objects
    # compare equal in that case, which makes the enum treat ETH as an alias of BNB: Iterating
    # this class yields BNB but never ETH. Confirm that this is intended.
    # NOTE(review): the last character class of BTCP is negated, unlike in all other entries;
    # verify against the referenced source.
    ADA = pattern("addr1[a-z0-9]{98}|(Ae2|DdzFF)[1-9A-HJ-NP-Za-km-z]{50,104}")
    ATOM = pattern("cosmos[-\\w\\.]{10,}")
    AVAX = pattern("0x([0-9a-f]{40}|[0-9A-F]{40})|(X-|P-)avax1[0ac-hj-np-z2-9]{38}")
    BCH = pattern("(bitcoincash:)?(q|p)[0ac-hj-np-z2-9]{41}|(BITCOINCASH:)?(Q|P)[0AC-HJ-NP-Z2-9]{41}")
    BNB = pattern("0x([0-9a-f]{40}|[0-9A-F]{40})")
    BTC = pattern("(?:[13][a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-z0-9]{25,39})")
    BTCP = pattern("5[HJK][1-9A-Za-z][^A-HJ-NP-Za-km-z0-9]{48}")
    DASH = pattern("X[1-9A-HJ-NP-Za-km-z]{33}")
    DOGE = pattern("D{1}[5-9A-HJ-NP-U]{1}[1-9A-HJ-NP-Za-km-z]{32}")
    DOT = pattern("1[0-9a-zA-Z]{47}")
    ETH = pattern("0x([0-9a-f]{40}|[0-9A-F]{40})")
    IOTA = pattern("iota[a-z0-9]{10,}")
    LSK = pattern("[0-9]{19}L")
    LTC = pattern("[LM3][a-km-zA-HJ-NP-Z1-9]{26,33}")
    NEO = pattern("N[0-9a-zA-Z]{33}")
    ONE = pattern("(?:bnb|one)1[a-z0-9]{38}")
    ONT = pattern("A[0-9a-zA-Z]{33}")
    RONIN = pattern("ronin:([a-f0-9]{40}|[A-F0-9]{40})")
    RVN = pattern("R[1-9A-HJ-NP-Za-km-z]{33}")
    SOLANA = pattern("[1-9A-HJ-NP-Za-km-z]{32,44}")
    TERRA = pattern("terra1[a-z0-9]{38}")
    TON = pattern("[UE][Qf][0-9a-zA-Z_-]{46}")
    TRON = pattern("T[A-Za-z1-9]{33}")
    XEM = pattern("N[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}")
    XLM = pattern("G[A-D][A-Z2-7]{54}")
    XMR = pattern("4[0-9AB][1-9A-HJ-NP-Za-km-z]{90,120}")
    XRP = pattern("r[0-9a-zA-Z]{24,34}|X[1-9A-HJ-NP-Za-km-z]{46}")
    ZCASH = pattern("(t[13][1-9A-HJ-NP-Za-km-z]{33}|z[cs][1-9A-HJ-NP-Za-km-z]{76,93}|u[1-9A-HJ-NP-Za-km-z]{100,1000})")


class indicators(_PatternEnum):
    """
    An enumeration of patterns for indicators.
    """
    date = pattern(_pattern_date,
        description="date or timestamp value in a common format")
    domain = pattern(_pattern_serrated_domain,
        description="domain names")
    email = pattern(_pattern_email,
        description="email addresses")
    guid = pattern(_pattern_guid,
        description="Windows GUID")
    ipv4 = pattern(_pattern_serrated_ipv4,
        description="IPv4 address string")
    ipv6 = pattern(_pattern_ipv6,
        description="IPv6 address string")
    host = pattern(_pattern_serrated_host,
        description="domain or IPv4 optionally followed by colon and port")
    socket = pattern(_pattern_serrated_socket,
        description="domain or IPv4 followed by colon and port number")
    url = pattern(_pattern_serrated_url,
        description="uniform resource locator addresses")
    # For the hashes, the lower and upper bound are equal, which produces an exact repetition
    # count. The expressions do not check what precedes or follows the hex digits.
    md5 = alphabet('[0-9A-Fa-f]', lower=32, upper=32,
        description="hex strings of length 32")
    sha1 = alphabet('[0-9A-Fa-f]', lower=40, upper=40,
        description="hex strings of length 40")
    sha256 = alphabet('[0-9A-Fa-f]', lower=64, upper=64,
        description="hex strings of length 64")
    subdomain = pattern(_pattern_subdomain,
        description="domain containing at least three parts including TLD")
    pem = pattern(_pattern_pem,
        description="PEM encoded cryptographic parameters")
    path = pattern(_pattern_any_path,
        description="any file path")
    nixpath = pattern(_pattern_nix_path,
        description="file paths (Linux)")
    winpath = pattern(_pattern_win_path,
        description="file paths (Windows)")
    tpath = pattern(_pattern_any_path_terse,
        description="file paths without whitespace")
    nixtpath = pattern(_pattern_nix_path_terse,
        description="tpath for Linux")
    wintpath = pattern(_pattern_win_path_terse,
        description="tpath for Windows")
    evar = pattern(_pattern_win_env_variable,
        description="Windows environment variable, i.e. %AppData%")

    # alias for the host pattern
    hostname = host

    @classmethod
    def from_dashname(cls, key):
        """
        Return the member for the given `key`, which is passed through the function
        `normalize_to_identifier` before the lookup. Raises `AttributeError` when the resulting
        name is not an attribute of this class.
        """
        return getattr(cls, normalize_to_identifier(key))


class defanged(_PatternEnum):
    """
    An enumeration of patterns for defanged indicators. Used only by the reverse
    operation of `refinery.defang`.
    """
    # Both patterns are created without a description, which therefore is the empty string.
    host = pattern(_pattern_defanged_host)
    "A defanged `refinery.lib.patterns.indicators.host`."
    url = pattern(_pattern_defanged_url)
    "A defanged `refinery.lib.patterns.indicators.url`."


def pattern_with_size_limits(p: pattern, lower: int | None, upper: int | None) -> pattern:
    """
    This attempts to construct a pattern from a given format that includes the given lower and
    upper bounds on total match size. This is not always possible.

    A bound that is `None` or negative is treated as zero, which means that it is not given.
    Patterns of type `alphabet` are rebuilt from their stored parameters with the new bounds.
    The integer, command line string, and string literal formats are rebuilt by their sized
    pattern factories. Any other pattern is returned unchanged. A rebuilt pattern has the same
    flags and description as `p`.
    """
    lower = max(0, lower or 0)
    upper = max(0, upper or 0)
    if isinstance(p, alphabet):
        return alphabet(
            p.repeat,
            p.prefix,
            p.suffix,
            lower,
            upper,
            p.prefix_min,
            p.prefix_max,
            p.suffix_min,
            p.suffix_max,
            p.token_size,
            flags=p.flags,
            description=p.description,
        )
    # The lookup relies on pattern objects being hashable and comparable by expression and flags.
    handlers = {
        formats.int.value     : _sized_pattern_integer,
        formats.cmdstr.value  : _sized_pattern_cmdstr,
        formats.string.value  : _sized_pattern_string,
    }
    if h := handlers.get(p):
        # Use the flags and description of the given pattern rather than those of another format.
        return pattern(h(lower, upper), p.flags, p.description)
    return p

Sub-modules

refinery.lib.patterns.tlds

Functions

def make_hexline_pattern(blocksize)
Expand source code Browse git
def make_hexline_pattern(blocksize: int) -> str:
    """
    Return an expression that matches one line of a hex dump whose hexadecimal tokens encode
    `blocksize` bytes each, i.e. consist of twice as many hex digits. A token may carry the
    prefix `0x` and the suffix `h`. The matched line consists of three parts:

    - an optional leading column that is followed by whitespace; it is not captured
      (presumably the offset column - the code does not say),
    - the sequence of hexadecimal tokens, captured as a group,
    - optionally, after spaces or tabs, the remaining text, captured as another group.
    """
    digits = 2 * blocksize
    hex_token = RF'(?:0[xX])?[0-9a-fA-F]{{{digits}}}h?'
    hex_tokens = tokenize(hex_token, sep=R'[- \t\/:;,\\]{1,3}').str_pattern
    lead_char = R'[-\w:;,#\.\$\?!\/\\=\(\)\[\]\{\}]'
    return RF'(?:{lead_char}+\s+)?\s*({hex_tokens})(?:[ \t]+(.+?))?'
def pattern_with_size_limits(p, lower, upper)

This attempts to construct a pattern from a given format that includes the given lower and upper bounds on total match size. This is not always possible.

Expand source code Browse git
def pattern_with_size_limits(p: pattern, lower: int | None, upper: int | None) -> pattern:
    """
    This attempts to construct a pattern from a given format that includes the given lower and
    upper bounds on total match size. This is not always possible.

    A bound that is `None` or negative is treated as zero, which means that it is not given.
    Patterns of type `alphabet` are rebuilt from their stored parameters with the new bounds.
    The integer, command line string, and string literal formats are rebuilt by their sized
    pattern factories. Any other pattern is returned unchanged. A rebuilt pattern has the same
    flags and description as `p`.
    """
    lower = max(0, lower or 0)
    upper = max(0, upper or 0)
    if isinstance(p, alphabet):
        return alphabet(
            p.repeat,
            p.prefix,
            p.suffix,
            lower,
            upper,
            p.prefix_min,
            p.prefix_max,
            p.suffix_min,
            p.suffix_max,
            p.token_size,
            flags=p.flags,
            description=p.description,
        )
    # The lookup relies on pattern objects being hashable and comparable by expression and flags.
    handlers = {
        formats.int.value     : _sized_pattern_integer,
        formats.cmdstr.value  : _sized_pattern_cmdstr,
        formats.string.value  : _sized_pattern_string,
    }
    if h := handlers.get(p):
        # Use the flags and description of the given pattern rather than those of another format.
        return pattern(h(lower, upper), p.flags, p.description)
    return p

Classes

class pattern (pattern, flags=0, description='')

A wrapper for regular expression pattern objects created from re.compile, allowing combination of several patterns into one via overloaded operators.

Expand source code Browse git
class pattern(PatternMethods):
    """
    A wrapper for a regular expression that can be applied to text as well as binary input.
    The expression is kept as a string and as its ASCII-encoded byte string; each variant is
    compiled on first use and then cached. Attributes of `re.Pattern` can be accessed directly
    on this object: Methods are routed to the string variant whenever one of the arguments is a
    string, and to the binary variant otherwise.
    """
    str_pattern: str
    bin_pattern: bytes

    def __init__(self, pattern: str, flags: int = 0, description: str = ''):
        """
        Store the expression `pattern`, the `re` module `flags` to compile it with, and a human
        readable `description`. Encoding the expression raises `UnicodeEncodeError` when it
        contains non-ASCII characters.
        """
        self.str_pattern = pattern
        self.bin_pattern = pattern.encode('ascii')
        self.flags = flags
        self.description = description

    def __bytes__(self):
        return self.bin_pattern

    @functools.cached_property
    def bin(self):
        """
        The compiled binary expression. The whole expression is wrapped in an additional
        capture group, so group indices are shifted by one compared to `pattern.str`.
        """
        return re.compile(B'(%s)' % self.bin_pattern, flags=self.flags)

    @functools.cached_property
    def str(self):
        """
        The compiled string expression.
        """
        return re.compile(self.str_pattern, flags=self.flags)

    def __hash__(self):
        # kept consistent with __eq__: the description is neither hashed nor compared
        return hash((self.str_pattern, self.flags))

    def __eq__(self, other):
        if isinstance(other, str):
            # a plain string only equals a pattern that has no flags
            return self.str_pattern == other and self.flags == 0
        if isinstance(other, pattern):
            return self.str_pattern == other.str_pattern and self.flags == other.flags
        return False

    def __str__(self):
        return self.str_pattern

    def __getattr__(self, verb):
        """
        Forward access to attributes of `re.Pattern`. Attributes that are not callable are read
        from the binary variant. For methods, a wrapper is returned that selects the string or
        the binary variant per call. Raises `AttributeError` for any other name.
        """
        if not hasattr(re.Pattern, verb):
            raise AttributeError(verb)
        bin_attr = getattr(self.bin, verb)
        if not callable(bin_attr):
            return bin_attr
        str_attr = getattr(self.str, verb)

        def wrapper(*args, **kwargs):
            # Keyword arguments have to be inspected as well: A call like search(string='...')
            # must not be routed to the binary expression.
            for argument in (*args, *kwargs.values()):
                if isinstance(argument, str):
                    return str_attr(*args, **kwargs)
            return bin_attr(*args, **kwargs)

        functools.update_wrapper(wrapper, bin_attr)
        return wrapper

Subclasses

Class variables

var str_pattern

The regular expression source text as a string.

var bin_pattern

The regular expression source text encoded as ASCII bytes.

Instance variables

var bin
Expand source code Browse git
@functools.cached_property
def bin(self):
    """
    The compiled expression for binary input, with the entire expression enclosed in
    one additional capture group.
    """
    grouped = B'(' + self.bin_pattern + B')'
    return re.compile(grouped, flags=self.flags)
var str
Expand source code Browse git
@functools.cached_property
def str(self):
    """
    The compiled expression for text input.
    """
    source = self.str_pattern
    return re.compile(source, flags=self.flags)
class alphabet (repeat, prefix='', suffix='', lower=1, upper=0, prefix_min=0, prefix_max=0, suffix_min=0, suffix_max=0, token_size=1, flags=0, **kwargs)

A pattern object representing strings of letters from a given alphabet, with an optional prefix and suffix.

Expand source code Browse git
class alphabet(pattern):
    """
    A pattern object representing strings of letters from a given alphabet, with
    an optional prefix and suffix.

    The resulting expression is the prefix, followed by a non-capturing group
    around `repeat` with a repetition count, followed by the suffix. The count
    is derived from `lower` and `upper` after subtracting the space reserved
    for prefix and suffix and, when `token_size` exceeds 1, converting the
    remaining lengths into a number of tokens.
    """
    def __init__(
        self,
        repeat: str,
        prefix: str = '',
        suffix: str = '',
        lower: int = 1,
        upper: int = 0,
        prefix_min: int = 0,
        prefix_max: int = 0,
        suffix_min: int = 0,
        suffix_max: int = 0,
        token_size: int = 1,
        flags: int = 0,
        **kwargs
    ):
        self.repeat = repeat
        self.token_size = token_size
        self.prefix = prefix
        self.prefix_min = prefix_min
        self.prefix_max = prefix_max
        self.suffix = suffix
        self.suffix_min = suffix_min
        self.suffix_max = suffix_max
        # The lower bound shrinks by the longest possible prefix and suffix, the
        # upper bound by the shortest ones.
        lower -= suffix_max + prefix_max
        upper -= suffix_min + prefix_min
        if token_size > 1:
            # Convert lengths into token counts.
            lower, remainder = divmod(lower, token_size)
            if lower == 0 and remainder:
                lower = remainder
            upper, remainder = divmod(upper, token_size)
            if upper >= 0 and remainder:
                upper += 1
        self.lower = lower
        self.upper = upper
        count = _sized_suffix(lower, upper)
        pattern.__init__(self, F'{prefix}(?:{repeat}){count}{suffix}', flags, **kwargs)

Ancestors

Inherited members

class tokenize (token, sep, bound='\\b', unique_sep=False, sep_ignores_whitespace=True, **kwargs)

A pattern representing a sequence of tokens matching the token pattern, separated by sequences matching the pattern sep. The optional parameter bound is required before and after each token, its default value is the regular expression zero length match for a word boundary.

Expand source code Browse git
class tokenize(pattern):
    """
    A pattern representing a sequence of tokens matching the `token` pattern, separated
    by sequences matching the pattern `sep`. The optional parameter `bound` is required
    before and after each token, its default value is the regular expression zero length
    match for a word boundary.

    When `unique_sep` is set, the first separator is captured in the named group
    `__sep__` and every following separator has to repeat it exactly. In that mode,
    `sep_ignores_whitespace` additionally permits up to 50 whitespace characters on
    either side of each separator.
    """
    def __init__(self, token, sep, bound='\\b', unique_sep=False, sep_ignores_whitespace=True, **kwargs):
        # A single token enclosed by the boundary expression.
        item = F'{bound}{token}{bound}'
        if not unique_sep:
            expression = F'(?:{item}{sep})+(?:{item})'
        else:
            gap = R'\s{0,50}' if sep_ignores_whitespace else ''
            # The first token defines the separator, all others refer back to it.
            first = F'(?:{item}{gap}(?P<__sep__>{sep}){gap})'
            again = F'(?:{item}{gap}(?P=__sep__){gap})'
            expression = F'{first}(?:{again}+{item}|{item})'
        pattern.__init__(self, expression, **kwargs)

Ancestors

Inherited members

class checks (*args, **kwds)

Create a collection of name/value pairs.

Example enumeration:

>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:

  • attribute access:

Color.RED

  • value lookup:

Color(1)

  • name lookup:

Color['RED']

Enumerations can be iterated over, and know how many members they have:

>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes – see the documentation for details.

Expand source code Browse git
class checks(_PatternEnum):
    """
    An enumeration of patterns for data made of JSON-like tokens and for file
    system path components that contain no spaces.
    """
    json = pattern(_pattern_json,
        description="Data that consists of JSON-like tokens; cannot detect actual JSON data.")
    path_element_nospace = pattern(_pattern_pathpart_nospace,
        description="A string that can be a valid file system path component and contains no spaces.")

Ancestors

  • refinery.lib.patterns._PatternEnum
  • enum.Enum

Class variables

var json

The type of the None singleton.

var path_element_nospace

The type of the None singleton.

class formats (*args, **kwds)

An enumeration of patterns for certain formats.

Expand source code Browse git
class formats(_PatternEnum):
    """
    An enumeration of patterns for certain formats.

    Each member carries a human-readable description. The short names defined
    in the shortcuts section at the end are bound to the same pattern objects
    as their long counterparts.
    """
    integer = pattern(_pattern_integer,
        description="any integer literal expression")
    float = pattern(_pattern_float,
        description="floating point literals")
    number = pattern(_pattern_number,
        description="either an integer or a float")
    string = pattern(_pattern_string,
        description="c-syntax string literal")
    cmdstr = pattern(_pattern_cmdstr,
        description="Windows command line escaped string literal")
    ps1str = pattern(_pattern_ps1str, flags=re.DOTALL,
        description="PowerShell escaped string literal")
    vbastr = pattern(_pattern_vbastr,
        description="VBS/VBA string literal")
    vbaint = pattern(_pattern_vbaint,
        description="VBS/VBA integer literal")
    printable = alphabet(R'[\s!-~]',
        description="printable strings (includes whitespace)")
    urlquote = pattern(_pattern_urlenc,
        description="url-encoded characters, default char set")
    urlhex = pattern(_pattern_urlhex,
        description="hex-encoded buffer using URL escape sequences")
    htmlesc = pattern(_pattern_htmlesc,
        description="sequence of HTML-escape characters")
    intarray = tokenize(_pattern_integer, sep=R'[;,]', bound='', unique_sep=True,
        description="integers separated by commas or semicolons")
    strarray = tokenize(_pattern_string, sep=R'[;,]', bound='', unique_sep=True,
        description="strings separated by commas or semicolons")
    numarray = tokenize(_pattern_number, sep=R'[;,]', bound='', unique_sep=True,
        description="numbers separated by commas or semicolons")
    hexarray = tokenize(R'[0-9A-Fa-f]{2}', sep=R'[;,]', bound='', unique_sep=True,
        description="hex sequences separated by commas or semicolons")
    letters = alphabet(R'[a-zA-Z]',
        description="alphabetic characters")
    wshenc = pattern(_pattern_wshenc,
        description="encoded Windows Scripting Host Scripts (JS/VBS)")
    alnum = alphabet(R'[a-zA-Z0-9]',
        description="alphanumeric characters")
    # The repetition operator belongs behind the character class of each alternative.
    # Placing it inside the class would match a single character and accept a literal
    # plus sign.
    base32 = pattern('[A-Z2-7]+|[a-z2-7]+',
        description="Base32 encoded strings")
    # The alphabet class already wraps its repeat expression in a non-capturing group,
    # so only the bare character class is passed here. An opening group without its
    # closing parenthesis would leave the assembled expression unbalanced.
    base58 = alphabet(R'[1-9A-HJ-NP-Za-km-z]',
        description="Base58 encoded strings")
    base62 = alphabet(R'[0-9A-Za-z]',
        description="Base62 encoded strings")
    base64 = alphabet(R'(?:[0-9a-zA-Z\+/]{4})', suffix=R'(?:(?:[0-9a-zA-Z\+/]{2,3})={0,3})?', suffix_max=6, token_size=4,
        description="Base64 encoded strings")
    base85 = alphabet(R'[-!+*()#-&^-~0-9;-Z]',
        description="Base85 encoded strings")
    ascii85 = alphabet(R'[!-u]',
        description="Ascii85 encoded strings")
    z85 = alphabet(R'[-0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]',
        description="Z85 encoded strings")
    base92 = pattern(_pattern_b92,
        description="Base92 encoded strings")
    base64u = alphabet(R'[-\w]{4}', suffix=R'(?:[-\w]{2,3}={0,3})?', suffix_max=6,
        description="Base64 encoded strings using URL-safe alphabet")
    hex = alphabet(R'[0-9a-fA-F]{2}', token_size=2,
        description="hexadecimal strings")
    base16 = alphabet(R'[0-9A-F]{2}', token_size=2,
        description="uppercase hexadecimal strings")
    base16s = tokenize(R'[0-9a-fA-F]+', R'\s*', bound='',
        description="hexadecimal strings")
    base64s = alphabet(R'[-\s\w\+/]', suffix=R'(?:={0,3})?', suffix_max=3,
        description="Base64 encoded strings, separated by whitespace")
    base85s = alphabet(R'[-!+*()#-&^-~0-9;-Z\s]',
        description="Base85 encoded string, separated by whitespace")
    a85s = alphabet(R'[!-u\s]',
        description="Ascii85 encoded string, separated by whitespace")
    z85s = alphabet(R'[-\s0-9a-zA-Z.:+=^!/*?&<>()\[\]{}@%$#]',
        description="Z85 encoded string, separated by whitespace")
    utf8 = pattern(_pattern_utf8,
        description="sequences of bytes that can be decoded as UTF8")
    hexdump = tokenize(_pattern_hexline, bound='', sep=R'\s*\n',
        description="typical hexdump output")
    uuenc = pattern(_pattern_uuencode,
        description="UUEncoded data")

    # shortcuts
    flt = float
    int = integer
    num = number
    str = string
    b32 = base32
    b58 = base58
    b62 = base62
    b64 = base64
    b85 = base85
    b92 = base92
    a85 = ascii85
    b16 = base16
    b64u = base64u
    b16s = base16s
    b64s = base64s
    b85s = base85s
    ps = printable
    hd = hexdump
    uq = urlquote
    uh = urlhex

    @classmethod
    def from_dashname(cls, key: str):
        """
        Look up a member by name. A key enclosed in square brackets selects the array
        variant of a format: the brackets are removed and the suffix `array` is appended,
        so that `[int]` becomes `intarray`. The key is passed through
        `normalize_to_identifier` before the attribute lookup; an unknown name raises
        an `AttributeError`.
        """
        if key.startswith('[') and key.endswith(']'):
            key = key[1:-1] + 'array'
        return getattr(cls, normalize_to_identifier(key))

Ancestors

  • refinery.lib.patterns._PatternEnum
  • enum.Enum

Class variables

var integer

The type of the None singleton.

var float

The type of the None singleton.

var number

The type of the None singleton.

var string

The type of the None singleton.

var cmdstr

The type of the None singleton.

var ps1str

The type of the None singleton.

var vbastr

The type of the None singleton.

var vbaint

The type of the None singleton.

var printable

The type of the None singleton.

var urlquote

The type of the None singleton.

var urlhex

The type of the None singleton.

var htmlesc

The type of the None singleton.

var intarray

The type of the None singleton.

var strarray

The type of the None singleton.

var numarray

The type of the None singleton.

var hexarray

The type of the None singleton.

var letters

The type of the None singleton.

var wshenc

The type of the None singleton.

var alnum

The type of the None singleton.

var base32

The type of the None singleton.

var base58

The type of the None singleton.

var base62

The type of the None singleton.

var base64

The type of the None singleton.

var base85

The type of the None singleton.

var ascii85

The type of the None singleton.

var z85

The type of the None singleton.

var base92

The type of the None singleton.

var base64u

The type of the None singleton.

var hex

The type of the None singleton.

var base16

The type of the None singleton.

var base16s

The type of the None singleton.

var base64s

The type of the None singleton.

var base85s

The type of the None singleton.

var a85s

The type of the None singleton.

var z85s

The type of the None singleton.

var utf8

The type of the None singleton.

var hexdump

The type of the None singleton.

var uuenc

The type of the None singleton.

var flt

The type of the None singleton.

var int

The type of the None singleton.

var num

The type of the None singleton.

var str

The type of the None singleton.

var b32

The type of the None singleton.

var b58

The type of the None singleton.

var b62

The type of the None singleton.

var b64

The type of the None singleton.

var b85

The type of the None singleton.

var b92

The type of the None singleton.

var a85

The type of the None singleton.

var b16

The type of the None singleton.

var b64u

The type of the None singleton.

var b16s

The type of the None singleton.

var b64s

The type of the None singleton.

var b85s

The type of the None singleton.

var ps

The type of the None singleton.

var hd

The type of the None singleton.

var uq

The type of the None singleton.

var uh

The type of the None singleton.

Static methods

def from_dashname(key)
class wallets (*args, **kwds)

Create a collection of name/value pairs.

Example enumeration:

>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:

  • attribute access:

Color.RED

  • value lookup:

Color(1)

  • name lookup:

Color['RED']

Enumerations can be iterated over, and know how many members they have:

>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes – see the documentation for details.

Expand source code Browse git
class wallets(_PatternEnum):
    """
    An enumeration of patterns for cryptocurrency wallet address strings; the
    members are named after the ticker symbol or name of the currency.
    """
    # https://gist.github.com/etherx-dev/76559d9e6d916917a960e33ceea91481
    ADA = pattern("addr1[a-z0-9]{98}|(Ae2|DdzFF)[1-9A-HJ-NP-Za-km-z]{50,104}")
    ATOM = pattern("cosmos[-\\w\\.]{10,}")
    AVAX = pattern("0x([0-9a-f]{40}|[0-9A-F]{40})|(X-|P-)avax1[0ac-hj-np-z2-9]{38}")
    BCH = pattern("(bitcoincash:)?(q|p)[0ac-hj-np-z2-9]{41}|(BITCOINCASH:)?(Q|P)[0AC-HJ-NP-Z2-9]{41}")
    # NOTE(review): BNB and ETH use the exact same expression text and flags. Since
    # pattern objects compare and hash by these two fields, enum.Enum presumably
    # registers ETH as an alias of BNB rather than as a separate member - confirm
    # that this is acceptable.
    BNB = pattern("0x([0-9a-f]{40}|[0-9A-F]{40})")
    BTC = pattern("(?:[13][a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-z0-9]{25,39})")
    # The trailing 48 characters are Base58 characters. A negated character class at
    # this position would exclude every Base58 character and could never match a
    # Base58-encoded key.
    BTCP = pattern("5[HJK][1-9A-Za-z][1-9A-HJ-NP-Za-km-z]{48}")
    DASH = pattern("X[1-9A-HJ-NP-Za-km-z]{33}")
    DOGE = pattern("D{1}[5-9A-HJ-NP-U]{1}[1-9A-HJ-NP-Za-km-z]{32}")
    DOT = pattern("1[0-9a-zA-Z]{47}")
    ETH = pattern("0x([0-9a-f]{40}|[0-9A-F]{40})")
    IOTA = pattern("iota[a-z0-9]{10,}")
    LSK = pattern("[0-9]{19}L")
    LTC = pattern("[LM3][a-km-zA-HJ-NP-Z1-9]{26,33}")
    NEO = pattern("N[0-9a-zA-Z]{33}")
    ONE = pattern("(?:bnb|one)1[a-z0-9]{38}")
    ONT = pattern("A[0-9a-zA-Z]{33}")
    RONIN = pattern("ronin:([a-f0-9]{40}|[A-F0-9]{40})")
    RVN = pattern("R[1-9A-HJ-NP-Za-km-z]{33}")
    SOLANA = pattern("[1-9A-HJ-NP-Za-km-z]{32,44}")
    TERRA = pattern("terra1[a-z0-9]{38}")
    TON = pattern("[UE][Qf][0-9a-zA-Z_-]{46}")
    TRON = pattern("T[A-Za-z1-9]{33}")
    XEM = pattern("N[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}-[A-Za-z0-9]{4,7}")
    XLM = pattern("G[A-D][A-Z2-7]{54}")
    XMR = pattern("4[0-9AB][1-9A-HJ-NP-Za-km-z]{90,120}")
    XRP = pattern("r[0-9a-zA-Z]{24,34}|X[1-9A-HJ-NP-Za-km-z]{46}")
    ZCASH = pattern("(t[13][1-9A-HJ-NP-Za-km-z]{33}|z[cs][1-9A-HJ-NP-Za-km-z]{76,93}|u[1-9A-HJ-NP-Za-km-z]{100,1000})")

Ancestors

  • refinery.lib.patterns._PatternEnum
  • enum.Enum

Class variables

var ADA

The type of the None singleton.

var ATOM

The type of the None singleton.

var AVAX

The type of the None singleton.

var BCH

The type of the None singleton.

var BNB

The type of the None singleton.

var BTC

The type of the None singleton.

var BTCP

The type of the None singleton.

var DASH

The type of the None singleton.

var DOGE

The type of the None singleton.

var DOT

The type of the None singleton.

var ETH

The type of the None singleton.

var IOTA

The type of the None singleton.

var LSK

The type of the None singleton.

var LTC

The type of the None singleton.

var NEO

The type of the None singleton.

var ONE

The type of the None singleton.

var ONT

The type of the None singleton.

var RONIN

The type of the None singleton.

var RVN

The type of the None singleton.

var SOLANA

The type of the None singleton.

var TERRA

The type of the None singleton.

var TON

The type of the None singleton.

var TRON

The type of the None singleton.

var XEM

The type of the None singleton.

var XLM

The type of the None singleton.

var XMR

The type of the None singleton.

var XRP

The type of the None singleton.

var ZCASH

The type of the None singleton.

class indicators (*args, **kwds)

An enumeration of patterns for indicators.

Expand source code Browse git
class indicators(_PatternEnum):
    """
    An enumeration of patterns for indicators.
    """
    date = pattern(_pattern_date,
        description="date or timestamp value in a common format")
    domain = pattern(_pattern_serrated_domain,
        description="domain names")
    email = pattern(_pattern_email,
        description="email addresses")
    guid = pattern(_pattern_guid,
        description="Windows GUID")
    ipv4 = pattern(_pattern_serrated_ipv4,
        description="IPv4 address string")
    ipv6 = pattern(_pattern_ipv6,
        description="IPv6 address string")
    host = pattern(_pattern_serrated_host,
        description="domain or IPv4 optionally followed by colon and port")
    socket = pattern(_pattern_serrated_socket,
        description="domain or IPv4 followed by colon and port number")
    url = pattern(_pattern_serrated_url,
        description="uniform resource locator addresses")
    # Hash values are hex strings of one fixed length: lower and upper bound coincide.
    md5 = alphabet('[0-9A-Fa-f]', lower=32, upper=32,
        description="hex strings of length 32")
    sha1 = alphabet('[0-9A-Fa-f]', lower=40, upper=40,
        description="hex strings of length 40")
    sha256 = alphabet('[0-9A-Fa-f]', lower=64, upper=64,
        description="hex strings of length 64")
    subdomain = pattern(_pattern_subdomain,
        description="domain containing at least three parts including TLD")
    pem = pattern(_pattern_pem,
        description="PEM encoded cryptographic parameters")
    path = pattern(_pattern_any_path,
        description="any file path")
    nixpath = pattern(_pattern_nix_path,
        description="file paths (Linux)")
    winpath = pattern(_pattern_win_path,
        description="file paths (Windows)")
    tpath = pattern(_pattern_any_path_terse,
        description="file paths without whitespace")
    nixtpath = pattern(_pattern_nix_path_terse,
        description="tpath for Linux")
    wintpath = pattern(_pattern_win_path_terse,
        description="tpath for Windows")
    evar = pattern(_pattern_win_env_variable,
        description="Windows environment variable, i.e. %AppData%")

    # A second name bound to the same pattern object as host.
    hostname = host

    @classmethod
    def from_dashname(cls, key: str):
        """
        Look up a member by name after passing the key through
        `normalize_to_identifier`; an unknown name raises an `AttributeError`.
        """
        return getattr(cls, normalize_to_identifier(key))

Ancestors

  • refinery.lib.patterns._PatternEnum
  • enum.Enum

Class variables

var date

The type of the None singleton.

var domain

The type of the None singleton.

var email

The type of the None singleton.

var guid

The type of the None singleton.

var ipv4

The type of the None singleton.

var ipv6

The type of the None singleton.

var host

The type of the None singleton.

var socket

The type of the None singleton.

var url

The type of the None singleton.

var md5

The type of the None singleton.

var sha1

The type of the None singleton.

var sha256

The type of the None singleton.

var subdomain

The type of the None singleton.

var pem

The type of the None singleton.

var path

The type of the None singleton.

var nixpath

The type of the None singleton.

var winpath

The type of the None singleton.

var tpath

The type of the None singleton.

var nixtpath

The type of the None singleton.

var wintpath

The type of the None singleton.

var evar

The type of the None singleton.

var hostname

The type of the None singleton.

Static methods

def from_dashname(key)
class defanged (*args, **kwds)

An enumeration of patterns for defanged indicators. Used only by the reverse operation of defang.

Expand source code Browse git
class defanged(_PatternEnum):
    """
    An enumeration of patterns for defanged indicators. Used only by the reverse
    operation of `refinery.defang`.
    """
    # The bare string below each member is an attribute docstring; the rendered
    # documentation displays it as the description of that member.
    host = pattern(_pattern_defanged_host)
    "A defanged `refinery.lib.patterns.indicators.host`."
    url = pattern(_pattern_defanged_url)
    "A defanged `refinery.lib.patterns.indicators.url`."

Ancestors

  • refinery.lib.patterns._PatternEnum
  • enum.Enum

Class variables

var host

A defanged indicators.host.

var url

A defanged indicators.url.