Module refinery.lib.scripts.ps1.lexer
Expand source code Browse git
from __future__ import annotations
import enum
import re
from dataclasses import dataclass, field
from typing import Generator
from refinery.lib.scripts.ps1.token import _KEYWORDS, Ps1Token, Ps1TokenKind
class Ps1LexerMode(enum.Enum):
    """
    Lexing mode that controls how `Ps1Lexer.tokenize` treats ambiguous input such as a leading
    dash, a number followed by other characters, or a bare word.
    """
    # In this mode a dash is first tried as a dash operator (e.g. -eq, -and).
    EXPRESSION = 'expression'
    # In this mode a dash is first tried as a command parameter and bare words
    # are read as generic (command argument) tokens.
    ARGUMENT = 'argument'
# Maps the character that follows a backtick to the control character it denotes.
# NOTE(review): not referenced by the lexer code visible in this module; presumably
# used by string decoding elsewhere.
BACKTICK_ESCAPE = {
    '0' : '\0',
    'a' : '\a',
    'b' : '\b',
    'e' : '\x1b',
    'f' : '\f',
    'n' : '\n',
    'r' : '\r',
    't' : '\t',
    'v' : '\v',
}
# The ASCII apostrophe plus the typographic single quotes that the lexer accepts as
# single-quote delimiters.
SINGLE_QUOTES = frozenset("'\u2018\u2019\u201A\u201B")
# The ASCII double quote plus the typographic double quotes that the lexer accepts as
# double-quote delimiters.
DOUBLE_QUOTES = frozenset('"\u201C\u201D\u201E')
# The ASCII hyphen plus en dash, em dash and horizontal bar; all are treated as a dash.
DASHES = frozenset('-\u2013\u2014\u2015')
# Characters skipped between tokens: space, tab, no-break space and NEL. Line feeds and
# carriage returns are intentionally absent because newlines are emitted as tokens.
WHITESPACE = frozenset(' \t\u00A0\u0085')
# Translation table (for str.translate) that replaces every typographic quote from the
# sets above with its ASCII counterpart.
NORMALIZE_QUOTES = str.maketrans({
    '\u2018': "'",
    '\u2019': "'",
    '\u201A': "'",
    '\u201B': "'",
    '\u201C': '"',
    '\u201D': '"',
    '\u201E': '"',
})
# Two-character operators and openers, keyed by their ASCII spelling. The tokenizer
# normalizes typographic dashes to '-' before looking up this table.
_TWO_CHAR_OPS: dict[str, Ps1TokenKind] = {
    '+=' : Ps1TokenKind.PLUS_ASSIGN,
    '-=' : Ps1TokenKind.DASH_ASSIGN,
    '*=' : Ps1TokenKind.STAR_ASSIGN,
    '/=' : Ps1TokenKind.SLASH_ASSIGN,
    '%=' : Ps1TokenKind.PERCENT_ASSIGN,
    '++' : Ps1TokenKind.INCREMENT,
    '--' : Ps1TokenKind.DECREMENT,
    '..' : Ps1TokenKind.DOTDOT,
    '::' : Ps1TokenKind.DOUBLE_COLON,
    '&&' : Ps1TokenKind.DOUBLE_AMPERSAND,
    '||' : Ps1TokenKind.DOUBLE_PIPE,
    '@(' : Ps1TokenKind.AT_LPAREN,
    '@{' : Ps1TokenKind.AT_LBRACE,
    '$(' : Ps1TokenKind.DOLLAR_LPAREN,
}
# Single-character operators and punctuation. Typographic dashes are not keys here;
# the tokenizer falls back to Ps1TokenKind.DASH for them.
_ONE_CHAR_OPS: dict[str, Ps1TokenKind] = {
    '+' : Ps1TokenKind.PLUS,
    '-' : Ps1TokenKind.DASH,
    '*' : Ps1TokenKind.STAR,
    '/' : Ps1TokenKind.SLASH,
    '%' : Ps1TokenKind.PERCENT,
    '.' : Ps1TokenKind.DOT,
    ',' : Ps1TokenKind.COMMA,
    ';' : Ps1TokenKind.SEMICOLON,
    '!' : Ps1TokenKind.EXCLAIM,
    '(' : Ps1TokenKind.LPAREN,
    ')' : Ps1TokenKind.RPAREN,
    '{' : Ps1TokenKind.LBRACE,
    '}' : Ps1TokenKind.RBRACE,
    '[' : Ps1TokenKind.LBRACKET,
    ']' : Ps1TokenKind.RBRACKET,
    '|' : Ps1TokenKind.PIPE,
    '&' : Ps1TokenKind.AMPERSAND,
    '=' : Ps1TokenKind.EQUALS,
}
# Maps the lower-case name of every recognized dash operator (without the dash) to its
# canonical spelling with an ASCII dash, e.g. 'eq' -> '-eq'. The canonical spelling is
# what the lexer stores as the value of the resulting OPERATOR token.
_DASH_OPERATORS: dict[str, str] = {
    _name: F'-{_name}' for _name in (
        'and',
        'as',
        'band',
        'bnot',
        'bor',
        'bxor',
        'ccontains',
        'ceq',
        'cge',
        'cgt',
        'cin',
        'cle',
        'clike',
        'clt',
        'cmatch',
        'cne',
        'cnotcontains',
        'cnotin',
        'cnotlike',
        'cnotmatch',
        'contains',
        'creplace',
        'csplit',
        'eq',
        'f',
        'ge',
        'gt',
        'icontains',
        'ieq',
        'ige',
        'igt',
        'iin',
        'ile',
        'ilike',
        'ilt',
        'imatch',
        'in',
        'ine',
        'inotcontains',
        'inotin',
        'inotlike',
        'inotmatch',
        'ireplace',
        'is',
        'isnot',
        'isplit',
        'join',
        'le',
        'like',
        'lt',
        'match',
        'ne',
        'not',
        'notcontains',
        'notin',
        'notlike',
        'notmatch',
        'or',
        'replace',
        'shl',
        'shr',
        'split',
        'xor',
    )
}
# Characters that end a generic token: whitespace, line breaks, pipeline and statement
# separators, the comma, and braces/parentheses.
_FORCE_START_NEW_TOKEN = frozenset(' \t\r\n|&;,{}()')
# In expression mode, a number directly followed by one of these characters stays a
# number; any other non-boundary follower makes the tokenizer rescan it as a generic token.
_FORCE_NEW_TOKEN_AFTER_NUMBER = frozenset('!#%*+-./<=>]')
# In argument mode, a variable directly followed by one of these characters stays a
# variable token instead of being rescanned as a generic token.
_VARIABLE_STOPS_NO_RESCAN = frozenset('.[=')
# Matches a redirection operator at the current position.
_REDIRECTION_PATTERN = re.compile(
    r'[1-6*](?:>>|>&[12]|>)'  # explicit stream: 2>&1, 2>>, 2>
    r'|>>|>&1|>'              # bare: >>, >&1, >
    r'|<',                    # input
)
# Matches hexadecimal (0x), binary (0b) and decimal integers; underscores are allowed
# after the first digit and an optional l/L suffix is accepted.
_INTEGER_PATTERN = re.compile(
    r'0[xX][0-9a-fA-F][0-9a-fA-F_]*(?:l|L)?'
    r'|0[bB][01][01_]*(?:l|L)?'
    r'|[0-9][0-9_]*(?:l|L)?',
)
# Matches literals with a decimal point and/or exponent, optionally followed by a d/D
# suffix or a kb/mb/gb/tb/pb multiplier, and plain decimals that carry such a suffix.
_REAL_PATTERN = re.compile(
    r'(?:'
    r'(?:[0-9]*\.[0-9]+|[0-9]+\.)(?:[eE][+-]?[0-9]+)?'
    r'|[0-9]+[eE][+-]?[0-9]+'
    r')(?:[dD]|[kKmMgGtTpP][bB])?'
    r'|[0-9]+(?:\.[0-9]+)?(?:[dD]|[kKmMgGtTpP][bB])',
)
# Matches what follows the '$' or '@' sigil of a variable: an optional "scope:" prefix
# (not followed by a second colon) and then either a braced name or a simple name, or
# alternatively one of the single-character names $, ? and ^.
_VARIABLE_PATTERN = re.compile(
    r'(?:[a-zA-Z0-9_]+:(?!:))?'
    r'(?:\{[^}]+\}|[a-zA-Z0-9_][a-zA-Z0-9_?]*)'
    r'|[$?^]',
    re.IGNORECASE,
)
# Characters that end a parameter name when scanning a "-Name" parameter.
_PARAMETER_TERMINATORS = frozenset(' \t\r\n{}();,|&.[')
@dataclass
class Ps1Lexer:
    """
    A mode-aware lexer that turns a source string into a stream of `Ps1Token` objects. The
    current mode is the top of `mode_stack` and can be changed while tokenizing.
    """
    # The complete text to tokenize.
    source: str
    # Offset into `source` of the next character to be read.
    pos: int = 0
    # Stack of lexer modes; starts with a single EXPRESSION entry and the last item is
    # the active mode.
    mode_stack: list[Ps1LexerMode] = field(default_factory=lambda: [Ps1LexerMode.EXPRESSION])
@property
def mode(self) -> Ps1LexerMode:
return self.mode_stack[-1]
@mode.setter
def mode(self, value: Ps1LexerMode):
self.mode_stack[-1] = value
def push_mode(self, mode: Ps1LexerMode):
self.mode_stack.append(mode)
def pop_mode(self):
if len(self.mode_stack) > 1:
self.mode_stack.pop()
def _peek(self, count: int = 1) -> str:
return self.source[self.pos:self.pos + count]
def _at_end(self) -> bool:
return self.pos >= len(self.source)
def _skip_whitespace(self) -> bool:
start = self.pos
src = self.source
length = len(src)
while self.pos < length:
c = src[self.pos]
if c in WHITESPACE:
self.pos += 1
elif c == '`' and self.pos + 1 < length and src[self.pos + 1] == '\n':
self.pos += 2
elif c == '`' and self.pos + 2 < length and src[self.pos + 1:self.pos + 3] == '\r\n':
self.pos += 3
else:
break
return self.pos > start
def _read_line_comment(self) -> str:
start = self.pos
src = self.source
length = len(src)
while self.pos < length and src[self.pos] != '\n':
self.pos += 1
return src[start:self.pos]
def _read_block_comment(self) -> str:
start = self.pos
src = self.source
length = len(src)
self.pos += 2
while self.pos < length - 1:
if src[self.pos] == '#' and src[self.pos + 1] == '>':
self.pos += 2
return src[start:self.pos]
self.pos += 1
self.pos = length
return src[start:self.pos]
def _read_verbatim_string(self) -> str:
start = self.pos
src = self.source
length = len(src)
self.pos += 1
while self.pos < length:
c = src[self.pos]
if c in SINGLE_QUOTES:
self.pos += 1
if self.pos < length and src[self.pos] in SINGLE_QUOTES:
self.pos += 1
continue
return src[start:self.pos]
self.pos += 1
return src[start:self.pos]
def _skip_subexpression_content(self):
    """
    Advance the position past the body of a subexpression. The caller must already have
    consumed the opening "$("; on return the position is just behind the matching closing
    parenthesis, or at the end of the source if no matching parenthesis exists.

    Parentheses inside strings and here-strings are not counted, and backtick-escaped
    characters are skipped. Subexpressions nested in double-quoted strings are handled by
    recursion.
    """
    src = self.source
    length = len(src)
    # Nesting depth of parentheses; the opening "$(" accounts for the initial 1.
    depth = 1
    while self.pos < length:
        c = src[self.pos]
        # A backtick escapes the following character, whatever it is.
        if c == '`' and self.pos + 1 < length:
            self.pos += 2
            continue
        if c == '(':
            depth += 1
            self.pos += 1
            continue
        if c == ')':
            depth -= 1
            self.pos += 1
            if depth == 0:
                return
            continue
        if c == '@' and self.pos + 1 < length:
            nc = src[self.pos + 1]
            # A here-string is only recognized when the quote is directly followed by a line break.
            if (nc in SINGLE_QUOTES or nc in DOUBLE_QUOTES) and self.pos + 2 < length and src[self.pos + 2] in '\r\n':
                quote_set = SINGLE_QUOTES if nc in SINGLE_QUOTES else DOUBLE_QUOTES
                self.pos += 2
                # Skip the line break that ends the here-string header.
                if self.pos < length and src[self.pos] == '\r':
                    self.pos += 1
                if self.pos < length and src[self.pos] == '\n':
                    self.pos += 1
                while self.pos < length:
                    if src[self.pos] in '\r\n':
                        if src[self.pos] == '\r' and self.pos + 1 < length and src[self.pos + 1] == '\n':
                            self.pos += 2
                        else:
                            self.pos += 1
                        # The terminator is a matching quote plus '@' at the start of a line.
                        if self.pos + 1 < length and src[self.pos] in quote_set and src[self.pos + 1] == '@':
                            self.pos += 2
                            break
                    else:
                        self.pos += 1
                continue
        if c in SINGLE_QUOTES:
            self.pos += 1
            while self.pos < length:
                if src[self.pos] in SINGLE_QUOTES:
                    self.pos += 1
                    # A doubled quote is an escaped quote and does not end the string.
                    if self.pos < length and src[self.pos] in SINGLE_QUOTES:
                        self.pos += 1
                        continue
                    break
                self.pos += 1
            continue
        if c in DOUBLE_QUOTES:
            self.pos += 1
            while self.pos < length:
                sc = src[self.pos]
                if sc == '`' and self.pos + 1 < length:
                    self.pos += 2
                    continue
                # Nested subexpression inside the string: skip it recursively.
                if sc == '$' and self.pos + 1 < length and src[self.pos + 1] == '(':
                    self.pos += 2
                    self._skip_subexpression_content()
                    continue
                if sc in DOUBLE_QUOTES:
                    self.pos += 1
                    # A doubled quote is an escaped quote and does not end the string.
                    if self.pos < length and src[self.pos] in DOUBLE_QUOTES:
                        self.pos += 1
                        continue
                    break
                self.pos += 1
            continue
        self.pos += 1
def _read_expandable_string(self) -> str:
start = self.pos
src = self.source
length = len(src)
self.pos += 1
while self.pos < length:
c = src[self.pos]
if c == '`' and self.pos + 1 < length:
self.pos += 2
continue
if c == '$' and self.pos + 1 < length and src[self.pos + 1] == '(':
self.pos += 2
self._skip_subexpression_content()
continue
if c in DOUBLE_QUOTES:
self.pos += 1
if self.pos < length and src[self.pos] in DOUBLE_QUOTES:
self.pos += 1
continue
return src[start:self.pos]
self.pos += 1
return src[start:self.pos]
def _read_verbatim_here_string(self) -> str:
    """
    Consume a verbatim here-string whose "@'" opener starts at the current position and
    return its raw text including the delimiters. The string ends at the first line that
    begins with a single quote followed by "@". An unterminated here-string extends to the
    end of the source.
    """
    start = self.pos
    src = self.source
    length = len(src)
    # Skip the two-character opener.
    self.pos += 2
    # The line break that ends the header is deliberately not skipped here: it has to be
    # seen by the loop below because in an empty here-string the terminator follows it
    # directly. Skipping it would make the scan miss that terminator and run on to a later
    # one, or to the end of the source.
    while self.pos < length:
        if src[self.pos] in '\r\n':
            if src[self.pos] == '\r' and self.pos + 1 < length and src[self.pos + 1] == '\n':
                nl_end = self.pos + 2
            else:
                nl_end = self.pos + 1
            # The terminator is only valid at the very start of a line.
            if nl_end + 1 < length and src[nl_end] in SINGLE_QUOTES and src[nl_end + 1] == '@':
                self.pos = nl_end + 2
                return src[start:self.pos]
        self.pos += 1
    return src[start:self.pos]
def _read_expandable_here_string(self) -> str:
    """
    Consume an expandable here-string whose '@"' opener starts at the current position and
    return its raw text including the delimiters. The string ends at the first line that
    begins with a double quote followed by "@". Backtick escapes and embedded "$(...)"
    subexpressions are skipped as a unit. An unterminated here-string extends to the end of
    the source.
    """
    start = self.pos
    src = self.source
    length = len(src)
    # Skip the two-character opener.
    self.pos += 2
    # The line break that ends the header is deliberately not skipped here: it has to be
    # seen by the loop below because in an empty here-string the terminator follows it
    # directly. Skipping it would make the scan miss that terminator and run on to a later
    # one, or to the end of the source.
    while self.pos < length:
        c = src[self.pos]
        if c in '\r\n':
            if c == '\r' and self.pos + 1 < length and src[self.pos + 1] == '\n':
                nl_end = self.pos + 2
            else:
                nl_end = self.pos + 1
            # The terminator is only valid at the very start of a line.
            if nl_end + 1 < length and src[nl_end] in DOUBLE_QUOTES and src[nl_end + 1] == '@':
                self.pos = nl_end + 2
                return src[start:self.pos]
        if c == '`' and self.pos + 1 < length:
            self.pos += 2
            continue
        if c == '$' and self.pos + 1 < length and src[self.pos + 1] == '(':
            self.pos += 2
            self._skip_subexpression_content()
            continue
        self.pos += 1
    return src[start:self.pos]
def _read_variable(self, prefix: str) -> Ps1Token:
    """
    Consume a variable reference that starts at the current position with the sigil `prefix`
    and return it as a token. A prefix of "@" produces a splat variable token, any other
    prefix a regular variable token. If no variable name follows the sigil, the token
    consists of the sigil alone.
    """
    origin = self.pos
    name_start = origin + len(prefix)
    found = _VARIABLE_PATTERN.match(self.source, name_start)
    self.pos = found.end() if found else name_start
    if prefix == '@':
        kind = Ps1TokenKind.SPLAT_VARIABLE
    else:
        kind = Ps1TokenKind.VARIABLE
    return Ps1Token(kind, self.source[origin:self.pos], origin)
def _read_number(self) -> Ps1Token | None:
    """
    Try to consume a numeric literal at the current position. Returns an INTEGER or REAL
    token and advances the position, or returns None and leaves the position unchanged when
    no number starts here.
    """
    text = self.source
    limit = len(text)
    origin = self.pos

    real = _REAL_PATTERN.match(text, origin)
    if real is not None:
        literal = real.group()
        stop = real.end()
        if literal.endswith('.') and stop < limit:
            follower = text[stop]
            # A trailing dot followed by another dot, a letter or one of "_$@{" is not
            # part of the number; give it back.
            if follower == '.' or follower.isalpha() or follower in '_$@{':
                literal = literal[:-1]
                stop -= 1
                if literal and literal.replace('_', '').isdigit():
                    self.pos = stop
                    return Ps1Token(Ps1TokenKind.INTEGER, literal, origin)
        self.pos = stop
        return Ps1Token(Ps1TokenKind.REAL, literal, origin)

    integer = _INTEGER_PATTERN.match(text, origin)
    if integer is None:
        return None
    stop = integer.end()
    # An integer directly followed by a kb/mb/gb/tb/pb multiplier becomes a REAL token.
    if stop + 1 < limit and text[stop].lower() in 'kmgtp' and text[stop + 1].lower() == 'b':
        stop += 2
        self.pos = stop
        return Ps1Token(Ps1TokenKind.REAL, text[origin:stop], origin)
    self.pos = stop
    return Ps1Token(Ps1TokenKind.INTEGER, integer.group(), origin)
def _try_dash_operator(self) -> Ps1Token | None:
src = self.source
start = self.pos
self.pos += 1
m = re.match(r'[a-zA-Z]+', src[self.pos:])
if not m:
self.pos = start
return None
word = m.group().lower()
if word in _DASH_OPERATORS:
self.pos += m.end()
return Ps1Token(Ps1TokenKind.OPERATOR, _DASH_OPERATORS[word], start)
self.pos = start
return None
def _try_parameter(self) -> Ps1Token | None:
src = self.source
length = len(src)
start = self.pos
self.pos += 1
if self.pos >= length:
self.pos = start
return None
c = src[self.pos]
if not (c.isalpha() or c == '_' or c == '?'):
self.pos = start
return None
self.pos += 1
while self.pos < length:
c = src[self.pos]
if c in _PARAMETER_TERMINATORS or c.isspace():
break
if c in SINGLE_QUOTES or c in DOUBLE_QUOTES:
self.pos = start
return self._read_generic_token()
if c == ':':
self.pos += 1
break
self.pos += 1
return Ps1Token(Ps1TokenKind.PARAMETER, src[start:self.pos], start)
def _try_redirection(self) -> Ps1Token | None:
    """
    Try to read a redirection operator at the current position. Returns a REDIRECTION token
    and advances behind it, or returns None and leaves the position unchanged.
    """
    origin = self.pos
    found = _REDIRECTION_PATTERN.match(self.source, origin)
    if not found:
        return None
    self.pos = found.end()
    return Ps1Token(Ps1TokenKind.REDIRECTION, found.group(), origin)
def _read_generic_token(self) -> Ps1Token:
    """
    Consume a generic token (a bare word) that starts at the current position and runs up to
    the next token boundary character. Quoted strings, backtick escapes, variables and
    "$(...)" subexpressions inside the word are skipped as a unit, so boundary characters
    within them do not end the token.

    The token kind is GENERIC_EXPAND when the word contains a variable or subexpression and
    GENERIC_TOKEN otherwise. The token value is the raw source text.
    """
    text = self.source
    limit = len(text)
    origin = self.pos
    expands = False
    while self.pos < limit:
        here = self.pos
        ch = text[here]
        follower = text[here + 1] if here + 1 < limit else None
        if ch == '`' and follower is not None:
            # escaped character
            self.pos = here + 2
        elif ch in SINGLE_QUOTES:
            self._read_verbatim_string()
        elif ch in DOUBLE_QUOTES:
            self._read_expandable_string()
        elif ch == '$' and follower is not None:
            if follower == '(':
                expands = True
                self.pos = here + 2
                self._skip_subexpression_content()
                continue
            found = None
            if follower.isalnum() or follower in '_?{$^':
                found = _VARIABLE_PATTERN.match(text, here + 1)
            if found:
                expands = True
                self.pos = found.end()
            else:
                # a dollar sign that starts no expansion is an ordinary character
                self.pos = here + 1
        elif ch in _FORCE_START_NEW_TOKEN:
            break
        else:
            self.pos = here + 1
    if expands:
        kind = Ps1TokenKind.GENERIC_EXPAND
    else:
        kind = Ps1TokenKind.GENERIC_TOKEN
    return Ps1Token(kind, text[origin:self.pos], origin)
def tokenize(self) -> Generator[Ps1Token, Ps1LexerMode | None, None]:
    """
    Generate the tokens of the source, starting at the current position and ending with a
    single EOF token.

    The generator accepts a mode hint through `send`: when the consumer sends a
    `Ps1LexerMode` in response to a token, it replaces the active mode (the top of the mode
    stack) before the next token is read. Sending None, or simply iterating, leaves the mode
    unchanged. Comments are skipped and produce no tokens; line breaks produce NEWLINE
    tokens.
    """
    src = self.source
    length = len(src)
    while True:
        self._skip_whitespace()
        if self._at_end():
            yield Ps1Token(Ps1TokenKind.EOF, '', self.pos)
            return
        start = self.pos
        c = src[self.pos]
        # Two-character lookahead in which typographic dashes are replaced by the ASCII
        # dash, so that it can be compared against ASCII operator spellings.
        c2 = src[self.pos:self.pos + 2]
        if len(c2) == 2:
            d0 = '-' if c2[0] in DASHES else c2[0]
            d1 = '-' if c2[1] in DASHES else c2[1]
            c2 = d0 + d1
        if c == '\r' and self.pos + 1 < length and src[self.pos + 1] == '\n':
            self.pos += 2
            mode_hint = yield Ps1Token(Ps1TokenKind.NEWLINE, '\r\n', start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        if c == '\n':
            self.pos += 1
            mode_hint = yield Ps1Token(Ps1TokenKind.NEWLINE, '\n', start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # Comments are consumed without emitting a token.
        if c2 == '<#':
            self._read_block_comment()
            continue
        if c == '#':
            self._read_line_comment()
            continue
        # Here-strings: '@' directly followed by a quote character.
        if c == '@' and self.pos + 1 < length:
            nc = src[self.pos + 1]
            if nc in SINGLE_QUOTES:
                text = self._read_verbatim_here_string()
                mode_hint = yield Ps1Token(Ps1TokenKind.HSTRING_VERBATIM, text, start)
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
            if nc in DOUBLE_QUOTES:
                text = self._read_expandable_here_string()
                mode_hint = yield Ps1Token(Ps1TokenKind.HSTRING_EXPAND, text, start)
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
        # In argument mode, these operator spellings begin a bare word when they are
        # directly followed by a character that is not a token boundary.
        if c2 in ('..', '--', '++', '::', '+=', '-=', '*=', '/=', '%=') and self.mode == Ps1LexerMode.ARGUMENT:
            after = self.pos + 2
            if after < length and src[after] not in ' \t\r\n|&;,{}()':
                token = self._read_generic_token()
                if token.value:
                    mode_hint = yield token
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        if c2 in _TWO_CHAR_OPS:
            self.pos += 2
            kind = _TWO_CHAR_OPS[c2]
            # The token value is the dash-normalized spelling, not the raw source text.
            mode_hint = yield Ps1Token(kind, c2, start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # A colon followed by a letter or underscore starts a label.
        if c == ':' and self.pos + 1 < length and (src[self.pos + 1].isalpha() or src[self.pos + 1] == '_'):
            self.pos += 1
            while self.pos < length and (src[self.pos].isalnum() or src[self.pos] == '_'):
                self.pos += 1
            mode_hint = yield Ps1Token(Ps1TokenKind.LABEL, src[start:self.pos], start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # Variables ($name) and splatted variables (@name).
        if c == '$' or (c == '@' and self.pos + 1 < length and src[self.pos + 1] not in '({'):
            nc = src[self.pos + 1] if self.pos + 1 < length else ''
            if c == '$' and nc == '(':
                pass
            elif nc and (nc.isalnum() or nc in '_?{$^'):
                token = self._read_variable(c)
                if self.mode == Ps1LexerMode.ARGUMENT and self.pos < length:
                    fc = src[self.pos]
                    # In argument mode a variable glued to further text is part of a
                    # larger bare word; rescan from the sigil as a generic token.
                    if fc not in _FORCE_START_NEW_TOKEN and fc not in _VARIABLE_STOPS_NO_RESCAN:
                        self.pos = start
                        token = self._read_generic_token()
                mode_hint = yield token
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
        if c in SINGLE_QUOTES:
            text = self._read_verbatim_string()
            mode_hint = yield Ps1Token(Ps1TokenKind.STRING_VERBATIM, text, start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        if c in DOUBLE_QUOTES:
            text = self._read_expandable_string()
            mode_hint = yield Ps1Token(Ps1TokenKind.STRING_EXPAND, text, start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # A digit from 1 to 6 directly followed by '>' is tried as a stream redirection,
        # but only outside expression mode.
        if c in '123456' and self.pos + 1 < length and src[self.pos + 1] == '>':
            if self.mode != Ps1LexerMode.EXPRESSION:
                redir = self._try_redirection()
                if redir:
                    mode_hint = yield redir
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        if c.isdigit() or (c == '.' and self.pos + 1 < length and src[self.pos + 1].isdigit()):
            token = self._read_number()
            if token:
                nc = src[self.pos] if self.pos < length else None
                if nc is not None and nc not in _FORCE_START_NEW_TOKEN and not nc.isspace():
                    # The number is glued to further text. In argument mode it is always
                    # rescanned as a bare word; in expression mode only when the next
                    # character is not one that may directly follow a number.
                    if self.mode == Ps1LexerMode.ARGUMENT or (
                        nc not in _FORCE_NEW_TOKEN_AFTER_NUMBER
                    ):
                        self.pos = start
                        token = self._read_generic_token()
                mode_hint = yield token
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
        # A dash is a dash operator candidate in expression mode and a parameter
        # candidate in argument mode. If neither applies, control falls through.
        if c in DASHES:
            if self.mode == Ps1LexerMode.EXPRESSION:
                op = self._try_dash_operator()
                if op:
                    mode_hint = yield op
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
            elif self.mode == Ps1LexerMode.ARGUMENT:
                param = self._try_parameter()
                if param:
                    mode_hint = yield param
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        redir = self._try_redirection()
        if redir:
            mode_hint = yield redir
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # In argument mode a dot glued to further text starts a bare word.
        if self.mode == Ps1LexerMode.ARGUMENT:
            if c == '.' and self.pos + 1 < length:
                nc = src[self.pos + 1]
                if nc not in ' \t\r\n|&;,{}()$' and nc not in SINGLE_QUOTES and nc not in DOUBLE_QUOTES:
                    token = self._read_generic_token()
                    if token.value:
                        mode_hint = yield token
                        if mode_hint is not None:
                            self.mode = mode_hint
                        continue
        # In argument mode these operator characters start a bare word when glued to
        # further text.
        if self.mode == Ps1LexerMode.ARGUMENT and c in '*/%=!+':
            if self.pos + 1 < length and src[self.pos + 1] not in ' \t\r\n|&;,{}()':
                token = self._read_generic_token()
                if token.value:
                    mode_hint = yield token
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        if c in _ONE_CHAR_OPS or c in DASHES:
            self.pos += 1
            # Typographic dashes are not keys of the table and fall back to DASH; the
            # token value keeps the original character.
            kind = _ONE_CHAR_OPS.get(c) or Ps1TokenKind.DASH
            mode_hint = yield Ps1Token(kind, c, start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # In argument mode bare words are read as generic tokens. A word that is a
        # keyword as a whole is emitted with the keyword's token kind.
        if self.mode == Ps1LexerMode.ARGUMENT:
            if c.isalpha() or c == '_' or c == '\\' or c == '`':
                token = self._read_generic_token()
                if token.value:
                    word = token.value.lower()
                    kw = _KEYWORDS.get(word)
                    if kw is not None:
                        mode_hint = yield Ps1Token(kw, token.value, token.offset)
                    else:
                        mode_hint = yield token
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        # Identifier or keyword. Backticks inside the word are dropped and the
        # character following each backtick is kept.
        if c.isalpha() or c == '_' or c == '`':
            word = []
            if c == '`' and self.pos + 1 < length:
                self.pos += 1
                word.append(src[self.pos])
                self.pos += 1
            else:
                word.append(c)
                self.pos += 1
            while self.pos < length:
                ch = src[self.pos]
                if ch == '`' and self.pos + 1 < length:
                    self.pos += 1
                    word.append(src[self.pos])
                    self.pos += 1
                elif ch.isalnum() or ch == '_':
                    word.append(ch)
                    self.pos += 1
                elif ch in DASHES and self.mode != Ps1LexerMode.EXPRESSION:
                    # Dashes only belong to the word outside expression mode.
                    word.append(ch)
                    self.pos += 1
                else:
                    break
            if self.pos < length and src[self.pos] not in _FORCE_START_NEW_TOKEN:
                # The word is glued to further text. In argument mode, or when a quote
                # or dollar sign follows, rescan from the start as a generic token.
                if self.mode == Ps1LexerMode.ARGUMENT or (
                    src[self.pos] in SINGLE_QUOTES
                    or src[self.pos] in DOUBLE_QUOTES
                    or src[self.pos] == '$'
                ):
                    self.pos = start
                    token = self._read_generic_token()
                    if token.value:
                        mode_hint = yield token
                        if mode_hint is not None:
                            self.mode = mode_hint
                        continue
            identifier = ''.join(word)
            if identifier:
                kw = _KEYWORDS.get(identifier.lower())
                if kw is not None:
                    mode_hint = yield Ps1Token(kw, identifier, start)
                else:
                    mode_hint = yield Ps1Token(Ps1TokenKind.GENERIC_TOKEN, identifier, start)
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
        # Fallback: emit the single character as a generic token so that the lexer
        # always makes progress.
        self.pos += 1
        mode_hint = yield Ps1Token(Ps1TokenKind.GENERIC_TOKEN, c, start)
        if mode_hint is not None:
            self.mode = mode_hint
Classes
class Ps1LexerMode (*args, **kwds)-
Create a collection of name/value pairs.
Example enumeration:
>>> class Color(Enum):
...     RED = 1
...     BLUE = 2
...     GREEN = 3

Access them by:
- attribute access:
Color.RED
- value lookup:
Color(1)
- name lookup:
Color['RED']
Enumerations can be iterated over, and know how many members they have:
>>> len(Color)
3
>>> list(Color)
[<Color.RED: 1>, <Color.BLUE: 2>, <Color.GREEN: 3>]

Methods can be added to enumerations, and members can have their own attributes – see the documentation for details.
Expand source code Browse git
class Ps1LexerMode(enum.Enum): EXPRESSION = 'expression' ARGUMENT = 'argument'Ancestors
- enum.Enum
Class variables
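var EXPRESSION-
Enum member with the value 'expression'. In this mode the lexer first tries to read a dash as a dash operator.
var ARGUMENT-
Enum member with the value 'argument'. In this mode the lexer first tries to read a dash as a command parameter and reads bare words as generic tokens.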
class Ps1Lexer (source, pos=0, mode_stack=<factory>)-
Ps1Lexer(source: 'str', pos: 'int' = 0, mode_stack: 'list[Ps1LexerMode]' = <factory>)
Expand source code Browse git
@dataclass class Ps1Lexer: source: str pos: int = 0 mode_stack: list[Ps1LexerMode] = field(default_factory=lambda: [Ps1LexerMode.EXPRESSION]) @property def mode(self) -> Ps1LexerMode: return self.mode_stack[-1] @mode.setter def mode(self, value: Ps1LexerMode): self.mode_stack[-1] = value def push_mode(self, mode: Ps1LexerMode): self.mode_stack.append(mode) def pop_mode(self): if len(self.mode_stack) > 1: self.mode_stack.pop() def _peek(self, count: int = 1) -> str: return self.source[self.pos:self.pos + count] def _at_end(self) -> bool: return self.pos >= len(self.source) def _skip_whitespace(self) -> bool: start = self.pos src = self.source length = len(src) while self.pos < length: c = src[self.pos] if c in WHITESPACE: self.pos += 1 elif c == '`' and self.pos + 1 < length and src[self.pos + 1] == '\n': self.pos += 2 elif c == '`' and self.pos + 2 < length and src[self.pos + 1:self.pos + 3] == '\r\n': self.pos += 3 else: break return self.pos > start def _read_line_comment(self) -> str: start = self.pos src = self.source length = len(src) while self.pos < length and src[self.pos] != '\n': self.pos += 1 return src[start:self.pos] def _read_block_comment(self) -> str: start = self.pos src = self.source length = len(src) self.pos += 2 while self.pos < length - 1: if src[self.pos] == '#' and src[self.pos + 1] == '>': self.pos += 2 return src[start:self.pos] self.pos += 1 self.pos = length return src[start:self.pos] def _read_verbatim_string(self) -> str: start = self.pos src = self.source length = len(src) self.pos += 1 while self.pos < length: c = src[self.pos] if c in SINGLE_QUOTES: self.pos += 1 if self.pos < length and src[self.pos] in SINGLE_QUOTES: self.pos += 1 continue return src[start:self.pos] self.pos += 1 return src[start:self.pos] def _skip_subexpression_content(self): src = self.source length = len(src) depth = 1 while self.pos < length: c = src[self.pos] if c == '`' and self.pos + 1 < length: self.pos += 2 continue if c == '(': depth += 1 self.pos += 1 
continue if c == ')': depth -= 1 self.pos += 1 if depth == 0: return continue if c == '@' and self.pos + 1 < length: nc = src[self.pos + 1] if (nc in SINGLE_QUOTES or nc in DOUBLE_QUOTES) and self.pos + 2 < length and src[self.pos + 2] in '\r\n': quote_set = SINGLE_QUOTES if nc in SINGLE_QUOTES else DOUBLE_QUOTES self.pos += 2 if self.pos < length and src[self.pos] == '\r': self.pos += 1 if self.pos < length and src[self.pos] == '\n': self.pos += 1 while self.pos < length: if src[self.pos] in '\r\n': if src[self.pos] == '\r' and self.pos + 1 < length and src[self.pos + 1] == '\n': self.pos += 2 else: self.pos += 1 if self.pos + 1 < length and src[self.pos] in quote_set and src[self.pos + 1] == '@': self.pos += 2 break else: self.pos += 1 continue if c in SINGLE_QUOTES: self.pos += 1 while self.pos < length: if src[self.pos] in SINGLE_QUOTES: self.pos += 1 if self.pos < length and src[self.pos] in SINGLE_QUOTES: self.pos += 1 continue break self.pos += 1 continue if c in DOUBLE_QUOTES: self.pos += 1 while self.pos < length: sc = src[self.pos] if sc == '`' and self.pos + 1 < length: self.pos += 2 continue if sc == '$' and self.pos + 1 < length and src[self.pos + 1] == '(': self.pos += 2 self._skip_subexpression_content() continue if sc in DOUBLE_QUOTES: self.pos += 1 if self.pos < length and src[self.pos] in DOUBLE_QUOTES: self.pos += 1 continue break self.pos += 1 continue self.pos += 1 def _read_expandable_string(self) -> str: start = self.pos src = self.source length = len(src) self.pos += 1 while self.pos < length: c = src[self.pos] if c == '`' and self.pos + 1 < length: self.pos += 2 continue if c == '$' and self.pos + 1 < length and src[self.pos + 1] == '(': self.pos += 2 self._skip_subexpression_content() continue if c in DOUBLE_QUOTES: self.pos += 1 if self.pos < length and src[self.pos] in DOUBLE_QUOTES: self.pos += 1 continue return src[start:self.pos] self.pos += 1 return src[start:self.pos] def _read_verbatim_here_string(self) -> str: start = self.pos src 
= self.source length = len(src) self.pos += 2 if self.pos < length and src[self.pos] == '\r': self.pos += 1 if self.pos < length and src[self.pos] == '\n': self.pos += 1 while self.pos < length: if src[self.pos] in '\r\n': if src[self.pos] == '\r' and self.pos + 1 < length and src[self.pos + 1] == '\n': nl_end = self.pos + 2 else: nl_end = self.pos + 1 if nl_end + 1 < length and src[nl_end] in SINGLE_QUOTES and src[nl_end + 1] == '@': self.pos = nl_end + 2 return src[start:self.pos] self.pos += 1 return src[start:self.pos] def _read_expandable_here_string(self) -> str: start = self.pos src = self.source length = len(src) self.pos += 2 if self.pos < length and src[self.pos] == '\r': self.pos += 1 if self.pos < length and src[self.pos] == '\n': self.pos += 1 while self.pos < length: c = src[self.pos] if c in '\r\n': if c == '\r' and self.pos + 1 < length and src[self.pos + 1] == '\n': nl_end = self.pos + 2 else: nl_end = self.pos + 1 if nl_end + 1 < length and src[nl_end] in DOUBLE_QUOTES and src[nl_end + 1] == '@': self.pos = nl_end + 2 return src[start:self.pos] if c == '`' and self.pos + 1 < length: self.pos += 2 continue if c == '$' and self.pos + 1 < length and src[self.pos + 1] == '(': self.pos += 2 self._skip_subexpression_content() continue self.pos += 1 return src[start:self.pos] def _read_variable(self, prefix: str) -> Ps1Token: start = self.pos self.pos += len(prefix) m = _VARIABLE_PATTERN.match(self.source, self.pos) if m: self.pos = m.end() kind = Ps1TokenKind.SPLAT_VARIABLE if prefix == '@' else Ps1TokenKind.VARIABLE return Ps1Token(kind, self.source[start:self.pos], start) def _read_number(self) -> Ps1Token | None: src = self.source m = _REAL_PATTERN.match(src, self.pos) if m: text = m.group() end = m.end() if text.endswith('.') and end < len(src): nc = src[end] if nc == '.' 
or nc.isalpha() or nc in '_$@{': text = text[:-1] end -= 1 if text and text.replace('_', '').isdigit(): start = self.pos self.pos = end return Ps1Token(Ps1TokenKind.INTEGER, text, start) start = self.pos self.pos = end return Ps1Token(Ps1TokenKind.REAL, text, start) m = _INTEGER_PATTERN.match(src, self.pos) if m: start = self.pos self.pos = m.end() if ( self.pos + 1 < len(src) and src[self.pos].lower() in 'kmgtp' and src[self.pos + 1].lower() == 'b' ): text = src[start:self.pos + 2] self.pos += 2 return Ps1Token(Ps1TokenKind.REAL, text, start) return Ps1Token(Ps1TokenKind.INTEGER, m.group(), start) return None def _try_dash_operator(self) -> Ps1Token | None: src = self.source start = self.pos self.pos += 1 m = re.match(r'[a-zA-Z]+', src[self.pos:]) if not m: self.pos = start return None word = m.group().lower() if word in _DASH_OPERATORS: self.pos += m.end() return Ps1Token(Ps1TokenKind.OPERATOR, _DASH_OPERATORS[word], start) self.pos = start return None def _try_parameter(self) -> Ps1Token | None: src = self.source length = len(src) start = self.pos self.pos += 1 if self.pos >= length: self.pos = start return None c = src[self.pos] if not (c.isalpha() or c == '_' or c == '?'): self.pos = start return None self.pos += 1 while self.pos < length: c = src[self.pos] if c in _PARAMETER_TERMINATORS or c.isspace(): break if c in SINGLE_QUOTES or c in DOUBLE_QUOTES: self.pos = start return self._read_generic_token() if c == ':': self.pos += 1 break self.pos += 1 return Ps1Token(Ps1TokenKind.PARAMETER, src[start:self.pos], start) def _try_redirection(self) -> Ps1Token | None: m = _REDIRECTION_PATTERN.match(self.source, self.pos) if m: start = self.pos self.pos = m.end() return Ps1Token(Ps1TokenKind.REDIRECTION, m.group(), start) return None def _read_generic_token(self) -> Ps1Token: start = self.pos src = self.source length = len(src) has_expansion = False while self.pos < length: c = src[self.pos] if c == '`' and self.pos + 1 < length: self.pos += 2 continue if c in 
SINGLE_QUOTES: self._read_verbatim_string() continue if c in DOUBLE_QUOTES: self._read_expandable_string() continue if c == '$' and self.pos + 1 < length: nc = src[self.pos + 1] if nc == '(': has_expansion = True self.pos += 2 self._skip_subexpression_content() continue if nc.isalnum() or nc in '_?{$^': m = _VARIABLE_PATTERN.match(src, self.pos + 1) if m: has_expansion = True self.pos = m.end() continue self.pos += 1 continue if c in _FORCE_START_NEW_TOKEN: break self.pos += 1 kind = Ps1TokenKind.GENERIC_EXPAND if has_expansion else Ps1TokenKind.GENERIC_TOKEN return Ps1Token(kind, src[start:self.pos], start) def tokenize(self) -> Generator[Ps1Token, Ps1LexerMode | None, None]: src = self.source length = len(src) while True: self._skip_whitespace() if self._at_end(): yield Ps1Token(Ps1TokenKind.EOF, '', self.pos) return start = self.pos c = src[self.pos] c2 = src[self.pos:self.pos + 2] if len(c2) == 2: d0 = '-' if c2[0] in DASHES else c2[0] d1 = '-' if c2[1] in DASHES else c2[1] c2 = d0 + d1 if c == '\r' and self.pos + 1 < length and src[self.pos + 1] == '\n': self.pos += 2 mode_hint = yield Ps1Token(Ps1TokenKind.NEWLINE, '\r\n', start) if mode_hint is not None: self.mode = mode_hint continue if c == '\n': self.pos += 1 mode_hint = yield Ps1Token(Ps1TokenKind.NEWLINE, '\n', start) if mode_hint is not None: self.mode = mode_hint continue if c2 == '<#': self._read_block_comment() continue if c == '#': self._read_line_comment() continue if c == '@' and self.pos + 1 < length: nc = src[self.pos + 1] if nc in SINGLE_QUOTES: text = self._read_verbatim_here_string() mode_hint = yield Ps1Token(Ps1TokenKind.HSTRING_VERBATIM, text, start) if mode_hint is not None: self.mode = mode_hint continue if nc in DOUBLE_QUOTES: text = self._read_expandable_here_string() mode_hint = yield Ps1Token(Ps1TokenKind.HSTRING_EXPAND, text, start) if mode_hint is not None: self.mode = mode_hint continue if c2 in ('..', '--', '++', '::', '+=', '-=', '*=', '/=', '%=') and self.mode == 
Ps1LexerMode.ARGUMENT: after = self.pos + 2 if after < length and src[after] not in ' \t\r\n|&;,{}()': token = self._read_generic_token() if token.value: mode_hint = yield token if mode_hint is not None: self.mode = mode_hint continue if c2 in _TWO_CHAR_OPS: self.pos += 2 kind = _TWO_CHAR_OPS[c2] mode_hint = yield Ps1Token(kind, c2, start) if mode_hint is not None: self.mode = mode_hint continue if c == ':' and self.pos + 1 < length and (src[self.pos + 1].isalpha() or src[self.pos + 1] == '_'): self.pos += 1 while self.pos < length and (src[self.pos].isalnum() or src[self.pos] == '_'): self.pos += 1 mode_hint = yield Ps1Token(Ps1TokenKind.LABEL, src[start:self.pos], start) if mode_hint is not None: self.mode = mode_hint continue if c == '$' or (c == '@' and self.pos + 1 < length and src[self.pos + 1] not in '({'): nc = src[self.pos + 1] if self.pos + 1 < length else '' if c == '$' and nc == '(': pass elif nc and (nc.isalnum() or nc in '_?{$^'): token = self._read_variable(c) if self.mode == Ps1LexerMode.ARGUMENT and self.pos < length: fc = src[self.pos] if fc not in _FORCE_START_NEW_TOKEN and fc not in _VARIABLE_STOPS_NO_RESCAN: self.pos = start token = self._read_generic_token() mode_hint = yield token if mode_hint is not None: self.mode = mode_hint continue if c in SINGLE_QUOTES: text = self._read_verbatim_string() mode_hint = yield Ps1Token(Ps1TokenKind.STRING_VERBATIM, text, start) if mode_hint is not None: self.mode = mode_hint continue if c in DOUBLE_QUOTES: text = self._read_expandable_string() mode_hint = yield Ps1Token(Ps1TokenKind.STRING_EXPAND, text, start) if mode_hint is not None: self.mode = mode_hint continue if c in '123456' and self.pos + 1 < length and src[self.pos + 1] == '>': if self.mode != Ps1LexerMode.EXPRESSION: redir = self._try_redirection() if redir: mode_hint = yield redir if mode_hint is not None: self.mode = mode_hint continue if c.isdigit() or (c == '.' 
and self.pos + 1 < length and src[self.pos + 1].isdigit()): token = self._read_number() if token: nc = src[self.pos] if self.pos < length else None if nc is not None and nc not in _FORCE_START_NEW_TOKEN and not nc.isspace(): if self.mode == Ps1LexerMode.ARGUMENT or ( nc not in _FORCE_NEW_TOKEN_AFTER_NUMBER ): self.pos = start token = self._read_generic_token() mode_hint = yield token if mode_hint is not None: self.mode = mode_hint continue if c in DASHES: if self.mode == Ps1LexerMode.EXPRESSION: op = self._try_dash_operator() if op: mode_hint = yield op if mode_hint is not None: self.mode = mode_hint continue elif self.mode == Ps1LexerMode.ARGUMENT: param = self._try_parameter() if param: mode_hint = yield param if mode_hint is not None: self.mode = mode_hint continue redir = self._try_redirection() if redir: mode_hint = yield redir if mode_hint is not None: self.mode = mode_hint continue if self.mode == Ps1LexerMode.ARGUMENT: if c == '.' and self.pos + 1 < length: nc = src[self.pos + 1] if nc not in ' \t\r\n|&;,{}()$' and nc not in SINGLE_QUOTES and nc not in DOUBLE_QUOTES: token = self._read_generic_token() if token.value: mode_hint = yield token if mode_hint is not None: self.mode = mode_hint continue if self.mode == Ps1LexerMode.ARGUMENT and c in '*/%=!+': if self.pos + 1 < length and src[self.pos + 1] not in ' \t\r\n|&;,{}()': token = self._read_generic_token() if token.value: mode_hint = yield token if mode_hint is not None: self.mode = mode_hint continue if c in _ONE_CHAR_OPS or c in DASHES: self.pos += 1 kind = _ONE_CHAR_OPS.get(c) or Ps1TokenKind.DASH mode_hint = yield Ps1Token(kind, c, start) if mode_hint is not None: self.mode = mode_hint continue if self.mode == Ps1LexerMode.ARGUMENT: if c.isalpha() or c == '_' or c == '\\' or c == '`': token = self._read_generic_token() if token.value: word = token.value.lower() kw = _KEYWORDS.get(word) if kw is not None: mode_hint = yield Ps1Token(kw, token.value, token.offset) else: mode_hint = yield token if 
mode_hint is not None: self.mode = mode_hint continue if c.isalpha() or c == '_' or c == '`': word = [] if c == '`' and self.pos + 1 < length: self.pos += 1 word.append(src[self.pos]) self.pos += 1 else: word.append(c) self.pos += 1 while self.pos < length: ch = src[self.pos] if ch == '`' and self.pos + 1 < length: self.pos += 1 word.append(src[self.pos]) self.pos += 1 elif ch.isalnum() or ch == '_': word.append(ch) self.pos += 1 elif ch in DASHES and self.mode != Ps1LexerMode.EXPRESSION: word.append(ch) self.pos += 1 else: break if self.pos < length and src[self.pos] not in _FORCE_START_NEW_TOKEN: if self.mode == Ps1LexerMode.ARGUMENT or ( src[self.pos] in SINGLE_QUOTES or src[self.pos] in DOUBLE_QUOTES or src[self.pos] == '$' ): self.pos = start token = self._read_generic_token() if token.value: mode_hint = yield token if mode_hint is not None: self.mode = mode_hint continue identifier = ''.join(word) if identifier: kw = _KEYWORDS.get(identifier.lower()) if kw is not None: mode_hint = yield Ps1Token(kw, identifier, start) else: mode_hint = yield Ps1Token(Ps1TokenKind.GENERIC_TOKEN, identifier, start) if mode_hint is not None: self.mode = mode_hint continue self.pos += 1 mode_hint = yield Ps1Token(Ps1TokenKind.GENERIC_TOKEN, c, start) if mode_hint is not None: self.mode = mode_hintInstance variables
var source-
The text being tokenized; tokenize reads characters from it by index.
var mode_stack-
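Stack of lexer modes: push_mode appends to it, pop_mode removes the top entry while more than one remains, and the mode property returns its last element.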
var pos-
Current index into source; tokenize advances it as input is consumed.
var mode-
Expand source code Browse git
@property
def mode(self) -> Ps1LexerMode:
    """
    The lexer mode currently in effect, which is the last entry of `mode_stack`.
    """
    active_modes = self.mode_stack
    return active_modes[-1]
Methods
def push_mode(self, mode)-
Expand source code Browse git
def push_mode(self, mode: Ps1LexerMode):
    """
    Place `mode` on top of `mode_stack`, making it the last entry of the stack.
    """
    modes = self.mode_stack
    modes.append(mode)
Expand source code Browse git
def pop_mode(self):
    """
    Remove the top entry of `mode_stack`. The stack is left untouched when it holds one
    entry or fewer, so the bottom entry is never removed.
    """
    modes = self.mode_stack
    if len(modes) <= 1:
        return
    modes.pop()
Expand source code Browse git
def tokenize(self) -> Generator[Ps1Token, Ps1LexerMode | None, None]:
    """
    Scan `self.source` starting at `self.pos` and yield one `Ps1Token` at a time. The last
    token yielded is always an EOF token, after which the generator returns.

    The generator also accepts input: after most tokens, the value passed to `send()` is
    inspected and, when it is not None, assigned to `self.mode` before scanning continues.
    The value sent in response to the EOF token is ignored.

    The checks inside the loop are tried in order and the first one that produces a token
    (or consumes a comment) restarts the loop, so their ordering is significant.
    """
    src = self.source
    length = len(src)
    while True:
        self._skip_whitespace()
        if self._at_end():
            yield Ps1Token(Ps1TokenKind.EOF, '', self.pos)
            return
        start = self.pos
        c = src[self.pos]
        # Two-character lookahead in which every dash variant from DASHES is replaced by an
        # ASCII '-', so that the comparisons and table lookups on c2 below match. Tokens
        # built from c2 therefore carry the normalized text.
        c2 = src[self.pos:self.pos + 2]
        if len(c2) == 2:
            d0 = '-' if c2[0] in DASHES else c2[0]
            d1 = '-' if c2[1] in DASHES else c2[1]
            c2 = d0 + d1
        # Line breaks: CRLF is emitted as a single NEWLINE token, a lone LF as its own.
        if c == '\r' and self.pos + 1 < length and src[self.pos + 1] == '\n':
            self.pos += 2
            mode_hint = yield Ps1Token(Ps1TokenKind.NEWLINE, '\r\n', start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        if c == '\n':
            self.pos += 1
            mode_hint = yield Ps1Token(Ps1TokenKind.NEWLINE, '\n', start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # Comments produce no token; the readers are presumably responsible for advancing
        # self.pos past the comment text.
        if c2 == '<#':
            self._read_block_comment()
            continue
        if c == '#':
            self._read_line_comment()
            continue
        # '@' followed by a quote character starts a here-string.
        if c == '@' and self.pos + 1 < length:
            nc = src[self.pos + 1]
            if nc in SINGLE_QUOTES:
                text = self._read_verbatim_here_string()
                mode_hint = yield Ps1Token(Ps1TokenKind.HSTRING_VERBATIM, text, start)
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
            if nc in DOUBLE_QUOTES:
                text = self._read_expandable_here_string()
                mode_hint = yield Ps1Token(Ps1TokenKind.HSTRING_EXPAND, text, start)
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
        # In argument mode, these two-character sequences are first tried as the start of a
        # generic token when they are directly followed by a character that is not one of
        # the listed delimiters. Falls through when the generic read yields an empty value.
        if c2 in ('..', '--', '++', '::', '+=', '-=', '*=', '/=', '%=') and self.mode == Ps1LexerMode.ARGUMENT:
            after = self.pos + 2
            if after < length and src[after] not in ' \t\r\n|&;,{}()':
                token = self._read_generic_token()
                if token.value:
                    mode_hint = yield token
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        if c2 in _TWO_CHAR_OPS:
            self.pos += 2
            kind = _TWO_CHAR_OPS[c2]
            mode_hint = yield Ps1Token(kind, c2, start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # A colon directly followed by a letter or underscore is a label; the token value
        # includes the leading colon.
        if c == ':' and self.pos + 1 < length and (src[self.pos + 1].isalpha() or src[self.pos + 1] == '_'):
            self.pos += 1
            while self.pos < length and (src[self.pos].isalnum() or src[self.pos] == '_'):
                self.pos += 1
            mode_hint = yield Ps1Token(Ps1TokenKind.LABEL, src[start:self.pos], start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # Variables introduced by '$' or '@'. The sequence '$(' is deliberately skipped
        # here; it was already handled by the two-character operator lookup above.
        if c == '$' or (c == '@' and self.pos + 1 < length and src[self.pos + 1] not in '({'):
            nc = src[self.pos + 1] if self.pos + 1 < length else ''
            if c == '$' and nc == '(':
                pass
            elif nc and (nc.isalnum() or nc in '_?{$^'):
                token = self._read_variable(c)
                if self.mode == Ps1LexerMode.ARGUMENT and self.pos < length:
                    fc = src[self.pos]
                    # In argument mode, a variable that is directly followed by more text
                    # is rewound and re-read as one generic token instead.
                    if fc not in _FORCE_START_NEW_TOKEN and fc not in _VARIABLE_STOPS_NO_RESCAN:
                        self.pos = start
                        token = self._read_generic_token()
                mode_hint = yield token
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
        if c in SINGLE_QUOTES:
            text = self._read_verbatim_string()
            mode_hint = yield Ps1Token(Ps1TokenKind.STRING_VERBATIM, text, start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        if c in DOUBLE_QUOTES:
            text = self._read_expandable_string()
            mode_hint = yield Ps1Token(Ps1TokenKind.STRING_EXPAND, text, start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # A digit 1-6 followed by '>' is tried as a redirection, but not in expression
        # mode; there it falls through to the number handling below.
        if c in '123456' and self.pos + 1 < length and src[self.pos + 1] == '>':
            if self.mode != Ps1LexerMode.EXPRESSION:
                redir = self._try_redirection()
                if redir:
                    mode_hint = yield redir
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        # Numbers, including those that start with a decimal point.
        if c.isdigit() or (c == '.'
                and self.pos + 1 < length and src[self.pos + 1].isdigit()):
            token = self._read_number()
            if token:
                nc = src[self.pos] if self.pos < length else None
                if nc is not None and nc not in _FORCE_START_NEW_TOKEN and not nc.isspace():
                    # The number is directly followed by more text: rewind and read the
                    # whole run as a generic token. In expression mode this is skipped
                    # when the next character is in _FORCE_NEW_TOKEN_AFTER_NUMBER.
                    if self.mode == Ps1LexerMode.ARGUMENT or (
                        nc not in _FORCE_NEW_TOKEN_AFTER_NUMBER
                    ):
                        self.pos = start
                        token = self._read_generic_token()
                mode_hint = yield token
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
        # A leading dash is tried as a dash operator in expression mode and as a parameter
        # in argument mode. If neither attempt succeeds, the dash is handled further below.
        if c in DASHES:
            if self.mode == Ps1LexerMode.EXPRESSION:
                op = self._try_dash_operator()
                if op:
                    mode_hint = yield op
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
            elif self.mode == Ps1LexerMode.ARGUMENT:
                param = self._try_parameter()
                if param:
                    mode_hint = yield param
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        # Redirection is attempted at every remaining position, regardless of mode.
        redir = self._try_redirection()
        if redir:
            mode_hint = yield redir
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # In argument mode, a dot followed by anything other than a delimiter, '$', or a
        # quote character starts a generic token.
        if self.mode == Ps1LexerMode.ARGUMENT:
            if c == '.' and self.pos + 1 < length:
                nc = src[self.pos + 1]
                if nc not in ' \t\r\n|&;,{}()$' and nc not in SINGLE_QUOTES and nc not in DOUBLE_QUOTES:
                    token = self._read_generic_token()
                    if token.value:
                        mode_hint = yield token
                        if mode_hint is not None:
                            self.mode = mode_hint
                        continue
        # Likewise, in argument mode these operator characters start a generic token when
        # they are directly followed by a non-delimiter character.
        if self.mode == Ps1LexerMode.ARGUMENT and c in '*/%=!+':
            if self.pos + 1 < length and src[self.pos + 1] not in ' \t\r\n|&;,{}()':
                token = self._read_generic_token()
                if token.value:
                    mode_hint = yield token
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        # Single-character operators. Dash variants that have no entry of their own in
        # _ONE_CHAR_OPS are emitted with kind DASH; the token keeps the original character.
        if c in _ONE_CHAR_OPS or c in DASHES:
            self.pos += 1
            kind = _ONE_CHAR_OPS.get(c) or Ps1TokenKind.DASH
            mode_hint = yield Ps1Token(kind, c, start)
            if mode_hint is not None:
                self.mode = mode_hint
            continue
        # In argument mode, words are read as generic tokens. A word whose lowercased
        # value is a key of _KEYWORDS is re-tagged with the keyword kind.
        if self.mode == Ps1LexerMode.ARGUMENT:
            if c.isalpha() or c == '_' or c == '\\' or c == '`':
                token = self._read_generic_token()
                if token.value:
                    word = token.value.lower()
                    kw = _KEYWORDS.get(word)
                    if kw is not None:
                        mode_hint = yield Ps1Token(kw, token.value, token.offset)
                    else:
                        mode_hint = yield token
                    if mode_hint is not None:
                        self.mode = mode_hint
                    continue
        # Identifier scanning. A backtick is dropped and the character that follows it is
        # taken literally into the word.
        if c.isalpha() or c == '_' or c == '`':
            word = []
            if c == '`' and self.pos + 1 < length:
                self.pos += 1
                word.append(src[self.pos])
                self.pos += 1
            else:
                word.append(c)
                self.pos += 1
            while self.pos < length:
                ch = src[self.pos]
                if ch == '`' and self.pos + 1 < length:
                    self.pos += 1
                    word.append(src[self.pos])
                    self.pos += 1
                elif ch.isalnum() or ch == '_':
                    word.append(ch)
                    self.pos += 1
                elif ch in DASHES and self.mode != Ps1LexerMode.EXPRESSION:
                    # Dashes are part of the word only outside of expression mode.
                    word.append(ch)
                    self.pos += 1
                else:
                    break
            if self.pos < length and src[self.pos] not in _FORCE_START_NEW_TOKEN:
                # The word is directly followed by more text. In argument mode, or when
                # that text starts with a quote or '$', rewind and read a generic token.
                if self.mode == Ps1LexerMode.ARGUMENT or (
                    src[self.pos] in SINGLE_QUOTES
                    or src[self.pos] in DOUBLE_QUOTES
                    or src[self.pos] == '$'
                ):
                    self.pos = start
                    token = self._read_generic_token()
                    if token.value:
                        mode_hint = yield token
                        if mode_hint is not None:
                            self.mode = mode_hint
                        continue
                    # NOTE(review): self.pos was rewound to start above. If the generic
                    # read can return an empty value without advancing, the identifier
                    # below is emitted without any progress being made - TODO confirm
                    # that this cannot happen.
            identifier = ''.join(word)
            if identifier:
                kw = _KEYWORDS.get(identifier.lower())
                if kw is not None:
                    mode_hint = yield Ps1Token(kw, identifier, start)
                else:
                    mode_hint = yield Ps1Token(Ps1TokenKind.GENERIC_TOKEN, identifier, start)
                if mode_hint is not None:
                    self.mode = mode_hint
                continue
        # Fallback: nothing above matched, so the single character is emitted as a generic
        # token, which guarantees that the loop always advances.
        self.pos += 1
        mode_hint = yield Ps1Token(Ps1TokenKind.GENERIC_TOKEN, c, start)
        if mode_hint is not None:
            self.mode = mode_hint