Module refinery.lib.scripts.guess
Expand source code Browse git
from __future__ import annotations
from refinery.lib.types import INF
def guess_language(data: str | bytearray | bytes | memoryview) -> str | None:
"""
Try to parse the input as JavaScript, PowerShell, and VBA, then return the name of the language
whose parser produces the fewest error nodes. Returns None when no parser can handle more than
half of the input.
"""
best_name: str | None = None
best_errors = INF()
from refinery.lib.scripts.js.model import JsErrorNode
from refinery.lib.scripts.js.parser import JsParser
from refinery.lib.scripts.ps1.model import Ps1ErrorNode
from refinery.lib.scripts.ps1.parser import Ps1Parser
from refinery.lib.scripts.vba.model import VbaErrorNode
from refinery.lib.scripts.vba.parser import VbaParser
backends = (
('ps1', Ps1Parser, Ps1ErrorNode),
('vba', VbaParser, VbaErrorNode),
('js', JsParser, JsErrorNode),
)
if not isinstance(data, str):
import codecs
data = codecs.decode(data, 'utf8', 'surrogateescape')
for name, parser_type, error_type in backends:
try:
ast = parser_type(data).parse()
errors = sum(
len(n.text) for n in ast.walk() if isinstance(n, error_type))
except Exception:
continue
if errors < best_errors:
best_errors = errors
best_name = name
if errors == 0:
break
if best_name is None or best_errors * 2 > len(data):
return None
return best_name
Functions
def guess_language(data)-
Try to parse the input as JavaScript, PowerShell, and VBA, then return the name of the language whose parser produces the fewest error nodes. Returns None when no parser can handle more than half of the input.
Expand source code Browse git
def guess_language(data: str | bytearray | bytes | memoryview) -> str | None: """ Try to parse the input as JavaScript, PowerShell, and VBA, then return the name of the language whose parser produces the fewest error nodes. Returns None when no parser can handle more than half of the input. """ best_name: str | None = None best_errors = INF() from refinery.lib.scripts.js.model import JsErrorNode from refinery.lib.scripts.js.parser import JsParser from refinery.lib.scripts.ps1.model import Ps1ErrorNode from refinery.lib.scripts.ps1.parser import Ps1Parser from refinery.lib.scripts.vba.model import VbaErrorNode from refinery.lib.scripts.vba.parser import VbaParser backends = ( ('ps1', Ps1Parser, Ps1ErrorNode), ('vba', VbaParser, VbaErrorNode), ('js', JsParser, JsErrorNode), ) if not isinstance(data, str): import codecs data = codecs.decode(data, 'utf8', 'surrogateescape') for name, parser_type, error_type in backends: try: ast = parser_type(data).parse() errors = sum( len(n.text) for n in ast.walk() if isinstance(n, error_type)) except Exception: continue if errors < best_errors: best_errors = errors best_name = name if errors == 0: break if best_name is None or best_errors * 2 > len(data): return None return best_name