"""Handwritten parser of dependency specifiers.
The docstring for each __parse_* function contains ENBF-inspired grammar representing the implementation. """
import ast from typing import Any, List, NamedTuple, Optional, Tuple, Union
from ._tokenizer import DEFAULT_RULES, Tokenizer
class Node:
    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}('{self}')>"

    def serialize(self) -> str:
        raise NotImplementedError


class Variable(Node):
    def serialize(self) -> str:
        return str(self)


class Value(Node):
    def serialize(self) -> str:
        return f'"{self}"'


class Op(Node):
    def serialize(self) -> str:
        return str(self)


MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]]
# MarkerList = List[Union["MarkerList", MarkerAtom, str]]
# mypy does not support recursive type definition
# https://github.com/python/mypy/issues/731
MarkerAtom = Any
MarkerList = List[Any]

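# Editor's note (illustrative, not part of the original module): a MarkerItem is a
# triple such as (Variable("os_name"), Op("=="), Value("posix")), and a MarkerList
# alternates atoms with the boolean-operator strings "and" / "or", using a nested
# list for each parenthesised group.

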
class ParsedRequirement(NamedTuple):
    name: str
    url: str
    extras: List[str]
    specifier: str
    marker: Optional[MarkerList]


# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))


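# Usage sketch (editor's illustration; the exact strings below are assumptions, not
# taken from the original source):
#
#   req = parse_requirement("requests[security]>=2.8.1; python_version > '2.7'")
#   # req.name      -> "requests"
#   # req.extras    -> ["security"]
#   # req.specifier -> ">=2.8.1"
#   # req.url       -> ""
#   # req.marker    -> [(Variable("python_version"), Op(">"), Value("2.7"))]
#
# Malformed input does not return; the tokenizer raises its syntax error instead.

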
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    name_token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    name = name_token.text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name, url, extras, specifier, marker)


def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> Tuple[str, str, Optional[MarkerList]]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """

    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after="URL and whitespace"
        )
    else:
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            after=(
                "version specifier"
                if specifier
                else "name and no valid version specifier"
            ),
        )

    return (url, specifier, marker)


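# Editor's note (illustrative): in the AT/URL branch above, a marker can only follow
# the URL after explicit whitespace (tokenizer.expect("WS", ...)), e.g.
# "pkg @ https://example.com/pkg-1.0.tar.gz ; python_version >= '3.8'" (URL assumed
# for illustration).

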
def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """

    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )
    tokenizer.read()

    marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")

    return marker


def _parse_extras(tokenizer: Tokenizer) -> List[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return extras


def _parse_extras_list(tokenizer: Tokenizer) -> List[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    extras: List[str] = []

    if not tokenizer.check("IDENTIFIER"):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")

        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        extras.append(extra_token.text)

    return extras


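# Example (editor's illustration): for "[tests, docs]" the two functions above are
# expected to return ["tests", "docs"], while "[tests docs]" (missing comma) trips the
# "Expected comma between extra names" error.

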
def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return parsed_specifiers


def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers


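# Example (editor's illustration): because only the raw SPECIFIER and COMMA token text
# is accumulated, ">=1.0, <2.0" is expected to come back as the single string
# ">=1.0,<2.0" (the whitespace around the comma is consumed, the comma itself is kept).

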
# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    return _parse_marker(Tokenizer(source, rules=DEFAULT_RULES))


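# Usage sketch (editor's illustration, input string assumed):
#
#   parse_marker("python_version >= '3.8' and (os_name == 'posix' or os_name == 'nt')")
#
# is expected to yield a nested MarkerList along the lines of:
#
#   [
#       (Variable("python_version"), Op(">="), Value("3.8")),
#       "and",
#       [
#           (Variable("os_name"), Op("=="), Value("posix")),
#           "or",
#           (Variable("os_name"), Op("=="), Value("nt")),
#       ],
#   ]

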
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)+
    """
    expression = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        token = tokenizer.read()
        expr_right = _parse_marker_atom(tokenizer)
        expression.extend((token.text, expr_right))
    return expression


def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """

    tokenizer.consume("WS")
    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    else:
        marker = _parse_marker_item(tokenizer)
    tokenizer.consume("WS")
    return marker


def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    marker_var_left = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    marker_op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    marker_var_right = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (marker_var_left, marker_op, marker_var_right)


def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    elif tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    else:
        tokenizer.raise_syntax_error(
            message="Expected a marker variable or quoted string"
        )


def process_env_var(env_var: str) -> Variable:
    if (
        env_var == "platform_python_implementation"
        or env_var == "python_implementation"
    ):
        return Variable("platform_python_implementation")
    else:
        return Variable(env_var)


def process_python_str(python_str: str) -> Value:
    value = ast.literal_eval(python_str)
    return Value(str(value))


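# Example (editor's illustration): process_python_str("'linux'") evaluates the quoted
# literal with ast.literal_eval and returns Value("linux"); whatever literal_eval
# produces is coerced with str() before wrapping.

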
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    elif tokenizer.check("NOT"):
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    elif tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    else:
        return tokenizer.raise_syntax_error(
            "Expected marker operator, one of "
            "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
        )