Viewing file: _check_docs_utils.py (25.74 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/PyCQA/pylint/blob/main/LICENSE # Copyright (c) https://github.com/PyCQA/pylint/blob/main/CONTRIBUTORS.txt
"""Utility methods for docstring checking."""
from __future__ import annotations
import re
import astroid from astroid import nodes from astroid.util import UninferableBase
from pylint.checkers import utils
def space_indentation(s: str) -> int:
    """Count the space characters at the very start of a string.

    Only the literal space character is counted; a leading tab (or any other
    character) stops the count.

    :param str s: input string

    :rtype: int
    :return: number of leading spaces
    """
    count = 0
    for char in s:
        if char != " ":
            break
        count += 1
    return count
def get_setters_property_name(node: nodes.FunctionDef) -> str | None:
    """Return the property name targeted by a ``@<name>.setter`` decorator.

    :param node: The function node whose decorators are inspected.
    :type node: nodes.FunctionDef

    :rtype: str or None
    :returns: The ``<name>`` part of the first decorator written as
        ``<name>.setter``, or None if the node has no such decorator.
    """
    if not node.decorators:
        return None

    for decorator in node.decorators.nodes:
        # Only the plain ``name.setter`` attribute form is recognised.
        is_setter_decorator = (
            isinstance(decorator, nodes.Attribute)
            and decorator.attrname == "setter"
            and isinstance(decorator.expr, nodes.Name)
        )
        if is_setter_decorator:
            return decorator.expr.name  # type: ignore[no-any-return]

    return None
def get_setters_property(node: nodes.FunctionDef) -> nodes.FunctionDef | None:
    """Find the property definition that belongs to the given setter node.

    :param node: The setter node to find the property for.
    :type node: nodes.FunctionDef

    :rtype: nodes.FunctionDef or None
    :returns: The first class attribute sharing the setter's name that
        ``utils.decorated_with_property`` accepts, or None if the node is not
        a setter, no class frame was found, or no attribute qualifies.
    """
    setter_target = get_setters_property_name(node)
    owner_class = utils.node_frame_class(node)
    if not (setter_target and owner_class):
        return None

    # The lookup is keyed on the setter's own name (not the decorator's
    # target name), exactly as the attribute is registered on the class.
    candidates: list[nodes.FunctionDef] = owner_class.getattr(node.name)
    return next(
        (
            candidate
            for candidate in candidates
            if utils.decorated_with_property(candidate)
        ),
        None,
    )
def returns_something(return_node: nodes.Return) -> bool:
    """Tell whether a return statement yields a value other than None.

    :param return_node: The return node to check.
    :type return_node: astroid.Return

    :rtype: bool
    :return: False for a bare ``return`` and for a return of the constant
        ``None``; True for anything else.
    """
    returned = return_node.value
    if returned is None:
        # Bare ``return`` statement.
        return False

    is_none_constant = isinstance(returned, nodes.Const) and returned.value is None
    return not is_none_constant
def _get_raise_target(node: nodes.NodeNG) -> nodes.NodeNG | UninferableBase | None:
    """Infer what is being called in a ``raise <callable>(...)`` statement.

    :param node: The raise node to inspect.

    :returns: The result of ``utils.safe_infer`` on the called name or
        attribute, or None when the raised expression is not a call or the
        callee is neither a name nor an attribute access.
    """
    raised = node.exc
    if not isinstance(raised, nodes.Call):
        return None

    callee = raised.func
    if not isinstance(callee, (nodes.Name, nodes.Attribute)):
        return None

    return utils.safe_infer(callee)
def _split_multiple_exc_types(target: str) -> list[str]: delimiters = r"(\s*,(?:\s*or\s)?\s*|\s+or\s+)" return re.split(delimiters, target)
def possible_exc_types(node: nodes.NodeNG) -> set[nodes.ClassDef]:
    """Collect every exception type the given raise node may raise.

    .. note::

        Caught exception types are ignored.

    :param node: The raise node to find exception types for.

    :returns: A set of exception types possibly raised by :param:`node`.
        An empty set is returned if inference fails while filtering.
    """
    candidates = []
    raised = node.exc

    if isinstance(raised, nodes.Name):
        # ``raise SomeName``: use whatever the name infers to.
        inferred = utils.safe_infer(raised)
        if inferred:
            candidates = [inferred]
    elif raised is None:
        # Bare ``raise``: walk up to the closest enclosing except handler
        # and take the types that handler declares.
        handler = node.parent
        while handler and not isinstance(handler, nodes.ExceptHandler):
            handler = handler.parent

        if handler and handler.type:
            try:
                for exception in astroid.unpack_infer(handler.type):
                    if not isinstance(exception, UninferableBase):
                        candidates.append(exception)
            except astroid.InferenceError:
                # Keep whatever was collected before inference broke down.
                pass
    else:
        target = _get_raise_target(node)
        if isinstance(target, nodes.ClassDef):
            # ``raise SomeClass(...)``
            candidates = [target]
        elif isinstance(target, nodes.FunctionDef):
            # ``raise factory(...)``: look at what the factory returns.
            for ret in target.nodes_of_class(nodes.Return):
                # Skip bare returns and returns that belong to a function
                # nested inside the factory.
                if ret.value is None or ret.frame(future=True) != target:
                    continue

                val = utils.safe_infer(ret.value)
                if not (val and utils.inherit_from_std_ex(val)):
                    continue
                if isinstance(val, nodes.ClassDef):
                    candidates.append(val)
                elif isinstance(val, astroid.Instance):
                    candidates.append(val.getattr("__class__")[0])

    try:
        # Drop the types that utils.node_ignores_exception reports as
        # ignored for this node.
        return {
            exc
            for exc in candidates
            if not utils.node_ignores_exception(node, exc.name)
        }
    except astroid.InferenceError:
        return set()
def docstringify(
    docstring: nodes.Const | None, default_type: str = "default"
) -> Docstring:
    """Wrap a docstring in the parser class that recognises most of it.

    Each known style (Sphinx, Epytext, Google, Numpy, in that order) parses
    the docstring; the one reporting the highest number of matching sections
    wins. Ties keep the earlier candidate.

    :param docstring: The docstring node to analyse, or None.
    :param default_type: Key into ``DOCSTRING_TYPES`` selecting the class to
        use when no style matches any section; unknown keys fall back to
        ``Docstring``.

    :returns: The best matching docstring instance.
    """
    fallback_class = DOCSTRING_TYPES.get(default_type, Docstring)
    best_score = 0
    best_instance = fallback_class(docstring)

    candidate_classes = (
        SphinxDocstring,
        EpytextDocstring,
        GoogleDocstring,
        NumpyDocstring,
    )
    for candidate_class in candidate_classes:
        candidate = candidate_class(docstring)
        score = candidate.matching_sections()
        # Strict comparison: a later style must beat, not equal, the leader.
        if score > best_score:
            best_score, best_instance = score, candidate

    return best_instance
class Docstring:
    """Style-agnostic docstring wrapper whose queries all report "nothing".

    Style-specific subclasses override the query methods below.
    """

    re_for_parameters_see = re.compile(
        r"""
        For\s+the\s+(other)?\s*parameters\s*,\s+see
        """,
        re.X | re.S,
    )

    supports_yields: bool = False
    """True if the docstring supports a "yield" section.

    False if the docstring uses the returns section to document generators.
    """

    # These methods are designed to be overridden
    def __init__(self, doc: nodes.Const | None) -> None:
        """Store the docstring text with tabs expanded ("" when absent)."""
        if doc:
            docstring: str = doc.value
        else:
            docstring = ""
        self.doc = docstring.expandtabs()

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        return f"<{class_name}:'''{self.doc}'''>"

    def matching_sections(self) -> int:
        """Returns the number of matching docstring sections."""
        return 0

    def exceptions(self) -> set[str]:
        """Return the documented exception names (always empty here)."""
        return set()

    def has_params(self) -> bool:
        """Tell whether parameters are documented (never, for this class)."""
        return False

    def has_returns(self) -> bool:
        """Tell whether a return value is documented (never, here)."""
        return False

    def has_rtype(self) -> bool:
        """Tell whether a return type is documented (never, here)."""
        return False

    def has_property_returns(self) -> bool:
        """Tell whether a property's value is documented (never, here)."""
        return False

    def has_property_type(self) -> bool:
        """Tell whether a property's type is documented (never, here)."""
        return False

    def has_yields(self) -> bool:
        """Tell whether yielded values are documented (never, here)."""
        return False

    def has_yields_type(self) -> bool:
        """Tell whether the yielded type is documented (never, here)."""
        return False

    def match_param_docs(self) -> tuple[set[str], set[str]]:
        """Return (documented parameter names, typed parameter names)."""
        documented: set[str] = set()
        typed: set[str] = set()
        return documented, typed

    def params_documented_elsewhere(self) -> bool:
        """Tell whether the text contains "For the (other) parameters, see"."""
        found = self.re_for_parameters_see.search(self.doc)
        return found is not None
class SphinxDocstring(Docstring):
    """Docstring parser for Sphinx-style field lists (``:param x:`` etc.)."""

    re_type = r"""
        [~!.]?               # Optional link style prefix
        \w(?:\w|\.[^\.])*    # Valid python name
        """

    re_simple_container_type = rf"""
        {re_type}                     # a container type
        [\(\[] [^\n\s]+ [\)\]]        # with the contents of the container
    """

    re_multiple_simple_type = r"""
        (?:{container_type}|{type})
        (?:(?:\s+(?:of|or)\s+|\s*,\s*|\s+\|\s+)(?:{container_type}|{type}))*
    """.format(
        type=re_type, container_type=re_simple_container_type
    )

    re_xref = rf"""
        (?::\w+:)?                    # optional tag
        `{re_type}`                   # what to reference
        """

    re_param_raw = rf"""
        :                       # initial colon
        (?:                     # Sphinx keywords
        param|parameter|
        arg|argument|
        key|keyword
        )
        \s+                     # whitespace

        (?:                     # optional type declaration
        ({re_type}|{re_simple_container_type})
        \s+
        )?

        ((\\\*{{0,2}}\w+)|(\w+))  # Parameter name with potential asterisks
        \s*                       # whitespace
        :                         # final colon
        """
    re_param_in_docstring = re.compile(re_param_raw, re.X | re.S)

    re_type_raw = rf"""
        :type                           # Sphinx keyword
        \s+                             # whitespace
        ({re_multiple_simple_type})     # Parameter name
        \s*                             # whitespace
        :                               # final colon
        """
    re_type_in_docstring = re.compile(re_type_raw, re.X | re.S)

    re_property_type_raw = rf"""
        :type:                      # Sphinx keyword
        \s+                         # whitespace
        {re_multiple_simple_type}   # type declaration
        """
    re_property_type_in_docstring = re.compile(re_property_type_raw, re.X | re.S)

    re_raise_raw = rf"""
        :                               # initial colon
        (?:                             # Sphinx keyword
        raises?|
        except|exception
        )
        \s+                             # whitespace
        ({re_multiple_simple_type})     # exception type
        \s*                             # whitespace
        :                               # final colon
        """
    re_raise_in_docstring = re.compile(re_raise_raw, re.X | re.S)

    re_rtype_in_docstring = re.compile(r":rtype:")

    re_returns_in_docstring = re.compile(r":returns?:")

    supports_yields = False

    def matching_sections(self) -> int:
        """Returns the number of matching docstring sections."""
        section_patterns = (
            self.re_param_in_docstring,
            self.re_raise_in_docstring,
            self.re_rtype_in_docstring,
            self.re_returns_in_docstring,
            self.re_property_type_in_docstring,
        )
        return sum(1 for pattern in section_patterns if pattern.search(self.doc))

    def exceptions(self) -> set[str]:
        """Return every exception name listed in a raise field."""
        types: set[str] = set()
        for raise_match in self.re_raise_in_docstring.finditer(self.doc):
            types.update(_split_multiple_exc_types(raise_match.group(1)))
        return types

    def has_params(self) -> bool:
        """Tell whether at least one parameter field is present."""
        if self.doc:
            return self.re_param_in_docstring.search(self.doc) is not None
        return False

    def has_returns(self) -> bool:
        """Tell whether a returns field is present."""
        if self.doc:
            return bool(self.re_returns_in_docstring.search(self.doc))
        return False

    def has_rtype(self) -> bool:
        """Tell whether a return-type field is present."""
        if self.doc:
            return bool(self.re_rtype_in_docstring.search(self.doc))
        return False

    def has_property_returns(self) -> bool:
        """Tell whether the summary line can serve as the property's doc."""
        if self.doc:
            # The summary line is the return doc,
            # so the first line must not be a known directive.
            return not self.doc.lstrip().startswith(":")
        return False

    def has_property_type(self) -> bool:
        """Tell whether a property type field is present."""
        if self.doc:
            return bool(self.re_property_type_in_docstring.search(self.doc))
        return False

    def match_param_docs(self) -> tuple[set[str], set[str]]:
        """Return (documented parameter names, typed parameter names)."""
        documented = set()
        typed = set()

        for param_match in self.re_param_in_docstring.finditer(self.doc):
            inline_type, raw_name = param_match.group(1, 2)
            # Remove escape characters necessary for asterisks
            name = raw_name.replace("\\", "")
            documented.add(name)
            if inline_type is not None:
                typed.add(name)

        # Parameters typed through a separate type field count as typed too.
        typed.update(self.re_type_in_docstring.findall(self.doc))
        return documented, typed
class EpytextDocstring(SphinxDocstring):
    """Epytext is similar to Sphinx.

    The patterns are derived from the Sphinx ones by swapping the leading
    colon of each field for an "at" sign.

    See the docs:
        http://epydoc.sourceforge.net/epytext.html
        http://epydoc.sourceforge.net/fields.html#fields

    It's used in PyCharm:
        https://www.jetbrains.com/help/pycharm/2016.1/creating-documentation-comments.html#d848203e314
        https://www.jetbrains.com/help/pycharm/2016.1/using-docstrings-to-specify-types.html
    """

    re_param_in_docstring = re.compile(
        SphinxDocstring.re_param_raw.replace(":", "@", 1), re.X | re.S
    )

    re_type_in_docstring = re.compile(
        SphinxDocstring.re_type_raw.replace(":", "@", 1), re.X | re.S
    )

    re_property_type_in_docstring = re.compile(
        SphinxDocstring.re_property_type_raw.replace(":", "@", 1), re.X | re.S
    )

    re_raise_in_docstring = re.compile(
        SphinxDocstring.re_raise_raw.replace(":", "@", 1), re.X | re.S
    )

    re_rtype_in_docstring = re.compile(
        r"""
        @                       # initial "at" symbol
        (?:                     # Epytext keyword
        rtype|returntype
        )
        :                       # final colon
        """,
        re.X | re.S,
    )

    re_returns_in_docstring = re.compile(r"@returns?:")

    def has_property_returns(self) -> bool:
        """Tell whether the summary line can serve as the property's doc.

        This only holds when a property type field is present and the
        docstring does not open with an "at" directive.
        """
        # If this is a property docstring, the summary is the return doc.
        if not self.doc or not self.has_property_type():
            return False

        # The summary line is the return doc,
        # so the first line must not be a known directive.
        return not self.doc.lstrip().startswith("@")
class GoogleDocstring(Docstring):
    """Docstring parser for Google-style sections (``Args:``, ``Returns:``)."""

    re_type = SphinxDocstring.re_type

    re_xref = SphinxDocstring.re_xref

    re_container_type = rf"""
        (?:{re_type}|{re_xref})       # a container type
        [\(\[] [^\n]+ [\)\]]          # with the contents of the container
    """

    re_multiple_type = r"""
        (?:{container_type}|{type}|{xref})
        (?:(?:\s+(?:of|or)\s+|\s*,\s*|\s+\|\s+)(?:{container_type}|{type}|{xref}))*
    """.format(
        type=re_type, xref=re_xref, container_type=re_container_type
    )

    _re_section_template = r"""
        ^([ ]*)   {0} \s*:   \s*$     # Google parameter header
        (  .* )                       # section
        """

    re_param_section = re.compile(
        _re_section_template.format(r"(?:Args|Arguments|Parameters)"),
        re.X | re.S | re.M,
    )

    re_keyword_param_section = re.compile(
        _re_section_template.format(r"Keyword\s(?:Args|Arguments|Parameters)"),
        re.X | re.S | re.M,
    )

    re_param_line = re.compile(
        rf"""
        \s*  ((?:\\?\*{{0,2}})?[\w\\]+) # identifier potentially with asterisks or escaped `\`
        \s*  ( [(]
            {re_multiple_type}
            (?:,\s+optional)?
            [)] )? \s* :                # optional type declaration
        \s*  (.*)                       # beginning of optional description
    """,
        re.X | re.S | re.M,
    )

    re_raise_section = re.compile(
        _re_section_template.format(r"Raises"), re.X | re.S | re.M
    )

    re_raise_line = re.compile(
        rf"""
        \s*  ({re_multiple_type}) \s* :  # identifier
        \s*  (.*)                        # beginning of optional description
    """,
        re.X | re.S | re.M,
    )

    re_returns_section = re.compile(
        _re_section_template.format(r"Returns?"), re.X | re.S | re.M
    )

    re_returns_line = re.compile(
        rf"""
        \s* ({re_multiple_type}:)?        # identifier
        \s* (.*)                          # beginning of description
    """,
        re.X | re.S | re.M,
    )

    re_property_returns_line = re.compile(
        rf"""
        ^{re_multiple_type}:           # identifier
        \s* (.*)                       # Summary line / description
    """,
        re.X | re.S | re.M,
    )

    re_yields_section = re.compile(
        _re_section_template.format(r"Yields?"), re.X | re.S | re.M
    )

    re_yields_line = re_returns_line

    supports_yields = True

    def matching_sections(self) -> int:
        """Returns the number of matching docstring sections."""
        hits = (
            self.re_param_section.search(self.doc),
            self.re_raise_section.search(self.doc),
            self.re_returns_section.search(self.doc),
            self.re_yields_section.search(self.doc),
            self.re_property_returns_line.search(self._first_line()),
        )
        return sum(1 for hit in hits if hit)

    def _section_entry_has_group(
        self, section_re: re.Pattern[str], line_re: re.Pattern[str], group: int
    ) -> bool:
        """Tell whether any entry of a section has a non-empty regex group.

        Entries that ``line_re`` does not match at all are skipped.
        """
        for entry in self._parse_section(section_re):
            parsed = line_re.match(entry)
            if parsed and parsed.group(group):
                return True
        return False

    def has_params(self) -> bool:
        """Tell whether a parameter section header is present."""
        if self.doc:
            return self.re_param_section.search(self.doc) is not None
        return False

    def has_returns(self) -> bool:
        """Tell whether the returns section holds a description."""
        if not self.doc:
            return False
        return self._section_entry_has_group(
            self.re_returns_section, self.re_returns_line, 2
        )

    def has_rtype(self) -> bool:
        """Tell whether the returns section holds a type."""
        if not self.doc:
            return False
        return self._section_entry_has_group(
            self.re_returns_section, self.re_returns_line, 1
        )

    def has_property_returns(self) -> bool:
        """Tell whether the summary line can serve as the property's doc."""
        # The summary line is the return doc,
        # so the first line must not be a known directive.
        first_line = self._first_line()
        section_headers = (
            self.re_param_section,
            self.re_raise_section,
            self.re_returns_section,
            self.re_yields_section,
        )
        return not any(header.search(first_line) for header in section_headers)

    def has_property_type(self) -> bool:
        """Tell whether the summary line starts with a ``type:`` prefix."""
        if self.doc:
            return bool(self.re_property_returns_line.match(self._first_line()))
        return False

    def has_yields(self) -> bool:
        """Tell whether the yields section holds a description."""
        if not self.doc:
            return False
        return self._section_entry_has_group(
            self.re_yields_section, self.re_yields_line, 2
        )

    def has_yields_type(self) -> bool:
        """Tell whether the yields section holds a type."""
        if not self.doc:
            return False
        return self._section_entry_has_group(
            self.re_yields_section, self.re_yields_line, 1
        )

    def exceptions(self) -> set[str]:
        """Return the exception names of raise entries that have a description."""
        types: set[str] = set()

        for entry in self._parse_section(self.re_raise_section):
            parsed = self.re_raise_line.match(entry)
            if not parsed:
                continue

            exc_type, exc_desc = parsed.group(1, 2)
            # Entries without any description text are not counted.
            if exc_desc:
                types.update(_split_multiple_exc_types(exc_type))

        return types

    def match_param_docs(self) -> tuple[set[str], set[str]]:
        """Return (documented parameter names, typed parameter names)."""
        documented: set[str] = set()
        typed: set[str] = set()

        entries = [
            *self._parse_section(self.re_param_section),
            *self._parse_section(self.re_keyword_param_section),
        ]
        for entry in entries:
            parsed = self.re_param_line.match(entry)
            if not parsed:
                continue

            raw_name, param_type, param_desc = parsed.group(1, 2, 3)
            # Remove escape characters necessary for asterisks
            param_name = raw_name.replace("\\", "")

            if param_type:
                typed.add(param_name)

            if param_desc:
                documented.add(param_name)

        return documented, typed

    def _first_line(self) -> str:
        """Return the first line of the docstring, leading whitespace removed."""
        stripped = self.doc.lstrip()
        return stripped.split("\n", 1)[0]

    @staticmethod
    def min_section_indent(section_match: re.Match[str]) -> int:
        """Return the least indentation a section's content must have.

        Content has to be indented at least one space deeper than its header.
        """
        header_indent = len(section_match.group(1))
        return header_indent + 1

    @staticmethod
    def _is_section_header(_: str) -> bool:
        # Google parsing does not need to detect section headers,
        # because it works off of indentation level only
        return False

    def _parse_section(self, section_re: re.Pattern[str]) -> list[str]:
        """Split the section matched by ``section_re`` into its entries.

        An entry starts on a line at the section's base indentation and
        includes the deeper-indented lines that follow it. Blank lines are
        dropped. Parsing stops at the first line indented less than the
        section requires, or at a line recognised as a section header.

        :returns: One string per entry (lines joined with newlines), or an
            empty list if the section is absent.
        """
        found = section_re.search(self.doc)
        if found is None:
            return []

        required_indent = self.min_section_indent(found)

        entries: list[str] = []
        current: list[str] = []
        base_indent_known = False
        for line in found.group(2).splitlines():
            if not line.strip():
                continue
            indent = space_indentation(line)
            if indent < required_indent:
                break

            # The first line after the header defines the minimum
            # indentation.
            if not base_indent_known:
                required_indent = indent
                base_indent_known = True

            if indent == required_indent:
                if self._is_section_header(line):
                    break
                # Lines with minimum indentation must contain the beginning
                # of a new parameter documentation.
                if current:
                    entries.append("\n".join(current))
                    current = []

            current.append(line)

        if current:
            entries.append("\n".join(current))

        return entries
class NumpyDocstring(GoogleDocstring):
    """Docstring parser for Numpy-style sections (header plus underline)."""

    _re_section_template = r"""
        ^([ ]*)   {0}   \s*?$          # Numpy parameters header
        \s*     [-=]+   \s*?$          # underline
        (  .* )                        # section
    """

    re_param_section = re.compile(
        _re_section_template.format(r"(?:Args|Arguments|Parameters)"),
        re.X | re.S | re.M,
    )

    re_default_value = r"""((['"]\w+\s*['"])|(\d+)|(True)|(False)|(None))"""

    re_param_line = re.compile(
        rf"""
        \s*  (?P<param_name>\*{{0,2}}\w+)(\s?(:|\n)) # identifier with potential asterisks
        \s*
        (?P<param_type>
         (
          ({GoogleDocstring.re_multiple_type})      # default type declaration
          (,\s+optional)?                           # optional 'optional' indication
         )?
         (
          {{({re_default_value},?\s*)+}}            # set of default values
         )?
         (?:$|\n)
        )?
        (
         \s* (?P<param_desc>.*)                     # optional description
        )?
    """,
        re.X | re.S,
    )

    re_raise_section = re.compile(
        _re_section_template.format(r"Raises"), re.X | re.S | re.M
    )

    re_raise_line = re.compile(
        rf"""
        \s* ({GoogleDocstring.re_type})$   # type declaration
        \s* (.*)                           # optional description
    """,
        re.X | re.S | re.M,
    )

    re_returns_section = re.compile(
        _re_section_template.format(r"Returns?"), re.X | re.S | re.M
    )

    re_returns_line = re.compile(
        rf"""
        \s* (?:\w+\s+:\s+)? # optional name
        ({GoogleDocstring.re_multiple_type})$   # type declaration
        \s* (.*)                                # optional description
    """,
        re.X | re.S | re.M,
    )

    re_yields_section = re.compile(
        _re_section_template.format(r"Yields?"), re.X | re.S | re.M
    )

    re_yields_line = re_returns_line

    supports_yields = True

    def match_param_docs(self) -> tuple[set[str], set[str]]:
        """Matches parameter documentation section to parameter documentation rules.

        :returns: A pair (documented parameter names, typed parameter names).
        """
        documented = set()
        typed = set()

        entries = [
            *self._parse_section(self.re_param_section),
            *self._parse_section(self.re_keyword_param_section),
        ]
        for entry in entries:
            parsed = self.re_param_line.match(entry)
            if not parsed:
                continue

            param_name = parsed.group("param_name")

            # check if parameter has description only
            if re.match(r"\s*(\*{0,2}\w+)\s*:?\n\s*\w*$", entry):
                # What the line pattern captured as the type is really the
                # description; there is no type at all.
                param_type = None
                param_desc = parsed.group("param_type")
            else:
                param_type = parsed.group("param_type")
                param_desc = parsed.group("param_desc")
                # The re_param_line pattern needs to match multi-line which removes the ability
                # to match a single line description like 'arg : a number type.'
                # We are not trying to determine whether 'a number type' is correct typing
                # but we do accept it as typing as it is in the place where typing
                # should be
                if param_type is None and re.match(r"\s*(\*{0,2}\w+)\s*:.+$", entry):
                    param_type = param_desc
                # If the description is "" but we have a type description
                # we consider the description to be the type
                if not param_desc and param_type:
                    param_desc = param_type

            if param_type:
                typed.add(param_name)

            if param_desc:
                documented.add(param_name)

        return documented, typed

    @staticmethod
    def min_section_indent(section_match: re.Match[str]) -> int:
        """Return the least indentation a section's content must have.

        Content may sit at the same indentation as its header.
        """
        header_indent = section_match.group(1)
        return len(header_indent)

    @staticmethod
    def _is_section_header(line: str) -> bool:
        """Tell whether the line consists solely of dashes (an underline)."""
        return re.match(r"\s*-+$", line) is not None
# Registry resolving a docstring style name to its parser class.
# docstringify() looks up its ``default_type`` argument here and falls back
# to the plain ``Docstring`` class for names that are not registered.
DOCSTRING_TYPES = {
    "sphinx": SphinxDocstring,
    "epytext": EpytextDocstring,
    "google": GoogleDocstring,
    "numpy": NumpyDocstring,
    "default": Docstring,
}
"""A map of the name of the docstring type to its class.

:type: dict(str, type)
"""
|