Skip to content

Affirm

Functions to safely evaluate strings and inspect notebooks.

DatabooksParser (NodeVisitor)

AST parser that disallows unsafe nodes/values.

Source code in databooks/affirm.py
class DatabooksParser(ast.NodeVisitor):
    """AST parser that disallows unsafe nodes/values."""

    def __init__(self, **variables: Any) -> None:
        """Instantiate with variables and callables (built-ins) scope."""
        # https://github.com/python/mypy/issues/3728
        self.builtins = {b.__name__: b for b in _ALLOWED_BUILTINS}  # type: ignore
        # `names` is everything expressions may refer to; `visit_comprehension`
        # later adds comprehension loop variables to it.
        self.names = deepcopy(variables) or {}
        # `scope` is the globals mapping handed to `eval`; overriding
        # `__builtins__` restricts built-ins to the allow-list above.
        self.scope = {
            **self.names,
            "__builtins__": self.builtins,
        }

    @staticmethod
    def _prioritize(field: Tuple[str, Any]) -> bool:
        """Prioritize `ast.comprehension` nodes when expanding the AST tree."""
        # Sort key: fields holding `ast.comprehension` items return False and
        # therefore sort first, so generators are visited before their bodies.
        _, value = field
        if not isinstance(value, list):
            return True
        return not any(isinstance(f, ast.comprehension) for f in value)

    @staticmethod
    def _allowed_attr(obj: Any, attr: str, is_dynamic: bool = False) -> None:
        """
        Check that attribute is a key of `databooks.data_models.base.DatabooksBase`.

        If `obj` is an iterable and was computed dynamically (that is, not originally in
         scope but computed from a comprehension), check attributes for all elements in
         the iterable.

        :raises ValueError: If `attr` is not an allowed attribute of `obj`.
        """
        # Non-model objects get an empty tuple of allowed attributes, so any
        # attribute access on them is rejected below.
        allowed_attrs = list(dict(obj).keys()) if isinstance(obj, DatabooksBase) else ()
        if isinstance(obj, abc.Iterable) and is_dynamic:
            # Dynamically-computed iterables: validate the attribute against
            # every element instead of the container itself.
            for el in obj:
                DatabooksParser._allowed_attr(obj=el, attr=attr)
        else:
            if attr not in allowed_attrs:
                raise ValueError(
                    "Expected attribute to be one of"
                    f" `{allowed_attrs}`, got `{attr}` for {obj}."
                )

    def _get_iter(self, node: ast.AST) -> Iterable:
        """Use `DatabooksParser.safe_eval_ast` to get the iterable object."""
        # Wrap the bare node in an `Expression` so it can compile in "eval" mode.
        tree = ast.Expression(body=node)
        return iter(self.safe_eval_ast(tree))

    def generic_visit(self, node: ast.AST) -> None:
        """
        Prioritize `ast.comprehension` nodes when expanding tree.

        Similar to `NodeVisitor.generic_visit`, but favor comprehensions when multiple
         nodes on the same level. In comprehensions, we have a generator argument that
         includes names that are stored. By visiting them first we avoid 'running into'
         unknown names.

        :raises ValueError: If the node type is not in `_ALLOWED_NODES`.
        """
        if not isinstance(node, _ALLOWED_NODES):
            raise ValueError(f"Invalid node `{node}`.")

        # Visit comprehension generators before sibling fields (see `_prioritize`).
        for field, value in sorted(ast.iter_fields(node), key=self._prioritize):
            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        self.visit(item)
            elif isinstance(value, ast.AST):
                self.visit(value)

    def visit_comprehension(self, node: ast.comprehension) -> None:
        """Add variable from a comprehension to list of allowed names."""
        # Only simple targets (`for x in ...`) are supported; tuple targets
        # (`for k, v in ...`) are rejected.
        if not isinstance(node.target, ast.Name):
            raise RuntimeError(
                "Expected `ast.comprehension`'s target to be `ast.Name`, got"
                f" `ast.{type(node.target).__name__}`."
            )
        # Bind the loop variable to its iterable so later name checks accept it.
        self.names[node.target.id] = self._get_iter(node.iter)
        self.generic_visit(node)

    def visit_Attribute(self, node: ast.Attribute) -> None:
        """Allow attributes for Pydantic fields only."""
        if not isinstance(node.value, (ast.Attribute, ast.Name, ast.Subscript)):
            raise ValueError(
                "Expected attribute to be one of `ast.Name`, `ast.Attribute` or"
                f" `ast.Subscript`, got `ast.{type(node.value).__name__}`."
            )
        if isinstance(node.value, ast.Name):
            # Names absent from the original scope were introduced by a
            # comprehension; their attribute check must cover every element.
            self._allowed_attr(
                obj=self.names[node.value.id],
                attr=node.attr,
                is_dynamic=node.value.id in (self.names.keys() - self.scope.keys()),
            )
        self.generic_visit(node)

    def visit_Name(self, node: ast.Name) -> None:
        """Only allow names from scope or comprehension variables."""
        valid_names = {**self.names, **self.builtins}
        if node.id not in valid_names:
            raise ValueError(
                f"Expected `name` to be one of `{valid_names.keys()}`, got `{node.id}`."
            )
        self.generic_visit(node)

    def safe_eval_ast(self, ast_tree: ast.AST) -> Any:
        """Evaluate safe AST trees only (raise errors otherwise)."""
        # Validation happens first: `visit` raises on any disallowed node/name
        # before anything is executed.
        self.visit(ast_tree)
        exe = compile(ast_tree, filename="", mode="eval")
        return eval(exe, self.scope)

    def safe_eval(self, src: str) -> Any:
        """
        Evaluate strings that are safe only (raise errors otherwise).

        A "safe" string or node provided may only consist of nodes in
         `databooks.affirm._ALLOWED_NODES` and built-ins from
         `databooks.affirm._ALLOWED_BUILTINS`.
        """
        ast_tree = ast.parse(src, mode="eval")
        return self.safe_eval_ast(ast_tree)

__init__(self, **variables) special

Instantiate with variables and callables (built-ins) scope.

Source code in databooks/affirm.py
def __init__(self, **variables: Any) -> None:
    """Set up the evaluation scope: user variables plus allowed built-ins."""
    # Map each allowed built-in callable by its name.
    # https://github.com/python/mypy/issues/3728
    self.builtins = {fn.__name__: fn for fn in _ALLOWED_BUILTINS}  # type: ignore
    self.names = deepcopy(variables) or {}
    # The globals mapping handed to `eval`; overriding `__builtins__`
    # restricts built-ins to the allow-list.
    self.scope = {**self.names, "__builtins__": self.builtins}

generic_visit(self, node)

Prioritize ast.comprehension nodes when expanding tree.

Similar to NodeVisitor.generic_visit, but favor comprehensions when multiple nodes on the same level. In comprehensions, we have a generator argument that includes names that are stored. By visiting them first we avoid 'running into' unknown names.

Source code in databooks/affirm.py
def generic_visit(self, node: ast.AST) -> None:
    """
    Prioritize `ast.comprehension` nodes when expanding tree.

    Similar to `NodeVisitor.generic_visit`, but favor comprehensions when multiple
     nodes on the same level. In comprehensions, we have a generator argument that
     includes names that are stored. By visiting them first we avoid 'running into'
     unknown names.

    :raises ValueError: If the node type is not in `_ALLOWED_NODES`.
    """
    if not isinstance(node, _ALLOWED_NODES):
        raise ValueError(f"Invalid node `{node}`.")

    # Visit comprehension generators before sibling fields (see `_prioritize`).
    for field, value in sorted(ast.iter_fields(node), key=self._prioritize):
        if isinstance(value, list):
            for item in value:
                if isinstance(item, ast.AST):
                    self.visit(item)
        elif isinstance(value, ast.AST):
            self.visit(value)

safe_eval(self, src)

Evaluate strings that are safe only (raise errors otherwise).

A "safe" string or node provided may only consist of nodes in databooks.affirm._ALLOWED_NODES and built-ins from databooks.affirm._ALLOWED_BUILTINS.

Source code in databooks/affirm.py
def safe_eval(self, src: str) -> Any:
    """
    Evaluate a string if and only if it is deemed safe (raise otherwise).

    A "safe" string may only contain nodes from
     `databooks.affirm._ALLOWED_NODES` and built-ins from
     `databooks.affirm._ALLOWED_BUILTINS`.
    """
    return self.safe_eval_ast(ast.parse(src, mode="eval"))

safe_eval_ast(self, ast_tree)

Evaluate safe AST trees only (raise errors otherwise).

Source code in databooks/affirm.py
def safe_eval_ast(self, ast_tree: ast.AST) -> Any:
    """Validate an AST with the visitor, then evaluate it in the restricted scope."""
    # Visiting first raises on any disallowed node/name before anything runs.
    self.visit(ast_tree)
    compiled = compile(ast_tree, filename="", mode="eval")
    return eval(compiled, self.scope)

visit_Attribute(self, node)

Allow attributes for Pydantic fields only.

Source code in databooks/affirm.py
def visit_Attribute(self, node: ast.Attribute) -> None:
    """Allow attributes for Pydantic fields only."""
    if not isinstance(node.value, (ast.Attribute, ast.Name, ast.Subscript)):
        raise ValueError(
            "Expected attribute to be one of `ast.Name`, `ast.Attribute` or"
            f" `ast.Subscript`, got `ast.{type(node.value).__name__}`."
        )
    if isinstance(node.value, ast.Name):
        # Names in `self.names` but not in `self.scope` were introduced by a
        # comprehension; their attribute check must cover every element.
        self._allowed_attr(
            obj=self.names[node.value.id],
            attr=node.attr,
            is_dynamic=node.value.id in (self.names.keys() - self.scope.keys()),
        )
    self.generic_visit(node)

visit_Name(self, node)

Only allow names from scope or comprehension variables.

Source code in databooks/affirm.py
def visit_Name(self, node: ast.Name) -> None:
    """Reject any name that is neither in scope nor an allowed built-in."""
    valid_names = {**self.names, **self.builtins}
    if node.id in valid_names:
        self.generic_visit(node)
    else:
        raise ValueError(
            f"Expected `name` to be one of `{valid_names.keys()}`, got `{node.id}`."
        )

visit_comprehension(self, node)

Add variable from a comprehension to list of allowed names.

Source code in databooks/affirm.py
def visit_comprehension(self, node: ast.comprehension) -> None:
    """Register a comprehension's loop variable as an allowed name."""
    target = node.target
    # Only simple targets (`for x in ...`) are supported.
    if not isinstance(target, ast.Name):
        raise RuntimeError(
            "Expected `ast.comprehension`'s target to be `ast.Name`, got"
            f" `ast.{type(target).__name__}`."
        )
    # Bind the loop variable to its iterable so later name checks accept it.
    self.names[target.id] = self._get_iter(node.iter)
    self.generic_visit(node)

affirm(nb_path, exprs, verbose=False)

Return whether notebook passed all checks (expressions).

Parameters:

Name Type Description Default
nb_path Path

Path of notebook file

required
exprs List[str]

Expressions with checks to be evaluated on the notebook

required
verbose bool

Log failed tests for notebook

False

Returns:

Type Description
bool

Evaluated expression cast as a bool

Source code in databooks/affirm.py
def affirm(nb_path: Path, exprs: List[str], verbose: bool = False) -> bool:
    """
    Return whether notebook passed all checks (expressions).

    :param nb_path: Path of notebook file
    :param exprs: Expressions with checks to be evaluated on the notebook
    :param verbose: Log failed tests for notebook
    :return: Evaluated expression cast as a `bool`
    """
    if verbose:
        set_verbose(logger)

    nb = JupyterNotebook.parse_file(nb_path)
    # Expose the notebook and convenient cell subsets to the check expressions.
    variables: Dict[str, Any] = {
        "nb": nb,
        "raw_cells": [c for c in nb.cells if c.cell_type == "raw"],
        "md_cells": [c for c in nb.cells if c.cell_type == "markdown"],
        "code_cells": [c for c in nb.cells if c.cell_type == "code"],
        "exec_cells": [
            c
            for c in nb.cells
            if c.cell_type == "code" and c.execution_count is not None
        ],
    }
    databooks_parser = DatabooksParser(**variables)
    is_ok = [bool(databooks_parser.safe_eval(expr)) for expr in exprs]
    # Generator avoids building a throwaway list just to count failures.
    n_fail = sum(not ok for ok in is_ok)

    logger.info(f"{nb_path} failed {n_fail} of {len(is_ok)} checks.")
    logger.debug(
        str(nb_path)
        + (
            f" failed {list(compress(exprs, (not ok for ok in is_ok)))}."
            if n_fail > 0
            else " succeeded all checks."
        )
    )
    return all(is_ok)

affirm_all(nb_paths, *, progress_callback=lambda: None, **affirm_kwargs)

Return whether each notebook passed all checks (expressions).

Parameters:

Name Type Description Default
nb_paths List[pathlib.Path]

Paths of notebooks to assert metadata

required
progress_callback Callable[[], NoneType]

Callback function to report progress

lambda: None
affirm_kwargs Any

Keyword arguments to be passed to databooks.affirm.affirm

{}

Returns:

Type Description
List[bool]

Whether each notebook passed all the checks

Source code in databooks/affirm.py
def affirm_all(
    nb_paths: List[Path],
    *,
    progress_callback: Callable[[], None] = lambda: None,
    **affirm_kwargs: Any,
) -> List[bool]:
    """
    Run `affirm` checks on multiple notebooks.

    Docstring previously said "Clear metadata ..." — a copy-paste from the
    metadata-clearing helper; this function only evaluates check expressions.

    :param nb_paths: Paths of notebooks to be checked
    :param progress_callback: Callback function to report progress
    :param affirm_kwargs: Keyword arguments to be passed to `databooks.affirm.affirm`
    :return: Whether each notebook passed all the checks
    """
    checks = []
    for nb_path in nb_paths:
        checks.append(affirm(nb_path, **affirm_kwargs))
        # Report progress after each notebook so callers can update UIs.
        progress_callback()
    return checks