Skip to content

Notebooks

Data models - Jupyter Notebooks and components.

Cells (RootModel[Sequence[~T]], BaseCells)

Similar to list, with - operator using difflib.SequenceMatcher.

Source code in databooks/data_models/notebook.py
class Cells(RootModel[Sequence[T]], BaseCells[T]):
    """
    Similar to `list`, with `-` operator using `difflib.SequenceMatcher`.

    Subtracting two `Cells` produces a `Cells` whose elements are pairs
    `(cells_from_first, cells_from_last)`, one pair per matcher opcode. Such a
    container can be rendered side by side or resolved into a flat list of cells.
    """

    root: Sequence[T]

    @property
    def data(self) -> List[T]:  # type: ignore
        """Define property `data` required for `collections.UserList` class."""
        return list(self.root)

    def __iter__(self) -> Generator[Any, None, None]:
        """Use list property as iterable."""
        return (el for el in self.data)

    def __sub__(self: Cells[Cell], other: Cells[Cell]) -> Cells[CellsPair]:
        """
        Return the difference using `difflib.SequenceMatcher`.

        :param other: Cells to compare against, must be of the same type as `self`
        :return: Pairs of cell slices (from `self` and from `other`), one pair per
         opcode
        :raises TypeError: If the operands are not of the exact same type
        :raises RuntimeError: If the matcher returns more than one group of opcodes
        """
        if type(self) is not type(other):
            raise TypeError(
                f"Unsupported operand types for `-`: `{type(self).__name__}` and"
                f" `{type(other).__name__}`"
            )

        # By setting the context to the max number of cells and using
        #  `difflib.SequenceMatcher.get_grouped_opcodes` we essentially get the same
        #  result as `difflib.SequenceMatcher.get_opcodes` but in smaller chunks
        n_context = max(len(self), len(other))
        diff_opcodes = list(
            SequenceMatcher(
                isjunk=None, a=self, b=other, autojunk=False
            ).get_grouped_opcodes(n_context)
        )

        if len(diff_opcodes) > 1:
            raise RuntimeError(
                "Expected one group for opcodes when context size is"
                f" {n_context} for {len(self)} and {len(other)} cells in"
                " notebooks."
            )

        # `data` builds a new list on every access, so read it once per operand
        first_data, last_data = self.data, other.data
        return Cells[CellsPair](
            [
                # https://github.com/python/mypy/issues/9459
                tuple((first_data[i1:j1], last_data[i2:j2]))  # type: ignore
                for _, i1, j1, i2, j2 in chain.from_iterable(diff_opcodes)
            ]
        )

    def __rich_console__(
        self, console: Console, options: ConsoleOptions
    ) -> RenderResult:
        """Rich display of all cells in notebook."""
        yield from self._get_renderables(expand=True, width=options.max_width // 3)

    def _get_renderables(self, **wrap_cols_kwargs: Any) -> Iterable[RenderableType]:
        """
        Get the Rich renderables, depending on whether `Cells` is a diff or not.

        When every element of `data` is a tuple the container is treated as a diff:
        pairs that differ are wrapped in columns via `Cells.wrap_cols` and pairs that
        are equal contribute the cells of their first element. Otherwise the cells
        are returned as they are.

        :param wrap_cols_kwargs: Keyword arguments forwarded to `Cells.wrap_cols`
        :return: Iterable of renderables
        """
        if all(isinstance(el, tuple) for el in self.data):
            return chain.from_iterable(
                Cells.wrap_cols(val[0], val[1], **wrap_cols_kwargs)
                if val[0] != val[1]
                else val[0]
                for val in cast(List[CellsPair], self.data)
            )
        return cast(List[Cell], self.data)

    @classmethod
    def __get_validators__(cls) -> Generator[Callable[..., Any], None, None]:
        """Get validators for custom class."""
        yield cls.validate

    @classmethod
    def validate(cls, v: List[T]) -> Cells[T]:
        """Ensure object is custom defined container."""
        if not isinstance(v, cls):
            return cls(v)
        else:
            return v

    @classmethod
    def wrap_cols(
        cls, first_cells: List[Cell], last_cells: List[Cell], **cols_kwargs: Any
    ) -> Sequence[Columns]:
        """
        Wrap the first and second cells into columns for iterable.

        An empty list of cells is displayed as a centered `<None>` placeholder.

        :param first_cells: Cells shown in the first column
        :param last_cells: Cells shown in the second column
        :param cols_kwargs: Keyword arguments passed to `Columns`
        :return: Single-element list with the two-column renderable
        """
        _empty = [Panel(Text("<None>", justify="center"), box=box.SIMPLE)]
        _first = Group(*first_cells or _empty)
        _last = Group(*last_cells or _empty)
        return [Columns([_first, _last], **cols_kwargs)]

    @staticmethod
    def wrap_git(
        first_cells: List[Cell],
        last_cells: List[Cell],
        hash_first: Optional[str] = None,
        hash_last: Optional[str] = None,
    ) -> Sequence[Cell]:
        """
        Wrap git-diff cells in existing notebook.

        :param first_cells: Cells placed between the `<<<<<<<` and `=======` markers
        :param last_cells: Cells placed between the `=======` and `>>>>>>>` markers
        :param hash_first: Value shown in (and stored on) the opening marker cell
        :param hash_last: Value shown in (and stored on) the closing marker cell
        :return: Marker cells (markdown) interleaved with the given cells
        """
        return [
            MarkdownCell(
                metadata=CellMetadata(git_hash=hash_first),
                source=[f"`<<<<<<< {hash_first}`"],
            ),
            *first_cells,
            MarkdownCell(
                source=["`=======`"],
                metadata=CellMetadata(),
            ),
            *last_cells,
            MarkdownCell(
                metadata=CellMetadata(git_hash=hash_last),
                source=[f"`>>>>>>> {hash_last}`"],
            ),
        ]

    def resolve(
        self: Cells[CellsPair],
        *,
        keep_first_cells: Optional[bool] = None,
        first_id: Optional[str] = None,
        last_id: Optional[str] = None,
        **kwargs: Any,
    ) -> List[Cell]:
        """
        Resolve differences between `databooks.data_models.notebook.Cells`.

        :param keep_first_cells: Whether to keep the cells of the first notebook or not.
         If `None`, then keep both wrapping the git-diff tags
        :param first_id: Git hash of first file in conflict
        :param last_id: Git hash of last file in conflict
        :param kwargs: (Unused) keyword arguments to keep compatibility with
         `databooks.data_models.base.resolve`
        :return: List of cells
        """
        if keep_first_cells is not None:
            # `not keep_first_cells` is used as an index: 0 selects the first
            #  element of each pair, 1 the last
            return list(
                chain.from_iterable(pairs[not keep_first_cells] for pairs in self.data)
            )
        return list(
            chain.from_iterable(
                Cells.wrap_git(
                    first_cells=val[0],
                    last_cells=val[1],
                    hash_first=first_id,
                    hash_last=last_id,
                )
                if val[0] != val[1]
                else val[0]
                for val in self.data
            )
        )

data: List[T] property readonly

Define property data required for collections.UserList class.

__get_validators__() classmethod special

Get validators for custom class.

Source code in databooks/data_models/notebook.py
@classmethod
def __get_validators__(cls) -> Generator[Callable[..., Any], None, None]:
    """Yield the callables used to validate values of this custom class."""
    yield from (cls.validate,)

__iter__(self) special

Use list property as iterable.

Source code in databooks/data_models/notebook.py
def __iter__(self) -> Generator[Any, None, None]:
    """Return a generator over the elements exposed by the `data` property."""
    elements = self.data
    return (element for element in elements)

__rich_console__(self, console, options) special

Rich display of all cells in notebook.

Source code in databooks/data_models/notebook.py
def __rich_console__(
    self, console: Console, options: ConsoleOptions
) -> RenderResult:
    """Render every cell, sizing columns to a third of the available width."""
    column_width = options.max_width // 3
    renderables = self._get_renderables(expand=True, width=column_width)
    yield from renderables

__sub__(self, other) special

Return the difference using difflib.SequenceMatcher.

Source code in databooks/data_models/notebook.py
def __sub__(self: Cells[Cell], other: Cells[Cell]) -> Cells[CellsPair]:
    """
    Return the difference using `difflib.SequenceMatcher`.

    :param other: Cells to compare against, must be of the same type as `self`
    :return: Pairs of cell slices (from `self` and from `other`), one pair per
     opcode
    :raises TypeError: If the operands are not of the exact same type
    :raises RuntimeError: If the matcher returns more than one group of opcodes
    """
    if type(self) is not type(other):
        raise TypeError(
            f"Unsupported operand types for `-`: `{type(self).__name__}` and"
            f" `{type(other).__name__}`"
        )

    # By setting the context to the max number of cells and using
    #  `difflib.SequenceMatcher.get_grouped_opcodes` we essentially get the same
    #  result as `difflib.SequenceMatcher.get_opcodes` but in smaller chunks
    n_context = max(len(self), len(other))
    diff_opcodes = list(
        SequenceMatcher(
            isjunk=None, a=self, b=other, autojunk=False
        ).get_grouped_opcodes(n_context)
    )

    if len(diff_opcodes) > 1:
        raise RuntimeError(
            "Expected one group for opcodes when context size is"
            f" {n_context} for {len(self)} and {len(other)} cells in"
            " notebooks."
        )

    # `data` builds a new list on every access, so read it once per operand
    first_data, last_data = self.data, other.data
    return Cells[CellsPair](
        [
            # https://github.com/python/mypy/issues/9459
            tuple((first_data[i1:j1], last_data[i2:j2]))  # type: ignore
            for _, i1, j1, i2, j2 in chain.from_iterable(diff_opcodes)
        ]
    )

resolve(self, *, keep_first_cells=None, first_id=None, last_id=None, **kwargs)

Resolve differences between databooks.data_models.notebook.Cells.

Parameters:

Name Type Description Default
keep_first_cells Optional[bool]

Whether to keep the cells of the first notebook or not. If None, then keep both wrapping the git-diff tags

None
first_id Optional[str]

Git hash of first file in conflict

None
last_id Optional[str]

Git hash of last file in conflict

None
kwargs Any

(Unused) keyword arguments to keep compatibility with databooks.data_models.base.resolve

{}

Returns:

Type Description
List[Cell]

List of cells

Source code in databooks/data_models/notebook.py
def resolve(
    self: Cells[CellsPair],
    *,
    keep_first_cells: Optional[bool] = None,
    first_id: Optional[str] = None,
    last_id: Optional[str] = None,
    **kwargs: Any,
) -> List[Cell]:
    """
    Flatten a diff of `databooks.data_models.notebook.Cells` into a list of cells.

    :param keep_first_cells: `True` keeps the cells of the first notebook, `False`
     the cells of the last one. With `None` both sides are kept, wrapped in
     git-diff tags wherever they differ
    :param first_id: Git hash of first file in conflict
    :param last_id: Git hash of last file in conflict
    :param kwargs: (Unused) keyword arguments to keep compatibility with
     `databooks.data_models.base.resolve`
    :return: List of cells
    """
    resolved: List[Cell] = []

    if keep_first_cells is not None:
        # Used as an index into each pair: `False` -> first, `True` -> last
        side = not keep_first_cells
        for pair in self.data:
            resolved.extend(pair[side])
        return resolved

    for pair in self.data:
        if pair[0] != pair[1]:
            conflict = Cells.wrap_git(
                first_cells=pair[0],
                last_cells=pair[1],
                hash_first=first_id,
                hash_last=last_id,
            )
            resolved.extend(conflict)
        else:
            # Both sides agree, keeping either one is enough
            resolved.extend(pair[0])
    return resolved

validate(v) classmethod

Ensure object is custom defined container.

Source code in databooks/data_models/notebook.py
@classmethod
def validate(cls, v: List[T]) -> Cells[T]:
    """Return `v` unchanged if it is already an instance of `cls`, else wrap it."""
    return v if isinstance(v, cls) else cls(v)

wrap_cols(first_cells, last_cells, **cols_kwargs) classmethod

Wrap the first and second cells into columns for iterable.

Source code in databooks/data_models/notebook.py
@classmethod
def wrap_cols(
    cls, first_cells: List[Cell], last_cells: List[Cell], **cols_kwargs: Any
) -> Sequence[Columns]:
    """
    Wrap the first and second cells into columns for iterable.

    An empty list of cells is displayed as a centered `<None>` placeholder.
    """
    placeholder = [Panel(Text("<None>", justify="center"), box=box.SIMPLE)]
    groups = [
        Group(*(cells or placeholder)) for cells in (first_cells, last_cells)
    ]
    return [Columns(groups, **cols_kwargs)]

wrap_git(first_cells, last_cells, hash_first=None, hash_last=None) staticmethod

Wrap git-diff cells in existing notebook.

Source code in databooks/data_models/notebook.py
@staticmethod
def wrap_git(
    first_cells: List[Cell],
    last_cells: List[Cell],
    hash_first: Optional[str] = None,
    hash_last: Optional[str] = None,
) -> Sequence[Cell]:
    """
    Surround two groups of cells with git-conflict style marker cells.

    The result is: an opening `<<<<<<<` marker, `first_cells`, a `=======`
    separator, `last_cells` and a closing `>>>>>>>` marker. Markers are markdown
    cells; the opening and closing ones carry the given hashes in their metadata.
    """
    opening = MarkdownCell(
        metadata=CellMetadata(git_hash=hash_first),
        source=[f"`<<<<<<< {hash_first}`"],
    )
    separator = MarkdownCell(
        source=["`=======`"],
        metadata=CellMetadata(),
    )
    closing = MarkdownCell(
        metadata=CellMetadata(git_hash=hash_last),
        source=[f"`>>>>>>> {hash_last}`"],
    )
    return [opening, *first_cells, separator, *last_cells, closing]

JupyterNotebook (DatabooksBase)

Jupyter notebook. Extra fields yield invalid notebook.

Source code in databooks/data_models/notebook.py
class JupyterNotebook(DatabooksBase, extra=Extra.forbid):
    """Jupyter notebook. Extra fields yield invalid notebook."""

    nbformat: int
    nbformat_minor: int
    metadata: NotebookMetadata
    cells: Cells[Cell]

    def __rich_console__(
        self, console: Console, options: ConsoleOptions
    ) -> RenderResult:
        """
        Rich display notebook.

        Yields the kernel display name (when available) followed by the cells. As a
        side effect, the metadata of every code cell is replaced by a copy that also
        carries the notebook language under `lang`.
        """

        def _rich(kernel: str) -> Text:
            """Display with `kernel` theme, horizontal padding and right-justified."""
            return Text(kernel, style="kernel", justify="right")

        kernelspec = self.metadata.dict().get("kernelspec", {})
        if isinstance(kernelspec, tuple):  # check if this is a `DiffCells`
            kernelspec = tuple(
                ks or {"language": "text", "display_name": "null"} for ks in kernelspec
            )
            lang_first, lang_last = (ks.get("language", "text") for ks in kernelspec)
            nb_lang = lang_first if lang_first == lang_last else "text"
            if any("display_name" in ks for ks in kernelspec):
                # Only one side may define `display_name`; fall back to the same
                #  placeholder used for a missing kernelspec instead of raising
                kernel_first, kernel_last = (
                    _rich(ks.get("display_name", "null")) for ks in kernelspec
                )
                yield Columns(
                    [kernel_first, kernel_last],
                    expand=True,
                    width=options.max_width // 3,
                ) if kernel_first != kernel_last else kernel_first
        else:
            nb_lang = kernelspec.get("language", "text")
            if "display_name" in kernelspec:
                yield _rich(kernelspec["display_name"])

        for cell in self.cells:
            if isinstance(cell, CodeCell):
                cell.metadata = CellMetadata(**cell.metadata.dict(), lang=nb_lang)
        yield self.cells

    @classmethod
    def parse_file(cls, path: Path | str, **parse_kwargs: Any) -> JupyterNotebook:
        """
        Parse notebook from a path.

        :param path: Location of the notebook file
        :param parse_kwargs: Only `content_type` is inspected, the remaining keyword
         arguments are ignored
        :return: Parsed notebook
        :raises ValueError: If `content_type` is passed with a value other than
         `json`
        """
        content_arg = parse_kwargs.pop("content_type", None)
        # Explicitly asking for the default (`json`) is valid and must not raise
        if content_arg not in (None, "json"):
            raise ValueError(
                f"Value of `content_type` must be `json` (default), got `{content_arg}`"
            )

        path = Path(path) if not isinstance(path, Path) else path
        # Read as UTF-8 regardless of the platform's default encoding
        return JupyterNotebook.model_validate_json(
            json_data=path.read_text(encoding="utf-8")
        )

    def write(
        self, path: Path | str, overwrite: bool = False, **json_kwargs: Any
    ) -> None:
        """
        Write notebook to disk.

        :param path: Destination file
        :param overwrite: Whether an existing file at `path` may be replaced
        :param json_kwargs: Keyword arguments passed to `json.dump`, `indent`
         defaults to 2
        :raises ValueError: If a file exists at `path` and `overwrite` is false
        """
        path = Path(path) if not isinstance(path, Path) else path
        json_kwargs = {"indent": 2, **json_kwargs}
        if path.is_file() and not overwrite:
            raise ValueError(f"File exists at {path}. Specify `overwrite = True`.")

        # Validate the current contents before the file is opened for writing
        self.__class__.model_validate(self.dict())

        with path.open("w", encoding="utf-8") as f:
            json.dump(self.dict(), fp=f, **json_kwargs)

    def clear_metadata(
        self,
        *,
        notebook_metadata_keep: Optional[Sequence[str]] = None,
        notebook_metadata_remove: Optional[Sequence[str]] = None,
        **cell_kwargs: Any,
    ) -> None:
        """
        Clear notebook and cell metadata.

        :param notebook_metadata_keep: Metadata values to keep - simply pass an empty
         sequence (i.e.: `()`) to remove all extra fields.
        :param notebook_metadata_remove: Metadata values to remove
        :param cell_kwargs: keyword arguments to be passed to each cell's
         `clear_fields`
        :raises ValueError: Unless exactly one of `notebook_metadata_keep` and
         `notebook_metadata_remove` is passed
        """
        nargs = sum(
            (notebook_metadata_keep is not None, notebook_metadata_remove is not None)
        )
        if nargs != 1:
            raise ValueError(
                "Exactly one of `notebook_metadata_keep` or `notebook_metadata_remove`"
                f" must be passed, got {nargs} arguments."
            )
        if notebook_metadata_keep is not None:
            # Translate "keep" into the complementary set of fields to remove
            notebook_metadata_remove = tuple(
                field
                for field, _ in self.metadata
                if field not in notebook_metadata_keep
            )
        self.metadata.remove_fields(notebook_metadata_remove)  # type: ignore

        if cell_kwargs:
            # Clear a copy and assign it back instead of mutating cells in place
            _clean_cells = deepcopy(self.cells)
            for cell in _clean_cells:
                cell.clear_fields(**cell_kwargs)
            self.cells = _clean_cells

__rich_console__(self, console, options) special

Rich display notebook.

Source code in databooks/data_models/notebook.py
def __rich_console__(
    self, console: Console, options: ConsoleOptions
) -> RenderResult:
    """
    Rich display notebook.

    Yields the kernel display name (when available) followed by the cells. As a
    side effect, the metadata of every code cell is replaced by a copy that also
    carries the notebook language under `lang`.
    """

    def _rich(kernel: str) -> Text:
        """Display with `kernel` theme, horizontal padding and right-justified."""
        return Text(kernel, style="kernel", justify="right")

    kernelspec = self.metadata.dict().get("kernelspec", {})
    if isinstance(kernelspec, tuple):  # check if this is a `DiffCells`
        kernelspec = tuple(
            ks or {"language": "text", "display_name": "null"} for ks in kernelspec
        )
        lang_first, lang_last = (ks.get("language", "text") for ks in kernelspec)
        nb_lang = lang_first if lang_first == lang_last else "text"
        if any("display_name" in ks for ks in kernelspec):
            # Only one side may define `display_name`; fall back to the same
            #  placeholder used for a missing kernelspec instead of raising
            kernel_first, kernel_last = (
                _rich(ks.get("display_name", "null")) for ks in kernelspec
            )
            yield Columns(
                [kernel_first, kernel_last],
                expand=True,
                width=options.max_width // 3,
            ) if kernel_first != kernel_last else kernel_first
    else:
        nb_lang = kernelspec.get("language", "text")
        if "display_name" in kernelspec:
            yield _rich(kernelspec["display_name"])

    for cell in self.cells:
        if isinstance(cell, CodeCell):
            cell.metadata = CellMetadata(**cell.metadata.dict(), lang=nb_lang)
    yield self.cells

clear_metadata(self, *, notebook_metadata_keep=None, notebook_metadata_remove=None, **cell_kwargs)

Clear notebook and cell metadata.

Parameters:

Name Type Description Default
notebook_metadata_keep Sequence[str]

Metadata values to keep - simply pass an empty sequence (i.e.: ()) to remove all extra fields.

None
notebook_metadata_remove Sequence[str]

Metadata values to remove

None
cell_kwargs Any

keyword arguments to be passed to each cell's clear_fields method

{}

Returns:

Type Description
None
Source code in databooks/data_models/notebook.py
def clear_metadata(
    self,
    *,
    notebook_metadata_keep: Optional[Sequence[str]] = None,
    notebook_metadata_remove: Optional[Sequence[str]] = None,
    **cell_kwargs: Any,
) -> None:
    """
    Clear notebook and cell metadata.

    :param notebook_metadata_keep: Metadata values to keep - simply pass an empty
     sequence (i.e.: `()`) to remove all extra fields.
    :param notebook_metadata_remove: Metadata values to remove
    :param cell_kwargs: keyword arguments to be passed to each cell's
     `clear_fields`
    :raises ValueError: Unless exactly one of `notebook_metadata_keep` and
     `notebook_metadata_remove` is passed
    """
    nargs = sum(
        (notebook_metadata_keep is not None, notebook_metadata_remove is not None)
    )
    if nargs != 1:
        raise ValueError(
            "Exactly one of `notebook_metadata_keep` or `notebook_metadata_remove`"
            f" must be passed, got {nargs} arguments."
        )
    if notebook_metadata_keep is not None:
        # Translate "keep" into the complementary set of fields to remove
        notebook_metadata_remove = tuple(
            field
            for field, _ in self.metadata
            if field not in notebook_metadata_keep
        )
    self.metadata.remove_fields(notebook_metadata_remove)  # type: ignore

    if cell_kwargs:
        # Clear a copy and assign it back instead of mutating cells in place
        _clean_cells = deepcopy(self.cells)
        for cell in _clean_cells:
            cell.clear_fields(**cell_kwargs)
        self.cells = _clean_cells

parse_file(path, **parse_kwargs) classmethod

Parse notebook from a path.

Source code in databooks/data_models/notebook.py
@classmethod
def parse_file(cls, path: Path | str, **parse_kwargs: Any) -> JupyterNotebook:
    """
    Parse notebook from a path.

    :param path: Location of the notebook file
    :param parse_kwargs: Only `content_type` is inspected, the remaining keyword
     arguments are ignored
    :return: Parsed notebook
    :raises ValueError: If `content_type` is passed with a value other than `json`
    """
    content_arg = parse_kwargs.pop("content_type", None)
    # Explicitly asking for the default (`json`) is valid and must not raise
    if content_arg not in (None, "json"):
        raise ValueError(
            f"Value of `content_type` must be `json` (default), got `{content_arg}`"
        )

    path = Path(path) if not isinstance(path, Path) else path
    # Read as UTF-8 regardless of the platform's default encoding
    return JupyterNotebook.model_validate_json(
        json_data=path.read_text(encoding="utf-8")
    )

write(self, path, overwrite=False, **json_kwargs)

Write notebook to disk.

Source code in databooks/data_models/notebook.py
def write(
    self, path: Path | str, overwrite: bool = False, **json_kwargs: Any
) -> None:
    """
    Write notebook to disk.

    :param path: Destination file
    :param overwrite: Whether an existing file at `path` may be replaced
    :param json_kwargs: Keyword arguments passed to `json.dump`, `indent` defaults
     to 2
    :raises ValueError: If a file exists at `path` and `overwrite` is false
    """
    path = Path(path) if not isinstance(path, Path) else path
    json_kwargs = {"indent": 2, **json_kwargs}
    if path.is_file() and not overwrite:
        raise ValueError(f"File exists at {path}. Specify `overwrite = True`.")

    # Validate the current contents before the file is opened for writing
    self.__class__.model_validate(self.dict())

    with path.open("w", encoding="utf-8") as f:
        json.dump(self.dict(), fp=f, **json_kwargs)

NotebookMetadata (DatabooksBase)

Notebook metadata. Empty by default but can accept extra fields.

Source code in databooks/data_models/notebook.py
class NotebookMetadata(DatabooksBase):
    """Notebook metadata. Empty by default but can accept extra fields."""