From 28c44450da07ee9100ab12af35e238c91bf1bd43 Mon Sep 17 00:00:00 2001 From: lightningforever Date: Thu, 5 Jan 2023 11:11:41 +0100 Subject: [PATCH 01/21] rebased changes --- requirements/fabric/test.txt | 1 + src/lightning_fabric/CHANGELOG.md | 6 + src/lightning_fabric/fabric.py | 43 ++- src/lightning_fabric/loggers/__init__.py | 14 + src/lightning_fabric/loggers/logger.py | 136 ++++++++ src/lightning_fabric/loggers/tensorboard.py | 310 ++++++++++++++++++ tests/tests_fabric/conftest.py | 1 + tests/tests_fabric/loggers/__init__.py | 0 .../tests_fabric/loggers/test_tensorboard.py | 229 +++++++++++++ tests/tests_fabric/test_fabric.py | 54 ++- 10 files changed, 792 insertions(+), 2 deletions(-) create mode 100644 src/lightning_fabric/loggers/__init__.py create mode 100644 src/lightning_fabric/loggers/logger.py create mode 100644 src/lightning_fabric/loggers/tensorboard.py create mode 100644 tests/tests_fabric/loggers/__init__.py create mode 100644 tests/tests_fabric/loggers/test_tensorboard.py diff --git a/requirements/fabric/test.txt b/requirements/fabric/test.txt index abb2cf558488a..c4e1cf8866576 100644 --- a/requirements/fabric/test.txt +++ b/requirements/fabric/test.txt @@ -4,3 +4,4 @@ pytest==7.2.0 pytest-cov==4.0.0 pre-commit==2.20.0 click==8.1.3 +tensorboard>=2.9.1, <2.12.0 diff --git a/src/lightning_fabric/CHANGELOG.md b/src/lightning_fabric/CHANGELOG.md index afb900294f64b..0a51fc3858cc4 100644 --- a/src/lightning_fabric/CHANGELOG.md +++ b/src/lightning_fabric/CHANGELOG.md @@ -30,6 +30,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Added support for managing callbacks via `Fabric(callbacks=...)` and emitting events through `Fabric.call()` ([#16074](https://github.com/Lightning-AI/lightning/issues/16074)) +- Added Logger support ([#16121](https://github.com/Lightning-AI/lightning/issues/16121)) + * Added `Fabric(loggers=...)` to support different Logger frameworks in Fabric + * Added `Fabric.log` for logging scalars using multiple loggers + * Added `Fabric.log_dict` for logging a dictionary of multiple metrics at once + * Added `Fabric.loggers` and `Fabric.logger` attributes to access the individual logger instances + - Added support for a consistent `.zero_grad(set_to_none=...)` on the wrapped optimizer regardless of which strategy is used ([#16275](https://github.com/Lightning-AI/lightning/issues/16275)) diff --git a/src/lightning_fabric/fabric.py b/src/lightning_fabric/fabric.py index 75ba50e2b5dfa..fbfe572ebbdb5 100644 --- a/src/lightning_fabric/fabric.py +++ b/src/lightning_fabric/fabric.py @@ -22,10 +22,13 @@ import torch.nn as nn from lightning_utilities.core.apply_func import apply_to_collection from lightning_utilities.core.overrides import is_overridden +from lightning_utilities.core.rank_zero import rank_zero_warn from torch import Tensor from torch.optim import Optimizer from torch.utils.data import BatchSampler, DataLoader, DistributedSampler, RandomSampler, SequentialSampler +from lightning_fabric.loggers import Logger + from lightning_fabric.plugins import Precision # avoid circular imports: # isort: split from lightning_fabric.accelerators.accelerator import Accelerator from lightning_fabric.connector import _Connector, _PLUGIN_INPUT, _PRECISION_INPUT @@ -47,7 +50,6 @@ has_iterable_dataset, ) from lightning_fabric.utilities.distributed import DistributedSamplerWrapper -from lightning_fabric.utilities.rank_zero import rank_zero_warn from lightning_fabric.utilities.seed import seed_everything from 
lightning_fabric.utilities.warnings import PossibleUserWarning from lightning_fabric.wrappers import _FabricDataLoader, _FabricModule, _FabricOptimizer @@ -74,6 +76,10 @@ class Fabric: precision: Double precision (``64``), full precision (``32``), half precision (``16``), or bfloat16 precision (``"bf16"``). plugins: One or several custom plugins + callbacks: A single callback or a list of callbacks. A callback can contain any arbitrary methods that + can be invoked through :meth:`lightning_fabric.fabric.Fabric.call` by the user. + loggers: A single logger or a list of loggers. See :meth:`lightning_fabric.fabric.Fabric.log` for more + information. """ def __init__( @@ -85,6 +91,7 @@ def __init__( precision: _PRECISION_INPUT = 32, plugins: Optional[Union[_PLUGIN_INPUT, List[_PLUGIN_INPUT]]] = None, callbacks: Optional[Union[List[Any], Any]] = None, + loggers: Optional[Union[Logger, List[Logger]]] = None, ) -> None: self._connector = _Connector( accelerator=accelerator, @@ -99,6 +106,8 @@ def __init__( self._precision: Precision = self._strategy.precision callbacks = callbacks if callbacks is not None else [] self._callbacks = callbacks if isinstance(callbacks, list) else [callbacks] + loggers = loggers if loggers is not None else [] + self._loggers = loggers if isinstance(loggers, list) else [loggers] self._models_setup: int = 0 self._prepare_run_method() @@ -148,6 +157,16 @@ def is_global_zero(self) -> bool: """Whether this rank is rank zero.""" return self._strategy.is_global_zero + @property + def loggers(self) -> List[Logger]: + """Returns all loggers passed to Fabric.""" + return self._loggers + + @property + def logger(self) -> Logger: + """Returns the first logger in the list passed to Fabric, which is considered the main logger.""" + return self._loggers[0] + def run(self, *args: Any, **kwargs: Any) -> Any: """All the code inside this run method gets accelerated by Fabric. @@ -573,6 +592,28 @@ def on_train_epoch_end(self, results): # method(self, *args, y=1) # method(self, *args, **kwargs) + def log(self, name: str, value: Any, step: Optional[int] = None) -> None: + """Log a scalar to all loggers that were added to Fabric. + + Args: + name: The name of the metric to log. + value: The metric value to collect. + step: Optional step number. Most Logger implementations auto-increment the step value by one with every + log call. You can specify your own value here. + """ + self.log_dict(metrics={name: value}, step=step) + + def log_dict(self, metrics: Dict[str, Any], step: Optional[int] = None) -> None: + """Log multiple scalars at once to all loggers that were added to Fabric. + + Args: + metrics: A dictionary where the key is the name of the metric and the value the scalar to be logged. + step: Optional step number. Most Logger implementations auto-increment this value by one with every + log call. You can specify your own value here. + """ + for logger in self._loggers: + logger.log_metrics(metrics=metrics, step=step) + @staticmethod def seed_everything(seed: Optional[int] = None, workers: Optional[bool] = None) -> int: """Helper function to seed everything without explicitly importing Lightning. diff --git a/src/lightning_fabric/loggers/__init__.py b/src/lightning_fabric/loggers/__init__.py new file mode 100644 index 0000000000000..03c21d71f8304 --- /dev/null +++ b/src/lightning_fabric/loggers/__init__.py @@ -0,0 +1,14 @@ +# Copyright The PyTorch Lightning team. 
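For orientation, here is a minimal usage sketch of the Fabric logging API introduced above. The model, optimizer, and loop body are illustrative placeholders (not part of this patch), and a single-process run with tensorboard installed is assumed:

    import torch
    from lightning_fabric import Fabric
    from lightning_fabric.loggers import TensorBoardLogger

    fabric = Fabric(loggers=TensorBoardLogger(root_dir="logs"))
    model = torch.nn.Linear(32, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    model, optimizer = fabric.setup(model, optimizer)

    for step in range(10):
        loss = model(torch.randn(4, 32, device=fabric.device)).sum()
        fabric.backward(loss)
        optimizer.step()
        optimizer.zero_grad()
        # forwards {"loss": ...} to the log_metrics() of every configured logger
        fabric.log("loss", loss.item(), step=step)

With no loggers configured (the default), `fabric.log` and `fabric.log_dict` are no-ops.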
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from lightning_fabric.loggers.logger import Logger # noqa: F401 +from lightning_fabric.loggers.tensorboard import TensorBoardLogger # noqa: F401 diff --git a/src/lightning_fabric/loggers/logger.py b/src/lightning_fabric/loggers/logger.py new file mode 100644 index 0000000000000..a66023eb747d1 --- /dev/null +++ b/src/lightning_fabric/loggers/logger.py @@ -0,0 +1,136 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Abstract base class used to build new loggers.""" + +from abc import ABC, abstractmethod +from argparse import Namespace +from functools import wraps +from typing import Any, Callable, Dict, Optional, Union + +from torch import Tensor +from torch.nn import Module + +from lightning_fabric.utilities.rank_zero import rank_zero_only + + +class Logger(ABC): + """Base class for experiment loggers.""" + + @property + @abstractmethod + def name(self) -> Optional[str]: + """Return the experiment name.""" + + @property + @abstractmethod + def version(self) -> Optional[Union[int, str]]: + """Return the experiment version.""" + + @property + def root_dir(self) -> Optional[str]: + """Return the root directory where all versions of an experiment get saved, or `None` if the logger does + not save data locally.""" + return None + + @property + def log_dir(self) -> Optional[str]: + """Return directory the current version of the experiment gets saved, or `None` if the logger does not save + data locally.""" + return None + + @property + def group_separator(self) -> str: + """Return the default separator used by the logger to group the data into subfolders.""" + return "/" + + @abstractmethod + def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None: + """Records metrics. This method logs metrics as soon as it received them. + + Args: + metrics: Dictionary with metric names as keys and measured quantities as values + step: Step number at which the metrics should be recorded + """ + pass + + @abstractmethod + def log_hyperparams(self, params: Union[Dict[str, Any], Namespace], *args: Any, **kwargs: Any) -> None: + """Record hyperparameters. 
+ + Args: + params: :class:`~argparse.Namespace` or `Dict` containing the hyperparameters + args: Optional positional arguments, depends on the specific logger being used + kwargs: Optional keyword arguments, depends on the specific logger being used + """ + + def log_graph(self, model: Module, input_array: Optional[Tensor] = None) -> None: + """Record model graph. + + Args: + model: the model with an implementation of ``forward``. + input_array: input passes to `model.forward` + """ + pass + + def save(self) -> None: + """Save log data.""" + + def finalize(self, status: str) -> None: + """Do any processing that is necessary to finalize an experiment. + + Args: + status: Status that the experiment finished with (e.g. success, failed, aborted) + """ + self.save() + + +def rank_zero_experiment(fn: Callable) -> Callable: + """Returns the real experiment on rank 0 and otherwise the _DummyExperiment.""" + + @wraps(fn) + def experiment(self) -> Union[Any, _DummyExperiment]: # type: ignore[no-untyped-def] + """ + Note: + ``self`` is a custom logger instance. The loggers typically wrap an ``experiment`` method + with a ``@rank_zero_experiment`` decorator. + + ``Union[Any, _DummyExperiment]`` is used because the wrapped hooks have several return + types that are specific to the custom logger. The return type here can be considered as + ``Union[return type of logger.experiment, _DummyExperiment]``. + """ + + @rank_zero_only + def get_experiment() -> Callable: + return fn(self) + + return get_experiment() or _DummyExperiment() + + return experiment + + +class _DummyExperiment: + """Dummy experiment.""" + + def nop(self, *args: Any, **kw: Any) -> None: + pass + + def __getattr__(self, _: Any) -> Callable: + return self.nop + + def __getitem__(self, idx: int) -> "_DummyExperiment": + # enables self.logger.experiment[0].add_image(...) + return self + + def __setitem__(self, *args: Any, **kwargs: Any) -> None: + pass diff --git a/src/lightning_fabric/loggers/tensorboard.py b/src/lightning_fabric/loggers/tensorboard.py new file mode 100644 index 0000000000000..ca694d9ea30c5 --- /dev/null +++ b/src/lightning_fabric/loggers/tensorboard.py @@ -0,0 +1,310 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
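To illustrate the base class and the `rank_zero_experiment`/`_DummyExperiment` pattern added here, below is a hedged sketch of a minimal custom logger that could be passed to `Fabric(loggers=...)`. The `PrintLogger` name and its behaviour are invented for the example; only the overridden hooks come from the interface above:

    from argparse import Namespace
    from typing import Any, Dict, Optional, Union

    from lightning_fabric.loggers.logger import Logger, rank_zero_experiment
    from lightning_fabric.utilities.rank_zero import rank_zero_only


    class PrintLogger(Logger):
        """Toy logger that writes metrics to stdout on rank 0 only."""

        @property
        def name(self) -> str:
            return "print"

        @property
        def version(self) -> int:
            return 0

        @property
        @rank_zero_experiment
        def experiment(self) -> Any:
            # real "experiment" on rank 0; other ranks get a _DummyExperiment
            return print

        @rank_zero_only
        def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
            self.experiment(f"step={step} {metrics}")

        @rank_zero_only
        def log_hyperparams(self, params: Union[Dict[str, Any], Namespace], *args: Any, **kwargs: Any) -> None:
            self.experiment(f"hparams: {params}")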
+ +import logging +import os +from argparse import Namespace +from typing import Any, Dict, Mapping, Optional, TYPE_CHECKING, Union + +import numpy as np +from lightning_utilities.core.imports import RequirementCache +from torch import Tensor +from torch.nn import Module + +from lightning_fabric.loggers.logger import Logger, rank_zero_experiment +from lightning_fabric.utilities.cloud_io import get_filesystem +from lightning_fabric.utilities.logger import _add_prefix, _convert_params, _flatten_dict +from lightning_fabric.utilities.logger import _sanitize_params as _utils_sanitize_params +from lightning_fabric.utilities.rank_zero import rank_zero_only, rank_zero_warn +from lightning_fabric.utilities.types import _PATH + +log = logging.getLogger(__name__) + +_TENSORBOARD_AVAILABLE = RequirementCache("tensorboard") +_TENSORBOARDX_AVAILABLE = RequirementCache("tensorboardX") +if TYPE_CHECKING: + # assumes at least one will be installed when type checking + if _TENSORBOARD_AVAILABLE: + from torch.utils.tensorboard import SummaryWriter + else: + from tensorboardX import SummaryWriter # type: ignore[no-redef] + + +class TensorBoardLogger(Logger): + r""" + Log to local file system in `TensorBoard `_ format. + + Implemented using :class:`~tensorboardX.SummaryWriter`. Logs are saved to + ``os.path.join(root_dir, name, version)``. This is the recommended logger in Lightning Fabric. + + Args: + root_dir: The root directory in which all your experiments with different names and versions will be stored. + name: Experiment name. Defaults to ``'lightning_logs'``. If it is the empty string then no per-experiment + subdirectory is used. + version: Experiment version. If version is not specified the logger inspects the save + directory for existing versions, then automatically assigns the next available version. + If it is a string then it is used as the run-specific subdirectory name, + otherwise ``'version_${version}'`` is used. + default_hp_metric: Enables a placeholder metric with key `hp_metric` when `log_hyperparams` is + called without a metric (otherwise calls to ``log_hyperparams`` without a metric are ignored). + prefix: A string to put at the beginning of all metric keys. + sub_dir: Sub-directory to group TensorBoard logs. If a ``sub_dir`` argument is passed + then logs are saved in ``/root_dir/name/version/sub_dir/``. Defaults to ``None`` in which case + logs are saved in ``/root_dir/name/version/``. + \**kwargs: Additional arguments used by :class:`tensorboardX.SummaryWriter` can be passed as keyword + arguments in this logger. To automatically flush to disk, `max_queue` sets the size + of the queue for pending logs before flushing. `flush_secs` determines how many seconds + elapses before flushing. + + + Example:: + + from lightning.fabric.loggers import TensorBoardLogger + + logger = TensorBoardLogger("path/to/logs/rot", name="my_model") + logger.log_hyperparams({"epochs": 5, "optimizer": "Adam"}) + logger.log_metrics({"acc": 0.75}) + logger.finalize("success") + """ + LOGGER_JOIN_CHAR = "-" + + def __init__( + self, + root_dir: _PATH, + name: Optional[str] = "lightning_logs", + version: Optional[Union[int, str]] = None, + default_hp_metric: bool = True, + prefix: str = "", + sub_dir: Optional[_PATH] = None, + **kwargs: Any, + ): + if not _TENSORBOARD_AVAILABLE and not _TENSORBOARDX_AVAILABLE: + raise ModuleNotFoundError( + "Neither `tensorboard` nor `tensorboardX` is available. Try `pip install`ing either." 
+ ) + super().__init__() + root_dir = os.fspath(root_dir) + self._root_dir = root_dir + self._name = name or "" + self._version = version + self._sub_dir = None if sub_dir is None else os.fspath(sub_dir) + + self._default_hp_metric = default_hp_metric + self._prefix = prefix + self._fs = get_filesystem(root_dir) + + self._experiment: Optional["SummaryWriter"] = None + self._kwargs = kwargs + + @property + def name(self) -> str: + """Get the name of the experiment. + + Returns: + The name of the experiment. + """ + return self._name + + @property + def version(self) -> Union[int, str]: + """Get the experiment version. + + Returns: + The experiment version if specified else the next version. + """ + if self._version is None: + self._version = self._get_next_version() + return self._version + + @property + def root_dir(self) -> str: + """Gets the save directory where the TensorBoard experiments are saved. + + Returns: + The local path to the save directory where the TensorBoard experiments are saved. + """ + return self._root_dir + + @property + def log_dir(self) -> str: + """The directory for this run's tensorboard checkpoint. + + By default, it is named ``'version_${self.version}'`` but it can be overridden by passing a string value for the + constructor's version parameter instead of ``None`` or an int. + """ + version = self.version if isinstance(self.version, str) else f"version_{self.version}" + log_dir = os.path.join(self.root_dir, self.name, version) + if isinstance(self.sub_dir, str): + log_dir = os.path.join(log_dir, self.sub_dir) + log_dir = os.path.expandvars(log_dir) + log_dir = os.path.expanduser(log_dir) + return log_dir + + @property + def sub_dir(self) -> Optional[str]: + """Gets the sub directory where the TensorBoard experiments are saved. + + Returns: + The local path to the sub directory where the TensorBoard experiments are saved. + """ + return self._sub_dir + + @property + @rank_zero_experiment + def experiment(self) -> "SummaryWriter": + """Actual tensorboard object. To use TensorBoard features anywhere in your code, do the following. + + Example:: + + logger.experiment.some_tensorboard_function() + """ + if self._experiment is not None: + return self._experiment + + assert rank_zero_only.rank == 0, "tried to init log dirs in non global_rank=0" + if self.root_dir: + self._fs.makedirs(self.root_dir, exist_ok=True) + + if _TENSORBOARD_AVAILABLE: + from torch.utils.tensorboard import SummaryWriter + else: + from tensorboardX import SummaryWriter # type: ignore[no-redef] + + self._experiment = SummaryWriter(log_dir=self.log_dir, **self._kwargs) + return self._experiment + + @rank_zero_only + def log_metrics(self, metrics: Mapping[str, float], step: Optional[int] = None) -> None: + assert rank_zero_only.rank == 0, "experiment tried to log from global_rank != 0" + + metrics = _add_prefix(metrics, self._prefix, self.LOGGER_JOIN_CHAR) + + for k, v in metrics.items(): + if isinstance(v, Tensor): + v = v.item() + + if isinstance(v, dict): + self.experiment.add_scalars(k, v, step) + else: + try: + self.experiment.add_scalar(k, v, step) + # TODO(fabric): specify the possible exception + except Exception as ex: + m = f"\n you tried to log {v} which is currently not supported. Try a dict or a scalar/tensor." + raise ValueError(m) from ex + + @rank_zero_only + def log_hyperparams( + self, params: Union[Dict[str, Any], Namespace], metrics: Optional[Dict[str, Any]] = None + ) -> None: + """Record hyperparameters. 
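As a concrete illustration of how the `root_dir`/`name`/`version`/`sub_dir` arguments above map onto the final log directory (paths are arbitrary examples, and tensorboard is assumed to be installed):

    import os
    import torch
    from lightning_fabric.loggers import TensorBoardLogger

    logger = TensorBoardLogger(root_dir="runs", name="my_model", version=3, sub_dir="train")
    # root_dir / name / version_<n> / sub_dir
    assert logger.log_dir == os.path.join("runs", "my_model", "version_3", "train")

    # scalar tensors are unwrapped with .item(); the SummaryWriter is created lazily here
    logger.log_metrics({"loss": torch.tensor(0.25), "lr": 1e-3}, step=0)
    logger.finalize("success")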
TensorBoard logs with and without saved hyperparameters are incompatible, the + hyperparameters are then not displayed in the TensorBoard. Please delete or move the previously saved logs + to display the new ones with hyperparameters. + + Args: + params: a dictionary-like container with the hyperparameters + metrics: Dictionary with metric names as keys and measured quantities as values + """ + params = _convert_params(params) + + # format params into the suitable for tensorboard + params = _flatten_dict(params) + params = self._sanitize_params(params) + + if metrics is None: + if self._default_hp_metric: + metrics = {"hp_metric": -1} + elif not isinstance(metrics, dict): + metrics = {"hp_metric": metrics} + + if metrics: + self.log_metrics(metrics, 0) + + if _TENSORBOARD_AVAILABLE: + from torch.utils.tensorboard.summary import hparams + else: + from tensorboardX.summary import hparams # type: ignore[no-redef] + + exp, ssi, sei = hparams(params, metrics) + writer = self.experiment._get_file_writer() + writer.add_summary(exp) + writer.add_summary(ssi) + writer.add_summary(sei) + + @rank_zero_only + def log_graph(self, model: Module, input_array: Optional[Tensor] = None) -> None: + model_example_input = getattr(model, "example_input_array", None) + input_array = model_example_input if input_array is None else input_array + + if input_array is None: + rank_zero_warn( + "Could not log computational graph to TensorBoard: The `model.example_input_array` attribute" + " is not set or `input_array` was not given." + ) + elif not isinstance(input_array, (Tensor, tuple)): + rank_zero_warn( + "Could not log computational graph to TensorBoard: The `input_array` or `model.example_input_array`" + f" has type {type(input_array)} which can't be traced by TensorBoard. Make the input array a tuple" + f" representing the positional arguments to the model's `forward()` implementation." + ) + elif callable(getattr(model, "_on_before_batch_transfer", None)) and callable( + getattr(model, "_apply_batch_transfer_handler", None) + ): + # this is probably is a LightningModule + input_array = model._on_before_batch_transfer(input_array) # type: ignore[operator] + input_array = model._apply_batch_transfer_handler(input_array) # type: ignore[operator] + self.experiment.add_graph(model, input_array) + + @rank_zero_only + def save(self) -> None: + self.experiment.flush() + + @rank_zero_only + def finalize(self, status: str) -> None: + if self._experiment is not None: + self.experiment.flush() + self.experiment.close() + + def _get_next_version(self) -> int: + save_dir = os.path.join(self.root_dir, self.name) + + try: + listdir_info = self._fs.listdir(save_dir) + except OSError: + # TODO(fabric): This message can be confusing (did user do something wrong?). Improve it or remove it. 
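Seen from the user side, the hyperparameter plumbing above boils down to the following hedged sketch (values are arbitrary, tensorboard assumed installed):

    from lightning_fabric.loggers import TensorBoardLogger

    logger = TensorBoardLogger("logs", name="hparam_demo", default_hp_metric=False)
    # writes the hparams summary plus an "accuracy" scalar at step 0, so the run
    # appears in TensorBoard's HPARAMS tab alongside a real reference metric
    logger.log_hyperparams({"lr": 0.01, "batch_size": 32}, metrics={"accuracy": 0.0})
    logger.finalize("success")

Passing no `metrics` with `default_hp_metric=True` (the default) records the placeholder `hp_metric` value of -1 instead.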
+ log.warning("Missing logger folder: %s", save_dir) + return 0 + + existing_versions = [] + for listing in listdir_info: + d = listing["name"] + bn = os.path.basename(d) + if self._fs.isdir(d) and bn.startswith("version_"): + dir_ver = bn.split("_")[1].replace("/", "") + existing_versions.append(int(dir_ver)) + if len(existing_versions) == 0: + return 0 + + return max(existing_versions) + 1 + + @staticmethod + def _sanitize_params(params: Dict[str, Any]) -> Dict[str, Any]: + params = _utils_sanitize_params(params) + # logging of arrays with dimension > 1 is not supported, sanitize as string + return {k: str(v) if isinstance(v, (Tensor, np.ndarray)) and v.ndim > 1 else v for k, v in params.items()} + + def __getstate__(self) -> Dict[str, Any]: + state = self.__dict__.copy() + state["_experiment"] = None + return state diff --git a/tests/tests_fabric/conftest.py b/tests/tests_fabric/conftest.py index b650e408a7b60..5aa786d20bf80 100644 --- a/tests/tests_fabric/conftest.py +++ b/tests/tests_fabric/conftest.py @@ -54,6 +54,7 @@ def restore_env_variables(): "RANK", # set by DeepSpeed "POPLAR_ENGINE_OPTIONS", # set by IPUStrategy "CUDA_MODULE_LOADING", # leaked since PyTorch 1.13 + "CRC32C_SW_MODE", # leaked by tensorboardX } leaked_vars.difference_update(allowlist) assert not leaked_vars, f"test is leaking environment variable(s): {set(leaked_vars)}" diff --git a/tests/tests_fabric/loggers/__init__.py b/tests/tests_fabric/loggers/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/tests_fabric/loggers/test_tensorboard.py b/tests/tests_fabric/loggers/test_tensorboard.py new file mode 100644 index 0000000000000..a0569248be143 --- /dev/null +++ b/tests/tests_fabric/loggers/test_tensorboard.py @@ -0,0 +1,229 @@ +# Copyright The PyTorch Lightning team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging +import os +from argparse import Namespace +from unittest import mock +from unittest.mock import Mock + +import numpy as np +import pytest +import torch +from tests_fabric.test_fabric import BoringModel + +from lightning_fabric.loggers import TensorBoardLogger +from lightning_fabric.loggers.tensorboard import _TENSORBOARD_AVAILABLE + + +def test_tensorboard_automatic_versioning(tmpdir): + """Verify that automatic versioning works.""" + root_dir = tmpdir / "tb_versioning" + root_dir.mkdir() + (root_dir / "version_0").mkdir() + (root_dir / "version_1").mkdir() + + logger = TensorBoardLogger(root_dir=tmpdir, name="tb_versioning") + assert logger.version == 2 + + +def test_tensorboard_manual_versioning(tmpdir): + """Verify that manual versioning works.""" + root_dir = tmpdir / "tb_versioning" + root_dir.mkdir() + (root_dir / "version_0").mkdir() + (root_dir / "version_1").mkdir() + (root_dir / "version_2").mkdir() + + logger = TensorBoardLogger(root_dir=tmpdir, name="tb_versioning", version=1) + assert logger.version == 1 + + +def test_tensorboard_named_version(tmpdir): + """Verify that manual versioning works for string versions, e.g. 
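The `_sanitize_params` helper above is what allows the hyperparameter tests in this file to pass multi-dimensional tensors and arrays: anything the TensorBoard hparams summary cannot store directly ends up as its string representation. A small sketch of the effect (values chosen arbitrarily):

    import numpy as np
    import torch
    from lightning_fabric.loggers import TensorBoardLogger

    params = {
        "lr": 0.1,                    # plain scalars pass through unchanged
        "weights": torch.ones(2, 2),  # tensor with ndim > 1 -> string repr
        "mask": np.zeros([2, 2, 2]),  # numpy array -> string repr
    }
    sanitized = TensorBoardLogger._sanitize_params(params)
    assert sanitized["lr"] == 0.1
    assert isinstance(sanitized["weights"], str)
    assert isinstance(sanitized["mask"], str)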
'2020-02-05-162402'.""" + name = "tb_versioning" + (tmpdir / name).mkdir() + expected_version = "2020-02-05-162402" + + logger = TensorBoardLogger(root_dir=tmpdir, name=name, version=expected_version) + logger.log_hyperparams({"a": 1, "b": 2, 123: 3, 3.5: 4, 5j: 5}) # Force data to be written + + assert logger.version == expected_version + assert os.listdir(tmpdir / name) == [expected_version] + assert os.listdir(tmpdir / name / expected_version) + + +@pytest.mark.parametrize("name", ["", None]) +def test_tensorboard_no_name(tmpdir, name): + """Verify that None or empty name works.""" + logger = TensorBoardLogger(root_dir=tmpdir, name=name) + logger.log_hyperparams({"a": 1, "b": 2, 123: 3, 3.5: 4, 5j: 5}) # Force data to be written + assert os.path.normpath(logger.root_dir) == tmpdir # use os.path.normpath to handle trailing / + assert os.listdir(tmpdir / "version_0") + + +def test_tensorboard_log_sub_dir(tmpdir): + # no sub_dir specified + root_dir = tmpdir / "logs" + logger = TensorBoardLogger(root_dir, name="name", version="version") + assert logger.log_dir == os.path.join(root_dir, "name", "version") + + # sub_dir specified + logger = TensorBoardLogger(root_dir, name="name", version="version", sub_dir="sub_dir") + assert logger.log_dir == os.path.join(root_dir, "name", "version", "sub_dir") + + +def test_tensorboard_expand_home(): + """Test that the home dir (`~`) gets expanded properly.""" + root_dir = "~/tmp" + explicit_root_dir = os.path.expanduser(root_dir) + logger = TensorBoardLogger(root_dir, name="name", version="version", sub_dir="sub_dir") + assert logger.root_dir == root_dir + assert logger.log_dir == os.path.join(explicit_root_dir, "name", "version", "sub_dir") + + +@mock.patch.dict(os.environ, {"TEST_ENV_DIR": "some_directory"}) +def test_tensorboard_expand_env_vars(): + """Test that the env vars in path names (`$`) get handled properly.""" + test_env_dir = os.environ["TEST_ENV_DIR"] + root_dir = "$TEST_ENV_DIR/tmp" + explicit_root_dir = f"{test_env_dir}/tmp" + logger = TensorBoardLogger(root_dir, name="name", version="version", sub_dir="sub_dir") + assert logger.log_dir == os.path.join(explicit_root_dir, "name", "version", "sub_dir") + + +@pytest.mark.parametrize("step_idx", [10, None]) +def test_tensorboard_log_metrics(tmpdir, step_idx): + logger = TensorBoardLogger(tmpdir) + metrics = {"float": 0.3, "int": 1, "FloatTensor": torch.tensor(0.1), "IntTensor": torch.tensor(1)} + logger.log_metrics(metrics, step_idx) + + +def test_tensorboard_log_hyperparams(tmpdir): + logger = TensorBoardLogger(tmpdir) + hparams = { + "float": 0.3, + "int": 1, + "string": "abc", + "bool": True, + "dict": {"a": {"b": "c"}}, + "list": [1, 2, 3], + "namespace": Namespace(foo=Namespace(bar="buzz")), + "layer": torch.nn.BatchNorm1d, + "tensor": torch.empty(2, 2, 2), + "array": np.empty([2, 2, 2]), + } + logger.log_hyperparams(hparams) + + +def test_tensorboard_log_hparams_and_metrics(tmpdir): + logger = TensorBoardLogger(tmpdir, default_hp_metric=False) + hparams = { + "float": 0.3, + "int": 1, + "string": "abc", + "bool": True, + "dict": {"a": {"b": "c"}}, + "list": [1, 2, 3], + "namespace": Namespace(foo=Namespace(bar="buzz")), + "layer": torch.nn.BatchNorm1d, + "tensor": torch.empty(2, 2, 2), + "array": np.empty([2, 2, 2]), + } + metrics = {"abc": torch.tensor([0.54])} + logger.log_hyperparams(hparams, metrics) + + +@pytest.mark.parametrize("example_input_array", [None, torch.rand(2, 32)]) +def test_tensorboard_log_graph(tmpdir, example_input_array): + """test that log graph works with both 
model.example_input_array and if array is passed externally.""" + # TODO(fabric): Test both nn.Module and LightningModule + # TODO(fabric): Assert _apply_batch_transfer_handler is calling the batch transfer hooks + model = BoringModel() + if example_input_array is not None: + model.example_input_array = None + + logger = TensorBoardLogger(tmpdir, log_graph=True) + logger.log_graph(model, example_input_array) + + +@pytest.mark.skipif(not _TENSORBOARD_AVAILABLE, reason=str(_TENSORBOARD_AVAILABLE)) +def test_tensorboard_log_graph_warning_no_example_input_array(tmpdir): + """test that log graph throws warning if model.example_input_array is None.""" + model = BoringModel() + model.example_input_array = None + logger = TensorBoardLogger(tmpdir, log_graph=True) + with pytest.warns( + UserWarning, + match="Could not log computational graph to TensorBoard: The `model.example_input_array` .* was not given", + ): + logger.log_graph(model) + + model.example_input_array = dict(x=1, y=2) + with pytest.warns( + UserWarning, match="Could not log computational graph to TensorBoard: .* can't be traced by TensorBoard" + ): + logger.log_graph(model) + + +def test_tensorboard_finalize(monkeypatch, tmpdir): + """Test that the SummaryWriter closes in finalize.""" + if _TENSORBOARD_AVAILABLE: + import torch.utils.tensorboard as tb + else: + import tensorboardX as tb + + monkeypatch.setattr(tb, "SummaryWriter", Mock()) + logger = TensorBoardLogger(root_dir=tmpdir) + assert logger._experiment is None + logger.finalize("any") + + # no log calls, no experiment created -> nothing to flush + logger.experiment.assert_not_called() + + logger = TensorBoardLogger(root_dir=tmpdir) + logger.log_metrics({"flush_me": 11.1}) # trigger creation of an experiment + logger.finalize("any") + + # finalize flushes to experiment directory + logger.experiment.flush.assert_called() + logger.experiment.close.assert_called() + + +@mock.patch("lightning_fabric.loggers.tensorboard.log") +def test_tensorboard_with_symlink(log, tmpdir): + """Tests a specific failure case when tensorboard logger is used with empty name, symbolic link ``save_dir``, + and relative paths.""" + os.chdir(tmpdir) # need to use relative paths + source = os.path.join(".", "lightning_logs") + dest = os.path.join(".", "sym_lightning_logs") + + os.makedirs(source, exist_ok=True) + os.symlink(source, dest) + + logger = TensorBoardLogger(root_dir=dest, name="") + _ = logger.version + + log.warning.assert_not_called() + + +def test_tensorboard_missing_folder_warning(tmpdir, caplog): + """Verify that the logger throws a warning for invalid directory.""" + + name = "fake_dir" + logger = TensorBoardLogger(root_dir=tmpdir, name=name) + + with caplog.at_level(logging.WARNING): + assert logger.version == 0 + + assert "Missing logger folder:" in caplog.text diff --git a/tests/tests_fabric/test_fabric.py b/tests/tests_fabric/test_fabric.py index 8aa40b0359a3f..88ac3d9fb7d82 100644 --- a/tests/tests_fabric/test_fabric.py +++ b/tests/tests_fabric/test_fabric.py @@ -718,7 +718,7 @@ def test_callbacks_input(): assert fabric._callbacks == [callback0, callback1] -def test_call_callbacks(): +def test_call(): """Test that `fabric.call` triggers the callback implementations.""" callback0 = Mock() callback1 = Mock() @@ -748,3 +748,55 @@ def test_call_callbacks(): with pytest.warns(UserWarning, match="Skipping the callback `Mock.not_a_method`"): fabric.call("not_a_method") assert not callback1.mock_calls + + +def test_loggers_input(): + """Test the various ways in which loggers can be 
registered with Fabric.""" + logger0 = Mock() + logger1 = Mock() + + # no logger + fabric = Fabric(loggers=None) + assert fabric._loggers == [] + fabric = Fabric(loggers=[]) + assert fabric._loggers == [] + + # single logger + fabric = Fabric(loggers=logger0) + assert fabric._loggers == [logger0] + + # multiple loggers + fabric = Fabric(loggers=[logger0, logger1]) + assert fabric._loggers == [logger0, logger1] + + +def test_log(): + """Test that `fabric.log` sends the metrics to each logger.""" + + logger0 = Mock() + logger1 = Mock() + fabric = Fabric(loggers=[logger0, logger1]) + + fabric.log("test", 1) + logger0.log_metrics.assert_called_with(metrics={"test": 1}, step=None) + logger1.log_metrics.assert_called_with(metrics={"test": 1}, step=None) + + fabric.log("test", 2, step=15) + logger0.log_metrics.assert_called_with(metrics={"test": 2}, step=15) + logger1.log_metrics.assert_called_with(metrics={"test": 2}, step=15) + + +def test_log_dict(): + """Test that `fabric.log_dict` sends the metrics dict to each logger.""" + + logger0 = Mock() + logger1 = Mock() + fabric = Fabric(loggers=[logger0, logger1]) + + fabric.log_dict({"foo": 1, "bar": 2}, step=None) + logger0.log_metrics.assert_called_with(metrics={"foo": 1, "bar": 2}, step=None) + logger1.log_metrics.assert_called_with(metrics={"foo": 1, "bar": 2}, step=None) + + fabric.log_dict({"foo": 3, "bar": 4}, step=15) + logger0.log_metrics.assert_called_with(metrics={"foo": 3, "bar": 4}, step=15) + logger1.log_metrics.assert_called_with(metrics={"foo": 3, "bar": 4}, step=15) From a9bedf863f3737aad98fa7029fc7b25c70d3d79b Mon Sep 17 00:00:00 2001 From: lightningforever Date: Thu, 5 Jan 2023 11:47:51 +0100 Subject: [PATCH 02/21] fix --- _notebooks | 2 +- .../utilities/migration/test_migration.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/_notebooks b/_notebooks index db21eb1dc060f..6d5634b794218 160000 --- a/_notebooks +++ b/_notebooks @@ -1 +1 @@ -Subproject commit db21eb1dc060fbba9c0e3336d827235d51cd5bd0 +Subproject commit 6d5634b7942180e6ba4a30bfbd74926d1c22f1eb diff --git a/tests/tests_pytorch/utilities/migration/test_migration.py b/tests/tests_pytorch/utilities/migration/test_migration.py index 1d8224059bfa7..f77c4ea3d220a 100644 --- a/tests/tests_pytorch/utilities/migration/test_migration.py +++ b/tests/tests_pytorch/utilities/migration/test_migration.py @@ -146,3 +146,13 @@ def test_migrate_model_checkpoint_save_on_train_epoch_end_default_collision(): with pytest.warns(PossibleUserWarning, match="callback states in this checkpoint.* colliding with each other"): updated_checkpoint, _ = migrate_checkpoint(old_checkpoint.copy(), target_version="1.9.0") assert updated_checkpoint["callbacks"] == old_checkpoint["callbacks"] # no migration was performed + + +def test_migrate_dropped_apex_amp_state(monkeypatch): + """Test that the migration warns about collisions that would occur if the keys were modified.""" + monkeypatch.setattr(pl, "__version__", "2.0.0") # pretend this version of Lightning is >= 2.0.0 + old_checkpoint = {"amp_scaling_state": {"scale": 1.23}} + _set_version(old_checkpoint, "1.9.0") # pretend a checkpoint prior to 2.0.0 + with pytest.warns(UserWarning, match="checkpoint contains apex AMP data"): + updated_checkpoint, _ = migrate_checkpoint(old_checkpoint.copy()) + assert "amp_scaling_state" not in updated_checkpoint From 055a546ecdc8aa0a1a79ae1f2c9323f67a17b1fc Mon Sep 17 00:00:00 2001 From: lightningforever Date: Thu, 5 Jan 2023 11:53:22 +0100 Subject: [PATCH 03/21] fix --- 
src/lightning/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lightning/__init__.py b/src/lightning/__init__.py index a0d5835536792..b66e83a1e2b17 100644 --- a/src/lightning/__init__.py +++ b/src/lightning/__init__.py @@ -40,7 +40,6 @@ def _detail(self: Any, message: str, *args: Any, **kwargs: Any) -> None: from lightning.fabric.utilities.seed import seed_everything # noqa: E402 from lightning.pytorch.callbacks import Callback # noqa: E402 from lightning.pytorch.core import LightningDataModule, LightningModule # noqa: E402 -from lightning.pytorch.lite import LightningLite # noqa: E402 from lightning.pytorch.trainer import Trainer # noqa: E402 import lightning.app # isort: skip # noqa: E402 @@ -61,7 +60,6 @@ def _detail(self: Any, message: str, *args: Any, **kwargs: Any) -> None: "LightningModule", "Callback", "seed_everything", - "LightningLite", "Fabric", "storage", "pdb", From b81ee71ca053759c77f76fd5fa585294359b7781 Mon Sep 17 00:00:00 2001 From: lightningforever Date: Thu, 5 Jan 2023 13:01:42 +0100 Subject: [PATCH 04/21] using inheritance to share code it's so wrong but we do it anyway not learning from our mistake in the past --- src/pytorch_lightning/loggers/tensorboard.py | 191 +++---------------- 1 file changed, 23 insertions(+), 168 deletions(-) diff --git a/src/pytorch_lightning/loggers/tensorboard.py b/src/pytorch_lightning/loggers/tensorboard.py index a2553576601e0..b8c1f03d84889 100644 --- a/src/pytorch_lightning/loggers/tensorboard.py +++ b/src/pytorch_lightning/loggers/tensorboard.py @@ -19,37 +19,26 @@ import logging import os from argparse import Namespace -from typing import Any, Dict, Mapping, Optional, TYPE_CHECKING, Union +from typing import Any, Dict, Optional, Union -import numpy as np from lightning_utilities.core.imports import RequirementCache -from torch import Tensor -import pytorch_lightning as pl -from lightning_fabric.utilities.cloud_io import get_filesystem -from lightning_fabric.utilities.logger import _add_prefix, _convert_params, _flatten_dict -from lightning_fabric.utilities.logger import _sanitize_params as _utils_sanitize_params +from lightning_fabric.loggers import TensorBoardLogger as FabricTensorBoardLogger +from lightning_fabric.utilities.logger import _convert_params from lightning_fabric.utilities.types import _PATH +from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.saving import save_hparams_to_yaml -from pytorch_lightning.loggers.logger import Logger, rank_zero_experiment from pytorch_lightning.utilities.imports import _OMEGACONF_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_only, rank_zero_warn log = logging.getLogger(__name__) _TENSORBOARD_AVAILABLE = RequirementCache("tensorboard") -if TYPE_CHECKING: - # assumes at least one will be installed when type checking - if _TENSORBOARD_AVAILABLE: - from torch.utils.tensorboard import SummaryWriter - else: - from tensorboardX import SummaryWriter # type: ignore[no-redef] - if _OMEGACONF_AVAILABLE: from omegaconf import Container, OmegaConf -class TensorBoardLogger(Logger): +class TensorBoardLogger(FabricTensorBoardLogger): r""" Log to local file system in `TensorBoard `_ format. 
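The net effect of the refactor in this patch is that the Trainer-facing TensorBoard logger becomes a thin subclass of the Fabric one, keeping only the Trainer-specific extras such as saving hparams.yaml. A quick hedged check of what that buys (the path is a placeholder and tensorboard is assumed installed):

    from lightning_fabric.loggers import TensorBoardLogger as FabricTensorBoardLogger
    from pytorch_lightning.loggers import TensorBoardLogger

    # one logger class now serves both Trainer and Fabric
    assert issubclass(TensorBoardLogger, FabricTensorBoardLogger)

    logger = TensorBoardLogger(save_dir="logs", name="exp")  # public signature unchanged
    assert isinstance(logger, FabricTensorBoardLogger)
    assert TensorBoardLogger.NAME_HPARAMS_FILE == "hparams.yaml"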
@@ -100,7 +89,6 @@ class TensorBoardLogger(Logger): >>> shutil.rmtree(tmp) """ NAME_HPARAMS_FILE = "hparams.yaml" - LOGGER_JOIN_CHAR = "-" def __init__( self, @@ -113,23 +101,19 @@ def __init__( sub_dir: Optional[_PATH] = None, **kwargs: Any, ): - super().__init__() - save_dir = os.fspath(save_dir) - self._save_dir = save_dir - self._name = name or "" - self._version = version - self._sub_dir = None if sub_dir is None else os.fspath(sub_dir) + super().__init__( + root_dir=save_dir, + name=name, + version=version, + default_hp_metric=default_hp_metric, + prefix=prefix, + sub_dir=sub_dir, + **kwargs, + ) if log_graph and not _TENSORBOARD_AVAILABLE: rank_zero_warn("You set `TensorBoardLogger(log_graph=True)` but `tensorboard` is not available.") self._log_graph = log_graph and _TENSORBOARD_AVAILABLE - - self._default_hp_metric = default_hp_metric - self._prefix = prefix - self._fs = get_filesystem(save_dir) - - self._experiment: Optional["SummaryWriter"] = None self.hparams: Union[Dict[str, Any], Namespace] = {} - self._kwargs = kwargs @property def root_dir(self) -> str: @@ -138,7 +122,7 @@ def root_dir(self) -> str: If the experiment name parameter is an empty string, no experiment subdirectory is used and the checkpoint will be saved in "save_dir/version" """ - return os.path.join(self.save_dir, self.name) + return os.path.join(super().root_dir, self.name) @property def log_dir(self) -> str: @@ -163,43 +147,7 @@ def save_dir(self) -> str: Returns: The local path to the save directory where the TensorBoard experiments are saved. """ - return self._save_dir - - @property - def sub_dir(self) -> Optional[str]: - """Gets the sub directory where the TensorBoard experiments are saved. - - Returns: - The local path to the sub directory where the TensorBoard experiments are saved. - """ - return self._sub_dir - - @property - @rank_zero_experiment - def experiment(self) -> "SummaryWriter": - r""" - Actual tensorboard object. To use TensorBoard features in your - :class:`~pytorch_lightning.core.module.LightningModule` do the following. 
- - Example:: - - self.logger.experiment.some_tensorboard_function() - - """ - if self._experiment is not None: - return self._experiment - - assert rank_zero_only.rank == 0, "tried to init log dirs in non global_rank=0" - if self.root_dir: - self._fs.makedirs(self.root_dir, exist_ok=True) - - if _TENSORBOARD_AVAILABLE: - from torch.utils.tensorboard import SummaryWriter - else: - from tensorboardX import SummaryWriter # type: ignore[no-redef] - - self._experiment = SummaryWriter(log_dir=self.log_dir, **self._kwargs) - return self._experiment + return self._root_dir @rank_zero_only def log_hyperparams( @@ -213,7 +161,6 @@ def log_hyperparams( params: a dictionary-like container with the hyperparameters metrics: Dictionary with metric names as keys and measured quantities as values """ - params = _convert_params(params) # store params to output @@ -222,73 +169,7 @@ def log_hyperparams( else: self.hparams.update(params) - # format params into the suitable for tensorboard - params = _flatten_dict(params) - params = self._sanitize_params(params) - - if metrics is None: - if self._default_hp_metric: - metrics = {"hp_metric": -1} - elif not isinstance(metrics, dict): - metrics = {"hp_metric": metrics} - - if metrics: - self.log_metrics(metrics, 0) - - if _TENSORBOARD_AVAILABLE: - from torch.utils.tensorboard.summary import hparams - else: - from tensorboardX.summary import hparams # type: ignore[no-redef] - - exp, ssi, sei = hparams(params, metrics) - writer = self.experiment._get_file_writer() - writer.add_summary(exp) - writer.add_summary(ssi) - writer.add_summary(sei) - - @rank_zero_only - def log_metrics(self, metrics: Mapping[str, float], step: Optional[int] = None) -> None: - assert rank_zero_only.rank == 0, "experiment tried to log from global_rank != 0" - - metrics = _add_prefix(metrics, self._prefix, self.LOGGER_JOIN_CHAR) - - for k, v in metrics.items(): - if isinstance(v, Tensor): - v = v.item() - - if isinstance(v, dict): - self.experiment.add_scalars(k, v, step) - else: - try: - self.experiment.add_scalar(k, v, step) - # todo: specify the possible exception - except Exception as ex: - m = f"\n you tried to log {v} which is currently not supported. Try a dict or a scalar/tensor." - raise ValueError(m) from ex - - @rank_zero_only - def log_graph(self, model: "pl.LightningModule", input_array: Optional[Tensor] = None) -> None: - if not self._log_graph: - return - - input_array = model.example_input_array if input_array is None else input_array - - if input_array is None: - rank_zero_warn( - "Could not log computational graph to TensorBoard: The `model.example_input_array` attribute" - " is not set or `input_array` was not given." - ) - elif not isinstance(input_array, (Tensor, tuple)): - rank_zero_warn( - "Could not log computational graph to TensorBoard: The `input_array` or `model.example_input_array`" - f" has type {type(input_array)} which can't be traced by TensorBoard. Make the input array a tuple" - f" representing the positional arguments to the model's `forward()` implementation." 
- ) - else: - input_array = model._on_before_batch_transfer(input_array) - input_array = model._apply_batch_transfer_handler(input_array) - with pl.core.module._jit_is_scripting(): - self.experiment.add_graph(model, input_array) + return super().log_hyperparams(params=params, metrics=metrics) @rank_zero_only def save(self) -> None: @@ -304,33 +185,18 @@ def save(self) -> None: @rank_zero_only def finalize(self, status: str) -> None: - if self._experiment is not None: - self.experiment.flush() - self.experiment.close() - + super().finalize(status) if status == "success": # saving hparams happens independent of experiment manager self.save() - @property - def name(self) -> str: - """Get the name of the experiment. - - Returns: - The name of the experiment. - """ - return self._name - - @property - def version(self) -> Union[int, str]: - """Get the experiment version. + def after_save_checkpoint(self, checkpoint_callback: ModelCheckpoint) -> None: + """Called after model checkpoint callback saves a new checkpoint. - Returns: - The experiment version if specified else the next version. + Args: + checkpoint_callback: the model checkpoint callback instance """ - if self._version is None: - self._version = self._get_next_version() - return self._version + pass def _get_next_version(self) -> int: root_dir = self.root_dir @@ -352,14 +218,3 @@ def _get_next_version(self) -> int: return 0 return max(existing_versions) + 1 - - @staticmethod - def _sanitize_params(params: Dict[str, Any]) -> Dict[str, Any]: - params = _utils_sanitize_params(params) - # logging of arrays with dimension > 1 is not supported, sanitize as string - return {k: str(v) if isinstance(v, (Tensor, np.ndarray)) and v.ndim > 1 else v for k, v in params.items()} - - def __getstate__(self) -> Dict[str, Any]: - state = self.__dict__.copy() - state["_experiment"] = None - return state From 5df141c2818a9c62552c8fe955a397dd34f7727c Mon Sep 17 00:00:00 2001 From: lightningforever Date: Thu, 5 Jan 2023 13:08:52 +0100 Subject: [PATCH 05/21] more --- src/pytorch_lightning/loggers/logger.py | 113 ++---------------------- 1 file changed, 8 insertions(+), 105 deletions(-) diff --git a/src/pytorch_lightning/loggers/logger.py b/src/pytorch_lightning/loggers/logger.py index 30bd9e66261e9..e72d41452c5f6 100644 --- a/src/pytorch_lightning/loggers/logger.py +++ b/src/pytorch_lightning/loggers/logger.py @@ -16,46 +16,21 @@ import functools import operator -from abc import ABC, abstractmethod -from argparse import Namespace +from abc import ABC from collections import defaultdict -from functools import wraps -from typing import Any, Callable, Dict, Mapping, Optional, Sequence, Union +from typing import Any, Callable, Dict, Mapping, Optional, Sequence import numpy as np -from torch import Tensor -import pytorch_lightning as pl +from lightning_fabric.loggers import Logger as FabricLogger +from lightning_fabric.loggers.logger import ( + rank_zero_experiment, # noqa: F401 # for backward compatibility + _DummyExperiment as DummyExperiment, # noqa: F401 # for backward compatibility +) from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint -from pytorch_lightning.utilities.rank_zero import rank_zero_only -def rank_zero_experiment(fn: Callable) -> Callable: - """Returns the real experiment on rank 0 and otherwise the DummyExperiment.""" - - @wraps(fn) - def experiment(self) -> Union[Any, DummyExperiment]: # type: ignore[no-untyped-def] - """ - Note: - ``self`` is a custom logger instance. 
The loggers typically wrap an ``experiment`` method - with a ``@rank_zero_experiment`` decorator. An exception is that ``loggers.neptune`` wraps - ``experiment`` and ``run`` with rank_zero_experiment. - - ``Union[Any, DummyExperiment]`` is used because the wrapped hooks have several return - types that are specific to the custom logger. The return type here can be considered as - ``Union[return type of logger.experiment, DummyExperiment]``. - """ - - @rank_zero_only - def get_experiment() -> Callable: - return fn(self) - - return get_experiment() or DummyExperiment() - - return experiment - - -class Logger(ABC): +class Logger(FabricLogger, ABC): """Base class for experiment loggers.""" def after_save_checkpoint(self, checkpoint_callback: ModelCheckpoint) -> None: @@ -66,84 +41,12 @@ def after_save_checkpoint(self, checkpoint_callback: ModelCheckpoint) -> None: """ pass - @abstractmethod - def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None: - """Records metrics. This method logs metrics as soon as it received them. - - Args: - metrics: Dictionary with metric names as keys and measured quantities as values - step: Step number at which the metrics should be recorded - """ - pass - - @abstractmethod - def log_hyperparams(self, params: Union[Dict[str, Any], Namespace], *args: Any, **kwargs: Any) -> None: - """Record hyperparameters. - - Args: - params: :class:`~argparse.Namespace` or `Dict` containing the hyperparameters - args: Optional positional arguments, depends on the specific logger being used - kwargs: Optional keyword arguments, depends on the specific logger being used - """ - - def log_graph(self, model: "pl.LightningModule", input_array: Optional[Tensor] = None) -> None: - """Record model graph. - - Args: - model: lightning model - input_array: input passes to `model.forward` - """ - pass - - def save(self) -> None: - """Save log data.""" - - def finalize(self, status: str) -> None: - """Do any processing that is necessary to finalize an experiment. - - Args: - status: Status that the experiment finished with (e.g. success, failed, aborted) - """ - self.save() - @property def save_dir(self) -> Optional[str]: """Return the root directory where experiment logs get saved, or `None` if the logger does not save data locally.""" return None - @property - def group_separator(self) -> str: - """Return the default separator used by the logger to group the data into subfolders.""" - return "/" - - @property - @abstractmethod - def name(self) -> Optional[str]: - """Return the experiment name.""" - - @property - @abstractmethod - def version(self) -> Optional[Union[int, str]]: - """Return the experiment version.""" - - -class DummyExperiment: - """Dummy experiment.""" - - def nop(self, *args: Any, **kw: Any) -> None: - pass - - def __getattr__(self, _: Any) -> Callable: - return self.nop - - def __getitem__(self, idx: int) -> "DummyExperiment": - # enables self.logger.experiment[0].add_image(...) - return self - - def __setitem__(self, *args: Any, **kwargs: Any) -> None: - pass - class DummyLogger(Logger): """Dummy logger for internal use. 
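Because downstream code imports these names from the PyTorch Lightning namespace, the aliased re-exports above are what keep the old import paths alive. A quick illustrative compatibility check:

    from lightning_fabric.loggers import Logger as FabricLogger
    from lightning_fabric.loggers.logger import _DummyExperiment
    from pytorch_lightning.loggers.logger import DummyExperiment, Logger, rank_zero_experiment

    # the old PyTorch Lightning names now resolve to the Fabric implementations
    assert DummyExperiment is _DummyExperiment
    assert issubclass(Logger, FabricLogger)
    assert callable(rank_zero_experiment)

    # DummyExperiment still swallows arbitrary calls, as before
    DummyExperiment().add_scalar("loss", 0.1, 0)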
From 52aeaa752fd28698576c439b34fb68ee260eba7e Mon Sep 17 00:00:00 2001 From: lightningforever Date: Thu, 5 Jan 2023 13:46:59 +0100 Subject: [PATCH 06/21] more nonsense --- src/pytorch_lightning/core/module.py | 6 +++--- src/pytorch_lightning/loggers/comet.py | 4 ++-- src/pytorch_lightning/loggers/logger.py | 6 ++---- .../connectors/logger_connector/logger_connector.py | 2 +- src/pytorch_lightning/trainer/trainer.py | 10 +++++----- tests/tests_pytorch/loggers/test_tensorboard.py | 1 + 6 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index cf86a4ccb756c..88fe0b681933a 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -45,7 +45,7 @@ from pytorch_lightning.core.mixins import HyperparametersMixin from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.core.saving import ModelIO -from pytorch_lightning.loggers import Logger +from pytorch_lightning.loggers import Logger, TensorBoardLogger from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import _FxValidator from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -291,12 +291,12 @@ def truncated_bptt_steps(self, truncated_bptt_steps: int) -> None: self._truncated_bptt_steps = truncated_bptt_steps @property - def logger(self) -> Optional[Logger]: + def logger(self) -> Optional[Union[Logger, TensorBoardLogger]]: """Reference to the logger object in the Trainer.""" return self._trainer.logger if self._trainer is not None else None @property - def loggers(self) -> List[Logger]: + def loggers(self) -> List[Union[Logger, TensorBoardLogger]]: """Reference to the list of loggers in the Trainer.""" return self.trainer.loggers if self._trainer else [] diff --git a/src/pytorch_lightning/loggers/comet.py b/src/pytorch_lightning/loggers/comet.py index e66281a420fca..c8ef84f2b59cf 100644 --- a/src/pytorch_lightning/loggers/comet.py +++ b/src/pytorch_lightning/loggers/comet.py @@ -23,8 +23,8 @@ from lightning_utilities.core.imports import module_available from torch import Tensor +from torch.nn import Module -import pytorch_lightning as pl from lightning_fabric.utilities.logger import _add_prefix, _convert_params, _flatten_dict from pytorch_lightning.loggers.logger import Logger, rank_zero_experiment from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -423,6 +423,6 @@ def __getstate__(self) -> Dict[str, Any]: state["_experiment"] = None return state - def log_graph(self, model: "pl.LightningModule", input_array: Optional[Tensor] = None) -> None: + def log_graph(self, model: "Module", input_array: Optional[Tensor] = None) -> None: if self._experiment is not None: self._experiment.set_model_graph(model) diff --git a/src/pytorch_lightning/loggers/logger.py b/src/pytorch_lightning/loggers/logger.py index e72d41452c5f6..3e96e20952b28 100644 --- a/src/pytorch_lightning/loggers/logger.py +++ b/src/pytorch_lightning/loggers/logger.py @@ -23,10 +23,8 @@ import numpy as np from lightning_fabric.loggers import Logger as FabricLogger -from lightning_fabric.loggers.logger import ( - rank_zero_experiment, # noqa: F401 # for backward compatibility - _DummyExperiment as DummyExperiment, # noqa: F401 # for backward compatibility -) +from lightning_fabric.loggers.logger import _DummyExperiment as DummyExperiment # for backward compatibility +from lightning_fabric.loggers.logger 
import rank_zero_experiment # noqa: F401 # for backward compatibility from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index a8b28866ea4c2..061fe458398a5 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -37,7 +37,7 @@ def __init__(self, trainer: "pl.Trainer") -> None: def on_trainer_init( self, - logger: Union[bool, Logger, Iterable[Logger]], + logger: Union[bool, Union[Logger, TensorBoardLogger], Iterable[Union[Logger, TensorBoardLogger]]], log_every_n_steps: int, move_metrics_to_cpu: bool, ) -> None: diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index 00e070d36e33d..f61537ccf4616 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -118,7 +118,7 @@ class Trainer: @_defaults_from_env_vars def __init__( self, - logger: Union[Logger, Iterable[Logger], bool] = True, + logger: Union[Union[Logger, TensorBoardLogger], Iterable[Union[Logger, TensorBoardLogger]], bool] = True, enable_checkpointing: bool = True, callbacks: Optional[Union[List[Callback], Callback]] = None, default_root_dir: Optional[_PATH] = None, @@ -518,7 +518,7 @@ def __init__( setup._init_profiler(self, profiler) # init logger flags - self._loggers: List[Logger] + self._loggers: List[Union[Logger, TensorBoardLogger]] self._logger_connector.on_trainer_init(logger, log_every_n_steps, move_metrics_to_cpu) # init debugging flags @@ -2154,7 +2154,7 @@ def _active_loop(self) -> Optional[Union[FitLoop, EvaluationLoop, PredictionLoop """ @property - def logger(self) -> Optional[Logger]: + def logger(self) -> Optional[Union[Logger, TensorBoardLogger]]: return self.loggers[0] if len(self.loggers) > 0 else None @logger.setter @@ -2165,11 +2165,11 @@ def logger(self, logger: Optional[Logger]) -> None: self.loggers = [logger] @property - def loggers(self) -> List[Logger]: + def loggers(self) -> List[Union[Logger, TensorBoardLogger]]: return self._loggers @loggers.setter - def loggers(self, loggers: Optional[List[Logger]]) -> None: + def loggers(self, loggers: Optional[List[Union[Logger, TensorBoardLogger]]]) -> None: self._loggers = loggers if loggers else [] @property diff --git a/tests/tests_pytorch/loggers/test_tensorboard.py b/tests/tests_pytorch/loggers/test_tensorboard.py index 87264216f2b48..0f8273575310a 100644 --- a/tests/tests_pytorch/loggers/test_tensorboard.py +++ b/tests/tests_pytorch/loggers/test_tensorboard.py @@ -211,6 +211,7 @@ def test_tensorboard_log_omegaconf_hparams_and_metrics(tmpdir): logger.log_hyperparams(hparams, metrics) +@pytest.mark.skipif(not _TENSORBOARD_AVAILABLE, reason=str(_TENSORBOARD_AVAILABLE)) @pytest.mark.parametrize("example_input_array", [None, torch.rand(2, 32)]) def test_tensorboard_log_graph(tmpdir, example_input_array): """test that log graph works with both model.example_input_array and if array is passed externally.""" From 0715b5d04fbf76eca138676742efcb628565ef4a Mon Sep 17 00:00:00 2001 From: lightningforever Date: Thu, 5 Jan 2023 14:03:20 +0100 Subject: [PATCH 07/21] docs --- docs/source-pytorch/fabric/fabric.rst | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/docs/source-pytorch/fabric/fabric.rst b/docs/source-pytorch/fabric/fabric.rst index 
e87a5d82e323d..52c794756a22d 100644 --- a/docs/source-pytorch/fabric/fabric.rst +++ b/docs/source-pytorch/fabric/fabric.rst @@ -385,6 +385,30 @@ Then, in your training loop, you can call a hook by its name. Any callback objec fabric.call("on_train_epoch_end", results={...}) +loggers +======= + +Attach one or several loggers/experiment trackers to Fabric for convenient logging of metrics. + +.. code-block:: python + + # Default used by Fabric, no loggers are active + fabric = Fabric(loggers=[]) + + # Log to a single logger + fabric = Fabric(loggers=TensorBoardLogger(...)) + + # Or multiple instances + fabric = Fabric(loggers=[logger1, logger2, ...]) + +Anywhere in your training loop, you can log metrics to all loggers at once: + +.. code-block:: python + + fabric.log("loss", loss) + fabric.log_dict({"loss": loss, "accuracy": acc}) + + ---------- @@ -613,3 +637,22 @@ It is useful when building a Trainer that allows the user to run arbitrary code # Only the callbacks that have this method defined will be executed fabric.call("undefined") + + +log and log_dict +================ + +These methods allow you to send scalar metrics to a logger registered in Fabric. + +.. code-block:: python + + # Set the logger in Fabric + fabric = Fabric(loggers=TensorBoardLogger(...)) + + # Anywhere in your training loop or model: + fabric.log("loss", loss) + + # Or send multiple metrics at once: + fabric.log_dict({"loss": loss, "accuracy": acc}) + +If no loggers are given to Fabric (default), `log` and `log_dict` won't do anything. From 364e8405e63a3a48888d26c3283ba70081ad6fcf Mon Sep 17 00:00:00 2001 From: lightningforever Date: Fri, 6 Jan 2023 16:31:41 +0100 Subject: [PATCH 08/21] typing --- .../trainer/connectors/logger_connector/logger_connector.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 061fe458398a5..7b9bbd17a36c1 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -37,7 +37,7 @@ def __init__(self, trainer: "pl.Trainer") -> None: def on_trainer_init( self, - logger: Union[bool, Union[Logger, TensorBoardLogger], Iterable[Union[Logger, TensorBoardLogger]]], + logger: Union[bool, Logger, TensorBoardLogger, Iterable[Union[Logger, TensorBoardLogger]]], log_every_n_steps: int, move_metrics_to_cpu: bool, ) -> None: @@ -51,7 +51,7 @@ def should_update_logs(self) -> bool: should_log = (self.trainer.fit_loop.epoch_loop._batches_that_stepped + 1) % self.trainer.log_every_n_steps == 0 return should_log or self.trainer.should_stop - def configure_logger(self, logger: Union[bool, Logger, Iterable[Logger]]) -> None: + def configure_logger(self, logger: Union[bool, Logger, TensorBoardLogger, Iterable[Union[Logger, TensorBoardLogger]]]) -> None: if not logger: # logger is None or logger is False self.trainer.loggers = [] From 4a225111c6747864898d0929bfcc42c16c2afc35 Mon Sep 17 00:00:00 2001 From: lightningforever Date: Fri, 6 Jan 2023 16:39:41 +0100 Subject: [PATCH 09/21] fix log_graph --- src/pytorch_lightning/loggers/tensorboard.py | 26 +++++++++++++++++++ .../tests_pytorch/loggers/test_tensorboard.py | 1 - 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/pytorch_lightning/loggers/tensorboard.py b/src/pytorch_lightning/loggers/tensorboard.py index b8c1f03d84889..de332fdcafcac 100644 ---
a/src/pytorch_lightning/loggers/tensorboard.py +++ b/src/pytorch_lightning/loggers/tensorboard.py @@ -22,7 +22,9 @@ from typing import Any, Dict, Optional, Union from lightning_utilities.core.imports import RequirementCache +from torch import Tensor +import pytorch_lightning as pl from lightning_fabric.loggers import TensorBoardLogger as FabricTensorBoardLogger from lightning_fabric.utilities.logger import _convert_params from lightning_fabric.utilities.types import _PATH @@ -171,6 +173,30 @@ def log_hyperparams( return super().log_hyperparams(params=params, metrics=metrics) + @rank_zero_only + def log_graph(self, model: "pl.LightningModule", input_array: Optional[Tensor] = None) -> None: + if not self._log_graph: + return + + input_array = model.example_input_array if input_array is None else input_array + + if input_array is None: + rank_zero_warn( + "Could not log computational graph to TensorBoard: The `model.example_input_array` attribute" + " is not set or `input_array` was not given." + ) + elif not isinstance(input_array, (Tensor, tuple)): + rank_zero_warn( + "Could not log computational graph to TensorBoard: The `input_array` or `model.example_input_array`" + f" has type {type(input_array)} which can't be traced by TensorBoard. Make the input array a tuple" + f" representing the positional arguments to the model's `forward()` implementation." + ) + else: + input_array = model._on_before_batch_transfer(input_array) + input_array = model._apply_batch_transfer_handler(input_array) + with pl.core.module._jit_is_scripting(): + self.experiment.add_graph(model, input_array) + @rank_zero_only def save(self) -> None: super().save() diff --git a/tests/tests_pytorch/loggers/test_tensorboard.py b/tests/tests_pytorch/loggers/test_tensorboard.py index 0f8273575310a..87264216f2b48 100644 --- a/tests/tests_pytorch/loggers/test_tensorboard.py +++ b/tests/tests_pytorch/loggers/test_tensorboard.py @@ -211,7 +211,6 @@ def test_tensorboard_log_omegaconf_hparams_and_metrics(tmpdir): logger.log_hyperparams(hparams, metrics) -@pytest.mark.skipif(not _TENSORBOARD_AVAILABLE, reason=str(_TENSORBOARD_AVAILABLE)) @pytest.mark.parametrize("example_input_array", [None, torch.rand(2, 32)]) def test_tensorboard_log_graph(tmpdir, example_input_array): """test that log graph works with both model.example_input_array and if array is passed externally.""" From e2f079b85559b0ce98ce4c6b5959047ed351ea21 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Jan 2023 15:33:03 +0000 Subject: [PATCH 10/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../trainer/connectors/logger_connector/logger_connector.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 7b9bbd17a36c1..d52ca18a705a3 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -51,7 +51,9 @@ def should_update_logs(self) -> bool: should_log = (self.trainer.fit_loop.epoch_loop._batches_that_stepped + 1) % self.trainer.log_every_n_steps == 0 return should_log or self.trainer.should_stop - def configure_logger(self, logger: Union[bool, Logger, TensorBoardLogger, Iterable[Union[Logger, TensorBoardLogger]]]) -> None: + def configure_logger( + 
self, logger: Union[bool, Logger, TensorBoardLogger, Iterable[Union[Logger, TensorBoardLogger]]] + ) -> None: if not logger: # logger is None or logger is False self.trainer.loggers = [] From 789a28a35f1353324d59b359811535d07121f73e Mon Sep 17 00:00:00 2001 From: lightningforever Date: Mon, 9 Jan 2023 12:45:34 +0100 Subject: [PATCH 11/21] reviewer comments --- src/pytorch_lightning/loggers/tensorboard.py | 5 ++--- tests/tests_fabric/conftest.py | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pytorch_lightning/loggers/tensorboard.py b/src/pytorch_lightning/loggers/tensorboard.py index de332fdcafcac..d04a5871d9049 100644 --- a/src/pytorch_lightning/loggers/tensorboard.py +++ b/src/pytorch_lightning/loggers/tensorboard.py @@ -21,11 +21,11 @@ from argparse import Namespace from typing import Any, Dict, Optional, Union -from lightning_utilities.core.imports import RequirementCache from torch import Tensor import pytorch_lightning as pl -from lightning_fabric.loggers import TensorBoardLogger as FabricTensorBoardLogger +from lightning_fabric.loggers.tensorboard import _TENSORBOARD_AVAILABLE +from lightning_fabric.loggers.tensorboard import TensorBoardLogger as FabricTensorBoardLogger from lightning_fabric.utilities.logger import _convert_params from lightning_fabric.utilities.types import _PATH from pytorch_lightning.callbacks import ModelCheckpoint @@ -35,7 +35,6 @@ log = logging.getLogger(__name__) -_TENSORBOARD_AVAILABLE = RequirementCache("tensorboard") if _OMEGACONF_AVAILABLE: from omegaconf import Container, OmegaConf diff --git a/tests/tests_fabric/conftest.py b/tests/tests_fabric/conftest.py index 5aa786d20bf80..b650e408a7b60 100644 --- a/tests/tests_fabric/conftest.py +++ b/tests/tests_fabric/conftest.py @@ -54,7 +54,6 @@ def restore_env_variables(): "RANK", # set by DeepSpeed "POPLAR_ENGINE_OPTIONS", # set by IPUStrategy "CUDA_MODULE_LOADING", # leaked since PyTorch 1.13 - "CRC32C_SW_MODE", # leaked by tensorboardX } leaked_vars.difference_update(allowlist) assert not leaked_vars, f"test is leaking environment variable(s): {set(leaked_vars)}" From fee648bc664f70dd7e973566731b9800eca2c2ef Mon Sep 17 00:00:00 2001 From: lightningforever Date: Mon, 9 Jan 2023 12:53:09 +0100 Subject: [PATCH 12/21] pseudo code --- docs/source-pytorch/fabric/fabric.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/source-pytorch/fabric/fabric.rst b/docs/source-pytorch/fabric/fabric.rst index 52c794756a22d..a7ee0d720104f 100644 --- a/docs/source-pytorch/fabric/fabric.rst +++ b/docs/source-pytorch/fabric/fabric.rst @@ -655,4 +655,11 @@ These methods allows you to send scalar metrics to a logger registered in Fabric # Or send multiple metrics at once: fabric.log_dict({"loss": loss, "accuracy": acc}) -If no loggers are given to Fabric (default), `log` and `log_dict` won't do anything. +If no loggers are given to Fabric (default), ``log`` and ``log_dict`` won't do anything. +Here is what's happening under the hood (pseudo code) when you call ``.log()`` or ``log_dict``: + +.. 
code-block:: python + + # When you call .log() or .log_dict(), we do this: + for logger in fabric.loggers: + logger.log_metrics(metrics=metrics, step=step) From 8a743d372610c90e13e0cc6cad0ec32e2c81e75e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 9 Jan 2023 14:55:37 +0100 Subject: [PATCH 13/21] Fix logger typing by using fabric's base logger --- src/pytorch_lightning/core/module.py | 6 +++--- .../connectors/logger_connector/logger_connector.py | 9 ++++----- src/pytorch_lightning/trainer/trainer.py | 10 +++++----- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index 88fe0b681933a..4a7d8b2977599 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -33,6 +33,7 @@ import lightning_fabric as lf import pytorch_lightning as pl +from lightning_fabric.loggers import Logger from lightning_fabric.utilities.apply_func import convert_to_tensors from lightning_fabric.utilities.cloud_io import get_filesystem from lightning_fabric.utilities.device_dtype_mixin import _DeviceDtypeModuleMixin @@ -45,7 +46,6 @@ from pytorch_lightning.core.mixins import HyperparametersMixin from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.core.saving import ModelIO -from pytorch_lightning.loggers import Logger, TensorBoardLogger from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import _FxValidator from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -291,12 +291,12 @@ def truncated_bptt_steps(self, truncated_bptt_steps: int) -> None: self._truncated_bptt_steps = truncated_bptt_steps @property - def logger(self) -> Optional[Union[Logger, TensorBoardLogger]]: + def logger(self) -> Optional[Logger]: """Reference to the logger object in the Trainer.""" return self._trainer.logger if self._trainer is not None else None @property - def loggers(self) -> List[Union[Logger, TensorBoardLogger]]: + def loggers(self) -> List[Logger]: """Reference to the list of loggers in the Trainer.""" return self.trainer.loggers if self._trainer else [] diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index d52ca18a705a3..203f4a12bad23 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -17,9 +17,10 @@ from torch import Tensor import pytorch_lightning as pl +from lightning_fabric.loggers import Logger from lightning_fabric.plugins.environments import SLURMEnvironment from lightning_fabric.utilities import move_data_to_device -from pytorch_lightning.loggers import Logger, TensorBoardLogger +from pytorch_lightning.loggers import TensorBoardLogger from pytorch_lightning.trainer.connectors.logger_connector.result import _METRICS, _OUT_DICT, _PBAR_DICT from pytorch_lightning.utilities.metrics import metrics_to_scalars @@ -37,7 +38,7 @@ def __init__(self, trainer: "pl.Trainer") -> None: def on_trainer_init( self, - logger: Union[bool, Logger, TensorBoardLogger, Iterable[Union[Logger, TensorBoardLogger]]], + logger: Union[bool, Logger, Iterable[Logger]], log_every_n_steps: int, move_metrics_to_cpu: bool, ) -> None: @@ -51,9 +52,7 @@ def should_update_logs(self) -> bool: should_log = 
(self.trainer.fit_loop.epoch_loop._batches_that_stepped + 1) % self.trainer.log_every_n_steps == 0 return should_log or self.trainer.should_stop - def configure_logger( - self, logger: Union[bool, Logger, TensorBoardLogger, Iterable[Union[Logger, TensorBoardLogger]]] - ) -> None: + def configure_logger(self, logger: Union[bool, Logger, Iterable[Logger]]) -> None: if not logger: # logger is None or logger is False self.trainer.loggers = [] diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index f61537ccf4616..c69d0ff8ce432 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -44,6 +44,7 @@ from typing_extensions import Literal import pytorch_lightning as pl +from lightning_fabric.loggers import Logger from lightning_fabric.utilities.cloud_io import get_filesystem from lightning_fabric.utilities.data import _auto_add_worker_init_fn from lightning_fabric.utilities.types import _PATH @@ -52,7 +53,6 @@ from pytorch_lightning.callbacks import Callback, Checkpoint, EarlyStopping, ProgressBarBase from pytorch_lightning.callbacks.prediction_writer import BasePredictionWriter from pytorch_lightning.core.datamodule import LightningDataModule -from pytorch_lightning.loggers import Logger from pytorch_lightning.loggers.tensorboard import TensorBoardLogger from pytorch_lightning.loops import PredictionLoop, TrainingEpochLoop from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop @@ -118,7 +118,7 @@ class Trainer: @_defaults_from_env_vars def __init__( self, - logger: Union[Union[Logger, TensorBoardLogger], Iterable[Union[Logger, TensorBoardLogger]], bool] = True, + logger: Union[Union[Logger], Iterable[Union[Logger]], bool] = True, enable_checkpointing: bool = True, callbacks: Optional[Union[List[Callback], Callback]] = None, default_root_dir: Optional[_PATH] = None, @@ -518,7 +518,7 @@ def __init__( setup._init_profiler(self, profiler) # init logger flags - self._loggers: List[Union[Logger, TensorBoardLogger]] + self._loggers: List[Logger] self._logger_connector.on_trainer_init(logger, log_every_n_steps, move_metrics_to_cpu) # init debugging flags @@ -2165,11 +2165,11 @@ def logger(self, logger: Optional[Logger]) -> None: self.loggers = [logger] @property - def loggers(self) -> List[Union[Logger, TensorBoardLogger]]: + def loggers(self) -> List[Union[Logger]]: return self._loggers @loggers.setter - def loggers(self, loggers: Optional[List[Union[Logger, TensorBoardLogger]]]) -> None: + def loggers(self, loggers: Optional[List[Logger]]) -> None: self._loggers = loggers if loggers else [] @property From b844809bad43e1ea4e44013db284868d8c188fde Mon Sep 17 00:00:00 2001 From: lightningforever Date: Mon, 9 Jan 2023 15:16:35 +0100 Subject: [PATCH 14/21] resolve rebase conflicts --- _notebooks | 2 +- .../utilities/migration/test_migration.py | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/_notebooks b/_notebooks index 6d5634b794218..db21eb1dc060f 160000 --- a/_notebooks +++ b/_notebooks @@ -1 +1 @@ -Subproject commit 6d5634b7942180e6ba4a30bfbd74926d1c22f1eb +Subproject commit db21eb1dc060fbba9c0e3336d827235d51cd5bd0 diff --git a/tests/tests_pytorch/utilities/migration/test_migration.py b/tests/tests_pytorch/utilities/migration/test_migration.py index f77c4ea3d220a..1d8224059bfa7 100644 --- a/tests/tests_pytorch/utilities/migration/test_migration.py +++ b/tests/tests_pytorch/utilities/migration/test_migration.py @@ -146,13 +146,3 @@ def 
test_migrate_model_checkpoint_save_on_train_epoch_end_default_collision(): with pytest.warns(PossibleUserWarning, match="callback states in this checkpoint.* colliding with each other"): updated_checkpoint, _ = migrate_checkpoint(old_checkpoint.copy(), target_version="1.9.0") assert updated_checkpoint["callbacks"] == old_checkpoint["callbacks"] # no migration was performed - - -def test_migrate_dropped_apex_amp_state(monkeypatch): - """Test that the migration warns about collisions that would occur if the keys were modified.""" - monkeypatch.setattr(pl, "__version__", "2.0.0") # pretend this version of Lightning is >= 2.0.0 - old_checkpoint = {"amp_scaling_state": {"scale": 1.23}} - _set_version(old_checkpoint, "1.9.0") # pretend a checkpoint prior to 2.0.0 - with pytest.warns(UserWarning, match="checkpoint contains apex AMP data"): - updated_checkpoint, _ = migrate_checkpoint(old_checkpoint.copy()) - assert "amp_scaling_state" not in updated_checkpoint From 5b28859255b2b2bbca0db1abe4b80850e2e8365a Mon Sep 17 00:00:00 2001 From: lightningforever Date: Mon, 9 Jan 2023 15:19:33 +0100 Subject: [PATCH 15/21] simplify Union[Logger] --- src/pytorch_lightning/trainer/trainer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index c69d0ff8ce432..cc9256ef76666 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -118,7 +118,7 @@ class Trainer: @_defaults_from_env_vars def __init__( self, - logger: Union[Union[Logger], Iterable[Union[Logger]], bool] = True, + logger: Union[Logger, Iterable[Logger], bool] = True, enable_checkpointing: bool = True, callbacks: Optional[Union[List[Callback], Callback]] = None, default_root_dir: Optional[_PATH] = None, @@ -2165,11 +2165,11 @@ def logger(self, logger: Optional[Logger]) -> None: self.loggers = [logger] @property - def loggers(self) -> List[Union[Logger]]: + def loggers(self) -> List[Logger]: return self._loggers @loggers.setter - def loggers(self, loggers: Optional[List[Logger]]) -> None: + def loggers(self, loggers: Optional[Logger]) -> None: self._loggers = loggers if loggers else [] @property From 50a4031fff8fc6060d28471edbd4bfbc072ea697 Mon Sep 17 00:00:00 2001 From: lightningforever Date: Mon, 9 Jan 2023 16:16:55 +0100 Subject: [PATCH 16/21] trigger CI --- src/lightning_fabric/fabric.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lightning_fabric/fabric.py b/src/lightning_fabric/fabric.py index fbfe572ebbdb5..d713464dcddfe 100644 --- a/src/lightning_fabric/fabric.py +++ b/src/lightning_fabric/fabric.py @@ -748,3 +748,4 @@ def _is_using_cli() -> bool: def _do_nothing(*_: Any) -> None: pass + From 08f22a0f6e926a28da45eb466d7a710881c3af9a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Jan 2023 15:18:13 +0000 Subject: [PATCH 17/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/lightning_fabric/fabric.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lightning_fabric/fabric.py b/src/lightning_fabric/fabric.py index d713464dcddfe..fbfe572ebbdb5 100644 --- a/src/lightning_fabric/fabric.py +++ b/src/lightning_fabric/fabric.py @@ -748,4 +748,3 @@ def _is_using_cli() -> bool: def _do_nothing(*_: Any) -> None: pass - From 0d0c86441499d8343b79fc7c7c8a5e6225fa9c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 9 Jan 2023 16:31:06 
+0100 Subject: [PATCH 18/21] Bad change --- src/pytorch_lightning/trainer/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index cc9256ef76666..47097a970cc89 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -2169,7 +2169,7 @@ def loggers(self) -> List[Logger]: return self._loggers @loggers.setter - def loggers(self, loggers: Optional[Logger]) -> None: + def loggers(self, loggers: Optional[List[Logger]]) -> None: self._loggers = loggers if loggers else [] @property From 97d2740597e86dfb0c1c060ef41ab89818eedc72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 9 Jan 2023 16:32:11 +0100 Subject: [PATCH 19/21] Missed this too --- src/pytorch_lightning/trainer/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index 47097a970cc89..a0f22fbf61e48 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -2154,7 +2154,7 @@ def _active_loop(self) -> Optional[Union[FitLoop, EvaluationLoop, PredictionLoop """ @property - def logger(self) -> Optional[Union[Logger, TensorBoardLogger]]: + def logger(self) -> Optional[Logger]: return self.loggers[0] if len(self.loggers) > 0 else None @logger.setter From 605a5e28b55f7ddde7276ff5fac8c7cdf185d9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 9 Jan 2023 16:33:19 +0100 Subject: [PATCH 20/21] Unnecessary quotes --- src/pytorch_lightning/loggers/comet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pytorch_lightning/loggers/comet.py b/src/pytorch_lightning/loggers/comet.py index c8ef84f2b59cf..7f38545955fa6 100644 --- a/src/pytorch_lightning/loggers/comet.py +++ b/src/pytorch_lightning/loggers/comet.py @@ -423,6 +423,6 @@ def __getstate__(self) -> Dict[str, Any]: state["_experiment"] = None return state - def log_graph(self, model: "Module", input_array: Optional[Tensor] = None) -> None: + def log_graph(self, model: Module, input_array: Optional[Tensor] = None) -> None: if self._experiment is not None: self._experiment.set_model_graph(model) From c88b9457acfdb2353df1603344017528668bc33c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 9 Jan 2023 18:16:06 +0100 Subject: [PATCH 21/21] Avoid jsonargparse breaking change --- src/pytorch_lightning/core/module.py | 15 +++++++++++---- src/pytorch_lightning/loggers/tensorboard.py | 3 ++- .../logger_connector/logger_connector.py | 3 +-- src/pytorch_lightning/trainer/trainer.py | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index 4a7d8b2977599..aa71bf2fab120 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -33,7 +33,7 @@ import lightning_fabric as lf import pytorch_lightning as pl -from lightning_fabric.loggers import Logger +from lightning_fabric.loggers import Logger as FabricLogger from lightning_fabric.utilities.apply_func import convert_to_tensors from lightning_fabric.utilities.cloud_io import get_filesystem from lightning_fabric.utilities.device_dtype_mixin import _DeviceDtypeModuleMixin @@ -46,6 +46,7 @@ from pytorch_lightning.core.mixins import HyperparametersMixin from pytorch_lightning.core.optimizer import LightningOptimizer from 
pytorch_lightning.core.saving import ModelIO +from pytorch_lightning.loggers import Logger from pytorch_lightning.trainer.connectors.logger_connector.fx_validator import _FxValidator from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -291,14 +292,20 @@ def truncated_bptt_steps(self, truncated_bptt_steps: int) -> None: self._truncated_bptt_steps = truncated_bptt_steps @property - def logger(self) -> Optional[Logger]: + def logger(self) -> Optional[Union[Logger, FabricLogger]]: """Reference to the logger object in the Trainer.""" + if self._fabric is not None: + return self._fabric.logger return self._trainer.logger if self._trainer is not None else None @property - def loggers(self) -> List[Logger]: + def loggers(self) -> Union[List[Logger], List[FabricLogger]]: """Reference to the list of loggers in the Trainer.""" - return self.trainer.loggers if self._trainer else [] + if self._fabric is not None: + return self._fabric.loggers + elif self._trainer is not None: + return self._trainer.loggers + return [] # type: ignore[return-value] def _call_batch_hook(self, hook_name: str, *args: Any) -> Any: if self._trainer: diff --git a/src/pytorch_lightning/loggers/tensorboard.py b/src/pytorch_lightning/loggers/tensorboard.py index d04a5871d9049..6dd8571df3690 100644 --- a/src/pytorch_lightning/loggers/tensorboard.py +++ b/src/pytorch_lightning/loggers/tensorboard.py @@ -30,6 +30,7 @@ from lightning_fabric.utilities.types import _PATH from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.saving import save_hparams_to_yaml +from pytorch_lightning.loggers.logger import Logger from pytorch_lightning.utilities.imports import _OMEGACONF_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_only, rank_zero_warn @@ -39,7 +40,7 @@ from omegaconf import Container, OmegaConf -class TensorBoardLogger(FabricTensorBoardLogger): +class TensorBoardLogger(Logger, FabricTensorBoardLogger): r""" Log to local file system in `TensorBoard `_ format. 
diff --git a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py index 203f4a12bad23..a8b28866ea4c2 100644 --- a/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py +++ b/src/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py @@ -17,10 +17,9 @@ from torch import Tensor import pytorch_lightning as pl -from lightning_fabric.loggers import Logger from lightning_fabric.plugins.environments import SLURMEnvironment from lightning_fabric.utilities import move_data_to_device -from pytorch_lightning.loggers import TensorBoardLogger +from pytorch_lightning.loggers import Logger, TensorBoardLogger from pytorch_lightning.trainer.connectors.logger_connector.result import _METRICS, _OUT_DICT, _PBAR_DICT from pytorch_lightning.utilities.metrics import metrics_to_scalars diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index a0f22fbf61e48..00e070d36e33d 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -44,7 +44,6 @@ from typing_extensions import Literal import pytorch_lightning as pl -from lightning_fabric.loggers import Logger from lightning_fabric.utilities.cloud_io import get_filesystem from lightning_fabric.utilities.data import _auto_add_worker_init_fn from lightning_fabric.utilities.types import _PATH @@ -53,6 +52,7 @@ from pytorch_lightning.callbacks import Callback, Checkpoint, EarlyStopping, ProgressBarBase from pytorch_lightning.callbacks.prediction_writer import BasePredictionWriter from pytorch_lightning.core.datamodule import LightningDataModule +from pytorch_lightning.loggers import Logger from pytorch_lightning.loggers.tensorboard import TensorBoardLogger from pytorch_lightning.loops import PredictionLoop, TrainingEpochLoop from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop
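For reference, the snippet below is a minimal, hypothetical usage sketch of the logging API this patch series introduces; it is not part of any commit above. It assumes the ``lightning_fabric.loggers.Logger`` base class expects ``name``, ``version``, ``log_metrics``, and ``log_hyperparams`` to be implemented, which is consistent with the pseudo code added to the docs (``for logger in fabric.loggers: logger.log_metrics(metrics=metrics, step=step)``). The ``PrintLogger`` class is invented purely for illustration.

.. code-block:: python

    from typing import Any, Dict, Optional

    from lightning_fabric import Fabric
    from lightning_fabric.loggers import Logger


    class PrintLogger(Logger):
        """Toy logger that prints every metric it receives."""

        @property
        def name(self) -> str:
            return "print"

        @property
        def version(self) -> str:
            return "0"

        def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
            # Invoked once per `fabric.log` / `fabric.log_dict` call
            print(f"step={step}: {metrics}")

        def log_hyperparams(self, params: Dict[str, Any], *args: Any, **kwargs: Any) -> None:
            print(f"hparams: {params}")


    # Fabric fans `log`/`log_dict` out to every attached logger.
    fabric = Fabric(loggers=[PrintLogger()])
    fabric.log("loss", 0.25, step=0)
    fabric.log_dict({"loss": 0.25, "accuracy": 0.9}, step=1)

With more than one logger attached (for example a ``PrintLogger`` alongside a TensorBoard logger), each call records the same metrics in every backend, so application code never has to loop over loggers itself.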