From 45315e1da9720285b0ee3ae71e2e7c249c08eb3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 13 Dec 2022 17:13:12 +0100 Subject: [PATCH 01/14] WIP --- .../accelerators/gpu_intermediate.rst | 5 +-- docs/source-pytorch/api_references.rst | 3 +- .../common/checkpointing_basic.rst | 2 +- .../common/precision_intermediate.rst | 41 +----------------- docs/source-pytorch/common/trainer.rst | 42 ------------------- docs/source-pytorch/conf.py | 1 - docs/source-pytorch/extensions/plugins.rst | 3 +- .../model/manual_optimization.rst | 2 +- .../plugins/precision/deepspeed.py | 24 ++--------- .../plugins/precision/native_amp.py | 4 +- src/lightning_lite/utilities/enums.py | 1 + .../plugins/precision/apex_amp.py | 31 ++++++++++++-- .../plugins/precision/deepspeed.py | 27 ++++++++---- .../plugins/precision/native_amp.py | 1 + src/pytorch_lightning/trainer/trainer.py | 8 +++- src/pytorch_lightning/utilities/__init__.py | 1 - src/pytorch_lightning/utilities/imports.py | 1 - .../plugins/precision/test_deepspeed.py | 3 +- tests/tests_pytorch/conftest.py | 5 --- tests/tests_pytorch/helpers/runif.py | 3 +- tests/tests_pytorch/models/test_amp.py | 26 ++++++------ tests/tests_pytorch/models/test_hooks.py | 36 +++++++++++----- tests/tests_pytorch/models/test_horovod.py | 23 ---------- .../precision/test_deepspeed_precision.py | 7 +++- .../tests_pytorch/plugins/test_amp_plugins.py | 27 ++++++------ .../strategies/test_deepspeed_strategy.py | 28 +++++++++---- .../optimization/test_manual_optimization.py | 6 ++- tests/tests_pytorch/utilities/test_imports.py | 3 +- 28 files changed, 154 insertions(+), 210 deletions(-) diff --git a/docs/source-pytorch/accelerators/gpu_intermediate.rst b/docs/source-pytorch/accelerators/gpu_intermediate.rst index 9ba06c415b0e1..959a67ed4a555 100644 --- a/docs/source-pytorch/accelerators/gpu_intermediate.rst +++ b/docs/source-pytorch/accelerators/gpu_intermediate.rst @@ -469,9 +469,6 @@ Validation and test step have the same option when using DP. Distributed and 16-bit precision ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Due to an issue with Apex and DataParallel (PyTorch and NVIDIA issue), Lightning does -not allow 16-bit and DP training. We tried to get this to work, but it's an issue on their end. - Below are the possible configurations we support. +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ @@ -487,7 +484,7 @@ Below are the possible configurations we support. 
+-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ | | Y | | Y | Y | `Trainer(accelerator="gpu", devices=k, strategy='ddp', precision=16)` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ - +# FIXME(carlos): check native amp and DP Implement Your Own Distributed (DDP) training ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/source-pytorch/api_references.rst b/docs/source-pytorch/api_references.rst index 9c7f3228821f0..141cfe0d67615 100644 --- a/docs/source-pytorch/api_references.rst +++ b/docs/source-pytorch/api_references.rst @@ -184,7 +184,6 @@ precision :nosignatures: :template: classtemplate.rst - ApexMixedPrecisionPlugin ColossalAIPrecisionPlugin DeepSpeedPrecisionPlugin DoublePrecisionPlugin @@ -192,7 +191,7 @@ precision FullyShardedNativeNativeMixedPrecisionPlugin HPUPrecisionPlugin IPUPrecisionPlugin - NativeMixedPrecisionPlugin + MixedPrecisionPlugin PrecisionPlugin ShardedNativeMixedPrecisionPlugin TPUBf16PrecisionPlugin diff --git a/docs/source-pytorch/common/checkpointing_basic.rst b/docs/source-pytorch/common/checkpointing_basic.rst index 85292b0a7085d..8096f1ffd632a 100644 --- a/docs/source-pytorch/common/checkpointing_basic.rst +++ b/docs/source-pytorch/common/checkpointing_basic.rst @@ -186,5 +186,5 @@ If you don't just want to load weights, but instead restore the full training, d model = LitModel() trainer = Trainer() - # automatically restores model, epoch, step, LR schedulers, apex, etc... + # automatically restores model, epoch, step, LR schedulers, etc... trainer.fit(model, ckpt_path="some/path/to/my_checkpoint.ckpt") diff --git a/docs/source-pytorch/common/precision_intermediate.rst b/docs/source-pytorch/common/precision_intermediate.rst index 0f149e93db4d8..fbd21d105a25a 100644 --- a/docs/source-pytorch/common/precision_intermediate.rst +++ b/docs/source-pytorch/common/precision_intermediate.rst @@ -58,6 +58,7 @@ FP16 Mixed Precision ******************** In most cases, mixed precision uses FP16. Supported `PyTorch operations `__ automatically run in FP16, saving memory and improving throughput on the supported accelerators. +Since computation happens in FP16, there is a chance of numerical instability during training. This is handled internally by a dynamic grad scaler which skips invalid steps and adjusts the scaler to ensure subsequent steps fall within a finite range. For more information `see the autocast docs `__. .. note:: @@ -69,46 +70,6 @@ In most cases, mixed precision uses FP16. Supported `PyTorch operations `__. It is more flexible and intuitive compared to `NVIDIA APEX `__. -Since computation happens in FP16, there is a chance of numerical instability during training. This is handled internally by a dynamic grad scaler which skips invalid steps and adjusts the scaler to ensure subsequent steps fall within a finite range. For more information `see the autocast docs `__. -Lightning uses native amp by default with ``precision=16|"bf16"``. You can also set it using: - -.. testcode:: - - Trainer(precision=16, amp_backend="native") - - -NVIDIA APEX ------------ - -.. warning:: - - We strongly recommend using the above native mixed precision rather than NVIDIA APEX unless you require more refined control. - -`NVIDIA APEX `__ offers additional flexibility in setting mixed precision. 
This can be useful when trying out different precision configurations, such as keeping most of your weights in FP16 and running computation in FP16. - -.. testcode:: - :skipif: not _APEX_AVAILABLE or not torch.cuda.is_available() - - Trainer(accelerator="gpu", devices=1, amp_backend="apex", precision=16) - -Set the `NVIDIA optimization level `__ via the precision plugin. - -.. testcode:: - :skipif: not _APEX_AVAILABLE or not torch.cuda.is_available() - - from pytorch_lightning.plugins import ApexMixedPrecisionPlugin - - - apex_plugin = ApexMixedPrecisionPlugin(amp_level="O3") - Trainer(accelerator="gpu", devices=1, precision=16, plugins=[apex_plugin]) - ----- - ************************ BFloat16 Mixed Precision ************************ diff --git a/docs/source-pytorch/common/trainer.rst b/docs/source-pytorch/common/trainer.rst index 1eb3f270fa1a2..8d5e35206b988 100644 --- a/docs/source-pytorch/common/trainer.rst +++ b/docs/source-pytorch/common/trainer.rst @@ -289,27 +289,6 @@ Example:: # no accumulation for epochs 1-4. accumulate 3 for epochs 5-10. accumulate 20 after that trainer = Trainer(accumulate_grad_batches={5: 3, 10: 20}) -amp_backend -^^^^^^^^^^^ - -.. raw:: html - - - -| - -Use PyTorch AMP ('native'), or NVIDIA apex ('apex'). - -.. testcode:: - - # using PyTorch built-in AMP, default used by the Trainer - trainer = Trainer(amp_backend="native") - - # using NVIDIA Apex - trainer = Trainer(amp_backend="apex") - auto_scale_batch_size ^^^^^^^^^^^^^^^^^^^^^ @@ -1156,27 +1135,6 @@ Half precision, or mixed precision, is the combined use of 32 and 16 bit floatin .. note:: When running on TPUs, torch.bfloat16 will be used but tensor printing will still show torch.float32. -.. admonition:: If you are interested in using Apex 16-bit training: - :class: dropdown - - NVIDIA Apex and DDP have instability problems. We recommend using the native AMP for 16-bit precision with multiple GPUs. - To use Apex 16-bit training: - - 1. `Install apex. `__ - - 2. Set the ``precision`` trainer flag to 16. You can customize the `Apex optimization level `_ by setting the ``amp_level`` flag - in the precision plugin. - - .. testcode:: - :skipif: not _APEX_AVAILABLE or not torch.cuda.is_available() - - from pytorch_lightning.plugins import ApexMixedPrecisionPlugin - - - apex_plugin = ApexMixedPrecisionPlugin(amp_level="O2") - # turn on 16-bit - trainer = Trainer(accelerator="gpu", devices=1, precision=16, plugins=[apex_plugin]) - profiler ^^^^^^^^ diff --git a/docs/source-pytorch/conf.py b/docs/source-pytorch/conf.py index 5bb3eb4c1115f..80659e021ecbf 100644 --- a/docs/source-pytorch/conf.py +++ b/docs/source-pytorch/conf.py @@ -398,7 +398,6 @@ def package_list_from_file(file): from pytorch_lightning.callbacks import Callback from pytorch_lightning.cli import _JSONARGPARSE_SIGNATURES_AVAILABLE as _JSONARGPARSE_AVAILABLE from pytorch_lightning.utilities import ( - _APEX_AVAILABLE, _TORCHVISION_AVAILABLE, ) from pytorch_lightning.loggers.neptune import _NEPTUNE_AVAILABLE diff --git a/docs/source-pytorch/extensions/plugins.rst b/docs/source-pytorch/extensions/plugins.rst index 8ba0eb9d3d87c..560c26a3e1cda 100644 --- a/docs/source-pytorch/extensions/plugins.rst +++ b/docs/source-pytorch/extensions/plugins.rst @@ -52,7 +52,6 @@ The full list of built-in precision plugins is listed below. :nosignatures: :template: classtemplate.rst - ApexMixedPrecisionPlugin ColossalAIPrecisionPlugin DeepSpeedPrecisionPlugin DoublePrecisionPlugin @@ -60,7 +59,7 @@ The full list of built-in precision plugins is listed below. 
FullyShardedNativeNativeMixedPrecisionPlugin HPUPrecisionPlugin IPUPrecisionPlugin - NativeMixedPrecisionPlugin + MixedPrecisionPlugin PrecisionPlugin ShardedNativeMixedPrecisionPlugin TPUBf16PrecisionPlugin diff --git a/docs/source-pytorch/model/manual_optimization.rst b/docs/source-pytorch/model/manual_optimization.rst index 96d24bbe044b0..aad1c1aa00263 100644 --- a/docs/source-pytorch/model/manual_optimization.rst +++ b/docs/source-pytorch/model/manual_optimization.rst @@ -319,4 +319,4 @@ Here is an example using a closure function. opt.step(closure=closure) .. warning:: - The :class:`~torch.optim.LBFGS` optimizer is not supported for apex AMP, native AMP, IPUs, or DeepSpeed. + The :class:`~torch.optim.LBFGS` optimizer is not supported for AMP, IPUs, or DeepSpeed. diff --git a/src/lightning_lite/plugins/precision/deepspeed.py b/src/lightning_lite/plugins/precision/deepspeed.py index 265dfacfdb9b5..d974a899d7aae 100644 --- a/src/lightning_lite/plugins/precision/deepspeed.py +++ b/src/lightning_lite/plugins/precision/deepspeed.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Optional, TYPE_CHECKING +from typing import Any, TYPE_CHECKING import torch from lightning_utilities.core.imports import RequirementCache @@ -20,11 +20,10 @@ from lightning_lite.plugins.precision.precision import Precision from lightning_lite.plugins.precision.utils import _convert_fp_tensor -from lightning_lite.utilities.enums import AMPType, PrecisionType +from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.types import Steppable _DEEPSPEED_AVAILABLE = RequirementCache("deepspeed") -_APEX_AVAILABLE = RequirementCache("apex") if TYPE_CHECKING and _DEEPSPEED_AVAILABLE: import deepspeed @@ -34,28 +33,13 @@ class DeepSpeedPrecision(Precision): Args: precision: Full precision (32), half precision (16) or bfloat16 precision (bf16). - amp_type: The mixed precision backend to use ("native" or "apex"). - amp_level: The optimization level to use (O1, O2, etc...). By default it will be set to "O2" - if ``amp_type`` is set to "apex". Raises: - MisconfigurationException: - If using ``bfloat16`` precision and ``deepspeed None: - if amp_type == AMPType.APEX: - if not _APEX_AVAILABLE: - raise ModuleNotFoundError( - "You have asked for Apex AMP but `apex` is not installed." 
- " Install `apex` using this guide: https://github.com/NVIDIA/apex" - ) - - amp_level = amp_level or "O2" - + def __init__(self, precision: Literal[16, 32, "bf16"]) -> None: supported_precision = (PrecisionType.HALF, PrecisionType.FLOAT, PrecisionType.BFLOAT) if precision not in supported_precision: raise ValueError( @@ -65,8 +49,6 @@ def __init__(self, precision: Literal[16, 32, "bf16"], amp_type: str, amp_level: super().__init__() self.precision = precision - self.amp_type = amp_type - self.amp_level = amp_level def convert_input(self, data: Tensor) -> Tensor: precision_to_type = {"bf16": torch.bfloat16, 16: torch.float16, 32: torch.float32} diff --git a/src/lightning_lite/plugins/precision/native_amp.py b/src/lightning_lite/plugins/precision/native_amp.py index 5f9b477171c21..083d0a187b300 100644 --- a/src/lightning_lite/plugins/precision/native_amp.py +++ b/src/lightning_lite/plugins/precision/native_amp.py @@ -26,8 +26,8 @@ from lightning_lite.utilities.types import Optimizable -class NativeMixedPrecision(Precision): - """Plugin for Native Mixed Precision (AMP) training with ``torch.autocast``. +class MixedPrecision(Precision): + """Plugin for Automatic Mixed Precision (AMP) training with ``torch.autocast``. Args: precision: Whether to use ``torch.float16`` (``16``) or ``torch.bfloat16`` (``'bf16'``). diff --git a/src/lightning_lite/utilities/enums.py b/src/lightning_lite/utilities/enums.py index c1bb015010385..03bb914bee5e0 100644 --- a/src/lightning_lite/utilities/enums.py +++ b/src/lightning_lite/utilities/enums.py @@ -29,6 +29,7 @@ class LightningEnum(StrEnum, Enum): LightningEnum = StrEnum +# FIXME(carlos): Deprecate this on PL class AMPType(LightningEnum): """Type of Automatic Mixed Precission used for training.""" diff --git a/src/pytorch_lightning/plugins/precision/apex_amp.py b/src/pytorch_lightning/plugins/precision/apex_amp.py index 6ea6128a43508..2a172e79a0e43 100644 --- a/src/pytorch_lightning/plugins/precision/apex_amp.py +++ b/src/pytorch_lightning/plugins/precision/apex_amp.py @@ -11,27 +11,48 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import functools +import warnings +from types import ModuleType from typing import Any, Callable, Dict, Optional +from lightning_utilities.core.imports import RequirementCache from torch import Tensor from torch.optim import LBFGS, Optimizer import pytorch_lightning as pl from lightning_lite.utilities.types import _PARAMETERS, Optimizable from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin -from pytorch_lightning.utilities import _APEX_AVAILABLE, AMPType +from pytorch_lightning.utilities import AMPType from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation -if _APEX_AVAILABLE: - from apex import amp +_APEX_AVAILABLE = RequirementCache("apex") +@functools.lru_cache(maxsize=1) +def _import_amp_without_deprecation() -> ModuleType: + # hide the warning upstream in favor of our deprecation + with warnings.filterwarnings(action="ignore", message="apex.amp is deprecated", category=FutureWarning): + from apex import amp + + return amp + + +# TODO: remove in v1.10.0 class ApexMixedPrecisionPlugin(PrecisionPlugin): """Mixed Precision Plugin based on Nvidia/Apex (https://github.com/NVIDIA/apex)""" backend = AMPType.APEX def __init__(self, amp_level: str = "O2") -> None: + # deprecate before the availability check so users don't install without knowning that it's deprecated + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + f" PyTorch Lightning has been deprecated in v1.9.0. The `{type(self).__name__}` class will be removed in" + " v1.10.0. Please use PyTorch's AMP implementation available in" + " `pytorch_lightning.plugins.MixedPrecisionPlugin` instead." + ) if not _APEX_AVAILABLE: raise MisconfigurationException( "You have asked for Apex AMP but `apex` is not installed." @@ -43,11 +64,13 @@ def __init__(self, amp_level: str = "O2") -> None: self._state_dict_loaded = False def main_params(self, optimizer: Optimizer) -> _PARAMETERS: + amp = _import_amp_without_deprecation() return amp.master_params(optimizer) def dispatch(self, trainer: "pl.Trainer") -> None: if not self._connected: strategy = trainer.strategy + amp = _import_amp_without_deprecation() _, strategy.optimizers = amp.initialize( trainer.lightning_module, strategy.optimizers, opt_level=self.amp_level ) @@ -73,6 +96,7 @@ def backward( # type: ignore[override] \**kwargs: Keyword arguments for the same purpose as ``*args``. 
""" opt = optimizer or model.trainer.optimizers + amp = _import_amp_without_deprecation() with amp.scale_loss(tensor, opt) as tensor: super().backward(tensor, model, optimizer, *args, **kwargs) @@ -102,6 +126,7 @@ def optimizer_step( # type: ignore[override] return closure_result def state_dict(self) -> Dict[str, Any]: + amp = _import_amp_without_deprecation() return amp.state_dict() def load_state_dict(self, state_dict: Dict[str, Any]) -> None: diff --git a/src/pytorch_lightning/plugins/precision/deepspeed.py b/src/pytorch_lightning/plugins/precision/deepspeed.py index 1b6cbb6ba84dd..23345707d8260 100644 --- a/src/pytorch_lightning/plugins/precision/deepspeed.py +++ b/src/pytorch_lightning/plugins/precision/deepspeed.py @@ -21,11 +21,12 @@ import pytorch_lightning as pl from lightning_lite.utilities.enums import AMPType, PrecisionType from lightning_lite.utilities.types import Steppable +from pytorch_lightning.plugins.precision.apex_amp import _APEX_AVAILABLE from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _APEX_AVAILABLE from pytorch_lightning.utilities.model_helpers import is_overridden +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation _DEEPSPEED_AVAILABLE = RequirementCache("deepspeed") if TYPE_CHECKING and _DEEPSPEED_AVAILABLE: @@ -39,20 +40,19 @@ class DeepSpeedPrecisionPlugin(PrecisionPlugin): Args: precision: Double precision (64), full precision (32), half precision (16) or bfloat16 precision (bf16). - amp_type: The mixed precision backend to use ("native" or "apex"). - amp_level: The optimization level to use (O1, O2, etc...). By default it will be set to "O2" - if ``amp_type`` is set to "apex". - Raises: - MisconfigurationException: - If using ``bfloat16`` precision and ``deepspeed None: + def __init__(self, precision: Union[str, int], amp_type: str = "native", amp_level: Optional[str] = None) -> None: if amp_type == AMPType.APEX: + # TODO: remove in v1.10.0 + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0. Support for using it through the DeepSpeed" + " implementation will be removed in v1.10.0." + ) if not _APEX_AVAILABLE: raise MisconfigurationException( "You have asked for Apex AMP but `apex` is not installed." @@ -60,6 +60,15 @@ def __init__(self, precision: Union[str, int], amp_type: str, amp_level: Optiona ) amp_level = amp_level or "O2" + else: + if amp_level is not None: + raise ValueError( + f"`{type(self).__name__}(amp_level={amp_level!r})` is only relevant when using NVIDIA/apex" + ) + rank_zero_deprecation( + f"Passing `{type(self).__name__}(amp_type=...)` been deprecated in v1.9.0 and will be removed in" + " v1.10.0. This argument is no longer necessary." 
+ ) supported_precision = (PrecisionType.HALF, PrecisionType.FLOAT, PrecisionType.BFLOAT) if precision not in supported_precision: diff --git a/src/pytorch_lightning/plugins/precision/native_amp.py b/src/pytorch_lightning/plugins/precision/native_amp.py index 552562dedc8de..286c7624b0d4b 100644 --- a/src/pytorch_lightning/plugins/precision/native_amp.py +++ b/src/pytorch_lightning/plugins/precision/native_amp.py @@ -26,6 +26,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException +# FIXME(carlos): deprecate Native class NativeMixedPrecisionPlugin(PrecisionPlugin): """Plugin for Native Mixed Precision (AMP) training with ``torch.autocast``. diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index 3317814367dde..eb7317c652930 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -164,8 +164,8 @@ def __init__( detect_anomaly: bool = False, auto_scale_batch_size: Union[str, bool] = False, plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None, - amp_backend: str = "native", - amp_level: Optional[str] = None, + amp_backend: str = "native", # TODO: Remove in 1.10 + amp_level: Optional[str] = None, # # TODO: Remove in 1.10 move_metrics_to_cpu: bool = False, multiple_trainloader_mode: str = "max_size_cycle", inference_mode: bool = True, @@ -184,6 +184,10 @@ def __init__( amp_backend: The mixed precision backend to use ("native" or "apex"). Default: ``'native''``. + .. deprecated:: v1.9 + Setting ``amp_backend`` inside the ``Trainer`` is deprecated in v1.8.0 and will be removed + in v1.10.0. This argument was only relevant for apex which is being removed. + amp_level: The optimization level to use (O1, O2, etc...). By default it will be set to "O2" if ``amp_backend`` is set to "apex". 
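
A minimal migration sketch for the ``amp_backend``/``amp_level`` deprecations above (not part of the patch; the ``pytorch_lightning.plugins.MixedPrecisionPlugin`` import path is taken from the deprecation message, and its ``(precision, device, scaler)`` constructor from the rename later in this series)::

    from pytorch_lightning import Trainer
    from pytorch_lightning.plugins import MixedPrecisionPlugin

    # Deprecated in v1.9.0, planned for removal in v1.10.0:
    # trainer = Trainer(accelerator="gpu", devices=1, precision=16, amp_backend="apex", amp_level="O2")

    # Replacement: torch.autocast-based mixed precision, selected by the precision flag alone
    trainer = Trainer(accelerator="gpu", devices=1, precision=16)

    # Or configure the precision plugin explicitly instead of passing the flag
    explicit = Trainer(accelerator="gpu", devices=1, plugins=[MixedPrecisionPlugin(16, "cuda")])
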
diff --git a/src/pytorch_lightning/utilities/__init__.py b/src/pytorch_lightning/utilities/__init__.py index f831376f4f1ad..a3b0ad6ba5250 100644 --- a/src/pytorch_lightning/utilities/__init__.py +++ b/src/pytorch_lightning/utilities/__init__.py @@ -21,7 +21,6 @@ from pytorch_lightning.utilities.enums import GradClipAlgorithmType # noqa: F401 from pytorch_lightning.utilities.grads import grad_norm # noqa: F401 from pytorch_lightning.utilities.imports import ( # noqa: F401 - _APEX_AVAILABLE, _HIVEMIND_AVAILABLE, _HOROVOD_AVAILABLE, _HPU_AVAILABLE, diff --git a/src/pytorch_lightning/utilities/imports.py b/src/pytorch_lightning/utilities/imports.py index a1e47a5ae07fc..d365135e81364 100644 --- a/src/pytorch_lightning/utilities/imports.py +++ b/src/pytorch_lightning/utilities/imports.py @@ -24,7 +24,6 @@ # duplicated from lite because HPU is patching it below _TORCH_GREATER_EQUAL_1_13 = compare_version("torch", operator.ge, "1.13.0") -_APEX_AVAILABLE = module_available("apex.amp") _DALI_AVAILABLE = module_available("nvidia.dali") _HABANA_FRAMEWORK_AVAILABLE = package_available("habana_frameworks") _HIVEMIND_AVAILABLE = package_available("hivemind") diff --git a/tests/tests_lite/plugins/precision/test_deepspeed.py b/tests/tests_lite/plugins/precision/test_deepspeed.py index 784f7011b396f..80c8626d38fa0 100644 --- a/tests/tests_lite/plugins/precision/test_deepspeed.py +++ b/tests/tests_lite/plugins/precision/test_deepspeed.py @@ -36,7 +36,8 @@ def test_deepspeed_precision_apex_not_installed(monkeypatch): @mock.patch("lightning_lite.plugins.precision.deepspeed._APEX_AVAILABLE", return_value=True) def test_deepspeed_precision_apex_default_level(_): - precision = DeepSpeedPrecision(precision=16, amp_type="apex") + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + precision = DeepSpeedPrecision(precision=16, amp_type="apex") assert isinstance(precision, DeepSpeedPrecision) assert precision.amp_level == "O2" diff --git a/tests/tests_pytorch/conftest.py b/tests/tests_pytorch/conftest.py index d72c8d8566ab2..39b97cb16d006 100644 --- a/tests/tests_pytorch/conftest.py +++ b/tests/tests_pytorch/conftest.py @@ -305,11 +305,6 @@ def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.C for item in items: item.add_marker(deprecation_error) - apex_deprecation = pytest.mark.filterwarnings("ignore:apex.amp is deprecated:FutureWarning") - for item in items: - if any(marker.name == "skipif" and marker.kwargs.get("amp_apex", False) for marker in item.own_markers): - item.add_marker(apex_deprecation) - def pytest_addoption(parser): parser.addoption("--hpus", action="store", type=int, default=1, help="Number of hpus 1-8") diff --git a/tests/tests_pytorch/helpers/runif.py b/tests/tests_pytorch/helpers/runif.py index ac3b45c0f8d55..bc9ef27312819 100644 --- a/tests/tests_pytorch/helpers/runif.py +++ b/tests/tests_pytorch/helpers/runif.py @@ -25,11 +25,11 @@ from pytorch_lightning.accelerators.mps import MPSAccelerator from pytorch_lightning.accelerators.tpu import TPUAccelerator from pytorch_lightning.callbacks.progress.rich_progress import _RICH_AVAILABLE +from pytorch_lightning.plugins.precision.apex_amp import _APEX_AVAILABLE from pytorch_lightning.strategies.bagua import _BAGUA_AVAILABLE from pytorch_lightning.strategies.colossalai import _COLOSSALAI_AVAILABLE from pytorch_lightning.strategies.deepspeed import _DEEPSPEED_AVAILABLE from pytorch_lightning.utilities.imports import ( - _APEX_AVAILABLE, _HIVEMIND_AVAILABLE, _HOROVOD_AVAILABLE, _HPU_AVAILABLE, @@ 
-154,6 +154,7 @@ def __new__( conditions.append(not _TORCH_QUANTIZE_AVAILABLE or _miss_default) reasons.append("PyTorch quantization") + # TODO: remove in v1.9.0 if amp_apex: conditions.append(not _APEX_AVAILABLE) reasons.append("NVIDIA Apex") diff --git a/tests/tests_pytorch/models/test_amp.py b/tests/tests_pytorch/models/test_amp.py index f769a904b7a31..7155e5e606b80 100644 --- a/tests/tests_pytorch/models/test_amp.py +++ b/tests/tests_pytorch/models/test_amp.py @@ -194,9 +194,10 @@ def configure_optimizers(self): model = CustomModel() model.training_epoch_end = None - trainer = Trainer( - default_root_dir=tmpdir, max_steps=5, precision=16, amp_backend="apex", accelerator="gpu", devices=1 - ) + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + default_root_dir=tmpdir, max_steps=5, precision=16, amp_backend="apex", accelerator="gpu", devices=1 + ) assert str(trainer.amp_backend) == "AMPType.APEX" trainer.fit(model) # `max_steps` is fulfilled in the third batch first optimizer, but we don't check the loop @@ -210,15 +211,16 @@ def configure_optimizers(self): @RunIf(min_cuda_gpus=1, amp_apex=True) def test_amp_with_apex_reload(tmpdir): model = BoringModel() - trainer = Trainer( - default_root_dir=tmpdir, - max_steps=1, - limit_test_batches=1, - precision=16, - amp_backend="apex", - accelerator="gpu", - devices=1, - ) + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + default_root_dir=tmpdir, + max_steps=1, + limit_test_batches=1, + precision=16, + amp_backend="apex", + accelerator="gpu", + devices=1, + ) trainer.fit(model) trainer.fit_loop.max_steps = 2 diff --git a/tests/tests_pytorch/models/test_hooks.py b/tests/tests_pytorch/models/test_hooks.py index 1eae5d7b64c34..eae53a7afc620 100644 --- a/tests/tests_pytorch/models/test_hooks.py +++ b/tests/tests_pytorch/models/test_hooks.py @@ -485,17 +485,31 @@ def training_step(self, batch, batch_idx): callback = HookedCallback(called) train_batches = 2 val_batches = 2 - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - limit_train_batches=train_batches, - limit_val_batches=val_batches, - enable_progress_bar=False, - enable_model_summary=False, - callbacks=[callback], - track_grad_norm=1, - **kwargs, - ) + if kwargs.get("amp_backend") == "apex": + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=train_batches, + limit_val_batches=val_batches, + enable_progress_bar=False, + enable_model_summary=False, + callbacks=[callback], + track_grad_norm=1, + **kwargs, + ) + else: + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=train_batches, + limit_val_batches=val_batches, + enable_progress_bar=False, + enable_model_summary=False, + callbacks=[callback], + track_grad_norm=1, + **kwargs, + ) trainer.fit(model) saved_ckpt = { "callbacks": ANY, diff --git a/tests/tests_pytorch/models/test_horovod.py b/tests/tests_pytorch/models/test_horovod.py index 3d223ef93a154..7963bde389f4d 100644 --- a/tests/tests_pytorch/models/test_horovod.py +++ b/tests/tests_pytorch/models/test_horovod.py @@ -200,29 +200,6 @@ def test_horovod_multi_gpu_grad_by_value(tmpdir): _run_horovod(trainer_options) -# todo: need to be fixed :] -# https://discuss.pytorch.org/t/torch-cuda-amp-vs-nvidia-apex/74994 -# Check with (tgaddair) on Horovod issues if this feature is needed -@pytest.mark.skip(reason="TODO: Horovod 
currently doesn't work with Apex") -@RunIf(min_cuda_gpus=2, amp_apex=True, horovod_nccl=True, skip_windows=True) -def test_horovod_apex(tmpdir): - """Test Horovod with multi-GPU support using apex amp.""" - trainer_options = dict( - default_root_dir=str(tmpdir), - gradient_clip_val=1.0, - enable_progress_bar=False, - max_epochs=1, - limit_train_batches=0.4, - limit_val_batches=0.2, - accelerator="gpu", - devices=2, - strategy="horovod", - amp_backend="apex", - precision=16, - ) - _run_horovod(trainer_options) - - @RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True) def test_horovod_amp(tmpdir): """Test Horovod with multi-GPU support using native amp.""" diff --git a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py index c1f7979ea8482..85304fafe6efe 100644 --- a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py +++ b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py @@ -28,12 +28,15 @@ def test_deepspeed_precision_apex_not_installed(monkeypatch): import pytorch_lightning.plugins.precision.deepspeed as deepspeed_apex monkeypatch.setattr(deepspeed_apex, "_APEX_AVAILABLE", False) - with pytest.raises(MisconfigurationException, match="You have asked for Apex AMP but `apex` is not installed."): + with pytest.raises( + MisconfigurationException, match="You have asked for Apex AMP but `apex` is not installed." + ), pytest.deprecated_call(match="apex AMP implementation has been deprecated"): DeepSpeedPrecisionPlugin(precision=16, amp_type="apex") @mock.patch("pytorch_lightning.plugins.precision.deepspeed._APEX_AVAILABLE", return_value=True) def test_deepspeed_precision_apex_default_level(_): - precision_plugin = DeepSpeedPrecisionPlugin(precision=16, amp_type="apex") + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + precision_plugin = DeepSpeedPrecisionPlugin(precision=16, amp_type="apex") assert isinstance(precision_plugin, DeepSpeedPrecisionPlugin) assert precision_plugin.amp_level == "O2" diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index 65a764a093127..d101796a7d024 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -205,6 +205,8 @@ def training_step(self, batch, batch_idx): assert self.trainer.precision_plugin._connected return super().training_step(batch, batch_idx) + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + plugin = ApexMixedPrecisionPlugin(amp_level=amp_level) trainer = Trainer( default_root_dir=tmpdir, fast_dev_run=True, @@ -213,7 +215,7 @@ def training_step(self, batch, batch_idx): accelerator="gpu", devices=2, strategy="ddp", - plugins=ApexMixedPrecisionPlugin(amp_level=amp_level), + plugins=plugin, enable_progress_bar=False, enable_model_summary=False, ) @@ -226,16 +228,17 @@ def training_step(self, batch, batch_idx): @RunIf(min_cuda_gpus=2, amp_apex=True) @pytest.mark.parametrize("amp_level", ["O2"]) def test_amp_apex_ddp_spawn_fit(amp_level, tmpdir): - trainer = Trainer( - default_root_dir=tmpdir, - fast_dev_run=True, - precision=16, - amp_backend="apex", - accelerator="gpu", - devices=2, - strategy="ddp_spawn", - plugins=ApexMixedPrecisionPlugin(amp_level=amp_level), - ) + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + default_root_dir=tmpdir, + fast_dev_run=True, + precision=16, + 
amp_backend="apex", + accelerator="gpu", + devices=2, + strategy="ddp_spawn", + plugins=ApexMixedPrecisionPlugin(amp_level=amp_level), + ) assert isinstance(trainer.precision_plugin, ApexMixedPrecisionPlugin) model = BoringModel() trainer.fit(model) @@ -271,5 +274,5 @@ def test_precision_selection_raises(monkeypatch): monkeypatch.setattr(apex, "_APEX_AVAILABLE", False) with mock.patch("lightning_lite.accelerators.cuda.is_cuda_available", return_value=True), pytest.raises( MisconfigurationException, match="asked for Apex AMP but `apex` is not installed" - ): + ), pytest.deprecated_call(match="apex AMP implementation has been deprecated"): Trainer(amp_backend="apex", precision=16, accelerator="gpu", devices=1) diff --git a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py index 640c44c717ac2..902fe833fef10 100644 --- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py +++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py @@ -140,15 +140,25 @@ def test_deepspeed_precision_choice(cuda_count_1, amp_backend, tmpdir): DeepSpeed handles precision via Custom DeepSpeedPrecisionPlugin """ - - trainer = Trainer( - fast_dev_run=True, - default_root_dir=tmpdir, - accelerator="gpu", - strategy="deepspeed", - amp_backend=amp_backend, - precision=16, - ) + if amp_backend == "apex": + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + fast_dev_run=True, + default_root_dir=tmpdir, + accelerator="gpu", + strategy="deepspeed", + amp_backend=amp_backend, + precision=16, + ) + else: + trainer = Trainer( + fast_dev_run=True, + default_root_dir=tmpdir, + accelerator="gpu", + strategy="deepspeed", + amp_backend=amp_backend, + precision=16, + ) assert isinstance(trainer.strategy, DeepSpeedStrategy) assert isinstance(trainer.strategy.precision_plugin, DeepSpeedPrecisionPlugin) diff --git a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py index 2224ed8569709..e3547dad94f9f 100644 --- a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py +++ b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py @@ -114,7 +114,11 @@ def on_train_end(self): model.val_dataloader = None limit_train_batches = 2 - plugins = [ApexMixedPrecisionPlugin(amp_level="O2")] if kwargs.get("amp_backend") == "apex" else [] + plugins = [] + if kwargs.get("amp_backend") == "apex": + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + apex_plugin = ApexMixedPrecisionPlugin(amp_level="O2") + plugins.append(apex_plugin) trainer = Trainer( default_root_dir=tmpdir, diff --git a/tests/tests_pytorch/utilities/test_imports.py b/tests/tests_pytorch/utilities/test_imports.py index 29f221e717879..3a22e8aeb6e7f 100644 --- a/tests/tests_pytorch/utilities/test_imports.py +++ b/tests/tests_pytorch/utilities/test_imports.py @@ -23,8 +23,9 @@ from lightning_utilities.core.imports import compare_version, module_available, RequirementCache from torch.distributed import is_available +from pytorch_lightning.plugins.precision.apex_amp import _APEX_AVAILABLE from pytorch_lightning.strategies.bagua import _BAGUA_AVAILABLE -from pytorch_lightning.utilities import _APEX_AVAILABLE, _HOROVOD_AVAILABLE, _OMEGACONF_AVAILABLE, _POPTORCH_AVAILABLE +from pytorch_lightning.utilities import _HOROVOD_AVAILABLE, _OMEGACONF_AVAILABLE, _POPTORCH_AVAILABLE from tests_pytorch.helpers.runif import RunIf 
From d5a14b0c68043e06c3ee85b60063260a33062367 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 13 Dec 2022 17:56:49 +0100 Subject: [PATCH 02/14] WIP --- src/lightning_lite/strategies/deepspeed.py | 7 +-- src/lightning_lite/utilities/__init__.py | 2 +- src/lightning_lite/utilities/enums.py | 8 --- src/pytorch_lightning/core/module.py | 1 + .../loops/optimization/optimizer_loop.py | 5 +- .../plugins/precision/apex_amp.py | 3 +- .../plugins/precision/deepspeed.py | 4 +- .../plugins/precision/native_amp.py | 21 +++++--- src/pytorch_lightning/strategies/deepspeed.py | 6 +-- .../connectors/accelerator_connector.py | 17 ++++--- src/pytorch_lightning/trainer/trainer.py | 9 ++-- src/pytorch_lightning/utilities/__init__.py | 3 +- src/pytorch_lightning/utilities/enums.py | 49 ++++++++++++++++++- tests/tests_pytorch/helpers/runif.py | 2 +- 14 files changed, 92 insertions(+), 45 deletions(-) diff --git a/src/lightning_lite/strategies/deepspeed.py b/src/lightning_lite/strategies/deepspeed.py index 74dc73c210c08..eb5967b230295 100644 --- a/src/lightning_lite/strategies/deepspeed.py +++ b/src/lightning_lite/strategies/deepspeed.py @@ -32,7 +32,7 @@ from lightning_lite.strategies.ddp import DDPStrategy from lightning_lite.strategies.strategy import _Sharded from lightning_lite.utilities.distributed import log -from lightning_lite.utilities.enums import AMPType, PrecisionType +from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.rank_zero import rank_zero_info from lightning_lite.utilities.seed import reset_seed from lightning_lite.utilities.types import _PATH @@ -501,7 +501,7 @@ def _format_config(self) -> None: def _format_precision_config(self) -> None: assert isinstance(self.config, dict) if self.precision.precision == PrecisionType.HALF: - if "fp16" not in self.config and self.precision.amp_type == AMPType.NATIVE: + if "fp16" not in self.config: # FP16 is a DeepSpeed standalone AMP implementation rank_zero_info("Enabling DeepSpeed FP16.") self.config["fp16"] = { @@ -512,9 +512,6 @@ def _format_precision_config(self) -> None: "hysteresis": self.hysteresis, "min_loss_scale": self.min_loss_scale, } - elif "amp" not in self.config and self.precision.amp_type == AMPType.APEX: - rank_zero_info("Enabling DeepSpeed APEX Implementation.") - self.config["amp"] = {"enabled": True, "opt_level": self.precision.amp_level} elif "bf16" not in self.config and self.precision.precision == PrecisionType.BFLOAT: rank_zero_info("Enabling DeepSpeed BF16.") self.config["bf16"] = {"enabled": True} diff --git a/src/lightning_lite/utilities/__init__.py b/src/lightning_lite/utilities/__init__.py index 17f37679f23d3..73c0a7ade0c76 100644 --- a/src/lightning_lite/utilities/__init__.py +++ b/src/lightning_lite/utilities/__init__.py @@ -14,7 +14,7 @@ """General utilities.""" from lightning_lite.utilities.apply_func import move_data_to_device # noqa: F401 -from lightning_lite.utilities.enums import _AcceleratorType, _StrategyType, AMPType, LightningEnum # noqa: F401 +from lightning_lite.utilities.enums import _AcceleratorType, _StrategyType, LightningEnum # noqa: F401 from lightning_lite.utilities.rank_zero import ( # noqa: F401 rank_zero_deprecation, rank_zero_info, diff --git a/src/lightning_lite/utilities/enums.py b/src/lightning_lite/utilities/enums.py index 03bb914bee5e0..cd8a3dd5bd062 100644 --- a/src/lightning_lite/utilities/enums.py +++ b/src/lightning_lite/utilities/enums.py @@ -29,14 +29,6 @@ class LightningEnum(StrEnum, Enum): LightningEnum = StrEnum -# 
FIXME(carlos): Deprecate this on PL -class AMPType(LightningEnum): - """Type of Automatic Mixed Precission used for training.""" - - APEX = "apex" - NATIVE = "native" - - class PrecisionType(LightningEnum): """Type of precision used.""" diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index c642835bb11fc..0a3824607fb14 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -1599,6 +1599,7 @@ def optimizer_step( optimizer_idx: int = 0, optimizer_closure: Optional[Callable[[], Any]] = None, on_tpu: bool = False, + # FIXME(carlos): deprecate this using_native_amp: bool = False, using_lbfgs: bool = False, ) -> None: diff --git a/src/pytorch_lightning/loops/optimization/optimizer_loop.py b/src/pytorch_lightning/loops/optimization/optimizer_loop.py index 33106bda959e0..b0b0b45858498 100644 --- a/src/pytorch_lightning/loops/optimization/optimizer_loop.py +++ b/src/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -31,7 +31,6 @@ _extract_hiddens, ) from pytorch_lightning.trainer.progress import OptimizationProgress -from pytorch_lightning.utilities import AMPType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -341,7 +340,7 @@ def _optimizer_step( is_lbfgs = isinstance(optimizer, torch.optim.LBFGS) # wraps into LightningOptimizer only for running step - if self.trainer.amp_backend == AMPType.APEX: + if self.trainer.amp_backend == "apex": # apex overrides .step function and need to be wrapped on each step optimizer = LightningOptimizer._to_lightning_optimizer(optimizer, self.trainer.strategy, opt_idx) else: @@ -362,7 +361,7 @@ def _optimizer_step( opt_idx, train_step_and_backward_closure, on_tpu=isinstance(self.trainer.accelerator, TPUAccelerator), - using_native_amp=(self.trainer.amp_backend == AMPType.NATIVE), + using_native_amp=(self.trainer.amp_backend == "native"), using_lbfgs=is_lbfgs, ) diff --git a/src/pytorch_lightning/plugins/precision/apex_amp.py b/src/pytorch_lightning/plugins/precision/apex_amp.py index 2a172e79a0e43..951ec588a7d9f 100644 --- a/src/pytorch_lightning/plugins/precision/apex_amp.py +++ b/src/pytorch_lightning/plugins/precision/apex_amp.py @@ -23,7 +23,6 @@ import pytorch_lightning as pl from lightning_lite.utilities.types import _PARAMETERS, Optimizable from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin -from pytorch_lightning.utilities import AMPType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation @@ -43,7 +42,7 @@ def _import_amp_without_deprecation() -> ModuleType: class ApexMixedPrecisionPlugin(PrecisionPlugin): """Mixed Precision Plugin based on Nvidia/Apex (https://github.com/NVIDIA/apex)""" - backend = AMPType.APEX + backend = "apex" def __init__(self, amp_level: str = "O2") -> None: # deprecate before the availability check so users don't install without knowning that it's deprecated diff --git a/src/pytorch_lightning/plugins/precision/deepspeed.py b/src/pytorch_lightning/plugins/precision/deepspeed.py index 23345707d8260..3d74ef4e7eef5 100644 --- a/src/pytorch_lightning/plugins/precision/deepspeed.py +++ b/src/pytorch_lightning/plugins/precision/deepspeed.py @@ -19,7 +19,7 @@ from torch.optim import LBFGS, Optimizer import pytorch_lightning as pl -from lightning_lite.utilities.enums import AMPType, PrecisionType +from lightning_lite.utilities.enums import 
PrecisionType from lightning_lite.utilities.types import Steppable from pytorch_lightning.plugins.precision.apex_amp import _APEX_AVAILABLE from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin @@ -46,7 +46,7 @@ class DeepSpeedPrecisionPlugin(PrecisionPlugin): """ def __init__(self, precision: Union[str, int], amp_type: str = "native", amp_level: Optional[str] = None) -> None: - if amp_type == AMPType.APEX: + if amp_type == "apex": # TODO: remove in v1.10.0 rank_zero_deprecation( "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" diff --git a/src/pytorch_lightning/plugins/precision/native_amp.py b/src/pytorch_lightning/plugins/precision/native_amp.py index 286c7624b0d4b..fdf4df7a0577f 100644 --- a/src/pytorch_lightning/plugins/precision/native_amp.py +++ b/src/pytorch_lightning/plugins/precision/native_amp.py @@ -22,13 +22,13 @@ from lightning_lite.accelerators.cuda import _patch_cuda_is_available from lightning_lite.utilities.types import Optimizable from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin -from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType +from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation -# FIXME(carlos): deprecate Native -class NativeMixedPrecisionPlugin(PrecisionPlugin): - """Plugin for Native Mixed Precision (AMP) training with ``torch.autocast``. +class MixedPrecisionPlugin(PrecisionPlugin): + """Plugin for Automatic Mixed Precision (AMP) training with ``torch.autocast``. Args: precision: Whether to use ``torch.float16`` (``16``) or ``torch.bfloat16`` (``'bf16'``). @@ -36,8 +36,6 @@ class NativeMixedPrecisionPlugin(PrecisionPlugin): scaler: An optional :class:`torch.cuda.amp.GradScaler` to use. """ - backend = AMPType.NATIVE - def __init__( self, precision: Union[str, int], device: str, scaler: Optional[torch.cuda.amp.GradScaler] = None ) -> None: @@ -126,6 +124,17 @@ def load_state_dict(self, state_dict: Dict[str, Any]) -> None: self.scaler.load_state_dict(state_dict) +class NativeMixedPrecisionPlugin(MixedPrecisionPlugin): + backend = "native" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + rank_zero_deprecation( + f"The `{type(self).__name__}` class has been renamed in v1.9.0 and will be removed in" + " v1.10.0. Please use `pytorch_lightning.plugins.MixedPrecisionPlugin` instead." + ) + super().__init__(*args, **kwargs) + + def _optimizer_handles_unscaling(optimizer: Any) -> bool: """Determines whether a PyTorch optimizer handles unscaling gradients in the step method rather than through the :class:`torch.cuda.amp.GradScaler`. 
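
A small usage sketch of the rename above, assuming both names stay exported from ``pytorch_lightning.plugins`` as the updated API reference suggests (not part of the patch)::

    from pytorch_lightning.plugins import MixedPrecisionPlugin, NativeMixedPrecisionPlugin

    plugin = MixedPrecisionPlugin(16, "cuda")        # preferred spelling going forward
    legacy = NativeMixedPrecisionPlugin(16, "cuda")  # still works, but emits a deprecation warning

    # the old name is kept as a thin subclass, so isinstance checks keep passing
    assert isinstance(legacy, MixedPrecisionPlugin)
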
diff --git a/src/pytorch_lightning/strategies/deepspeed.py b/src/pytorch_lightning/strategies/deepspeed.py index fc2363e2debe1..c171071c7496e 100644 --- a/src/pytorch_lightning/strategies/deepspeed.py +++ b/src/pytorch_lightning/strategies/deepspeed.py @@ -31,7 +31,7 @@ import pytorch_lightning as pl from lightning_lite.plugins import ClusterEnvironment -from lightning_lite.utilities.enums import AMPType, PrecisionType +from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.optimizer import _optimizers_to_device from lightning_lite.utilities.seed import reset_seed from lightning_lite.utilities.types import _PATH, LRScheduler, ReduceLROnPlateau @@ -654,7 +654,7 @@ def _auto_select_batch_size(self) -> int: def _format_precision_config(self) -> None: assert isinstance(self.config, dict) if self.precision_plugin.precision == PrecisionType.HALF: - if "fp16" not in self.config and self.precision_plugin.amp_type == AMPType.NATIVE: + if "fp16" not in self.config and self.precision_plugin.amp_type == "native": # FP16 is a DeepSpeed standalone AMP implementation rank_zero_info("Enabling DeepSpeed FP16.") self.config["fp16"] = { @@ -665,7 +665,7 @@ def _format_precision_config(self) -> None: "hysteresis": self.hysteresis, "min_loss_scale": self.min_loss_scale, } - elif "amp" not in self.config and self.precision_plugin.amp_type == AMPType.APEX: + elif "amp" not in self.config and self.precision_plugin.amp_type == "apex": rank_zero_info("Enabling DeepSpeed APEX Implementation.") self.config["amp"] = {"enabled": True, "opt_level": self.precision_plugin.amp_level} elif "bf16" not in self.config and self.precision_plugin.precision == PrecisionType.BFLOAT: diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py index 0393652a0784d..746ef937358b2 100644 --- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -28,7 +28,7 @@ SLURMEnvironment, TorchElasticEnvironment, ) -from lightning_lite.utilities import _StrategyType, AMPType, LightningEnum +from lightning_lite.utilities import _StrategyType, LightningEnum from lightning_lite.utilities.device_parser import _determine_root_gpu_device from lightning_lite.utilities.imports import _IS_INTERACTIVE, _TORCH_GREATER_EQUAL_1_11 from pytorch_lightning.accelerators import AcceleratorRegistry @@ -82,6 +82,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _HOROVOD_AVAILABLE, _IPU_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info, rank_zero_warn +from src.pytorch_lightning.utilities.enums import AMPType log = logging.getLogger(__name__) @@ -381,9 +382,9 @@ def _check_config_and_set_final_flags( self._parallel_devices = self._strategy_flag.parallel_devices amp_type = amp_type if isinstance(amp_type, str) else None - self._amp_type_flag = AMPType.from_str(amp_type) + self._amp_type_flag = AMPType.from_str(amp_type).value - if amp_level is not None and self._amp_type_flag != AMPType.APEX: + if amp_level is not None and self._amp_type_flag != "apex": raise MisconfigurationException( f"You have asked for `amp_level={amp_level!r}` but it's only supported with `amp_backend='apex'`." 
) @@ -712,7 +713,7 @@ def _check_and_init_precision(self) -> PrecisionPlugin: else "Using bfloat16 Automatic Mixed Precision (AMP)" ) - if self._amp_type_flag == AMPType.NATIVE: + if self._amp_type_flag == "native": device = "cpu" if self._accelerator_flag == "cpu" else "cuda" if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)): @@ -723,7 +724,7 @@ def _check_and_init_precision(self) -> PrecisionPlugin: return FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) return NativeMixedPrecisionPlugin(self._precision_flag, device) - if self._amp_type_flag == AMPType.APEX: + if self._amp_type_flag == "apex": self._amp_level_flag = self._amp_level_flag or "O2" return ApexMixedPrecisionPlugin(self._amp_level_flag) @@ -753,18 +754,18 @@ def _validate_precision_choice(self) -> None: if ( self._precision_flag == 16 and isinstance(self.accelerator, CPUAccelerator) - and self._amp_type_flag == AMPType.APEX + and self._amp_type_flag == "apex" ): raise MisconfigurationException( "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" " but apex AMP not supported on CPU." ) - if self._precision_flag == "bf16" and self._amp_type_flag != AMPType.NATIVE: + if self._precision_flag == "bf16" and self._amp_type_flag != "native": raise MisconfigurationException( f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but " # type: ignore "it's not supported. Try using `amp_type='native'` instead." ) - if self._precision_flag in (16, "bf16") and self._amp_type_flag == AMPType.APEX: + if self._precision_flag in (16, "bf16") and self._amp_type_flag == "apex": if isinstance( self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy, DDPFullyShardedStrategy, DDPFullyShardedNativeStrategy), diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index eb7317c652930..64e1e7c64c89c 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -84,7 +84,7 @@ from pytorch_lightning.trainer.states import RunningStage, TrainerFn, TrainerState, TrainerStatus from pytorch_lightning.trainer.supporters import CombinedLoader from pytorch_lightning.tuner.tuning import _TunerResult, Tuner -from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType, parsing +from pytorch_lightning.utilities import GradClipAlgorithmType, parsing from pytorch_lightning.utilities.argparse import ( _defaults_from_env_vars, add_argparse_args, @@ -164,6 +164,7 @@ def __init__( detect_anomaly: bool = False, auto_scale_batch_size: Union[str, bool] = False, plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None, + # FIXME: deprecate these amp_backend: str = "native", # TODO: Remove in 1.10 amp_level: Optional[str] = None, # # TODO: Remove in 1.10 move_metrics_to_cpu: bool = False, @@ -1776,11 +1777,11 @@ def optimizer_frequencies(self, new_freqs: List[int]) -> None: self.strategy.optimizer_frequencies = new_freqs @property - def amp_backend(self) -> Optional[AMPType]: + def amp_backend(self) -> Optional[str]: if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin): - return AMPType.APEX + return "apex" if isinstance(self.precision_plugin, NativeMixedPrecisionPlugin): - return AMPType.NATIVE + return "native" return None @property diff --git a/src/pytorch_lightning/utilities/__init__.py b/src/pytorch_lightning/utilities/__init__.py index a3b0ad6ba5250..6b329f27aaf28 100644 --- a/src/pytorch_lightning/utilities/__init__.py +++ 
b/src/pytorch_lightning/utilities/__init__.py @@ -15,8 +15,8 @@ import numpy +from lightning_lite.utilities import LightningEnum # noqa: F401 from lightning_lite.utilities import move_data_to_device # noqa: F401 -from lightning_lite.utilities import AMPType, LightningEnum # noqa: F401 from pytorch_lightning.utilities.distributed import AllGatherGrad # noqa: F401 from pytorch_lightning.utilities.enums import GradClipAlgorithmType # noqa: F401 from pytorch_lightning.utilities.grads import grad_norm # noqa: F401 @@ -38,6 +38,7 @@ rank_zero_only, rank_zero_warn, ) +from pytorch_lightning.utiltiies.enums import AMPType # noqa: F401 FLOAT16_EPSILON = numpy.finfo(numpy.float16).eps FLOAT32_EPSILON = numpy.finfo(numpy.float32).eps diff --git a/src/pytorch_lightning/utilities/enums.py b/src/pytorch_lightning/utilities/enums.py index 8a5fe0e35d6b2..7de22c84d8a93 100644 --- a/src/pytorch_lightning/utilities/enums.py +++ b/src/pytorch_lightning/utilities/enums.py @@ -15,9 +15,56 @@ from __future__ import annotations import os +from enum import Enum, EnumMeta +from typing import Any -from lightning_lite.utilities.enums import AMPType, LightningEnum, PrecisionType # noqa: F401 +from lightning_lite.utilities.enums import LightningEnum, PrecisionType # noqa: F401 from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation + + +class _DeprecatedEnumMeta(EnumMeta): + """Enum that calls `deprecate()` whenever a member is accessed. + + Adapted from: https://stackoverflow.com/a/62309159/208880 + """ + + def __getattribute__(cls, name: str) -> Any: + obj = super().__getattribute__(name) + # ignore __dunder__ names -- prevents potential recursion errors + if not (name.startswith("__") and name.endswith("__")) and isinstance(obj, Enum): + obj.deprecate() + return obj + + def __getitem__(cls, name: str) -> Any: + member: _DeprecatedEnumMeta = super().__getitem__(name) + member.deprecate() + return member + + def __call__(cls, *args: Any, **kwargs: Any) -> Any: + obj = super().__call__(*args, **kwargs) + if isinstance(obj, Enum): + obj.deprecate() + return obj + + +class _DeprecatedEnum(LightningEnum, metaclass=_DeprecatedEnumMeta): + """_DeprecatedEnum calls an enum's `deprecate()` method on member access.""" + + pass + + +class AMPType(LightningEnum): + """Type of Automatic Mixed Precision used for training.""" + + APEX = "apex" + NATIVE = "native" + + def deprecate(self) -> None: + rank_zero_deprecation( + f"`{type(self).__name__}` enum has been deprecated in v1.9.0 and will be removed in v1.10.0." + f" Use the string value `{self.value!r}` instead." 
+ ) class GradClipAlgorithmType(LightningEnum): diff --git a/tests/tests_pytorch/helpers/runif.py b/tests/tests_pytorch/helpers/runif.py index bc9ef27312819..72c529dcc91b6 100644 --- a/tests/tests_pytorch/helpers/runif.py +++ b/tests/tests_pytorch/helpers/runif.py @@ -154,7 +154,7 @@ def __new__( conditions.append(not _TORCH_QUANTIZE_AVAILABLE or _miss_default) reasons.append("PyTorch quantization") - # TODO: remove in v1.9.0 + # TODO: remove in v1.10.0 if amp_apex: conditions.append(not _APEX_AVAILABLE) reasons.append("NVIDIA Apex") From e5d665bc973b59c97c1724b1a86b6f67d9573d96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 13 Dec 2022 18:52:39 +0100 Subject: [PATCH 03/14] using_native_amp --- docs/source-pytorch/common/optimization.rst | 4 ---- src/pytorch_lightning/core/module.py | 8 ++------ .../loops/optimization/optimizer_loop.py | 15 ++++++++++++++- .../trainer/configuration_validator.py | 14 +++++++------- tests/tests_pytorch/core/test_lightning_module.py | 2 -- tests/tests_pytorch/models/test_hooks.py | 3 +-- 6 files changed, 24 insertions(+), 22 deletions(-) diff --git a/docs/source-pytorch/common/optimization.rst b/docs/source-pytorch/common/optimization.rst index b9745493194b3..840be8293ce4b 100644 --- a/docs/source-pytorch/common/optimization.rst +++ b/docs/source-pytorch/common/optimization.rst @@ -151,7 +151,6 @@ For example, here step optimizer A every batch and optimizer B every 2 batches. optimizer_idx, optimizer_closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): # update generator every step @@ -183,7 +182,6 @@ Here we add a manual learning rate warm-up without an lr scheduler. optimizer_idx, optimizer_closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): # update params @@ -215,7 +213,6 @@ to perform a step, Lightning won't be able to support accelerators, precision an optimizer_idx, optimizer_closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): optimizer.step(closure=optimizer_closure) @@ -232,7 +229,6 @@ to perform a step, Lightning won't be able to support accelerators, precision an optimizer_idx, optimizer_closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): optimizer = optimizer.optimizer diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index 0a3824607fb14..6a2a79e717c0e 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -1599,8 +1599,6 @@ def optimizer_step( optimizer_idx: int = 0, optimizer_closure: Optional[Callable[[], Any]] = None, on_tpu: bool = False, - # FIXME(carlos): deprecate this - using_native_amp: bool = False, using_lbfgs: bool = False, ) -> None: r""" @@ -1619,19 +1617,18 @@ def optimizer_step( optimizer_closure: The optimizer closure. This closure must be executed as it includes the calls to ``training_step()``, ``optimizer.zero_grad()``, and ``backward()``. 
on_tpu: ``True`` if TPU backward is required - using_native_amp: ``True`` if using native amp using_lbfgs: True if the matching optimizer is :class:`torch.optim.LBFGS` Examples:: # DEFAULT def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, - optimizer_closure, on_tpu, using_native_amp, using_lbfgs): + optimizer_closure, on_tpu, using_lbfgs): optimizer.step(closure=optimizer_closure) # Alternating schedule for optimizer steps (i.e.: GANs) def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, - optimizer_closure, on_tpu, using_native_amp, using_lbfgs): + optimizer_closure, on_tpu, using_lbfgs): # update generator opt every step if optimizer_idx == 0: optimizer.step(closure=optimizer_closure) @@ -1661,7 +1658,6 @@ def optimizer_step( optimizer_idx, optimizer_closure, on_tpu, - using_native_amp, using_lbfgs, ): # update params diff --git a/src/pytorch_lightning/loops/optimization/optimizer_loop.py b/src/pytorch_lightning/loops/optimization/optimizer_loop.py index b0b0b45858498..5626631e449c1 100644 --- a/src/pytorch_lightning/loops/optimization/optimizer_loop.py +++ b/src/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -32,6 +32,8 @@ ) from pytorch_lightning.trainer.progress import OptimizationProgress from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation +from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -353,6 +355,17 @@ def _optimizer_step( self.optim_progress.optimizer.step.increment_ready() # model hook + kwargs = {} + pl_module = self.trainer.lightning_module + if is_param_in_hook_signature(pl_module.optimizer_step, "using_native_amp", explicit=True): + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0 and will be removed in v1.10.0." + f" The `{type(pl_module).__name__}.optimizer_step()` hook is overridden, including the" + " `using_native_amp` argument. Removing this argument will avoid this message, you can expect it to " + " return True." 
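The deprecation branch above keys off whether the user's hook still declares the legacy parameter. A rough, standalone approximation of that signature check (not the actual `is_param_in_hook_signature` helper; `declares_param` and the two hooks are made up for illustration):

import inspect


def declares_param(fn, name: str) -> bool:
    # True only if `fn` names the parameter explicitly (a bare **kwargs does not count)
    params = inspect.signature(fn).parameters
    return name in params and params[name].kind is not inspect.Parameter.VAR_KEYWORD


def new_hook(epoch, batch_idx, optimizer, optimizer_idx, closure, on_tpu=False, using_lbfgs=False):
    ...


def old_hook(epoch, batch_idx, optimizer, optimizer_idx, closure, using_native_amp=False, **kwargs):
    ...


assert not declares_param(new_hook, "using_native_amp")
assert declares_param(old_hook, "using_native_amp")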
+ ) + kwargs["using_native_amp"] = self.trainer.amp_backend == "native" self.trainer._call_lightning_module_hook( "optimizer_step", self.trainer.current_epoch, @@ -361,7 +374,7 @@ def _optimizer_step( opt_idx, train_step_and_backward_closure, on_tpu=isinstance(self.trainer.accelerator, TPUAccelerator), - using_native_amp=(self.trainer.amp_backend == "native"), + **kwargs, using_lbfgs=is_lbfgs, ) diff --git a/src/pytorch_lightning/trainer/configuration_validator.py b/src/pytorch_lightning/trainer/configuration_validator.py index f551f3ba5d3b7..73b33d6ef4e05 100644 --- a/src/pytorch_lightning/trainer/configuration_validator.py +++ b/src/pytorch_lightning/trainer/configuration_validator.py @@ -42,11 +42,11 @@ def verify_loop_configurations(trainer: "pl.Trainer") -> None: __verify_manual_optimization_support(trainer, model) __check_training_step_requires_dataloader_iter(model) elif trainer.state.fn == TrainerFn.VALIDATING: - __verify_eval_loop_configuration(trainer, model, "val") + __verify_eval_loop_configuration(model, "val") elif trainer.state.fn == TrainerFn.TESTING: - __verify_eval_loop_configuration(trainer, model, "test") + __verify_eval_loop_configuration(model, "test") elif trainer.state.fn == TrainerFn.PREDICTING: - __verify_eval_loop_configuration(trainer, model, "predict") + __verify_eval_loop_configuration(model, "predict") __verify_batch_transfer_support(trainer) # TODO: Delete this check in v2.0 @@ -82,12 +82,12 @@ def __verify_train_val_loop_configuration(trainer: "pl.Trainer", model: "pl.Ligh " `training_step()`, `train_dataloader()` and `configure_optimizers()` to be defined." ) - trainer.overridden_optimizer_step = is_overridden("optimizer_step", model) - trainer.overridden_optimizer_zero_grad = is_overridden("optimizer_zero_grad", model) + overridden_optimizer_step = is_overridden("optimizer_step", model) + overridden_optimizer_zero_grad = is_overridden("optimizer_zero_grad", model) automatic_optimization = model.automatic_optimization going_to_accumulate_grad_batches = trainer.accumulation_scheduler.going_to_accumulate_grad_batches() - has_overridden_optimization_functions = trainer.overridden_optimizer_step or trainer.overridden_optimizer_zero_grad + has_overridden_optimization_functions = overridden_optimizer_step or overridden_optimizer_zero_grad if has_overridden_optimization_functions and going_to_accumulate_grad_batches and automatic_optimization: rank_zero_warn( "When using `Trainer(accumulate_grad_batches != 1)` and overriding" @@ -111,7 +111,7 @@ def __verify_train_val_loop_configuration(trainer: "pl.Trainer", model: "pl.Ligh ) -def __verify_eval_loop_configuration(trainer: "pl.Trainer", model: "pl.LightningModule", stage: str) -> None: +def __verify_eval_loop_configuration(model: "pl.LightningModule", stage: str) -> None: step_name = "validation_step" if stage == "val" else f"{stage}_step" trainer_method = "validate" if stage == "val" else stage diff --git a/tests/tests_pytorch/core/test_lightning_module.py b/tests/tests_pytorch/core/test_lightning_module.py index 2b628bf61bd81..1a9a48afecd02 100644 --- a/tests/tests_pytorch/core/test_lightning_module.py +++ b/tests/tests_pytorch/core/test_lightning_module.py @@ -152,7 +152,6 @@ def optimizer_step( optimizer_idx, closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): if optimizer_idx == 0: @@ -216,7 +215,6 @@ def optimizer_step( optimizer_idx, closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): if optimizer_idx == 0: diff --git a/tests/tests_pytorch/models/test_hooks.py 
b/tests/tests_pytorch/models/test_hooks.py index eae53a7afc620..e15829ca62b2d 100644 --- a/tests/tests_pytorch/models/test_hooks.py +++ b/tests/tests_pytorch/models/test_hooks.py @@ -302,7 +302,6 @@ def _train_batch(self, *args, **kwargs): def _auto_train_batch( trainer, model, batches, device=torch.device("cpu"), current_epoch=0, current_batch=0, **kwargs ): - using_native_amp = kwargs.get("amp_backend") == "native" using_deepspeed = kwargs.get("strategy") == "deepspeed" out = [] for i in range(current_batch, batches): @@ -344,7 +343,7 @@ def _auto_train_batch( dict( name="optimizer_step", args=(current_epoch, i, ANY, 0, ANY), - kwargs=dict(on_tpu=False, using_lbfgs=False, using_native_amp=using_native_amp), + kwargs=dict(on_tpu=False, using_lbfgs=False), ), *( [dict(name="lr_scheduler_step", args=(ANY, 0, None))] From c4078f25d41b267145e8d5eb9808143b9ad7ac44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 13 Dec 2022 19:00:33 +0100 Subject: [PATCH 04/14] amp_backend and amp_level --- src/lightning_lite/connector.py | 4 +-- src/lightning_lite/plugins/__init__.py | 4 +-- .../plugins/precision/__init__.py | 4 +-- src/lightning_lite/plugins/precision/fsdp.py | 12 +++---- src/pytorch_lightning/plugins/__init__.py | 3 +- .../connectors/accelerator_connector.py | 35 ++++++++++++------- src/pytorch_lightning/trainer/trainer.py | 14 +++----- src/pytorch_lightning/utilities/__init__.py | 2 +- 8 files changed, 41 insertions(+), 37 deletions(-) diff --git a/src/lightning_lite/connector.py b/src/lightning_lite/connector.py index 59251fc1cd119..7a84ab441018c 100644 --- a/src/lightning_lite/connector.py +++ b/src/lightning_lite/connector.py @@ -26,7 +26,7 @@ from lightning_lite.plugins import ( CheckpointIO, DeepSpeedPrecision, - NativeMixedPrecision, + MixedPrecision, Precision, TPUBf16Precision, TPUPrecision, @@ -476,7 +476,7 @@ def _check_and_init_precision(self) -> Precision: if isinstance(self.strategy, FSDPStrategy): return FSDPPrecision(precision=self._precision_input, device=device) - return NativeMixedPrecision(precision=self._precision_input, device=device) + return MixedPrecision(precision=self._precision_input, device=device) raise RuntimeError("No precision set") diff --git a/src/lightning_lite/plugins/__init__.py b/src/lightning_lite/plugins/__init__.py index d0416e70f9747..e294df2cb2f59 100644 --- a/src/lightning_lite/plugins/__init__.py +++ b/src/lightning_lite/plugins/__init__.py @@ -18,7 +18,7 @@ from lightning_lite.plugins.precision.deepspeed import DeepSpeedPrecision from lightning_lite.plugins.precision.double import DoublePrecision from lightning_lite.plugins.precision.fsdp import FSDPPrecision -from lightning_lite.plugins.precision.native_amp import NativeMixedPrecision +from lightning_lite.plugins.precision.native_amp import MixedPrecision from lightning_lite.plugins.precision.precision import Precision from lightning_lite.plugins.precision.tpu import TPUPrecision from lightning_lite.plugins.precision.tpu_bf16 import TPUBf16Precision @@ -31,7 +31,7 @@ "Precision", "DeepSpeedPrecision", "DoublePrecision", - "NativeMixedPrecision", + "MixedPrecision", "TPUPrecision", "TPUBf16Precision", "FSDPPrecision", diff --git a/src/lightning_lite/plugins/precision/__init__.py b/src/lightning_lite/plugins/precision/__init__.py index c47ffeb3f9fc1..9cb3224a28955 100644 --- a/src/lightning_lite/plugins/precision/__init__.py +++ b/src/lightning_lite/plugins/precision/__init__.py @@ -14,7 +14,7 @@ from lightning_lite.plugins.precision.deepspeed import DeepSpeedPrecision 
from lightning_lite.plugins.precision.double import DoublePrecision from lightning_lite.plugins.precision.fsdp import FSDPPrecision -from lightning_lite.plugins.precision.native_amp import NativeMixedPrecision +from lightning_lite.plugins.precision.native_amp import MixedPrecision from lightning_lite.plugins.precision.precision import Precision from lightning_lite.plugins.precision.tpu import TPUPrecision from lightning_lite.plugins.precision.tpu_bf16 import TPUBf16Precision @@ -22,7 +22,7 @@ __all__ = [ "DeepSpeedPrecision", "DoublePrecision", - "NativeMixedPrecision", + "MixedPrecision", "Precision", "TPUPrecision", "TPUBf16Precision", diff --git a/src/lightning_lite/plugins/precision/fsdp.py b/src/lightning_lite/plugins/precision/fsdp.py index 020369bcbc4cf..baff0924a303b 100644 --- a/src/lightning_lite/plugins/precision/fsdp.py +++ b/src/lightning_lite/plugins/precision/fsdp.py @@ -16,16 +16,16 @@ import torch from typing_extensions import Literal -from lightning_lite.plugins.precision.native_amp import NativeMixedPrecision +from lightning_lite.plugins.precision.native_amp import MixedPrecision from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.imports import _TORCH_GREATER_EQUAL_1_12 if TYPE_CHECKING: - from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision + from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision as TorchMixedPrecision from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler -class FSDPPrecision(NativeMixedPrecision): +class FSDPPrecision(MixedPrecision): """AMP for Fully Sharded Data Parallel training.""" def __init__( @@ -43,8 +43,8 @@ def __init__( ) @property - def mixed_precision_config(self) -> "MixedPrecision": - from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision + def mixed_precision_config(self) -> "TorchMixedPrecision": + from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision as TorchMixedPrecision if self.precision == PrecisionType.HALF: dtype = torch.float16 @@ -52,7 +52,7 @@ def mixed_precision_config(self) -> "MixedPrecision": dtype = torch.bfloat16 else: raise ValueError(f"Was unable to infer precision type, received {self.precision!r}.") - return MixedPrecision( + return TorchMixedPrecision( param_dtype=dtype, reduce_dtype=dtype, buffer_dtype=dtype, diff --git a/src/pytorch_lightning/plugins/__init__.py b/src/pytorch_lightning/plugins/__init__.py index 29b2db639335a..c719050e8cbe4 100644 --- a/src/pytorch_lightning/plugins/__init__.py +++ b/src/pytorch_lightning/plugins/__init__.py @@ -12,7 +12,7 @@ from pytorch_lightning.plugins.precision.fully_sharded_native_amp import FullyShardedNativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.hpu import HPUPrecisionPlugin from pytorch_lightning.plugins.precision.ipu import IPUPrecisionPlugin -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin, NativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.tpu import TPUPrecisionPlugin @@ -34,6 +34,7 @@ "IPUPrecisionPlugin", "HPUPrecisionPlugin", "NativeMixedPrecisionPlugin", + "MixedPrecisionPlugin", "PrecisionPlugin", "ShardedNativeMixedPrecisionPlugin", "FullyShardedNativeMixedPrecisionPlugin", diff 
--git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py index 746ef937358b2..15f11e52592b3 100644 --- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -79,10 +79,10 @@ ) from pytorch_lightning.strategies.ddp_spawn import _DDP_FORK_ALIASES from pytorch_lightning.tuner.auto_gpu_select import pick_multiple_gpus +from pytorch_lightning.utilities.enums import AMPType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _HOROVOD_AVAILABLE, _IPU_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info, rank_zero_warn -from src.pytorch_lightning.utilities.enums import AMPType log = logging.getLogger(__name__) @@ -101,7 +101,7 @@ def __init__( strategy: Optional[Union[str, Strategy]] = None, plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None, precision: Union[int, str] = 32, - amp_type: str = "native", + amp_type: Optional[str] = None, amp_level: Optional[str] = None, sync_batchnorm: bool = False, benchmark: Optional[bool] = None, @@ -179,12 +179,6 @@ def __init__( self._amp_level_flag: Optional[str] = amp_level self._auto_select_gpus: bool = auto_select_gpus - if amp_level is not None: - rank_zero_deprecation( - "Setting `amp_level` inside the `Trainer` is deprecated in v1.8.0 and will be removed" - " in v1.10.0. Please set it inside the specific precision plugin and pass it to the `Trainer`." - ) - self._check_config_and_set_final_flags( strategy=strategy, accelerator=accelerator, @@ -244,7 +238,7 @@ def _check_config_and_set_final_flags( accelerator: Optional[Union[str, Accelerator]], precision: Union[int, str], plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]], - amp_type: str, + amp_type: Optional[str], amp_level: Optional[str], sync_batchnorm: bool, ) -> None: @@ -381,13 +375,28 @@ def _check_config_and_set_final_flags( self._accelerator_flag = "cuda" self._parallel_devices = self._strategy_flag.parallel_devices - amp_type = amp_type if isinstance(amp_type, str) else None + if amp_type is not None: + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0 and will be removed in v1.10.0." + f" The `Trainer(amp_backend={amp_type!r})` argument is deprecated. Removing this argument will avoid" + f" this message, it will select PyTorch's implementation automatically." + ) + else: + amp_type = "native" self._amp_type_flag = AMPType.from_str(amp_type).value - if amp_level is not None and self._amp_type_flag != "apex": - raise MisconfigurationException( - f"You have asked for `amp_level={amp_level!r}` but it's only supported with `amp_backend='apex'`." + if amp_level is not None: + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0 and will be removed in v1.10.0." + f" The `Trainer(amp_level={amp_level!r})` argument is deprecated. Removing this argument will avoid" + f" this message." ) + if self._amp_type_flag != "apex": + raise MisconfigurationException( + f"You have asked for `amp_level={amp_level!r}` but it's only supported with `amp_backend='apex'`." 
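For user code, the migration is just dropping the deprecated arguments. A hedged sketch, assuming a CUDA GPU is available:

from pytorch_lightning import Trainer
from pytorch_lightning.plugins import MixedPrecisionPlugin

# before: Trainer(accelerator="gpu", devices=1, precision=16, amp_backend="native")
trainer = Trainer(accelerator="gpu", devices=1, precision=16)

# the native AMP plugin is selected automatically; inspect the plugin type
# instead of relying on the deprecated `trainer.amp_backend` property
assert isinstance(trainer.precision_plugin, MixedPrecisionPlugin)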
+ ) def _check_device_config_and_set_final_flags( self, diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index 64e1e7c64c89c..9c87f21384ea1 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -58,12 +58,7 @@ from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop from pytorch_lightning.loops.fit_loop import FitLoop from pytorch_lightning.loops.utilities import _parse_loop_limits, _reset_progress -from pytorch_lightning.plugins import ( - ApexMixedPrecisionPlugin, - NativeMixedPrecisionPlugin, - PLUGIN_INPUT, - PrecisionPlugin, -) +from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, MixedPrecisionPlugin, PLUGIN_INPUT, PrecisionPlugin from pytorch_lightning.profilers import Profiler from pytorch_lightning.strategies import ( DDPFullyShardedNativeStrategy, @@ -164,8 +159,7 @@ def __init__( detect_anomaly: bool = False, auto_scale_batch_size: Union[str, bool] = False, plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None, - # FIXME: deprecate these - amp_backend: str = "native", # TODO: Remove in 1.10 + amp_backend: Optional[str] = None, # TODO: Remove in 1.10 amp_level: Optional[str] = None, # # TODO: Remove in 1.10 move_metrics_to_cpu: bool = False, multiple_trainloader_mode: str = "max_size_cycle", @@ -194,7 +188,7 @@ def __init__( .. deprecated:: v1.8 Setting ``amp_level`` inside the ``Trainer`` is deprecated in v1.8.0 and will be removed - in v1.10.0. Please set it inside the specific precision plugin and pass it to the ``Trainer``. + in v1.10.0. auto_lr_find: If set to True, will make trainer.tune() run a learning rate finder, trying to optimize initial learning for faster convergence. trainer.tune() method will @@ -1780,7 +1774,7 @@ def optimizer_frequencies(self, new_freqs: List[int]) -> None: def amp_backend(self) -> Optional[str]: if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin): return "apex" - if isinstance(self.precision_plugin, NativeMixedPrecisionPlugin): + if isinstance(self.precision_plugin, MixedPrecisionPlugin): return "native" return None diff --git a/src/pytorch_lightning/utilities/__init__.py b/src/pytorch_lightning/utilities/__init__.py index 6b329f27aaf28..27107bc8b81f8 100644 --- a/src/pytorch_lightning/utilities/__init__.py +++ b/src/pytorch_lightning/utilities/__init__.py @@ -18,6 +18,7 @@ from lightning_lite.utilities import LightningEnum # noqa: F401 from lightning_lite.utilities import move_data_to_device # noqa: F401 from pytorch_lightning.utilities.distributed import AllGatherGrad # noqa: F401 +from pytorch_lightning.utilities.enums import AMPType # noqa: F401 from pytorch_lightning.utilities.enums import GradClipAlgorithmType # noqa: F401 from pytorch_lightning.utilities.grads import grad_norm # noqa: F401 from pytorch_lightning.utilities.imports import ( # noqa: F401 @@ -38,7 +39,6 @@ rank_zero_only, rank_zero_warn, ) -from pytorch_lightning.utiltiies.enums import AMPType # noqa: F401 FLOAT16_EPSILON = numpy.finfo(numpy.float16).eps FLOAT32_EPSILON = numpy.finfo(numpy.float32).eps From 15ed1ce3c6e1a68da77babd1c5c663465ced1ddf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 13 Dec 2022 19:34:46 +0100 Subject: [PATCH 05/14] Fix tests --- .../plugins/precision/apex_amp.py | 6 +- .../plugins/precision/deepspeed.py | 4 +- .../connectors/accelerator_connector.py | 8 +- .../precision/test_deepspeed_precision.py | 4 +- .../plugins/precision/test_native_amp.py | 6 +- 
.../tests_pytorch/plugins/test_amp_plugins.py | 24 ++--- .../optimization/test_manual_optimization.py | 87 +++++++++++-------- 7 files changed, 79 insertions(+), 60 deletions(-) diff --git a/src/pytorch_lightning/plugins/precision/apex_amp.py b/src/pytorch_lightning/plugins/precision/apex_amp.py index 951ec588a7d9f..c8982197ee286 100644 --- a/src/pytorch_lightning/plugins/precision/apex_amp.py +++ b/src/pytorch_lightning/plugins/precision/apex_amp.py @@ -32,10 +32,10 @@ @functools.lru_cache(maxsize=1) def _import_amp_without_deprecation() -> ModuleType: # hide the warning upstream in favor of our deprecation - with warnings.filterwarnings(action="ignore", message="apex.amp is deprecated", category=FutureWarning): - from apex import amp + warnings.filterwarnings(action="ignore", message="apex.amp is deprecated", category=FutureWarning) + from apex import amp - return amp + return amp # TODO: remove in v1.10.0 diff --git a/src/pytorch_lightning/plugins/precision/deepspeed.py b/src/pytorch_lightning/plugins/precision/deepspeed.py index 3d74ef4e7eef5..b606eaede6f24 100644 --- a/src/pytorch_lightning/plugins/precision/deepspeed.py +++ b/src/pytorch_lightning/plugins/precision/deepspeed.py @@ -66,8 +66,8 @@ def __init__(self, precision: Union[str, int], amp_type: str = "native", amp_lev f"`{type(self).__name__}(amp_level={amp_level!r})` is only relevant when using NVIDIA/apex" ) rank_zero_deprecation( - f"Passing `{type(self).__name__}(amp_type=...)` been deprecated in v1.9.0 and will be removed in" - " v1.10.0. This argument is no longer necessary." + f"Passing `{type(self).__name__}(amp_type={amp_type!r})` been deprecated in v1.9.0 and will be removed" + f" in v1.10.0. This argument is no longer necessary." ) supported_precision = (PrecisionType.HALF, PrecisionType.FLOAT, PrecisionType.BFLOAT) diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py index 15f11e52592b3..5c8094c69185b 100644 --- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -48,7 +48,7 @@ FullyShardedNativeMixedPrecisionPlugin, HPUPrecisionPlugin, IPUPrecisionPlugin, - NativeMixedPrecisionPlugin, + MixedPrecisionPlugin, PLUGIN_INPUT, PrecisionPlugin, ShardedNativeMixedPrecisionPlugin, @@ -717,7 +717,7 @@ def _check_and_init_precision(self) -> PrecisionPlugin: if self._precision_flag in (16, "bf16"): rank_zero_info( - f"Using 16bit {self._amp_type_flag.value} Automatic Mixed Precision (AMP)" # type: ignore + f"Using 16bit {self._amp_type_flag} Automatic Mixed Precision (AMP)" # type: ignore if self._precision_flag == 16 else "Using bfloat16 Automatic Mixed Precision (AMP)" ) @@ -731,7 +731,7 @@ def _check_and_init_precision(self) -> PrecisionPlugin: return FullyShardedNativeNativeMixedPrecisionPlugin(self._precision_flag, device) if isinstance(self.strategy, DDPFullyShardedStrategy): return FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) - return NativeMixedPrecisionPlugin(self._precision_flag, device) + return MixedPrecisionPlugin(self._precision_flag, device) if self._amp_type_flag == "apex": self._amp_level_flag = self._amp_level_flag or "O2" @@ -771,7 +771,7 @@ def _validate_precision_choice(self) -> None: ) if self._precision_flag == "bf16" and self._amp_type_flag != "native": raise MisconfigurationException( - f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but " # type: ignore + f"You 
passed `Trainer(amp_type={self._amp_type_flag!r}, precision='bf16')` but " # type: ignore "it's not supported. Try using `amp_type='native'` instead." ) if self._precision_flag in (16, "bf16") and self._amp_type_flag == "apex": diff --git a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py index 85304fafe6efe..8ffecb817f6c5 100644 --- a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py +++ b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py @@ -20,7 +20,9 @@ def test_invalid_precision_with_deepspeed_precision(): - with pytest.raises(ValueError, match="is not supported. `precision` must be one of"): + with pytest.deprecated_call(match=r"amp_type='native'\)` been deprecated in v1.9.0"), pytest.raises( + ValueError, match="is not supported. `precision` must be one of" + ): DeepSpeedPrecisionPlugin(precision=64, amp_type="native") diff --git a/tests/tests_pytorch/plugins/precision/test_native_amp.py b/tests/tests_pytorch/plugins/precision/test_native_amp.py index c848dcc351ac7..e37a9de45ebcf 100644 --- a/tests/tests_pytorch/plugins/precision/test_native_amp.py +++ b/tests/tests_pytorch/plugins/precision/test_native_amp.py @@ -16,14 +16,14 @@ import pytest from torch.optim import Optimizer -from pytorch_lightning.plugins import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins import MixedPrecisionPlugin from pytorch_lightning.utilities import GradClipAlgorithmType def test_clip_gradients(): """Test that `.clip_gradients()` is a no-op when clipping is disabled.""" optimizer = Mock(spec=Optimizer) - precision = NativeMixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) + precision = MixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) precision.clip_grad_by_value = Mock() precision.clip_grad_by_norm = Mock() precision.clip_gradients(optimizer) @@ -47,7 +47,7 @@ def test_optimizer_amp_scaling_support_in_step_method(): gradient clipping (example: fused Adam).""" optimizer = Mock(_step_supports_amp_scaling=True) - precision = NativeMixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) + precision = MixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) with pytest.raises(RuntimeError, match="The current optimizer.*does not allow for gradient clipping"): precision.clip_gradients(optimizer, clip_val=1.0) diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index d101796a7d024..06941161560a5 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -20,13 +20,13 @@ from pytorch_lightning import Trainer from pytorch_lightning.demos.boring_classes import BoringModel -from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin +from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, MixedPrecisionPlugin from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests_pytorch.conftest import mock_cuda_count from tests_pytorch.helpers.runif import RunIf -class MyNativeAMP(NativeMixedPrecisionPlugin): +class MyNativeAMP(MixedPrecisionPlugin): pass @@ -52,7 +52,7 @@ class MyApexPlugin(ApexMixedPrecisionPlugin): @pytest.mark.parametrize( "amp,custom_plugin,plugin_cls", [ - ("native", False, NativeMixedPrecisionPlugin), + ("native", False, MixedPrecisionPlugin), ("native", True, MyNativeAMP), pytest.param("apex", False, ApexMixedPrecisionPlugin, 
marks=RunIf(amp_apex=True)), pytest.param("apex", True, MyApexPlugin, marks=RunIf(amp_apex=True)), @@ -189,9 +189,7 @@ def configure_optimizers(self): torch.optim.SGD(self.layer2.parameters(), lr=0.1), ] - trainer = Trainer( - default_root_dir=tmpdir, accelerator="gpu", devices=1, fast_dev_run=1, amp_backend="native", precision=16 - ) + trainer = Trainer(default_root_dir=tmpdir, accelerator="gpu", devices=1, fast_dev_run=1, precision=16) model = CustomBoringModel() trainer.fit(model) @@ -246,7 +244,7 @@ def test_amp_apex_ddp_spawn_fit(amp_level, tmpdir): def test_cpu_amp_precision_context_manager(tmpdir): """Test to ensure that the context manager correctly is set to CPU + bfloat16.""" - plugin = NativeMixedPrecisionPlugin("bf16", "cpu") + plugin = MixedPrecisionPlugin("bf16", "cpu") assert plugin.device == "cpu" assert plugin.scaler is None context_manager = plugin.autocast_context_manager() @@ -256,16 +254,20 @@ def test_cpu_amp_precision_context_manager(tmpdir): def test_precision_selection_raises(monkeypatch): - with pytest.raises( + with pytest.deprecated_call(match=r"amp_backend='apex'\)` argument is deprecated"), pytest.raises( MisconfigurationException, match=r"precision=16, amp_type='apex'\)` but apex AMP not supported on CPU" ): Trainer(amp_backend="apex", precision=16) - with pytest.raises(MisconfigurationException, match=r"amp_type='apex', precision='bf16'\)` but it's not supported"): + with pytest.deprecated_call(match=r"amp_backend='apex'\)` argument is deprecated"), pytest.raises( + MisconfigurationException, match=r"amp_type='apex', precision='bf16'\)` but it's not supported" + ): Trainer(amp_backend="apex", precision="bf16") mock_cuda_count(monkeypatch, 1) - with pytest.raises(MisconfigurationException, match="Sharded plugins are not supported with apex"): + with pytest.deprecated_call(match=r"amp_backend='apex'\)` argument is deprecated"), pytest.raises( + MisconfigurationException, match="Sharded plugins are not supported with apex" + ): with mock.patch("lightning_lite.accelerators.cuda.is_cuda_available", return_value=True): Trainer(amp_backend="apex", precision=16, accelerator="gpu", devices=1, strategy="ddp_fully_sharded") @@ -274,5 +276,5 @@ def test_precision_selection_raises(monkeypatch): monkeypatch.setattr(apex, "_APEX_AVAILABLE", False) with mock.patch("lightning_lite.accelerators.cuda.is_cuda_available", return_value=True), pytest.raises( MisconfigurationException, match="asked for Apex AMP but `apex` is not installed" - ), pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + ), pytest.deprecated_call(match=r"amp_backend='apex'\)` argument is deprecated"): Trainer(amp_backend="apex", precision=16, accelerator="gpu", devices=1) diff --git a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py index e3547dad94f9f..845c849f82761 100644 --- a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py +++ b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py @@ -65,19 +65,51 @@ def configure_optimizers(self): @pytest.mark.parametrize( - "kwargs", - [ - {}, - pytest.param( - {"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "native"}, marks=RunIf(min_cuda_gpus=1) - ), - pytest.param( - {"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "apex"}, - marks=RunIf(min_cuda_gpus=1, amp_apex=True), - ), - ], + "kwargs", [{}, pytest.param({"accelerator": "gpu", "devices": 1, "precision": 16}, 
marks=RunIf(min_cuda_gpus=1))] ) def test_multiple_optimizers_manual_no_return(tmpdir, kwargs): + class TestModel(ManualOptModel): + def training_step(self, batch, batch_idx): + # avoid returning a value + super().training_step(batch, batch_idx) + + def training_epoch_end(self, outputs): + # outputs is empty as training_step does not return + # and it is not automatic optimization + assert not outputs + + model = TestModel() + model.val_dataloader = None + + limit_train_batches = 2 + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=limit_train_batches, + limit_val_batches=2, + max_epochs=1, + log_every_n_steps=1, + enable_model_summary=False, + **kwargs, + ) + + if kwargs.get("precision") == 16: + # mock the scaler instead of the optimizer step because it can be skipped with NaNs + scaler_step_patch = mock.patch.object( + trainer.precision_plugin.scaler, "step", wraps=trainer.precision_plugin.scaler.step + ) + scaler_step = scaler_step_patch.start() + + with mock.patch.object(Strategy, "backward", wraps=trainer.strategy.backward) as bwd_mock: + trainer.fit(model) + assert bwd_mock.call_count == limit_train_batches * 3 + + if kwargs.get("precision") == 16: + scaler_step_patch.stop() + assert scaler_step.call_count == len(model.optimizers()) * limit_train_batches + + +@RunIf(min_cuda_gpus=1, amp_apex=True) +def test_multiple_optimizers_manual_no_return_apex(tmpdir): apex_optimizer_patches = [] apex_optimizer_steps = [] @@ -92,8 +124,6 @@ def training_epoch_end(self, outputs): assert not outputs def on_train_start(self): - if kwargs.get("amp_backend") != "apex": - return # extremely ugly. APEX patches all the native torch optimizers on `_initialize` which we call on # `ApexMixedPrecisionPlugin.dispatch`. Additionally, their replacement `new_step` functions are locally # defined so can't even patch those, thus we need to create the mock after APEX has been initialized @@ -106,19 +136,15 @@ def on_train_start(self): apex_optimizer_steps.append(patch.start()) def on_train_end(self): - if kwargs.get("amp_backend") == "apex": - for p in apex_optimizer_patches: - p.stop() + for p in apex_optimizer_patches: + p.stop() model = TestModel() model.val_dataloader = None limit_train_batches = 2 - plugins = [] - if kwargs.get("amp_backend") == "apex": - with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): - apex_plugin = ApexMixedPrecisionPlugin(amp_level="O2") - plugins.append(apex_plugin) + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + plugins = [ApexMixedPrecisionPlugin(amp_level="O2")] trainer = Trainer( default_root_dir=tmpdir, @@ -128,25 +154,16 @@ def on_train_end(self): log_every_n_steps=1, enable_model_summary=False, plugins=plugins, - **kwargs, + accelerator="gpu", + devices=1, + precision=16, ) - if kwargs.get("amp_backend") == "native": - # mock the scaler instead of the optimizer step because it can be skipped with NaNs - scaler_step_patch = mock.patch.object( - trainer.precision_plugin.scaler, "step", wraps=trainer.precision_plugin.scaler.step - ) - scaler_step = scaler_step_patch.start() - with mock.patch.object(Strategy, "backward", wraps=trainer.strategy.backward) as bwd_mock: trainer.fit(model) assert bwd_mock.call_count == limit_train_batches * 3 - if kwargs.get("amp_backend") == "native": - scaler_step_patch.stop() - assert scaler_step.call_count == len(model.optimizers()) * limit_train_batches - if kwargs.get("amp_backend") == "apex": - assert [s.call_count for s in apex_optimizer_steps] == 
[len(model.optimizers())] * limit_train_batches + assert [s.call_count for s in apex_optimizer_steps] == [len(model.optimizers())] * limit_train_batches def test_multiple_optimizers_manual_return(tmpdir): @@ -396,7 +413,6 @@ def on_train_epoch_end(self, *_, **__): limit_test_batches=0, limit_val_batches=0, precision=16, - amp_backend="native", accelerator="gpu", devices=1, ) @@ -480,7 +496,6 @@ def log_grad_norm(self, grad_norm_dict): log_every_n_steps=1, enable_model_summary=False, precision=16, - amp_backend="native", accelerator="gpu", devices=1, track_grad_norm=2, From ea40f56be54a5e39ee1ddc0cf97dfdbae4c15d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Mon, 19 Dec 2022 17:22:16 +0100 Subject: [PATCH 06/14] Update docs --- .../accelerators/gpu_intermediate.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/source-pytorch/accelerators/gpu_intermediate.rst b/docs/source-pytorch/accelerators/gpu_intermediate.rst index 959a67ed4a555..9e2e7a4071ce0 100644 --- a/docs/source-pytorch/accelerators/gpu_intermediate.rst +++ b/docs/source-pytorch/accelerators/gpu_intermediate.rst @@ -472,19 +472,23 @@ Distributed and 16-bit precision Below are the possible configurations we support. +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ -| 1 GPU | 1+ GPUs | DP | DDP | 16-bit | command | +| 1 GPU | 1+ GPUs | DDP | DP | 16-bit | command | +=======+=========+=====+=====+========+=======================================================================+ | Y | | | | | `Trainer(accelerator="gpu", devices=1)` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ | Y | | | | Y | `Trainer(accelerator="gpu", devices=1, precision=16)` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ -| | Y | Y | | | `Trainer(accelerator="gpu", devices=k, strategy='dp')` | +| | Y | Y | | | `Trainer(accelerator="gpu", devices=k, strategy='ddp')` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ -| | Y | | Y | | `Trainer(accelerator="gpu", devices=k, strategy='ddp')` | +| | Y | Y | | Y | `Trainer(accelerator="gpu", devices=k, strategy='ddp', precision=16)` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ -| | Y | | Y | Y | `Trainer(accelerator="gpu", devices=k, strategy='ddp', precision=16)` | +| | Y | | Y | | `Trainer(accelerator="gpu", devices=k, strategy='dp')` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ -# FIXME(carlos): check native amp and DP +| | Y | | Y | Y | `Trainer(accelerator="gpu", devices=k, strategy='dp', precision=16)` | ++-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ + +DDP and DP can also be used with 1 GPU, but there's no reason to do so other than debugging distributed-related issues. 
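As a quick cross-check of the table above, the two 16-bit multi-GPU rows correspond to the following constructor calls. This is a sketch only; `k` stands for however many GPUs you actually have.

from pytorch_lightning import Trainer

k = 2  # assumed number of available GPUs

# DDP with native 16-bit mixed precision
ddp_trainer = Trainer(accelerator="gpu", devices=k, strategy="ddp", precision=16)

# DP with native 16-bit mixed precision
dp_trainer = Trainer(accelerator="gpu", devices=k, strategy="dp", precision=16)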
+ Implement Your Own Distributed (DDP) training ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 457fa8ea3b4f95399c4737a398802cb9b52590b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 20 Dec 2022 00:36:00 +0100 Subject: [PATCH 07/14] Deprecation tests --- src/pytorch_lightning/lite/lite.py | 4 +- .../loops/optimization/optimizer_loop.py | 2 +- .../plugins/precision/deepspeed.py | 14 +++-- .../connectors/accelerator_connector.py | 3 +- src/pytorch_lightning/utilities/enums.py | 10 +--- .../deprecated_api/test_remove_1-10.py | 52 +++++++++++++++++-- 6 files changed, 62 insertions(+), 23 deletions(-) diff --git a/src/pytorch_lightning/lite/lite.py b/src/pytorch_lightning/lite/lite.py index 533bd84508d48..9fc610c911393 100644 --- a/src/pytorch_lightning/lite/lite.py +++ b/src/pytorch_lightning/lite/lite.py @@ -23,7 +23,7 @@ from lightning_lite.plugins import CheckpointIO, ClusterEnvironment from lightning_lite.plugins import DeepSpeedPrecision as LiteDeepSpeedPrecision from lightning_lite.plugins import DoublePrecision as LiteDoublePrecision -from lightning_lite.plugins import NativeMixedPrecision as LiteNativeMixedPrecision +from lightning_lite.plugins import MixedPrecision as LiteMixedPrecision from lightning_lite.plugins import Precision as LitePrecision from lightning_lite.plugins import TPUBf16Precision as LiteTPUBf16Precision from lightning_lite.plugins import TPUPrecision as LiteTPUPrecision @@ -285,7 +285,7 @@ def _to_lite_precision(plugin: Optional[PLPrecisionPlugin]) -> LitePrecision: return LitePrecision() if type(plugin) is PLNativeMixedPrecisionPlugin: - return LiteNativeMixedPrecision( + return LiteMixedPrecision( precision=plugin.precision, device=plugin.device, scaler=plugin.scaler # type: ignore[arg-type] ) diff --git a/src/pytorch_lightning/loops/optimization/optimizer_loop.py b/src/pytorch_lightning/loops/optimization/optimizer_loop.py index 5626631e449c1..adbdd1b249d47 100644 --- a/src/pytorch_lightning/loops/optimization/optimizer_loop.py +++ b/src/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -362,7 +362,7 @@ def _optimizer_step( "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" " PyTorch Lightning has been deprecated in v1.9.0 and will be removed in v1.10.0." f" The `{type(pl_module).__name__}.optimizer_step()` hook is overridden, including the" - " `using_native_amp` argument. Removing this argument will avoid this message, you can expect it to " + " `using_native_amp` argument. Removing this argument will avoid this message, you can expect it to" " return True." ) kwargs["using_native_amp"] = self.trainer.amp_backend == "native" diff --git a/src/pytorch_lightning/plugins/precision/deepspeed.py b/src/pytorch_lightning/plugins/precision/deepspeed.py index b606eaede6f24..8cafcd20af169 100644 --- a/src/pytorch_lightning/plugins/precision/deepspeed.py +++ b/src/pytorch_lightning/plugins/precision/deepspeed.py @@ -45,7 +45,9 @@ class DeepSpeedPrecisionPlugin(PrecisionPlugin): If unsupported ``precision`` is provided. 
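A hedged usage sketch for the updated constructor, assuming DeepSpeed is installed and a CUDA GPU is available; passing `precision` alone is now sufficient.

from pytorch_lightning import Trainer
from pytorch_lightning.plugins import DeepSpeedPrecisionPlugin

# the deprecated amp_type/amp_level arguments can simply be dropped
precision_plugin = DeepSpeedPrecisionPlugin(precision=16)
trainer = Trainer(accelerator="gpu", devices=1, strategy="deepspeed", plugins=[precision_plugin])

# equivalently, let the Trainer build the plugin itself
trainer = Trainer(accelerator="gpu", devices=1, strategy="deepspeed", precision=16)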
""" - def __init__(self, precision: Union[str, int], amp_type: str = "native", amp_level: Optional[str] = None) -> None: + def __init__( + self, precision: Union[str, int], amp_type: Optional[str] = None, amp_level: Optional[str] = None + ) -> None: if amp_type == "apex": # TODO: remove in v1.10.0 rank_zero_deprecation( @@ -60,11 +62,13 @@ def __init__(self, precision: Union[str, int], amp_type: str = "native", amp_lev ) amp_level = amp_level or "O2" + elif amp_level is not None: + raise ValueError( + f"`{type(self).__name__}(amp_level={amp_level!r})` is only relevant when using NVIDIA/apex" + ) + if amp_type is None: + amp_type = "native" else: - if amp_level is not None: - raise ValueError( - f"`{type(self).__name__}(amp_level={amp_level!r})` is only relevant when using NVIDIA/apex" - ) rank_zero_deprecation( f"Passing `{type(self).__name__}(amp_type={amp_type!r})` been deprecated in v1.9.0 and will be removed" f" in v1.10.0. This argument is no longer necessary." diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py index 5c8094c69185b..41120a20d4d67 100644 --- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -79,7 +79,6 @@ ) from pytorch_lightning.strategies.ddp_spawn import _DDP_FORK_ALIASES from pytorch_lightning.tuner.auto_gpu_select import pick_multiple_gpus -from pytorch_lightning.utilities.enums import AMPType from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _HOROVOD_AVAILABLE, _IPU_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info, rank_zero_warn @@ -384,7 +383,7 @@ def _check_config_and_set_final_flags( ) else: amp_type = "native" - self._amp_type_flag = AMPType.from_str(amp_type).value + self._amp_type_flag = amp_type.lower() if amp_level is not None: rank_zero_deprecation( diff --git a/src/pytorch_lightning/utilities/enums.py b/src/pytorch_lightning/utilities/enums.py index 7de22c84d8a93..832b2dc086b0a 100644 --- a/src/pytorch_lightning/utilities/enums.py +++ b/src/pytorch_lightning/utilities/enums.py @@ -48,13 +48,7 @@ def __call__(cls, *args: Any, **kwargs: Any) -> Any: return obj -class _DeprecatedEnum(LightningEnum, metaclass=_DeprecatedEnumMeta): - """_DeprecatedEnum calls an enum's `deprecate()` method on member access.""" - - pass - - -class AMPType(LightningEnum): +class AMPType(LightningEnum, metaclass=_DeprecatedEnumMeta): """Type of Automatic Mixed Precision used for training.""" APEX = "apex" @@ -62,7 +56,7 @@ class AMPType(LightningEnum): def deprecate(self) -> None: rank_zero_deprecation( - f"`{type(self).__name__}` enum has been deprecated in v1.9.0 and will be removed in v1.10.0." + f"The `{type(self).__name__}` enum has been deprecated in v1.9.0 and will be removed in v1.10.0." f" Use the string value `{self.value!r}` instead." 
) diff --git a/tests/tests_pytorch/deprecated_api/test_remove_1-10.py b/tests/tests_pytorch/deprecated_api/test_remove_1-10.py index 72a03957f3a28..727f2dd3916f8 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_1-10.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_1-10.py @@ -33,6 +33,7 @@ from pytorch_lightning.overrides import LightningDistributedModule, LightningParallelModule from pytorch_lightning.overrides.base import unwrap_lightning_module from pytorch_lightning.overrides.fairscale import LightningShardedDataParallel, unwrap_lightning_module_sharded +from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, DeepSpeedPrecisionPlugin, NativeMixedPrecisionPlugin from pytorch_lightning.plugins.environments import LightningEnvironment from pytorch_lightning.strategies.bagua import LightningBaguaModule from pytorch_lightning.strategies.utils import on_colab_kaggle @@ -67,17 +68,13 @@ sync_ddp_if_available, tpu_distributed, ) +from pytorch_lightning.utilities.enums import AMPType from pytorch_lightning.utilities.optimizer import optimizer_to_device, optimizers_to_device from pytorch_lightning.utilities.seed import pl_worker_init_function, reset_seed, seed_everything from pytorch_lightning.utilities.xla_device import inner_f, pl_multi_process, XLADeviceUtils from tests_pytorch.helpers.runif import RunIf -def test_deprecated_amp_level(): - with pytest.deprecated_call(match="Setting `amp_level` inside the `Trainer` is deprecated in v1.8.0"): - Trainer(amp_level="O3", amp_backend="apex") - - @pytest.mark.parametrize( "wrapper_class", [ @@ -356,3 +353,48 @@ def test_profiler_classes_deprecated_warning(cls): f" Use .*profilers.{cls.__name__}` class instead." ): cls() + + +def test_apex_deprecation_warnings(): + class MyModel(BoringModel): + def optimizer_step( + self, + epoch, + batch_idx, + optimizer, + optimizer_idx=0, + optimizer_closure=None, + on_tpu=False, + using_native_amp=False, + **kwargs, + ): + return optimizer_closure() + + model = MyModel() + trainer = Trainer(fast_dev_run=True) + with pytest.deprecated_call(match="including the `using_native_amp` argument"): + trainer.fit(model) + + with pytest.deprecated_call(match="ApexMixedPrecisionPlugin` class will be removed in v1.10"): + ApexMixedPrecisionPlugin() + + with pytest.deprecated_call(match="NativeMixedPrecisionPlugin` class has been renamed in v1.9"): + NativeMixedPrecisionPlugin(16, "cpu") + + with pytest.deprecated_call(match="Support for.*DeepSpeed implementation will be removed in v1.10.0"): + DeepSpeedPrecisionPlugin(16, amp_type="apex") + + with pytest.deprecated_call(match=r"amp_type='native'\)` been deprecated in v1.9"): + DeepSpeedPrecisionPlugin(16, amp_type="native") + + with pytest.raises(ValueError, match=r"amp_level='O2'\)` is only relevant when using NVIDIA/apex"): + DeepSpeedPrecisionPlugin(16, amp_level="O2") + + with pytest.deprecated_call(match=r"Trainer\(amp_backend='apex'\)` argument is deprecated"): + Trainer(amp_backend="apex") + + with pytest.deprecated_call(match=r"Trainer\(amp_level='O2'\)` argument is deprecated"): + Trainer(amp_backend="apex", amp_level="O2") + + with pytest.deprecated_call(match="AMPType` enum has been deprecated in v1.9"): + AMPType.APEX From 82095db88f5a18b85943989414dda788de4859ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 20 Dec 2022 00:49:46 +0100 Subject: [PATCH 08/14] Fix plugin tests --- .../connectors/accelerator_connector.py | 12 ++++----- src/pytorch_lightning/trainer/trainer.py | 4 +-- 
.../tests_pytorch/plugins/test_amp_plugins.py | 25 +++++++++++-------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py index 41120a20d4d67..02ebb06ef31e0 100644 --- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -28,7 +28,7 @@ SLURMEnvironment, TorchElasticEnvironment, ) -from lightning_lite.utilities import _StrategyType, LightningEnum +from lightning_lite.utilities import _StrategyType from lightning_lite.utilities.device_parser import _determine_root_gpu_device from lightning_lite.utilities.imports import _IS_INTERACTIVE, _TORCH_GREATER_EQUAL_1_11 from pytorch_lightning.accelerators import AcceleratorRegistry @@ -174,8 +174,8 @@ def __init__( self._parallel_devices: List[Union[int, torch.device, str]] = [] self._layer_sync: Optional[LayerSync] = NativeSyncBatchNorm() if sync_batchnorm else None self.checkpoint_io: Optional[CheckpointIO] = None - self._amp_type_flag: Optional[LightningEnum] = None - self._amp_level_flag: Optional[str] = amp_level + self._amp_type_flag: Optional[str] = None # TODO: Remove in v1.10.0 + self._amp_level_flag: Optional[str] = amp_level # TODO: Remove in v1.10.0 self._auto_select_gpus: bool = auto_select_gpus self._check_config_and_set_final_flags( @@ -382,8 +382,8 @@ def _check_config_and_set_final_flags( f" this message, it will select PyTorch's implementation automatically." ) else: - amp_type = "native" - self._amp_type_flag = amp_type.lower() + amp_type = None + self._amp_type_flag = amp_type if amp_level is not None: rank_zero_deprecation( @@ -721,7 +721,7 @@ def _check_and_init_precision(self) -> PrecisionPlugin: else "Using bfloat16 Automatic Mixed Precision (AMP)" ) - if self._amp_type_flag == "native": + if self._amp_type_flag in (None, "native"): device = "cpu" if self._accelerator_flag == "cpu" else "cuda" if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)): diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index 9c87f21384ea1..8cf18f4835652 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -159,8 +159,8 @@ def __init__( detect_anomaly: bool = False, auto_scale_batch_size: Union[str, bool] = False, plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None, - amp_backend: Optional[str] = None, # TODO: Remove in 1.10 - amp_level: Optional[str] = None, # # TODO: Remove in 1.10 + amp_backend: Optional[str] = None, # TODO: Remove in v1.10.0 + amp_level: Optional[str] = None, # TODO: Remove in v1.10.0 move_metrics_to_cpu: bool = False, multiple_trainloader_mode: str = "max_size_cycle", inference_mode: bool = True, diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index 06941161560a5..7b08fc884c07a 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -61,16 +61,21 @@ class MyApexPlugin(ApexMixedPrecisionPlugin): def test_amp_apex_ddp(cuda_count_2, strategy, devices, amp, custom_plugin, plugin_cls): plugin = None if custom_plugin: - plugin = plugin_cls(16, "cpu") if amp == "native" else plugin_cls() - trainer = Trainer( - fast_dev_run=True, - precision=16, - amp_backend=amp, - accelerator="gpu", - devices=devices, - strategy=strategy, - plugins=plugin, - ) + if 
amp == "native": + plugin = plugin_cls(16, "cpu") + else: + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + plugin = plugin_cls() + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + fast_dev_run=True, + precision=16, + amp_backend=amp, + accelerator="gpu", + devices=devices, + strategy=strategy, + plugins=plugin, + ) assert isinstance(trainer.precision_plugin, plugin_cls) From bdb77b0ac9fbe0a9b5d368dd6986fb1a0866db61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 20 Dec 2022 00:57:20 +0100 Subject: [PATCH 09/14] CHANGELOG --- src/pytorch_lightning/CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 5f8289a76c35c..929568c784b40 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -72,6 +72,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `pytorch_lightning.profiler` in favor of `pytorch_lightning.profilers` ([#16059](https://github.com/PyTorchLightning/pytorch-lightning/pull/16059)) +- `nvidia/apex` deprecation ([#16039](https://github.com/PyTorchLightning/pytorch-lightning/pull/16039)) + * Deprecated `pytorch_lightning.plugins.NativeMixedPrecisionPlugin` in favor of `pytorch_lightning.plugins.MixedPrecisionPlugin` + * Deprecated the `LightningModule.optimizer_step(using_native_amp=...)` argument. + * Deprecated the `Trainer(amp_backend=...)` argument. + * Deprecated the `Trainer(amp_level=...)` argument. + * Deprecated the `pytorch_lightning.plugins.ApexMixedPrecisionPlugin` class. + * Deprecates the `pytorch_lightning.utilities.enum.sAMPType` enum. + * Deprecates the `DeepSpeedPrecisionPlugin(amp_type=..., amp_level=...)` arguments. 
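To make the plugin rename concrete, a hedged before/after sketch for code that built the precision plugin explicitly. It assumes a CUDA GPU; the `GradScaler` argument is optional and shown only for illustration.

import torch
from pytorch_lightning import Trainer
from pytorch_lightning.plugins import MixedPrecisionPlugin

# before: precision_plugin = NativeMixedPrecisionPlugin(16, "cuda", scaler=torch.cuda.amp.GradScaler())
precision_plugin = MixedPrecisionPlugin(16, "cuda", scaler=torch.cuda.amp.GradScaler())
trainer = Trainer(accelerator="gpu", devices=1, plugins=[precision_plugin])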
+ + ### Removed - Removed deprecated `pytorch_lightning.utilities.memory.get_gpu_memory_map` in favor of `pytorch_lightning.accelerators.cuda.get_nvidia_gpu_stats` ([#15617](https://github.com/Lightning-AI/lightning/pull/15617)) From 135a73474dbcd3a3f0d1f8b550eea572b63f48ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 20 Dec 2022 01:06:42 +0100 Subject: [PATCH 10/14] Fixes --- src/lightning_lite/connector.py | 2 +- src/pytorch_lightning/lite/lite.py | 4 +--- .../loops/optimization/optimizer_loop.py | 2 +- .../connectors/accelerator_connector.py | 16 +++++++--------- .../plugins/precision/test_native_amp.py | 18 +++++++++--------- tests/tests_lite/test_connector.py | 8 ++++---- 6 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/lightning_lite/connector.py b/src/lightning_lite/connector.py index 7a84ab441018c..584d74571d096 100644 --- a/src/lightning_lite/connector.py +++ b/src/lightning_lite/connector.py @@ -452,7 +452,7 @@ def _check_and_init_precision(self) -> Precision: ) return TPUBf16Precision() if isinstance(self.strategy, DeepSpeedStrategy): - return DeepSpeedPrecision(self._precision_input, amp_type="native", amp_level=None) # type: ignore + return DeepSpeedPrecision(self._precision_input) # type: ignore if self._precision_input == 32: return Precision() diff --git a/src/pytorch_lightning/lite/lite.py b/src/pytorch_lightning/lite/lite.py index 9fc610c911393..1f8699dcae29d 100644 --- a/src/pytorch_lightning/lite/lite.py +++ b/src/pytorch_lightning/lite/lite.py @@ -293,9 +293,7 @@ def _to_lite_precision(plugin: Optional[PLPrecisionPlugin]) -> LitePrecision: return LiteDoublePrecision() if type(plugin) is PLDeepSpeedPrecisionPlugin: - return LiteDeepSpeedPrecision( - precision=plugin.precision, amp_type=plugin.amp_type, amp_level=plugin.amp_level # type: ignore[arg-type] - ) + return LiteDeepSpeedPrecision(precision=plugin.precision) # type: ignore[arg-type] if type(plugin) is PLTPUPrecisionPlugin: return LiteTPUPrecision() diff --git a/src/pytorch_lightning/loops/optimization/optimizer_loop.py b/src/pytorch_lightning/loops/optimization/optimizer_loop.py index adbdd1b249d47..ebcf6cdafaf44 100644 --- a/src/pytorch_lightning/loops/optimization/optimizer_loop.py +++ b/src/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -374,7 +374,7 @@ def _optimizer_step( opt_idx, train_step_and_backward_closure, on_tpu=isinstance(self.trainer.accelerator, TPUAccelerator), - **kwargs, + **kwargs, # type: ignore[arg-type] using_lbfgs=is_lbfgs, ) diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py index 02ebb06ef31e0..1a6193c04653f 100644 --- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -716,7 +716,7 @@ def _check_and_init_precision(self) -> PrecisionPlugin: if self._precision_flag in (16, "bf16"): rank_zero_info( - f"Using 16bit {self._amp_type_flag} Automatic Mixed Precision (AMP)" # type: ignore + f"Using 16bit {self._amp_type_flag} Automatic Mixed Precision (AMP)" if self._precision_flag == 16 else "Using bfloat16 Automatic Mixed Precision (AMP)" ) @@ -768,19 +768,17 @@ def _validate_precision_choice(self) -> None: "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" " but apex AMP not supported on CPU." 
) - if self._precision_flag == "bf16" and self._amp_type_flag != "native": - raise MisconfigurationException( - f"You passed `Trainer(amp_type={self._amp_type_flag!r}, precision='bf16')` but " # type: ignore - "it's not supported. Try using `amp_type='native'` instead." - ) if self._precision_flag in (16, "bf16") and self._amp_type_flag == "apex": + if self._precision_flag == "bf16": + raise MisconfigurationException( + "You passed `Trainer(amp_type='apex', precision='bf16')` but it's not supported." + " Remove the `amp_type` argument." + ) if isinstance( self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy, DDPFullyShardedStrategy, DDPFullyShardedNativeStrategy), ): - raise MisconfigurationException( - "Sharded plugins are not supported with apex, please switch to `amp_backend='native'`." - ) + raise MisconfigurationException("Sharded plugins are not supported with apex.") def _lazy_init_strategy(self) -> None: """Lazily set missing attributes on the previously instantiated strategy.""" diff --git a/tests/tests_lite/plugins/precision/test_native_amp.py b/tests/tests_lite/plugins/precision/test_native_amp.py index 5d431df93e83f..8d997bf1f836e 100644 --- a/tests/tests_lite/plugins/precision/test_native_amp.py +++ b/tests/tests_lite/plugins/precision/test_native_amp.py @@ -16,25 +16,25 @@ import pytest import torch -from lightning_lite.plugins.precision.native_amp import NativeMixedPrecision +from lightning_lite.plugins.precision.native_amp import MixedPrecision def test_native_amp_precision_default_scaler(): - precision = NativeMixedPrecision(precision=16, device=Mock()) + precision = MixedPrecision(precision=16, device=Mock()) assert isinstance(precision.scaler, torch.cuda.amp.GradScaler) def test_native_amp_precision_scaler_with_bf16(): with pytest.raises(ValueError, match="`precision='bf16'` does not use a scaler"): - NativeMixedPrecision(precision="bf16", device=Mock(), scaler=Mock()) + MixedPrecision(precision="bf16", device=Mock(), scaler=Mock()) - precision = NativeMixedPrecision(precision="bf16", device=Mock()) + precision = MixedPrecision(precision="bf16", device=Mock()) assert precision.scaler is None def test_native_amp_precision_forward_context(): """Test to ensure that the context manager correctly is set to bfloat16 on CPU and CUDA.""" - precision = NativeMixedPrecision(precision=16, device="cuda") + precision = MixedPrecision(precision=16, device="cuda") assert precision.device == "cuda" assert isinstance(precision.scaler, torch.cuda.amp.GradScaler) assert torch.get_default_dtype() == torch.float32 @@ -42,7 +42,7 @@ def test_native_amp_precision_forward_context(): # check with str due to a bug upstream: https://github.com/pytorch/pytorch/issues/65786 assert str(torch.get_autocast_gpu_dtype()) in ("torch.float16", "torch.half") - precision = NativeMixedPrecision(precision="bf16", device="cpu") + precision = MixedPrecision(precision="bf16", device="cpu") assert precision.device == "cpu" assert precision.scaler is None with precision.forward_context(): @@ -56,7 +56,7 @@ def test_native_amp_precision_forward_context(): def test_native_amp_precision_backward(): - precision = NativeMixedPrecision(precision="mixed", device="cuda") + precision = MixedPrecision(precision="mixed", device="cuda") precision.scaler = Mock() precision.scaler.scale = Mock(side_effect=(lambda x: x)) tensor = Mock() @@ -67,7 +67,7 @@ def test_native_amp_precision_backward(): def test_native_amp_precision_optimizer_step_with_scaler(): - precision = NativeMixedPrecision(precision="mixed", 
device="cuda") + precision = MixedPrecision(precision="mixed", device="cuda") precision.scaler = Mock() optimizer = Mock() @@ -77,7 +77,7 @@ def test_native_amp_precision_optimizer_step_with_scaler(): def test_native_amp_precision_optimizer_step_without_scaler(): - precision = NativeMixedPrecision(precision="bf16", device="cuda") + precision = MixedPrecision(precision="bf16", device="cuda") assert precision.scaler is None optimizer = Mock() diff --git a/tests/tests_lite/test_connector.py b/tests/tests_lite/test_connector.py index f447e720a64a5..b6f2a8ed3304a 100644 --- a/tests/tests_lite/test_connector.py +++ b/tests/tests_lite/test_connector.py @@ -29,7 +29,7 @@ from lightning_lite.accelerators.cuda import CUDAAccelerator from lightning_lite.accelerators.mps import MPSAccelerator from lightning_lite.connector import _Connector -from lightning_lite.plugins import DoublePrecision, NativeMixedPrecision, Precision, TPUPrecision +from lightning_lite.plugins import DoublePrecision, MixedPrecision, Precision, TPUPrecision from lightning_lite.plugins.environments import ( KubeflowEnvironment, LightningEnvironment, @@ -409,7 +409,7 @@ def test_strategy_choice_gpu_str(strategy, strategy_class): "strategy,expected_strategy", [("ddp_sharded", DDPShardedStrategy), ("ddp_sharded_spawn", DDPShardedStrategy)] ) @pytest.mark.parametrize( - "precision,expected_precision", [(16, NativeMixedPrecision), (32, Precision), ("bf16", NativeMixedPrecision)] + "precision,expected_precision", [(16, MixedPrecision), (32, Precision), ("bf16", MixedPrecision)] ) def test_strategy_choice_sharded(strategy, expected_strategy, precision, expected_precision): connector = _Connector(strategy=strategy, devices=1, precision=precision) @@ -753,7 +753,7 @@ def test_precision_selection_16_on_cpu_warns(): _Connector(precision=16) -class MyNativeAMP(NativeMixedPrecision): +class MyNativeAMP(MixedPrecision): pass @@ -761,7 +761,7 @@ class MyNativeAMP(NativeMixedPrecision): @pytest.mark.parametrize("strategy,devices", [("ddp", 2), ("ddp_spawn", 2)]) @pytest.mark.parametrize( "is_custom_plugin,plugin_cls", - [(False, NativeMixedPrecision), (True, MyNativeAMP)], + [(False, MixedPrecision), (True, MyNativeAMP)], ) def test_precision_selection_amp_ddp(strategy, devices, is_custom_plugin, plugin_cls): plugin = None From 475bcf53beb126aeff9b9792f44104e89367ea18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 20 Dec 2022 01:32:08 +0100 Subject: [PATCH 11/14] Trainer.amp_backend. 
Tests

---
 src/pytorch_lightning/CHANGELOG.md | 13 +++++++------
 .../loops/optimization/optimizer_loop.py | 3 ++-
 .../plugins/precision/apex_amp.py | 2 --
 .../plugins/precision/fsdp_native_native_amp.py | 4 ++--
 .../plugins/precision/native_amp.py | 2 +-
 src/pytorch_lightning/trainer/trainer.py | 6 ++++++
 .../deprecated_api/test_remove_1-10.py | 4 ++++
 tests/tests_pytorch/models/test_amp.py | 15 ++++++++++-----
 tests/tests_pytorch/models/test_hooks.py | 6 ++----
 tests/tests_pytorch/plugins/test_amp_plugins.py | 1 -
 .../strategies/test_sharded_strategy.py | 1 +
 .../trainer/logging_/test_train_loop_logging.py | 1 -
 .../optimization/test_manual_optimization.py | 1 -
 .../tests_pytorch/tuner/test_scale_batch_size.py | 2 --
 14 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md
index 929568c784b40..340047512b185 100644
--- a/src/pytorch_lightning/CHANGELOG.md
+++ b/src/pytorch_lightning/CHANGELOG.md
@@ -74,12 +74,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - `nvidia/apex` deprecation ([#16039](https://github.com/PyTorchLightning/pytorch-lightning/pull/16039))
     * Deprecated `pytorch_lightning.plugins.NativeMixedPrecisionPlugin` in favor of `pytorch_lightning.plugins.MixedPrecisionPlugin`
-    * Deprecated the `LightningModule.optimizer_step(using_native_amp=...)` argument.
-    * Deprecated the `Trainer(amp_backend=...)` argument.
-    * Deprecated the `Trainer(amp_level=...)` argument.
-    * Deprecated the `pytorch_lightning.plugins.ApexMixedPrecisionPlugin` class.
-    * Deprecates the `pytorch_lightning.utilities.enum.sAMPType` enum.
-    * Deprecates the `DeepSpeedPrecisionPlugin(amp_type=..., amp_level=...)` arguments.
+    * Deprecated the `LightningModule.optimizer_step(using_native_amp=...)` argument
+    * Deprecated the `Trainer(amp_backend=...)` argument
+    * Deprecated the `Trainer.amp_backend` property
+    * Deprecated the `Trainer(amp_level=...)` argument
+    * Deprecated the `pytorch_lightning.plugins.ApexMixedPrecisionPlugin` class
+    * Deprecated the `pytorch_lightning.utilities.enums.AMPType` enum
+    * Deprecated the `DeepSpeedPrecisionPlugin(amp_type=..., amp_level=...)` arguments
 
 ### Removed
 
diff --git a/src/pytorch_lightning/loops/optimization/optimizer_loop.py b/src/pytorch_lightning/loops/optimization/optimizer_loop.py
index ebcf6cdafaf44..0435ceaf60923 100644
--- a/src/pytorch_lightning/loops/optimization/optimizer_loop.py
+++ b/src/pytorch_lightning/loops/optimization/optimizer_loop.py
@@ -30,6 +30,7 @@
     _build_training_step_kwargs,
     _extract_hiddens,
 )
+from pytorch_lightning.plugins import ApexMixedPrecisionPlugin
 from pytorch_lightning.trainer.progress import OptimizationProgress
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation
@@ -342,7 +343,7 @@ def _optimizer_step(
         is_lbfgs = isinstance(optimizer, torch.optim.LBFGS)
 
         # wraps into LightningOptimizer only for running step
-        if self.trainer.amp_backend == "apex":
+        if isinstance(self.trainer.precision_plugin, ApexMixedPrecisionPlugin):
             # apex overrides .step function and need to be wrapped on each step
             optimizer = LightningOptimizer._to_lightning_optimizer(optimizer, self.trainer.strategy, opt_idx)
         else:
diff --git a/src/pytorch_lightning/plugins/precision/apex_amp.py b/src/pytorch_lightning/plugins/precision/apex_amp.py
index c8982197ee286..cd0765a7b0a98 100644
--- a/src/pytorch_lightning/plugins/precision/apex_amp.py
+++
b/src/pytorch_lightning/plugins/precision/apex_amp.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import functools import warnings from types import ModuleType from typing import Any, Callable, Dict, Optional @@ -29,7 +28,6 @@ _APEX_AVAILABLE = RequirementCache("apex") -@functools.lru_cache(maxsize=1) def _import_amp_without_deprecation() -> ModuleType: # hide the warning upstream in favor of our deprecation warnings.filterwarnings(action="ignore", message="apex.amp is deprecated", category=FutureWarning) diff --git a/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py b/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py index 08e5adbf12549..c34aa8076067e 100644 --- a/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py +++ b/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py @@ -17,7 +17,7 @@ from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.imports import _TORCH_GREATER_EQUAL_1_12 -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin from pytorch_lightning.utilities.exceptions import MisconfigurationException if _TORCH_GREATER_EQUAL_1_12 and torch.distributed.is_available(): @@ -28,7 +28,7 @@ ShardedGradScaler = None # type: ignore[misc,assignment] -class FullyShardedNativeNativeMixedPrecisionPlugin(NativeMixedPrecisionPlugin): +class FullyShardedNativeNativeMixedPrecisionPlugin(MixedPrecisionPlugin): """Native AMP for Fully Sharded Native Training.""" def __init__(self, precision: Union[str, int], device: str, scaler: Optional[ShardedGradScaler] = None) -> None: diff --git a/src/pytorch_lightning/plugins/precision/native_amp.py b/src/pytorch_lightning/plugins/precision/native_amp.py index fdf4df7a0577f..98757de015f39 100644 --- a/src/pytorch_lightning/plugins/precision/native_amp.py +++ b/src/pytorch_lightning/plugins/precision/native_amp.py @@ -129,7 +129,7 @@ class NativeMixedPrecisionPlugin(MixedPrecisionPlugin): def __init__(self, *args: Any, **kwargs: Any) -> None: rank_zero_deprecation( - f"The `{type(self).__name__}` class has been renamed in v1.9.0 and will be removed in" + "The `NativeMixedPrecisionPlugin` class has been renamed in v1.9.0 and will be removed in" " v1.10.0. Please use `pytorch_lightning.plugins.MixedPrecisionPlugin` instead." ) super().__init__(*args, **kwargs) diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index 8cf18f4835652..79f8412b7435b 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -1772,6 +1772,12 @@ def optimizer_frequencies(self, new_freqs: List[int]) -> None: @property def amp_backend(self) -> Optional[str]: + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0 and will be removed in v1.10.0." + " Accessing `Trainer.amp_backend` will not be supported. 
You can assume it will be `'native'`", + stacklevel=6, + ) if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin): return "apex" if isinstance(self.precision_plugin, MixedPrecisionPlugin): diff --git a/tests/tests_pytorch/deprecated_api/test_remove_1-10.py b/tests/tests_pytorch/deprecated_api/test_remove_1-10.py index 727f2dd3916f8..3ab98ecd843b8 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_1-10.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_1-10.py @@ -398,3 +398,7 @@ def optimizer_step( with pytest.deprecated_call(match="AMPType` enum has been deprecated in v1.9"): AMPType.APEX + + trainer = Trainer() + with pytest.deprecated_call(match="amp_backend` will not be supported"): + trainer.amp_backend diff --git a/tests/tests_pytorch/models/test_amp.py b/tests/tests_pytorch/models/test_amp.py index 7155e5e606b80..4926a353e99be 100644 --- a/tests/tests_pytorch/models/test_amp.py +++ b/tests/tests_pytorch/models/test_amp.py @@ -166,11 +166,15 @@ def test_amp_without_apex(bwd_mock, tmpdir): """Check that even with apex amp type without requesting precision=16 the amp backend is void.""" model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, amp_backend="native") - assert trainer.amp_backend is None + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer(default_root_dir=tmpdir, amp_backend="native") + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + assert trainer.amp_backend is None - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, amp_backend="apex") - assert trainer.amp_backend is None + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, amp_backend="apex") + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + assert trainer.amp_backend is None trainer.fit(model) assert not bwd_mock.called @@ -198,7 +202,8 @@ def configure_optimizers(self): trainer = Trainer( default_root_dir=tmpdir, max_steps=5, precision=16, amp_backend="apex", accelerator="gpu", devices=1 ) - assert str(trainer.amp_backend) == "AMPType.APEX" + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + assert str(trainer.amp_backend) == "apex" trainer.fit(model) # `max_steps` is fulfilled in the third batch first optimizer, but we don't check the loop # `done` condition until all optimizers have run, so the number of backwards is higher than `max_steps` diff --git a/tests/tests_pytorch/models/test_hooks.py b/tests/tests_pytorch/models/test_hooks.py index e15829ca62b2d..683ab845f8fc3 100644 --- a/tests/tests_pytorch/models/test_hooks.py +++ b/tests/tests_pytorch/models/test_hooks.py @@ -448,9 +448,7 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly - pytest.param( - dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_cuda_gpus=1) - ), + pytest.param(dict(accelerator="gpu", devices=1, precision=16), marks=RunIf(min_cuda_gpus=1)), pytest.param( dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(min_cuda_gpus=1, amp_apex=True), @@ -520,7 +518,7 @@ def training_step(self, batch, batch_idx): "state_dict": ANY, "loops": ANY, } - if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": + if kwargs.get("precision") == 16: saved_ckpt[trainer.precision_plugin.__class__.__qualname__] 
= ANY device = torch.device("cuda:0" if "accelerator" in kwargs and kwargs["accelerator"] == "gpu" else "cpu") expected = [ diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index 7b08fc884c07a..368c42d1c44c7 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -151,7 +151,6 @@ def test_amp_gradient_unscale(tmpdir, accum: int): default_root_dir=tmpdir, limit_train_batches=2, limit_val_batches=0, - amp_backend="native", strategy="ddp_spawn", accelerator="gpu", devices=2, diff --git a/tests/tests_pytorch/strategies/test_sharded_strategy.py b/tests/tests_pytorch/strategies/test_sharded_strategy.py index 1a5c6d68d99d7..b13d9a6c49444 100644 --- a/tests/tests_pytorch/strategies/test_sharded_strategy.py +++ b/tests/tests_pytorch/strategies/test_sharded_strategy.py @@ -58,6 +58,7 @@ def test_ddp_sharded_precision_16_clip_gradients(mock_oss_clip_grad_norm, clip_v """Ensure that clip gradients is only called if the value is greater than 0.""" model = BoringModel() trainer = Trainer( + default_root_dir=tmpdir, strategy="ddp_sharded", accelerator="gpu", devices=1, diff --git a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py index 379b77ae7e88f..0352c9eda8d32 100644 --- a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py +++ b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py @@ -730,7 +730,6 @@ def on_before_backward(self, loss: torch.Tensor) -> None: trainer = Trainer( default_root_dir=tmpdir, fast_dev_run=True, - amp_backend="native", precision=16, move_metrics_to_cpu=True, accelerator="gpu", diff --git a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py index 845c849f82761..ad691f3a30364 100644 --- a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py +++ b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py @@ -324,7 +324,6 @@ def test_manual_optimization_and_return_tensor(tmpdir): limit_test_batches=0, limit_val_batches=0, precision=16, - amp_backend="native", strategy="ddp_spawn", accelerator="gpu", devices=2, diff --git a/tests/tests_pytorch/tuner/test_scale_batch_size.py b/tests/tests_pytorch/tuner/test_scale_batch_size.py index cd434fc0dcf8f..78727899b5c37 100644 --- a/tests/tests_pytorch/tuner/test_scale_batch_size.py +++ b/tests/tests_pytorch/tuner/test_scale_batch_size.py @@ -23,7 +23,6 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks.batch_size_finder import BatchSizeFinder from pytorch_lightning.demos.boring_classes import BoringDataModule, BoringModel, RandomDataset -from pytorch_lightning.utilities import AMPType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests_pytorch.helpers.runif import RunIf @@ -256,7 +255,6 @@ def test_auto_scale_batch_size_with_amp(tmpdir): ) trainer.tune(model) after_batch_size = model.batch_size - assert trainer.amp_backend == AMPType.NATIVE assert trainer.scaler is not None assert after_batch_size != before_batch_size From 645db8c32ac4a8c7ee55b38fde6db748d6189bef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 20 Dec 2022 01:33:45 +0100 Subject: [PATCH 12/14] Non-deprecated attr --- src/pytorch_lightning/loops/optimization/optimizer_loop.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/src/pytorch_lightning/loops/optimization/optimizer_loop.py b/src/pytorch_lightning/loops/optimization/optimizer_loop.py index 0435ceaf60923..62f8980d28625 100644 --- a/src/pytorch_lightning/loops/optimization/optimizer_loop.py +++ b/src/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -31,6 +31,7 @@ _extract_hiddens, ) from pytorch_lightning.plugins import ApexMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin from pytorch_lightning.trainer.progress import OptimizationProgress from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation @@ -366,7 +367,7 @@ def _optimizer_step( " `using_native_amp` argument. Removing this argument will avoid this message, you can expect it to" " return True." ) - kwargs["using_native_amp"] = self.trainer.amp_backend == "native" + kwargs["using_native_amp"] = isinstance(self.trainer.precision_plugin, MixedPrecisionPlugin) self.trainer._call_lightning_module_hook( "optimizer_step", self.trainer.current_epoch, From e24f95898ec3b460d1ac85d13e42a3272abfe1c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 20 Dec 2022 01:59:10 +0100 Subject: [PATCH 13/14] Fixes --- src/pytorch_lightning/lite/lite.py | 4 ++-- .../plugins/precision/__init__.py | 3 ++- .../plugins/precision/sharded_native_amp.py | 4 ++-- .../connectors/checkpoint_connector.py | 6 ++--- .../plugins/precision/test_deepspeed.py | 22 +++---------------- .../deprecated_api/test_remove_1-10.py | 1 + .../tests_pytorch/models/test_ddp_fork_amp.py | 4 ++-- tests/tests_pytorch/models/test_hooks.py | 5 +++-- .../strategies/test_sharded_strategy.py | 4 ++-- .../connectors/test_accelerator_connector.py | 6 ++--- 10 files changed, 22 insertions(+), 37 deletions(-) diff --git a/src/pytorch_lightning/lite/lite.py b/src/pytorch_lightning/lite/lite.py index 1f8699dcae29d..683c94446d1e1 100644 --- a/src/pytorch_lightning/lite/lite.py +++ b/src/pytorch_lightning/lite/lite.py @@ -38,7 +38,7 @@ from pytorch_lightning.accelerators import Accelerator as PLAccelerator from pytorch_lightning.plugins import DeepSpeedPrecisionPlugin as PLDeepSpeedPrecisionPlugin from pytorch_lightning.plugins import DoublePrecisionPlugin as PLDoublePrecisionPlugin -from pytorch_lightning.plugins import NativeMixedPrecisionPlugin as PLNativeMixedPrecisionPlugin +from pytorch_lightning.plugins import MixedPrecisionPlugin as PLMixedPrecisionPlugin from pytorch_lightning.plugins import PrecisionPlugin as PLPrecisionPlugin from pytorch_lightning.plugins import TPUBf16PrecisionPlugin as PLTPUBf16PrecisionPlugin from pytorch_lightning.plugins import TPUPrecisionPlugin as PLTPUPrecisionPlugin @@ -284,7 +284,7 @@ def _to_lite_precision(plugin: Optional[PLPrecisionPlugin]) -> LitePrecision: if type(plugin) is PLPrecisionPlugin: return LitePrecision() - if type(plugin) is PLNativeMixedPrecisionPlugin: + if type(plugin) is PLMixedPrecisionPlugin: return LiteMixedPrecision( precision=plugin.precision, device=plugin.device, scaler=plugin.scaler # type: ignore[arg-type] ) diff --git a/src/pytorch_lightning/plugins/precision/__init__.py b/src/pytorch_lightning/plugins/precision/__init__.py index cab728fad131f..31210b919e114 100644 --- a/src/pytorch_lightning/plugins/precision/__init__.py +++ b/src/pytorch_lightning/plugins/precision/__init__.py @@ -19,7 +19,7 @@ from pytorch_lightning.plugins.precision.fully_sharded_native_amp import FullyShardedNativeMixedPrecisionPlugin from 
pytorch_lightning.plugins.precision.hpu import HPUPrecisionPlugin from pytorch_lightning.plugins.precision.ipu import IPUPrecisionPlugin -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin, NativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.tpu import TPUPrecisionPlugin @@ -35,6 +35,7 @@ "HPUPrecisionPlugin", "IPUPrecisionPlugin", "NativeMixedPrecisionPlugin", + "MixedPrecisionPlugin", "PrecisionPlugin", "ShardedNativeMixedPrecisionPlugin", "TPUPrecisionPlugin", diff --git a/src/pytorch_lightning/plugins/precision/sharded_native_amp.py b/src/pytorch_lightning/plugins/precision/sharded_native_amp.py index 30132b291e021..2b2b7065e75d8 100644 --- a/src/pytorch_lightning/plugins/precision/sharded_native_amp.py +++ b/src/pytorch_lightning/plugins/precision/sharded_native_amp.py @@ -14,7 +14,7 @@ from typing import Optional, Union from lightning_lite.strategies.fairscale import _FAIRSCALE_AVAILABLE -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_AVAILABLE: @@ -24,7 +24,7 @@ OSS = ShardedGradScaler = object -class ShardedNativeMixedPrecisionPlugin(NativeMixedPrecisionPlugin): +class ShardedNativeMixedPrecisionPlugin(MixedPrecisionPlugin): """Native AMP for Sharded Training.""" def __init__(self, precision: Union[str, int], device: str, scaler: Optional[ShardedGradScaler] = None) -> None: diff --git a/src/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/src/pytorch_lightning/trainer/connectors/checkpoint_connector.py index 22d6f7955e6df..90b5d0801eaa3 100644 --- a/src/pytorch_lightning/trainer/connectors/checkpoint_connector.py +++ b/src/pytorch_lightning/trainer/connectors/checkpoint_connector.py @@ -29,7 +29,7 @@ from lightning_lite.utilities.cloud_io import get_filesystem from lightning_lite.utilities.types import _PATH from pytorch_lightning.callbacks import ModelCheckpoint -from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, MixedPrecisionPlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -295,9 +295,7 @@ def restore_precision_plugin_state(self) -> None: # old checkpoints compatibility if "amp_scaling_state" in self._loaded_checkpoint and isinstance(prec_plugin, ApexMixedPrecisionPlugin): prec_plugin.load_state_dict(self._loaded_checkpoint["amp_scaling_state"]) - if "native_amp_scaling_state" in self._loaded_checkpoint and isinstance( - prec_plugin, NativeMixedPrecisionPlugin - ): + if "native_amp_scaling_state" in self._loaded_checkpoint and isinstance(prec_plugin, MixedPrecisionPlugin): prec_plugin.load_state_dict(self._loaded_checkpoint["native_amp_scaling_state"]) def _restore_quantization_callbacks(self) -> None: diff --git a/tests/tests_lite/plugins/precision/test_deepspeed.py b/tests/tests_lite/plugins/precision/test_deepspeed.py index 80c8626d38fa0..b974d5b31ecbf 100644 --- 
a/tests/tests_lite/plugins/precision/test_deepspeed.py +++ b/tests/tests_lite/plugins/precision/test_deepspeed.py @@ -23,27 +23,11 @@ def test_invalid_precision_with_deepspeed_precision(): with pytest.raises(ValueError, match="is not supported in DeepSpeed. `precision` must be one of"): - DeepSpeedPrecision(precision=64, amp_type="native") - - -def test_deepspeed_precision_apex_not_installed(monkeypatch): - import lightning_lite.plugins.precision.deepspeed as deepspeed - - monkeypatch.setattr(deepspeed, "_APEX_AVAILABLE", False) - with pytest.raises(ImportError, match="You have asked for Apex AMP but `apex` is not installed."): - DeepSpeedPrecision(precision=16, amp_type="apex") - - -@mock.patch("lightning_lite.plugins.precision.deepspeed._APEX_AVAILABLE", return_value=True) -def test_deepspeed_precision_apex_default_level(_): - with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): - precision = DeepSpeedPrecision(precision=16, amp_type="apex") - assert isinstance(precision, DeepSpeedPrecision) - assert precision.amp_level == "O2" + DeepSpeedPrecision(precision=64) def test_deepspeed_precision_backward(): - precision = DeepSpeedPrecision(precision=32, amp_type="native") + precision = DeepSpeedPrecision(precision=32) tensor = Mock() model = Mock() precision.backward(tensor, model, "positional-arg", keyword="arg") @@ -61,7 +45,7 @@ def test_deepspeed_engine_is_steppable(engine): def test_deepspeed_precision_optimizer_step(): - precision = DeepSpeedPrecision(precision=32, amp_type="native") + precision = DeepSpeedPrecision(precision=32) optimizer = model = Mock() precision.optimizer_step(optimizer, lr_kwargs=dict()) model.step.assert_called_once_with(lr_kwargs=dict()) diff --git a/tests/tests_pytorch/deprecated_api/test_remove_1-10.py b/tests/tests_pytorch/deprecated_api/test_remove_1-10.py index 3ab98ecd843b8..715475bcacb4d 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_1-10.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_1-10.py @@ -355,6 +355,7 @@ def test_profiler_classes_deprecated_warning(cls): cls() +@RunIf(amp_apex=True) def test_apex_deprecation_warnings(): class MyModel(BoringModel): def optimizer_step( diff --git a/tests/tests_pytorch/models/test_ddp_fork_amp.py b/tests/tests_pytorch/models/test_ddp_fork_amp.py index 7cbc5ea84b524..de929907c86cc 100644 --- a/tests/tests_pytorch/models/test_ddp_fork_amp.py +++ b/tests/tests_pytorch/models/test_ddp_fork_amp.py @@ -15,7 +15,7 @@ import torch -from pytorch_lightning.plugins import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins import MixedPrecisionPlugin from tests_pytorch.helpers.runif import RunIf @@ -24,7 +24,7 @@ def test_amp_gpus_ddp_fork(): """Ensure the use of native AMP with `ddp_fork` (or associated alias strategies) does not generate CUDA initialization errors.""" - _ = NativeMixedPrecisionPlugin(precision=16, device="cuda") + _ = MixedPrecisionPlugin(precision=16, device="cuda") with multiprocessing.get_context("fork").Pool(1) as pool: in_bad_fork = pool.apply(torch.cuda._is_in_bad_fork) assert not in_bad_fork diff --git a/tests/tests_pytorch/models/test_hooks.py b/tests/tests_pytorch/models/test_hooks.py index 683ab845f8fc3..38a42f8b3d1fc 100644 --- a/tests/tests_pytorch/models/test_hooks.py +++ b/tests/tests_pytorch/models/test_hooks.py @@ -518,14 +518,15 @@ def training_step(self, batch, batch_idx): "state_dict": ANY, "loops": ANY, } - if kwargs.get("precision") == 16: + using_deepspeed = kwargs.get("strategy") == "deepspeed" + if kwargs.get("precision") == 16 
and not using_deepspeed: saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY device = torch.device("cuda:0" if "accelerator" in kwargs and kwargs["accelerator"] == "gpu" else "cpu") expected = [ dict(name="configure_callbacks"), dict(name="prepare_data"), # DeepSpeed needs the batch size to figure out throughput logging - *([dict(name="train_dataloader")] if kwargs.get("strategy") == "deepspeed" else []), + *([dict(name="train_dataloader")] if using_deepspeed else []), dict(name="Callback.setup", args=(trainer, model), kwargs=dict(stage="fit")), dict(name="setup", kwargs=dict(stage="fit")), dict(name="configure_sharded_model"), diff --git a/tests/tests_pytorch/strategies/test_sharded_strategy.py b/tests/tests_pytorch/strategies/test_sharded_strategy.py index b13d9a6c49444..7200d4a866397 100644 --- a/tests/tests_pytorch/strategies/test_sharded_strategy.py +++ b/tests/tests_pytorch/strategies/test_sharded_strategy.py @@ -10,7 +10,7 @@ from lightning_lite.strategies.fairscale import _FAIRSCALE_AVAILABLE from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.demos.boring_classes import BoringModel -from pytorch_lightning.plugins import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins import MixedPrecisionPlugin from pytorch_lightning.strategies import DDPShardedStrategy, DDPSpawnShardedStrategy from pytorch_lightning.trainer.states import TrainerFn from tests_pytorch.helpers.runif import RunIf @@ -91,7 +91,7 @@ def test_ddp_choice_sharded_amp(strategy, expected): """Test to ensure that plugin native amp plugin is correctly chosen when using sharded.""" trainer = Trainer(fast_dev_run=True, accelerator="gpu", devices=1, precision=16, strategy=strategy) assert isinstance(trainer.strategy, expected) - assert isinstance(trainer.precision_plugin, NativeMixedPrecisionPlugin) + assert isinstance(trainer.precision_plugin, MixedPrecisionPlugin) @RunIf(fairscale=True) diff --git a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py index 77a4888351cf2..b33400d2aa227 100644 --- a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py @@ -432,9 +432,9 @@ def test_validate_precision_type(precision): def test_amp_level_raises_error_with_native(): - with pytest.deprecated_call( - match="Setting `amp_level` inside the `Trainer` is deprecated in v1.8.0" - ), pytest.raises(MisconfigurationException, match="O2'` but it's only supported with `amp_backend='apex'`"): + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"), pytest.raises( + MisconfigurationException, match="O2'` but it's only supported with `amp_backend='apex'`" + ): _ = Trainer(amp_level="O2", amp_backend="native", precision=16) From 66379cd43cd7402c9f7b0006dc5d7e3591231edb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Tue, 20 Dec 2022 03:13:39 +0100 Subject: [PATCH 14/14] Non-standalone --- tests/tests_pytorch/plugins/test_amp_plugins.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index 368c42d1c44c7..1c9dd53f2da10 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -198,7 +198,7 @@ def configure_optimizers(self): trainer.fit(model) -@RunIf(min_cuda_gpus=2, amp_apex=True, standalone=True) 
+@RunIf(min_cuda_gpus=1, amp_apex=True) @pytest.mark.parametrize("amp_level", ["O2"]) def test_amp_apex_ddp_fit(amp_level, tmpdir): class CustomBoringModel(BoringModel): @@ -213,9 +213,8 @@ def training_step(self, batch, batch_idx): default_root_dir=tmpdir, fast_dev_run=True, precision=16, - amp_backend="apex", accelerator="gpu", - devices=2, + devices=1, strategy="ddp", plugins=plugin, enable_progress_bar=False,