diff --git a/docs/source-pytorch/accelerators/gpu_intermediate.rst b/docs/source-pytorch/accelerators/gpu_intermediate.rst index 9ba06c415b0e1..9e2e7a4071ce0 100644 --- a/docs/source-pytorch/accelerators/gpu_intermediate.rst +++ b/docs/source-pytorch/accelerators/gpu_intermediate.rst @@ -469,25 +469,26 @@ Validation and test step have the same option when using DP. Distributed and 16-bit precision ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Due to an issue with Apex and DataParallel (PyTorch and NVIDIA issue), Lightning does -not allow 16-bit and DP training. We tried to get this to work, but it's an issue on their end. - Below are the possible configurations we support. +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ -| 1 GPU | 1+ GPUs | DP | DDP | 16-bit | command | +| 1 GPU | 1+ GPUs | DDP | DP | 16-bit | command | +=======+=========+=====+=====+========+=======================================================================+ | Y | | | | | `Trainer(accelerator="gpu", devices=1)` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ | Y | | | | Y | `Trainer(accelerator="gpu", devices=1, precision=16)` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ -| | Y | Y | | | `Trainer(accelerator="gpu", devices=k, strategy='dp')` | +| | Y | Y | | | `Trainer(accelerator="gpu", devices=k, strategy='ddp')` | ++-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ +| | Y | Y | | Y | `Trainer(accelerator="gpu", devices=k, strategy='ddp', precision=16)` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ -| | Y | | Y | | `Trainer(accelerator="gpu", devices=k, strategy='ddp')` | +| | Y | | Y | | `Trainer(accelerator="gpu", devices=k, strategy='dp')` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ -| | Y | | Y | Y | `Trainer(accelerator="gpu", devices=k, strategy='ddp', precision=16)` | +| | Y | | Y | Y | `Trainer(accelerator="gpu", devices=k, strategy='dp', precision=16)` | +-------+---------+-----+-----+--------+-----------------------------------------------------------------------+ +DDP and DP can also be used with 1 GPU, but there's no reason to do so other than debugging distributed-related issues. 
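For quick reference, the configurations in the table above map directly onto plain ``Trainer`` calls. The sketch below assumes a machine with several CUDA GPUs; the ``devices=4`` value and the ``LitModel`` name are placeholders for illustration, not part of the documentation being changed.

```python
from pytorch_lightning import Trainer

# 1 GPU, with and without 16-bit mixed precision
trainer = Trainer(accelerator="gpu", devices=1)
trainer = Trainer(accelerator="gpu", devices=1, precision=16)

# k GPUs with DDP, optionally combined with 16-bit mixed precision
trainer = Trainer(accelerator="gpu", devices=4, strategy="ddp", precision=16)

# k GPUs with DP; with apex deprecated, 16-bit DP is configured the same way
trainer = Trainer(accelerator="gpu", devices=4, strategy="dp", precision=16)

# trainer.fit(LitModel())  # LitModel is a placeholder LightningModule
```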
+ Implement Your Own Distributed (DDP) training ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/source-pytorch/api_references.rst b/docs/source-pytorch/api_references.rst index 9c7f3228821f0..141cfe0d67615 100644 --- a/docs/source-pytorch/api_references.rst +++ b/docs/source-pytorch/api_references.rst @@ -184,7 +184,6 @@ precision :nosignatures: :template: classtemplate.rst - ApexMixedPrecisionPlugin ColossalAIPrecisionPlugin DeepSpeedPrecisionPlugin DoublePrecisionPlugin @@ -192,7 +191,7 @@ precision FullyShardedNativeNativeMixedPrecisionPlugin HPUPrecisionPlugin IPUPrecisionPlugin - NativeMixedPrecisionPlugin + MixedPrecisionPlugin PrecisionPlugin ShardedNativeMixedPrecisionPlugin TPUBf16PrecisionPlugin diff --git a/docs/source-pytorch/common/checkpointing_basic.rst b/docs/source-pytorch/common/checkpointing_basic.rst index 85292b0a7085d..8096f1ffd632a 100644 --- a/docs/source-pytorch/common/checkpointing_basic.rst +++ b/docs/source-pytorch/common/checkpointing_basic.rst @@ -186,5 +186,5 @@ If you don't just want to load weights, but instead restore the full training, d model = LitModel() trainer = Trainer() - # automatically restores model, epoch, step, LR schedulers, apex, etc... + # automatically restores model, epoch, step, LR schedulers, etc... trainer.fit(model, ckpt_path="some/path/to/my_checkpoint.ckpt") diff --git a/docs/source-pytorch/common/optimization.rst b/docs/source-pytorch/common/optimization.rst index b9745493194b3..840be8293ce4b 100644 --- a/docs/source-pytorch/common/optimization.rst +++ b/docs/source-pytorch/common/optimization.rst @@ -151,7 +151,6 @@ For example, here step optimizer A every batch and optimizer B every 2 batches. optimizer_idx, optimizer_closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): # update generator every step @@ -183,7 +182,6 @@ Here we add a manual learning rate warm-up without an lr scheduler. optimizer_idx, optimizer_closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): # update params @@ -215,7 +213,6 @@ to perform a step, Lightning won't be able to support accelerators, precision an optimizer_idx, optimizer_closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): optimizer.step(closure=optimizer_closure) @@ -232,7 +229,6 @@ to perform a step, Lightning won't be able to support accelerators, precision an optimizer_idx, optimizer_closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): optimizer = optimizer.optimizer diff --git a/docs/source-pytorch/common/precision_intermediate.rst b/docs/source-pytorch/common/precision_intermediate.rst index 0f149e93db4d8..fbd21d105a25a 100644 --- a/docs/source-pytorch/common/precision_intermediate.rst +++ b/docs/source-pytorch/common/precision_intermediate.rst @@ -58,6 +58,7 @@ FP16 Mixed Precision ******************** In most cases, mixed precision uses FP16. Supported `PyTorch operations `__ automatically run in FP16, saving memory and improving throughput on the supported accelerators. +Since computation happens in FP16, there is a chance of numerical instability during training. This is handled internally by a dynamic grad scaler which skips invalid steps and adjusts the scaler to ensure subsequent steps fall within a finite range. For more information `see the autocast docs `__. .. note:: @@ -69,46 +70,6 @@ In most cases, mixed precision uses FP16. Supported `PyTorch operations `__. It is more flexible and intuitive compared to `NVIDIA APEX `__. 
-Since computation happens in FP16, there is a chance of numerical instability during training. This is handled internally by a dynamic grad scaler which skips invalid steps and adjusts the scaler to ensure subsequent steps fall within a finite range. For more information `see the autocast docs `__. -Lightning uses native amp by default with ``precision=16|"bf16"``. You can also set it using: - -.. testcode:: - - Trainer(precision=16, amp_backend="native") - - -NVIDIA APEX ------------ - -.. warning:: - - We strongly recommend using the above native mixed precision rather than NVIDIA APEX unless you require more refined control. - -`NVIDIA APEX `__ offers additional flexibility in setting mixed precision. This can be useful when trying out different precision configurations, such as keeping most of your weights in FP16 and running computation in FP16. - -.. testcode:: - :skipif: not _APEX_AVAILABLE or not torch.cuda.is_available() - - Trainer(accelerator="gpu", devices=1, amp_backend="apex", precision=16) - -Set the `NVIDIA optimization level `__ via the precision plugin. - -.. testcode:: - :skipif: not _APEX_AVAILABLE or not torch.cuda.is_available() - - from pytorch_lightning.plugins import ApexMixedPrecisionPlugin - - - apex_plugin = ApexMixedPrecisionPlugin(amp_level="O3") - Trainer(accelerator="gpu", devices=1, precision=16, plugins=[apex_plugin]) - ----- - ************************ BFloat16 Mixed Precision ************************ diff --git a/docs/source-pytorch/common/trainer.rst b/docs/source-pytorch/common/trainer.rst index 1eb3f270fa1a2..8d5e35206b988 100644 --- a/docs/source-pytorch/common/trainer.rst +++ b/docs/source-pytorch/common/trainer.rst @@ -289,27 +289,6 @@ Example:: # no accumulation for epochs 1-4. accumulate 3 for epochs 5-10. accumulate 20 after that trainer = Trainer(accumulate_grad_batches={5: 3, 10: 20}) -amp_backend -^^^^^^^^^^^ - -.. raw:: html - - - -| - -Use PyTorch AMP ('native'), or NVIDIA apex ('apex'). - -.. testcode:: - - # using PyTorch built-in AMP, default used by the Trainer - trainer = Trainer(amp_backend="native") - - # using NVIDIA Apex - trainer = Trainer(amp_backend="apex") - auto_scale_batch_size ^^^^^^^^^^^^^^^^^^^^^ @@ -1156,27 +1135,6 @@ Half precision, or mixed precision, is the combined use of 32 and 16 bit floatin .. note:: When running on TPUs, torch.bfloat16 will be used but tensor printing will still show torch.float32. -.. admonition:: If you are interested in using Apex 16-bit training: - :class: dropdown - - NVIDIA Apex and DDP have instability problems. We recommend using the native AMP for 16-bit precision with multiple GPUs. - To use Apex 16-bit training: - - 1. `Install apex. `__ - - 2. Set the ``precision`` trainer flag to 16. You can customize the `Apex optimization level `_ by setting the ``amp_level`` flag - in the precision plugin. - - .. 
testcode:: - :skipif: not _APEX_AVAILABLE or not torch.cuda.is_available() - - from pytorch_lightning.plugins import ApexMixedPrecisionPlugin - - - apex_plugin = ApexMixedPrecisionPlugin(amp_level="O2") - # turn on 16-bit - trainer = Trainer(accelerator="gpu", devices=1, precision=16, plugins=[apex_plugin]) - profiler ^^^^^^^^ diff --git a/docs/source-pytorch/conf.py b/docs/source-pytorch/conf.py index 5bb3eb4c1115f..80659e021ecbf 100644 --- a/docs/source-pytorch/conf.py +++ b/docs/source-pytorch/conf.py @@ -398,7 +398,6 @@ def package_list_from_file(file): from pytorch_lightning.callbacks import Callback from pytorch_lightning.cli import _JSONARGPARSE_SIGNATURES_AVAILABLE as _JSONARGPARSE_AVAILABLE from pytorch_lightning.utilities import ( - _APEX_AVAILABLE, _TORCHVISION_AVAILABLE, ) from pytorch_lightning.loggers.neptune import _NEPTUNE_AVAILABLE diff --git a/docs/source-pytorch/extensions/plugins.rst b/docs/source-pytorch/extensions/plugins.rst index 8ba0eb9d3d87c..560c26a3e1cda 100644 --- a/docs/source-pytorch/extensions/plugins.rst +++ b/docs/source-pytorch/extensions/plugins.rst @@ -52,7 +52,6 @@ The full list of built-in precision plugins is listed below. :nosignatures: :template: classtemplate.rst - ApexMixedPrecisionPlugin ColossalAIPrecisionPlugin DeepSpeedPrecisionPlugin DoublePrecisionPlugin @@ -60,7 +59,7 @@ The full list of built-in precision plugins is listed below. FullyShardedNativeNativeMixedPrecisionPlugin HPUPrecisionPlugin IPUPrecisionPlugin - NativeMixedPrecisionPlugin + MixedPrecisionPlugin PrecisionPlugin ShardedNativeMixedPrecisionPlugin TPUBf16PrecisionPlugin diff --git a/docs/source-pytorch/model/manual_optimization.rst b/docs/source-pytorch/model/manual_optimization.rst index 96d24bbe044b0..aad1c1aa00263 100644 --- a/docs/source-pytorch/model/manual_optimization.rst +++ b/docs/source-pytorch/model/manual_optimization.rst @@ -319,4 +319,4 @@ Here is an example using a closure function. opt.step(closure=closure) .. warning:: - The :class:`~torch.optim.LBFGS` optimizer is not supported for apex AMP, native AMP, IPUs, or DeepSpeed. + The :class:`~torch.optim.LBFGS` optimizer is not supported for AMP, IPUs, or DeepSpeed. 
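The FP16 section above states that numerical instability is handled by a dynamic grad scaler that skips invalid steps. The sketch below shows, in plain PyTorch, roughly what the underlying ``torch.autocast`` + ``GradScaler`` machinery does; it is an illustration of the PyTorch API that Lightning wraps, not Lightning's internal implementation, and the tiny model and data are arbitrary.

```python
import torch

# Requires a CUDA device: forward/loss run under autocast in FP16, the scaler
# scales the loss before backward, and scaler.step() skips the optimizer step
# when gradients overflow, adjusting the scale for subsequent steps.
model = torch.nn.Linear(32, 2).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = torch.cuda.amp.GradScaler()

for _ in range(10):
    inputs = torch.randn(8, 32, device="cuda")
    targets = torch.randint(0, 2, (8,), device="cuda")
    optimizer.zero_grad()
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        loss = torch.nn.functional.cross_entropy(model(inputs), targets)
    scaler.scale(loss).backward()
    scaler.step(optimizer)  # skipped internally if any gradient is inf/nan
    scaler.update()  # grows or shrinks the scale based on the outcome
```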
diff --git a/src/lightning_lite/connector.py b/src/lightning_lite/connector.py index 59251fc1cd119..584d74571d096 100644 --- a/src/lightning_lite/connector.py +++ b/src/lightning_lite/connector.py @@ -26,7 +26,7 @@ from lightning_lite.plugins import ( CheckpointIO, DeepSpeedPrecision, - NativeMixedPrecision, + MixedPrecision, Precision, TPUBf16Precision, TPUPrecision, @@ -452,7 +452,7 @@ def _check_and_init_precision(self) -> Precision: ) return TPUBf16Precision() if isinstance(self.strategy, DeepSpeedStrategy): - return DeepSpeedPrecision(self._precision_input, amp_type="native", amp_level=None) # type: ignore + return DeepSpeedPrecision(self._precision_input) # type: ignore if self._precision_input == 32: return Precision() @@ -476,7 +476,7 @@ def _check_and_init_precision(self) -> Precision: if isinstance(self.strategy, FSDPStrategy): return FSDPPrecision(precision=self._precision_input, device=device) - return NativeMixedPrecision(precision=self._precision_input, device=device) + return MixedPrecision(precision=self._precision_input, device=device) raise RuntimeError("No precision set") diff --git a/src/lightning_lite/plugins/__init__.py b/src/lightning_lite/plugins/__init__.py index d0416e70f9747..e294df2cb2f59 100644 --- a/src/lightning_lite/plugins/__init__.py +++ b/src/lightning_lite/plugins/__init__.py @@ -18,7 +18,7 @@ from lightning_lite.plugins.precision.deepspeed import DeepSpeedPrecision from lightning_lite.plugins.precision.double import DoublePrecision from lightning_lite.plugins.precision.fsdp import FSDPPrecision -from lightning_lite.plugins.precision.native_amp import NativeMixedPrecision +from lightning_lite.plugins.precision.native_amp import MixedPrecision from lightning_lite.plugins.precision.precision import Precision from lightning_lite.plugins.precision.tpu import TPUPrecision from lightning_lite.plugins.precision.tpu_bf16 import TPUBf16Precision @@ -31,7 +31,7 @@ "Precision", "DeepSpeedPrecision", "DoublePrecision", - "NativeMixedPrecision", + "MixedPrecision", "TPUPrecision", "TPUBf16Precision", "FSDPPrecision", diff --git a/src/lightning_lite/plugins/precision/__init__.py b/src/lightning_lite/plugins/precision/__init__.py index c47ffeb3f9fc1..9cb3224a28955 100644 --- a/src/lightning_lite/plugins/precision/__init__.py +++ b/src/lightning_lite/plugins/precision/__init__.py @@ -14,7 +14,7 @@ from lightning_lite.plugins.precision.deepspeed import DeepSpeedPrecision from lightning_lite.plugins.precision.double import DoublePrecision from lightning_lite.plugins.precision.fsdp import FSDPPrecision -from lightning_lite.plugins.precision.native_amp import NativeMixedPrecision +from lightning_lite.plugins.precision.native_amp import MixedPrecision from lightning_lite.plugins.precision.precision import Precision from lightning_lite.plugins.precision.tpu import TPUPrecision from lightning_lite.plugins.precision.tpu_bf16 import TPUBf16Precision @@ -22,7 +22,7 @@ __all__ = [ "DeepSpeedPrecision", "DoublePrecision", - "NativeMixedPrecision", + "MixedPrecision", "Precision", "TPUPrecision", "TPUBf16Precision", diff --git a/src/lightning_lite/plugins/precision/deepspeed.py b/src/lightning_lite/plugins/precision/deepspeed.py index 265dfacfdb9b5..d974a899d7aae 100644 --- a/src/lightning_lite/plugins/precision/deepspeed.py +++ b/src/lightning_lite/plugins/precision/deepspeed.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Optional, TYPE_CHECKING +from typing import Any, TYPE_CHECKING import torch from lightning_utilities.core.imports import RequirementCache @@ -20,11 +20,10 @@ from lightning_lite.plugins.precision.precision import Precision from lightning_lite.plugins.precision.utils import _convert_fp_tensor -from lightning_lite.utilities.enums import AMPType, PrecisionType +from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.types import Steppable _DEEPSPEED_AVAILABLE = RequirementCache("deepspeed") -_APEX_AVAILABLE = RequirementCache("apex") if TYPE_CHECKING and _DEEPSPEED_AVAILABLE: import deepspeed @@ -34,28 +33,13 @@ class DeepSpeedPrecision(Precision): Args: precision: Full precision (32), half precision (16) or bfloat16 precision (bf16). - amp_type: The mixed precision backend to use ("native" or "apex"). - amp_level: The optimization level to use (O1, O2, etc...). By default it will be set to "O2" - if ``amp_type`` is set to "apex". Raises: - MisconfigurationException: - If using ``bfloat16`` precision and ``deepspeed None: - if amp_type == AMPType.APEX: - if not _APEX_AVAILABLE: - raise ModuleNotFoundError( - "You have asked for Apex AMP but `apex` is not installed." - " Install `apex` using this guide: https://github.com/NVIDIA/apex" - ) - - amp_level = amp_level or "O2" - + def __init__(self, precision: Literal[16, 32, "bf16"]) -> None: supported_precision = (PrecisionType.HALF, PrecisionType.FLOAT, PrecisionType.BFLOAT) if precision not in supported_precision: raise ValueError( @@ -65,8 +49,6 @@ def __init__(self, precision: Literal[16, 32, "bf16"], amp_type: str, amp_level: super().__init__() self.precision = precision - self.amp_type = amp_type - self.amp_level = amp_level def convert_input(self, data: Tensor) -> Tensor: precision_to_type = {"bf16": torch.bfloat16, 16: torch.float16, 32: torch.float32} diff --git a/src/lightning_lite/plugins/precision/fsdp.py b/src/lightning_lite/plugins/precision/fsdp.py index 020369bcbc4cf..baff0924a303b 100644 --- a/src/lightning_lite/plugins/precision/fsdp.py +++ b/src/lightning_lite/plugins/precision/fsdp.py @@ -16,16 +16,16 @@ import torch from typing_extensions import Literal -from lightning_lite.plugins.precision.native_amp import NativeMixedPrecision +from lightning_lite.plugins.precision.native_amp import MixedPrecision from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.imports import _TORCH_GREATER_EQUAL_1_12 if TYPE_CHECKING: - from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision + from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision as TorchMixedPrecision from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler -class FSDPPrecision(NativeMixedPrecision): +class FSDPPrecision(MixedPrecision): """AMP for Fully Sharded Data Parallel training.""" def __init__( @@ -43,8 +43,8 @@ def __init__( ) @property - def mixed_precision_config(self) -> "MixedPrecision": - from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision + def mixed_precision_config(self) -> "TorchMixedPrecision": + from torch.distributed.fsdp.fully_sharded_data_parallel import MixedPrecision as TorchMixedPrecision if self.precision == PrecisionType.HALF: dtype = torch.float16 @@ -52,7 +52,7 @@ def mixed_precision_config(self) -> "MixedPrecision": dtype = torch.bfloat16 else: raise ValueError(f"Was unable to infer precision type, received {self.precision!r}.") - return MixedPrecision( + return 
TorchMixedPrecision( param_dtype=dtype, reduce_dtype=dtype, buffer_dtype=dtype, diff --git a/src/lightning_lite/plugins/precision/native_amp.py b/src/lightning_lite/plugins/precision/native_amp.py index 5f9b477171c21..083d0a187b300 100644 --- a/src/lightning_lite/plugins/precision/native_amp.py +++ b/src/lightning_lite/plugins/precision/native_amp.py @@ -26,8 +26,8 @@ from lightning_lite.utilities.types import Optimizable -class NativeMixedPrecision(Precision): - """Plugin for Native Mixed Precision (AMP) training with ``torch.autocast``. +class MixedPrecision(Precision): + """Plugin for Automatic Mixed Precision (AMP) training with ``torch.autocast``. Args: precision: Whether to use ``torch.float16`` (``16``) or ``torch.bfloat16`` (``'bf16'``). diff --git a/src/lightning_lite/strategies/deepspeed.py b/src/lightning_lite/strategies/deepspeed.py index 74dc73c210c08..eb5967b230295 100644 --- a/src/lightning_lite/strategies/deepspeed.py +++ b/src/lightning_lite/strategies/deepspeed.py @@ -32,7 +32,7 @@ from lightning_lite.strategies.ddp import DDPStrategy from lightning_lite.strategies.strategy import _Sharded from lightning_lite.utilities.distributed import log -from lightning_lite.utilities.enums import AMPType, PrecisionType +from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.rank_zero import rank_zero_info from lightning_lite.utilities.seed import reset_seed from lightning_lite.utilities.types import _PATH @@ -501,7 +501,7 @@ def _format_config(self) -> None: def _format_precision_config(self) -> None: assert isinstance(self.config, dict) if self.precision.precision == PrecisionType.HALF: - if "fp16" not in self.config and self.precision.amp_type == AMPType.NATIVE: + if "fp16" not in self.config: # FP16 is a DeepSpeed standalone AMP implementation rank_zero_info("Enabling DeepSpeed FP16.") self.config["fp16"] = { @@ -512,9 +512,6 @@ def _format_precision_config(self) -> None: "hysteresis": self.hysteresis, "min_loss_scale": self.min_loss_scale, } - elif "amp" not in self.config and self.precision.amp_type == AMPType.APEX: - rank_zero_info("Enabling DeepSpeed APEX Implementation.") - self.config["amp"] = {"enabled": True, "opt_level": self.precision.amp_level} elif "bf16" not in self.config and self.precision.precision == PrecisionType.BFLOAT: rank_zero_info("Enabling DeepSpeed BF16.") self.config["bf16"] = {"enabled": True} diff --git a/src/lightning_lite/utilities/__init__.py b/src/lightning_lite/utilities/__init__.py index 17f37679f23d3..73c0a7ade0c76 100644 --- a/src/lightning_lite/utilities/__init__.py +++ b/src/lightning_lite/utilities/__init__.py @@ -14,7 +14,7 @@ """General utilities.""" from lightning_lite.utilities.apply_func import move_data_to_device # noqa: F401 -from lightning_lite.utilities.enums import _AcceleratorType, _StrategyType, AMPType, LightningEnum # noqa: F401 +from lightning_lite.utilities.enums import _AcceleratorType, _StrategyType, LightningEnum # noqa: F401 from lightning_lite.utilities.rank_zero import ( # noqa: F401 rank_zero_deprecation, rank_zero_info, diff --git a/src/lightning_lite/utilities/enums.py b/src/lightning_lite/utilities/enums.py index c1bb015010385..cd8a3dd5bd062 100644 --- a/src/lightning_lite/utilities/enums.py +++ b/src/lightning_lite/utilities/enums.py @@ -29,13 +29,6 @@ class LightningEnum(StrEnum, Enum): LightningEnum = StrEnum -class AMPType(LightningEnum): - """Type of Automatic Mixed Precission used for training.""" - - APEX = "apex" - NATIVE = "native" - - class PrecisionType(LightningEnum): 
"""Type of precision used.""" diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 5f8289a76c35c..340047512b185 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -72,6 +72,17 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Deprecated `pytorch_lightning.profiler` in favor of `pytorch_lightning.profilers` ([#16059](https://github.com/PyTorchLightning/pytorch-lightning/pull/16059)) +- `nvidia/apex` deprecation ([#16039](https://github.com/PyTorchLightning/pytorch-lightning/pull/16039)) + * Deprecated `pytorch_lightning.plugins.NativeMixedPrecisionPlugin` in favor of `pytorch_lightning.plugins.MixedPrecisionPlugin` + * Deprecated the `LightningModule.optimizer_step(using_native_amp=...)` argument + * Deprecated the `Trainer(amp_backend=...)` argument + * Deprecated the `Trainer.amp_backend` property + * Deprecated the `Trainer(amp_level=...)` argument + * Deprecated the `pytorch_lightning.plugins.ApexMixedPrecisionPlugin` class + * Deprecates the `pytorch_lightning.utilities.enum.sAMPType` enum + * Deprecates the `DeepSpeedPrecisionPlugin(amp_type=..., amp_level=...)` arguments + + ### Removed - Removed deprecated `pytorch_lightning.utilities.memory.get_gpu_memory_map` in favor of `pytorch_lightning.accelerators.cuda.get_nvidia_gpu_stats` ([#15617](https://github.com/Lightning-AI/lightning/pull/15617)) diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index c642835bb11fc..6a2a79e717c0e 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -1599,7 +1599,6 @@ def optimizer_step( optimizer_idx: int = 0, optimizer_closure: Optional[Callable[[], Any]] = None, on_tpu: bool = False, - using_native_amp: bool = False, using_lbfgs: bool = False, ) -> None: r""" @@ -1618,19 +1617,18 @@ def optimizer_step( optimizer_closure: The optimizer closure. This closure must be executed as it includes the calls to ``training_step()``, ``optimizer.zero_grad()``, and ``backward()``. 
on_tpu: ``True`` if TPU backward is required - using_native_amp: ``True`` if using native amp using_lbfgs: True if the matching optimizer is :class:`torch.optim.LBFGS` Examples:: # DEFAULT def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, - optimizer_closure, on_tpu, using_native_amp, using_lbfgs): + optimizer_closure, on_tpu, using_lbfgs): optimizer.step(closure=optimizer_closure) # Alternating schedule for optimizer steps (i.e.: GANs) def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, - optimizer_closure, on_tpu, using_native_amp, using_lbfgs): + optimizer_closure, on_tpu, using_lbfgs): # update generator opt every step if optimizer_idx == 0: optimizer.step(closure=optimizer_closure) @@ -1660,7 +1658,6 @@ def optimizer_step( optimizer_idx, optimizer_closure, on_tpu, - using_native_amp, using_lbfgs, ): # update params diff --git a/src/pytorch_lightning/lite/lite.py b/src/pytorch_lightning/lite/lite.py index 533bd84508d48..683c94446d1e1 100644 --- a/src/pytorch_lightning/lite/lite.py +++ b/src/pytorch_lightning/lite/lite.py @@ -23,7 +23,7 @@ from lightning_lite.plugins import CheckpointIO, ClusterEnvironment from lightning_lite.plugins import DeepSpeedPrecision as LiteDeepSpeedPrecision from lightning_lite.plugins import DoublePrecision as LiteDoublePrecision -from lightning_lite.plugins import NativeMixedPrecision as LiteNativeMixedPrecision +from lightning_lite.plugins import MixedPrecision as LiteMixedPrecision from lightning_lite.plugins import Precision as LitePrecision from lightning_lite.plugins import TPUBf16Precision as LiteTPUBf16Precision from lightning_lite.plugins import TPUPrecision as LiteTPUPrecision @@ -38,7 +38,7 @@ from pytorch_lightning.accelerators import Accelerator as PLAccelerator from pytorch_lightning.plugins import DeepSpeedPrecisionPlugin as PLDeepSpeedPrecisionPlugin from pytorch_lightning.plugins import DoublePrecisionPlugin as PLDoublePrecisionPlugin -from pytorch_lightning.plugins import NativeMixedPrecisionPlugin as PLNativeMixedPrecisionPlugin +from pytorch_lightning.plugins import MixedPrecisionPlugin as PLMixedPrecisionPlugin from pytorch_lightning.plugins import PrecisionPlugin as PLPrecisionPlugin from pytorch_lightning.plugins import TPUBf16PrecisionPlugin as PLTPUBf16PrecisionPlugin from pytorch_lightning.plugins import TPUPrecisionPlugin as PLTPUPrecisionPlugin @@ -284,8 +284,8 @@ def _to_lite_precision(plugin: Optional[PLPrecisionPlugin]) -> LitePrecision: if type(plugin) is PLPrecisionPlugin: return LitePrecision() - if type(plugin) is PLNativeMixedPrecisionPlugin: - return LiteNativeMixedPrecision( + if type(plugin) is PLMixedPrecisionPlugin: + return LiteMixedPrecision( precision=plugin.precision, device=plugin.device, scaler=plugin.scaler # type: ignore[arg-type] ) @@ -293,9 +293,7 @@ def _to_lite_precision(plugin: Optional[PLPrecisionPlugin]) -> LitePrecision: return LiteDoublePrecision() if type(plugin) is PLDeepSpeedPrecisionPlugin: - return LiteDeepSpeedPrecision( - precision=plugin.precision, amp_type=plugin.amp_type, amp_level=plugin.amp_level # type: ignore[arg-type] - ) + return LiteDeepSpeedPrecision(precision=plugin.precision) # type: ignore[arg-type] if type(plugin) is PLTPUPrecisionPlugin: return LiteTPUPrecision() diff --git a/src/pytorch_lightning/loops/optimization/optimizer_loop.py b/src/pytorch_lightning/loops/optimization/optimizer_loop.py index 33106bda959e0..62f8980d28625 100644 --- a/src/pytorch_lightning/loops/optimization/optimizer_loop.py +++ 
b/src/pytorch_lightning/loops/optimization/optimizer_loop.py @@ -30,9 +30,12 @@ _build_training_step_kwargs, _extract_hiddens, ) +from pytorch_lightning.plugins import ApexMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin from pytorch_lightning.trainer.progress import OptimizationProgress -from pytorch_lightning.utilities import AMPType from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation +from pytorch_lightning.utilities.signature_utils import is_param_in_hook_signature from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -341,7 +344,7 @@ def _optimizer_step( is_lbfgs = isinstance(optimizer, torch.optim.LBFGS) # wraps into LightningOptimizer only for running step - if self.trainer.amp_backend == AMPType.APEX: + if isinstance(self.trainer.precision_plugin, ApexMixedPrecisionPlugin): # apex overrides .step function and need to be wrapped on each step optimizer = LightningOptimizer._to_lightning_optimizer(optimizer, self.trainer.strategy, opt_idx) else: @@ -354,6 +357,17 @@ def _optimizer_step( self.optim_progress.optimizer.step.increment_ready() # model hook + kwargs = {} + pl_module = self.trainer.lightning_module + if is_param_in_hook_signature(pl_module.optimizer_step, "using_native_amp", explicit=True): + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0 and will be removed in v1.10.0." + f" The `{type(pl_module).__name__}.optimizer_step()` hook is overridden, including the" + " `using_native_amp` argument. Removing this argument will avoid this message, you can expect it to" + " return True." 
+ ) + kwargs["using_native_amp"] = isinstance(self.trainer.precision_plugin, MixedPrecisionPlugin) self.trainer._call_lightning_module_hook( "optimizer_step", self.trainer.current_epoch, @@ -362,7 +376,7 @@ def _optimizer_step( opt_idx, train_step_and_backward_closure, on_tpu=isinstance(self.trainer.accelerator, TPUAccelerator), - using_native_amp=(self.trainer.amp_backend == AMPType.NATIVE), + **kwargs, # type: ignore[arg-type] using_lbfgs=is_lbfgs, ) diff --git a/src/pytorch_lightning/plugins/__init__.py b/src/pytorch_lightning/plugins/__init__.py index 29b2db639335a..c719050e8cbe4 100644 --- a/src/pytorch_lightning/plugins/__init__.py +++ b/src/pytorch_lightning/plugins/__init__.py @@ -12,7 +12,7 @@ from pytorch_lightning.plugins.precision.fully_sharded_native_amp import FullyShardedNativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.hpu import HPUPrecisionPlugin from pytorch_lightning.plugins.precision.ipu import IPUPrecisionPlugin -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin, NativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.tpu import TPUPrecisionPlugin @@ -34,6 +34,7 @@ "IPUPrecisionPlugin", "HPUPrecisionPlugin", "NativeMixedPrecisionPlugin", + "MixedPrecisionPlugin", "PrecisionPlugin", "ShardedNativeMixedPrecisionPlugin", "FullyShardedNativeMixedPrecisionPlugin", diff --git a/src/pytorch_lightning/plugins/precision/__init__.py b/src/pytorch_lightning/plugins/precision/__init__.py index cab728fad131f..31210b919e114 100644 --- a/src/pytorch_lightning/plugins/precision/__init__.py +++ b/src/pytorch_lightning/plugins/precision/__init__.py @@ -19,7 +19,7 @@ from pytorch_lightning.plugins.precision.fully_sharded_native_amp import FullyShardedNativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.hpu import HPUPrecisionPlugin from pytorch_lightning.plugins.precision.ipu import IPUPrecisionPlugin -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin, NativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin from pytorch_lightning.plugins.precision.sharded_native_amp import ShardedNativeMixedPrecisionPlugin from pytorch_lightning.plugins.precision.tpu import TPUPrecisionPlugin @@ -35,6 +35,7 @@ "HPUPrecisionPlugin", "IPUPrecisionPlugin", "NativeMixedPrecisionPlugin", + "MixedPrecisionPlugin", "PrecisionPlugin", "ShardedNativeMixedPrecisionPlugin", "TPUPrecisionPlugin", diff --git a/src/pytorch_lightning/plugins/precision/apex_amp.py b/src/pytorch_lightning/plugins/precision/apex_amp.py index 6ea6128a43508..cd0765a7b0a98 100644 --- a/src/pytorch_lightning/plugins/precision/apex_amp.py +++ b/src/pytorch_lightning/plugins/precision/apex_amp.py @@ -11,27 +11,45 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import warnings +from types import ModuleType from typing import Any, Callable, Dict, Optional +from lightning_utilities.core.imports import RequirementCache from torch import Tensor from torch.optim import LBFGS, Optimizer import pytorch_lightning as pl from lightning_lite.utilities.types import _PARAMETERS, Optimizable from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin -from pytorch_lightning.utilities import _APEX_AVAILABLE, AMPType from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation -if _APEX_AVAILABLE: +_APEX_AVAILABLE = RequirementCache("apex") + + +def _import_amp_without_deprecation() -> ModuleType: + # hide the warning upstream in favor of our deprecation + warnings.filterwarnings(action="ignore", message="apex.amp is deprecated", category=FutureWarning) from apex import amp + return amp + +# TODO: remove in v1.10.0 class ApexMixedPrecisionPlugin(PrecisionPlugin): """Mixed Precision Plugin based on Nvidia/Apex (https://github.com/NVIDIA/apex)""" - backend = AMPType.APEX + backend = "apex" def __init__(self, amp_level: str = "O2") -> None: + # deprecate before the availability check so users don't install without knowing that it's deprecated + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + f" PyTorch Lightning has been deprecated in v1.9.0. The `{type(self).__name__}` class will be removed in" + " v1.10.0. Please use PyTorch's AMP implementation available in" + " `pytorch_lightning.plugins.MixedPrecisionPlugin` instead." + ) if not _APEX_AVAILABLE: raise MisconfigurationException( "You have asked for Apex AMP but `apex` is not installed." @@ -43,11 +61,13 @@ def __init__(self, amp_level: str = "O2") -> None: self._state_dict_loaded = False def main_params(self, optimizer: Optimizer) -> _PARAMETERS: + amp = _import_amp_without_deprecation() return amp.master_params(optimizer) def dispatch(self, trainer: "pl.Trainer") -> None: if not self._connected: strategy = trainer.strategy + amp = _import_amp_without_deprecation() _, strategy.optimizers = amp.initialize( trainer.lightning_module, strategy.optimizers, opt_level=self.amp_level ) @@ -73,6 +93,7 @@ def backward( # type: ignore[override] \**kwargs: Keyword arguments for the same purpose as ``*args``.
""" opt = optimizer or model.trainer.optimizers + amp = _import_amp_without_deprecation() with amp.scale_loss(tensor, opt) as tensor: super().backward(tensor, model, optimizer, *args, **kwargs) @@ -102,6 +123,7 @@ def optimizer_step( # type: ignore[override] return closure_result def state_dict(self) -> Dict[str, Any]: + amp = _import_amp_without_deprecation() return amp.state_dict() def load_state_dict(self, state_dict: Dict[str, Any]) -> None: diff --git a/src/pytorch_lightning/plugins/precision/deepspeed.py b/src/pytorch_lightning/plugins/precision/deepspeed.py index 1b6cbb6ba84dd..8cafcd20af169 100644 --- a/src/pytorch_lightning/plugins/precision/deepspeed.py +++ b/src/pytorch_lightning/plugins/precision/deepspeed.py @@ -19,13 +19,14 @@ from torch.optim import LBFGS, Optimizer import pytorch_lightning as pl -from lightning_lite.utilities.enums import AMPType, PrecisionType +from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.types import Steppable +from pytorch_lightning.plugins.precision.apex_amp import _APEX_AVAILABLE from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _APEX_AVAILABLE from pytorch_lightning.utilities.model_helpers import is_overridden +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation _DEEPSPEED_AVAILABLE = RequirementCache("deepspeed") if TYPE_CHECKING and _DEEPSPEED_AVAILABLE: @@ -39,20 +40,21 @@ class DeepSpeedPrecisionPlugin(PrecisionPlugin): Args: precision: Double precision (64), full precision (32), half precision (16) or bfloat16 precision (bf16). - amp_type: The mixed precision backend to use ("native" or "apex"). - amp_level: The optimization level to use (O1, O2, etc...). By default it will be set to "O2" - if ``amp_type`` is set to "apex". - Raises: - MisconfigurationException: - If using ``bfloat16`` precision and ``deepspeed None: - if amp_type == AMPType.APEX: + def __init__( + self, precision: Union[str, int], amp_type: Optional[str] = None, amp_level: Optional[str] = None + ) -> None: + if amp_type == "apex": + # TODO: remove in v1.10.0 + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0. Support for using it through the DeepSpeed" + " implementation will be removed in v1.10.0." + ) if not _APEX_AVAILABLE: raise MisconfigurationException( "You have asked for Apex AMP but `apex` is not installed." @@ -60,6 +62,17 @@ def __init__(self, precision: Union[str, int], amp_type: str, amp_level: Optiona ) amp_level = amp_level or "O2" + elif amp_level is not None: + raise ValueError( + f"`{type(self).__name__}(amp_level={amp_level!r})` is only relevant when using NVIDIA/apex" + ) + if amp_type is None: + amp_type = "native" + else: + rank_zero_deprecation( + f"Passing `{type(self).__name__}(amp_type={amp_type!r})` been deprecated in v1.9.0 and will be removed" + f" in v1.10.0. This argument is no longer necessary." 
+ ) supported_precision = (PrecisionType.HALF, PrecisionType.FLOAT, PrecisionType.BFLOAT) if precision not in supported_precision: diff --git a/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py b/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py index 08e5adbf12549..c34aa8076067e 100644 --- a/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py +++ b/src/pytorch_lightning/plugins/precision/fsdp_native_native_amp.py @@ -17,7 +17,7 @@ from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.imports import _TORCH_GREATER_EQUAL_1_12 -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin from pytorch_lightning.utilities.exceptions import MisconfigurationException if _TORCH_GREATER_EQUAL_1_12 and torch.distributed.is_available(): @@ -28,7 +28,7 @@ ShardedGradScaler = None # type: ignore[misc,assignment] -class FullyShardedNativeNativeMixedPrecisionPlugin(NativeMixedPrecisionPlugin): +class FullyShardedNativeNativeMixedPrecisionPlugin(MixedPrecisionPlugin): """Native AMP for Fully Sharded Native Training.""" def __init__(self, precision: Union[str, int], device: str, scaler: Optional[ShardedGradScaler] = None) -> None: diff --git a/src/pytorch_lightning/plugins/precision/native_amp.py b/src/pytorch_lightning/plugins/precision/native_amp.py index 552562dedc8de..98757de015f39 100644 --- a/src/pytorch_lightning/plugins/precision/native_amp.py +++ b/src/pytorch_lightning/plugins/precision/native_amp.py @@ -22,12 +22,13 @@ from lightning_lite.accelerators.cuda import _patch_cuda_is_available from lightning_lite.utilities.types import Optimizable from pytorch_lightning.plugins.precision.precision_plugin import PrecisionPlugin -from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType +from pytorch_lightning.utilities import GradClipAlgorithmType from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation -class NativeMixedPrecisionPlugin(PrecisionPlugin): - """Plugin for Native Mixed Precision (AMP) training with ``torch.autocast``. +class MixedPrecisionPlugin(PrecisionPlugin): + """Plugin for Automatic Mixed Precision (AMP) training with ``torch.autocast``. Args: precision: Whether to use ``torch.float16`` (``16``) or ``torch.bfloat16`` (``'bf16'``). @@ -35,8 +36,6 @@ class NativeMixedPrecisionPlugin(PrecisionPlugin): scaler: An optional :class:`torch.cuda.amp.GradScaler` to use. """ - backend = AMPType.NATIVE - def __init__( self, precision: Union[str, int], device: str, scaler: Optional[torch.cuda.amp.GradScaler] = None ) -> None: @@ -125,6 +124,17 @@ def load_state_dict(self, state_dict: Dict[str, Any]) -> None: self.scaler.load_state_dict(state_dict) +class NativeMixedPrecisionPlugin(MixedPrecisionPlugin): + backend = "native" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + rank_zero_deprecation( + "The `NativeMixedPrecisionPlugin` class has been renamed in v1.9.0 and will be removed in" + " v1.10.0. Please use `pytorch_lightning.plugins.MixedPrecisionPlugin` instead." + ) + super().__init__(*args, **kwargs) + + def _optimizer_handles_unscaling(optimizer: Any) -> bool: """Determines whether a PyTorch optimizer handles unscaling gradients in the step method rather than through the :class:`torch.cuda.amp.GradScaler`. 
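Since only the class name changes here (``NativeMixedPrecisionPlugin`` becomes ``MixedPrecisionPlugin`` with the same constructor), migrating user code is an import swap. A minimal sketch under that assumption; passing the plugin explicitly through ``plugins=`` is shown only for illustration, and most users can keep using ``precision=16``.

```python
from pytorch_lightning import Trainer
from pytorch_lightning.plugins import MixedPrecisionPlugin

# Previously: from pytorch_lightning.plugins import NativeMixedPrecisionPlugin
# The old name still imports, but now emits a deprecation warning.

# Same constructor as before: precision, device, optional GradScaler
precision_plugin = MixedPrecisionPlugin(16, "cuda")
trainer = Trainer(accelerator="gpu", devices=1, plugins=[precision_plugin])

# Equivalent shorthand that lets Lightning construct the plugin
trainer = Trainer(accelerator="gpu", devices=1, precision=16)
```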
diff --git a/src/pytorch_lightning/plugins/precision/sharded_native_amp.py b/src/pytorch_lightning/plugins/precision/sharded_native_amp.py index 30132b291e021..2b2b7065e75d8 100644 --- a/src/pytorch_lightning/plugins/precision/sharded_native_amp.py +++ b/src/pytorch_lightning/plugins/precision/sharded_native_amp.py @@ -14,7 +14,7 @@ from typing import Optional, Union from lightning_lite.strategies.fairscale import _FAIRSCALE_AVAILABLE -from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision.native_amp import MixedPrecisionPlugin from pytorch_lightning.utilities.exceptions import MisconfigurationException if _FAIRSCALE_AVAILABLE: @@ -24,7 +24,7 @@ OSS = ShardedGradScaler = object -class ShardedNativeMixedPrecisionPlugin(NativeMixedPrecisionPlugin): +class ShardedNativeMixedPrecisionPlugin(MixedPrecisionPlugin): """Native AMP for Sharded Training.""" def __init__(self, precision: Union[str, int], device: str, scaler: Optional[ShardedGradScaler] = None) -> None: diff --git a/src/pytorch_lightning/strategies/deepspeed.py b/src/pytorch_lightning/strategies/deepspeed.py index fc2363e2debe1..c171071c7496e 100644 --- a/src/pytorch_lightning/strategies/deepspeed.py +++ b/src/pytorch_lightning/strategies/deepspeed.py @@ -31,7 +31,7 @@ import pytorch_lightning as pl from lightning_lite.plugins import ClusterEnvironment -from lightning_lite.utilities.enums import AMPType, PrecisionType +from lightning_lite.utilities.enums import PrecisionType from lightning_lite.utilities.optimizer import _optimizers_to_device from lightning_lite.utilities.seed import reset_seed from lightning_lite.utilities.types import _PATH, LRScheduler, ReduceLROnPlateau @@ -654,7 +654,7 @@ def _auto_select_batch_size(self) -> int: def _format_precision_config(self) -> None: assert isinstance(self.config, dict) if self.precision_plugin.precision == PrecisionType.HALF: - if "fp16" not in self.config and self.precision_plugin.amp_type == AMPType.NATIVE: + if "fp16" not in self.config and self.precision_plugin.amp_type == "native": # FP16 is a DeepSpeed standalone AMP implementation rank_zero_info("Enabling DeepSpeed FP16.") self.config["fp16"] = { @@ -665,7 +665,7 @@ def _format_precision_config(self) -> None: "hysteresis": self.hysteresis, "min_loss_scale": self.min_loss_scale, } - elif "amp" not in self.config and self.precision_plugin.amp_type == AMPType.APEX: + elif "amp" not in self.config and self.precision_plugin.amp_type == "apex": rank_zero_info("Enabling DeepSpeed APEX Implementation.") self.config["amp"] = {"enabled": True, "opt_level": self.precision_plugin.amp_level} elif "bf16" not in self.config and self.precision_plugin.precision == PrecisionType.BFLOAT: diff --git a/src/pytorch_lightning/trainer/configuration_validator.py b/src/pytorch_lightning/trainer/configuration_validator.py index f551f3ba5d3b7..73b33d6ef4e05 100644 --- a/src/pytorch_lightning/trainer/configuration_validator.py +++ b/src/pytorch_lightning/trainer/configuration_validator.py @@ -42,11 +42,11 @@ def verify_loop_configurations(trainer: "pl.Trainer") -> None: __verify_manual_optimization_support(trainer, model) __check_training_step_requires_dataloader_iter(model) elif trainer.state.fn == TrainerFn.VALIDATING: - __verify_eval_loop_configuration(trainer, model, "val") + __verify_eval_loop_configuration(model, "val") elif trainer.state.fn == TrainerFn.TESTING: - __verify_eval_loop_configuration(trainer, model, "test") + __verify_eval_loop_configuration(model, "test") elif 
trainer.state.fn == TrainerFn.PREDICTING: - __verify_eval_loop_configuration(trainer, model, "predict") + __verify_eval_loop_configuration(model, "predict") __verify_batch_transfer_support(trainer) # TODO: Delete this check in v2.0 @@ -82,12 +82,12 @@ def __verify_train_val_loop_configuration(trainer: "pl.Trainer", model: "pl.Ligh " `training_step()`, `train_dataloader()` and `configure_optimizers()` to be defined." ) - trainer.overridden_optimizer_step = is_overridden("optimizer_step", model) - trainer.overridden_optimizer_zero_grad = is_overridden("optimizer_zero_grad", model) + overridden_optimizer_step = is_overridden("optimizer_step", model) + overridden_optimizer_zero_grad = is_overridden("optimizer_zero_grad", model) automatic_optimization = model.automatic_optimization going_to_accumulate_grad_batches = trainer.accumulation_scheduler.going_to_accumulate_grad_batches() - has_overridden_optimization_functions = trainer.overridden_optimizer_step or trainer.overridden_optimizer_zero_grad + has_overridden_optimization_functions = overridden_optimizer_step or overridden_optimizer_zero_grad if has_overridden_optimization_functions and going_to_accumulate_grad_batches and automatic_optimization: rank_zero_warn( "When using `Trainer(accumulate_grad_batches != 1)` and overriding" @@ -111,7 +111,7 @@ def __verify_train_val_loop_configuration(trainer: "pl.Trainer", model: "pl.Ligh ) -def __verify_eval_loop_configuration(trainer: "pl.Trainer", model: "pl.LightningModule", stage: str) -> None: +def __verify_eval_loop_configuration(model: "pl.LightningModule", stage: str) -> None: step_name = "validation_step" if stage == "val" else f"{stage}_step" trainer_method = "validate" if stage == "val" else stage diff --git a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py index 0393652a0784d..1a6193c04653f 100644 --- a/src/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/src/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -28,7 +28,7 @@ SLURMEnvironment, TorchElasticEnvironment, ) -from lightning_lite.utilities import _StrategyType, AMPType, LightningEnum +from lightning_lite.utilities import _StrategyType from lightning_lite.utilities.device_parser import _determine_root_gpu_device from lightning_lite.utilities.imports import _IS_INTERACTIVE, _TORCH_GREATER_EQUAL_1_11 from pytorch_lightning.accelerators import AcceleratorRegistry @@ -48,7 +48,7 @@ FullyShardedNativeMixedPrecisionPlugin, HPUPrecisionPlugin, IPUPrecisionPlugin, - NativeMixedPrecisionPlugin, + MixedPrecisionPlugin, PLUGIN_INPUT, PrecisionPlugin, ShardedNativeMixedPrecisionPlugin, @@ -100,7 +100,7 @@ def __init__( strategy: Optional[Union[str, Strategy]] = None, plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None, precision: Union[int, str] = 32, - amp_type: str = "native", + amp_type: Optional[str] = None, amp_level: Optional[str] = None, sync_batchnorm: bool = False, benchmark: Optional[bool] = None, @@ -174,16 +174,10 @@ def __init__( self._parallel_devices: List[Union[int, torch.device, str]] = [] self._layer_sync: Optional[LayerSync] = NativeSyncBatchNorm() if sync_batchnorm else None self.checkpoint_io: Optional[CheckpointIO] = None - self._amp_type_flag: Optional[LightningEnum] = None - self._amp_level_flag: Optional[str] = amp_level + self._amp_type_flag: Optional[str] = None # TODO: Remove in v1.10.0 + self._amp_level_flag: Optional[str] = amp_level # TODO: Remove in v1.10.0 self._auto_select_gpus: 
bool = auto_select_gpus - if amp_level is not None: - rank_zero_deprecation( - "Setting `amp_level` inside the `Trainer` is deprecated in v1.8.0 and will be removed" - " in v1.10.0. Please set it inside the specific precision plugin and pass it to the `Trainer`." - ) - self._check_config_and_set_final_flags( strategy=strategy, accelerator=accelerator, @@ -243,7 +237,7 @@ def _check_config_and_set_final_flags( accelerator: Optional[Union[str, Accelerator]], precision: Union[int, str], plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]], - amp_type: str, + amp_type: Optional[str], amp_level: Optional[str], sync_batchnorm: bool, ) -> None: @@ -380,13 +374,28 @@ def _check_config_and_set_final_flags( self._accelerator_flag = "cuda" self._parallel_devices = self._strategy_flag.parallel_devices - amp_type = amp_type if isinstance(amp_type, str) else None - self._amp_type_flag = AMPType.from_str(amp_type) + if amp_type is not None: + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0 and will be removed in v1.10.0." + f" The `Trainer(amp_backend={amp_type!r})` argument is deprecated. Removing this argument will avoid" + f" this message, it will select PyTorch's implementation automatically." + ) + else: + amp_type = None + self._amp_type_flag = amp_type - if amp_level is not None and self._amp_type_flag != AMPType.APEX: - raise MisconfigurationException( - f"You have asked for `amp_level={amp_level!r}` but it's only supported with `amp_backend='apex'`." + if amp_level is not None: + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0 and will be removed in v1.10.0." + f" The `Trainer(amp_level={amp_level!r})` argument is deprecated. Removing this argument will avoid" + f" this message." ) + if self._amp_type_flag != "apex": + raise MisconfigurationException( + f"You have asked for `amp_level={amp_level!r}` but it's only supported with `amp_backend='apex'`." 
+ ) def _check_device_config_and_set_final_flags( self, @@ -707,12 +716,12 @@ def _check_and_init_precision(self) -> PrecisionPlugin: if self._precision_flag in (16, "bf16"): rank_zero_info( - f"Using 16bit {self._amp_type_flag.value} Automatic Mixed Precision (AMP)" # type: ignore + f"Using 16bit {self._amp_type_flag} Automatic Mixed Precision (AMP)" if self._precision_flag == 16 else "Using bfloat16 Automatic Mixed Precision (AMP)" ) - if self._amp_type_flag == AMPType.NATIVE: + if self._amp_type_flag in (None, "native"): device = "cpu" if self._accelerator_flag == "cpu" else "cuda" if isinstance(self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy)): @@ -721,9 +730,9 @@ def _check_and_init_precision(self) -> PrecisionPlugin: return FullyShardedNativeNativeMixedPrecisionPlugin(self._precision_flag, device) if isinstance(self.strategy, DDPFullyShardedStrategy): return FullyShardedNativeMixedPrecisionPlugin(self._precision_flag, device) - return NativeMixedPrecisionPlugin(self._precision_flag, device) + return MixedPrecisionPlugin(self._precision_flag, device) - if self._amp_type_flag == AMPType.APEX: + if self._amp_type_flag == "apex": self._amp_level_flag = self._amp_level_flag or "O2" return ApexMixedPrecisionPlugin(self._amp_level_flag) @@ -753,25 +762,23 @@ def _validate_precision_choice(self) -> None: if ( self._precision_flag == 16 and isinstance(self.accelerator, CPUAccelerator) - and self._amp_type_flag == AMPType.APEX + and self._amp_type_flag == "apex" ): raise MisconfigurationException( "You passed `Trainer(accelerator='cpu', precision=16, amp_type='apex')`" " but apex AMP not supported on CPU." ) - if self._precision_flag == "bf16" and self._amp_type_flag != AMPType.NATIVE: - raise MisconfigurationException( - f"You passed `Trainer(amp_type={self._amp_type_flag.value!r}, precision='bf16')` but " # type: ignore - "it's not supported. Try using `amp_type='native'` instead." - ) - if self._precision_flag in (16, "bf16") and self._amp_type_flag == AMPType.APEX: + if self._precision_flag in (16, "bf16") and self._amp_type_flag == "apex": + if self._precision_flag == "bf16": + raise MisconfigurationException( + "You passed `Trainer(amp_type='apex', precision='bf16')` but it's not supported." + " Remove the `amp_type` argument." + ) if isinstance( self.strategy, (DDPShardedStrategy, DDPSpawnShardedStrategy, DDPFullyShardedStrategy, DDPFullyShardedNativeStrategy), ): - raise MisconfigurationException( - "Sharded plugins are not supported with apex, please switch to `amp_backend='native'`." 
- ) + raise MisconfigurationException("Sharded plugins are not supported with apex.") def _lazy_init_strategy(self) -> None: """Lazily set missing attributes on the previously instantiated strategy.""" diff --git a/src/pytorch_lightning/trainer/connectors/checkpoint_connector.py b/src/pytorch_lightning/trainer/connectors/checkpoint_connector.py index 22d6f7955e6df..90b5d0801eaa3 100644 --- a/src/pytorch_lightning/trainer/connectors/checkpoint_connector.py +++ b/src/pytorch_lightning/trainer/connectors/checkpoint_connector.py @@ -29,7 +29,7 @@ from lightning_lite.utilities.cloud_io import get_filesystem from lightning_lite.utilities.types import _PATH from pytorch_lightning.callbacks import ModelCheckpoint -from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin +from pytorch_lightning.plugins.precision import ApexMixedPrecisionPlugin, MixedPrecisionPlugin from pytorch_lightning.trainer.states import TrainerFn from pytorch_lightning.utilities import _OMEGACONF_AVAILABLE from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -295,9 +295,7 @@ def restore_precision_plugin_state(self) -> None: # old checkpoints compatibility if "amp_scaling_state" in self._loaded_checkpoint and isinstance(prec_plugin, ApexMixedPrecisionPlugin): prec_plugin.load_state_dict(self._loaded_checkpoint["amp_scaling_state"]) - if "native_amp_scaling_state" in self._loaded_checkpoint and isinstance( - prec_plugin, NativeMixedPrecisionPlugin - ): + if "native_amp_scaling_state" in self._loaded_checkpoint and isinstance(prec_plugin, MixedPrecisionPlugin): prec_plugin.load_state_dict(self._loaded_checkpoint["native_amp_scaling_state"]) def _restore_quantization_callbacks(self) -> None: diff --git a/src/pytorch_lightning/trainer/trainer.py b/src/pytorch_lightning/trainer/trainer.py index 3317814367dde..79f8412b7435b 100644 --- a/src/pytorch_lightning/trainer/trainer.py +++ b/src/pytorch_lightning/trainer/trainer.py @@ -58,12 +58,7 @@ from pytorch_lightning.loops.dataloader.evaluation_loop import EvaluationLoop from pytorch_lightning.loops.fit_loop import FitLoop from pytorch_lightning.loops.utilities import _parse_loop_limits, _reset_progress -from pytorch_lightning.plugins import ( - ApexMixedPrecisionPlugin, - NativeMixedPrecisionPlugin, - PLUGIN_INPUT, - PrecisionPlugin, -) +from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, MixedPrecisionPlugin, PLUGIN_INPUT, PrecisionPlugin from pytorch_lightning.profilers import Profiler from pytorch_lightning.strategies import ( DDPFullyShardedNativeStrategy, @@ -84,7 +79,7 @@ from pytorch_lightning.trainer.states import RunningStage, TrainerFn, TrainerState, TrainerStatus from pytorch_lightning.trainer.supporters import CombinedLoader from pytorch_lightning.tuner.tuning import _TunerResult, Tuner -from pytorch_lightning.utilities import AMPType, GradClipAlgorithmType, parsing +from pytorch_lightning.utilities import GradClipAlgorithmType, parsing from pytorch_lightning.utilities.argparse import ( _defaults_from_env_vars, add_argparse_args, @@ -164,8 +159,8 @@ def __init__( detect_anomaly: bool = False, auto_scale_batch_size: Union[str, bool] = False, plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None, - amp_backend: str = "native", - amp_level: Optional[str] = None, + amp_backend: Optional[str] = None, # TODO: Remove in v1.10.0 + amp_level: Optional[str] = None, # TODO: Remove in v1.10.0 move_metrics_to_cpu: bool = False, multiple_trainloader_mode: str = "max_size_cycle", 
inference_mode: bool = True, @@ -184,12 +179,16 @@ def __init__( amp_backend: The mixed precision backend to use ("native" or "apex"). Default: ``'native'``. + .. deprecated:: v1.9 + Setting ``amp_backend`` inside the ``Trainer`` is deprecated in v1.9.0 and will be removed + in v1.10.0. This argument was only relevant for apex which is being removed. + amp_level: The optimization level to use (O1, O2, etc...). By default it will be set to "O2" if ``amp_backend`` is set to "apex". .. deprecated:: v1.8 Setting ``amp_level`` inside the ``Trainer`` is deprecated in v1.8.0 and will be removed - in v1.10.0. Please set it inside the specific precision plugin and pass it to the ``Trainer``. + in v1.10.0. auto_lr_find: If set to True, will make trainer.tune() run a learning rate finder, trying to optimize initial learning for faster convergence. trainer.tune() method will @@ -1772,11 +1771,17 @@ def optimizer_frequencies(self, new_freqs: List[int]) -> None: self.strategy.optimizer_frequencies = new_freqs @property - def amp_backend(self) -> Optional[AMPType]: + def amp_backend(self) -> Optional[str]: + rank_zero_deprecation( + "The NVIDIA/apex AMP implementation has been deprecated upstream. Consequently, its integration inside" + " PyTorch Lightning has been deprecated in v1.9.0 and will be removed in v1.10.0." + " Accessing `Trainer.amp_backend` will not be supported. You can assume it will be `'native'`", + stacklevel=6, + ) if isinstance(self.precision_plugin, ApexMixedPrecisionPlugin): - return AMPType.APEX - if isinstance(self.precision_plugin, NativeMixedPrecisionPlugin): - return AMPType.NATIVE + return "apex" + if isinstance(self.precision_plugin, MixedPrecisionPlugin): + return "native" return None @property diff --git a/src/pytorch_lightning/utilities/__init__.py b/src/pytorch_lightning/utilities/__init__.py index f831376f4f1ad..27107bc8b81f8 100644 --- a/src/pytorch_lightning/utilities/__init__.py +++ b/src/pytorch_lightning/utilities/__init__.py @@ -15,13 +15,13 @@ import numpy +from lightning_lite.utilities import LightningEnum # noqa: F401 from lightning_lite.utilities import move_data_to_device # noqa: F401 -from lightning_lite.utilities import AMPType, LightningEnum # noqa: F401 from pytorch_lightning.utilities.distributed import AllGatherGrad # noqa: F401 +from pytorch_lightning.utilities.enums import AMPType # noqa: F401 from pytorch_lightning.utilities.enums import GradClipAlgorithmType # noqa: F401 from pytorch_lightning.utilities.grads import grad_norm # noqa: F401 from pytorch_lightning.utilities.imports import ( # noqa: F401 - _APEX_AVAILABLE, _HIVEMIND_AVAILABLE, _HOROVOD_AVAILABLE, _HPU_AVAILABLE, diff --git a/src/pytorch_lightning/utilities/enums.py b/src/pytorch_lightning/utilities/enums.py index 8a5fe0e35d6b2..832b2dc086b0a 100644 --- a/src/pytorch_lightning/utilities/enums.py +++ b/src/pytorch_lightning/utilities/enums.py @@ -15,9 +15,50 @@ from __future__ import annotations import os +from enum import Enum, EnumMeta +from typing import Any -from lightning_lite.utilities.enums import AMPType, LightningEnum, PrecisionType # noqa: F401 +from lightning_lite.utilities.enums import LightningEnum, PrecisionType # noqa: F401 from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation + + +class _DeprecatedEnumMeta(EnumMeta): + """Enum that calls `deprecate()` whenever a member is accessed.
+ + Adapted from: https://stackoverflow.com/a/62309159/208880 + """ + + def __getattribute__(cls, name: str) -> Any: + obj = super().__getattribute__(name) + # ignore __dunder__ names -- prevents potential recursion errors + if not (name.startswith("__") and name.endswith("__")) and isinstance(obj, Enum): + obj.deprecate() + return obj + + def __getitem__(cls, name: str) -> Any: + member: _DeprecatedEnumMeta = super().__getitem__(name) + member.deprecate() + return member + + def __call__(cls, *args: Any, **kwargs: Any) -> Any: + obj = super().__call__(*args, **kwargs) + if isinstance(obj, Enum): + obj.deprecate() + return obj + + +class AMPType(LightningEnum, metaclass=_DeprecatedEnumMeta): + """Type of Automatic Mixed Precision used for training.""" + + APEX = "apex" + NATIVE = "native" + + def deprecate(self) -> None: + rank_zero_deprecation( + f"The `{type(self).__name__}` enum has been deprecated in v1.9.0 and will be removed in v1.10.0." + f" Use the string value `{self.value!r}` instead." + ) class GradClipAlgorithmType(LightningEnum): diff --git a/src/pytorch_lightning/utilities/imports.py b/src/pytorch_lightning/utilities/imports.py index a1e47a5ae07fc..d365135e81364 100644 --- a/src/pytorch_lightning/utilities/imports.py +++ b/src/pytorch_lightning/utilities/imports.py @@ -24,7 +24,6 @@ # duplicated from lite because HPU is patching it below _TORCH_GREATER_EQUAL_1_13 = compare_version("torch", operator.ge, "1.13.0") -_APEX_AVAILABLE = module_available("apex.amp") _DALI_AVAILABLE = module_available("nvidia.dali") _HABANA_FRAMEWORK_AVAILABLE = package_available("habana_frameworks") _HIVEMIND_AVAILABLE = package_available("hivemind") diff --git a/tests/tests_lite/plugins/precision/test_deepspeed.py b/tests/tests_lite/plugins/precision/test_deepspeed.py index 784f7011b396f..b974d5b31ecbf 100644 --- a/tests/tests_lite/plugins/precision/test_deepspeed.py +++ b/tests/tests_lite/plugins/precision/test_deepspeed.py @@ -23,26 +23,11 @@ def test_invalid_precision_with_deepspeed_precision(): with pytest.raises(ValueError, match="is not supported in DeepSpeed. 
`precision` must be one of"): - DeepSpeedPrecision(precision=64, amp_type="native") - - -def test_deepspeed_precision_apex_not_installed(monkeypatch): - import lightning_lite.plugins.precision.deepspeed as deepspeed - - monkeypatch.setattr(deepspeed, "_APEX_AVAILABLE", False) - with pytest.raises(ImportError, match="You have asked for Apex AMP but `apex` is not installed."): - DeepSpeedPrecision(precision=16, amp_type="apex") - - -@mock.patch("lightning_lite.plugins.precision.deepspeed._APEX_AVAILABLE", return_value=True) -def test_deepspeed_precision_apex_default_level(_): - precision = DeepSpeedPrecision(precision=16, amp_type="apex") - assert isinstance(precision, DeepSpeedPrecision) - assert precision.amp_level == "O2" + DeepSpeedPrecision(precision=64) def test_deepspeed_precision_backward(): - precision = DeepSpeedPrecision(precision=32, amp_type="native") + precision = DeepSpeedPrecision(precision=32) tensor = Mock() model = Mock() precision.backward(tensor, model, "positional-arg", keyword="arg") @@ -60,7 +45,7 @@ def test_deepspeed_engine_is_steppable(engine): def test_deepspeed_precision_optimizer_step(): - precision = DeepSpeedPrecision(precision=32, amp_type="native") + precision = DeepSpeedPrecision(precision=32) optimizer = model = Mock() precision.optimizer_step(optimizer, lr_kwargs=dict()) model.step.assert_called_once_with(lr_kwargs=dict()) diff --git a/tests/tests_lite/plugins/precision/test_native_amp.py b/tests/tests_lite/plugins/precision/test_native_amp.py index 5d431df93e83f..8d997bf1f836e 100644 --- a/tests/tests_lite/plugins/precision/test_native_amp.py +++ b/tests/tests_lite/plugins/precision/test_native_amp.py @@ -16,25 +16,25 @@ import pytest import torch -from lightning_lite.plugins.precision.native_amp import NativeMixedPrecision +from lightning_lite.plugins.precision.native_amp import MixedPrecision def test_native_amp_precision_default_scaler(): - precision = NativeMixedPrecision(precision=16, device=Mock()) + precision = MixedPrecision(precision=16, device=Mock()) assert isinstance(precision.scaler, torch.cuda.amp.GradScaler) def test_native_amp_precision_scaler_with_bf16(): with pytest.raises(ValueError, match="`precision='bf16'` does not use a scaler"): - NativeMixedPrecision(precision="bf16", device=Mock(), scaler=Mock()) + MixedPrecision(precision="bf16", device=Mock(), scaler=Mock()) - precision = NativeMixedPrecision(precision="bf16", device=Mock()) + precision = MixedPrecision(precision="bf16", device=Mock()) assert precision.scaler is None def test_native_amp_precision_forward_context(): """Test to ensure that the context manager correctly is set to bfloat16 on CPU and CUDA.""" - precision = NativeMixedPrecision(precision=16, device="cuda") + precision = MixedPrecision(precision=16, device="cuda") assert precision.device == "cuda" assert isinstance(precision.scaler, torch.cuda.amp.GradScaler) assert torch.get_default_dtype() == torch.float32 @@ -42,7 +42,7 @@ def test_native_amp_precision_forward_context(): # check with str due to a bug upstream: https://github.com/pytorch/pytorch/issues/65786 assert str(torch.get_autocast_gpu_dtype()) in ("torch.float16", "torch.half") - precision = NativeMixedPrecision(precision="bf16", device="cpu") + precision = MixedPrecision(precision="bf16", device="cpu") assert precision.device == "cpu" assert precision.scaler is None with precision.forward_context(): @@ -56,7 +56,7 @@ def test_native_amp_precision_forward_context(): def test_native_amp_precision_backward(): - precision = 
NativeMixedPrecision(precision="mixed", device="cuda") + precision = MixedPrecision(precision="mixed", device="cuda") precision.scaler = Mock() precision.scaler.scale = Mock(side_effect=(lambda x: x)) tensor = Mock() @@ -67,7 +67,7 @@ def test_native_amp_precision_backward(): def test_native_amp_precision_optimizer_step_with_scaler(): - precision = NativeMixedPrecision(precision="mixed", device="cuda") + precision = MixedPrecision(precision="mixed", device="cuda") precision.scaler = Mock() optimizer = Mock() @@ -77,7 +77,7 @@ def test_native_amp_precision_optimizer_step_with_scaler(): def test_native_amp_precision_optimizer_step_without_scaler(): - precision = NativeMixedPrecision(precision="bf16", device="cuda") + precision = MixedPrecision(precision="bf16", device="cuda") assert precision.scaler is None optimizer = Mock() diff --git a/tests/tests_lite/test_connector.py b/tests/tests_lite/test_connector.py index f447e720a64a5..b6f2a8ed3304a 100644 --- a/tests/tests_lite/test_connector.py +++ b/tests/tests_lite/test_connector.py @@ -29,7 +29,7 @@ from lightning_lite.accelerators.cuda import CUDAAccelerator from lightning_lite.accelerators.mps import MPSAccelerator from lightning_lite.connector import _Connector -from lightning_lite.plugins import DoublePrecision, NativeMixedPrecision, Precision, TPUPrecision +from lightning_lite.plugins import DoublePrecision, MixedPrecision, Precision, TPUPrecision from lightning_lite.plugins.environments import ( KubeflowEnvironment, LightningEnvironment, @@ -409,7 +409,7 @@ def test_strategy_choice_gpu_str(strategy, strategy_class): "strategy,expected_strategy", [("ddp_sharded", DDPShardedStrategy), ("ddp_sharded_spawn", DDPShardedStrategy)] ) @pytest.mark.parametrize( - "precision,expected_precision", [(16, NativeMixedPrecision), (32, Precision), ("bf16", NativeMixedPrecision)] + "precision,expected_precision", [(16, MixedPrecision), (32, Precision), ("bf16", MixedPrecision)] ) def test_strategy_choice_sharded(strategy, expected_strategy, precision, expected_precision): connector = _Connector(strategy=strategy, devices=1, precision=precision) @@ -753,7 +753,7 @@ def test_precision_selection_16_on_cpu_warns(): _Connector(precision=16) -class MyNativeAMP(NativeMixedPrecision): +class MyNativeAMP(MixedPrecision): pass @@ -761,7 +761,7 @@ class MyNativeAMP(NativeMixedPrecision): @pytest.mark.parametrize("strategy,devices", [("ddp", 2), ("ddp_spawn", 2)]) @pytest.mark.parametrize( "is_custom_plugin,plugin_cls", - [(False, NativeMixedPrecision), (True, MyNativeAMP)], + [(False, MixedPrecision), (True, MyNativeAMP)], ) def test_precision_selection_amp_ddp(strategy, devices, is_custom_plugin, plugin_cls): plugin = None diff --git a/tests/tests_pytorch/conftest.py b/tests/tests_pytorch/conftest.py index d72c8d8566ab2..39b97cb16d006 100644 --- a/tests/tests_pytorch/conftest.py +++ b/tests/tests_pytorch/conftest.py @@ -305,11 +305,6 @@ def pytest_collection_modifyitems(items: List[pytest.Function], config: pytest.C for item in items: item.add_marker(deprecation_error) - apex_deprecation = pytest.mark.filterwarnings("ignore:apex.amp is deprecated:FutureWarning") - for item in items: - if any(marker.name == "skipif" and marker.kwargs.get("amp_apex", False) for marker in item.own_markers): - item.add_marker(apex_deprecation) - def pytest_addoption(parser): parser.addoption("--hpus", action="store", type=int, default=1, help="Number of hpus 1-8") diff --git a/tests/tests_pytorch/core/test_lightning_module.py b/tests/tests_pytorch/core/test_lightning_module.py index 
2b628bf61bd81..1a9a48afecd02 100644 --- a/tests/tests_pytorch/core/test_lightning_module.py +++ b/tests/tests_pytorch/core/test_lightning_module.py @@ -152,7 +152,6 @@ def optimizer_step( optimizer_idx, closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): if optimizer_idx == 0: @@ -216,7 +215,6 @@ def optimizer_step( optimizer_idx, closure, on_tpu=False, - using_native_amp=False, using_lbfgs=False, ): if optimizer_idx == 0: diff --git a/tests/tests_pytorch/deprecated_api/test_remove_1-10.py b/tests/tests_pytorch/deprecated_api/test_remove_1-10.py index 72a03957f3a28..715475bcacb4d 100644 --- a/tests/tests_pytorch/deprecated_api/test_remove_1-10.py +++ b/tests/tests_pytorch/deprecated_api/test_remove_1-10.py @@ -33,6 +33,7 @@ from pytorch_lightning.overrides import LightningDistributedModule, LightningParallelModule from pytorch_lightning.overrides.base import unwrap_lightning_module from pytorch_lightning.overrides.fairscale import LightningShardedDataParallel, unwrap_lightning_module_sharded +from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, DeepSpeedPrecisionPlugin, NativeMixedPrecisionPlugin from pytorch_lightning.plugins.environments import LightningEnvironment from pytorch_lightning.strategies.bagua import LightningBaguaModule from pytorch_lightning.strategies.utils import on_colab_kaggle @@ -67,17 +68,13 @@ sync_ddp_if_available, tpu_distributed, ) +from pytorch_lightning.utilities.enums import AMPType from pytorch_lightning.utilities.optimizer import optimizer_to_device, optimizers_to_device from pytorch_lightning.utilities.seed import pl_worker_init_function, reset_seed, seed_everything from pytorch_lightning.utilities.xla_device import inner_f, pl_multi_process, XLADeviceUtils from tests_pytorch.helpers.runif import RunIf -def test_deprecated_amp_level(): - with pytest.deprecated_call(match="Setting `amp_level` inside the `Trainer` is deprecated in v1.8.0"): - Trainer(amp_level="O3", amp_backend="apex") - - @pytest.mark.parametrize( "wrapper_class", [ @@ -356,3 +353,53 @@ def test_profiler_classes_deprecated_warning(cls): f" Use .*profilers.{cls.__name__}` class instead." 
): cls() + + +@RunIf(amp_apex=True) +def test_apex_deprecation_warnings(): + class MyModel(BoringModel): + def optimizer_step( + self, + epoch, + batch_idx, + optimizer, + optimizer_idx=0, + optimizer_closure=None, + on_tpu=False, + using_native_amp=False, + **kwargs, + ): + return optimizer_closure() + + model = MyModel() + trainer = Trainer(fast_dev_run=True) + with pytest.deprecated_call(match="including the `using_native_amp` argument"): + trainer.fit(model) + + with pytest.deprecated_call(match="ApexMixedPrecisionPlugin` class will be removed in v1.10"): + ApexMixedPrecisionPlugin() + + with pytest.deprecated_call(match="NativeMixedPrecisionPlugin` class has been renamed in v1.9"): + NativeMixedPrecisionPlugin(16, "cpu") + + with pytest.deprecated_call(match="Support for.*DeepSpeed implementation will be removed in v1.10.0"): + DeepSpeedPrecisionPlugin(16, amp_type="apex") + + with pytest.deprecated_call(match=r"amp_type='native'\)` been deprecated in v1.9"): + DeepSpeedPrecisionPlugin(16, amp_type="native") + + with pytest.raises(ValueError, match=r"amp_level='O2'\)` is only relevant when using NVIDIA/apex"): + DeepSpeedPrecisionPlugin(16, amp_level="O2") + + with pytest.deprecated_call(match=r"Trainer\(amp_backend='apex'\)` argument is deprecated"): + Trainer(amp_backend="apex") + + with pytest.deprecated_call(match=r"Trainer\(amp_level='O2'\)` argument is deprecated"): + Trainer(amp_backend="apex", amp_level="O2") + + with pytest.deprecated_call(match="AMPType` enum has been deprecated in v1.9"): + AMPType.APEX + + trainer = Trainer() + with pytest.deprecated_call(match="amp_backend` will not be supported"): + trainer.amp_backend diff --git a/tests/tests_pytorch/helpers/runif.py b/tests/tests_pytorch/helpers/runif.py index ac3b45c0f8d55..72c529dcc91b6 100644 --- a/tests/tests_pytorch/helpers/runif.py +++ b/tests/tests_pytorch/helpers/runif.py @@ -25,11 +25,11 @@ from pytorch_lightning.accelerators.mps import MPSAccelerator from pytorch_lightning.accelerators.tpu import TPUAccelerator from pytorch_lightning.callbacks.progress.rich_progress import _RICH_AVAILABLE +from pytorch_lightning.plugins.precision.apex_amp import _APEX_AVAILABLE from pytorch_lightning.strategies.bagua import _BAGUA_AVAILABLE from pytorch_lightning.strategies.colossalai import _COLOSSALAI_AVAILABLE from pytorch_lightning.strategies.deepspeed import _DEEPSPEED_AVAILABLE from pytorch_lightning.utilities.imports import ( - _APEX_AVAILABLE, _HIVEMIND_AVAILABLE, _HOROVOD_AVAILABLE, _HPU_AVAILABLE, @@ -154,6 +154,7 @@ def __new__( conditions.append(not _TORCH_QUANTIZE_AVAILABLE or _miss_default) reasons.append("PyTorch quantization") + # TODO: remove in v1.10.0 if amp_apex: conditions.append(not _APEX_AVAILABLE) reasons.append("NVIDIA Apex") diff --git a/tests/tests_pytorch/models/test_amp.py b/tests/tests_pytorch/models/test_amp.py index f769a904b7a31..4926a353e99be 100644 --- a/tests/tests_pytorch/models/test_amp.py +++ b/tests/tests_pytorch/models/test_amp.py @@ -166,11 +166,15 @@ def test_amp_without_apex(bwd_mock, tmpdir): """Check that even with apex amp type without requesting precision=16 the amp backend is void.""" model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, amp_backend="native") - assert trainer.amp_backend is None - - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, amp_backend="apex") - assert trainer.amp_backend is None + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer(default_root_dir=tmpdir, amp_backend="native") + 
with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + assert trainer.amp_backend is None + + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, amp_backend="apex") + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + assert trainer.amp_backend is None trainer.fit(model) assert not bwd_mock.called @@ -194,10 +198,12 @@ def configure_optimizers(self): model = CustomModel() model.training_epoch_end = None - trainer = Trainer( - default_root_dir=tmpdir, max_steps=5, precision=16, amp_backend="apex", accelerator="gpu", devices=1 - ) - assert str(trainer.amp_backend) == "AMPType.APEX" + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + default_root_dir=tmpdir, max_steps=5, precision=16, amp_backend="apex", accelerator="gpu", devices=1 + ) + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + assert str(trainer.amp_backend) == "apex" trainer.fit(model) # `max_steps` is fulfilled in the third batch first optimizer, but we don't check the loop # `done` condition until all optimizers have run, so the number of backwards is higher than `max_steps` @@ -210,15 +216,16 @@ def configure_optimizers(self): @RunIf(min_cuda_gpus=1, amp_apex=True) def test_amp_with_apex_reload(tmpdir): model = BoringModel() - trainer = Trainer( - default_root_dir=tmpdir, - max_steps=1, - limit_test_batches=1, - precision=16, - amp_backend="apex", - accelerator="gpu", - devices=1, - ) + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + default_root_dir=tmpdir, + max_steps=1, + limit_test_batches=1, + precision=16, + amp_backend="apex", + accelerator="gpu", + devices=1, + ) trainer.fit(model) trainer.fit_loop.max_steps = 2 diff --git a/tests/tests_pytorch/models/test_ddp_fork_amp.py b/tests/tests_pytorch/models/test_ddp_fork_amp.py index 7cbc5ea84b524..de929907c86cc 100644 --- a/tests/tests_pytorch/models/test_ddp_fork_amp.py +++ b/tests/tests_pytorch/models/test_ddp_fork_amp.py @@ -15,7 +15,7 @@ import torch -from pytorch_lightning.plugins import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins import MixedPrecisionPlugin from tests_pytorch.helpers.runif import RunIf @@ -24,7 +24,7 @@ def test_amp_gpus_ddp_fork(): """Ensure the use of native AMP with `ddp_fork` (or associated alias strategies) does not generate CUDA initialization errors.""" - _ = NativeMixedPrecisionPlugin(precision=16, device="cuda") + _ = MixedPrecisionPlugin(precision=16, device="cuda") with multiprocessing.get_context("fork").Pool(1) as pool: in_bad_fork = pool.apply(torch.cuda._is_in_bad_fork) assert not in_bad_fork diff --git a/tests/tests_pytorch/models/test_hooks.py b/tests/tests_pytorch/models/test_hooks.py index 1eae5d7b64c34..38a42f8b3d1fc 100644 --- a/tests/tests_pytorch/models/test_hooks.py +++ b/tests/tests_pytorch/models/test_hooks.py @@ -302,7 +302,6 @@ def _train_batch(self, *args, **kwargs): def _auto_train_batch( trainer, model, batches, device=torch.device("cpu"), current_epoch=0, current_batch=0, **kwargs ): - using_native_amp = kwargs.get("amp_backend") == "native" using_deepspeed = kwargs.get("strategy") == "deepspeed" out = [] for i in range(current_batch, batches): @@ -344,7 +343,7 @@ def _auto_train_batch( dict( name="optimizer_step", args=(current_epoch, i, ANY, 0, ANY), - kwargs=dict(on_tpu=False, using_lbfgs=False, 
using_native_amp=using_native_amp), + kwargs=dict(on_tpu=False, using_lbfgs=False), ), *( [dict(name="lr_scheduler_step", args=(ANY, 0, None))] @@ -449,9 +448,7 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly - pytest.param( - dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_cuda_gpus=1) - ), + pytest.param(dict(accelerator="gpu", devices=1, precision=16), marks=RunIf(min_cuda_gpus=1)), pytest.param( dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(min_cuda_gpus=1, amp_apex=True), @@ -485,17 +482,31 @@ def training_step(self, batch, batch_idx): callback = HookedCallback(called) train_batches = 2 val_batches = 2 - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - limit_train_batches=train_batches, - limit_val_batches=val_batches, - enable_progress_bar=False, - enable_model_summary=False, - callbacks=[callback], - track_grad_norm=1, - **kwargs, - ) + if kwargs.get("amp_backend") == "apex": + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=train_batches, + limit_val_batches=val_batches, + enable_progress_bar=False, + enable_model_summary=False, + callbacks=[callback], + track_grad_norm=1, + **kwargs, + ) + else: + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=train_batches, + limit_val_batches=val_batches, + enable_progress_bar=False, + enable_model_summary=False, + callbacks=[callback], + track_grad_norm=1, + **kwargs, + ) trainer.fit(model) saved_ckpt = { "callbacks": ANY, @@ -507,14 +518,15 @@ def training_step(self, batch, batch_idx): "state_dict": ANY, "loops": ANY, } - if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": + using_deepspeed = kwargs.get("strategy") == "deepspeed" + if kwargs.get("precision") == 16 and not using_deepspeed: saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY device = torch.device("cuda:0" if "accelerator" in kwargs and kwargs["accelerator"] == "gpu" else "cpu") expected = [ dict(name="configure_callbacks"), dict(name="prepare_data"), # DeepSpeed needs the batch size to figure out throughput logging - *([dict(name="train_dataloader")] if kwargs.get("strategy") == "deepspeed" else []), + *([dict(name="train_dataloader")] if using_deepspeed else []), dict(name="Callback.setup", args=(trainer, model), kwargs=dict(stage="fit")), dict(name="setup", kwargs=dict(stage="fit")), dict(name="configure_sharded_model"), diff --git a/tests/tests_pytorch/models/test_horovod.py b/tests/tests_pytorch/models/test_horovod.py index 3d223ef93a154..7963bde389f4d 100644 --- a/tests/tests_pytorch/models/test_horovod.py +++ b/tests/tests_pytorch/models/test_horovod.py @@ -200,29 +200,6 @@ def test_horovod_multi_gpu_grad_by_value(tmpdir): _run_horovod(trainer_options) -# todo: need to be fixed :] -# https://discuss.pytorch.org/t/torch-cuda-amp-vs-nvidia-apex/74994 -# Check with (tgaddair) on Horovod issues if this feature is needed -@pytest.mark.skip(reason="TODO: Horovod currently doesn't work with Apex") -@RunIf(min_cuda_gpus=2, amp_apex=True, horovod_nccl=True, skip_windows=True) -def test_horovod_apex(tmpdir): - """Test Horovod with multi-GPU support using apex amp.""" - trainer_options = dict( - default_root_dir=str(tmpdir), - gradient_clip_val=1.0, - enable_progress_bar=False, - max_epochs=1, - 
limit_train_batches=0.4, - limit_val_batches=0.2, - accelerator="gpu", - devices=2, - strategy="horovod", - amp_backend="apex", - precision=16, - ) - _run_horovod(trainer_options) - - @RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True) def test_horovod_amp(tmpdir): """Test Horovod with multi-GPU support using native amp.""" diff --git a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py index c1f7979ea8482..8ffecb817f6c5 100644 --- a/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py +++ b/tests/tests_pytorch/plugins/precision/test_deepspeed_precision.py @@ -20,7 +20,9 @@ def test_invalid_precision_with_deepspeed_precision(): - with pytest.raises(ValueError, match="is not supported. `precision` must be one of"): + with pytest.deprecated_call(match=r"amp_type='native'\)` been deprecated in v1.9.0"), pytest.raises( + ValueError, match="is not supported. `precision` must be one of" + ): DeepSpeedPrecisionPlugin(precision=64, amp_type="native") @@ -28,12 +30,15 @@ def test_deepspeed_precision_apex_not_installed(monkeypatch): import pytorch_lightning.plugins.precision.deepspeed as deepspeed_apex monkeypatch.setattr(deepspeed_apex, "_APEX_AVAILABLE", False) - with pytest.raises(MisconfigurationException, match="You have asked for Apex AMP but `apex` is not installed."): + with pytest.raises( + MisconfigurationException, match="You have asked for Apex AMP but `apex` is not installed." + ), pytest.deprecated_call(match="apex AMP implementation has been deprecated"): DeepSpeedPrecisionPlugin(precision=16, amp_type="apex") @mock.patch("pytorch_lightning.plugins.precision.deepspeed._APEX_AVAILABLE", return_value=True) def test_deepspeed_precision_apex_default_level(_): - precision_plugin = DeepSpeedPrecisionPlugin(precision=16, amp_type="apex") + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + precision_plugin = DeepSpeedPrecisionPlugin(precision=16, amp_type="apex") assert isinstance(precision_plugin, DeepSpeedPrecisionPlugin) assert precision_plugin.amp_level == "O2" diff --git a/tests/tests_pytorch/plugins/precision/test_native_amp.py b/tests/tests_pytorch/plugins/precision/test_native_amp.py index c848dcc351ac7..e37a9de45ebcf 100644 --- a/tests/tests_pytorch/plugins/precision/test_native_amp.py +++ b/tests/tests_pytorch/plugins/precision/test_native_amp.py @@ -16,14 +16,14 @@ import pytest from torch.optim import Optimizer -from pytorch_lightning.plugins import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins import MixedPrecisionPlugin from pytorch_lightning.utilities import GradClipAlgorithmType def test_clip_gradients(): """Test that `.clip_gradients()` is a no-op when clipping is disabled.""" optimizer = Mock(spec=Optimizer) - precision = NativeMixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) + precision = MixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) precision.clip_grad_by_value = Mock() precision.clip_grad_by_norm = Mock() precision.clip_gradients(optimizer) @@ -47,7 +47,7 @@ def test_optimizer_amp_scaling_support_in_step_method(): gradient clipping (example: fused Adam).""" optimizer = Mock(_step_supports_amp_scaling=True) - precision = NativeMixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) + precision = MixedPrecisionPlugin(precision=16, device="cuda:0", scaler=Mock()) with pytest.raises(RuntimeError, match="The current optimizer.*does not allow for gradient clipping"): 
precision.clip_gradients(optimizer, clip_val=1.0) diff --git a/tests/tests_pytorch/plugins/test_amp_plugins.py b/tests/tests_pytorch/plugins/test_amp_plugins.py index 65a764a093127..1c9dd53f2da10 100644 --- a/tests/tests_pytorch/plugins/test_amp_plugins.py +++ b/tests/tests_pytorch/plugins/test_amp_plugins.py @@ -20,13 +20,13 @@ from pytorch_lightning import Trainer from pytorch_lightning.demos.boring_classes import BoringModel -from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, NativeMixedPrecisionPlugin +from pytorch_lightning.plugins import ApexMixedPrecisionPlugin, MixedPrecisionPlugin from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests_pytorch.conftest import mock_cuda_count from tests_pytorch.helpers.runif import RunIf -class MyNativeAMP(NativeMixedPrecisionPlugin): +class MyNativeAMP(MixedPrecisionPlugin): pass @@ -52,7 +52,7 @@ class MyApexPlugin(ApexMixedPrecisionPlugin): @pytest.mark.parametrize( "amp,custom_plugin,plugin_cls", [ - ("native", False, NativeMixedPrecisionPlugin), + ("native", False, MixedPrecisionPlugin), ("native", True, MyNativeAMP), pytest.param("apex", False, ApexMixedPrecisionPlugin, marks=RunIf(amp_apex=True)), pytest.param("apex", True, MyApexPlugin, marks=RunIf(amp_apex=True)), @@ -61,16 +61,21 @@ class MyApexPlugin(ApexMixedPrecisionPlugin): def test_amp_apex_ddp(cuda_count_2, strategy, devices, amp, custom_plugin, plugin_cls): plugin = None if custom_plugin: - plugin = plugin_cls(16, "cpu") if amp == "native" else plugin_cls() - trainer = Trainer( - fast_dev_run=True, - precision=16, - amp_backend=amp, - accelerator="gpu", - devices=devices, - strategy=strategy, - plugins=plugin, - ) + if amp == "native": + plugin = plugin_cls(16, "cpu") + else: + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + plugin = plugin_cls() + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + fast_dev_run=True, + precision=16, + amp_backend=amp, + accelerator="gpu", + devices=devices, + strategy=strategy, + plugins=plugin, + ) assert isinstance(trainer.precision_plugin, plugin_cls) @@ -146,7 +151,6 @@ def test_amp_gradient_unscale(tmpdir, accum: int): default_root_dir=tmpdir, limit_train_batches=2, limit_val_batches=0, - amp_backend="native", strategy="ddp_spawn", accelerator="gpu", devices=2, @@ -189,14 +193,12 @@ def configure_optimizers(self): torch.optim.SGD(self.layer2.parameters(), lr=0.1), ] - trainer = Trainer( - default_root_dir=tmpdir, accelerator="gpu", devices=1, fast_dev_run=1, amp_backend="native", precision=16 - ) + trainer = Trainer(default_root_dir=tmpdir, accelerator="gpu", devices=1, fast_dev_run=1, precision=16) model = CustomBoringModel() trainer.fit(model) -@RunIf(min_cuda_gpus=2, amp_apex=True, standalone=True) +@RunIf(min_cuda_gpus=1, amp_apex=True) @pytest.mark.parametrize("amp_level", ["O2"]) def test_amp_apex_ddp_fit(amp_level, tmpdir): class CustomBoringModel(BoringModel): @@ -205,15 +207,16 @@ def training_step(self, batch, batch_idx): assert self.trainer.precision_plugin._connected return super().training_step(batch, batch_idx) + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + plugin = ApexMixedPrecisionPlugin(amp_level=amp_level) trainer = Trainer( default_root_dir=tmpdir, fast_dev_run=True, precision=16, - amp_backend="apex", accelerator="gpu", - devices=2, + devices=1, strategy="ddp", - plugins=ApexMixedPrecisionPlugin(amp_level=amp_level), + plugins=plugin, 
enable_progress_bar=False, enable_model_summary=False, ) @@ -226,16 +229,17 @@ def training_step(self, batch, batch_idx): @RunIf(min_cuda_gpus=2, amp_apex=True) @pytest.mark.parametrize("amp_level", ["O2"]) def test_amp_apex_ddp_spawn_fit(amp_level, tmpdir): - trainer = Trainer( - default_root_dir=tmpdir, - fast_dev_run=True, - precision=16, - amp_backend="apex", - accelerator="gpu", - devices=2, - strategy="ddp_spawn", - plugins=ApexMixedPrecisionPlugin(amp_level=amp_level), - ) + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + default_root_dir=tmpdir, + fast_dev_run=True, + precision=16, + amp_backend="apex", + accelerator="gpu", + devices=2, + strategy="ddp_spawn", + plugins=ApexMixedPrecisionPlugin(amp_level=amp_level), + ) assert isinstance(trainer.precision_plugin, ApexMixedPrecisionPlugin) model = BoringModel() trainer.fit(model) @@ -243,7 +247,7 @@ def test_amp_apex_ddp_spawn_fit(amp_level, tmpdir): def test_cpu_amp_precision_context_manager(tmpdir): """Test to ensure that the context manager correctly is set to CPU + bfloat16.""" - plugin = NativeMixedPrecisionPlugin("bf16", "cpu") + plugin = MixedPrecisionPlugin("bf16", "cpu") assert plugin.device == "cpu" assert plugin.scaler is None context_manager = plugin.autocast_context_manager() @@ -253,16 +257,20 @@ def test_cpu_amp_precision_context_manager(tmpdir): def test_precision_selection_raises(monkeypatch): - with pytest.raises( + with pytest.deprecated_call(match=r"amp_backend='apex'\)` argument is deprecated"), pytest.raises( MisconfigurationException, match=r"precision=16, amp_type='apex'\)` but apex AMP not supported on CPU" ): Trainer(amp_backend="apex", precision=16) - with pytest.raises(MisconfigurationException, match=r"amp_type='apex', precision='bf16'\)` but it's not supported"): + with pytest.deprecated_call(match=r"amp_backend='apex'\)` argument is deprecated"), pytest.raises( + MisconfigurationException, match=r"amp_type='apex', precision='bf16'\)` but it's not supported" + ): Trainer(amp_backend="apex", precision="bf16") mock_cuda_count(monkeypatch, 1) - with pytest.raises(MisconfigurationException, match="Sharded plugins are not supported with apex"): + with pytest.deprecated_call(match=r"amp_backend='apex'\)` argument is deprecated"), pytest.raises( + MisconfigurationException, match="Sharded plugins are not supported with apex" + ): with mock.patch("lightning_lite.accelerators.cuda.is_cuda_available", return_value=True): Trainer(amp_backend="apex", precision=16, accelerator="gpu", devices=1, strategy="ddp_fully_sharded") @@ -271,5 +279,5 @@ def test_precision_selection_raises(monkeypatch): monkeypatch.setattr(apex, "_APEX_AVAILABLE", False) with mock.patch("lightning_lite.accelerators.cuda.is_cuda_available", return_value=True), pytest.raises( MisconfigurationException, match="asked for Apex AMP but `apex` is not installed" - ): + ), pytest.deprecated_call(match=r"amp_backend='apex'\)` argument is deprecated"): Trainer(amp_backend="apex", precision=16, accelerator="gpu", devices=1) diff --git a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py index 640c44c717ac2..902fe833fef10 100644 --- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py +++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py @@ -140,15 +140,25 @@ def test_deepspeed_precision_choice(cuda_count_1, amp_backend, tmpdir): DeepSpeed handles precision via Custom DeepSpeedPrecisionPlugin """ - - trainer = 
Trainer( - fast_dev_run=True, - default_root_dir=tmpdir, - accelerator="gpu", - strategy="deepspeed", - amp_backend=amp_backend, - precision=16, - ) + if amp_backend == "apex": + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + trainer = Trainer( + fast_dev_run=True, + default_root_dir=tmpdir, + accelerator="gpu", + strategy="deepspeed", + amp_backend=amp_backend, + precision=16, + ) + else: + trainer = Trainer( + fast_dev_run=True, + default_root_dir=tmpdir, + accelerator="gpu", + strategy="deepspeed", + amp_backend=amp_backend, + precision=16, + ) assert isinstance(trainer.strategy, DeepSpeedStrategy) assert isinstance(trainer.strategy.precision_plugin, DeepSpeedPrecisionPlugin) diff --git a/tests/tests_pytorch/strategies/test_sharded_strategy.py b/tests/tests_pytorch/strategies/test_sharded_strategy.py index 1a5c6d68d99d7..7200d4a866397 100644 --- a/tests/tests_pytorch/strategies/test_sharded_strategy.py +++ b/tests/tests_pytorch/strategies/test_sharded_strategy.py @@ -10,7 +10,7 @@ from lightning_lite.strategies.fairscale import _FAIRSCALE_AVAILABLE from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.demos.boring_classes import BoringModel -from pytorch_lightning.plugins import NativeMixedPrecisionPlugin +from pytorch_lightning.plugins import MixedPrecisionPlugin from pytorch_lightning.strategies import DDPShardedStrategy, DDPSpawnShardedStrategy from pytorch_lightning.trainer.states import TrainerFn from tests_pytorch.helpers.runif import RunIf @@ -58,6 +58,7 @@ def test_ddp_sharded_precision_16_clip_gradients(mock_oss_clip_grad_norm, clip_v """Ensure that clip gradients is only called if the value is greater than 0.""" model = BoringModel() trainer = Trainer( + default_root_dir=tmpdir, strategy="ddp_sharded", accelerator="gpu", devices=1, @@ -90,7 +91,7 @@ def test_ddp_choice_sharded_amp(strategy, expected): """Test to ensure that plugin native amp plugin is correctly chosen when using sharded.""" trainer = Trainer(fast_dev_run=True, accelerator="gpu", devices=1, precision=16, strategy=strategy) assert isinstance(trainer.strategy, expected) - assert isinstance(trainer.precision_plugin, NativeMixedPrecisionPlugin) + assert isinstance(trainer.precision_plugin, MixedPrecisionPlugin) @RunIf(fairscale=True) diff --git a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py index 77a4888351cf2..b33400d2aa227 100644 --- a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py @@ -432,9 +432,9 @@ def test_validate_precision_type(precision): def test_amp_level_raises_error_with_native(): - with pytest.deprecated_call( - match="Setting `amp_level` inside the `Trainer` is deprecated in v1.8.0" - ), pytest.raises(MisconfigurationException, match="O2'` but it's only supported with `amp_backend='apex'`"): + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"), pytest.raises( + MisconfigurationException, match="O2'` but it's only supported with `amp_backend='apex'`" + ): _ = Trainer(amp_level="O2", amp_backend="native", precision=16) diff --git a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py index 379b77ae7e88f..0352c9eda8d32 100644 --- a/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py +++ 
b/tests/tests_pytorch/trainer/logging_/test_train_loop_logging.py @@ -730,7 +730,6 @@ def on_before_backward(self, loss: torch.Tensor) -> None: trainer = Trainer( default_root_dir=tmpdir, fast_dev_run=True, - amp_backend="native", precision=16, move_metrics_to_cpu=True, accelerator="gpu", diff --git a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py index 2224ed8569709..ad691f3a30364 100644 --- a/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py +++ b/tests/tests_pytorch/trainer/optimization/test_manual_optimization.py @@ -65,19 +65,51 @@ def configure_optimizers(self): @pytest.mark.parametrize( - "kwargs", - [ - {}, - pytest.param( - {"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "native"}, marks=RunIf(min_cuda_gpus=1) - ), - pytest.param( - {"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "apex"}, - marks=RunIf(min_cuda_gpus=1, amp_apex=True), - ), - ], + "kwargs", [{}, pytest.param({"accelerator": "gpu", "devices": 1, "precision": 16}, marks=RunIf(min_cuda_gpus=1))] ) def test_multiple_optimizers_manual_no_return(tmpdir, kwargs): + class TestModel(ManualOptModel): + def training_step(self, batch, batch_idx): + # avoid returning a value + super().training_step(batch, batch_idx) + + def training_epoch_end(self, outputs): + # outputs is empty as training_step does not return + # and it is not automatic optimization + assert not outputs + + model = TestModel() + model.val_dataloader = None + + limit_train_batches = 2 + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=limit_train_batches, + limit_val_batches=2, + max_epochs=1, + log_every_n_steps=1, + enable_model_summary=False, + **kwargs, + ) + + if kwargs.get("precision") == 16: + # mock the scaler instead of the optimizer step because it can be skipped with NaNs + scaler_step_patch = mock.patch.object( + trainer.precision_plugin.scaler, "step", wraps=trainer.precision_plugin.scaler.step + ) + scaler_step = scaler_step_patch.start() + + with mock.patch.object(Strategy, "backward", wraps=trainer.strategy.backward) as bwd_mock: + trainer.fit(model) + assert bwd_mock.call_count == limit_train_batches * 3 + + if kwargs.get("precision") == 16: + scaler_step_patch.stop() + assert scaler_step.call_count == len(model.optimizers()) * limit_train_batches + + +@RunIf(min_cuda_gpus=1, amp_apex=True) +def test_multiple_optimizers_manual_no_return_apex(tmpdir): apex_optimizer_patches = [] apex_optimizer_steps = [] @@ -92,8 +124,6 @@ def training_epoch_end(self, outputs): assert not outputs def on_train_start(self): - if kwargs.get("amp_backend") != "apex": - return # extremely ugly. APEX patches all the native torch optimizers on `_initialize` which we call on # `ApexMixedPrecisionPlugin.dispatch`. 
Additionally, their replacement `new_step` functions are locally # defined so can't even patch those, thus we need to create the mock after APEX has been initialized @@ -106,15 +136,15 @@ def on_train_start(self): apex_optimizer_steps.append(patch.start()) def on_train_end(self): - if kwargs.get("amp_backend") == "apex": - for p in apex_optimizer_patches: - p.stop() + for p in apex_optimizer_patches: + p.stop() model = TestModel() model.val_dataloader = None limit_train_batches = 2 - plugins = [ApexMixedPrecisionPlugin(amp_level="O2")] if kwargs.get("amp_backend") == "apex" else [] + with pytest.deprecated_call(match="apex AMP implementation has been deprecated"): + plugins = [ApexMixedPrecisionPlugin(amp_level="O2")] trainer = Trainer( default_root_dir=tmpdir, @@ -124,25 +154,16 @@ def on_train_end(self): log_every_n_steps=1, enable_model_summary=False, plugins=plugins, - **kwargs, + accelerator="gpu", + devices=1, + precision=16, ) - if kwargs.get("amp_backend") == "native": - # mock the scaler instead of the optimizer step because it can be skipped with NaNs - scaler_step_patch = mock.patch.object( - trainer.precision_plugin.scaler, "step", wraps=trainer.precision_plugin.scaler.step - ) - scaler_step = scaler_step_patch.start() - with mock.patch.object(Strategy, "backward", wraps=trainer.strategy.backward) as bwd_mock: trainer.fit(model) assert bwd_mock.call_count == limit_train_batches * 3 - if kwargs.get("amp_backend") == "native": - scaler_step_patch.stop() - assert scaler_step.call_count == len(model.optimizers()) * limit_train_batches - if kwargs.get("amp_backend") == "apex": - assert [s.call_count for s in apex_optimizer_steps] == [len(model.optimizers())] * limit_train_batches + assert [s.call_count for s in apex_optimizer_steps] == [len(model.optimizers())] * limit_train_batches def test_multiple_optimizers_manual_return(tmpdir): @@ -303,7 +324,6 @@ def test_manual_optimization_and_return_tensor(tmpdir): limit_test_batches=0, limit_val_batches=0, precision=16, - amp_backend="native", strategy="ddp_spawn", accelerator="gpu", devices=2, @@ -392,7 +412,6 @@ def on_train_epoch_end(self, *_, **__): limit_test_batches=0, limit_val_batches=0, precision=16, - amp_backend="native", accelerator="gpu", devices=1, ) @@ -476,7 +495,6 @@ def log_grad_norm(self, grad_norm_dict): log_every_n_steps=1, enable_model_summary=False, precision=16, - amp_backend="native", accelerator="gpu", devices=1, track_grad_norm=2, diff --git a/tests/tests_pytorch/tuner/test_scale_batch_size.py b/tests/tests_pytorch/tuner/test_scale_batch_size.py index cd434fc0dcf8f..78727899b5c37 100644 --- a/tests/tests_pytorch/tuner/test_scale_batch_size.py +++ b/tests/tests_pytorch/tuner/test_scale_batch_size.py @@ -23,7 +23,6 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks.batch_size_finder import BatchSizeFinder from pytorch_lightning.demos.boring_classes import BoringDataModule, BoringModel, RandomDataset -from pytorch_lightning.utilities import AMPType from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests_pytorch.helpers.runif import RunIf @@ -256,7 +255,6 @@ def test_auto_scale_batch_size_with_amp(tmpdir): ) trainer.tune(model) after_batch_size = model.batch_size - assert trainer.amp_backend == AMPType.NATIVE assert trainer.scaler is not None assert after_batch_size != before_batch_size diff --git a/tests/tests_pytorch/utilities/test_imports.py b/tests/tests_pytorch/utilities/test_imports.py index 29f221e717879..3a22e8aeb6e7f 100644 --- 
a/tests/tests_pytorch/utilities/test_imports.py
+++ b/tests/tests_pytorch/utilities/test_imports.py
@@ -23,8 +23,9 @@
 from lightning_utilities.core.imports import compare_version, module_available, RequirementCache
 from torch.distributed import is_available
+from pytorch_lightning.plugins.precision.apex_amp import _APEX_AVAILABLE
 from pytorch_lightning.strategies.bagua import _BAGUA_AVAILABLE
-from pytorch_lightning.utilities import _APEX_AVAILABLE, _HOROVOD_AVAILABLE, _OMEGACONF_AVAILABLE, _POPTORCH_AVAILABLE
+from pytorch_lightning.utilities import _HOROVOD_AVAILABLE, _OMEGACONF_AVAILABLE, _POPTORCH_AVAILABLE
 from tests_pytorch.helpers.runif import RunIf
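
Taken together, the changes above amount to a small user-facing migration: ``NativeMixedPrecisionPlugin`` is renamed to ``MixedPrecisionPlugin``, the ``Trainer(amp_backend=..., amp_level=...)`` arguments and the ``AMPType`` enum are deprecated, and plain strings plus the ``precision`` argument take their place. The sketch below is illustrative only and is not part of the patch; the class and argument names are taken from the diff above, while the surrounding setup (a CUDA machine, the model being trained) is assumed.

    # Illustrative migration sketch (assumes a CUDA machine and the post-patch package layout).
    import torch

    from pytorch_lightning import Trainer
    from pytorch_lightning.plugins import MixedPrecisionPlugin  # renamed from NativeMixedPrecisionPlugin

    # Before (now deprecated): Trainer(amp_backend="native", precision=16)
    # After: the backend argument is dropped; `precision` alone selects native AMP.
    trainer = Trainer(accelerator="gpu", devices=1, precision=16)

    # The renamed plugin can still be passed explicitly, e.g. to supply a custom GradScaler.
    plugin = MixedPrecisionPlugin(precision=16, device="cuda", scaler=torch.cuda.amp.GradScaler())
    trainer = Trainer(accelerator="gpu", devices=1, plugins=plugin)

    # The AMPType enum is kept only for backwards compatibility: accessing a member such as
    # AMPType.NATIVE now emits a deprecation warning via _DeprecatedEnumMeta, so prefer the
    # plain strings "native" / "apex" going forward.
    backend = "native"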