Skip to content

Commit 35c65b0

Browse files
authored
Fix test suite when running on MPS-enabled hardware (#14708)
1 parent 5aaab22 commit 35c65b0

File tree

13 files changed

+71
-58
lines changed

13 files changed

+71
-58
lines changed

_notebooks

tests/tests_lite/accelerators/test_mps.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from tests_lite.helpers.runif import RunIf
1717

1818
from lightning_lite.accelerators.mps import MPSAccelerator
19+
from lightning_lite.utilities.exceptions import MisconfigurationException
1920

2021
_MAYBE_MPS = "mps" if MPSAccelerator.is_available() else "cpu" # torch.device(mps) only works on torch>=1.12
2122

@@ -39,11 +40,17 @@ def test_init_device_with_wrong_device_type():
3940
"devices,expected",
4041
[
4142
(1, [torch.device(_MAYBE_MPS, 0)]),
42-
(2, [torch.device(_MAYBE_MPS, 0), torch.device(_MAYBE_MPS, 1)]),
4343
([0], [torch.device(_MAYBE_MPS, 0)]),
44-
# TODO(lite): This case passes with the implementation from PL, but looks like a bug
45-
([0, 2], [torch.device(_MAYBE_MPS, 0), torch.device(_MAYBE_MPS, 1)]),
44+
("1", [torch.device(_MAYBE_MPS, 0)]),
45+
("0,", [torch.device(_MAYBE_MPS, 0)]),
4646
],
4747
)
4848
def test_get_parallel_devices(devices, expected):
4949
assert MPSAccelerator.get_parallel_devices(devices) == expected
50+
51+
52+
@RunIf(mps=True)
53+
@pytest.mark.parametrize("devices", [2, [0, 2], "2", "0,2"])
54+
def test_get_parallel_devices_invalid_request(devices):
55+
with pytest.raises(MisconfigurationException, match="But your machine only has"):
56+
MPSAccelerator.get_parallel_devices(devices)

tests/tests_pytorch/deprecated_api/test_remove_1-8.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,7 @@ def on_save_checkpoint(self, trainer, pl_module, checkpoint):
689689
def test_trainer_gpus(monkeypatch, trainer_kwargs):
690690
monkeypatch.setattr(device_parser, "is_cuda_available", lambda: True)
691691
monkeypatch.setattr(device_parser, "num_cuda_devices", lambda: 4)
692+
monkeypatch.setattr(device_parser, "_get_all_available_mps_gpus", lambda: list(range(4)))
692693
trainer = Trainer(**trainer_kwargs)
693694
with pytest.deprecated_call(
694695
match=(

tests/tests_pytorch/lite/test_wrappers.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ def test_lite_dataloader_iterator():
204204
("cpu", "cpu"),
205205
pytest.param("cpu", "cuda:0", marks=RunIf(min_cuda_gpus=1)),
206206
pytest.param("cuda:0", "cpu", marks=RunIf(min_cuda_gpus=1)),
207-
pytest.param("cpu", "mps", marks=RunIf(mps=True)),
207+
# pytest.param("cpu", "mps", marks=RunIf(mps=True)), # TODO: Add once torch.equal is supported
208208
pytest.param("mps", "cpu", marks=RunIf(mps=True)),
209209
],
210210
)
@@ -222,12 +222,12 @@ def test_lite_dataloader_device_placement(src_device_str, dest_device_str):
222222
iterator = iter(lite_dataloader)
223223

224224
batch0 = next(iterator)
225-
# TODO: This should be torch.equal, but not supported on MPS at this time (torch 1.12)
226-
assert torch.allclose(batch0, torch.tensor([0, 1], device=dest_device))
225+
# TODO: torch.equal is not supported on MPS at this time (torch 1.12)
226+
assert torch.equal(batch0, torch.tensor([0, 1], device=dest_device))
227227

228228
batch1 = next(iterator)
229-
# TODO: This should be torch.equal, but not supported on MPS at this time (torch 1.12)
230-
assert torch.allclose(batch1["data"], torch.tensor([2, 3], device=dest_device))
229+
# TODO: torch.equal is not supported on MPS at this time (torch 1.12)
230+
assert torch.equal(batch1["data"], torch.tensor([2, 3], device=dest_device))
231231

232232

233233
def test_lite_optimizer_wraps():

tests/tests_pytorch/models/test_gpu.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ def device_count():
9191
monkeypatch.setattr(device_parser, "num_cuda_devices", device_count)
9292

9393

94-
# Asking for a gpu when none are available will result in a MisconfigurationException
9594
@pytest.mark.parametrize(
9695
["devices", "expected_root_gpu", "strategy"],
9796
[
@@ -104,8 +103,11 @@ def device_count():
104103
("-1", None, "ddp"),
105104
],
106105
)
107-
def test_root_gpu_property_0_raising(mocked_device_count_0, devices, expected_root_gpu, strategy):
108-
with pytest.raises(MisconfigurationException):
106+
@mock.patch("lightning_lite.accelerators.mps.MPSAccelerator.is_available", return_value=False)
107+
@mock.patch("lightning_lite.accelerators.cuda.CUDAAccelerator.is_available", return_value=False)
108+
def test_root_gpu_property_0_raising(_, __, devices, expected_root_gpu, strategy):
109+
"""Test that asking for a GPU when none are available will result in a MisconfigurationException."""
110+
with pytest.raises(MisconfigurationException, match="No supported gpu backend found!"):
109111
Trainer(accelerator="gpu", devices=devices, strategy=strategy)
110112

111113

tests/tests_pytorch/plugins/test_amp_plugins.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class MyApexPlugin(ApexMixedPrecisionPlugin):
3939
pass
4040

4141

42+
@RunIf(mps=False)
4243
@mock.patch.dict(
4344
os.environ,
4445
{

tests/tests_pytorch/plugins/test_cluster_integration.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,8 @@ def test_ranks_available_manual_strategy_selection(mock_gpu_acc_available, strat
8787
)
8888
@mock.patch("lightning_lite.utilities.device_parser.is_cuda_available", return_value=True)
8989
@mock.patch("lightning_lite.utilities.device_parser.num_cuda_devices", return_value=4)
90-
def test_ranks_available_automatic_strategy_selection(mock0, mock1, trainer_kwargs):
90+
@mock.patch("lightning_lite.utilities.device_parser._get_all_available_mps_gpus", return_value=list(range(4)))
91+
def test_ranks_available_automatic_strategy_selection(_, __, ___, trainer_kwargs):
9192
"""Test that the rank information is readily available after Trainer initialization."""
9293
num_nodes = 2
9394
trainer_kwargs.update(num_nodes=num_nodes)

tests/tests_pytorch/strategies/test_ddp.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
import os
1514
from unittest import mock
1615

1716
import pytest
@@ -58,9 +57,8 @@ def test_multi_gpu_model_ddp_fit_test(tmpdir):
5857

5958

6059
@RunIf(skip_windows=True)
61-
@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1", "WORLD_SIZE": "2"}, clear=True)
62-
@mock.patch("lightning_lite.utilities.device_parser.is_cuda_available", return_value=True)
6360
@mock.patch("lightning_lite.utilities.device_parser.num_cuda_devices", return_value=2)
61+
@mock.patch("lightning_lite.utilities.device_parser._get_all_available_mps_gpus", return_value=list(range(2)))
6462
def test_torch_distributed_backend_invalid(_, __, tmpdir):
6563
"""This test sets `undefined` as the torch backend and should raise a `Backend.UNDEFINED` ValueError."""
6664
model = BoringModel()

tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,8 @@ def test_dist_backend_accelerator_mapping(*_):
214214

215215

216216
@mock.patch("lightning_lite.utilities.device_parser.num_cuda_devices", return_value=2)
217-
def test_ipython_incompatible_backend_error(_, monkeypatch):
217+
@mock.patch("lightning_lite.utilities.device_parser._get_all_available_mps_gpus", return_value=[0, 1])
218+
def test_ipython_incompatible_backend_error(_, __, monkeypatch):
218219
monkeypatch.setattr(pytorch_lightning.utilities, "_IS_INTERACTIVE", True)
219220
with pytest.raises(MisconfigurationException, match=r"strategy='ddp'\)`.*is not compatible"):
220221
Trainer(strategy="ddp", accelerator="gpu", devices=2)
@@ -252,6 +253,7 @@ def test_ipython_compatible_strategy_ddp_fork(monkeypatch):
252253
assert trainer.strategy.launcher.is_interactive_compatible
253254

254255

256+
@RunIf(mps=False)
255257
@pytest.mark.parametrize(
256258
["strategy", "strategy_class"],
257259
[
@@ -462,7 +464,7 @@ def test_strategy_choice_ddp_fork_cpu():
462464
@mock.patch("lightning_lite.utilities.device_parser.num_cuda_devices", return_value=2)
463465
@mock.patch("lightning_lite.utilities.device_parser.is_cuda_available", return_value=True)
464466
def test_strategy_choice_ddp(*_):
465-
trainer = Trainer(fast_dev_run=True, strategy="ddp", accelerator="gpu", devices=1)
467+
trainer = Trainer(fast_dev_run=True, strategy="ddp", accelerator="cuda", devices=1)
466468
assert isinstance(trainer.accelerator, CUDAAccelerator)
467469
assert isinstance(trainer.strategy, DDPStrategy)
468470
assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment)
@@ -471,8 +473,8 @@ def test_strategy_choice_ddp(*_):
471473
@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
472474
@mock.patch("lightning_lite.utilities.device_parser.num_cuda_devices", return_value=2)
473475
@mock.patch("lightning_lite.utilities.device_parser.is_cuda_available", return_value=True)
474-
def test_strategy_choice_ddp_spawn(cuda_available_mock, device_count_mock):
475-
trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", accelerator="gpu", devices=1)
476+
def test_strategy_choice_ddp_spawn(*_):
477+
trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", accelerator="cuda", devices=1)
476478
assert isinstance(trainer.accelerator, CUDAAccelerator)
477479
assert isinstance(trainer.strategy, DDPSpawnStrategy)
478480
assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment)
@@ -515,13 +517,10 @@ def test_strategy_choice_ddp_slurm(_, __, strategy, job_name, expected_env):
515517
"TORCHELASTIC_RUN_ID": "1",
516518
},
517519
)
518-
@mock.patch("torch.cuda.set_device")
519520
@mock.patch("lightning_lite.utilities.device_parser.num_cuda_devices", return_value=2)
520521
@mock.patch("lightning_lite.utilities.device_parser.is_cuda_available", return_value=True)
521-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
522-
@mock.patch("lightning_lite.utilities.device_parser.is_cuda_available", return_value=True)
523522
def test_strategy_choice_ddp_te(*_):
524-
trainer = Trainer(fast_dev_run=True, strategy="ddp", accelerator="gpu", devices=2)
523+
trainer = Trainer(fast_dev_run=True, strategy="ddp", accelerator="cuda", devices=2)
525524
assert isinstance(trainer.accelerator, CUDAAccelerator)
526525
assert isinstance(trainer.strategy, DDPStrategy)
527526
assert isinstance(trainer.strategy.cluster_environment, TorchElasticEnvironment)
@@ -562,12 +561,10 @@ def test_strategy_choice_ddp_cpu_te(*_):
562561
"RANK": "1",
563562
},
564563
)
565-
@mock.patch("torch.cuda.set_device")
566564
@mock.patch("lightning_lite.utilities.device_parser.num_cuda_devices", return_value=1)
567565
@mock.patch("lightning_lite.utilities.device_parser.is_cuda_available", return_value=True)
568-
@mock.patch("pytorch_lightning.strategies.DDPStrategy.setup_distributed", autospec=True)
569566
def test_strategy_choice_ddp_kubeflow(*_):
570-
trainer = Trainer(fast_dev_run=True, strategy="ddp", accelerator="gpu", devices=1)
567+
trainer = Trainer(fast_dev_run=True, strategy="ddp", accelerator="cuda", devices=1)
571568
assert isinstance(trainer.accelerator, CUDAAccelerator)
572569
assert isinstance(trainer.strategy, DDPStrategy)
573570
assert isinstance(trainer.strategy.cluster_environment, KubeflowEnvironment)
@@ -780,10 +777,10 @@ def test_gpu_accelerator_backend_choice(expected_accelerator_flag, expected_acce
780777
assert isinstance(trainer.accelerator, expected_accelerator_class)
781778

782779

780+
@RunIf(mps=False)
783781
@mock.patch("lightning_lite.utilities.device_parser.num_cuda_devices", return_value=1)
784782
def test_gpu_accelerator_backend_choice_cuda(_):
785783
trainer = Trainer(accelerator="gpu")
786-
787784
assert trainer._accelerator_connector._accelerator_flag == "cuda"
788785
assert isinstance(trainer.accelerator, CUDAAccelerator)
789786

tests/tests_pytorch/trainer/properties/test_auto_gpu_select.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def test_pick_multiple_gpus_more_than_available(*_):
4848
pick_multiple_gpus(3)
4949

5050

51+
@RunIf(mps=False)
5152
@mock.patch("lightning_lite.utilities.device_parser.num_cuda_devices", return_value=2)
5253
@mock.patch("pytorch_lightning.trainer.connectors.accelerator_connector.pick_multiple_gpus", return_value=[1])
5354
def test_auto_select_gpus(*_):

0 commit comments

Comments
 (0)