13
13
# limitations under the License.
14
14
import os
15
15
from unittest import mock
16
- from unittest .mock import patch
17
16
18
17
import pytest
19
18
import torch
@@ -59,34 +58,29 @@ def test_multi_gpu_model_ddp_fit_test(tmpdir):
59
58
60
59
61
60
@RunIf (skip_windows = True )
62
- @pytest . mark . skipif ( torch . cuda . is_available (), reason = "test doesn't requires GPU machine" )
61
+ @mock . patch . dict ( os . environ , { "CUDA_VISIBLE_DEVICES" : "0,1" , "WORLD_SIZE" : "2" }, clear = True )
63
62
@mock .patch ("lightning_lite.utilities.device_parser.is_cuda_available" , return_value = True )
64
- def test_torch_distributed_backend_env_variables (tmpdir ):
63
+ @mock .patch ("lightning_lite.utilities.device_parser.num_cuda_devices" , return_value = 2 )
64
+ def test_torch_distributed_backend_invalid (_ , __ , tmpdir ):
65
65
"""This test sets `undefined` as the torch backend and should raise a `Backend.UNDEFINED` ValueError."""
66
- _environ = {"PL_TORCH_DISTRIBUTED_BACKEND" : "undefined" , "CUDA_VISIBLE_DEVICES" : "0,1" , "WORLD_SIZE" : "2" }
67
- with patch .dict (os .environ , _environ ), patch (
68
- "lightning_lite.utilities.device_parser.num_cuda_devices" , return_value = 2
69
- ):
70
- with pytest .deprecated_call (match = "Environment variable `PL_TORCH_DISTRIBUTED_BACKEND` was deprecated in v1.6" ):
71
- with pytest .raises (ValueError , match = "Invalid backend: 'undefined'" ):
72
- model = BoringModel ()
73
- trainer = Trainer (
74
- default_root_dir = tmpdir ,
75
- fast_dev_run = True ,
76
- strategy = "ddp" ,
77
- accelerator = "gpu" ,
78
- devices = 2 ,
79
- logger = False ,
80
- )
81
- trainer .fit (model )
66
+ model = BoringModel ()
67
+ trainer = Trainer (
68
+ default_root_dir = tmpdir ,
69
+ fast_dev_run = True ,
70
+ strategy = DDPStrategy (process_group_backend = "undefined" ),
71
+ accelerator = "cuda" ,
72
+ devices = 2 ,
73
+ logger = False ,
74
+ )
75
+ with pytest .raises (ValueError , match = "Invalid backend: 'undefined'" ):
76
+ trainer .fit (model )
82
77
83
78
84
79
@RunIf (skip_windows = True )
85
80
@mock .patch ("torch.cuda.set_device" )
86
81
@mock .patch ("lightning_lite.utilities.device_parser.is_cuda_available" , return_value = True )
87
82
@mock .patch ("lightning_lite.utilities.device_parser.num_cuda_devices" , return_value = 1 )
88
83
@mock .patch ("pytorch_lightning.accelerators.gpu.CUDAAccelerator.is_available" , return_value = True )
89
- @mock .patch .dict (os .environ , {"PL_TORCH_DISTRIBUTED_BACKEND" : "gloo" }, clear = True )
90
84
def test_ddp_torch_dist_is_available_in_setup (
91
85
mock_gpu_is_available , mock_device_count , mock_cuda_available , mock_set_device , tmpdir
92
86
):
@@ -98,10 +92,15 @@ def setup(self, stage: str) -> None:
98
92
raise SystemExit ()
99
93
100
94
model = TestModel ()
101
- trainer = Trainer (default_root_dir = tmpdir , fast_dev_run = True , strategy = "ddp" , accelerator = "gpu" , devices = 1 )
102
- with pytest .deprecated_call (match = "Environment variable `PL_TORCH_DISTRIBUTED_BACKEND` was deprecated in v1.6" ):
103
- with pytest .raises (SystemExit ):
104
- trainer .fit (model )
95
+ trainer = Trainer (
96
+ default_root_dir = tmpdir ,
97
+ fast_dev_run = True ,
98
+ strategy = DDPStrategy (process_group_backend = "gloo" ),
99
+ accelerator = "gpu" ,
100
+ devices = 1 ,
101
+ )
102
+ with pytest .raises (SystemExit ):
103
+ trainer .fit (model )
105
104
106
105
107
106
@RunIf (min_cuda_gpus = 2 , min_torch = "1.8.1" , standalone = True )
@@ -143,17 +142,15 @@ def on_train_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule")
143
142
144
143
145
144
@pytest .mark .parametrize (
146
- ["process_group_backend" , "env_var" , " device_str" , "expected_process_group_backend" ],
145
+ ["process_group_backend" , "device_str" , "expected_process_group_backend" ],
147
146
[
148
- pytest .param ("foo" , None , "cpu" , "foo" ),
149
- pytest .param ("foo" , "BAR" , "cpu" , "foo" ),
150
- pytest .param ("foo" , "BAR" , "cuda:0" , "foo" ),
151
- pytest .param (None , "BAR" , "cuda:0" , "BAR" ),
152
- pytest .param (None , None , "cuda:0" , "nccl" ),
153
- pytest .param (None , None , "cpu" , "gloo" ),
147
+ pytest .param ("foo" , "cpu" , "foo" ),
148
+ pytest .param ("foo" , "cuda:0" , "foo" ),
149
+ pytest .param (None , "cuda:0" , "nccl" ),
150
+ pytest .param (None , "cpu" , "gloo" ),
154
151
],
155
152
)
156
- def test_ddp_process_group_backend (process_group_backend , env_var , device_str , expected_process_group_backend ):
153
+ def test_ddp_process_group_backend (process_group_backend , device_str , expected_process_group_backend ):
157
154
"""Test settings for process group backend."""
158
155
159
156
class MockDDPStrategy (DDPStrategy ):
@@ -166,14 +163,7 @@ def root_device(self):
166
163
return self ._root_device
167
164
168
165
strategy = MockDDPStrategy (process_group_backend = process_group_backend , root_device = torch .device (device_str ))
169
- if not process_group_backend and env_var :
170
- with mock .patch .dict (os .environ , {"PL_TORCH_DISTRIBUTED_BACKEND" : env_var }):
171
- with pytest .deprecated_call (
172
- match = "Environment variable `PL_TORCH_DISTRIBUTED_BACKEND` was deprecated in v1.6"
173
- ):
174
- assert strategy ._get_process_group_backend () == expected_process_group_backend
175
- else :
176
- assert strategy ._get_process_group_backend () == expected_process_group_backend
166
+ assert strategy ._get_process_group_backend () == expected_process_group_backend
177
167
178
168
179
169
@pytest .mark .parametrize (
0 commit comments