
Commit 9351d41

ethanwharris authored and Borda committed
[App] Fix multi-node pytorch example CI (#15753)
(cherry picked from commit bc797fd)
1 parent 49f3da7 commit 9351d41


4 files changed: +5 −5 lines changed


.github/checkgroup.yml (1 addition, 1 deletion)

@@ -244,7 +244,7 @@ subprojects:
       - ".github/workflows/ci-app-examples.yml"
       - "src/lightning_app/**"
       - "tests/tests_app_examples/**"
-      - "examples/app_*"
+      - "examples/app_*/**"
       - "requirements/app/**"
       - "setup.py"
       - ".actions/**"

.github/workflows/ci-app-examples.yml (1 addition, 1 deletion)

@@ -11,7 +11,7 @@ on:
       - ".github/workflows/ci-app-examples.yml"
       - "src/lightning_app/**"
       - "tests/tests_app_examples/**"
-      - "examples/app_*"
+      - "examples/app_*/**"
       - "requirements/app/**"
       - "setup.py"
       - ".actions/**"

docs/source-app/levels/basic/hello_components/pt_multinode.py (1 addition, 1 deletion)

@@ -22,7 +22,7 @@ def distributed_train(local_rank: int, main_address: str, main_port: int, num_no
     # 2. PREPARE DISTRIBUTED MODEL
     model = torch.nn.Linear(32, 2)
     device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu")
-    model = DistributedDataParallel(model, device_ids=[local_rank]).to(device)
+    model = DistributedDataParallel(model, device_ids=[local_rank] if torch.cuda.is_available() else None).to(device)

     # 3. SETUP LOSS AND OPTIMIZER
     criterion = torch.nn.MSELoss()
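The one-line change above is what lets this example run on CPU-only CI machines: DistributedDataParallel only accepts device_ids for a module on a CUDA device, and passing device_ids=[local_rank] on a CPU-only host raises at construction. A minimal sketch of the same pattern, assuming torch.distributed.init_process_group has already been called (the wrap_model helper is hypothetical):

import torch
from torch.nn.parallel import DistributedDataParallel

def wrap_model(model: torch.nn.Module, local_rank: int) -> DistributedDataParallel:
    # Hypothetical helper; assumes the process group is already initialized.
    if torch.cuda.is_available():
        # GPU path: move the module to its device and tell DDP which device owns it.
        device = torch.device(f"cuda:{local_rank}")
        return DistributedDataParallel(model.to(device), device_ids=[local_rank])
    # CPU path: device_ids must be None, otherwise DDP raises at construction.
    return DistributedDataParallel(model, device_ids=None)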

examples/app_multi_node/train_pytorch.py (2 additions, 2 deletions)

@@ -23,7 +23,7 @@ def distributed_train(local_rank: int, main_address: str, main_port: int, num_no
     # 2. PREPARE DISTRIBUTED MODEL
     model = torch.nn.Linear(32, 2)
     device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu")
-    model = DistributedDataParallel(model, device_ids=[local_rank]).to(device)
+    model = DistributedDataParallel(model, device_ids=[local_rank] if torch.cuda.is_available() else None).to(device)

     # 3. SETUP LOSS AND OPTIMIZER
     criterion = torch.nn.MSELoss()

@@ -55,7 +55,7 @@ def run(self, main_address: str, main_port: int, num_nodes: int, node_rank: int)
         )


-# 32 GPUs: (8 nodes x 4 v 100)
+# 8 GPUs: (2 nodes x 4 v 100)
 compute = L.CloudCompute("gpu-fast-multi")  # 4xV100
 component = MultiNode(PyTorchDistributed, num_nodes=2, cloud_compute=compute)
 app = L.LightningApp(component)
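The comment fix in the second hunk is just arithmetic: the example launches num_nodes=2 machines of the "gpu-fast-multi" tier, which carries 4 V100s each, so the total is 8 GPUs rather than the 32 the stale comment claimed. As a sketch (the total_gpus helper is hypothetical):

def total_gpus(num_nodes: int, gpus_per_node: int) -> int:
    # Total accelerators for a multi-node launch: machines x GPUs per machine.
    return num_nodes * gpus_per_node

assert total_gpus(2, 4) == 8    # the corrected comment: 2 nodes x 4 V100
assert total_gpus(8, 4) == 32   # what the stale comment described: 8 nodes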
