Commit 116bd74

Support more hardware platforms and no longer hard-code CUDA when calling _get_default_process_group_backend_for_device

Signed-off-by: taozhiwei <[email protected]>

1 parent f067626 · commit 116bd74

File tree

2 files changed (+24, -1 lines)


src/lightning/fabric/utilities/distributed.py
Lines changed: 5 additions & 1 deletion

@@ -319,7 +319,11 @@ def _destroy_dist_connection() -> None:
 
 
 def _get_default_process_group_backend_for_device(device: torch.device) -> str:
-    return "nccl" if device.type == "cuda" else "gloo"
+    device_backend_map = torch.distributed.Backend.default_device_backend_map
+    if device.type in device_backend_map:
+        return device_backend_map[device.type]
+    else:
+        return "gloo"
 
 
 class _DatasetSamplerWrapper(Dataset):
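For context: torch.distributed.Backend.default_device_backend_map maps device types to their default process-group backends (for example cpu -> gloo and, on CUDA builds, cuda -> nccl), and third-party backends registered via Backend.register_backend(..., devices=...) are added to it. The sketch below is not part of the commit; resolve_backend is a hypothetical stand-in that mirrors the lookup the patched function performs.

import torch
import torch.distributed

def resolve_backend(device: torch.device) -> str:
    # Prefer the backend registered for this device type; fall back to "gloo",
    # mirroring the patched _get_default_process_group_backend_for_device.
    backend_map = torch.distributed.Backend.default_device_backend_map
    return backend_map.get(device.type, "gloo")

resolve_backend(torch.device("cpu"))  # "gloo"
# On a CUDA build of PyTorch: resolve_backend(torch.device("cuda:0")) == "nccl"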

tests/tests_fabric/utilities/test_distributed.py
Lines changed: 19 additions & 0 deletions

@@ -19,6 +19,7 @@
     _gather_all_tensors,
     _InfiniteBarrier,
     _init_dist_connection,
+    _get_default_process_group_backend_for_device,
     _is_dtensor,
     _set_num_threads_if_needed,
     _suggested_max_num_threads,
@@ -242,6 +243,24 @@ def test_init_dist_connection_registers_destruction_handler(_, atexit_mock):
     atexit_mock.register.assert_not_called()
 
 
+def test_get_default_process_group_backend_for_device():
+    # register a custom backend for the test
+    torch.utils.rename_privateuse1_backend("pcu")
+
+    def mock_backend(store, group_rank, group_size, timeout):
+        pass
+
+    torch.distributed.Backend.register_backend(
+        "pccl",
+        lambda store, group_rank, group_size, timeout: mock_backend(store, group_rank, group_size, timeout),
+        devices=["pcu"],
+    )
+
+    # check that the default backend is resolved correctly for each device type
+    devices = [torch.device("cpu"), torch.device("cuda:0"), torch.device("xpu:0"), torch.device("pcu:0")]
+    backends = ["gloo", "nccl", "xccl", "pccl"]
+    for device, backend in zip(devices, backends):
+        assert _get_default_process_group_backend_for_device(device) == backend
+
+
 @RunIf(min_torch="2.4")
 def test_is_dtensor(monkeypatch):
     from torch.distributed._tensor import DTensor
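As background for the test's setup (not part of the commit): privateuse1 is PyTorch's reserved device slot for out-of-tree hardware, torch.utils.rename_privateuse1_backend gives it a concrete name, and registering a process-group backend with devices=[...] is what populates default_device_backend_map. A minimal standalone sketch of that pattern, reusing the test's placeholder names pcu/pccl:

import torch
import torch.distributed

# Give the reserved out-of-tree device slot a concrete name.
torch.utils.rename_privateuse1_backend("pcu")

def _noop_backend(store, group_rank, group_size, timeout):
    # A real integration would construct and return a ProcessGroup here.
    pass

# devices=["pcu"] records the "pcu" -> "pccl" default, which is exactly the
# entry _get_default_process_group_backend_for_device now looks up.
torch.distributed.Backend.register_backend("pccl", _noop_backend, devices=["pcu"])

assert torch.distributed.Backend.default_device_backend_map["pcu"] == "pccl"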
