From b8bd5b35d6f724d663201f20ec493f47019eed63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= <carlossmocholi@gmail.com>
Date: Tue, 21 Feb 2023 17:09:41 +0100
Subject: [PATCH 1/4] Rename `replace_sampler_ddp` and `replace_sampler` to
 `use_distributed_sampler`

---
 docs/source-pytorch/common/trainer.rst        | 27 ++++------------
 .../fabric/api/fabric_methods.rst             |  2 +-
 src/lightning/fabric/fabric.py                | 20 +++++++-----
 src/lightning/fabric/utilities/distributed.py |  9 +++---
 .../connectors/accelerator_connector.py       |  4 +--
 .../trainer/connectors/data_connector.py      |  2 +-
 src/lightning/pytorch/trainer/trainer.py      | 15 +++++----
 tests/tests_fabric/test_fabric.py             | 10 +++---
 .../benchmarks/test_basic_parity.py           |  2 +-
 .../benchmarks/test_sync_batchnorm_parity.py  |  2 +-
 .../trainer/connectors/test_data_connector.py |  2 +-
 .../tests_pytorch/trainer/test_dataloaders.py |  2 +-
 .../utilities/test_combined_loader.py         | 32 ++++++++++---------
 13 files changed, 61 insertions(+), 68 deletions(-)
diff --git a/docs/source-pytorch/common/trainer.rst b/docs/source-pytorch/common/trainer.rst
index c9fb5344eb883..babd9737c3e22 100644
--- a/docs/source-pytorch/common/trainer.rst
+++ b/docs/source-pytorch/common/trainer.rst
@@ -1014,29 +1014,15 @@ The pseudocode applies also to the ``val_dataloader``.
 
 .. _replace-sampler-ddp:
 
-replace_sampler_ddp
-^^^^^^^^^^^^^^^^^^^
-
-.. raw:: html
-
-    <video width="50%" max-width="400px" controls
-    poster="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/pl_docs/trainer_flags/thumb/replace_sampler_ddp.jpg"
-    src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/pl_docs/trainer_flags/replace_sampler_ddp.mp4"></video>
-
-|
+use_distributed_sampler
+^^^^^^^^^^^^^^^^^^^^^^^
 
-Enables auto adding of :class:`~torch.utils.data.distributed.DistributedSampler`. In PyTorch, you must use it in
-distributed settings such as TPUs or multi-node. The sampler makes sure each GPU sees the appropriate part of your data.
-By default it will add ``shuffle=True`` for train sampler and ``shuffle=False`` for val/test sampler.
-If you already use a custom sampler, Lightning will wrap it in a way that it samples from your sampler in a distributed manner.
-If you want to customize it, you can set ``replace_sampler_ddp=False`` and add your own distributed sampler.
-If ``replace_sampler_ddp=True`` and a distributed sampler was already added,
-Lightning will not replace the existing one.
+See :paramref:`lightning.pytorch.Trainer.use_distributed_sampler`.
 
 .. testcode::
 
     # default used by the Trainer
-    trainer = Trainer(replace_sampler_ddp=True)
+    trainer = Trainer(use_distributed_sampler=True)
 
 By setting to False, you have to add your own distributed sampler:
 
@@ -1044,13 +1030,12 @@ By setting to False, you have to add your own distributed sampler:
 
     # in your LightningModule or LightningDataModule
     def train_dataloader(self):
+        dataset = ...
         # default used by the Trainer
-        sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=True)
+        sampler = torch.utils.data.DistributedSampler(dataset, shuffle=True)
         dataloader = DataLoader(dataset, batch_size=32, sampler=sampler)
         return dataloader
 
-.. note:: For iterable datasets, we don't do this automatically.
-
 
 strategy
 ^^^^^^^^
diff --git a/docs/source-pytorch/fabric/api/fabric_methods.rst b/docs/source-pytorch/fabric/api/fabric_methods.rst
index d6dcbc6d51968..d1ec33e7d22af 100644
--- a/docs/source-pytorch/fabric/api/fabric_methods.rst
+++ b/docs/source-pytorch/fabric/api/fabric_methods.rst
@@ -44,7 +44,7 @@ data tensors to the correct device automatically.
     train_data, test_data = fabric.setup_dataloaders(train_data, test_data, move_to_device=False)
 
     # If you don't want Fabric to replace the sampler in the context of distributed training
-    train_data, test_data = fabric.setup_dataloaders(train_data, test_data, replace_sampler=False)
+    train_data, test_data = fabric.setup_dataloaders(train_data, test_data, use_distributed_sampler=False)
 
 
 backward
diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py
index 47a3097244258..f05c22d93f9af 100644
--- a/src/lightning/fabric/fabric.py
+++ b/src/lightning/fabric/fabric.py
@@ -273,15 +273,16 @@ def setup_optimizers(self, *optimizers: Optimizer) -> Union[_FabricOptimizer, Tu
         return optimizers[0] if len(optimizers) == 1 else tuple(optimizers)
 
     def setup_dataloaders(
-        self, *dataloaders: DataLoader, replace_sampler: bool = True, move_to_device: bool = True
+        self, *dataloaders: DataLoader, use_distributed_sampler: bool = True, move_to_device: bool = True
     ) -> Union[DataLoader, List[DataLoader]]:
         """Set up one or multiple dataloaders for accelerated training. If you need different settings for each
         dataloader, call this method individually for each one.
 
         Args:
             *dataloaders: A single dataloader or a sequence of dataloaders.
-            replace_sampler: If set ``True`` (default), automatically wraps or replaces the sampler on the dataloader(s)
-                for distributed training. If you have a custom sampler defined, set this to this argument to ``False``.
+            use_distributed_sampler: If set ``True`` (default), automatically wraps or replaces the sampler on the
+                dataloader(s) for distributed training. If you have a custom sampler defined, set this to this argument
+                to ``False``.
             move_to_device: If set ``True`` (default), moves the data returned by the dataloader(s) automatically to
                 the correct device. Set this to ``False`` and alternatively use :meth:`to_device` manually on the
                 returned data.
@@ -291,21 +292,24 @@ def setup_dataloaders(
         """
         self._validate_setup_dataloaders(dataloaders)
         dataloaders = [
-            self._setup_dataloader(dataloader, replace_sampler=replace_sampler, move_to_device=move_to_device)
+            self._setup_dataloader(
+                dataloader, use_distributed_sampler=use_distributed_sampler, move_to_device=move_to_device
+            )
             for dataloader in dataloaders
         ]
         dataloaders = dataloaders[0] if len(dataloaders) == 1 else dataloaders
         return dataloaders  # type: ignore[return-value]
 
     def _setup_dataloader(
-        self, dataloader: DataLoader, replace_sampler: bool = True, move_to_device: bool = True
+        self, dataloader: DataLoader, use_distributed_sampler: bool = True, move_to_device: bool = True
     ) -> DataLoader:
         """Set up a single dataloader for accelerated training.
 
         Args:
             dataloader: The dataloader to accelerate.
-            replace_sampler: If set ``True`` (default), automatically wraps or replaces the sampler on the dataloader
-                for distributed training. If you have a custom sampler defined, set this to this argument to ``False``.
+            use_distributed_sampler: If set ``True`` (default), automatically wraps or replaces the sampler on the
+                dataloader for distributed training. If you have a custom sampler defined, set this to this argument to
+                ``False``.
             move_to_device: If set ``True`` (default), moves the data returned by the dataloader automatically to
                 the correct device. Set this to ``False`` and alternatively use :meth:`to_device` manually on the
                 returned data.
@@ -314,7 +318,7 @@ def _setup_dataloader(
             The wrapped dataloader.
         """
         sampler = dataloader.sampler
-        if replace_sampler and self._requires_distributed_sampler(dataloader):
+        if use_distributed_sampler and self._requires_distributed_sampler(dataloader):
             sampler = self._get_distributed_sampler(dataloader, **self._strategy.distributed_sampler_kwargs)
 
         # the dataloader needs to be re-instantiated because we want to update the input arguments (e.g., sampler)
diff --git a/src/lightning/fabric/utilities/distributed.py b/src/lightning/fabric/utilities/distributed.py
index f4cfc201aed5f..2b574e020b4d9 100644
--- a/src/lightning/fabric/utilities/distributed.py
+++ b/src/lightning/fabric/utilities/distributed.py
@@ -257,7 +257,6 @@ def _get_default_process_group_backend_for_device(device: torch.device) -> str:
     return "nccl" if device.type == "cuda" else "gloo"
 
 
-# TODO(fabric): The error messages refer to 'replace_sampler_ddp' in PL but Fabric has it named 'replace_sampler'
 class _DatasetSamplerWrapper(Dataset):
     """Dataset to create indexes from `Sampler` or `Iterable`"""
 
@@ -266,19 +265,19 @@ def __init__(self, sampler: Union[Sampler, Iterable]) -> None:
             raise TypeError(
                 "You seem to have configured a sampler in your DataLoader which"
                 " does not provide `__len__` method. The sampler was about to be"
-                " replaced by `DistributedSamplerWrapper` since `replace_sampler_ddp`"
+                " replaced by `DistributedSamplerWrapper` since `use_distributed_sampler`"
                 " is True and you are using distributed training. Either provide `__len__`"
-                " method in your sampler, remove it from DataLoader or set `replace_sampler_ddp=False`"
+                " method in your sampler, remove it from DataLoader or set `use_distributed_sampler=False`"
                 " if you want to handle distributed sampling yourself."
             )
         if len(sampler) == float("inf"):
             raise TypeError(
                 "You seem to have configured a sampler in your DataLoader which"
                 " does not provide finite `__len__` method. The sampler was about to be"
-                " replaced by `DistributedSamplerWrapper` since `replace_sampler_ddp`"
+                " replaced by `DistributedSamplerWrapper` since `use_distributed_sampler`"
                 " is True and you are using distributed training. Either provide `__len__`"
                 " method in your sampler which returns a finite number, remove it from DataLoader"
-                " or set `replace_sampler_ddp=False` if you want to handle distributed sampling yourself."
+                " or set `use_distributed_sampler=False` if you want to handle distributed sampling yourself."
             )
         self._sampler = sampler
         # defer materializing an iterator until it is necessary
diff --git a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py
index ae13d4ee59c3e..da5aefb27c941 100644
--- a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py
+++ b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py
@@ -88,7 +88,7 @@ def __init__(
         precision: _PRECISION_INPUT = "32-true",
         sync_batchnorm: bool = False,
         benchmark: Optional[bool] = None,
-        replace_sampler_ddp: bool = True,
+        use_distributed_sampler: bool = True,
         deterministic: Optional[Union[bool, _LITERAL_WARN]] = None,
     ) -> None:
         """The AcceleratorConnector parses several Trainer arguments and instantiates the Strategy including other
@@ -120,7 +120,7 @@ def __init__(
             A. Class > str
             B. Strategy > Accelerator/precision/plugins
         """
-        self.replace_sampler_ddp = replace_sampler_ddp
+        self.use_distributed_sampler = use_distributed_sampler
         _set_torch_flags(deterministic=deterministic, benchmark=benchmark)
 
         # 1. Parsing flags
diff --git a/src/lightning/pytorch/trainer/connectors/data_connector.py b/src/lightning/pytorch/trainer/connectors/data_connector.py
index 10f6eec96ae3f..74906d573bb38 100644
--- a/src/lightning/pytorch/trainer/connectors/data_connector.py
+++ b/src/lightning/pytorch/trainer/connectors/data_connector.py
@@ -229,7 +229,7 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None:
 
     def _requires_distributed_sampler(self, dataloader: DataLoader) -> bool:
         return (
-            self.trainer._accelerator_connector.replace_sampler_ddp
+            self.trainer._accelerator_connector.use_distributed_sampler
             and self.trainer._accelerator_connector.is_distributed
             and not isinstance(dataloader.sampler, DistributedSampler)
             and not has_iterable_dataset(dataloader)
diff --git a/src/lightning/pytorch/trainer/trainer.py b/src/lightning/pytorch/trainer/trainer.py
index eca416bbb8d72..9029ace72c953 100644
--- a/src/lightning/pytorch/trainer/trainer.py
+++ b/src/lightning/pytorch/trainer/trainer.py
@@ -123,7 +123,7 @@ def __init__(
         benchmark: Optional[bool] = None,
         deterministic: Optional[Union[bool, _LITERAL_WARN]] = None,
         reload_dataloaders_every_n_epochs: int = 0,
-        replace_sampler_ddp: bool = True,
+        use_distributed_sampler: bool = True,
         detect_anomaly: bool = False,
         plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None,
         inference_mode: bool = True,
@@ -251,10 +251,13 @@ def __init__(
             reload_dataloaders_every_n_epochs: Set to a non-negative integer to reload dataloaders every n epochs.
                 Default: ``0``.
 
-            replace_sampler_ddp: Explicitly enables or disables sampler replacement. If not specified this
-                will toggled automatically when DDP is used. By default it will add ``shuffle=True`` for
-                train sampler and ``shuffle=False`` for val/test sampler. If you want to customize it,
-                you can set ``replace_sampler_ddp=False`` and add your own distributed sampler.
+            use_distributed_sampler: Whether to wrap the DataLoader's sampler with
+                :class:`torch.utils.data.DistributedSampler`. If not specified this is toggled automatically for
+                strategies that require it. By default, it will add ``shuffle=True`` for the train sampler and
+                ``shuffle=False`` for validation/test/predict samplers. If you want to disable this logic, you can pass
+                ``False`` and add your own distributed sampler in the dataloader hooks. If ``True`` and a distributed
+                sampler was already added, Lightning will not replace the existing one. For iterable-style datasets,
+                we don't do this automatically.
 
             strategy: Supports different training strategies with aliases
                 as well custom strategies.
@@ -293,7 +296,7 @@ def __init__(
             num_nodes=num_nodes,
             sync_batchnorm=sync_batchnorm,
             benchmark=benchmark,
-            replace_sampler_ddp=replace_sampler_ddp,
+            use_distributed_sampler=use_distributed_sampler,
             deterministic=deterministic,
             precision=precision,
             plugins=plugins,
diff --git a/tests/tests_fabric/test_fabric.py b/tests/tests_fabric/test_fabric.py
index 803f614ef6066..a57d9907a86cc 100644
--- a/tests/tests_fabric/test_fabric.py
+++ b/tests/tests_fabric/test_fabric.py
@@ -362,13 +362,13 @@ def test_setup_dataloaders_move_to_device(fabric_device_mock):
 
 
 def test_setup_dataloaders_distributed_sampler_not_needed():
-    """Test that replace_sampler option has no effect when no distributed sampler is needed."""
+    """Test that `use_distributed_sampler` option has no effect when no distributed sampler is needed."""
     custom_sampler = Mock(spec=Sampler)
     dataloader = DataLoader(Mock(), sampler=custom_sampler)
 
     # keep the custom sampler when not needed to replace
     fabric = EmptyFabric()
-    fabric_dataloader = fabric.setup_dataloaders(dataloader, replace_sampler=True)
+    fabric_dataloader = fabric.setup_dataloaders(dataloader, use_distributed_sampler=True)
     assert fabric_dataloader.sampler is custom_sampler
 
 
@@ -469,10 +469,10 @@ def test_setup_dataloaders_replace_custom_sampler(strategy):
     fabric = EmptyFabric(accelerator="cpu", strategy=strategy, devices=2)
     if hasattr(fabric.strategy, "distributed_sampler_kwargs"):
         with pytest.raises(TypeError, match="You seem to have configured a sampler in your DataLoader"):
-            fabric.setup_dataloaders(dataloader, replace_sampler=True)
+            fabric.setup_dataloaders(dataloader, use_distributed_sampler=True)
 
-    # setting `replace_sampler=False` leaves the sampler untouched
-    fabric_dataloader = fabric.setup_dataloaders(dataloader, replace_sampler=False)
+    # setting `use_distributed_sampler=False` leaves the sampler untouched
+    fabric_dataloader = fabric.setup_dataloaders(dataloader, use_distributed_sampler=False)
     assert fabric_dataloader.sampler is custom_sampler
 
 
diff --git a/tests/tests_pytorch/benchmarks/test_basic_parity.py b/tests/tests_pytorch/benchmarks/test_basic_parity.py
index 623d43f4a47e8..ee45332ce15dd 100644
--- a/tests/tests_pytorch/benchmarks/test_basic_parity.py
+++ b/tests/tests_pytorch/benchmarks/test_basic_parity.py
@@ -161,7 +161,7 @@ def lightning_loop(cls_model, idx, device_type: str = "cuda", num_epochs=10):
         accelerator="gpu" if device_type == "cuda" else "cpu",
         devices=1,
         logger=False,
-        replace_sampler_ddp=False,
+        use_distributed_sampler=False,
         benchmark=False,
     )
     trainer.fit(model)
diff --git a/tests/tests_pytorch/benchmarks/test_sync_batchnorm_parity.py b/tests/tests_pytorch/benchmarks/test_sync_batchnorm_parity.py
index bcbaea67148e1..c8a969f84bf54 100644
--- a/tests/tests_pytorch/benchmarks/test_sync_batchnorm_parity.py
+++ b/tests/tests_pytorch/benchmarks/test_sync_batchnorm_parity.py
@@ -65,7 +65,7 @@ def test_sync_batchnorm_parity(tmpdir):
         max_steps=3,
         sync_batchnorm=True,
         num_sanity_val_steps=0,
-        replace_sampler_ddp=False,
+        use_distributed_sampler=False,
         deterministic=True,
         benchmark=False,
         enable_progress_bar=False,
diff --git a/tests/tests_pytorch/trainer/connectors/test_data_connector.py b/tests/tests_pytorch/trainer/connectors/test_data_connector.py
index de8fb78218db3..6fcf331831601 100644
--- a/tests/tests_pytorch/trainer/connectors/test_data_connector.py
+++ b/tests/tests_pytorch/trainer/connectors/test_data_connector.py
@@ -211,7 +211,7 @@ def __init__(self, num_feat, dataset, **kwargs):
 
 
 def test_update_dataloader_with_multiprocessing_context():
-    """This test verifies that replace_sampler conserves multiprocessing context."""
+    """This test verifies that `use_distributed_sampler` conserves multiprocessing context."""
     train = RandomDataset(32, 64)
     context = "spawn"
     train = DataLoader(train, batch_size=32, num_workers=2, multiprocessing_context=context, shuffle=True)
diff --git a/tests/tests_pytorch/trainer/test_dataloaders.py b/tests/tests_pytorch/trainer/test_dataloaders.py
index f945168e7028f..1e13fc52dbc5e 100644
--- a/tests/tests_pytorch/trainer/test_dataloaders.py
+++ b/tests/tests_pytorch/trainer/test_dataloaders.py
@@ -829,7 +829,7 @@ def test_dataloader_distributed_sampler_already_attached(tmpdir):
         default_root_dir=tmpdir,
         max_steps=100,
         callbacks=[DistribSamplerCallback(expected_seeds=(11, 123, 0))],
-        replace_sampler_ddp=True,
+        use_distributed_sampler=True,
     )
     trainer.fit(model)
     assert trainer.state.finished, "DDP Training failed"
diff --git a/tests/tests_pytorch/utilities/test_combined_loader.py b/tests/tests_pytorch/utilities/test_combined_loader.py
index bb782ddadc3e4..b8e79a053d91a 100644
--- a/tests/tests_pytorch/utilities/test_combined_loader.py
+++ b/tests/tests_pytorch/utilities/test_combined_loader.py
@@ -267,8 +267,8 @@ def __len__(self):
     assert seen == max(x, y)
 
 
-@pytest.mark.parametrize("replace_sampler_ddp", [False, True])
-def test_combined_data_loader_validation_test(replace_sampler_ddp):
+@pytest.mark.parametrize("use_distributed_sampler", (False, True))
+def test_combined_data_loader_validation_test(use_distributed_sampler):
     """This test makes sure distributed sampler has been properly injected in dataloaders when using
     CombinedLoader."""
 
@@ -297,7 +297,7 @@ def __init__(self, data_source, name) -> None:
         }
     )
     model = BoringModel()
-    trainer = Trainer(replace_sampler_ddp=replace_sampler_ddp, strategy="ddp", accelerator="cpu", devices=2)
+    trainer = Trainer(use_distributed_sampler=use_distributed_sampler, strategy="ddp", accelerator="cpu", devices=2)
     trainer.strategy.connect(model)
     trainer._data_connector.attach_data(model, train_dataloaders=combined_loader)
     trainer.state.fn = "fit"
@@ -306,7 +306,7 @@ def __init__(self, data_source, name) -> None:
 
     samplers_flattened = tree_flatten(combined_loader.sampler)[0]
     assert len(samplers_flattened) == 6
-    if replace_sampler_ddp:
+    if use_distributed_sampler:
         assert all(isinstance(s, DistributedSampler) for s in samplers_flattened)
     else:
         assert all(isinstance(s, (SequentialSampler, CustomSampler)) for s in samplers_flattened)
@@ -317,11 +317,13 @@ def __init__(self, data_source, name) -> None:
 
 
 @pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_cuda_gpus=2))])
-@pytest.mark.parametrize("replace_sampler_ddp", [False, True])
-def test_combined_data_loader_with_max_size_cycle_and_ddp(accelerator, replace_sampler_ddp):
+@pytest.mark.parametrize("use_distributed_sampler", (False, True))
+def test_combined_data_loader_with_max_size_cycle_and_ddp(accelerator, use_distributed_sampler):
     """This test makes sure distributed sampler has been properly injected in dataloaders when using CombinedLoader
     with ddp and `max_size_cycle` mode."""
-    trainer = Trainer(strategy="ddp", accelerator=accelerator, devices=2, replace_sampler_ddp=replace_sampler_ddp)
+    trainer = Trainer(
+        strategy="ddp", accelerator=accelerator, devices=2, use_distributed_sampler=use_distributed_sampler
+    )
     model = BoringModel()
 
     combined_loader = CombinedLoader(
@@ -333,7 +335,7 @@ def test_combined_data_loader_with_max_size_cycle_and_ddp(accelerator, replace_s
     trainer._data_connector.attach_data(model, train_dataloaders=combined_loader)
     trainer.fit_loop.setup_data()
 
-    assert len(combined_loader) == 4 if replace_sampler_ddp else 8
+    assert len(combined_loader) == 4 if use_distributed_sampler else 8
 
     for a_length in [6, 8, 10]:
         combined_loader = CombinedLoader(
@@ -350,8 +352,8 @@ def test_combined_data_loader_with_max_size_cycle_and_ddp(accelerator, replace_s
         trainer._data_connector.attach_data(model, train_dataloaders=combined_loader)
         trainer.fit_loop.setup_data(shuffle=False)
 
-        assert len(combined_loader) == length // 2 if replace_sampler_ddp else length
-        if replace_sampler_ddp:
+        assert len(combined_loader) == length // 2 if use_distributed_sampler else length
+        if use_distributed_sampler:
             last_batch = list(combined_loader)[-1]
             if a_length == 6:
                 assert last_batch == {"a": torch.tensor([0]), "b": torch.tensor([6])}
@@ -379,14 +381,14 @@ def __iter__(self):
     trainer._data_connector.attach_data(model, train_dataloaders=combined_loader)
     trainer.fit_loop.setup_data()
 
-    assert len(combined_loader.iterables["b"]) == 4 if replace_sampler_ddp else 8
+    assert len(combined_loader.iterables["b"]) == 4 if use_distributed_sampler else 8
     with pytest.raises(NotImplementedError, match="DataLoader` does not define `__len__"):
         len(combined_loader)
 
 
-@pytest.mark.parametrize("replace_sampler_ddp", [False, True])
+@pytest.mark.parametrize("use_distributed_sampler", (False, True))
 @pytest.mark.parametrize("mode", ("min_size", "max_size_cycle", "sequential"))
-def test_combined_dataloader_for_training_with_ddp(replace_sampler_ddp, mode, mps_count_0):
+def test_combined_dataloader_for_training_with_ddp(use_distributed_sampler, mode, mps_count_0):
     """When providing a CombinedLoader as the training data, it should be correctly receive the distributed
     samplers."""
     dim = 3
@@ -403,7 +405,7 @@ def test_combined_dataloader_for_training_with_ddp(replace_sampler_ddp, mode, mp
         strategy="ddp",
         accelerator="auto",
         devices="auto",
-        replace_sampler_ddp=replace_sampler_ddp,
+        use_distributed_sampler=use_distributed_sampler,
     )
     trainer.strategy.connect(model)
     trainer._data_connector.attach_data(model=model, train_dataloaders=dataloader)
@@ -411,7 +413,7 @@ def test_combined_dataloader_for_training_with_ddp(replace_sampler_ddp, mode, mp
     expected_length_before_ddp = fn([n1, n2])
     expected_length_after_ddp = (
         math.ceil(expected_length_before_ddp / trainer.num_devices)
-        if replace_sampler_ddp
+        if use_distributed_sampler
         else expected_length_before_ddp
     )
     trainer.state.stage = "train"

From 9aec84813903900b8af606eda3e09aad2da50af2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= <carlossmocholi@gmail.com>
Date: Tue, 21 Feb 2023 18:49:28 +0100
Subject: [PATCH 2/4] CHANGELOG

---
 src/lightning/fabric/CHANGELOG.md  | 1 +
 src/lightning/pytorch/CHANGELOG.md | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md
index 919704929b405..b30bbdc64a765 100644
--- a/src/lightning/fabric/CHANGELOG.md
+++ b/src/lightning/fabric/CHANGELOG.md
@@ -42,6 +42,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - The selection `Fabric(strategy="ddp_spawn", ...)` no longer falls back to "ddp" when a cluster environment gets detected ([#16780](https://github.com/Lightning-AI/lightning/pull/16780))
 
+- Renamed `setup_dataloaders(replace_sampler=...)` to `setup_dataloaders(use_distributed_sampler=...)` ([#16829](https://github.com/Lightning-AI/lightning/pull/16829))
 
 ### Deprecated
 
diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md
index 502d92bc38f67..1e27ad5b18f62 100644
--- a/src/lightning/pytorch/CHANGELOG.md
+++ b/src/lightning/pytorch/CHANGELOG.md
@@ -93,6 +93,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Renamed `CombinedLoader.loaders` to `CombinedLoader.iterables` ([#16743](https://github.com/Lightning-AI/lightning/pull/16743))
 
 
+- Renamed `Trainer(replace_sampler_ddp=...)` to `Trainer(use_distributed_sampler=...)` ([#16829](https://github.com/Lightning-AI/lightning/pull/16829))
+
+
 - Moved the `CombinedLoader` class from `lightning.pytorch.trainer.supporters` to `lightning.pytorch.combined_loader` ([#16819](https://github.com/Lightning-AI/lightning/pull/16819))
 
 

From cc0e43d34c4bc3863e5eb9450e4aaf9b692dc285 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= <carlossmocholi@gmail.com>
Date: Tue, 21 Feb 2023 18:53:42 +0100
Subject: [PATCH 3/4] fix paramref?

---
 docs/source-pytorch/common/trainer.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source-pytorch/common/trainer.rst b/docs/source-pytorch/common/trainer.rst
index babd9737c3e22..116f026e0d403 100644
--- a/docs/source-pytorch/common/trainer.rst
+++ b/docs/source-pytorch/common/trainer.rst
@@ -1017,7 +1017,7 @@ The pseudocode applies also to the ``val_dataloader``.
 use_distributed_sampler
 ^^^^^^^^^^^^^^^^^^^^^^^
 
-See :paramref:`lightning.pytorch.Trainer.use_distributed_sampler`.
+See :paramref:`pytorch_lightning.trainer.Trainer.params.use_distributed_sampler`.
 
 .. testcode::
 

From ef97699a748e2a7afa60291690ad165c7f5d61d4 Mon Sep 17 00:00:00 2001
From: Jirka Borovec <6035284+Borda@users.noreply.github.com>
Date: Wed, 22 Feb 2023 13:16:39 +0100
Subject: [PATCH 4/4] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
---
 src/lightning/fabric/fabric.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py
index f05c22d93f9af..2f6cc26180a5c 100644
--- a/src/lightning/fabric/fabric.py
+++ b/src/lightning/fabric/fabric.py
@@ -281,7 +281,7 @@ def setup_dataloaders(
         Args:
             *dataloaders: A single dataloader or a sequence of dataloaders.
             use_distributed_sampler: If set ``True`` (default), automatically wraps or replaces the sampler on the
-                dataloader(s) for distributed training. If you have a custom sampler defined, set this to this argument
+                dataloader(s) for distributed training. If you have a custom sampler defined, set this argument
                 to ``False``.
             move_to_device: If set ``True`` (default), moves the data returned by the dataloader(s) automatically to
                 the correct device. Set this to ``False`` and alternatively use :meth:`to_device` manually on the
@@ -308,7 +308,7 @@ def _setup_dataloader(
         Args:
             dataloader: The dataloader to accelerate.
             use_distributed_sampler: If set ``True`` (default), automatically wraps or replaces the sampler on the
-                dataloader for distributed training. If you have a custom sampler defined, set this to this argument to
+                dataloader for distributed training. If you have a custom sampler defined, set this argument to
                 ``False``.
             move_to_device: If set ``True`` (default), moves the data returned by the dataloader automatically to
                 the correct device. Set this to ``False`` and alternatively use :meth:`to_device` manually on the