Avoid non-blocking GPU->CPU copies. (#11288)

edpizzi · Borda · rohitgr7 · rohitgr7 · commit 512ec4ffb591 · 2022-01-04T20:09:09.000+05:30
Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com>
Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com>
Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -385,6 +385,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed data fetcher selection ([#11294](https://github.com/PyTorchLightning/pytorch-lightning/pull/11294))
 
 
+- Fixed a race condition that could result in incorrect (zero) values being observed in prediction writer callbacks ([#11288](https://github.com/PyTorchLightning/pytorch-lightning/pull/11288))
+
+
 ## [1.5.7] - 2021-12-21
 
 ### Fixed
diff --git a/pytorch_lightning/utilities/apply_func.py b/pytorch_lightning/utilities/apply_func.py
@@ -35,6 +35,9 @@
     Batch = type(None)
 
 
+_CPU_DEVICES = ("cpu", torch.device("cpu"))
+
+
 def to_dtype_tensor(
     value: Union[int, float, List[Union[int, float]]], dtype: torch.dtype, device: Union[str, torch.device]
 ) -> torch.Tensor:
@@ -274,7 +277,10 @@ def batch_to(data: Any) -> Any:
                 setattr(device_data, field, device_field)
             return device_data
 
-        kwargs = dict(non_blocking=True) if isinstance(data, torch.Tensor) else {}
+        kwargs = {}
+        # Don't issue non-blocking transfers to CPU: the host could read the tensor before the copy has finished
+        if isinstance(data, torch.Tensor) and device not in _CPU_DEVICES:
+            kwargs["non_blocking"] = True
         data_output = data.to(device, **kwargs)
         if data_output is not None:
             return data_output