Commit 523f44f

Keep the output type
1 parent 3a8b3fc commit 523f44f

File tree

10 files changed (+48, -85 lines)

10 files changed

+48
-85
lines changed

pytorch_lightning/loops/optimization/closure.py

Lines changed: 2 additions & 2 deletions

@@ -13,7 +13,7 @@
 # limitations under the License.
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Any, Dict, Generic, Optional, TypeVar
+from typing import Any, Generic, Optional, TypeVar

 from pytorch_lightning.utilities.exceptions import MisconfigurationException

@@ -22,7 +22,7 @@

 @dataclass
 class OutputResult:
-    def asdict(self) -> Dict[str, Any]:
+    def get(self) -> Any:
         raise NotImplementedError
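
The base-class change above only swaps the abstract method: where `asdict()` always produced a dict, the new `get()` may return whatever type the user's `training_step` produced, which is the point of the commit ("Keep the output type"). Below is a standalone sketch of that pattern, not the Lightning source; the subclass name `TensorOrDictResult` is hypothetical and only mirrors the `was_dict` flag the real subclasses add further down.

from dataclasses import dataclass, field
from typing import Any, Dict, Optional, Union

import torch
from torch import Tensor


@dataclass
class OutputResult:
    # new interface: may return a dict or a bare tensor, hence `Any`
    def get(self) -> Any:
        raise NotImplementedError


@dataclass
class TensorOrDictResult(OutputResult):
    # hypothetical subclass mirroring the `was_dict` flag of ManualResult/ClosureResult
    extra: Dict[str, Any] = field(default_factory=dict)
    was_dict: bool = False

    def get(self) -> Union[Optional[Tensor], Dict[str, Any]]:
        # dict in -> dict out, tensor in -> bare loss tensor out
        return self.extra if self.was_dict else self.extra.get("loss")


assert isinstance(TensorOrDictResult({"loss": torch.ones(1)}, was_dict=True).get(), dict)
assert isinstance(TensorOrDictResult({"loss": torch.ones(1)}, was_dict=False).get(), Tensor)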

pytorch_lightning/loops/optimization/manual_loop.py

Lines changed: 13 additions & 7 deletions

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from dataclasses import dataclass, field
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union

 from torch import Tensor

@@ -31,15 +31,19 @@ class ManualResult(OutputResult):

     Attributes:
         extra: Anything returned by the ``training_step``.
+        was_dict: Whether the training step output was a dictionary.
     """

     extra: Dict[str, Any] = field(default_factory=dict)
+    was_dict: bool = False

     @classmethod
     def from_training_step_output(cls, training_step_output: Optional[STEP_OUTPUT]) -> "ManualResult":
-        extra = {}
+        extra, was_dict = {}, False
+
         if isinstance(training_step_output, dict):
             extra = {k: v for k, v in training_step_output.items() if k != "hiddens"}
+            was_dict = True
         elif isinstance(training_step_output, Tensor):
             extra = {"loss": training_step_output}
         elif training_step_output is not None:
@@ -52,13 +56,15 @@ def from_training_step_output(cls, training_step_output: Optional[STEP_OUTPUT])
         # we detach manually as it's expected that it will have a `grad_fn`
         extra["loss"] = extra["loss"].detach()

-        return cls(extra=extra)
+        return cls(extra=extra, was_dict=was_dict)

-    def asdict(self) -> Dict[str, Any]:
-        return self.extra
+    def get(self) -> Union[Optional[Tensor], Dict[str, Any]]:
+        if self.was_dict:
+            return self.extra
+        return self.extra.get("loss")


-_OUTPUTS_TYPE = Dict[str, Any]
+_OUTPUTS_TYPE = Union[Optional[Tensor], Dict[str, Any]]


 class ManualOptimization(Loop[_OUTPUTS_TYPE]):
@@ -122,7 +128,7 @@ def advance(self, batch: Any, batch_idx: int) -> None: # type: ignore[override]
             self.trainer._results.cpu()

         self._done = True
-        self._output = result.asdict()
+        self._output = result.get()

     def on_run_end(self) -> _OUTPUTS_TYPE:
         """Returns the result of this loop, i.e., the post-processed outputs from the training step."""

pytorch_lightning/loops/optimization/optimizer_loop.py

Lines changed: 12 additions & 6 deletions

@@ -13,7 +13,7 @@
 # limitations under the License.
 from dataclasses import dataclass, field
 from functools import partial
-from typing import Any, Callable, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union

 import torch
 from torch import Tensor
@@ -48,11 +48,13 @@ class ClosureResult(OutputResult):
         closure_loss: The loss with a graph attached.
         loss: A detached copy of the closure loss.
         extra: Any keys other than the loss returned.
+        was_dict: Whether the training step output was a dictionary.
     """

     closure_loss: Optional[Tensor]
     loss: Optional[Tensor] = field(init=False, default=None)
     extra: Dict[str, Any] = field(default_factory=dict)
+    was_dict: bool = False

     def __post_init__(self) -> None:
         self._clone_loss()
@@ -68,6 +70,7 @@ def from_training_step_output(
     ) -> "ClosureResult":
         closure_loss, extra = None, {}

+        was_dict = False
         if isinstance(training_step_output, dict):
             # this should not modify the `training_step_output`, as the user could be using it after `training_step_end`
             closure_loss = training_step_output.get("loss")
@@ -76,6 +79,7 @@
                     "In automatic_optimization, when `training_step` returns a dict, the 'loss' key needs to be present"
                 )
             extra = {k: v for k, v in training_step_output.items() if k not in ("loss", "hiddens")}
+            was_dict = True
         elif isinstance(training_step_output, Tensor):
             closure_loss = training_step_output
         elif training_step_output is not None:
@@ -89,10 +93,12 @@
             # note: avoid in-place operation `x /= y` here on purpose
             closure_loss = closure_loss / normalize

-        return cls(closure_loss, extra=extra)
+        return cls(closure_loss, extra=extra, was_dict=was_dict)

-    def asdict(self) -> Dict[str, Any]:
-        return {"loss": self.loss, **self.extra}
+    def get(self) -> Union[Optional[Tensor], Dict[str, Any]]:
+        if self.was_dict:
+            return {"loss": self.loss, **self.extra}
+        return self.loss


 class Closure(AbstractClosure[ClosureResult]):
@@ -158,7 +164,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> Optional[Tensor]:
         return self._result.loss


-_OUTPUTS_TYPE = Dict[int, Dict[str, Any]]
+_OUTPUTS_TYPE = Dict[int, Union[Optional[Tensor], Dict[str, Any]]]


 class OptimizerLoop(Loop[_OUTPUTS_TYPE]):
@@ -218,7 +224,7 @@ def advance(self, batch: Any, *args: Any, **kwargs: Any) -> None: # type: ignore[override]
        if result.loss is not None:
            # automatic optimization assumes a loss needs to be returned for extras to be considered as the batch
            # would be skipped otherwise
-           self._outputs[self.optimizer_idx] = result.asdict()
+           self._outputs[self.optimizer_idx] = result.get()
        self.optim_progress.optimizer_position += 1

    def on_run_end(self) -> _OUTPUTS_TYPE:
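
The automatic-optimization counterpart behaves the same way. A hedged sketch, again assuming this checkout, constructing `ClosureResult` directly as the updated test in tests/loops/optimization/test_optimizer_loop.py further down does.

import torch
from pytorch_lightning.loops.optimization.optimizer_loop import ClosureResult

loss = torch.tensor(3.0, requires_grad=True)

# tensor return path: `get()` yields the detached `loss` tensor itself
assert isinstance(ClosureResult(loss).get(), torch.Tensor)

# dict return path: `get()` yields {"loss": ..., **extra}, keeping the user's type
assert ClosureResult(loss, was_dict=True).get().keys() == {"loss"}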

tests/loops/optimization/test_manual_loop.py

Lines changed: 1 addition & 1 deletion

@@ -23,7 +23,7 @@
 def test_manual_result():
     training_step_output = {"loss": torch.tensor(25.0, requires_grad=True), "something": "jiraffe"}
     result = ManualResult.from_training_step_output(training_step_output)
-    asdict = result.asdict()
+    asdict = result.get()
     assert not asdict["loss"].requires_grad
     assert asdict["loss"] == 25
     assert result.extra == asdict

tests/loops/optimization/test_optimizer_loop.py

Lines changed: 2 additions & 2 deletions

@@ -28,13 +28,13 @@

 def test_closure_result_deepcopy():
     closure_loss = torch.tensor(123.45)
-    result = ClosureResult(closure_loss)
+    result = ClosureResult(closure_loss, was_dict=True)

     assert closure_loss.data_ptr() == result.closure_loss.data_ptr()
     # the `loss` is cloned so the storage is different
     assert closure_loss.data_ptr() != result.loss.data_ptr()

-    copy = result.asdict()
+    copy = result.get()
     assert result.loss == copy["loss"]
     assert copy.keys() == {"loss"}

tests/loops/test_evaluation_loop_flow.py

Lines changed: 4 additions & 4 deletions

@@ -68,8 +68,8 @@ def backward(self, loss, optimizer, optimizer_idx):

    assert len(train_step_out) == 1
    train_step_out = train_step_out[0][0]
-   assert isinstance(train_step_out["loss"], torch.Tensor)
-   assert train_step_out["loss"].item() == 171
+   assert isinstance(train_step_out, torch.Tensor)
+   assert train_step_out.item() == 171

    # make sure the optimizer closure returns the correct things
    opt_closure = trainer.fit_loop.epoch_loop.batch_loop.optimizer_loop._make_closure(
@@ -131,8 +131,8 @@ def backward(self, loss, optimizer, optimizer_idx):

    assert len(train_step_out) == 1
    train_step_out = train_step_out[0][0]
-   assert isinstance(train_step_out["loss"], torch.Tensor)
-   assert train_step_out["loss"].item() == 171
+   assert isinstance(train_step_out, torch.Tensor)
+   assert train_step_out.item() == 171

    # make sure the optimizer closure returns the correct things
    opt_closure = trainer.fit_loop.epoch_loop.batch_loop.optimizer_loop._make_closure(
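
These assertion changes follow from the new `_OUTPUTS_TYPE`: with a single optimizer and a `training_step` returning a bare tensor, the optimizer loop now stores the tensor itself under the optimizer index instead of `{"loss": tensor}`. A hypothetical illustration of that shape (plain torch, not Lightning code):

import torch

# roughly what `OptimizerLoop._outputs` holds for optimizer 0 after this change
optimizer_outputs = {0: torch.tensor(171.0)}
out = optimizer_outputs[0]
assert isinstance(out, torch.Tensor)
assert out.item() == 171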

tests/loops/test_flow_warnings.py

Lines changed: 0 additions & 45 deletions

This file was deleted.

tests/loops/test_training_loop.py

Lines changed: 4 additions & 0 deletions

@@ -136,6 +136,10 @@ def training_step_end(self, outputs):
            loss = self.loss(outputs["batch"], outputs["output"])
            return loss

+       def training_epoch_end(self, outputs) -> None:
+           # since `training_step_end` returns a tensor, these are tensors
+           torch.stack(outputs).mean()
+
    # No error is raised
    model = ValidTrainStepEndModel()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1)
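
The added `training_epoch_end` works because, with this change, a tensor-returning `training_step_end` produces tensor epoch outputs that can be stacked directly. A standalone sketch of the before/after handling (plain torch, no Lightning imports):

import torch

outputs = [torch.tensor(1.0), torch.tensor(2.0)]  # new behavior: plain tensors
assert torch.stack(outputs).mean().item() == 1.5

# previously each element was a dict, so the loss had to be unwrapped first:
# torch.stack([o["loss"] for o in outputs]).mean()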

tests/loops/test_training_loop_flow_scalar.py

Lines changed: 9 additions & 17 deletions

@@ -109,15 +109,9 @@ def training_step(self, batch, batch_idx):

        def training_epoch_end(self, outputs):
            self.training_epoch_end_called = True
-
            # verify we saw the current num of batches
            assert len(outputs) == 2
-
-           for b in outputs:
-               # time = 1
-               assert len(b) == 1
-               assert "loss" in b
-               assert isinstance(b, dict)
+           assert all(isinstance(o, torch.Tensor) for o in outputs)

        def backward(self, loss, optimizer, optimizer_idx):
            return LightningModule.backward(self, loss, optimizer, optimizer_idx)
@@ -151,8 +145,8 @@ def backward(self, loss, optimizer, optimizer_idx):

    assert len(train_step_out) == 1
    train_step_out = train_step_out[0][0]
-   assert isinstance(train_step_out["loss"], torch.Tensor)
-   assert train_step_out["loss"].item() == 171
+   assert isinstance(train_step_out, torch.Tensor)
+   assert train_step_out.item() == 171

    # make sure the optimizer closure returns the correct things
    opt_closure = trainer.fit_loop.epoch_loop.batch_loop.optimizer_loop._make_closure(
@@ -184,12 +178,7 @@ def training_epoch_end(self, outputs):

            # verify we saw the current num of batches
            assert len(outputs) == 2
-
-           for b in outputs:
-               # time = 1
-               assert len(b) == 1
-               assert "loss" in b
-               assert isinstance(b, dict)
+           assert all(isinstance(o, torch.Tensor) for o in outputs)

        def backward(self, loss, optimizer, optimizer_idx):
            return LightningModule.backward(self, loss, optimizer, optimizer_idx)
@@ -223,8 +212,8 @@ def backward(self, loss, optimizer, optimizer_idx):

    assert len(train_step_out) == 1
    train_step_out = train_step_out[0][0]
-   assert isinstance(train_step_out["loss"], torch.Tensor)
-   assert train_step_out["loss"].item() == 171
+   assert isinstance(train_step_out, torch.Tensor)
+   assert train_step_out.item() == 171

    # make sure the optimizer closure returns the correct things
    opt_closure = trainer.fit_loop.epoch_loop.batch_loop.optimizer_loop._make_closure(
@@ -285,6 +274,9 @@ def training_step(self, batch, batch_idx):
            self.log("a", loss, on_step=True, on_epoch=True)
            return loss if batch_idx % 2 else None

+       def training_epoch_end(self, outputs) -> None:
+           torch.stack(outputs).mean()
+
    model = TestModel()
    trainer = Trainer(
        default_root_dir=tmpdir,

tests/plugins/test_ddp_spawn_plugin.py

Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@ def get_from_queue(self, queue: torch.multiprocessing.SimpleQueue) -> None:
        return super().get_from_queue(queue)


-@RunIf(skip_windows=True, skip_49370=True)
+@RunIf(skip_windows=True, skip_49370=True, skip_hanging_spawn=True)
 def test_ddp_cpu():
     """Tests if device is set correctly when training for DDPSpawnPlugin."""
     trainer = Trainer(num_processes=2, fast_dev_run=True)
