Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .actions/setup_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def _load_aggregate_requirements(req_dir: str = "requirements", freeze_requireme
load_requirements(d, file_name="base.txt", unfreeze=not freeze_requirements)
for d in glob.glob(os.path.join(req_dir, "*"))
# skip empty folder as git artefacts, and resolving Will's special issue
if os.path.isdir(d) and len(glob.glob(os.path.join(d, "*"))) > 0
if os.path.isdir(d) and len(glob.glob(os.path.join(d, "*"))) > 0 and "__pycache__" not in d
]
if not requires:
return None
Expand Down
2 changes: 1 addition & 1 deletion .azure/app-cloud-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
- job: App_cloud_e2e_testing
pool: azure-cpus
container:
image: mcr.microsoft.com/playwright/python:v1.27.1-focal
image: mcr.microsoft.com/playwright/python:v1.28.0-focal
options: "--shm-size=4gb"
strategy:
matrix:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-app-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ jobs:

- name: Adjust tests
if: ${{ matrix.pkg-name == 'lightning' }}
run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app" --target_import="lightning.app"
run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app,lightning_lite,pytorch_lightning" --target_import="lightning.app,lightning.lite,lightning.pytorch"

- name: Adjust examples
if: ${{ matrix.pkg-name != 'lightning' }}
Expand Down
8 changes: 8 additions & 0 deletions docs/source-app/api_references.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,19 @@ ___________________
:nosignatures:
:template: classtemplate_no_index.rst

~database.client.DatabaseClient
~database.server.Database
~python.popen.PopenPythonScript
~python.tracer.TracerPythonScript
~training.LightningTrainerScript
~serve.gradio.ServeGradio
~serve.serve.ModelInferenceAPI
~serve.python_server.PythonServer
~serve.streamlit.ServeStreamlit
~multi_node.base.MultiNode
~multi_node.lite.LiteMultiNode
~multi_node.pytorch_spawn.PyTorchSpawnMultiNode
~multi_node.trainer.LightningTrainerMultiNode
~auto_scaler.AutoScaler

----
Expand Down
2 changes: 1 addition & 1 deletion requirements/app/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pytest==7.2.0
pytest-timeout==2.1.0
pytest-cov==4.0.0
pytest-doctestplus>=0.9.0
playwright==1.27.1
playwright==1.28.0
httpx
trio<0.22.0
pympler
Expand Down
2 changes: 1 addition & 1 deletion requirements/app/ui.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
streamlit>=1.3.1, <=1.11.1
streamlit>=1.0.0, <=1.15.2
panel>=0.12.7, <=0.13.1
8 changes: 5 additions & 3 deletions src/lightning_app/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

## [1.8.4] - 2022-12-06
## [1.8.4] - 2022-12-08

### Added

- Add `code_dir` argument to tracer run ([#15771](https://github.com/Lightning-AI/lightning/pull/15771))
- Added the CLI command `lightning run model` to launch a `LightningLite` accelerated script ([#15506](https://github.com/Lightning-AI/lightning/pull/15506))
- Added the CLI command `lightning delete app` to delete a lightning app on the cloud ([#15783](https://github.com/Lightning-AI/lightning/pull/15783))
- Added a CloudMultiProcessBackend which enables running a child App from within the Flow in the cloud ([#15800](https://github.com/Lightning-AI/lightning/pull/15800))
- Utility for pickling work object safely even from a child process ([#15836](https://github.com/Lightning-AI/lightning/pull/15836))
- Added `AutoScaler` component ([#15769](https://github.com/Lightning-AI/lightning/pull/15769))
- Added the property `ready` of the LightningFlow to inform when the `Open App` should be visible ([#15921](https://github.com/Lightning-AI/lightning/pull/15921))
- Added private work attributed `_start_method` to customize how to start the works ([#15923](https://github.com/Lightning-AI/lightning/pull/15923))

- Added a `configure_layout` method to the `LightningWork` which can be used to control how the work is handled in the layout of a parent flow ([#15926](https://github.com/Lightning-AI/lightning/pull/15926))

### Changed

Expand All @@ -37,12 +38,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed SSH CLI command listing stopped components ([#15810](https://github.com/Lightning-AI/lightning/pull/15810))
- Fixed bug when launching apps on multiple clusters ([#15484](https://github.com/Lightning-AI/lightning/pull/15484))
- Fixed Sigterm Handler causing thread lock which caused KeyboardInterrupt to hang ([#15881](https://github.com/Lightning-AI/lightning/pull/15881))
- Fixed MPS error for multinode component (defaults to cpu on mps devices now as distributed operations are not supported by pytorch on mps) ([#15748](https://github.com/Lightning-AI/lightning/pull/15748))
- Fixed the work not stopped when successful when passed directly to the LightningApp ([#15801](https://github.com/Lightning-AI/lightning/pull/15801))
- Fixed the PyTorch Inference locally on GPU ([#15813](https://github.com/Lightning-AI/lightning/pull/15813))
- Fixed the `enable_spawn` method of the `WorkRunExecutor` ([#15812](https://github.com/Lightning-AI/lightning/pull/15812))
- Fixed require/import decorator ([#15849](https://github.com/Lightning-AI/lightning/pull/15849))

- Fixed a bug where using `L.app.structures` would cause multiple apps to be opened and fail with an error in the cloud ([#15911](https://github.com/Lightning-AI/lightning/pull/15911))
- Fixed PythonServer generating noise on M1 ([#15949](https://github.com/Lightning-AI/lightning/pull/15949))


## [1.8.3] - 2022-11-22
Expand Down
2 changes: 0 additions & 2 deletions src/lightning_app/components/auto_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ async def process_request(self, data: BaseModel):
return result

def run(self):

logger.info(f"servers: {self.servers}")
lock = asyncio.Lock()

Expand Down Expand Up @@ -271,7 +270,6 @@ async def sys_info(authenticated: bool = Depends(authenticate_private_endpoint))
async def update_servers(servers: List[str], authenticated: bool = Depends(authenticate_private_endpoint)):
async with lock:
self.servers = servers

self._iter = cycle(self.servers)

@fastapi_app.post(self.endpoint, response_model=self._output_type)
Expand Down
37 changes: 30 additions & 7 deletions src/lightning_app/components/multi_node/lite.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import importlib
import os
import warnings
from dataclasses import dataclass
from typing import Any, Callable, Type

Expand Down Expand Up @@ -30,8 +32,16 @@ def run(
node_rank: int,
nprocs: int,
):
from lightning.lite import LightningLite
from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy
lites = []
strategies = []
mps_accelerators = []

for pkg_name in ("lightning.lite", "lightning_" + "lite"):
pkg = importlib.import_module(pkg_name)
lites.append(pkg.LightningLite)
strategies.append(pkg.strategies.DDPSpawnShardedStrategy)
strategies.append(pkg.strategies.DDPSpawnStrategy)
mps_accelerators.append(pkg.accelerators.MPSAccelerator)

# Used to configure PyTorch progress group
os.environ["MASTER_ADDR"] = main_address
Expand All @@ -52,23 +62,36 @@ def run(
def pre_fn(lite, *args, **kwargs):
kwargs["devices"] = nprocs
kwargs["num_nodes"] = num_nodes
kwargs["accelerator"] = "auto"

if any(acc.is_available() for acc in mps_accelerators):
old_acc_value = kwargs.get("accelerator", "auto")
kwargs["accelerator"] = "cpu"

if old_acc_value != kwargs["accelerator"]:
warnings.warn("Forcing `accelerator=cpu` as MPS does not support distributed training.")
else:
kwargs["accelerator"] = "auto"
strategy = kwargs.get("strategy", None)
if strategy:
if isinstance(strategy, str):
if strategy == "ddp_spawn":
strategy = "ddp"
elif strategy == "ddp_sharded_spawn":
strategy = "ddp_sharded"
elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)):
raise Exception("DDP Spawned strategies aren't supported yet.")
elif isinstance(strategy, tuple(strategies)):
raise ValueError("DDP Spawned strategies aren't supported yet.")

kwargs["strategy"] = strategy

return {}, args, kwargs

tracer = Tracer()
tracer.add_traced(LightningLite, "__init__", pre_fn=pre_fn)
for ll in lites:
tracer.add_traced(ll, "__init__", pre_fn=pre_fn)
tracer._instrument()
work_run()
ret_val = work_run()
tracer._restore()
return ret_val


class LiteMultiNode(MultiNode):
Expand Down
2 changes: 1 addition & 1 deletion src/lightning_app/components/multi_node/pytorch_spawn.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def run(
elif world_size > 1:
raise Exception("Torch distributed should be available.")

work_run(world_size, node_rank, global_rank, local_rank)
return work_run(world_size, node_rank, global_rank, local_rank)


class PyTorchSpawnMultiNode(MultiNode):
Expand Down
37 changes: 28 additions & 9 deletions src/lightning_app/components/multi_node/trainer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import importlib
import os
import warnings
from dataclasses import dataclass
from typing import Any, Callable, Type

Expand Down Expand Up @@ -30,9 +32,16 @@ def run(
node_rank: int,
nprocs: int,
):
from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy
from lightning.pytorch import Trainer as LTrainer
from pytorch_lightning import Trainer as PLTrainer
trainers = []
strategies = []
mps_accelerators = []

for pkg_name in ("lightning.pytorch", "pytorch_" + "lightning"):
pkg = importlib.import_module(pkg_name)
trainers.append(pkg.Trainer)
strategies.append(pkg.strategies.DDPSpawnShardedStrategy)
strategies.append(pkg.strategies.DDPSpawnStrategy)
mps_accelerators.append(pkg.accelerators.MPSAccelerator)

# Used to configure PyTorch progress group
os.environ["MASTER_ADDR"] = main_address
Expand All @@ -50,24 +59,34 @@ def run(
def pre_fn(trainer, *args, **kwargs):
kwargs["devices"] = nprocs
kwargs["num_nodes"] = num_nodes
kwargs["accelerator"] = "auto"
if any(acc.is_available() for acc in mps_accelerators):
old_acc_value = kwargs.get("accelerator", "auto")
kwargs["accelerator"] = "cpu"

if old_acc_value != kwargs["accelerator"]:
warnings.warn("Forcing `accelerator=cpu` as MPS does not support distributed training.")
else:
kwargs["accelerator"] = "auto"

strategy = kwargs.get("strategy", None)
if strategy:
if isinstance(strategy, str):
if strategy == "ddp_spawn":
strategy = "ddp"
elif strategy == "ddp_sharded_spawn":
strategy = "ddp_sharded"
elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)):
raise Exception("DDP Spawned strategies aren't supported yet.")
elif isinstance(strategy, tuple(strategies)):
raise ValueError("DDP Spawned strategies aren't supported yet.")
kwargs["strategy"] = strategy
return {}, args, kwargs

tracer = Tracer()
tracer.add_traced(PLTrainer, "__init__", pre_fn=pre_fn)
tracer.add_traced(LTrainer, "__init__", pre_fn=pre_fn)
for trainer in trainers:
tracer.add_traced(trainer, "__init__", pre_fn=pre_fn)
tracer._instrument()
work_run()
ret_val = work_run()
tracer._restore()
return ret_val


class LightningTrainerMultiNode(MultiNode):
Expand Down
3 changes: 3 additions & 0 deletions src/lightning_app/components/python/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ class Code(TypedDict):


class TracerPythonScript(LightningWork):

_start_method = "spawn"

def on_before_run(self):
"""Called before the python script is executed."""

Expand Down
11 changes: 5 additions & 6 deletions src/lightning_app/components/serve/gradio.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import abc
import os
from functools import partial
from types import ModuleType
from typing import Any, List, Optional

from lightning_app.components.serve.python_server import _PyTorchSpawnRunExecutor, WorkRunExecutor
from lightning_app.core.work import LightningWork
from lightning_app.utilities.imports import _is_gradio_available, requires

Expand Down Expand Up @@ -36,15 +34,13 @@ class ServeGradio(LightningWork, abc.ABC):
title: Optional[str] = None
description: Optional[str] = None

_start_method = "spawn"

def __init__(self, *args, **kwargs):
requires("gradio")(super().__init__(*args, **kwargs))
assert self.inputs
assert self.outputs
self._model = None
# Note: Enable to run inference on GPUs.
self._run_executor_cls = (
WorkRunExecutor if os.getenv("LIGHTNING_CLOUD_APP_ID", None) else _PyTorchSpawnRunExecutor
)

@property
def model(self):
Expand Down Expand Up @@ -78,3 +74,6 @@ def run(self, *args, **kwargs):
server_port=self.port,
enable_queue=self.enable_queue,
)

def configure_layout(self) -> str:
return self.url
Loading