Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .actions/setup_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def _load_aggregate_requirements(req_dir: str = "requirements", freeze_requireme
load_requirements(d, file_name="base.txt", unfreeze=not freeze_requirements)
for d in glob.glob(os.path.join(req_dir, "*"))
# skip empty folder as git artefacts, and resolving Will's special issue
if os.path.isdir(d) and len(glob.glob(os.path.join(d, "*"))) > 0
if os.path.isdir(d) and len(glob.glob(os.path.join(d, "*"))) > 0 and "__pycache__" not in d
]
if not requires:
return None
Expand Down
2 changes: 1 addition & 1 deletion .azure/app-cloud-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
- job: App_cloud_e2e_testing
pool: azure-cpus
container:
image: mcr.microsoft.com/playwright/python:v1.27.1-focal
image: mcr.microsoft.com/playwright/python:v1.28.0-focal
options: "--shm-size=4gb"
strategy:
matrix:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-app-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ jobs:

- name: Adjust tests
if: ${{ matrix.pkg-name == 'lightning' }}
run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app" --target_import="lightning.app"
run: python .actions/assistant.py copy_replace_imports --source_dir="./tests" --source_import="lightning_app,lightning_lite,pytorch_lightning" --target_import="lightning.app,lightning.lite,lightning.pytorch"

- name: Adjust examples
if: ${{ matrix.pkg-name != 'lightning' }}
Expand Down
8 changes: 8 additions & 0 deletions docs/source-app/api_references.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,19 @@ ___________________
:nosignatures:
:template: classtemplate_no_index.rst

~database.client.DatabaseClient
~database.server.Database
~python.popen.PopenPythonScript
~python.tracer.TracerPythonScript
~training.LightningTrainerScript
~serve.gradio.ServeGradio
~serve.serve.ModelInferenceAPI
~serve.python_server.PythonServer
~serve.streamlit.ServeStreamlit
~multi_node.base.MultiNode
~multi_node.lite.LiteMultiNode
~multi_node.pytorch_spawn.PyTorchSpawnMultiNode
~multi_node.trainer.LightningTrainerMultiNode
~auto_scaler.AutoScaler

----
Expand Down
2 changes: 1 addition & 1 deletion requirements/app/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pytest==7.2.0
pytest-timeout==2.1.0
pytest-cov==4.0.0
pytest-doctestplus>=0.9.0
playwright==1.27.1
playwright==1.28.0
httpx
trio<0.22.0
pympler
Expand Down
2 changes: 1 addition & 1 deletion requirements/app/ui.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
streamlit>=1.3.1, <=1.11.1
streamlit>=1.0.0, <=1.15.2
panel>=0.12.7, <=0.13.1
8 changes: 5 additions & 3 deletions src/lightning_app/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,19 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

## [1.8.4] - 2022-12-06
## [1.8.4] - 2022-12-08

### Added

- Add `code_dir` argument to tracer run ([#15771](https://github.com/Lightning-AI/lightning/pull/15771))
- Added the CLI command `lightning run model` to launch a `LightningLite` accelerated script ([#15506](https://github.com/Lightning-AI/lightning/pull/15506))
- Added the CLI command `lightning delete app` to delete a lightning app on the cloud ([#15783](https://github.com/Lightning-AI/lightning/pull/15783))
- Added a CloudMultiProcessBackend which enables running a child App from within the Flow in the cloud ([#15800](https://github.com/Lightning-AI/lightning/pull/15800))
- Utility for pickling work object safely even from a child process ([#15836](https://github.com/Lightning-AI/lightning/pull/15836))
- Added `AutoScaler` component ([#15769](https://github.com/Lightning-AI/lightning/pull/15769))
- Added the property `ready` of the LightningFlow to inform when the `Open App` should be visible ([#15921](https://github.com/Lightning-AI/lightning/pull/15921))
- Added private work attributed `_start_method` to customize how to start the works ([#15923](https://github.com/Lightning-AI/lightning/pull/15923))

- Added a `configure_layout` method to the `LightningWork` which can be used to control how the work is handled in the layout of a parent flow ([#15926](https://github.com/Lightning-AI/lightning/pull/15926))

### Changed

Expand All @@ -37,12 +38,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed SSH CLI command listing stopped components ([#15810](https://github.com/Lightning-AI/lightning/pull/15810))
- Fixed bug when launching apps on multiple clusters ([#15484](https://github.com/Lightning-AI/lightning/pull/15484))
- Fixed Sigterm Handler causing thread lock which caused KeyboardInterrupt to hang ([#15881](https://github.com/Lightning-AI/lightning/pull/15881))
- Fixed MPS error for multinode component (defaults to cpu on mps devices now as distributed operations are not supported by pytorch on mps) ([#15748](https://github.com/Lightning-AI/lightning/pull/15748))
- Fixed the work not stopped when successful when passed directly to the LightningApp ([#15801](https://github.com/Lightning-AI/lightning/pull/15801))
- Fixed the PyTorch Inference locally on GPU ([#15813](https://github.com/Lightning-AI/lightning/pull/15813))
- Fixed the `enable_spawn` method of the `WorkRunExecutor` ([#15812](https://github.com/Lightning-AI/lightning/pull/15812))
- Fixed require/import decorator ([#15849](https://github.com/Lightning-AI/lightning/pull/15849))

- Fixed a bug where using `L.app.structures` would cause multiple apps to be opened and fail with an error in the cloud ([#15911](https://github.com/Lightning-AI/lightning/pull/15911))
- Fixed PythonServer generating noise on M1 ([#15949](https://github.com/Lightning-AI/lightning/pull/15949))


## [1.8.3] - 2022-11-22
Expand Down
2 changes: 0 additions & 2 deletions src/lightning_app/components/auto_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,6 @@ async def process_request(self, data: BaseModel):
return result

def run(self):

logger.info(f"servers: {self.servers}")
lock = asyncio.Lock()

Expand Down Expand Up @@ -271,7 +270,6 @@ async def sys_info(authenticated: bool = Depends(authenticate_private_endpoint))
async def update_servers(servers: List[str], authenticated: bool = Depends(authenticate_private_endpoint)):
async with lock:
self.servers = servers

self._iter = cycle(self.servers)

@fastapi_app.post(self.endpoint, response_model=self._output_type)
Expand Down
37 changes: 30 additions & 7 deletions src/lightning_app/components/multi_node/lite.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import importlib
import os
import warnings
from dataclasses import dataclass
from typing import Any, Callable, Type

Expand Down Expand Up @@ -30,8 +32,16 @@ def run(
node_rank: int,
nprocs: int,
):
from lightning.lite import LightningLite
from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy
lites = []
strategies = []
mps_accelerators = []

for pkg_name in ("lightning.lite", "lightning_" + "lite"):
pkg = importlib.import_module(pkg_name)
lites.append(pkg.LightningLite)
strategies.append(pkg.strategies.DDPSpawnShardedStrategy)
strategies.append(pkg.strategies.DDPSpawnStrategy)
mps_accelerators.append(pkg.accelerators.MPSAccelerator)

# Used to configure PyTorch progress group
os.environ["MASTER_ADDR"] = main_address
Expand All @@ -52,23 +62,36 @@ def run(
def pre_fn(lite, *args, **kwargs):
kwargs["devices"] = nprocs
kwargs["num_nodes"] = num_nodes
kwargs["accelerator"] = "auto"

if any(acc.is_available() for acc in mps_accelerators):
old_acc_value = kwargs.get("accelerator", "auto")
kwargs["accelerator"] = "cpu"

if old_acc_value != kwargs["accelerator"]:
warnings.warn("Forcing `accelerator=cpu` as MPS does not support distributed training.")
else:
kwargs["accelerator"] = "auto"
strategy = kwargs.get("strategy", None)
if strategy:
if isinstance(strategy, str):
if strategy == "ddp_spawn":
strategy = "ddp"
elif strategy == "ddp_sharded_spawn":
strategy = "ddp_sharded"
elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)):
raise Exception("DDP Spawned strategies aren't supported yet.")
elif isinstance(strategy, tuple(strategies)):
raise ValueError("DDP Spawned strategies aren't supported yet.")

kwargs["strategy"] = strategy

return {}, args, kwargs

tracer = Tracer()
tracer.add_traced(LightningLite, "__init__", pre_fn=pre_fn)
for ll in lites:
tracer.add_traced(ll, "__init__", pre_fn=pre_fn)
tracer._instrument()
work_run()
ret_val = work_run()
tracer._restore()
return ret_val


class LiteMultiNode(MultiNode):
Expand Down
2 changes: 1 addition & 1 deletion src/lightning_app/components/multi_node/pytorch_spawn.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def run(
elif world_size > 1:
raise Exception("Torch distributed should be available.")

work_run(world_size, node_rank, global_rank, local_rank)
return work_run(world_size, node_rank, global_rank, local_rank)


class PyTorchSpawnMultiNode(MultiNode):
Expand Down
37 changes: 28 additions & 9 deletions src/lightning_app/components/multi_node/trainer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import importlib
import os
import warnings
from dataclasses import dataclass
from typing import Any, Callable, Type

Expand Down Expand Up @@ -30,9 +32,16 @@ def run(
node_rank: int,
nprocs: int,
):
from lightning.lite.strategies import DDPSpawnShardedStrategy, DDPSpawnStrategy
from lightning.pytorch import Trainer as LTrainer
from pytorch_lightning import Trainer as PLTrainer
trainers = []
strategies = []
mps_accelerators = []

for pkg_name in ("lightning.pytorch", "pytorch_" + "lightning"):
pkg = importlib.import_module(pkg_name)
trainers.append(pkg.Trainer)
strategies.append(pkg.strategies.DDPSpawnShardedStrategy)
strategies.append(pkg.strategies.DDPSpawnStrategy)
mps_accelerators.append(pkg.accelerators.MPSAccelerator)

# Used to configure PyTorch progress group
os.environ["MASTER_ADDR"] = main_address
Expand All @@ -50,24 +59,34 @@ def run(
def pre_fn(trainer, *args, **kwargs):
kwargs["devices"] = nprocs
kwargs["num_nodes"] = num_nodes
kwargs["accelerator"] = "auto"
if any(acc.is_available() for acc in mps_accelerators):
old_acc_value = kwargs.get("accelerator", "auto")
kwargs["accelerator"] = "cpu"

if old_acc_value != kwargs["accelerator"]:
warnings.warn("Forcing `accelerator=cpu` as MPS does not support distributed training.")
else:
kwargs["accelerator"] = "auto"

strategy = kwargs.get("strategy", None)
if strategy:
if isinstance(strategy, str):
if strategy == "ddp_spawn":
strategy = "ddp"
elif strategy == "ddp_sharded_spawn":
strategy = "ddp_sharded"
elif isinstance(strategy, (DDPSpawnStrategy, DDPSpawnShardedStrategy)):
raise Exception("DDP Spawned strategies aren't supported yet.")
elif isinstance(strategy, tuple(strategies)):
raise ValueError("DDP Spawned strategies aren't supported yet.")
kwargs["strategy"] = strategy
return {}, args, kwargs

tracer = Tracer()
tracer.add_traced(PLTrainer, "__init__", pre_fn=pre_fn)
tracer.add_traced(LTrainer, "__init__", pre_fn=pre_fn)
for trainer in trainers:
tracer.add_traced(trainer, "__init__", pre_fn=pre_fn)
tracer._instrument()
work_run()
ret_val = work_run()
tracer._restore()
return ret_val


class LightningTrainerMultiNode(MultiNode):
Expand Down
3 changes: 3 additions & 0 deletions src/lightning_app/components/python/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ class Code(TypedDict):


class TracerPythonScript(LightningWork):

_start_method = "spawn"

def on_before_run(self):
"""Called before the python script is executed."""

Expand Down
11 changes: 5 additions & 6 deletions src/lightning_app/components/serve/gradio.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import abc
import os
from functools import partial
from types import ModuleType
from typing import Any, List, Optional

from lightning_app.components.serve.python_server import _PyTorchSpawnRunExecutor, WorkRunExecutor
from lightning_app.core.work import LightningWork
from lightning_app.utilities.imports import _is_gradio_available, requires

Expand Down Expand Up @@ -36,15 +34,13 @@ class ServeGradio(LightningWork, abc.ABC):
title: Optional[str] = None
description: Optional[str] = None

_start_method = "spawn"

def __init__(self, *args, **kwargs):
requires("gradio")(super().__init__(*args, **kwargs))
assert self.inputs
assert self.outputs
self._model = None
# Note: Enable to run inference on GPUs.
self._run_executor_cls = (
WorkRunExecutor if os.getenv("LIGHTNING_CLOUD_APP_ID", None) else _PyTorchSpawnRunExecutor
)

@property
def model(self):
Expand Down Expand Up @@ -78,3 +74,6 @@ def run(self, *args, **kwargs):
server_port=self.port,
enable_queue=self.enable_queue,
)

def configure_layout(self) -> str:
return self.url
Loading