Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
32eb9fc
Error when running on multiple clusters
luca3rd Nov 2, 2022
5a60a9d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 2, 2022
d9bc71c
Revert this in separate PR: keep this focused
luca3rd Nov 5, 2022
cad231f
Improve testing
luca3rd Nov 5, 2022
0aa51d8
fixup! Improve testing
luca3rd Nov 6, 2022
9de897e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 6, 2022
18b8b70
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 7, 2022
ac9ccab
pass flake8
luca3rd Nov 7, 2022
97eb392
Update changelog
luca3rd Nov 7, 2022
17af0f8
Address PR feedback
luca3rd Nov 9, 2022
be6b745
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 9, 2022
2af76cd
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 9, 2022
58c733f
remove unused import
luca3rd Nov 9, 2022
234c782
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 10, 2022
d7bd429
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 21, 2022
7a093a8
Reword error message
luca3rd Nov 21, 2022
3474c5b
Error if running on cluster that doesn't exist
luca3rd Nov 21, 2022
9cd61e5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 21, 2022
0701419
fixup! Error if running on cluster that doesn't exist
luca3rd Nov 21, 2022
165d4ba
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 21, 2022
74945de
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
Borda Nov 21, 2022
e61ac3d
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 28, 2022
bc9fb75
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 28, 2022
511fac2
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 28, 2022
cc6f4e9
Remove unsued import
luca3rd Nov 29, 2022
09e06f4
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
Borda Nov 30, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/lightning_app/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed bug with Multi Node Component and add some examples ([#15557](https://github.com/Lightning-AI/lightning/pull/15557))

- Fixed bug when launching apps on multiple clusters ([#15484](https://github.com/Lightning-AI/lightning/pull/15484))


- Fixed an issue with the `lightning` CLI taking a long time to error out when the cloud is not reachable ([#15412](https://github.com/Lightning-AI/lightning/pull/15412))

Expand Down
17 changes: 17 additions & 0 deletions src/lightning_app/runners/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import traceback
from dataclasses import dataclass
from pathlib import Path
from textwrap import dedent
from typing import Any, Callable, List, Optional, Union

import click
Expand All @@ -16,6 +17,7 @@
Body7,
Body8,
Body9,
Externalv1LightningappInstance,
Gridv1ImageSpec,
V1BuildSpec,
V1DependencyFileInfo,
Expand Down Expand Up @@ -293,6 +295,7 @@ def dispatch(
elif CLOUD_QUEUE_TYPE == "redis":
queue_server_type = V1QueueServerType.REDIS

existing_instance: Optional[Externalv1LightningappInstance] = None
if find_instances_resp.lightningapps:
existing_instance = find_instances_resp.lightningapps[0]

Expand Down Expand Up @@ -337,6 +340,20 @@ def dispatch(
default=True,
)
app_config.cluster_id = None
if existing_instance and existing_instance.spec.cluster_id != app_config.cluster_id:
raise ValueError(
dedent(
f"""\
Can not run app {app_config.name} on cluster {app_config.cluster_id} since it already exists on {existing_instance.spec.cluster_id}
(moving apps between clusters is not supported).

You can either:
a. rename the app to run on {app_config.cluster_id} with the --name option
lightning run app script.py --name (new name) --cloud --cluster-id {app_config.cluster_id}
b. delete the app running on {existing_instance.spec.cluster_id} in the UI before running this command.
""" # noqa: E501
)
)

if app_config.cluster_id is not None:
self._ensure_cluster_project_binding(project.project_id, app_config.cluster_id)
Expand Down
88 changes: 51 additions & 37 deletions tests/tests_app/runners/test_cloud.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import os
from contextlib import nullcontext as does_not_raise
from copy import copy
from pathlib import Path
from unittest import mock
Expand Down Expand Up @@ -94,75 +95,88 @@ def get_cloud_runtime_request_body(**kwargs) -> "Body8":
return Body8(**default_request_body)


@pytest.fixture
def cloud_backend(monkeypatch):
cloud_backend = mock.MagicMock()
monkeypatch.setattr(cloud, "LocalSourceCodeDir", mock.MagicMock())
monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock())
monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend))
return cloud_backend


@pytest.fixture
def project_id():
return "test-project-id"


DEFAULT_CLUSTER = "litng-ai-03"


class TestAppCreationClient:
"""Testing the calls made using GridRestClient to create the app."""

# TODO: remove this test once there is support for multiple instances
@mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock())
def test_new_instance_on_different_cluster(self, monkeypatch):
app_name = "test-app-name"
original_cluster = "cluster-001"
new_cluster = "cluster-002"
@pytest.mark.parametrize(
"old_cluster,new_cluster,expected_raise",
[
(
"test",
"other",
pytest.raises(
ValueError,
match="Can not run app test-app on cluster other since it already exists on test",
),
),
("test", "test", does_not_raise()),
(None, None, does_not_raise()),
(None, "litng-ai-03", does_not_raise()),
("litng-ai-03", None, does_not_raise()),
],
)
def test_new_instance_on_different_cluster(
self, cloud_backend, project_id, old_cluster, new_cluster, expected_raise
):
app_name = "test-app"

mock_client = mock.MagicMock()
mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse(
memberships=[V1Membership(name="Default Project", project_id="default-project-id")]
memberships=[V1Membership(name="Default Project", project_id=project_id)]
)
mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease(
cluster_id=new_cluster
)

# Note:
# backend converts "None" cluster to "litng-ai-03"
# dispatch should receive None, but API calls should return "litng-ai-03"
mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse(
[Externalv1Cluster(id=original_cluster), Externalv1Cluster(id=new_cluster)]
[
Externalv1Cluster(id=old_cluster or DEFAULT_CLUSTER),
Externalv1Cluster(id=new_cluster or DEFAULT_CLUSTER),
]
)

cloud_backend = mock.MagicMock()
cloud_backend.client = mock_client
monkeypatch.setattr(cloud, "LocalSourceCodeDir", mock.MagicMock())
monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock())
monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend))

app = mock.MagicMock()
app.flows = []
app.frontend = {}

existing_instance = MagicMock()
existing_instance.status.phase = V1LightningappInstanceState.STOPPED
existing_instance.spec.cluster_id = original_cluster
existing_instance.spec.cluster_id = old_cluster or DEFAULT_CLUSTER
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=[existing_instance])
)

cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file="entrypoint.py")
cloud_runtime._check_uploaded_folder = mock.MagicMock()

# without requirements file
# setting is_file to False so requirements.txt existence check will return False
monkeypatch.setattr(Path, "is_file", lambda *args, **kwargs: False)
monkeypatch.setattr(cloud, "Path", Path)

# This is the main assertion:
# we have an existing instance on `cluster-001`
# but we want to run this app on `cluster-002`
cloud_runtime.dispatch(name=app_name, cluster_id=new_cluster)

body = Body8(
cluster_id=new_cluster,
app_entrypoint_file=mock.ANY,
enable_app_server=True,
flow_servers=[],
image_spec=None,
works=[],
local_source=True,
dependency_cache_key=mock.ANY,
user_requested_flow_compute_config=mock.ANY,
)
cloud_runtime.backend.client.lightningapp_v2_service_create_lightningapp_release.assert_called_once_with(
project_id="default-project-id", app_id=mock.ANY, body=body
)
cloud_runtime.backend.client.projects_service_create_project_cluster_binding.assert_called_once_with(
project_id="default-project-id",
body=V1ProjectClusterBinding(cluster_id=new_cluster, project_id="default-project-id"),
)
with expected_raise:
cloud_runtime.dispatch(name=app_name, cluster_id=new_cluster)

@pytest.mark.parametrize("flow_cloud_compute", [None, CloudCompute(name="t2.medium")])
@mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock())
Expand Down