From 8c5670b4b7cfa53038f45fca94b36ce16d61f1da Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 17 Jan 2023 01:10:35 +0900 Subject: [PATCH 1/8] enable retry in LightningClient --- src/lightning_app/utilities/network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lightning_app/utilities/network.py b/src/lightning_app/utilities/network.py index f4ea59739b742..146a8aede0590 100644 --- a/src/lightning_app/utilities/network.py +++ b/src/lightning_app/utilities/network.py @@ -113,8 +113,8 @@ class LightningClient(GridRestClient): retry: Whether API calls should follow a retry mechanism with exponential backoff. """ - def __new__(cls, *args: Any, **kwargs: Any) -> "LightningClient": - if kwargs.get("retry", False): + def __new__(cls, *args: Any, retry=True, **kwargs: Any) -> "LightningClient": + if retry: for base_class in GridRestClient.__mro__: for name, attribute in base_class.__dict__.items(): if callable(attribute) and attribute.__name__ != "__init__": From 71cf4a29f8ecccb54a72663dac956af5f2129127 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 17 Jan 2023 01:21:52 +0900 Subject: [PATCH 2/8] udpate chglog --- src/lightning_app/CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 068b491c16a9a..9041e6cc5df06 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ([#16002](https://github.com/Lightning-AI/lightning/pull/16002)) - Changed `lightning_app.components.serve.gradio` to `lightning_app.components.serve.gradio_server` ([#16201](https://github.com/Lightning-AI/lightning/pull/16201)) - Made cluster creation/deletion async by default ([#16185](https://github.com/Lightning-AI/lightning/pull/16185)) +- Changed the default `LightningClient(retry=False)` to `retry=True` ([#16382](https://github.com/Lightning-AI/lightning/pull/16382)) ### Fixed From d9a46b9e9c916614472358f410e49616d09da77d Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Tue, 17 Jan 2023 01:24:00 +0900 Subject: [PATCH 3/8] Use default --- src/lightning_app/runners/backends/cloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning_app/runners/backends/cloud.py b/src/lightning_app/runners/backends/cloud.py index 356cc987f0d82..84eb85e066f81 100644 --- a/src/lightning_app/runners/backends/cloud.py +++ b/src/lightning_app/runners/backends/cloud.py @@ -11,7 +11,7 @@ class CloudBackend(Backend): def __init__(self, entrypoint_file, queue_id: Optional[str] = None, status_update_interval: int = None): super().__init__(entrypoint_file, queues=QueuingSystem.MULTIPROCESS, queue_id=queue_id) - self.client = LightningClient(retry=True) + self.client = LightningClient() def create_work(self, app: "lightning_app.LightningApp", work: "lightning_app.LightningWork") -> None: raise NotImplementedError From ba7051f67ca3282b7e61e7b81abca2cce0d7d535 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 21 Jan 2023 01:10:12 +0900 Subject: [PATCH 4/8] update test --- tests/tests_app/utilities/test_network.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tests_app/utilities/test_network.py b/tests/tests_app/utilities/test_network.py index 2247f4f8c42e1..69c9db49eb41f 100644 --- a/tests/tests_app/utilities/test_network.py +++ b/tests/tests_app/utilities/test_network.py @@ -9,9 +9,9 @@ def test_port(): def test_lightning_client_retry_enabled(): with patch("lightning_app.utilities.network._retry_wrapper") as wrapper: - LightningClient() # default: retry=False + LightningClient(retry=False) wrapper.assert_not_called() with patch("lightning_app.utilities.network._retry_wrapper") as wrapper: - LightningClient(retry=True) + LightningClient() # default: retry=True wrapper.assert_called() From fdd9ceceebaeb2fabcc4acd3b1ce389b4a080076 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 21 Jan 2023 01:28:27 +0900 Subject: [PATCH 5/8] keep retry=False in some places --- src/lightning_app/cli/cmd_apps.py | 2 +- src/lightning_app/cli/cmd_clusters.py | 2 +- src/lightning_app/cli/cmd_ssh_keys.py | 2 +- src/lightning_app/cli/commands/connection.py | 2 +- src/lightning_app/cli/commands/logs.py | 2 +- src/lightning_app/cli/lightning_cli.py | 2 +- src/lightning_app/utilities/cli_helpers.py | 2 +- src/lightning_app/utilities/commands/base.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/lightning_app/cli/cmd_apps.py b/src/lightning_app/cli/cmd_apps.py index 2f9943054e00e..6c1bac7a1cd3c 100644 --- a/src/lightning_app/cli/cmd_apps.py +++ b/src/lightning_app/cli/cmd_apps.py @@ -22,7 +22,7 @@ class _AppManager: """_AppManager implements API calls specific to Lightning AI BYOC apps.""" def __init__(self) -> None: - self.api_client = LightningClient() + self.api_client = LightningClient(retry=False) def get_cluster(self, cluster_id: str) -> V1GetClusterResponse: return self.api_client.cluster_service_get_cluster(id=cluster_id) diff --git a/src/lightning_app/cli/cmd_clusters.py b/src/lightning_app/cli/cmd_clusters.py index 35aefa57604fa..97dcaff142fbd 100644 --- a/src/lightning_app/cli/cmd_clusters.py +++ b/src/lightning_app/cli/cmd_clusters.py @@ -99,7 +99,7 @@ class AWSClusterManager: is selected as the backend compute.""" def __init__(self) -> None: - self.api_client = LightningClient() + self.api_client = LightningClient(retry=False) def create( self, diff --git a/src/lightning_app/cli/cmd_ssh_keys.py b/src/lightning_app/cli/cmd_ssh_keys.py index 1f32796076ea1..fd7e69333170f 100644 --- a/src/lightning_app/cli/cmd_ssh_keys.py +++ b/src/lightning_app/cli/cmd_ssh_keys.py @@ -33,7 +33,7 @@ class _SSHKeyManager: """_SSHKeyManager implements API calls specific to Lightning AI SSH-Keys.""" def __init__(self) -> None: - self.api_client = LightningClient() + self.api_client = LightningClient(retry=False) def get_ssh_keys(self) -> _SSHKeyList: resp = self.api_client.s_sh_public_key_service_list_ssh_public_keys() diff --git a/src/lightning_app/cli/commands/connection.py b/src/lightning_app/cli/commands/connection.py index f5600947ab427..df4ad084e7a17 100644 --- a/src/lightning_app/cli/commands/connection.py +++ b/src/lightning_app/cli/commands/connection.py @@ -151,7 +151,7 @@ def connect(app_name_or_id: str): retriever = _LightningAppOpenAPIRetriever(app_name_or_id) if not retriever.api_commands: - client = LightningClient() + client = LightningClient(retry=False) project = _get_project(client) apps = client.lightningapp_instance_service_list_lightningapp_instances(project_id=project.project_id) click.echo( diff --git a/src/lightning_app/cli/commands/logs.py b/src/lightning_app/cli/commands/logs.py index 0f90d2b7ad69f..a55a9da000e9d 100644 --- a/src/lightning_app/cli/commands/logs.py +++ b/src/lightning_app/cli/commands/logs.py @@ -39,7 +39,7 @@ def logs(app_name: str, components: List[str], follow: bool) -> None: def _show_logs(app_name: str, components: List[str], follow: bool) -> None: - client = LightningClient() + client = LightningClient(retry=False) project = _get_project(client) apps = { diff --git a/src/lightning_app/cli/lightning_cli.py b/src/lightning_app/cli/lightning_cli.py index 171c6e6ac29c9..aa478927caba9 100644 --- a/src/lightning_app/cli/lightning_cli.py +++ b/src/lightning_app/cli/lightning_cli.py @@ -158,7 +158,7 @@ def cluster_logs(cluster_id: str, to_time: arrow.Arrow, from_time: arrow.Arrow, $ lightning show cluster logs my-cluster --limit 10 """ - client = LightningClient() + client = LightningClient(retry=False) cluster_manager = AWSClusterManager() existing_cluster_list = cluster_manager.get_clusters() diff --git a/src/lightning_app/utilities/cli_helpers.py b/src/lightning_app/utilities/cli_helpers.py index b45840a0d9489..a2d4e113c41c9 100644 --- a/src/lightning_app/utilities/cli_helpers.py +++ b/src/lightning_app/utilities/cli_helpers.py @@ -160,7 +160,7 @@ def _maybe_find_url(self): def _maybe_find_matching_cloud_app(self): """Tries to resolve the app url from the provided `app_id_or_name_or_url`.""" - client = LightningClient() + client = LightningClient(retry=False) project = _get_project(client) list_apps = client.lightningapp_instance_service_list_lightningapp_instances(project_id=project.project_id) diff --git a/src/lightning_app/utilities/commands/base.py b/src/lightning_app/utilities/commands/base.py index 8b0abd210b4be..a87b45da456ba 100644 --- a/src/lightning_app/utilities/commands/base.py +++ b/src/lightning_app/utilities/commands/base.py @@ -107,7 +107,7 @@ def _download_command( if not debug_mode: if app_id: if not os.path.exists(target_file): - client = LightningClient() + client = LightningClient(retry=False) project_id = _get_project(client).project_id response = client.lightningapp_instance_service_list_lightningapp_instance_artifacts( project_id=project_id, id=app_id From 9f61b2fea1c4a1954c53ea14dc907e6bb31392f9 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 21 Jan 2023 01:29:24 +0900 Subject: [PATCH 6/8] udpate chglog --- src/lightning_app/CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index ba29d30148024..2fceed755d9cf 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -14,7 +14,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed -- +- Changed the default `LightningClient(retry=False)` to `retry=True` ([#16382](https://github.com/Lightning-AI/lightning/pull/16382)) ### Deprecated @@ -47,7 +47,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `lightning_app.components.serve.gradio` to `lightning_app.components.serve.gradio_server` ([#16201](https://github.com/Lightning-AI/lightning/pull/16201)) - Made cluster creation/deletion async by default ([#16185](https://github.com/Lightning-AI/lightning/pull/16185)) - Expose `LightningFlow.stop` method to stop the flow similar to works ([##16378](https://github.com/Lightning-AI/lightning/pull/16378)) -- Changed the default `LightningClient(retry=False)` to `retry=True` ([#16382](https://github.com/Lightning-AI/lightning/pull/16382)) ### Fixed From 993cd14bcde443c8e64d98c5a0738fc618bdfcf4 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 23 Jan 2023 16:38:51 +0000 Subject: [PATCH 7/8] update --- src/lightning_app/utilities/network.py | 10 +++------- tests/tests_app/utilities/test_network.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/src/lightning_app/utilities/network.py b/src/lightning_app/utilities/network.py index 146a8aede0590..9efce0b934efc 100644 --- a/src/lightning_app/utilities/network.py +++ b/src/lightning_app/utilities/network.py @@ -113,17 +113,13 @@ class LightningClient(GridRestClient): retry: Whether API calls should follow a retry mechanism with exponential backoff. """ - def __new__(cls, *args: Any, retry=True, **kwargs: Any) -> "LightningClient": + def __init__(self, retry: bool = True) -> None: + super().__init__(api_client=create_swagger_client()) if retry: for base_class in GridRestClient.__mro__: for name, attribute in base_class.__dict__.items(): if callable(attribute) and attribute.__name__ != "__init__": - setattr(cls, name, _retry_wrapper(attribute)) - return super().__new__(cls) - - def __init__(self, retry: bool = False) -> None: - super().__init__(api_client=create_swagger_client()) - self._retry = retry + setattr(self, name, _retry_wrapper(attribute)) class CustomRetryAdapter(HTTPAdapter): diff --git a/tests/tests_app/utilities/test_network.py b/tests/tests_app/utilities/test_network.py index 69c9db49eb41f..b992fe738efe8 100644 --- a/tests/tests_app/utilities/test_network.py +++ b/tests/tests_app/utilities/test_network.py @@ -1,5 +1,3 @@ -from unittest.mock import patch - from lightning_app.utilities.network import find_free_network_port, LightningClient @@ -8,10 +6,12 @@ def test_port(): def test_lightning_client_retry_enabled(): - with patch("lightning_app.utilities.network._retry_wrapper") as wrapper: - LightningClient(retry=False) - wrapper.assert_not_called() - with patch("lightning_app.utilities.network._retry_wrapper") as wrapper: - LightningClient() # default: retry=True - wrapper.assert_called() + client = LightningClient() # default: retry=True + assert hasattr(client.auth_service_get_user_with_http_info, "__wrapped__") + + client = LightningClient(retry=False) + assert not hasattr(client.auth_service_get_user_with_http_info, "__wrapped__") + + client = LightningClient(retry=True) + assert hasattr(client.auth_service_get_user_with_http_info, "__wrapped__") From b93f817128cd226aa17801e726b7cf1044fe7b77 Mon Sep 17 00:00:00 2001 From: thomas Date: Mon, 23 Jan 2023 16:59:10 +0000 Subject: [PATCH 8/8] update --- src/lightning_app/utilities/network.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lightning_app/utilities/network.py b/src/lightning_app/utilities/network.py index 9efce0b934efc..8c7c70900c963 100644 --- a/src/lightning_app/utilities/network.py +++ b/src/lightning_app/utilities/network.py @@ -65,7 +65,7 @@ def _get_next_backoff_time(num_retries: int, backoff_value: float = 0.5) -> floa return min(_DEFAULT_BACKOFF_MAX, next_backoff_value) -def _retry_wrapper(func: Callable) -> Callable: +def _retry_wrapper(self, func: Callable) -> Callable: """Returns the function decorated by a wrapper that retries the call several times if a connection error occurs. @@ -77,7 +77,7 @@ def wrapped(*args: Any, **kwargs: Any) -> Any: consecutive_errors = 0 while _get_next_backoff_time(consecutive_errors) != _DEFAULT_BACKOFF_MAX: try: - return func(*args, **kwargs) + return func(self, *args, **kwargs) except lightning_cloud.openapi.rest.ApiException as e: # retry if the control plane fails with all errors except 4xx but not 408 - (Request Timeout) if e.status == 408 or e.status == 409 or not str(e.status).startswith("4"): @@ -119,7 +119,7 @@ def __init__(self, retry: bool = True) -> None: for base_class in GridRestClient.__mro__: for name, attribute in base_class.__dict__.items(): if callable(attribute) and attribute.__name__ != "__init__": - setattr(self, name, _retry_wrapper(attribute)) + setattr(self, name, _retry_wrapper(self, attribute)) class CustomRetryAdapter(HTTPAdapter):