Skip to content

Commit c57af59

Browse files
Sherin Thomas authored and Borda committed
[App] Min replica=0 would break autoscaler component (#16092)
* fixing the bug where num_replica=0 would fail
* changelog

(cherry picked from commit aba5f12)
1 parent a0f8f70 commit c57af59

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

src/lightning_app/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
6262
- Fixed a bug where apps that had previously been deleted could not be run again from the CLI ([#16082](https://github.com/Lightning-AI/lightning/pull/16082))
6363
- Fixed install/upgrade - removing single quote ([#16079](https://github.com/Lightning-AI/lightning/pull/16079))
6464

65+
- Fixed a bug where `AutoScaler` would fail with `min_replicas=0` ([#16092](https://github.com/Lightning-AI/lightning/pull/16092))
66+
6567

6668
## [1.8.4] - 2022-12-08
6769

src/lightning_app/components/serve/auto_scaler.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ def run(self):
224224
security = HTTPBasic()
225225
fastapi_app.SEND_TASK = None
226226

227+
input_type = self._input_type
228+
227229
@fastapi_app.middleware("http")
228230
async def current_request_counter(request: Request, call_next):
229231
if not request.scope["path"] == self.endpoint:
@@ -281,7 +283,7 @@ async def update_servers(servers: List[str], authenticated: bool = Depends(authe
281283
self._iter = cycle(self.servers)
282284

283285
@fastapi_app.post(self.endpoint, response_model=self._output_type)
284-
async def balance_api(inputs: self._input_type):
286+
async def balance_api(inputs: input_type):
285287
return await self.process_request(inputs)
286288

287289
endpoint_info_page = self._get_endpoint_info_page()
@@ -578,9 +580,13 @@ def scale(self, replicas: int, metrics: dict) -> int:
578580
The target number of running works. The value will be adjusted after this method runs
579581
so that it satisfies ``min_replicas<=replicas<=max_replicas``.
580582
"""
581-
pending_requests_per_running_or_pending_work = metrics["pending_requests"] / (
582-
replicas + metrics["pending_works"]
583-
)
583+
pending_requests = metrics["pending_requests"]
584+
active_or_pending_works = replicas + metrics["pending_works"]
585+
586+
if active_or_pending_works == 0:
587+
return 1 if pending_requests > 0 else 0
588+
589+
pending_requests_per_running_or_pending_work = pending_requests / active_or_pending_works
584590

585591
# scale out if the number of pending requests exceeds max batch size.
586592
max_requests_per_work = self.max_batch_size

tests/tests_app/components/serve/test_auto_scaler.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,24 @@ def test_scale(replicas, metrics, expected_replicas):
9393
assert auto_scaler.scale(replicas, metrics) == expected_replicas
9494

9595

96+
def test_scale_from_zero_min_replica():
97+
auto_scaler = AutoScaler(
98+
EmptyWork,
99+
min_replicas=0,
100+
max_replicas=2,
101+
max_batch_size=10,
102+
)
103+
104+
resp = auto_scaler.scale(0, {"pending_requests": 0, "pending_works": 0})
105+
assert resp == 0
106+
107+
resp = auto_scaler.scale(0, {"pending_requests": 1, "pending_works": 0})
108+
assert resp == 1
109+
110+
resp = auto_scaler.scale(0, {"pending_requests": 1, "pending_works": 1})
111+
assert resp <= 0
112+
113+
96114
def test_create_work_cloud_compute_cloned():
97115
"""Test CloudCompute is cloned to avoid creating multiple works in a single machine."""
98116
cloud_compute = CloudCompute("gpu")

0 commit comments

Comments
 (0)