From f5badecf592de2f7845a1e3dd8abd79162797104 Mon Sep 17 00:00:00 2001 From: GdoongMathew Date: Thu, 21 Aug 2025 00:56:50 +0800 Subject: [PATCH 1/4] fix mis-alignment column while using rich model summary in DeepSpeed strategy. --- .../pytorch/callbacks/rich_model_summary.py | 7 +++- .../model_summary/model_summary_deepspeed.py | 1 + .../utilities/test_deepspeed_model_summary.py | 39 +++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/lightning/pytorch/callbacks/rich_model_summary.py b/src/lightning/pytorch/callbacks/rich_model_summary.py index ce00ae06890f6..2843806ca595a 100644 --- a/src/lightning/pytorch/callbacks/rich_model_summary.py +++ b/src/lightning/pytorch/callbacks/rich_model_summary.py @@ -79,6 +79,7 @@ def summarize( from rich.table import Table console = get_console() + column_names = list(zip(*summary_data))[0] header_style: str = summarize_kwargs.get("header_style", "bold magenta") table = Table(header_style=header_style) @@ -86,11 +87,13 @@ def summarize( table.add_column("Name", justify="left", no_wrap=True) table.add_column("Type") table.add_column("Params", justify="right") + + if "Params per Device" in column_names: + table.add_column("Params per Device", justify="right") + table.add_column("Mode") table.add_column("FLOPs", justify="right") - column_names = list(zip(*summary_data))[0] - for column_name in ["In sizes", "Out sizes"]: if column_name in column_names: table.add_column(column_name, justify="right", style="white") diff --git a/src/lightning/pytorch/utilities/model_summary/model_summary_deepspeed.py b/src/lightning/pytorch/utilities/model_summary/model_summary_deepspeed.py index 5038aebf0db79..3b6e022cd86b3 100644 --- a/src/lightning/pytorch/utilities/model_summary/model_summary_deepspeed.py +++ b/src/lightning/pytorch/utilities/model_summary/model_summary_deepspeed.py @@ -99,6 +99,7 @@ def _get_summary_data(self) -> list[tuple[str, list[str]]]: ("Params", list(map(get_human_readable_count, 
self.param_nums))), ("Params per Device", list(map(get_human_readable_count, self.parameters_per_layer))), ("Mode", ["train" if mode else "eval" for mode in self.training_modes]), + ("FLOPs", list(map(get_human_readable_count, (sum(x.values()) for x in self.flop_counts.values())))), ] if self._model.example_input_array is not None: arrays.append(("In sizes", [str(x) for x in self.in_sizes])) diff --git a/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py b/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py index 256233e01fa98..54897e2743545 100644 --- a/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py +++ b/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock + +import torch + import lightning.pytorch as pl from lightning.pytorch import Callback, Trainer from lightning.pytorch.demos.boring_classes import BoringModel @@ -51,3 +55,38 @@ def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") - ) trainer.fit(model) + + +@RunIf(deepspeed=True, rich=True) +@mock.patch("rich.table.Table.add_row", autospec=True) +def test_deepspeed_summary_with_rich_model_summary(mock_table_add_row, tmp_path): + from lightning.pytorch.callbacks import RichModelSummary + + model = BoringModel() + model.example_input_array = torch.randn(4, 32) + + trainer = Trainer( + strategy=DeepSpeedStrategy(stage=3), + default_root_dir=tmp_path, + accelerator="gpu", + fast_dev_run=True, + devices=1, + enable_model_summary=True, + callbacks=[RichModelSummary()], + ) + + trainer.fit(model) + + # assert that the input summary data was converted correctly + args, _ = mock_table_add_row.call_args_list[0] + assert args[1:] == ( + "0", + "layer", + "Linear", + "66 ", + "66 ", + "train", + "512 ", + "[4, 32]", + "[4, 2]", + ) From 7cf97c216a769dd5e3a076aa6cd6371cb8aca675 Mon Sep 17 00:00:00 2001 
From: GdoongMathew Date: Thu, 21 Aug 2025 01:40:53 +0800 Subject: [PATCH 2/4] test: add minimum gpu requirement in `test_deepspeed_summary_with_rich_model_summary` --- tests/tests_pytorch/utilities/test_deepspeed_model_summary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py b/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py index 54897e2743545..3679c0d52f8fe 100644 --- a/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py +++ b/tests/tests_pytorch/utilities/test_deepspeed_model_summary.py @@ -57,7 +57,7 @@ def on_fit_start(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") - trainer.fit(model) -@RunIf(deepspeed=True, rich=True) +@RunIf(min_cuda_gpus=1, deepspeed=True, rich=True) @mock.patch("rich.table.Table.add_row", autospec=True) def test_deepspeed_summary_with_rich_model_summary(mock_table_add_row, tmp_path): from lightning.pytorch.callbacks import RichModelSummary From dc757ac1f81084186280b323206dabdac42ca358 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Thu, 21 Aug 2025 09:43:44 +0200 Subject: [PATCH 3/4] chlog --- src/lightning/pytorch/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index 107c389dba590..2a92e7bcf25be 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -33,6 +33,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed learning rate not being correctly set after using `LearningRateFinder` callback ([#21068](https://github.com/Lightning-AI/pytorch-lightning/pull/21068)) + +- fix mis-alignment column while using rich model summary in `DeepSpeedstrategy` ([#21100](https://github.com/Lightning-AI/pytorch-lightning/pull/21100)) + + --- ## [2.5.3] - 2025-08-13 From 0675023288e44e952b6b0bbda01b66586246dae6 Mon Sep 17 00:00:00 2001 From: Jirka B Date: Thu, 21 Aug 2025 13:50:26 +0200 Subject: [PATCH 4/4] typo --- src/lightning/pytorch/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index 946f01a93b68f..96f85265cc6ac 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -37,7 +37,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed learning rate not being correctly set after using `LearningRateFinder` callback ([#21068](https://github.com/Lightning-AI/pytorch-lightning/pull/21068)) -- Fixed mis-alignment column while using rich model summary in `DeepSpeedstrategy` ([#21100](https://github.com/Lightning-AI/pytorch-lightning/pull/21100)) +- Fixed misaligned columns when using the rich model summary with `DeepSpeedStrategy` ([#21100](https://github.com/Lightning-AI/pytorch-lightning/pull/21100)) ---