
Commit 04d33d3

Fix(scheduler): WarmupLR inherits optimizer lr when not specified
This commit ensures that the WarmupLR scheduler correctly inherits the learning rate from the optimizer's param_groups when `warmup_max_lr` is not explicitly provided in the scheduler's configuration. This prevents the scheduler from falling back to the hard-coded default of 0.001, aligning its behavior with user expectations.

Fixes #7303

Signed-off-by: Vensenmu <[email protected]>
1 parent f394e78 commit 04d33d3
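
For context, a hedged sketch of the scenario the commit message describes: a DeepSpeed config whose scheduler params omit `warmup_max_lr`. The config values below (Adam, lr=0.01) are illustrative assumptions, not taken from issue #7303.

# Illustrative DeepSpeed config fragment, written as a Python dict.
# "warmup_max_lr" is deliberately omitted from the scheduler params: with this
# commit the warmup target becomes the optimizer lr (0.01 here) instead of the
# previous hard-coded 0.001 default.
ds_config = {
    "optimizer": {"type": "Adam", "params": {"lr": 0.01}},
    "scheduler": {
        "type": "WarmupLR",
        "params": {"warmup_min_lr": 0.0, "warmup_num_steps": 1000},
    },
}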

File tree

1 file changed (+4, -1 lines)


deepspeed/runtime/lr_schedules.py

Lines changed: 4 additions & 1 deletion
@@ -655,13 +655,16 @@ class WarmupLR(object):
     def __init__(self,
                  optimizer: Optimizer,
                  warmup_min_lr: float = 0.0,
-                 warmup_max_lr: float = 0.001,
+                 warmup_max_lr: float = None,
                  warmup_num_steps: int = 1000,
                  warmup_type: str = WARMUP_LOG_RATE,
                  last_batch_iteration: int = -1):
 
         self.optimizer = get_torch_optimizer(optimizer)
 
+        if warmup_max_lr is None:
+            warmup_max_lr = [group['lr'] for group in self.optimizer.param_groups][0]
+
         self.min_lrs = self._format_param(self.optimizer, warmup_min_lr, "min_lr")
         self.max_lrs = self._format_param(self.optimizer, warmup_max_lr, "max_lr")
         self.delta_lrs = [big - small for big, small in zip(self.max_lrs, self.min_lrs)]
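
A minimal usage sketch of the new behavior, assuming the patched WarmupLR from deepspeed.runtime.lr_schedules and a local PyTorch install; the model and optimizer below are illustrative only.

import torch
from deepspeed.runtime.lr_schedules import WarmupLR

# Illustrative optimizer with lr=0.01; any torch optimizer should work here.
model = torch.nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# warmup_max_lr is omitted, so the scheduler should now warm up toward the
# optimizer's own lr (0.01) rather than the old hard-coded 0.001 default.
scheduler = WarmupLR(optimizer, warmup_min_lr=0.0, warmup_num_steps=100)
print(scheduler.max_lrs)  # expected: [0.01]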
