
Commit 6c834dd

WoosukKwon authored and zhewenl committed
[BugFix][Spec Decode] Use float64 for uniform_probs (vllm-project#23803)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent 544ac6e commit 6c834dd

File tree

2 files changed: +7 −2 lines


examples/offline_inference/spec_decode.py

Lines changed: 1 addition & 1 deletion

@@ -138,7 +138,7 @@ def main():
     sampling_params = SamplingParams(temperature=args.temp, max_tokens=args.output_len)
     if not args.custom_mm_prompts:
         outputs = llm.generate(
-            TokensPrompt(prompt_token_ids=prompt_ids),
+            [TokensPrompt(prompt_token_ids=x) for x in prompt_ids],
             sampling_params=sampling_params,
         )
     else:
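The change above fixes the example script: `prompt_ids` is a list of token-id lists, one per request, so each entry must be wrapped in its own `TokensPrompt` rather than handing the whole list to a single `TokensPrompt`. A minimal sketch of the corrected pattern, with a placeholder model name and toy token ids (`LLM`, `SamplingParams`, and `TokensPrompt` are vLLM's public API):

# Batched generation from pre-tokenized prompts. The model name and
# token ids are illustrative placeholders, not values from this commit.
from vllm import LLM, SamplingParams
from vllm.inputs import TokensPrompt

llm = LLM(model="facebook/opt-125m")
sampling_params = SamplingParams(temperature=0.0, max_tokens=16)

# One token-id list per request.
prompt_ids = [[1, 2, 3], [4, 5, 6, 7]]

# Wrap each request's ids in its own TokensPrompt, as in the diff above.
outputs = llm.generate(
    [TokensPrompt(prompt_token_ids=x) for x in prompt_ids],
    sampling_params=sampling_params,
)
for output in outputs:
    print(output.outputs[0].text)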

vllm/v1/sample/rejection_sampler.py

Lines changed: 6 additions & 1 deletion

@@ -365,9 +365,14 @@ def generate_uniform_probs(
         A tensor of shape `(num_tokens, )` containing uniform
         random values in the range [0, 1).
     """
+    # NOTE(woosuk): We deliberately use float64 instead of float32 here
+    # because when using float32, there's a non-negligible chance that
+    # uniform_prob is sampled to be exact 0.0 as reported in
+    # https://github.com/pytorch/pytorch/issues/16706. Using float64
+    # mitigates the issue.
     uniform_probs = torch.rand(
         (num_tokens, ),
-        dtype=torch.float32,
+        dtype=torch.float64,
         device=device,
     )
     start_idx = 0
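The NOTE added in the diff points at pytorch/pytorch#16706: in float32, `torch.rand` returns exactly 0.0 with probability roughly 2^-24 per sample, which is frequent enough to matter at serving scale and is a value the rejection sampler's acceptance test does not expect. Below is a small, self-contained demonstration of that failure mode; it is illustrative only, not vLLM code, and the sample count is arbitrary (it allocates a few hundred MB):

# Shows that float32 torch.rand can emit exact zeros, while float64
# makes them astronomically unlikely.
import torch

n = 100_000_000  # ~0.4 GB in float32; expect a handful of exact zeros

u32 = torch.rand(n, dtype=torch.float32)
print("float32 exact zeros:", (u32 == 0.0).sum().item())  # typically > 0

u64 = torch.rand(n, dtype=torch.float64)
print("float64 exact zeros:", (u64 == 0.0).sum().item())  # virtually always 0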
