
Commit 341923b

Authored by AzizCode92, mgoin, and gemini-code-assist[bot]
fix(tests): Ensure reliable CUDA cache clearing in MoE test (#23416)
Signed-off-by: AzizCode92 <[email protected]>
Signed-off-by: Michael Goin <[email protected]>
Co-authored-by: Michael Goin <[email protected]>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 424fb7a commit 341923b

File tree

1 file changed: +1 −1 lines changed


tests/kernels/moe/test_moe.py

Lines changed: 1 addition & 1 deletion
@@ -429,11 +429,11 @@ def test_mixtral_moe(dtype: torch.dtype, padding: bool, use_rocm_aiter: bool,
             vllm_moe.experts.w13_weight, (0, 128), "constant", 0)[...,
                                                                   0:-128],
                                                    requires_grad=False)
-        torch.cuda.empty_cache()
         vllm_moe.experts.w2_weight = Parameter(F.pad(
             vllm_moe.experts.w2_weight, (0, 128), "constant", 0)[...,
                                                                  0:-128],
                                                    requires_grad=False)
+        torch.cuda.synchronize()
         torch.cuda.empty_cache()

         # Run forward passes for both MoE blocks
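For context, the pattern this commit moves to is calling torch.cuda.synchronize() before torch.cuda.empty_cache(), so that any queued kernels that may still reference the old (pre-padding) weight storage have finished before the caching allocator is asked to release its cached blocks. Below is a minimal, self-contained sketch of that pattern; it is not the vLLM test itself, and the repad_weight helper, tensor shape, and dtype are illustrative assumptions.

import torch
import torch.nn.functional as F
from torch.nn import Parameter

def repad_weight(weight: Parameter) -> Parameter:
    # Illustrative helper (not from vLLM): pad the last dim by 128 zeros,
    # then slice the padding back off. The original storage becomes
    # unreferenced and eligible for reuse by the caching allocator.
    padded = F.pad(weight, (0, 128), "constant", 0)[..., 0:-128]
    return Parameter(padded, requires_grad=False)

if torch.cuda.is_available():
    # Hypothetical weight tensor standing in for the MoE expert weights.
    w = Parameter(torch.randn(8, 1024, 1024, device="cuda", dtype=torch.float16),
                  requires_grad=False)
    w = repad_weight(w)
    # Wait for all in-flight CUDA work that might still read the old
    # storage, then release cached allocator blocks back to the driver.
    torch.cuda.synchronize()
    torch.cuda.empty_cache()

Synchronizing first makes the subsequent empty_cache() deterministic with respect to asynchronously launched kernels, which is what "reliable CUDA cache clearing" refers to in the commit title.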
