Commit 188b7f9

[Performance][ROCm] Add skinny gemms for unquantized linear on ROCm (#15830)
Signed-off-by: charlifu <[email protected]>
Co-authored-by: Tyler Michael Smith <[email protected]>
1 parent b9b4746 commit 188b7f9

File tree

12 files changed: +1957, −95 lines


CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -678,6 +678,7 @@ if(VLLM_GPU_LANG STREQUAL "HIP")
   #
   set(VLLM_ROCM_EXT_SRC
     "csrc/rocm/torch_bindings.cpp"
+    "csrc/rocm/skinny_gemms.cu"
     "csrc/rocm/attention.cu")
 
   define_gpu_extension_target(

csrc/rocm/ops.h

Lines changed: 9 additions & 0 deletions
@@ -2,6 +2,15 @@
 
 #include <torch/all.h>
 
+torch::Tensor LLMM1(at::Tensor& in_a, at::Tensor& in_b,
+                    const int64_t rows_per_block);
+
+torch::Tensor wvSplitK(at::Tensor& in_a, at::Tensor& in_b,
+                       const int64_t CuCount);
+
+void wvSplitKQ(at::Tensor& in_a, at::Tensor& in_b, at::Tensor& out_c,
+               at::Tensor& scale_a, at::Tensor& scale_b, const int64_t CuCount);
+
 void paged_attention(torch::Tensor& out, torch::Tensor& exp_sums,
                      torch::Tensor& max_logits, torch::Tensor& tmp_out,
                      torch::Tensor& query, torch::Tensor& key_cache,
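
The header change above declares the three new skinny-GEMM entry points: LLMM1 and wvSplitK return a new tensor for unquantized inputs, while wvSplitKQ writes into a caller-provided out_c and takes scale_a/scale_b, which suggests it serves the scaled (quantized) path. Below is a minimal C++ sketch, not part of this commit, of how a caller might dispatch between LLMM1 and wvSplitK for a skinny (small-token-count) matmul; the helper name dispatch_skinny_gemm, the shape threshold, and the rows_per_block value are illustrative assumptions rather than vLLM's actual selection logic.

// Sketch only: assumes the declarations from csrc/rocm/ops.h shown above.
#include <torch/all.h>
#include "csrc/rocm/ops.h"  // LLMM1, wvSplitK, wvSplitKQ declarations

// in_a: weight matrix, in_b: skinny activation matrix (few tokens).
// cu_count: number of GPU compute units, consumed by the split-K kernels.
torch::Tensor dispatch_skinny_gemm(at::Tensor& in_a, at::Tensor& in_b,
                                   int64_t cu_count) {
  const int64_t n = in_b.size(0);  // assumed: token (batch) dimension
  if (n <= 4) {
    // Assumed heuristic: very skinny inputs use the simple fused kernel,
    // which processes a fixed number of output rows per thread block.
    const int64_t rows_per_block = 4;  // illustrative value
    return LLMM1(in_a, in_b, rows_per_block);
  }
  // Otherwise use the split-K variant, which spreads the reduction
  // across the available compute units.
  return wvSplitK(in_a, in_b, cu_count);
}

The actual dispatch thresholds and the wiring into the unquantized linear layer live in the Python and binding changes elsewhere in this commit's 12 files.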
