Skip to content
Merged
Show file tree
Hide file tree
Changes from 79 commits
Commits
Show all changes
91 commits
Select commit Hold shift + click to select a range
34d7deb
Initialize a mooncake backend
UNIDY2002 Aug 12, 2025
fa80700
Add pybind
UNIDY2002 Aug 12, 2025
a401f32
Fix incorrect backend registration
UNIDY2002 Aug 12, 2025
b1e5dfc
Fix wheel building of mooncake_ep
UNIDY2002 Aug 13, 2025
b535bbb
Add a fake allreduce implementation
UNIDY2002 Aug 13, 2025
779b447
Introduce transfer_engine to mooncake_backend
UNIDY2002 Aug 14, 2025
fb8918e
Add a basic CPU proxy execution framework
UNIDY2002 Aug 14, 2025
cf75d22
Implement a seemingly working allgather
UNIDY2002 Aug 14, 2025
1427db9
Remove mooncake_ep's dependency on etcd
UNIDY2002 Aug 15, 2025
fab5451
Implement `_allgather_base`
UNIDY2002 Aug 15, 2025
bfe83e0
Implement `allreduce`
UNIDY2002 Aug 15, 2025
74834c1
Implement `alltoall`
UNIDY2002 Aug 15, 2025
3f72b68
Use an even-odd pattern for data transfer
UNIDY2002 Aug 18, 2025
1c548b3
Add a `set_host_ip` method
UNIDY2002 Aug 19, 2025
e99eb08
Switch to an extended-API implementation of the Mooncake backend
UNIDY2002 Aug 19, 2025
7bc9438
Implement `broadcast`
UNIDY2002 Aug 20, 2025
635197f
Implement `barrier`
UNIDY2002 Aug 20, 2025
b1a5a37
Extend Mooncake backend to CPU
UNIDY2002 Aug 20, 2025
9a610cb
Support more operations for reduction
UNIDY2002 Aug 20, 2025
1d06727
Fix the backend-worker coordination logic
UNIDY2002 Aug 20, 2025
6169ff9
Optimize CPU worker with a callback pattern
UNIDY2002 Aug 20, 2025
10f2f8e
Add a timeout-based broken-ranks detection
UNIDY2002 Aug 20, 2025
bbcf85c
Merge EP module into Mooncake's build system
UNIDY2002 Aug 25, 2025
964e0a9
Share transfer buffer across all worker instances
UNIDY2002 Aug 25, 2025
d391ba9
Switch to a more robust approach to detect broken ranks
UNIDY2002 Aug 26, 2025
2f308f9
Specify CUDA device for test_mooncake_backend.py
UNIDY2002 Aug 26, 2025
3a0d872
Explicitly stop mooncake worker
UNIDY2002 Aug 26, 2025
f20ffb2
Use transfer engine's notifications to implement collective signals
UNIDY2002 Aug 26, 2025
896f668
Remove the unused `all_reduce_without` API
UNIDY2002 Aug 27, 2025
9f82a37
Switch to mooncake backend for test_mooncake_ep.py
UNIDY2002 Aug 27, 2025
2795455
Support both IB and RoCE
UNIDY2002 Aug 27, 2025
80db270
Fix EP unit test
UNIDY2002 Aug 27, 2025
95d62ef
Pass the auto-detected nic_id to EP Buffer
UNIDY2002 Aug 27, 2025
d9c961d
Fix CMake conditional branches when `PYTORCH_CMAKE_PATH` is not set
UNIDY2002 Aug 28, 2025
6a07455
Fix ibgda syncing for RoCE
UNIDY2002 Aug 28, 2025
7ed6664
Revert "Share transfer buffer across all worker instances"
UNIDY2002 Aug 28, 2025
d3b9d64
Implement `_reduce_scatter_base`
UNIDY2002 Aug 28, 2025
05b1cb2
Make CPU backends aware of broken ranks
UNIDY2002 Aug 28, 2025
06653a2
Fix .typos.toml
UNIDY2002 Aug 28, 2025
f56c078
Add a perf test for mooncake backend
UNIDY2002 Aug 29, 2025
78fd0d2
Support more dtypes for reduction
UNIDY2002 Sep 1, 2025
57c634c
Revert "Use transfer engine's notifications to implement collective s…
UNIDY2002 Aug 28, 2025
ed9292d
Share worker thread among all process groups
UNIDY2002 Sep 2, 2025
5c17a19
Share transfer engine among all process groups
UNIDY2002 Sep 2, 2025
a20fa7d
Fix unit tests
UNIDY2002 Sep 2, 2025
438184f
Add a warmup phase for transfer engine
UNIDY2002 Sep 2, 2025
9df28c5
Fix transfer engine buffer locations
UNIDY2002 Sep 2, 2025
fa342f3
Fix incorrect calculation of mooncake ep buffer
UNIDY2002 Sep 3, 2025
3f51b5f
Do not use timeout detection in mooncake_ep tests
UNIDY2002 Sep 3, 2025
ec661f6
Update mooncake backend perf test
UNIDY2002 Sep 3, 2025
017d5d7
Demangle per-group buffer offset from the shared taskId
UNIDY2002 Sep 3, 2025
631aa1a
Stop allocating the useless `cuda_counter_buffer` and `cuda_data_buffer`
UNIDY2002 Sep 3, 2025
5d06444
Split the task list into a CPU region and a CUDA region
UNIDY2002 Sep 3, 2025
b271f04
Add a warmup for test_mooncake_backend_perf.py
UNIDY2002 Sep 3, 2025
6581ee4
Switch from raw cudaEvent to `torch::Event`
UNIDY2002 Sep 3, 2025
76b2a60
Fix MooncakeWorkCuda::wait() to make it compatible with cuda graphs
UNIDY2002 Sep 3, 2025
97e816f
Add doc
UNIDY2002 Sep 4, 2025
82cadfd
Fix perf test
UNIDY2002 Sep 4, 2025
d7a5ae6
Implement all-gather for perf test
UNIDY2002 Sep 4, 2025
7382f93
Move impl of `MooncakeEpBuffer`'s member functions to .cpp
UNIDY2002 Sep 4, 2025
22875b8
Change `gathered_experts` to `broken_nodes` to make the API more cons…
UNIDY2002 Sep 4, 2025
f238a5a
`broken_nodes` should be `broken_ranks`
UNIDY2002 Sep 4, 2025
3e6eb7e
API rename
UNIDY2002 Sep 4, 2025
a9bb0ef
Merge branch 'main' into sunxun/mooncake-backend-dev
UNIDY2002 Sep 4, 2025
6f21e23
Fix format
UNIDY2002 Sep 4, 2025
250f34b
Enable WITH_EP option in CI
UNIDY2002 Sep 4, 2025
e7d3ec4
Try installing torch in advance in CI
UNIDY2002 Sep 4, 2025
920d5dc
Set `TORCH_CUDA_ARCH_LIST` in CMakeLists.txt
UNIDY2002 Sep 5, 2025
657e22c
Install required dependencies in the CI CUDA environment
UNIDY2002 Sep 5, 2025
3b1ee99
[CI] Add the matching PyTorch
UNIDY2002 Sep 8, 2025
3d6f9fd
[CI] Add a workaround for missing `CUDA::nvToolsExt`
UNIDY2002 Sep 8, 2025
ed9a416
Merge remote-tracking branch 'origin/main' into sunxun/mooncake-backe…
UNIDY2002 Sep 8, 2025
5738d5f
Remove unused pybind base class declaration of `MooncakeBackendOptions`
UNIDY2002 Sep 8, 2025
5aaf4d4
Support `set_device_filter`
UNIDY2002 Sep 10, 2025
c355c10
Remove unused headers for ep_py.cpp
UNIDY2002 Sep 12, 2025
1bb73ca
Build the EP-wheel with setuptools on CI
UNIDY2002 Sep 13, 2025
eda4728
[CI] Add the build-with-ep process to release.yaml
UNIDY2002 Sep 14, 2025
7c638e5
Minor format fix
UNIDY2002 Sep 14, 2025
f30e29d
Update build guide
UNIDY2002 Sep 15, 2025
aa24136
Fix docs
UNIDY2002 Sep 15, 2025
e4aae45
Only build EP wheel with torch==2.8.0
UNIDY2002 Sep 15, 2025
74d4791
Add a torch version assertion for Mooncake Backend
UNIDY2002 Sep 15, 2025
b5929fc
Fix some python typing
UNIDY2002 Sep 15, 2025
1db7604
Use the correct group for EP's initial data sharing
UNIDY2002 Sep 15, 2025
e8582f0
API: invert `broken_ranks` and change into `active_ranks`
UNIDY2002 Sep 16, 2025
b0925fe
Followup fix for inverting the API
UNIDY2002 Sep 17, 2025
16c07c9
Fix format
UNIDY2002 Sep 17, 2025
f252056
Bug-fix in mooncake_ep_kernel.cu
UNIDY2002 Sep 17, 2025
fdf0cad
Mooncake EP has to be built with USE_CUDA on
UNIDY2002 Sep 18, 2025
a88e815
Fixed some issues according to the review
UNIDY2002 Sep 22, 2025
72beb68
Fix bug
UNIDY2002 Sep 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,100 @@ jobs:
name: mooncake-wheel-ubuntu-py${{ steps.generate_tag_flags.outputs.python_version_tag }}
path: mooncake-wheel/dist-py${{ steps.generate_tag_flags.outputs.python_version_tag }}/*.whl

build-with-ep:
runs-on: ubuntu-22.04
strategy:
matrix:
python-version: ['3.10', '3.12']
torch-version: ['2.7.1', '2.8.0']
env:
BUILD_WITH_EP: "1"
SCCACHE_GHA_ENABLED: "true"

steps:
- uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Free up disk space
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL

- name: Install CUDA Toolkit
uses: Jimver/[email protected]
with:
cuda: '12.8.1'
linux-local-args: '["--toolkit"]'
method: 'network'
sub-packages: '["nvcc", "nvrtc-dev"]'
non-cuda-sub-packages: '["libcusparse-dev", "libcublas-dev", "libcusolver-dev"]'
Comment on lines +333 to +340
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@xiaguan do you have time to check on this? Do you know if this is supported on our CI machine?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/kvcache-ai/Mooncake/actions/runs/17720954259/job/50353039158?pr=805

It compiles successfully in CI, but I'm not sure if the .whl package will actually work for users.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the users would usually have the full toolkit installed. I tested the .whl in the SGLang docker environment, and it could work :)


- name: Run sccache-cache
uses: mozilla-actions/[email protected]

- name: Configure sccache
uses: actions/github-script@v7
with:
script: |
core.exportVariable('ACTIONS_RESULTS_URL', process.env.ACTIONS_RESULTS_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');

- name: Run sccache stat for check
shell: bash
run: ${SCCACHE_PATH} --show-stats

- name: Install dependencies
run: |
sudo apt update -y
sudo bash -x dependencies.sh -y
pip install toml-cli # for updating the version
pip install torch==${{ matrix.torch-version }}
shell: bash

- name: Build transfer engine with EP
run: |
mkdir build
cd build
export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH
cmake .. -DUSE_ETCD=ON -DUSE_REDIS=ON -DUSE_HTTP=ON -DWITH_STORE=ON -DWITH_P2P_STORE=ON -DWITH_EP=ON -DWITH_METRICS=ON -DBUILD_UNIT_TESTS=ON -DBUILD_EXAMPLES=ON -DENABLE_SCCACHE=ON -DUSE_CUDA=OFF -DUSE_MNNVL=OFF -DCMAKE_EXE_LINKER_FLAGS="-L/usr/local/cuda/lib64/stubs"
make -j
sudo make install
shell: bash

- name: Build nvlink_allocator.so
run: |
mkdir -p build/mooncake-transfer-engine/nvlink-allocator
cd mooncake-transfer-engine/nvlink-allocator
bash build.sh --ci-build ../../build/mooncake-transfer-engine/nvlink-allocator/
shell: bash

- name: Generate Python version tag
id: generate_tag_flags
run: |
echo "python_version_tag=$(echo ${{ matrix.python-version }} | tr -d '.')" >> $GITHUB_OUTPUT
shell: bash

- name: Build Python wheel
run: |
BASE_VERSION=$(toml get --toml-path mooncake-wheel/pyproject.toml project.version | tr -d '"')
TORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
toml set --toml-path mooncake-wheel/pyproject.toml project.version "${BASE_VERSION}+ep_torch${TORCH_VERSION}"
# Build wheel with specific Python version
PYTHON_VERSION=${{ matrix.python-version }} OUTPUT_DIR=dist-py${{ steps.generate_tag_flags.outputs.python_version_tag }} ./scripts/build_wheel.sh
shell: bash

- name: Upload Python wheel artifact
uses: actions/upload-artifact@v4
with:
name: mooncake-wheel-ubuntu-py${{ steps.generate_tag_flags.outputs.python_version_tag }}+ep_torch${{ matrix.torch-version }}
path: mooncake-wheel/dist-py${{ steps.generate_tag_flags.outputs.python_version_tag }}/*.whl

build-docker:
name: Build Docker Image
runs-on: ubuntu-22.04
Expand Down
100 changes: 99 additions & 1 deletion .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,106 @@ jobs:
name: mooncake-wheel-py${{ steps.generate_tag_release.outputs.python_version_tag }}
path: mooncake-wheel/dist-py${{ steps.generate_tag_release.outputs.python_version_tag }}/*.whl

build-with-ep:
runs-on: ubuntu-22.04
permissions:
contents: write
strategy:
matrix:
python-version: ['3.10', '3.12']
torch-version: ['2.7.1', '2.8.0']
env:
BUILD_WITH_EP: "1"
steps:
- name: Checkout source
uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Free up disk space
run: |
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL

- name: Install CUDA Toolkit
uses: Jimver/[email protected]
with:
cuda: '12.8.1'
linux-local-args: '["--toolkit"]'
method: 'network'
sub-packages: '["nvcc", "nvrtc-dev"]'
non-cuda-sub-packages: '["libcusparse-dev", "libcublas-dev", "libcusolver-dev"]'

- name: Run sccache-cache
uses: mozilla-actions/[email protected]

- name: Configure sccache
uses: actions/github-script@v7
with:
script: |
core.exportVariable('ACTIONS_RESULTS_URL', process.env.ACTIONS_RESULTS_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');

- name: Run sccache stat for check
shell: bash
run: ${SCCACHE_PATH} --show-stats

- name: Configure project
run: |
sudo apt update -y
sudo bash -x dependencies.sh -y
pip install toml-cli # for updating the version
pip install torch==${{ matrix.torch-version }}
mkdir build
cd build
cmake .. -DUSE_HTTP=ON -DUSE_ETCD=ON -DWITH_EP=ON -DSTORE_USE_ETCD=ON -DENABLE_SCCACHE=ON -DCMAKE_BUILD_TYPE=Release
shell: bash

- name: Build project
run: |
cd build
make -j
sudo make install
shell: bash

- name: Build nvlink_allocator.so
run: |
mkdir -p build/mooncake-transfer-engine/nvlink-allocator
cd mooncake-transfer-engine/nvlink-allocator
bash build.sh --ci-build ../../build/mooncake-transfer-engine/nvlink-allocator/
shell: bash

- name: Generate Python version tag
id: generate_tag_release
run: |
echo "python_version_tag=$(echo ${{ matrix.python-version }} | tr -d '.')" >> $GITHUB_OUTPUT
shell: bash

- name: Build Python wheel
run: |
BASE_VERSION=$(toml get --toml-path mooncake-wheel/pyproject.toml project.version | tr -d '"')
TORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
toml set --toml-path mooncake-wheel/pyproject.toml project.version "${BASE_VERSION}+ep_torch${TORCH_VERSION}"
# Set LD_LIBRARY_PATH for wheel building
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib
PYTHON_VERSION=${{ matrix.python-version }} OUTPUT_DIR=dist-py${{ steps.generate_tag_release.outputs.python_version_tag }} ./scripts/build_wheel.sh
env:
VERSION: ${{ env.VERSION }}

- name: Upload Python wheel artifact
uses: actions/upload-artifact@v4
with:
name: mooncake-wheel-py${{ steps.generate_tag_release.outputs.python_version_tag }}+ep_torch${{ matrix.torch-version }}
path: mooncake-wheel/dist-py${{ steps.generate_tag_release.outputs.python_version_tag }}/*.whl

publish-release:
needs: build
needs:
- build
- build-with-ep
runs-on: ubuntu-22.04
permissions:
contents: write
Expand Down
7 changes: 3 additions & 4 deletions .typos.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
[default]
extend-ignore-words = ["CANN"]

[files]
extend-exclude = ["mooncake-ep/csrc/*.h"]
extend-ignore-words = ["CANN", "ASO", "fre"]

[default.extend-words]
CANN = "CANN"
ASO = "ASO"
fre = "fre"
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ endif()
option(WITH_STORE "build mooncake store library and sample code" ON)
option(WITH_P2P_STORE "build p2p store library and sample code" OFF)
option(WITH_RUST_EXAMPLE "build the Rust interface and sample code for the transfer engine" OFF)
option(WITH_EP "build mooncake with expert parallelism support" OFF)

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extern/pybind11)
set(PYTHON_EXECUTABLE "python3")
Expand Down Expand Up @@ -51,6 +52,12 @@ if (WITH_STORE)
include_directories(mooncake-store/include)
endif()

# Optional Mooncake EP module; only configured when WITH_EP is ON
# (the option defaults to OFF).
if (WITH_EP)
message(STATUS "Mooncake EP will be built")
add_subdirectory(mooncake-ep)
# Added after the subdirectory, so this include path applies to directories
# configured from here on rather than to mooncake-ep itself.
include_directories(mooncake-ep/include)
endif()

add_subdirectory(mooncake-integration)

if (WITH_P2P_STORE)
Expand Down
5 changes: 5 additions & 0 deletions doc/en/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ This document describes how to build Mooncake from source.
```bash
pip3 install mooncake-transfer-engine --upgrade
```
- To install with the Mooncake Backend and Mooncake EP support, use the following command:
```bash
# replace torch2.8.0 with the corresponding version
pip3 install mooncake-transfer-engine==0.3.7+ep.torch2.8.0 --upgrade
```

## Automatic

Expand Down
70 changes: 70 additions & 0 deletions doc/en/ep-backend.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Mooncake EP & Mooncake Backend

## Overview

Mooncake EP is an adaptation of [DeepEP](https://github.com/deepseek-ai/DeepEP) that supports **fault tolerance** and fast data transfer with **IBGDA**, designed as a critical component for large-scale, latency-sensitive MoE (Mixture of Experts) inference. Mooncake EP aims to retain full compatibility with the DeepEP API, with the addition of a `broken_ranks` tensor passed to both the `dispatch` and `combine` functions to capture information about any rank failures. By integrating with the EPLB module, Mooncake EP ensures fault tolerance during MoE inference, enabling robust performance even in large-scale, fault-prone environments.

Mooncake Backend is a PyTorch distributed backend (a replacement for NCCL and Gloo) that provides **fault-tolerant collective communication primitives** and can be seamlessly integrated into machine learning systems. Built with the [Transfer Engine](transfer-engine.md), Mooncake Backend ensures that collective communications can continue even in the event of rank failures. Furthermore, it reports these failures to the upper layers of the system, allowing for graceful error handling without disrupting ongoing operations.

## Usage

### Mooncake EP

> **Note:** Mooncake EP currently supports only the low-latency transfer mode.

The API is largely consistent with DeepEP's, with only minor differences in a few parameters. Mooncake EP exposes a `Buffer` that can be imported from `mooncake.mooncake_ep_buffer`. For an example, refer to `mooncake-wheel/tests/test_mooncake_ep.py`.

#### Buffer.get_ep_buffer_size_hint()

**Signature:**

```python
@staticmethod
def get_ep_buffer_size_hint(num_max_dispatch_tokens_per_rank: int, hidden: int, num_ranks: int, num_experts: int) -> int
```

Calculates the number of bytes to pre-allocate for data transfer.

#### Buffer.\_\_init\_\_()

**Signature:**

```python
def __init__(self, group: dist.ProcessGroup, num_ep_buffer_bytes: int = 0)
```

The constructor. Ensure that only one instance is created.

- **group**: Must be a Mooncake Backend process group.
- **num_ep_buffer_bytes**: The number of bytes obtained from `Buffer.get_ep_buffer_size_hint()`.

#### Buffer.dispatch/Buffer.combine

**Signature:** Similar to DeepEP's `low_latency_dispatch`/`low_latency_combine`, with two additional parameters:

- **broken_ranks**: A tensor of shape `(num_ranks,)` containing values of 0 or 1. The indices of the broken ranks will be set to 1.
- **timeout_us**: The timeout in microseconds for a rank to be considered broken. Set to -1 for infinite timeout.

### Mooncake Backend

Basic usage:

```python
import torch
import torch.distributed as dist
from mooncake import ep

broken_ranks = torch.zeros((world_size,), dtype=torch.int32, device="cuda")
dist.init_process_group(
backend="mooncake",
rank=rank,
world_size=world_size,
pg_options=ep.MooncakeBackendOptions(broken_ranks),
)

dist.all_gather(...) # Standard API usage
assert not broken_ranks.any() # Verify that no ranks are broken
```

For a full example, see `mooncake-wheel/tests/test_mooncake_backend.py`.

42 changes: 42 additions & 0 deletions mooncake-ep/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
cmake_minimum_required(VERSION 3.16)
project(mooncake-ep)

# Find PyTorch's CMake prefix path by asking the Python interpreter.
# The exit status and stderr of the probe are captured so that a failed import
# (e.g. torch not installed for this interpreter) is reported clearly instead
# of being silently ignored.
execute_process(
  COMMAND ${PYTHON_EXECUTABLE} -c "import torch; print(torch.utils.cmake_prefix_path)"
  RESULT_VARIABLE PYTORCH_CMAKE_PATH_RESULT
  OUTPUT_VARIABLE PYTORCH_CMAKE_PATH
  ERROR_VARIABLE PYTORCH_CMAKE_PATH_ERROR
  OUTPUT_STRIP_TRAILING_WHITESPACE
  ERROR_STRIP_TRAILING_WHITESPACE
)

# RESULT_VARIABLE holds the exit code, or an error string when the process
# could not be started; anything other than 0 means the output is unusable.
if(NOT PYTORCH_CMAKE_PATH_RESULT EQUAL 0)
  set(PYTORCH_CMAKE_PATH "")
endif()

if(NOT PYTORCH_CMAKE_PATH)
  # Not fatal here: the user may still point find_package(Torch) at an
  # installation via Torch_DIR.
  message(WARNING "Could not find PyTorch CMake path! Please set Torch_DIR.")
  if(NOT "${PYTORCH_CMAKE_PATH_ERROR}" STREQUAL "")
    message(STATUS "PyTorch probe failed (${PYTORCH_CMAKE_PATH_RESULT}): ${PYTORCH_CMAKE_PATH_ERROR}")
  endif()
else()
  message(STATUS "Found PyTorch CMake path: ${PYTORCH_CMAKE_PATH}")
  list(APPEND CMAKE_PREFIX_PATH "${PYTORCH_CMAKE_PATH}/Torch")
endif()

# Default CUDA architecture list for the Torch-based build. It is applied only
# when the caller has not already defined TORCH_CUDA_ARCH_LIST (for example via
# -DTORCH_CUDA_ARCH_LIST=...), so an explicit choice is no longer overwritten.
if(NOT DEFINED TORCH_CUDA_ARCH_LIST)
  set(TORCH_CUDA_ARCH_LIST "8.0;9.0")
endif()

# The CUDA toolkit is located first so the CUDA:: imported targets tested
# below are defined.
find_package(CUDAToolkit REQUIRED)

# Workaround for a missing CUDA::nvToolsExt target, taken from
# https://discuss.pytorch.org/t/failed-to-find-nvtoolsext/179635/13
# When the toolkit provides CUDA::nvtx3 but not CUDA::nvToolsExt, declare an
# interface target under the old name that forwards to nvtx3 (presumably
# because Torch's CMake package still refers to the old name).
if(TARGET CUDA::nvtx3 AND NOT TARGET CUDA::nvToolsExt)
  add_library(CUDA::nvToolsExt INTERFACE IMPORTED)
  target_link_libraries(CUDA::nvToolsExt INTERFACE CUDA::nvtx3)
  target_compile_definitions(CUDA::nvToolsExt INTERFACE TORCH_CUDA_USE_NVTX3)
endif()

# Torch itself; its headers are exposed to the targets of this directory.
find_package(Torch REQUIRED)
include_directories(${TORCH_INCLUDE_DIRS})

# Headers of this module; set before the subdirectories are added so they
# inherit the include path.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")

add_subdirectory(include)
add_subdirectory(src)

# Unit tests and examples are configured only when the corresponding switch
# is enabled.
if(BUILD_UNIT_TESTS)
  add_subdirectory(tests)
endif()
if(BUILD_EXAMPLES)
  add_subdirectory(example)
endif()
25 changes: 0 additions & 25 deletions mooncake-ep/csrc/CMakeLists.txt

This file was deleted.

Loading
Loading