1
1
steps :
2
- # aarch64 + CUDA builds
3
- - label : " Build arm64 wheel - CUDA 12.8 "
4
- id : build-wheel-arm64-cuda-12-8
2
+ # aarch64 + CUDA builds. PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
3
+ - label : " Build arm64 wheel - CUDA 12.9 "
4
+ id : build-wheel-arm64-cuda-12-9
5
5
agents :
6
6
queue : arm64_cpu_queue_postmerge
7
7
commands :
8
8
# NOTE: torch_cuda_arch_list is derived from the upstream PyTorch build files here:
9
9
# https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
10
- - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
10
+ - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
11
11
- " mkdir artifacts"
12
12
- " docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
13
13
- " bash .buildkite/scripts/upload-wheels.sh"
14
14
env :
15
15
DOCKER_BUILDKIT : " 1"
16
16
17
- # x86 + CUDA builds
17
+ - block : " Build CUDA 12.8 wheel"
18
+ key : block-build-cu128-wheel
19
+
18
20
- label : " Build wheel - CUDA 12.8"
21
+ depends_on : block-build-cu128-wheel
19
22
id : build-wheel-cuda-12-8
20
23
agents :
21
24
queue : cpu_queue_postmerge
@@ -44,18 +47,14 @@ steps:
44
47
env :
45
48
DOCKER_BUILDKIT : " 1"
46
49
47
- # Note(simon): We can always build CUDA 11.8 wheel to ensure the build is working.
48
- # However, this block can be uncommented to save some compute hours.
49
- # - block: "Build CUDA 11.8 wheel"
50
- # key: block-build-cu118-wheel
51
-
52
- - label : " Build wheel - CUDA 11.8"
53
- # depends_on: block-build-cu118-wheel
54
- id : build-wheel-cuda-11-8
50
+ # x86 + CUDA builds
51
+ - label : " Build wheel - CUDA 12.9"
52
+ depends_on : ~
53
+ id : build-wheel-cuda-12-9
55
54
agents :
56
55
queue : cpu_queue_postmerge
57
56
commands :
58
- - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
57
+ - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
59
58
- " mkdir artifacts"
60
59
- " docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
61
60
- " bash .buildkite/scripts/upload-wheels.sh"
@@ -75,14 +74,15 @@ steps:
75
74
- " docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
76
75
- " docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
77
76
77
+ # PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
78
78
- label : " Build release image (arm64)"
79
79
depends_on : ~
80
80
id : build-release-image-arm64
81
81
agents :
82
82
queue : arm64_cpu_queue_postmerge
83
83
commands :
84
84
- " aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
85
- - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
85
+ - " DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
86
86
- " docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"
87
87
88
88
# Add job to create multi-arch manifest
@@ -103,7 +103,7 @@ steps:
103
103
- create-multi-arch-manifest
104
104
- build-wheel-cuda-12-8
105
105
- build-wheel-cuda-12-6
106
- - build-wheel-cuda-11-8
106
+ - build-wheel-cuda-12-9
107
107
id : annotate-release-workflow
108
108
agents :
109
109
queue : cpu_queue_postmerge
0 commit comments