|
29 | 29 | TZ=Europe/Prague \
|
30 | 30 | PATH="$PATH:/root/.local/bin" \
|
31 | 31 | CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
|
32 |
| - TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0;7.5;8.0" \ |
33 | 32 | MKL_THREADING_LAYER=GNU \
|
34 | 33 | # MAKEFLAGS="-j$(nproc)"
|
35 | 34 | MAKEFLAGS="-j2"
|
|
99 | 98 | pip install -r requirements/pytorch/base.txt --no-cache-dir --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html && \
|
100 | 99 | rm assistant.py
|
101 | 100 |
|
102 |
| -ENV \ |
103 |
| - HOROVOD_CUDA_HOME=$CUDA_TOOLKIT_ROOT_DIR \ |
104 |
| - HOROVOD_GPU_OPERATIONS=NCCL \ |
105 |
| - HOROVOD_WITH_PYTORCH=1 \ |
106 |
| - HOROVOD_WITHOUT_TENSORFLOW=1 \ |
107 |
| - HOROVOD_WITHOUT_MXNET=1 \ |
108 |
| - HOROVOD_WITH_GLOO=1 \ |
109 |
| - HOROVOD_WITH_MPI=1 |
110 |
| - |
111 |
| -RUN \ |
112 |
| - # CUDA 10.2 doesn't support ampere architecture (8.0). |
113 |
| - if [[ "$CUDA_VERSION" < "11.0" ]]; then export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST//";8.0"/}; echo $TORCH_CUDA_ARCH_LIST; fi && \ |
114 |
| - HOROVOD_BUILD_CUDA_CC_LIST=${TORCH_CUDA_ARCH_LIST//";"/","} && \ |
115 |
| - export HOROVOD_BUILD_CUDA_CC_LIST=${HOROVOD_BUILD_CUDA_CC_LIST//"."/""} && \ |
116 |
| - echo $HOROVOD_BUILD_CUDA_CC_LIST && \ |
117 |
| - cmake --version && \ |
118 |
| - pip install --no-cache-dir horovod && \ |
119 |
| - horovodrun --check-build |
120 | 101 |
|
121 | 102 | RUN \
|
122 | 103 | # install Bagua
|
@@ -145,7 +126,6 @@ RUN \
|
145 | 126 | # install rest of strategies
|
146 | 127 | # remove colossalai from requirements since they are installed separately
|
147 | 128 | python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'colossalai' not in line] ; open(fname, 'w').writelines(lines)" ; \
|
148 |
| - python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" ; \ |
149 | 129 | cat requirements/pytorch/strategies.txt && \
|
150 | 130 | pip install -r requirements/pytorch/devel.txt -r requirements/pytorch/strategies.txt --no-cache-dir --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
|
151 | 131 |
|
|
0 commit comments