Merged

Commits (40)
77c7744  modify accelerate tests (mengfei25, Sep 1, 2025)
4b6013a  modify transformers tests (mengfei25, Sep 1, 2025)
c8cc64c  Merge branch 'main' into mengfeil/modify-extra-tests (mengfei25, Sep 1, 2025)
00aa549  update (mengfei25, Sep 1, 2025)
42e6541  Merge branch 'mengfeil/modify-extra-tests' of https://github.com/inte… (mengfei25, Sep 1, 2025)
2de397e  split transformers test jobs (mengfei25, Sep 1, 2025)
782a62c  update (mengfei25, Sep 1, 2025)
4df28d5  update (mengfei25, Sep 1, 2025)
cbc00e3  update (mengfei25, Sep 2, 2025)
bec7bb1  Merge branch 'main' into mengfeil/modify-extra-tests (mengfei25, Sep 2, 2025)
29705c1  update (mengfei25, Sep 2, 2025)
50c015e  Merge branch 'mengfeil/modify-extra-tests' of https://github.com/inte… (mengfei25, Sep 2, 2025)
6727196  update (mengfei25, Sep 2, 2025)
10c6152  update (mengfei25, Sep 2, 2025)
5663c85  update (mengfei25, Sep 2, 2025)
081fad6  update (mengfei25, Sep 2, 2025)
608c5c6  update (mengfei25, Sep 2, 2025)
5e9a5be  Merge branch 'main' into mengfeil/modify-extra-tests (mengfei25, Sep 2, 2025)
12d8646  update (mengfei25, Sep 2, 2025)
638762d  update (mengfei25, Sep 3, 2025)
c3275a3  accelerate only need 1 card (mengfei25, Sep 3, 2025)
0303d81  update (mengfei25, Sep 3, 2025)
824cfef  update (mengfei25, Sep 3, 2025)
0a159d9  Merge branch 'main' into mengfeil/modify-extra-tests (mengfei25, Sep 3, 2025)
234985e  Merge branch 'main' into mengfeil/modify-extra-tests (mengfei25, Sep 6, 2025)
379cf35  Merge branch 'main' into mengfeil/modify-extra-tests (mengfei25, Sep 6, 2025)
b456364  modify container args (mengfei25, Sep 9, 2025)
856bd09  remove workspace cleanup before checkout (mengfei25, Sep 9, 2025)
462b387  modify ZE_AFFINITY_MASK in container (mengfei25, Sep 10, 2025)
9b49c07  transformers mutli shards (mengfei25, Sep 10, 2025)
fc3ac8e  update (mengfei25, Sep 10, 2025)
dda9f7d  Merge branch 'main' into mengfeil/modify-extra-tests (mengfei25, Sep 10, 2025)
b47b279  set numactl to distribute CPUs (mengfei25, Sep 10, 2025)
344d370  fix lint (mengfei25, Sep 10, 2025)
86bfbce  rollback to 856bd09 (mengfei25, Sep 11, 2025)
01af1f6  accelerate tests parallel with ZE_AFFINITY_MASK=n (mengfei25, Sep 11, 2025)
acbdde6  split transformers test jobs (mengfei25, Sep 1, 2025)
8ad779c  cleanup (mengfei25, Sep 11, 2025)
70e031d  cleanup (mengfei25, Sep 11, 2025)
da60b8e  cleanup (mengfei25, Sep 11, 2025)
70 changes: 52 additions & 18 deletions .github/actions/get-runner/action.yml
@@ -9,26 +9,28 @@ outputs:
value: ${{ steps.runner.outputs.render_id }}
hostname:
value: ${{ steps.runner.outputs.hostname }}
ZE_AFFINITY_MASK:
value: ${{ steps.tests.outputs.ZE_AFFINITY_MASK }}
xpu_num:
value: ${{ steps.runner.outputs.xpu_num }}
value: ${{ steps.tests.outputs.xpu_num }}
cpus_per_xpu:
value: ${{ steps.runner.outputs.cpus_per_xpu }}
value: ${{ steps.tests.outputs.cpus_per_xpu }}
pytest_extra_args:
value: ${{ steps.runner.outputs.pytest_extra_args }}
value: ${{ steps.tests.outputs.pytest_extra_args }}
numactl_args:
value: ${{ steps.tests.outputs.numactl_args }}

runs:
using: composite
steps:
- name: Get runner
- name: Show runner
shell: bash -xe {0}
id: runner
run: |
# get test runner
echo "runner_id=$(echo ${RUNNER_NAME} |sed 's/\-[0-9]$//')" |tee -a ${GITHUB_OUTPUT}
echo "user_id=$(id -u)" |tee -a ${GITHUB_OUTPUT}
echo "user_id=$(id -u):$(id -g)" |tee -a ${GITHUB_OUTPUT}
echo "render_id=$(getent group render |cut -d: -f3)" |tee -a ${GITHUB_OUTPUT}
echo "hostname=$(hostname)" |tee -a ${GITHUB_OUTPUT}
# show host info
lscpu
lshw -C display
free -h
@@ -37,6 +39,9 @@ runs:
uname -a
# clinfo hang and reboot system to recover
timeout 120 clinfo --list || sudo reboot
- name: Check scaling_governor
shell: bash -xe {0}
run: |
scaling_governor=$(cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor |sort |uniq)
if [ $(sudo -n true > /dev/null 2>&1 && echo $? || echo $?) -eq 0 ];then
if [ "${scaling_governor}" != "performance" ];then
@@ -52,31 +57,60 @@ runs:
echo "[INFO] You do NOT have ROOT permission to set system config."
echo " The frequency governor is ${scaling_governor}."
fi
- name: Info for tests
shell: bash -xe {0}
id: tests
run: |
# cpu number
cpu_num="$(lscpu |grep -E 'Core\(s\) per socket:|Socket\(s\):' |awk 'BEGIN{sum=1}{sum*=$NF}END{printf sum}')"
xpu_num="$(clinfo --list |awk 'BEGIN{gpu=0;}{
if(gpu==1 && $0~/Platform/){gpu=0}; if(gpu==1){print $0}; if($0~/Platform.*Graphics/){gpu=1}
}' |wc -l)"
cpus_per_xpu="$(echo |awk -v c="${cpu_num}" -v x="${xpu_num}" '{printf c/x}')"
# available gpu card number
xpu_num="$(xpu-smi discovery --dump 1 2>&1 |grep -c "[0-9]")"
# total gpu card number
pci_base_class_mask=0x00ff0000
pci_base_class_display=0x00030000
pci_vendor_id_intel=0x8086
gpu_card_num=0
for var in $(ls /sys/bus/pci/devices)
do
pci_class="$(cat /sys/bus/pci/devices/${var}/class)"
pci_vendor="$(cat /sys/bus/pci/devices/${var}/vendor)"
is_xpu="$(python -c "if (${pci_class} & ${pci_base_class_mask}) == ${pci_base_class_display} and ${pci_vendor} == ${pci_vendor_id_intel}: print('yes')")"
if [ "${is_xpu}" == "yes" ];then
echo "Detected Intel GPU at /sys/bus/pci/devices/${var}"
gpu_card_num=$[ $gpu_card_num + 1 ]
fi
done
# get available gpus
ZE_AFFINITY_MASK="$(xpu-smi discovery 2>&1 |grep 'DRM Device: /dev/' |sed 's/.*card//;s/[^0-9].*//' |sort -n |uniq |awk '{
if (NR == 1) { first = $1; }
if (first > 0) { printf("%s,", $1 - 1); } else { printf("%s,", $1); }
}' |sed 's/,$//')"
cpus_per_xpu="$(echo |awk -v c="${cpu_num}" -v x="${gpu_card_num}" '{printf c/x}')"
# get pytest args for pytest parallel
numactl_args="$(echo |awk -v c="${cpus_per_xpu}" -v ze="${ZE_AFFINITY_MASK}" '{
split(ze, x, ",");
for (i in x) {
printf(" numactl -l -C %d-%d ;", c*x[i], c*x[i]+c-1);
}
}')"
pytest_extra_args="$(echo |awk -v x="${xpu_num}" -v z="${ZE_AFFINITY_MASK}" -v cx="${cpus_per_xpu}" '{
if (x > 0) {
split(z, xpu_list, ",");
for (i=0;i<x;i++) {
if (z != "") {
ze = xpu_list[i+1];
} else {
ze = i;
}
ze = xpu_list[i+1];
printf(" --tx popen//env:ZE_AFFINITY_MASK=%d//env:OMP_NUM_THREADS=%d//python=\"numactl -l -C %d-%d python\"",
ze, cx, i*cx, (i+1)*cx-1);
ze, cx, ze*cx, (ze+1)*cx-1);
}
}else {
printf(" -n 1 ");
}
}')"
echo "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK}" |tee -a ${GITHUB_OUTPUT}
echo "xpu_num=${xpu_num}" |tee -a ${GITHUB_OUTPUT}
echo "cpus_per_xpu=${cpus_per_xpu}" |tee -a ${GITHUB_OUTPUT}
echo "numactl_args=${numactl_args}" |tee -a ${GITHUB_OUTPUT}
echo "pytest_extra_args=${pytest_extra_args}" |tee -a ${GITHUB_OUTPUT}
- name: Cleanup host
- name: Cleanup runner
shell: bash -xe {0}
run: |
# clean docker cache
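Note: the numactl_args and pytest_extra_args strings built above are easiest to follow with concrete values. Below is a minimal sketch assuming a hypothetical runner with two available cards (ZE_AFFINITY_MASK=0,1) and 16 CPUs per card; the awk bodies are copied from the step above, with the pytest branch reduced to the x > 0 case.

#!/bin/bash
# Illustrative values only; on a real runner these come from lscpu and xpu-smi.
cpus_per_xpu=16
ZE_AFFINITY_MASK="0,1"
xpu_num=2

# One "numactl -l -C <start>-<end>" prefix per card, ';'-separated.
numactl_args="$(echo | awk -v c="${cpus_per_xpu}" -v ze="${ZE_AFFINITY_MASK}" '{
  split(ze, x, ",");
  for (i in x) { printf(" numactl -l -C %d-%d ;", c*x[i], c*x[i]+c-1); }
}')"
echo "${numactl_args}"
# -> numactl -l -C 0-15 ; numactl -l -C 16-31 ;

# One pytest-xdist worker per card, each pinned to its card's CPU range.
pytest_extra_args="$(echo | awk -v x="${xpu_num}" -v z="${ZE_AFFINITY_MASK}" -v cx="${cpus_per_xpu}" '{
  split(z, xpu_list, ",");
  for (i=0;i<x;i++) {
    ze = xpu_list[i+1];
    printf(" --tx popen//env:ZE_AFFINITY_MASK=%d//env:OMP_NUM_THREADS=%d//python=\"numactl -l -C %d-%d python\"",
      ze, cx, ze*cx, (ze+1)*cx-1);
  }
}')"
echo "${pytest_extra_args}"
# -> --tx popen//env:ZE_AFFINITY_MASK=0//env:OMP_NUM_THREADS=16//python="numactl -l -C 0-15 python"
#    --tx popen//env:ZE_AFFINITY_MASK=1//env:OMP_NUM_THREADS=16//python="numactl -l -C 16-31 python"

# The PCI walk above keys off base class 0x03 (display) and Intel's vendor ID;
# with example values read from /sys/bus/pci/devices/*/class and .../vendor:
pci_class=0x030000; pci_vendor=0x8086
python -c "if (${pci_class} & 0x00ff0000) == 0x00030000 and ${pci_vendor} == 0x8086: print('yes')"
# -> yes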
4 changes: 2 additions & 2 deletions .github/actions/linux-e2etest/action.yml
@@ -75,12 +75,12 @@ runs:
else
xpu_id=${var}
fi
numactl --localalloc --physcpubind=${cpu_list} bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${var} &
bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${xpu_id} static ${xpu_num} ${var} &
done
else
for test_model in $(echo ${MODEL_ONLY_NAME} |sed 's/,/ /g')
do
numactl --localalloc bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 0 static 1 0 ${test_model}
bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu 0 static 1 0 ${test_model}
done
fi
wait
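Note: with this change the caller no longer pins CPUs itself; the per-card numactl prefix is applied inside inductor_xpu_test.sh (next file). A sketch of the resulting per-card fan-out, with hypothetical suite/dtype/mode/scenario values:

# Hypothetical values for illustration; the real ones come from the job matrix.
suite=huggingface; dt=float32; mode=inference; scenario=accuracy; xpu_num=2
for var in $(seq 0 $((xpu_num - 1))); do
  # One background run per card; CPU and memory pinning happen inside the script.
  bash -x inductor_xpu_test.sh ${suite} ${dt} ${mode} ${scenario} xpu ${var} static ${xpu_num} ${var} &
done
wait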
7 changes: 4 additions & 3 deletions .github/scripts/inductor_xpu_test.sh
@@ -60,6 +60,7 @@ fi

ulimit -n 1048576
ZE_AFFINITY_MASK=${CARD} \
eval python benchmarks/dynamo/"${SUITE}".py --"${SCENARIO}" --"${Real_DT}" -d "${DEVICE}" -n10 "${DT_extra}" "${Mode_extra}" \
"${Shape_extra}" "${partition_flags}" "${Model_only_extra}" --backend=inductor --cold-start-latency --timeout=10800 \
--output="${LOG_DIR}"/"${LOG_NAME}".csv 2>&1 | tee "${LOG_DIR}"/"${LOG_NAME}"_card"${CARD}".log
eval $(echo ${numactl_args}|awk -F ';' -v i=$[${CARD}+1] '{print $i}') \
python benchmarks/dynamo/"${SUITE}".py --"${SCENARIO}" --"${Real_DT}" -d "${DEVICE}" -n10 "${DT_extra}" "${Mode_extra}" \
"${Shape_extra}" "${partition_flags}" "${Model_only_extra}" --backend=inductor --cold-start-latency --timeout=10800 \
--output="${LOG_DIR}"/"${LOG_NAME}".csv 2>&1 | tee "${LOG_DIR}"/"${LOG_NAME}"_card"${CARD}".log
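Note: the eval prefix above selects the ';'-separated field of numactl_args that belongs to this card; awk fields are 1-indexed, hence CARD+1. Standalone, with the hypothetical two-card string from the get-runner sketch (using $((...)) in place of the script's older $[...] arithmetic):

numactl_args=" numactl -l -C 0-15 ; numactl -l -C 16-31 ;"   # assumed input
CARD=1
prefix="$(echo ${numactl_args} | awk -F ';' -v i=$((CARD+1)) '{print $i}')"
echo "${prefix}"
# -> numactl -l -C 16-31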
103 changes: 61 additions & 42 deletions .github/workflows/_linux_accelerate.yml
@@ -26,7 +26,7 @@ on:
runner:
required: true
type: string
default: 'linux.idc.xpu'
default: 'pvc_rolling'
description: Runner label
accelerate:
required: false
@@ -45,11 +45,15 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

defaults:
run:
shell: bash {0}

jobs:
conditions-filter:
name: conditions-filter
if: ${{ github.event.pull_request.draft == false }}
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
timeout-minutes: 10
env:
GH_TOKEN: ${{ github.token }}
@@ -66,22 +70,46 @@ jobs:
disabled_tests="$(awk '/disable_/{printf("%s ", $0)}' pr-info.txt)"
echo "disabled_tests=${disabled_tests}" |tee "${GITHUB_OUTPUT}"

Torch-XPU-Accelerate-Tests:
runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
prepare:
runs-on: ${{ inputs.runner != '' && inputs.runner || 'pvc_rolling' }}
needs: conditions-filter
if: ${{ !(contains(needs.conditions-filter.outputs.disabled_tests, 'disable_all') || contains(needs.conditions-filter.outputs.disabled_tests, 'disable_accelerate')) }}
outputs:
runner_id: ${{ steps.runner-info.outputs.runner_id }}
user_id: ${{ steps.runner-info.outputs.user_id }}
render_id: ${{ steps.runner-info.outputs.render_id }}
hostname: ${{ steps.runner-info.outputs.hostname }}
ZE_AFFINITY_MASK: ${{ steps.runner-info.outputs.ZE_AFFINITY_MASK }}
steps:
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
- name: Get runner
id: runner-info
uses: ./.github/actions/get-runner

tests:
runs-on: ${{ needs.prepare.outputs.runner_id }}
needs: prepare
container:
image: mengfeili/intel-pvc-driver:1146-1136
volumes:
- ${{ github.workspace }}:${{ github.workspace }}
options: --device=/dev/mem --device=/dev/dri --group-add video --group-add ${{ needs.prepare.outputs.render_id }}
--security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g
-u ${{ needs.prepare.outputs.user_id }}
env:
ZE_AFFINITY_MASK: ${{ needs.prepare.outputs.ZE_AFFINITY_MASK }}
WORK_DIR: 'accelerate'
PYTORCH_DEBUG_XPU_FALLBACK: 1
HF_HUB_ETAG_TIMEOUT: 120
HF_HUB_DOWNLOAD_TIMEOUT: 120
PARSE_JUNIT: ${{ github.workspace }}/torch-xpu-ops/.github/scripts/parse-junitxml.py
AGENT_TOOLSDIRECTORY: /tmp/xpu-tool
PYTEST_ADDOPTS: -rsf --timeout 600 --timeout_method=thread -n 1
env:
WORK_DIR: 'accelerate'
NEOReadDebugKeys: 0
DisableScratchPages: 0
accelerate: ${{ inputs.accelerate != '' && inputs.accelerate || 'v1.6.0' }}
transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.51.3' }}
python: ${{ inputs.python != '' && inputs.python || '3.10' }}
PYTORCH_DEBUG_XPU_FALLBACK: 1
ZE_AFFINITY_MASK: 0
PARSE_JUNIT: ${{ github.workspace }}/torch-xpu-ops/.github/scripts/parse-junitxml.py
HF_HUB_ETAG_TIMEOUT: 120
HF_HUB_DOWNLOAD_TIMEOUT: 120
steps:
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
@@ -93,25 +121,22 @@ jobs:
repository: huggingface/accelerate
ref: ${{ env.accelerate }}
path: accelerate
- name: Create unique Conda ENV name
- name: Setup python-${{ env.python }}
uses: actions/setup-python@v5
with:
python-version: ${{ env.python }}
- name: Check python
run: |
random=$(head /dev/urandom | tr -dc A-Za-z0-9_ | head -c ${1:-5} | xargs)
echo "CONDA_ENV_NAME=hf_accelerate_test_${ZE_AFFINITY_MASK}_${random}" >> $GITHUB_ENV
- name: Prepare Conda ENV
which python && python -V
which pip && pip list
pip install -U pip wheel setuptools
- name: Install pytorch and deps
run: |
echo "Using Conda ENV name: $CONDA_ENV_NAME"
conda remove --all -y -n $CONDA_ENV_NAME || rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME
conda create -y -n $CONDA_ENV_NAME python=${{ env.python }}
source activate $CONDA_ENV_NAME
pip install junitparser pytest-timeout
pip install junitparser
pip install transformers==${{ env.transformers }}
- name: Prepare Stock XPU Pytorch
run: |
source activate $CONDA_ENV_NAME
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
- name: Prepare Accelerate
run: |
source activate $CONDA_ENV_NAME
cd $WORK_DIR
pip install -e .
pip install -e ".[testing]"
@@ -120,7 +145,6 @@ jobs:
cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./
- name: Report installed versions
run: |
source activate $CONDA_ENV_NAME
echo "pip installed packages:"
pip list | tee ${{ github.workspace }}/$WORK_DIR/tests_log/pip_list.txt
echo "lspci gpu devices:"
@@ -131,16 +155,20 @@ jobs:
xpu-smi discovery -y --json --dump -1
- name: Sanity check installed packages
run: |
source activate $CONDA_ENV_NAME
# Use latest pytest
pip install -U pytest pytest-timeout pytest-xdist
# These checks are to exit earlier if for any reason torch
# packages were reinstalled back to CUDA versions (not expected).
pip show torch | grep Version | grep xpu
pip show torchaudio | grep Version | grep xpu
pip show torchvision | grep Version | grep xpu
python -c 'import torch; exit(not torch.xpu.is_available())'
- name: Run tests
printenv
- name: Run tests on ${{ needs.prepare.outputs.hostname }}
run: |
source activate $CONDA_ENV_NAME
# use 1 GPU only for tests
# which also can get the 1st GPU from 1 card only runner and some cards lost runner
export ZE_AFFINITY_MASK="$(echo ${ZE_AFFINITY_MASK} |sed 's/,.*//')"
cd $WORK_DIR && rm -rf reports && mkdir -p reports
# Excluding tests due to:
# * tests/test_examples.py::FeatureExamplesTests::test_profiler fails on
@@ -150,8 +178,7 @@ jobs:
# * tests/test_big_modeling.py::test_dispatch_model_tied_weights_memory_with_nested_offload_cpu fails
# with OOM. That's a new test added by https://github.com/huggingface/accelerate/pull/3445
pattern="not test_profiler and not test_gated and not test_dispatch_model_tied_weights_memory_with_nested_offload_cpu"
cmd=(python3 -m pytest --timeout 600 -rsf --junitxml=reports/accelerate.xml -k "$pattern" \
tests/)
cmd=(python -m pytest --junitxml=reports/accelerate.xml -k "$pattern" tests/)
{
echo "### Running"
echo "\`\`\`"
@@ -162,28 +189,20 @@ jobs:
- name: Print result tables
if: ${{ ! cancelled() }}
run: |
source activate $CONDA_ENV_NAME
cd $WORK_DIR
{
echo "### Results"
python3 $PARSE_JUNIT reports/accelerate.xml --stats
python $PARSE_JUNIT reports/accelerate.xml --stats
echo "### Failed"
python3 $PARSE_JUNIT reports/accelerate.xml --errors --failed
python $PARSE_JUNIT reports/accelerate.xml --errors --failed
echo "### Skipped"
python3 $PARSE_JUNIT reports/accelerate.xml --skipped
python $PARSE_JUNIT reports/accelerate.xml --skipped
} >> $GITHUB_STEP_SUMMARY
- name: Print environment
if: ${{ ! cancelled() }}
uses: ./torch-xpu-ops/.github/actions/print-environment
with:
conda: $CONDA_ENV_NAME
pip_packages: 'accelerate transformers'
- name: Clean up
if: ${{ always() }}
run: |
if [ -n "$CONDA_ENV_NAME" ]; then
conda remove --all -y -n $CONDA_ENV_NAME || rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME
fi
- name: Upload Test log
if: ${{ ! cancelled() }}
uses: actions/upload-artifact@v4
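Note: the accelerate job deliberately uses a single GPU, so the sed call in "Run tests" keeps only the first entry of a possibly multi-card mask. A quick check with assumed values:

ZE_AFFINITY_MASK="2,3"   # assumed: the runner exposes cards 2 and 3
export ZE_AFFINITY_MASK="$(echo ${ZE_AFFINITY_MASK} | sed 's/,.*//')"
echo "${ZE_AFFINITY_MASK}"
# -> 2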
12 changes: 7 additions & 5 deletions .github/workflows/_linux_e2e.yml
@@ -62,10 +62,9 @@ jobs:
hostname: ${{ steps.runner-info.outputs.hostname }}
xpu_num: ${{ steps.runner-info.outputs.xpu_num }}
cpus_per_xpu: ${{ steps.runner-info.outputs.cpus_per_xpu }}
ZE_AFFINITY_MASK: ${{ steps.runner-info.outputs.ZE_AFFINITY_MASK }}
numactl_args: ${{ steps.runner-info.outputs.numactl_args }}
steps:
- name: Cleanup workspace
run: |
sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
- name: Get runner
@@ -80,9 +79,12 @@ jobs:
image: mengfeili/intel-pvc-driver:1146-1136
volumes:
- ${{ github.workspace }}:${{ github.workspace }}
options: --device=/dev/mem --device=/dev/dri --group-add video --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g
-u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }}
options: --device=/dev/mem --device=/dev/dri --group-add video --group-add ${{ needs.runner.outputs.render_id }}
--security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g
-u ${{ needs.runner.outputs.user_id }}
env:
ZE_AFFINITY_MASK: ${{ needs.runner.outputs.ZE_AFFINITY_MASK }}
numactl_args: ${{ needs.runner.outputs.numactl_args }}
xpu_num: ${{ needs.runner.outputs.xpu_num }}
cpus_per_xpu: ${{ needs.runner.outputs.cpus_per_xpu }}
MODEL_ONLY_NAME: ${{ inputs.model }}
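Note: the container now runs as the host user and gains /dev/dri access through supplementary video and render groups, rather than by smuggling the render GID into the group half of -u. A rough plain-docker equivalent of the options above (the image tag is taken from this workflow; everything else is a sketch):

docker run --rm \
  --device=/dev/mem --device=/dev/dri \
  --group-add video \
  --group-add "$(getent group render | cut -d: -f3)" \
  --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g \
  -u "$(id -u):$(id -g)" \
  mengfeili/intel-pvc-driver:1146-1136 \
  ls -l /dev/dri

The same option change is applied to _linux_op_benchmark.yml below.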
10 changes: 5 additions & 5 deletions .github/workflows/_linux_op_benchmark.yml
@@ -37,10 +37,8 @@ jobs:
user_id: ${{ steps.runner-info.outputs.user_id }}
render_id: ${{ steps.runner-info.outputs.render_id }}
hostname: ${{ steps.runner-info.outputs.hostname }}
ZE_AFFINITY_MASK: ${{ steps.runner-info.outputs.ZE_AFFINITY_MASK }}
steps:
- name: Cleanup workspace
run: |
sudo find ./ |grep -v "^\./$" |xargs sudo rm -rf
- name: Checkout torch-xpu-ops
uses: actions/checkout@v4
- name: Get runner
@@ -57,9 +55,11 @@ jobs:
image: mengfeili/intel-pvc-driver:1146-1136
volumes:
- ${{ github.workspace }}:${{ github.workspace }}
options: --device=/dev/mem --device=/dev/dri --group-add video --security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g
-u ${{ needs.runner.outputs.user_id }}:${{ needs.runner.outputs.render_id }}
options: --device=/dev/mem --device=/dev/dri --group-add video --group-add ${{ needs.runner.outputs.render_id }}
--security-opt seccomp=unconfined --cap-add=SYS_PTRACE --shm-size=8g
-u ${{ needs.runner.outputs.user_id }}
env:
ZE_AFFINITY_MASK: ${{ needs.runner.outputs.ZE_AFFINITY_MASK }}
AGENT_TOOLSDIRECTORY: /opt/xpu-tool
steps:
- name: Checkout torch-xpu-ops