Skip to content

Commit cff811d

Browse files
authored
Merge branch 'master' into bugfix/strategy-type-validation
2 parents 19ed92d + a40f89d commit cff811d

File tree

76 files changed

+940
-561
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

76 files changed

+940
-561
lines changed

.azure/gpu-tests-fabric.yml

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,21 @@ jobs:
4545
pool: lit-rtx-3090
4646
variables:
4747
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
48+
FREEZE_REQUIREMENTS: "1"
49+
COVERAGE_SCOPE: $( python -c 'n = "$(PACKAGE_NAME)" ; print(dict(fabric="lightning_fabric").get(n, n))' )
4850
container:
4951
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1"
5052
# default shm size is 64m. Increase it to avoid:
5153
# 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
5254
options: "--gpus=all --shm-size=2gb"
53-
# TODO: package parametrization
55+
strategy:
56+
matrix:
57+
'pkg: Fabric':
58+
PACKAGE_NAME: "fabric"
59+
'pkg: Lightning':
60+
PACKAGE_NAME: "lightning"
5461
workspace:
5562
clean: all
56-
5763
steps:
5864
- bash: |
5965
echo "##vso[task.setvariable variable=CUDA_VISIBLE_DEVICES]$(DEVICES)"
@@ -62,6 +68,8 @@ jobs:
6268
displayName: 'set env. vars'
6369
6470
- bash: |
71+
echo $(DEVICES)
72+
echo $(COVERAGE_SCOPE)
6573
echo $CUDA_VISIBLE_DEVICES
6674
echo $TORCH_URL
6775
lspci | egrep 'VGA|3D'
@@ -80,11 +88,7 @@ jobs:
8088
done
8189
displayName: 'Adjust dependencies'
8290
83-
- bash: |
84-
pip install -e .[dev,strategies,examples] --find-links ${TORCH_URL}
85-
env:
86-
PACKAGE_NAME: "fabric"
87-
FREEZE_REQUIREMENTS: "1"
91+
- bash: pip install -e .[dev,strategies,examples] --find-links ${TORCH_URL}
8892
displayName: 'Install package & dependencies'
8993

9094
- bash: |
@@ -94,17 +98,26 @@ jobs:
9498
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'"
9599
displayName: 'Env details'
96100
101+
- bash: python -m pytest lightning_fabric
102+
workingDirectory: src
103+
condition: eq(variables['PACKAGE_NAME'], 'fabric')
104+
displayName: 'Testing: Fabric doctests'
105+
97106
- bash: |
98107
pip install -q -r .actions/requirements.txt
99108
python .actions/assistant.py copy_replace_imports --source_dir="./tests" \
100109
--source_import="lightning.fabric,lightning.pytorch" \
101110
--target_import="lightning_fabric,pytorch_lightning"
102-
displayName: 'Adjust tests'
111+
python .actions/assistant.py copy_replace_imports --source_dir="./examples" \
112+
--source_import="lightning.fabric,lightning.pytorch" \
113+
--target_import="lightning_fabric,pytorch_lightning"
114+
condition: eq(variables['PACKAGE_NAME'], 'fabric')
115+
displayName: 'Adjust tests & examples'
103116
104-
- bash: python -m coverage run --source lightning_fabric -m pytest --ignore benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
117+
- bash: python -m coverage run --source $(COVERAGE_SCOPE) -m pytest --ignore benchmarks -v --durations=50
118+
workingDirectory: tests/tests_fabric
105119
env:
106120
PL_RUN_CUDA_TESTS: "1"
107-
workingDirectory: tests/tests_fabric
108121
displayName: 'Testing: fabric standard'
109122
timeoutInMinutes: "10"
110123

@@ -113,28 +126,21 @@ jobs:
113126
env:
114127
PL_RUN_CUDA_TESTS: "1"
115128
PL_STANDALONE_TESTS_SOURCE: "lightning_fabric"
129+
condition: eq(variables['PACKAGE_NAME'], 'fabric')
116130
displayName: 'Testing: fabric standalone tests'
117131
timeoutInMinutes: "10"
118132

119133
- bash: |
120134
python -m coverage report
121135
python -m coverage xml
122136
python -m coverage html
123-
python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure
137+
python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
138+
--flags=gpu,pytest,$(COVERAGE_SCOPE) --name="GPU-coverage" --env=linux,azure
124139
ls -l
125140
workingDirectory: tests/tests_fabric
126141
displayName: 'Statistics'
127142
128-
- task: PublishTestResults@2
129-
displayName: 'Publish test results'
130-
inputs:
131-
testResultsFiles: '$(Build.StagingDirectory)/test-results.xml'
132-
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
133-
condition: succeededOrFailed()
134-
135143
- script: |
136-
# In order to run the examples, we need to substitute the meta package imports with the standalone package
137-
python ../.actions/assistant.py copy_replace_imports --source_dir="./fabric" --source_import="lightning.fabric" --target_import="lightning_fabric.fabric"
138144
set -e
139145
bash run_fabric_examples.sh --accelerator=cuda --devices=1
140146
bash run_fabric_examples.sh --accelerator=cuda --devices=2 --strategy ddp

.azure/gpu-tests-pytorch.yml

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -40,29 +40,36 @@ pr:
4040

4141
jobs:
4242
- job: testing
43+
# how long to run the job before automatically cancelling
44+
timeoutInMinutes: "80"
45+
# how much time to give 'run always even if cancelled tasks' before stopping them
46+
cancelTimeoutInMinutes: "2"
4347
strategy:
4448
matrix:
4549
'PyTorch & strategies': # this uses torch 1.12 as not all strategies support 1.13 yet
4650
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.6.1"
4751
scope: "strategies"
48-
'PyTorch - latest':
52+
PACKAGE_NAME: "pytorch"
53+
'PyTorch | latest':
4954
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1"
5055
scope: ""
51-
# how long to run the job before automatically cancelling
52-
timeoutInMinutes: "80"
53-
# how much time to give 'run always even if cancelled tasks' before stopping them
54-
cancelTimeoutInMinutes: "2"
56+
PACKAGE_NAME: "pytorch"
57+
'Lightning pkg':
58+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1"
59+
scope: ""
60+
PACKAGE_NAME: "lightning"
5561
pool: lit-rtx-3090
5662
variables:
5763
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
64+
FREEZE_REQUIREMENTS: "1"
65+
COVERAGE_SCOPE: $( python -c 'n = "$(PACKAGE_NAME)" ; print(dict(pytorch="pytorch_lightning").get(n, n))' )
5866
container:
5967
image: $(image)
6068
# default shm size is 64m. Increase it to avoid:
6169
# 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
6270
options: "--gpus=all --shm-size=2gb"
6371
workspace:
6472
clean: all
65-
6673
steps:
6774

6875
- bash: |
@@ -75,6 +82,8 @@ jobs:
7582
displayName: 'set env. vars'
7683
7784
- bash: |
85+
echo $(DEVICES)
86+
echo $(COVERAGE_SCOPE)
7887
echo $CUDA_VISIBLE_DEVICES
7988
echo $CUDA_VERSION_MM
8089
echo $PYTORCH_VERSION
@@ -95,9 +104,6 @@ jobs:
95104
displayName: 'Adjust dependencies'
96105
97106
- bash: pip install -e .[extra,test,examples] --find-links ${TORCH_URL}
98-
env:
99-
PACKAGE_NAME: "pytorch"
100-
FREEZE_REQUIREMENTS: "1"
101107
displayName: 'Install package & extras'
102108

103109
- bash: pip uninstall -y -r requirements/pytorch/strategies.txt
@@ -132,19 +138,21 @@ jobs:
132138

133139
- bash: python -m pytest pytorch_lightning
134140
workingDirectory: src
141+
condition: eq(variables['PACKAGE_NAME'], 'pytorch')
135142
displayName: 'Testing: PyTorch doctests'
136143

137144
- bash: |
138145
pip install -q -r .actions/requirements.txt
139146
python .actions/assistant.py copy_replace_imports --source_dir="./tests" \
140147
--source_import="lightning.fabric,lightning.pytorch" \
141148
--target_import="lightning_fabric,pytorch_lightning"
149+
condition: eq(variables['PACKAGE_NAME'], 'pytorch')
142150
displayName: 'Adjust tests'
143151
144-
- bash: python -m coverage run --source pytorch_lightning -m pytest --ignore benchmarks -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=50
152+
- bash: python -m coverage run --source $(COVERAGE_SCOPE) -m pytest --ignore benchmarks -v --durations=50
153+
workingDirectory: tests/tests_pytorch
145154
env:
146155
PL_RUN_CUDA_TESTS: "1"
147-
workingDirectory: tests/tests_pytorch
148156
displayName: 'Testing: PyTorch standard'
149157
timeoutInMinutes: "35"
150158

@@ -155,6 +163,7 @@ jobs:
155163
PL_RUN_CUDA_TESTS: "1"
156164
PL_STANDALONE_TESTS_SOURCE: "pytorch_lightning"
157165
displayName: 'Testing: PyTorch standalone tests'
166+
condition: eq(variables['PACKAGE_NAME'], 'pytorch')
158167
timeoutInMinutes: "35"
159168

160169
- bash: bash run_standalone_tasks.sh
@@ -169,18 +178,12 @@ jobs:
169178
python -m coverage report
170179
python -m coverage xml
171180
python -m coverage html
172-
python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure
181+
python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
182+
--flags=gpu,pytest,$(COVERAGE_SCOPE) --name="GPU-coverage" --env=linux,azure
173183
ls -l
174184
workingDirectory: tests/tests_pytorch
175185
displayName: 'Statistics'
176186
177-
- task: PublishTestResults@2
178-
displayName: 'Publish test results'
179-
inputs:
180-
testResultsFiles: '$(Build.StagingDirectory)/test-results.xml'
181-
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
182-
condition: succeededOrFailed()
183-
184187
- script: |
185188
set -e
186189
bash run_pl_examples.sh --trainer.accelerator=gpu --trainer.devices=1

.git-blame-ignore-revs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# copyright Lightning AI team (#16647)
2+
770b7929255389503e907350e2380ff449229816
3+
# [App] Add Missing Copyright (#16625)
4+
2bab2bac01694680b6c3e4f3a19d5bcd361fcaf4
5+
# adding license (#16450)
6+
e4c3441b25a8c194a873c8850e9507771de7053c
7+
# update copyright in PL & Fabric (#16481)
8+
98f7696d1681974d34fad59c03b4b58d9524ed13
9+
# add copyr (#6661)
10+
d471fa30b3bf95cfe601014bac544754067241ca
11+
# add copyright to tests (#5143)
12+
35401706bf0b89b07bc1748fdc2df612baa2be2a
13+
# added copyright notices (#3062)
14+
f43028f3ae5333b4ef0b08cc34f5560736381962
15+
# copyright (#2710)
16+
44d85c12191098b9bad40536375b29b154d91a47

.github/checkgroup.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,6 @@ subprojects:
3232
- "pl-cpu (windows-2022, lightning, 3.10, 1.12)"
3333
- "pl-cpu (windows-2022, lightning, 3.10, 1.13)"
3434
- "pl-cpu (windows-2022, lightning, 3.8, 1.11, oldest)"
35-
- "pl-cpu (slow, macOS-11, lightning, 3.8, 1.11)"
36-
- "pl-cpu (slow, ubuntu-20.04, lightning, 3.8, 1.11)"
37-
- "pl-cpu (slow, windows-2022, lightning, 3.8, 1.11)"
3835
- "pl-cpu (macOS-11, pytorch, 3.8, 1.13)"
3936
- "pl-cpu (ubuntu-20.04, pytorch, 3.8, 1.13)"
4037
- "pl-cpu (windows-2022, pytorch, 3.8, 1.13)"

.github/workflows/ci-examples-app.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,11 @@ jobs:
7575
- name: Install Lightning package & dependencies
7676
run: |
7777
# do not use `-e` because it will make both packages available since it adds `src` to `sys.path` automatically
78-
pip install .[dev] -U -f ${TORCH_URL} -f ".wheels/"
78+
pip install .[dev] -U -f ${TORCH_URL} -f ".wheels/" --prefer-binary
7979
pip list
8080
- name: Dump handy wheels
8181
if: github.event_name == 'push'
82+
continue-on-error: true
8283
uses: ./.github/actions/pip-wheels
8384
with:
8485
torch-url: ${{ env.TORCH_URL }}

.github/workflows/ci-tests-app.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,18 @@ jobs:
6868
path: .wheels
6969
key: pip_wheels # fixed key so the cache is not cleaned up by the cron job
7070
enableCrossOsArchive: true
71+
- run: ls -lh .wheels/
7172

7273
- name: Switch PyTorch URL
7374
run: python -c "print('TORCH_URL=https://download.pytorch.org/whl/' + str('test/cpu/torch_test.html' if '${{matrix.release}}' == 'pre' else 'cpu/torch_stable.html'))" >> $GITHUB_ENV
7475

7576
- name: Install package & dependencies
7677
run: |
77-
pip install -e .[dev] -U -f ${TORCH_URL} -f ".wheels/"
78+
pip install -e .[dev] -U -f ${TORCH_URL} -f ".wheels/" --prefer-binary
7879
pip list
7980
- name: Dump handy wheels
8081
if: github.event_name == 'push'
82+
continue-on-error: true
8183
uses: ./.github/actions/pip-wheels
8284
with:
8385
torch-url: ${{ env.TORCH_URL }}

.github/workflows/ci-tests-fabric.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,17 +92,19 @@ jobs:
9292
path: .wheels
9393
key: pip_wheels # fixed key so the cache is not cleaned up by the cron job
9494
enableCrossOsArchive: true
95+
- run: ls -lh .wheels/
9596

9697
- name: Switch PyTorch URL
9798
run: python -c "print('TORCH_URL=https://download.pytorch.org/whl/' + str('test/cpu/torch_test.html' if '${{matrix.release}}' == 'pre' else 'cpu/torch_stable.html'))" >> $GITHUB_ENV
9899

99100
- name: Install package & dependencies
100101
run: |
101-
pip install -e .[test] "pytest-timeout" -U -f ${TORCH_URL} -f ".wheels/"
102-
pip install -r requirements/fabric/strategies.txt -f ".wheels/"
102+
pip install -e .[test] "pytest-timeout" -U -f ${TORCH_URL} -f ".wheels/" --prefer-binary
103+
pip install -r requirements/fabric/strategies.txt -f ".wheels/" --prefer-binary
103104
pip list
104105
- name: Dump handy wheels
105106
if: github.event_name == 'push'
107+
continue-on-error: true
106108
uses: ./.github/actions/pip-wheels
107109
with:
108110
torch-url: ${{ env.TORCH_URL }}

.github/workflows/ci-tests-pytorch.yml

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,6 @@ jobs:
5757
- {os: "macOS-11", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"}
5858
- {os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"}
5959
- {os: "windows-2022", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"}
60-
# run test under SLOW label
61-
- {type: "slow", os: "macOS-11", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.11"}
62-
- {type: "slow", os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.11"}
63-
- {type: "slow", os: "windows-2022", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.11"}
6460
# "pytorch" installs the standalone package
6561
- {os: "macOS-11", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.13"}
6662
- {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.13"}
@@ -88,7 +84,6 @@ jobs:
8884
python .actions/assistant.py replace_oldest_ver
8985
9086
- name: Pull legacy checkpoints
91-
if: ${{ matrix.type != 'slow' }}
9287
run: bash .actions/pull_legacy_checkpoints.sh
9388

9489
- name: Adjust PyTorch versions in requirements files
@@ -105,24 +100,22 @@ jobs:
105100
path: .wheels
106101
key: pip_wheels # fixed key so the cache is not cleaned up by the cron job
107102
enableCrossOsArchive: true
103+
- run: ls -lh .wheels/
108104

109105
- name: Env. variables
110106
run: |
111107
# Switch PyTorch URL
112108
python -c "print('TORCH_URL=https://download.pytorch.org/whl/' + str('test/cpu/torch_test.html' if '${{matrix.release}}' == 'pre' else 'cpu/torch_stable.html'))" >> $GITHUB_ENV
113109
# Switch coverage scope
114110
python -c "print('COVERAGE_SCOPE=' + str('lightning' if '${{matrix.pkg-name}}' == 'lightning' else 'pytorch_lightning'))" >> $GITHUB_ENV
115-
# Whether to run slow tests
116-
python -c "print('PL_RUN_SLOW_TESTS=' + str(int('${{ matrix.type }}' == 'slow')))" >> $GITHUB_ENV
117-
# Decide timeout
118-
python -c "print('TEST_TIMEOUT=' + str(120 if '${{ matrix.type }}' == 'slow' else 60))" >> $GITHUB_ENV
119111
120112
- name: Install package & dependencies
121113
run: |
122-
pip install -e .[extra,test] "pytest-timeout" --upgrade -f ${TORCH_URL} -f ".wheels/"
114+
pip install -e .[extra,test] "pytest-timeout" -U -f ${TORCH_URL} -f ".wheels/" --prefer-binary
123115
pip list
124116
- name: Dump handy wheels
125117
if: github.event_name == 'push'
118+
continue-on-error: true
126119
uses: ./.github/actions/pip-wheels
127120
with:
128121
torch-url: ${{ env.TORCH_URL }}
@@ -155,9 +148,7 @@ jobs:
155148
# NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003
156149
run: |
157150
python -m coverage run --source ${{ env.COVERAGE_SCOPE }} \
158-
-m pytest . -v \
159-
--timeout=${TEST_TIMEOUT} --durations=50 \
160-
--reruns 3 --reruns-delay 1
151+
-m pytest . -v --timeout=60 --durations=50 --reruns 3 --reruns-delay 1
161152
162153
- name: Statistics
163154
if: success()

docs/source-pytorch/api_references.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,6 @@ utilities
254254
data
255255
deepspeed
256256
distributed
257-
finite_checks
258257
memory
259258
model_summary
260259
parsing

docs/source-pytorch/common/progress_bar.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ The :class:`~pytorch_lightning.callbacks.TQDMProgressBar` uses the `tqdm <https:
2222
It prints to ``stdout`` and shows up to four different bars:
2323

2424
- **sanity check progress:** the progress during the sanity check run
25-
- **main progress:** shows training + validation progress combined. It also accounts for multiple validation runs during training when :paramref:`~pytorch_lightning.trainer.trainer.Trainer.val_check_interval` is used.
25+
- **train progress:** shows the training progress. It pauses when validation starts and resumes when validation ends. It also accounts for multiple validation runs during training when :paramref:`~pytorch_lightning.trainer.trainer.Trainer.val_check_interval` is used.
2626
- **validation progress:** only visible during validation; shows total progress over all validation datasets.
2727
- **test progress:** only active when testing; shows total progress over all test datasets.
2828

0 commit comments

Comments
 (0)