Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/pytorch_lightning/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
## [Unreleased] - 2022-MM-DD


- Added an option to configure the signal SLURM sends when a job is preempted or requeued ([#14610](https://github.com/Lightning-AI/lightning/issues/14610))


### Added


Expand Down Expand Up @@ -40,6 +37,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added `WandbLogger.download_artifact` and `WandbLogger.use_artifact` for managing artifacts with Weights and Biases ([#14551](https://github.com/Lightning-AI/lightning/issues/14551))


- Added an option to configure the signal SLURM sends when a job is preempted or requeued ([#14610](https://github.com/Lightning-AI/lightning/issues/14610))


### Changed

- The `Trainer.{fit,validate,test,predict,tune}` methods now raise a useful error message if the input is not a `LightningModule` ([#13892](https://github.com/Lightning-AI/lightning/pull/13892))
Expand Down Expand Up @@ -186,6 +186,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed torchscript error with ensembles of LightningModules ([#14657](https://github.com/Lightning-AI/lightning/pull/14657), [#14724](https://github.com/Lightning-AI/lightning/pull/14724))


- Fixed an issue with `TensorBoardLogger.finalize` creating a new experiment when none was created during the Trainer's execution ([#14762](https://github.com/Lightning-AI/lightning/pull/14762))



## [1.7.6] - 2022-09-13

### Changed
Expand Down
5 changes: 3 additions & 2 deletions src/pytorch_lightning/loggers/tensorboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,9 @@ def save(self) -> None:

@rank_zero_only
def finalize(self, status: str) -> None:
self.experiment.flush()
self.experiment.close()
if self._experiment is not None:
self.experiment.flush()
self.experiment.close()
self.save()

@property
Expand Down
14 changes: 10 additions & 4 deletions tests/tests_pytorch/checkpointing/test_model_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,18 +869,23 @@ def validation_step(self, batch, batch_idx):
"limit_test_batches": 2,
"enable_progress_bar": False,
"enable_model_summary": False,
"log_every_n_steps": 1,
"default_root_dir": tmpdir,
}
trainer = Trainer(**trainer_kwargs, callbacks=[checkpoint_callback])
trainer.fit(model)
assert os.listdir(tmpdir) == ["epoch=00.ckpt"]
assert set(os.listdir(tmpdir)) == {"epoch=00.ckpt", "lightning_logs"}

for idx in range(4):
# load from checkpoint
trainer = pl.Trainer(**trainer_kwargs, default_root_dir=tmpdir)
trainer = Trainer(**trainer_kwargs)
trainer.fit(model, ckpt_path=checkpoint_callback.best_model_path)
trainer.test(ckpt_path=checkpoint_callback.best_model_path, verbose=False)

assert set(os.listdir(tmpdir)) == {"epoch=00.ckpt", "lightning_logs"}
assert set(os.listdir(tmpdir / "lightning_logs")) == {f"version_{i}" for i in range(4)}

# no new versions created after the initial fit, because the ones that resume from ckpt do not log anything
assert set(os.listdir(tmpdir / "lightning_logs")) == {"version_0"}


def test_checkpoint_repeated_strategy_extended(tmpdir):
Expand All @@ -891,6 +896,7 @@ class ExtendedBoringModel(BoringModel):
def validation_step(self, batch, batch_idx):
output = self.layer(batch)
loss = self.loss(batch, output)
self.log("val_loss", loss)
return {"val_loss": loss}

def validation_epoch_end(self, *_):
Expand Down Expand Up @@ -930,7 +936,7 @@ def assert_checkpoint_log_dir(idx):
limit_test_batches=4,
callbacks=[checkpoint_cb],
)
trainer = pl.Trainer(**trainer_config)
trainer = Trainer(**trainer_config)
assert_trainer_init(trainer)

model = ExtendedBoringModel()
Expand Down
10 changes: 10 additions & 0 deletions tests/tests_pytorch/loggers/test_tensorboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,17 @@ def training_step(self, *args):
def test_tensorboard_finalize(summary_writer, tmpdir):
"""Test that the SummaryWriter closes in finalize."""
logger = TensorBoardLogger(save_dir=tmpdir)
assert logger._experiment is None
logger.finalize("any")

# no log calls, no experiment created -> nothing to flush
summary_writer.assert_not_called()

logger = TensorBoardLogger(save_dir=tmpdir)
logger.log_metrics({"flush_me": 11.1}) # trigger creation of an experiment
logger.finalize("any")

# finalize flushes to experiment directory
summary_writer().flush.assert_called()
summary_writer().close.assert_called()

Expand Down