Skip to content

Commit eaed66d

Browse files
committed
Revert "Improving Hydra+DDP support (#11617)"
This reverts commit 45ca781.
1 parent 609b258 commit eaed66d

File tree

2 files changed

+35
-171
lines changed

2 files changed

+35
-171
lines changed

src/pytorch_lightning/strategies/launchers/subprocess_script.py

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import pytorch_lightning as pl
2323
from lightning_lite.plugins import ClusterEnvironment
2424
from lightning_lite.strategies.launchers.base import _Launcher
25-
from lightning_lite.strategies.launchers.subprocess_script import _basic_subprocess_cmd, _hydra_subprocess_cmd
2625

2726
_HYDRA_AVAILABLE = RequirementCache("hydra-core")
2827

@@ -100,6 +99,32 @@ def _call_children_scripts(self) -> None:
10099
# allow the user to pass the node rank
101100
os.environ["NODE_RANK"] = str(self.cluster_environment.node_rank())
102101
os.environ["LOCAL_RANK"] = str(self.cluster_environment.local_rank())
102+
103+
# Check if the current calling command looked like `python a/b/c.py` or `python -m a.b.c`
104+
# See https://docs.python.org/3/reference/import.html#main-spec
105+
if __main__.__spec__ is None: # pragma: no-cover
106+
# Script called as `python a/b/c.py`
107+
if _HYDRA_AVAILABLE:
108+
# when the user is using hydra, resolve the absolute path
109+
from hydra.utils import to_absolute_path
110+
111+
to_abs_path = to_absolute_path
112+
else:
113+
to_abs_path = os.path.abspath
114+
115+
# pull out the commands used to run the script and resolve the absolute file path
116+
command = sys.argv
117+
try:
118+
full_path = to_abs_path(command[0])
119+
except Exception:
120+
full_path = os.path.abspath(command[0])
121+
122+
command[0] = full_path
123+
# use the same python interpreter that is currently running
124+
command = [sys.executable] + command
125+
else: # Script called as `python -m a.b.c`
126+
command = [sys.executable, "-m", __main__.__spec__.name] + sys.argv[1:]
127+
103128
os.environ["WORLD_SIZE"] = f"{self.num_processes * self.num_nodes}"
104129

105130
for local_rank in range(1, self.num_processes):
@@ -110,18 +135,18 @@ def _call_children_scripts(self) -> None:
110135
if os.environ.get("PL_GLOBAL_SEED") is None and "PL_GLOBAL_SEED" in env_copy:
111136
del env_copy["PL_GLOBAL_SEED"]
112137

113-
hydra_in_use = False
138+
# start process
139+
# if hydra is available and initialized, make sure to set the cwd correctly
140+
cwd: Optional[str] = None
114141
if _HYDRA_AVAILABLE:
115142
from hydra.core.hydra_config import HydraConfig
143+
from hydra.utils import get_original_cwd
116144

117-
hydra_in_use = HydraConfig.initialized()
118-
119-
if hydra_in_use:
120-
command = _hydra_subprocess_cmd(local_rank)
121-
else:
122-
command = _basic_subprocess_cmd()
123-
124-
subprocess.Popen(command, env=env_copy)
145+
if HydraConfig.initialized():
146+
cwd = get_original_cwd()
147+
os_cwd = f'"{os.getcwd()}"'
148+
command += [f"hydra.run.dir={os_cwd}", f"hydra.job.name=train_ddp_process_{local_rank}"]
149+
subprocess.Popen(command, env=env_copy, cwd=cwd)
125150

126151
# starting all processes at once can cause issues
127152
# with dataloaders delay between 1-10 seconds

tests/tests_pytorch/strategies/launchers/test_subprocess_script.py

Lines changed: 0 additions & 161 deletions
This file was deleted.

0 commit comments

Comments
 (0)