Skip to content

Commit edd2b42

Browse files
authored
Introduce {Work,Flow}.lightningignore (#15818)
1 parent 53ceb15 commit edd2b42

File tree

10 files changed

+213
-42
lines changed

10 files changed

+213
-42
lines changed

docs/source-app/workflows/run_app_on_cloud/cloud_files.rst

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ For example, the source code directory below with the ``.lightningignore`` file
3030
├── requirements.txt
3131
└── model.pt
3232
33-
3433
.. code:: bash
3534
3635
~/project/home ❯ cat .lightningignore
@@ -39,6 +38,18 @@ For example, the source code directory below with the ``.lightningignore`` file
3938
4039
A sample ``.lightningignore`` file can be found `here <https://github.com/Lightning-AI/lightning.beta/blob/master/.lightningignore>`_.
4140

41+
If you are a component author and your component creates local files that you want to ignore, you can do:
42+
43+
.. code-block:: python
44+
45+
class MyComponent(L.LightningWork): # or L.LightningFlow
46+
def __init__(self):
47+
super().__init__()
48+
self.lightningignore = ("model.pt", "data_dir")
49+
50+
51+
This has the benefit that the files will be ignored automatically for all the component users, making an easier
52+
transition between running locally vs in the cloud.
4253

4354
----
4455

src/lightning_app/CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
88

99
### Added
1010

11-
-
11+
- Added `Lightning{Flow,Work}.lightningignore` attributes to programmatically ignore files before uploading to the cloud ([#15818](https://github.com/Lightning-AI/lightning/pull/15818))
1212

1313

1414
### Changed

src/lightning_app/components/multi_node/trainer.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,6 @@ def __init__(
114114
cloud_compute=cloud_compute,
115115
**work_kwargs,
116116
)
117+
118+
# the Trainer enables TensorBoard by default, so this is often an undesired directory to upload to the cloud
119+
self.lightningignore += ("lightning_logs",)

src/lightning_app/core/flow.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,13 @@
1010
from lightning_app.frontend import Frontend
1111
from lightning_app.storage import Path
1212
from lightning_app.storage.drive import _maybe_create_drive, Drive
13-
from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, _set_child_name, is_overridden
13+
from lightning_app.utilities.app_helpers import (
14+
_is_json_serializable,
15+
_lightning_dispatched,
16+
_LightningAppRef,
17+
_set_child_name,
18+
is_overridden,
19+
)
1420
from lightning_app.utilities.component import _sanitize_state
1521
from lightning_app.utilities.exceptions import ExitAppException
1622
from lightning_app.utilities.introspection import _is_init_context, _is_run_context
@@ -104,6 +110,8 @@ def __init__(self):
104110
self._layout: Union[List[Dict], Dict] = {}
105111
self._paths = {}
106112
self._backend: Optional[Backend] = None
113+
# tuple instead of a list so that it cannot be modified without using the setter
114+
self._lightningignore: Tuple[str, ...] = tuple()
107115

108116
@property
109117
def name(self):
@@ -310,6 +318,20 @@ def flows(self) -> Dict[str, "LightningFlow"]:
310318
flows.update(getattr(self, struct_name).flows)
311319
return flows
312320

321+
@property
322+
def lightningignore(self) -> Tuple[str, ...]:
323+
"""Programmatic equivalent of the ``.lightningignore`` file."""
324+
return self._lightningignore
325+
326+
@lightningignore.setter
327+
def lightningignore(self, lightningignore: Tuple[str, ...]) -> None:
328+
if _lightning_dispatched():
329+
raise RuntimeError(
330+
f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an"
331+
" effect"
332+
)
333+
self._lightningignore = lightningignore
334+
313335
def works(self, recurse: bool = True) -> List[LightningWork]:
314336
"""Return its :class:`~lightning_app.core.work.LightningWork`."""
315337
works = [getattr(self, el) for el in sorted(self._works)]

src/lightning_app/core/work.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,20 @@
33
import warnings
44
from copy import deepcopy
55
from functools import partial, wraps
6-
from typing import Any, Callable, Dict, List, Optional, Type, TYPE_CHECKING, Union
6+
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TYPE_CHECKING, Union
77

88
from deepdiff import DeepHash, Delta
99

1010
from lightning_app.core.queues import BaseQueue
1111
from lightning_app.storage import Path
1212
from lightning_app.storage.drive import _maybe_create_drive, Drive
1313
from lightning_app.storage.payload import Payload
14-
from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, is_overridden
14+
from lightning_app.utilities.app_helpers import (
15+
_is_json_serializable,
16+
_lightning_dispatched,
17+
_LightningAppRef,
18+
is_overridden,
19+
)
1520
from lightning_app.utilities.component import _is_flow_context, _sanitize_state
1621
from lightning_app.utilities.enum import (
1722
CacheCallsKeys,
@@ -154,6 +159,8 @@ def __init__(
154159
self._local_build_config = local_build_config or BuildConfig()
155160
self._cloud_build_config = cloud_build_config or BuildConfig()
156161
self._cloud_compute = cloud_compute or CloudCompute()
162+
# tuple instead of a list so that it cannot be modified without using the setter
163+
self._lightningignore: Tuple[str, ...] = tuple()
157164
self._backend: Optional[Backend] = None
158165
self._check_run_is_implemented()
159166
self._on_init_end()
@@ -253,6 +260,20 @@ def cloud_compute(self, cloud_compute: CloudCompute) -> None:
253260
compute_store.remove(self.name)
254261
self._cloud_compute = cloud_compute
255262

263+
@property
264+
def lightningignore(self) -> Tuple[str, ...]:
265+
"""Programmatic equivalent of the ``.lightningignore`` file."""
266+
return self._lightningignore
267+
268+
@lightningignore.setter
269+
def lightningignore(self, lightningignore: Tuple[str, ...]) -> None:
270+
if _lightning_dispatched():
271+
raise RuntimeError(
272+
f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an"
273+
" effect"
274+
)
275+
self._lightningignore = lightningignore
276+
256277
@property
257278
def status(self) -> WorkStatus:
258279
"""Return the current status of the work.

src/lightning_app/runners/cloud.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import sys
66
import time
77
from dataclasses import dataclass
8+
from functools import partial
89
from pathlib import Path
910
from textwrap import dedent
1011
from typing import Any, List, Optional, Union
@@ -62,6 +63,7 @@
6263
from lightning_app.runners.backends.cloud import CloudBackend
6364
from lightning_app.runners.runtime import Runtime
6465
from lightning_app.source_code import LocalSourceCodeDir
66+
from lightning_app.source_code.copytree import _filter_ignored, _parse_lightningignore
6567
from lightning_app.storage import Drive, Mount
6668
from lightning_app.utilities.app_helpers import _is_headless, Logger
6769
from lightning_app.utilities.cloud import _get_project
@@ -217,7 +219,19 @@ def dispatch(
217219
root = Path(self.entrypoint_file).absolute().parent
218220
cleanup_handle = _prepare_lightning_wheels_and_requirements(root)
219221
self.app._update_index_file()
220-
repo = LocalSourceCodeDir(path=root)
222+
223+
# gather and merge all lightningignores
224+
children = self.app.flows + self.app.works
225+
lightningignores = [c.lightningignore for c in children]
226+
if lightningignores:
227+
merged = sum(lightningignores, tuple())
228+
logger.debug(f"Found the following lightningignores: {merged}")
229+
patterns = _parse_lightningignore(merged)
230+
ignore_functions = [partial(_filter_ignored, root, patterns)]
231+
else:
232+
ignore_functions = None
233+
234+
repo = LocalSourceCodeDir(path=root, ignore_functions=ignore_functions)
221235
self._check_uploaded_folder(root, repo)
222236
requirements_file = root / "requirements.txt"
223237
# The entry point file needs to be relative to the root of the uploaded source file directory,
@@ -493,24 +507,34 @@ def _ensure_cluster_project_binding(self, project_id: str, cluster_id: str):
493507
@staticmethod
494508
def _check_uploaded_folder(root: Path, repo: LocalSourceCodeDir) -> None:
495509
"""This method is used to inform the users if their folder files are large and how to filter them."""
496-
lightning_tar = set(fnmatch.filter(repo.files, "*lightning-*.tar.gz"))
497-
app_folder_size = sum(Path(p).stat().st_size for p in repo.files if p not in lightning_tar)
498-
app_folder_size_in_mb = round(app_folder_size / (1000 * 1000), 5)
510+
excludes = set(fnmatch.filter(repo.files, "*lightning-*.tar.gz"))
511+
excludes.update(fnmatch.filter(repo.files, ".lightningignore"))
512+
files = [Path(f) for f in repo.files if f not in excludes]
513+
file_sizes = {f: f.stat().st_size for f in files}
514+
mb = 1000_000
515+
app_folder_size_in_mb = sum(file_sizes.values()) / mb
499516
if app_folder_size_in_mb > CLOUD_UPLOAD_WARNING:
500-
path_sizes = [(p, Path(p).stat().st_size / (1000 * 1000)) for p in repo.files]
501-
largest_paths = sorted((x for x in path_sizes if x[-1] > 0.01), key=lambda x: x[1], reverse=True)[:25]
502-
largest_paths_msg = "\n".join(f"{round(s, 5)} MB: {p}" for p, s in largest_paths)
517+
# filter out files under 0.01mb
518+
relevant_files = {f: sz for f, sz in file_sizes.items() if sz > 0.01 * mb}
519+
if relevant_files:
520+
by_largest = dict(sorted(relevant_files.items(), key=lambda x: x[1], reverse=True))
521+
by_largest = dict(list(by_largest.items())[:25]) # trim
522+
largest_paths_msg = "\n".join(
523+
f"{round(sz / mb, 5)} MB: {p.relative_to(root)}" for p, sz in by_largest.items()
524+
)
525+
largest_paths_msg = f"Here are the largest files:\n{largest_paths_msg}\n"
526+
else:
527+
largest_paths_msg = ""
503528
warning_msg = (
504529
f"Your application folder '{root.absolute()}' is more than {CLOUD_UPLOAD_WARNING} MB. "
505-
f"The total size is {app_folder_size_in_mb} MB\n"
506-
f"Here are the largest files: \n{largest_paths_msg}\n"
507-
"Perhaps you should try running the app in an empty directory."
530+
f"The total size is {round(app_folder_size_in_mb, 2)} MB. {len(files)} files were uploaded.\n"
531+
+ largest_paths_msg
532+
+ "Perhaps you should try running the app in an empty directory."
508533
)
509534
if not (root / DOT_IGNORE_FILENAME).is_file():
510-
warning_msg = (
511-
warning_msg
512-
+ "\nIn order to ignore some files or folder, "
513-
+ "create a `.lightningignore` file and add the paths to ignore."
535+
warning_msg += (
536+
"\nIn order to ignore some files or folder, create a `.lightningignore` file and add the paths to"
537+
" ignore. You can also set the `lightningingore` attribute in a Flow or Work."
514538
)
515539
else:
516540
warning_msg += "\nYou can ignore some files or folders by adding them to `.lightningignore`."

src/lightning_app/source_code/copytree.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,20 @@
33
from functools import partial
44
from pathlib import Path
55
from shutil import copy2, copystat, Error
6-
from typing import Callable, List, Set, Union
6+
from typing import Callable, List, Optional, Set, Union
77

88
from lightning_app.core.constants import DOT_IGNORE_FILENAME
99
from lightning_app.utilities.app_helpers import Logger
1010

1111
logger = Logger(__name__)
1212

13+
_IGNORE_FUNCTION = Callable[[Path, List[Path]], List[Path]]
14+
1315

1416
def _copytree(
1517
src: Union[Path, str],
1618
dst: Union[Path, str],
17-
ignore_functions: List[Callable] = None,
19+
ignore_functions: Optional[List[_IGNORE_FUNCTION]] = None,
1820
dirs_exist_ok=False,
1921
dry_run=False,
2022
) -> List[str]:

src/lightning_app/source_code/local.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from shutil import rmtree
55
from typing import List, Optional
66

7-
from lightning_app.source_code.copytree import _copytree
7+
from lightning_app.source_code.copytree import _copytree, _IGNORE_FUNCTION
88
from lightning_app.source_code.hashing import _get_hash
99
from lightning_app.source_code.tar import _tar_path
1010
from lightning_app.source_code.uploader import FileUploader
@@ -15,8 +15,9 @@ class LocalSourceCodeDir:
1515

1616
cache_location: Path = Path.home() / ".lightning" / "cache" / "repositories"
1717

18-
def __init__(self, path: Path):
18+
def __init__(self, path: Path, ignore_functions: Optional[List[_IGNORE_FUNCTION]] = None) -> None:
1919
self.path = path
20+
self.ignore_functions = ignore_functions
2021

2122
# cache checksum version
2223
self._version: Optional[str] = None
@@ -33,7 +34,7 @@ def __init__(self, path: Path):
3334
def files(self) -> List[str]:
3435
"""Returns a set of files that are not ignored by .lightningignore."""
3536
if self._non_ignored_files is None:
36-
self._non_ignored_files = _copytree(self.path, "", dry_run=True)
37+
self._non_ignored_files = _copytree(self.path, "", ignore_functions=self.ignore_functions, dry_run=True)
3738
return self._non_ignored_files
3839

3940
@property
@@ -59,7 +60,7 @@ def packaging_session(self) -> Path:
5960
session_path = self.cache_location / "packaging_sessions" / self.version
6061
try:
6162
rmtree(session_path, ignore_errors=True)
62-
_copytree(self.path, session_path)
63+
_copytree(self.path, session_path, ignore_functions=self.ignore_functions)
6364
yield session_path
6465
finally:
6566
rmtree(session_path, ignore_errors=True)

src/lightning_app/utilities/app_helpers.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,11 +511,15 @@ def is_static_method(klass_or_instance, attr) -> bool:
511511
return isinstance(inspect.getattr_static(klass_or_instance, attr), staticmethod)
512512

513513

514+
def _lightning_dispatched() -> bool:
515+
return bool(int(os.getenv("LIGHTNING_DISPATCHED", 0)))
516+
517+
514518
def _should_dispatch_app() -> bool:
515519
return (
516520
__debug__
517521
and "_pytest.doctest" not in sys.modules
518-
and not bool(int(os.getenv("LIGHTNING_DISPATCHED", "0")))
522+
and not _lightning_dispatched()
519523
and "LIGHTNING_APP_STATE_URL" not in os.environ
520524
)
521525

0 commit comments

Comments
 (0)