-
Notifications
You must be signed in to change notification settings - Fork 455
chore(mlobs): infer the name of the job from the submission ID #14540
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dubloom/ray-v0
Are you sure you want to change the base?
Changes from all commits
e571992
51690f6
7d56dc2
0ed6e6b
c336200
a595b3b
f16e29b
25e75b9
b4b51d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,15 +2,20 @@ | |
from inspect import Parameter | ||
from inspect import Signature | ||
import os | ||
import re | ||
from typing import Any | ||
from typing import Callable | ||
from typing import List | ||
from typing import Optional | ||
|
||
from ddtrace.propagation.http import _TraceContext | ||
import ray | ||
from ray.runtime_context import get_runtime_context | ||
|
||
|
||
# Matches a whole submission ID of the form "job:<name>,run:<run>", where <name> and
# <run> are non-empty runs of ASCII letters, digits, "_", "." and "-".
# Group 1 captures <name>; group 2 captures <run>.
JOB_NAME_REGEX = re.compile(r"^job\:([A-Za-z0-9_\.\-]+),run:([A-Za-z0-9_\.\-]+)$")
|
||
|
||
def _inject_dd_trace_ctx_kwarg(method: Callable) -> Signature: | ||
old_sig = inspect.signature(method) | ||
if "_dd_trace_ctx" in old_sig.parameters: | ||
|
@@ -152,3 +157,22 @@ def check_cython(x): | |
|
||
# Check if function or method, respectively | ||
return check_cython(obj) or (hasattr(obj, "__func__") and check_cython(obj.__func__)) | ||
|
||
|
||
def get_dd_job_name(submission_id: Optional[str] = None) -> str:
    """
    Resolve the job name to report for the current Ray job.

    The sources are consulted in order of precedence:

    1. The ``_RAY_JOB_NAME`` environment variable, when set and non-empty.
    2. The job part of a submission id shaped like ``job:<name>,run:<run>``
       (as matched by ``JOB_NAME_REGEX``).
    3. The submission id itself, when it is non-empty but not in that shape.
    4. The default job name ``"unspecified.ray.job"`` when none of the above is set.

    :param submission_id: Submission id to derive the name from. When ``None``,
        the ``_RAY_SUBMISSION_ID`` environment variable is used instead.
    :returns: The resolved job name; never ``None``.
    """
    default_job_name = "unspecified.ray.job"

    # An explicitly configured job name always wins over anything derived.
    job_name = os.environ.get("_RAY_JOB_NAME")
    if job_name:
        return job_name

    if submission_id is None:
        # Normalise "unset" and "empty" to "" so the regex always gets a str.
        submission_id = os.environ.get("_RAY_SUBMISSION_ID") or ""

    # Keep only the job part of a structured submission id, dropping the run
    # part, so that runs of the same job are grouped under one name.
    match = JOB_NAME_REGEX.match(submission_id)
    if match:
        return match.group(1)

    # Unstructured ids are used verbatim; an empty id falls back to the default.
    return submission_id or default_job_name
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import os | ||
|
||
from ddtrace.contrib.internal.ray.utils import get_dd_job_name | ||
|
||
|
||
def test_get_dd_job_name():
    """Check every source of the job name, in order of precedence."""
    # Imported locally so the module's import block does not need to change.
    from unittest import mock

    # patch.dict snapshots os.environ and restores it on exit, so a failing
    # assertion cannot leak these variables into other tests.
    with mock.patch.dict(os.environ):
        # Start from a known state regardless of the ambient environment.
        os.environ.pop("_RAY_JOB_NAME", None)
        os.environ.pop("_RAY_SUBMISSION_ID", None)

        # Explicit submission ids: structured ids yield the job part,
        # anything else is returned unchanged.
        assert get_dd_job_name("job:frobnitzigate_idiosyncrasies,run:38") == "frobnitzigate_idiosyncrasies"
        assert get_dd_job_name("joe.schmoe-cf32445c3b2842958956ba6b6225ad") == "joe.schmoe-cf32445c3b2842958956ba6b6225ad"
        assert get_dd_job_name("mortar.clustering.pipeline") == "mortar.clustering.pipeline"

        # _RAY_JOB_NAME takes precedence over any submission id.
        os.environ["_RAY_JOB_NAME"] = "train.cool.model"
        assert get_dd_job_name("whatever") == "train.cool.model"
        del os.environ["_RAY_JOB_NAME"]

        # Nothing set at all: the default job name is expected.
        assert get_dd_job_name() == "unspecified.ray.job"

        # With no argument, the submission id comes from _RAY_SUBMISSION_ID.
        os.environ["_RAY_SUBMISSION_ID"] = "job:frobnitzigate_idiosyncrasies,run:38"
        assert get_dd_job_name() == "frobnitzigate_idiosyncrasies"
        os.environ["_RAY_SUBMISSION_ID"] = "whatever"
        assert get_dd_job_name() == "whatever"
        del os.environ["_RAY_SUBMISSION_ID"]
        assert get_dd_job_name() == "unspecified.ray.job"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we will eventually want to define `_RAY_JOB_NAME` and `_RAY_SUBMISSION_ID` as named constants instead of repeating them as string literals, but in the interest of minimizing merge conflicts, I am postponing that change until a future PR.