Skip to content

Commit db8e628

Browse files
authored
Allow secrets redact function to have different redaction than *** (#53977)
* Allow secrets redact function to have different redaction than `***` For logs, using `***` is fine, but as part of the changes introduced in #53943 we decided it might be nice to use an even-less-frequently-appearing thing than `***` so we can detect modified secrets. This gives us the ability to do that at the redaction layer * Deal with OpenLineage subclassing SecretsMasker class
1 parent f5a88d9 commit db8e628

File tree

4 files changed

+117
-26
lines changed

4 files changed

+117
-26
lines changed

devel-common/src/tests_common/pytest_plugin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1826,7 +1826,7 @@ def _disable_redact(request: pytest.FixtureRequest, mocker):
18261826
)
18271827

18281828
mocked_redact = mocker.patch(target)
1829-
mocked_redact.side_effect = lambda item, name=None, max_depth=None: item
1829+
mocked_redact.side_effect = lambda item, *args, **kwargs: item
18301830
with pytest.MonkeyPatch.context() as mp_ctx:
18311831
mp_ctx.setattr(settings, "MASK_SECRETS_IN_LOGS", False)
18321832
yield

providers/openlineage/src/airflow/providers/openlineage/utils/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,7 @@ def from_masker(cls, other: SecretsMasker) -> OpenLineageRedactor:
830830
instance.replacer = other.replacer
831831
return instance
832832

833-
def _redact(self, item: Redactable, name: str | None, depth: int, max_depth: int) -> Redacted:
833+
def _redact(self, item: Redactable, name: str | None, depth: int, max_depth: int, **kwargs) -> Redacted: # type: ignore[override]
834834
if AIRFLOW_V_3_0_PLUS:
835835
# Keep compatibility for Airflow 2.x, remove when Airflow 3.0 is the minimum version
836836
class AirflowContextDeprecationWarning(UserWarning):
@@ -886,7 +886,7 @@ class AirflowContextDeprecationWarning(UserWarning):
886886
),
887887
)
888888
return item
889-
return super()._redact(item, name, depth, max_depth)
889+
return super()._redact(item, name, depth, max_depth, **kwargs)
890890
except Exception as exc:
891891
log.warning("Unable to redact %r. Error was: %s: %s", item, type(exc).__name__, exc)
892892
return item

task-sdk/src/airflow/sdk/execution_time/secrets_masker.py

Lines changed: 93 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
import collections.abc
2222
import contextlib
23+
import functools
24+
import inspect
2325
import logging
2426
import re
2527
import sys
@@ -118,9 +120,11 @@ def mask_secret(secret: str | dict | Iterable, name: str | None = None) -> None:
118120
_secrets_masker().add_mask(secret, name)
119121

120122

121-
def redact(value: Redactable, name: str | None = None, max_depth: int | None = None) -> Redacted:
122-
"""Redact any secrets found in ``value``."""
123-
return _secrets_masker().redact(value, name, max_depth)
123+
def redact(
124+
value: Redactable, name: str | None = None, max_depth: int | None = None, replacement: str = "***"
125+
) -> Redacted:
126+
"""Redact any secrets found in ``value`` with the given replacement."""
127+
return _secrets_masker().redact(value, name, max_depth, replacement=replacement)
124128

125129

126130
@overload
@@ -198,6 +202,29 @@ def __init__(self):
198202
super().__init__()
199203
self.patterns = set()
200204

205+
@classmethod
206+
def __init_subclass__(cls, **kwargs):
207+
super().__init_subclass__(**kwargs)
208+
209+
if cls._redact is not SecretsMasker._redact:
210+
sig = inspect.signature(cls._redact)
211+
# Compat for older versions of the OpenLineage plugin which subclasses this -- call the method
212+
# without the replacement character
213+
for param in sig.parameters.values():
214+
if param.name == "replacement" or param.kind == param.VAR_KEYWORD:
215+
break
216+
else:
217+
# Block only runs if no break above.
218+
219+
f = cls._redact
220+
221+
@functools.wraps(f)
222+
def _redact(*args, replacement: str = "***", **kwargs):
223+
return f(*args, **kwargs)
224+
225+
cls._redact = _redact
226+
...
227+
201228
@cached_property
202229
def _record_attrs_to_ignore(self) -> Iterable[str]:
203230
# Doing log.info(..., extra={'foo': 2}) sets extra properties on
@@ -251,59 +278,85 @@ def filter(self, record) -> bool:
251278

252279
# Default on `max_depth` is to support versions of the OpenLineage plugin (not the provider) which called
253280
# this function directly. New versions of that provider, and this class itself call it with a value
254-
def _redact_all(self, item: Redactable, depth: int, max_depth: int = MAX_RECURSION_DEPTH) -> Redacted:
281+
def _redact_all(
282+
self,
283+
item: Redactable,
284+
depth: int,
285+
max_depth: int = MAX_RECURSION_DEPTH,
286+
*,
287+
replacement: str = "***",
288+
) -> Redacted:
255289
if depth > max_depth or isinstance(item, str):
256-
return "***"
290+
return replacement
257291
if isinstance(item, dict):
258292
return {
259-
dict_key: self._redact_all(subval, depth + 1, max_depth) for dict_key, subval in item.items()
293+
dict_key: self._redact_all(subval, depth + 1, max_depth, replacement=replacement)
294+
for dict_key, subval in item.items()
260295
}
261296
if isinstance(item, (tuple, set)):
262297
# Turn set in to tuple!
263-
return tuple(self._redact_all(subval, depth + 1, max_depth) for subval in item)
298+
return tuple(
299+
self._redact_all(subval, depth + 1, max_depth, replacement=replacement) for subval in item
300+
)
264301
if isinstance(item, list):
265-
return list(self._redact_all(subval, depth + 1, max_depth) for subval in item)
302+
return list(
303+
self._redact_all(subval, depth + 1, max_depth, replacement=replacement) for subval in item
304+
)
266305
return item
267306

268-
def _redact(self, item: Redactable, name: str | None, depth: int, max_depth: int) -> Redacted:
307+
def _redact(
308+
self, item: Redactable, name: str | None, depth: int, max_depth: int, replacement: str = "***"
309+
) -> Redacted:
269310
# Avoid spending too much effort on redacting on deeply nested
270311
# structures. This also avoid infinite recursion if a structure has
271312
# reference to self.
272313
if depth > max_depth:
273314
return item
274315
try:
275316
if name and should_hide_value_for_key(name):
276-
return self._redact_all(item, depth, max_depth)
317+
return self._redact_all(item, depth, max_depth, replacement=replacement)
277318
if isinstance(item, dict):
278319
to_return = {
279-
dict_key: self._redact(subval, name=dict_key, depth=(depth + 1), max_depth=max_depth)
320+
dict_key: self._redact(
321+
subval, name=dict_key, depth=(depth + 1), max_depth=max_depth, replacement=replacement
322+
)
280323
for dict_key, subval in item.items()
281324
}
282325
return to_return
283326
if isinstance(item, Enum):
284-
return self._redact(item=item.value, name=name, depth=depth, max_depth=max_depth)
327+
return self._redact(
328+
item=item.value, name=name, depth=depth, max_depth=max_depth, replacement=replacement
329+
)
285330
if _is_v1_env_var(item):
286331
tmp = item.to_dict()
287332
if should_hide_value_for_key(tmp.get("name", "")) and "value" in tmp:
288-
tmp["value"] = "***"
333+
tmp["value"] = replacement
289334
else:
290-
return self._redact(item=tmp, name=name, depth=depth, max_depth=max_depth)
335+
return self._redact(
336+
item=tmp, name=name, depth=depth, max_depth=max_depth, replacement=replacement
337+
)
291338
return tmp
292339
if isinstance(item, str):
293340
if self.replacer:
294341
# We can't replace specific values, but the key-based redacting
295342
# can still happen, so we can't short-circuit, we need to walk
296343
# the structure.
297-
return self.replacer.sub("***", str(item))
344+
return self.replacer.sub(replacement, str(item))
298345
return item
299346
if isinstance(item, (tuple, set)):
300347
# Turn set in to tuple!
301348
return tuple(
302-
self._redact(subval, name=None, depth=(depth + 1), max_depth=max_depth) for subval in item
349+
self._redact(
350+
subval, name=None, depth=(depth + 1), max_depth=max_depth, replacement=replacement
351+
)
352+
for subval in item
303353
)
304354
if isinstance(item, list):
305355
return [
306-
self._redact(subval, name=None, depth=(depth + 1), max_depth=max_depth) for subval in item
356+
self._redact(
357+
subval, name=None, depth=(depth + 1), max_depth=max_depth, replacement=replacement
358+
)
359+
for subval in item
307360
]
308361
return item
309362
# I think this should never happen, but it does not hurt to leave it just in case
@@ -325,10 +378,12 @@ def _merge(
325378
self,
326379
new_item: Redacted,
327380
old_item: Redactable,
381+
*,
328382
name: str | None,
329383
depth: int,
330384
max_depth: int,
331385
force_sensitive: bool = False,
386+
replacement: str,
332387
) -> Redacted:
333388
"""Merge a redacted item with its original unredacted counterpart."""
334389
if depth > max_depth:
@@ -353,6 +408,7 @@ def _merge(
353408
depth=depth + 1,
354409
max_depth=max_depth,
355410
force_sensitive=is_sensitive,
411+
replacement=replacement,
356412
)
357413
else:
358414
merged[key] = new_item[key]
@@ -374,6 +430,7 @@ def _merge(
374430
depth=depth + 1,
375431
max_depth=max_depth,
376432
force_sensitive=is_sensitive,
433+
replacement=replacement,
377434
)
378435
)
379436
else:
@@ -398,25 +455,38 @@ def _merge(
398455
except (TypeError, AttributeError, ValueError):
399456
return new_item
400457

401-
def redact(self, item: Redactable, name: str | None = None, max_depth: int | None = None) -> Redacted:
458+
def redact(
459+
self,
460+
item: Redactable,
461+
name: str | None = None,
462+
max_depth: int | None = None,
463+
replacement: str = "***",
464+
) -> Redacted:
402465
"""
403466
Redact an any secrets found in ``item``, if it is a string.
404467
405468
If ``name`` is given, and it's a "sensitive" name (see
406469
:func:`should_hide_value_for_key`) then all string values in the item
407470
is redacted.
408471
"""
409-
return self._redact(item, name, depth=0, max_depth=max_depth or self.MAX_RECURSION_DEPTH)
472+
return self._redact(
473+
item, name, depth=0, max_depth=max_depth or self.MAX_RECURSION_DEPTH, replacement=replacement
474+
)
410475

411476
def merge(
412-
self, new_item: Redacted, old_item: Redactable, name: str | None = None, max_depth: int | None = None
477+
self,
478+
new_item: Redacted,
479+
old_item: Redactable,
480+
name: str | None = None,
481+
max_depth: int | None = None,
482+
replacement: str = "***",
413483
) -> Redacted:
414484
"""
415485
Merge a redacted item with its original unredacted counterpart.
416486
417487
Takes a user-modified redacted item and merges it with the original unredacted item.
418-
For sensitive fields that still contain "***" (unchanged), the original value is restored.
419-
For fields that have been updated, the new value is preserved.
488+
For sensitive fields that still contain "***" (or whatever the ``replacement`` is specified as), the
489+
original value is restored. For fields that have been updated, the new value is preserved.
420490
"""
421491
return self._merge(
422492
new_item,
@@ -425,6 +495,7 @@ def merge(
425495
depth=0,
426496
max_depth=max_depth or self.MAX_RECURSION_DEPTH,
427497
force_sensitive=False,
498+
replacement=replacement,
428499
)
429500

430501
@cached_property

task-sdk/tests/task_sdk/definitions/test_secrets_masker.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,26 @@ def test_redact(self, patterns, name, value, expected):
287287

288288
assert filt.redact(value, name) == expected
289289

290+
@pytest.mark.parametrize(
291+
("name", "value", "expected"),
292+
[
293+
("api_key", "pass", "*️⃣*️⃣*️⃣"),
294+
("api_key", ("pass",), ("*️⃣*️⃣*️⃣",)),
295+
(None, {"data": {"secret": "secret"}}, {"data": {"secret": "*️⃣*️⃣*️⃣"}}),
296+
# Non string dict keys
297+
(None, {1: {"secret": "secret"}}, {1: {"secret": "*️⃣*️⃣*️⃣"}}),
298+
(
299+
"api_key",
300+
{"other": "innoent", "nested": ["x", "y"]},
301+
{"other": "*️⃣*️⃣*️⃣", "nested": ["*️⃣*️⃣*️⃣", "*️⃣*️⃣*️⃣"]},
302+
),
303+
],
304+
)
305+
def test_redact_replacement(self, name, value, expected):
306+
filt = SecretsMasker()
307+
308+
assert filt.redact(value, name, replacement="*️⃣*️⃣*️⃣") == expected
309+
290310
def test_redact_filehandles(self, caplog):
291311
filt = SecretsMasker()
292312
with open("/dev/null", "w") as handle:
@@ -699,7 +719,7 @@ def test_redact_all_directly(self):
699719
"nested": {"tuple": ("a", "b", "c"), "set": {"x", "y", "z"}},
700720
}
701721

702-
result = secrets_masker._redact_all(test_data, depth=0)
722+
result = secrets_masker._redact_all(test_data, depth=0, replacement="***")
703723

704724
assert result["string"] == "***"
705725
assert result["number"] == 12345

0 commit comments

Comments
 (0)