Skip to content

Commit 8ee0ebb

Browse files
committed
Allow secrets redact function to have different redaction than ***
For logs, using `***` is fine, but as part of the changes introduced in apache#53943 we decided it might be nice to use an even-less-frequently-appearing thing than `***` so we can detect modified secrets. This gives us the ability to do that at the redaction layer
1 parent bcd294d commit 8ee0ebb

File tree

3 files changed

+90
-24
lines changed

3 files changed

+90
-24
lines changed

devel-common/src/tests_common/pytest_plugin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1826,7 +1826,7 @@ def _disable_redact(request: pytest.FixtureRequest, mocker):
18261826
)
18271827

18281828
mocked_redact = mocker.patch(target)
1829-
mocked_redact.side_effect = lambda item, name=None, max_depth=None: item
1829+
mocked_redact.side_effect = lambda item, *args, **kwargs: item
18301830
with pytest.MonkeyPatch.context() as mp_ctx:
18311831
mp_ctx.setattr(settings, "MASK_SECRETS_IN_LOGS", False)
18321832
yield

task-sdk/src/airflow/sdk/execution_time/secrets_masker.py

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,11 @@ def mask_secret(secret: str | dict | Iterable, name: str | None = None) -> None:
111111
_secrets_masker().add_mask(secret, name)
112112

113113

114-
def redact(value: Redactable, name: str | None = None, max_depth: int | None = None) -> Redacted:
115-
"""Redact any secrets found in ``value``."""
116-
return _secrets_masker().redact(value, name, max_depth)
114+
def redact(
115+
value: Redactable, name: str | None = None, max_depth: int | None = None, replacement: str = "***"
116+
) -> Redacted:
117+
"""Redact any secrets found in ``value`` with the given replacement."""
118+
return _secrets_masker().redact(value, name, max_depth, replacement=replacement)
117119

118120

119121
@overload
@@ -244,59 +246,85 @@ def filter(self, record) -> bool:
244246

245247
# Default on `max_depth` is to support versions of the OpenLineage plugin (not the provider) which called
246248
# this function directly. New versions of that provider, and this class itself call it with a value
247-
def _redact_all(self, item: Redactable, depth: int, max_depth: int = MAX_RECURSION_DEPTH) -> Redacted:
249+
def _redact_all(
250+
self,
251+
item: Redactable,
252+
depth: int,
253+
max_depth: int = MAX_RECURSION_DEPTH,
254+
*,
255+
replacement: str = "***",
256+
) -> Redacted:
248257
if depth > max_depth or isinstance(item, str):
249-
return "***"
258+
return replacement
250259
if isinstance(item, dict):
251260
return {
252-
dict_key: self._redact_all(subval, depth + 1, max_depth) for dict_key, subval in item.items()
261+
dict_key: self._redact_all(subval, depth + 1, max_depth, replacement=replacement)
262+
for dict_key, subval in item.items()
253263
}
254264
if isinstance(item, (tuple, set)):
255265
# Turn set in to tuple!
256-
return tuple(self._redact_all(subval, depth + 1, max_depth) for subval in item)
266+
return tuple(
267+
self._redact_all(subval, depth + 1, max_depth, replacement=replacement) for subval in item
268+
)
257269
if isinstance(item, list):
258-
return list(self._redact_all(subval, depth + 1, max_depth) for subval in item)
270+
return list(
271+
self._redact_all(subval, depth + 1, max_depth, replacement=replacement) for subval in item
272+
)
259273
return item
260274

261-
def _redact(self, item: Redactable, name: str | None, depth: int, max_depth: int) -> Redacted:
275+
def _redact(
276+
self, item: Redactable, name: str | None, depth: int, max_depth: int, replacement: str = "***"
277+
) -> Redacted:
262278
# Avoid spending too much effort on redacting on deeply nested
263279
# structures. This also avoid infinite recursion if a structure has
264280
# reference to self.
265281
if depth > max_depth:
266282
return item
267283
try:
268284
if name and should_hide_value_for_key(name):
269-
return self._redact_all(item, depth, max_depth)
285+
return self._redact_all(item, depth, max_depth, replacement=replacement)
270286
if isinstance(item, dict):
271287
to_return = {
272-
dict_key: self._redact(subval, name=dict_key, depth=(depth + 1), max_depth=max_depth)
288+
dict_key: self._redact(
289+
subval, name=dict_key, depth=(depth + 1), max_depth=max_depth, replacement=replacement
290+
)
273291
for dict_key, subval in item.items()
274292
}
275293
return to_return
276294
if isinstance(item, Enum):
277-
return self._redact(item=item.value, name=name, depth=depth, max_depth=max_depth)
295+
return self._redact(
296+
item=item.value, name=name, depth=depth, max_depth=max_depth, replacement=replacement
297+
)
278298
if _is_v1_env_var(item) and hasattr(item, "to_dict"):
279299
tmp: dict = item.to_dict()
280300
if should_hide_value_for_key(tmp.get("name", "")) and "value" in tmp:
281-
tmp["value"] = "***"
301+
tmp["value"] = replacement
282302
else:
283-
return self._redact(item=tmp, name=name, depth=depth, max_depth=max_depth)
303+
return self._redact(
304+
item=tmp, name=name, depth=depth, max_depth=max_depth, replacement=replacement
305+
)
284306
return tmp
285307
if isinstance(item, str):
286308
if self.replacer:
287309
# We can't replace specific values, but the key-based redacting
288310
# can still happen, so we can't short-circuit, we need to walk
289311
# the structure.
290-
return self.replacer.sub("***", str(item))
312+
return self.replacer.sub(replacement, str(item))
291313
return item
292314
if isinstance(item, (tuple, set)):
293315
# Turn set in to tuple!
294316
return tuple(
295-
self._redact(subval, name=None, depth=(depth + 1), max_depth=max_depth) for subval in item
317+
self._redact(
318+
subval, name=None, depth=(depth + 1), max_depth=max_depth, replacement=replacement
319+
)
320+
for subval in item
296321
)
297322
if isinstance(item, list):
298323
return [
299-
self._redact(subval, name=None, depth=(depth + 1), max_depth=max_depth) for subval in item
324+
self._redact(
325+
subval, name=None, depth=(depth + 1), max_depth=max_depth, replacement=replacement
326+
)
327+
for subval in item
300328
]
301329
return item
302330
# I think this should never happen, but it does not hurt to leave it just in case
@@ -317,10 +345,12 @@ def _merge(
317345
self,
318346
new_item: Redacted,
319347
old_item: Redactable,
348+
*,
320349
name: str | None,
321350
depth: int,
322351
max_depth: int,
323352
force_sensitive: bool = False,
353+
replacement: str,
324354
) -> Redacted:
325355
"""Merge a redacted item with its original unredacted counterpart."""
326356
if depth > max_depth:
@@ -345,6 +375,7 @@ def _merge(
345375
depth=depth + 1,
346376
max_depth=max_depth,
347377
force_sensitive=is_sensitive,
378+
replacement=replacement,
348379
)
349380
else:
350381
merged[key] = new_item[key]
@@ -366,6 +397,7 @@ def _merge(
366397
depth=depth + 1,
367398
max_depth=max_depth,
368399
force_sensitive=is_sensitive,
400+
replacement=replacement,
369401
)
370402
)
371403
else:
@@ -390,25 +422,38 @@ def _merge(
390422
except (TypeError, AttributeError, ValueError):
391423
return new_item
392424

393-
def redact(self, item: Redactable, name: str | None = None, max_depth: int | None = None) -> Redacted:
425+
def redact(
426+
self,
427+
item: Redactable,
428+
name: str | None = None,
429+
max_depth: int | None = None,
430+
replacement: str = "***",
431+
) -> Redacted:
394432
"""
395433
Redact an any secrets found in ``item``, if it is a string.
396434
397435
If ``name`` is given, and it's a "sensitive" name (see
398436
:func:`should_hide_value_for_key`) then all string values in the item
399437
is redacted.
400438
"""
401-
return self._redact(item, name, depth=0, max_depth=max_depth or self.MAX_RECURSION_DEPTH)
439+
return self._redact(
440+
item, name, depth=0, max_depth=max_depth or self.MAX_RECURSION_DEPTH, replacement=replacement
441+
)
402442

403443
def merge(
404-
self, new_item: Redacted, old_item: Redactable, name: str | None = None, max_depth: int | None = None
444+
self,
445+
new_item: Redacted,
446+
old_item: Redactable,
447+
name: str | None = None,
448+
max_depth: int | None = None,
449+
replacement: str = "***",
405450
) -> Redacted:
406451
"""
407452
Merge a redacted item with its original unredacted counterpart.
408453
409454
Takes a user-modified redacted item and merges it with the original unredacted item.
410-
For sensitive fields that still contain "***" (unchanged), the original value is restored.
411-
For fields that have been updated, the new value is preserved.
455+
For sensitive fields that still contain "***" (or whatever the ``replacement`` is specified as), the
456+
original value is restored. For fields that have been updated, the new value is preserved.
412457
"""
413458
return self._merge(
414459
new_item,
@@ -417,6 +462,7 @@ def merge(
417462
depth=0,
418463
max_depth=max_depth or self.MAX_RECURSION_DEPTH,
419464
force_sensitive=False,
465+
replacement=replacement,
420466
)
421467

422468
@cached_property

task-sdk/tests/task_sdk/definitions/test_secrets_masker.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,26 @@ def test_redact(self, patterns, name, value, expected):
287287

288288
assert filt.redact(value, name) == expected
289289

290+
@pytest.mark.parametrize(
291+
("name", "value", "expected"),
292+
[
293+
("api_key", "pass", "*️⃣*️⃣*️⃣"),
294+
("api_key", ("pass",), ("*️⃣*️⃣*️⃣",)),
295+
(None, {"data": {"secret": "secret"}}, {"data": {"secret": "*️⃣*️⃣*️⃣"}}),
296+
# Non string dict keys
297+
(None, {1: {"secret": "secret"}}, {1: {"secret": "*️⃣*️⃣*️⃣"}}),
298+
(
299+
"api_key",
300+
{"other": "innoent", "nested": ["x", "y"]},
301+
{"other": "*️⃣*️⃣*️⃣", "nested": ["*️⃣*️⃣*️⃣", "*️⃣*️⃣*️⃣"]},
302+
),
303+
],
304+
)
305+
def test_redact_replacement(self, name, value, expected):
306+
filt = SecretsMasker()
307+
308+
assert filt.redact(value, name, replacement="*️⃣*️⃣*️⃣") == expected
309+
290310
def test_redact_filehandles(self, caplog):
291311
filt = SecretsMasker()
292312
with open("/dev/null", "w") as handle:
@@ -696,7 +716,7 @@ def test_redact_all_directly(self):
696716
"nested": {"tuple": ("a", "b", "c"), "set": {"x", "y", "z"}},
697717
}
698718

699-
result = secrets_masker._redact_all(test_data, depth=0)
719+
result = secrets_masker._redact_all(test_data, depth=0, replacement="***")
700720

701721
assert result["string"] == "***"
702722
assert result["number"] == 12345

0 commit comments

Comments
 (0)