35
35
import attrs
36
36
import lazy_object_proxy
37
37
import structlog
38
- from pydantic import AwareDatetime , ConfigDict , Field , JsonValue
38
+ from pydantic import AwareDatetime , ConfigDict , Field , JsonValue , TypeAdapter
39
39
40
+ from airflow .configuration import conf
40
41
from airflow .dag_processing .bundles .base import BaseDagBundle , BundleVersionLock
41
42
from airflow .dag_processing .bundles .manager import DagBundlesManager
42
43
from airflow .exceptions import AirflowInactiveAssetInInletOrOutletException
97
98
)
98
99
from airflow .sdk .execution_time .xcom import XCom
99
100
from airflow .utils .net import get_hostname
101
+ from airflow .utils .platform import getuser
100
102
from airflow .utils .timezone import coerce_datetime
101
103
102
104
if TYPE_CHECKING :
@@ -642,6 +644,7 @@ def parse(what: StartupDetails, log: Logger) -> RuntimeTaskInstance:
642
644
# accessible wherever needed during task execution without modifying every layer of the call stack.
643
645
SUPERVISOR_COMMS : CommsDecoder [ToTask , ToSupervisor ]
644
646
647
+
645
648
# State machine!
646
649
# 1. Start up (receive details from supervisor)
647
650
# 2. Execution (run task code, possibly send requests)
@@ -651,13 +654,18 @@ def parse(what: StartupDetails, log: Logger) -> RuntimeTaskInstance:
651
654
def startup () -> tuple [RuntimeTaskInstance , Context , Logger ]:
652
655
# The parent sends us a StartupDetails message un-prompted. After this, every single message is only sent
653
656
# in response to us sending a request.
654
- msg = SUPERVISOR_COMMS ._get_response ()
657
+ log = structlog .get_logger (logger_name = "task" )
658
+
659
+ if os .environ .get ("_AIRFLOW__REEXECUTED_PROCESS" ) == "1" and os .environ .get ("_AIRFLOW__STARTUP_MSG" ):
660
+ # entrypoint of re-exec process
661
+ msg = TypeAdapter (StartupDetails ).validate_json (os .environ ["_AIRFLOW__STARTUP_MSG" ])
662
+ log .debug ("Using serialized startup message from environment" , msg = msg )
663
+ else :
664
+ # normal entry point
665
+ msg = SUPERVISOR_COMMS ._get_response () # type: ignore[assignment]
655
666
656
667
if not isinstance (msg , StartupDetails ):
657
668
raise RuntimeError (f"Unhandled startup message { type (msg )} { msg } " )
658
-
659
- log = structlog .get_logger (logger_name = "task" )
660
-
661
669
# setproctitle causes issue on Mac OS: https://github.com/benoitc/gunicorn/issues/3021
662
670
os_type = sys .platform
663
671
if os_type == "darwin" :
@@ -677,6 +685,34 @@ def startup() -> tuple[RuntimeTaskInstance, Context, Logger]:
677
685
ti .log_url = get_log_url_from_ti (ti )
678
686
log .debug ("DAG file parsed" , file = msg .dag_rel_path )
679
687
688
+ run_as_user = getattr (ti .task , "run_as_user" , None ) or conf .get (
689
+ "core" , "default_impersonation" , fallback = None
690
+ )
691
+
692
+ if os .environ .get ("_AIRFLOW__REEXECUTED_PROCESS" ) != "1" and run_as_user and run_as_user != getuser ():
693
+ # enters here for re-exec process
694
+ os .environ ["_AIRFLOW__REEXECUTED_PROCESS" ] = "1"
695
+ # store startup message in environment for re-exec process
696
+ os .environ ["_AIRFLOW__STARTUP_MSG" ] = msg .model_dump_json ()
697
+ os .set_inheritable (SUPERVISOR_COMMS .socket .fileno (), True )
698
+
699
+ # Import main directly from the module instead of re-executing the file.
700
+ # This ensures that when other parts modules import
701
+ # airflow.sdk.execution_time.task_runner, they get the same module instance
702
+ # with the properly initialized SUPERVISOR_COMMS global variable.
703
+ # If we re-executed the module with `python -m`, it would load as __main__ and future
704
+ # imports would get a fresh copy without the initialized globals.
705
+ rexec_python_code = "from airflow.sdk.execution_time.task_runner import main; main()"
706
+ cmd = ["sudo" , "-E" , "-H" , "-u" , run_as_user , sys .executable , "-c" , rexec_python_code ]
707
+ log .info (
708
+ "Running command" ,
709
+ command = cmd ,
710
+ )
711
+ os .execvp ("sudo" , cmd )
712
+
713
+ # ideally, we should never reach here, but if we do, we should return None, None, None
714
+ return None , None , None
715
+
680
716
return ti , ti .get_template_context (), log
681
717
682
718
0 commit comments