Skip to content

Commit 78a981a

Browse files
[v3-0-test] Exclude dag.fileloc from determining dag version (#54477) (#54483)
The combination of the bundle path and dag.relative_fileloc identifies a dag's file location more reliably than dag.fileloc does. This PR excludes fileloc from the dag hash, so a change to fileloc alone no longer triggers the creation of a new dag version. (cherry picked from commit 9efadf8) Co-authored-by: Ephraim Anierobi <[email protected]>
1 parent bd6ace5 commit 78a981a

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

airflow-core/src/airflow/models/serialized_dag.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,13 @@ def __repr__(self) -> str:
350350
def hash(cls, dag_data):
351351
"""Hash the data to get the dag_hash."""
352352
dag_data = cls._sort_serialized_dag_dict(dag_data)
353-
data_json = json.dumps(dag_data, sort_keys=True).encode("utf-8")
353+
data_ = dag_data.copy()
354+
# Remove fileloc from the hash so changes to fileloc
355+
# do not affect the hash. In 3.0+, a combination of
356+
# bundle_path and relative fileloc more correctly determines the
357+
# dag file location.
358+
data_["dag"].pop("fileloc", None)
359+
data_json = json.dumps(data_, sort_keys=True).encode("utf-8")
354360
return md5(data_json).hexdigest()
355361

356362
@classmethod

airflow-core/tests/unit/models/test_serialized_dag.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -522,3 +522,31 @@ def test_new_dag_version_created_when_bundle_name_changes_and_hash_unchanged(sel
522522

523523
# There should now be two versions of the DAG
524524
assert session.query(DagVersion).count() == 2
525+
526+
def test_hash_method_removes_fileloc_and_remains_consistent(self):
527+
"""Test that the hash method removes fileloc before hashing."""
528+
test_data = {
529+
"__version": 1,
530+
"dag": {
531+
"fileloc": "/path/to/dag.py",
532+
"dag_id": "test_dag",
533+
"tasks": {
534+
"task1": {"task_id": "task1"},
535+
},
536+
},
537+
}
538+
539+
hash_with_fileloc = SDM.hash(test_data)
540+
541+
# Modify only the top-level dag.fileloc path (simulating file location changes)
542+
test_data["dag"]["fileloc"] = "/different/path/to/dag.py"
543+
544+
# Get hash with different top-level fileloc (should be the same)
545+
hash_with_different_fileloc = SDM.hash(test_data)
546+
547+
# Hashes should be identical since top-level dag.fileloc is removed before hashing
548+
assert hash_with_fileloc == hash_with_different_fileloc
549+
550+
# Verify that the original data still has fileloc (method shouldn't modify original)
551+
assert "fileloc" in test_data["dag"]
552+
assert test_data["dag"]["fileloc"] == "/different/path/to/dag.py"

0 commit comments

Comments
 (0)