Skip to content

Commit f48789f

Browse files
feat: raise last good snapshot in PipelineRuntimeError + tests updates (#9758)
* raise last good snapshot in PipelineRuntimeError + test updates * add release notes * rename test file * address PR comments + fix tests
1 parent 68f4cc7 commit f48789f

File tree

5 files changed

+13
-82
lines changed

5 files changed

+13
-82
lines changed

haystack/core/errors.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
from typing import Any, Optional
66

7+
from haystack.dataclasses.breakpoints import PipelineSnapshot
8+
79

810
class PipelineError(Exception):
911
pass
@@ -15,11 +17,11 @@ def __init__(
1517
component_name: Optional[str],
1618
component_type: Optional[type],
1719
message: str,
18-
pipeline_outputs: Optional[Any] = None,
20+
pipeline_snapshot: Optional[PipelineSnapshot] = None,
1921
) -> None:
2022
self.component_name = component_name
2123
self.component_type = component_type
22-
self.pipeline_outputs = pipeline_outputs
24+
self.pipeline_snapshot = pipeline_snapshot
2325
super().__init__(message)
2426

2527
@classmethod

haystack/core/pipeline/async_pipeline.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,6 @@ async def _run_highest_in_isolation(component_name: str) -> AsyncIterator[dict[s
271271
parent_span=parent_span,
272272
)
273273
except PipelineRuntimeError as error:
274-
# Attach partial pipeline outputs to the error before re-raising
275-
error.pipeline_outputs = pipeline_outputs
276274
raise error
277275

278276
# Distribute outputs to downstream inputs; also prune outputs based on `include_outputs_from`
@@ -321,8 +319,6 @@ async def _runner():
321319
parent_span=parent_span,
322320
)
323321
except PipelineRuntimeError as error:
324-
# Attach partial pipeline outputs to the error before re-raising
325-
error.pipeline_outputs = pipeline_outputs
326322
raise error
327323

328324
# Distribute outputs to downstream inputs; also prune outputs based on `include_outputs_from`

haystack/core/pipeline/pipeline.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -390,9 +390,6 @@ def run( # noqa: PLR0915, PLR0912, C901, pylint: disable=too-many-branches
390390
parent_span=span,
391391
)
392392
except PipelineRuntimeError as error:
393-
# Attach partial pipeline outputs to the error before re-raising
394-
error.pipeline_outputs = pipeline_outputs
395-
396393
# Create a snapshot of the last good state of the pipeline before the error occurred.
397394
pipeline_snapshot_inputs_serialised = deepcopy(inputs)
398395
pipeline_snapshot_inputs_serialised[component_name] = deepcopy(component_inputs)
@@ -411,6 +408,8 @@ def run( # noqa: PLR0915, PLR0912, C901, pylint: disable=too-many-branches
411408
include_outputs_from=include_outputs_from,
412409
pipeline_outputs=pipeline_outputs,
413410
)
411+
# Attach the last good state snapshot to the error before re-raising and saving to disk
412+
error.pipeline_snapshot = last_good_state_snapshot
414413
try:
415414
_save_pipeline_snapshot(pipeline_snapshot=last_good_state_snapshot)
416415
logger.info(
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
features:
3+
- |
4+
A snapshot of the last successful step is now also raised when an error occurs during a `Pipeline` run, allowing the caller to catch it, inspect the possible reason
5+
for the crash, and use it to resume the pipeline execution from that point onwards.

test/core/pipeline/test_pipeline_crash_regular_pipeline_outputs_raised.py renamed to test/core/pipeline/test_pipeline_crash_regular_pipeline_snapshot_is_raised.py

Lines changed: 2 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -152,75 +152,8 @@ def test_hybrid_rag_pipeline_crash_on_embedding_retriever(
152152
},
153153
)
154154

155-
pipeline_outputs = exc_info.value.pipeline_outputs
156-
157-
assert pipeline_outputs is not None, "Pipeline outputs should be captured in the exception"
158-
159-
# verify that bm25_retriever and text_embedder ran successfully before the crash
160-
assert "bm25_retriever" in pipeline_outputs, "BM25 retriever output not captured"
161-
assert "documents" in pipeline_outputs["bm25_retriever"], "BM25 retriever should have produced documents"
162-
assert "text_embedder" in pipeline_outputs, "Text embedder output not captured"
163-
assert "embedding" in pipeline_outputs["text_embedder"], "Text embedder should have produced embeddings"
164-
165-
# components after the crash point are not in the outputs
166-
assert "document_joiner" not in pipeline_outputs, "Document joiner should not have run due to crash"
167-
assert "prompt_builder" not in pipeline_outputs, "Prompt builder should not have run due to crash"
168-
assert "llm" not in pipeline_outputs, "LLM should not have run due to crash"
169-
assert "answer_builder" not in pipeline_outputs, "Answer builder should not have run due to crash"
170-
171-
@pytest.mark.asyncio
172-
async def test_async_hybrid_rag_pipeline_crash_on_embedding_retriever(
173-
self, mock_sentence_transformers_text_embedder, monkeypatch
174-
):
175-
monkeypatch.setenv("OPENAI_API_KEY", "test-api-key")
176-
177-
document_store = setup_document_store()
178-
text_embedder = mock_sentence_transformers_text_embedder
179-
invalid_embedding_retriever = InvalidOutputEmbeddingRetriever()
180-
bm25_retriever = InMemoryBM25Retriever(document_store)
181-
document_joiner = DocumentJoiner(join_mode="concatenate")
182-
183-
pipeline = AsyncPipeline()
184-
pipeline.add_component("text_embedder", text_embedder)
185-
pipeline.add_component("embedding_retriever", invalid_embedding_retriever)
186-
pipeline.add_component("bm25_retriever", bm25_retriever)
187-
pipeline.add_component("document_joiner", document_joiner)
188-
pipeline.add_component(
189-
"prompt_builder", ChatPromptBuilder(template=template, required_variables=["question", "documents"])
190-
)
191-
pipeline.add_component("llm", OpenAIChatGenerator(api_key=Secret.from_env_var("OPENAI_API_KEY")))
192-
pipeline.add_component("answer_builder", AnswerBuilder())
193-
194-
pipeline.connect("text_embedder", "embedding_retriever")
195-
pipeline.connect("bm25_retriever", "document_joiner")
196-
pipeline.connect("embedding_retriever", "document_joiner")
197-
pipeline.connect("document_joiner.documents", "prompt_builder.documents")
198-
pipeline.connect("prompt_builder", "llm")
199-
pipeline.connect("llm.replies", "answer_builder.replies")
200-
201-
question = "Where does Mark live?"
202-
test_data = {
203-
"text_embedder": {"text": question},
204-
"bm25_retriever": {"query": question},
205-
"prompt_builder": {"question": question},
206-
"answer_builder": {"query": question},
207-
}
208-
209-
with pytest.raises(PipelineRuntimeError) as exc_info:
210-
await pipeline.run_async(
211-
data=test_data,
212-
include_outputs_from={
213-
"text_embedder",
214-
"embedding_retriever",
215-
"bm25_retriever",
216-
"document_joiner",
217-
"prompt_builder",
218-
"llm",
219-
"answer_builder",
220-
},
221-
)
222-
223-
pipeline_outputs = exc_info.value.pipeline_outputs
155+
pipeline_snapshot = exc_info.value.pipeline_snapshot
156+
pipeline_outputs = pipeline_snapshot.pipeline_state.pipeline_outputs
224157
assert pipeline_outputs is not None, "Pipeline outputs should be captured in the exception"
225158

226159
# verify that bm25_retriever and text_embedder ran successfully before the crash
@@ -234,7 +167,3 @@ async def test_async_hybrid_rag_pipeline_crash_on_embedding_retriever(
234167
assert "prompt_builder" not in pipeline_outputs, "Prompt builder should not have run due to crash"
235168
assert "llm" not in pipeline_outputs, "LLM should not have run due to crash"
236169
assert "answer_builder" not in pipeline_outputs, "Answer builder should not have run due to crash"
237-
238-
# check that a pipeline snapshot file was created in the "pipeline_snapshot" directory
239-
snapshot_files = os.listdir(_get_output_dir("pipeline_snapshot"))
240-
assert any(f.endswith(".json") for f in snapshot_files), "No pipeline snapshot file found in debug directory"

0 commit comments

Comments
 (0)