|
| 1 | +# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]> |
| 2 | +# |
| 3 | +# SPDX-License-Identifier: Apache-2.0 |
| 4 | +import os |
| 5 | + |
| 6 | +import pytest |
| 7 | + |
| 8 | +from haystack.components.builders import AnswerBuilder |
| 9 | + |
| 10 | +from haystack import Document, Pipeline |
| 11 | +from haystack.dataclasses.answer import ExtractedAnswer, GeneratedAnswer, ExtractedTableAnswer |
| 12 | +from haystack.components.generators.chat import OpenAIChatGenerator |
| 13 | +from haystack.components.joiners.answer_joiner import AnswerJoiner, JoinMode |
| 14 | +from haystack.dataclasses import ChatMessage |
| 15 | + |
| 16 | + |
class TestAnswerJoiner:
    """Unit and integration tests for the ``AnswerJoiner`` component."""

    def test_init(self):
        """A default joiner concatenates, has no ``top_k`` and does not sort by score."""
        joiner = AnswerJoiner()
        assert joiner.join_mode == JoinMode.CONCATENATE
        assert joiner.top_k is None
        assert joiner.sort_by_score is False

    def test_init_with_custom_parameters(self):
        """A string ``join_mode`` is exposed as a ``JoinMode`` member; other arguments are stored."""
        joiner = AnswerJoiner(join_mode="concatenate", top_k=5, sort_by_score=True)
        assert joiner.join_mode == JoinMode.CONCATENATE
        assert joiner.top_k == 5
        assert joiner.sort_by_score is True

    def test_to_dict(self):
        """Serializing a default joiner yields its type path and default init parameters."""
        joiner = AnswerJoiner()
        data = joiner.to_dict()
        assert data == {
            "type": "haystack.components.joiners.answer_joiner.AnswerJoiner",
            "init_parameters": {"join_mode": "concatenate", "top_k": None, "sort_by_score": False},
        }

    def test_to_from_dict_custom_parameters(self):
        """Custom parameters survive a ``to_dict`` / ``from_dict`` round trip."""
        joiner = AnswerJoiner("concatenate", top_k=5, sort_by_score=True)
        data = joiner.to_dict()
        assert data == {
            "type": "haystack.components.joiners.answer_joiner.AnswerJoiner",
            "init_parameters": {"join_mode": "concatenate", "top_k": 5, "sort_by_score": True},
        }

        deserialized_joiner = AnswerJoiner.from_dict(data)
        assert deserialized_joiner.join_mode == JoinMode.CONCATENATE
        assert deserialized_joiner.top_k == 5
        assert deserialized_joiner.sort_by_score is True

    def test_from_dict(self):
        """Deserializing with empty ``init_parameters`` produces the default configuration."""
        data = {"type": "haystack.components.joiners.answer_joiner.AnswerJoiner", "init_parameters": {}}
        answer_joiner = AnswerJoiner.from_dict(data)
        assert answer_joiner.join_mode == JoinMode.CONCATENATE
        assert answer_joiner.top_k is None
        assert answer_joiner.sort_by_score is False

    def test_from_dict_customs_parameters(self):
        """Deserializing with explicit ``init_parameters`` applies each of them."""
        data = {
            "type": "haystack.components.joiners.answer_joiner.AnswerJoiner",
            "init_parameters": {"join_mode": "concatenate", "top_k": 5, "sort_by_score": True},
        }
        answer_joiner = AnswerJoiner.from_dict(data)
        assert answer_joiner.join_mode == JoinMode.CONCATENATE
        assert answer_joiner.top_k == 5
        assert answer_joiner.sort_by_score is True

    def test_empty_list(self):
        """Running with no input lists returns an empty ``answers`` list."""
        joiner = AnswerJoiner()
        result = joiner.run([])
        assert result == {"answers": []}

    def test_list_of_empty_lists(self):
        """Running with only empty input lists returns an empty ``answers`` list."""
        joiner = AnswerJoiner()
        result = joiner.run([[], []])
        assert result == {"answers": []}

    def test_list_of_single_answer(self):
        """A single input list is returned unchanged and in the same order."""
        joiner = AnswerJoiner()
        answers = [
            GeneratedAnswer(query="a", data="a", meta={}, documents=[Document(content="a")]),
            GeneratedAnswer(query="b", data="b", meta={}, documents=[Document(content="b")]),
            GeneratedAnswer(query="c", data="c", meta={}, documents=[Document(content="c")]),
        ]
        result = joiner.run([answers])
        assert result == {"answers": answers}

    def test_two_lists_of_generated_answers(self):
        """Two input lists are concatenated in the order they were passed."""
        joiner = AnswerJoiner()
        answers1 = [GeneratedAnswer(query="a", data="a", meta={}, documents=[Document(content="a")])]
        answers2 = [GeneratedAnswer(query="d", data="d", meta={}, documents=[Document(content="d")])]
        result = joiner.run([answers1, answers2])
        assert result == {"answers": answers1 + answers2}

    def test_multiple_lists_of_mixed_answers(self):
        """Lists holding different answer classes are concatenated in input order."""
        joiner = AnswerJoiner()
        answers1 = [GeneratedAnswer(query="a", data="a", meta={}, documents=[Document(content="a")])]
        answers2 = [ExtractedAnswer(query="d", score=0.9, meta={}, document=Document(content="d"))]
        answers3 = [ExtractedTableAnswer(query="e", score=0.7, meta={}, document=Document(content="e"))]
        answers4 = [GeneratedAnswer(query="f", data="f", meta={}, documents=[Document(content="f")])]
        all_answers = answers1 + answers2 + answers3 + answers4  # type: ignore
        result = joiner.run([answers1, answers2, answers3, answers4])
        assert result == {"answers": all_answers}

    def test_unsupported_join_mode(self):
        """Constructing a joiner with an unknown ``join_mode`` string raises ``ValueError``."""
        unsupported_mode = "unsupported_mode"
        with pytest.raises(ValueError):
            AnswerJoiner(join_mode=unsupported_mode)

    @pytest.mark.skipif(not os.environ.get("OPENAI_API_KEY", ""), reason="Needs OPENAI_API_KEY to run this test.")
    @pytest.mark.integration
    def test_with_pipeline(self):
        """Two chat generators feed two answer builders whose outputs are merged by one joiner."""
        query = "What's Natural Language Processing?"
        messages = [
            ChatMessage.from_system("You are a helpful, respectful and honest assistant. Be super concise."),
            ChatMessage.from_user(query),
        ]

        pipe = Pipeline()
        # Each generator component is named after the model it runs. The names
        # contain no dots so the "component.socket" strings below stay unambiguous.
        pipe.add_component("gpt_4o", OpenAIChatGenerator(model="gpt-4o"))
        pipe.add_component("gpt_35_turbo", OpenAIChatGenerator(model="gpt-3.5-turbo"))
        pipe.add_component("aba", AnswerBuilder())
        pipe.add_component("abb", AnswerBuilder())
        pipe.add_component("joiner", AnswerJoiner())

        pipe.connect("gpt_4o.replies", "aba")
        pipe.connect("gpt_35_turbo.replies", "abb")
        pipe.connect("aba.answers", "joiner")
        pipe.connect("abb.answers", "joiner")

        results = pipe.run(
            data={
                "gpt_4o": {"messages": messages},
                "gpt_35_turbo": {"messages": messages},
                "aba": {"query": query},
                "abb": {"query": query},
            }
        )

        assert "joiner" in results
        # Both builder branches reach the joiner, so two answers are expected in total.
        assert len(results["joiner"]["answers"]) == 2
0 commit comments