2
2
3
3
from haystack import Document , DeserializationError , Pipeline
4
4
from haystack .components .retrievers import InMemoryBM25Retriever
5
- from haystack .components .retrievers .sentence_window_retrieval import SentenceWindowRetrieval
5
+ from haystack .components .retrievers .sentence_window_retriever import SentenceWindowRetriever
6
6
from haystack .document_stores .in_memory import InMemoryDocumentStore
7
7
from haystack .components .preprocessors import DocumentSplitter
8
8
9
9
10
- class TestSentenceWindowRetrieval :
10
+ class TestSentenceWindowRetriever :
11
11
def test_init_default(self):
    """Check that a retriever built with only a document store reports ``window_size == 3``."""
    store = InMemoryDocumentStore()
    component = SentenceWindowRetriever(store)
    assert component.window_size == 3
14
14
15
15
def test_init_with_parameters(self):
    """Check that an explicit ``window_size=5`` is stored on the retriever."""
    store = InMemoryDocumentStore()
    component = SentenceWindowRetriever(store, window_size=5)
    assert component.window_size == 5
18
18
19
19
def test_init_with_invalid_window_size_parameter(self):
    """Expect the constructor to raise ``ValueError`` when ``window_size`` is -2."""
    # Create the store first so the ``raises`` block contains only the call
    # under test; an error from the store itself must not satisfy the check.
    document_store = InMemoryDocumentStore()
    with pytest.raises(ValueError):
        SentenceWindowRetriever(document_store, window_size=-2)
22
22
23
23
def test_merge_documents (self ):
24
24
docs = [
@@ -50,15 +50,15 @@ def test_merge_documents(self):
50
50
"_split_overlap" : [{"doc_id" : "doc_1" , "range" : (23 , 52 )}],
51
51
},
52
52
]
53
- merged_text = SentenceWindowRetrieval .merge_documents_text ([Document .from_dict (doc ) for doc in docs ])
53
+ merged_text = SentenceWindowRetriever .merge_documents_text ([Document .from_dict (doc ) for doc in docs ])
54
54
expected = "This is a text with some words. There is a second sentence. And there is also a third sentence"
55
55
assert merged_text == expected
56
56
57
57
def test_to_dict (self ):
58
- window_retrieval = SentenceWindowRetrieval (InMemoryDocumentStore ())
59
- data = window_retrieval .to_dict ()
58
+ window_retriever = SentenceWindowRetriever (InMemoryDocumentStore ())
59
+ data = window_retriever .to_dict ()
60
60
61
- assert data ["type" ] == "haystack.components.retrievers.sentence_window_retrieval.SentenceWindowRetrieval "
61
+ assert data ["type" ] == "haystack.components.retrievers.sentence_window_retriever.SentenceWindowRetriever "
62
62
assert data ["init_parameters" ]["window_size" ] == 3
63
63
assert (
64
64
data ["init_parameters" ]["document_store" ]["type" ]
@@ -67,7 +67,7 @@ def test_to_dict(self):
67
67
68
68
def test_from_dict (self ):
69
69
data = {
70
- "type" : "haystack.components.retrievers.sentence_window_retrieval.SentenceWindowRetrieval " ,
70
+ "type" : "haystack.components.retrievers.sentence_window_retriever.SentenceWindowRetriever " ,
71
71
"init_parameters" : {
72
72
"document_store" : {
73
73
"type" : "haystack.document_stores.in_memory.document_store.InMemoryDocumentStore" ,
@@ -76,35 +76,35 @@ def test_from_dict(self):
76
76
"window_size" : 5 ,
77
77
},
78
78
}
79
- component = SentenceWindowRetrieval .from_dict (data )
79
+ component = SentenceWindowRetriever .from_dict (data )
80
80
assert isinstance (component .document_store , InMemoryDocumentStore )
81
81
assert component .window_size == 5
82
82
83
83
def test_from_dict_without_docstore(self):
    """Expect ``from_dict`` to raise ``DeserializationError`` when ``init_parameters`` is empty."""
    payload = {
        "type": "SentenceWindowRetriever ",
        "init_parameters": {},
    }
    expected_message = "Missing 'document_store' in serialization data"
    with pytest.raises(DeserializationError, match=expected_message):
        SentenceWindowRetriever.from_dict(payload)
87
87
88
88
def test_from_dict_without_docstore_type(self):
    """Expect ``from_dict`` to raise ``DeserializationError`` when the document store entry has no ``type``."""
    docstore_payload = {"init_parameters": {}}
    payload = {
        "type": "SentenceWindowRetriever ",
        "init_parameters": {"document_store": docstore_payload},
    }
    expected_message = "Missing 'type' in document store's serialization data"
    with pytest.raises(DeserializationError, match=expected_message):
        SentenceWindowRetriever.from_dict(payload)
92
92
93
93
def test_from_dict_non_existing_docstore(self):
    """Expect ``from_dict`` to raise ``DeserializationError`` for the document store type ``Nonexisting.Docstore``."""
    docstore_payload = {"type": "Nonexisting.Docstore", "init_parameters": {}}
    payload = {
        "type": "SentenceWindowRetriever ",
        "init_parameters": {"document_store": docstore_payload},
    }
    with pytest.raises(DeserializationError):
        SentenceWindowRetriever.from_dict(payload)
100
100
101
101
def test_document_without_split_id(self):
    """Expect ``run`` to raise ``ValueError`` for documents whose meta carries only an ``id`` entry."""
    docs = [
        Document(content="This is a text with some words. There is a ", meta={"id": "doc_0"}),
        Document(content="some words. There is a second sentence. And there is ", meta={"id": "doc_1"}),
    ]
    # Build the retriever outside the ``raises`` block so that only ``run``
    # can satisfy the expectation; a constructor error must fail the test.
    retriever = SentenceWindowRetriever(document_store=InMemoryDocumentStore(), window_size=3)
    with pytest.raises(ValueError):
        retriever.run(retrieved_documents=docs)
109
109
110
110
def test_document_without_source_id (self ):
@@ -115,7 +115,7 @@ def test_document_without_source_id(self):
115
115
),
116
116
]
117
117
with pytest .raises (ValueError ):
118
- retriever = SentenceWindowRetrieval (document_store = InMemoryDocumentStore (), window_size = 3 )
118
+ retriever = SentenceWindowRetriever (document_store = InMemoryDocumentStore (), window_size = 3 )
119
119
retriever .run (retrieved_documents = docs )
120
120
121
121
@pytest .mark .integration
@@ -132,7 +132,7 @@ def test_run_with_pipeline(self):
132
132
133
133
rag = Pipeline ()
134
134
rag .add_component ("bm25_retriever" , InMemoryBM25Retriever (doc_store , top_k = 1 ))
135
- rag .add_component ("sentence_window_retriever" , SentenceWindowRetrieval (document_store = doc_store , window_size = 2 ))
135
+ rag .add_component ("sentence_window_retriever" , SentenceWindowRetriever (document_store = doc_store , window_size = 2 ))
136
136
rag .connect ("bm25_retriever" , "sentence_window_retriever" )
137
137
result = rag .run ({"bm25_retriever" : {"query" : "third" }})
138
138
0 commit comments