|
12 | 12 | @component
|
13 | 13 | class SentenceWindowRetriever:
|
14 | 14 | """
|
15 |
| - A component that retrieves surrounding documents of a given document from the document store. |
| 15 | + Retrieves documents adjacent to a given document in the Document Store. |
16 | 16 |
|
17 |
| - It relies on the `source_id` and on the `doc.meta['split_id']` to get the surrounding documents from the document. |
18 |
| - This component is designed to work together with one of the existing retrievers, e.g. BM25Retriever, |
19 |
| - EmbeddingRetriever. One of these retrievers can be used to retrieve documents based on a query and then use this |
20 |
| - component to get the surrounding documents of the retrieved documents. |
| 17 | + During indexing, documents are broken into smaller chunks, or sentences. When you submit a query, |
| 18 | + the Retriever fetches the most relevant sentences. To provide full context, |
| 19 | + SentenceWindowRetriever fetches a number of neighboring sentences before and after each |
| 20 | + relevant one. You can set this number with the `window_size` parameter. |
| 21 | + It uses `source_id` and `doc.meta['split_id']` to locate the surrounding documents. |
21 | 22 |
|
| 23 | + This component works with existing Retrievers, like BM25Retriever or |
| 24 | + EmbeddingRetriever. First, use a Retriever to find documents based on a query and then use |
| 25 | + SentenceWindowRetriever to get the surrounding documents for context. |
| 26 | +
|
| 27 | +
|
| 28 | + ### Usage example |
22 | 29 |
|
23 |
| - Usage example: |
24 | 30 | ```python
|
25 | 31 | from haystack import Document, Pipeline
|
26 | 32 | from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
|
@@ -56,8 +62,9 @@ def __init__(self, document_store: DocumentStore, window_size: int = 3):
|
56 | 62 | """
|
57 | 63 | Creates a new SentenceWindowRetriever component.
|
58 | 64 |
|
59 |
| - :param document_store: The document store to use for retrieving the surrounding documents. |
60 |
| - :param window_size: The number of surrounding documents to retrieve. |
| 65 | + :param document_store: The Document Store to retrieve the surrounding documents from. |
| 66 | + :param window_size: The number of documents to retrieve before and after the relevant one. |
| 67 | + For example, `window_size=2` fetches 2 preceding and 2 following documents. |
61 | 68 | """
|
62 | 69 | if window_size < 1:
|
63 | 70 | raise ValueError("The window_size parameter must be greater than 0.")
|
|
0 commit comments