|
17 | 17 | @component
|
18 | 18 | class OpenAITextEmbedder:
|
19 | 19 | """
|
20 |
| - A component for embedding strings using OpenAI models. |
| 20 | + Embeds strings using OpenAI models. |
| 21 | +
|
| 22 | + You can use it to embed a user query and send it to an embedding Retriever. |
| 23 | +
|
| 24 | + ### Usage example |
21 | 25 |
|
22 |
| - Usage example: |
23 | 26 | ```python
|
24 | 27 | from haystack.components.embedders import OpenAITextEmbedder
|
25 | 28 |
|
@@ -48,34 +51,38 @@ def __init__(
|
48 | 51 | max_retries: Optional[int] = None,
|
49 | 52 | ):
|
50 | 53 | """
|
51 |
| - Create an OpenAITextEmbedder component. |
| 54 | + Creates an OpenAITextEmbedder component. |
52 | 55 |
|
53 |
| - By setting the 'OPENAI_TIMEOUT' and 'OPENAI_MAX_RETRIES' you can change the timeout and max_retries parameters |
| 56 | + Before initializing the component, you can set the 'OPENAI_TIMEOUT' and 'OPENAI_MAX_RETRIES' |
| 57 | + environment variables to override the `timeout` and `max_retries` parameters respectively |
54 | 58 | in the OpenAI client.
|
55 | 59 |
|
56 | 60 | :param api_key:
|
57 | 61 | The OpenAI API key.
|
| 62 | + You can set it with an environment variable `OPENAI_API_KEY`, or pass it with this parameter |
| 63 | + during initialization. |
58 | 64 | :param model:
|
59 |
| - The name of the model to use. |
| 65 | + The name of the model to use for calculating embeddings. |
| 66 | + The default model is `text-embedding-ada-002`. |
60 | 67 | :param dimensions:
|
61 |
| - The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` a |
62 |
| - nd later models. |
| 68 | + The number of dimensions of the resulting embeddings. Only `text-embedding-3` and |
| 69 | + later models support this parameter. |
63 | 70 | :param api_base_url:
|
64 |
| - Overrides default base url for all HTTP requests. |
| 71 | + Overrides default base URL for all HTTP requests. |
65 | 72 | :param organization:
|
66 |
| - The Organization ID. See OpenAI's |
| 73 | + Your organization ID. See OpenAI's |
67 | 74 | [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization)
|
68 | 75 | for more information.
|
69 | 76 | :param prefix:
|
70 |
| - A string to add at the beginning of each text. |
| 77 | + A string to add at the beginning of each text to embed. |
71 | 78 | :param suffix:
|
72 |
| - A string to add at the end of each text. |
| 79 | + A string to add at the end of each text to embed. |
73 | 80 | :param timeout:
|
74 |
| - Timeout for OpenAI Client calls, if not set it is inferred from the `OPENAI_TIMEOUT` environment variable |
75 |
| - or set to 30. |
| 81 | + Timeout for OpenAI client calls. If not set, it defaults to either the |
| 82 | + `OPENAI_TIMEOUT` environment variable, or 30 seconds. |
76 | 83 | :param max_retries:
|
77 |
| - Maximum retries to stablish contact with OpenAI if it returns an internal error, if not set it is inferred |
78 |
| - from the `OPENAI_MAX_RETRIES` environment variable or set to 5. |
| 84 | + Maximum number of retries to contact OpenAI after an internal error. |
| 85 | + If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or 5. |
79 | 86 | """
|
80 | 87 | self.model = model
|
81 | 88 | self.dimensions = dimensions
|
@@ -138,7 +145,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "OpenAITextEmbedder":
|
138 | 145 | @component.output_types(embedding=List[float], meta=Dict[str, Any])
|
139 | 146 | def run(self, text: str):
|
140 | 147 | """
|
141 |
| - Embed a single string. |
| 148 | + Embeds a single string. |
142 | 149 |
|
143 | 150 | :param text:
|
144 | 151 | Text to embed.
|
|
0 commit comments