docs: clean up docstrings of OpenAIChatGenerator (#8125)

dfokina · web-flow · commit 28141ec6b90d · 2024-07-31T09:45:14.000+02:00
* openaichatgen-docstrings

* link update
diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py
@@ -22,17 +22,21 @@
 @component
 class OpenAIChatGenerator:
     """
-    A Chat Generator component that uses the OpenAI API to generate text.
+    Completes chats using OpenAI's large language models (LLMs).
 
-    Enables text generation using OpenAI's large language models (LLMs). It supports `gpt-4` and `gpt-3.5-turbo`
-    family of models accessed through the chat completions API endpoint.
+    It works with the gpt-4 and gpt-3.5-turbo families of models and supports streaming responses
+    from the OpenAI API. It uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/data-classes#chatmessage)
+    format for both input and output.
 
-    Users can pass any text generation parameters valid for the `openai.ChatCompletion.create` method
-    directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
-    parameter in `run` method.
+    You can customize how the text is generated by passing parameters to the
+    OpenAI API. Use the `generation_kwargs` argument when you initialize
+    the component or when you run it. Any parameter that works with
+    `openai.ChatCompletion.create` will work here too.
 
-    For more details on the parameters supported by the OpenAI API, refer to the OpenAI
-    [documentation](https://platform.openai.com/docs/api-reference/chat).
+    For details on OpenAI API parameters, see
+    [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).
+
+    ### Usage example
 
     ```python
     from haystack.components.generators.chat import OpenAIChatGenerator
@@ -56,17 +60,6 @@ class OpenAIChatGenerator:
         ]
     }
     ```
-
-     Key Features and Compatibility:
-      - Primary Compatibility: designed to work seamlessly with the OpenAI API Chat Completion endpoint and `gpt-4`
-        and `gpt-3.5-turbo` family of models.
-      - Streaming Support: supports streaming responses from the OpenAI API Chat Completion endpoint.
-      - Customizability: supports all parameters supported by the OpenAI API Chat Completion endpoint.
-
-     Input and Output Format:
-       - ChatMessage Format: this component uses the ChatMessage format for structuring both input and output,
-         ensuring coherent and contextually relevant responses in chat-based text generation scenarios. Details on the
-         ChatMessage format can be found at [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
     """
 
     def __init__(
@@ -81,30 +74,31 @@ def __init__(
         max_retries: Optional[int] = None,
     ):
         """
-        Initializes the OpenAIChatGenerator component.
-
-        Creates an instance of OpenAIChatGenerator. Unless specified otherwise in the `model`, this is for OpenAI's
-        GPT-3.5 model.
+        Creates an instance of OpenAIChatGenerator. Unless specified otherwise in `model`, uses OpenAI's GPT-3.5.
 
-        By setting the 'OPENAI_TIMEOUT' and 'OPENAI_MAX_RETRIES' you can change the timeout and max_retries parameters
+        Before initializing the component, you can set the 'OPENAI_TIMEOUT' and 'OPENAI_MAX_RETRIES'
+        environment variables to change the defaults of the `timeout` and `max_retries` parameters respectively
         in the OpenAI client.
 
         :param api_key: The OpenAI API key.
+            You can set it with the `OPENAI_API_KEY` environment variable, or pass it with this parameter
+            during initialization.
         :param model: The name of the model to use.
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
-            The callback function accepts StreamingChunk as an argument.
+            The callback function accepts [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk)
+            as an argument.
         :param api_base_url: An optional base URL.
-        :param organization: The Organization ID, defaults to `None`. See
+        :param organization: Your organization ID, defaults to `None`. See
         [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
-        :param generation_kwargs: Other parameters to use for the model. These parameters are all sent directly to
+        :param generation_kwargs: Other parameters to use for the model. These parameters are sent directly to
             the OpenAI endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat) for
             more details.
             Some of the supported parameters:
             - `max_tokens`: The maximum number of tokens the output text can have.
             - `temperature`: What sampling temperature to use. Higher values mean the model will take more risks.
                 Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
             - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
-                considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens
+                considers the results of the tokens with top_p probability mass. For example, 0.1 means only the tokens
                 comprising the top 10% probability mass are considered.
             - `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2,
                 it will generate two completions for each of the three prompts, ending up with 6 completions in total.
@@ -116,11 +110,11 @@ def __init__(
             - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens, and the
                 values are the bias to add to that token.
         :param timeout:
-            Timeout for OpenAI Client calls, if not set it is inferred from the `OPENAI_TIMEOUT` environment variable
-            or set to 30.
+            Timeout for OpenAI client calls. If not set, it defaults to the value of the
+            `OPENAI_TIMEOUT` environment variable or, if that is unset, to 30 seconds.
         :param max_retries:
-            Maximum retries to stablish contact with OpenAI if it returns an internal error, if not set it is inferred
-            from the `OPENAI_MAX_RETRIES` environment variable or set to 5.
+            Maximum number of retries to contact OpenAI after an internal error. If not set, it defaults to
+            the value of the `OPENAI_MAX_RETRIES` environment variable or, if that is unset, to 5.
         """
         self.api_key = api_key
         self.model = model
@@ -190,14 +184,14 @@ def run(
         generation_kwargs: Optional[Dict[str, Any]] = None,
     ):
         """
-        Invoke the text generation inference based on the provided messages and generation parameters.
+        Invokes chat completion based on the provided messages and generation parameters.
 
         :param messages: A list of ChatMessage instances representing the input messages.
         :param streaming_callback: A callback function that is called when a new token is received from the stream.
         :param generation_kwargs: Additional keyword arguments for text generation. These parameters will
-                                  potentially override the parameters passed in the `__init__` method.
-                                  For more details on the parameters supported by the OpenAI API, refer to the
-                                  OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat/create).
+                                  override the parameters passed during component initialization.
+                                  For details on OpenAI API parameters, see
+                                  [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat/create).
 
         :returns:
             A list containing the generated responses as ChatMessage instances.