19 | 19 | @component
20 | 20 | class AzureOpenAIChatGenerator(OpenAIChatGenerator):
21 | 21 |     """
22 |    | -    A Chat Generator component that uses the Azure OpenAI API to generate text.
   | 22 | +    Generates text using OpenAI's models on Azure.
23 | 23 |
24 |    | -    Enables text generation using OpenAI's large language models (LLMs) on Azure. It supports `gpt-4` and
25 |    | -    `gpt-3.5-turbo` family of models accessed through the chat completions API endpoint.
   | 24 | +    It works with the gpt-4 and gpt-3.5-turbo family of models and supports streaming responses
   | 25 | +    from the OpenAI API. It uses [ChatMessage](https://docs.haystack.deepset.ai/docs/data-classes#chatmessage)
   | 26 | +    format for input and output.
26 | 27 |
27 |    | -    Users can pass any text generation parameters valid for the `openai.ChatCompletion.create` method
28 |    | -    directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
29 |    | -    parameter in `run` method.
   | 28 | +    You can customize how the text is generated by passing parameters to the
   | 29 | +    OpenAI API. Use the `**generation_kwargs` argument when you initialize
   | 30 | +    the component or when you run it. Any parameter that works with
   | 31 | +    `openai.ChatCompletion.create` will work here too.
30 | 32 |
31 |    | -    For more details on OpenAI models deployed on Azure, refer to the Microsoft
32 |    | -    [documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/).
   | 33 | +    For details on OpenAI API parameters, see
   | 34 | +    [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).
33 | 35 |
34 |    | -    Key Features and Compatibility:
35 |    | -    - Primary Compatibility: Designed to work seamlessly with the OpenAI API Chat Completion endpoint.
36 |    | -    - Streaming Support: Supports streaming responses from the OpenAI API Chat Completion endpoint.
37 |    | -    - Customizability: Supports all parameters supported by the OpenAI API Chat Completion endpoint.
38 |    | -
39 |    | -    Input and Output Format:
40 |    | -    - ChatMessage Format: This component uses the ChatMessage format for structuring both input and output, ensuring
41 |    | -      coherent and contextually relevant responses in chat-based text generation scenarios.
42 |    | -    - Details on the ChatMessage format can be found [here](https://docs.haystack.deepset.ai/v2.0/docs/data-classes#chatmessage).
43 |    | -
44 |    | -
45 |    | -    Usage example:
   | 36 | +    ### Usage example
46 | 37 |
47 | 38 |     ```python
48 | 39 |     from haystack.components.generators.chat import AzureOpenAIChatGenerator
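
For reference while reviewing, here is a minimal usage sketch of the class as documented above. It is an illustration, not part of the commit: the endpoint and the `gpt-35-turbo` deployment name are placeholders, and it assumes the `AZURE_OPENAI_API_KEY` environment variable is set.

```python
from haystack.components.generators.chat import AzureOpenAIChatGenerator
from haystack.dataclasses import ChatMessage

# Placeholder endpoint and deployment name; adjust to your Azure resource.
# The API key is read from the AZURE_OPENAI_API_KEY environment variable.
client = AzureOpenAIChatGenerator(
    azure_endpoint="https://example-resource.azure.openai.com/",
    azure_deployment="gpt-35-turbo",
)

messages = [ChatMessage.from_user("What's Natural Language Processing? Be brief.")]

# Any parameter accepted by openai.ChatCompletion.create can be passed
# per call through generation_kwargs, as the docstring describes.
response = client.run(messages, generation_kwargs={"max_tokens": 100, "temperature": 0.2})
print(response["replies"])
```

The component returns a dictionary with a `replies` list of `ChatMessage` objects, matching the ChatMessage input/output contract stated in the docstring.
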
@@ -87,37 +78,37 @@ def __init__(
 87 |  78 |         """
 88 |  79 |         Initialize the Azure OpenAI Chat Generator component.
 89 |  80 |
 90 |     | -        :param azure_endpoint: The endpoint of the deployed model, e.g. `"https://example-resource.azure.openai.com/"`
 91 |     | -        :param api_version: The version of the API to use. Defaults to 2023-05-15
    |  81 | +        :param azure_endpoint: The endpoint of the deployed model, for example `"https://example-resource.azure.openai.com/"`.
    |  82 | +        :param api_version: The version of the API to use. Defaults to 2023-05-15.
 92 |  83 |         :param azure_deployment: The deployment of the model, usually the model name.
 93 |  84 |         :param api_key: The API key to use for authentication.
 94 |     | -        :param azure_ad_token: [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id)
 95 |     | -        :param organization: The Organization ID, defaults to `None`. See
 96 |     | -            [production best practices](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
 97 |     | -        :param streaming_callback: A callback function that is called when a new token is received from the stream.
 98 |     | -            The callback function accepts StreamingChunk as an argument.
 99 |     | -        :param timeout: The timeout in seconds to be passed to the underlying `AzureOpenAI` client, if not set it is
100 |     | -            inferred from the `OPENAI_TIMEOUT` environment variable or set to 30.
101 |     | -        :param max_retries: Maximum retries to establish a connection with AzureOpenAI if it returns an internal error,
102 |     | -            if not set it is inferred from the `OPENAI_MAX_RETRIES` environment variable or set to 5.
103 |     | -        :param generation_kwargs: Other parameters to use for the model. These parameters are all sent directly to
104 |     | -            the OpenAI endpoint. See OpenAI [documentation](https://platform.openai.com/docs/api-reference/chat) for
105 |     | -            more details.
    |  85 | +        :param azure_ad_token: [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id).
    |  86 | +        :param organization: Your organization ID. Defaults to `None`. For help, see
    |  87 | +            [Setting up your organization](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
    |  88 | +        :param streaming_callback: A callback function called when a new token is received from the stream.
    |  89 | +            It accepts [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk)
    |  90 | +            as an argument.
    |  91 | +        :param timeout: Timeout for OpenAI client calls. If not set, it defaults to either the
    |  92 | +            `OPENAI_TIMEOUT` environment variable or 30 seconds.
    |  93 | +        :param max_retries: Maximum number of retries to contact OpenAI after an internal error.
    |  94 | +            If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable or 5.
    |  95 | +        :param generation_kwargs: Other parameters to use for the model. These parameters are sent directly to
    |  96 | +            the OpenAI endpoint. For details, see [OpenAI documentation](https://platform.openai.com/docs/api-reference/chat).
106 |  97 |             Some of the supported parameters:
107 |  98 |             - `max_tokens`: The maximum number of tokens the output text can have.
108 |     | -            - `temperature`: What sampling temperature to use. Higher values mean the model will take more risks.
    |  99 | +            - `temperature`: The sampling temperature to use. Higher values mean the model takes more risks.
109 | 100 |                 Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
110 |     | -            - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
111 |     | -                considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens
112 |     | -                comprising the top 10% probability mass are considered.
113 |     | -            - `n`: How many completions to generate for each prompt. For example, if the LLM gets 3 prompts and n is 2,
114 |     | -                it will generate two completions for each of the three prompts, ending up with 6 completions in total.
    | 101 | +            - `top_p`: Nucleus sampling is an alternative to sampling with temperature, where the model considers
    | 102 | +                tokens with a top_p probability mass. For example, 0.1 means only the tokens comprising
    | 103 | +                the top 10% probability mass are considered.
    | 104 | +            - `n`: The number of completions to generate for each prompt. For example, with 3 prompts and n=2,
    | 105 | +                the LLM will generate two completions per prompt, resulting in 6 completions total.
115 | 106 |             - `stop`: One or more sequences after which the LLM should stop generating tokens.
116 |     | -            - `presence_penalty`: What penalty to apply if a token is already present at all. Bigger values mean
117 |     | -                the model will be less likely to repeat the same token in the text.
118 |     | -            - `frequency_penalty`: What penalty to apply if a token has already been generated in the text.
119 |     | -                Bigger values mean the model will be less likely to repeat the same token in the text.
120 |     | -            - `logit_bias`: Add a logit bias to specific tokens. The keys of the dictionary are tokens, and the
    | 107 | +            - `presence_penalty`: The penalty applied if a token is already present.
    | 108 | +                Higher values make the model less likely to repeat the token.
    | 109 | +            - `frequency_penalty`: Penalty applied if a token has already been generated.
    | 110 | +                Higher values make the model less likely to repeat the token.
    | 111 | +            - `logit_bias`: Adds a logit bias to specific tokens. The keys of the dictionary are tokens, and the
121 | 112 |                 values are the bias to add to that token.
122 | 113 |         """
123 | 114 |         # We intentionally do not call super().__init__ here because we only need to instantiate the client to interact
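
To complement the parameter list above, a minimal sketch of init-time configuration. It is not part of the commit: the callback, the deployment name, and the specific `generation_kwargs` values are illustrative assumptions; each parameter shown is documented in the docstring above.

```python
from haystack.components.generators.chat import AzureOpenAIChatGenerator
from haystack.dataclasses import ChatMessage, StreamingChunk


def print_chunk(chunk: StreamingChunk) -> None:
    # Invoked once per streamed chunk; `content` carries the text delta.
    print(chunk.content, end="", flush=True)


# Assumes AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT are set in the
# environment; "gpt-35-turbo" is a placeholder deployment name.
client = AzureOpenAIChatGenerator(
    azure_deployment="gpt-35-turbo",
    timeout=30.0,                  # seconds; otherwise OPENAI_TIMEOUT or 30
    max_retries=5,                 # otherwise OPENAI_MAX_RETRIES or 5
    streaming_callback=print_chunk,
    generation_kwargs={
        "temperature": 0.9,        # more creative sampling
        "presence_penalty": 0.5,   # discourage tokens already present
        "stop": ["\n\n"],          # stop at the first blank line
    },
)

client.run([ChatMessage.from_user("Write a haiku about retrieval.")])
```

Defaults set in `__init__` apply to every call; passing the same keys to `run` via `generation_kwargs` overrides them for that call only.
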