Skip to content

RealtimeTranscriber stops transcribing after 20-25 words #107

@yohankoshydt

Description

@yohankoshydt

RealtimeTranscriber abruptly stops transcribing after listening for 1.5-2 minutes of speech.
Additionally, setting the end_utterance_silence_threshold as shown below seems to have no effect. The expected behaviour — the utterance ending after the given silence threshold — is not achieved.

`import assemblyai as aai
from elevenlabs import stream
from elevenlabs.client import ElevenLabs
from openai import OpenAI

class AI_Assistant:
    """Voice assistant loop: transcribes microphone audio in real time
    (AssemblyAI), generates a reply (OpenAI chat), and speaks it (ElevenLabs).

    Flow: start_transcription() -> on_data() fires per transcript ->
    on a final transcript, generate_ai_response() stops the transcriber,
    asks OpenAI, speaks the answer, then restarts transcription.
    """

    def __init__(self):
        # BUG FIX: was `def init(self)` (markdown ate the underscores in the
        # paste, but as written Python never calls it) — must be `__init__`.
        aai.settings.api_key = "xxx"
        self.openai_client = OpenAI(api_key="xxx")
        self.elevenlabs_api_key = "xxx"

        self.elevenlabs_client = ElevenLabs(api_key=self.elevenlabs_api_key)

        # Active RealtimeTranscriber, or None when not listening.
        self.transcriber = None

        # Running chat history sent to OpenAI on every turn.
        self.interaction = [
            {"role":"system", "content":"You are a interviewer conducting an interview for the role of a Software Developer. Please continue the conversation from the point where it is. Do not answer the questions yourself and have a professional tone, less friendly."},
        ]

    def stop_transcription(self):
        """Close the realtime session (if any) and drop the reference."""
        if self.transcriber:
            self.transcriber.close()
            self.transcriber = None

    def on_open(self, session_opened: aai.RealtimeSessionOpened):
        """Callback: realtime session established."""
        print("Session ID:", session_opened.session_id)

    def on_error(self, error: aai.RealtimeError):
        """Callback: report a realtime transcription error."""
        print("An error occurred:", error)  # fixed typo "occured"

    def on_close(self):
        """Callback: realtime session closed."""
        print("Closing Session")

    def on_data(self, transcript: aai.RealtimeTranscript):
        """Callback: partial transcripts are echoed in place; a final
        transcript triggers an AI response turn."""
        if not transcript.text:
            return
        if isinstance(transcript, aai.RealtimeFinalTranscript):
            # NOTE(review): this runs on the transcriber's callback thread and
            # synchronously stops/restarts the transcriber — the likely cause
            # of the session dying after a couple of minutes. Consider handing
            # the turn off to the main thread via a queue — TODO confirm.
            self.generate_ai_response(transcript)
        else:
            print(transcript.text, end="\r")

    def start_transcription(self):
        """Create a transcriber, connect, configure silence threshold, and
        stream the microphone (blocks until the stream ends)."""
        self.transcriber = aai.RealtimeTranscriber(
            sample_rate=16000,
            on_data=self.on_data,
            on_error=self.on_error,
            on_open=self.on_open,
            on_close=self.on_close,
            end_utterance_silence_threshold=4000,
        )

        self.transcriber.connect()
        # BUG FIX: configure_end_utterance_silence_threshold must be sent on
        # an ESTABLISHED session — calling it before connect() (as the
        # original did) is silently ignored, which is why the threshold
        # appeared to have no effect.
        self.transcriber.configure_end_utterance_silence_threshold(4000)

        microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
        self.transcriber.stream(microphone_stream)

    def generate_ai_response(self, transcript):
        """Append the user's final transcript, get an OpenAI reply, speak it,
        then resume listening."""
        self.stop_transcription()

        self.interaction.append({"role":"user", "content": transcript.text})
        print(f"\nTourist: {transcript.text}", end="\r\n")

        response = self.openai_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=self.interaction,
        )

        ai_response = response.choices[0].message.content

        self.generate_audio(ai_response)

        # NOTE(review): re-entering start_transcription() from inside a
        # transcriber callback grows the call stack every turn — verify.
        self.start_transcription()
        print(f"\nReal-time transcription: ", end="\r\n")

    def generate_audio(self, text):
        """Record the assistant turn in history and speak it via ElevenLabs."""
        self.interaction.append({"role":"assistant", "content": text})
        print(f"\nAI Guide: {text}")

        audio_stream = self.elevenlabs_client.generate(
            text=text,
            voice="Rachel",
            stream=True,
        )

        stream(audio_stream)

# Entry point: greet the user aloud, then start listening on the microphone.
# (Stray markdown backtick removed from the last line of the paste.)
if __name__ == "__main__":
    greeting = "Hello Yohan"
    ai_assistant = AI_Assistant()
    ai_assistant.generate_audio(greeting)
    ai_assistant.start_transcription()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions