Add C API for WhisperPipeline #2414
Merged

Commits (63, all authored by BrandonWeng):
- 81ce0fc Whisper C API
- e2dcb98 Match LLM pipeline standards
- 86afc96 Merge branch 'openvinotoolkit:master' into master
- 1f08477 Revert version
- 675016d Revert backt o knownexception
- dccb28e Revert beam group name change
- 09bf23b Tests pass
- 4773c58 C API runs locally now
- 3a596ce revert ov_genai_generation_config_set_num_beam_groups change
- c667f49 Fix parameter typo
- 3cd3086 Remove benchmarking code from samples
- e476414 Merge branch 'master' into master
- 02297e2 Exit code
- 40a7136 update CMakelist to match text generation
- 9ebcfa7 Addresss PR comments
- 991fd77 new line
- bae2c8f Split into utils file
- e856514 Fix C_DIR
- 8dc2aca Add utils to CmAke file
- 2f0fe2e Simpler jobs
- f5a35fd disable sdl
- 6a1665e Fix
- f6eeca1 Fix build for mac
- 73e57e7 Merge branch 'openvinotoolkit:master' into master
- e73ca3f Merge branch 'master' into fix-build
- 0442c83 Remove python wheel
- 2409e87 comment out wheel
- 86d60dd new line
- 09f141f Add MPI back
- 9346842 Install ov from pip
- 3811e89 Update pipe for macos
- 8b702ec nightly
- 2fdce3e nightly
- 714249a Revert workflow jobs to master before merge
- edd788a remove
- 213e3b3 Merge pull request #3 from FluidInference/merge-to-master-build-fix
- 4b102d7 Revert perf metric changes
- 55c7a8a Fix build for windows
- a41d107 Move PI
- cc5c44f Install in samples/ like other jobs
- ebae360 remove unused comments
- e1ba8db Merge branch 'master' into master
- 8399306 newline
- 41d0ead Merge branch 'master' into master
- 3a9d6a3 new lines
- a2ca12e Merge branch 'master' into master
- d1446d4 Look for the cBinary in the right place for Python test
- 55c71b5 Fix test format and run with timestamps
- e5663f7 Merge branch 'master' into master
- 892eaf2 Merge branch 'master' into master
- 0a111be Remove synthetic audio + m dep
- 8a6bf61 Merge branch 'master' into master
- b6529d6 Merge branch 'master' into master
- 2d80e36 revert formatting for ov_genai_generation_config_set_num_beam_groups
- abfaec3 goto error instead of continue for mem err
- fdb7ce9 Simplify samples
- e20c1c4 Arg count
- e97fe28 Fix build error
- 55a36e9 Add Readme
- 6742023 for eaxample
- 59189bf Merge branch 'master' into master
- 2848ff8 Merge branch 'master' into master
- 1e274a6 Merge branch 'master' into master
Files changed

CMakeLists.txt for the C sample (new file, 26 lines):

```cmake
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

find_package(OpenVINOGenAI REQUIRED
    PATHS
        "${CMAKE_BINARY_DIR}"  # Reuse the package from the build.
        ${OpenVINO_DIR}  # GenAI may be installed alogside OpenVINO.
    NO_CMAKE_FIND_ROOT_PATH
)

# Whisper Speech Recognition Sample
add_executable(whisper_speech_recognition_c whisper_speech_recognition.c whisper_utils.c)

# Specifies that the source file should be compiled as a C source file
set_source_files_properties(whisper_speech_recognition.c whisper_utils.c PROPERTIES LANGUAGE C)
target_link_libraries(whisper_speech_recognition_c PRIVATE openvino::genai::c)

set_target_properties(whisper_speech_recognition_c PROPERTIES
    # Ensure out-of-box LC_RPATH on macOS with SIP
    INSTALL_RPATH_USE_LINK_PATH ON)

# Install
install(TARGETS whisper_speech_recognition_c
        RUNTIME DESTINATION samples_bin/
        COMPONENT samples_bin
        EXCLUDE_FROM_ALL)
```
README.md for the C sample (new file, 133 lines):
# Whisper Automatic Speech Recognition C Sample

## Table of Contents

1. [Download OpenVINO GenAI](#download-openvino-genai)
2. [Build Samples](#build-samples)
3. [Download and Convert the Model](#download-and-convert-the-model)
4. [Prepare Audio File](#prepare-audio-file)
5. [Sample Description](#sample-description)
6. [Troubleshooting](#troubleshooting)
7. [Support and Contribution](#support-and-contribution)

## Download OpenVINO GenAI

Download and extract the [OpenVINO GenAI Archive](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/download.html?PACKAGE=OPENVINO_GENAI&VERSION=NIGHTLY&OP_SYSTEM=WINDOWS&DISTRIBUTION=ARCHIVE) from the OpenVINO Download Page.

## Build Samples

Set up the environment and build the samples on Linux and macOS:

```sh
source <INSTALL_DIR>/setupvars.sh
./<INSTALL_DIR>/samples/c/build_samples.sh
```

Windows Command Prompt:

```sh
<INSTALL_DIR>\setupvars.bat
<INSTALL_DIR>\samples\c\build_samples_msvc.bat
```

Windows PowerShell:

```sh
.<INSTALL_DIR>\setupvars.ps1
.<INSTALL_DIR>\samples\c\build_samples.ps1
```

## Download and Convert the Model

Install [../../export-requirements.txt](../../export-requirements.txt) if model conversion is required. The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version.

```sh
pip install --upgrade-strategy eager -r ../../export-requirements.txt
optimum-cli export openvino --trust-remote-code --model openai/whisper-tiny whisper-tiny
```

If a converted model in OpenVINO IR format is available in the [OpenVINO optimized models](https://huggingface.co/OpenVINO) collection on Hugging Face, you can download it directly via `huggingface-cli`. For example:

```sh
pip install huggingface-hub
huggingface-cli download OpenVINO/whisper-tiny-int8-ov --local-dir whisper-tiny-int8-ov
```

## Prepare Audio File

Prepare an audio file in WAV format with a 16 kHz sampling rate.

You can download an example audio file: https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/librispeech_s5/how_are_you_doing_today.wav
## Sample Description

This example showcases inference of speech recognition Whisper models using the OpenVINO GenAI C API. The sample features `ov_genai_whisper_pipeline` and uses audio files in WAV format as input.

### Run Command

```sh
./whisper_speech_recognition_c <MODEL_DIR> "<WAV_FILE_PATH>" [DEVICE]
```

### Parameters

- `MODEL_DIR`: Path to the converted Whisper model directory
- `WAV_FILE_PATH`: Path to the WAV audio file (use quotes if the path contains spaces)
- `DEVICE`: Optional device to run inference on (default: "CPU")

### Example Usage

```sh
./whisper_speech_recognition_c whisper-tiny how_are_you_doing_today.wav
```

### Expected Output

```text
How are you doing today?
timestamps: [0.00, 2.00] text: How are you doing today?
```

The sample will:

1. Load the WAV audio file and validate its format
2. Automatically resample to 16 kHz if needed (an illustrative resampling sketch follows this list)
3. Perform speech-to-text transcription
4. Output the full transcription
5. Display timestamps for each transcribed text chunk
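Step 2 is handled by the `resample_audio` helper from `whisper_utils.c`, which is part of this PR but not shown in this excerpt. Below is a minimal linear-interpolation sketch of such a helper, reusing the name and parameter order from the call site in the sample; the actual implementation may differ.

```c
#include <stdlib.h>

// Illustrative linear-interpolation resampler (not the actual whisper_utils.c code).
// Returns a newly allocated buffer that the caller must free(), or NULL on failure.
float* resample_audio(const float* input,
                      size_t input_length,
                      float input_rate,
                      float output_rate,
                      size_t* output_length) {
    if (!input || input_length == 0 || input_rate <= 0.0f || output_rate <= 0.0f || !output_length) {
        return NULL;
    }
    size_t out_len = (size_t)((double)input_length * output_rate / input_rate);
    float* output = (float*)malloc(out_len * sizeof(float));
    if (!output) {
        return NULL;
    }
    for (size_t i = 0; i < out_len; i++) {
        double src_pos = (double)i * input_rate / output_rate;  // position in the input signal
        size_t idx = (size_t)src_pos;
        double frac = src_pos - (double)idx;
        float a = input[idx];
        float b = (idx + 1 < input_length) ? input[idx + 1] : input[input_length - 1];
        output[i] = (float)((1.0 - frac) * a + frac * b);  // linear interpolation between neighbors
    }
    *output_length = out_len;
    return output;
}
```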
## Troubleshooting

### Empty or Incorrect Output

If you get empty or incorrect transcription results:

- Ensure your audio file is in WAV format
- Check that the audio contains clear speech

### Model Loading Errors

If the model fails to load:

- Verify the model path exists and contains valid Whisper model files
- Ensure the model was properly converted to OpenVINO IR format
- Check that the specified device (CPU, GPU, etc.) is available on your system

### Audio File Errors

The sample provides detailed error messages for common audio file issues (a sketch of the corresponding WAV-header checks follows this list):

- File not found
- Permission denied
- Invalid WAV format
- Unsupported audio encoding (only PCM is supported)
- Multi-channel audio (only mono is supported)
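The validation behind these messages lives in `whisper_utils.c`, which is not shown in this excerpt. The sketch below illustrates the kind of header checks involved, assuming a canonical 44-byte PCM WAV header; the function name, error strings, and layout handling are illustrative, not the actual helper.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Illustrative WAV header validation (assumes a canonical 44-byte PCM header;
// real WAV files may carry extra chunks, which the actual loader would handle).
// Returns 0 on success and fills *sample_rate; returns non-zero on any error.
static int check_wav_header(FILE* f, float* sample_rate) {
    uint8_t header[44];
    if (fread(header, 1, sizeof(header), f) != sizeof(header)) {
        fprintf(stderr, "Error: Invalid WAV format (file too short)\n");
        return 1;
    }
    if (memcmp(header, "RIFF", 4) != 0 || memcmp(header + 8, "WAVE", 4) != 0) {
        fprintf(stderr, "Error: Invalid WAV format (missing RIFF/WAVE markers)\n");
        return 1;
    }
    uint16_t audio_format = (uint16_t)(header[20] | (header[21] << 8));
    uint16_t num_channels = (uint16_t)(header[22] | (header[23] << 8));
    uint32_t rate = (uint32_t)header[24] | ((uint32_t)header[25] << 8) |
                    ((uint32_t)header[26] << 16) | ((uint32_t)header[27] << 24);
    if (audio_format != 1) {  // 1 == PCM
        fprintf(stderr, "Error: Unsupported audio encoding (only PCM is supported)\n");
        return 1;
    }
    if (num_channels != 1) {
        fprintf(stderr, "Error: Multi-channel audio (only mono is supported)\n");
        return 1;
    }
    *sample_rate = (float)rate;
    return 0;
}
```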
## Support and Contribution

- For troubleshooting, consult the [OpenVINO documentation](https://docs.openvino.ai).
- To report issues or contribute, visit the [GitHub repository](https://github.com/openvinotoolkit/openvino.genai).
samples/c/whisper_speech_recognition/whisper_speech_recognition.c (new file, 130 lines):
```c
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "openvino/genai/c/whisper_pipeline.h"
#include "whisper_utils.h"

int main(int argc, char* argv[]) {
    if (argc != 3 && argc != 4) {
        fprintf(stderr, "Usage: %s <MODEL_DIR> \"<WAV_FILE_PATH>\" [DEVICE]\n", argv[0]);
        return EXIT_FAILURE;
    }

    const char* model_path = argv[1];
    const char* wav_file_path = argv[2];
    const char* device = (argc == 4) ? argv[3] : "CPU";  // Default to CPU if no device is provided

    int exit_code = EXIT_SUCCESS;

    ov_genai_whisper_pipeline* pipeline = NULL;
    ov_genai_whisper_generation_config* config = NULL;
    ov_genai_whisper_decoded_results* results = NULL;
    float* audio_data = NULL;
    float* resampled_audio = NULL;
    size_t audio_length = 0;
    char* output = NULL;
    size_t output_size = 0;

    float file_sample_rate;
    if (load_wav_file(wav_file_path, &audio_data, &audio_length, &file_sample_rate) != 0) {
        exit_code = EXIT_FAILURE;
        goto err;
    }

    if (file_sample_rate != 16000.0f) {
        size_t resampled_length;
        resampled_audio = resample_audio(audio_data, audio_length, file_sample_rate, 16000.0f, &resampled_length);
        if (!resampled_audio) {
            fprintf(stderr, "Error: Failed to resample audio\n");
            exit_code = EXIT_FAILURE;
            goto err;
        }
        free(audio_data);
        audio_data = resampled_audio;
        audio_length = resampled_length;
        resampled_audio = NULL;
    }

    ov_status_e status = ov_genai_whisper_pipeline_create(model_path, device, 0, &pipeline);
    if (status != OK) {
        if (status == UNKNOW_EXCEPTION) {
            fprintf(stderr, "Error: Failed to create Whisper pipeline. Please check:\n");
            fprintf(stderr, " - Model path exists and contains valid Whisper model files\n");
            fprintf(stderr, " - Device '%s' is available and supported\n", device);
            fprintf(stderr, " - Model is compatible with OpenVINO GenAI\n");
        }
        CHECK_STATUS(status);
    }

    CHECK_STATUS(ov_genai_whisper_generation_config_create(&config));
    CHECK_STATUS(ov_genai_whisper_generation_config_set_task(config, "transcribe"));
    CHECK_STATUS(ov_genai_whisper_generation_config_set_return_timestamps(config, true));
    CHECK_STATUS(ov_genai_whisper_pipeline_generate(pipeline, audio_data, audio_length, config, &results));

    CHECK_STATUS(ov_genai_whisper_decoded_results_get_string(results, NULL, &output_size));
    output = (char*)malloc(output_size);
    if (!output) {
        fprintf(stderr, "Error: Failed to allocate memory for output\n");
        exit_code = EXIT_FAILURE;
        goto err;
    }

    CHECK_STATUS(ov_genai_whisper_decoded_results_get_string(results, output, &output_size));
    printf("%s\n", output);

    bool has_chunks = false;
    CHECK_STATUS(ov_genai_whisper_decoded_results_has_chunks(results, &has_chunks));

    if (has_chunks) {
        size_t chunks_count = 0;
        CHECK_STATUS(ov_genai_whisper_decoded_results_get_chunks_count(results, &chunks_count));

        for (size_t i = 0; i < chunks_count; i++) {
            ov_genai_whisper_decoded_result_chunk* chunk = NULL;
            CHECK_STATUS(ov_genai_whisper_decoded_results_get_chunk_at(results, i, &chunk));

            float start_ts = 0.0f, end_ts = 0.0f;
            CHECK_STATUS(ov_genai_whisper_decoded_result_chunk_get_start_ts(chunk, &start_ts));
            CHECK_STATUS(ov_genai_whisper_decoded_result_chunk_get_end_ts(chunk, &end_ts));

            size_t chunk_text_size = 0;
            CHECK_STATUS(ov_genai_whisper_decoded_result_chunk_get_text(chunk, NULL, &chunk_text_size));

            char* chunk_text = (char*)malloc(chunk_text_size);
            if (!chunk_text) {
                fprintf(stderr, "Warning: Failed to allocate memory for chunk text %zu\n", i);
                ov_genai_whisper_decoded_result_chunk_free(chunk);
                exit_code = EXIT_FAILURE;
                goto err;
            }

            CHECK_STATUS(ov_genai_whisper_decoded_result_chunk_get_text(chunk, chunk_text, &chunk_text_size));

            printf("timestamps: [%.2f, %.2f] text: %s\n", start_ts, end_ts, chunk_text);

            free(chunk_text);
            ov_genai_whisper_decoded_result_chunk_free(chunk);
        }
    }

err:
    if (pipeline)
        ov_genai_whisper_pipeline_free(pipeline);
    if (config)
        ov_genai_whisper_generation_config_free(config);
    if (results)
        ov_genai_whisper_decoded_results_free(results);
    if (output)
        free(output);
    if (audio_data)
        free(audio_data);
    if (resampled_audio)
        free(resampled_audio);

    return exit_code;
}
```

A reviewer noted on the `printf("timestamps: ...")` line that the sample output is missing a space compared to the Python sample.
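The sample relies on helpers from `whisper_utils.h`/`whisper_utils.c` (`load_wav_file`, `resample_audio`, and the `CHECK_STATUS` macro), which are part of the PR but not shown in this excerpt. A plausible header, inferred from the call sites above; the actual declarations and macro body may differ.

```c
// whisper_utils.h (sketch inferred from call sites; not the actual file from the PR)
#pragma once

#include <stddef.h>
#include <stdio.h>

#include "openvino/genai/c/whisper_pipeline.h"

// Loads a WAV file into a newly allocated float buffer for the pipeline.
// Returns 0 on success; the caller then owns *audio_data and must free() it.
int load_wav_file(const char* path, float** audio_data, size_t* audio_length, float* sample_rate);

// Resamples audio to the target rate; returns a newly allocated buffer (caller frees) or NULL on failure.
float* resample_audio(const float* input,
                      size_t input_length,
                      float input_rate,
                      float output_rate,
                      size_t* output_length);

// Checks an ov_status_e and jumps to the sample's cleanup label on failure.
// Assumes the calling function defines `int exit_code` and an `err:` label, as main() does above.
#define CHECK_STATUS(return_status)                                                        \
    do {                                                                                   \
        ov_status_e _status = (return_status);                                             \
        if (_status != OK) {                                                               \
            fprintf(stderr, "[ERROR] return status %d, line %d\n", _status, __LINE__);     \
            exit_code = EXIT_FAILURE;                                                      \
            goto err;                                                                      \
        }                                                                                  \
    } while (0)
```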
Review conversation

Reviewer:
Add README.md (I missed that earlier. I hope this is going to be the last change request)
Author:
Good catch. Added README (based off the C++ whisper and the C LLM Pipeline one): 55a36e9