Skip to content

Commit 9dd00b2

Browse files
committed
feat(kokoro): complete kokoro integration
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 20a70e1 commit 9dd00b2

17 files changed

+259
-1180
lines changed

Makefile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ backends/rfdetr: docker-build-rfdetr docker-save-rfdetr build
159159
backends/kitten-tts: docker-build-kitten-tts docker-save-kitten-tts build
160160
./local-ai backends install "ocifile://$(abspath ./backend-images/kitten-tts.tar)"
161161

162+
backends/kokoro: docker-build-kokoro docker-save-kokoro build
163+
./local-ai backends install "ocifile://$(abspath ./backend-images/kokoro.tar)"
164+
162165
########################################################
163166
## AIO tests
164167
########################################################
@@ -378,6 +381,12 @@ docker-build-kitten-tts:
378381
docker-save-kitten-tts: backend-images
379382
docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar
380383

384+
docker-build-kokoro:
385+
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend
386+
387+
docker-save-kokoro: backend-images
388+
docker save local-ai-backend:kokoro -o backend-images/kokoro.tar
389+
381390
docker-save-rfdetr: backend-images
382391
docker save local-ai-backend:rfdetr -o backend-images/rfdetr.tar
383392

@@ -420,9 +429,6 @@ docker-build-transformers:
420429
docker-build-diffusers:
421430
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers .
422431

423-
docker-build-kokoro:
424-
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro .
425-
426432
docker-build-whisper:
427433
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.golang --build-arg BACKEND=whisper .
428434

backend/python/kokoro/Makefile

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
1-
.DEFAULT_GOAL := install
2-
3-
.PHONY: install
4-
install:
1+
.PHONY: kokoro
2+
kokoro: protogen
53
bash install.sh
6-
$(MAKE) protogen
4+
5+
.PHONY: run
6+
run: protogen
7+
@echo "Running kokoro..."
8+
bash run.sh
9+
@echo "kokoro run."
10+
11+
.PHONY: test
12+
test: protogen
13+
@echo "Testing kokoro..."
14+
bash test.sh
15+
@echo "kokoro tested."
716

817
.PHONY: protogen
918
protogen: backend_pb2_grpc.py backend_pb2.py
@@ -13,7 +22,7 @@ protogen-clean:
1322
$(RM) backend_pb2_grpc.py backend_pb2.py
1423

1524
backend_pb2_grpc.py backend_pb2.py:
16-
bash protogen.sh
25+
python3 -m grpc_tools.protoc -I../.. -I./ --python_out=. --grpc_python_out=. backend.proto
1726

1827
.PHONY: clean
1928
clean: protogen-clean

backend/python/kokoro/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Kokoro TTS Backend for LocalAI
2+
3+
This is a gRPC server backend for LocalAI that uses the Kokoro TTS pipeline.
4+
5+
## Creating a separate environment for the Kokoro project
6+
7+
```bash
8+
make kokoro
9+
```
10+
11+
## Testing the gRPC server
12+
13+
```bash
14+
make test
15+
```
16+
17+
## Features
18+
19+
- Lightweight TTS model with 82 million parameters
20+
- Apache-licensed weights
21+
- Fast and cost-efficient
22+
- Multi-language support
23+
- Multiple voice options

backend/python/kokoro/backend.py

100755100644
Lines changed: 54 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,101 +1,92 @@
11
#!/usr/bin/env python3
22
"""
3-
Extra gRPC server for Kokoro models.
3+
This is an extra gRPC server for LocalAI that provides Kokoro TTS
44
"""
55
from concurrent import futures
6-
6+
import time
77
import argparse
88
import signal
99
import sys
1010
import os
11-
import time
1211
import backend_pb2
1312
import backend_pb2_grpc
13+
14+
import torch
15+
from kokoro import KPipeline
1416
import soundfile as sf
17+
1518
import grpc
1619

17-
from models import build_model
18-
from kokoro import generate
19-
import torch
2020

21-
SAMPLE_RATE = 22050
2221
_ONE_DAY_IN_SECONDS = 60 * 60 * 24
2322

2423
# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
2524
MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
25+
KOKORO_LANG_CODE = os.environ.get('KOKORO_LANG_CODE', 'a')
2626

2727
# Implement the BackendServicer class with the service methods
2828
class BackendServicer(backend_pb2_grpc.BackendServicer):
2929
"""
30-
A gRPC servicer for the backend service.
31-
32-
This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
30+
BackendServicer is the class that implements the gRPC service
3331
"""
3432
def Health(self, request, context):
35-
"""
36-
A gRPC method that returns the health status of the backend service.
37-
38-
Args:
39-
request: A HealthRequest object that contains the request parameters.
40-
context: A grpc.ServicerContext object that provides information about the RPC.
41-
42-
Returns:
43-
A Reply object that contains the health status of the backend service.
44-
"""
4533
return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
46-
34+
4735
def LoadModel(self, request, context):
48-
"""
49-
A gRPC method that loads a model into memory.
36+
# Get device
37+
if torch.cuda.is_available():
38+
print("CUDA is available", file=sys.stderr)
39+
device = "cuda"
40+
else:
41+
print("CUDA is not available", file=sys.stderr)
42+
device = "cpu"
5043

51-
Args:
52-
request: A LoadModelRequest object that contains the request parameters.
53-
context: A grpc.ServicerContext object that provides information about the RPC.
44+
if not torch.cuda.is_available() and request.CUDA:
45+
return backend_pb2.Result(success=False, message="CUDA is not available")
5446

55-
Returns:
56-
A Result object that contains the result of the LoadModel operation.
57-
"""
58-
model_name = request.Model
5947
try:
60-
device = "cuda:0" if torch.cuda.is_available() else "cpu"
61-
self.MODEL = build_model(request.ModelFile, device)
48+
print("Preparing Kokoro TTS pipeline, please wait", file=sys.stderr)
49+
# empty dict
50+
self.options = {}
6251
options = request.Options
63-
# Find the voice from the options, options are a list of strings in this form optname:optvalue:
64-
VOICE_NAME = None
52+
# The options are a list of strings in this form optname:optvalue
53+
# We are storing all the options in a dict so we can use it later when
54+
# generating the audio
6555
for opt in options:
66-
if opt.startswith("voice:"):
67-
VOICE_NAME = opt.split(":")[1]
68-
break
69-
if VOICE_NAME is None:
70-
return backend_pb2.Result(success=False, message=f"No voice specified in options")
71-
MODELPATH = request.ModelPath
72-
# If voice name contains a plus, split it and load the two models and combine them
73-
if "+" in VOICE_NAME:
74-
voice1, voice2 = VOICE_NAME.split("+")
75-
voice1 = torch.load(f'{MODELPATH}/{voice1}.pt', weights_only=True).to(device)
76-
voice2 = torch.load(f'{MODELPATH}/{voice2}.pt', weights_only=True).to(device)
77-
self.VOICEPACK = torch.mean(torch.stack([voice1, voice2]), dim=0)
78-
else:
79-
self.VOICEPACK = torch.load(f'{MODELPATH}/{VOICE_NAME}.pt', weights_only=True).to(device)
80-
81-
self.VOICE_NAME = VOICE_NAME
82-
83-
print(f'Loaded voice: {VOICE_NAME}')
56+
if ":" not in opt:
57+
continue
58+
key, value = opt.split(":")
59+
self.options[key] = value
60+
61+
# Initialize Kokoro pipeline with language code
62+
lang_code = self.options.get("lang_code", KOKORO_LANG_CODE)
63+
self.pipeline = KPipeline(lang_code=lang_code)
64+
print(f"Kokoro TTS pipeline loaded with language code: {lang_code}", file=sys.stderr)
8465
except Exception as err:
8566
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
86-
87-
return backend_pb2.Result(message="Model loaded successfully", success=True)
67+
68+
return backend_pb2.Result(message="Kokoro TTS pipeline loaded successfully", success=True)
8869

8970
def TTS(self, request, context):
90-
model_name = request.model
91-
if model_name == "":
92-
return backend_pb2.Result(success=False, message="request.model is required")
9371
try:
94-
audio, out_ps = generate(self.MODEL, request.text, self.VOICEPACK, lang=self.VOICE_NAME)
95-
print(out_ps)
96-
sf.write(request.dst, audio, SAMPLE_RATE)
72+
# Get voice from request, default to 'af_heart' if not specified
73+
voice = request.voice if request.voice else 'af_heart'
74+
75+
# Generate audio using Kokoro pipeline
76+
generator = self.pipeline(request.text, voice=voice)
77+
78+
# Get the first (and typically only) audio segment
79+
for i, (gs, ps, audio) in enumerate(generator):
80+
# Save audio to the destination file
81+
sf.write(request.dst, audio, 24000)
82+
print(f"Generated audio segment {i}: gs={gs}, ps={ps}", file=sys.stderr)
83+
# For now, we only process the first segment
84+
# If you need to handle multiple segments, you might want to modify this
85+
break
86+
9787
except Exception as err:
9888
return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
89+
9990
return backend_pb2.Result(success=True)
10091

10192
def serve(address):
@@ -108,11 +99,11 @@ def serve(address):
10899
backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
109100
server.add_insecure_port(address)
110101
server.start()
111-
print("[Kokoro] Server started. Listening on: " + address, file=sys.stderr)
102+
print("Server started. Listening on: " + address, file=sys.stderr)
112103

113104
# Define the signal handler function
114105
def signal_handler(sig, frame):
115-
print("[Kokoro] Received termination signal. Shutting down...")
106+
print("Received termination signal. Shutting down...")
116107
server.stop(0)
117108
sys.exit(0)
118109

@@ -132,5 +123,5 @@ def signal_handler(sig, frame):
132123
"--addr", default="localhost:50051", help="The address to bind the server to."
133124
)
134125
args = parser.parse_args()
135-
print(f"[Kokoro] startup: {args}", file=sys.stderr)
126+
136127
serve(args.addr)

0 commit comments

Comments
 (0)