kokoro / app.py
0xWerz
init
a17f08d
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from pydantic import BaseModel
from talk import say
# Application instance; the @app.get / @app.post decorators in this module
# register their route handlers on it.
app = FastAPI()
class TTSRequest(BaseModel):
    """Request payload accepted by the POST /tts endpoint."""

    # Text to be synthesized; required (no default).
    text: str
    # Identifier of the voice to synthesize with; defaults to "af_heart".
    voice: str = "af_heart"
@app.get("/")
def root():
    """Return a short self-description of the API.

    The payload holds a "message" banner and an "endpoints" mapping from
    route path to a one-line summary of what that route does.
    """
    endpoint_summaries = {
        "/tts": "POST - Generate speech from text",
        "/voices": "GET - List available voices"
    }
    return {"message": "Kokoro TTS API", "endpoints": endpoint_summaries}
@app.post("/tts")
def generate_speech(request: TTSRequest):
    """Synthesize ``request.text`` with ``request.voice`` and return the audio.

    Returns:
        A ``FileResponse`` serving the generated file as ``audio/wav`` with the
        download name ``speech_<voice>.wav``.

    Raises:
        HTTPException: 400 when the text is empty or whitespace-only;
            500 when ``say`` hands back a falsy result.
    """
    # Reject missing, empty, and whitespace-only text before doing any work.
    if not (request.text and request.text.strip()):
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    # play=False is passed through to `say`; its result is used below as the
    # path of the file to serve (presumably the generated WAV - confirm in talk.say).
    wav_path = say(request.text, voice=request.voice, play=False)
    if not wav_path:
        raise HTTPException(status_code=500, detail="Failed to generate audio")

    download_name = f"speech_{request.voice}.wav"
    return FileResponse(wav_path, media_type="audio/wav", filename=download_name)
@app.get("/voices")
def list_voices():
    """Return the voices advertised by this API.

    The payload is a dict with a single "voices" key holding a list of
    entries, each with an "id", a display "name" and a "language" label.
    """
    english = "English"
    british = "English (British)"

    # Hard-coded table of (id, display name, language) rows.
    catalogue = (
        ("af_heart", "Female Heart", english),
        ("af_sky", "Female Sky", english),
        ("af_bella", "Female Bella", english),
        ("af_nicole", "Female Nicole", english),
        ("af_sarah", "Female Sarah", english),
        ("am_adam", "Male Adam", english),
        ("am_michael", "Male Michael", english),
        ("bf_emma", "British Female Emma", british),
        ("bf_isabella", "British Female Isabella", british),
        ("bm_george", "British Male George", british),
        ("bm_lewis", "British Male Lewis", british),
    )

    return {
        "voices": [
            {"id": voice_id, "name": label, "language": language}
            for voice_id, label, language in catalogue
        ]
    }