# kokoro / talk.py
# 0xWerz
# init
# c7438ae
import sys
import os
import soundfile as sf
from kokoro import KPipeline
import subprocess
import argparse
def play_audio(file_path):
    """Play an audio file through the macOS ``afplay`` command.

    The call blocks until ``afplay`` exits. Failures are reported on
    stdout rather than raised: a missing ``afplay`` executable gets a
    dedicated hint, any other exception is printed with its message.

    Args:
        file_path: Path of the audio file handed to ``afplay``.
    """
    command = ["afplay", file_path]
    try:
        # check=True turns a non-zero afplay exit status into an exception.
        subprocess.run(command, check=True)
    except FileNotFoundError:
        # Raised when the afplay executable itself cannot be located.
        print("Error: 'afplay' command not found. Are you on macOS?")
    except Exception as playback_error:
        print(f"Error playing audio: {playback_error}")
def say(text: str, voice: str = "af_heart", play: bool = True, lang: str = "a"):
    """Synthesize *text* into WAV segment files and optionally play them.

    The text is split on runs of newlines; each resulting segment is
    written to ``output_<index>.wav`` (relative to the current working
    directory, sample rate 24000) and, when *play* is true, played back
    with ``play_audio`` before the next segment is handled.

    Args:
        text: Text to speak.
        voice: Voice ID passed to the pipeline.
        play: Whether to play each segment after writing it.
        lang: Language code passed to ``KPipeline`` (default ``"a"``).

    Returns:
        The filename of the last segment written, or ``None`` when no
        segment was produced or generation failed.

    Raises:
        SystemExit: With status 1 if the pipeline cannot be initialized.
    """
    try:
        print(f"Initializing pipeline for language '{lang}'...")
        pipeline = KPipeline(lang_code=lang)
    except Exception as e:
        print(f"Failed to initialize KPipeline: {e}")
        print(
            "Note: Japanese ('j') and Chinese ('z') require extra dependencies: pip install 'misaki[ja]' or 'misaki[zh]'"
        )
        sys.exit(1)

    print(f"Generating audio with voice '{voice}'...")

    # Bound up front so an empty generator yields None instead of tripping
    # an UnboundLocalError that would be misreported as a generation error.
    filename = None
    try:
        generator = pipeline(text, voice=voice, speed=1, split_pattern=r"\n+")
        # The first two tuple items are unused here (presumably the
        # segment's graphemes and phonemes -- confirm against Kokoro docs).
        for i, (_gs, _ps, audio) in enumerate(generator):
            filename = f"output_{i}.wav"
            sf.write(filename, audio, 24000)
            if play:
                print(f"Playing segment {i}...")
                play_audio(filename)
    except Exception as e:
        print(f"Error during generation: {e}")
        return None

    # Return only after every segment has been written (and played).
    return filename
def main():
    """Command-line entry point: speak text given as arguments or on stdin.

    Positional arguments are joined with single spaces; when none are
    given the text is read from standard input. Each newline-separated
    segment is written to ``output_<index>.wav`` and played with
    ``play_audio``.

    Returns:
        ``None``. Returns early (without synthesizing) when the text is
        empty or whitespace-only.

    Raises:
        SystemExit: Status 0 if reading stdin is interrupted with Ctrl+C;
            status 1 if the pipeline cannot be initialized or audio
            generation fails.
    """
    parser = argparse.ArgumentParser(description="Kokoro TTS Generator")
    parser.add_argument("text", nargs="*", help="Text to speak")
    parser.add_argument(
        "-l",
        "--lang",
        default="a",
        help="Language code (a=Am. English, b=Br. English, e=Spanish, f=French, h=Hindi, i=Italian, j=Japanese, p=Portuguese, z=Chinese)",
    )
    parser.add_argument(
        "-v", "--voice", default="af_heart", help="Voice ID (default: af_heart)"
    )
    args = parser.parse_args()

    # Combine text arguments or read from stdin
    if args.text:
        text = " ".join(args.text)
    else:
        # Only prompt an interactive user; with piped input the prompt is
        # meaningless and would just pollute stdout.
        if sys.stdin.isatty():
            print("Enter text to speak (Ctrl+D to finish):")
        try:
            text = sys.stdin.read()
        except KeyboardInterrupt:
            print("\nExiting.")
            sys.exit(0)

    if not text.strip():
        print("No text provided.")
        return

    # Initialize pipeline
    try:
        print(f"Initializing pipeline for language '{args.lang}'...")
        pipeline = KPipeline(lang_code=args.lang)
    except Exception as e:
        print(f"Failed to initialize KPipeline: {e}")
        print(
            "Note: Japanese ('j') and Chinese ('z') require extra dependencies: pip install 'misaki[ja]' or 'misaki[zh]'"
        )
        sys.exit(1)

    print(f"Generating audio with voice '{args.voice}'...")

    # Generate audio
    try:
        generator = pipeline(text, voice=args.voice, speed=1, split_pattern=r"\n+")
        # Only the audio item of each yielded tuple is used.
        for i, (_gs, _ps, audio) in enumerate(generator):
            filename = f"output_{i}.wav"
            sf.write(filename, audio, 24000)
            print(f"Playing segment {i}...")
            play_audio(filename)
    except Exception as e:
        print(f"Error during generation: {e}")
        # Match the init-failure path: a failed run must not exit with 0.
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()