|
|
import sys |
|
|
import os |
|
|
import soundfile as sf |
|
|
from kokoro import KPipeline |
|
|
import subprocess |
|
|
import argparse |
|
|
|
|
|
|
|
|
def play_audio(file_path):
    """Play the audio file at ``file_path`` with the macOS ``afplay`` tool.

    Playback problems are reported on stdout instead of being raised, so the
    function always returns ``None``.
    """
    command = ["afplay", file_path]
    try:
        # check=True turns a non-zero afplay exit status into an exception,
        # which the generic handler below reports.
        subprocess.run(command, check=True)
    except FileNotFoundError:
        print("Error: 'afplay' command not found. Are you on macOS?")
    except Exception as err:
        print(f"Error playing audio: {err}")
|
|
|
|
|
|
|
|
def say(text: str, voice: str = "af_heart", play: bool = True, lang: str = "a"):
    """Synthesize ``text`` with Kokoro, writing one WAV file per segment.

    The text is split on runs of newlines. Segment ``i`` is written to
    ``output_{i}.wav`` in the current working directory and, when ``play`` is
    true, handed to ``play_audio`` before the next segment is processed.

    Args:
        text: Text to speak.
        voice: Kokoro voice ID.
        play: Whether to play each segment after it has been written.
        lang: Language code passed to ``KPipeline``. Defaults to ``"a"``, the
            value that used to be hard-coded here.

    Returns:
        The filename of the last segment written, or ``None`` when ``text`` is
        blank, no segment was produced, or generation failed.

    Raises:
        SystemExit: With status 1 if the pipeline cannot be initialized.
    """
    # Nothing to synthesize: return before initializing the pipeline at all.
    if not text or not text.strip():
        print("No text provided.")
        return None

    try:
        print(f"Initializing pipeline for language '{lang}'...")
        pipeline = KPipeline(lang_code=lang)
    except Exception as e:
        print(f"Failed to initialize KPipeline: {e}")
        print(
            "Note: Japanese ('j') and Chinese ('z') require extra dependencies: pip install 'misaki[ja]' or 'misaki[zh]'"
        )
        sys.exit(1)

    print(f"Generating audio with voice '{voice}'...")

    # Tracked explicitly so that "no segments produced" yields None instead of
    # depending on an unbound-variable error.
    last_filename = None
    try:
        segments = pipeline(text, voice=voice, speed=1, split_pattern=r"\n+")
        # Only the audio element of each yielded triple is used.
        for i, (_, _, audio) in enumerate(segments):
            filename = f"output_{i}.wav"
            sf.write(filename, audio, 24000)
            last_filename = filename
            if play:
                print(f"Playing segment {i}...")
                play_audio(filename)
    except Exception as e:
        print(f"Error during generation: {e}")
        return None

    return last_filename
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: parse arguments, then synthesize and play.

    The text comes from the positional arguments (joined with spaces) or, when
    none are given, from standard input. Each generated segment is written to
    ``output_{i}.wav`` in the current working directory and played with
    ``play_audio``.

    Returns:
        ``None``. Returns early, after printing a message, when the supplied
        text is blank.

    Raises:
        SystemExit: Status 1 if the pipeline cannot be initialized or audio
            generation fails; status 0 if reading standard input is
            interrupted with Ctrl+C.
    """
    parser = argparse.ArgumentParser(description="Kokoro TTS Generator")
    parser.add_argument("text", nargs="*", help="Text to speak")
    parser.add_argument(
        "-l",
        "--lang",
        default="a",
        help="Language code (a=Am. English, b=Br. English, e=Spanish, f=French, h=Hindi, i=Italian, j=Japanese, p=Portuguese, z=Chinese)",
    )
    parser.add_argument(
        "-v", "--voice", default="af_heart", help="Voice ID (default: af_heart)"
    )

    args = parser.parse_args()

    if args.text:
        text = " ".join(args.text)
    else:
        print("Enter text to speak (Ctrl+D to finish):")
        try:
            text = sys.stdin.read()
        except KeyboardInterrupt:
            print("\nExiting.")
            sys.exit(0)

    if not text.strip():
        print("No text provided.")
        return

    try:
        print(f"Initializing pipeline for language '{args.lang}'...")
        pipeline = KPipeline(lang_code=args.lang)
    except Exception as e:
        print(f"Failed to initialize KPipeline: {e}")
        print(
            "Note: Japanese ('j') and Chinese ('z') require extra dependencies: pip install 'misaki[ja]' or 'misaki[zh]'"
        )
        sys.exit(1)

    print(f"Generating audio with voice '{args.voice}'...")

    try:
        generator = pipeline(text, voice=args.voice, speed=1, split_pattern=r"\n+")
        # Only the audio element of each yielded triple is used.
        for i, (_, _, audio) in enumerate(generator):
            filename = f"output_{i}.wav"
            sf.write(filename, audio, 24000)
            print(f"Playing segment {i}...")
            play_audio(filename)
    except Exception as e:
        print(f"Error during generation: {e}")
        # Report the failure through the exit status too, matching the
        # pipeline-initialization failure path above.
        sys.exit(1)
|
|
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|