"""Command-line text-to-speech generator built on the Kokoro pipeline."""

import sys
import os
import subprocess
import argparse

import soundfile as sf
from kokoro import KPipeline

# Sample rate (Hz) passed to ``sf.write`` for every generated segment.
SAMPLE_RATE = 24000


def play_audio(file_path):
    """Plays audio using the macOS afplay command.

    Errors are reported on stdout rather than raised, so a failed playback
    never aborts the caller.

    Args:
        file_path: Path of the audio file handed to ``afplay``.
    """
    try:
        subprocess.run(["afplay", file_path], check=True)
    except FileNotFoundError:
        print("Error: 'afplay' command not found. Are you on macOS?")
    except Exception as e:
        # Includes CalledProcessError raised by check=True.
        print(f"Error playing audio: {e}")


def say(text: str, voice: str = "af_heart", play: bool = True, lang: str = "a"):
    """Synthesize *text* into ``output_<i>.wav`` files, optionally playing them.

    The text is split on runs of newlines; each segment yielded by the
    pipeline is written to its own WAV file in the current directory.

    Args:
        text: Text to speak.
        voice: Voice ID passed to the pipeline.
        play: When true, play each segment right after it is written.
        lang: Language code passed to ``KPipeline`` (default ``"a"``).

    Returns:
        The filename of the last segment written, or ``None`` when no
        segment was produced or generation failed.

    Note:
        Calls ``sys.exit(1)`` if the pipeline cannot be initialized.
    """
    try:
        print(f"Initializing pipeline for language '{lang}'...")
        pipeline = KPipeline(lang_code=lang)
    except Exception as e:
        print(f"Failed to initialize KPipeline: {e}")
        print(
            "Note: Japanese ('j') and Chinese ('z') require extra dependencies: pip install 'misaki[ja]' or 'misaki[zh]'"
        )
        sys.exit(1)

    print(f"Generating audio with voice '{voice}'...")

    # Stays None if the generator yields nothing, so the return below is
    # always bound instead of raising UnboundLocalError.
    filename = None
    try:
        generator = pipeline(text, voice=voice, speed=1, split_pattern=r"\n+")
        for i, (_, _, audio) in enumerate(generator):
            filename = f"output_{i}.wav"
            sf.write(filename, audio, SAMPLE_RATE)
            if play:
                print(f"Playing segment {i}...")
                play_audio(filename)
        return filename
    except Exception as e:
        print(f"Error during generation: {e}")
        return None


def main():
    """Parse command-line arguments and speak the requested text.

    Text comes from the positional arguments, or from stdin when none are
    given. Synthesis and playback are delegated to :func:`say`.
    """
    parser = argparse.ArgumentParser(description="Kokoro TTS Generator")
    parser.add_argument("text", nargs="*", help="Text to speak")
    parser.add_argument(
        "-l",
        "--lang",
        default="a",
        help=(
            "Language code (a=Am. English, b=Br. English, e=Spanish, "
            "f=French, h=Hindi, i=Italian, j=Japanese, p=Portuguese, "
            "z=Chinese)"
        ),
    )
    parser.add_argument(
        "-v", "--voice", default="af_heart", help="Voice ID (default: af_heart)"
    )
    args = parser.parse_args()

    # Combine text arguments or read from stdin
    if args.text:
        text = " ".join(args.text)
    else:
        print("Enter text to speak (Ctrl+D to finish):")
        try:
            text = sys.stdin.read()
        except KeyboardInterrupt:
            print("\nExiting.")
            sys.exit(0)

    if not text.strip():
        print("No text provided.")
        return

    # say() handles pipeline initialization, generation and playback.
    say(text, voice=args.voice, play=True, lang=args.lang)


if __name__ == "__main__":
    main()