"""Gradio app that clones a voice with Coqui XTTS v2 and reads arbitrary text."""

import os
import re
import tempfile

import gradio as gr
import spaces
import torch
from pydub import AudioSegment
from TTS.api import TTS

os.makedirs("audio", exist_ok=True)

# Agree to Coqui TTS license (must be set before the model is instantiated).
os.environ["COQUI_TOS_AGREED"] = "1"

# Auto-detect device
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Split after sentence-ending punctuation followed by any whitespace, so text
# pasted with line breaks is chunked the same way as space-separated text.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")


# Function for long text voice cloning
@spaces.GPU(enable_queue=True)
def clone(text, audio):
    """Synthesize ``text`` in the voice of the reference clip ``audio``.

    The text is split into sentence-sized chunks, each chunk is rendered to
    its own WAV file with the module-level ``tts`` model, and the chunks are
    concatenated in order into a single WAV file.

    Args:
        text: The text to speak.
        audio: Filesystem path of the reference voice recording.

    Returns:
        Path of the generated WAV file.

    Raises:
        gr.Error: If ``text`` is empty/whitespace or no reference audio
            was supplied.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not audio:
        raise gr.Error("Please upload a reference audio clip.")

    chunks = [part.strip() for part in _SENTENCE_BOUNDARY.split(text) if part.strip()]

    final_audio = AudioSegment.silent(duration=0)

    # Per-call scratch directory: concurrent requests no longer overwrite each
    # other's chunk files, and the intermediates are removed on exit.
    with tempfile.TemporaryDirectory(prefix="xtts_chunks_") as workdir:
        for i, chunk in enumerate(chunks):
            chunk_path = os.path.join(workdir, f"chunk_{i}.wav")
            tts.tts_to_file(
                text=chunk,
                speaker_wav=audio,
                language="en",
                file_path=chunk_path,
            )
            final_audio += AudioSegment.from_wav(chunk_path)

    # Merge chunks into one file. A unique path per call keeps simultaneous
    # requests from clobbering a shared "./output.wav".
    fd, output_path = tempfile.mkstemp(prefix="voice_clone_", suffix=".wav")
    os.close(fd)  # pydub reopens the path itself when exporting
    final_audio.export(output_path, format="wav")
    return output_path


# UI
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan", neutral_hue="slate")
) as demo:
    # Custom CSS (the HTML payload is currently whitespace only)
    gr.HTML(""" """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown(
                """
                # 🎙️ Voice Clone Studio By Tahir Turk
                Clone any voice by uploading a short reference audio file and typing what you want it to say.

                **Powered by XTTS v2 — multilingual voice cloning.**
                """
            )
            text_input = gr.Textbox(
                label="Enter your text",
                placeholder="Type anything you'd like the cloned voice to say...",
                lines=6,
            )
            audio_input = gr.Audio(
                type="filepath",
                label="Upload voice reference (WAV or MP3)",
            )
            submit_btn = gr.Button("✨ Generate Voice", variant="primary")

        with gr.Column(scale=1):
            output_audio = gr.Audio(type="filepath", label="🔊 Generated Voice Output")
            gr.Markdown(
                """
                ---
                ⚡ **Tips for Best Results**
                - Use a **clean, clear** reference audio (5–15 seconds works best).
                - Long text will be split automatically for natural speech.
                - You can generate **minutes of audio** now without cutoff.

                ---
                """
            )

    with gr.Row():
        gr.Examples(
            examples=[
                ["Hey! It's me Dorthy, from the Wizard of Oz. Type in whatever you'd like me to say.", "./audio/Wizard-of-Oz-Dorthy.wav"],
                ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.", "./audio/Godfather.wav"],
                ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.", "./audio/Paris-Hilton.mp3"],
                ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.", "./audio/Megan-Fox.mp3"],
                ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.", "./audio/Jeff-Goldblum.mp3"],
                ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.", "./audio/Heath-Ledger.mp3"],
            ],
            inputs=[text_input, audio_input],
            outputs=[output_audio],
            label="🎭 Try with these sample voices",
        )

    submit_btn.click(fn=clone, inputs=[text_input, audio_input], outputs=output_audio)

demo.launch()