import os
import gc
import shutil
import atexit

import torch
import torchaudio
import gradio as gr

from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline

# Pin the process to a specific GPU. The set_device call is guarded so the app
# can still start on a CPU-only machine, where an unguarded
# torch.cuda.set_device(0) would raise at import time.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
if torch.cuda.is_available():
    torch.cuda.set_device(0)

# Initialize the AudioLDM2 morphing pipeline once at startup.
dtype = torch.float32
pipeline = AudioLDM2MorphPipeline.from_pretrained("cvssp/audioldm2-large", torch_dtype=dtype)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline.to(device)
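# Optional (an assumption, not part of the original app): on a GPU the pipeline
# could be loaded in half precision to roughly halve VRAM use, at some risk to
# output quality:
#   dtype = torch.float16 if torch.cuda.is_available() else torch.float32
#   pipeline = AudioLDM2MorphPipeline.from_pretrained(
#       "cvssp/audioldm2-large", torch_dtype=dtype
#   )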
def morph_audio(audio_file1, audio_file2, num_inference_steps, prompt1='', prompt2='',
                negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Morph between two sounds and return a fixed set of interpolated frames."""
    # Start from a clean output directory so stale frames from a previous run
    # are never returned.
    save_lora_dir = "output"
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load both clips and compute their durations in seconds.
    waveform1, sample_rate1 = torchaudio.load(audio_file1)
    duration1 = waveform1.shape[1] / sample_rate1
    waveform2, sample_rate2 = torchaudio.load(audio_file2)
    duration2 = waveform2.shape[1] / sample_rate2

    # Use the shorter of the two durations (in whole seconds) for generation.
    duration = int(min(duration1, duration2))
    # Perform morphing with the pipeline; outputs are written to save_lora_dir.
    _ = pipeline(
        dtype=dtype,
        audio_file=audio_file1,
        audio_file2=audio_file2,
        audio_length_in_s=duration,
        time_pooling=2,
        freq_pooling=2,
        prompt_1=prompt1,
        prompt_2=prompt2,
        negative_prompt_1=negative_prompt1,
        negative_prompt_2=negative_prompt2,
        save_lora_dir=save_lora_dir,
        use_adain=True,
        use_reschedule=False,
        num_inference_steps=num_inference_steps,
        lamd=0.6,
        output_path=save_lora_dir,
        num_frames=5,
        fix_lora=None,
        use_lora=True,
        lora_steps=2,
        noisy_latent_with_lora=True,
        morphing_with_lora=True,
        use_morph_prompt=True,
        guidance_scale=7.5,
    )
    # Collect the generated frames; files are named by frame index, so sort
    # them numerically rather than lexically.
    output_paths = sorted(
        [os.path.join(save_lora_dir, file) for file in os.listdir(save_lora_dir) if file.endswith(".wav")],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
    )

    # Drop large tensors and release cached GPU memory before returning.
    del waveform1, waveform2, _
    torch.cuda.empty_cache()
    gc.collect()
    return output_paths
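# Example (a local sanity check; "sound_a.wav" / "sound_b.wav" are hypothetical
# paths, and the frame names are inferred from the numeric sort above):
#   paths = morph_audio("sound_a.wav", "sound_b.wav", num_inference_steps=25)
#   print(paths)  # e.g. ["output/0.wav", ..., "output/4.wav"] for num_frames=5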
def morph_audio_with_morphing_factor(audio_file1, audio_file2, alpha, num_inference_steps,
                                     prompt1='', prompt2='', negative_prompt1="Low quality",
                                     negative_prompt2="Low quality"):
    """Generate a single morph at interpolation factor `alpha` between the two sounds."""
    # Start from a clean output directory.
    save_lora_dir = "output"
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load both clips and compute their durations in seconds.
    waveform1, sample_rate1 = torchaudio.load(audio_file1)
    duration1 = waveform1.shape[1] / sample_rate1
    waveform2, sample_rate2 = torchaudio.load(audio_file2)
    duration2 = waveform2.shape[1] / sample_rate2

    # Use the shorter of the two durations (in whole seconds) for generation.
    duration = int(min(duration1, duration2))
    try:
        # Perform morphing with the pipeline at the requested factor.
        _ = pipeline(
            dtype=dtype,
            morphing_factor=alpha,
            audio_file=audio_file1,
            audio_file2=audio_file2,
            audio_length_in_s=duration,
            time_pooling=2,
            freq_pooling=2,
            prompt_1=prompt1,
            prompt_2=prompt2,
            negative_prompt_1=negative_prompt1,
            negative_prompt_2=negative_prompt2,
            save_lora_dir=save_lora_dir,
            use_adain=True,
            use_reschedule=False,
            num_inference_steps=num_inference_steps,
            lamd=0.6,
            output_path=save_lora_dir,
            num_frames=5,
            fix_lora=None,
            use_lora=True,
            lora_steps=2,
            noisy_latent_with_lora=True,
            morphing_with_lora=True,
            use_morph_prompt=True,
            guidance_scale=7.5,
        )
        output_path = os.path.join(save_lora_dir, 'interpolated.wav')
    except RuntimeError as e:
        # On OOM, release cached memory before propagating the error.
        if "CUDA out of memory" in str(e):
            print("CUDA out of memory. Releasing unused memory...")
            torch.cuda.empty_cache()
            gc.collect()
        raise
    # Drop large tensors and release cached GPU memory before returning.
    del waveform1, waveform2, _
    torch.cuda.empty_cache()
    gc.collect()
    return output_path
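# Example (hypothetical paths): alpha=0.5 requests the midpoint between the
# two sounds; the pipeline writes the result to output/interpolated.wav.
#   mid = morph_audio_with_morphing_factor("sound_a.wav", "sound_b.wav",
#                                          alpha=0.5, num_inference_steps=25)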
def cleanup_output_dir():
    save_lora_dir = "output"
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
        print(f"Cleaned up directory: {save_lora_dir}")

# Remove the output directory when the process exits.
atexit.register(cleanup_output_dir)
# Gradio UI: two tabs, one per morphing mode.
with gr.Blocks() as demo:
    with gr.Tab("Sound morphing with fixed frames"):
        gr.Markdown("### Upload two audio files for morphing")
        with gr.Row():
            audio1 = gr.Audio(label="Upload Audio File 1", type="filepath")
            audio2 = gr.Audio(label="Upload Audio File 2", type="filepath")
        num_inference_steps = gr.Slider(10, 50, step=1, label="Inference Steps", value=50)
        outputs = [
            gr.Audio(label="Morphing audio 1"),
            gr.Audio(label="Morphing audio 2"),
            gr.Audio(label="Morphing audio 3"),
            gr.Audio(label="Morphing audio 4"),
            gr.Audio(label="Morphing audio 5"),
        ]
        submit_btn1 = gr.Button("Submit")
        submit_btn1.click(morph_audio, inputs=[audio1, audio2, num_inference_steps], outputs=outputs)
| with gr.Tab("Sound Morphing with specified morphing factor."): | |
| gr.Markdown("### Upload two audio files for morphing") | |
| with gr.Row(): | |
| audio1 = gr.Audio(label="Upload Audio File 1", type="filepath") | |
| audio2 = gr.Audio(label="Upload Audio File 2", type="filepath") | |
| alpha = gr.Slider(0, 1, step=0.01, label="Interpolation Alpha") | |
| num_inference_steps = gr.Slider(10, 50, step=1, label="Inference Steps", value=50) | |
| outputs=gr.Audio(label="Interpolated Audio") | |
| submit_btn2 = gr.Button("Submit") | |
| submit_btn2.click(morph_audio_with_morphing_factor, inputs=[audio1, audio2, alpha, num_inference_steps], outputs=outputs) | |
if __name__ == "__main__":
    demo.launch(share=True)
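# Note: share=True asks Gradio for a temporary public gradio.live link; it is
# unnecessary on Hugging Face Spaces, where the Space itself is the public
# endpoint.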