import os
import gc
import shutil
import atexit

import torch
import torchaudio
import gradio as gr

from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline

# Pin the process to a specific GPU. The set_device call is guarded so the app
# can still start on a CPU-only machine, where an unguarded
# torch.cuda.set_device(0) would raise at import time.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"
if torch.cuda.is_available():
    torch.cuda.set_device(0)

# Initialize the AudioLDM2 morphing pipeline once at startup.
dtype = torch.float32
pipeline = AudioLDM2MorphPipeline.from_pretrained("cvssp/audioldm2-large", torch_dtype=dtype)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline.to(device)
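# Optional (an assumption, not part of the original app): on a GPU the pipeline
# could be loaded in half precision to roughly halve VRAM use, at some risk to
# output quality:
#   dtype = torch.float16 if torch.cuda.is_available() else torch.float32
#   pipeline = AudioLDM2MorphPipeline.from_pretrained(
#       "cvssp/audioldm2-large", torch_dtype=dtype
#   )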
def morph_audio(audio_file1, audio_file2, num_inference_steps, prompt1='', prompt2='',
                negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Morph between two sounds and return a fixed set of interpolated frames."""
    # Start from a clean output directory so stale frames from a previous run
    # are never returned.
    save_lora_dir = "output"
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load both clips and compute their durations in seconds.
    waveform1, sample_rate1 = torchaudio.load(audio_file1)
    duration1 = waveform1.shape[1] / sample_rate1
    waveform2, sample_rate2 = torchaudio.load(audio_file2)
    duration2 = waveform2.shape[1] / sample_rate2

    # Use the shorter of the two durations (in whole seconds) for generation.
    duration = int(min(duration1, duration2))
    # Perform morphing with the pipeline; outputs are written to save_lora_dir.
    _ = pipeline(
        dtype=dtype,
        audio_file=audio_file1,
        audio_file2=audio_file2,
        audio_length_in_s=duration,
        time_pooling=2,
        freq_pooling=2,
        prompt_1=prompt1,
        prompt_2=prompt2,
        negative_prompt_1=negative_prompt1,
        negative_prompt_2=negative_prompt2,
        save_lora_dir=save_lora_dir,
        use_adain=True,
        use_reschedule=False,
        num_inference_steps=num_inference_steps,
        lamd=0.6,
        output_path=save_lora_dir,
        num_frames=5,
        fix_lora=None,
        use_lora=True,
        lora_steps=2,
        noisy_latent_with_lora=True,
        morphing_with_lora=True,
        use_morph_prompt=True,
        guidance_scale=7.5,
    )
    # Collect the generated frames; files are named by frame index, so sort
    # them numerically rather than lexically.
    output_paths = sorted(
        [os.path.join(save_lora_dir, file) for file in os.listdir(save_lora_dir) if file.endswith(".wav")],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
    )

    # Drop large tensors and release cached GPU memory before returning.
    del waveform1, waveform2, _
    torch.cuda.empty_cache()
    gc.collect()
    return output_paths
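# Example (a local sanity check; "sound_a.wav" / "sound_b.wav" are hypothetical
# paths, and the frame names are inferred from the numeric sort above):
#   paths = morph_audio("sound_a.wav", "sound_b.wav", num_inference_steps=25)
#   print(paths)  # e.g. ["output/0.wav", ..., "output/4.wav"] for num_frames=5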
def morph_audio_with_morphing_factor(audio_file1, audio_file2, alpha, num_inference_steps,
                                     prompt1='', prompt2='', negative_prompt1="Low quality",
                                     negative_prompt2="Low quality"):
    """Generate a single morph at interpolation factor `alpha` between the two sounds."""
    # Start from a clean output directory.
    save_lora_dir = "output"
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load both clips and compute their durations in seconds.
    waveform1, sample_rate1 = torchaudio.load(audio_file1)
    duration1 = waveform1.shape[1] / sample_rate1
    waveform2, sample_rate2 = torchaudio.load(audio_file2)
    duration2 = waveform2.shape[1] / sample_rate2

    # Use the shorter of the two durations (in whole seconds) for generation.
    duration = int(min(duration1, duration2))
    try:
        # Perform morphing with the pipeline at the requested factor.
        _ = pipeline(
            dtype=dtype,
            morphing_factor=alpha,
            audio_file=audio_file1,
            audio_file2=audio_file2,
            audio_length_in_s=duration,
            time_pooling=2,
            freq_pooling=2,
            prompt_1=prompt1,
            prompt_2=prompt2,
            negative_prompt_1=negative_prompt1,
            negative_prompt_2=negative_prompt2,
            save_lora_dir=save_lora_dir,
            use_adain=True,
            use_reschedule=False,
            num_inference_steps=num_inference_steps,
            lamd=0.6,
            output_path=save_lora_dir,
            num_frames=5,
            fix_lora=None,
            use_lora=True,
            lora_steps=2,
            noisy_latent_with_lora=True,
            morphing_with_lora=True,
            use_morph_prompt=True,
            guidance_scale=7.5,
        )
        output_path = os.path.join(save_lora_dir, 'interpolated.wav')
    except RuntimeError as e:
        # On OOM, release cached memory before propagating the error.
        if "CUDA out of memory" in str(e):
            print("CUDA out of memory. Releasing unused memory...")
            torch.cuda.empty_cache()
            gc.collect()
        raise
    # Drop large tensors and release cached GPU memory before returning.
    del waveform1, waveform2, _
    torch.cuda.empty_cache()
    gc.collect()
    return output_path
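# Example (hypothetical paths): alpha=0.5 requests the midpoint between the
# two sounds; the pipeline writes the result to output/interpolated.wav.
#   mid = morph_audio_with_morphing_factor("sound_a.wav", "sound_b.wav",
#                                          alpha=0.5, num_inference_steps=25)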
def cleanup_output_dir():
    save_lora_dir = "output"
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
        print(f"Cleaned up directory: {save_lora_dir}")

# Remove the output directory when the process exits.
atexit.register(cleanup_output_dir)
# Gradio UI: two tabs, one per morphing mode.
with gr.Blocks() as demo:
    with gr.Tab("Sound morphing with fixed frames"):
        gr.Markdown("### Upload two audio files for morphing")
        with gr.Row():
            audio1 = gr.Audio(label="Upload Audio File 1", type="filepath")
            audio2 = gr.Audio(label="Upload Audio File 2", type="filepath")
        num_inference_steps = gr.Slider(10, 50, step=1, label="Inference Steps", value=50)
        outputs = [
            gr.Audio(label="Morphing audio 1"),
            gr.Audio(label="Morphing audio 2"),
            gr.Audio(label="Morphing audio 3"),
            gr.Audio(label="Morphing audio 4"),
            gr.Audio(label="Morphing audio 5"),
        ]
        submit_btn1 = gr.Button("Submit")
        submit_btn1.click(morph_audio, inputs=[audio1, audio2, num_inference_steps], outputs=outputs)
| with gr.Tab("Sound Morphing with specified morphing factor."): | |
| gr.Markdown("### Upload two audio files for morphing") | |
| with gr.Row(): | |
| audio1 = gr.Audio(label="Upload Audio File 1", type="filepath") | |
| audio2 = gr.Audio(label="Upload Audio File 2", type="filepath") | |
| alpha = gr.Slider(0, 1, step=0.01, label="Interpolation Alpha") | |
| num_inference_steps = gr.Slider(10, 50, step=1, label="Inference Steps", value=50) | |
| outputs=gr.Audio(label="Interpolated Audio") | |
| submit_btn2 = gr.Button("Submit") | |
| submit_btn2.click(morph_audio_with_morphing_factor, inputs=[audio1, audio2, alpha, num_inference_steps], outputs=outputs) | |
if __name__ == "__main__":
    demo.launch(share=True)
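# Note: share=True asks Gradio for a temporary public gradio.live link; it is
# unnecessary on Hugging Face Spaces, where the Space itself is the public
# endpoint.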