import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
from PIL import Image
import numpy as np
import cv2

# Detect the GPU or fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the ControlNet model (depth conditioning)
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth",
    torch_dtype=torch.float32
).to(device)

# Load the Stable Diffusion pipeline with the ControlNet attached
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float32
).to(device)

# Load the depth estimation model (DPT / MiDaS)
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
depth_processor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")


def generate(input_image, prompt):
    # Ensure the input is a 3-channel RGB image
    image = input_image.convert("RGB")

    # Run depth estimation on the uploaded room image
    inputs = depth_processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = depth_model(**inputs)
    depth = outputs.predicted_depth.squeeze().cpu().numpy()

    # Prepare the depth map for ControlNet: normalize to 0-255 and
    # convert to a 3-channel PIL image (ControlNet expects RGB conditioning input)
    depth = cv2.normalize(depth, None, 0, 255, norm_type=cv2.NORM_MINMAX)
    depth_image = Image.fromarray(depth.astype(np.uint8)).convert("RGB")

    # Run image generation conditioned on the depth map
    result = pipe(
        prompt=prompt,
        image=depth_image,
        height=512,
        width=512,
        num_inference_steps=10
    ).images[0]
    return result


# Gradio interface
gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Upload Room Image"),
        gr.Textbox(
            label="Enter Interior Style Prompt",
            placeholder="e.g. modern Japanese living room"
        ),
    ],
    outputs=gr.Image(type="pil", label="Generated Room"),
    title="AI Room Redesign (ControlNet + Depth) 🏠💡",
    description="Upload a room image and get a redesigned version based on your style prompt using ControlNet Depth.",
).launch()
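
# Optional optimization (a hedged sketch, not part of the script above): when a CUDA GPU
# is available, loading the models with torch_dtype=torch.float16 and enabling attention
# slicing on the pipeline typically reduces VRAM usage and speeds up generation.
# The snippet below assumes a CUDA device; keep the float32 setup above for CPU-only runs.
#
#   controlnet = ControlNetModel.from_pretrained(
#       "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
#   ).to("cuda")
#   pipe = StableDiffusionControlNetPipeline.from_pretrained(
#       "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
#   ).to("cuda")
#   pipe.enable_attention_slicing()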