import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
import torch
from PIL import Image
import numpy as np
import cv2

# Detect the GPU, or fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the ControlNet model (depth conditioning)
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float32
).to(device)

# Load the Stable Diffusion v1.5 pipeline with the depth ControlNet attached
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float32
).to(device)
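
# Optional optimization (assumes a CUDA GPU): loading both models with
# torch_dtype=torch.float16 typically reduces memory use and speeds up inference;
# float32 is kept here so the script also runs on the CPU fallback.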

# Load the MiDaS (DPT) depth estimation model and its processor
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
depth_processor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")

# Generate a redesigned room from an input photo and a style prompt
def generate(input_image, prompt):
    # Make sure the input is a 3-channel RGB image
    image = input_image.convert("RGB")

    # Preprocess the image for the DPT depth estimator
    inputs = depth_processor(images=image, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = depth_model(**inputs)
        depth = outputs.predicted_depth.squeeze().cpu().numpy()

    # Prepare the depth map for ControlNet: normalize to 0-255 and convert to a
    # 3-channel PIL image (the depth ControlNet conditioning expects an RGB image)
    depth = cv2.normalize(depth, None, 0, 255, norm_type=cv2.NORM_MINMAX)
    depth_image = Image.fromarray(depth.astype(np.uint8)).convert("RGB")

    # Run the ControlNet-guided image generation
    result = pipe(
        prompt=prompt,
        image=depth_image,
        height=512,
        width=512,
        num_inference_steps=10  # low step count keeps the demo fast; increase for better quality
    ).images[0]

    return result

# Gradio interface
gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="Upload Room Image"),
        gr.Textbox(label="Enter Interior Style Prompt", placeholder="e.g. modern Japanese living room"),
    ],
    outputs=gr.Image(type="pil", label="Generated Room"),
    title="AI Room Redesign (ControlNet + Depth) πŸ πŸ’‘",
    description="Upload a room image and get a redesigned version based on your style prompt using ControlNet Depth.",
).launch()
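
# To try it locally: run this script with Python; Gradio serves the interface at
# http://127.0.0.1:7860 by default (pass share=True to launch() for a temporary public link).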