Spaces:

dn6
/

matrix-game-2

Runtime error

File size: 2,692 Bytes

f6ff21d
 
 
 
 
44bba6e
f6ff21d
 
44bba6e
f6ff21d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ceac0c
f6ff21d
 
a328e72
11a7dfe
eb033e5
ac3d493
f6ff21d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44bba6e
 
 
 
 
f6ff21d
 
44bba6e
f6ff21d
 
6ceac0c
f6ff21d
 
 
 
 
44bba6e
f6ff21d
 
44bba6e
f6ff21d
 
 
44bba6e
f6ff21d
a328e72
6ceac0c
f6ff21d
 
ac3d493

import tempfile

import torch
import spaces

import gradio as gr
from diffusers import ModularPipelineBlocks
from diffusers.utils import export_to_video
from diffusers.modular_pipelines import WanModularPipeline


class MatrixGameWanModularPipeline(WanModularPipeline):
    """
    `WanModularPipeline` subclass used by the Matrix Game demo.

    The subclass changes nothing except the two default sample dimensions
    exposed through the properties below.

    <Tip warning={true}>

        Experimental API: expect it to change in future releases.

    </Tip>
    """

    # NOTE(review): presumably these are latent-space sizes that the parent
    # class scales up to pixel dimensions -- confirm against WanModularPipeline.
    _SAMPLE_HEIGHT = 44
    _SAMPLE_WIDTH = 80

    @property
    def default_sample_height(self):
        """Default sample height reported by this pipeline (44)."""
        return self._SAMPLE_HEIGHT

    @property
    def default_sample_width(self):
        """Default sample width reported by this pipeline (80)."""
        return self._SAMPLE_WIDTH


# Hub repositories used below. The first one supplies both the block
# definitions and the pipeline passed to MatrixGameWanModularPipeline.
_PIPELINE_REPO = "diffusers/matrix-game-2-modular"
_IMAGE_TO_ACTION_REPO = "dn6/matrix-game-image-to-action"

blocks = ModularPipelineBlocks.from_pretrained(
    _PIPELINE_REPO,
    trust_remote_code=True,
)
image_to_action_block = ModularPipelineBlocks.from_pretrained(
    _IMAGE_TO_ACTION_REPO,
    trust_remote_code=True,
)

# Register the extra block under the "image_to_action" key; the trailing 0 is
# presumably the insertion index (i.e. run it first) -- confirm against the
# `sub_blocks.insert` signature.
blocks.sub_blocks.insert("image_to_action", image_to_action_block, 0)

pipe = MatrixGameWanModularPipeline(blocks, _PIPELINE_REPO)

# Everything is loaded in bfloat16 except the "vae" entry, which is kept in float32.
pipe.load_components(
    trust_remote_code=True,
    device_map="cuda",
    torch_dtype={"default": torch.bfloat16, "vae": torch.float32},
)

@spaces.GPU(duration=120)
def predict(image, prompt):
    """Run the pipeline on *image* and *prompt* and return the rendered video path.

    Args:
        image: Input picture coming from the ``gr.Image`` component; ``None``
            when the user has not provided one.
        prompt: Free-text description of the desired movement.

    Returns:
        The path returned by ``export_to_video`` for the first generated video.

    Raises:
        gr.Error: If no input image was supplied.
    """
    # Fail with a readable UI message instead of an error from deep inside the pipeline.
    if image is None:
        raise gr.Error("Please provide an input image before running.")

    output = pipe(image=image, prompt=prompt, num_frames=81)

    # Use a unique file per call: a fixed "output.mp4" is shared by all
    # requests, so concurrent users could overwrite each other's result.
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can still read it; the file is not removed here.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        video_path = tmp.name

    return export_to_video(output.values["videos"][0], video_path)

# Placeholder list of example inputs; it is empty and is not referenced by
# any of the UI code visible in this file.
examples = []

# Stylesheet passed to gr.Blocks(css=...). "#col-container" matches the
# elem_id of the outer column; no component in the visible layout uses the
# "logo-title" or "edit_text" ids, so those rules currently match nothing here.
css = """
#col-container {
    margin: 0 auto;
    max-width: 1024px;
}
#logo-title {
    text-align: center;
}
#logo-title img {
    width: 400px;
}
#edit_text{margin-top: -62px !important}
"""
with gr.Blocks(css=css) as demo:
    # Intro blurb rendered at the top of the page.
    intro_markdown = (
        "Image to Video [Modular Diffusers](https://huggingface.co/docs/diffusers/v0.35.1/en/modular_diffusers/overview) "
        "Pipeline that uses Qwen 2.5 VL 72B Instruct and [Matrix Game 2.0](https://huggingface.co/Skywork/Matrix-Game-2.0) "
        "to allow you to move through images. Works best for scenes with static elements"
    )

    with gr.Column(elem_id="col-container"):
        gr.Markdown(intro_markdown)

        # First row: input image in one column, generated video in the other.
        with gr.Row():
            with gr.Column():
                image = gr.Image(
                    label="Input Image",
                    show_label=False,
                    type="pil",
                    interactive=True,
                )
            with gr.Column():
                result = gr.Video(label="Result")

        # Second row: free-text movement prompt.
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                placeholder="describe how you would like to move in the image",
                container=False,
            )

        # Third row: the button that starts generation.
        with gr.Row():
            run_button = gr.Button("Run!", variant="primary")

    # Clicking the button calls predict(image, prompt) and shows its return
    # value in the video component.
    gr.on(
        triggers=[run_button.click],
        fn=predict,
        inputs=[image, prompt],
        outputs=[result],
    )

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()