"""Gradio demo for the VIBE image editor.

Exposes three tabs: instruction-based image editing with the original
(CFG) checkpoint, image editing with the CFG-distilled checkpoint, and
text-to-image generation with the original checkpoint.
"""

# NOTE(review): `spaces` is kept as the first import, exactly as in the
# original file. ZeroGPU presumably expects it to be imported before any
# CUDA-touching library — confirm against the ZeroGPU docs before reordering.
import spaces
import gradio as gr
import os
import pathlib
import random
import torch
import numpy as np
from huggingface_hub import snapshot_download
from vibe.editor import ImageEditor

# Upper bound of the seed sliders: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Instruction used for every image-editing example row.
_EXAMPLE_EDIT_PROMPT = "let this case swim in the river"


# --- Loading Pipelines ---
def load_original_pipeline():
    """Download the original VIBE checkpoint and build its editor.

    Returns:
        An ``ImageEditor`` constructed on ``"cuda"`` with the default
        guidance settings used for the original model (text guidance 4.5,
        image guidance 1.2, 20 inference steps).
    """
    print("Loading Original Model...")
    model_path = snapshot_download(
        repo_id="iitolstykh/VIBE-Image-Edit",
        repo_type="model",
    )
    # Load model with default guidance settings for the original
    editor = ImageEditor(
        checkpoint_path=model_path,
        image_guidance_scale=1.2,
        guidance_scale=4.5,
        num_inference_steps=20,
        device="cuda",
    )
    print(f"Original Model loaded. Device: {editor.pipe.device}")
    return editor


def load_distilled_pipeline():
    """Download the CFG-distilled VIBE checkpoint and build its editor.

    Returns:
        An ``ImageEditor`` constructed on ``"cuda"`` with both guidance
        scales set to 0.0 (CFG disabled) and 20 inference steps.
    """
    print("Loading Distilled CFG Model...")
    model_path = snapshot_download(
        repo_id="iitolstykh/VIBE-Image-Edit-DistilledCFG",
        repo_type="model",
    )
    # Load model with disabled cfg.
    editor = ImageEditor(
        checkpoint_path=model_path,
        num_inference_steps=20,
        guidance_scale=0.0,
        image_guidance_scale=0.0,
        device="cuda",
    )
    print(f"Distilled Model loaded. Device: {editor.pipe.device}")
    return editor


# Initialize pipelines globally (runs at import time, before the UI is built)
pipeline_original = load_original_pipeline()
pipeline_distilled = load_distilled_pipeline()


# --- Inference Functions ---
@spaces.GPU(duration=180)
def edit_img_original(
    pil_image,
    edit_prompt: str,
    sample_steps: int,
    scale: float,
    image_guidance_scale: float,
    seed: int,
    progress=gr.Progress(track_tqdm=True),
):
    """Edit an image with the original model, using CFG.

    Args:
        pil_image: Conditioning image from the ``gr.Image(type="pil")``
            input, or ``None`` when nothing was uploaded.
        edit_prompt: Editing instruction.
        sample_steps: Number of inference steps.
        scale: Text guidance scale.
        image_guidance_scale: Image guidance scale.
        seed: Seed forwarded to the pipeline.
        progress: Not used in the body; declared with ``track_tqdm=True``
            so Gradio can report progress for the call.

    Returns:
        The first element of the pipeline's output.

    Raises:
        gr.Error: If no image was provided.
    """
    if pil_image is None:
        raise gr.Error("Please upload an image.")

    edited_image = pipeline_original.generate_edited_image(
        instruction=edit_prompt,
        conditioning_image=pil_image,
        num_images_per_prompt=1,
        num_inference_steps=sample_steps,
        guidance_scale=scale,
        image_guidance_scale=image_guidance_scale,
        seed=seed,
    )
    return edited_image[0]


@spaces.GPU(duration=120)
def edit_img_distilled(
    pil_image,
    edit_prompt: str,
    sample_steps: int,
    seed: int,
    progress=gr.Progress(track_tqdm=True),
):
    """Edit an image with the distilled model (no CFG).

    Args:
        pil_image: Conditioning image from the ``gr.Image(type="pil")``
            input, or ``None`` when nothing was uploaded.
        edit_prompt: Editing instruction.
        sample_steps: Number of inference steps.
        seed: Seed forwarded to the pipeline.
        progress: Not used in the body; declared with ``track_tqdm=True``
            so Gradio can report progress for the call.

    Returns:
        The first element of the pipeline's output.

    Raises:
        gr.Error: If no image was provided.
    """
    if pil_image is None:
        raise gr.Error("Please upload an image.")

    # Both guidance scales are pinned to 0.0 (CFG disabled), matching how
    # the distilled pipeline is constructed; the UI exposes no sliders for
    # them.
    edited_image = pipeline_distilled.generate_edited_image(
        instruction=edit_prompt,
        conditioning_image=pil_image,
        num_images_per_prompt=1,
        num_inference_steps=sample_steps,
        guidance_scale=0.0,
        image_guidance_scale=0.0,
        seed=seed,
    )
    return edited_image[0]


@spaces.GPU(duration=180)
def gen_img(
    prompt: str,
    height: int,
    width: int,
    sample_steps: int,
    scale: float,
    seed: int,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate an image from text using the original model.

    Args:
        prompt: Text prompt, passed to the pipeline as ``instruction``.
        height: Output height, passed as ``t2i_height``.
        width: Output width, passed as ``t2i_width``.
        sample_steps: Number of inference steps.
        scale: Text guidance scale.
        seed: Seed forwarded to the pipeline.
        progress: Not used in the body; declared with ``track_tqdm=True``
            so Gradio can report progress for the call.

    Returns:
        The first element of the pipeline's output.
    """
    generated_images = pipeline_original.generate_edited_image(
        instruction=prompt,
        num_images_per_prompt=1,
        t2i_height=height,
        t2i_width=width,
        num_inference_steps=sample_steps,
        guidance_scale=scale,
        seed=seed,
    )
    return generated_images[0]


# --- UI Construction ---
# NOTE(review): the indentation of the original file was lost, so the exact
# container nesting below is a reconstruction: the action button is placed in
# the input column (after the accordion) and the examples at tab level (after
# the row). Confirm against the deployed layout.
def _example_image_paths(image_dir):
    """Return the POSIX paths of the PNG files in *image_dir*, sorted.

    Creates the directory when it does not exist, so a missing directory
    results in an empty list.
    """
    if not image_dir.exists():
        # Fallback if the local images dir doesn't exist, though usually it
        # should in the space.
        image_dir.mkdir(parents=True, exist_ok=True)
    return [path.as_posix() for path in sorted(image_dir.glob("*.png"))]


def _steps_slider():
    """Create the "Sample Steps" slider shared by all tabs."""
    return gr.Slider(
        label="Sample Steps", minimum=1, maximum=100, value=20, step=1
    )


def _seed_slider():
    """Create the "Seed" slider shared by all tabs."""
    return gr.Slider(
        label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42
    )


def _build_edit_tab(image_paths):
    """Build the tab that edits images with the original (CFG) model."""
    with gr.Tab(label="Image Editing"):
        gr.Markdown(
            "### Image Editing demo for VIBE-Image-Edit model: "
            "https://huggingface.co/iitolstykh/VIBE-Image-Edit"
        )
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Input", type="pil")
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder=(
                        "Please enter your prompt "
                        "(e.g., 'Add a cat on the sofa')"
                    ),
                )
                with gr.Accordion("Advanced Settings", open=True):
                    sample_steps = _steps_slider()
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=0.1,
                        maximum=30.0,
                        value=4.5,
                        step=0.1,
                    )
                    image_guidance_scale = gr.Slider(
                        label="Image Guidance Scale",
                        minimum=0.1,
                        maximum=30.0,
                        value=1.2,
                        step=0.1,
                    )
                    seed = _seed_slider()
                button = gr.Button("Edit Image", variant="primary")
            with gr.Column():
                output = gr.Image(label="Result", type="pil")

        inputs = [
            input_image,
            prompt,
            sample_steps,
            guidance_scale,
            image_guidance_scale,
            seed,
        ]
        # Only offer examples when the images directory actually has PNGs.
        if image_paths:
            gr.Examples(
                # [Image, Prompt, Steps, Guid_Scale, Img_Guid_Scale, Seed]
                examples=[
                    [path, _EXAMPLE_EDIT_PROMPT, 20, 4.5, 1.2, 42]
                    for path in image_paths
                ],
                inputs=inputs,
            )
        button.click(fn=edit_img_original, inputs=inputs, outputs=[output])


def _build_distilled_tab(image_paths):
    """Build the tab that edits images with the CFG-distilled model."""
    with gr.Tab(label="Image Editing [CFG Distill]"):
        gr.Markdown(
            "### Image Editing demo for VIBE-Image-Edit model: "
            "https://huggingface.co/iitolstykh/VIBE-Image-Edit-DistilledCFG"
        )
        gr.Markdown(
            "*This model runs without CFG, providing faster inference.*"
        )
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Input", type="pil")
                prompt = gr.Textbox(
                    label="Prompt", placeholder="Please enter your prompt"
                )
                with gr.Accordion("Advanced Settings", open=True):
                    sample_steps = _steps_slider()
                    # No guidance sliders here: CFG is disabled for this
                    # model.
                    seed = _seed_slider()
                button = gr.Button("Edit Image (Fast)", variant="primary")
            with gr.Column():
                output = gr.Image(label="Result", type="pil")

        inputs = [input_image, prompt, sample_steps, seed]
        # Only offer examples when the images directory actually has PNGs.
        if image_paths:
            gr.Examples(
                # [Image, Prompt, Steps, Seed]
                examples=[
                    [path, _EXAMPLE_EDIT_PROMPT, 20, 42]
                    for path in image_paths
                ],
                inputs=inputs,
            )
        button.click(fn=edit_img_distilled, inputs=inputs, outputs=[output])


def _build_generation_tab():
    """Build the text-to-image tab backed by the original model."""
    with gr.Tab(label="Image Generation"):
        gr.Markdown(
            "### Text-to-image demo for VIBE-Image-Edit model: "
            "https://huggingface.co/iitolstykh/VIBE-Image-Edit"
        )
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(
                    label="Prompt", placeholder="Please enter your prompt"
                )
                with gr.Accordion("Advanced Settings", open=True):
                    height = gr.Slider(
                        label="Height",
                        minimum=64,
                        maximum=2048,
                        value=1024,
                        step=64,
                    )
                    width = gr.Slider(
                        label="Width",
                        minimum=64,
                        maximum=2048,
                        value=1024,
                        step=64,
                    )
                    sample_steps = _steps_slider()
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=0.1,
                        maximum=30.0,
                        value=6.5,
                        step=0.1,
                    )
                    seed = _seed_slider()
                button = gr.Button("Generate Image", variant="primary")
            with gr.Column():
                output = gr.Image(label="Result", type="pil")

        inputs = [prompt, height, width, sample_steps, guidance_scale, seed]
        gr.Examples(
            # [Prompt, H, W, Steps, Scale, Seed]
            examples=[
                [
                    "View through the clouds at Earth from a plane",
                    512,
                    1024,
                    20,
                    6.5,
                    234,
                ]
            ],
            inputs=inputs,
        )
        button.click(fn=gen_img, inputs=inputs, outputs=[output])


def build_demo():
    """Assemble the three-tab Gradio app and return the ``gr.Blocks``.

    Example images are read from the ``images`` directory, resolved
    relative to the current working directory; the directory is scanned
    once and the result is shared by both image-editing tabs.
    """
    image_paths = _example_image_paths(pathlib.Path("images"))

    with gr.Blocks() as demo:
        gr.Markdown("# VIBE: Visual Instruction Based Editor")
        with gr.Tabs():
            # --- TAB 1: Original Image Editing ---
            _build_edit_tab(image_paths)
            # --- TAB 2: Distilled Image Editing ---
            _build_distilled_tab(image_paths)
            # --- TAB 3: Text to Image ---
            _build_generation_tab()
    return demo


if __name__ == "__main__":
    demo = build_demo()
    demo.queue(max_size=100).launch()