#!/usr/bin/env python3
"""
Zero123++ Gradio demo.

Intended for deployment on Hugging Face Spaces.
"""

import os
from typing import Optional

import gradio as gr
import torch
from PIL import Image
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler

# Hugging Face identifiers of the model weights and the custom pipeline code.
MODEL_ID = "sudo-ai/zero123plus-v1.1"
CUSTOM_PIPELINE_ID = "sudo-ai/zero123plus-pipeline"

# Side length (pixels) the input is resized to before inference.
TARGET_SIZE = 320

# Example inputs offered in the UI; paths are relative to the working directory.
EXAMPLE_IMAGES = (
    "examples/example1.png",
    "examples/example2.png",
)

# Module-level cache for the loaded pipeline (populated by load_model()).
pipeline = None


def load_model() -> None:
    """Load the Zero123++ pipeline into the module-level ``pipeline``.

    Does nothing when the pipeline has already been loaded. Uses CUDA with
    float16 when ``torch.cuda.is_available()`` is true, otherwise CPU with
    float32.
    """
    global pipeline

    if pipeline is not None:
        return

    print("正在加载模型...")

    # Pick device and dtype from CUDA availability.
    use_cuda = torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'
    dtype = torch.float16 if use_cuda else torch.float32

    # Load the pipeline.
    pipeline = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        custom_pipeline=CUSTOM_PIPELINE_ID,
        torch_dtype=dtype
    )

    # Swap in the Euler-ancestral scheduler, reusing the existing config.
    pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
        pipeline.scheduler.config,
        timestep_spacing='trailing'
    )

    pipeline.to(device)
    print(f"✓ 模型加载完成 (设备: {device})")


def _center_crop_square(img: Image.Image) -> Image.Image:
    """Return *img* center-cropped to a square; square inputs pass through."""
    width, height = img.size
    if width == height:
        return img

    side = min(width, height)
    # (w - side) and (w + side) share parity, so the crop box is exactly
    # side x side even when the margin is odd.
    return img.crop((
        (width - side) // 2,
        (height - side) // 2,
        (width + side) // 2,
        (height + side) // 2
    ))


def process_image(
    input_image: Optional[Image.Image],
    remove_bg: bool = False,
) -> Optional[Image.Image]:
    """Generate the multi-view image for a single input image.

    Args:
        input_image: PIL image to process, or ``None``.
        remove_bg: Accepted for interface compatibility; background removal
            is not implemented and this flag is currently ignored.

    Returns:
        The first image produced by the pipeline (the tiled multi-view
        image), or ``None`` when ``input_image`` is ``None``.

    Raises:
        gr.Error: If model loading, preprocessing or inference fails. The
            original exception is attached as ``__cause__``.
    """
    if input_image is None:
        return None

    try:
        # Load inside the try block so a loading failure is reported to the
        # UI through gr.Error just like an inference failure.
        load_model()

        # Preprocess: center-crop to a square, then resize to TARGET_SIZE.
        img = _center_crop_square(input_image)
        if img.size[0] != TARGET_SIZE:
            img = img.resize((TARGET_SIZE, TARGET_SIZE), Image.LANCZOS)

        # Run inference.
        print("正在生成多视图...")
        return pipeline(img).images[0]

    except Exception as e:
        print(f"错误: {e}")
        raise gr.Error(f"处理失败: {str(e)}") from e


def create_demo() -> gr.Blocks:
    """Build and return the Gradio Blocks interface (without launching it)."""
    with gr.Blocks(title="Zero123++ Demo") as demo:
        gr.Markdown("""
        # Zero123++ 多视图生成

        将单张图片转换为 6 个不同角度的视图

        **输入要求:**
        - 建议使用正方形图片
        - 推荐分辨率 >= 320x320
        - 脚本会自动裁剪和调整非正方形图片

        **输出说明:**
        - 生成 6 个视图 (3行 x 2列)
        - 方位角: 30°, 90°, 150°, 210°, 270°, 330°
        - 仰角: 30°, -20°, 30°, -20°, 30°, -20°
        """)

        with gr.Row():
            with gr.Column():
                input_image = gr.Image(
                    label="输入图片",
                    type="pil",
                    height=400
                )

                # Background-removal toggle, disabled until implemented:
                # remove_bg = gr.Checkbox(
                #     label="移除背景 (实验性)",
                #     value=False
                # )

                generate_btn = gr.Button("生成多视图", variant="primary")

            with gr.Column():
                output_image = gr.Image(
                    label="多视图输出",
                    type="pil",
                    height=400
                )

        # Only offer example files that are actually present, so a deployment
        # without the examples/ directory can still start.
        # NOTE(review): assumes gr.Examples cannot handle missing files - confirm.
        available_examples = [
            [path] for path in EXAMPLE_IMAGES if os.path.isfile(path)
        ]
        if available_examples:
            gr.Examples(
                examples=available_examples,
                inputs=input_image,
                label="示例图片"
            )

        gr.Markdown("""
        ### 技术说明
        - 模型: [sudo-ai/zero123plus-v1.1](https://huggingface.co/sudo-ai/zero123plus-v1.1)
        - 首次运行需要加载模型,可能需要 20-30 秒
        - 生成时间约 30-60 秒 (取决于硬件)

        ### 引用
        ```bibtex
        @misc{shi2023zero123plus,
            title={Zero123++: a Single Image to Consistent Multi-view Diffusion Base Model},
            author={Ruoxi Shi and Hansheng Chen and Zhuoyang Zhang and Minghua Liu and Chao Xu and Xinyue Wei and Linghao Chen and Chong Zeng and Hao Su},
            year={2023},
            eprint={2310.15110},
            archivePrefix={arXiv},
            primaryClass={cs.CV}
        }
        ```
        """)

        # Wire the button to the processing function.
        generate_btn.click(
            fn=process_image,
            inputs=[input_image],
            outputs=output_image
        )

    return demo


if __name__ == "__main__":
    # Preload the model so the first request does not pay the load cost.
    load_model()

    # Build and launch the demo.
    demo = create_demo()
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )