# Source: zero123 / app.py — uploaded by oscnet, commit 81a2e17 ("Deploy Zero123++"), 4.81 kB
#!/usr/bin/env python3
"""
Zero123++ Gradio Demo
用于 Hugging Face Spaces 部署
"""
import gradio as gr
import torch
from PIL import Image
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
import os
# Module-level cache for the diffusion pipeline; stays None until load_model() assigns it.
pipeline = None
def load_model():
    """Load the Zero123++ pipeline once and cache it in the module-level ``pipeline``.

    Calls made while a pipeline is already cached return immediately.
    When CUDA is available the pipeline is loaded in float16 and moved to the
    GPU; otherwise it is loaded in float32 and kept on the CPU.

    The global is assigned only after the scheduler has been replaced and the
    pipeline has been moved to the target device. If any of those steps
    raises, ``pipeline`` keeps its previous value (``None``), so the next call
    retries the load instead of reusing a half-configured object.
    """
    global pipeline
    if pipeline is not None:
        return

    print("正在加载模型...")

    # Query CUDA once and derive both the device and the dtype from it.
    use_cuda = torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'
    dtype = torch.float16 if use_cuda else torch.float32

    # Build into a local first; the global is published only on success.
    loaded = DiffusionPipeline.from_pretrained(
        "sudo-ai/zero123plus-v1.1",
        custom_pipeline="sudo-ai/zero123plus-pipeline",
        torch_dtype=dtype,
    )

    # Swap in an Euler-ancestral scheduler built from the existing scheduler
    # config, overriding the timestep spacing to 'trailing'.
    loaded.scheduler = EulerAncestralDiscreteScheduler.from_config(
        loaded.scheduler.config,
        timestep_spacing='trailing',
    )
    loaded.to(device)

    pipeline = loaded
    print(f"✓ 模型加载完成 (设备: {device})")
def _center_square_resize(image, target_size):
    """Center-crop *image* to a square, then resize it to ``target_size`` px per side."""
    width, height = image.size
    if width != height:
        side = min(width, height)
        left = (width - side) // 2
        top = (height - side) // 2
        image = image.crop((left, top, left + side, top + side))
    # Skip the resample when the (now square) image already has the target size.
    if image.size[0] != target_size:
        image = image.resize((target_size, target_size), Image.LANCZOS)
    return image


def process_image(input_image, remove_bg=False):
    """Generate the multi-view image for a single input picture.

    Args:
        input_image: PIL image to process, or ``None`` when nothing was
            provided.
        remove_bg: Reserved for background removal; not implemented yet and
            currently ignored.

    Returns:
        The first image returned by the pipeline (the multi-view composite),
        or ``None`` when ``input_image`` is ``None``.

    Raises:
        gr.Error: If model loading, preprocessing or inference fails. The
            original exception is chained as ``__cause__``.
    """
    if input_image is None:
        return None

    try:
        # Loading happens inside the try block so that a load failure is
        # reported through gr.Error like any other processing failure.
        load_model()

        # Non-square inputs are center-cropped, then everything is resized
        # to 320x320 before inference.
        img = _center_square_resize(input_image, 320)

        print("正在生成多视图...")
        return pipeline(img).images[0]
    except Exception as e:
        print(f"错误: {e}")
        raise gr.Error(f"处理失败: {str(e)}") from e
# Build the Gradio interface
def create_demo():
    """Assemble the Blocks UI and wire the generate button to ``process_image``.

    The layout is an introduction, a row with the input pane (image upload
    plus button) and the output pane, a list of example images, and a notes
    section with the citation.

    Returns:
        The constructed ``gr.Blocks`` object; launching is left to the caller.
    """
    # Markdown shown above the panes. The leading and trailing empty entries
    # reproduce the newlines that surround the text.
    intro_text = "\n".join((
        "",
        "# Zero123++ 多视图生成",
        "将单张图片转换为 6 个不同角度的视图",
        "**输入要求:**",
        "- 建议使用正方形图片",
        "- 推荐分辨率 >= 320x320",
        "- 脚本会自动裁剪和调整非正方形图片",
        "**输出说明:**",
        "- 生成 6 个视图 (2行 x 3列)",
        "- 方位角: 30°, 90°, 150°, 210°, 270°, 330°",
        "- 仰角: 30°, -20°, 30°, -20°, 30°, -20°",
        "",
    ))

    # Markdown shown below the examples: technical notes and the BibTeX entry.
    notes_text = "\n".join((
        "",
        "### 技术说明",
        "- 模型: [sudo-ai/zero123plus-v1.1](https://huggingface.co/sudo-ai/zero123plus-v1.1)",
        "- 首次运行需要加载模型,可能需要 20-30 秒",
        "- 生成时间约 30-60 秒 (取决于硬件)",
        "### 引用",
        "```bibtex",
        "@misc{shi2023zero123plus,",
        "title={Zero123++: a Single Image to Consistent Multi-view Diffusion Base Model},",
        "author={Ruoxi Shi and Hansheng Chen and Zhuoyang Zhang and Minghua Liu and Chao Xu and Xinyue Wei and Linghao Chen and Chong Zeng and Hao Su},",
        "year={2023},",
        "eprint={2310.15110},",
        "archivePrefix={arXiv},",
        "primaryClass={cs.CV}",
        "}",
        "```",
        "",
    ))

    sample_inputs = [
        ["examples/example1.png"],
        ["examples/example2.png"],
    ]

    with gr.Blocks(title="Zero123++ Demo") as app:
        gr.Markdown(intro_text)

        with gr.Row():
            with gr.Column():
                source_view = gr.Image(label="输入图片", type="pil", height=400)
                # A "remove background" checkbox was sketched here but is
                # disabled, so only the image is passed to the handler.
                run_button = gr.Button("生成多视图", variant="primary")
            with gr.Column():
                result_view = gr.Image(label="多视图输出", type="pil", height=400)

        gr.Examples(examples=sample_inputs, inputs=source_view, label="示例图片")
        gr.Markdown(notes_text)

        # Clicking the button runs process_image on the uploaded picture and
        # shows its return value in the output pane.
        run_button.click(fn=process_image, inputs=[source_view], outputs=result_view)

    return app
if __name__ == "__main__":
    # Load the model before the server starts.
    load_model()

    # Listen on all interfaces at port 7860, with share disabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }

    demo = create_demo()
    demo.queue()
    demo.launch(**launch_options)