|
|
|
|
|
""" |
|
|
Zero123++ Gradio Demo

Intended for deployment on Hugging Face Spaces.
|
|
""" |
|
|
|
|
|
import gradio as gr |
|
|
import torch |
|
|
from PIL import Image |
|
|
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler |
|
|
import os |
|
|
|
|
|
|
|
|
# Module-level cache for the Zero123++ pipeline; populated by load_model().
pipeline = None


def load_model():
    """Load the Zero123++ diffusion pipeline into the module-level cache.

    Does nothing when ``pipeline`` is already set. Otherwise the pipeline is
    created with ``DiffusionPipeline.from_pretrained``, its scheduler is
    replaced by an ``EulerAncestralDiscreteScheduler`` built from the existing
    scheduler config with ``timestep_spacing='trailing'``, and it is moved to
    CUDA when available (float16) or kept on the CPU (float32).

    The global is assigned only after all of those steps have succeeded, so a
    failure part-way through (for example while moving to the device) leaves
    ``pipeline`` as ``None`` and the next call retries instead of reusing a
    half-configured object.
    """
    global pipeline

    if pipeline is not None:
        return

    print("正在加载模型...")

    # Query CUDA once so the device and dtype choices cannot disagree.
    use_cuda = torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'
    dtype = torch.float16 if use_cuda else torch.float32

    pipe = DiffusionPipeline.from_pretrained(
        "sudo-ai/zero123plus-v1.1",
        custom_pipeline="sudo-ai/zero123plus-pipeline",
        torch_dtype=dtype
    )

    pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
        pipe.scheduler.config,
        timestep_spacing='trailing'
    )

    pipe.to(device)

    # Publish only the fully configured pipeline.
    pipeline = pipe
    print(f"✓ 模型加载完成 (设备: {device})")
|
|
|
|
|
def process_image(input_image, remove_bg=False):
    """Generate the multi-view image for one input picture.

    The input is center-cropped to a square when its sides differ, resized to
    320x320 when its width is not already 320, and then passed to the
    module-level ``pipeline``.

    Args:
        input_image: PIL image to process, or ``None`` when nothing was
            provided.
        remove_bg: Reserved for background removal; not implemented and
            currently ignored.

    Returns:
        The first image of the pipeline result, or ``None`` when
        ``input_image`` is ``None``.

    Raises:
        gr.Error: If loading the model, preparing the image or running the
            pipeline fails. The original exception is attached as the cause.
    """
    if input_image is None:
        return None

    try:
        # Loading sits inside the handler so that a failed model load is
        # reported to the user the same way as a failed generation.
        load_model()

        img = input_image

        # Center-crop to a square using the shorter side.
        width, height = img.size
        if width != height:
            side = min(width, height)
            img = img.crop((
                (width - side) // 2,
                (height - side) // 2,
                (width + side) // 2,
                (height + side) // 2,
            ))

        target_size = 320
        if img.size[0] != target_size:
            img = img.resize((target_size, target_size), Image.LANCZOS)

        print("正在生成多视图...")
        result = pipeline(img).images[0]

        return result

    except Exception as e:
        print(f"错误: {e}")
        # Chain explicitly so the root cause stays attached to the UI error.
        raise gr.Error(f"处理失败: {str(e)}") from e
|
|
|
|
|
|
|
|
def create_demo():
    """Build the Gradio Blocks interface for the demo.

    The layout is an introductory Markdown panel, a row with an input column
    (image plus generate button) and an output column (result image), a set
    of example images, and a closing Markdown panel with technical notes and
    the citation. The button is wired to ``process_image``.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    # Static content is prepared up front to keep the layout code compact.
    intro_text = """
        # Zero123++ 多视图生成

        将单张图片转换为 6 个不同角度的视图

        **输入要求:**
        - 建议使用正方形图片
        - 推荐分辨率 >= 320x320
        - 脚本会自动裁剪和调整非正方形图片

        **输出说明:**
        - 生成 6 个视图 (2行 x 3列)
        - 方位角: 30°, 90°, 150°, 210°, 270°, 330°
        - 仰角: 30°, -20°, 30°, -20°, 30°, -20°
        """

    notes_text = """
        ### 技术说明
        - 模型: [sudo-ai/zero123plus-v1.1](https://huggingface.co/sudo-ai/zero123plus-v1.1)
        - 首次运行需要加载模型,可能需要 20-30 秒
        - 生成时间约 30-60 秒 (取决于硬件)

        ### 引用
        ```bibtex
        @misc{shi2023zero123plus,
              title={Zero123++: a Single Image to Consistent Multi-view Diffusion Base Model},
              author={Ruoxi Shi and Hansheng Chen and Zhuoyang Zhang and Minghua Liu and Chao Xu and Xinyue Wei and Linghao Chen and Chong Zeng and Hao Su},
              year={2023},
              eprint={2310.15110},
              archivePrefix={arXiv},
              primaryClass={cs.CV}
        }
        ```
        """

    sample_rows = [
        ["examples/example1.png"],
        ["examples/example2.png"],
    ]

    with gr.Blocks(title="Zero123++ Demo") as demo:
        gr.Markdown(intro_text)

        with gr.Row():
            # Left column: picture upload and the trigger button.
            with gr.Column():
                source_view = gr.Image(label="输入图片", type="pil", height=400)
                run_button = gr.Button("生成多视图", variant="primary")

            # Right column: the generated result.
            with gr.Column():
                result_view = gr.Image(label="多视图输出", type="pil", height=400)

        gr.Examples(examples=sample_rows, inputs=source_view, label="示例图片")

        gr.Markdown(notes_text)

        # Only the image is passed, so process_image runs with its default
        # remove_bg value.
        run_button.click(
            fn=process_image,
            inputs=[source_view],
            outputs=result_view,
        )

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Load the model before the server starts; process_image's own
    # load_model() call then returns immediately.
    load_model()

    demo = create_demo()

    # Enable queuing, then serve on all interfaces at port 7860 without a
    # public share link.
    demo.queue().launch(server_name="0.0.0.0", server_port=7860, share=False)
|
|
|