Spaces:

oscnet
/

zero123

Running

zero123 / app.py

oscnet

Deploy Zero123++

81a2e17 23 days ago

4.81 kB

	#!/usr/bin/env python3
	"""
	Zero123++ Gradio Demo
	用于 Hugging Face Spaces 部署
	"""

	import gradio as gr
	import torch
	from PIL import Image
	from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
	import os

	# Module-level cache for the diffusion pipeline; filled in once by load_model().
	pipeline = None


	def load_model():
	    """Load the Zero123++ pipeline into the module-level ``pipeline`` cache.

	    Does nothing when ``pipeline`` is already set. Otherwise the pipeline is
	    loaded from ``sudo-ai/zero123plus-v1.1``, its scheduler is replaced by an
	    ``EulerAncestralDiscreteScheduler`` built from the existing scheduler
	    config with ``timestep_spacing='trailing'``, and it is moved to CUDA
	    (float16) when available, else CPU (float32).

	    The global is assigned only after every configuration step succeeded, so
	    a failure part-way through leaves ``pipeline`` as ``None`` and the next
	    call retries instead of reusing a half-configured object.
	    """
	    global pipeline

	    if pipeline is not None:
	        return

	    print("正在加载模型...")

	    # Query CUDA once and derive both the device and the dtype from it.
	    use_cuda = torch.cuda.is_available()
	    device = 'cuda' if use_cuda else 'cpu'
	    dtype = torch.float16 if use_cuda else torch.float32

	    loaded = DiffusionPipeline.from_pretrained(
	        "sudo-ai/zero123plus-v1.1",
	        custom_pipeline="sudo-ai/zero123plus-pipeline",
	        torch_dtype=dtype,
	    )

	    # Swap the scheduler, reusing the config of the one shipped with the model.
	    loaded.scheduler = EulerAncestralDiscreteScheduler.from_config(
	        loaded.scheduler.config,
	        timestep_spacing='trailing',
	    )

	    loaded.to(device)

	    # Publish last: callers treat a non-None global as "ready to use".
	    pipeline = loaded
	    print(f"✓ 模型加载完成 (设备: {device})")

	def process_image(input_image, remove_bg=False):
	    """Generate the multi-view image for a single input picture.

	    The input is center-cropped to a square when its width and height
	    differ, resized to 320x320 when it is not already that size, and passed
	    to the module-level ``pipeline``.

	    Args:
	        input_image: PIL image to process. ``None`` yields ``None``.
	        remove_bg: Accepted for interface compatibility; not used by this
	            function (background removal is not implemented).

	    Returns:
	        The first image of the pipeline result (``pipeline(img).images[0]``),
	        or ``None`` when no input image was given.

	    Raises:
	        gr.Error: If model loading, preprocessing, or inference fails. The
	            original exception is attached as the cause.
	    """
	    if input_image is None:
	        return None

	    try:
	        # Loading happens inside the try so a lazy-load failure is reported
	        # to the user the same way as an inference failure.
	        load_model()

	        img = input_image

	        # Center-crop non-square images to the largest centered square.
	        width, height = img.size
	        if width != height:
	            side = min(width, height)
	            left = (width - side) // 2
	            top = (height - side) // 2
	            img = img.crop((left, top, left + side, top + side))

	        # Resize to the resolution used for inference.
	        target_size = 320
	        if img.size[0] != target_size:
	            img = img.resize((target_size, target_size), Image.LANCZOS)

	        print("正在生成多视图...")
	        return pipeline(img).images[0]

	    except Exception as e:
	        print(f"错误: {e}")
	        raise gr.Error(f"处理失败: {str(e)}") from e

	# Build the Gradio interface.
	# NOTE(review): this copy of the file has lost its indentation, so the nesting
	# of the layout context managers below cannot be read off the text — confirm
	# against the deployed file before restructuring.
	def create_demo():
	"""Build and return the Gradio Blocks UI for the demo.

	The UI consists of an introductory Markdown block, an input image with a
	"generate" button, an output image, a list of example images, a Markdown
	block with technical notes and the citation, and a click handler that
	calls ``process_image`` with the input image and shows the result in the
	output image.

	Returns:
	The ``gr.Blocks`` object created here (not yet launched).
	"""
	with gr.Blocks(title="Zero123++ Demo") as demo:
	gr.Markdown("""
	# Zero123++ 多视图生成

	将单张图片转换为 6 个不同角度的视图

	输入要求:
	- 建议使用正方形图片
	- 推荐分辨率 >= 320x320
	- 脚本会自动裁剪和调整非正方形图片

	输出说明:
	- 生成 6 个视图 (2行 x 3列)
	- 方位角: 30°, 90°, 150°, 210°, 270°, 330°
	- 仰角: 30°, -20°, 30°, -20°, 30°, -20°
	""")

	with gr.Row():
	with gr.Column():
	input_image = gr.Image(
	label="输入图片",
	type="pil",
	height=400
	)

	# Disabled background-removal checkbox; the click handler below passes
	# only the input image, so process_image runs with remove_bg=False.
	# remove_bg = gr.Checkbox(
	# label="移除背景 (实验性)",
	# value=False
	# )

	generate_btn = gr.Button("生成多视图", variant="primary")

	with gr.Column():
	output_image = gr.Image(
	label="多视图输出",
	type="pil",
	height=400
	)

	# Example inputs; paths are relative (presumably to the app's working
	# directory — verify the files exist in the deployment).
	gr.Examples(
	examples=[
	["examples/example1.png"],
	["examples/example2.png"],
	],
	inputs=input_image,
	label="示例图片"
	)

	gr.Markdown("""
	### 技术说明
	- 模型: [sudo-ai/zero123plus-v1.1](https://huggingface.co/sudo-ai/zero123plus-v1.1)
	- 首次运行需要加载模型,可能需要 20-30 秒
	- 生成时间约 30-60 秒 (取决于硬件)

	### 引用
	```bibtex
	@misc{shi2023zero123plus,
	title={Zero123++: a Single Image to Consistent Multi-view Diffusion Base Model},
	author={Ruoxi Shi and Hansheng Chen and Zhuoyang Zhang and Minghua Liu and Chao Xu and Xinyue Wei and Linghao Chen and Chong Zeng and Hao Su},
	year={2023},
	eprint={2310.15110},
	archivePrefix={arXiv},
	primaryClass={cs.CV}
	}
	```
	""")

	# Bind events: clicking the button runs process_image on the input image.
	generate_btn.click(
	fn=process_image,
	inputs=[input_image],
	outputs=output_image
	)

	return demo

	if __name__ == "__main__":
	    # Load the model up front so the first request does not pay for it.
	    load_model()

	    # Build the UI, enable request queuing, then serve.
	    app = create_demo()
	    app.queue()

	    # Listen on all interfaces, port 7860, without a public share link.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	    }
	    app.launch(**launch_options)