import gradio as gr
import torch
import numpy as np
from PIL import Image
import cv2
print("🚀 Starting SAM2 App v2.1 - OPTIMIZED...")
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"📱 Using device: {device}")
model = None
processor = None
def load_model():
global model, processor
if model is None:
print("📦 Loading SAM model...")
try:
from transformers import SamModel, SamProcessor
model_name = "facebook/sam-vit-large"
processor = SamProcessor.from_pretrained(model_name)
model = SamModel.from_pretrained(model_name)
model.to(device)
print(f"✅ Model loaded: {model_name}")
except Exception as e:
print(f"❌ Error: {e}, falling back to base model")
model_name = "facebook/sam-vit-base"
processor = SamProcessor.from_pretrained(model_name)
model = SamModel.from_pretrained(model_name)
model.to(device)
return model, processor
def prepare_image(image, max_size=1024):
if isinstance(image, np.ndarray):
image_pil = Image.fromarray(image)
else:
image_pil = image
if image_pil.mode != 'RGB':
image_pil = image_pil.convert('RGB')
image_np = np.array(image_pil)
h, w = image_np.shape[:2]
if max(h, w) > max_size:
scale = max_size / max(h, w)
new_h, new_w = int(h * scale), int(w * scale)
image_pil = image_pil.resize((new_w, new_h), Image.Resampling.LANCZOS)
image_np = np.array(image_pil)
return image_pil, image_np
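# Example: a 4000x3000 photo gets scale = 1024/4000 = 0.256 and comes back
# as 1024x768; images already within max_size pass through unchanged.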
def refine_mask(mask, kernel_size=5):
"""Glättet Maskenkanten"""
mask_uint8 = (mask > 0).astype(np.uint8) * 255
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
mask_closed = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, kernel)
mask_refined = cv2.morphologyEx(mask_closed, cv2.MORPH_OPEN, kernel)
return mask_refined > 0
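# Illustrative sketch of what refine_mask does on a synthetic mask
# (not executed by the app):
#
#   speckled = np.zeros((64, 64), dtype=bool)
#   speckled[16:48, 16:48] = True   # solid blob
#   speckled[30, 30] = False        # small hole -> filled by MORPH_CLOSE
#   speckled[2, 2] = True           # lone pixel -> removed by MORPH_OPEN
#   cleaned = refine_mask(speckled)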
def segment_automatic(image, quality="high", merge_parts=True):
    """
    Optimized automatic segmentation.
    Fast and precise: merges several candidate masks into one object.
    """
    if image is None:
        return None, {"error": "No image uploaded"}
try:
print(f"🔄 Starting segmentation (quality: {quality}, merge: {merge_parts})...")
model, processor = load_model()
image_pil, image_np = prepare_image(image)
h, w = image_np.shape[:2]
center_x, center_y = w // 2, h // 2
        # Single-point inference with multimask_output
inputs = processor(
image_pil,
input_points=[[[center_x, center_y]]],
input_labels=[[1]],
return_tensors="pt"
).to(device)
print("🧠 Running inference...")
with torch.no_grad():
outputs = model(**inputs, multimask_output=True)
masks = processor.image_processor.post_process_masks(
outputs.pred_masks.cpu(),
inputs["original_sizes"].cpu(),
inputs["reshaped_input_sizes"].cpu()
)[0]
scores = outputs.iou_scores.cpu().numpy()
if scores.ndim > 1:
scores = scores.flatten()
print(f"✅ Got {len(scores)} masks with scores: {scores}")
        # Smart merging: combine all masks with a good score
        if merge_parts:
            combined_mask = np.zeros((h, w), dtype=bool)
            masks_used = 0
            for idx, score in enumerate(scores):
                if score > 0.5:  # only masks with a good score
                    if masks.ndim == 4:
                        mask = masks[0, idx].numpy()
                    else:
                        mask = masks[idx].numpy()
                    # Logical-OR combination (very fast)
                    combined_mask = combined_mask | (mask > 0)
                    masks_used += 1
                    print(f" ✅ Added mask {idx} (score: {score:.3f})")
            if masks_used == 0:
                # Fallback: nothing passed the threshold, keep the best mask
                print("⚠️ No mask above threshold, falling back to best mask")
                best_idx = np.argmax(scores)
                if masks.ndim == 4:
                    combined_mask = masks[0, best_idx].numpy() > 0
                else:
                    combined_mask = masks[best_idx].numpy() > 0
                masks_used = 1
            final_mask = combined_mask
            print(f"🔗 Combined {masks_used} masks into one!")
        else:
            # Best mask only
            best_idx = np.argmax(scores)
            if masks.ndim == 4:
                final_mask = masks[0, best_idx].numpy() > 0
            else:
                final_mask = masks[best_idx].numpy() > 0
            masks_used = 1
            print(f"✅ Using best mask (score: {scores[best_idx]:.3f})")
        # Refinement for smooth edges
        if quality == "high":
            print("🎨 Refining mask...")
            final_mask = refine_mask(final_mask, kernel_size=7)
        # Build the overlay
        overlay = image_np.copy()
        color = np.array([255, 80, 180])  # pink
        mask_float = final_mask.astype(float)
        if quality == "high":
            mask_float = cv2.GaussianBlur(mask_float, (5, 5), 0)
        # Alpha-blend the color: out = src * (1 - 0.65*m) + color * 0.65*m
        for c in range(3):
            overlay[:, :, c] = (
                overlay[:, :, c] * (1 - mask_float * 0.65) +
                color[c] * mask_float * 0.65
            )
        # Draw a yellow outline
contours, _ = cv2.findContours(
final_mask.astype(np.uint8),
cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE
)
cv2.drawContours(overlay, contours, -1, (255, 255, 0), 3)
metadata = {
"success": True,
"mode": "automatic_plus" if merge_parts else "automatic",
"quality": quality,
"masks_combined": masks_used,
"all_scores": scores.tolist(),
"image_size": [w, h],
"mask_area": int(np.sum(final_mask)),
"mask_percentage": float(np.sum(final_mask) / (h * w) * 100),
"num_contours": len(contours),
"device": device
}
print("✅ Segmentation complete!")
return Image.fromarray(overlay.astype(np.uint8)), metadata
except Exception as e:
import traceback
print(f"❌ ERROR:\n{traceback.format_exc()}")
return image, {"error": str(e)}
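# Minimal local usage sketch for segment_automatic (illustrative only,
# not executed by the app; assumes a local file "photo.jpg"):
#
#   img = Image.open("photo.jpg")
#   overlay, meta = segment_automatic(img, quality="fast", merge_parts=True)
#   if meta.get("success"):
#       overlay.save("segmented.png")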
def segment_multi_dense(image, density="medium"):
    """Multi-object segmentation using a grid of prompt points."""
    if image is None:
        return None, {"error": "No image uploaded"}
try:
print(f"🎯 Starting multi-region segmentation (density: {density})...")
model, processor = load_model()
image_pil, image_np = prepare_image(image)
h, w = image_np.shape[:2]
        # Grid size based on density
if density == "high":
grid_size = 5
elif density == "medium":
grid_size = 4
else:
grid_size = 3
        # Generate grid points
points = []
for i in range(1, grid_size + 1):
for j in range(1, grid_size + 1):
x = int(w * i / (grid_size + 1))
y = int(h * j / (grid_size + 1))
points.append([x, y])
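        # Example: for a 4x4 grid on an 800x600 image, x takes the values
        # 800*i/5 for i = 1..4 -> {160, 320, 480, 640} (y analogously), so
        # all prompt points stay away from the image borders.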
print(f"📍 Using {len(points)} grid points ({grid_size}x{grid_size})...")
all_masks = []
all_scores = []
        # Run segmentation for each point
for idx, point in enumerate(points):
inputs = processor(
image_pil,
input_points=[[point]],
input_labels=[[1]],
return_tensors="pt"
).to(device)
with torch.no_grad():
outputs = model(**inputs, multimask_output=True)
masks = processor.image_processor.post_process_masks(
outputs.pred_masks.cpu(),
inputs["original_sizes"].cpu(),
inputs["reshaped_input_sizes"].cpu()
)[0]
scores = outputs.iou_scores.cpu().numpy().flatten()
best_idx = np.argmax(scores)
if masks.ndim == 4:
mask = masks[0, best_idx].numpy()
else:
mask = masks[best_idx].numpy()
            # Keep only masks with a good score
if scores[best_idx] > 0.7:
all_masks.append(refine_mask(mask))
all_scores.append(scores[best_idx])
print(f"✅ Got {len(all_masks)} quality masks")
        # Overlay with a different color per mask
        overlay = image_np.copy()
        # HSV-based color generation for visually distinct hues
colors = []
for i in range(len(all_masks)):
hue = int(180 * i / max(len(all_masks), 1))
color_hsv = np.uint8([[[hue, 255, 200]]])
color_rgb = cv2.cvtColor(color_hsv, cv2.COLOR_HSV2RGB)[0][0]
colors.append(color_rgb)
        # Apply the masks
        for mask, color, score in zip(all_masks, colors, all_scores):
            alpha = 0.4 + (score - 0.7) * 0.2  # higher score = stronger color
overlay[mask] = (
overlay[mask] * (1 - alpha) +
np.array(color) * alpha
).astype(np.uint8)
            # Outline
contours, _ = cv2.findContours(
mask.astype(np.uint8),
cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE
)
cv2.drawContours(overlay, contours, -1, color.tolist(), 2)
metadata = {
"success": True,
"mode": "multi_object_dense",
"density": density,
"grid_size": f"{grid_size}x{grid_size}",
"total_points": len(points),
"quality_masks": len(all_masks),
"avg_score": float(np.mean(all_scores)) if all_scores else 0,
"scores": [float(s) for s in all_scores]
}
print("✅ Multi-region complete!")
return Image.fromarray(overlay), metadata
except Exception as e:
import traceback
print(f"❌ ERROR:\n{traceback.format_exc()}")
return image, {"error": str(e)}
# Gradio Interface
demo = gr.Blocks(title="SAM2 Boostly", theme=gr.themes.Soft())
with demo:
gr.Markdown("# 🎨 SAM2 Segmentierung - Boostly Edition")
gr.Markdown("### ⚡ Optimierte Zero-Shot Object Segmentation")
with gr.Tab("🤖 Automatisch PLUS"):
gr.Markdown("**Smart Multi-Mask Combining** - Kombiniert automatisch alle Objektteile!")
with gr.Row():
with gr.Column():
                input_auto = gr.Image(type="pil", label="📸 Upload image")
                quality_radio = gr.Radio(
                    choices=["high", "fast"],
                    value="high",
                    label="⚙️ Quality",
                    info="High = more precise edges, Fast = quicker"
                )
                merge_checkbox = gr.Checkbox(
                    value=True,
                    label="🔗 Merge parts",
                    info="Combines all detected regions (fish + fin = one object)"
                )
                btn_auto = gr.Button("🚀 Segment", variant="primary", size="lg")
gr.Markdown("""
**✨ Funktionsweise:**
- SAM generiert 3 verschiedene Masken
- Wenn "Teile zusammenfügen" AN: Alle kombiniert → vollständiges Objekt
- Wenn AUS: Nur präziseste Maske
- ⚡ Optimiert: ~10-30 Sekunden statt 25 Minuten!
""")
with gr.Column():
                output_auto = gr.Image(label="✨ Segmented image")
                json_auto = gr.JSON(label="📊 Metadata")
btn_auto.click(
fn=segment_automatic,
inputs=[input_auto, quality_radio, merge_checkbox],
outputs=[output_auto, json_auto]
)
        gr.Markdown("💡 Tip: the object should be centered in the image")
with gr.Tab("🎯 Multi-Region"):
gr.Markdown("**Grid-basierte Segmentierung** - Für mehrere separate Objekte")
with gr.Row():
with gr.Column():
input_multi = gr.Image(type="pil", label="📸 Bild hochladen")
density_radio = gr.Radio(
choices=["high", "medium", "low"],
value="medium",
label="📊 Punkt-Dichte",
info="Mehr Punkte = mehr Details, aber langsamer"
)
                btn_multi = gr.Button("🎯 Segment all regions", variant="primary", size="lg")
gr.Markdown("""
**Grid-Größen:**
- 🔥 High: 5x5 = 25 Erkennungspunkte
- ⚡ Medium: 4x4 = 16 Punkte (empfohlen)
- 💨 Low: 3x3 = 9 Punkte
Jedes Objekt bekommt eigene Farbe!
""")
with gr.Column():
                output_multi = gr.Image(label="✨ Segmented image")
                json_multi = gr.JSON(label="📊 Metadata")
btn_multi.click(
fn=segment_multi_dense,
inputs=[input_multi, density_radio],
outputs=[output_multi, json_multi]
)
with gr.Tab("📡 API Dokumentation"):
gr.Markdown("### 🔗 API Endpoint")
gr.Code(
"https://EnginDev-Boostly.hf.space/api/predict",
label="Base URL"
)
gr.Markdown("### 📝 JavaScript Integration (für Lovable)")
gr.Code('''
// Segmentation Service
const HUGGINGFACE_API = 'https://EnginDev-Boostly.hf.space';
async function segmentImage(imageFile, mode = 'automatic') {
  // Convert the file to Base64
const base64 = await new Promise((resolve) => {
const reader = new FileReader();
reader.onloadend = () => resolve(reader.result);
reader.readAsDataURL(imageFile);
});
// API Call
const response = await fetch(`${HUGGINGFACE_API}/api/predict`, {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({
data: [base64, "high", true], // [image, quality, merge]
fn_index: mode === 'automatic' ? 0 : 1
})
});
const result = await response.json();
return {
    segmentedImage: result.data[0], // Base64 segmented image
    metadata: result.data[1]        // JSON with details
};
}
// Usage:
const result = await segmentImage(myImageFile, 'automatic');
console.log('Mask covers:', result.metadata.mask_percentage + '%');
''', language="javascript")
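        gr.Markdown("### 🐍 Python Integration (gradio_client)")
        gr.Markdown(
            "A minimal sketch using the `gradio_client` package. The Space id "
            "and argument order below mirror the endpoints above; treat them "
            "as assumptions, not a guaranteed contract."
        )
        gr.Code('''
# pip install gradio_client
from gradio_client import Client

# Space id assumed from the URL above
client = Client("EnginDev/Boostly")

# fn_index=0 -> Automatic PLUS with (image, quality, merge).
# Depending on the client version, the image is passed as a plain file path
# or wrapped with gradio_client.handle_file(...).
result = client.predict(
    "photo.jpg",  # path to the input image
    "high",       # quality: "high" or "fast"
    True,         # merge: combine all masks
    fn_index=0
)
segmented_image, metadata = result
print(metadata["mask_percentage"])
''', language="python")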
gr.Markdown("### ⚙️ Parameter")
gr.Markdown("""
**fn_index:**
- `0` = Automatisch PLUS (empfohlen für einzelne Objekte)
- `1` = Multi-Region (für mehrere Objekte)
**quality:**
- `"high"` = Präzise Kanten, Gaussian Blur, Refinement (~20-30s)
- `"fast"` = Schneller, weniger Nachbearbeitung (~10-15s)
**merge (nur fn_index=0):**
- `true` = Kombiniert alle Masken → vollständiges Objekt
- `false` = Nur beste Maske → nur Hauptteil
**density (nur fn_index=1):**
- `"high"` = 5x5 Grid = 25 Punkte
- `"medium"` = 4x4 Grid = 16 Punkte
- `"low"` = 3x3 Grid = 9 Punkte
""")
gr.Markdown("### 📊 Response Format")
gr.Code('''
{
"data": [
"data:image/png;base64,iVBORw0KGgo...", // Segmentiertes Bild
{
"success": true,
"mode": "automatic_plus",
"masks_combined": 3,
"mask_percentage": 12.5,
"num_contours": 1,
"all_scores": [0.998, 0.583, 0.864]
}
]
}
''', language="json")
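        gr.Markdown("### 🖼️ Decoding the image (Python)")
        gr.Markdown(
            "A short sketch for turning the Base64 data URL from the response "
            "back into a PIL image; it assumes the `data:image/png;base64,` "
            "prefix shown above."
        )
        gr.Code('''
import base64
import io
from PIL import Image

def decode_data_url(data_url: str) -> Image.Image:
    # Split off the "data:image/png;base64," prefix, then decode the payload
    _, b64 = data_url.split(",", 1)
    return Image.open(io.BytesIO(base64.b64decode(b64)))
''', language="python")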
if __name__ == "__main__":
print("🌐 Launching Boostly SAM2 v2.1...")
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)