|
|
import gradio as gr |
|
|
import torch |
|
|
import numpy as np |
|
|
from PIL import Image |
|
|
import cv2 |
|
|
|
|
|
# Startup banner so the process log shows which build is booting.
print("🚀 Starting SAM2 App v2.1 - OPTIMIZED...")

# Run on the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"📱 Using device: {device}")

# Module-level cache for the model and its processor; both start out empty.
model, processor = None, None
|
|
|
|
|
def load_model():
    """Load the SAM model and processor once and cache them in module globals.

    The large checkpoint (``facebook/sam-vit-large``) is tried first; if
    anything goes wrong while loading it or moving it to ``device``, the base
    checkpoint (``facebook/sam-vit-base``) is loaded instead.

    Returns:
        tuple: ``(model, processor)``, the cached pair.

    Raises:
        ImportError: If ``transformers`` cannot be imported.
        Exception: Whatever the fallback load raises if it fails as well.
    """
    global model, processor

    if model is not None and processor is not None:
        return model, processor

    print("📦 Loading SAM model...")

    # Imported outside the try block on purpose: if the import itself fails,
    # the fallback below could not run either and would only hide the real
    # ImportError behind a NameError.
    from transformers import SamModel, SamProcessor

    try:
        model_name = "facebook/sam-vit-large"
        loaded_processor = SamProcessor.from_pretrained(model_name)
        loaded_model = SamModel.from_pretrained(model_name)
        loaded_model.to(device)
        print(f"✅ Model loaded: {model_name}")
    except Exception as e:
        print(f"❌ Error: {e}, falling back to base model")
        model_name = "facebook/sam-vit-base"
        loaded_processor = SamProcessor.from_pretrained(model_name)
        loaded_model = SamModel.from_pretrained(model_name)
        loaded_model.to(device)

    # Publish only a complete, matching pair so that a failed attempt never
    # leaves a half-initialised cache behind for the next call.
    model, processor = loaded_model, loaded_processor
    return model, processor
|
|
|
|
|
def prepare_image(image, max_size=1024):
    """Normalise an input image to RGB and cap its longer side at ``max_size``.

    Args:
        image: A ``numpy.ndarray`` (converted with ``Image.fromarray``) or an
            object that is used as a PIL image as-is.
        max_size: Maximum allowed length in pixels of the longer side. Larger
            images are downscaled with LANCZOS resampling, keeping the aspect
            ratio; smaller images are left at their size.

    Returns:
        tuple: ``(image_pil, image_np)`` - the RGB PIL image and the same
        pixels as a NumPy array.
    """
    image_pil = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    if image_pil.mode != 'RGB':
        image_pil = image_pil.convert('RGB')

    w, h = image_pil.size
    longest_side = max(h, w)

    if longest_side > max_size:
        scale = max_size / longest_side
        # Clamp to one pixel: on extreme aspect ratios int() would truncate
        # the short side to 0, which resize() rejects.
        new_h = max(1, int(h * scale))
        new_w = max(1, int(w * scale))
        image_pil = image_pil.resize((new_w, new_h), Image.Resampling.LANCZOS)

    return image_pil, np.array(image_pil)
|
|
|
|
|
def refine_mask(mask, kernel_size=5):
    """Smooth the edges of a mask.

    The mask is binarised (values above 0 are foreground), then a
    morphological closing followed by an opening is applied with an
    elliptical ``kernel_size`` x ``kernel_size`` structuring element.

    Returns:
        A boolean array that is True where the smoothed mask is foreground.
    """
    binary = np.where(mask > 0, 255, 0).astype(np.uint8)
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))

    smoothed = binary
    # Closing first, then opening - same order as two explicit calls.
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        smoothed = cv2.morphologyEx(smoothed, operation, ellipse)

    return smoothed > 0
|
|
|
|
|
def segment_automatic(image, quality="high", merge_parts=True):
    """Segment the object under the image centre with a single point prompt.

    The model is prompted with one positive point at the centre of the
    (possibly downscaled) image and asked for multiple candidate masks.

    Args:
        image: Input image; ``None`` yields an error result.
        quality: ``"high"`` additionally smooths the mask with
            ``refine_mask`` and feathers the overlay with a Gaussian blur;
            any other value skips both steps.
        merge_parts: When true, every candidate mask whose score is above 0.5
            is OR-combined into one mask. If no candidate passes that
            threshold, the best-scoring mask is used instead of returning an
            empty result. When false, only the best-scoring mask is used.

    Returns:
        tuple: ``(overlay_image, metadata)``. On success ``overlay_image`` is
        a PIL image with the mask tinted and outlined and ``metadata`` is a
        dict describing the result. On failure the original ``image`` is
        returned together with ``{"error": <message>}``.
    """
    if image is None:
        return None, {"error": "Kein Bild hochgeladen"}

    try:
        print(f"🔄 Starting segmentation (quality: {quality}, merge: {merge_parts})...")
        model, processor = load_model()

        image_pil, image_np = prepare_image(image)
        h, w = image_np.shape[:2]

        # One positive point prompt at the image centre.
        center_x, center_y = w // 2, h // 2
        inputs = processor(
            image_pil,
            input_points=[[[center_x, center_y]]],
            input_labels=[[1]],
            return_tensors="pt",
        ).to(device)

        print("🧠 Running inference...")
        with torch.no_grad():
            outputs = model(**inputs, multimask_output=True)

        masks = processor.image_processor.post_process_masks(
            outputs.pred_masks.cpu(),
            inputs["original_sizes"].cpu(),
            inputs["reshaped_input_sizes"].cpu(),
        )[0]
        # Drop the extra leading axis when present so that candidates[i] is
        # always the i-th candidate mask.
        candidates = masks[0] if masks.ndim == 4 else masks

        scores = outputs.iou_scores.cpu().numpy().flatten()
        print(f"✅ Got {len(scores)} masks with scores: {scores}")

        best_idx = int(np.argmax(scores))

        if merge_parts:
            combined_mask = np.zeros((h, w), dtype=bool)
            masks_used = 0

            for idx, score in enumerate(scores):
                if score > 0.5:
                    combined_mask |= candidates[idx].numpy() > 0
                    masks_used += 1
                    print(f" ✅ Added mask {idx} (score: {score:.3f})")

            if masks_used:
                print(f"🔗 Combined {masks_used} masks into one!")
            else:
                # Nothing passed the threshold: fall back to the best
                # candidate rather than reporting an empty mask as success.
                combined_mask = candidates[best_idx].numpy() > 0
                masks_used = 1
                print(f"✅ Using best mask (score: {scores[best_idx]:.3f})")

            final_mask = combined_mask
        else:
            final_mask = candidates[best_idx].numpy() > 0
            masks_used = 1
            print(f"✅ Using best mask (score: {scores[best_idx]:.3f})")

        if quality == "high":
            print("🎨 Refining mask...")
            final_mask = refine_mask(final_mask, kernel_size=7)

        color = np.array([255, 80, 180])

        mask_float = final_mask.astype(float)
        if quality == "high":
            # Feather the tint so the overlay edge is not hard.
            mask_float = cv2.GaussianBlur(mask_float, (5, 5), 0)

        # Alpha-blend the tint over the image, at most 65 % opaque.
        alpha = mask_float[:, :, None] * 0.65
        overlay = (image_np * (1 - alpha) + color * alpha).astype(np.uint8)

        contours, _ = cv2.findContours(
            final_mask.astype(np.uint8),
            cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_SIMPLE,
        )
        cv2.drawContours(overlay, contours, -1, (255, 255, 0), 3)

        mask_area = int(np.sum(final_mask))
        metadata = {
            "success": True,
            "mode": "automatic_plus" if merge_parts else "automatic",
            "quality": quality,
            "masks_combined": masks_used,
            "all_scores": scores.tolist(),
            "image_size": [w, h],
            "mask_area": mask_area,
            "mask_percentage": float(mask_area / (h * w) * 100),
            "num_contours": len(contours),
            "device": device,
        }

        print("✅ Segmentation complete!")
        return Image.fromarray(overlay.astype(np.uint8)), metadata

    except Exception as e:
        # UI boundary: log the full traceback and hand a readable error back
        # to the caller instead of raising.
        import traceback
        print(f"❌ ERROR:\n{traceback.format_exc()}")
        return image, {"error": str(e)}
|
|
|
|
|
def segment_multi_dense(image, density="medium"):
    """Segment several regions by prompting the model on a regular point grid.

    A ``grid_size`` x ``grid_size`` grid of interior points is laid over the
    (possibly downscaled) image. Each point is used as a single positive
    prompt; the best-scoring candidate mask for that point is kept when its
    score is above 0.7. Kept masks are smoothed with ``refine_mask`` and drawn
    onto the image, each with its own hue and contour.

    Args:
        image: Input image; ``None`` yields an error result.
        density: ``"high"`` uses a 5x5 grid, ``"medium"`` a 4x4 grid and any
            other value a 3x3 grid.

    Returns:
        tuple: ``(overlay_image, metadata)``. On failure the original
        ``image`` is returned together with ``{"error": <message>}``.
    """
    if image is None:
        return None, {"error": "Kein Bild"}

    try:
        print(f"🎯 Starting multi-region segmentation (density: {density})...")
        model, processor = load_model()
        image_pil, image_np = prepare_image(image)
        h, w = image_np.shape[:2]

        if density == "high":
            grid_size = 5
        elif density == "medium":
            grid_size = 4
        else:
            grid_size = 3

        # Interior grid points only; the image border itself is never prompted.
        steps = range(1, grid_size + 1)
        points = [
            [int(w * i / (grid_size + 1)), int(h * j / (grid_size + 1))]
            for i in steps
            for j in steps
        ]
        print(f"📍 Using {len(points)} grid points ({grid_size}x{grid_size})...")

        # The image is the same for every prompt, so run the image encoder
        # once and reuse its embeddings instead of re-encoding per point.
        image_inputs = processor(image_pil, return_tensors="pt").to(device)
        with torch.no_grad():
            image_embeddings = model.get_image_embeddings(image_inputs["pixel_values"])

        all_masks = []
        all_scores = []

        for point in points:
            inputs = processor(
                image_pil,
                input_points=[[point]],
                input_labels=[[1]],
                return_tensors="pt",
            ).to(device)
            # Swap the raw pixels for the cached embeddings.
            inputs.pop("pixel_values", None)
            inputs.update({"image_embeddings": image_embeddings})

            with torch.no_grad():
                outputs = model(**inputs, multimask_output=True)

            masks = processor.image_processor.post_process_masks(
                outputs.pred_masks.cpu(),
                inputs["original_sizes"].cpu(),
                inputs["reshaped_input_sizes"].cpu(),
            )[0]
            # Drop the extra leading axis when present so that candidates[i]
            # is always the i-th candidate mask.
            candidates = masks[0] if masks.ndim == 4 else masks

            scores = outputs.iou_scores.cpu().numpy().flatten()
            best_idx = int(np.argmax(scores))

            # Keep only confident masks.
            if scores[best_idx] > 0.7:
                all_masks.append(refine_mask(candidates[best_idx].numpy()))
                all_scores.append(scores[best_idx])

        print(f"✅ Got {len(all_masks)} quality masks")

        overlay = image_np.copy()
        mask_count = len(all_masks)

        for i, (mask, score) in enumerate(zip(all_masks, all_scores)):
            # Spread the hues evenly over the kept masks.
            hue = int(180 * i / mask_count)
            color_hsv = np.uint8([[[hue, 255, 200]]])
            color = cv2.cvtColor(color_hsv, cv2.COLOR_HSV2RGB)[0][0]

            # Higher-scoring masks are drawn slightly more opaque.
            alpha = 0.4 + (score - 0.7) * 0.2
            overlay[mask] = (
                overlay[mask] * (1 - alpha) + np.array(color) * alpha
            ).astype(np.uint8)

            contours, _ = cv2.findContours(
                mask.astype(np.uint8),
                cv2.RETR_EXTERNAL,
                cv2.CHAIN_APPROX_SIMPLE,
            )
            cv2.drawContours(overlay, contours, -1, color.tolist(), 2)

        metadata = {
            "success": True,
            "mode": "multi_object_dense",
            "density": density,
            "grid_size": f"{grid_size}x{grid_size}",
            "total_points": len(points),
            "quality_masks": len(all_masks),
            "avg_score": float(np.mean(all_scores)) if all_scores else 0,
            "scores": [float(s) for s in all_scores],
        }

        print("✅ Multi-region complete!")
        return Image.fromarray(overlay), metadata

    except Exception as e:
        # UI boundary: log the full traceback and hand a readable error back
        # to the caller instead of raising.
        import traceback
        print(f"❌ ERROR:\n{traceback.format_exc()}")
        return image, {"error": str(e)}
|
|
|
|
|
|
|
|
# Long help texts and code samples shown in the UI. They are kept at module
# level so the layout code below stays readable.
_AUTO_HELP_MD = """
**✨ Funktionsweise:**
- SAM generiert 3 verschiedene Masken
- Wenn "Teile zusammenfügen" AN: Alle kombiniert → vollständiges Objekt
- Wenn AUS: Nur präziseste Maske
- ⚡ Optimiert: ~10-30 Sekunden statt 25 Minuten!
"""

_MULTI_HELP_MD = """
**Grid-Größen:**
- 🔥 High: 5x5 = 25 Erkennungspunkte
- ⚡ Medium: 4x4 = 16 Punkte (empfohlen)
- 💨 Low: 3x3 = 9 Punkte

Jedes Objekt bekommt eigene Farbe!
"""

# The request payload differs per endpoint: the automatic endpoint takes
# [image, quality, merge], the multi-region endpoint takes [image, density].
# The sample therefore builds the payload from the selected mode.
_API_JS_EXAMPLE = '''
// Segmentation Service
const HUGGINGFACE_API = 'https://EnginDev-Boostly.hf.space';

async function segmentImage(imageFile, mode = 'automatic') {
    // File zu Base64 konvertieren
    const base64 = await new Promise((resolve) => {
        const reader = new FileReader();
        reader.onloadend = () => resolve(reader.result);
        reader.readAsDataURL(imageFile);
    });

    // API Call
    const isAutomatic = mode === 'automatic';
    const response = await fetch(`${HUGGINGFACE_API}/api/predict`, {
        method: 'POST',
        headers: {'Content-Type': 'application/json'},
        body: JSON.stringify({
            // automatic: [image, quality, merge] / multi: [image, density]
            data: isAutomatic ? [base64, "high", true] : [base64, "medium"],
            fn_index: isAutomatic ? 0 : 1
        })
    });

    const result = await response.json();

    return {
        segmentedImage: result.data[0], // Base64 segmentiertes Bild
        metadata: result.data[1] // JSON mit Details
    };
}

// Verwendung:
const result = await segmentImage(myImageFile, 'automatic');
console.log('Mask covers:', result.metadata.mask_percentage + '%');
'''

_API_PARAMS_MD = """
**fn_index:**
- `0` = Automatisch PLUS (empfohlen für einzelne Objekte)
- `1` = Multi-Region (für mehrere Objekte)

**quality:**
- `"high"` = Präzise Kanten, Gaussian Blur, Refinement (~20-30s)
- `"fast"` = Schneller, weniger Nachbearbeitung (~10-15s)

**merge (nur fn_index=0):**
- `true` = Kombiniert alle Masken → vollständiges Objekt
- `false` = Nur beste Maske → nur Hauptteil

**density (nur fn_index=1):**
- `"high"` = 5x5 Grid = 25 Punkte
- `"medium"` = 4x4 Grid = 16 Punkte
- `"low"` = 3x3 Grid = 9 Punkte
"""

_API_RESPONSE_EXAMPLE = '''
{
    "data": [
        "...", // Segmentiertes Bild
        {
            "success": true,
            "mode": "automatic_plus",
            "masks_combined": 3,
            "mask_percentage": 12.5,
            "num_contours": 1,
            "all_scores": [0.998, 0.583, 0.864]
        }
    ]
}
'''

demo = gr.Blocks(title="SAM2 Boostly", theme=gr.themes.Soft())

with demo:
    gr.Markdown("# 🎨 SAM2 Segmentierung - Boostly Edition")
    gr.Markdown("### ⚡ Optimierte Zero-Shot Object Segmentation")

    # Tab 1: single-object segmentation from a centre point prompt.
    with gr.Tab("🤖 Automatisch PLUS"):
        gr.Markdown("**Smart Multi-Mask Combining** - Kombiniert automatisch alle Objektteile!")

        with gr.Row():
            with gr.Column():
                input_auto = gr.Image(type="pil", label="📸 Bild hochladen")

                quality_radio = gr.Radio(
                    choices=["high", "fast"],
                    value="high",
                    label="⚙️ Qualität",
                    info="High = präzisere Kanten, Fast = schneller",
                )

                merge_checkbox = gr.Checkbox(
                    value=True,
                    label="🔗 Teile zusammenfügen",
                    info="Kombiniert alle erkannten Bereiche (Fisch + Flosse = 1 Objekt)",
                )

                btn_auto = gr.Button("🚀 Segmentieren", variant="primary", size="lg")

                gr.Markdown(_AUTO_HELP_MD)

            with gr.Column():
                output_auto = gr.Image(label="✨ Segmentiertes Bild")
                json_auto = gr.JSON(label="📊 Metadata")

        # Registered first on purpose: the API documentation tab refers to
        # this handler as fn_index 0.
        btn_auto.click(
            fn=segment_automatic,
            inputs=[input_auto, quality_radio, merge_checkbox],
            outputs=[output_auto, json_auto],
        )

        gr.Examples(
            examples=[],
            inputs=input_auto,
            label="💡 Tipp: Objekt sollte zentral im Bild sein",
        )

    # Tab 2: grid-prompted segmentation of several regions.
    with gr.Tab("🎯 Multi-Region"):
        gr.Markdown("**Grid-basierte Segmentierung** - Für mehrere separate Objekte")

        with gr.Row():
            with gr.Column():
                input_multi = gr.Image(type="pil", label="📸 Bild hochladen")

                density_radio = gr.Radio(
                    choices=["high", "medium", "low"],
                    value="medium",
                    label="📊 Punkt-Dichte",
                    info="Mehr Punkte = mehr Details, aber langsamer",
                )

                btn_multi = gr.Button("🎯 Alle Bereiche segmentieren", variant="primary", size="lg")

                gr.Markdown(_MULTI_HELP_MD)

            with gr.Column():
                output_multi = gr.Image(label="✨ Segmentiertes Bild")
                json_multi = gr.JSON(label="📊 Metadata")

        btn_multi.click(
            fn=segment_multi_dense,
            inputs=[input_multi, density_radio],
            outputs=[output_multi, json_multi],
        )

    # Tab 3: static API documentation, display only.
    with gr.Tab("📡 API Dokumentation"):
        gr.Markdown("### 🔗 API Endpoint")
        gr.Code(
            "https://EnginDev-Boostly.hf.space/api/predict",
            label="Base URL",
        )

        gr.Markdown("### 📝 JavaScript Integration (für Lovable)")
        gr.Code(_API_JS_EXAMPLE, language="javascript")

        gr.Markdown("### ⚙️ Parameter")
        gr.Markdown(_API_PARAMS_MD)

        gr.Markdown("### 📊 Response Format")
        gr.Code(_API_RESPONSE_EXAMPLE, language="json")

if __name__ == "__main__":
    print("🌐 Launching Boostly SAM2 v2.1...")
    # Bind to all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)