import gradio as gr
import torch
import numpy as np
from PIL import Image
import cv2

print("🚀 Starting SAM2 App v2.1 - OPTIMIZED...")

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"📱 Using device: {device}")

model = None
processor = None


def load_model():
    """Lazily loads the SAM model and processor (once per process)."""
    # Note: despite the app's "SAM2" branding, these are the original SAM
    # (ViT) checkpoints available through transformers.
    global model, processor
    if model is None:
        print("📦 Loading SAM model...")
        try:
            from transformers import SamModel, SamProcessor
            model_name = "facebook/sam-vit-large"
            processor = SamProcessor.from_pretrained(model_name)
            model = SamModel.from_pretrained(model_name)
            model.to(device)
            print(f"✅ Model loaded: {model_name}")
        except Exception as e:
            print(f"❌ Error: {e}, falling back to base model")
            model_name = "facebook/sam-vit-base"
            processor = SamProcessor.from_pretrained(model_name)
            model = SamModel.from_pretrained(model_name)
            model.to(device)
    return model, processor


def prepare_image(image, max_size=1024):
    """Converts the input to an RGB PIL image + NumPy array, downscaling so the longest side is at most max_size."""
    if isinstance(image, np.ndarray):
        image_pil = Image.fromarray(image)
    else:
        image_pil = image
    if image_pil.mode != 'RGB':
        image_pil = image_pil.convert('RGB')
    image_np = np.array(image_pil)
    h, w = image_np.shape[:2]
    if max(h, w) > max_size:
        scale = max_size / max(h, w)
        new_h, new_w = int(h * scale), int(w * scale)
        image_pil = image_pil.resize((new_w, new_h), Image.Resampling.LANCZOS)
        image_np = np.array(image_pil)
    return image_pil, image_np


def refine_mask(mask, kernel_size=5):
    """Smooths mask edges: morphological closing fills small holes, opening removes speckle noise."""
    mask_uint8 = (mask > 0).astype(np.uint8) * 255
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
    mask_closed = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, kernel)
    mask_refined = cv2.morphologyEx(mask_closed, cv2.MORPH_OPEN, kernel)
    return mask_refined > 0
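
# A minimal illustrative sketch (a hypothetical helper, not called anywhere in
# the app) of what refine_mask does on a synthetic mask: closing fills the
# one-pixel hole inside the square, opening removes the isolated speckle.
def _refine_mask_demo():
    demo_mask = np.zeros((64, 64), dtype=bool)
    demo_mask[16:48, 16:48] = True   # solid square
    demo_mask[30, 30] = False        # one-pixel hole -> filled by MORPH_CLOSE
    demo_mask[2, 2] = True           # isolated speckle -> removed by MORPH_OPEN
    refined = refine_mask(demo_mask, kernel_size=5)
    print(f"hole filled: {refined[30, 30]}, speckle removed: {not refined[2, 2]}")
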
def segment_automatic(image, quality="high", merge_parts=True):
    """
    OPTIMIZED automatic segmentation.
    Fast & precise - combines several masks into one object.
    """
    if image is None:
        return None, {"error": "No image uploaded"}
    try:
        print(f"🔄 Starting segmentation (quality: {quality}, merge: {merge_parts})...")
        model, processor = load_model()
        image_pil, image_np = prepare_image(image)
        h, w = image_np.shape[:2]
        center_x, center_y = w // 2, h // 2

        # Single-point inference with multimask_output
        inputs = processor(
            image_pil,
            input_points=[[[center_x, center_y]]],
            input_labels=[[1]],
            return_tensors="pt"
        ).to(device)

        print("🧠 Running inference...")
        with torch.no_grad():
            outputs = model(**inputs, multimask_output=True)

        masks = processor.image_processor.post_process_masks(
            outputs.pred_masks.cpu(),
            inputs["original_sizes"].cpu(),
            inputs["reshaped_input_sizes"].cpu()
        )[0]
        scores = outputs.iou_scores.cpu().numpy()
        if scores.ndim > 1:
            scores = scores.flatten()
        print(f"✅ Got {len(scores)} masks with scores: {scores}")

        # SMART MERGING: combine all good masks
        if merge_parts:
            combined_mask = np.zeros((h, w), dtype=bool)
            masks_used = 0
            for idx, score in enumerate(scores):
                if score > 0.5:  # only masks with a good score
                    if masks.ndim == 4:
                        mask = masks[0, idx].numpy()
                    else:
                        mask = masks[idx].numpy()
                    # OR combination (very fast!)
                    combined_mask = combined_mask | (mask > 0)
                    masks_used += 1
                    print(f"   ✅ Added mask {idx} (score: {score:.3f})")
            final_mask = combined_mask
            print(f"🔗 Combined {masks_used} masks into one!")
        else:
            # Best mask only
            best_idx = np.argmax(scores)
            if masks.ndim == 4:
                final_mask = masks[0, best_idx].numpy() > 0
            else:
                final_mask = masks[best_idx].numpy() > 0
            masks_used = 1
            print(f"✅ Using best mask (score: {scores[best_idx]:.3f})")

        # Refinement for smooth edges
        if quality == "high":
            print("🎨 Refining mask...")
            final_mask = refine_mask(final_mask, kernel_size=7)

        # Build the overlay
        overlay = image_np.copy()
        color = np.array([255, 80, 180])  # pink
        mask_float = final_mask.astype(float)
        if quality == "high":
            mask_float = cv2.GaussianBlur(mask_float, (5, 5), 0)

        # Colored overlay (per-channel alpha blend)
        for c in range(3):
            overlay[:, :, c] = (
                overlay[:, :, c] * (1 - mask_float * 0.65)
                + color[c] * mask_float * 0.65
            )

        # Draw a yellow contour
        contours, _ = cv2.findContours(
            final_mask.astype(np.uint8),
            cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_SIMPLE
        )
        cv2.drawContours(overlay, contours, -1, (255, 255, 0), 3)

        metadata = {
            "success": True,
            "mode": "automatic_plus" if merge_parts else "automatic",
            "quality": quality,
            "masks_combined": masks_used,
            "all_scores": scores.tolist(),
            "image_size": [w, h],
            "mask_area": int(np.sum(final_mask)),
            "mask_percentage": float(np.sum(final_mask) / (h * w) * 100),
            "num_contours": len(contours),
            "device": device
        }
        print("✅ Segmentation complete!")
        return Image.fromarray(overlay.astype(np.uint8)), metadata

    except Exception as e:
        import traceback
        print(f"❌ ERROR:\n{traceback.format_exc()}")
        return image, {"error": str(e)}
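
# A small illustrative sketch (a hypothetical helper, not used by the app) of
# the grid sampling that segment_multi_dense below performs: points sit at the
# interior fractions i/(grid_size+1) of width/height, so none of them land on
# the image border.
def _grid_points_demo(w=800, h=600, grid_size=4):
    points = [
        [int(w * i / (grid_size + 1)), int(h * j / (grid_size + 1))]
        for i in range(1, grid_size + 1)
        for j in range(1, grid_size + 1)
    ]
    return points  # 16 points for a 4x4 grid, e.g. [[160, 120], [160, 240], ...]
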
def segment_multi_dense(image, density="medium"):
    """Multi-object segmentation using a point grid."""
    if image is None:
        return None, {"error": "No image"}
    try:
        print(f"🎯 Starting multi-region segmentation (density: {density})...")
        model, processor = load_model()
        image_pil, image_np = prepare_image(image)
        h, w = image_np.shape[:2]

        # Grid size based on density
        if density == "high":
            grid_size = 5
        elif density == "medium":
            grid_size = 4
        else:
            grid_size = 3

        # Generate grid points
        points = []
        for i in range(1, grid_size + 1):
            for j in range(1, grid_size + 1):
                x = int(w * i / (grid_size + 1))
                y = int(h * j / (grid_size + 1))
                points.append([x, y])
        print(f"📍 Using {len(points)} grid points ({grid_size}x{grid_size})...")

        all_masks = []
        all_scores = []

        # Segment each point
        for idx, point in enumerate(points):
            inputs = processor(
                image_pil,
                input_points=[[point]],
                input_labels=[[1]],
                return_tensors="pt"
            ).to(device)
            with torch.no_grad():
                outputs = model(**inputs, multimask_output=True)
            masks = processor.image_processor.post_process_masks(
                outputs.pred_masks.cpu(),
                inputs["original_sizes"].cpu(),
                inputs["reshaped_input_sizes"].cpu()
            )[0]
            scores = outputs.iou_scores.cpu().numpy().flatten()
            best_idx = np.argmax(scores)
            if masks.ndim == 4:
                mask = masks[0, best_idx].numpy()
            else:
                mask = masks[best_idx].numpy()
            # Keep only masks with a good score
            if scores[best_idx] > 0.7:
                all_masks.append(refine_mask(mask))
                all_scores.append(scores[best_idx])

        print(f"✅ Got {len(all_masks)} quality masks")

        # Overlay with a distinct color per mask
        overlay = image_np.copy()

        # HSV-based color generation (evenly spaced hues)
        colors = []
        for i in range(len(all_masks)):
            hue = int(180 * i / max(len(all_masks), 1))
            color_hsv = np.uint8([[[hue, 255, 200]]])
            color_rgb = cv2.cvtColor(color_hsv, cv2.COLOR_HSV2RGB)[0][0]
            colors.append(color_rgb)

        # Apply the masks
        for mask, color, score in zip(all_masks, colors, all_scores):
            alpha = 0.4 + (score - 0.7) * 0.2  # higher score = stronger color
            overlay[mask] = (
                overlay[mask] * (1 - alpha) + np.array(color) * alpha
            ).astype(np.uint8)
            # Contour
            contours, _ = cv2.findContours(
                mask.astype(np.uint8),
                cv2.RETR_EXTERNAL,
                cv2.CHAIN_APPROX_SIMPLE
            )
            cv2.drawContours(overlay, contours, -1, color.tolist(), 2)

        metadata = {
            "success": True,
            "mode": "multi_object_dense",
            "density": density,
            "grid_size": f"{grid_size}x{grid_size}",
            "total_points": len(points),
            "quality_masks": len(all_masks),
            "avg_score": float(np.mean(all_scores)) if all_scores else 0,
            "scores": [float(s) for s in all_scores]
        }
        print("✅ Multi-region complete!")
        return Image.fromarray(overlay), metadata

    except Exception as e:
        import traceback
        print(f"❌ ERROR:\n{traceback.format_exc()}")
        return image, {"error": str(e)}


# Gradio interface
demo = gr.Blocks(title="SAM2 Boostly", theme=gr.themes.Soft())

with demo:
    gr.Markdown("# 🎨 SAM2 Segmentation - Boostly Edition")
    gr.Markdown("### ⚡ Optimized Zero-Shot Object Segmentation")

    with gr.Tab("🤖 Automatic PLUS"):
        gr.Markdown("**Smart Multi-Mask Combining** - automatically merges all object parts!")
        with gr.Row():
            with gr.Column():
                input_auto = gr.Image(type="pil", label="📸 Upload image")
                quality_radio = gr.Radio(
                    choices=["high", "fast"],
                    value="high",
                    label="⚙️ Quality",
                    info="High = more precise edges, Fast = quicker"
                )
                merge_checkbox = gr.Checkbox(
                    value=True,
                    label="🔗 Merge parts",
                    info="Combines all detected regions (fish + fin = 1 object)"
                )
                btn_auto = gr.Button("🚀 Segment", variant="primary", size="lg")
                gr.Markdown("""
                **✨ How it works:**
                - SAM generates 3 different masks
                - With "Merge parts" ON: all are combined → complete object
                - With it OFF: only the most precise mask
                - ⚡ Optimized: ~10-30 seconds instead of 25 minutes!
                """)
            with gr.Column():
                output_auto = gr.Image(label="✨ Segmented image")
                json_auto = gr.JSON(label="📊 Metadata")

        btn_auto.click(
            fn=segment_automatic,
            inputs=[input_auto, quality_radio, merge_checkbox],
            outputs=[output_auto, json_auto]
        )
        gr.Markdown("💡 Tip: the object should be centered in the image")

    with gr.Tab("🎯 Multi-Region"):
        gr.Markdown("**Grid-based segmentation** - for several separate objects")
        with gr.Row():
            with gr.Column():
                input_multi = gr.Image(type="pil", label="📸 Upload image")
                density_radio = gr.Radio(
                    choices=["high", "medium", "low"],
                    value="medium",
                    label="📊 Point density",
                    info="More points = more detail, but slower"
                )
                btn_multi = gr.Button("🎯 Segment all regions", variant="primary", size="lg")
                gr.Markdown("""
                **Grid sizes:**
                - 🔥 High: 5x5 = 25 detection points
                - ⚡ Medium: 4x4 = 16 points (recommended)
                - 💨 Low: 3x3 = 9 points

                Each object gets its own color!
                """)
            with gr.Column():
                output_multi = gr.Image(label="✨ Segmented image")
                json_multi = gr.JSON(label="📊 Metadata")

        btn_multi.click(
            fn=segment_multi_dense,
            inputs=[input_multi, density_radio],
            outputs=[output_multi, json_multi]
        )

    with gr.Tab("📡 API Documentation"):
        gr.Markdown("### 🔗 API Endpoint")
        gr.Code(
            "https://EnginDev-Boostly.hf.space/api/predict",
            label="Base URL"
        )

        gr.Markdown("### 📝 JavaScript integration (for Lovable)")
        gr.Code('''
// Segmentation service
const HUGGINGFACE_API = 'https://EnginDev-Boostly.hf.space';

async function segmentImage(imageFile, mode = 'automatic') {
  // Convert the file to Base64
  const base64 = await new Promise((resolve) => {
    const reader = new FileReader();
    reader.onloadend = () => resolve(reader.result);
    reader.readAsDataURL(imageFile);
  });

  // API call
  const response = await fetch(`${HUGGINGFACE_API}/api/predict`, {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({
      data: [base64, "high", true],  // [image, quality, merge]
      fn_index: mode === 'automatic' ? 0 : 1
    })
  });

  const result = await response.json();
  return {
    segmentedImage: result.data[0],  // Base64 segmented image
    metadata: result.data[1]         // JSON with details
  };
}

// Usage:
const result = await segmentImage(myImageFile, 'automatic');
console.log('Mask covers:', result.metadata.mask_percentage + '%');
''', language="javascript")

        gr.Markdown("### ⚙️ Parameters")
        gr.Markdown("""
        **fn_index:**
        - `0` = Automatic PLUS (recommended for single objects)
        - `1` = Multi-Region (for several objects)

        **quality:**
        - `"high"` = precise edges, Gaussian blur, refinement (~20-30s)
        - `"fast"` = quicker, less post-processing (~10-15s)

        **merge (fn_index=0 only):**
        - `true` = combines all masks → complete object
        - `false` = best mask only → main part only

        **density (fn_index=1 only):**
        - `"high"` = 5x5 grid = 25 points
        - `"medium"` = 4x4 grid = 16 points
        - `"low"` = 3x3 grid = 9 points
        """)

        gr.Markdown("### 📊 Response format")
        gr.Code('''
{
  "data": [
    "data:image/png;base64,iVBORw0KGgo...",  // segmented image
    {
      "success": true,
      "mode": "automatic_plus",
      "masks_combined": 3,
      "mask_percentage": 12.5,
      "num_contours": 1,
      "all_scores": [0.998, 0.583, 0.864]
    }
  ]
}
''', language="json")

if __name__ == "__main__":
    print("🌐 Launching Boostly SAM2 v2.1...")
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)