EnginDev committed on
Commit 6e85785 · verified · 1 Parent(s): 82a6cd8

Update app.py

Files changed (1):
  1. app.py +444 -43

app.py CHANGED
@@ -1,63 +1,464 @@
  import gradio as gr
- from transformers import SamProcessor, SamModel
- from PIL import Image
  import torch
  import numpy as np
- import random
- import traceback

- # Load model
- model_id = "facebook/sam-vit-base"
- processor = SamProcessor.from_pretrained(model_id)
- model = SamModel.from_pretrained(model_id)

- def random_color():
-     """Random RGB color"""
-     return [random.randint(0, 255) for _ in range(3)]

- def segment_image(image):
-     try:
-         device = torch.device("cpu")
-         model.to(device)

-         inputs = processor(images=image, return_tensors="pt").to(device)

-         with torch.no_grad():
-             outputs = model(**inputs)

-         masks = processor.post_process_masks(
              outputs.pred_masks.cpu(),
              inputs["original_sizes"].cpu(),
              inputs["reshaped_input_sizes"].cpu()
          )

-         mask_arrays = masks[0].numpy()
-         img_array = np.array(image)
-         overlay = np.zeros_like(img_array, dtype=np.uint8)

-         # Color each mask
-         for mask in mask_arrays:
-             mask = mask[0]
-             color = random_color()
-             for c in range(3):
-                 overlay[:, :, c] = np.where(mask > 0.5, color[c], overlay[:, :, c])

-         # Stronger color blending (80% mask / 20% original)
-         blended = Image.fromarray(
-             (0.2 * img_array + 0.8 * overlay).astype(np.uint8)
          )

-         return blended
-
-     except Exception:
-         return f"Error:\n{traceback.format_exc()}"

- demo = gr.Interface(
-     fn=segment_image,
-     inputs=gr.Image(type="pil", label="Upload your fish image"),
-     outputs=gr.Image(type="pil", label="Segmented Output"),
-     title="FishBoost – Colorful SAM Segmentation (Enhanced Colors)",
-     description="Produces bold, colorful masks with Meta SAM (CPU version)."
- )

- demo.launch()

  import gradio as gr
  import torch
  import numpy as np
+ from PIL import Image
+ import cv2

+ print("🚀 Starting SAM2 App v2.1 - OPTIMIZED...")

+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"📱 Using device: {device}")

+ model = None
+ processor = None

+ def load_model():
+     global model, processor
+     if model is None:
+         print("📦 Loading SAM model...")
+         try:
+             from transformers import SamModel, SamProcessor
+
+             model_name = "facebook/sam-vit-large"
+
+             processor = SamProcessor.from_pretrained(model_name)
+             model = SamModel.from_pretrained(model_name)
+             model.to(device)
+             print(f"✅ Model loaded: {model_name}")
+         except Exception as e:
+             print(f"❌ Error: {e}, falling back to base model")
+             model_name = "facebook/sam-vit-base"
+             processor = SamProcessor.from_pretrained(model_name)
+             model = SamModel.from_pretrained(model_name)
+             model.to(device)
+     return model, processor

+ def prepare_image(image, max_size=1024):
+     if isinstance(image, np.ndarray):
+         image_pil = Image.fromarray(image)
+     else:
+         image_pil = image
+
+     if image_pil.mode != 'RGB':
+         image_pil = image_pil.convert('RGB')
+
+     image_np = np.array(image_pil)
+     h, w = image_np.shape[:2]
+
+     if max(h, w) > max_size:
+         scale = max_size / max(h, w)
+         new_h, new_w = int(h * scale), int(w * scale)
+         image_pil = image_pil.resize((new_w, new_h), Image.Resampling.LANCZOS)
+         image_np = np.array(image_pil)
+
+     return image_pil, image_np
+
+ def refine_mask(mask, kernel_size=5):
+     """Smooths mask edges"""
+     mask_uint8 = (mask > 0).astype(np.uint8) * 255
+     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
+     mask_closed = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, kernel)
+     mask_refined = cv2.morphologyEx(mask_closed, cv2.MORPH_OPEN, kernel)
+     return mask_refined > 0

+ def segment_automatic(image, quality="high", merge_parts=True):
+     """
+     OPTIMIZED automatic segmentation
+     Fast & precise - combines several masks
+     """
+     if image is None:
+         return None, {"error": "No image uploaded"}
+
+     try:
+         print(f"🔄 Starting segmentation (quality: {quality}, merge: {merge_parts})...")
+         model, processor = load_model()
+
+         image_pil, image_np = prepare_image(image)
+         h, w = image_np.shape[:2]
+
+         center_x, center_y = w // 2, h // 2
+
+         # Single-point inference with multimask_output
+         inputs = processor(
+             image_pil,
+             input_points=[[[center_x, center_y]]],
+             input_labels=[[1]],
+             return_tensors="pt"
+         ).to(device)
+
+         print("🧠 Running inference...")
+         with torch.no_grad():
+             outputs = model(**inputs, multimask_output=True)
+
+         masks = processor.image_processor.post_process_masks(
              outputs.pred_masks.cpu(),
              inputs["original_sizes"].cpu(),
              inputs["reshaped_input_sizes"].cpu()
+         )[0]
+
+         scores = outputs.iou_scores.cpu().numpy()
+         if scores.ndim > 1:
+             scores = scores.flatten()
+
+         print(f"✅ Got {len(scores)} masks with scores: {scores}")
+
+         # SMART MERGING: combine all good masks
+         if merge_parts:
+             combined_mask = np.zeros((h, w), dtype=bool)
+             masks_used = 0
+
+             for idx, score in enumerate(scores):
+                 if score > 0.5:  # only masks with a good score
+                     if masks.ndim == 4:
+                         mask = masks[0, idx].numpy()
+                     else:
+                         mask = masks[idx].numpy()
+
+                     # OR combination (very fast!)
+                     combined_mask = combined_mask | (mask > 0)
+                     masks_used += 1
+                     print(f" ✅ Added mask {idx} (score: {score:.3f})")
+
+             final_mask = combined_mask
+             print(f"🔗 Combined {masks_used} masks into one!")
+         else:
+             # Best mask only
+             best_idx = np.argmax(scores)
+             if masks.ndim == 4:
+                 final_mask = masks[0, best_idx].numpy() > 0
+             else:
+                 final_mask = masks[best_idx].numpy() > 0
+             masks_used = 1
+             print(f"✅ Using best mask (score: {scores[best_idx]:.3f})")
+
+         # Refinement for smooth edges
+         if quality == "high":
+             print("🎨 Refining mask...")
+             final_mask = refine_mask(final_mask, kernel_size=7)
+
+         # Create the overlay
+         overlay = image_np.copy()
+         color = np.array([255, 80, 180])  # pink
+
+         mask_float = final_mask.astype(float)
+         if quality == "high":
+             mask_float = cv2.GaussianBlur(mask_float, (5, 5), 0)
+
+         # Colored overlay
+         for c in range(3):
+             overlay[:, :, c] = (
+                 overlay[:, :, c] * (1 - mask_float * 0.65) +
+                 color[c] * mask_float * 0.65
+             )
+
+         # Draw a yellow contour
+         contours, _ = cv2.findContours(
+             final_mask.astype(np.uint8),
+             cv2.RETR_EXTERNAL,
+             cv2.CHAIN_APPROX_SIMPLE
          )
+         cv2.drawContours(overlay, contours, -1, (255, 255, 0), 3)
+
+         metadata = {
+             "success": True,
+             "mode": "automatic_plus" if merge_parts else "automatic",
+             "quality": quality,
+             "masks_combined": masks_used,
+             "all_scores": scores.tolist(),
+             "image_size": [w, h],
+             "mask_area": int(np.sum(final_mask)),
+             "mask_percentage": float(np.sum(final_mask) / (h * w) * 100),
+             "num_contours": len(contours),
+             "device": device
+         }
+
+         print("✅ Segmentation complete!")
+         return Image.fromarray(overlay.astype(np.uint8)), metadata
+
+     except Exception as e:
+         import traceback
+         print(f"❌ ERROR:\n{traceback.format_exc()}")
+         return image, {"error": str(e)}

+ def segment_multi_dense(image, density="medium"):
+     """Multi-object segmentation over a point grid"""
+     if image is None:
+         return None, {"error": "No image"}
+
+     try:
+         print(f"🎯 Starting multi-region segmentation (density: {density})...")
+         model, processor = load_model()
+         image_pil, image_np = prepare_image(image)
+         h, w = image_np.shape[:2]
+
+         # Grid size based on density
+         if density == "high":
+             grid_size = 5
+         elif density == "medium":
+             grid_size = 4
+         else:
+             grid_size = 3
+
+         # Generate grid points
+         points = []
+         for i in range(1, grid_size + 1):
+             for j in range(1, grid_size + 1):
+                 x = int(w * i / (grid_size + 1))
+                 y = int(h * j / (grid_size + 1))
+                 points.append([x, y])
+
+         print(f"📍 Using {len(points)} grid points ({grid_size}x{grid_size})...")
+
+         all_masks = []
+         all_scores = []
+
+         # Segment each point
+         for idx, point in enumerate(points):
+             inputs = processor(
+                 image_pil,
+                 input_points=[[point]],
+                 input_labels=[[1]],
+                 return_tensors="pt"
+             ).to(device)
+
+             with torch.no_grad():
+                 outputs = model(**inputs, multimask_output=True)
+
+             masks = processor.image_processor.post_process_masks(
+                 outputs.pred_masks.cpu(),
+                 inputs["original_sizes"].cpu(),
+                 inputs["reshaped_input_sizes"].cpu()
+             )[0]
+
+             scores = outputs.iou_scores.cpu().numpy().flatten()
+             best_idx = np.argmax(scores)
+
+             if masks.ndim == 4:
+                 mask = masks[0, best_idx].numpy()
+             else:
+                 mask = masks[best_idx].numpy()
+
+             # Keep only masks with a good score
+             if scores[best_idx] > 0.7:
+                 all_masks.append(refine_mask(mask))
+                 all_scores.append(scores[best_idx])
+
+         print(f"✅ Got {len(all_masks)} quality masks")
+
+         # Overlay with distinct colors
+         overlay = image_np.copy()
+
+         # HSV-based color generation
+         colors = []
+         for i in range(len(all_masks)):
+             hue = int(180 * i / max(len(all_masks), 1))
+             color_hsv = np.uint8([[[hue, 255, 200]]])
+             color_rgb = cv2.cvtColor(color_hsv, cv2.COLOR_HSV2RGB)[0][0]
+             colors.append(color_rgb)
+
+         # Apply the masks
+         for mask, color, score in zip(all_masks, colors, all_scores):
+             alpha = 0.4 + (score - 0.7) * 0.2  # higher score = stronger color
+             overlay[mask] = (
+                 overlay[mask] * (1 - alpha) +
+                 np.array(color) * alpha
+             ).astype(np.uint8)
+
+             # Contour
+             contours, _ = cv2.findContours(
+                 mask.astype(np.uint8),
+                 cv2.RETR_EXTERNAL,
+                 cv2.CHAIN_APPROX_SIMPLE
+             )
+             cv2.drawContours(overlay, contours, -1, color.tolist(), 2)
+
+         metadata = {
+             "success": True,
+             "mode": "multi_object_dense",
+             "density": density,
+             "grid_size": f"{grid_size}x{grid_size}",
+             "total_points": len(points),
+             "quality_masks": len(all_masks),
+             "avg_score": float(np.mean(all_scores)) if all_scores else 0,
+             "scores": [float(s) for s in all_scores]
+         }
+
+         print("✅ Multi-region complete!")
+         return Image.fromarray(overlay), metadata
+
+     except Exception as e:
+         import traceback
+         print(f"❌ ERROR:\n{traceback.format_exc()}")
+         return image, {"error": str(e)}

+ # Gradio interface
+ demo = gr.Blocks(title="SAM2 Boostly", theme=gr.themes.Soft())
+
+ with demo:
+     gr.Markdown("# 🎨 SAM2 Segmentation - Boostly Edition")
+     gr.Markdown("### Optimized zero-shot object segmentation")
+
+     with gr.Tab("🤖 Automatic PLUS"):
+         gr.Markdown("**Smart multi-mask combining** - automatically merges all object parts!")
+
+         with gr.Row():
+             with gr.Column():
+                 input_auto = gr.Image(type="pil", label="📸 Upload image")
+
+                 quality_radio = gr.Radio(
+                     choices=["high", "fast"],
+                     value="high",
+                     label="⚙️ Quality",
+                     info="High = more precise edges, Fast = quicker"
+                 )
+
+                 merge_checkbox = gr.Checkbox(
+                     value=True,
+                     label="🔗 Merge parts",
+                     info="Combines all detected regions (fish + fin = 1 object)"
+                 )
+
+                 btn_auto = gr.Button("🚀 Segment", variant="primary", size="lg")
+
+                 gr.Markdown("""
+                 **✨ How it works:**
+                 - SAM generates 3 different masks
+                 - With "Merge parts" ON: all masks are combined → the complete object
+                 - With it OFF: only the most precise mask is kept
+                 - ⚡ Optimized: ~10-30 seconds instead of 25 minutes!
+                 """)
+
+             with gr.Column():
+                 output_auto = gr.Image(label="✨ Segmented image")
+                 json_auto = gr.JSON(label="📊 Metadata")
+
+         btn_auto.click(
+             fn=segment_automatic,
+             inputs=[input_auto, quality_radio, merge_checkbox],
+             outputs=[output_auto, json_auto]
          )
+
+         gr.Examples(
+             examples=[],
+             inputs=input_auto,
+             label="💡 Tip: the object should be centered in the image"
+         )
+
+     with gr.Tab("🎯 Multi-Region"):
+         gr.Markdown("**Grid-based segmentation** - for several separate objects")
+
+         with gr.Row():
+             with gr.Column():
+                 input_multi = gr.Image(type="pil", label="📸 Upload image")
+
+                 density_radio = gr.Radio(
+                     choices=["high", "medium", "low"],
+                     value="medium",
+                     label="📊 Point density",
+                     info="More points = more detail, but slower"
+                 )
+
+                 btn_multi = gr.Button("🎯 Segment all regions", variant="primary", size="lg")
+
+                 gr.Markdown("""
+                 **Grid sizes:**
+                 - 🔥 High: 5x5 = 25 detection points
+                 - ⚡ Medium: 4x4 = 16 points (recommended)
+                 - 💨 Low: 3x3 = 9 points
+
+                 Each object gets its own color!
+                 """)
+
+             with gr.Column():
+                 output_multi = gr.Image(label="✨ Segmented image")
+                 json_multi = gr.JSON(label="📊 Metadata")
+
+         btn_multi.click(
+             fn=segment_multi_dense,
+             inputs=[input_multi, density_radio],
+             outputs=[output_multi, json_multi]
+         )
+
+     with gr.Tab("📡 API Documentation"):
+         gr.Markdown("### 🔗 API Endpoint")
+         gr.Code(
+             "https://EnginDev-Boostly.hf.space/api/predict",
+             label="Base URL"
+         )
+
+         gr.Markdown("### 📝 JavaScript integration (for Lovable)")
+         gr.Code('''
+ // Segmentation service
+ const HUGGINGFACE_API = 'https://EnginDev-Boostly.hf.space';
+
+ async function segmentImage(imageFile, mode = 'automatic') {
+     // Convert the file to Base64
+     const base64 = await new Promise((resolve) => {
+         const reader = new FileReader();
+         reader.onloadend = () => resolve(reader.result);
+         reader.readAsDataURL(imageFile);
+     });
+
+     // API call
+     const response = await fetch(`${HUGGINGFACE_API}/api/predict`, {
+         method: 'POST',
+         headers: {'Content-Type': 'application/json'},
+         body: JSON.stringify({
+             data: [base64, "high", true], // [image, quality, merge]
+             fn_index: mode === 'automatic' ? 0 : 1
+         })
+     });
+
+     const result = await response.json();
+
+     return {
+         segmentedImage: result.data[0], // Base64 segmented image
+         metadata: result.data[1]        // JSON with details
+     };
+ }
+
+ // Usage:
+ const result = await segmentImage(myImageFile, 'automatic');
+ console.log('Mask covers:', result.metadata.mask_percentage + '%');
+ ''', language="javascript")
+
+         gr.Markdown("### ⚙️ Parameters")
+         gr.Markdown("""
+         **fn_index:**
+         - `0` = Automatic PLUS (recommended for single objects)
+         - `1` = Multi-Region (for several objects)
+
+         **quality:**
+         - `"high"` = precise edges, Gaussian blur, refinement (~20-30s)
+         - `"fast"` = faster, less post-processing (~10-15s)
+
+         **merge (fn_index=0 only):**
+         - `true` = combines all masks → the complete object
+         - `false` = best mask only → just the main part
+
+         **density (fn_index=1 only):**
+         - `"high"` = 5x5 grid = 25 points
+         - `"medium"` = 4x4 grid = 16 points
+         - `"low"` = 3x3 grid = 9 points
+         """)
+
+         gr.Markdown("### 📊 Response Format")
+         gr.Code('''
+ {
+     "data": [
+         "data:image/png;base64,iVBORw0KGgo...", // segmented image
+         {
+             "success": true,
+             "mode": "automatic_plus",
+             "masks_combined": 3,
+             "mask_percentage": 12.5,
+             "num_contours": 1,
+             "all_scores": [0.998, 0.583, 0.864]
+         }
+     ]
+ }
+ ''', language="json")

+ if __name__ == "__main__":
+     print("🌐 Launching Boostly SAM2 v2.1...")
+     demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
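
For reference, the same endpoints can also be exercised from Python with the official gradio_client package instead of the raw fetch shown in the API tab. This is a minimal sketch only, not part of the commit: the Space id "EnginDev/Boostly" is inferred from the base URL above, and the api_name relies on Gradio's default of exposing each event handler under its function name (fn_index=0 would select the same endpoint, matching the JavaScript example).

# Minimal client sketch, assuming the Space id "EnginDev/Boostly" and Gradio's
# default endpoint naming ("/" + function name); treat both as assumptions.
from gradio_client import Client, handle_file

client = Client("EnginDev/Boostly")  # assumed Space id

segmented_image, metadata = client.predict(
    handle_file("fish.jpg"),        # image: local path or URL
    "high",                         # quality: "high" or "fast"
    True,                           # merge_parts: combine all good masks
    api_name="/segment_automatic",  # assumed default endpoint name
)
print(f"Mask covers {metadata['mask_percentage']:.1f}% of the image")

gradio_client should return the gr.Image output as a local file path and the gr.JSON output as a Python dict, so no Base64 handling is needed on the client side.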