EnginDev committed
Commit 82a6cd8 · verified · 1 Parent(s): ca4455d

Update app.py

Files changed (1): app.py +45 -444
app.py CHANGED
@@ -1,462 +1,63 @@
  import gradio as gr
  import torch
  import numpy as np
- from PIL import Image
- import cv2
-
- print("🚀 Starting SAM2 App v2.1 - OPTIMIZED...")
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- print(f"📱 Using device: {device}")
-
- model = None
- processor = None
-
- def load_model():
-     global model, processor
-     if model is None:
-         print("📦 Loading SAM model...")
-         try:
-             from transformers import SamModel, SamProcessor
-
-             model_name = "facebook/sam-vit-large"
-
-             processor = SamProcessor.from_pretrained(model_name)
-             model = SamModel.from_pretrained(model_name)
-             model.to(device)
-             print(f"✅ Model loaded: {model_name}")
-         except Exception as e:
-             print(f"❌ Error: {e}, falling back to base model")
-             model_name = "facebook/sam-vit-base"
-             processor = SamProcessor.from_pretrained(model_name)
-             model = SamModel.from_pretrained(model_name)
-             model.to(device)
-     return model, processor
-
- def prepare_image(image, max_size=1024):
-     if isinstance(image, np.ndarray):
-         image_pil = Image.fromarray(image)
-     else:
-         image_pil = image
-
-     if image_pil.mode != 'RGB':
-         image_pil = image_pil.convert('RGB')
-
-     image_np = np.array(image_pil)
-     h, w = image_np.shape[:2]
-
-     if max(h, w) > max_size:
-         scale = max_size / max(h, w)
-         new_h, new_w = int(h * scale), int(w * scale)
-         image_pil = image_pil.resize((new_w, new_h), Image.Resampling.LANCZOS)
-         image_np = np.array(image_pil)
-
-     return image_pil, image_np
-
- def refine_mask(mask, kernel_size=5):
-     """Smooths mask edges"""
-     mask_uint8 = (mask > 0).astype(np.uint8) * 255
-     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
-     mask_closed = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, kernel)
-     mask_refined = cv2.morphologyEx(mask_closed, cv2.MORPH_OPEN, kernel)
-     return mask_refined > 0
-
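The close-then-open pass fills pinholes and removes speckles before contours are drawn. A minimal standalone check of the same two OpenCV calls, on a hypothetical toy mask:

import numpy as np
import cv2

# Hypothetical 9x9 mask: a square object with a one-pixel hole and a stray speckle.
toy = np.zeros((9, 9), dtype=np.uint8)
toy[2:7, 2:7] = 255   # the object
toy[4, 4] = 0         # pinhole -> filled by MORPH_CLOSE
toy[0, 8] = 255       # speckle -> removed by MORPH_OPEN

kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
closed = cv2.morphologyEx(toy, cv2.MORPH_CLOSE, kernel)
smooth = cv2.morphologyEx(closed, cv2.MORPH_OPEN, kernel)
# smooth keeps the filled square and drops the lone pixel.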
- def segment_automatic(image, quality="high", merge_parts=True):
-     """
-     OPTIMIZED automatic segmentation.
-     Fast & precise - combines several masks.
-     """
-     if image is None:
-         return None, {"error": "No image uploaded"}
-
-     try:
-         print(f"🔄 Starting segmentation (quality: {quality}, merge: {merge_parts})...")
-         model, processor = load_model()
-
-         image_pil, image_np = prepare_image(image)
-         h, w = image_np.shape[:2]
-
-         center_x, center_y = w // 2, h // 2
-
-         # Single-point inference with multimask_output
-         inputs = processor(
-             image_pil,
-             input_points=[[[center_x, center_y]]],
-             input_labels=[[1]],
-             return_tensors="pt"
-         ).to(device)
-
-         print("🧠 Running inference...")
          with torch.no_grad():
-             outputs = model(**inputs, multimask_output=True)
-
-         masks = processor.image_processor.post_process_masks(
              outputs.pred_masks.cpu(),
              inputs["original_sizes"].cpu(),
              inputs["reshaped_input_sizes"].cpu()
-         )[0]
-
-         scores = outputs.iou_scores.cpu().numpy()
-         if scores.ndim > 1:
-             scores = scores.flatten()
-
-         print(f"✅ Got {len(scores)} masks with scores: {scores}")
-
-         # SMART MERGING: combine all good masks
-         if merge_parts:
-             combined_mask = np.zeros((h, w), dtype=bool)
-             masks_used = 0
-
-             for idx, score in enumerate(scores):
-                 if score > 0.5:  # only masks with a good score
-                     if masks.ndim == 4:
-                         mask = masks[0, idx].numpy()
-                     else:
-                         mask = masks[idx].numpy()
-
-                     # OR combination (very fast!)
-                     combined_mask = combined_mask | (mask > 0)
-                     masks_used += 1
-                     print(f" ✅ Added mask {idx} (score: {score:.3f})")
-
-             final_mask = combined_mask
-             print(f"🔗 Combined {masks_used} masks into one!")
-         else:
-             # Best mask only
-             best_idx = np.argmax(scores)
-             if masks.ndim == 4:
-                 final_mask = masks[0, best_idx].numpy() > 0
-             else:
-                 final_mask = masks[best_idx].numpy() > 0
-             masks_used = 1
-             print(f"✅ Using best mask (score: {scores[best_idx]:.3f})")
-
-         # Refinement for smooth edges
-         if quality == "high":
-             print("🎨 Refining mask...")
-             final_mask = refine_mask(final_mask, kernel_size=7)
-
-         # Build the overlay
-         overlay = image_np.copy()
-         color = np.array([255, 80, 180])  # pink
-
-         mask_float = final_mask.astype(float)
-         if quality == "high":
-             mask_float = cv2.GaussianBlur(mask_float, (5, 5), 0)
-
-         # Colored overlay
-         for c in range(3):
-             overlay[:, :, c] = (
-                 overlay[:, :, c] * (1 - mask_float * 0.65) +
-                 color[c] * mask_float * 0.65
-             )
-
-         # Draw a yellow contour
-         contours, _ = cv2.findContours(
-             final_mask.astype(np.uint8),
-             cv2.RETR_EXTERNAL,
-             cv2.CHAIN_APPROX_SIMPLE
          )
-         cv2.drawContours(overlay, contours, -1, (255, 255, 0), 3)
-
-         metadata = {
-             "success": True,
-             "mode": "automatic_plus" if merge_parts else "automatic",
-             "quality": quality,
-             "masks_combined": masks_used,
-             "all_scores": scores.tolist(),
-             "image_size": [w, h],
-             "mask_area": int(np.sum(final_mask)),
-             "mask_percentage": float(np.sum(final_mask) / (h * w) * 100),
-             "num_contours": len(contours),
-             "device": device
-         }
-
-         print("✅ Segmentation complete!")
-         return Image.fromarray(overlay.astype(np.uint8)), metadata
-
-     except Exception as e:
-         import traceback
-         print(f"❌ ERROR:\n{traceback.format_exc()}")
-         return image, {"error": str(e)}
-
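The merge branch above is nothing more than a boolean OR over every mask whose IoU score clears 0.5. A standalone sketch with three hypothetical masks:

import numpy as np

# Hypothetical multimask output: three 4x4 masks and their IoU scores.
masks = [np.zeros((4, 4), dtype=bool) for _ in range(3)]
masks[0][:2, :] = True    # top half of the object
masks[1][2:, :] = True    # bottom half of the object
masks[2][0, 0] = True     # low-confidence fragment
scores = [0.91, 0.84, 0.42]

combined = np.zeros((4, 4), dtype=bool)
for mask, score in zip(masks, scores):
    if score > 0.5:         # same threshold as segment_automatic
        combined |= mask    # union of all accepted parts

# combined is now all True: the two high-score halves merge into one
# object, while the 0.42 fragment is ignored.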
- def segment_multi_dense(image, density="medium"):
-     """Multi-object segmentation on a grid"""
-     if image is None:
-         return None, {"error": "No image"}
-
-     try:
-         print(f"🎯 Starting multi-region segmentation (density: {density})...")
-         model, processor = load_model()
-         image_pil, image_np = prepare_image(image)
-         h, w = image_np.shape[:2]
-
-         # Grid size based on density
-         if density == "high":
-             grid_size = 5
-         elif density == "medium":
-             grid_size = 4
-         else:
-             grid_size = 3
-
-         # Generate grid points
-         points = []
-         for i in range(1, grid_size + 1):
-             for j in range(1, grid_size + 1):
-                 x = int(w * i / (grid_size + 1))
-                 y = int(h * j / (grid_size + 1))
-                 points.append([x, y])
-
-         print(f"📍 Using {len(points)} grid points ({grid_size}x{grid_size})...")
-
-         all_masks = []
-         all_scores = []
-
-         # Segment at each point
-         for idx, point in enumerate(points):
-             inputs = processor(
-                 image_pil,
-                 input_points=[[point]],
-                 input_labels=[[1]],
-                 return_tensors="pt"
-             ).to(device)
-
-             with torch.no_grad():
-                 outputs = model(**inputs, multimask_output=True)
-
-             masks = processor.image_processor.post_process_masks(
-                 outputs.pred_masks.cpu(),
-                 inputs["original_sizes"].cpu(),
-                 inputs["reshaped_input_sizes"].cpu()
-             )[0]
-
-             scores = outputs.iou_scores.cpu().numpy().flatten()
-             best_idx = np.argmax(scores)
-
-             if masks.ndim == 4:
-                 mask = masks[0, best_idx].numpy()
-             else:
-                 mask = masks[best_idx].numpy()
-
-             # Keep only masks with a good score
-             if scores[best_idx] > 0.7:
-                 all_masks.append(refine_mask(mask))
-                 all_scores.append(scores[best_idx])
-
-         print(f"✅ Got {len(all_masks)} quality masks")
-
-         # Overlay with distinct colors
-         overlay = image_np.copy()
-
-         # HSV-based color generation
-         colors = []
-         for i in range(len(all_masks)):
-             hue = int(180 * i / max(len(all_masks), 1))
-             color_hsv = np.uint8([[[hue, 255, 200]]])
-             color_rgb = cv2.cvtColor(color_hsv, cv2.COLOR_HSV2RGB)[0][0]
-             colors.append(color_rgb)
-
-         # Apply the masks
-         for mask, color, score in zip(all_masks, colors, all_scores):
-             alpha = 0.4 + (score - 0.7) * 0.2  # higher score = stronger color
-             overlay[mask] = (
-                 overlay[mask] * (1 - alpha) +
-                 np.array(color) * alpha
-             ).astype(np.uint8)
-
-             # Contour
-             contours, _ = cv2.findContours(
-                 mask.astype(np.uint8),
-                 cv2.RETR_EXTERNAL,
-                 cv2.CHAIN_APPROX_SIMPLE
-             )
-             cv2.drawContours(overlay, contours, -1, color.tolist(), 2)
-
-         metadata = {
-             "success": True,
-             "mode": "multi_object_dense",
-             "density": density,
-             "grid_size": f"{grid_size}x{grid_size}",
-             "total_points": len(points),
-             "quality_masks": len(all_masks),
-             "avg_score": float(np.mean(all_scores)) if all_scores else 0,
-             "scores": [float(s) for s in all_scores]
-         }
-
-         print("✅ Multi-region complete!")
-         return Image.fromarray(overlay), metadata
-
-     except Exception as e:
-         import traceback
-         print(f"❌ ERROR:\n{traceback.format_exc()}")
-         return image, {"error": str(e)}
-
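The grid formula places prompt points on an evenly spaced interior lattice, never on the image border. Worked through for a hypothetical 600x400 upload at "low" density (grid_size=3):

w, h, grid_size = 600, 400, 3   # hypothetical image, "low" density

points = [[int(w * i / (grid_size + 1)), int(h * j / (grid_size + 1))]
          for i in range(1, grid_size + 1)
          for j in range(1, grid_size + 1)]

# 9 points with x in {150, 300, 450} and y in {100, 200, 300}.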
- # Gradio interface
- demo = gr.Blocks(title="SAM2 Boostly", theme=gr.themes.Soft())
-
- with demo:
-     gr.Markdown("# 🎨 SAM2 Segmentation - Boostly Edition")
-     gr.Markdown("### Optimized zero-shot object segmentation")
-
-     with gr.Tab("🤖 Automatic PLUS"):
-         gr.Markdown("**Smart multi-mask combining** - automatically merges all object parts!")
-
-         with gr.Row():
-             with gr.Column():
-                 input_auto = gr.Image(type="pil", label="📸 Upload image")
-
-                 quality_radio = gr.Radio(
-                     choices=["high", "fast"],
-                     value="high",
-                     label="⚙️ Quality",
-                     info="High = more precise edges, Fast = quicker"
-                 )
-
-                 merge_checkbox = gr.Checkbox(
-                     value=True,
-                     label="🔗 Merge parts",
-                     info="Combines all detected regions (fish + fin = 1 object)"
-                 )
-
-                 btn_auto = gr.Button("🚀 Segment", variant="primary", size="lg")
-
-                 gr.Markdown("""
-                 **✨ How it works:**
-                 - SAM generates 3 different masks
-                 - With "Merge parts" ON: all of them are combined -> complete object
-                 - With it OFF: only the most precise mask is kept
-                 - ⚡ Optimized: ~10-30 seconds instead of 25 minutes!
-                 """)
-
-             with gr.Column():
-                 output_auto = gr.Image(label="✨ Segmented image")
-                 json_auto = gr.JSON(label="📊 Metadata")
-
-         btn_auto.click(
-             fn=segment_automatic,
-             inputs=[input_auto, quality_radio, merge_checkbox],
-             outputs=[output_auto, json_auto]
-         )
-
-         gr.Examples(
-             examples=[],
-             inputs=input_auto,
-             label="💡 Tip: the object should be centered in the image"
-         )
-
-     with gr.Tab("🎯 Multi-Region"):
-         gr.Markdown("**Grid-based segmentation** - for several separate objects")
-
-         with gr.Row():
-             with gr.Column():
-                 input_multi = gr.Image(type="pil", label="📸 Upload image")
-
-                 density_radio = gr.Radio(
-                     choices=["high", "medium", "low"],
-                     value="medium",
-                     label="📊 Point density",
-                     info="More points = more detail, but slower"
-                 )
-
-                 btn_multi = gr.Button("🎯 Segment all regions", variant="primary", size="lg")
-
-                 gr.Markdown("""
-                 **Grid sizes:**
-                 - 🔥 High: 5x5 = 25 detection points
-                 - ⚡ Medium: 4x4 = 16 points (recommended)
-                 - 💨 Low: 3x3 = 9 points
-
-                 Every object gets its own color!
-                 """)
-
-             with gr.Column():
-                 output_multi = gr.Image(label="✨ Segmented image")
-                 json_multi = gr.JSON(label="📊 Metadata")
-
-         btn_multi.click(
-             fn=segment_multi_dense,
-             inputs=[input_multi, density_radio],
-             outputs=[output_multi, json_multi]
-         )
-
-     with gr.Tab("📡 API Documentation"):
-         gr.Markdown("### 🔗 API Endpoint")
-         gr.Code(
-             "https://EnginDev-Boostly.hf.space/api/predict",
-             label="Base URL"
          )
-
-         gr.Markdown("### 📝 JavaScript integration (for Lovable)")
-         gr.Code('''
- // Segmentation service
- const HUGGINGFACE_API = 'https://EnginDev-Boostly.hf.space';
- async function segmentImage(imageFile, mode = 'automatic') {
-     // Convert the file to Base64
-     const base64 = await new Promise((resolve) => {
-         const reader = new FileReader();
-         reader.onloadend = () => resolve(reader.result);
-         reader.readAsDataURL(imageFile);
-     });
-
-     // API call
-     const response = await fetch(`${HUGGINGFACE_API}/api/predict`, {
-         method: 'POST',
-         headers: {'Content-Type': 'application/json'},
-         body: JSON.stringify({
-             data: [base64, "high", true], // [image, quality, merge]
-             fn_index: mode === 'automatic' ? 0 : 1
-         })
-     });
-
-     const result = await response.json();
-
-     return {
-         segmentedImage: result.data[0], // Base64 segmented image
-         metadata: result.data[1]        // JSON with details
-     };
- }
- // Usage:
- const result = await segmentImage(myImageFile, 'automatic');
- console.log('Mask covers:', result.metadata.mask_percentage + '%');
- ''', language="javascript")
-
-         gr.Markdown("### ⚙️ Parameters")
-         gr.Markdown("""
-         **fn_index:**
-         - `0` = Automatic PLUS (recommended for single objects)
-         - `1` = Multi-Region (for several objects)
-
-         **quality:**
-         - `"high"` = precise edges, Gaussian blur, refinement (~20-30 s)
-         - `"fast"` = quicker, less post-processing (~10-15 s)
-
-         **merge (fn_index=0 only):**
-         - `true` = combines all masks -> complete object
-         - `false` = best mask only -> main part only
-
-         **density (fn_index=1 only):**
-         - `"high"` = 5x5 grid = 25 points
-         - `"medium"` = 4x4 grid = 16 points
-         - `"low"` = 3x3 grid = 9 points
-         """)
-
-         gr.Markdown("### 📊 Response Format")
-         gr.Code('''
- {
-     "data": [
-         "data:image/png;base64,iVBORw0KGgo...",  // segmented image
-         {
-             "success": true,
-             "mode": "automatic_plus",
-             "masks_combined": 3,
-             "mask_percentage": 12.5,
-             "num_contours": 1,
-             "all_scores": [0.998, 0.583, 0.864]
-         }
-     ]
- }
- ''', language="json")
-
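The documented endpoint can also be exercised from Python. A minimal counterpart to the JavaScript snippet above, assuming the gradio_client package and the fn_index mapping documented in this tab (the Space id behind the .hf.space URL is assumed to be EnginDev/Boostly):

from gradio_client import Client  # pip install gradio_client

client = Client("EnginDev/Boostly")

# fn_index=0 -> "Automatic PLUS", arguments [image, quality, merge]
segmented_image, metadata = client.predict("fish.jpg", "high", True, fn_index=0)
print(metadata["mask_percentage"])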
- if __name__ == "__main__":
-     print("🌐 Launching Boostly SAM2 v2.1...")
-     demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)

1
  import gradio as gr
2
+ from transformers import SamProcessor, SamModel
3
+ from PIL import Image
4
  import torch
5
  import numpy as np
6
+ import random
7
+ import traceback
 
 
 
 
 
8
 
9
+ # Modell laden
10
+ model_id = "facebook/sam-vit-base"
11
+ processor = SamProcessor.from_pretrained(model_id)
12
+ model = SamModel.from_pretrained(model_id)
13
 
14
+ def random_color():
15
+ """Zufällige RGB-Farbe"""
16
+ return [random.randint(0, 255) for _ in range(3)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ def segment_image(image):
19
+ try:
20
+ device = torch.device("cpu")
21
+ model.to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ inputs = processor(images=image, return_tensors="pt").to(device)
 
 
 
 
 
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  with torch.no_grad():
26
+ outputs = model(**inputs)
27
+
28
+ masks = processor.post_process_masks(
29
  outputs.pred_masks.cpu(),
30
  inputs["original_sizes"].cpu(),
31
  inputs["reshaped_input_sizes"].cpu()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ mask_arrays = masks[0].numpy()
35
+ img_array = np.array(image)
36
+ overlay = np.zeros_like(img_array, dtype=np.uint8)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ # Jede Maske farbig einfärben
39
+ for mask in mask_arrays:
40
+ mask = mask[0]
41
+ color = random_color()
42
+ for c in range(3):
43
+ overlay[:, :, c] = np.where(mask > 0.5, color[c], overlay[:, :, c])
44
 
45
+ # Stärkere Farbmischung (80 % Maske / 20 % Original)
46
+ blended = Image.fromarray(
47
+ (0.2 * img_array + 0.8 * overlay).astype(np.uint8)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ return blended
51
+
52
+ except Exception:
53
+ return f"Fehler:\n{traceback.format_exc()}"
54
+
55
+ demo = gr.Interface(
56
+ fn=segment_image,
57
+ inputs=gr.Image(type="pil", label="Upload your fish image"),
58
+ outputs=gr.Image(type="pil", label="Segmented Output"),
59
+ title="FishBoost – Colorful SAM Segmentation (Enhanced Colors)",
60
+ description="Erzeugt kräftige, farbige Masken mit Meta SAM (CPU-Version)."
61
+ )
62
+
63
+ demo.launch()