Spaces:

fariasultana
/

MiniMind-API

Runtime error

App Files Community

fariasultana committed 17 days ago

Commit

33ff849

verified ·

1 Parent(s): 3791867

fix: Simplified stable Gradio 4.x app

Browse files

Files changed (1) hide show

app.py +55 -187

app.py CHANGED Viewed

@@ -1,190 +1,58 @@
-"""
-MiniMind Max2 API - Enhanced with Thinking, Vision, and Agentic Capabilities
-HuggingFace Spaces Gradio Application
-"""
 import gradio as gr
-import json
-import time
-from typing import Dict, Any, List, Optional, Tuple
-from dataclasses import dataclass
-from enum import Enum
-# ============================================================================
-# Configuration
-# ============================================================================
-@dataclass
-class ModelConfig:
-    """Model configuration."""
-    hidden_size: int = 1024
-    num_layers: int = 12
-    num_attention_heads: int = 16
-    num_key_value_heads: int = 4
-    intermediate_size: int = 2816
-    vocab_size: int = 102400
-    num_experts: int = 8
-    num_experts_per_token: int = 2
-    max_seq_length: int = 32768
-class ThinkingMode(Enum):
-    """Thinking modes."""
-    INTERLEAVED = "interleaved"
-    SEQUENTIAL = "sequential"
-    HIDDEN = "hidden"
-# ============================================================================
-# Thinking Engine
-# ============================================================================
-class ThinkingEngine:
-    """Simulated thinking engine for demonstration."""
-    def __init__(self):
-        self.config = {
-            "think_start": "<Thinking>",
-            "think_end": "</Thinking>",
-            "step_marker": "<step>",
-            "reflect_marker": "<reflect>",
-            "conclude_marker": "<conclude>",
-        }
-    def think(self, query: str, mode: ThinkingMode = ThinkingMode.INTERLEAVED, show_thinking: bool = True) -> Dict[str, Any]:
-        """Generate response with thinking trace."""
-        steps = [
-            {"type": "reasoning", "content": f"Analyzing: '{query[:50]}...'", "confidence": 0.95},
-            {"type": "planning", "content": "Planning approach with MoE routing...", "confidence": 0.90},
-            {"type": "generation", "content": "Generating with 25% active parameters.", "confidence": 0.92},
-            {"type": "reflection", "content": "Verifying response quality.", "confidence": 0.88},
-        ]
-        thinking_trace = self._format_thinking(steps) if show_thinking else None
-        response = self._generate_response(query)
-        return {"response": response, "thinking": thinking_trace, "steps": steps, "mode": mode.value}
-    def _format_thinking(self, steps: List[Dict]) -> str:
-        cfg = self.config
-        lines = [cfg["think_start"]]
-        for i, step in enumerate(steps):
-            marker = cfg["step_marker"] if step["type"] != "reflection" else cfg["reflect_marker"]
-            lines.append(f"{marker} Step {i+1} ({step['type']}): {step['content']}")
-            lines.append(f"  Confidence: {step['confidence']:.0%}")
-        lines.append(cfg["conclude_marker"] + " Formulating final response...")
-        lines.append(cfg["think_end"])
-        return "\n".join(lines)
-    def _generate_response(self, query: str) -> str:
-        responses = {
-            "hello": "Hello! I'm MiniMind Max2, an efficient edge-deployed language model. How can I help?",
-            "help": "I can help with text generation, code assistance, reasoning, function calling, and more!",
-        }
-        query_lower = query.lower()
-        for key, response in responses.items():
-            if key in query_lower:
-                return response
-        return f"Processing your query with MoE architecture (8 experts, top-2 routing):\n\n{query}\n\nResponse generated with 25% active parameters for maximum efficiency."
-# ============================================================================
-# MDX & Templates
-# ============================================================================
-class MDXRenderer:
-    @staticmethod
-    def linear_process_flow(steps: List[Dict]) -> str:
-        html = '<div style="display:flex;gap:10px;flex-wrap:wrap;">'
-        for i, step in enumerate(steps):
-            html += f'<div style="background:#e3f2fd;padding:10px;border-radius:8px;"><b>{i+1}.</b> {step.get("title", "Step")}<br><small>{step.get("description", "")}</small></div>'
-            if i < len(steps)-1:
-                html += '<div style="font-size:20px;color:#1976d2;">→</div>'
-        html += '</div>'
-        return html
-class ToolRegistry:
-    TOOLS = {
-        "search": {"description": "Search the web"},
-        "calculate": {"description": "Math calculations"},
-        "code_execute": {"description": "Execute Python code"},
-    }
-    @classmethod
-    def execute(cls, tool: str, **kwargs) -> str:
-        if tool == "calculate":
-            try:
-                return f"Result: {eval(kwargs.get('expression', '0'), {'__builtins__': {}}, {})}"
-            except:
-                return "Error"
-        return f"Executed {tool}"
-# Initialize
-thinking_engine = ThinkingEngine()
-def respond(message, history, mode, show, temp, max_tok):
-    result = thinking_engine.think(message, ThinkingMode(mode.lower()), show)
-    history.append([message, result["response"]])
-    return history, "", result.get("thinking", "Hidden")
-def get_model_info():
-    return """
-# MiniMind Max2
-## Architecture
 - **MoE**: 8 experts, top-2 routing (25% activation)
-- **GQA**: 16 Q-heads, 4 KV-heads (4x memory reduction)
-- **Hidden Size**: 1024 | **Layers**: 12 | **Vocab**: 102,400
-## Capabilities
-- Chain-of-Thought Reasoning
-- Vision Adapter (SigLIP)
-- Function Calling
-- Fill-in-the-Middle Coding
-- Speculative Decoding
-- NPU Export (TFLite/QNN)
-"""
-# Gradio UI
-with gr.Blocks(title="MiniMind Max2", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🧠 MiniMind Max2 API\n### Efficient Edge AI with Interleaved Thinking")
-    with gr.Tabs():
-        with gr.Tab("💬 Chat"):
-            with gr.Row():
-                with gr.Column(scale=2):
-                    chatbot = gr.Chatbot(height=400)
-                    msg = gr.Textbox(placeholder="Ask anything...")
-                    with gr.Row():
-                        submit = gr.Button("Send", variant="primary")
-                        clear = gr.Button("Clear")
-                with gr.Column(scale=1):
-                    mode = gr.Radio(["Interleaved", "Sequential", "Hidden"], value="Interleaved", label="Thinking Mode")
-                    show = gr.Checkbox(label="Show Thinking", value=True)
-                    temp = gr.Slider(0, 1, 0.7, label="Temperature")
-                    tokens = gr.Slider(50, 2000, 500, label="Max Tokens")
-                    thinking = gr.Textbox(label="Thinking Trace", lines=8)
-            submit.click(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
-            msg.submit(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
-            clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, thinking])
-        with gr.Tab("🔧 Tools"):
-            gr.Markdown("### Function Calling")
-            tool = gr.Dropdown(["calculate", "search", "code_execute"], value="calculate", label="Tool")
-            inp = gr.Textbox(value="2 + 2 * 3", label="Input")
-            btn = gr.Button("Execute", variant="primary")
-            out = gr.Textbox(label="Result")
-            btn.click(lambda t, i: ToolRegistry.execute(t, expression=i, query=i, code=i), [tool, inp], out)
-        with gr.Tab("ℹ️ Info"):
-            gr.Markdown(get_model_info())
-    gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | Apache 2.0")
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+def chat(message, history):
+    thinking = """<Thinking>
+<step> Analyzing query...
+<step> MoE routing (2/8 experts)
+<step> Generating response
+</Thinking>"""
+    response = f"**MiniMind Max2**: {message}\n\nProcessed with MoE (25% active params)."
+    return response, thinking
+def calculate(expr):
+    try:
+        result = eval(expr, {"__builtins__": {}}, {})
+        return f"Result: {result}"
+    except:
+        return "Error: Invalid expression"
+with gr.Blocks(title="MiniMind Max2") as demo:
+    gr.Markdown("# 🧠 MiniMind Max2 API\n*Efficient Edge AI with MoE Architecture*")
+    with gr.Tab("💬 Chat"):
+        chatbot = gr.Chatbot(type="messages", height=300)
+        msg = gr.Textbox(label="Message", placeholder="Ask anything...")
+        thinking_box = gr.Textbox(label="Thinking Trace", lines=5)
+        def respond(message, chat_history):
+            response, thinking = chat(message, chat_history)
+            chat_history.append({"role": "user", "content": message})
+            chat_history.append({"role": "assistant", "content": response})
+            return chat_history, "", thinking
+        msg.submit(respond, [msg, chatbot], [chatbot, msg, thinking_box])
+        gr.Button("Send", variant="primary").click(respond, [msg, chatbot], [chatbot, msg, thinking_box])
+    with gr.Tab("🔧 Tools"):
+        expr = gr.Textbox(label="Expression", value="2 + 2 * 3")
+        result = gr.Textbox(label="Result")
+        gr.Button("Calculate", variant="primary").click(calculate, expr, result)
+    with gr.Tab("ℹ️ Info"):
+        gr.Markdown("""
+## MiniMind Max2 Architecture
 - **MoE**: 8 experts, top-2 routing (25% activation)
+- **GQA**: 16 Q-heads, 4 KV-heads
+- **Capabilities**: Reasoning, Vision, Coding, Tools
+## Docker
+```bash
+docker pull sultanafariabd/minimind-max2
+docker run -p 8000:8000 sultanafariabd/minimind-max2
+```
+        """)
+    gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | [Collection](https://huggingface.co/collections/fariasultana/minimind-max2-edge-ai-models-69321e758f98df18d4f4ec05)")
+demo.launch()