fariasultana committed on
Commit
33ff849
·
verified ·
1 Parent(s): 3791867

fix: Simplified stable Gradio 4.x app

Browse files
Files changed (1) hide show
  1. app.py +55 -187
app.py CHANGED
@@ -1,190 +1,58 @@
1
- """
2
- MiniMind Max2 API - Enhanced with Thinking, Vision, and Agentic Capabilities
3
- HuggingFace Spaces Gradio Application
4
- """
5
-
6
  import gradio as gr
7
- import json
8
- import time
9
- from typing import Dict, Any, List, Optional, Tuple
10
- from dataclasses import dataclass
11
- from enum import Enum
12
-
13
-
14
- # ============================================================================
15
- # Configuration
16
- # ============================================================================
17
-
18
- @dataclass
19
- class ModelConfig:
20
- """Model configuration."""
21
- hidden_size: int = 1024
22
- num_layers: int = 12
23
- num_attention_heads: int = 16
24
- num_key_value_heads: int = 4
25
- intermediate_size: int = 2816
26
- vocab_size: int = 102400
27
- num_experts: int = 8
28
- num_experts_per_token: int = 2
29
- max_seq_length: int = 32768
30
-
31
-
32
- class ThinkingMode(Enum):
33
- """Thinking modes."""
34
- INTERLEAVED = "interleaved"
35
- SEQUENTIAL = "sequential"
36
- HIDDEN = "hidden"
37
-
38
-
39
- # ============================================================================
40
- # Thinking Engine
41
- # ============================================================================
42
-
43
- class ThinkingEngine:
44
- """Simulated thinking engine for demonstration."""
45
-
46
- def __init__(self):
47
- self.config = {
48
- "think_start": "<Thinking>",
49
- "think_end": "</Thinking>",
50
- "step_marker": "<step>",
51
- "reflect_marker": "<reflect>",
52
- "conclude_marker": "<conclude>",
53
- }
54
-
55
- def think(self, query: str, mode: ThinkingMode = ThinkingMode.INTERLEAVED, show_thinking: bool = True) -> Dict[str, Any]:
56
- """Generate response with thinking trace."""
57
- steps = [
58
- {"type": "reasoning", "content": f"Analyzing: '{query[:50]}...'", "confidence": 0.95},
59
- {"type": "planning", "content": "Planning approach with MoE routing...", "confidence": 0.90},
60
- {"type": "generation", "content": "Generating with 25% active parameters.", "confidence": 0.92},
61
- {"type": "reflection", "content": "Verifying response quality.", "confidence": 0.88},
62
- ]
63
- thinking_trace = self._format_thinking(steps) if show_thinking else None
64
- response = self._generate_response(query)
65
- return {"response": response, "thinking": thinking_trace, "steps": steps, "mode": mode.value}
66
-
67
- def _format_thinking(self, steps: List[Dict]) -> str:
68
- cfg = self.config
69
- lines = [cfg["think_start"]]
70
- for i, step in enumerate(steps):
71
- marker = cfg["step_marker"] if step["type"] != "reflection" else cfg["reflect_marker"]
72
- lines.append(f"{marker} Step {i+1} ({step['type']}): {step['content']}")
73
- lines.append(f" Confidence: {step['confidence']:.0%}")
74
- lines.append(cfg["conclude_marker"] + " Formulating final response...")
75
- lines.append(cfg["think_end"])
76
- return "\n".join(lines)
77
-
78
- def _generate_response(self, query: str) -> str:
79
- responses = {
80
- "hello": "Hello! I'm MiniMind Max2, an efficient edge-deployed language model. How can I help?",
81
- "help": "I can help with text generation, code assistance, reasoning, function calling, and more!",
82
- }
83
- query_lower = query.lower()
84
- for key, response in responses.items():
85
- if key in query_lower:
86
- return response
87
- return f"Processing your query with MoE architecture (8 experts, top-2 routing):\n\n{query}\n\nResponse generated with 25% active parameters for maximum efficiency."
88
-
89
-
90
- # ============================================================================
91
- # MDX & Templates
92
- # ============================================================================
93
-
94
- class MDXRenderer:
95
- @staticmethod
96
- def linear_process_flow(steps: List[Dict]) -> str:
97
- html = '<div style="display:flex;gap:10px;flex-wrap:wrap;">'
98
- for i, step in enumerate(steps):
99
- html += f'<div style="background:#e3f2fd;padding:10px;border-radius:8px;"><b>{i+1}.</b> {step.get("title", "Step")}<br><small>{step.get("description", "")}</small></div>'
100
- if i < len(steps)-1:
101
- html += '<div style="font-size:20px;color:#1976d2;">→</div>'
102
- html += '</div>'
103
- return html
104
-
105
 
106
- class ToolRegistry:
107
- TOOLS = {
108
- "search": {"description": "Search the web"},
109
- "calculate": {"description": "Math calculations"},
110
- "code_execute": {"description": "Execute Python code"},
111
- }
112
-
113
- @classmethod
114
- def execute(cls, tool: str, **kwargs) -> str:
115
- if tool == "calculate":
116
- try:
117
- return f"Result: {eval(kwargs.get('expression', '0'), {'__builtins__': {}}, {})}"
118
- except:
119
- return "Error"
120
- return f"Executed {tool}"
121
-
122
-
123
- # Initialize
124
- thinking_engine = ThinkingEngine()
125
-
126
-
127
- def respond(message, history, mode, show, temp, max_tok):
128
- result = thinking_engine.think(message, ThinkingMode(mode.lower()), show)
129
- history.append([message, result["response"]])
130
- return history, "", result.get("thinking", "Hidden")
131
-
132
-
133
- def get_model_info():
134
- return """
135
- # MiniMind Max2
136
-
137
- ## Architecture
 
 
 
 
 
 
 
 
 
 
138
  - **MoE**: 8 experts, top-2 routing (25% activation)
139
- - **GQA**: 16 Q-heads, 4 KV-heads (4x memory reduction)
140
- - **Hidden Size**: 1024 | **Layers**: 12 | **Vocab**: 102,400
141
-
142
- ## Capabilities
143
- - Chain-of-Thought Reasoning
144
- - Vision Adapter (SigLIP)
145
- - Function Calling
146
- - Fill-in-the-Middle Coding
147
- - Speculative Decoding
148
- - NPU Export (TFLite/QNN)
149
- """
150
-
151
-
152
- # Gradio UI
153
- with gr.Blocks(title="MiniMind Max2", theme=gr.themes.Soft()) as demo:
154
- gr.Markdown("# 🧠 MiniMind Max2 API\n### Efficient Edge AI with Interleaved Thinking")
155
-
156
- with gr.Tabs():
157
- with gr.Tab("💬 Chat"):
158
- with gr.Row():
159
- with gr.Column(scale=2):
160
- chatbot = gr.Chatbot(height=400)
161
- msg = gr.Textbox(placeholder="Ask anything...")
162
- with gr.Row():
163
- submit = gr.Button("Send", variant="primary")
164
- clear = gr.Button("Clear")
165
- with gr.Column(scale=1):
166
- mode = gr.Radio(["Interleaved", "Sequential", "Hidden"], value="Interleaved", label="Thinking Mode")
167
- show = gr.Checkbox(label="Show Thinking", value=True)
168
- temp = gr.Slider(0, 1, 0.7, label="Temperature")
169
- tokens = gr.Slider(50, 2000, 500, label="Max Tokens")
170
- thinking = gr.Textbox(label="Thinking Trace", lines=8)
171
-
172
- submit.click(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
173
- msg.submit(respond, [msg, chatbot, mode, show, temp, tokens], [chatbot, msg, thinking])
174
- clear.click(lambda: ([], "", ""), outputs=[chatbot, msg, thinking])
175
-
176
- with gr.Tab("🔧 Tools"):
177
- gr.Markdown("### Function Calling")
178
- tool = gr.Dropdown(["calculate", "search", "code_execute"], value="calculate", label="Tool")
179
- inp = gr.Textbox(value="2 + 2 * 3", label="Input")
180
- btn = gr.Button("Execute", variant="primary")
181
- out = gr.Textbox(label="Result")
182
- btn.click(lambda t, i: ToolRegistry.execute(t, expression=i, query=i, code=i), [tool, inp], out)
183
-
184
- with gr.Tab("ℹ️ Info"):
185
- gr.Markdown(get_model_info())
186
-
187
- gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | Apache 2.0")
188
-
189
- if __name__ == "__main__":
190
- demo.launch()
 
 
 
 
 
 
1
import ast
import operator

import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
def chat(message, history):
    """Produce a demo reply and a simulated thinking trace for *message*.

    *history* is accepted to match Gradio's chat-callback signature but is
    not consulted — the reply depends only on the current message.

    Returns:
        A ``(response, thinking)`` tuple of strings, where *thinking* is a
        fixed ``<Thinking>…</Thinking>`` trace shown in the UI side panel.
    """
    trace_steps = (
        "<Thinking>",
        "<step> Analyzing query...",
        "<step> MoE routing (2/8 experts)",
        "<step> Generating response",
        "</Thinking>",
    )
    trace = "\n".join(trace_steps)

    reply = f"**MiniMind Max2**: {message}\n\nProcessed with MoE (25% active params)."
    return reply, trace
12
+
13
def calculate(expr):
    """Safely evaluate an arithmetic expression entered in the Tools tab.

    SECURITY FIX: the previous implementation called ``eval`` on raw UI
    input. Stripping ``__builtins__`` does NOT sandbox ``eval`` — arbitrary
    code execution is still reachable via dunder attribute chains. This
    version parses the expression with ``ast`` and walks the tree, accepting
    only numeric literals, the binary operators + - * / // % **, and unary
    +/-; everything else is rejected.

    Args:
        expr: Expression string, e.g. ``"2 + 2 * 3"``.

    Returns:
        ``"Result: <value>"`` on success, ``"Error: Invalid expression"``
        otherwise (same messages as before, so the UI is unchanged).
    """
    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def _eval(node):
        # Recursively evaluate the whitelisted node types; any other node
        # (names, calls, attributes, subscripts, ...) raises and is reported
        # as an invalid expression.
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](_eval(node.left), _eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_eval(node.operand))
        raise ValueError("unsupported expression")

    try:
        result = _eval(ast.parse(expr, mode="eval"))
        return f"Result: {result}"
    # Narrow exceptions instead of the previous bare `except:` (which also
    # swallowed KeyboardInterrupt/SystemExit).
    except (SyntaxError, ValueError, TypeError, ZeroDivisionError, OverflowError):
        return "Error: Invalid expression"
19
+
20
# --- Gradio UI -------------------------------------------------------------
# Top-level Blocks layout: three tabs (Chat / Tools / Info) plus a footer.
# NOTE(review): the scrape stripped indentation; layout reconstructed to the
# conventional Gradio nesting. Runs at import time — standard for HF Spaces,
# where app.py is executed directly (hence the bare demo.launch() with no
# __main__ guard).
with gr.Blocks(title="MiniMind Max2") as demo:
    gr.Markdown("# 🧠 MiniMind Max2 API\n*Efficient Edge AI with MoE Architecture*")

    with gr.Tab("💬 Chat"):
        # type="messages" is the Gradio 4.x openai-style history format:
        # a list of {"role": ..., "content": ...} dicts (see respond below).
        chatbot = gr.Chatbot(type="messages", height=300)
        msg = gr.Textbox(label="Message", placeholder="Ask anything...")
        thinking_box = gr.Textbox(label="Thinking Trace", lines=5)

        def respond(message: str, chat_history: list) -> tuple:
            """Chat event handler: append the user/assistant turn to the
            history and surface the thinking trace.

            Returns (updated history, "" to clear the input box, trace).
            """
            response, thinking = chat(message, chat_history)
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": response})
            return chat_history, "", thinking

        # Both Enter-in-textbox and the Send button trigger the same handler.
        msg.submit(respond, [msg, chatbot], [chatbot, msg, thinking_box])
        gr.Button("Send", variant="primary").click(respond, [msg, chatbot], [chatbot, msg, thinking_box])

    with gr.Tab("🔧 Tools"):
        expr = gr.Textbox(label="Expression", value="2 + 2 * 3")
        result = gr.Textbox(label="Result")
        # Evaluates the expression via the module-level calculate() helper.
        gr.Button("Calculate", variant="primary").click(calculate, expr, result)

    with gr.Tab("ℹ️ Info"):
        gr.Markdown("""
## MiniMind Max2 Architecture
- **MoE**: 8 experts, top-2 routing (25% activation)
- **GQA**: 16 Q-heads, 4 KV-heads
- **Capabilities**: Reasoning, Vision, Coding, Tools

## Docker
```bash
docker pull sultanafariabd/minimind-max2
docker run -p 8000:8000 sultanafariabd/minimind-max2
```
""")

    # Footer links to the model card and collection on the Hub.
    gr.Markdown("---\n[Model](https://huggingface.co/fariasultana/MiniMind) | [Collection](https://huggingface.co/collections/fariasultana/minimind-max2-edge-ai-models-69321e758f98df18d4f4ec05)")

demo.launch()