wuhp committed on
Commit
be69d68
·
verified ·
1 Parent(s): c0153a6

Create app2.py

Browse files
Files changed (1) hide show
  1. backups/app2.py +973 -0
backups/app2.py ADDED
@@ -0,0 +1,973 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import datetime
import importlib
import importlib.util  # explicit: importlib.util is not guaranteed by `import importlib`
import io
import os
import re
import sys
import traceback
from pathlib import Path
from typing import List, Dict, Any, Optional

# Third-party
import gradio as gr
from google import genai
from google.genai import types
from google.genai.types import Tool, GoogleSearch, FunctionDeclaration
from PIL import Image

# Add current directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from base_extension import BaseExtension
20
+
21
# Markdown blurb rendered at the top of the Gradio app.
DESCRIPTION = """
# GemiWine
**Powered by Gemini 2.5 Flash + Google Search Grounding + Agent Extensions**
"""

# Base system instruction. Used directly in simple streaming mode; in agentic
# mode, ExtensionManager.build_system_prompt() appends extension contexts to it.
BASE_SYSTEM_PROMPT = """
You are GemiWine, a helpful AI assistant with extensible capabilities.
Your core abilities include conversation, web search, and image understanding.

When users enable extensions, you gain additional tools and capabilities.
Always use the available tools when they would be helpful to the user.
Be proactive about suggesting when an extension might be useful.
"""
34
+
35
+
36
def log(msg: str):
    """Print *msg* to stdout prefixed with an HH:MM:SS timestamp, flushing immediately."""
    stamp = datetime.datetime.now().strftime("%H:%M:%S")
    print(f"[{stamp}] {msg}", flush=True)
39
+
40
+
41
def get_mime_type(file_path: str) -> str:
    """Determine MIME type from file extension.

    Lookup is case-insensitive on the suffix. Unknown extensions fall back to
    ``application/octet-stream``. The video identifiers (e.g. ``video/mov``,
    ``video/avi``) match the identifiers the Gemini API accepts rather than the
    IANA-registered names, so they are kept as-is.

    Fix: the UI's MultimodalTextbox also accepts ``.csv``, ``.json``, ``.xml``,
    ``.doc``, ``.docx`` and audio files, which previously all fell through to
    ``application/octet-stream`` — mappings added for those formats.
    """
    ext = Path(file_path).suffix.lower()
    mime_types = {
        # Images
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
        '.heic': 'image/heic',
        '.heif': 'image/heif',
        # Documents
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.html': 'text/html',
        '.md': 'text/markdown',
        '.csv': 'text/csv',
        '.json': 'application/json',
        '.xml': 'text/xml',
        '.doc': 'application/msword',
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        # Audio (UI accepts audio uploads)
        '.mp3': 'audio/mp3',
        '.wav': 'audio/wav',
        '.aac': 'audio/aac',
        '.ogg': 'audio/ogg',
        '.flac': 'audio/flac',
        '.aiff': 'audio/aiff',
        # Videos
        '.mp4': 'video/mp4',
        '.mpeg': 'video/mpeg',
        '.mov': 'video/mov',
        '.avi': 'video/avi',
        '.flv': 'video/x-flv',
        '.mpg': 'video/mpg',
        '.webm': 'video/webm',
        '.wmv': 'video/wmv',
        '.3gpp': 'video/3gpp',
        '.3gp': 'video/3gpp',
    }
    return mime_types.get(ext, 'application/octet-stream')
70
+
71
+
72
def process_uploaded_file(client: genai.Client, file_path: str) -> "types.Part | types.File":
    """Turn an uploaded file into something Gemini can consume.

    Files larger than 20MB, and all videos, are pushed through the Gemini
    File API (blocking upload); everything else is read into memory and
    wrapped as an inline ``types.Part``.

    NOTE(review): the original annotation was ``-> types.Part``, but the
    File API branch returns the uploaded-file handle, not a Part — both are
    accepted as message content parts by the SDK; confirm against callers.

    Args:
        client: Authenticated Gemini client (used only for the File API branch).
        file_path: Path to the local file to process.
    """
    mime_type = get_mime_type(file_path)
    file_size = Path(file_path).stat().st_size

    log(f"📎 Processing file: {Path(file_path).name} ({mime_type}, {file_size/1024:.1f}KB)")

    # For files > 20MB or videos, use File API
    if file_size > 20 * 1024 * 1024 or mime_type.startswith('video/'):
        log(f"📤 Uploading large file via File API...")
        uploaded_file = client.files.upload(file=file_path)
        log(f"✅ File uploaded: {uploaded_file.name}")
        return uploaded_file
    else:
        # For smaller files, pass inline
        with open(file_path, 'rb') as f:
            file_bytes = f.read()
        log(f"✅ File loaded inline")
        return types.Part.from_bytes(data=file_bytes, mime_type=mime_type)
91
+
92
+
93
class ExtensionManager:
    """Manages loading and interfacing with extensions.

    Extensions are discovered once, at construction time, by scanning the
    local ``extensions/`` directory for ``*.py`` modules that define a
    subclass of ``BaseExtension``. Loaded instances are keyed by their
    ``name`` attribute in ``self.extensions``.
    """

    def __init__(self):
        # name -> extension instance registry, populated by load_extensions().
        self.extensions: Dict[str, BaseExtension] = {}
        self.load_extensions()

    def load_extensions(self):
        """Dynamically load all extensions from extensions/ folder.

        Each ``*.py`` file not starting with ``_`` is imported from its path;
        the first ``BaseExtension`` subclass found in the module (by ``dir()``
        order) is instantiated. Load failures are logged and skipped, never
        raised, so one broken extension cannot take the app down.
        """
        extensions_dir = Path("extensions")
        if not extensions_dir.exists():
            log("⚠️ Extensions directory not found, creating it...")
            extensions_dir.mkdir()
            return

        log(f"🔍 Scanning for extensions in {extensions_dir.absolute()}")

        for file in extensions_dir.glob("*.py"):
            # Underscore-prefixed files are treated as private helpers, not extensions.
            if file.name.startswith("_"):
                log(f"⏭️ Skipping {file.name} (starts with _)")
                continue

            try:
                log(f"📦 Attempting to load: {file.name}")
                module_name = file.stem
                # Import the module directly from its file path.
                spec = importlib.util.spec_from_file_location(module_name, file)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)

                # Find Extension class in module (first match wins).
                found_extension = False
                for attr_name in dir(module):
                    attr = getattr(module, attr_name)
                    if (isinstance(attr, type) and
                        issubclass(attr, BaseExtension) and
                        attr != BaseExtension):
                        ext = attr()
                        self.extensions[ext.name] = ext
                        log(f"✅ Loaded extension: {ext.display_name} ({ext.name})")
                        found_extension = True
                        break

                if not found_extension:
                    log(f"⚠️ No extension class found in {file.name}")

            except Exception as e:
                log(f"❌ Failed to load {file.name}: {e}")
                traceback.print_exc()

        log(f"📊 Total extensions loaded: {len(self.extensions)}")

    def get_extension(self, name: str) -> Optional[BaseExtension]:
        # Registry lookup; None if the name is unknown.
        return self.extensions.get(name)

    def get_all_extensions(self) -> List[BaseExtension]:
        # All loaded extension instances, in registry insertion order.
        return list(self.extensions.values())

    def get_enabled_extensions(self, user_id: str, enabled_list: List[str]) -> List[BaseExtension]:
        """Get list of enabled extension objects.

        NOTE(review): ``user_id`` is currently unused; kept for interface
        stability with callers.
        """
        return [ext for name, ext in self.extensions.items() if name in enabled_list]

    def build_system_prompt(self, enabled_list: List[str]) -> str:
        """Build system prompt: BASE_SYSTEM_PROMPT plus each enabled
        extension's display name and system context."""
        prompt = BASE_SYSTEM_PROMPT

        enabled_exts = self.get_enabled_extensions("", enabled_list)
        if enabled_exts:
            prompt += "\n\n# ENABLED EXTENSIONS\nYou currently have these extensions enabled:\n\n"
            for ext in enabled_exts:
                prompt += f"## {ext.display_name}\n{ext.get_system_context()}\n\n"

        return prompt

    def get_all_tools(self, enabled_list: List[str]) -> List[types.Tool]:
        """Get all tools from enabled extensions (no search tool here)."""
        tools = []

        # Add extension tools only
        for ext_name in enabled_list:
            ext = self.get_extension(ext_name)
            if ext:
                tools.extend(ext.get_tools())

        return tools

    def get_search_tool(self) -> types.Tool:
        """Get Google Search tool separately (kept out of get_all_tools so
        search can be isolated from function calling)."""
        return types.Tool(google_search=types.GoogleSearch())

    def handle_function_calls(self, user_id: str, enabled_list: List[str], function_calls: List) -> List:
        """Process function calls from Gemini and return results.

        Each call is routed to the first enabled extension that declares a
        matching function name; calls no extension claims produce an error
        dict. Results are returned in the same order as ``function_calls``.
        """
        results = []

        for fc in function_calls:
            function_name = fc.name
            args = fc.args if hasattr(fc, 'args') else {}

            # Find which extension owns this function
            handled = False
            for ext_name in enabled_list:
                ext = self.get_extension(ext_name)
                if ext:
                    # Check if this function is in the extension's tools
                    for tool in ext.get_tools():
                        if hasattr(tool, 'function_declarations'):
                            for func_decl in tool.function_declarations:
                                if func_decl.name == function_name:
                                    result = ext.handle_tool_call(user_id, function_name, args)
                                    # Keep result as dict, don't convert to string yet
                                    results.append(result)
                                    handled = True
                                    break
                        if handled:
                            break
                if handled:
                    break

            if not handled:
                results.append({"error": f"Unknown function {function_name}"})

        return results
214
+
215
+
216
class AgentOrchestrator:
    """Orchestrates multiple specialized agents.

    Wraps three roles over one Gemini client: a search agent (grounded via
    Google Search on its own isolated chat session), a tool agent (function
    calling on the shared multi-turn chat), and a synthesis step that merges
    their outputs into the final answer.
    """

    def __init__(self, client, chat, extension_manager, enabled_extensions):
        self.client = client
        self.chat = chat  # Multi-turn chat session
        self.extension_manager = extension_manager
        self.enabled_extensions = enabled_extensions
        # Create a separate chat session for search (to isolate it from tool calls)
        self.search_chat = client.chats.create(model="gemini-2.5-flash")

    def call_search_agent(self, query: str, file_parts: List = None) -> tuple:
        """Call specialized search agent using streaming - returns (text, citations).

        Streams the grounded response, accumulating text parts; citations are
        extracted from the grounding metadata of the last streamed chunk.
        Returns ("", None) on error.
        """
        log("🔍 Calling Search Agent...")

        grounding_tool = types.Tool(google_search=types.GoogleSearch())
        config = types.GenerateContentConfig(
            system_instruction="You are a search specialist. Use Google Search to find relevant, accurate information. Provide concise, well-cited answers.",
            tools=[grounding_tool],
            temperature=0.7,
            max_output_tokens=2048
        )

        try:
            # Build message content with files if provided
            content_parts = []
            if file_parts:
                content_parts.extend(file_parts)
            content_parts.append(query)

            # Use streaming like the working example
            result_text = ""
            last_chunk = None

            stream = self.search_chat.send_message_stream(content_parts, config=config)
            for chunk in stream:
                last_chunk = chunk
                if hasattr(chunk, 'candidates') and chunk.candidates:
                    candidate = chunk.candidates[0]
                    if hasattr(candidate, 'content') and candidate.content:
                        if hasattr(candidate.content, 'parts') and candidate.content.parts:
                            for part in candidate.content.parts:
                                if hasattr(part, 'text') and part.text:
                                    result_text += part.text

            # Extract citations from the last chunk using the working function
            citations = None
            if last_chunk and hasattr(last_chunk, 'candidates') and last_chunk.candidates:
                log(f"🔍 Extracting citations from search response...")
                citations = insert_citations_from_grounding(last_chunk.candidates)
                if citations:
                    log(f"✅ Citations extracted successfully")
                else:
                    log(f"⚠️ No citations found in grounding metadata")

            if result_text:
                log(f"✅ Search Agent returned {len(result_text)} chars")
            else:
                log(f"⚠️ Search Agent returned empty result")

            return result_text, citations

        except Exception as e:
            log(f"⚠️ Search Agent error: {e}")
            traceback.print_exc()

        return "", None

    def call_tool_agent(self, query: str, search_context: str = "", reasoning_budget: int = -1, file_parts: List = None) -> tuple:
        """Call tool execution agent with function calling - uses multi-turn chat.

        Returns (function_calls, text_response, thoughts). Parts flagged as
        ``thought`` are separated from the visible text. Returns ([], "", "")
        on error.
        """
        log("🛠️ Calling Tool Agent...")

        # Build prompt with context if needed
        prompt = query
        if search_context:
            prompt = f"[Context from Search]\n{search_context}\n\n[User Request]\n{query}"

        # Get extension tools
        tools = self.extension_manager.get_all_tools(self.enabled_extensions)

        system_prompt = self.extension_manager.build_system_prompt(self.enabled_extensions)
        system_prompt += """

CRITICAL INSTRUCTIONS FOR TOOL USAGE:
- You have PERSISTENT STATE across all conversations in this chat session
- Timers, tasks, notes, and other data remain even after responses
- When users ask about "the timer", "the alarm", "my tasks", etc., they're referring to items created earlier
- ALWAYS use your tools (list_timers, list_tasks, check_timer, etc.) when asked about status
- Don't say you can't access information - use your available tools first
- Be proactive: if user mentions checking something, use the appropriate tool immediately

If search context is provided, incorporate it naturally.
When images, PDFs, videos, or other files are provided, analyze them thoroughly and reference them in your response."""

        config = types.GenerateContentConfig(
            system_instruction=system_prompt,
            tools=tools,
            temperature=0.7,
            max_output_tokens=4096,
            thinking_config=types.ThinkingConfig(
                include_thoughts=True,
                thinking_budget=reasoning_budget,
            )
        )

        try:
            # Build message content with files if provided
            content_parts = []
            if file_parts:
                content_parts.extend(file_parts)
            content_parts.append(prompt)

            # Use the chat session's send_message (maintains conversation history automatically)
            response = self.chat.send_message(
                content_parts,
                config=config
            )

            function_calls = []
            text_response = ""
            thoughts = ""

            if response.candidates and response.candidates[0].content:
                for part in response.candidates[0].content.parts:
                    if hasattr(part, 'function_call') and part.function_call:
                        function_calls.append(part.function_call)
                        log(f"🔧 Tool call: {part.function_call.name}")

                    if getattr(part, "text", None):
                        # Thought parts go to the thinking trace, not the answer.
                        if getattr(part, "thought", False):
                            thoughts += part.text
                        else:
                            text_response += part.text

            return function_calls, text_response, thoughts

        except Exception as e:
            log(f"⚠️ Tool Agent error: {e}")
            traceback.print_exc()
            return [], "", ""

    def synthesize_response(self, query: str, search_results: str, tool_results: list, search_citations: Optional[str] = None, file_parts: List = None) -> tuple:
        """Synthesize final response from all sources - returns (text, images_html).

        ``tool_results`` is a list of (tool_name, result) pairs. Tool results
        containing an ``image_base64`` key are collected as generated images
        and stripped from the synthesis prompt (the base64 blob is too long).

        NOTE(review): ``search_citations`` is accepted but never used here —
        the caller appends citations itself; confirm before removing.
        """
        log("✨ Synthesizing final response...")

        synthesis_prompt = f"[Original Query]\n{query}\n\n"

        if search_results:
            synthesis_prompt += f"[Web Search Results]\n{search_results}\n\n"

        # Collect any generated images from tool results
        generated_images = []
        if tool_results:
            synthesis_prompt += "[Tool Execution Results]\n"
            for tool_name, result in tool_results:
                if result is None:
                    result = "(no result)"

                # Check if result contains a generated chart/image
                if isinstance(result, dict) and 'image_base64' in result:
                    generated_images.append({
                        'base64': result['image_base64'],
                        'title': result.get('message', 'Generated visualization'),
                        'filepath': result.get('filepath', '')
                    })
                    # Don't include base64 in the synthesis prompt (too long)
                    result_clean = dict(result)
                    result_clean.pop('image_base64', None)
                    synthesis_prompt += f"- {tool_name}: {result_clean.get('message', '')} (Chart created and will be displayed)\n"
                else:
                    synthesis_prompt += f"- {tool_name}: {result}\n"
            synthesis_prompt += "\n"

        synthesis_prompt += "Provide a comprehensive answer that incorporates all available information above. Be natural and conversational."

        # If files were provided, reference them in the context
        if file_parts:
            synthesis_prompt += "\n\nNote: The user has provided files (images/documents/videos) with their query. Make sure to reference and discuss the content of these files in your response."

        config = types.GenerateContentConfig(
            system_instruction="You are a synthesis specialist. Combine information from multiple sources into coherent, helpful responses. When files are provided, analyze and reference them in your answer.",
            temperature=0.7,
            max_output_tokens=4096
        )

        try:
            # Build content parts with files if provided
            content_parts = []
            if file_parts:
                content_parts.extend(file_parts)
            content_parts.append(types.Part(text=synthesis_prompt))

            # One-shot generate (not the chat session): synthesis is stateless.
            response = self.client.models.generate_content(
                model="gemini-2.5-flash",
                contents=[types.Content(role="user", parts=content_parts)],
                config=config
            )

            result_text = ""
            if response.candidates and response.candidates[0].content:
                for part in response.candidates[0].content.parts:
                    if getattr(part, "text", None):
                        result_text += part.text

            return result_text, generated_images
        except Exception as e:
            log(f"⚠️ Synthesis error: {e}")

        return "I encountered an error synthesizing the response.", []
425
+
426
+
427
+
428
def determine_needs_search(chat, query: str) -> bool:
    """Decide whether *query* should trigger a web search.

    Pure keyword heuristic, checked in priority order: explicit search
    requests and recommendation-style questions trigger a search; queries
    about internal tool state (timers, tasks, notes) and everything else
    do not. The ``chat`` argument is accepted for interface compatibility.
    """
    lowered = query.lower()

    # Explicit "please search" phrasing always wins.
    explicit_triggers = ('search', 'find online', 'look up online', 'google',
                         'search online', 'check online')
    if any(trigger in lowered for trigger in explicit_triggers):
        log("🔍 Search triggered by explicit keyword")
        return True

    # Recommendation / comparison questions usually benefit from fresh web data.
    recommendation_triggers = ('best', 'recommend', 'top', 'which',
                               'what are good', 'compare')
    if any(trigger in lowered for trigger in recommendation_triggers):
        log("🔍 Search triggered by recommendation question")
        return True

    # Timer/task/note management is handled by internal tools, never the web.
    internal_triggers = ('timer', 'alarm', 'task', 'note', 'how much time')
    if any(trigger in lowered for trigger in internal_triggers):
        log("❌ No search - internal tool query")
        return False

    log("❌ No search - general query")
    return False
451
+
452
+
453
# Global instances shared across all Gradio sessions.
# Single extension registry, populated once at import time by scanning extensions/.
EXTENSION_MANAGER = ExtensionManager()
# Per-API-key session cache: api_key -> {"client": genai.Client, "chat": chat session}.
CHAT_SESSIONS: Dict[str, Dict[str, Any]] = {}
456
+
457
+
458
def get_or_create_session(api_key: str):
    """Return a (client, chat) pair for this API key, creating and caching on first use.

    Returns (None, None) for an empty key or when client creation fails.
    The chat session is multi-turn and persists for the life of the process.
    """
    if not api_key:
        return None, None

    cached = CHAT_SESSIONS.get(api_key)
    if cached is not None:
        return cached["client"], cached["chat"]

    try:
        client = genai.Client(api_key=api_key)
        # One multi-turn chat session per API key.
        chat = client.chats.create(model="gemini-2.5-flash")
        CHAT_SESSIONS[api_key] = {"client": client, "chat": chat}
        log("✅ Created new Gemini session with multi-turn chat.")
        return client, chat
    except Exception as e:
        log(f"❌ Error creating Gemini client: {e}")
        return None, None
477
+
478
+
479
def insert_citations_from_grounding(candidates):
    """Build a markdown '📚 Sources' footer from grounding metadata.

    Reads the first candidate's grounding_chunks; each web chunk becomes a
    markdown link using its title (deduplicated) or a positional fallback
    label. Returns the footer string, or None when no citations can be built
    or any error occurs.
    """
    try:
        if not candidates:
            log("⚠️ No candidates for citation extraction")
            return None

        grounding = getattr(candidates[0], "grounding_metadata", None)
        if not grounding:
            log("⚠️ No grounding_metadata found")
            return None

        chunks = getattr(grounding, "grounding_chunks", None) or []
        if not chunks:
            log("⚠️ No grounding_chunks found")
            return None

        links = []
        used_titles = set()

        for position, chunk in enumerate(chunks, start=1):
            web = getattr(chunk, "web", None)
            if not web:
                continue
            uri = getattr(web, "uri", None)
            title = getattr(web, "title", None)

            # Prefer the title as link text (it shows the source domain);
            # fall back to a positional label for untitled/duplicate entries.
            if uri and title and title not in used_titles:
                used_titles.add(title)
                links.append(f"[{title}]({uri})")
            elif uri:
                links.append(f"[Source {position}]({uri})")

        if not links:
            log("⚠️ No valid citations could be created")
            return None

        log(f"✅ Created {len(links)} citations with source domains")
        return "\n\n📚 **Sources:** " + " • ".join(links)

    except Exception as e:
        log(f"⚠️ Citation extraction failed: {e}")
        traceback.print_exc()
        return None
529
+
530
+
531
def reasoning_budget(level: str) -> int:
    """Map a UI reasoning-level label to a Gemini thinking-budget token count.

    0 disables thinking, -1 means dynamic (model decides). None or an
    unrecognized label falls back to dynamic (-1). Matching is
    case-insensitive.
    """
    budgets = {
        "none": 0,
        "concise": 256,
        "strong": 2048,
        "dynamic": -1,
    }
    return budgets.get((level or "Dynamic").lower(), -1)
542
+
543
+
544
def chat_with_gemini(api_key, chat_history_msgs, multimodal_input, show_thoughts, reasoning_level, enabled_extensions):
    """Main chat handler (generator): yields successive chat-history states.

    Each yield is the full messages-format history list for the Gradio
    Chatbot. Two modes: with extensions enabled, runs the multi-step
    orchestration (search decision -> search agent -> tool agent -> tool
    execution -> synthesis); without extensions, streams a single grounded
    response. When ``show_thoughts`` is on, a separate history entry holds
    the model's thinking trace.
    """
    log("=== chat_with_gemini CALLED ===")

    # Guard: no API key — show a hint and stop.
    if not api_key:
        chat_history_msgs = chat_history_msgs or []
        chat_history_msgs.append({
            "role": "assistant",
            "content": "🔑 Please enter your Gemini API key first."
        })
        yield chat_history_msgs
        return

    client, chat = get_or_create_session(api_key)
    if not client:
        # NOTE(review): chat_history_msgs may still be None here (the None
        # guard runs below) — confirm callers always pass a list.
        chat_history_msgs.append({
            "role": "assistant",
            "content": "⚠️ Could not create Gemini session."
        })
        yield chat_history_msgs
        return

    user_text = (multimodal_input or {}).get("text", "") or ""
    uploaded_files = (multimodal_input or {}).get("files", []) or []

    if chat_history_msgs is None:
        chat_history_msgs = []

    # Process uploaded files into Gemini content parts; failures are logged
    # and the file skipped rather than aborting the whole turn.
    file_parts = []
    if uploaded_files:
        log(f"📎 Processing {len(uploaded_files)} uploaded file(s)...")
        for file_path in uploaded_files:
            try:
                file_part = process_uploaded_file(client, file_path)
                file_parts.append(file_part)
            except Exception as e:
                log(f"❌ Error processing file {file_path}: {e}")
                traceback.print_exc()

    chat_history_msgs.append({"role": "user", "content": user_text})
    yield chat_history_msgs

    assistant_base_index = len(chat_history_msgs)

    # Setup thinking display if enabled: one entry for thoughts, one for the
    # answer; otherwise a single answer entry.
    if show_thoughts:
        thought_index = assistant_base_index
        chat_history_msgs.append({"role": "assistant", "content": "<em>💭 Thinking...</em>"})
        answer_index = thought_index + 1
        chat_history_msgs.append({"role": "assistant", "content": "🤔 Processing..."})
    else:
        thought_index = None
        answer_index = assistant_base_index
        chat_history_msgs.append({"role": "assistant", "content": "🤔 Processing..."})

    yield chat_history_msgs

    try:
        # Initialize variables at function scope
        search_citations = None

        # AGENT ORCHESTRATION APPROACH
        if enabled_extensions:
            log("🎭 Using multi-agent orchestration with multi-turn chat")
            orchestrator = AgentOrchestrator(client, chat, EXTENSION_MANAGER, enabled_extensions)

            budget = reasoning_budget(reasoning_level)
            thoughts_accumulated = ""

            # Step 1: Determine if search is needed
            needs_search = determine_needs_search(chat, user_text)
            log(f"📊 Search needed: {needs_search}")

            # Step 2: Call search agent if needed
            search_results = ""
            if needs_search:
                chat_history_msgs[answer_index]["content"] = "🔍 Searching the web..."
                yield chat_history_msgs

                search_results, search_citations = orchestrator.call_search_agent(user_text, file_parts)
                log(f"📋 After search: search_citations = {search_citations[:100] if search_citations else 'None'}")

                if search_results:
                    chat_history_msgs[answer_index]["content"] = "✅ Found information online\n\n🛠️ Now processing with tools..."
                    yield chat_history_msgs

            # Step 3: Call tool agent (with files)
            function_calls, tool_response, tool_thoughts = orchestrator.call_tool_agent(
                user_text, search_results, budget, file_parts
            )

            # Show thoughts if available
            if tool_thoughts and show_thoughts:
                thoughts_accumulated += tool_thoughts
                chat_history_msgs[thought_index]["content"] = (
                    f"<details open>"
                    f"<summary><strong>💭 GemiWine's Thinking</strong></summary>"
                    f"<div style='white-space:pre-wrap;background:inherit;color:inherit;"
                    f"padding:8px;border-radius:8px;border:1px solid var(--border-color);'>"
                    f"{thoughts_accumulated.strip()}</div>"
                    f"</details>"
                )
                yield chat_history_msgs

            # Step 4: Execute function calls if any
            tool_results = []
            if function_calls:
                chat_history_msgs[answer_index]["content"] = "⚙️ Executing tools..."
                yield chat_history_msgs

                # The API key doubles as the per-user identity for extensions.
                user_id = api_key
                results = EXTENSION_MANAGER.handle_function_calls(
                    user_id, enabled_extensions, function_calls
                )

                for fc, result in zip(function_calls, results):
                    tool_results.append((fc.name, result))
                    log(f"✅ {fc.name}: {result}")

            # Step 5: Synthesize final response
            if search_results or tool_results or tool_response:
                chat_history_msgs[answer_index]["content"] = "✨ Synthesizing answer..."
                yield chat_history_msgs
                final_answer, generated_images = orchestrator.synthesize_response(user_text, search_results, tool_results, search_citations, file_parts)
            else:
                final_answer = tool_response or "I couldn't process that request."
                generated_images = []

            # Build the final content with citations if available
            final_content = (
                f"<div><strong>🍇 Final Answer</strong>"
                f"<div style='white-space:pre-wrap;background:inherit;color:inherit;"
                f"padding:8px;border-radius:8px;border:1px solid var(--border-color);'>"
                f"{final_answer.strip()}</div></div>"
            )

            # Add generated images/charts (embedded as base64 data URIs)
            if generated_images:
                log(f"📊 Adding {len(generated_images)} generated visualizations to response")
                for img_data in generated_images:
                    final_content += f"\n\n<div style='margin-top:16px;'>"
                    final_content += f"<strong>📊 {img_data['title']}</strong><br/>"
                    final_content += f"<img src='data:image/png;base64,{img_data['base64']}' style='max-width:100%;border-radius:8px;box-shadow:0 2px 8px rgba(0,0,0,0.1);'/>"
                    if img_data['filepath']:
                        final_content += f"<br/><small style='color:#666;'>Saved to: {img_data['filepath']}</small>"
                    final_content += "</div>"

            # Append citations if they exist
            if search_citations:
                final_content += "\n\n" + search_citations
                log(f"✅ Appended citations to final answer")

            chat_history_msgs[answer_index]["content"] = final_content
            yield chat_history_msgs

        else:
            # No extensions - simple streaming with search
            log("📺 Using simple streaming mode")

            # Build parts for message with files
            parts = []
            if file_parts:
                parts.extend(file_parts)
            parts.append(user_text)

            budget = reasoning_budget(reasoning_level)
            grounding_tool = types.Tool(google_search=types.GoogleSearch())
            config = types.GenerateContentConfig(
                system_instruction=BASE_SYSTEM_PROMPT,
                tools=[grounding_tool],
                temperature=0.7,
                top_p=0.9,
                max_output_tokens=8192,
                thinking_config=types.ThinkingConfig(
                    include_thoughts=True,
                    thinking_budget=budget,
                )
            )

            stream = chat.send_message_stream(parts, config=config)

            answer = ""
            thoughts = ""
            last_chunk = None  # kept for citation extraction after the stream ends

            # Add thinking placeholder if needed (the "Processing..." entry
            # becomes the thought entry; a new entry holds the answer).
            if show_thoughts:
                thought_index = answer_index
                chat_history_msgs[answer_index]["content"] = "<em>💭 Thinking...</em>"
                answer_index = len(chat_history_msgs)
                chat_history_msgs.append({"role": "assistant", "content": ""})
                yield chat_history_msgs

            for chunk in stream:
                last_chunk = chunk
                if not getattr(chunk, "candidates", None):
                    continue
                candidate = chunk.candidates[0]

                if getattr(candidate, "content", None):
                    for part in candidate.content.parts:
                        if not getattr(part, "text", None):
                            continue

                        # Thought parts update the thinking entry; everything
                        # else accumulates into the visible answer.
                        if getattr(part, "thought", False):
                            thoughts += part.text
                            if show_thoughts:
                                chat_history_msgs[thought_index]["content"] = (
                                    f"<details open>"
                                    f"<summary><strong>💭 GemiWine's Thinking</strong></summary>"
                                    f"<div style='white-space:pre-wrap;background:inherit;color:inherit;"
                                    f"padding:8px;border-radius:8px;border:1px solid var(--border-color);'>"
                                    f"{thoughts.strip()}</div>"
                                    f"</details>"
                                )
                                yield chat_history_msgs
                        else:
                            answer += part.text
                            chat_history_msgs[answer_index]["content"] = (
                                f"<div><strong>🍇 Final Answer</strong>"
                                f"<div style='white-space:pre-wrap;background:inherit;color:inherit;"
                                f"padding:8px;border-radius:8px;border:1px solid var(--border-color);'>"
                                f"{answer.strip()}</div></div>"
                            )
                            yield chat_history_msgs

            # Add citations (grounding metadata lives on the last chunk)
            if last_chunk:
                citations = insert_citations_from_grounding(last_chunk.candidates)
                if citations:
                    chat_history_msgs[answer_index]["content"] += "\n\n" + citations
                    yield chat_history_msgs

        log("✅ Response complete.")
        return

    except Exception as e:
        log(f"❌ Error: {e}")
        traceback.print_exc()
        chat_history_msgs[answer_index]["content"] = f"⚠️ Error: {e}"
        yield chat_history_msgs
        return
786
+
787
+
788
def build_extension_ui():
    """Build the extension toggle UI.

    Returns a list of ``(extension_name, gr.Checkbox)`` pairs — always a
    list, so callers can safely ``zip``/iterate it even when no extensions
    are installed.

    Fix: the empty case previously returned a ``(gr.Markdown, [])`` tuple
    while the normal case returned a list of pairs; callers that unpack
    ``(name, checkbox)`` pairs (e.g. ``handle_chat``) would break on the
    tuple. Creating a component inside the Blocks context renders it, so
    the Markdown does not need to be returned.
    """
    extensions = EXTENSION_MANAGER.get_all_extensions()

    if not extensions:
        gr.Markdown("No extensions available")
        return []

    checkboxes = []
    with gr.Accordion("🔌 Agent Extensions", open=True):
        gr.Markdown("Enable extensions to give the agent additional capabilities:")
        gr.Markdown("✨ **Agentic Mode:** When extensions are enabled, the agent uses multi-step reasoning with search + tools")
        for ext in extensions:
            cb = gr.Checkbox(
                label=f"{ext.icon} {ext.display_name}",
                info=ext.description,
                value=False
            )
            checkboxes.append((ext.name, cb))

    return checkboxes
808
+
809
+
810
# Top-level Gradio app: two-column layout — settings on the left, chat on
# the right. Components created inside this context are registered on `demo`.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
    title="GemiWine",
    fill_width=True
) as demo:
    # Page-level CSS: card-style chat panel plus a rounded message input box.
    gr.HTML("""
    <style>
    .gradio-container { padding-top: 1.5rem; padding-bottom: 1.5rem; }
    .chat-panel {
        background: rgba(255, 255, 255, 0.05);
        border-radius: 16px !important;
        padding: 1.5rem;
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.05);
        border: 1px solid rgba(255, 255, 255, 0.1);
    }
    .message-input {
        border-radius: 12px !important;
        border: 1px solid rgba(0,0,0,0.1);
    }
    </style>
    """)

    with gr.Row():
        # Left column: API key, reasoning controls, and extension toggles.
        with gr.Column(scale=1, min_width=320):
            gr.Markdown("## ⚙️ Settings & Controls")
            api_key = gr.Textbox(
                label="🔑 Gemini API Key",
                placeholder="Paste your Gemini API key here...",
                type="password",
            )
            reasoning_level = gr.Radio(
                ["None", "Concise", "Strong", "Dynamic"],
                label="🧠 Reasoning Level",
                value="Dynamic",
                info="Controls the model's thinking depth.",
            )
            show_thoughts = gr.Checkbox(
                label="💭 Show Thinking",
                value=True,
                info="Display reasoning process before answers.",
            )

            # Build extension checkboxes
            # (name, gr.Checkbox) pairs returned by build_extension_ui().
            extension_checkboxes = build_extension_ui()

        # Right column: chat history plus the multimodal input.
        with gr.Column(scale=4):
            with gr.Group(elem_classes="chat-panel"):
                chatbot = gr.Chatbot(
                    label="🍇 Chat with GemiWine",
                    height=650,
                    show_copy_button=True,
                    # "messages" format: list of {"role": ..., "content": ...} dicts.
                    type="messages",
                    avatar_images=(None, "https://i.imgur.com/Q2EMk2N.png"),
                )
                multimodal_msg = gr.MultimodalTextbox(
                    file_types=[
                        "image", "video", "audio",  # Gradio presets
                        ".pdf", ".txt", ".md", ".html", ".xml",  # Documents
                        ".doc", ".docx", ".csv", ".json"  # Additional formats
                    ],
                    placeholder="Ask anything, upload images/PDFs/videos, or let extensions help you...",
                    label="Your Message",
                    elem_classes="message-input",
                    autofocus=True
                )

    # Hidden state to track enabled extensions
    # (list of extension-name strings, kept in sync by the checkbox handlers below)
    enabled_extensions_state = gr.State([])
878
+
879
+ def clear_box():
880
+ return {"text": "", "files": []}
881
+
882
+ def handle_chat(api_key_input, chat_history_msgs, multimodal_dict, thinking_flag, reasoning_lvl, *extension_states):
883
+ # Convert extension checkbox states to list of enabled extension names
884
+ enabled = []
885
+ for (ext_name, _), is_enabled in zip(extension_checkboxes, extension_states):
886
+ if is_enabled:
887
+ enabled.append(ext_name)
888
+
889
+ log(f"Enabled extensions: {enabled}")
890
+
891
+ yield from chat_with_gemini(
892
+ api_key_input, chat_history_msgs, multimodal_dict,
893
+ thinking_flag, reasoning_lvl, enabled
894
+ )
895
+
896
+ def check_timers(api_key_input, chat_history, enabled_exts):
897
+ """Background function to check for completed timers"""
898
+ if not api_key_input or 'timer' not in enabled_exts:
899
+ return chat_history
900
+
901
+ timer_ext = EXTENSION_MANAGER.get_extension('timer')
902
+ if not timer_ext:
903
+ return chat_history
904
+
905
+ user_id = api_key_input
906
+ timer_ext.initialize_state(user_id)
907
+ state = timer_ext.get_state(user_id)
908
+
909
+ import datetime as dt
910
+ now = dt.datetime.now()
911
+ newly_completed = []
912
+
913
+ for timer in state.get("timers", []):
914
+ if timer.get("active") and not timer.get("notified", False):
915
+ end_time = dt.datetime.fromisoformat(timer["end_time"])
916
+ if now >= end_time:
917
+ newly_completed.append(timer)
918
+ timer["notified"] = True
919
+
920
+ if newly_completed:
921
+ timer_ext.update_state(user_id, state)
922
+
923
+ # Add notification to chat
924
+ if chat_history is None:
925
+ chat_history = []
926
+
927
+ for timer in newly_completed:
928
+ notification = f"⏰ **Timer Complete!** Your timer '{timer['name']}' has finished!"
929
+ chat_history.append({"role": "assistant", "content": notification})
930
+ log(f"⏰ Timer notification sent: {timer['name']}")
931
+
932
+ return chat_history
933
+
934
    # Get just the checkbox components for inputs
    # (handle_chat re-pairs them with extension names via extension_checkboxes).
    checkbox_components = [cb for _, cb in extension_checkboxes]

    # Main chat submission
    # Streamed output goes to the chatbot; the input box is cleared only
    # after the generator finishes (.then runs once streaming completes).
    multimodal_msg.submit(
        fn=handle_chat,
        inputs=[api_key, chatbot, multimodal_msg, show_thoughts, reasoning_level] + checkbox_components,
        outputs=[chatbot],
        queue=True,
    ).then(fn=clear_box, outputs=[multimodal_msg])

    # Background timer check - runs every 10 seconds
    timer_check = gr.Timer(value=10, active=True)
947
+
948
+ def update_enabled_state(*extension_states):
949
+ enabled = []
950
+ for (ext_name, _), is_enabled in zip(extension_checkboxes, extension_states):
951
+ if is_enabled:
952
+ enabled.append(ext_name)
953
+ return enabled
954
+
955
    # Update enabled extensions state whenever checkboxes change
    # (every checkbox triggers a full recompute from all checkbox values).
    for _, cb in extension_checkboxes:
        cb.change(
            fn=update_enabled_state,
            inputs=checkbox_components,
            outputs=[enabled_extensions_state]
        )

    # Timer polling
    # Every tick, append any newly completed timer notifications to the chat.
    timer_check.tick(
        fn=check_timers,
        inputs=[api_key, chatbot, enabled_extensions_state],
        outputs=[chatbot]
    )
969
+
970
+
971
if __name__ == "__main__":
    # Log the launch timestamp, then start the Gradio server.
    started_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    log(f"===== GemiWine with Extensions started at {started_at} =====")
    demo.launch()