wuhp committed on
Commit
852a7ed
·
verified ·
1 Parent(s): 289a7eb

Create backups/app1.py

Browse files
Files changed (1) hide show
  1. backups/app1.py +931 -0
backups/app1.py ADDED
@@ -0,0 +1,931 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from google import genai
3
+ from google.genai import types
4
+ from google.genai.types import Tool, GoogleSearch, FunctionDeclaration
5
+ from PIL import Image
6
+ import io
7
+ import traceback
8
+ import datetime
9
+ import re
10
+ import importlib
11
+ import os
12
+ import sys
13
+ from typing import List, Dict, Any, Optional
14
+ from pathlib import Path
15
+
16
+ # Add current directory to path for imports
17
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
18
+
19
+ from base_extension import BaseExtension
20
+
21
+ DESCRIPTION = """
22
+ # GemiWine
23
+ **Powered by Gemini 2.5 Flash + Google Search Grounding + Agent Extensions**
24
+ """
25
+
26
+ BASE_SYSTEM_PROMPT = """
27
+ You are GemiWine, a helpful AI assistant with extensible capabilities.
28
+ Your core abilities include conversation, web search, and image understanding.
29
+
30
+ When users enable extensions, you gain additional tools and capabilities.
31
+ Always use the available tools when they would be helpful to the user.
32
+ Be proactive about suggesting when an extension might be useful.
33
+ """
34
+
35
+
36
+ def log(msg: str):
37
+ now = datetime.datetime.now().strftime("%H:%M:%S")
38
+ print(f"[{now}] {msg}", flush=True)
39
+
40
+
41
+ def get_mime_type(file_path: str) -> str:
42
+ """Determine MIME type from file extension"""
43
+ ext = Path(file_path).suffix.lower()
44
+ mime_types = {
45
+ # Images
46
+ '.jpg': 'image/jpeg',
47
+ '.jpeg': 'image/jpeg',
48
+ '.png': 'image/png',
49
+ '.gif': 'image/gif',
50
+ '.webp': 'image/webp',
51
+ '.heic': 'image/heic',
52
+ '.heif': 'image/heif',
53
+ # Documents
54
+ '.pdf': 'application/pdf',
55
+ '.txt': 'text/plain',
56
+ '.html': 'text/html',
57
+ '.md': 'text/markdown',
58
+ # Videos
59
+ '.mp4': 'video/mp4',
60
+ '.mpeg': 'video/mpeg',
61
+ '.mov': 'video/mov',
62
+ '.avi': 'video/avi',
63
+ '.flv': 'video/x-flv',
64
+ '.mpg': 'video/mpg',
65
+ '.webm': 'video/webm',
66
+ '.wmv': 'video/wmv',
67
+ '.3gpp': 'video/3gpp',
68
+ }
69
+ return mime_types.get(ext, 'application/octet-stream')
70
+
71
+
72
+ def process_uploaded_file(client: genai.Client, file_path: str) -> types.Part:
73
+ """Process an uploaded file and return a Part object"""
74
+ mime_type = get_mime_type(file_path)
75
+ file_size = Path(file_path).stat().st_size
76
+
77
+ log(f"📎 Processing file: {Path(file_path).name} ({mime_type}, {file_size/1024:.1f}KB)")
78
+
79
+ # For files > 20MB or videos, use File API
80
+ if file_size > 20 * 1024 * 1024 or mime_type.startswith('video/'):
81
+ log(f"📤 Uploading large file via File API...")
82
+ uploaded_file = client.files.upload(file=file_path)
83
+ log(f"✅ File uploaded: {uploaded_file.name}")
84
+ return uploaded_file
85
+ else:
86
+ # For smaller files, pass inline
87
+ with open(file_path, 'rb') as f:
88
+ file_bytes = f.read()
89
+ log(f"✅ File loaded inline")
90
+ return types.Part.from_bytes(data=file_bytes, mime_type=mime_type)
91
+
92
+
93
+ class ExtensionManager:
94
+ """Manages loading and interfacing with extensions"""
95
+
96
+ def __init__(self):
97
+ self.extensions: Dict[str, BaseExtension] = {}
98
+ self.load_extensions()
99
+
100
+ def load_extensions(self):
101
+ """Dynamically load all extensions from extensions/ folder"""
102
+ extensions_dir = Path("extensions")
103
+ if not extensions_dir.exists():
104
+ log("⚠️ Extensions directory not found, creating it...")
105
+ extensions_dir.mkdir()
106
+ return
107
+
108
+ for file in extensions_dir.glob("*.py"):
109
+ if file.name.startswith("_"):
110
+ continue
111
+
112
+ try:
113
+ module_name = file.stem
114
+ spec = importlib.util.spec_from_file_location(module_name, file)
115
+ module = importlib.util.module_from_spec(spec)
116
+ spec.loader.exec_module(module)
117
+
118
+ # Find Extension class in module
119
+ for attr_name in dir(module):
120
+ attr = getattr(module, attr_name)
121
+ if (isinstance(attr, type) and
122
+ issubclass(attr, BaseExtension) and
123
+ attr != BaseExtension):
124
+ ext = attr()
125
+ self.extensions[ext.name] = ext
126
+ log(f"✅ Loaded extension: {ext.display_name}")
127
+ break
128
+ except Exception as e:
129
+ log(f"❌ Failed to load {file.name}: {e}")
130
+
131
+ def get_extension(self, name: str) -> Optional[BaseExtension]:
132
+ return self.extensions.get(name)
133
+
134
+ def get_all_extensions(self) -> List[BaseExtension]:
135
+ return list(self.extensions.values())
136
+
137
+ def get_enabled_extensions(self, user_id: str, enabled_list: List[str]) -> List[BaseExtension]:
138
+ """Get list of enabled extension objects"""
139
+ return [ext for name, ext in self.extensions.items() if name in enabled_list]
140
+
141
+ def build_system_prompt(self, enabled_list: List[str]) -> str:
142
+ """Build system prompt with enabled extension contexts"""
143
+ prompt = BASE_SYSTEM_PROMPT
144
+
145
+ enabled_exts = self.get_enabled_extensions("", enabled_list)
146
+ if enabled_exts:
147
+ prompt += "\n\n# ENABLED EXTENSIONS\nYou currently have these extensions enabled:\n\n"
148
+ for ext in enabled_exts:
149
+ prompt += f"## {ext.display_name}\n{ext.get_system_context()}\n\n"
150
+
151
+ return prompt
152
+
153
+ def get_all_tools(self, enabled_list: List[str]) -> List[types.Tool]:
154
+ """Get all tools from enabled extensions (no search tool here)"""
155
+ tools = []
156
+
157
+ # Add extension tools only
158
+ for ext_name in enabled_list:
159
+ ext = self.get_extension(ext_name)
160
+ if ext:
161
+ tools.extend(ext.get_tools())
162
+
163
+ return tools
164
+
165
+ def get_search_tool(self) -> types.Tool:
166
+ """Get Google Search tool separately"""
167
+ return types.Tool(google_search=types.GoogleSearch())
168
+
169
+ def handle_function_calls(self, user_id: str, enabled_list: List[str], function_calls: List) -> List[str]:
170
+ """Process function calls from Gemini and return results"""
171
+ results = []
172
+
173
+ for fc in function_calls:
174
+ function_name = fc.name
175
+ args = fc.args if hasattr(fc, 'args') else {}
176
+
177
+ # Find which extension owns this function
178
+ handled = False
179
+ for ext_name in enabled_list:
180
+ ext = self.get_extension(ext_name)
181
+ if ext:
182
+ # Check if this function is in the extension's tools
183
+ for tool in ext.get_tools():
184
+ if hasattr(tool, 'function_declarations'):
185
+ for func_decl in tool.function_declarations:
186
+ if func_decl.name == function_name:
187
+ result = ext.handle_tool_call(user_id, function_name, args)
188
+ results.append(str(result))
189
+ handled = True
190
+ break
191
+ if handled:
192
+ break
193
+ if handled:
194
+ break
195
+
196
+ if not handled:
197
+ results.append(f"Error: Unknown function {function_name}")
198
+
199
+ return results
200
+
201
+
202
+ class AgentOrchestrator:
203
+ """Orchestrates multiple specialized agents"""
204
+
205
+ def __init__(self, client, chat, extension_manager, enabled_extensions):
206
+ self.client = client
207
+ self.chat = chat # Multi-turn chat session
208
+ self.extension_manager = extension_manager
209
+ self.enabled_extensions = enabled_extensions
210
+ # Create a separate chat session for search (to isolate it from tool calls)
211
+ self.search_chat = client.chats.create(model="gemini-2.5-flash")
212
+
213
+ def call_search_agent(self, query: str, file_parts: List = None) -> tuple:
214
+ """Call specialized search agent using streaming - returns (text, citations)"""
215
+ log("🔍 Calling Search Agent...")
216
+
217
+ grounding_tool = types.Tool(google_search=types.GoogleSearch())
218
+ config = types.GenerateContentConfig(
219
+ system_instruction="You are a search specialist. Use Google Search to find relevant, accurate information. Provide concise, well-cited answers.",
220
+ tools=[grounding_tool],
221
+ temperature=0.7,
222
+ max_output_tokens=2048
223
+ )
224
+
225
+ try:
226
+ # Build message content with files if provided
227
+ content_parts = []
228
+ if file_parts:
229
+ content_parts.extend(file_parts)
230
+ content_parts.append(query)
231
+
232
+ # Use streaming like the working example
233
+ result_text = ""
234
+ last_chunk = None
235
+
236
+ stream = self.search_chat.send_message_stream(content_parts, config=config)
237
+ for chunk in stream:
238
+ last_chunk = chunk
239
+ if hasattr(chunk, 'candidates') and chunk.candidates:
240
+ candidate = chunk.candidates[0]
241
+ if hasattr(candidate, 'content') and candidate.content:
242
+ if hasattr(candidate.content, 'parts') and candidate.content.parts:
243
+ for part in candidate.content.parts:
244
+ if hasattr(part, 'text') and part.text:
245
+ result_text += part.text
246
+
247
+ # Extract citations from the last chunk using the working function
248
+ citations = None
249
+ if last_chunk and hasattr(last_chunk, 'candidates') and last_chunk.candidates:
250
+ log(f"🔍 Extracting citations from search response...")
251
+ citations = insert_citations_from_grounding(last_chunk.candidates)
252
+ if citations:
253
+ log(f"✅ Citations extracted successfully")
254
+ else:
255
+ log(f"⚠️ No citations found in grounding metadata")
256
+
257
+ if result_text:
258
+ log(f"✅ Search Agent returned {len(result_text)} chars")
259
+ else:
260
+ log(f"⚠️ Search Agent returned empty result")
261
+
262
+ return result_text, citations
263
+
264
+ except Exception as e:
265
+ log(f"⚠️ Search Agent error: {e}")
266
+ traceback.print_exc()
267
+
268
+ return "", None
269
+
270
+ def call_tool_agent(self, query: str, search_context: str = "", reasoning_budget: int = -1, file_parts: List = None) -> tuple:
271
+ """Call tool execution agent with function calling - uses multi-turn chat"""
272
+ log("🛠️ Calling Tool Agent...")
273
+
274
+ # Build prompt with context if needed
275
+ prompt = query
276
+ if search_context:
277
+ prompt = f"[Context from Search]\n{search_context}\n\n[User Request]\n{query}"
278
+
279
+ # Get extension tools
280
+ tools = self.extension_manager.get_all_tools(self.enabled_extensions)
281
+
282
+ system_prompt = self.extension_manager.build_system_prompt(self.enabled_extensions)
283
+ system_prompt += """
284
+
285
+ CRITICAL INSTRUCTIONS FOR TOOL USAGE:
286
+ - You have PERSISTENT STATE across all conversations in this chat session
287
+ - Timers, tasks, notes, and other data remain even after responses
288
+ - When users ask about "the timer", "the alarm", "my tasks", etc., they're referring to items created earlier
289
+ - ALWAYS use your tools (list_timers, list_tasks, check_timer, etc.) when asked about status
290
+ - Don't say you can't access information - use your available tools first
291
+ - Be proactive: if user mentions checking something, use the appropriate tool immediately
292
+
293
+ If search context is provided, incorporate it naturally.
294
+ When images, PDFs, videos, or other files are provided, analyze them thoroughly and reference them in your response."""
295
+
296
+ config = types.GenerateContentConfig(
297
+ system_instruction=system_prompt,
298
+ tools=tools,
299
+ temperature=0.7,
300
+ max_output_tokens=4096,
301
+ thinking_config=types.ThinkingConfig(
302
+ include_thoughts=True,
303
+ thinking_budget=reasoning_budget,
304
+ )
305
+ )
306
+
307
+ try:
308
+ # Build message content with files if provided
309
+ content_parts = []
310
+ if file_parts:
311
+ content_parts.extend(file_parts)
312
+ content_parts.append(prompt)
313
+
314
+ # Use the chat session's send_message (maintains conversation history automatically)
315
+ response = self.chat.send_message(
316
+ content_parts,
317
+ config=config
318
+ )
319
+
320
+ function_calls = []
321
+ text_response = ""
322
+ thoughts = ""
323
+
324
+ if response.candidates and response.candidates[0].content:
325
+ for part in response.candidates[0].content.parts:
326
+ if hasattr(part, 'function_call') and part.function_call:
327
+ function_calls.append(part.function_call)
328
+ log(f"🔧 Tool call: {part.function_call.name}")
329
+
330
+ if getattr(part, "text", None):
331
+ if getattr(part, "thought", False):
332
+ thoughts += part.text
333
+ else:
334
+ text_response += part.text
335
+
336
+ return function_calls, text_response, thoughts
337
+
338
+ except Exception as e:
339
+ log(f"⚠️ Tool Agent error: {e}")
340
+ traceback.print_exc()
341
+ return [], "", ""
342
+
343
+ def synthesize_response(self, query: str, search_results: str, tool_results: list, search_citations: Optional[str] = None, file_parts: List = None) -> str:
344
+ """Synthesize final response from all sources"""
345
+ log("✨ Synthesizing final response...")
346
+
347
+ synthesis_prompt = f"[Original Query]\n{query}\n\n"
348
+
349
+ if search_results:
350
+ synthesis_prompt += f"[Web Search Results]\n{search_results}\n\n"
351
+
352
+ if tool_results:
353
+ synthesis_prompt += "[Tool Execution Results]\n"
354
+ for tool_name, result in tool_results:
355
+ if result is None:
356
+ result = "(no result)"
357
+ synthesis_prompt += f"- {tool_name}: {result}\n"
358
+ synthesis_prompt += "\n"
359
+
360
+ synthesis_prompt += "Provide a comprehensive answer that incorporates all available information above. Be natural and conversational."
361
+
362
+ # If files were provided, reference them in the context
363
+ if file_parts:
364
+ synthesis_prompt += "\n\nNote: The user has provided files (images/documents/videos) with their query. Make sure to reference and discuss the content of these files in your response."
365
+
366
+ config = types.GenerateContentConfig(
367
+ system_instruction="You are a synthesis specialist. Combine information from multiple sources into coherent, helpful responses. When files are provided, analyze and reference them in your answer.",
368
+ temperature=0.7,
369
+ max_output_tokens=4096
370
+ )
371
+
372
+ try:
373
+ # Build content parts with files if provided
374
+ content_parts = []
375
+ if file_parts:
376
+ content_parts.extend(file_parts)
377
+ content_parts.append(types.Part(text=synthesis_prompt))
378
+
379
+ response = self.client.models.generate_content(
380
+ model="gemini-2.5-flash",
381
+ contents=[types.Content(role="user", parts=content_parts)],
382
+ config=config
383
+ )
384
+
385
+ if response.candidates and response.candidates[0].content:
386
+ result = ""
387
+ for part in response.candidates[0].content.parts:
388
+ if getattr(part, "text", None):
389
+ result += part.text
390
+ return result
391
+ except Exception as e:
392
+ log(f"⚠️ Synthesis error: {e}")
393
+
394
+ return "I encountered an error synthesizing the response."
395
+
396
+
397
+
398
+ def determine_needs_search(chat, query: str) -> bool:
399
+ """Determine if query needs web search - uses chat session for reliability"""
400
+
401
+ # Simple heuristic first - if query explicitly asks to search
402
+ search_keywords = ['search', 'find online', 'look up online', 'google', 'search online', 'check online']
403
+ if any(keyword in query.lower() for keyword in search_keywords):
404
+ log(f"🔍 Search triggered by explicit keyword")
405
+ return True
406
+
407
+ # For questions about recommendations, comparisons, "best" items - likely needs search
408
+ recommendation_keywords = ['best', 'recommend', 'top', 'which', 'what are good', 'compare']
409
+ if any(keyword in query.lower() for keyword in recommendation_keywords):
410
+ log(f"🔍 Search triggered by recommendation question")
411
+ return True
412
+
413
+ # Default to no search for timer/task management queries
414
+ internal_keywords = ['timer', 'alarm', 'task', 'note', 'how much time']
415
+ if any(keyword in query.lower() for keyword in internal_keywords):
416
+ log(f"❌ No search - internal tool query")
417
+ return False
418
+
419
+ log(f"❌ No search - general query")
420
+ return False
421
+
422
+
423
+ # Global instances
424
+ EXTENSION_MANAGER = ExtensionManager()
425
+ CHAT_SESSIONS: Dict[str, Dict[str, Any]] = {}
426
+
427
+
428
+ def get_or_create_session(api_key: str):
429
+ if not api_key:
430
+ return None, None
431
+ if api_key in CHAT_SESSIONS:
432
+ return (CHAT_SESSIONS[api_key]["client"],
433
+ CHAT_SESSIONS[api_key]["chat"])
434
+ try:
435
+ client = genai.Client(api_key=api_key)
436
+ # Create a chat session for multi-turn conversations
437
+ chat = client.chats.create(model="gemini-2.5-flash")
438
+ CHAT_SESSIONS[api_key] = {
439
+ "client": client,
440
+ "chat": chat
441
+ }
442
+ log("✅ Created new Gemini session with multi-turn chat.")
443
+ return client, chat
444
+ except Exception as e:
445
+ log(f"❌ Error creating Gemini client: {e}")
446
+ return None, None
447
+
448
+
449
+ def insert_citations_from_grounding(candidates):
450
+ """Extract citations from grounding metadata - using chunk titles as display names"""
451
+ try:
452
+ if not candidates:
453
+ log("⚠️ No candidates for citation extraction")
454
+ return None
455
+
456
+ cand = candidates[0]
457
+
458
+ # Check if grounding metadata exists
459
+ grounding = getattr(cand, "grounding_metadata", None)
460
+ if not grounding:
461
+ log("⚠️ No grounding_metadata found")
462
+ return None
463
+
464
+ # Get chunks
465
+ chunks = getattr(grounding, "grounding_chunks", None) or []
466
+
467
+ if not chunks:
468
+ log("⚠️ No grounding_chunks found")
469
+ return None
470
+
471
+ # Build citation list from chunks
472
+ citations = []
473
+ seen_titles = set()
474
+
475
+ for idx, chunk in enumerate(chunks):
476
+ if hasattr(chunk, 'web') and chunk.web:
477
+ uri = getattr(chunk.web, "uri", None)
478
+ title = getattr(chunk.web, "title", None)
479
+
480
+ # Use title as the clickable text since it shows the actual domain
481
+ if uri and title and title not in seen_titles:
482
+ seen_titles.add(title)
483
+ citations.append(f"[{title}]({uri})")
484
+ elif uri:
485
+ citations.append(f"[Source {idx+1}]({uri})")
486
+
487
+ if citations:
488
+ citation_text = "\n\n📚 **Sources:** " + " • ".join(citations)
489
+ log(f"✅ Created {len(citations)} citations with source domains")
490
+ return citation_text
491
+ else:
492
+ log("⚠️ No valid citations could be created")
493
+ return None
494
+
495
+ except Exception as e:
496
+ log(f"⚠️ Citation extraction failed: {e}")
497
+ traceback.print_exc()
498
+ return None
499
+
500
+
501
+ def reasoning_budget(level: str) -> int:
502
+ level = (level or "Dynamic").lower()
503
+ if level == "none":
504
+ return 0
505
+ elif level == "concise":
506
+ return 256
507
+ elif level == "strong":
508
+ return 2048
509
+ elif level == "dynamic":
510
+ return -1
511
+ return -1
512
+
513
+
514
+ def chat_with_gemini(api_key, chat_history_msgs, multimodal_input, show_thoughts, reasoning_level, enabled_extensions):
515
+ log("=== chat_with_gemini CALLED ===")
516
+
517
+ if not api_key:
518
+ chat_history_msgs = chat_history_msgs or []
519
+ chat_history_msgs.append({
520
+ "role": "assistant",
521
+ "content": "🔑 Please enter your Gemini API key first."
522
+ })
523
+ yield chat_history_msgs
524
+ return
525
+
526
+ client, chat = get_or_create_session(api_key)
527
+ if not client:
528
+ chat_history_msgs.append({
529
+ "role": "assistant",
530
+ "content": "⚠️ Could not create Gemini session."
531
+ })
532
+ yield chat_history_msgs
533
+ return
534
+
535
+ user_text = (multimodal_input or {}).get("text", "") or ""
536
+ uploaded_files = (multimodal_input or {}).get("files", []) or []
537
+
538
+ if chat_history_msgs is None:
539
+ chat_history_msgs = []
540
+
541
+ # Process uploaded files
542
+ file_parts = []
543
+ if uploaded_files:
544
+ log(f"📎 Processing {len(uploaded_files)} uploaded file(s)...")
545
+ for file_path in uploaded_files:
546
+ try:
547
+ file_part = process_uploaded_file(client, file_path)
548
+ file_parts.append(file_part)
549
+ except Exception as e:
550
+ log(f"❌ Error processing file {file_path}: {e}")
551
+ traceback.print_exc()
552
+
553
+ chat_history_msgs.append({"role": "user", "content": user_text})
554
+ yield chat_history_msgs
555
+
556
+ assistant_base_index = len(chat_history_msgs)
557
+
558
+ # Setup thinking display if enabled
559
+ if show_thoughts:
560
+ thought_index = assistant_base_index
561
+ chat_history_msgs.append({"role": "assistant", "content": "<em>💭 Thinking...</em>"})
562
+ answer_index = thought_index + 1
563
+ chat_history_msgs.append({"role": "assistant", "content": "🤔 Processing..."})
564
+ else:
565
+ thought_index = None
566
+ answer_index = assistant_base_index
567
+ chat_history_msgs.append({"role": "assistant", "content": "🤔 Processing..."})
568
+
569
+ yield chat_history_msgs
570
+
571
+ try:
572
+ # Initialize variables at function scope
573
+ search_citations = None
574
+
575
+ # AGENT ORCHESTRATION APPROACH
576
+ if enabled_extensions:
577
+ log("🎭 Using multi-agent orchestration with multi-turn chat")
578
+ orchestrator = AgentOrchestrator(client, chat, EXTENSION_MANAGER, enabled_extensions)
579
+
580
+ budget = reasoning_budget(reasoning_level)
581
+ thoughts_accumulated = ""
582
+
583
+ # Step 1: Determine if search is needed
584
+ needs_search = determine_needs_search(chat, user_text)
585
+ log(f"📊 Search needed: {needs_search}")
586
+
587
+ # Step 2: Call search agent if needed
588
+ search_results = ""
589
+ if needs_search:
590
+ chat_history_msgs[answer_index]["content"] = "🔍 Searching the web..."
591
+ yield chat_history_msgs
592
+
593
+ search_results, search_citations = orchestrator.call_search_agent(user_text, file_parts)
594
+ log(f"📋 After search: search_citations = {search_citations[:100] if search_citations else 'None'}")
595
+
596
+ if search_results:
597
+ chat_history_msgs[answer_index]["content"] = "✅ Found information online\n\n🛠️ Now processing with tools..."
598
+ yield chat_history_msgs
599
+
600
+ # Step 3: Call tool agent (with files)
601
+ function_calls, tool_response, tool_thoughts = orchestrator.call_tool_agent(
602
+ user_text, search_results, budget, file_parts
603
+ )
604
+
605
+ # Show thoughts if available
606
+ if tool_thoughts and show_thoughts:
607
+ thoughts_accumulated += tool_thoughts
608
+ chat_history_msgs[thought_index]["content"] = (
609
+ f"<details open>"
610
+ f"<summary><strong>💭 GemiWine's Thinking</strong></summary>"
611
+ f"<div style='white-space:pre-wrap;background:inherit;color:inherit;"
612
+ f"padding:8px;border-radius:8px;border:1px solid var(--border-color);'>"
613
+ f"{thoughts_accumulated.strip()}</div>"
614
+ f"</details>"
615
+ )
616
+ yield chat_history_msgs
617
+
618
+ # Step 4: Execute function calls if any
619
+ tool_results = []
620
+ if function_calls:
621
+ chat_history_msgs[answer_index]["content"] = "⚙️ Executing tools..."
622
+ yield chat_history_msgs
623
+
624
+ user_id = api_key
625
+ results = EXTENSION_MANAGER.handle_function_calls(
626
+ user_id, enabled_extensions, function_calls
627
+ )
628
+
629
+ for fc, result in zip(function_calls, results):
630
+ tool_results.append((fc.name, result))
631
+ log(f"✅ {fc.name}: {result}")
632
+
633
+ # Step 5: Synthesize final response
634
+ if search_results or tool_results or tool_response:
635
+ chat_history_msgs[answer_index]["content"] = "✨ Synthesizing answer..."
636
+ yield chat_history_msgs
637
+ final_answer = orchestrator.synthesize_response(user_text, search_results, tool_results, search_citations, file_parts)
638
+ else:
639
+ final_answer = tool_response or "I couldn't process that request."
640
+
641
+ # Build the final content with citations if available
642
+ final_content = (
643
+ f"<div><strong>🍇 Final Answer</strong>"
644
+ f"<div style='white-space:pre-wrap;background:inherit;color:inherit;"
645
+ f"padding:8px;border-radius:8px;border:1px solid var(--border-color);'>"
646
+ f"{final_answer.strip()}</div></div>"
647
+ )
648
+
649
+ # Append citations if they exist
650
+ if search_citations:
651
+ final_content += "\n\n" + search_citations
652
+ log(f"✅ Appended citations to final answer")
653
+
654
+ chat_history_msgs[answer_index]["content"] = final_content
655
+ yield chat_history_msgs
656
+
657
+ else:
658
+ # No extensions - simple streaming with search
659
+ log("📺 Using simple streaming mode")
660
+
661
+ # Build parts for message with files
662
+ parts = []
663
+ if file_parts:
664
+ parts.extend(file_parts)
665
+ parts.append(user_text)
666
+
667
+ budget = reasoning_budget(reasoning_level)
668
+ grounding_tool = types.Tool(google_search=types.GoogleSearch())
669
+ config = types.GenerateContentConfig(
670
+ system_instruction=BASE_SYSTEM_PROMPT,
671
+ tools=[grounding_tool],
672
+ temperature=0.7,
673
+ top_p=0.9,
674
+ max_output_tokens=8192,
675
+ thinking_config=types.ThinkingConfig(
676
+ include_thoughts=True,
677
+ thinking_budget=budget,
678
+ )
679
+ )
680
+
681
+ stream = chat.send_message_stream(parts, config=config)
682
+
683
+ answer = ""
684
+ thoughts = ""
685
+ last_chunk = None
686
+
687
+ # Add thinking placeholder if needed
688
+ if show_thoughts:
689
+ thought_index = answer_index
690
+ chat_history_msgs[answer_index]["content"] = "<em>💭 Thinking...</em>"
691
+ answer_index = len(chat_history_msgs)
692
+ chat_history_msgs.append({"role": "assistant", "content": ""})
693
+ yield chat_history_msgs
694
+
695
+ for chunk in stream:
696
+ last_chunk = chunk
697
+ if not getattr(chunk, "candidates", None):
698
+ continue
699
+ candidate = chunk.candidates[0]
700
+
701
+ if getattr(candidate, "content", None):
702
+ for part in candidate.content.parts:
703
+ if not getattr(part, "text", None):
704
+ continue
705
+
706
+ if getattr(part, "thought", False):
707
+ thoughts += part.text
708
+ if show_thoughts:
709
+ chat_history_msgs[thought_index]["content"] = (
710
+ f"<details open>"
711
+ f"<summary><strong>💭 GemiWine's Thinking</strong></summary>"
712
+ f"<div style='white-space:pre-wrap;background:inherit;color:inherit;"
713
+ f"padding:8px;border-radius:8px;border:1px solid var(--border-color);'>"
714
+ f"{thoughts.strip()}</div>"
715
+ f"</details>"
716
+ )
717
+ yield chat_history_msgs
718
+ else:
719
+ answer += part.text
720
+ chat_history_msgs[answer_index]["content"] = (
721
+ f"<div><strong>🍇 Final Answer</strong>"
722
+ f"<div style='white-space:pre-wrap;background:inherit;color:inherit;"
723
+ f"padding:8px;border-radius:8px;border:1px solid var(--border-color);'>"
724
+ f"{answer.strip()}</div></div>"
725
+ )
726
+ yield chat_history_msgs
727
+
728
+ # Add citations
729
+ if last_chunk:
730
+ citations = insert_citations_from_grounding(last_chunk.candidates)
731
+ if citations:
732
+ chat_history_msgs[answer_index]["content"] += "\n\n" + citations
733
+ yield chat_history_msgs
734
+
735
+ log("✅ Response complete.")
736
+ return
737
+
738
+ except Exception as e:
739
+ log(f"❌ Error: {e}")
740
+ traceback.print_exc()
741
+ chat_history_msgs[answer_index]["content"] = f"⚠️ Error: {e}"
742
+ yield chat_history_msgs
743
+ return
744
+
745
+
746
+ def build_extension_ui():
747
+ """Build the extension toggle UI"""
748
+ extensions = EXTENSION_MANAGER.get_all_extensions()
749
+
750
+ if not extensions:
751
+ return gr.Markdown("No extensions available"), []
752
+
753
+ checkboxes = []
754
+ with gr.Accordion("🔌 Agent Extensions", open=True):
755
+ gr.Markdown("Enable extensions to give the agent additional capabilities:")
756
+ gr.Markdown("✨ **Agentic Mode:** When extensions are enabled, the agent uses multi-step reasoning with search + tools")
757
+ for ext in extensions:
758
+ cb = gr.Checkbox(
759
+ label=f"{ext.icon} {ext.display_name}",
760
+ info=ext.description,
761
+ value=False
762
+ )
763
+ checkboxes.append((ext.name, cb))
764
+
765
+ return checkboxes
766
+
767
+
768
+ with gr.Blocks(
769
+ theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
770
+ title="GemiWine",
771
+ fill_width=True
772
+ ) as demo:
773
+ gr.HTML("""
774
+ <style>
775
+ .gradio-container { padding-top: 1.5rem; padding-bottom: 1.5rem; }
776
+ .chat-panel {
777
+ background: rgba(255, 255, 255, 0.05);
778
+ border-radius: 16px !important;
779
+ padding: 1.5rem;
780
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.05);
781
+ border: 1px solid rgba(255, 255, 255, 0.1);
782
+ }
783
+ .message-input {
784
+ border-radius: 12px !important;
785
+ border: 1px solid rgba(0,0,0,0.1);
786
+ }
787
+ </style>
788
+ """)
789
+
790
+ with gr.Row():
791
+ with gr.Column(scale=1, min_width=320):
792
+ gr.Markdown("## ⚙️ Settings & Controls")
793
+ api_key = gr.Textbox(
794
+ label="🔑 Gemini API Key",
795
+ placeholder="Paste your Gemini API key here...",
796
+ type="password",
797
+ )
798
+ reasoning_level = gr.Radio(
799
+ ["None", "Concise", "Strong", "Dynamic"],
800
+ label="🧠 Reasoning Level",
801
+ value="Dynamic",
802
+ info="Controls the model's thinking depth.",
803
+ )
804
+ show_thoughts = gr.Checkbox(
805
+ label="💭 Show Thinking",
806
+ value=True,
807
+ info="Display reasoning process before answers.",
808
+ )
809
+
810
+ # Build extension checkboxes
811
+ extension_checkboxes = build_extension_ui()
812
+
813
+ with gr.Column(scale=4):
814
+ with gr.Group(elem_classes="chat-panel"):
815
+ chatbot = gr.Chatbot(
816
+ label="🍇 Chat with GemiWine",
817
+ height=650,
818
+ show_copy_button=True,
819
+ type="messages",
820
+ avatar_images=(None, "https://i.imgur.com/Q2EMk2N.png"),
821
+ )
822
+ multimodal_msg = gr.MultimodalTextbox(
823
+ file_types=[
824
+ "image", "video", "audio", # Gradio presets
825
+ ".pdf", ".txt", ".md", ".html", ".xml", # Documents
826
+ ".doc", ".docx", ".csv", ".json" # Additional formats
827
+ ],
828
+ placeholder="Ask anything, upload images/PDFs/videos, or let extensions help you...",
829
+ label="Your Message",
830
+ elem_classes="message-input",
831
+ autofocus=True
832
+ )
833
+
834
+ # Hidden state to track enabled extensions
835
+ enabled_extensions_state = gr.State([])
836
+
837
+ def clear_box():
838
+ return {"text": "", "files": []}
839
+
840
+ def handle_chat(api_key_input, chat_history_msgs, multimodal_dict, thinking_flag, reasoning_lvl, *extension_states):
841
+ # Convert extension checkbox states to list of enabled extension names
842
+ enabled = []
843
+ for (ext_name, _), is_enabled in zip(extension_checkboxes, extension_states):
844
+ if is_enabled:
845
+ enabled.append(ext_name)
846
+
847
+ log(f"Enabled extensions: {enabled}")
848
+
849
+ yield from chat_with_gemini(
850
+ api_key_input, chat_history_msgs, multimodal_dict,
851
+ thinking_flag, reasoning_lvl, enabled
852
+ )
853
+
854
+ def check_timers(api_key_input, chat_history, enabled_exts):
855
+ """Background function to check for completed timers"""
856
+ if not api_key_input or 'timer' not in enabled_exts:
857
+ return chat_history
858
+
859
+ timer_ext = EXTENSION_MANAGER.get_extension('timer')
860
+ if not timer_ext:
861
+ return chat_history
862
+
863
+ user_id = api_key_input
864
+ timer_ext.initialize_state(user_id)
865
+ state = timer_ext.get_state(user_id)
866
+
867
+ import datetime as dt
868
+ now = dt.datetime.now()
869
+ newly_completed = []
870
+
871
+ for timer in state.get("timers", []):
872
+ if timer.get("active") and not timer.get("notified", False):
873
+ end_time = dt.datetime.fromisoformat(timer["end_time"])
874
+ if now >= end_time:
875
+ newly_completed.append(timer)
876
+ timer["notified"] = True
877
+
878
+ if newly_completed:
879
+ timer_ext.update_state(user_id, state)
880
+
881
+ # Add notification to chat
882
+ if chat_history is None:
883
+ chat_history = []
884
+
885
+ for timer in newly_completed:
886
+ notification = f"⏰ **Timer Complete!** Your timer '{timer['name']}' has finished!"
887
+ chat_history.append({"role": "assistant", "content": notification})
888
+ log(f"⏰ Timer notification sent: {timer['name']}")
889
+
890
+ return chat_history
891
+
892
+ # Get just the checkbox components for inputs
893
+ checkbox_components = [cb for _, cb in extension_checkboxes]
894
+
895
+ # Main chat submission
896
+ multimodal_msg.submit(
897
+ fn=handle_chat,
898
+ inputs=[api_key, chatbot, multimodal_msg, show_thoughts, reasoning_level] + checkbox_components,
899
+ outputs=[chatbot],
900
+ queue=True,
901
+ ).then(fn=clear_box, outputs=[multimodal_msg])
902
+
903
+ # Background timer check - runs every 10 seconds
904
+ timer_check = gr.Timer(value=10, active=True)
905
+
906
+ def update_enabled_state(*extension_states):
907
+ enabled = []
908
+ for (ext_name, _), is_enabled in zip(extension_checkboxes, extension_states):
909
+ if is_enabled:
910
+ enabled.append(ext_name)
911
+ return enabled
912
+
913
+ # Update enabled extensions state whenever checkboxes change
914
+ for _, cb in extension_checkboxes:
915
+ cb.change(
916
+ fn=update_enabled_state,
917
+ inputs=checkbox_components,
918
+ outputs=[enabled_extensions_state]
919
+ )
920
+
921
+ # Timer polling
922
+ timer_check.tick(
923
+ fn=check_timers,
924
+ inputs=[api_key, chatbot, enabled_extensions_state],
925
+ outputs=[chatbot]
926
+ )
927
+
928
+
929
+ if __name__ == "__main__":
930
+ log(f"===== GemiWine with Extensions started at {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} =====")
931
+ demo.launch()