shukdevdattaEX committed
Commit bf3b321 · verified · 1 Parent(s): d513747

Update app.py

Files changed (1):
  1. app.py +405 -113
app.py CHANGED
@@ -5,59 +5,102 @@ import asyncio
 import json
 import io
 import os
-from typing import Optional, Tuple
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from typing import Optional, Tuple, Dict, Any
+import logging
+from datetime import datetime
+import re

-class DataAnalyzer:
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class EnhancedDataAnalyzer:
     def __init__(self):
         self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
+        self.max_file_size = 50 * 1024 * 1024  # 50MB limit
+        self.conversation_history = []
+
+    def validate_api_key(self, api_key: str) -> bool:
+        """Validate API key format"""
+        return bool(api_key and len(api_key.strip()) > 10)
+
+    def validate_file(self, file) -> Tuple[bool, str]:
+        """Validate uploaded file"""
+        if not file:
+            return False, "No file uploaded"
+
+        file_size = os.path.getsize(file.name)
+        if file_size > self.max_file_size:
+            return False, f"File too large. Maximum size: {self.max_file_size // (1024*1024)}MB"
+
+        file_extension = os.path.splitext(file.name)[1].lower()
+        if file_extension not in ['.csv', '.xlsx', '.xls']:
+            return False, "Unsupported format. Please upload CSV or Excel files only."
+
+        return True, "File valid"

     async def analyze_with_chutes(self, api_token: str, data_summary: str, user_question: str = None) -> str:
-        """Send data to Chutes API for analysis"""
+        """Enhanced API call with better error handling and streaming"""
         headers = {
-            "Authorization": f"Bearer {api_token}",
+            "Authorization": f"Bearer {api_token.strip()}",
             "Content-Type": "application/json"
         }

-        # Create the prompt based on whether it's initial analysis or follow-up question
+        # Create context-aware prompt
         if user_question:
-            prompt = f"""Based on this dataset summary:
+            prompt = f"""You are a data analyst expert. Based on this dataset:
+
 {data_summary}

-User question: {user_question}
+User's specific question: {user_question}

-Please provide a detailed answer based on the data."""
+Provide a detailed, actionable answer with specific data points and recommendations."""
         else:
-            prompt = f"""Analyze the following dataset and provide comprehensive insights:
+            prompt = f"""You are a senior data analyst. Analyze this dataset thoroughly:

 {data_summary}

-Please provide:
-1. Key statistical insights
-2. Notable patterns or trends
-3. Data quality observations
-4. Business recommendations
-5. Potential areas for further analysis
+Provide a comprehensive analysis including:
+
+1. **Key Statistical Insights**: Most important numbers and what they mean
+2. **Patterns & Trends**: Notable patterns, correlations, or anomalies
+3. **Data Quality Assessment**: Missing values, outliers, data consistency
+4. **Business Intelligence**: Actionable insights and opportunities
+5. **Recommendations**: Specific next steps or areas to investigate

-Keep the analysis clear, actionable, and data-driven."""
+Format your response with clear sections and bullet points for readability."""

         body = {
             "model": "openai/gpt-oss-20b",
             "messages": [
+                {
+                    "role": "system",
+                    "content": "You are an expert data analyst who provides clear, actionable insights from datasets. Always structure your responses with clear headings and specific data points."
+                },
                 {
                     "role": "user",
                     "content": prompt
                 }
             ],
             "stream": True,
-            "max_tokens": 2048,
-            "temperature": 0.3  # Lower temperature for more consistent analysis
+            "max_tokens": 3000,
+            "temperature": 0.2,  # Very low for consistent analysis
+            "top_p": 0.9
         }

         try:
-            async with aiohttp.ClientSession() as session:
+            timeout = aiohttp.ClientTimeout(total=30)  # 30 second timeout
+            async with aiohttp.ClientSession(timeout=timeout) as session:
                 async with session.post(self.api_base_url, headers=headers, json=body) as response:
-                    if response.status != 200:
-                        return f"Error: API request failed with status {response.status}"
+                    if response.status == 401:
+                        return "❌ **Authentication Error**: Invalid API key. Please check your Chutes API token."
+                    elif response.status == 429:
+                        return "⏳ **Rate Limit**: Too many requests. Please wait a moment and try again."
+                    elif response.status != 200:
+                        return f"❌ **API Error**: Request failed with status {response.status}"

                     full_response = ""
                     async for line in response.content:
@@ -76,178 +119,427 @@ Keep the analysis clear, actionable, and data-driven."""
                         except json.JSONDecodeError:
                             continue

-                return full_response if full_response else "No response received from the model."
+                return full_response if full_response else "⚠️ No response received from the model."

+        except asyncio.TimeoutError:
+            return "⏰ **Timeout Error**: Request took too long. Please try again."
         except Exception as e:
-            return f"Error connecting to Chutes API: {str(e)}"
+            logger.error(f"API Error: {str(e)}")
+            return f"❌ **Connection Error**: {str(e)}"

-    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
-        """Process uploaded CSV or Excel file"""
+    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str, dict]:
+        """Enhanced file processing with better error handling"""
         try:
             file_extension = os.path.splitext(file_path)[1].lower()

+            # Read file with better error handling
             if file_extension == '.csv':
-                df = pd.read_csv(file_path)
+                # Try different encodings
+                for encoding in ['utf-8', 'latin-1', 'cp1252']:
+                    try:
+                        df = pd.read_csv(file_path, encoding=encoding)
+                        break
+                    except UnicodeDecodeError:
+                        continue
+                else:
+                    raise ValueError("Could not decode CSV file. Please check file encoding.")
             elif file_extension in ['.xlsx', '.xls']:
                 df = pd.read_excel(file_path)
             else:
                 raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

-            # Generate comprehensive data summary
-            summary = self.generate_data_summary(df)
-            return df, summary
+            # Clean column names
+            df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)
+
+            # Generate enhanced summaries
+            data_summary = self.generate_enhanced_summary(df)
+            charts_data = self.generate_chart_data(df)
+
+            return df, data_summary, charts_data

         except Exception as e:
             raise Exception(f"Error processing file: {str(e)}")

-    def generate_data_summary(self, df: pd.DataFrame) -> str:
-        """Generate a comprehensive summary of the dataset"""
+    def generate_enhanced_summary(self, df: pd.DataFrame) -> str:
+        """Generate comprehensive data summary with statistical insights"""
         summary = []

-        # Basic info
-        summary.append(f"Dataset Overview:")
-        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
-        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")
+        # Header with timestamp
+        summary.append(f"# 📊 Dataset Analysis Report")
+        summary.append(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        summary.append(f"**File Size**: {df.shape[0]:,} rows × {df.shape[1]} columns")

+        # Memory usage
+        memory_usage = df.memory_usage(deep=True).sum() / 1024**2
+        summary.append(f"**Memory Usage**: {memory_usage:.2f} MB\n")

-        # Column information
-        summary.append(f"\nColumn Information:")
-        for i, (col, dtype) in enumerate(df.dtypes.items()):
-            null_count = df[col].isnull().sum()
-            null_pct = (null_count / len(df)) * 100
-            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")
+        # Data types breakdown
+        type_counts = df.dtypes.value_counts()
+        summary.append("## 📋 Column Types:")
+        for dtype, count in type_counts.items():
+            summary.append(f"- **{dtype}**: {count} columns")
+
+        # Missing data analysis
+        missing_data = df.isnull().sum()
+        missing_pct = (missing_data / len(df) * 100).round(2)
+        missing_summary = missing_data[missing_data > 0].sort_values(ascending=False)
+
+        if len(missing_summary) > 0:
+            summary.append("\n## ⚠️ Missing Data:")
+            for col, count in missing_summary.head(10).items():
+                pct = missing_pct[col]
+                summary.append(f"- **{col}**: {count:,} missing ({pct}%)")
+        else:
+            summary.append("\n## ✅ Data Quality: No missing values detected!")

-        # Numerical columns statistics
-        numeric_cols = df.select_dtypes(include=['number']).columns
+        # Numerical analysis
+        numeric_cols = df.select_dtypes(include=[np.number]).columns
         if len(numeric_cols) > 0:
-            summary.append(f"\nNumerical Columns Summary:")
-            for col in numeric_cols:
+            summary.append(f"\n## 📈 Numerical Columns Analysis ({len(numeric_cols)} columns):")
+            for col in numeric_cols[:10]:  # Limit to first 10
                 stats = df[col].describe()
-                summary.append(f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, Range=[{stats['min']:.2f}, {stats['max']:.2f}]")
+                outliers = len(df[df[col] > (stats['75%'] + 1.5 * (stats['75%'] - stats['25%']))])
+                summary.append(f"- **{col}**: μ={stats['mean']:.2f}, σ={stats['std']:.2f}, outliers={outliers}")

-        # Categorical columns
+        # Categorical analysis
         categorical_cols = df.select_dtypes(include=['object', 'category']).columns
         if len(categorical_cols) > 0:
-            summary.append(f"\nCategorical Columns Summary:")
-            for col in categorical_cols:
+            summary.append(f"\n## 📝 Categorical Columns Analysis ({len(categorical_cols)} columns):")
+            for col in categorical_cols[:10]:  # Limit to first 10
                 unique_count = df[col].nunique()
+                cardinality = "High" if unique_count > len(df) * 0.9 else "Medium" if unique_count > 10 else "Low"
                 most_common = df[col].mode().iloc[0] if len(df[col].mode()) > 0 else "N/A"
-                summary.append(f"- {col}: {unique_count} unique values, Most common: '{most_common}'")
+                summary.append(f"- **{col}**: {unique_count:,} unique values ({cardinality} cardinality), Top: '{most_common}'")

-        # Sample data
-        summary.append(f"\nFirst 5 rows preview:")
-        summary.append(df.head().to_string())
+        # Sample data with better formatting
+        summary.append("\n## 🔍 Data Sample (First 3 Rows):")
+        sample_df = df.head(3)
+        for idx, row in sample_df.iterrows():
+            summary.append(f"\n**Row {idx + 1}:**")
+            for col, val in row.items():
+                summary.append(f"  - {col}: {val}")

         return "\n".join(summary)
+
+    def generate_chart_data(self, df: pd.DataFrame) -> dict:
+        """Generate data for automatic visualizations"""
+        charts = {}
+
+        # Numerical distribution charts
+        numeric_cols = df.select_dtypes(include=[np.number]).columns
+        if len(numeric_cols) > 0:
+            for col in numeric_cols[:3]:  # First 3 numeric columns
+                fig = px.histogram(df, x=col, title=f"Distribution of {col}")
+                charts[f"hist_{col}"] = fig
+
+        # Categorical charts
+        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+        if len(categorical_cols) > 0:
+            for col in categorical_cols[:2]:  # First 2 categorical columns
+                if df[col].nunique() <= 20:  # Only if reasonable number of categories
+                    value_counts = df[col].value_counts().head(10)
+                    fig = px.bar(x=value_counts.index, y=value_counts.values,
+                                 title=f"Top Values in {col}")
+                    charts[f"bar_{col}"] = fig
+
+        return charts

 # Initialize the analyzer
-analyzer = DataAnalyzer()
+analyzer = EnhancedDataAnalyzer()

-async def analyze_data(file, api_key, user_question=""):
-    """Main function to analyze uploaded data"""
+async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
+    """Enhanced analysis function with progress tracking"""
     if not file:
-        return "Please upload a CSV or Excel file.", "", ""
+        return "❌ Please upload a CSV or Excel file.", "", "", None

-    if not api_key:
-        return "Please enter your Chutes API key.", "", ""
+    if not analyzer.validate_api_key(api_key):
+        return "❌ Please enter a valid Chutes API key (minimum 10 characters).", "", "", None
+
+    # Validate file
+    is_valid, validation_msg = analyzer.validate_file(file)
+    if not is_valid:
+        return f"❌ {validation_msg}", "", "", None
+
+    progress(0.1, desc="📁 Reading file...")

     try:
         # Process the uploaded file
-        df, data_summary = analyzer.process_file(file.name)
+        df, data_summary, charts_data = analyzer.process_file(file.name)
+        progress(0.3, desc="📊 Processing data...")
+
+        # Generate visualizations
+        chart_html = create_basic_charts(df)
+        progress(0.5, desc="🤖 Generating AI insights...")

         # Get AI analysis
         ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
+        progress(0.9, desc="✨ Finalizing results...")

         # Format the complete response
-        response = f"""## 📊 Data Analysis Complete!
-
-### 📈 Dataset Overview:
-{data_summary}
+        response = f"""# 🎯 Analysis Complete!

-### 🤖 AI Insights & Recommendations:
 {ai_analysis}
+
+---
+*Analysis powered by OpenAI gpt-oss-20b via Chutes • Generated at {datetime.now().strftime('%H:%M:%S')}*
 """

-        return response, data_summary, df.head(10).to_html()
+        progress(1.0, desc="✅ Done!")
+        return response, data_summary, df.head(15).to_html(classes="table table-striped"), chart_html

     except Exception as e:
-        return f"Error: {str(e)}", "", ""
+        logger.error(f"Analysis error: {str(e)}")
+        return f"❌ **Error**: {str(e)}", "", "", None

-def sync_analyze_data(file, api_key, user_question=""):
+def create_basic_charts(df: pd.DataFrame) -> str:
+    """Create basic visualizations for the dataset"""
+    charts_html = []
+
+    try:
+        # Chart 1: Data completeness heatmap
+        missing_data = df.isnull().sum()
+        if missing_data.sum() > 0:
+            fig = px.bar(x=missing_data.index, y=missing_data.values,
+                         title="Missing Data by Column",
+                         labels={'x': 'Columns', 'y': 'Missing Count'})
+            fig.update_layout(height=400, showlegend=False)
+            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
+
+        # Chart 2: Numerical columns correlation (if multiple numeric columns)
+        numeric_cols = df.select_dtypes(include=[np.number]).columns
+        if len(numeric_cols) > 1:
+            corr_matrix = df[numeric_cols].corr()
+            fig = px.imshow(corr_matrix,
+                            title="Correlation Matrix",
+                            color_continuous_scale='RdBu_r',
+                            aspect="auto")
+            fig.update_layout(height=500)
+            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
+
+        # Chart 3: Distribution of first numeric column
+        if len(numeric_cols) > 0:
+            first_numeric = numeric_cols[0]
+            fig = px.histogram(df, x=first_numeric,
+                               title=f"Distribution: {first_numeric}",
+                               marginal="box")
+            fig.update_layout(height=400)
+            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
+
+        return "\n".join(charts_html) if charts_html else "<p>No charts generated for this dataset.</p>"
+
+    except Exception as e:
+        logger.error(f"Chart generation error: {str(e)}")
+        return f"<p>Chart generation failed: {str(e)}</p>"
+
+def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
     """Synchronous wrapper for the async analyze function"""
-    return asyncio.run(analyze_data(file, api_key, user_question))
+    return asyncio.run(analyze_data(file, api_key, user_question, progress))

-# Create the Gradio interface
-with gr.Blocks(title="📊 Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
+def clear_all():
+    """Clear all inputs and outputs"""
+    return None, "", "", "", "", "", None
+
+def download_summary(analysis_text, data_summary):
+    """Generate downloadable summary report"""
+    if not analysis_text:
+        return None
+
+    report = f"""# Data Analysis Report
+Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+
+## AI Analysis:
+{analysis_text}
+
+## Raw Data Summary:
+{data_summary}
+"""
+
+    # Save to temporary file
+    filename = f"data_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
+    with open(filename, 'w', encoding='utf-8') as f:
+        f.write(report)
+
+    return filename
+
+# Create enhanced Gradio interface
+with gr.Blocks(
+    title="🚀 Smart Data Analyzer Pro",
+    theme=gr.themes.Soft(),
+    css="""
+    .gradio-container {
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    }
+    .tab-nav {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    }
+    .upload-area {
+        border: 2px dashed #667eea;
+        border-radius: 10px;
+        padding: 20px;
+        text-align: center;
+        background: #f8f9ff;
+    }
+    """
+) as app:
+
+    # Header
     gr.Markdown("""
-    # 📊 Smart Data Analyzer
-    ### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
+    # 🚀 Smart Data Analyzer Pro
+    ### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b
+
+    Upload your data files and get instant professional insights, visualizations, and recommendations!
     """)

+    # Main interface
     with gr.Row():
         with gr.Column(scale=1):
-            # File upload
-            file_input = gr.File(
-                label="📁 Upload CSV or Excel File",
-                file_types=[".csv", ".xlsx", ".xls"],
-                file_count="single"
-            )
-
-            # API key input
+            # Configuration section
+            gr.Markdown("### ⚙️ Configuration")
+
             api_key_input = gr.Textbox(
                 label="🔑 Chutes API Key",
-                placeholder="Enter your Chutes API token here...",
+                placeholder="sk-chutes-your-api-key-here...",
                 type="password",
-                lines=1
+                lines=1,
+                info="Get your free API key from chutes.ai"
             )

-            # Optional question input
-            question_input = gr.Textbox(
-                label="❓ Ask a Specific Question (Optional)",
-                placeholder="e.g., What are the sales trends? Which region performs best?",
-                lines=2
+            file_input = gr.File(
+                label="📁 Upload Data File",
+                file_types=[".csv", ".xlsx", ".xls"],
+                file_count="single",
+                elem_classes=["upload-area"]
             )

-            # Analyze button
-            analyze_btn = gr.Button("🚀 Analyze Data", variant="primary", size="lg")
+            with gr.Row():
+                analyze_btn = gr.Button("🚀 Analyze Data", variant="primary", size="lg")
+                clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
+
+            # Quick stats display
+            with gr.Group():
+                gr.Markdown("### 📊 Quick Stats")
+                file_stats = gr.Textbox(
+                    label="File Information",
+                    lines=3,
+                    interactive=False,
+                    placeholder="Upload a file to see statistics..."
+                )

         with gr.Column(scale=2):
-            # Results display
+            # Results section
+            gr.Markdown("### 🎯 Analysis Results")
+
             analysis_output = gr.Markdown(
-                label="📋 Analysis Results",
-                value="Upload a file and click 'Analyze Data' to see insights..."
+                value="📋 **Ready to analyze your data!**\n\nUpload a CSV or Excel file and click 'Analyze Data' to get started.",
+                show_label=False
             )

-            # Additional outputs (hidden by default)
-            with gr.Accordion("📊 Data Preview", open=False):
-                data_preview = gr.HTML(label="First 10 Rows")
-
-            with gr.Accordion("🔍 Raw Data Summary", open=False):
-                raw_summary = gr.Textbox(label="Dataset Summary", lines=10)
-
+    # Advanced features in tabs
+    with gr.Tabs():
+        with gr.Tab("💬 Ask Questions"):
+            question_input = gr.Textbox(
+                label="❓ Ask Specific Questions About Your Data",
+                placeholder="Examples:\n• What are the top 5 customers by revenue?\n• Are there any seasonal trends?\n• Which products have the highest margins?\n• What anomalies do you see in this data?",
+                lines=3
+            )
+            ask_btn = gr.Button("🔍 Get Answer", variant="primary")
+            question_output = gr.Markdown()
+
+        with gr.Tab("📊 Data Preview"):
+            data_preview = gr.HTML(
+                label="Dataset Preview",
+                value="<p>Upload a file to see data preview...</p>"
+            )
+
+        with gr.Tab("📈 Visualizations"):
+            charts_output = gr.HTML(
+                label="Auto-Generated Charts",
+                value="<p>Charts will appear here after analysis...</p>"
+            )
+
+        with gr.Tab("🔍 Raw Summary"):
+            raw_summary = gr.Textbox(
+                label="Detailed Data Summary",
+                lines=15,
+                max_lines=20,
+                show_copy_button=True
+            )
+
+        with gr.Tab("💾 Export"):
+            gr.Markdown("### Download Your Analysis Report")
+            download_btn = gr.Button("📥 Download Report (.md)", variant="secondary")
+            download_file = gr.File(label="Download Link", visible=False)
+
     # Event handlers
+    def update_file_stats(file):
+        if not file:
+            return "No file uploaded"
+
+        try:
+            file_size = os.path.getsize(file.name) / (1024 * 1024)  # MB
+            file_name = os.path.basename(file.name)
+            return f"📄 **File**: {file_name}\n📏 **Size**: {file_size:.2f} MB\n⏰ **Uploaded**: {datetime.now().strftime('%H:%M:%S')}"
+        except:
+            return "File information unavailable"
+
+    # Main analysis
     analyze_btn.click(
+        fn=sync_analyze_data,
+        inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],
+        outputs=[analysis_output, raw_summary, data_preview, charts_output],
+        show_progress=True
+    )
+
+    # Follow-up questions
+    ask_btn.click(
         fn=sync_analyze_data,
         inputs=[file_input, api_key_input, question_input],
-        outputs=[analysis_output, raw_summary, data_preview]
+        outputs=[question_output, gr.Textbox(visible=False), gr.HTML(visible=False), gr.HTML(visible=False)],
+        show_progress=True
+    )
+
+    # File stats update
+    file_input.change(
+        fn=update_file_stats,
+        inputs=[file_input],
+        outputs=[file_stats]
+    )
+
+    # Clear functionality
+    clear_btn.click(
+        fn=clear_all,
+        outputs=[file_input, api_key_input, question_input, analysis_output,
+                 question_output, data_preview, charts_output]
     )

-    # Example section
+    # Download functionality
+    download_btn.click(
+        fn=download_summary,
+        inputs=[analysis_output, raw_summary],
+        outputs=[download_file]
+    )
+
+    # Footer with usage tips
     gr.Markdown("""
-    ### 💡 Tips for Best Results:
-    - **File Size**: Keep files under 10MB for fastest processing
-    - **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
-    - **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
-    - **Formats**: Supports CSV, XLSX, and XLS files
-
-    ### 🎯 Example Questions to Ask:
-    - "What are the key trends in this sales data?"
-    - "Which products are underperforming?"
-    - "Are there any seasonal patterns?"
-    - "What recommendations do you have based on this data?"
+    ---
+    ### 💡 Pro Tips for Better Analysis:
+
+    **🎯 For Best Results:**
+    - Clean your data before upload (remove extra headers, format dates consistently)
+    - Use descriptive column names
+    - Ask specific questions like "What drives the highest profits?" instead of "Analyze this data"
+
+    **⚡ Speed Optimization:**
+    - Files under 10MB process fastest
+    - CSV files typically load faster than Excel
+    - Limit to essential columns for quicker analysis
+
+    **🔧 Supported Formats:** CSV, XLSX, XLS | **📏 Max Size:** 50MB | **🚀 Response Time:** ~3-5 seconds
     """)

-# Launch the application
+# Launch configuration
 if __name__ == "__main__":
+    app.queue(max_size=10)  # Handle multiple users
     app.launch(
         share=True
     )
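
---

Note: both versions of `analyze_with_chutes` share a stream-parsing loop that the diff leaves collapsed as unchanged context (the lines between `async for line in response.content:` and the `json.JSONDecodeError` handler). For readers who want the full picture, here is a minimal, self-contained sketch of how an OpenAI-style SSE stream from this endpoint is typically consumed with aiohttp. The chunk fields (`choices`, `delta`, `content`) and the `[DONE]` sentinel follow the OpenAI streaming convention and are an assumption about what the collapsed code does, not a copy of it:

```python
import asyncio
import json

import aiohttp

CHUTES_URL = "https://llm.chutes.ai/v1/chat/completions"  # same endpoint as app.py

async def stream_chat(api_token: str, prompt: str) -> str:
    """Collect a streamed chat completion into a single string."""
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }
    body = {
        "model": "openai/gpt-oss-20b",
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    full_response = ""
    async with aiohttp.ClientSession() as session:
        async with session.post(CHUTES_URL, headers=headers, json=body) as response:
            response.raise_for_status()
            # SSE frames arrive one per line: b'data: {"choices": [...]}'
            async for raw_line in response.content:
                line = raw_line.decode("utf-8", errors="ignore").strip()
                if not line.startswith("data: "):
                    continue  # skip blank keep-alive lines
                payload = line[len("data: "):]
                if payload == "[DONE]":  # assumed OpenAI-style end-of-stream sentinel
                    break
                try:
                    delta = json.loads(payload)["choices"][0].get("delta", {})
                    full_response += delta.get("content", "") or ""
                except (json.JSONDecodeError, KeyError, IndexError):
                    continue  # tolerate malformed chunks, as app.py does
    return full_response

if __name__ == "__main__":
    print(asyncio.run(stream_chat("YOUR_CHUTES_TOKEN", "Say hello.")))
```

On the `sync_analyze_data` wrapper: it bridges into Gradio's synchronous callback path via `asyncio.run`. Recent Gradio versions also accept `async def` callbacks directly, so the wrapper appears to be a compatibility choice rather than a requirement.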