import gradio as gr
import pandas as pd
import aiohttp
import asyncio
import json
import os
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from typing import Optional, Tuple, Dict, Any
import logging
from datetime import datetime
import re
from jinja2 import Template
import markdown  # Requires the 'markdown' package: install via `pip install markdown`

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
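
# EnhancedDataAnalyzer bundles the app's core logic: input validation,
# dataset summarization, Plotly chart generation, streaming LLM analysis via
# the Chutes chat-completions API, and HTML report rendering.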
class EnhancedDataAnalyzer:
    def __init__(self):
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
        self.max_file_size = 50 * 1024 * 1024  # 50MB limit
        self.conversation_history = []
        self.current_df = None
        self.current_charts = None

    def validate_api_key(self, api_key: str) -> bool:
        """Basic API key sanity check (non-empty and longer than 10 characters)."""
        return bool(api_key and len(api_key.strip()) > 10)

    def validate_file(self, file) -> Tuple[bool, str]:
        """Validate the uploaded file's size and extension."""
        if not file:
            return False, "No file uploaded"
        file_size = os.path.getsize(file.name)
        if file_size > self.max_file_size:
            return False, f"File too large. Maximum size: {self.max_file_size // (1024 * 1024)}MB"
        file_extension = os.path.splitext(file.name)[1].lower()
        if file_extension not in ['.csv', '.xlsx', '.xls']:
            return False, "Unsupported format. Please upload CSV or Excel files only."
        return True, "File valid"

    async def analyze_with_chutes(self, api_token: str, data_summary: str, user_question: Optional[str] = None) -> str:
        """Call the Chutes chat-completions API with streaming and explicit error handling."""
        headers = {
            "Authorization": f"Bearer {api_token.strip()}",
            "Content-Type": "application/json"
        }

        # Create a context-aware prompt
        if user_question:
            prompt = f"""You are a data analyst expert. Based on this dataset:
{data_summary}
User's specific question: {user_question}
Provide a detailed, actionable answer with specific data points and recommendations."""
        else:
            prompt = f"""You are a senior data analyst. Analyze this dataset thoroughly:
{data_summary}
Provide a comprehensive analysis including:
1. **Key Statistical Insights**: Most important numbers and what they mean
2. **Patterns & Trends**: Notable patterns, correlations, or anomalies
3. **Data Quality Assessment**: Missing values, outliers, data consistency
4. **Business Intelligence**: Actionable insights and opportunities
5. **Recommendations**: Specific next steps or areas to investigate
Format your response with clear sections and bullet points for readability."""

        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are an expert data analyst who provides clear, actionable insights from datasets. Always structure your responses with clear headings and specific data points."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stream": True,
            "max_tokens": 3000,
            "temperature": 0.2,
            "top_p": 0.9
        }

        try:
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status == 401:
                        return "❌ **Authentication Error**: Invalid API key. Please check your Chutes API token."
                    elif response.status == 429:
                        return "⏳ **Rate Limit**: Too many requests. Please wait a moment and try again."
                    elif response.status != 200:
                        return f"❌ **API Error**: Request failed with status {response.status}"

                    # Accumulate the streamed completion; each SSE line is prefixed with "data: "
                    full_response = ""
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if line.startswith("data: "):
                            data = line[6:]
                            if data == "[DONE]":
                                break
                            try:
                                chunk_data = json.loads(data)
                                if "choices" in chunk_data and len(chunk_data["choices"]) > 0:
                                    delta = chunk_data["choices"][0].get("delta", {})
                                    content = delta.get("content", "")
                                    if content:
                                        full_response += content
                            except json.JSONDecodeError:
                                continue
                    return full_response if full_response else "⚠️ No response received from the model."
        except asyncio.TimeoutError:
            return "⏰ **Timeout Error**: Request took too long. Please try again."
        except Exception as e:
            logger.error(f"API Error: {str(e)}")
            return f"❌ **Connection Error**: {str(e)}"
    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str, str]:
        """Load a CSV/Excel file and return (DataFrame, text summary, charts HTML)."""
        try:
            file_extension = os.path.splitext(file_path)[1].lower()
            if file_extension == '.csv':
                # Try common encodings in order; the for-else raises if none of them work
                for encoding in ['utf-8', 'latin-1', 'cp1252']:
                    try:
                        df = pd.read_csv(file_path, encoding=encoding)
                        break
                    except UnicodeDecodeError:
                        continue
                else:
                    raise ValueError("Could not decode CSV file. Please check file encoding.")
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            # Normalize column names: trim and collapse internal whitespace
            df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)
            self.current_df = df
            data_summary = self.generate_enhanced_summary(df)
            charts_html = self.generate_visualizations(df)
            return df, data_summary, charts_html
        except Exception as e:
            raise Exception(f"Error processing file: {str(e)}") from e

    def generate_enhanced_summary(self, df: pd.DataFrame) -> str:
        """Generate a comprehensive data summary with statistical insights."""
        summary = []
        summary.append("# 📊 Dataset Analysis Report")
        summary.append(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        summary.append(f"**File Size**: {df.shape[0]:,} rows × {df.shape[1]} columns")
        memory_usage = df.memory_usage(deep=True).sum() / 1024**2
        summary.append(f"**Memory Usage**: {memory_usage:.2f} MB\n")

        # Column type breakdown
        type_counts = df.dtypes.value_counts()
        summary.append("## 📋 Column Types:")
        for dtype, count in type_counts.items():
            summary.append(f"- **{dtype}**: {count} columns")

        # Missing-data overview (top 10 columns by missing count)
        missing_data = df.isnull().sum()
        missing_pct = (missing_data / len(df) * 100).round(2)
        missing_summary = missing_data[missing_data > 0].sort_values(ascending=False)
        if len(missing_summary) > 0:
            summary.append("\n## ⚠️ Missing Data:")
            for col, count in missing_summary.head(10).items():
                pct = missing_pct[col]
                summary.append(f"- **{col}**: {count:,} missing ({pct}%)")
        else:
            summary.append("\n## ✅ Data Quality: No missing values detected!")

        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) > 0:
            summary.append(f"\n## 📈 Numerical Columns Analysis ({len(numeric_cols)} columns):")
            for col in numeric_cols[:10]:
                stats = df[col].describe()
                # Counts values above the upper IQR fence (Q3 + 1.5 * IQR) only
                outliers = len(df[df[col] > (stats['75%'] + 1.5 * (stats['75%'] - stats['25%']))])
                summary.append(f"- **{col}**: μ={stats['mean']:.2f}, σ={stats['std']:.2f}, outliers={outliers}")

        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append(f"\n## 📝 Categorical Columns Analysis ({len(categorical_cols)} columns):")
            for col in categorical_cols[:10]:
                unique_count = df[col].nunique()
                cardinality = "High" if unique_count > len(df) * 0.9 else "Medium" if unique_count > 10 else "Low"
                most_common = df[col].mode().iloc[0] if len(df[col].mode()) > 0 else "N/A"
                summary.append(f"- **{col}**: {unique_count:,} unique values ({cardinality} cardinality), Top: '{most_common}'")

        summary.append("\n## 🔍 Data Sample (First 3 Rows):")
        sample_df = df.head(3)
        for idx, row in sample_df.iterrows():
            summary.append(f"\n**Row {idx + 1}:**")
            for col, val in row.items():
                summary.append(f" - {col}: {val}")

        return "\n".join(summary)

    def generate_visualizations(self, df: pd.DataFrame) -> str:
        """Generate comprehensive visualizations for the dataset."""
        charts_html = []
        try:
            # Missing-data bar chart (only when missing values exist)
            missing_data = df.isnull().sum()
            if missing_data.sum() > 0:
                fig = px.bar(
                    x=missing_data.index,
                    y=missing_data.values,
                    title="📊 Missing Data Analysis",
                    labels={'x': 'Columns', 'y': 'Missing Values Count'},
                    color=missing_data.values,
                    color_continuous_scale='Reds'
                )
                fig.update_layout(
                    height=400,
                    showlegend=False,
                    title_x=0.5,
                    xaxis_tickangle=-45
                )
                charts_html.append("<h3>📋 Data Quality Overview</h3>")
                charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="missing_data_chart"))

            # Correlation heatmap across numeric columns
            numeric_cols = df.select_dtypes(include=[np.number]).columns
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(
                    corr_matrix,
                    title="🔗 Correlation Matrix - Numerical Variables",
                    color_continuous_scale='RdBu_r',
                    aspect="auto",
                    text_auto=True
                )
                fig.update_layout(height=500, title_x=0.5)
                charts_html.append("<h3>🔗 Correlation Analysis</h3>")
                charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="correlation_chart"))

            # Distributions for up to three numeric columns
            if len(numeric_cols) > 0:
                for i, col in enumerate(numeric_cols[:3]):
                    fig = px.histogram(
                        df,
                        x=col,
                        title=f"📈 Distribution: {col}",
                        marginal="box",
                        nbins=30
                    )
                    fig.update_layout(height=400, title_x=0.5)
                    if i == 0:
                        charts_html.append("<h3>📈 Data Distributions</h3>")
                    charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id=f"dist_chart_{i}"))

            # Top-value bars for up to two low-cardinality categorical columns
            categorical_cols = df.select_dtypes(include=['object', 'category']).columns
            if len(categorical_cols) > 0:
                for i, col in enumerate(categorical_cols[:2]):
                    if df[col].nunique() <= 20:
                        value_counts = df[col].value_counts().head(10)
                        fig = px.bar(
                            x=value_counts.values,
                            y=value_counts.index,
                            orientation='h',
                            title=f"📊 Top 10 Values: {col}",
                            labels={'x': 'Count', 'y': col}
                        )
                        fig.update_layout(height=400, title_x=0.5)
                        if i == 0:
                            charts_html.append("<h3>📝 Categorical Data Analysis</h3>")
                        charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id=f"cat_chart_{i}"))

            # Overall dataset summary chart
            summary_data = {
                'Metric': ['Total Rows', 'Total Columns', 'Numeric Columns', 'Categorical Columns', 'Missing Values'],
                'Count': [
                    len(df),
                    len(df.columns),
                    len(numeric_cols),
                    len(categorical_cols),
                    df.isnull().sum().sum()
                ]
            }
            fig = px.bar(
                summary_data,
                x='Metric',
                y='Count',
                title="📊 Dataset Overview",
                color='Count',
                color_continuous_scale='Blues'
            )
            fig.update_layout(height=400, title_x=0.5, showlegend=False)
            charts_html.append("<h3>📊 Dataset Overview</h3>")
            charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="overview_chart"))

            self.current_charts = charts_html
            return "\n".join(charts_html) if charts_html else "<p>No charts could be generated for this dataset.</p>"
        except Exception as e:
            logger.error(f"Chart generation error: {str(e)}")
            return f"<p>❌ Chart generation failed: {str(e)}</p>"
    def generate_report_html(self, analysis_text: str, data_summary: str, file_name: str = "Unknown") -> str:
        """Generate an HTML report with formatted analysis text and a print button."""
        html_template = """
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Data Analysis Report</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background: #f8f9fa;
        }
        .header {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 30px;
            border-radius: 10px;
            margin-bottom: 30px;
            text-align: center;
        }
        .section {
            background: white;
            padding: 25px;
            margin-bottom: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        .chart-container {
            margin: 20px 0;
            padding: 15px;
            background: #f8f9ff;
            border-radius: 8px;
            border-left: 4px solid #667eea;
        }
        h1, h2, h3 {
            color: #2c3e50;
            margin-top: 20px;
            margin-bottom: 15px;
        }
        .metadata {
            background: #e8f4f8;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
        }
        .footer {
            text-align: center;
            color: #666;
            margin-top: 40px;
            padding: 20px;
            background: #f1f1f1;
            border-radius: 5px;
        }
        pre {
            background: #f4f4f4;
            padding: 15px;
            border-radius: 5px;
            overflow-x: auto;
            white-space: pre-wrap;
            font-size: 14px;
        }
        strong {
            color: #2c3e50;
            font-weight: 600;
        }
        table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
        }
        th, td {
            border: 1px solid #ddd;
            padding: 8px;
            text-align: left;
        }
        th {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
        }
        tr:nth-child(even) {
            background-color: #f2f2f2;
        }
        .print-button {
            background: #667eea;
            color: white;
            padding: 10px 20px;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-size: 16px;
            margin: 10px 0;
            display: inline-block;
        }
        .print-button:hover {
            background: #764ba2;
        }
        @media print {
            .print-button {
                display: none;
            }
            body {
                background: white;
            }
            .section, .metadata, .footer {
                box-shadow: none;
            }
        }
    </style>
    <script>
        function printReport() {
            window.print();
        }
    </script>
</head>
<body>
    <div class="header">
        <h1>📊 Smart Data Analysis Report</h1>
        <p>Comprehensive AI-Powered Data Insights</p>
    </div>
    <div class="metadata">
        <strong>📁 File:</strong> {{ file_name }}<br>
        <strong>📅 Generated:</strong> {{ timestamp }}<br>
        <strong>🤖 Model:</strong> OpenAI gpt-oss-20b
    </div>
    <div class="section">
        <h2>🎯 AI Analysis & Insights</h2>
        <button class="print-button" onclick="printReport()">🖨️ Print as PDF</button>
        <div>{{ ai_analysis }}</div>
    </div>
    <div class="section">
        <h2>📊 Visualizations</h2>
        <div class="chart-container">
            {{ charts_html }}
        </div>
    </div>
    <div class="section">
        <h2>📋 Technical Data Summary</h2>
        <pre>{{ data_summary }}</pre>
    </div>
    <div class="footer">
        <p>Report generated by Smart Data Analyzer Pro • Powered by Smart AI</p>
        <p>For questions or support, contact +8801719296601 (via WhatsApp)</p>
    </div>
</body>
</html>
"""
        template = Template(html_template)
        # Convert the model's markdown output into HTML (tables and other extras enabled)
        ai_analysis_html = markdown.markdown(analysis_text, extensions=['extra', 'tables'])
        charts_content = "\n".join(self.current_charts) if self.current_charts else "<p>No visualizations available</p>"
        return template.render(
            file_name=file_name,
            timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            ai_analysis=ai_analysis_html,
            charts_html=charts_content,
            data_summary=data_summary
        )

# Module-level analyzer instance; it is shared across all Gradio sessions,
# so current_df/current_charts reflect the most recent upload.
analyzer = EnhancedDataAnalyzer()
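
# analyze_data drives the end-to-end workflow: validate inputs, load the file,
# build the summary and charts, then request AI insights. It returns
# (analysis markdown, raw summary, styled preview HTML, charts HTML, file name).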
async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
    if not file:
        return "❌ Please upload a CSV or Excel file.", "", "", "", None
    if not analyzer.validate_api_key(api_key):
        return "❌ Please enter a valid Chutes API key (minimum 10 characters).", "", "", "", None
    is_valid, validation_msg = analyzer.validate_file(file)
    if not is_valid:
        return f"❌ {validation_msg}", "", "", "", None

    progress(0.1, desc="📂 Reading file...")
    try:
        df, data_summary, charts_html = analyzer.process_file(file.name)
        progress(0.3, desc="🔍 Processing data...")
        progress(0.5, desc="🤖 Generating AI insights...")
        ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
        progress(0.9, desc="✨ Finalizing results...")

        response = f"""# 🎯 Analysis Complete!

{ai_analysis}

---
*Analysis powered by OpenAI gpt-oss-20b via Chutes • Generated at {datetime.now().strftime('%H:%M:%S')}*
"""
        data_preview_html = df.head(15).to_html(
            classes="table table-striped table-hover",
            table_id="data-preview-table",
            escape=False
        )
        styled_preview = f"""
<style>
#data-preview-table {{
    width: 100%;
    border-collapse: collapse;
    margin: 20px 0;
    font-size: 14px;
}}
#data-preview-table th {{
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 12px 8px;
    text-align: left;
    font-weight: bold;
}}
#data-preview-table td {{
    padding: 10px 8px;
    border-bottom: 1px solid #ddd;
}}
#data-preview-table tr:hover {{
    background-color: #f5f5f5;
}}
</style>
{data_preview_html}
"""
        progress(1.0, desc="✅ Done!")
        return response, data_summary, styled_preview, charts_html, file.name
    except Exception as e:
        logger.error(f"Analysis error: {str(e)}")
        return f"❌ **Error**: {str(e)}", "", "", "", None
def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
    return asyncio.run(analyze_data(file, api_key, user_question, progress))


def clear_all():
    analyzer.current_df = None
    analyzer.current_charts = None
    return None, "", "", "", "", "", "", None


def download_report(analysis_text, data_summary, file_name, format_choice):
    if not analysis_text:
        return None, "❌ No analysis data available for download."
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    file_base_name = os.path.splitext(file_name)[0] if file_name else "data_analysis"
    try:
        if format_choice == "HTML":
            html_content = analyzer.generate_report_html(analysis_text, data_summary, file_name)
            filename = f"{file_base_name}_analysis_report_{timestamp}.html"
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(html_content)
            return filename, f"✅ HTML report generated successfully! File: {filename}"
        else:  # Markdown
            report = f"""# Data Analysis Report
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
File: {file_name}

## AI Analysis:
{analysis_text}

## Raw Data Summary:
{data_summary}
"""
            filename = f"{file_base_name}_analysis_report_{timestamp}.md"
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(report)
            return filename, f"✅ Markdown report generated successfully! File: {filename}"
    except Exception as e:
        logger.error(f"Report generation error: {str(e)}")
        return None, f"❌ Error generating report: {str(e)}"
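
# Build the Gradio interface: a configuration column, a results column with
# tabs for questions, data preview, raw summary, and report export, plus the
# event wiring that connects buttons to the handlers above.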
with gr.Blocks(
    title="🚀 Smart Data Analyzer Pro",
    theme=gr.themes.Ocean(),
    css="""
    .gradio-container {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .tab-nav {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    }
    .upload-area {
        border: 2px dashed #667eea;
        border-radius: 10px;
        padding: 20px;
        text-align: center;
        background: #f8f9ff;
    }
    """
) as app:
    current_file_name = gr.State("")

    gr.Markdown("""
# 🚀 Smart Data Analyzer Pro
### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b
Upload your data files and get instant professional insights and downloadable reports!
""")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Configuration")
            api_key_input = gr.Textbox(
                label="🔑 Chutes API Key",
                placeholder="sk-chutes-your-api-key-here...",
                type="password",
                lines=1,
                info="Get your free API key from chutes.ai"
            )
            file_input = gr.File(
                label="📁 Upload Data File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single",
                elem_classes=["upload-area"]
            )
            with gr.Row():
                analyze_btn = gr.Button("🚀 Analyze Data", variant="primary", size="lg")
                clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
            with gr.Group():
                gr.Markdown("### 📊 Quick Stats")
                file_stats = gr.Textbox(
                    label="File Information",
                    lines=3,
                    interactive=False,
                    placeholder="Upload a file to see statistics..."
                )

        with gr.Column(scale=2):
            gr.Markdown("### 🎯 Analysis Results")
            analysis_output = gr.Markdown(
                value="👋 **Ready to analyze your data!**\n\nUpload a CSV or Excel file and click 'Analyze Data' to get started.",
                show_label=False
            )

            with gr.Tabs():
                with gr.Tab("💬 Ask Questions"):
                    question_input = gr.Textbox(
                        label="❓ Ask Specific Questions About Your Data",
                        placeholder="Examples:\n• What are the top 5 customers by revenue?\n• Are there any seasonal trends?\n• Which products have the highest margins?\n• What anomalies do you see in this data?",
                        lines=3
                    )
                    ask_btn = gr.Button("🔍 Get Answer", variant="primary")
                    question_output = gr.Markdown()

                with gr.Tab("📊 Data Preview"):
                    data_preview = gr.HTML(
                        label="Dataset Preview",
                        value="<p>Upload a file to see data preview...</p>"
                    )

                with gr.Tab("📋 Raw Summary"):
                    raw_summary = gr.Textbox(
                        label="Detailed Data Summary",
                        lines=15,
                        max_lines=20,
                        show_copy_button=True
                    )

                with gr.Tab("💾 Export Reports"):
                    gr.Markdown("### 📥 Download Your Analysis Report")
                    with gr.Row():
                        format_choice = gr.Radio(
                            choices=["HTML", "Markdown"],
                            value="HTML",
                            label="📄 Report Format",
                            info="Choose your preferred download format"
                        )
                    download_btn = gr.Button("📥 Generate & Download Report", variant="primary", size="lg")
                    download_status = gr.Textbox(label="Download Status", interactive=False)
                    download_file = gr.File(label="📁 Download Link", visible=True)

    def update_file_stats(file):
        if not file:
            return "No file uploaded"
        try:
            file_size = os.path.getsize(file.name) / (1024 * 1024)
            file_name = os.path.basename(file.name)
            return f"📁 **File**: {file_name}\n📊 **Size**: {file_size:.2f} MB\n⏰ **Uploaded**: {datetime.now().strftime('%H:%M:%S')}"
        except Exception:
            return "File information unavailable"

    def handle_analysis(file, api_key, user_question="", progress=gr.Progress()):
        result = sync_analyze_data(file, api_key, user_question, progress)
        if len(result) == 5:
            return result[0], result[1], result[2], result[4]
        else:
            return result[0], result[1], result[2], ""

    def handle_question_analysis(file, api_key, question, progress=gr.Progress()):
        if not question.strip():
            return "❌ Please enter a specific question about your data."
        result = sync_analyze_data(file, api_key, question, progress)
        return result[0]

    analyze_btn.click(
        fn=handle_analysis,
        inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],  # hidden textbox supplies an empty question
        outputs=[analysis_output, raw_summary, data_preview, current_file_name],
        show_progress=True
    )
    ask_btn.click(
        fn=handle_question_analysis,
        inputs=[file_input, api_key_input, question_input],
        outputs=[question_output],
        show_progress=True
    )
    file_input.change(
        fn=update_file_stats,
        inputs=[file_input],
        outputs=[file_stats]
    )
    clear_btn.click(
        fn=clear_all,
        outputs=[file_input, api_key_input, question_input, analysis_output,
                 question_output, data_preview, raw_summary, current_file_name]
    )
    download_btn.click(
        fn=download_report,
        inputs=[analysis_output, raw_summary, current_file_name, format_choice],
        outputs=[download_file, download_status]
    )
| gr.Markdown(""" | |
| --- | |
| ### π‘ Pro Tips for Better Analysis: | |
| **π― For Best Results:** | |
| - Clean your data before upload (remove extra headers, format dates consistently) | |
| - Use descriptive column names | |
| - Ask specific questions like "What drives the highest profits?" instead of "Analyze this data" | |
| **π₯ Export Options:** | |
| - **HTML**: Interactive report with embedded charts and print-to-PDF option | |
| - **Markdown**: Simple text format for documentation | |
| **β‘ Speed Optimization:** | |
| - Files under 10MB process fastest | |
| - CSV files typically load faster than Excel | |
| - Limit to essential columns for quicker analysis | |
| **π§ Supported Formats:** CSV, XLSX, XLS | **π Max Size:** 50MB | **π Response Time:** ~3-5 seconds | |
| """) | |
| if __name__ == "__main__": | |
| app.queue(max_size=10) | |
| app.launch() |