Update app.py
app.py CHANGED
@@ -5,59 +5,102 @@ import asyncio
 import json
 import io
 import os
-
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from typing import Optional, Tuple, Dict, Any
+import logging
+from datetime import datetime
+import re

-
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class EnhancedDataAnalyzer:
     def __init__(self):
         self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
+        self.max_file_size = 50 * 1024 * 1024  # 50MB limit
+        self.conversation_history = []
+
+    def validate_api_key(self, api_key: str) -> bool:
+        """Validate API key format"""
+        return bool(api_key and len(api_key.strip()) > 10)
+
+    def validate_file(self, file) -> Tuple[bool, str]:
+        """Validate uploaded file"""
+        if not file:
+            return False, "No file uploaded"
+
+        file_size = os.path.getsize(file.name)
+        if file_size > self.max_file_size:
+            return False, f"File too large. Maximum size: {self.max_file_size // (1024*1024)}MB"
+
+        file_extension = os.path.splitext(file.name)[1].lower()
+        if file_extension not in ['.csv', '.xlsx', '.xls']:
+            return False, "Unsupported format. Please upload CSV or Excel files only."
+
+        return True, "File valid"

     async def analyze_with_chutes(self, api_token: str, data_summary: str, user_question: str = None) -> str:
-        """
+        """Enhanced API call with better error handling and streaming"""
         headers = {
-            "Authorization": f"Bearer {api_token}",
+            "Authorization": f"Bearer {api_token.strip()}",
             "Content-Type": "application/json"
         }

-        # Create
+        # Create context-aware prompt
         if user_question:
-            prompt = f"""Based on this dataset
+            prompt = f"""You are a data analyst expert. Based on this dataset:
+
 {data_summary}

-User question: {user_question}
+User's specific question: {user_question}

-
+Provide a detailed, actionable answer with specific data points and recommendations."""
         else:
-            prompt = f"""
+            prompt = f"""You are a senior data analyst. Analyze this dataset thoroughly:

 {data_summary}

-
-
-
-
-
-
+Provide a comprehensive analysis including:
+
+1. **Key Statistical Insights**: Most important numbers and what they mean
+2. **Patterns & Trends**: Notable patterns, correlations, or anomalies
+3. **Data Quality Assessment**: Missing values, outliers, data consistency
+4. **Business Intelligence**: Actionable insights and opportunities
+5. **Recommendations**: Specific next steps or areas to investigate

-
+Format your response with clear sections and bullet points for readability."""

         body = {
             "model": "openai/gpt-oss-20b",
             "messages": [
+                {
+                    "role": "system",
+                    "content": "You are an expert data analyst who provides clear, actionable insights from datasets. Always structure your responses with clear headings and specific data points."
+                },
                 {
                     "role": "user",
                     "content": prompt
                 }
             ],
             "stream": True,
-            "max_tokens":
-            "temperature": 0.
+            "max_tokens": 3000,
+            "temperature": 0.2,  # Very low for consistent analysis
+            "top_p": 0.9
         }

         try:
-
+            timeout = aiohttp.ClientTimeout(total=30)  # 30 second timeout
+            async with aiohttp.ClientSession(timeout=timeout) as session:
                 async with session.post(self.api_base_url, headers=headers, json=body) as response:
+                    if response.status == 401:
+                        return "❌ **Authentication Error**: Invalid API key. Please check your Chutes API token."
+                    elif response.status == 429:
+                        return "⏳ **Rate Limit**: Too many requests. Please wait a moment and try again."
+                    elif response.status != 200:
+                        return f"❌ **API Error**: Request failed with status {response.status}"

                     full_response = ""
                     async for line in response.content:
@@ -76,178 +119,427 @@ Keep the analysis clear, actionable, and data-driven."""
                         except json.JSONDecodeError:
                             continue

-                    return full_response if full_response else "No response received from the model."
+                    return full_response if full_response else "⚠️ No response received from the model."

+        except asyncio.TimeoutError:
+            return "⏰ **Timeout Error**: Request took too long. Please try again."
         except Exception as e:
-
+            logger.error(f"API Error: {str(e)}")
+            return f"❌ **Connection Error**: {str(e)}"

-    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
-        """
+    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str, dict]:
+        """Enhanced file processing with better error handling"""
         try:
             file_extension = os.path.splitext(file_path)[1].lower()

+            # Read file with better error handling
             if file_extension == '.csv':
-
+                # Try different encodings
+                for encoding in ['utf-8', 'latin-1', 'cp1252']:
+                    try:
+                        df = pd.read_csv(file_path, encoding=encoding)
+                        break
+                    except UnicodeDecodeError:
+                        continue
+                else:
+                    raise ValueError("Could not decode CSV file. Please check file encoding.")
             elif file_extension in ['.xlsx', '.xls']:
                 df = pd.read_excel(file_path)
             else:
                 raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

-            #
-
-
+            # Clean column names
+            df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)
+
+            # Generate enhanced summaries
+            data_summary = self.generate_enhanced_summary(df)
+            charts_data = self.generate_chart_data(df)
+
+            return df, data_summary, charts_data

         except Exception as e:
             raise Exception(f"Error processing file: {str(e)}")

-    def
-        """Generate
+    def generate_enhanced_summary(self, df: pd.DataFrame) -> str:
+        """Generate comprehensive data summary with statistical insights"""
         summary = []

-        #
-        summary.append(f"Dataset
-        summary.append(f"
-        summary.append(f"
+        # Header with timestamp
+        summary.append(f"# 📊 Dataset Analysis Report")
+        summary.append(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        summary.append(f"**File Size**: {df.shape[0]:,} rows × {df.shape[1]} columns")

-        #
-
-
-        null_count = df[col].isnull().sum()
-        null_pct = (null_count / len(df)) * 100
-        summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")
+        # Memory usage
+        memory_usage = df.memory_usage(deep=True).sum() / 1024**2
+        summary.append(f"**Memory Usage**: {memory_usage:.2f} MB\n")

-        #
-
+        # Data types breakdown
+        type_counts = df.dtypes.value_counts()
+        summary.append("## 📋 Column Types:")
+        for dtype, count in type_counts.items():
+            summary.append(f"- **{dtype}**: {count} columns")
+
+        # Missing data analysis
+        missing_data = df.isnull().sum()
+        missing_pct = (missing_data / len(df) * 100).round(2)
+        missing_summary = missing_data[missing_data > 0].sort_values(ascending=False)
+
+        if len(missing_summary) > 0:
+            summary.append("\n## ⚠️ Missing Data:")
+            for col, count in missing_summary.head(10).items():
+                pct = missing_pct[col]
+                summary.append(f"- **{col}**: {count:,} missing ({pct}%)")
+        else:
+            summary.append("\n## ✅ Data Quality: No missing values detected!")
+
+        # Numerical analysis
+        numeric_cols = df.select_dtypes(include=[np.number]).columns
         if len(numeric_cols) > 0:
-            summary.append(f"\
-            for col in numeric_cols:
+            summary.append(f"\n## 📈 Numerical Columns Analysis ({len(numeric_cols)} columns):")
+            for col in numeric_cols[:10]:  # Limit to first 10
                 stats = df[col].describe()
-
+                outliers = len(df[df[col] > (stats['75%'] + 1.5 * (stats['75%'] - stats['25%']))])
+                summary.append(f"- **{col}**: μ={stats['mean']:.2f}, σ={stats['std']:.2f}, outliers={outliers}")

-        # Categorical
+        # Categorical analysis
         categorical_cols = df.select_dtypes(include=['object', 'category']).columns
         if len(categorical_cols) > 0:
-            summary.append(f"\
-            for col in categorical_cols:
+            summary.append(f"\n## 📝 Categorical Columns Analysis ({len(categorical_cols)} columns):")
+            for col in categorical_cols[:10]:  # Limit to first 10
                 unique_count = df[col].nunique()
+                cardinality = "High" if unique_count > len(df) * 0.9 else "Medium" if unique_count > 10 else "Low"
                 most_common = df[col].mode().iloc[0] if len(df[col].mode()) > 0 else "N/A"
-            summary.append(f"- {col}
+                summary.append(f"- **{col}**: {unique_count:,} unique values ({cardinality} cardinality), Top: '{most_common}'")

-        # Sample data
-        summary.append(
-
+        # Sample data with better formatting
+        summary.append("\n## 👀 Data Sample (First 3 Rows):")
+        sample_df = df.head(3)
+        for idx, row in sample_df.iterrows():
+            summary.append(f"\n**Row {idx + 1}:**")
+            for col, val in row.items():
+                summary.append(f"  - {col}: {val}")

         return "\n".join(summary)
+
+    def generate_chart_data(self, df: pd.DataFrame) -> dict:
+        """Generate data for automatic visualizations"""
+        charts = {}
+
+        # Numerical distribution charts
+        numeric_cols = df.select_dtypes(include=[np.number]).columns
+        if len(numeric_cols) > 0:
+            for col in numeric_cols[:3]:  # First 3 numeric columns
+                fig = px.histogram(df, x=col, title=f"Distribution of {col}")
+                charts[f"hist_{col}"] = fig
+
+        # Categorical charts
+        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+        if len(categorical_cols) > 0:
+            for col in categorical_cols[:2]:  # First 2 categorical columns
+                if df[col].nunique() <= 20:  # Only if reasonable number of categories
+                    value_counts = df[col].value_counts().head(10)
+                    fig = px.bar(x=value_counts.index, y=value_counts.values,
+                                 title=f"Top Values in {col}")
+                    charts[f"bar_{col}"] = fig
+
+        return charts

 # Initialize the analyzer
-analyzer =
+analyzer = EnhancedDataAnalyzer()

-async def analyze_data(file, api_key, user_question=""):
-    """
+async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
+    """Enhanced analysis function with progress tracking"""
     if not file:
-        return "Please upload a CSV or Excel file.", "", ""
+        return "❌ Please upload a CSV or Excel file.", "", "", None
+
+    if not analyzer.validate_api_key(api_key):
+        return "❌ Please enter a valid Chutes API key (minimum 10 characters).", "", "", None
+
+    # Validate file
+    is_valid, validation_msg = analyzer.validate_file(file)
+    if not is_valid:
+        return f"❌ {validation_msg}", "", "", None

-
-        return "Please enter your Chutes API key.", "", ""
+    progress(0.1, desc="🔍 Reading file...")

     try:
         # Process the uploaded file
-        df, data_summary = analyzer.process_file(file.name)
+        df, data_summary, charts_data = analyzer.process_file(file.name)
+        progress(0.3, desc="📊 Processing data...")
+
+        # Generate visualizations
+        chart_html = create_basic_charts(df)
+        progress(0.5, desc="🤖 Generating AI insights...")

         # Get AI analysis
         ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
+        progress(0.9, desc="✨ Finalizing results...")

         # Format the complete response
-        response = f"""
-
-### 📊 Dataset Overview:
-{data_summary}
+        response = f"""# 🎯 Analysis Complete!

-### 🤖 AI Insights & Recommendations:
 {ai_analysis}
+
+---
+*Analysis powered by OpenAI gpt-oss-20b via Chutes • Generated at {datetime.now().strftime('%H:%M:%S')}*
 """

-
+        progress(1.0, desc="✅ Done!")
+        return response, data_summary, df.head(15).to_html(classes="table table-striped"), chart_html

     except Exception as e:
-
+        logger.error(f"Analysis error: {str(e)}")
+        return f"❌ **Error**: {str(e)}", "", "", None

-    def
+def create_basic_charts(df: pd.DataFrame) -> str:
+    """Create basic visualizations for the dataset"""
+    charts_html = []
+
+    try:
+        # Chart 1: Data completeness heatmap
+        missing_data = df.isnull().sum()
+        if missing_data.sum() > 0:
+            fig = px.bar(x=missing_data.index, y=missing_data.values,
+                         title="Missing Data by Column",
+                         labels={'x': 'Columns', 'y': 'Missing Count'})
+            fig.update_layout(height=400, showlegend=False)
+            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
+
+        # Chart 2: Numerical columns correlation (if multiple numeric columns)
+        numeric_cols = df.select_dtypes(include=[np.number]).columns
+        if len(numeric_cols) > 1:
+            corr_matrix = df[numeric_cols].corr()
+            fig = px.imshow(corr_matrix,
+                            title="Correlation Matrix",
+                            color_continuous_scale='RdBu_r',
+                            aspect="auto")
+            fig.update_layout(height=500)
+            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
+
+        # Chart 3: Distribution of first numeric column
+        if len(numeric_cols) > 0:
+            first_numeric = numeric_cols[0]
+            fig = px.histogram(df, x=first_numeric,
+                               title=f"Distribution: {first_numeric}",
+                               marginal="box")
+            fig.update_layout(height=400)
+            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
+
+        return "\n".join(charts_html) if charts_html else "<p>No charts generated for this dataset.</p>"
+
+    except Exception as e:
+        logger.error(f"Chart generation error: {str(e)}")
+        return f"<p>Chart generation failed: {str(e)}</p>"
+
+def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
     """Synchronous wrapper for the async analyze function"""
-    return asyncio.run(analyze_data(file, api_key, user_question))
+    return asyncio.run(analyze_data(file, api_key, user_question, progress))

-
-
+def clear_all():
+    """Clear all inputs and outputs"""
+    return None, "", "", "", "", "", None
+
+def download_summary(analysis_text, data_summary):
+    """Generate downloadable summary report"""
+    if not analysis_text:
+        return None
+
+    report = f"""# Data Analysis Report
+Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+
+## AI Analysis:
+{analysis_text}
+
+## Raw Data Summary:
+{data_summary}
+"""
+
+    # Save to temporary file
+    filename = f"data_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
+    with open(filename, 'w', encoding='utf-8') as f:
+        f.write(report)
+
+    return filename
+
+# Create enhanced Gradio interface
+with gr.Blocks(
+    title="🚀 Smart Data Analyzer Pro",
+    theme=gr.themes.Soft(),
+    css="""
+    .gradio-container {
+        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    }
+    .tab-nav {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    }
+    .upload-area {
+        border: 2px dashed #667eea;
+        border-radius: 10px;
+        padding: 20px;
+        text-align: center;
+        background: #f8f9ff;
+    }
+    """
+) as app:
+
+    # Header
     gr.Markdown("""
-    #
-    ###
+    # 🚀 Smart Data Analyzer Pro
+    ### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b
+
+    Upload your data files and get instant professional insights, visualizations, and recommendations!
     """)

+    # Main interface
     with gr.Row():
         with gr.Column(scale=1):
-            #
-
-                label="📁 Upload CSV or Excel File",
-                file_types=[".csv", ".xlsx", ".xls"],
-                file_count="single"
-            )
+            # Configuration section
+            gr.Markdown("### ⚙️ Configuration")

-            # API key input
             api_key_input = gr.Textbox(
                 label="🔑 Chutes API Key",
-                placeholder="
+                placeholder="sk-chutes-your-api-key-here...",
                 type="password",
-                lines=1
+                lines=1,
+                info="Get your free API key from chutes.ai"
             )

-
-
-
-
-
+            file_input = gr.File(
+                label="📁 Upload Data File",
+                file_types=[".csv", ".xlsx", ".xls"],
+                file_count="single",
+                elem_classes=["upload-area"]
             )

-
-
+            with gr.Row():
+                analyze_btn = gr.Button("🚀 Analyze Data", variant="primary", size="lg")
+                clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
+
+            # Quick stats display
+            with gr.Group():
+                gr.Markdown("### 📊 Quick Stats")
+                file_stats = gr.Textbox(
+                    label="File Information",
+                    lines=3,
+                    interactive=False,
+                    placeholder="Upload a file to see statistics..."
+                )

         with gr.Column(scale=2):
-            # Results
+            # Results section
+            gr.Markdown("### 🎯 Analysis Results")
+
             analysis_output = gr.Markdown(
-
-
+                value="👋 **Ready to analyze your data!**\n\nUpload a CSV or Excel file and click 'Analyze Data' to get started.",
+                show_label=False
             )

-            #
-            with gr.
-
-
-
-
-
+            # Advanced features in tabs
+            with gr.Tabs():
+                with gr.Tab("💬 Ask Questions"):
+                    question_input = gr.Textbox(
+                        label="❓ Ask Specific Questions About Your Data",
+                        placeholder="Examples:\n• What are the top 5 customers by revenue?\n• Are there any seasonal trends?\n• Which products have the highest margins?\n• What anomalies do you see in this data?",
+                        lines=3
+                    )
+                    ask_btn = gr.Button("🔍 Get Answer", variant="primary")
+                    question_output = gr.Markdown()
+
+                with gr.Tab("📋 Data Preview"):
+                    data_preview = gr.HTML(
+                        label="Dataset Preview",
+                        value="<p>Upload a file to see data preview...</p>"
+                    )
+
+                with gr.Tab("📊 Visualizations"):
+                    charts_output = gr.HTML(
+                        label="Auto-Generated Charts",
+                        value="<p>Charts will appear here after analysis...</p>"
+                    )
+
+                with gr.Tab("📄 Raw Summary"):
+                    raw_summary = gr.Textbox(
+                        label="Detailed Data Summary",
+                        lines=15,
+                        max_lines=20,
+                        show_copy_button=True
+                    )
+
+                with gr.Tab("💾 Export"):
+                    gr.Markdown("### Download Your Analysis Report")
+                    download_btn = gr.Button("📥 Download Report (.md)", variant="secondary")
+                    download_file = gr.File(label="Download Link", visible=False)
+
     # Event handlers
+    def update_file_stats(file):
+        if not file:
+            return "No file uploaded"
+
+        try:
+            file_size = os.path.getsize(file.name) / (1024 * 1024)  # MB
+            file_name = os.path.basename(file.name)
+            return f"📁 **File**: {file_name}\n📏 **Size**: {file_size:.2f} MB\n⏰ **Uploaded**: {datetime.now().strftime('%H:%M:%S')}"
+        except:
+            return "File information unavailable"
+
+    # Main analysis
     analyze_btn.click(
+        fn=sync_analyze_data,
+        inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],
+        outputs=[analysis_output, raw_summary, data_preview, charts_output],
+        show_progress=True
+    )
+
+    # Follow-up questions
+    ask_btn.click(
         fn=sync_analyze_data,
         inputs=[file_input, api_key_input, question_input],
-        outputs=[
+        outputs=[question_output, gr.Textbox(visible=False), gr.HTML(visible=False), gr.HTML(visible=False)],
+        show_progress=True
+    )
+
+    # File stats update
+    file_input.change(
+        fn=update_file_stats,
+        inputs=[file_input],
+        outputs=[file_stats]
+    )
+
+    # Clear functionality
+    clear_btn.click(
+        fn=clear_all,
+        outputs=[file_input, api_key_input, question_input, analysis_output,
+                 question_output, data_preview, charts_output]
     )

-    #
+    # Download functionality
+    download_btn.click(
+        fn=download_summary,
+        inputs=[analysis_output, raw_summary],
+        outputs=[download_file]
+    )
+
+    # Footer with usage tips
     gr.Markdown("""
-
-
-
-
-    -
-
-
-
-
-    -
-    -
+    ---
+    ### 💡 Pro Tips for Better Analysis:
+
+    **🎯 For Best Results:**
+    - Clean your data before upload (remove extra headers, format dates consistently)
+    - Use descriptive column names
+    - Ask specific questions like "What drives the highest profits?" instead of "Analyze this data"
+
+    **⚡ Speed Optimization:**
+    - Files under 10MB process fastest
+    - CSV files typically load faster than Excel
+    - Limit to essential columns for quicker analysis
+
+    **🔧 Supported Formats:** CSV, XLSX, XLS | **📏 Max Size:** 50MB | **🚀 Response Time:** ~3-5 seconds
     """)

-# Launch
+# Launch configuration
 if __name__ == "__main__":
+    app.queue(max_size=10)  # Handle multiple users
     app.launch(
         share=True
     )
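
Note: the unchanged context lines elided between the two hunks (old lines 64-75 / new lines 107-118) hold the body of the streaming loop that accumulates `full_response`. Since this commit does not touch them, the diff omits them; for orientation, here is a minimal sketch of what an OpenAI-compatible SSE parser at that point typically looks like. The `data: ` prefix handling and the `[DONE]` sentinel are assumptions based on the OpenAI streaming format, not lines taken from app.py.

# Hypothetical sketch of the elided streaming loop, assuming the Chutes
# endpoint emits OpenAI-style SSE chunks ("data: {json}" lines terminated
# by "data: [DONE]"); the actual unchanged lines in app.py may differ.
async for line in response.content:
    text = line.decode("utf-8").strip()
    if not text.startswith("data: "):
        continue
    payload = text[len("data: "):]
    if payload == "[DONE]":
        break
    try:
        # Each chunk carries an incremental "delta" with the next tokens.
        chunk = json.loads(payload)
        delta = chunk["choices"][0].get("delta", {})
        full_response += delta.get("content", "") or ""
    except json.JSONDecodeError:
        continue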