Update app.py
app.py (CHANGED)
@@ -12,8 +12,10 @@ from typing import Optional, Tuple, Dict, Any
 import logging
 from datetime import datetime
 import re
-
-from
+import base64
+from io import BytesIO
+import weasyprint  # For PDF generation
+from jinja2 import Template  # For HTML templating

 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -24,6 +26,8 @@ class EnhancedDataAnalyzer:
         self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
         self.max_file_size = 50 * 1024 * 1024  # 50MB limit
         self.conversation_history = []
+        self.current_df = None
+        self.current_charts = None

     def validate_api_key(self, api_key: str) -> bool:
         """Validate API key format"""
@@ -129,7 +133,7 @@ Format your response with clear sections and bullet points for readability."""
             logger.error(f"API Error: {str(e)}")
             return f"β **Connection Error**: {str(e)}"

-    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str,
+    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str, str]:
         """Enhanced file processing with better error handling"""
         try:
             file_extension = os.path.splitext(file_path)[1].lower()
@@ -153,11 +157,14 @@ Format your response with clear sections and bullet points for readability."""
             # Clean column names
             df.columns = df.columns.str.strip().str.replace(r'\s+', ' ', regex=True)

+            # Store dataframe for visualizations
+            self.current_df = df
+
             # Generate enhanced summaries
             data_summary = self.generate_enhanced_summary(df)
-
+            charts_html = self.generate_visualizations(df)

-            return df, data_summary,
+            return df, data_summary, charts_html

         except Exception as e:
             raise Exception(f"Error processing file: {str(e)}")
@@ -223,28 +230,231 @@ Format your response with clear sections and bullet points for readability."""

         return "\n".join(summary)

-    def
-        """Generate
-
-
-        # Numerical distribution charts
-        numeric_cols = df.select_dtypes(include=[np.number]).columns
-        if len(numeric_cols) > 0:
-            for col in numeric_cols[:3]:  # First 3 numeric columns
-                fig = px.histogram(df, x=col, title=f"Distribution of {col}")
-                charts[f"hist_{col}"] = fig
+    def generate_visualizations(self, df: pd.DataFrame) -> str:
+        """Generate comprehensive visualizations for the dataset"""
+        charts_html = []

-
-
-
-
-
-
-
-
-
+        try:
+            # Chart 1: Data completeness analysis
+            missing_data = df.isnull().sum()
+            if missing_data.sum() > 0:
+                fig = px.bar(
+                    x=missing_data.index,
+                    y=missing_data.values,
+                    title="π Missing Data Analysis",
+                    labels={'x': 'Columns', 'y': 'Missing Values Count'},
+                    color=missing_data.values,
+                    color_continuous_scale='Reds'
+                )
+                fig.update_layout(
+                    height=400,
+                    showlegend=False,
+                    title_x=0.5,
+                    xaxis_tickangle=-45
+                )
+                charts_html.append(f"<h3>π Data Quality Overview</h3>")
+                charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="missing_data_chart"))
+
+            # Chart 2: Numerical columns correlation heatmap
+            numeric_cols = df.select_dtypes(include=[np.number]).columns
+            if len(numeric_cols) > 1:
+                corr_matrix = df[numeric_cols].corr()
+                fig = px.imshow(
+                    corr_matrix,
+                    title="π Correlation Matrix - Numerical Variables",
+                    color_continuous_scale='RdBu_r',
+                    aspect="auto",
+                    text_auto=True
+                )
+                fig.update_layout(height=500, title_x=0.5)
+                charts_html.append(f"<h3>π Correlation Analysis</h3>")
+                charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="correlation_chart"))
+
+            # Chart 3: Distribution plots for numerical columns
+            if len(numeric_cols) > 0:
+                for i, col in enumerate(numeric_cols[:3]):  # First 3 numeric columns
+                    fig = px.histogram(
+                        df,
+                        x=col,
+                        title=f"π Distribution: {col}",
+                        marginal="box",
+                        nbins=30
+                    )
+                    fig.update_layout(height=400, title_x=0.5)
+                    if i == 0:
+                        charts_html.append(f"<h3>π Data Distributions</h3>")
+                    charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id=f"dist_chart_{i}"))
+
+            # Chart 4: Categorical analysis
+            categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+            if len(categorical_cols) > 0:
+                for i, col in enumerate(categorical_cols[:2]):  # First 2 categorical columns
+                    if df[col].nunique() <= 20:  # Only if reasonable number of categories
+                        value_counts = df[col].value_counts().head(10)
+                        fig = px.bar(
+                            x=value_counts.values,
+                            y=value_counts.index,
+                            orientation='h',
+                            title=f"π Top 10 Values: {col}",
+                            labels={'x': 'Count', 'y': col}
+                        )
+                        fig.update_layout(height=400, title_x=0.5)
+                        if i == 0:
+                            charts_html.append(f"<h3>π Categorical Data Analysis</h3>")
+                        charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id=f"cat_chart_{i}"))
+
+            # Chart 5: Data overview summary
+            summary_data = {
+                'Metric': ['Total Rows', 'Total Columns', 'Numeric Columns', 'Categorical Columns', 'Missing Values'],
+                'Count': [
+                    len(df),
+                    len(df.columns),
+                    len(numeric_cols),
+                    len(categorical_cols),
+                    df.isnull().sum().sum()
+                ]
+            }
+
+            fig = px.bar(
+                summary_data,
+                x='Metric',
+                y='Count',
+                title="π Dataset Overview",
+                color='Count',
+                color_continuous_scale='Blues'
+            )
+            fig.update_layout(height=400, title_x=0.5, showlegend=False)
+            charts_html.append(f"<h3>π Dataset Overview</h3>")
+            charts_html.append(fig.to_html(include_plotlyjs='cdn', div_id="overview_chart"))
+
+            # Store charts for export
+            self.current_charts = charts_html
+
+            return "\n".join(charts_html) if charts_html else "<p>No charts could be generated for this dataset.</p>"

-
+        except Exception as e:
+            logger.error(f"Chart generation error: {str(e)}")
+            return f"<p>β Chart generation failed: {str(e)}</p>"
+
+    def generate_report_html(self, analysis_text: str, data_summary: str, file_name: str = "Unknown") -> str:
+        """Generate HTML report with embedded charts"""
+
+        html_template = """
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <meta charset="UTF-8">
+            <title>Data Analysis Report</title>
+            <style>
+                body {
+                    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+                    line-height: 1.6;
+                    color: #333;
+                    max-width: 1200px;
+                    margin: 0 auto;
+                    padding: 20px;
+                    background: #f8f9fa;
+                }
+                .header {
+                    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                    color: white;
+                    padding: 30px;
+                    border-radius: 10px;
+                    margin-bottom: 30px;
+                    text-align: center;
+                }
+                .section {
+                    background: white;
+                    padding: 25px;
+                    margin-bottom: 20px;
+                    border-radius: 8px;
+                    box-shadow: 0 2px 10px rgba(0,0,0,0.1);
+                }
+                .chart-container {
+                    margin: 20px 0;
+                    padding: 15px;
+                    background: #f8f9ff;
+                    border-radius: 8px;
+                    border-left: 4px solid #667eea;
+                }
+                h1, h2, h3 { color: #2c3e50; }
+                .metadata {
+                    background: #e8f4f8;
+                    padding: 15px;
+                    border-radius: 5px;
+                    margin-bottom: 20px;
+                }
+                .footer {
+                    text-align: center;
+                    color: #666;
+                    margin-top: 40px;
+                    padding: 20px;
+                    background: #f1f1f1;
+                    border-radius: 5px;
+                }
+                pre {
+                    background: #f4f4f4;
+                    padding: 15px;
+                    border-radius: 5px;
+                    overflow-x: auto;
+                    white-space: pre-wrap;
+                }
+            </style>
+        </head>
+        <body>
+            <div class="header">
+                <h1>π Smart Data Analysis Report</h1>
+                <p>Comprehensive AI-Powered Data Insights</p>
+            </div>
+
+            <div class="metadata">
+                <strong>π File:</strong> {{ file_name }}<br>
+                <strong>π Generated:</strong> {{ timestamp }}<br>
+                <strong>π€ Model:</strong> OpenAI gpt-oss-20b via Chutes AI
+            </div>
+
+            <div class="section">
+                <h2>π― AI Analysis & Insights</h2>
+                <div>{{ ai_analysis }}</div>
+            </div>
+
+            <div class="section">
+                <h2>π Visualizations</h2>
+                <div class="chart-container">
+                    {{ charts_html }}
+                </div>
+            </div>
+
+            <div class="section">
+                <h2>π Technical Data Summary</h2>
+                <pre>{{ data_summary }}</pre>
+            </div>
+
+            <div class="footer">
+                <p>Report generated by Smart Data Analyzer Pro β’ Powered by AI</p>
+                <p>For questions or support, visit chutes.ai</p>
+            </div>
+        </body>
+        </html>
+        """
+
+        template = Template(html_template)
+
+        # Convert markdown to HTML for AI analysis
+        ai_analysis_html = analysis_text.replace('\n', '<br>')
+        ai_analysis_html = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', ai_analysis_html)
+        ai_analysis_html = re.sub(r'## (.*?)\n', r'<h3>\1</h3>', ai_analysis_html)
+        ai_analysis_html = re.sub(r'# (.*?)\n', r'<h2>\1</h2>', ai_analysis_html)
+
+        charts_content = "\n".join(self.current_charts) if self.current_charts else "<p>No visualizations available</p>"
+
+        return template.render(
+            file_name=file_name,
+            timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            ai_analysis=ai_analysis_html,
+            charts_html=charts_content,
+            data_summary=data_summary
+        )

 # Initialize the analyzer
 analyzer = EnhancedDataAnalyzer()
@@ -252,31 +462,29 @@ analyzer = EnhancedDataAnalyzer()
 async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
     """Enhanced analysis function with progress tracking"""
     if not file:
-        return "β Please upload a CSV or Excel file.", "", "", None
-
+        return "β Please upload a CSV or Excel file.", "", "", "", None
+
     if not analyzer.validate_api_key(api_key):
-        return "β Please enter a valid Chutes API key (minimum 10 characters).", "", "", None
-
+        return "β Please enter a valid Chutes API key (minimum 10 characters).", "", "", "", None
+
     # Validate file
     is_valid, validation_msg = analyzer.validate_file(file)
     if not is_valid:
-        return f"β {validation_msg}", "", "", None
-
+        return f"β {validation_msg}", "", "", "", None
+
     progress(0.1, desc="π Reading file...")
-
+
     try:
         # Process the uploaded file
-        df, data_summary,
+        df, data_summary, charts_html = analyzer.process_file(file.name)
         progress(0.3, desc="π Processing data...")
-
-        # Generate visualizations
-        chart_html = create_basic_charts(df)
+
         progress(0.5, desc="π€ Generating AI insights...")
-
+
         # Get AI analysis
         ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
         progress(0.9, desc="β¨ Finalizing results...")
-
+
         # Format the complete response
         response = f"""# π― Analysis Complete!

@@ -286,57 +494,46 @@ async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
 *Analysis powered by OpenAI gpt-oss-20b via Chutes β’ Generated at {datetime.now().strftime('%H:%M:%S')}*
 """

+        # Generate data preview
+        data_preview_html = df.head(15).to_html(
+            classes="table table-striped table-hover",
+            table_id="data-preview-table",
+            escape=False
+        )
+
+        # Add some styling to the preview
+        styled_preview = f"""
+        <style>
+            #data-preview-table {{
+                width: 100%;
+                border-collapse: collapse;
+                margin: 20px 0;
+                font-size: 14px;
+            }}
+            #data-preview-table th {{
+                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                color: white;
+                padding: 12px 8px;
+                text-align: left;
+                font-weight: bold;
+            }}
+            #data-preview-table td {{
+                padding: 10px 8px;
+                border-bottom: 1px solid #ddd;
+            }}
+            #data-preview-table tr:hover {{
+                background-color: #f5f5f5;
+            }}
+        </style>
+        {data_preview_html}
+        """
+
         progress(1.0, desc="β Done!")
-        return response, data_summary,
-
-    except Exception as e:
-        logger.error(f"Analysis error: {str(e)}")
-        return f"β **Error**: {str(e)}", "", "", None
+        return response, data_summary, styled_preview, charts_html, file.name

-def create_basic_charts(df: pd.DataFrame) -> str:
-    """Create basic visualizations for the dataset"""
-    charts_html = []
-
-    try:
-        # Chart 1: Data completeness heatmap
-        missing_data = df.isnull().sum()
-        if missing_data.sum() > 0:
-            fig = px.bar(x=missing_data.index, y=missing_data.values,
-                         title="Missing Data by Column",
-                         labels={'x': 'Columns', 'y': 'Missing Count'})
-            fig.update_layout(height=400, showlegend=False)
-            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
-
-        # Chart 2: Numerical columns correlation (if multiple numeric columns)
-        numeric_cols = df.select_dtypes(include=[np.number]).columns
-        if len(numeric_cols) > 1:
-            corr_matrix = df[numeric_cols].corr()
-            fig = px.imshow(corr_matrix,
-                            title="Correlation Matrix",
-                            color_continuous_scale='RdBu_r',
-                            aspect="auto")
-            fig.update_layout(height=500)
-            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
-
-        # Chart 3: Distribution of first numeric column
-        if len(numeric_cols) > 0:
-            first_numeric = numeric_cols[0]
-            fig = px.histogram(df, x=first_numeric,
-                               title=f"Distribution: {first_numeric}",
-                               marginal="box")
-            fig.update_layout(height=400)
-            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
-
-        # Additional charts from generate_chart_data
-        charts_data = analyzer.generate_chart_data(df)
-        for key, fig in charts_data.items():
-            charts_html.append(fig.to_html(include_plotlyjs='cdn'))
-
-        return "\n".join(charts_html) if charts_html else "<p>No charts generated for this dataset.</p>"
-
     except Exception as e:
-        logger.error(f"
-        return f"
+        logger.error(f"Analysis error: {str(e)}")
+        return f"β **Error**: {str(e)}", "", "", "", None

 def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
     """Synchronous wrapper for the async analyze function"""
@@ -344,15 +541,43 @@ def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):

 def clear_all():
     """Clear all inputs and outputs"""
-
+    analyzer.current_df = None
+    analyzer.current_charts = None
+    return None, "", "", "", "", "", "", None

-def
-    """Generate downloadable
+def download_report(analysis_text, data_summary, file_name, format_choice):
+    """Generate downloadable report in PDF or HTML format"""
     if not analysis_text:
-        return None
+        return None, "β No analysis data available for download."
+
+    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+    file_base_name = os.path.splitext(file_name)[0] if file_name else "data_analysis"

-
+    try:
+        if format_choice == "HTML":
+            # Generate HTML report
+            html_content = analyzer.generate_report_html(analysis_text, data_summary, file_name)
+            filename = f"{file_base_name}_analysis_report_{timestamp}.html"
+
+            with open(filename, 'w', encoding='utf-8') as f:
+                f.write(html_content)
+
+            return filename, f"β HTML report generated successfully! File: {filename}"
+
+        elif format_choice == "PDF":
+            # Generate PDF report
+            html_content = analyzer.generate_report_html(analysis_text, data_summary, file_name)
+            filename = f"{file_base_name}_analysis_report_{timestamp}.pdf"
+
+            # Convert HTML to PDF using weasyprint
+            weasyprint.HTML(string=html_content).write_pdf(filename)
+
+            return filename, f"β PDF report generated successfully! File: {filename}"
+
+        else:  # Markdown fallback
+            report = f"""# Data Analysis Report
 Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+File: {file_name}

 ## AI Analysis:
 {analysis_text}
@@ -360,35 +585,15 @@ Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
 ## Raw Data Summary:
 {data_summary}
 """
-
-    base_filename = f"data_analysis_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
-    filename = None
-
-    try:
-        if format_choice == "PDF":
-            # Convert MD to HTML first
-            report_html = markdown.markdown(report_md)
-            # Wrap in basic HTML structure for better PDF rendering
-            full_html = f"""
-            <html>
-            <head><style>body {{ font-family: Arial, sans-serif; }}</style></head>
-            <body>{report_html}</body>
-            </html>
-            """
-            filename = base_filename + ".pdf"
-            WeasyHTML(string=full_html).write_pdf(filename)
-
-        elif format_choice == "HTML":
-            report_html = markdown.markdown(report_md, output_format='html5')
-            filename = base_filename + ".html"
+            filename = f"{file_base_name}_analysis_report_{timestamp}.md"
             with open(filename, 'w', encoding='utf-8') as f:
-                f.write(
-
-
-
+                f.write(report)
+
+            return filename, f"β Markdown report generated successfully! File: {filename}"
+
     except Exception as e:
-        logger.error(f"
-        return None
+        logger.error(f"Report generation error: {str(e)}")
+        return None, f"β Error generating report: {str(e)}"

 # Create enhanced Gradio interface
 with gr.Blocks(
@@ -408,15 +613,25 @@ with gr.Blocks(
         text-align: center;
         background: #f8f9ff;
     }
+    .charts-container {
+        max-height: 800px;
+        overflow-y: auto;
+        padding: 10px;
+        background: #fafafa;
+        border-radius: 8px;
+    }
     """
 ) as app:

+    # Store file name for downloads
+    current_file_name = gr.State("")
+
     # Header
     gr.Markdown("""
     # π Smart Data Analyzer Pro
     ### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b

-    Upload your data files and get instant professional insights, visualizations, and
+    Upload your data files and get instant professional insights, visualizations, and downloadable reports!
     """)

     # Main interface
@@ -483,7 +698,8 @@ with gr.Blocks(
                 with gr.Tab("π Visualizations"):
                     charts_output = gr.HTML(
                         label="Auto-Generated Charts",
-                        value="<p
+                        value="<div class='charts-container'><p>π Interactive charts will appear here after analysis...</p></div>",
+                        elem_classes=["charts-container"]
                     )

                 with gr.Tab("π Raw Summary"):
@@ -494,15 +710,20 @@
                         show_copy_button=True
                     )

-                with gr.Tab("πΎ Export"):
-                    gr.Markdown("### Download Your Analysis Report")
-
-
-
-
-
-
-
+                with gr.Tab("πΎ Export Reports"):
+                    gr.Markdown("### π₯ Download Your Analysis Report")
+
+                    with gr.Row():
+                        format_choice = gr.Radio(
+                            choices=["HTML", "PDF", "Markdown"],
+                            value="HTML",
+                            label="π Report Format",
+                            info="Choose your preferred download format"
+                        )
+
+                    download_btn = gr.Button("π₯ Generate & Download Report", variant="primary", size="lg")
+                    download_status = gr.Textbox(label="Download Status", interactive=False)
+                    download_file = gr.File(label="π Download Link", visible=True)

     # Event handlers
     def update_file_stats(file):
@@ -516,19 +737,35 @@
         except:
             return "File information unavailable"

-
+    def handle_analysis(file, api_key, user_question="", progress=gr.Progress()):
+        """Handle main analysis and return all outputs including file name"""
+        result = sync_analyze_data(file, api_key, user_question, progress)
+        if len(result) == 5:  # Check if file name was returned
+            return result[0], result[1], result[2], result[3], result[4]  # analysis, summary, preview, charts, filename
+        else:
+            return result[0], result[1], result[2], result[3], ""  # fallback without filename
+
+    def handle_question_analysis(file, api_key, question, progress=gr.Progress()):
+        """Handle question-specific analysis"""
+        if not question.strip():
+            return "β Please enter a specific question about your data."
+
+        result = sync_analyze_data(file, api_key, question, progress)
+        return result[0]  # Return only the analysis output
+
+    # Main analysis event
     analyze_btn.click(
-        fn=
+        fn=handle_analysis,
         inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],
-        outputs=[analysis_output, raw_summary, data_preview, charts_output],
+        outputs=[analysis_output, raw_summary, data_preview, charts_output, current_file_name],
         show_progress=True
     )

     # Follow-up questions
     ask_btn.click(
-        fn=
+        fn=handle_question_analysis,
         inputs=[file_input, api_key_input, question_input],
-        outputs=[question_output
+        outputs=[question_output],
         show_progress=True
     )

@@ -543,14 +780,14 @@
     clear_btn.click(
         fn=clear_all,
         outputs=[file_input, api_key_input, question_input, analysis_output,
-                 question_output, data_preview, charts_output]
+                 question_output, data_preview, charts_output, raw_summary]
     )

-    #
+    # Enhanced download functionality
     download_btn.click(
-        fn=
-        inputs=[analysis_output, raw_summary, format_choice],
-        outputs=[download_file]
+        fn=download_report,
+        inputs=[analysis_output, raw_summary, current_file_name, format_choice],
+        outputs=[download_file, download_status]
     )

     # Footer with usage tips
@@ -563,6 +800,18 @@
     - Use descriptive column names
     - Ask specific questions like "What drives the highest profits?" instead of "Analyze this data"

+    **π Visualizations Include:**
+    - Missing data analysis
+    - Correlation matrices for numerical data
+    - Distribution plots and histograms
+    - Top categories for categorical data
+    - Dataset overview metrics
+
+    **π₯ Export Options:**
+    - **HTML**: Interactive report with embedded charts
+    - **PDF**: Professional report for presentations
+    - **Markdown**: Simple text format for documentation
+
     **β‘ Speed Optimization:**
     - Files under 10MB process fastest
     - CSV files typically load faster than Excel
@@ -571,14 +820,13 @@
     **π§ Supported Formats:** CSV, XLSX, XLS | **π Max Size:** 50MB | **π Response Time:** ~3-5 seconds
     """)

+def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
+    """Synchronous wrapper for the async analyze function"""
+    return asyncio.run(analyze_data(file, api_key, user_question, progress))
+
 # Launch configuration
 if __name__ == "__main__":
     app.queue(max_size=10)  # Handle multiple users
     app.launch(
-        share=True
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True,
-        favicon_path=None,
-        ssl_verify=False
+        share=True
     )
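The report-export path this commit introduces renders a Jinja2 template to HTML and, for the PDF option, converts that HTML with WeasyPrint. The following is a minimal standalone sketch of that flow, not code from app.py: the function name `build_report`, the template contents, and the file naming are illustrative placeholders, and it assumes `jinja2` and `weasyprint` (including WeasyPrint's native dependencies) are installed.

```python
from datetime import datetime

import weasyprint
from jinja2 import Template

# Illustrative template; the real one in app.py is far more elaborate.
REPORT_TEMPLATE = Template("""
<html>
  <head><meta charset="utf-8"><title>{{ title }}</title></head>
  <body>
    <h1>{{ title }}</h1>
    <p>Generated: {{ timestamp }}</p>
    <pre>{{ data_summary }}</pre>
  </body>
</html>
""")


def build_report(title: str, data_summary: str, as_pdf: bool = False) -> str:
    """Render the template and write it to disk; return the output filename."""
    html = REPORT_TEMPLATE.render(
        title=title,
        timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        data_summary=data_summary,
    )
    stem = f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    if as_pdf:
        filename = stem + ".pdf"
        # HTML-to-PDF conversion, mirroring the commit's PDF branch.
        weasyprint.HTML(string=html).write_pdf(filename)
    else:
        filename = stem + ".html"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(html)
    return filename
```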
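The new `sync_analyze_data` wrapper at the bottom of the file drives the async `analyze_data` coroutine with `asyncio.run`. A minimal sketch of that sync-over-async pattern, using a stand-in coroutine rather than the app's real function, looks like this; note that `asyncio.run` raises if an event loop is already running in the calling thread, so the wrapper assumes it is invoked from plain synchronous code.

```python
import asyncio


async def analyze(question: str) -> str:
    """Stand-in for an awaited LLM/API call."""
    await asyncio.sleep(0)  # placeholder for real async I/O
    return f"answer to: {question}"


def analyze_sync(question: str) -> str:
    """Synchronous wrapper, e.g. for a UI callback that cannot await."""
    return asyncio.run(analyze(question))


if __name__ == "__main__":
    print(analyze_sync("What drives the highest profits?"))
```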