| import gradio as gr | |
| import pandas as pd | |
| import aiohttp | |
| import asyncio | |
| import json | |
| import io | |
| import os | |
| from typing import Optional, Tuple | |
class DataAnalyzer:
    """Summarize tabular files and request AI analysis from the Chutes API."""

    def __init__(self):
        # OpenAI-compatible chat-completions endpoint (streaming SSE responses).
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    async def analyze_with_chutes(self, api_token: str, data_summary: str,
                                  user_question: Optional[str] = None) -> str:
        """Send the dataset summary to the Chutes API and return the model's text.

        Args:
            api_token: Bearer token for the Chutes API.
            data_summary: Text summary produced by ``generate_data_summary``.
            user_question: Optional follow-up question; when falsy, a generic
                "analyze this dataset" prompt is sent instead.

        Returns:
            The assembled streamed response, or a human-readable error string.
            This method never raises, so the UI layer can display the result
            directly.
        """
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json"
        }
        # Build the prompt: follow-up question vs. initial full analysis.
        if user_question:
            prompt = f"""Based on this dataset summary:
{data_summary}
User question: {user_question}
Please provide a detailed answer based on the data."""
        else:
            prompt = f"""Analyze the following dataset and provide comprehensive insights:
{data_summary}
Please provide:
1. Key statistical insights
2. Notable patterns or trends
3. Data quality observations
4. Business recommendations
5. Potential areas for further analysis
Keep the analysis clear, actionable, and data-driven."""
        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [{"role": "user", "content": prompt}],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3  # lower temperature for more consistent analysis
        }
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status != 200:
                        return f"Error: API request failed with status {response.status}"
                    # Accumulate the server-sent-events stream ("data: {...}" lines).
                    full_response = ""
                    async for raw_line in response.content:
                        line = raw_line.decode("utf-8").strip()
                        if not line.startswith("data: "):
                            continue
                        data = line[6:]
                        if data == "[DONE]":
                            break
                        try:
                            chunk_data = json.loads(data)
                            choices = chunk_data.get("choices") or []
                            if choices:
                                content = choices[0].get("delta", {}).get("content", "")
                                if content:
                                    full_response += content
                        except json.JSONDecodeError:
                            # Skip keep-alive / non-JSON lines in the stream.
                            continue
                    return full_response if full_response else "No response received from the model."
        except Exception as e:
            # Network / protocol failures are reported as text, not raised.
            return f"Error connecting to Chutes API: {str(e)}"

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Load a CSV/Excel file and return (DataFrame, text summary).

        Raises:
            Exception: wrapping the underlying parse/IO error with context
                (message format preserved for the UI layer).
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()
            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")
            # Generate comprehensive data summary
            summary = self.generate_data_summary(df)
            return df, summary
        except Exception as e:
            # Chain the original exception so the real cause stays debuggable.
            raise Exception(f"Error processing file: {str(e)}") from e

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Generate a comprehensive plain-text summary of the dataset."""
        summary = []
        n_rows = len(df)
        # Basic info.  NOTE: "×" was previously mojibake ("Γ" = UTF-8 C3 97
        # decoded with a Greek codepage) — restored to the intended glyph.
        summary.append("Dataset Overview:")
        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")
        # Per-column dtype and null statistics.
        summary.append("\nColumn Information:")
        for col, dtype in df.dtypes.items():
            null_count = df[col].isnull().sum()
            # Guard against ZeroDivisionError on an empty DataFrame.
            null_pct = (null_count / n_rows) * 100 if n_rows else 0.0
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")
        # Numerical columns statistics.
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            summary.append("\nNumerical Columns Summary:")
            for col in numeric_cols:
                stats = df[col].describe()
                summary.append(f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, Range=[{stats['min']:.2f}, {stats['max']:.2f}]")
        # Categorical columns.
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append("\nCategorical Columns Summary:")
            for col in categorical_cols:
                unique_count = df[col].nunique()
                mode = df[col].mode()
                most_common = mode.iloc[0] if len(mode) > 0 else "N/A"
                summary.append(f"- {col}: {unique_count} unique values, Most common: '{most_common}'")
        # Sample data.
        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())
        return "\n".join(summary)
# Module-level DataAnalyzer shared by all Gradio callbacks below.
analyzer = DataAnalyzer()
async def analyze_data(file, api_key, user_question=""):
    """Validate inputs, summarize the uploaded file, and fetch AI analysis.

    Args:
        file: Gradio upload value — either a tempfile-like object with a
            ``.name`` attribute or, on newer Gradio versions, a plain path
            string.
        api_key: Chutes API bearer token.
        user_question: Optional specific question for the model.

    Returns:
        A ``(markdown_report, raw_summary, preview_html)`` tuple; on failure
        the first element carries the error message and the others are empty
        strings so all three Gradio outputs stay consistent.
    """
    if not file:
        return "Please upload a CSV or Excel file.", "", ""
    if not api_key:
        return "Please enter your Chutes API key.", "", ""
    try:
        # Fix: depending on the Gradio version, `file` may be a plain path
        # string — `file.name` alone would raise AttributeError.
        file_path = file if isinstance(file, str) else file.name
        # Process the uploaded file
        df, data_summary = analyzer.process_file(file_path)
        # Get AI analysis
        ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
        # Format the complete response (headings reproduced verbatim).
        response = f"""## π Data Analysis Complete!
### π Dataset Overview:
{data_summary}
### π€ AI Insights & Recommendations:
{ai_analysis}
"""
        return response, data_summary, df.head(10).to_html()
    except Exception as e:
        # Surface the failure in the UI instead of crashing the callback.
        return f"Error: {str(e)}", "", ""
def sync_analyze_data(file, api_key, user_question=""):
    """Bridge Gradio's synchronous click handler to the async pipeline."""
    coro = analyze_data(file, api_key, user_question)
    return asyncio.run(coro)
# Create the Gradio interface
# NOTE(review): the "π"/"β"/"π‘" characters in the labels and markdown below
# look like mojibake of emoji (UTF-8 bytes decoded with a Greek codepage) —
# confirm the intended glyphs before changing any user-facing text.
with gr.Blocks(title="π Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    gr.Markdown("""
    # π Smart Data Analyzer
    ### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
    """)
    with gr.Row():
        with gr.Column(scale=1):
            # File upload — restricted to the formats process_file accepts.
            file_input = gr.File(
                label="π Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single"
            )
            # API key input (masked; forwarded as a Bearer token).
            api_key_input = gr.Textbox(
                label="π Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1
            )
            # Optional question input — passed through to the model prompt.
            question_input = gr.Textbox(
                label="β Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2
            )
            # Analyze button
            analyze_btn = gr.Button("π Analyze Data", variant="primary", size="lg")
        with gr.Column(scale=2):
            # Results display — receives the markdown report.
            analysis_output = gr.Markdown(
                label="π Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights..."
            )
            # Additional outputs (hidden by default)
            with gr.Accordion("π Data Preview", open=False):
                data_preview = gr.HTML(label="First 10 Rows")
            with gr.Accordion("π Raw Data Summary", open=False):
                raw_summary = gr.Textbox(label="Dataset Summary", lines=10)
    # Event handlers — outputs must stay aligned with the 3-tuple returned by
    # sync_analyze_data: (markdown report, raw summary, preview HTML).
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview]
    )
    # Example section
    gr.Markdown("""
    ### π‘ Tips for Best Results:
    - **File Size**: Keep files under 10MB for fastest processing
    - **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
    - **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
    - **Formats**: Supports CSV, XLSX, and XLS files
    ### π― Example Questions to Ask:
    - "What are the key trends in this sales data?"
    - "Which products are underperforming?"
    - "Are there any seasonal patterns?"
    - "What recommendations do you have based on this data?"
    """)
# Script entry point: start the Gradio server (share=True requests a
# public tunnel link, exactly as the original did).
if __name__ == "__main__":
    app.launch(share=True)