import argparse
import subprocess
import os
import json
import logging
import sys
import time
from datetime import datetime

# Configure Enterprise Logging - Force UTF-8 to avoid Windows cp1252 emoji issues.
#
# The console streams are switched to UTF-8 *before* the logging handlers are
# created: StreamHandler keeps a reference to the stream object it is given,
# so the stream has to be fixed first. reconfigure() changes the encoding of
# the existing stream objects in place instead of stacking a second
# TextIOWrapper on top of the same underlying buffer.
for _stream in (sys.stdout, sys.stderr):
    _encoding = (getattr(_stream, "encoding", None) or "").lower()
    if _encoding.replace("-", "").replace("_", "") == "utf8":
        continue  # already UTF-8, whatever the spelling ('utf-8', 'UTF-8', 'utf8')
    # Replaced streams (IDE consoles, capture objects, None under pythonw)
    # may not offer reconfigure(); leave those untouched rather than crash.
    if hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8", errors="replace")

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - [AutoDS-Enterprise] - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("autods_agent.log", encoding='utf-8'),
        logging.StreamHandler(sys.stdout)
    ]
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)

def _extract_script_path(command):
    """Return the first token of *command* that names a ``.py`` file, or None."""
    for token in command.split():
        # Tolerate tokens that were quoted for the shell.
        candidate = token.strip('"\'')
        if candidate.endswith('.py'):
            return candidate
    return None


def _strip_code_fences(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    import re  # local import: only needed on the healing path

    cleaned = text.strip()
    # Opening fence with any (or no) language tag: ```python, ```py, ```
    cleaned = re.sub(r"^```[A-Za-z0-9_+-]*[ \t]*\n?", "", cleaned, count=1)
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def heal_code(command, error_msg):
    """Ask an LLM to repair the Python script referenced by a failed command.

    The first ``.py`` token in *command* is taken as the script to repair. Its
    source and *error_msg* are sent to the Groq chat-completions API using the
    ``groq`` key from ``keys_config.json`` (located next to this file), and the
    reply replaces the script on disk.

    NOTE(security): this uploads local source code to a third-party service and
    writes model-generated code that the caller subsequently executes. The
    reply is only checked for being syntactically valid Python.

    Args:
        command: Shell command string that failed.
        error_msg: Combined stdout/stderr of the failed run.

    Returns:
        True if the script was rewritten, False otherwise. Never raises: any
        failure (missing ``requests``, missing key file, network error, ...)
        is logged and reported as False.
    """
    script_path = _extract_script_path(command)
    if not script_path or not os.path.exists(script_path):
        return False

    logger.warning(f"[HEALER] Code crashed! Attempting to self-heal '{script_path}' using LLM...")

    try:
        # Imported inside the try so a missing optional dependency degrades to
        # "healing unavailable" instead of raising out of this helper.
        import requests

        keys_path = os.path.join(os.path.dirname(__file__), "keys_config.json")
        with open(keys_path, encoding='utf-8') as f:
            keys = json.load(f)
        groq_key = keys.get("groq", "")
        if isinstance(groq_key, list):
            # A list of keys is accepted; only the first one is used.
            groq_key = groq_key[0] if groq_key else ""
        if not groq_key:
            return False

        with open(script_path, 'r', encoding='utf-8') as f:
            code = f.read()

        prompt = f"The following Python script '{script_path}' failed with this error:\n\n{error_msg}\n\nHere is the code:\n```python\n{code}\n```\n\nPlease fix the bug so it doesn't crash. Reply ONLY with the fully corrected python code. Do not use markdown blocks."

        res = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={"Authorization": f"Bearer {groq_key}"},
            json={
                "model": "llama-3.3-70b-versatile",
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.1,
            },
            # Without a timeout a stalled connection would hang the pipeline.
            timeout=60,
        )
        res_json = res.json()
        if 'choices' not in res_json:
            logger.error(f"[HEALER] API Error: {res_json}")
            return False

        fixed_code = _strip_code_fences(res_json['choices'][0]['message']['content'])
        if not fixed_code:
            return False

        # Never replace a script with text that does not even parse.
        try:
            compile(fixed_code, script_path, 'exec')
        except (SyntaxError, ValueError) as parse_error:
            logger.warning(f"[HEALER] Rejected LLM patch for '{script_path}': {parse_error}")
            return False

        # Keep the pre-healing source once, so the original can be restored.
        backup_path = script_path + ".bak"
        if not os.path.exists(backup_path):
            with open(backup_path, 'w', encoding='utf-8') as f:
                f.write(code)

        with open(script_path, 'w', encoding='utf-8') as f:
            f.write(fixed_code)

        logger.info(f"[HEALER] Neural code injection successful for '{script_path}'. Resuming...")
        return True
    except Exception as e:
        logger.warning(f"[HEALER] Self-healing failed: {e}")
        return False

def run_step(step_name, command, agent="master", auto_heal_attempts=1):
    """Run one pipeline step as a shell command, optionally self-healing on failure.

    Args:
        step_name: Human-readable step name used in log and console output.
        command: Shell command string to execute.
        agent: Agent identifier echoed in the console banner and the log.
        auto_heal_attempts: How many times a failed run may be passed to
            ``heal_code`` and retried. Values below zero are treated as zero.

    Returns:
        True if the command eventually exits with status 0, otherwise False.
    """
    print(f"\n[AGENT_ID: {agent}] Starting specialized task: {step_name}")
    logger.info(f"[STEP] Executing: {step_name} with agent {agent}")
    logger.debug(f"Command: {command}")
    start_time = time.time()

    # A negative value would make the loop below empty: the command would
    # never run and the function would silently return None.
    heal_budget = max(0, auto_heal_attempts)

    for attempt in range(heal_budget + 1):
        try:
            # NOTE(security): shell=True executes `command` through the system
            # shell; callers must quote any interpolated values themselves.
            result = subprocess.run(command, shell=True, check=True, text=True, capture_output=True, encoding='utf-8', errors='replace')
            # Measured from the first attempt, so healing time is included.
            duration = time.time() - start_time
            if attempt > 0:
                logger.info(f"[OK] Step '{step_name}' completed in {duration:.2f}s after {attempt} self-healing cycle(s)!")
            else:
                logger.info(f"[OK] Step '{step_name}' completed in {duration:.2f}s")

            # Print output so bridge server can parse metrics if any
            if result.stdout:
                print(result.stdout)
                logger.debug(f"Output:\n{result.stdout}")
            # Warnings written to stderr by a successful step are kept in the
            # debug log instead of being dropped.
            if result.stderr:
                logger.debug(f"Stderr:\n{result.stderr}")
            return True
        except subprocess.CalledProcessError as e:
            error_msg = f"{e.stdout}\n{e.stderr}".strip()
            logger.error(f"[FAIL] Error in {step_name}: {error_msg}")
            # Retry only while healing budget remains and the healer reports
            # that it actually rewrote the script.
            if attempt < heal_budget and heal_code(command, error_msg):
                continue
            return False
        except Exception as e:
            logger.critical(f"[CRITICAL] Unexpected error in {step_name}: {str(e)}")
            return False

    # Every iteration returns or continues; reaching this point means the last
    # allowed attempt was consumed without success.
    return False

def validate_config(config):
    """Check that *config* contains every mandatory pipeline setting.

    The mandatory keys are ``input_csv``, ``target_column`` and ``data_dir``,
    checked in that order. The first missing key is logged as an error.

    Returns:
        True when all keys are present, False as soon as one is missing.
    """
    mandatory = ("input_csv", "target_column", "data_dir")
    first_missing = next((name for name in mandatory if name not in config), None)
    if first_missing is None:
        return True
    logger.error(f"[CONFIG] Missing required key: {first_missing}")
    return False

def post_to_moltbook(message):
    """Announce *message* as a Moltbook update.

    Nothing is sent over the network here: the update is written to the log
    (a banner line followed by the message) and echoed to stdout with the
    ``[OPENCLAW-SKILL] moltbook-post:`` prefix.
    """
    log_entries = (
        "[UPLINK] Posting update to Moltbook (lablab submolt)...",
        f"[MOLTBOOK] {message}",
    )
    for entry in log_entries:
        logger.info(entry)

    skill_line = f"\n[OPENCLAW-SKILL] moltbook-post: {message}"
    print(skill_line)

def _quote_arg(value):
    """Quote one argument for the shell command strings executed by run_step."""
    import shlex  # local import: only needed while building commands

    text = str(value)
    if os.name == "nt":
        # cmd.exe does not understand POSIX single quotes; list2cmdline applies
        # the double-quote rules used by Windows programs. Plain values come
        # back unchanged on both platforms.
        return subprocess.list2cmdline([text])
    return shlex.quote(text)


def _rebuild_flattened_csv(flat_text):
    """Rebuild multi-line CSV text from a stream whose rows were joined by spaces.

    Heuristic: the first comma-separated part containing an inner space is the
    junction of the last header and the first data value, which fixes the
    column count. Every later part that closes a row is a junction as well
    ("<last value> <first value of next row>") and is split the same way.

    Returns:
        ``(csv_text, column_count)``, or None when no junction is found.
        A trailing incomplete row is dropped.
    """
    parts = flat_text.split(',')
    for idx, part in enumerate(parts):
        stripped = part.strip()
        if " " not in stripped:
            continue

        h_count = idx + 1
        # split(None, 1) ignores surrounding/repeated whitespace, so a leading
        # space can no longer produce an empty header name.
        last_header, first_val = stripped.split(None, 1)
        headers = parts[:idx] + [last_header]

        fields = [first_val]
        remaining = parts[h_count:]
        final_pos = len(remaining) - 1
        for pos, raw in enumerate(remaining):
            closes_row = len(fields) % h_count == h_count - 1
            candidate = raw.strip()
            # The very last part ends the stream, so it is never a junction.
            if closes_row and pos != final_pos and " " in candidate:
                row_end, next_start = candidate.split(None, 1)
                fields.extend((row_end, next_start))
            else:
                fields.append(raw)

        csv_lines = [",".join(headers)]
        for start in range(0, len(fields), h_count):
            row = fields[start:start + h_count]
            if len(row) == h_count:
                csv_lines.append(",".join(row))
        return "\n".join(csv_lines), h_count
    return None


def _find_target_autonomous(csv_path, current_target):
    """Guess the target column of the CSV at *csv_path*.

    Tier 1: column name equals, or ends with ``_`` plus, a known keyword.
    Tier 2: numeric column with at most 10 distinct values whose mean absolute
            correlation with the other numeric columns is the highest and
            above 0.3.
    Tier 3: last column.

    Only the first 500 rows are read. Returns the stripped column name, or
    *current_target* if anything goes wrong (the error is logged).
    """
    try:
        import pandas as pd
        import numpy as np
        df_full = pd.read_csv(csv_path, nrows=500)  # Sample for speed
        raw_cols = df_full.columns.tolist()
        actual_cols = [c.strip() for c in raw_cols]

        # Tier 1: Keyword Match (Semantic Search)
        keywords = ['target', 'label', 'outcome', 'buy_again', 'loyalty', 'default', 'churn', 'status', 'risk', 'class', 'y', 'result']
        for col in actual_cols:
            lowered = col.lower()
            if any(kw == lowered or lowered.endswith(f'_{kw}') for kw in keywords):
                logger.info(f"[AGENT-T1] Semantic match. Target locked: '{col}'")
                return col

        # Tier 2: Correlation Matrix (Highest correlated with others = likely label)
        numeric_df = df_full.select_dtypes(include=[np.number])
        if len(numeric_df.columns) > 1:
            corr = numeric_df.corr().abs()
            best_col, best_corr = None, 0.3
            # Index the frame with the raw header (it may carry whitespace) but
            # report the stripped name, like the other tiers do.
            for raw, col in zip(raw_cols, actual_cols):
                if raw in numeric_df.columns and numeric_df[raw].nunique() <= 10:
                    avg_corr = corr[raw].drop(raw).mean()
                    # NaN (e.g. a constant column) fails this comparison and is skipped.
                    if avg_corr > best_corr:
                        best_col, best_corr = col, avg_corr
            if best_col is not None:
                logger.info(f"[AGENT-T2] Correlation matrix analysis. Target locked: '{best_col}' (avg_corr={best_corr:.2f})")
                return best_col

        # Tier 3: Final Fallback — Last column (universal ML tabular convention)
        last_col = actual_cols[-1]
        logger.info(f"[AGENT-T3] Last-column fallback. Target locked: '{last_col}'")
        return last_col

    except Exception as e:
        logger.warning(f"[AGENT] Heuristic Brain failed: {e}")
        return current_target


def _build_steps(task, input_csv, data_dir, target_col):
    """Return the ordered step definitions for *task*, or None if unsupported."""
    def artifact(filename):
        # Quote the joined path so a data_dir containing spaces stays one argument.
        return _quote_arg(f"{data_dir}/{filename}")

    source = _quote_arg(input_csv)
    target = _quote_arg(target_col)

    if task == "intel":
        return [
            {
                "name": "Neural Intel Scan",
                "command": f"python mcp_servers/intel_scan.py {source} {artifact('intelligence.json')}",
                "agent": "intel"
            }
        ]

    # "full" and "train" currently run the same pipeline.
    if task in ("full", "train"):
        return [
            {
                "name": "Data Intelligence Pre-Scan",
                "command": f"python mcp_servers/intel_scan.py {source} {artifact('intelligence.json')}",
                "agent": "intel"
            },
            {
                "name": "Web Contextual Enrichment",
                "command": f"python mcp_servers/web_enrichment.py {source} {artifact('enriched.csv')}",
                "agent": "data"
            },
            {
                "name": "Industrial Anomaly Scan (Vision Unit)",
                "command": f"python mcp_servers/vision_agent.py {artifact('enriched.csv')} {artifact('anomalies.json')}",
                "agent": "vision"
            },
            {
                "name": "Neural Data Cleaning",
                "command": f"python mcp_servers/cleaner.py {artifact('enriched.csv')} {artifact('cleaned.csv')}",
                "agent": "data"
            },
            {
                "name": "Feature Engineering",
                "command": f"python mcp_servers/features.py {artifact('cleaned.csv')} {artifact('features.csv')} {target}",
                "agent": "data"
            },
            {
                "name": "Model Training (Gluon Engine)",
                "command": f"python mcp_servers/trainer.py {artifact('features.csv')} {target} {artifact('models')}",
                "agent": "trainer"
            },
            {
                "name": "Neural Analyst Review",
                "command": f"python mcp_servers/evaluator.py {artifact('features.csv')} {target} {artifact('models')} {artifact('evaluation.csv')}",
                "agent": "intel"
            },
            {
                "name": "Mission Report Export (Premium Dossier)",
                "command": f"python mcp_servers/premium_reporter.py {artifact('evaluation.csv')} {artifact('report.pdf')}",
                "agent": "deploy"
            },
            {
                "name": "Industrial Compliance Audit",
                "command": f"python mcp_servers/compliance_agent.py {artifact('cleaned.csv')} {artifact('compliance_report.json')}",
                "agent": "intel"
            }
        ]

    return None


def main():
    """Parse CLI arguments, prepare the input data and run the selected pipeline.

    Flow:
      1. Load the JSON config (defaults if the file is missing) and apply the
         ``--csv`` override.
      2. Convert PDF/image inputs to CSV through ``gemini_ingest.py``.
      3. If ``input_csv`` is not an existing path but looks like delimited
         text, buffer it to ``<data_dir>/neural_ingest.csv`` (repairing
         single-line streams first).
      4. Confirm or auto-discover the target column.
      5. Run the steps of the chosen task in order, stopping at the first
         failure.

    Returns None in every case; problems are reported through the log.
    """
    parser = argparse.ArgumentParser(description="AutoDS-MCP Enterprise Agent")
    parser.add_argument("--config", default="config.json", help="Path to configuration file")
    parser.add_argument("--mode", default="production", choices=["dev", "production"], help="Execution mode")
    parser.add_argument("--csv", help="Override input CSV path")
    parser.add_argument("--task", default="full", choices=["full", "intel", "train", "predict"], help="Agent Task")
    args = parser.parse_args()

    logger.info("[INIT] Initializing ROBOTIC AI CORE v1.0")
    logger.info(f"[BOOT] Starting AutoDS-MCP Agent in {args.mode} mode")

    # Load config ('utf-8-sig' also accepts files saved with a BOM)
    try:
        with open(args.config, 'r', encoding='utf-8-sig') as f:
            config = json.load(f)
    except FileNotFoundError:
        logger.warning("[CONFIG] Config file not found, using defaults.")
        config = {"target_column": "target", "data_dir": "data"}
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # A malformed config is a user error: abort cleanly, no traceback.
        logger.critical(f"[CONFIG] Could not parse config file '{args.config}': {e}")
        return
    if not isinstance(config, dict):
        logger.critical(f"[CONFIG] Config file '{args.config}' must contain a JSON object.")
        return

    # Override config with CLI args
    if args.csv:
        config["input_csv"] = args.csv

    if not validate_config(config):
        logger.critical("[ABORT] Configuration validation failed. Ensure 'input_csv' is provided.")
        return

    input_csv = config["input_csv"]
    target_col = config["target_column"]
    # Everything below calls string methods on input_csv.
    if not isinstance(input_csv, str) or not input_csv.strip():
        logger.critical("[ABORT] 'input_csv' must be a non-empty string.")
        return

    # Setup paths
    data_dir = config.get("data_dir", "data")
    os.makedirs(data_dir, exist_ok=True)

    # GEMINI OMNI-PERCEPTION: Convert unstructured (PDF/Image) to CSV first
    if input_csv.lower().endswith(('.pdf', '.png', '.jpg', '.jpeg')):
        logger.info(f"[PERCEPTION] Initiating Omni-Ingest for unstructured file: {input_csv}")
        if not run_step("Gemini Omni-Perception Ingest", f"python mcp_servers/gemini_ingest.py {_quote_arg(input_csv)}"):
            logger.error("[ABORT] Perception ingest failed. Finalizing early.")
            return

        # gemini_ingest.py creates 'perceived_data.csv' in the same folder
        input_csv = os.path.join(os.path.dirname(input_csv), "perceived_data.csv")
        config["input_csv"] = input_csv
        logger.info(f"[PERCEPTION] Switch successful. Structured Vector: {input_csv}")

    # RAW DATA HANDLING: Buffering raw strings to temp file if not a valid path
    is_path = os.path.exists(input_csv)
    has_delimiters = ("," in input_csv or "\t" in input_csv or "\n" in input_csv)

    if not is_path and has_delimiters:
        ingest_path = os.path.join(data_dir, "neural_ingest.csv")

        # AGENTIC REPAIR: Check if CSV is flattened into a single line (common for clipboard)
        if "\n" not in input_csv.strip() and "," in input_csv:
            logger.info("[AGENT] Flattened stream detected. Initiating line-wrap reconstruction...")
            rebuilt = _rebuild_flattened_csv(input_csv)
            if rebuilt is not None:
                input_csv, h_count = rebuilt
                logger.info(f"[AGENT] Synthetic row structure recovered: {h_count} columns found.")

        logger.info(f"[INGEST] Raw reconnaissance data detected. Buffering to: {ingest_path}")
        with open(ingest_path, "w", encoding="utf-8") as f:
            f.write(input_csv.strip())
        input_csv = ingest_path
        config["input_csv"] = input_csv

    # AGENTIC TARGET DISCOVERY (3-Tier Heuristic Brain)
    try:
        import pandas as pd
        temp_df = pd.read_csv(input_csv, nrows=0)  # header only
        actual_cols = [c.strip() for c in temp_df.columns.tolist()]
        if target_col not in actual_cols:
            target_col = _find_target_autonomous(input_csv, target_col)
            config["target_column"] = target_col
        else:
            logger.info(f"[AGENT] Target column '{target_col}' confirmed in dataset.")
    except Exception as e:
        # Best effort: the pipeline still runs with the configured target.
        logger.warning(f"[AGENT] Target validation failed: {e}")

    logger.info(f"[CONFIG] Input Vector: {input_csv} | Target: {target_col} | Task: {args.task}")

    # Task Specific Pipelines
    steps = _build_steps(args.task, input_csv, data_dir, target_col)
    if steps is None:
        logger.error(f"[ERROR] Task {args.task} not implemented.")
        return

    # Execute Pipeline
    success = True
    total = len(steps)
    for i, step in enumerate(steps, 1):
        logger.info(f"[{i}/{total}] Starting: {step['name']}")
        if not run_step(step["name"], step["command"], agent=step.get("agent", "master")):
            success = False
            break

    if success:
        logger.info("[SUCCESS] Pipeline completed successfully!")
        # NOTE(review): the "intel" task has no report-export step, yet the
        # report path and training message below are still emitted for it.
        report_path = os.path.abspath(f'{data_dir}/report.pdf')
        logger.info(f"[REPORT] Final Report: {report_path}")

        # Post to Moltbook for Hackathon Requirement
        update_msg = f"AutoDS-MCP successfully processed '{input_csv}'. AutoGluon ensemble training complete. Report: {report_path}"
        post_to_moltbook(update_msg)
    else:
        logger.error("[FAILED] Pipeline execution failed. Check logs.")

# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()