import os
import json
import logging
from typing import Dict, Any, Optional, List, Union

from datasets import Dataset, DatasetDict, load_dataset, Features, Value, concatenate_datasets
from huggingface_hub import HfApi, HfFolder

# Fallback Hugging Face token used when neither the `token` argument nor the
# HF_TOKEN environment variable supplies one. Intentionally empty: access
# tokens are credentials and must never be committed to source control.
# With an empty fallback the manager raises ValueError instead of silently
# authenticating with a shared secret.
DEFAULT_TOKEN = ""

class HuggingFaceDatasetManager:
    """Manage a Hugging Face Hub dataset as a collection of named tables.

    The dataset is loaded once at construction time into ``self.dataset``
    (a mapping of table name to ``Dataset``). All mutating methods only change
    that in-memory copy; call :meth:`save_dataset` to push it to the Hub.
    """

    def __init__(self, dataset_name: str, token: Optional[str] = None):
        """Resolve the access token and load (or start) the dataset.

        Args:
            dataset_name: Repository name of the dataset on the Hub.
            token: Access token. When omitted, the ``HF_TOKEN`` environment
                variable is used, then the module-level ``DEFAULT_TOKEN``.

        Raises:
            ValueError: If no non-empty token can be resolved.
        """
        self.dataset_name = dataset_name
        # Priority: explicit argument -> HF_TOKEN -> module default. Chaining
        # with `or` lets an empty HF_TOKEN fall through to the default too.
        self.token = token or os.environ.get("HF_TOKEN") or DEFAULT_TOKEN
        if not self.token:
            raise ValueError("No HuggingFace token provided")

        # Save token for future use (side effect kept from the original code).
        HfFolder.save_token(self.token)

        # Hand the resolved token to the client explicitly instead of relying
        # on whatever happens to be stored globally.
        self.hf_api = HfApi(token=self.token)
        self.dataset = self._load_or_create_dataset()

    def _load_or_create_dataset(self) -> DatasetDict:
        """Load the dataset from the Hub, or start an empty one if it is missing.

        Only "not found" is treated as a reason to start from scratch.
        ``datasets`` reports a missing dataset with ``FileNotFoundError`` (or a
        subclass of it). Any other failure (network, authentication, ...) is
        allowed to propagate: swallowing it would leave an empty in-memory
        dataset that a later :meth:`save_dataset` could push over real data.

        Returns:
            The loaded dataset, or an empty ``DatasetDict``.
        """
        try:
            # Load the dataset from Hugging Face Hub
            dataset = load_dataset(self.dataset_name, token=self.token)
        except FileNotFoundError as e:
            logging.warning(f"Dataset ‘{self.dataset_name}’ not found on the Hub. Creating a new one. Error: {e}")
            # The dataset does not exist yet: start with an empty dictionary.
            return DatasetDict()

        logging.info(f"Successfully loaded dataset ‘{self.dataset_name}’ from the Hub.")
        return dataset

    def save_dataset(self) -> bool:
        """Push the in-memory dataset to the Hub.

        Failures are logged rather than raised, as before; the return value
        lets callers detect them.

        Returns:
            ``True`` if the push succeeded, ``False`` if there was nothing to
            save or the push failed.
        """
        if not self.dataset:
            logging.warning("Dataset is empty. Nothing to save.")
            return False

        try:
            self.dataset.push_to_hub(self.dataset_name, token=self.token)
        except Exception as e:
            logging.error(f"Failed to save dataset ‘{self.dataset_name}’ to the Hub. Error: {e}")
            return False

        logging.info(f"Successfully saved dataset ‘{self.dataset_name}’ to the Hub.")
        return True

    def get_table(self, table_name: str) -> Optional[Dataset]:
        """Return the table called ``table_name``, or ``None`` if it is absent."""
        return self.dataset.get(table_name)

    def update_table(self, table_name: str, data: Union[Dict[str, List], List[Dict]], features: Optional[Features] = None):
        """Append rows to a table, creating the table if it does not exist.

        Args:
            table_name: Name of the table to append to or create.
            data: Either a mapping of column name to list of values, or a list
                of row dictionaries. For a list, the columns are taken from
                the first row, so every row must contain those keys.
            features: Schema used only when a new table is created; an
                existing table always keeps its own schema.

        Raises:
            TypeError: If ``data`` is neither a dict nor a list.
            KeyError: If a row in a list lacks a key present in the first row.
        """
        if not isinstance(data, (dict, list)):
            raise TypeError("Data must be a dictionary or a list of dictionaries.")

        if isinstance(data, list):
            # Convert list of dictionaries to dictionary of lists
            data = {key: [row[key] for row in data] for key in data[0]} if data else {}

        if table_name in self.dataset:
            if not data:
                # Nothing to append. Building a column-less dataset against the
                # existing schema would fail, so treat this as a no-op.
                return
            # Append new data to the existing table
            existing_table = self.dataset[table_name]
            new_rows = Dataset.from_dict(data, features=existing_table.features)
            self.dataset[table_name] = concatenate_datasets([existing_table, new_rows])
        else:
            if not data and features is not None:
                # Empty table with a known schema: give every declared column
                # an empty list so the data matches ``features``.
                data = {column: [] for column in features}
            # Create a new table
            self.dataset[table_name] = Dataset.from_dict(data, features=features)

    def delete_rows(self, table_name: str, key_column: str, keys: List[Any]):
        """Drop every row whose ``key_column`` value appears in ``keys``.

        Logs a warning and does nothing if the table does not exist. Does
        nothing if ``keys`` is empty or no row matches.

        Args:
            table_name: Name of the table to filter.
            key_column: Column whose values are compared against ``keys``.
            keys: Values identifying the rows to remove.
        """
        if table_name not in self.dataset:
            logging.warning(f"Table ‘{table_name}’ not found in the dataset.")
            return

        if not keys:
            return

        table = self.dataset[table_name]
        column = table[key_column]
        try:
            # Hash-based lookup keeps the scan linear in the number of rows.
            excluded = frozenset(keys)
            indices_to_keep = [i for i, value in enumerate(column) if value not in excluded]
        except TypeError:
            # Unhashable keys or cell values: fall back to a plain list scan.
            indices_to_keep = [i for i, value in enumerate(column) if value not in keys]

        if len(indices_to_keep) == len(table):
            # No row matched; keep the table object untouched.
            return
        self.dataset[table_name] = table.select(indices_to_keep)

    def get_json_table(self, table_name: str) -> Optional[Dict]:
        """Decode the JSON document stored in a table.

        The table is expected to hold the document as a JSON string in the
        ``"data"`` column of its first row.

        Returns:
            The decoded document, or ``None`` if the table is missing or has
            no rows.
        """
        if table_name not in self.dataset:
            return None

        table = self.dataset[table_name]
        if len(table) == 0:
            # An existing but empty table holds no document.
            return None
        return json.loads(table[0]["data"])

    def update_json_table(self, table_name: str, data: Dict):
        """Store ``data`` as a JSON string in the table's ``"data"`` column.

        An existing non-empty table has the ``"data"`` value of every row
        overwritten. A missing or empty table is replaced by a single-row
        table with one string column.

        Args:
            table_name: Name of the table holding the document.
            data: JSON-serialisable document to store.
        """
        json_data = json.dumps(data)
        has_rows = table_name in self.dataset and len(self.dataset[table_name]) > 0
        if has_rows:
            self.dataset[table_name] = self.dataset[table_name].map(lambda _row: {"data": json_data})
        else:
            # Mapping over zero rows would store nothing, so build the row.
            features = Features({"data": Value("string")})
            self.dataset[table_name] = Dataset.from_dict({"data": [json_data]}, features=features)

