"""
GPU Architecture configuration and optimization
"""
from dataclasses import dataclass, field

@dataclass
class SMConfig:
    """Resource figures for a single streaming multiprocessor (SM).

    Every field carries a default, so ``SMConfig()`` yields the stock
    configuration; fields can be overridden positionally or by keyword.
    """

    # Tensor cores per SM
    tensor_cores: int = 8
    # CUDA cores per SM
    cuda_cores: int = 128
    # Shared memory per SM: 128KB, i.e. 131072
    shared_memory: int = 128 << 10
    # Max threads per SM
    max_threads: int = 1024
    
@dataclass
class GPUArchitecture:
    """Per-GPU hardware figures used for resource accounting.

    The totals are independent fields rather than values derived from
    ``sm_config``; with the defaults they happen to equal
    ``streaming_multiprocessors`` times the per-SM counts (80 * 8 and
    80 * 128). Callers overriding one field must keep the others in step
    themselves.
    """
    streaming_multiprocessors: int = 80  # Number of SMs per GPU
    total_tensor_cores: int = 640  # Total tensor cores per GPU
    total_cuda_cores: int = 10240  # Total CUDA cores per GPU
    vram_size: int = 24 * 1024 * 1024 * 1024  # 24GB VRAM
    # A default_factory builds a fresh SMConfig for every GPUArchitecture.
    # A bare ``SMConfig()`` default is an unhashable (mutable) object, which
    # dataclasses reject with ValueError on Python 3.11+ and which older
    # versions would silently share between all instances.
    sm_config: SMConfig = field(default_factory=SMConfig)

class GPUResourceManager:
    """Track and plan tensor/CUDA core usage across a set of identical GPUs.

    All GPUs share one ``architecture`` description. Usage is tracked per
    GPU id (``0 .. num_gpus - 1``) in ``tensor_usage`` and ``cuda_usage``,
    which map a GPU id to the number of cores currently allocated on it.
    """

    # Fraction of (tensor cores, CUDA cores, SMs) recommended per GPU for
    # each known task type.
    _ALLOCATION_PROFILES = {
        'matrix_mining': (0.8, 0.2, 0.8),   # favour tensor cores for matrix work
        'general_mining': (0.4, 0.6, 0.6),  # more balanced split
    }

    def __init__(self, num_gpus: int = 10):
        """Create a manager for ``num_gpus`` GPUs with the default architecture."""
        self.num_gpus = num_gpus
        self.architecture = GPUArchitecture()
        self.sm_allocation = {}  # Track SM allocation
        self.tensor_usage = {}   # Track tensor core usage
        self.cuda_usage = {}     # Track CUDA core usage

    def get_total_resources(self) -> dict:
        """Get total computational resources across all GPUs.

        Returns:
            A dict with keys ``total_sms``, ``total_tensor_cores``,
            ``total_cuda_cores`` and ``total_vram``, each being the per-GPU
            figure multiplied by ``num_gpus``.
        """
        return {
            'total_sms': self.num_gpus * self.architecture.streaming_multiprocessors,
            'total_tensor_cores': self.num_gpus * self.architecture.total_tensor_cores,
            'total_cuda_cores': self.num_gpus * self.architecture.total_cuda_cores,
            'total_vram': self.num_gpus * self.architecture.vram_size
        }

    def allocate_cores(self, task_type: str, gpu_id: int, num_cores: int) -> bool:
        """Allocate cores for a specific task.

        Args:
            task_type: ``'tensor'`` selects tensor cores; any other value
                selects CUDA cores.
            gpu_id: Index of the target GPU; must be in ``range(num_gpus)``.
            num_cores: Number of cores to add to the GPU's tracked usage.
                A negative value gives cores back.

        Returns:
            True if the usage tracker was updated. False, with no state
            change, if ``gpu_id`` is out of range or the resulting usage
            would fall outside ``0 .. available``.
        """
        # Usage recorded for a GPU that does not exist would never show up
        # in get_sm_utilization, so refuse it up front.
        if not 0 <= gpu_id < self.num_gpus:
            return False

        if task_type == 'tensor':
            available = self.architecture.total_tensor_cores
            usage_tracker = self.tensor_usage
        else:  # CUDA
            available = self.architecture.total_cuda_cores
            usage_tracker = self.cuda_usage

        new_usage = usage_tracker.get(gpu_id, 0) + num_cores
        # The lower bound stops a negative request from driving usage below zero.
        if not 0 <= new_usage <= available:
            return False

        usage_tracker[gpu_id] = new_usage
        return True

    def get_optimal_allocation(self, task_type: str) -> dict:
        """Get the recommended core allocation for a task type.

        This only computes a recommendation; no usage tracker is modified.

        Args:
            task_type: ``'matrix_mining'`` or ``'general_mining'``.

        Returns:
            A dict mapping every GPU id to a dict with keys
            ``tensor_cores``, ``cuda_cores`` and ``sms``. Empty if
            ``task_type`` is not recognised.
        """
        profile = self._ALLOCATION_PROFILES.get(task_type)
        if profile is None:
            return {}

        tensor_ratio, cuda_ratio, sm_ratio = profile
        arch = self.architecture
        # The figures are the same for every GPU, so compute them once.
        tensor_per_gpu = int(arch.total_tensor_cores * tensor_ratio)
        cuda_per_gpu = int(arch.total_cuda_cores * cuda_ratio)
        sms_per_gpu = int(arch.streaming_multiprocessors * sm_ratio)

        # Each GPU gets its own dict so callers can adjust one entry
        # without affecting the others.
        return {
            gpu_id: {
                'tensor_cores': tensor_per_gpu,
                'cuda_cores': cuda_per_gpu,
                'sms': sms_per_gpu,
            }
            for gpu_id in range(self.num_gpus)
        }

    def get_sm_utilization(self) -> dict:
        """Get current core utilization per GPU.

        Utilization is derived from the tensor/CUDA usage trackers, not
        from ``sm_allocation``.

        Returns:
            A dict mapping every GPU id to a dict with keys
            ``tensor_utilization`` and ``cuda_utilization`` (used cores
            divided by the per-GPU total, or 0.0 when that total is zero)
            and ``overall_utilization`` (the mean of the two).
        """
        tensor_total = self.architecture.total_tensor_cores
        cuda_total = self.architecture.total_cuda_cores

        utilization = {}
        for gpu_id in range(self.num_gpus):
            tensor_used = self.tensor_usage.get(gpu_id, 0)
            cuda_used = self.cuda_usage.get(gpu_id, 0)

            # An architecture with no cores of a kind reports 0.0 instead
            # of raising ZeroDivisionError.
            tensor_util = tensor_used / tensor_total if tensor_total else 0.0
            cuda_util = cuda_used / cuda_total if cuda_total else 0.0

            utilization[gpu_id] = {
                'tensor_utilization': tensor_util,
                'cuda_utilization': cuda_util,
                'overall_utilization': (tensor_util + cuda_util) / 2
            }

        return utilization
