"""
Virtual CPU implementation using voltage-based logic gates and flip-flops.
Built using realistic electronic components that simulate actual CPU behavior.
"""

from logic_gates import (NANDGate, ANDGate, ORGate, NOTGate, XORGate, 
                        ALU1Bit, Register2Bit, VDD, VSS, VTH, GATE_DELAY)
from flip_flops import DFlipFlop, TFlipFlop
from virtual_vram import VirtualVRAM
import time
import json
import os

# CPU Architecture Constants
WORD_SIZE = 32  # Register and instruction width in bits
REGISTER_COUNT = 32  # General-purpose registers per register file
MEMORY_SIZE = 1024 * 1024  # 1MB of memory
MAX_INSTRUCTIONS = 65536  # 64K instruction space

# Default Configuration
DEFAULT_CLOCK_FREQ = 1000  # Clock frequency in Hz
DEFAULT_NUM_CORES = 5    # Physical cores
THREADS_PER_CORE = 10    # Hardware thread contexts per core

# Instruction Set (32-bit instructions)
# Format: [31:26] = op type, [25:21] = dest reg, [20:16] = src reg1, [15:11] = src reg2
# [10:0] = immediate value for immediate-type instructions
#
# Every opcode below must be unique: the decoder classifies an instruction
# purely by its 6-bit op type, so two mnemonics sharing one value cannot be
# told apart.

# ALU Operations
OP_ADD = 0b000000  # Add registers
OP_SUB = 0b000001  # Subtract registers
OP_MUL = 0b000010  # Multiply registers
OP_DIV = 0b000011  # Divide registers
OP_AND = 0b000100  # AND registers
OP_OR  = 0b000101  # OR registers
OP_XOR = 0b000110  # XOR registers
OP_SHL = 0b000111  # Shift left
OP_SHR = 0b001000  # Shift right

# Memory Operations
OP_LOAD  = 0b001001  # Load from memory
OP_STORE = 0b001010  # Store to memory
OP_MOV   = 0b001011  # Move between registers

# Control Flow
OP_JMP = 0b001100  # Unconditional jump
OP_BEQ = 0b001101  # Branch if equal
OP_BNE = 0b001110  # Branch if not equal
OP_BGT = 0b001111  # Branch if greater than

# SIMD Operations
OP_VADD = 0b010000  # Vector add
OP_VMUL = 0b010001  # Vector multiply
OP_VDOT = 0b010010  # Vector dot product

# Extended Operations
# These two previously reused 0b001100 and 0b001101, the encodings of OP_JMP
# and OP_BEQ, which made them decode as branches. They now occupy the first
# free values after the SIMD group.
OP_LOAD_IMMED = 0b010011  # Load immediate value
OP_ROTATE = 0b010100  # Rotate bits

class ClockGenerator:
    """Wall-clock driven clock source with simple frequency scaling.

    ``tick()`` is meant to be polled: it returns ``VDD`` once per elapsed
    clock period and ``VSS`` otherwise.

    Attributes:
        base_frequency: Frequency requested at construction, in Hz.
        current_frequency: Frequency currently in effect, in Hz.
        period: Seconds per cycle at ``current_frequency``.
        start_time: ``time.time()`` at construction; basis for uptime.
        last_tick: Phase-corrected time of the most recent rising edge.
        cycles: Number of rising edges produced so far.
        performance_monitor: Counters read by the scaling logic. Nothing in
            this class updates them; callers are expected to.
    """
    def __init__(self, frequency):
        self.base_frequency = frequency
        self.current_frequency = frequency
        self.period = 1.0 / frequency
        # Kept separately from last_tick, which moves on every rising edge.
        self.start_time = time.time()
        self.last_tick = self.start_time
        self.cycles = 0
        self.performance_monitor = {'ipc': 0, 'cache_hits': 0, 'branch_accuracy': 0}
        
    def tick(self):
        """Poll the clock.

        Returns:
            ``VDD`` if at least one period has elapsed since the last rising
            edge (the cycle counter is then incremented), otherwise ``VSS``.
        """
        now = time.time()
        elapsed = now - self.last_tick
        
        # Re-evaluate the frequency whenever the cycle count is a multiple of
        # 1000 (this includes every poll made before the first edge).
        if self.cycles % 1000 == 0:
            if self.performance_monitor['ipc'] > 0.8:  # High utilization
                self.current_frequency = min(self.base_frequency * 1.2, 10000)  # Max 10KHz
            elif self.performance_monitor['ipc'] < 0.2:  # Low utilization
                self.current_frequency = max(self.base_frequency * 0.8, 100)  # Min 100Hz
            
            self.period = 1.0 / self.current_frequency
            
        if elapsed >= self.period:
            # Subtract the overshoot so late polls do not accumulate drift.
            self.last_tick = now - (elapsed % self.period)
            self.cycles += 1
            return VDD
            
        return VSS
        
    def get_stats(self):
        """Return clock statistics.

        Returns:
            A dict with ``frequency`` (current Hz), ``cycles`` (rising edges
            so far), ``uptime`` (seconds since construction) and
            ``performance`` (the live ``performance_monitor`` dict).
        """
        return {
            'frequency': self.current_frequency,
            'cycles': self.cycles,
            # Measured from construction; last_tick would only give the time
            # since the most recent edge.
            'uptime': time.time() - self.start_time,
            'performance': self.performance_monitor
        }

class Register:
    """Fixed-width register holding one D flip-flop per bit (bit 0 first)."""
    def __init__(self, bits=WORD_SIZE):
        self.bits = bits
        self.dffs = []
        for _ in range(bits):
            self.dffs.append(DFlipFlop())

    def write(self, data, clk):
        """Drive every flip-flop with the matching bit of ``data``.

        Each set bit of ``data`` is presented as ``VDD`` and each clear bit as
        ``VSS``, together with the clock level ``clk``.

        Returns:
            A list with the first element of each flip-flop's ``update()``
            result, ordered from bit 0 upwards.
        """
        return [
            self.dffs[pos].update(VDD if (data & (1 << pos)) else VSS, clk)[0]
            for pos in range(self.bits)
        ]

    def read(self):
        """Return the stored value as an unsigned integer.

        A bit counts as set when the flip-flop's ``sr.q`` level is above
        ``VTH``.
        """
        return sum(
            1 << pos
            for pos in range(self.bits)
            if self.dffs[pos].sr.q > VTH
        )

class ALU:
    """Integer ALU with scalar operations and 4-lane, 8-bit SIMD operations.

    Attributes:
        bits: Word width; scalar results are truncated to this many bits
            where the operation can exceed it.
        flags: Status flags describing the most recent ``execute()`` call.
    """
    def __init__(self, bits=WORD_SIZE):
        self.bits = bits
        self.flags = {'zero': False, 'negative': False, 'overflow': False, 'carry': False}
        
    def execute(self, a, b, op, immediate=None):
        """Execute one ALU operation.

        Args:
            a: First operand.
            b: Second operand.
            op: One of the ``OP_*`` opcodes. Opcodes without an ALU
                implementation yield a result of 0.
            immediate: Shift amount for ``OP_SHL`` / ``OP_SHR``; when ``None``
                the shift amount is taken from ``b``. Ignored otherwise.

        Returns:
            ``(result, flags)`` where ``flags`` is this ALU's ``flags`` dict
            (the same object on every call, updated in place).
        """
        word_mask = (1 << self.bits) - 1
        lane_mask = (1 << 8) - 1
        flags = self.flags

        # Clear per-operation flags so a value set by an earlier instruction
        # does not leak into the status reported for this one.
        flags['carry'] = False
        flags['negative'] = False
        flags['overflow'] = False

        result = 0
        
        if op == OP_ADD:
            total = a + b
            result = total & word_mask
            flags['carry'] = total > word_mask
        elif op == OP_SUB:
            result = (a - b) & word_mask
            # Stored as a real bool rather than the raw masked bit.
            flags['negative'] = bool(result & (1 << (self.bits - 1)))
        elif op == OP_MUL:
            product = a * b
            result = product & word_mask
            flags['overflow'] = product > word_mask
        elif op == OP_DIV:
            # Division by zero yields 0 instead of raising.
            result = a // b if b != 0 else 0
        elif op == OP_AND:
            result = a & b
        elif op == OP_OR:
            result = a | b
        elif op == OP_XOR:
            result = a ^ b
        elif op == OP_SHL:
            shift = immediate if immediate is not None else b
            # Shifting by the word width or more always leaves zero after
            # masking; short-circuit to avoid building a huge intermediate.
            result = (a << shift) & word_mask if shift < self.bits else 0
        elif op == OP_SHR:
            shift = immediate if immediate is not None else b
            result = a >> shift
        # SIMD operations process 4 8-bit values in parallel for 32-bit word
        elif op == OP_VADD:
            for lane in range(4):
                a_part = (a >> (lane * 8)) & lane_mask
                b_part = (b >> (lane * 8)) & lane_mask
                result |= ((a_part + b_part) & lane_mask) << (lane * 8)
        elif op == OP_VMUL:
            for lane in range(4):
                a_part = (a >> (lane * 8)) & lane_mask
                b_part = (b >> (lane * 8)) & lane_mask
                result |= ((a_part * b_part) & lane_mask) << (lane * 8)
        elif op == OP_VDOT:
            # Sum of the four lane-wise products, truncated to the word width.
            dot = sum(
                ((a >> (lane * 8)) & lane_mask) * ((b >> (lane * 8)) & lane_mask)
                for lane in range(4)
            )
            result = dot & word_mask
        
        flags['zero'] = result == 0
        return result, flags

class ControlUnit:
    """Instruction decoder, program counter and a simple branch predictor.

    Attributes:
        instruction_register: Register receiving each decoded instruction.
        program_counter: Register holding the current instruction index.
        branch_history: Maps ``(src_reg1, immediate)`` to a moving average in
            [0, 1] of how often that branch was taken.
        flags: Status flags; callers overwrite this with the ALU's flags.
    """
    def __init__(self):
        self.instruction_register = Register(WORD_SIZE)
        self.program_counter = Register(WORD_SIZE)
        self.branch_history = {}  # Simple branch prediction
        self.flags = {'zero': False, 'negative': False, 'overflow': False, 'carry': False}
        
    def decode(self, instruction, clk):
        """Decode a 32-bit instruction word.

        The word is also written to the instruction register using ``clk``.

        Returns:
            A dict with the raw fields (``op_type``, ``dest_reg``,
            ``src_reg1``, ``src_reg2``, ``immediate``), the classification
            flags (``is_branch``, ``is_memory``, ``is_simd``), the predictor's
            guess (``predicted_taken``) and ``alu_op`` (``None`` for
            ``OP_MOV``, otherwise the opcode).
        """
        # Store instruction
        self.instruction_register.write(instruction, clk)
        
        # Extract instruction fields
        op_type = (instruction >> 26) & 0b111111  # 6-bit opcode
        dest_reg = (instruction >> 21) & 0b11111   # 5-bit dest register
        src_reg1 = (instruction >> 16) & 0b11111   # 5-bit source register 1
        src_reg2 = (instruction >> 11) & 0b11111   # 5-bit source register 2
        immediate = instruction & 0b11111111111    # 11-bit immediate value
        
        # Determine instruction type and parameters
        is_branch = op_type in (OP_JMP, OP_BEQ, OP_BNE, OP_BGT)
        is_memory = op_type in (OP_LOAD, OP_STORE)
        is_simd = op_type in (OP_VADD, OP_VMUL, OP_VDOT)
        
        # Branch prediction: predict taken only for a branch seen before whose
        # history average is above 0.5.
        history = self.branch_history.get((src_reg1, immediate), 0.0)
        predicted_taken = is_branch and history > 0.5
        
        return {
            'op_type': op_type,
            'dest_reg': dest_reg,
            'src_reg1': src_reg1,
            'src_reg2': src_reg2,
            'immediate': immediate,
            'is_branch': is_branch,
            'is_memory': is_memory,
            'is_simd': is_simd,
            'predicted_taken': predicted_taken,
            'alu_op': None if op_type == OP_MOV else op_type
        }
        
    def update_branch_prediction(self, branch_key, taken):
        """Fold one branch outcome into the history for ``branch_key``.

        Unseen keys start at 0.5; each update is an exponential moving
        average weighting the new outcome by 0.3.
        """
        previous = self.branch_history.get(branch_key, 0.5)
        outcome = 1.0 if taken else 0.0
        self.branch_history[branch_key] = 0.7 * previous + 0.3 * outcome
        
    def increment_pc(self, clk, branch_target=None):
        """Advance the program counter, or load a branch target.

        Args:
            clk: Clock level passed to the program counter register.
            branch_target: Instruction index to jump to; ``None`` means
                "next instruction".

        Returns:
            The program counter value read back after the write.
        """
        if branch_target is None:
            next_pc = self.program_counter.read() + 1
        else:
            next_pc = branch_target
        # Wrap branch targets as well as sequential increments so the counter
        # always stays inside the instruction space.
        next_pc %= MAX_INSTRUCTIONS
        self.program_counter.write(next_pc, clk)
        return self.program_counter.read()

class Memory:
    """Byte-addressed CPU memory stored in VirtualVRAM blocks.

    Every access transfers one 4-byte big-endian word starting at the given
    byte address. Blocks are created on first write; unwritten memory reads
    as 0. A word that would straddle two blocks is not accessible.

    Attributes:
        vram: The backing ``VirtualVRAM`` instance.
        size: Number of addressable bytes; valid addresses are
            ``0 <= address < size``.
        block_size: Size of one VRAM block, taken from ``vram``.
    """
    def __init__(self, size=MEMORY_SIZE, _=None):  # _ parameter for backward compatibility
        self.vram = VirtualVRAM()
        self.size = size
        self.block_size = self.vram.block_size

    def _locate(self, address):
        """Return ``(block_id, offset)`` for ``address``, or ``None`` if unusable."""
        # Negative addresses are rejected too; they would otherwise map to
        # negative block ids.
        if address < 0 or address >= self.size:
            return None
        block_id, offset = divmod(address, self.block_size)
        # A 4-byte word must fit entirely inside one block.
        if offset + 4 > self.block_size:
            return None
        return block_id, offset
        
    def read(self, address):
        """Read the 32-bit word at ``address``.

        Returns:
            The stored value, or 0 when the address is out of range, would
            cross a block boundary, or lies in a block never written.
        """
        location = self._locate(address)
        if location is None:
            return 0
        block_id, offset = location
            
        with self.vram.lock:
            if block_id not in self.vram.active_blocks:
                return 0
            block_data, _ = self.vram.active_blocks[block_id]
            try:
                value_bytes = bytes(block_data[offset:offset + 4])
                return int.from_bytes(value_bytes, byteorder='big')
            except (IndexError, ValueError):
                return 0

    def read_word(self, address):
        """Alias of ``read()``, kept for callers using the older name."""
        return self.read(address)
            
    def write(self, address, value, clk=None):
        """Write the 32-bit word ``value`` at ``address``.

        Args:
            address: Byte address of the word.
            value: Integer in ``[0, 2**32)``; values outside that range are
                ignored.
            clk: Accepted and ignored so that callers passing a clock level
                as a third argument keep working.

        Writes to out-of-range addresses, or that would cross a block
        boundary, are silently dropped.
        """
        location = self._locate(address)
        if location is None:
            return
        block_id, offset = location
            
        with self.vram.lock:
            if block_id not in self.vram.active_blocks:
                # Create new block if it doesn't exist
                self.vram.active_blocks[block_id] = (bytearray(self.block_size), time.time())
            block_data, _ = self.vram.active_blocks[block_id]
            try:
                value_bytes = value.to_bytes(4, byteorder='big')
            except (ValueError, OverflowError):
                # Negative or oversized values do not fit in a word.
                return
            block_data[offset:offset + 4] = value_bytes
            
    def clear(self):
        """Clear all memory blocks"""
        with self.vram.lock:
            self.vram.active_blocks.clear()

    def get_state(self):
        """Return a JSON-serialisable snapshot of all allocated blocks.

        Returns:
            A dict with ``size``, ``block_size`` and ``blocks``, the latter
            mapping each block id (as a string) to its contents in hex.
        """
        with self.vram.lock:
            blocks = {
                str(block_id): bytes(entry[0]).hex()
                for block_id, entry in self.vram.active_blocks.items()
            }
        return {'size': self.size, 'block_size': self.block_size, 'blocks': blocks}

    def set_state(self, state):
        """Replace the memory contents with a snapshot from ``get_state()``.

        A missing or empty snapshot leaves the memory cleared.
        """
        blocks = (state or {}).get('blocks', {})
        restored_at = time.time()
        with self.vram.lock:
            self.vram.active_blocks.clear()
            for block_id, hex_data in blocks.items():
                self.vram.active_blocks[int(block_id)] = (
                    bytearray.fromhex(hex_data), restored_at)

class Core:
    """One CPU core: private clock, control unit, ALU, register file and
    a set of per-thread contexts, plus a reference to the shared memory."""
    def __init__(self, core_id, shared_memory, clock_freq):
        self.core_id = core_id

        # Execution units private to this core
        self.clock = ClockGenerator(clock_freq)
        self.control_unit = ControlUnit()
        self.alu = ALU(WORD_SIZE)

        # Core-level register file
        self.registers = [Register(WORD_SIZE) for _ in range(REGISTER_COUNT)]

        # Placeholder for a per-core L1 cache (not implemented yet)
        self.l1_cache = None

        # Memory object shared with the other cores
        self.memory = shared_memory

        # One context per hardware thread: its own registers, its own
        # program counter, and an 'active' marker that starts cleared.
        contexts = []
        for _ in range(THREADS_PER_CORE):
            thread_registers = [Register(WORD_SIZE) for _ in range(REGISTER_COUNT)]
            thread_pc = Register(WORD_SIZE)
            contexts.append({
                'registers': thread_registers,
                'program_counter': thread_pc,
                'active': False,
            })
        self.thread_contexts = contexts

        self.current_thread = 0
        self.busy = False

class CPU:
    """Multi-core CPU with shared memory.

    The program counter counts instructions; each instruction occupies
    ``INSTRUCTION_BYTES`` bytes of the byte-addressed shared memory, so
    instruction ``n`` lives at byte address ``n * INSTRUCTION_BYTES``.
    Data accesses made by LOAD/STORE use byte addresses directly, and code
    and data share the same address space.
    """

    # Memory transfers 4-byte words, so consecutive instructions must be
    # stored 4 bytes apart or they would overwrite one another.
    INSTRUCTION_BYTES = 4

    def __init__(self, num_cores=4, clock_freq=1000, db_path='cpu_memory.db'):
        """Build the cores, shared memory and bookkeeping, then reset.

        Args:
            num_cores: Number of ``Core`` objects to create.
            clock_freq: Frequency in Hz for the system and core clocks.
            db_path: Passed through to ``Memory`` and stored on the CPU.
        """
        # Main system clock
        self.clock = ClockGenerator(clock_freq)
        
        # Shared memory for all cores
        self.memory = Memory(MEMORY_SIZE, db_path)
        self.db_path = db_path
        
        # Initialize multiple cores
        self.cores = [Core(i, self.memory, clock_freq) for i in range(num_cores)]
        
        # Global registers (shared between cores)
        self.registers = [Register(WORD_SIZE) for _ in range(REGISTER_COUNT)]
        
        # Control unit for instruction decode (shared)
        self.control_unit = ControlUnit()
        
        # ALU for instruction execution
        self.alu = ALU(WORD_SIZE)
        
        # Inter-core communication channels
        self.core_channels = {i: [] for i in range(num_cores)}
        
        # Core scheduling and load balancing; one slot per hardware thread
        self.scheduler_stats = {
            'core_usage': [0] * num_cores,
            'thread_usage': [[0] * THREADS_PER_CORE for _ in range(num_cores)]
        }
        
        # Shared L3 cache (will be implemented later)
        self.l3_cache = None
        
        # Performance monitoring
        self.performance_counters = {
            'instructions_executed': 0,
            'cache_hits': 0,
            'cache_misses': 0,
            'clock_cycles': 0
        }
        
        # Initialize the main CPU loop
        self.running = True
        self.cycles = 0
        self.current_core = 0
        
        # Thread scheduling
        self.thread_quantum = 100  # Clock cycles per thread
        self.thread_counters = [[0] * THREADS_PER_CORE for _ in range(num_cores)]
        
        # Start core synchronization
        self._sync_cores()

    def _instruction_address(self, index):
        """Return the byte address at which instruction ``index`` is stored."""
        return index * self.INSTRUCTION_BYTES
        
    def load_program(self, instructions):
        """Store ``instructions`` in memory starting at instruction index 0."""
        for index, instr in enumerate(instructions):
            self.memory.write(self._instruction_address(index), instr)
            
    def fetch(self):
        """Return the instruction word addressed by the shared program counter."""
        pc = self.control_unit.program_counter.read()
        return self.memory.read(self._instruction_address(pc))
        
    def execute_instruction(self, clk):
        """Fetch, decode and execute one instruction on the global registers.

        Args:
            clk: Clock level forwarded to every register write.

        Loads and stores address memory at ``src_reg1 + immediate`` (modulo
        ``MEMORY_SIZE``); a store writes the value of ``src_reg2``. Branches
        are relative to the current program counter. Every non-branch,
        non-store instruction writes its result to ``dest_reg``.
        """
        # Fetch
        instruction = self.fetch()
        
        # Decode
        control = self.control_unit.decode(instruction, clk)
        op_type = control['op_type']
        
        # Execute
        src1_value = self.registers[control['src_reg1']].read()
        src2_value = self.registers[control['src_reg2']].read()
        immediate = control['immediate']
        result = 0
        branch_taken = False
        
        if control['is_memory']:
            addr = (src1_value + immediate) % MEMORY_SIZE
            if op_type == OP_LOAD:
                result = self.memory.read(addr)
            elif op_type == OP_STORE:
                self.memory.write(addr, src2_value)
                
        elif control['is_branch']:
            if op_type == OP_JMP:
                branch_taken = True
            elif op_type == OP_BEQ:
                branch_taken = src1_value == src2_value
            elif op_type == OP_BNE:
                branch_taken = src1_value != src2_value
            elif op_type == OP_BGT:
                branch_taken = src1_value > src2_value
                
            # Update branch prediction
            self.control_unit.update_branch_prediction((control['src_reg1'], immediate), branch_taken)

        elif op_type == OP_LOAD_IMMED:
            # Branch decoding above takes precedence, so this is only reached
            # when OP_LOAD_IMMED has an encoding of its own.
            result = immediate
            
        elif control['alu_op'] is not None:
            # ALU operation
            result, flags = self.alu.execute(src1_value, src2_value, op_type, immediate)
            self.control_unit.flags = flags
        else:
            # Move operation
            result = src1_value
            
        # Write back if needed
        if not control['is_branch'] and op_type != OP_STORE:
            self.registers[control['dest_reg']].write(result, clk)
        
        # Update PC; taken branches stay inside the instruction space
        branch_target = None
        if branch_taken:
            current_pc = self.control_unit.program_counter.read()
            branch_target = (current_pc + immediate) % MAX_INSTRUCTIONS
        self.control_unit.increment_pc(clk, branch_target)
        
    def schedule_task(self, program, thread_count=1):
        """Split ``program`` into contiguous segments and assign them to idle threads.

        Args:
            program: Sequence of instruction words.
            thread_count: Number of segments/threads wanted (minimum 1).

        Returns:
            A list of ``(core_id, thread_id)`` pairs, one per segment actually
            assigned, in segment order. Fewer than ``thread_count`` pairs are
            returned when the program is too short to give every thread an
            instruction or when not enough idle threads exist; in the latter
            case the unassigned tail of the program is not loaded.
        """
        thread_count = max(thread_count, 1)

        # Ceiling division so a remainder is not silently dropped.
        segment_len = -(-len(program) // thread_count)
        
        # Visit the least loaded cores first (stable for equal loads)
        usage = self.scheduler_stats['thread_usage']
        core_order = sorted(range(len(self.cores)), key=lambda i: sum(usage[i]))
        
        thread_assignments = []
        
        for core_id in core_order:
            core = self.cores[core_id]
            
            for thread_id, context in enumerate(core.thread_contexts):
                if context['active']:
                    continue

                start = len(thread_assignments) * segment_len
                segment = program[start:start + segment_len]
                if not segment:
                    # Program exhausted before all requested threads were used
                    return thread_assignments

                # Load program segment into memory
                for offset, instr in enumerate(segment):
                    self.memory.write(self._instruction_address(start + offset), instr)

                # Initialize thread context; the thread starts at its segment
                context['active'] = True
                context['program_counter'].write(start, VDD)
                thread_assignments.append((core_id, thread_id))

                if len(thread_assignments) >= thread_count:
                    return thread_assignments
                        
        return thread_assignments

    def run(self, max_cycles=None):
        """Run the active threads through the external ``CPUDriver``.

        Args:
            max_cycles: Number of loop iterations to perform, or ``None`` to
                run until interrupted. Iterations are counted whether or not
                the clock produced an edge.

        NOTE(review): thread program counters are only read here, never
        advanced; presumably the driver or a later change is meant to do
        that - confirm.
        """
        from cpu_driver import CPUDriver
        
        # Initialize the parallel execution driver
        driver = CPUDriver(len(self.cores))
        driver.start()
        
        cycles = 0
        total_instructions = 0
        last_stats_time = time.time()
        stats_interval = 1.0
        
        try:
            while max_cycles is None or cycles < max_cycles:
                clk = self.clock.tick()
                
                if clk > VTH:
                    # Collect the next instruction of every active thread
                    instructions = []
                    for core_id, core in enumerate(self.cores):
                        for thread_id, context in enumerate(core.thread_contexts):
                            if not context['active']:
                                continue
                            pc = context['program_counter'].read()
                            instr = self.memory.read(self._instruction_address(pc))
                            # An all-zero word is treated as "nothing to run"
                            if instr:
                                instructions.append((core_id, thread_id, instr))
                    
                    # Distribute instructions to driver for parallel execution
                    for core_id, thread_id, instr in instructions:
                        driver.submit_instruction(core_id, thread_id, instr)
                    
                    # Process completed instructions
                    results = driver.get_results()
                    for thread_id, result in results:
                        total_instructions += 1
                        
                        # Update performance counters
                        self.performance_counters['instructions_executed'] += 1
                        if result.get('cache_hit', False):
                            self.performance_counters['cache_hits'] += 1
                        else:
                            self.performance_counters['cache_misses'] += 1
                
                cycles += 1
                self.performance_counters['clock_cycles'] = cycles
                
                # Update performance statistics
                now = time.time()
                if now - last_stats_time >= stats_interval:
                    # Instructions completed per second over the last interval
                    ipc = total_instructions / (now - last_stats_time)
                    
                    # Calculate core utilization
                    active_cores = len([core for core in self.cores if any(ctx['active'] for ctx in core.thread_contexts)])
                    
                    print(f"\rCycles: {cycles}, Active Cores: {active_cores}, IPC: {ipc:.2f}, "
                          f"Cache Hit Rate: {self.get_cache_hit_rate():.2f}%", end="")
                    
                    total_instructions = 0
                    last_stats_time = now
                    
        except KeyboardInterrupt:
            print("\nShutting down CPU...")
        finally:
            driver.stop()
            
    def get_cache_hit_rate(self):
        """Return the cache hit rate as a percentage (0.0 when nothing was counted)."""
        total = self.performance_counters['cache_hits'] + self.performance_counters['cache_misses']
        if total == 0:
            return 0.0
        return (self.performance_counters['cache_hits'] / total) * 100.0
            
    def _sync_cores(self):
        """Reset every core, thread context and global register, clear memory,
        and print a configuration summary."""
        # Use a high clock level for the reset writes: clock.tick() only
        # returns VDD once a full period has elapsed, and Register.write is
        # documented to store data when the clock is high.
        clk = VDD
        
        # Reset all cores
        for core in self.cores:
            # Threads start idle so that schedule_task can claim them
            for context in core.thread_contexts:
                context['active'] = False
                context['program_counter'].write(0, clk)
                
            # Reset core registers
            for reg in core.registers:
                reg.write(0, clk)
                
        # Reset global registers
        for reg in self.registers:
            reg.write(0, clk)
            
        # Clear memory
        self.memory.clear()
        
        print(f"CPU initialized with:")
        print(f"- {len(self.cores)} cores with {THREADS_PER_CORE} threads each (SMT/Hyperthreading)")
        print(f"- {WORD_SIZE}-bit architecture")
        print(f"- {MEMORY_SIZE} bytes shared memory")
        print(f"- {REGISTER_COUNT} registers per thread")
            
    def send_message(self, from_core, to_core, message):
        """Append a message to ``to_core``'s channel.

        Raises:
            KeyError: If ``to_core`` is not a known core id.
        """
        self.core_channels[to_core].append({
            'from': from_core,
            'data': message,
            'timestamp': time.time()
        })
    
    def check_messages(self, core_id):
        """Return and remove all pending messages for ``core_id``."""
        messages = self.core_channels[core_id]
        self.core_channels[core_id] = []
        return messages
    
    def load_state(self, filename='cpu_state.json'):
        """Restore per-core state and shared memory written by ``save_state``.

        Returns:
            ``False`` if ``filename`` does not exist, otherwise ``True``.

        Cores, threads or registers present in the file but not in this CPU
        are ignored rather than raising.
        """
        if not os.path.exists(filename):
            return False
            
        with open(filename, 'r', encoding='utf-8') as f:
            state = json.load(f)

        # High clock level so every restored value is stored (see _sync_cores)
        clk = VDD
            
        # Load state for each core
        for core, core_state in zip(self.cores, state['cores']):
            # Restore core registers
            for reg, value in zip(core.registers, core_state['registers']):
                reg.write(value, clk)
            
            # Restore thread contexts
            for context, thread_state in zip(core.thread_contexts, core_state['threads']):
                for reg, value in zip(context['registers'], thread_state['registers']):
                    reg.write(value, clk)
                context['active'] = thread_state['active']
                context['program_counter'].write(thread_state['program_counter'], clk)
            
            # Restore control unit state
            core.control_unit.program_counter.write(core_state['program_counter'], clk)
            core.control_unit.instruction_register.write(core_state['instruction_register'], clk)
        
        # Restore shared memory
        self.memory.set_state(state['memory'])
        
        return True
        
    def save_state(self, filename='cpu_state.json'):
        """Write per-core state and shared memory to ``filename`` as JSON.

        The file holds ``cores`` (registers, program counter, instruction
        register and thread contexts of each core) and ``memory``.
        """
        state = {
            'cores': [],
            'memory': self.memory.get_state()
        }
        
        # Save state for each core
        for core in self.cores:
            core_state = {
                'registers': [reg.read() for reg in core.registers],
                'program_counter': core.control_unit.program_counter.read(),
                'instruction_register': core.control_unit.instruction_register.read(),
                'threads': [
                    {
                        'registers': [reg.read() for reg in thread['registers']],
                        'program_counter': thread['program_counter'].read(),
                        'active': thread['active']
                    }
                    for thread in core.thread_contexts
                ]
            }
            state['cores'].append(core_state)
            
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(state, f, indent=2)

# Interactive CPU terminal
if __name__ == "__main__":
    try:
        print("Virtual Multi-Core CPU Terminal - Advanced Architecture Simulator")
        print("=" * 60)
        
        # Initialize multi-core CPU. The constructor prints the configuration
        # summary itself, so it is not repeated here.
        cpu = CPU(num_cores=DEFAULT_NUM_CORES, clock_freq=DEFAULT_CLOCK_FREQ)
        
        # Vector addition program (will be split across threads)
        def create_vector_add_program(start_reg, vec_size):
            """Build a SIMD vector-add program.

            Uses registers start_reg .. start_reg+2 as scratch and register 0
            as the (zero) base address. Operands are read from byte addresses
            [0, vec_size) and [vec_size, 2*vec_size); results are stored at
            [2*vec_size, 3*vec_size). Addresses are encoded in the 11-bit
            immediate field, so 3*vec_size must not exceed 2048.
            """
            program = []
            for i in range(0, vec_size, 4):  # Process 4 elements at once using SIMD
                program.extend([
                    # Load vector elements using SIMD
                    (OP_LOAD << 26) | (start_reg << 21) | (0 << 16) | i,  # Load from memory[i]
                    (OP_LOAD << 26) | ((start_reg+1) << 21) | (0 << 16) | (i + vec_size),  # Load from memory[i+size]
                    # Add vectors using SIMD
                    (OP_VADD << 26) | ((start_reg+2) << 21) | (start_reg << 16) | ((start_reg+1) << 11),
                    # Store result using SIMD: base register in the src1
                    # field, value register in the src2 field
                    (OP_STORE << 26) | (0 << 16) | ((start_reg+2) << 11) | (i + 2*vec_size)
                ])
            return program
        
        # Create demo data - two vectors to add
        vector_size = 64  # Total elements to process
        demo_program = create_vector_add_program(1, vector_size)
        
        # Load demo values with a high clock level so the writes are stored
        cpu.registers[1].write(5, VDD)    # R1 = 5
        cpu.registers[2].write(3, VDD)    # R2 = 3
        cpu.registers[4].write(2, VDD)    # R4 = 2
        cpu.registers[6].write(0xFF, VDD) # R6 = 255

        thread_count = 1  # Threads requested when loading the demo program
        
        # Interactive menu
        while True:
            print("\nMulti-Core CPU Operations:")
            print("1. Show all core states")
            print("2. Load demo parallel program")
            print("3. Run parallel program")
            print("4. Show memory dump")
            print("5. Save CPU state")
            print("6. Load CPU state")
            print("7. Clear memory")
            print("8. Show core utilization")
            print("9. Configure thread count")
            print("0. Exit")

            try:
                choice = input("\nSelect option: ").strip()
            except EOFError:
                # No more input (e.g. piped stdin): leave instead of looping
                break

            if choice == "0":
                break

            # Report a failing command and return to the menu
            try:
                if choice == "1":
                    for core in cpu.cores:
                        active = [tid for tid, ctx in enumerate(core.thread_contexts)
                                  if ctx['active']]
                        pc = core.control_unit.program_counter.read()
                        print(f"Core {core.core_id}: PC={pc}, active threads={active}")
                elif choice == "2":
                    assignments = cpu.schedule_task(demo_program, thread_count)
                    print(f"Loaded {len(demo_program)} instructions on "
                          f"{len(assignments)} thread(s): {assignments}")
                elif choice == "3":
                    cpu.run(max_cycles=10000)
                    print()
                elif choice == "4":
                    for addr in range(0, 64, 4):
                        print(f"0x{addr:04X}: 0x{cpu.memory.read(addr):08X}")
                elif choice == "5":
                    cpu.save_state()
                    print("CPU state saved")
                elif choice == "6":
                    if cpu.load_state():
                        print("CPU state loaded")
                    else:
                        print("No saved CPU state found")
                elif choice == "7":
                    cpu.memory.clear()
                    print("Memory cleared")
                elif choice == "8":
                    for core in cpu.cores:
                        busy = sum(1 for ctx in core.thread_contexts if ctx['active'])
                        print(f"Core {core.core_id}: {busy}/{len(core.thread_contexts)} threads active")
                elif choice == "9":
                    requested = int(input("Thread count: ").strip())
                    thread_count = max(requested, 1)
                    print(f"Thread count set to {thread_count}")
                else:
                    print("Unknown option")
            except Exception as e:
                print(f"\nError: {str(e)}")
            
    except KeyboardInterrupt:
        print("\n\nCPU simulation terminated by user")
    except Exception as e:
        print(f"\nError: {str(e)}")
    finally:
        print("\nCPU terminal session ended")