"""
Test the virtual CPU performance with SIMD and cryptographic operations
"""
from virtual_cpu import (
    CPU, WORD_SIZE, OP_ADD, OP_XOR, OP_SHR, OP_SHL, OP_AND, OP_OR,
    OP_LOAD, OP_STORE, OP_VADD, OP_VMUL, Memory, MEMORY_SIZE, THREADS_PER_CORE
)
from logic_gates import VDD, VSS, VTH
import time

def create_vector_add_test(size=64):
    """Build the instruction words for a vector-addition test program.

    The addresses baked into the program assume this memory layout:

    * first operand vector:  addresses ``0 .. size - 1``
    * second operand vector: addresses ``size .. 2*size - 1``
    * result vector:         addresses ``2*size .. 3*size - 1``

    One LOAD / LOAD / VADD / STORE group is emitted for every fourth index
    (0, 4, 8, ...), using registers R1 and R2 as operands and R3 as the sum.

    NOTE(review): stepping by 4 presumably relies on each LOAD/VADD/STORE
    handling four consecutive elements; that behaviour lives in
    ``virtual_cpu`` and is not visible here -- confirm.

    Args:
        size: Number of elements per vector (default 64).

    Returns:
        A flat list of integer instruction words, four per emitted group.
    """
    def load_word(dest_reg, address):
        # opcode in bits 26+, destination register at bit 21, base register
        # field (always 0 here) at bit 16, address in the low bits.
        return (OP_LOAD << 26) | (dest_reg << 21) | (0 << 16) | address

    program = []
    for base in range(0, size, 4):
        second_addr = base + size
        result_addr = base + 2 * size

        program.append(load_word(1, base))          # R1 = mem[base]
        program.append(load_word(2, second_addr))   # R2 = mem[base + size]
        # R3 = R1 + R2 (vector add)
        program.append((OP_VADD << 26) | (3 << 21) | (1 << 16) | (2 << 11))
        # mem[base + 2*size] = R3
        program.append((OP_STORE << 26) | (0 << 21) | (3 << 16) | result_addr)

    return program

def create_crypto_test():
    """Build the instruction words for a hash-style bit-mixing test program.

    The program first emits four OP_ADD words that carry the constants
    0x6a09e667, 0xbb67ae85, 0x3c6ef372 and 0xa54ff53a (the first four
    SHA-256 initial hash words) targeting R0..R3.  Then, for each of R0..R3,
    it emits seven words: a shift-right / shift-left / OR triple that forms
    a rotate-right-by-2 in R6, followed by XOR and AND mixing steps that use
    R7 as scratch.

    NOTE(review): each seed constant is OR-ed directly into the word and is
    up to 32 bits wide, so it overlaps the opcode (bit 26+) and register
    (bits 21 and 16) fields built in the same expression.  Whether the CPU
    decodes these words as intended cannot be confirmed from this file.

    Returns:
        A list of 4 + 4 * 7 = 32 integer instruction words.
    """
    def reg_imm(opcode, dest, src, immediate):
        # Register/immediate form: immediate occupies the low bits.
        return (opcode << 26) | (dest << 21) | (src << 16) | immediate

    def reg_reg(opcode, dest, src_a, src_b):
        # Three-register form: second source register sits at bit 11.
        return (opcode << 26) | (dest << 21) | (src_a << 16) | (src_b << 11)

    seeds = (0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a)

    # Seed words for R0..R3 (source register field is always 0).
    program = [reg_imm(OP_ADD, reg, 0, seed) for reg, seed in enumerate(seeds)]

    for reg in range(4):
        neighbour = (reg + 1) % 4
        opposite = (reg + 2) % 4

        program += [
            # Rotate right by 2, assembled from two shifts and an OR.
            reg_imm(OP_SHR, 4, reg, 2),              # R4 = Rreg >> 2
            # Shift amount is 30 only if WORD_SIZE is 32 -- TODO confirm.
            reg_imm(OP_SHL, 5, reg, WORD_SIZE - 2),  # R5 = Rreg << (WORD_SIZE - 2)
            reg_reg(OP_OR, 6, 4, 5),                 # R6 = R4 | R5
            # Mix the rotated value and the next register into Rreg.
            reg_reg(OP_XOR, reg, reg, 6),            # Rreg ^= R6
            reg_reg(OP_XOR, reg, reg, neighbour),    # Rreg ^= R(reg+1 mod 4)
            # Mask against the register two positions on, then fold back in.
            reg_reg(OP_AND, 7, reg, opposite),       # R7 = Rreg & R(reg+2 mod 4)
            reg_reg(OP_XOR, reg, reg, 7),            # Rreg ^= R7
        ]

    return program

def execute_instruction(cpu, instruction):
    """Tick the CPU clock once and, if the clock is high, run one instruction.

    The instruction word is written into ``cpu.memory`` at the address the
    program counter currently holds, and then ``cpu.execute_instruction`` is
    called with the clock value.

    NOTE(review): when the tick does not exceed ``VTH`` the function returns
    without storing or executing anything, so the caller's instruction is
    dropped silently.  Whether ``tick()`` can return such a value depends on
    the clock implementation, which is not visible here.

    Args:
        cpu: CPU object exposing ``clock``, ``control_unit``, ``memory`` and
            ``execute_instruction``.
        instruction: Integer instruction word to execute.
    """
    from logic_gates import VTH  # voltage threshold the clock value is compared to

    clock_level = cpu.clock.tick()
    if not (clock_level > VTH):
        return

    # Place the word where the CPU will fetch from, then let it execute.
    fetch_address = cpu.control_unit.program_counter.read()
    cpu.memory.write(fetch_address, instruction)
    cpu.execute_instruction(clock_level)

def test_cpu_performance():
    """Run the vector-add and crypto programs on a virtual CPU and print timings.

    Steps:

    1. Create a 4-core CPU and give it a fresh ``Memory(MEMORY_SIZE)``.
    2. Fill two operand vectors in memory, run the vector-addition program
       and print the first ten result cells next to the expected sums.
    3. Run the crypto-style program and print the final values of R0..R3.
    4. Print combined time, instructions per second, clock frequency and
       the number of cores with at least one active thread context.

    Timing uses ``time.perf_counter()`` and covers only the instruction
    execution loops; memory initialisation and progress printing are kept
    outside the timed regions so they do not skew the reported figures.

    The function only prints; it returns ``None`` and asserts nothing.
    """
    print("Initializing Virtual CPU for performance testing...")
    cpu = CPU(num_cores=4, clock_freq=2000)  # 4 cores, clock_freq=2000 (printed as Hz below)
    cpu.memory = Memory(MEMORY_SIZE)  # Replace the CPU's memory with a fresh instance

    # Test 1: Vector Addition using SIMD
    # Layout must match create_vector_add_test: operands at 0 and
    # vector_size, results at 2 * vector_size.
    vector_size = 64
    second_base = vector_size
    result_base = 2 * vector_size

    print("\nTest 1: Vector Addition using SIMD")
    vector_program = create_vector_add_test(size=vector_size)

    # Initialize memory with test data (not part of the timed region)
    for i in range(vector_size):
        cpu.memory.write(i, i % 256)  # First vector
        cpu.memory.write(second_base + i, (i * 2) % 256)  # Second vector

    # Execute vector program manually
    print("Running vector addition...")
    start_time = time.perf_counter()
    for instruction in vector_program:
        execute_instruction(cpu, instruction)
    vector_time = time.perf_counter() - start_time

    print(f"Vector addition time: {vector_time:.4f} seconds")
    print(f"Instructions executed: {len(vector_program)}")
    print(f"Cache hit rate: {cpu.get_cache_hit_rate():.2f}%")

    # Show the first few results next to the value expected from the inputs
    # written above (sum reduced modulo 256).
    for i in range(0, 10):
        result = cpu.memory.read(result_base + i)
        expected = (i % 256 + (i * 2) % 256) % 256
        print(f"Vector add result[{i}]: {result} (expected: {expected})")

    # Test 2: Cryptographic Operations
    print("\nTest 2: Cryptographic Operations")
    crypto_program = create_crypto_test()

    # Execute crypto program manually
    print("Running cryptographic operations...")
    start_time = time.perf_counter()
    for instruction in crypto_program:
        execute_instruction(cpu, instruction)
    crypto_time = time.perf_counter() - start_time

    print(f"Crypto computation time: {crypto_time:.4f} seconds")
    print(f"Instructions executed: {len(crypto_program)}")
    print(f"Cache hit rate: {cpu.get_cache_hit_rate():.2f}%")

    # Show final register values
    for i in range(4):
        value = cpu.registers[i].read()
        print(f"Register {i} final value: 0x{value:08x}")

    # Calculate approximate IPS (Instructions Per Second)
    total_time = vector_time + crypto_time
    total_instructions = len(vector_program) + len(crypto_program)
    # A zero elapsed time would otherwise raise ZeroDivisionError.
    ips = total_instructions / total_time if total_time > 0 else float("inf")

    print("\nOverall Performance:")
    print(f"Total execution time: {total_time:.4f} seconds")
    print(f"Instructions per second: {ips:.2f}")
    print(f"Clock frequency: {cpu.clock.current_frequency} Hz")

    # Show core utilization: a core counts as active when any of its thread
    # contexts has a truthy 'active' entry.
    active_cores = sum(
        1 for core in cpu.cores
        if any(ctx['active'] for ctx in core.thread_contexts)
    )
    print(f"Active cores: {active_cores} of {len(cpu.cores)}")
    print(f"Threads per core: {THREADS_PER_CORE}")

# Run the performance test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_cpu_performance()