"""
Test parallel performance with vector operations using shared memory
"""
from cpu_driver import CPUDriver
import numpy as np
from concurrent.futures import wait
import multiprocessing as mp

def create_test_program(size=1000):
    """Build the list of VADD operation descriptors used by the benchmark.

    A shared-memory buffer holding ``size`` rows of 256 float32 values is
    allocated and filled with random data, and one operation descriptor is
    generated per row. Operations are spread round-robin over 4 core ids;
    the thread id alternates between 0 and 1 for each successive group of
    4 operations.

    Args:
        size: Number of vector operations (and 256-element vectors) to create.

    Returns:
        A list of ``size`` dicts, each with the keys ``type``, ``core_id``,
        ``thread_id``, ``vector_offset``, ``dest`` and ``size``.
    """
    vector_len = 256
    num_cores = 4
    threads_per_core = 2

    # Create vector data in shared memory
    shared_vectors = mp.RawArray('f', size * vector_len)
    vectors_np = np.frombuffer(shared_vectors, dtype=np.float32).reshape(size, vector_len)
    vectors_np[:] = np.random.rand(size, vector_len)
    # NOTE(review): the buffer is neither returned nor referenced by the
    # operation descriptors (they carry offsets only), so callers cannot reach
    # it through this function's result -- confirm how the driver is expected
    # to locate the vector data.

    # Create parallel vector operations, distributed across cores and threads.
    # The list must be returned: the caller takes len() of it and hands it to
    # the driver.
    return [
        {
            'type': 'VADD',
            'core_id': i % num_cores,
            'thread_id': (i // num_cores) % threads_per_core,
            'vector_offset': i * vector_len,
            'dest': i * vector_len,
            'size': vector_len,
        }
        for i in range(size)
    ]

def main():
    """Run the vector-operation benchmark and print throughput figures.

    Creates a ``CPUDriver`` sized to the machine's CPU count, builds a
    10,000-operation test program, submits it with ``execute_batch`` and
    waits on every returned result. Only submission and waiting are timed;
    driver construction and program generation are outside the measured
    interval.
    """
    import time

    core_count = mp.cpu_count()

    # Initialize CPU driver
    driver = CPUDriver(num_cores=core_count)  # Use all available CPU cores

    # Create and run parallel program
    program = create_test_program(size=10000)  # 10K vector operations
    print(f"Executing {len(program)} parallel vector operations on {core_count} cores...")

    # perf_counter() is monotonic and high-resolution, so the measured
    # interval is not affected by system clock adjustments.
    start = time.perf_counter()

    # Execute all operations in parallel
    results = driver.execute_batch(program)

    # Wait for all operations to complete
    # (each returned result is expected to expose a blocking wait() --
    # TODO confirm against CPUDriver.execute_batch)
    for result in results:
        result.wait()

    duration = time.perf_counter() - start
    # Guard against a zero-length interval so reporting never divides by zero.
    ops_per_sec = len(program) / duration if duration > 0 else float('inf')

    print("\nResults:")
    print(f"- Completed {len(program)} vector operations")
    print(f"- Total time: {duration:.3f} seconds")
    print(f"- Operations per second: {ops_per_sec:.2f}")
    print(f"- Operations per core per second: {ops_per_sec/core_count:.2f}")

# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()