"""Generate two small synthetic CSV datasets.

1. ``complex_anomalies.csv`` -- ten hand-written rows containing missing
   values (NaN) in ``Age`` and ``Credit_Score`` plus a few extreme values.
2. ``imbalanced_mission.csv`` -- ``n_rows`` randomly generated rows with a
   90/10 split in the ``target`` column and a one-hot encoded categorical
   feature.

Running the script writes both files and prints a confirmation message.
"""

from pathlib import Path

import numpy as np
import pandas as pd

# Output locations, kept exactly as in the original script.
ANOMALIES_CSV = r'C:\Users\muham\Desktop\AutoDS-Hackathon\complex_anomalies.csv'
IMBALANCED_CSV = r'C:\Users\muham\Desktop\AutoDS-Hackathon\imbalanced_mission.csv'

# ---------------------------------------------------------------------------
# Dataset 1: small table with missing values and extreme entries.
# ---------------------------------------------------------------------------
data1 = {
    # Two NaNs, plus an Age of 150 (presumably a deliberate anomaly, given
    # the output file name).
    'Age': [25, 30, 22, np.nan, 28, 150, 35, 40, np.nan, 32],
    'Income': [50000, 60000, 45000, 52000, 58000, 100000, 70000, 80000, 48000, 62000],
    # Each Savings value is exactly one tenth of the matching Income value.
    'Savings': [5000, 6000, 4500, 5200, 5800, 10000, 7000, 8000, 4800, 6200],
    # One NaN; 300 is far below every other score in the column.
    'Credit_Score': [700, 720, np.nan, 680, 710, 300, 750, 800, 690, 730],
    'Target': [1, 1, 0, 1, 0, 0, 1, 1, 0, 1],
}
df1 = pd.DataFrame(data1)

# to_csv refuses to write into a directory that does not exist, so create it.
Path(ANOMALIES_CSV).parent.mkdir(parents=True, exist_ok=True)
df1.to_csv(ANOMALIES_CSV, index=False)

# ---------------------------------------------------------------------------
# Dataset 2: randomly generated features with an imbalanced target.
# ---------------------------------------------------------------------------
n_rows = 200

# Derive the class counts from n_rows so the target column always has the
# same length as the feature columns (180 ones / 20 zeros when n_rows is 200).
n_minority = n_rows // 10
n_majority = n_rows - n_minority

# Fixed seed, and the same draw order as before (randn, rand, choice), so the
# generated values are reproducible.
np.random.seed(42)
data2 = {
    'Feature_A': np.random.randn(n_rows),
    'Feature_B': np.random.rand(n_rows) * 100,
    'Feature_C': np.random.choice(['Alpha', 'Beta', 'Gamma'], n_rows),
    'target': [1] * n_majority + [0] * n_minority,
}
df2 = pd.DataFrame(data2)

# Replace the categorical column with one indicator column per category.
df2 = pd.get_dummies(df2, columns=['Feature_C'])

Path(IMBALANCED_CSV).parent.mkdir(parents=True, exist_ok=True)
df2.to_csv(IMBALANCED_CSV, index=False)

print("Double 'Hard' Datasets created on Desktop/AutoDS-Hackathon/")
|
|
|