|
|
import random
|
|
|
import math
|
|
|
import sys
|
|
|
|
|
|
def distance(x1, x2):
|
|
|
return ((abs(x1[0] - x2[0]))**2 + (abs(x1[1] - x2[1])**2)) **(1/2)
|
|
|
|
|
|
data_points = [(1,7),(1,11),(3,17),(7,18),(8,4),(8,12),(11,7),(12,14),(13,17),(16,11)]
|
|
|
|
|
|
|
|
|
for K in range(1,len(data_points)+1):
|
|
|
print("==============================")
|
|
|
print("K: {0}".format(K))
|
|
|
|
|
|
|
|
|
C = random.sample(data_points,K)
|
|
|
|
|
|
|
|
|
iteration = 1000000
|
|
|
|
|
|
|
|
|
min_decrease_sse = 1e-5
|
|
|
|
|
|
|
|
|
previous_sse = float("inf")
|
|
|
current_iteration = 0
|
|
|
|
|
|
while(True):
|
|
|
current_iteration +=1
|
|
|
current_sse = 0.0
|
|
|
|
|
|
|
|
|
cms_dict = {}
|
|
|
for c in C:
|
|
|
cms_dict[c] = []
|
|
|
|
|
|
for x in data_points:
|
|
|
max = float("inf")
|
|
|
closest = ()
|
|
|
|
|
|
for c in C:
|
|
|
d= distance(x,c)
|
|
|
if(d <= max):
|
|
|
max = d
|
|
|
closest = c
|
|
|
|
|
|
|
|
|
cms_dict[closest].append(x)
|
|
|
|
|
|
|
|
|
C = []
|
|
|
for cm in cms_dict:
|
|
|
cm_total_distance = 0.0
|
|
|
new_c_x = 0.0
|
|
|
new_c_y = 0.0
|
|
|
|
|
|
|
|
|
for x in cms_dict[cm]:
|
|
|
new_c_x += x[0]
|
|
|
new_c_y += x[1]
|
|
|
new_c_x /= len(cms_dict[cm])
|
|
|
new_c_y /= len(cms_dict[cm])
|
|
|
C.append((new_c_x,new_c_y))
|
|
|
|
|
|
|
|
|
for x in cms_dict[cm]:
|
|
|
cm_total_distance += distance(x,(new_c_x,new_c_y))**2
|
|
|
current_sse += cm_total_distance
|
|
|
|
|
|
|
|
|
if(previous_sse - current_sse <= min_decrease_sse or current_iteration > iteration):
|
|
|
print("Final SSE: {0}".format(current_sse))
|
|
|
print("Final Iteration: {0}".format(current_iteration))
|
|
|
i = 0
|
|
|
for cm in cms_dict:
|
|
|
i+=1
|
|
|
print("Cluster {0}: {1}".format(i,cms_dict[cm]))
|
|
|
break
|
|
|
else:
|
|
|
print("Current SSE: {0}".format(current_sse))
|
|
|
previous_sse = current_sse |