# Raymond / k-mean-clustering.py
# ewdlop's picture
# Upload 6 files (#1)
# 7a1d574 verified
import random
import math
import sys
def distance(x1, x2):
    """Return the Euclidean distance between two 2-D points.

    Args:
        x1: first point as a (x, y) pair.
        x2: second point as a (x, y) pair.

    Returns:
        The Euclidean distance as a float.
    """
    # math.hypot computes sqrt(dx**2 + dy**2) robustly; the original's
    # abs() calls were redundant since the differences are squared.
    return math.hypot(x1[0] - x2[0], x1[1] - x2[1])
# Sample 2-D points to cluster.
data_points = [(1, 7), (1, 11), (3, 17), (7, 18), (8, 4),
               (8, 12), (11, 7), (12, 14), (13, 17), (16, 11)]

# Run k-means clustering for every k from 1 to the number of points.
for K in range(1, len(data_points) + 1):
    print("==============================")
    print("K: {0}".format(K))
    # Pick K distinct random points as the initial centroids.
    C = random.sample(data_points, K)
    # Hard cap on iterations to prevent an infinite loop.
    iteration = 1000000
    # Convergence criterion: minimal decrease in the sum of squared errors.
    min_decrease_sse = 1e-5
    # Initial conditions for the convergence comparison.
    previous_sse = float("inf")
    current_iteration = 0
    while True:
        current_iteration += 1
        current_sse = 0.0
        # Cluster-membership dictionary: centroid -> list of assigned points.
        cms_dict = {c: [] for c in C}
        for x in data_points:
            # 'best_d' replaces the original 'max', which shadowed the builtin.
            best_d = float("inf")
            closest = ()
            # Assign x to the nearest centroid; '<=' keeps the original
            # tie-breaking behavior (later centroid wins on equal distance).
            for c in C:
                d = distance(x, c)
                if d <= best_d:
                    best_d = d
                    closest = c
            cms_dict[closest].append(x)
        # Recompute each centroid as the mean of its cluster members.
        C = []
        for cm, members in cms_dict.items():
            if not members:
                # BUG FIX: a cluster can end up empty after reassignment;
                # the original divided by len(members) == 0 and crashed.
                # Keep the old centroid instead (contributes 0 to the SSE).
                C.append(cm)
                continue
            new_c_x = sum(p[0] for p in members) / len(members)
            new_c_y = sum(p[1] for p in members) / len(members)
            C.append((new_c_x, new_c_y))
            # Accumulate this cluster's contribution to the total SSE.
            current_sse += sum(distance(p, (new_c_x, new_c_y)) ** 2
                               for p in members)
        # Stop when the SSE improvement is negligible or the cap is hit.
        if previous_sse - current_sse <= min_decrease_sse or current_iteration > iteration:
            print("Final SSE: {0}".format(current_sse))
            print("Final Iteration: {0}".format(current_iteration))
            for i, members in enumerate(cms_dict.values(), start=1):
                print("Cluster {0}: {1}".format(i, members))
            break
        else:
            print("Current SSE: {0}".format(current_sse))
            previous_sse = current_sse