File size: 2,626 Bytes

7a1d574

import random
import math
import sys

def distance(x1, x2):
    return ((abs(x1[0] - x2[0]))**2 + (abs(x1[1] - x2[1])**2)) **(1/2)

data_points = [(1,7),(1,11),(3,17),(7,18),(8,4),(8,12),(11,7),(12,14),(13,17),(16,11)]

#run all 1,2,3..,10-means clustering
for K in range(1,len(data_points)+1):
    print("==============================")
    print("K: {0}".format(K))

    # pick random k pointS
    C = random.sample(data_points,K)

    #prevent infinite loop criteria
    iteration = 1000000

    #converage criteria for minmumal decrease in sum of square errors
    min_decrease_sse = 1e-5

    #intital conditions for comparsions
    previous_sse = float("inf")
    current_iteration = 0
    
    while(True):
        current_iteration +=1
        current_sse = 0.0
        
        #create cluster membership dictonary for each centroids
        cms_dict = {}
        for c in C:
            cms_dict[c] = []

        for x in data_points:
            max = float("inf")
            closest = ()
            #compute the distance from x to each centroid
            for c in C:
                d= distance(x,c)
                if(d <= max):
                    max = d
                    closest = c

            #assign x to the closet centroid and its cluster memberships
            cms_dict[closest].append(x)
        
        #recomputing new centroids
        C = []
        for cm in cms_dict:
            cm_total_distance = 0.0
            new_c_x = 0.0
            new_c_y = 0.0
            
            #recompute the centroids using the current cluster memberships
            for x in cms_dict[cm]:
                new_c_x += x[0]
                new_c_y += x[1]
            new_c_x /= len(cms_dict[cm])
            new_c_y /= len(cms_dict[cm])
            C.append((new_c_x,new_c_y))
            
            #calucation the sum of squared error
            for x in cms_dict[cm]:
                cm_total_distance += distance(x,(new_c_x,new_c_y))**2
            current_sse += cm_total_distance

        #getting the decrease value in the sse
        if(previous_sse - current_sse <= min_decrease_sse or current_iteration > iteration):
            print("Final SSE: {0}".format(current_sse))
            print("Final Iteration: {0}".format(current_iteration))
            i = 0
            for cm in cms_dict:
                i+=1
                print("Cluster {0}: {1}".format(i,cms_dict[cm]))
            break
        else:
            print("Current SSE: {0}".format(current_sse))
            previous_sse = current_sse