K Means Clustering Using Numpy

Machine Learning

K Means Clustering Using Numpy

Photo by h heyerlein on Unsplash

"Euclidean"Distance

Here i indexes the dimensions (we are working in 2 dimensions, so i runs from 1 to 2), A is a centroid and B is a data point.

$$d(A,B)=\sqrt{\sum_{i}(A_i-B_i)^2}$$

import numpy as np # importing this for arr modifications
import matplotlib.pyplot as plt # Importing this to see
class Kmeans:
    """Minimal k-means clustering implemented with NumPy.

    Usage: create the object with the desired number of clusters, call
    ``fit`` with an ``(n_samples, n_features)`` array, then read the
    cluster centres from ``centeroids``.
    """

    def __init__(self, k=3):
        # Number of clusters to search for.
        self.k = k
        # Array of shape (k, n_features) holding the cluster centres;
        # stays None until fit() has been called.
        self.centeroids = None

    @staticmethod
    def Eucid_dis(data_point, centeroids):
        """Return the Euclidean distance between ``data_point`` and each centroid.

        The squared differences are summed over the last (feature) axis, so
        a single 1-D point against a ``(k, n_features)`` centroid array
        yields ``k`` distances, and a ``(n, 1, n_features)`` batch of points
        broadcasts to an ``(n, k)`` distance matrix.
        """
        return np.sqrt(np.sum((centeroids - data_point) ** 2, axis=-1))

    def _assign(self, x):
        """Return, for every row of ``x``, the index of its nearest centroid."""
        # Insert a middle axis so every point is compared with every centroid
        # in one broadcast operation instead of a Python-level loop.
        distances = Kmeans.Eucid_dis(x[:, np.newaxis, :], self.centeroids)
        return np.argmin(distances, axis=1)

    def fit(self, x, maxItr=200, tol=0.0001):
        """Cluster the rows of ``x`` into ``self.k`` groups.

        Centroids are initialised uniformly at random inside the per-feature
        bounding box of the data, then refined by alternating an assignment
        step and a mean-update step.

        Args:
            x: array-like of shape (n_samples, n_features).
            maxItr: maximum number of assignment/update rounds.
            tol: stop as soon as no centroid coordinate moves by ``tol`` or
                more (in absolute value) during an update.

        Returns:
            1-D integer array with the cluster index of every row of ``x``.
            After the call, each non-empty cluster's row in
            ``self.centeroids`` is the mean of the points carrying that label.
        """
        x = np.asarray(x, dtype=float)
        self.centeroids = np.random.uniform(
            np.amin(x, axis=0), np.amax(x, axis=0), size=(self.k, x.shape[1])
        )

        y = None
        for _ in range(maxItr):
            y = self._assign(x)

            # A cluster that attracted no points keeps its previous centre.
            new_centeroids = self.centeroids.copy()
            for i in range(self.k):
                members = x[y == i]
                if len(members) > 0:
                    new_centeroids[i] = members.mean(axis=0)

            # Use the absolute displacement: a signed difference would treat
            # any move in the positive direction as "converged".
            shift = np.max(np.abs(new_centeroids - self.centeroids))
            self.centeroids = new_centeroids
            if shift < tol:
                break

        if y is None:
            # maxItr <= 0: no refinement requested, label against the
            # initial centroids instead of failing on an unbound variable.
            y = self._assign(x)
        return y

Passing the Values to the Functions

# Generate 100 random 2-D integer points with coordinates in [0, 100)
# and cluster them into three groups.
random_pts = np.random.randint(low=0, high=100, size=(100, 2))
kmean = Kmeans(k=3)
labels = kmean.fit(random_pts)

# Data points, coloured by the cluster each one was assigned to.
pts_x, pts_y = random_pts[:, 0], random_pts[:, 1]
plt.scatter(pts_x, pts_y, c=labels)

# Cluster centres drawn as large stars, one colour per cluster index.
centres = kmean.centeroids
plt.scatter(
    centres[:, 0],
    centres[:, 1],
    c=range(len(centres)),
    marker="*",
    s=200,
)
plt.show()

Here the first scatter call plots the random data points, coloured by the cluster each point was assigned to,

and the second one plots the centroids of those clusters as stars on top of the data points.

And the below is the output for it.