"Euclidean"Distance
Here i indexes the dimensions; since we are working in 2 dimensions, i takes the values 1 and 2. A is a centroid and B is a data point.
$$d(A, B) = \sqrt{\sum_{i}(A_i - B_i)^2}$$
import numpy as np # importing this for arr modifications
import matplotlib.pyplot as plt # Importing this to see
class Kmeans:
    """K-means clustering with uniformly random centroid initialisation.

    After ``fit`` has run, ``centeroids`` holds a ``(k, n_features)`` array
    with one row per cluster centre.
    """

    def __init__(self, k=3):
        """Store the number of clusters ``k``; centroids are created by ``fit``."""
        self.k = k
        self.centeroids = None

    @staticmethod
    def Eucid_dis(data_point, centeroids):
        """Return the Euclidean distance from one point to every centroid.

        ``data_point`` is a 1-D array of features and ``centeroids`` a 2-D
        array with one centroid per row; the result has one distance per row.
        """
        return np.sqrt(np.sum((centeroids - data_point) ** 2, axis=1))

    def _nearest_centroid(self, x):
        """Return, for each row of ``x``, the index of its closest centroid."""
        # Broadcast to (n_points, k, n_features). Squared distances are
        # enough here because the square root does not change the argmin.
        deltas = x[:, np.newaxis, :] - self.centeroids[np.newaxis, :, :]
        return np.argmin(np.sum(deltas ** 2, axis=2), axis=1)

    def _recompute_centroids(self, x, labels):
        """Return the mean of each cluster, keeping the centroid of empty ones."""
        updated = []
        for i in range(self.k):
            members = x[labels == i]
            if len(members) == 0:
                # No point was assigned to this cluster: leave it in place
                # rather than taking the mean of an empty set (NaN).
                updated.append(self.centeroids[i])
            else:
                updated.append(members.mean(axis=0))
        return np.array(updated)

    def fit(self, x, maxItr=200, tol=0.0001):
        """Cluster the rows of ``x`` and return one cluster index per row.

        Args:
            x: 2-D array-like of shape ``(n_points, n_features)``.
            maxItr: Maximum number of assign/update iterations.
            tol: Iteration stops once no centroid coordinate moves by
                ``tol`` or more between two consecutive iterations.

        Returns:
            1-D integer array of length ``n_points`` holding, for each point,
            the index of the nearest centroid in ``self.centeroids``.
        """
        x = np.asarray(x)
        # Draw the initial centroids uniformly inside the bounding box of the data.
        self.centeroids = np.random.uniform(
            np.amin(x, axis=0),
            np.amax(x, axis=0),
            size=(self.k, x.shape[1]),
        )
        for _ in range(maxItr):
            labels = self._nearest_centroid(x)
            new_centeroids = self._recompute_centroids(x, labels)
            # Use the absolute shift: a signed difference would look
            # "converged" whenever every coordinate moved in the same direction.
            shift = np.max(np.abs(new_centeroids - self.centeroids))
            self.centeroids = new_centeroids
            if shift < tol:
                break
        # Label against the final centroids so that the returned labels and
        # ``self.centeroids`` always describe the same partition.
        return self._nearest_centroid(x)
Passing the Values to the Functions
# Toy data set: 100 random integer points with both coordinates in [0, 100).
random_pts = np.random.randint(0, 100, size=(100, 2))

# Cluster the points into three groups; fit() returns one cluster index per point.
kmean = Kmeans(k=3)
labels = kmean.fit(random_pts)

# Data points, coloured by the cluster index they were assigned.
plt.scatter(*random_pts.T, c=labels)

# Centroids drawn as large stars, coloured by their own cluster index.
plt.scatter(
    *kmean.centeroids.T,
    c=range(len(kmean.centeroids)),
    marker="*",
    s=200,
)
plt.show()
Here the first scatter plot shows the random data points, coloured by the cluster each one was assigned to,
and the second one shows the centroids (star markers) found for those points.
The resulting output is shown below.