import numpy as np
from sklearn.cluster import KMeans

# Toy dataset: 15 two-dimensional points, one [x, y] pair per row.
df = np.array([
    [1, 4], [2, 2], [2, 5], [3, 3], [3, 4],
    [4, 7], [5, 6], [6, 4], [6, 7], [7, 6],
    [7, 9], [8, 7], [8, 9], [9, 4], [9, 8],
])

# Three clusters, k-means++ seeding, 10 restarts of at most 300 iterations each.
# No random_state is given, so the cluster numbering can differ between runs.
kmeans = KMeans(n_clusters=3, init='k-means++', max_iter=300, n_init=10)

# Fit the model and get the cluster index assigned to each row of df.
y_pred = kmeans.fit_predict(df)
from sklearn.cluster import KMeans

# Classic k-means: random initial centroids, 10 restarts, fixed seed so the
# results are reproducible.
kmeans = KMeans(init="random", n_clusters=3, n_init=10, max_iter=300, random_state=42)
# x_train is a placeholder and is not defined in this snippet:
# replace it with your own training dataset.
kmeans.fit(x_train)
# The lowest SSE value
print(kmeans.inertia_)
# Final locations of the centroid
print(kmeans.cluster_centers_)
# The number of iterations required to converge
print(kmeans.n_iter_)
# First five predicted labels
print(kmeans.labels_[:5])


# init controls the initialization technique. The standard version of the
# k-means algorithm is implemented by setting init to "random". Setting this
# to "k-means++" employs an advanced trick to speed up convergence, which
# you'll use later.

# n_clusters sets k for the clustering step. This is the most important
# parameter for k-means.

# n_init sets the number of initializations to perform. This is important
# because two runs can converge on different cluster assignments. The result
# returned is the run with the lowest SSE. Older scikit-learn releases
# performed ten runs by default; since version 1.4 the default is "auto"
# (ten runs for init="random", one for init="k-means++"), which is why the
# value is passed explicitly above.

# max_iter sets the number of maximum iterations for each initialization of
# the k-means algorithm.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Read data into variable Iris_data.
# Raw string: "\P" and "\I" are not valid escape sequences, so the plain
# literal triggers an invalid-escape warning; r"..." yields the same path.
Iris_data = pd.read_csv(r"D:\ProjectData\Iris.csv")
# Display first few rows of data (the bare expression is only rendered in an
# interactive session or notebook; wrap it in print() when run as a script).
Iris_data.head()