k_means

K-means clustering with scikit-learn, following the tutorial at https://www.youtube.com/watch?v=i-gxm_ofjBo.

  • data
  • k means
  • sse
  • silhouette score
12 import matplotlib.pyplot as plt
13 import numpy as np
14
15 from EasyFEA import Display
16
17 from sklearn.cluster import KMeans
18 from sklearn.datasets import make_blobs
19 from sklearn.metrics import silhouette_score
20
if __name__ == "__main__":
    # Walkthrough based on https://www.youtube.com/watch?v=i-gxm_ofjBo

    Display.Clear()

    # Number of samples to generate and number of clusters to fit.
    N = 1000
    K = 5

    # make_blobs returns (samples, labels); only the samples are kept.
    data, _ = make_blobs(
        n_samples=N, n_features=2, centers=5, cluster_std=1, random_state=101
    )

    # Fit K clusters and get one cluster index per sample.
    kmeans = KMeans(n_clusters=K, n_init="auto")
    clusters = kmeans.fit_predict(data)

    # Raw samples on their own axes.
    ax = Display.Init_Axes()
    ax.plot(data[:, 0], data[:, 1], "bo")
    ax.set_title("data")

    # Samples drawn cluster by cluster, each with the mean of its members
    # marked by a black "+" on top (zorder=10).
    ax_c = Display.Init_Axes()
    for label in range(K):
        members = data[clusters == label]
        centroid = members.mean(axis=0)
        ax_c.plot(members[:, 0], members[:, 1], "o")
        ax_c.plot(
            centroid[0], centroid[1], ls="", marker="+", c="black", zorder=10
        )

    # Refit for every cluster count in [2, 20) and record, per fit, the
    # model inertia (stored as "sse") and the silhouette score.
    array_k = np.arange(2, 20)
    sse = []
    sil_score = []
    for n_clusters in array_k:
        model = KMeans(n_clusters, n_init="auto")
        labels = model.fit_predict(data)
        sse.append(model.inertia_)
        sil_score.append(silhouette_score(data, labels))

    # One set of axes per metric, plotted against the cluster count.
    for title, values in (("sse", sse), ("silhouette score", sil_score)):
        metric_ax = Display.Init_Axes()
        metric_ax.plot(array_k, values)
        metric_ax.set_title(title)

    plt.show()

Total running time of the script: (0 minutes 1.589 seconds)

Gallery generated by Sphinx-Gallery