def _init_params(self, x):
    """Initialize GMM parameters from a k-means clustering of the data.

    Runs k-means with ``self.n_components`` centers and derives the initial
    mixture parameters from the resulting hard assignment.

    :param x: (n_samples, n_features) features.
    :return: Initialized GMM parameters:
        pi: (n_components,) mixing coefficients
        mean: (n_components, n_features) means
        cov: (n_components, n_features, n_features) covariances
    """
    n_samples, n_features = x.shape
    k_means = KMeans(self.n_components)
    assigned_indices = k_means.fit_predict(x)
    mean_init = k_means.centers
    pi_init = np.zeros(self.n_components)
    cov_init = np.zeros((self.n_components, n_features, n_features))
    for k in range(self.n_components):
        cond = assigned_indices == k
        # Cluster size, computed once and reused for both pi and cov.
        # NOTE(review): an empty cluster yields a 0/0 here, exactly as in the
        # original code — presumably k-means never leaves a center unassigned;
        # confirm against the KMeans implementation.
        n_k = np.count_nonzero(cond)
        d_k = x[cond] - mean_init[k]
        pi_init[k] = n_k / n_samples
        # Biased (MLE) covariance of cluster k around its k-means center.
        cov_init[k] = np.dot(d_k.T, d_k) / n_k
    return pi_init, mean_init, cov_init
def main():
    """Cluster the iris dataset with k-means and plot the first two features."""
    iris_url = ('http://archive.ics.uci.edu/ml/machine-learning-databases/'
                'iris/iris.data')
    x_col_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
    y_col_name = 'label'
    iris_df = pd.read_csv(iris_url, names=x_col_names + [y_col_name])
    x_data = np.array(iris_df[x_col_names])

    # Fit k-means with 3 centers (iris has 3 species) using k-means++ seeding.
    k_means = KMeans(n_centers=3, init='k-means++',
                     random_state=np.random.RandomState(0))
    y_pred = k_means.fit_predict(x_data)
    centers = k_means.centers

    # Scatter each cluster in its own color, then overlay the centers.
    plot_colors = ['r', 'g', 'b']
    for ci in range(k_means.n_centers):
        mask = y_pred == ci
        plt.scatter(x_data[mask, 0], x_data[mask, 1], c=plot_colors[ci])
    plt.scatter(centers[:, 0], centers[:, 1], c='y', label='centers')
    plt.title('k-means example on the iris dataset')
    plt.xlabel(x_col_names[0])
    plt.ylabel(x_col_names[1])
    plt.legend()
    plt.show()