Example #1
0
    def _init_params(self, x):
        """Initialize GMM parameters by K-means clustering.

        Runs hard K-means with ``self.n_components`` clusters and derives the
        initial mixture parameters from the resulting assignment.

        :param x: (n_samples, n_features) features.
        :return: Initialized GMM parameters:
            pi: (n_components,) mixing coefficients
            mean: (n_components, n_features) means
            cov: (n_components, n_features, n_features) covariances
        """
        n_samples, n_features = x.shape

        # Cluster centers become the initial component means.
        k_means = KMeans(self.n_components)
        assigned_indices = k_means.fit_predict(x)
        mean_init = k_means.centers

        pi_init = np.zeros(self.n_components)
        cov_init = np.zeros((self.n_components, n_features, n_features))
        for k in range(self.n_components):
            cond = assigned_indices == k
            n_assigned = np.sum(cond)
            # Mixing coefficient = fraction of samples in cluster k.
            pi_init[k] = n_assigned / n_samples
            if n_assigned == 0:
                # Empty cluster: fall back to the identity covariance instead
                # of dividing by zero (which would produce a NaN matrix).
                cov_init[k] = np.eye(n_features)
                continue
            d_k = x[cond] - mean_init[k]
            # Biased (MLE) covariance of the samples assigned to cluster k.
            cov_init[k] = np.dot(d_k.T, d_k) / n_assigned

        return pi_init, mean_init, cov_init
Example #2
0
def main():
    """Cluster the iris dataset with k-means and plot the first two features."""
    iris_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/' \
               'iris/iris.data'

    x_col_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
    y_col_name = 'label'
    # The raw file has no header row, so supply the column names explicitly.
    iris_df = pd.read_csv(iris_url, names=x_col_names + [y_col_name])

    x_data = np.array(iris_df[x_col_names])

    # perform k-means clustering (fixed seed for reproducible assignments)
    k_means = KMeans(n_centers=3, init='k-means++',
                     random_state=np.random.RandomState(0))
    y_pred = k_means.fit_predict(x_data)
    centers = k_means.centers

    # plot each cluster in its own color; label each scatter call so the
    # legend lists the clusters, not just the centers
    plot_colors = ['r', 'g', 'b']
    for ci in range(k_means.n_centers):
        cluster_mask = y_pred == ci
        plt.scatter(x_data[cluster_mask, 0], x_data[cluster_mask, 1],
                    c=plot_colors[ci], label='cluster {}'.format(ci))

    plt.scatter(centers[:, 0], centers[:, 1], c='y', label='centers')

    plt.title('k-means example on the iris dataset')
    plt.xlabel(x_col_names[0])
    plt.ylabel(x_col_names[1])
    plt.legend()
    plt.show()
Example #3
0
    def _init_params(self, x):
        """Initialize GMM parameters by K-means clustering.

        Runs hard K-means with ``self.n_components`` clusters and derives the
        initial mixture parameters from the resulting assignment.

        :param x: (n_samples, n_features) features.
        :return: Initialized GMM parameters:
            pi: (n_components,) mixing coefficients
            mean: (n_components, n_features) means
            cov: (n_components, n_features, n_features) covariances
        """
        n_samples, n_features = x.shape

        # Cluster centers become the initial component means.
        k_means = KMeans(self.n_components)
        assigned_indices = k_means.fit_predict(x)
        mean_init = k_means.centers

        pi_init = np.zeros(self.n_components)
        cov_init = np.zeros((self.n_components, n_features, n_features))
        for k in range(self.n_components):
            cond = assigned_indices == k
            n_assigned = np.sum(cond)
            # Mixing coefficient = fraction of samples in cluster k.
            pi_init[k] = n_assigned / n_samples
            if n_assigned == 0:
                # Empty cluster: fall back to the identity covariance instead
                # of dividing by zero (which would produce a NaN matrix).
                cov_init[k] = np.eye(n_features)
                continue
            d_k = x[cond] - mean_init[k]
            # Biased (MLE) covariance of the samples assigned to cluster k.
            cov_init[k] = np.dot(d_k.T, d_k) / n_assigned

        return pi_init, mean_init, cov_init