def test_kmeanspp_initialization():
    """Compare ellipse widths after random and k-means++ initialization.

    Three Gaussian clusters are sampled and the second feature is scaled
    by a large factor. A 3-component GMM is then fitted once with
    ``init_params="random"`` and once with ``init_params="kmeans++"``.
    For each fit the mean pairwise distance between the ellipse widths
    is computed; the test asserts that this value is smaller for
    k-means++ than for random initialization.
    """
    random_state = check_random_state(1)

    n_samples = 300
    n_features = 2
    # Explicit per-cluster size avoids relying on ``-n_samples // 3``,
    # which parses as ``(-n_samples) // 3`` and only equals
    # ``-(n_samples // 3)`` when n_samples is divisible by 3.
    n_per_cluster = n_samples // 3

    # Every row is overwritten below, so an uninitialized buffer is fine.
    X = np.empty((n_samples, n_features))
    X[:n_per_cluster, :] = random_state.multivariate_normal(
        [0.0, 1.0], [[0.5, -1.0], [-1.0, 5.0]], size=(n_per_cluster,))
    X[n_per_cluster:-n_per_cluster, :] = random_state.multivariate_normal(
        [-2.0, -2.0], [[3.0, 1.0], [1.0, 1.0]], size=(n_per_cluster,))
    X[-n_per_cluster:, :] = random_state.multivariate_normal(
        [3.0, 1.0], [[3.0, -1.0], [-1.0, 1.0]], size=(n_per_cluster,))

    # artificial scaling, makes standard implementation fail
    # either the initial covariances have to be adjusted or we have
    # to normalize the dataset
    X[:, 1] *= 10000.0

    # Fit in the same order as before ("random" first) because both fits
    # draw from the same shared random state.
    average_widths = {}
    for init_params in ("random", "kmeans++"):
        gmm = GMM(n_components=3, random_state=random_state)
        gmm.from_samples(X, init_params=init_params)
        widths = np.array(
            [width for _, (_, width, _) in gmm.to_ellipses()]
        )[:, np.newaxis]
        average_widths[init_params] = np.mean(pdist(widths))

    # random initialization produces uneven covariance scaling
    assert_less(average_widths["kmeans++"], average_widths["random"])
def test_ellipses():
    """Test equiprobable ellipses.

    Builds a two-component GMM with axis-aligned (diagonal) covariances
    and checks the mean, angle, width and height that ``to_ellipses``
    reports for each component.
    """
    # NOTE(review): another definition of ``test_ellipses`` exists in this
    # file; the later definition shadows the earlier one, so only one of
    # them is actually run. Confirm and remove the redundant copy.
    random_state = check_random_state(0)

    means = np.array([[0.0, 1.0],
                      [2.0, -1.0]])
    covariances = np.array([[[0.5, 0.0], [0.0, 5.0]],
                            [[5.0, 0.0], [0.0, 0.5]]])
    gmm = GMM(n_components=2, priors=np.array([0.5, 0.5]), means=means,
              covariances=covariances, random_state=random_state)
    ellipses = gmm.to_ellipses()

    # Angles and axis lengths are computed floating-point values, so they
    # are compared with a tolerance instead of exact equality.
    mean, (angle, width, height) = ellipses[0]
    assert_array_almost_equal(means[0], mean)
    assert_array_almost_equal(angle, 0.5 * np.pi)
    assert_array_almost_equal(width, np.sqrt(5.0))
    assert_array_almost_equal(height, np.sqrt(0.5))

    mean, (angle, width, height) = ellipses[1]
    assert_array_almost_equal(means[1], mean)
    assert_array_almost_equal(angle, -np.pi)
    assert_array_almost_equal(width, np.sqrt(5.0))
    assert_array_almost_equal(height, np.sqrt(0.5))
def test_ellipses():
    """Test equiprobable ellipses.

    A GMM with two components and diagonal covariances is converted to
    ellipses; each ellipse's mean, angle, width and height are compared
    against the expected values.
    """
    # NOTE(review): ``test_ellipses`` is defined more than once in this
    # file. This later definition shadows the earlier one, so only one
    # test is collected. Confirm and drop the duplicate.
    random_state = check_random_state(0)

    means = np.array([[0.0, 1.0],
                      [2.0, -1.0]])
    covariances = np.array([[[0.5, 0.0], [0.0, 5.0]],
                            [[5.0, 0.0], [0.0, 0.5]]])
    gmm = GMM(n_components=2, priors=np.array([0.5, 0.5]), means=means,
              covariances=covariances, random_state=random_state)
    ellipses = gmm.to_ellipses()

    # Both components share the same expected axis lengths; only the
    # expected angle differs.
    expected_width = np.sqrt(5.0)
    expected_height = np.sqrt(0.5)
    expected_angles = (0.5 * np.pi, -np.pi)

    for k, expected_angle in enumerate(expected_angles):
        mean, (angle, width, height) = ellipses[k]
        assert_array_almost_equal(means[k], mean)
        # Tolerance-based comparison: these are computed floats, so exact
        # equality would be fragile.
        assert_array_almost_equal(angle, expected_angle)
        assert_array_almost_equal(width, expected_width)
        assert_array_almost_equal(height, expected_height)
# Draw the mean curve (columns 0 and 1 of means_over_time) as a red line.
plt.plot(means_over_time[:, 0], means_over_time[:, 1], c="r", lw=2)
# Shade a band of +/- 1.96 * y_stds around the curve (1.96 standard
# deviations is the usual ~95% interval under a normal assumption).
plt.fill_between(means_over_time[:, 0],
                 means_over_time[:, 1] - 1.96 * y_stds,
                 means_over_time[:, 1] + 1.96 * y_stds,
                 color="r", alpha=0.5)

if plot_covariances:
    # Colors are shared across all factors, so the r/g/b cycle continues
    # from one factor to the next.
    colors = cycle(["r", "g", "b"])
    # Eight scaling factors, evenly spaced from 0.5 to 4.0, passed to
    # to_ellipses.
    for factor in np.linspace(0.5, 4.0, 8):
        # Build a GMM over all but the first dimension of the fitted model
        # by dropping index 0 from the means and covariances.
        # NOTE(review): presumably dimension 0 is time -- confirm.
        new_gmm = GMM(n_components=len(gmm.means), priors=gmm.priors,
                      means=gmm.means[:, 1:],
                      covariances=gmm.covariances[:, 1:, 1:],
                      random_state=gmm.random_state)
        for mean, (angle, width, height) in new_gmm.to_ellipses(factor):
            # to_ellipses angles are converted to degrees for Ellipse.
            ell = Ellipse(xy=mean, width=width, height=height,
                          angle=np.degrees(angle))
            ell.set_alpha(0.15)
            ell.set_color(next(colors))
            plt.gca().add_artist(ell)

plt.xlabel("$x_1$")
plt.ylabel("$x_2$")

# Second panel: overlay the raw data as faint black lines.
plt.subplot(122)
plt.title("Confidence Interval from Raw Data")
# Transposed so that each column of the plotted arrays is one index along
# X's first axis.
# NOTE(review): presumably X is (n_demonstrations, n_steps, n_dims) -- confirm.
plt.plot(X[:, :, 0].T, X[:, :, 1].T, c="k", alpha=0.1)