def test(file, max_n_components, n_classes):
    print('GaussianMixture for set: ' + file)
    dataset = utils.dataset_reader(file)
    X, y = utils.data_process(dataset)
    list_sse = []
    list_nmi = []
    for n_components in range(1, max_n_components + 1):
        gmm = GaussianMixture(n_components=n_components)
        gmm.fit(X)
        y_hat = gmm.predict(X)
        sse = utils.sum_of_squared_errors(X, y_hat, gmm.means_)
        nmi = utils.normalized_mutual_information(y, n_classes, y_hat, n_components)
        print('{0:2d} components, SSE: {1:.2f}, NMI: {2:.4f}'.format(
            n_components, sse, nmi))
        # print('iterations: ', gmm.n_iter_)
        # print(gmm.means_, gmm.covariances_, gmm.weights_)
        # print(gmm.lower_bound_)
        list_sse.append(sse)
        list_nmi.append(nmi)
    utils.plot_measure_vs_k('SSE', list_sse, range(1, max_n_components + 1))
    utils.plot_measure_vs_k('NMI', list_nmi, range(1, max_n_components + 1))

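# The utils helpers called in test() are not shown here. Below is a minimal
# sketch of the two metrics, assuming sklearn is available; the signatures
# mirror the call sites above (n_classes and n_components are accepted but
# unused, since sklearn's NMI needs only the two label vectors).
import numpy as np
from sklearn.metrics import normalized_mutual_info_score

def sum_of_squared_errors(X, y_hat, means):
    # Sum of squared distances from each point to its assigned component mean.
    return float(sum(np.sum((X[y_hat == k] - mu) ** 2)
                     for k, mu in enumerate(means)))

def normalized_mutual_information(y, n_classes, y_hat, n_components):
    return normalized_mutual_info_score(y, y_hat)
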
def update_server_model(self):
    # The model must be regenerated with the new average parameters. It cannot
    # simply be updated in place (it might be initialized again with wrong
    # parameters).
    self.model = GaussianMixture(X=self.init_dataset,
                                 n_components=self.args.components,
                                 random_state=self.random_state,
                                 is_quiet=True,
                                 init_params=self.args.init,
                                 weights_init=self.avg_clients_weights,
                                 means_init=self.avg_clients_means,
                                 precisions_init=self.avg_clients_precisions)

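# A minimal sketch (an assumption, not the repository's actual code) of how
# the avg_clients_* parameters consumed above could be produced: element-wise
# averaging of sklearn-style GMM parameters across clients. This naive scheme
# assumes the mixture components are aligned (same ordering) on every client.
import numpy as np

def average_client_parameters(client_models):
    weights = np.mean([m.weights_ for m in client_models], axis=0)
    weights /= weights.sum()  # keep the mixture weights a valid distribution
    means = np.mean([m.means_ for m in client_models], axis=0)
    precisions = np.mean([m.precisions_ for m in client_models], axis=0)
    return weights, means, precisions
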
def testPredictClasses(self):
    """
    Assert that torch.FloatTensor is handled correctly.
    """
    x = torch.randn(4, 2)
    n_components = np.random.randint(1, 100)

    model = GaussianMixture(n_components, x.size(1))
    model.fit(x)
    y = model.predict(x)

    # check that dimensionality of class memberships is (n)
    self.assertEqual(torch.Tensor(x.size(0)).size(), y.size())

def testPredictProbabilities(self):
    """
    Assert that torch.FloatTensor is handled correctly when returning
    class probabilities.
    """
    x = torch.randn(4, 2)
    n_components = np.random.randint(1, 100)

    model = GaussianMixture(n_components, x.size(1))
    model.fit(x)

    # check that y_p has dimensions (n, k)
    y_p = model.predict(x, probs=True)
    self.assertEqual(torch.Tensor(x.size(0), n_components).size(), y_p.size())

def __init__(self, args, init_dataset, clients, output_dir):
    self.random_state = None
    if args.seed:
        self.random_state = int(args.seed)
    self.model = GaussianMixture(X=init_dataset,
                                 n_components=args.components,
                                 random_state=self.random_state,
                                 is_quiet=True,
                                 init_params=args.init)
    self.init_dataset = init_dataset
    self.args = args
    self.rounds = args.rounds
    self.clients = clients
    self.fraction_clients = float(args.C)
    self.n_clients = int(args.K)
    self.n_clients_round = int(self.fraction_clients * self.n_clients)
    self.selected_clients = {}
    self.output_dir = output_dir
    self.metrics_history = {'aic': [], 'bic': [], 'll': []}

def main():
    n, d = 300, 2

    # generate some data points ..
    data = torch.Tensor(n, d).normal_()
    # .. and shift them around to non-standard Gaussians
    data[:n//2] -= 1
    data[:n//2] *= sqrt(3)
    data[n//2:] += 1
    data[n//2:] *= sqrt(2)

    # Next, the Gaussian mixture is instantiated and ..
    n_components = 2
    model = GaussianMixture(n_components, d)
    model.fit(data)
    # .. used to predict the data points as they were shifted
    y = model.predict(data)

    plot(data, y)

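# The plot() helper used by main() is not defined in this snippet. Below is a
# minimal sketch with matplotlib (an assumed dependency): scatter the points,
# colored by their predicted mixture component.
import matplotlib.pyplot as plt

def plot(data, y):
    # data: (n, d) tensor of points, y: (n,) tensor of component indices
    plt.scatter(data[:, 0].numpy(), data[:, 1].numpy(), c=y.numpy(), s=10)
    plt.title('GaussianMixture predictions')
    plt.show()
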
def testEmMatchesSkLearn(self):
    """
    Assert that log-probabilities (E-step) and parameter updates (M-step)
    approximately match those of sklearn.
    """
    d = 20
    n_components = np.random.randint(1, 100)

    # (n, k, d)
    x = torch.randn(40, 1, d)
    # (n, d)
    x_np = np.squeeze(x.data.numpy())

    var_init = torch.ones(1, n_components, d) - .4

    model = GaussianMixture(n_components, d, var_init=var_init)
    model_sk = sklearn.mixture.GaussianMixture(
        n_components,
        covariance_type="diag",
        init_params="random",
        means_init=np.squeeze(model.mu.data.numpy()),
        precisions_init=np.squeeze(1. / np.sqrt(var_init.data.numpy())))
    model_sk._initialize_parameters(x_np, np.random.RandomState())

    log_prob_sk = model_sk._estimate_log_prob(x_np)
    log_prob = model._estimate_log_prob(x)

    # Test whether log-probabilities are approximately equal
    np.testing.assert_almost_equal(np.squeeze(log_prob.data.numpy()),
                                   log_prob_sk,
                                   decimal=2,
                                   verbose=True)

    _, log_resp_sk = model_sk._e_step(x_np)
    _, log_resp = model._e_step(x)

    # Test whether E-steps are approximately equal
    np.testing.assert_almost_equal(np.squeeze(log_resp.data.numpy()),
                                   log_resp_sk,
                                   decimal=0,
                                   verbose=True)

    # The M-step consumes the log-responsibilities from the E-step
    model_sk._m_step(x_np, log_resp_sk)
    pi_sk = model_sk.weights_
    mu_sk = model_sk.means_
    var_sk = model_sk.covariances_

    pi, mu, var = model._m_step(x, log_resp)

    # Test whether pi ..
    np.testing.assert_almost_equal(np.squeeze(pi.data.numpy()),
                                   pi_sk,
                                   decimal=1,
                                   verbose=True)
    # .. mu ..
    np.testing.assert_almost_equal(np.squeeze(mu.data.numpy()),
                                   mu_sk,
                                   decimal=1,
                                   verbose=True)
    # .. and var are approximately equal
    np.testing.assert_almost_equal(np.squeeze(var.data.numpy()),
                                   var_sk,
                                   decimal=1,
                                   verbose=True)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 14 19:53:43 2018

@author: Garrett
"""

from kmeans import KMeans
#from sklearn.cluster import KMeans
from gmm import GaussianMixture
import numpy as np

X = np.array([[2, 2], [3, 4], [1, 0],
              [101, 2], [102, 4], [100, 0]])

kmeans = KMeans(n_clusters=2).fit(X)
#print(kmeans.labels_)
#print(kmeans.predict(np.array([[0, 0], [4, 4]])))
#print(kmeans.cluster_centers_)

gmm = GaussianMixture(n_components=2).fit(X)
print('gmm predict ', gmm.predict(X))
#print(gmm.predict(np.array([[0, 0], [4, 4]])))
print('gmm.means_ ', gmm.means_)
print('gmm.covariances_ ', gmm.covariances_)
print('gmm.n_iter_ ', gmm.n_iter_)

train_dataset, train_dataset_labels, _ = get_dataset(args)
print_configuration(args, train_dataset, False)
save_configuration(args, train_dataset, output_dir, False)

# Init the Gaussian Mixture Model
seed = None
if args.seed:
    seed = int(args.seed)

# Prepare server --> init_dataset is given by 0.5% of the train_dataset randomly sampled
# init_dataset_size = int(train_dataset.shape[0] * 0.005)
# init_dataset = train_dataset[np.random.choice(train_dataset.shape[0], init_dataset_size, replace=False)]
init_dataset = train_dataset

model = GaussianMixture(X=init_dataset,
                        n_components=args.components,
                        random_state=seed,
                        init_params=args.init)

init_metrics = {
    'aic': model.aic(train_dataset),
    'bic': model.bic(train_dataset),
    'll': model.score(train_dataset)
}

model.fit(train_dataset, args.epochs, train_dataset_labels, args, output_dir)

predicted_labels = model.predict_proba(train_dataset).tolist()
predicted_labels = np.array(predicted_labels)

print('\nSaving images...')

def gmm(opt):
    return GaussianMixture(opt.GMM_NUM_COMPONENTS)

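# A hypothetical usage sketch: opt is assumed to be any options object that
# exposes a GMM_NUM_COMPONENTS attribute, e.g. an argparse.Namespace.
from argparse import Namespace

opt = Namespace(GMM_NUM_COMPONENTS=2)
model = gmm(opt)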