def plot():
    fig, axes = plt.subplots(4, 4)
    fig.set_size_inches(10, 10)
    for i, ax in enumerate(axes.flatten()):
        n_ex = 150
        n_in = 2
        n_classes = np.random.randint(2, 4)
        X, y = make_blobs(
            n_samples=n_ex, centers=n_classes, n_features=n_in, random_state=i
        )
        X -= X.mean(axis=0)

        # take best fit over 10 runs
        best_elbo = -np.inf
        for k in range(10):
            _G = GMM(C=n_classes, seed=i * 3)
            ret = _G.fit(X, max_iter=100, verbose=False)
            while ret != 0:
                print("Components collapsed; Refitting")
                ret = _G.fit(X, max_iter=100, verbose=False)

            if _G.best_elbo > best_elbo:
                best_elbo = _G.best_elbo
                G = _G

        ax = plot_clusters(G, X, ax)
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.set_title("# Classes: {}; Final VLB: {:.2f}".format(n_classes, G.best_elbo))

    plt.tight_layout()
    plt.savefig("img/plot.png", dpi=300)
    plt.close("all")
def __init__(self, n_components=1, n_mix=1, startprob=None, transmat=None,
             startprob_prior=None, transmat_prior=None, gmms=None, cvtype=None,
             var=3):
    """Create a hidden Markov model with GMM emissions.

    Parameters
    ----------
    n_components : int
        Number of states.
    """
    super(GMMHMM, self).__init__(n_components, startprob, transmat,
                                 startprob_prior=startprob_prior,
                                 transmat_prior=transmat_prior)

    # XXX: Hotfix for n_mix that is incompatible with the scikit's
    # BaseEstimator API
    self.n_mix = n_mix
    self.cvtype = cvtype
    self.var = var
    if gmms is None:
        gmms = []
        for x in range(self.n_components):
            if cvtype is None:
                g = GMM(n_mix)
            else:
                g = GMM(n_mix, cvtype=cvtype)
            gmms.append(g)
    self.gmms = gmms
def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
                                      posteriors, fwdlattice, bwdlattice,
                                      params):
    super(GMMHMM, self)._accumulate_sufficient_statistics(
        stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice, params)

    for state, g in enumerate(self.gmms):
        lgmm_posteriors = np.log(g.eval(obs)[1] + np.finfo(float).eps)
        lgmm_posteriors += np.log(posteriors[:, state][:, np.newaxis]
                                  + np.finfo(float).eps)
        gmm_posteriors = np.exp(lgmm_posteriors)
        tmp_gmm = GMM(g.n_components, cvtype=g.cvtype)
        tmp_gmm.n_features = g.n_features
        tmp_gmm.covars = _distribute_covar_matrix_to_match_cvtype(
            np.eye(g.n_features), g.cvtype, g.n_components)
        norm = tmp_gmm._do_mstep(obs, gmm_posteriors, params)

        if np.any(np.isnan(tmp_gmm.covars)):
            raise ValueError

        stats['norm'][state] += norm
        if 'm' in params:
            stats['means'][state] += tmp_gmm.means * norm[:, np.newaxis]
        if 'c' in params:
            if tmp_gmm.cvtype == 'tied':
                stats['covars'][state] += tmp_gmm._covars * norm.sum()
            else:
                cvnorm = np.copy(norm)
                shape = np.ones(tmp_gmm._covars.ndim)
                shape[0] = np.shape(tmp_gmm._covars)[0]
                cvnorm.shape = shape
                stats['covars'][state] += tmp_gmm._covars * cvnorm
def test_forward_gmm():
    batch_size = 10
    dimension_embedding = 7
    num_mixtures = 2
    gmm = GMM(num_mixtures, dimension_embedding)

    latent_vectors = torch.randn(batch_size, dimension_embedding)

    gmm.train()
    out = gmm(latent_vectors)
    print(out)
def main():
    n_components = 2

    x = _get_faithful_data()
    _, n_features = x.shape

    pi_init = np.random.uniform(size=n_components)
    pi_init = pi_init / np.sum(pi_init)
    mean_init = np.random.randn(n_components, n_features)
    cov_init = np.stack([np.random.uniform() * np.eye(n_features),
                         np.random.uniform() * np.eye(n_features)])

    gmm = GMM(n_components,
              pi_init=pi_init,
              mean_init=mean_init,
              cov_init=cov_init)
    history = gmm.fit(x).history

    save_history_as_video_file(x, n_components, history, 'gmm_em.mp4')
def __init__(self, *, rho, mixtures, DecoderType, **kwargs):
    super().__init__(DecoderType=DecoderType, **kwargs)
    self.rho = rho
    self.phi = Phi(0.0, float('inf'))

    self.mixtures = mixtures
    self.MM = GMM(mixtures, self.z_size)
    self.add_tail('MM', Identity(), self._MMLoss, remap=False)

    self._mirror = DecoderType(bottle_size=self.h_size, data_shape=self.data_shape)
    self.add_cond('disc', self._discriminator_cond_z)
    self.add_tail('disc', self._mirror, self._discriminator_loss)
def get_item_pdf(data, ncomps):
    data = npa(data)
    # print(data)
    # pl.scatter(data[:, 0], data[:, 1])
    gmm = GMM(dim=2, ncomps=ncomps, data=data, method="kmeans")
    gmm.em(data, nsteps=100)
    """
    for item in data:
        # gmm.pdf(j)
        p = gmm.pdf(item)
        print("P(y|θ):", gmm.pdf(item))
    """
    return gmm
def __init__(self, k=1, dim=1, means=None, precisions=None, weights=None,
             shrinkage=None, dof=None):
    """
    Initialize the structure, at least with the dimensions of the problem.
    At most, with what is necessary to compute the likelihood of a point
    under the model.
    """
    GMM.__init__(self, k, dim, 'full', means, precisions, weights)
    self.shrinkage = shrinkage
    self.dof = dof

    if self.shrinkage is None:
        self.shrinkage = np.ones(self.k)

    if self.dof is None:
        self.dof = np.ones(self.k)
def test_implementation(self):
    centers_ = np.array([[3, 3], [-3, -3]])
    pos_list, ground_truth = datasets.make_blobs(n_samples=100,
                                                 centers=centers_,
                                                 cluster_std=1,
                                                 random_state=0)
    np.random.seed(2020)
    gmm = GMM(n_components=2)
    gmm.fit(pos_list)
    gmm_standard = mixture.GaussianMixture(n_components=2)
    gmm_standard.fit(pos_list)

    self.assertTrue(np.linalg.norm(gmm.means_ - centers_) < 0.4)
    self.assertAlmostEqual(gmm_standard.lower_bound_, gmm.lower_bound_)
    assert_array_almost_equal(gmm.weights_, [0.5, 0.5])

    covariances_ground_truth = np.zeros([2, 2, 2])
    covariances_ground_truth[0, :, :] = np.eye(2)
    covariances_ground_truth[1, :, :] = np.eye(2)
    self.assertTrue(np.linalg.norm(covariances_ground_truth - gmm.covariances_) < 0.2)
def condition(self, indices, x):
    """Conditional distribution over given indices.

    Parameters
    ----------
    indices : array, shape (n_new_features,)
        Indices of dimensions that we want to condition.

    x : array, shape (n_new_features,)
        Values of the features that we know.

    Returns
    -------
    conditional : GMM
        Conditional GMM distribution p(Y | X=x).
    """
    n_features = self.n_dim - len(indices)
    priors = np.empty(self.n_components)
    means = np.empty((self.n_components, n_features))
    covariances = np.empty((self.n_components, n_features, n_features))

    for k in range(self.n_components):
        mvn = MVN(mean=self.means[k],
                  covariance=self.covariance[k],
                  random_state=self.random_state)
        conditioned = mvn.condition(indices, x)
        priors[k] = (self.alpha[k] *
                     mvn.marginalize(indices).to_probability_density(x))
        means[k] = conditioned.mean
        covariances[k] = conditioned.covariance

    priors /= priors.sum()
    return GMM(n_components=self.n_components, priors=priors, means=means,
               covariances=covariances, random_state=self.random_state)
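# Hypothetical usage of condition(), assuming `gmm` is an already-fitted 2-D
# instance of the class above: condition on feature 0 taking the value 1.0
# and obtain a 1-D mixture over the remaining feature.
conditional = gmm.condition(indices=np.array([0]), x=np.array([1.0]))
# `conditional` represents p(feature_1 | feature_0 = 1.0) with the same number
# of components, each reweighted by its marginal density at the observed value.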
def __init__(self, prec, cap=32):
    """Initialise with the precision matrix to use for the kernels, which
    implicitly provides the number of dimensions, and the cap on the number of
    kernels to allow."""
    self.prec = numpy.asarray(prec, dtype=numpy.float32)
    self.gmm = GMM(prec.shape[0], cap)  # Current mixture model.
    self.count = 0  # Number of samples provided so far.

    # [i, j]; cost of merging two entries, only valid when j < i, other values
    # set high to avoid issues.
    self.merge = numpy.empty((cap, cap), dtype=numpy.float32)
    self.merge[:, :] = 1e64

    # For holding the temporary merge costs calculated when adding a sample...
    self.mergeT = numpy.empty(cap, dtype=numpy.float32)

    # For the C code...
    self.temp = numpy.empty((2, prec.shape[0], prec.shape[0]), dtype=numpy.float32)
def check(self):
    """
    Check the shapes of the different matrices involved in the model.
    """
    GMM.check(self)

    if self.prior_means.shape[0] != self.k:
        raise ValueError("Incorrect dimension for self.prior_means")
    if self.prior_means.shape[1] != self.dim:
        raise ValueError("Incorrect dimension for self.prior_means")
    if self.prior_scale.shape[0] != self.k:
        raise ValueError("Incorrect dimension for self.prior_scale")
    if self.prior_scale.shape[1] != self.dim:
        raise ValueError("Incorrect dimension for self.prior_scale")
    if self.prior_dof.shape[0] != self.k:
        raise ValueError("Incorrect dimension for self.prior_dof")
    if self.prior_weights.shape[0] != self.k:
        raise ValueError("Incorrect dimension for self.prior_weights")
def __init__(self):
    compressor = CompressionNetworkArrhythmia()
    estimator = EstimationNetworkArrhythmia()
    # TODO: the GMM module should be part of the estimator...?
    gmm = GMM(num_mixtures=2, dimension_embedding=4)
    super().__init__(compression_module=compressor,
                     estimation_module=estimator,
                     gmm_module=gmm)
def __init__(self, k=1, dim=1, means=None, precisions=None, weights=None,
             shrinkage=None, dof=None):
    """
    Initialize the structure with the dimensions of the problem.
    Optionally, provide the remaining terms.
    """
    GMM.__init__(self, k, dim, 'full', means, precisions, weights)
    self.shrinkage = shrinkage
    self.dof = dof

    if self.shrinkage is None:
        self.shrinkage = np.ones(self.k)

    if self.dof is None:
        self.dof = np.ones(self.k)

    if self.precisions is not None:
        self._detp = [detsh(self.precisions[k]) for k in range(self.k)]
def __init__(self, N, A, GMM_kwargs, pi, state_map=None, observation_map=None):
    self.N = N                      # number of distinct states, int
    # self.M = M                    # number of distinct observation symbols, int
    self.A = np.array(A)            # state transition probabilities, N x N matrix
    self.B = [GMM(**GMM_kwargs) for _ in range(self.N - 1)]  # emission pdf for each state
    self.pi = pi                    # initial state distribution, length-N list
    self.K = self.B[0].K
    self.feats_lists = [[] for _ in range(N - 1)]
    self.state_transfer_historys = []
    if state_map is None:
        self.state_map = ['s' + str(i) for i in range(1, N + 1)]
def initialization(img, bbox, num_components=5, debug=False):
    xmin, ymin, xmax, ymax = bbox
    height, width, _ = img.shape

    alpha = np.zeros((height, width), dtype=np.int8)
    for h in range(height):      # Rows
        for w in range(width):   # Columns
            if (w >= xmin) and (w <= xmax) and (h >= ymin) and (h <= ymax):
                # Foreground
                alpha[h, w] = 1

    foreground_gmm = GMM(num_components)
    background_gmm = GMM(num_components)

    fg_clusters = foreground_gmm.initialize_gmm(img[alpha == 1])
    bg_clusters = background_gmm.initialize_gmm(img[alpha == 0])

    if debug:
        k = np.ones(alpha.shape, dtype=int) * -1
        k[alpha == 1] = fg_clusters[:]
        k[alpha == 0] = bg_clusters[:]
        visualize_clusters(img.shape, k, alpha)

        plt.imshow(alpha * 265)
        plt.show()

        for i in range(alpha.shape[0]):
            for j in range(alpha.shape[1]):
                print(alpha[i, j], end=' ')
            print('')

    return alpha, foreground_gmm, background_gmm
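# Aside: the pixel-by-pixel loop inside initialization() that marks the bbox
# can be written as a single slice assignment, assuming the same inclusive
# bounding-box convention as above (rows are y, columns are x):
alpha = np.zeros((height, width), dtype=np.int8)
alpha[ymin:ymax + 1, xmin:xmax + 1] = 1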
def test_tom(opt, test_loader, model, board):
    print('----Testing of module {} started----'.format(opt.name))
    model.to(device)
    model.eval()

    unet_mask = UnetGenerator(25, 20, ngf=64)
    load_checkpoint(unet_mask, os.path.join(opt.checkpoint_dir, 'SEG', 'segm_final.pth'))
    unet_mask.to(device)
    unet_mask.eval()

    gmm = GMM(opt)
    load_checkpoint(gmm, os.path.join(opt.checkpoint_dir, 'GMM', 'gmm_final.pth'))
    gmm.to(device)
    gmm.eval()

    length = len(test_loader.data_loader)
    step = 0
    pbar = tqdm(total=length)

    inputs = test_loader.next_batch()
    while inputs is not None:
        im_name = inputs['im_name']
        im_h = inputs['head'].to(device)
        im = inputs['image'].to(device)
        agnostic = inputs['agnostic'].to(device)
        c = inputs['cloth'].to(device)
        # c_warp = inputs['cloth_warp'].to(device)
        im_c = inputs['parse_cloth'].to(device)
        im_c_mask = inputs['parse_cloth_mask'].to(device)
        im_ttp = inputs['texture_t_prior'].to(device)

        with torch.no_grad():
            output_segm = unet_mask(torch.cat([agnostic, c], 1))
            grid_zero, theta, grid_one, delta_theta = gmm(agnostic, c)
            c_warp = F.grid_sample(c, grid_one, padding_mode='border')

        output_segm = F.log_softmax(output_segm, dim=1)
        output_argm = torch.max(output_segm, dim=1, keepdim=True)[1]
        final_segm = torch.zeros(output_segm.shape).to(device).scatter(1, output_argm, 1.0)
        input_tom = torch.cat([final_segm, c_warp, im_ttp], 1)

        with torch.no_grad():
            output_tom = model(input_tom)
            person_r = torch.tanh(output_tom[:, :3, :, :])
            mask_c = torch.sigmoid(output_tom[:, 3:, :, :])
            mask_c = (mask_c >= 0.5).type(torch.float)
            img_tryon = mask_c * c_warp + (1 - mask_c) * person_r

        visuals = [[im, c, img_tryon],
                   [im_c, c_warp, person_r],
                   [im_c_mask, mask_c, im_h]]
        board_add_images(board, 'combine', visuals, step + 1)
        save_images(img_tryon, im_name,
                    osp.join(opt.dataroot, opt.datamode, 'final-output'))

        inputs = test_loader.next_batch()
        step += 1
        pbar.update(1)
def __call__(self, points, K, max_iters=100, abs_tol=1e-16, rel_tol=1e-16, **kwargs):
    # Forward the iteration cap and tolerances instead of hard-coding them,
    # so callers can actually override the defaults.
    return GMM()(points, K, max_iters=max_iters, abs_tol=abs_tol,
                 rel_tol=rel_tol, **kwargs)[1]
def __init__(self, prec, cap=32):
    """Initialise with the precision matrix to use for the kernels, which
    implicitly provides the number of dimensions, and the cap on the number of
    kernels to allow."""
    self.prec = numpy.asarray(prec, dtype=numpy.float32)
    self.gmm = GMM(prec.shape[0], cap)  # Current mixture model.
    self.count = 0  # Number of samples provided so far.

    # [i, j]; cost of merging two entries, only valid when j < i, other values
    # set high to avoid issues.
    self.merge = numpy.empty((cap, cap), dtype=numpy.float32)
    self.merge[:, :] = 1e64

    # For holding the temporary merge costs calculated when adding a sample...
    self.mergeT = numpy.empty(cap, dtype=numpy.float32)

    # For the C code...
    self.temp = numpy.empty((2, prec.shape[0], prec.shape[0]), dtype=numpy.float32)
def pi_N(N, pi, mean_1, cov_1, mean_2, cov_2, steps):
    gmm = GMM(N, pi, mean_1, cov_1, mean_2, cov_2)
    pi_steps = np.zeros(shape=steps)
    # mean of the estimates
    pi_mu = 0
    # variance of the estimates
    pi_sigma = 0

    # run EM `steps` times, accumulating the estimates and their mean
    for i in range(steps):
        D = gmm.dataset()
        pi_learn, _, _, _, _ = gmm.EM(D, N)
        pi_mu += pi_learn
        pi_steps[i] = pi_learn
    pi_mu /= steps

    # variance over the `steps` runs
    for i in range(steps):
        pi_sigma += (pi_steps[i] - pi_mu) ** 2
    pi_sigma /= steps

    return pi_mu, pi_sigma
class DAGMM2(nn.Module):
    def __init__(self):
        super().__init__()
        self.gmm = GMM(num_mixtures=3, dimension_embedding=4)
        self.estimator = EstimationNetworkArrhythmia()

    def forward(self, encode, AE, points):
        relative_ed = relative_euclidean_distance(AE, points)
        cosine_sim = cosine_similarity(AE, points)
        # Adding a dimension to prepare for concatenation.
        relative_ed = relative_ed.view(-1, 1)
        cosine_sim = cosine_sim.view(-1, 1)
        latent_vectors = torch.cat([encode, relative_ed, cosine_sim], dim=1)
        # latent_vectors has shape [batch_size, dim_embedding + 2]

        # Updating the parameters of the mixture.
        if self.training:
            mixtures_affiliations = self.estimator(latent_vectors)
            # mixtures_affiliations has shape [batch_size, num_mixtures]
            self.gmm._update_mixtures_parameters(latent_vectors,
                                                 mixtures_affiliations)

        # Estimating the energy of the samples.
        return self.gmm(latent_vectors)
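# A minimal smoke-test sketch for DAGMM2.forward. The shapes below are
# illustrative assumptions: `encode` is taken to be a 2-D bottleneck code so
# that, with the two appended similarity features, the concatenated latent
# vector matches dimension_embedding=4; the feature size of `points`/`AE`
# only needs to be consistent between the two tensors.
batch_size = 8
points = torch.randn(batch_size, 32)   # hypothetical raw feature size
AE = torch.randn(batch_size, 32)       # reconstruction of the same shape
encode = torch.randn(batch_size, 2)    # bottleneck code: 2 + 2 = 4 features

model = DAGMM2()
model.train()
energy = model(encode, AE, points)     # per-sample energies from the GMM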
def main():
    yml_path = sys.argv[1]
    with open(yml_path) as f:
        config = yaml.safe_load(f)

    inlier_classes = config['train_data_params']['labels']
    outlier_classes = [i for i in range(10) if i not in inlier_classes]

    x, y = load_data('test', normalization='tanh', with_label=True)
    y = np.array(y)
    x = tf.constant(x, dtype=tf.float32)

    autoencoder = AutoEncoder(**config['autoencoder_params'])
    estimation_network = EstimationNetwork(**config['estimator_params'])
    gmm = GMM(config['estimator_params']['dense_units'][-1],
              config['autoencoder_params']['latent_dim'] + 1)

    autoencoder.build(input_shape=(1, 32, 32, 1))
    estimation_network.build(
        input_shape=(1, config['autoencoder_params']['latent_dim'] + 1))
    gmm([
        tf.random.normal((1, config['autoencoder_params']['latent_dim'] + 1)),
        tf.random.normal((1, config['estimator_params']['dense_units'][-1]))
    ])
    # tf 2.1.0 doesn't accept
    # gmm.build(input_shape=[(1, config['autoencoder_params']['latent_dim']+1),
    #                        (1, config['estimator_params']['dense_units'][-1])])

    dagmm = DAGMM(autoencoder, estimation_network, gmm)
    dagmm.load_weights(
        os.path.join(config['logdir'], 'model',
                     'dagmm_%d.h5' % config['test_epoch']))

    outputs = dagmm(x, training=False)
    outputs = np.squeeze(np.asarray(outputs))

    inlier_outputs = np.zeros(shape=(0,))
    for c in inlier_classes:
        inlier_outputs = np.append(inlier_outputs, outputs[y == c])
    df_inlier = pd.DataFrame({'energy': inlier_outputs, 'label': 'inlier'})

    outlier_outputs = np.zeros(shape=(0,))
    for c in outlier_classes:
        outlier_outputs = np.append(outlier_outputs, outputs[y == c])
    df_outlier = pd.DataFrame({'energy': outlier_outputs, 'label': 'outlier'})

    df = pd.concat([df_inlier, df_outlier], axis=0)
    df.to_csv(os.path.join(config['logdir'], 'outputs.csv'), index=None)
def posterior(self, X):
    """
    The E-step of the EM algorithm.
    Returns the posterior probability p(y|X).

    Args:
        X (matrix, [n, d]): Data to compute posterior for.

    Returns:
        Matrix of size [n, K]
    """
    P = np.zeros((X.shape[0], self.K))
    for i in range(self.K):
        P[:, i] = GMM.prob(X, self.mu[i], self.Sigma[i])
    return P / P.sum(axis=1, keepdims=True)  # Normalize
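# Side note: the textbook E-step also weights each component's likelihood by
# its mixing proportion, p(y=k | x) ∝ pi_k * N(x | mu_k, Sigma_k). A minimal
# sketch of that variant, assuming a hypothetical `self.pi` vector of mixing
# weights (the class above may account for the weights elsewhere):
def posterior_weighted(self, X):
    P = np.zeros((X.shape[0], self.K))
    for i in range(self.K):
        P[:, i] = self.pi[i] * GMM.prob(X, self.mu[i], self.Sigma[i])
    return P / P.sum(axis=1, keepdims=True)  # rows sum to 1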
def test_update_gmm():
    batch_size = 5
    dimension_embedding = 7
    num_mixtures = 2
    gmm = GMM(num_mixtures, dimension_embedding)

    latent_vectors = np.random.random([batch_size, dimension_embedding])
    latent_vectors = convert_to_var(latent_vectors)
    affiliations = np.random.random([batch_size, num_mixtures])
    affiliations = convert_to_var(affiliations)

    for param in gmm.parameters():
        print(param)

    gmm.train()
    gmm._update_mixtures_parameters(latent_vectors, affiliations)

    for param in gmm.parameters():
        print(param)
def test_update_gmm():
    batch_size = 10
    dimension_embedding = 7
    num_mixtures = 2
    gmm = GMM(num_mixtures, dimension_embedding)

    latent_vectors = torch.randn(batch_size, dimension_embedding)
    affiliations = torch.nn.functional.softmax(
        torch.rand(batch_size, num_mixtures), dim=1)

    print('----------parameters before update----------')
    for param in gmm.parameters():
        print(param)

    gmm.train()
    gmm._update_mixtures_parameters(latent_vectors, affiliations)

    print('----------parameters after update----------')
    for param in gmm.parameters():
        print(param)
def main(args):
    df = pd.read_csv(args.data)
    data = np.array(df[['X', 'Y']])
    plt.clf()
    plt.scatter(data[:, 0], data[:, 1], s=3, color='blue')

    gmm = GMM(args.k)
    gmm.fit(data)
    mean = gmm.get_means()
    sigma = gmm.get_covariances()
    pi = gmm.get_pis()

    # Plot an ellipse for each covariance matrix.
    for k in range(len(sigma)):
        w, h, angle = get_ellipse_from_covariance(sigma[k])
        e = patches.Ellipse(mean[k], w, h, angle=angle)
        e.set_alpha(np.power(pi[k], 0.1))
        e.set_facecolor('red')
        plt.gca().add_artist(e)

    plt.savefig('covariances_{}_{}.jpg'.format(args.data, args.name))
    plt.show()
def main():
    opt = parser()

    test_dataset = SieveDataset(opt)
    # create dataloader
    test_loader = SieveDataLoader(opt, test_dataset)

    if opt.name == 'GMM':
        model = GMM(opt)

        # visualization
        if not os.path.exists(os.path.join(opt.tensorboard_dir, opt.name, opt.datamode)):
            os.makedirs(os.path.join(opt.tensorboard_dir, opt.name, opt.datamode))
        board = SummaryWriter(
            log_dir=os.path.join(opt.tensorboard_dir, opt.name, opt.datamode))

        checkpoint_path = osp.join(opt.checkpoint_dir, opt.name, 'gmm_final.pth')
        load_checkpoint(model, checkpoint_path)
        test_gmm(opt, test_loader, model, board)

    elif opt.name == 'TOM':
        model = UnetGenerator(26, 4, ngf=64)

        # visualization
        if not os.path.exists(os.path.join(opt.tensorboard_dir, opt.name, opt.datamode)):
            os.makedirs(os.path.join(opt.tensorboard_dir, opt.name, opt.datamode))
        board = SummaryWriter(
            log_dir=os.path.join(opt.tensorboard_dir, opt.name, opt.datamode))

        checkpoint_path = osp.join(opt.checkpoint_dir, opt.name, 'tom_final.pth')
        load_checkpoint(model, checkpoint_path)
        test_tom(opt, test_loader, model, board)
def main():
    yml_path = sys.argv[1]
    with open(yml_path) as f:
        config = yaml.safe_load(f)
    os.makedirs(config['logdir'], exist_ok=True)
    shutil.copy(yml_path, os.path.join(config['logdir'], 'config.yml'))

    x = load_specific_data(phase='train', **config['train_data_params'])

    autoencoder = AutoEncoder(**config['autoencoder_params'])
    estimation_network = EstimationNetwork(**config['estimator_params'])
    gmm = GMM(config['estimator_params']['dense_units'][-1],
              config['autoencoder_params']['latent_dim'] + 1)
    dagmm = DAGMM(autoencoder, estimation_network, gmm)

    solver = Solver(dagmm, **config['solver_params'], logdir=config['logdir'])
    solver.fit(x, **config['fit_params'])
def main(args):
    df = pd.read_csv(args.data)
    data = np.array(df[['X', 'Y']])
    plt.clf()
    plt.scatter(data[:, 0], data[:, 1], s=3, color='blue')

    gmm = GMM(args.k)
    gmm.fit(data)
    mean = gmm.get_means()
    sigma = gmm.get_covariances()
    pi = gmm.get_pis()

    # Plot an ellipse for each covariance matrix.
    for k in range(len(sigma)):
        # print(sigma[k])
        h, w, angle = get_ellipse_from_covariance(sigma[k])
        e = patches.Ellipse(mean[k], w, h, angle=angle)
        e.set_alpha(np.power(pi[k], .3))
        e.set_facecolor('red')
        plt.gca().add_artist(e)

    plt.savefig('edgar_gmm_sparse.jpg')
    # plt.show()
    winsound.Beep(2500, 1000)
def generate_3_clusters():
    # generate 3 clusters
    c1 = sample(torch.Tensor([2.5, 2.5]), torch.Tensor([1.2, .8]), 500)
    c2 = sample(torch.Tensor([7.5, 7.5]), torch.Tensor([.75, .5]), 500)
    c3 = sample(torch.Tensor([8, 1.5]), torch.Tensor([.6, .8]), 1000)
    return torch.cat([c1, c2, c3])


if __name__ == '__main__':
    # generate data
    data = generate_3_clusters()
    # 3 components
    K = 3
    # create model
    gm = GMM(data, K=K)
    # training iterations
    iterations = 50
    # early stopping threshold
    thresh = 1e-6
    loss_p = 100000.
    for i in range(iterations):
        # run a step
        loss_c = gm.step()
        print(f'[{i}] Loss : {loss_c}')
        # difference
        if torch.abs(loss_c - loss_p).item() < thresh:
            print('Early Stopping')
            break
        # keep track of previous loss
        loss_p = loss_c
        output = F.leaky_relu(self.fc2(F.leaky_relu(self.fc1(x))))
        return output


def energy(X):
    recon = netD(X)
    return torch.mean(torch.sum((X - recon) ** 2, 1))


args = get_args()
n_samples = args.n_data
n_epochs = args.n_epochs
batch_size = args.batch_size
m = 5

gmm = GMM(n_gaussians=2, dim=2, random_seed=22)
sample_data = torch.Tensor(gmm.sample(n_samples))
dataloader_train = DataLoader(sample_data, batch_size=batch_size)

noise = torch.rand(n_samples, 2)
fixed_noise = torch.rand(n_samples, 2)

netG = Generator(sample_data.numpy())
netD = Discrimator()

if torch.cuda.is_available():
    netG = netG.cuda()
    netD = netD.cuda()
    fixed_noise = fixed_noise.cuda()
    return np.sqrt(1 / lambda1), np.sqrt(1 / lambda2), angle


################################################################################
# GMM on a 2D toy dataset
#
# The dataset is generated from N Gaussian distributions equally spaced on a
# circle of radius N. Here, N = 4.
# You should be able to visualize the learnt Gaussian distributions in the
# plots folder.
# Complete the implementation of the fit function for the GMM class in gmm.py
################################################################################

x, y = toy_dataset(4, 100)
init = ['k_means', 'random']

for i in init:
    n_cluster = 4
    gmm = GMM(n_cluster=n_cluster, max_iter=1000, init=i, e=1e-6)
    iterations = gmm.fit(x)
    ll = gmm.compute_log_likelihood(x)

    assert gmm.means.shape == (n_cluster, 2), \
        'means should be a numpy array with shape {}x2'.format(n_cluster)
    assert gmm.variances.shape == (n_cluster, 2, 2), \
        'variances should be a numpy array with shape {}x2x2'.format(n_cluster)
    assert gmm.pi_k.shape == (n_cluster,), \
        'pi_k should be a numpy vector of size {}'.format(n_cluster)
    assert iterations > 0 and type(iterations) == int, \
        'Number of updates should be a positive integer'
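# For reference, a sketch of what a generator matching the comment above might
# look like; the real toy_dataset(4, 100) helper lives in another file, so the
# name and defaults here are assumptions.
def toy_dataset_sketch(n_clusters, samples_per_cluster, std=0.5, seed=0):
    rng = np.random.RandomState(seed)
    # Means equally spaced on a circle of radius n_clusters.
    angles = 2 * np.pi * np.arange(n_clusters) / n_clusters
    means = n_clusters * np.stack([np.cos(angles), np.sin(angles)], axis=1)
    x = np.concatenate([m + std * rng.randn(samples_per_cluster, 2) for m in means])
    y = np.repeat(np.arange(n_clusters), samples_per_cluster)
    return x, y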
def main():
    #log = open(LOG_FILE, "w")
    #sys.stdout = log

    # use train data to train the GMMs
    points = np.genfromtxt(TRAIN_FILE, usecols=(0, 1))
    label = np.genfromtxt(TRAIN_FILE, usecols=2)
    w1 = (label == 1)

    '''u1 = np.array([[ 0.92016682, -0.53710902],
                   [-0.82053379, -0.52580246],
                   [ 2.27051023, -0.8221437 ],
                   [ 0.67995665, -0.57743096]])
    u2 = np.array([[1.50122208, 1.65573219],
                   [0.65636359, 0.23047148],
                   [2.14053852, -0.08155318],
                   [2.73604834, 0.3522032]])
    sigma = np.empty((4, 2, 2))
    sigma[:] = np.eye(2)'''

    # initialize 2 GMMs
    #gmm_1 = GMM(4, 2, np.ones(4) * 0.25, u1, sigma)
    #gmm_2 = GMM(4, 2, np.ones(4) * 0.25, u2, sigma)
    gmm_1 = GMM(4, 2)
    gmm_2 = GMM(4, 2)

    # train
    print("---------------GMM_1------------------")
    gmm_1.train(points[w1])
    print(gmm_1)
    print("---------------GMM_2------------------")
    gmm_2.train(points[~w1])
    print(gmm_2)

    # visualize the 2 GMMs
    #plt.figure()
    #gmm_1.draw()
    #plt.show()
    #plt.figure()
    #gmm_2.draw()
    #plt.show()

    # use dev data to classify and compute accuracy
    devs = np.genfromtxt(DEV_FILE, usecols=(0, 1))
    dev_la = np.genfromtxt(DEV_FILE, usecols=2)
    re = (dev_la == 1)
    p_1 = gmm_1.predict(devs)
    p_2 = gmm_2.predict(devs)
    pr = ((p_1 > p_2) == re)
    accuracy = np.count_nonzero(pr) * 1.0 / len(pr)
    print("dev data classification accuracy is", accuracy)

    # use test data to classify
    tests = np.genfromtxt(TEST_FILE, usecols=(0, 1))
    t_1 = gmm_1.predict(tests)
    t_2 = gmm_2.predict(tests)
    result = [1 if t_1[i] > t_2[i] else 2 for i in range(len(t_1))]

    f = open(RESULT_FILE, "w")
    for i in range(len(tests)):
        line = "%.6f %.6f %d\n" % (tests[i, 0], tests[i, 1], result[i])
        f.write(line)
    f.close()
chi_gt = arr([[1, 1, 0], [1, 0, 1]]) + 1e-2
chi_gt = Normalize(chi_gt, 's1', 'row')[0]
mu_gt = arr([[-1, -1], [1, -1], [0, 1]])
sigma_gt = 3e-2

# simulate data
y_gt = randm(pi_gt, M)
theta_gt = chi_gt[y_gt]
N = random.poisson(mean_N, M)
# N = random.exponential(mean_N, M)
cN = cat(([0], cumsum(N)))

z_gt = zeros(cN[-1])
X = zeros((cN[-1], mu_gt.shape[1]))
group_id = zeros(cN[-1], int32)
g_type = zeros(M)

gmm = GMM(theta_gt[0], mu_gt, sigma_gt)
for m in range(M):
    gmm.priors = theta_gt[m]
    X[cN[m]:cN[m + 1]], z_gt[cN[m]:cN[m + 1]] = gmm.GenerateSample(N[m])
    group_id[cN[m]:cN[m + 1]] = m

# anomalies
for ind in range(N_bad_instance):
    m = RI(1, M)
    g_type[m] = 1
    X[group_id == m] = random.randn((group_id == m).sum(), mu_gt.shape[1]) * 0.5

for ind in range(N_bad_group):
    m = RI(1, M)
    g_type[m] = 2
import pylab as pl

from normal import Normal
from gmm import GMM
from plot_normal import draw2dnormal
from plot_gmm import draw2dgmm

if False:
    fp = open("../data/faithful.txt")
    data = []
    for line in fp.readlines():
        x, y = line.split()
        data.append([float(x), float(y)])

    data = npa(data)
    pl.scatter(data[:, 0], data[:, 1])

    gmm = GMM(dim=2, ncomps=2, data=data, method="kmeans")
    #x = Normal(2, data=data)
    #draw2dnormal(x, show=True, axes=pl.gca())
    print(gmm)
    draw2dgmm(gmm)
    pl.show()

if False:
    from test_func import noisy_cosine
    x, y = noisy_cosine()
    data = np.vstack([x, y]).transpose()
    pl.scatter(data[:, 0], data[:, 1])
class KDE_INC:
    """Provides an incremental kernel density estimate system that uses
    Gaussians. A kernel density estimate system with Gaussian kernels that, on
    reaching a cap, starts merging kernels to limit the number of kernels to a
    constant - done in such a way as to minimise error whilst capping
    computation. (Computation is quite high however - this is not a very
    efficient implementation.)"""

    def __init__(self, prec, cap=32):
        """Initialise with the precision matrix to use for the kernels, which
        implicitly provides the number of dimensions, and the cap on the number
        of kernels to allow."""
        self.prec = numpy.asarray(prec, dtype=numpy.float32)
        self.gmm = GMM(prec.shape[0], cap)  # Current mixture model.
        self.count = 0  # Number of samples provided so far.

        # [i, j]; cost of merging two entries, only valid when j < i, other
        # values set high to avoid issues.
        self.merge = numpy.empty((cap, cap), dtype=numpy.float32)
        self.merge[:, :] = 1e64

        # For holding the temporary merge costs calculated when adding a sample...
        self.mergeT = numpy.empty(cap, dtype=numpy.float32)

        # For the C code...
        self.temp = numpy.empty((2, prec.shape[0], prec.shape[0]), dtype=numpy.float32)

    def setPrec(self, prec):
        """Changes the precision matrix - must be called before any samples are
        added, and must have the same dimensions as the current one."""
        self.prec = numpy.asarray(prec, dtype=numpy.float32)

    def samples(self):
        """Returns how many samples have been added to the object."""
        return self.count

    def prob(self, sample):
        """Returns the probability of the given sample - must not be called
        until at least one sample has been added, though it will return a
        positive constant if called with no samples provided."""
        if self.count != 0:
            return self.gmm.prob(sample)
        else:
            return 1.0

    def nll(self, sample):
        """Returns the negative log likelihood of the given sample - must not
        be called until at least one sample has been added, though it will
        return a positive constant if called with no samples provided."""
        if self.count != 0:
            return self.gmm.nll(sample)
        else:
            return 0.0

    def __merge(self, weightA, meanA, precA, weightB, meanB, precB):
        """Merges two Gaussians and returns the merged result, as
        (weight, mean, prec)."""
        newWeight = weightA + weightB
        newMean = weightA/newWeight * meanA + weightB/newWeight * meanB

        deltaA = meanA - newMean
        covA = numpy.linalg.inv(precA) + numpy.outer(deltaA, deltaA)
        deltaB = meanB - newMean
        covB = numpy.linalg.inv(precB) + numpy.outer(deltaB, deltaB)

        newCov = weightA/newWeight * covA + weightB/newWeight * covB
        newPrec = numpy.linalg.inv(newCov)

        return (newWeight, newMean, newPrec)

    def __calcMergeCost(self, weightA, meanA, precA, weightB, meanB, precB):
        """Calculates and returns the cost of merging two Gaussians."""
        # (For anyone wondering about the fact we are comparing them against
        # each other rather than against the result of merging them, that is
        # because this way tends to get better results.)

        # The log determinants and delta...
        logDetA = math.log(numpy.linalg.det(precA))
        logDetB = math.log(numpy.linalg.det(precB))
        delta = meanA - meanB

        # Kullback-Leibler of representing A using B...
        klA = logDetB - logDetA
        klA += numpy.trace(numpy.dot(precB, numpy.linalg.inv(precA)))
        klA += numpy.dot(numpy.dot(delta, precB), delta)
        klA -= precA.shape[0]
        klA *= 0.5

        # Kullback-Leibler of representing B using A...
        klB = logDetA - logDetB
        klB += numpy.trace(numpy.dot(precA, numpy.linalg.inv(precB)))
        klB += numpy.dot(numpy.dot(delta, precA), delta)
        klB -= precB.shape[0]
        klB *= 0.5

        # Return a weighted average...
return weightA * klA + weightB * klB def add(self, sample): """Adds a sample, updating the kde accordingly.""" global weave try: weave = None # Below code is actually slowing things down. Am disabling for now. if weave==None: raise Exception() support = matrix_code + start_cpp() + """ // Note - designed so that A and Out pointers can be the same. void doMerge(int size, float weightA, float * meanA, float * precA, float weightB, float * meanB, float * precB, float & weightOut, float * meanOut, float * precOut, float * tVec, float * tMat1, float * tMat2) { // Handle the weight, recording the ratios needed next... float wOut = weightA + weightB; float ratioA = weightA/wOut; float ratioB = weightB/wOut; weightOut = wOut; // Do the mean - simply a weighted average - store in a temporary for now... for (int i=0; i<size; i++) { tVec[i] = ratioA * meanA[i] + ratioB * meanB[i]; } // Put the covariance of precision A into tMat1... for (int i=0; i<size*size; i++) tMat2[i] = precA[i]; Inverse(tMat2, tMat1, size); // Add the outer product of the A delta into tMat1... for (int r=0; r<size; r++) { for (int c=0; c<size; c++) { tMat1[r*size + c] += (meanA[c] - tVec[c]) * (meanA[r] - tVec[r]); } } // Put the covariance of precision B into tMat2... for (int i=0; i<size*size; i++) precOut[i] = precB[i]; Inverse(precOut, tMat2, size); // Add the outer product of the B delta into tMat2... for (int r=0; r<size; r++) { for (int c=0; c<size; c++) { tMat2[r*size + c] += (meanB[c] - tVec[c]) * (meanB[r] - tVec[r]); } } // Get the weighted average of the covariance matrices into tMat1... for (int i=0; i<size*size; i++) { tMat1[i] = ratioA * tMat1[i] + ratioB * tMat2[i]; } // Dump the inverse of tMat1 into the output precision... Inverse(tMat1, precOut, size); // Copy from the temporary mean into the output mean... for (int i=0; i<size; i++) meanOut[i] = tVec[i]; } float mergeCost(int size, float weightA, float * meanA, float * precA, float weightB, float * meanB, float * precB, float * tVec1, float * tVec2, float * tMat1, float * tMat2) { // Calculate some shared values... float logDetA = log(Determinant(precA, size)); float logDetB = log(Determinant(precB, size)); for (int i=0; i<size; i++) { tVec1[i] = meanA[i] - meanB[i]; } // tVec1 now contains the delta. // Calculate the Kullback-Leibler divergance of substituting B for A... float klA = logDetB - logDetA; for (int i=0; i<size*size; i++) tMat1[i] = precA[i]; if (Inverse(tMat1, tMat2, size)==false) return 0.0; for (int i=0; i<size; i++) { for (int j=0; j<size; j++) { klA += precB[i*size + j] * tMat2[j*size + i]; } } for (int i=0; i<size; i++) { tVec2[i] = 0.0; for (int j=0; j<size; j++) { tVec2[i] += precB[i*size + j] * tVec1[j]; } } for (int i=0; i<size; i++) klA += tVec1[i] * tVec2[i]; klA -= size; klA *= 0.5; // Calculate the Kullback-Leibler divergance of substituting A for B... float klB = logDetA - logDetB; for (int i=0; i<size*size; i++) tMat1[i] = precB[i]; if (Inverse(tMat1, tMat2, size)==false) return 0.0; for (int i=0; i<size; i++) { for (int j=0; j<size; j++) { klB += precA[i*size + j] * tMat2[j*size + i]; } } for (int i=0; i<size; i++) { tVec2[i] = 0.0; for (int j=0; j<size; j++) { tVec2[i] += precA[i*size + j] * tVec1[j]; } } for (int i=0; i<size; i++) klB += tVec1[i] * tVec2[i]; klB -= size; klB *= 0.5; // Return a weighted average of the divergances... return weightA * klA + weightB * klB; } """ code = start_cpp(support) + """ if (count < Nweight[0]) { // Pure KDE mode - just add the kernel... 
for (int i=0; i<Nsample[0]; i++) { MEAN2(count, i) = sample[i]; } for (int i=0; i<Nsample[0]; i++) { for (int j=0; j<Nsample[0]; j++) { PREC3(count, i, j) = BASEPREC2(i, j); } } assert(Sprec[0]==sizeof(float)); assert(Sprec[1]==sizeof(float)*Nsample[0]); log_norm[count] = 0.5 * log(Determinant(&PREC3(count, 0, 0), Nsample[0])); log_norm[count] -= 0.5 * Nsample[0] * log(2.0*M_PI); float w = 1.0 / (count+1); for (int i=0; i<=count; i++) { weight[i] = w; } // If the next sample will involve merging then we need to fill in the merging costs cache in preperation... if (count+1==Nweight[0]) { for (int i=0; i<Nweight[0]; i++) { for (int j=0; j<i; j++) { MERGE2(i, j) = mergeCost(Nsample[0], weight[i], &MEAN2(i,0), &PREC3(i,0,0), weight[j], &MEAN2(j,0), &PREC3(j,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0)); } } } } else { // We have the maximum number of kernels - need to either merge the new kernel with an existing one, or merge two existing kernels and use the freed up slot for the new kernel... // Update the weights, and calculate the weight of the new kernel... float adjust = float(count) / float(count+1); for (int i=0; i<Nweight[0]; i++) weight[i] *= adjust; for (int i=0; i<Nweight[0]; i++) { for (int j=0; j<i; j++) MERGE2(i, j) *= adjust; } float w = 1.0 / float(count + 1.0); // Calculate the costs of merging the new kernel with each of the old kernels... for (int i=0; i<Nweight[0]; i++) { mergeT[i] = mergeCost(Nsample[0], w, sample, basePrec, weight[i], &MEAN2(i,0), &PREC3(i,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0)); } // Find the lowest merge cost and act accordingly - either we are merging the new kernel with an old one or merging two existing kernels and putting the new kernel in on its own... int lowI = 1; int lowJ = 0; for (int i=0; i<Nweight[0]; i++) { for (int j=0; j<i; j++) { if (MERGE2(i, j) < MERGE2(lowI, lowJ)) { lowI = i; lowJ = j; } } } int lowN = 0; for (int i=1; i<Nweight[0]; i++) { if (mergeT[i] < mergeT[lowN]) lowN = i; } if (mergeT[lowN] < MERGE2(lowI, lowJ)) { // We are merging the new kernel with an existing kernel... // Do the merge... doMerge(Nsample[0], weight[lowN], &MEAN2(lowN,0), &PREC3(lowN,0,0), w, sample, basePrec, weight[lowN], &MEAN2(lowN,0), &PREC3(lowN,0,0), &TEMP2(0,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0)); // Update the normalising constant... log_norm[lowN] = 0.5 * log(Determinant(&PREC3(lowN, 0, 0), Nsample[0])); log_norm[lowN] -= 0.5 * Nsample[0] * log(2.0*M_PI); // Update the array of merge costs... for (int i=0; i<Nweight[0]; i++) { if (i!=lowN) { float mc = mergeCost(Nsample[0], weight[i], &MEAN2(i,0), &PREC3(i,0,0), weight[lowN], &MEAN2(lowN,0), &PREC3(lowN,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0)); if (i<lowN) MERGE2(lowN, i) = mc; else MERGE2(i, lowN) = mc; } } } else { // We are merging two existing kernels then putting the new kernel into the freed up spot... // Do the merge... doMerge(Nsample[0], weight[lowI], &MEAN2(lowI,0), &PREC3(lowI,0,0), weight[lowJ], &MEAN2(lowJ,0), &PREC3(lowJ,0,0), weight[lowI], &MEAN2(lowI,0), &PREC3(lowI,0,0), &TEMP2(0,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0)); // Copy in the new kernel... weight[lowJ] = w; for (int i=0; i<Nsample[0]; i++) MEAN2(lowJ,i) = sample[i]; for (int i=0; i<Nsample[0];i++) { for (int j=0; j<Nsample[0]; j++) { PREC3(lowJ,i,j) = basePrec[i*Nsample[0] + j]; } } // Update both normalising constants... 
log_norm[lowI] = 0.5 * log(Determinant(&PREC3(lowI, 0, 0), Nsample[0])); log_norm[lowI] -= 0.5 * Nsample[0] * log(2.0*M_PI); log_norm[lowJ] = 0.5 * log(Determinant(&PREC3(lowJ, 0, 0), Nsample[0])); log_norm[lowJ] -= 0.5 * Nsample[0] * log(2.0*M_PI); // Update the array of merge costs... for (int i=0; i<Nweight[0]; i++) { if (i!=lowI) { float mc = mergeCost(Nsample[0], weight[i], &MEAN2(i,0), &PREC3(i,0,0), weight[lowI], &MEAN2(lowI,0), &PREC3(lowI,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0)); if (i<lowI) MERGE2(lowI, i) = mc; else MERGE2(i, lowI) = mc; } } for (int i=0; i<Nweight[0]; i++) { if ((i!=lowI)&&(i!=lowJ)) { float mc = mergeCost(Nsample[0], weight[i], &MEAN2(i,0), &PREC3(i,0,0), weight[lowJ], &MEAN2(lowJ,0), &PREC3(lowJ,0,0), &TEMP2(0,0), &TEMP2(1,0), &TEMPPREC3(0,0,0), &TEMPPREC3(1,0,0)); if (i<lowJ) MERGE2(lowJ, i) = mc; else MERGE2(i, lowJ) = mc; } } } } """ sample = numpy.asarray(sample, dtype=numpy.float32).flatten() basePrec = self.prec count = self.count merge = self.merge mergeT = self.mergeT tempPrec = self.temp weight = self.gmm.weight mean = self.gmm.mean prec = self.gmm.prec log_norm = self.gmm.log_norm temp = self.gmm.temp weave.inline(code, ['sample', 'basePrec', 'count', 'merge', 'mergeT', 'tempPrec', 'weight', 'mean', 'prec', 'log_norm', 'temp'], support_code = support) self.count += 1 except Exception, e: if weave!=None: print e weave = None if self.count<self.gmm.weight.shape[0]: # Pure kde phase... self.gmm.mean[self.count,:] = numpy.asarray(sample, dtype=numpy.float32) self.gmm.prec[self.count,:,:] = self.prec self.gmm.calcNorm(self.count) self.count += 1 self.gmm.weight[:self.count] = 1.0 / float(self.count) if self.count==self.gmm.weight.shape[0]: # Next sample starts merging - need to prepare by filling in the kl array... # (Below is grossly inefficient - calculates the same things more times than is possibly funny. I'll optimise it if I ever decide that I care enough to do so.) for i in xrange(self.merge.shape[0]): for j in xrange(i): self.merge[i,j] = self.__calcMergeCost(self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:], self.gmm.weight[j], self.gmm.mean[j,:], self.gmm.prec[j,:,:]) else: # Merging phase... sample = numpy.asarray(sample, dtype=numpy.float32) # Adjust weights... adjust = float(self.count) / float(self.count+1) self.gmm.weight *= adjust for i in xrange(self.merge.shape[0]): self.merge[i,:i] *= adjust self.count += 1 weight = 1.0 / float(self.count) # Calculate the merging costs for the new kernel versus the old kernels... for i in xrange(self.merge.shape[0]): self.mergeT[i] = self.__calcMergeCost(weight, sample, self.prec, self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:]) # Select the best merge - it either involves the new sample or it does not... bestOld = numpy.unravel_index(numpy.argmin(self.merge), self.merge.shape) bestNew = numpy.argmin(self.mergeT) if self.mergeT[bestNew] < self.merge[bestOld]: # Easy scenario - new kernel is being merged with an existing kernel - not too much fiddling involved... # Do the merge... newWeight, newMean, newPrec = self.__merge(weight, sample, self.prec, self.gmm.weight[bestNew], self.gmm.mean[bestNew,:], self.gmm.prec[bestNew,:,:]) # Store the result... self.gmm.weight[bestNew] = newWeight self.gmm.mean[bestNew,:] = newMean self.gmm.prec[bestNew,:,:] = newPrec self.gmm.calcNorm(bestNew) # Update the merge weights... 
for i in xrange(self.merge.shape[0]): if i!=bestNew: cost = self.__calcMergeCost(self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:], self.gmm.weight[bestNew], self.gmm.mean[bestNew,:], self.gmm.prec[bestNew,:,:]) if i<bestNew: self.merge[bestNew,i] = cost else: self.merge[i,bestNew] = cost else: # We are merging two old kernels, and then putting the new kernel into the slot freed up - this is extra fiddly... # Do the merge... newWeight, newMean, newPrec = self.__merge(self.gmm.weight[bestOld[0]], self.gmm.mean[bestOld[0],:], self.gmm.prec[bestOld[0],:,:], self.gmm.weight[bestOld[1]], self.gmm.mean[bestOld[1],:], self.gmm.prec[bestOld[1],:,:]) # Store the result, put the new component in the other slot... self.gmm.weight[bestOld[0]] = newWeight self.gmm.mean[bestOld[0],:] = newMean self.gmm.prec[bestOld[0],:,:] = newPrec self.gmm.calcNorm(bestOld[0]) self.gmm.weight[bestOld[1]] = weight self.gmm.mean[bestOld[1],:] = sample self.gmm.prec[bestOld[1],:,:] = self.prec self.gmm.calcNorm(bestOld[1]) # Update the merge weights for both the merged and new kernels... for i in xrange(self.merge.shape[0]): if i!=bestOld[0]: cost = self.__calcMergeCost(self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:], self.gmm.weight[bestOld[0]], self.gmm.mean[bestOld[0],:], self.gmm.prec[bestOld[0],:,:]) if i<bestOld[0]: self.merge[bestOld[0],i] = cost else: self.merge[i,bestOld[0]] = cost for i in xrange(self.merge.shape[0]): if i!=bestOld[0] and i!=bestOld[1]: cost = self.__calcMergeCost(self.gmm.weight[i], self.gmm.mean[i,:], self.gmm.prec[i,:,:], self.gmm.weight[bestOld[1]], self.gmm.mean[bestOld[1],:], self.gmm.prec[bestOld[1],:,:]) if i<bestOld[1]: self.merge[bestOld[1],i] = cost else: self.merge[i,bestOld[1]] = cost
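# The moment-preserving merge implemented by KDE_INC.__merge above, restated
# as a standalone NumPy sketch in covariance (rather than precision) form; the
# function name below is ours, not part of the class.
import numpy as np

def merge_gaussians(w_a, mean_a, cov_a, w_b, mean_b, cov_b):
    # Merged weight is the sum; merged mean is the weight-averaged mean;
    # merged covariance is the weight-averaged covariance plus the spread of
    # the two means around the merged mean.
    w = w_a + w_b
    mean = (w_a * mean_a + w_b * mean_b) / w
    d_a, d_b = mean_a - mean, mean_b - mean
    cov = (w_a * (cov_a + np.outer(d_a, d_a)) + w_b * (cov_b + np.outer(d_b, d_b))) / w
    return w, mean, cov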