# --- Simulation study: draw n points from a 3-component Gaussian mixture
# --- and track EM's log-likelihood trajectory.
# (mu, sigma, mix, n, mnorm and em_alg are defined earlier in the file.)
# Component-wise draws: xs[k] holds n samples from N(mu[k], sigma[k]).
xs = [mnorm.rvs(mu[k], sigma[k], size=n) for k in range(3)]
# One-hot component labels as a boolean mask. Fix: use the builtin `bool`;
# the string 'Bool' is not a valid dtype name in modern NumPy and raises
# TypeError ('bool'/np.bool_ are the accepted spellings).
z = np.random.multinomial(1, mix, size=n).astype(bool)
# Assemble the mixture sample: start from component 0 and overwrite the
# rows assigned to components 1 and 2 via the boolean masks.
x = xs[0].copy()
x[z[:, 1]] = xs[1][z[:, 1]]
x[z[:, 2]] = xs[2][z[:, 2]]
# Integer cluster labels (0/1/2) for plotting/evaluation.
z_ind = np.zeros(n, dtype=int)
z_ind[z[:, 1]] = 1
z_ind[z[:, 2]] = 2
# Common starting values shared by the fitters.
init_mu = np.array([[0., 0.], [1., 1.], [2., 2.]])
init_sigma = [np.identity(2) for i in range(3)]
init_mix = np.array([1., 1., 1.]) / 3
niter = 500
_, (logliks_em, _) = em_alg(
    x, init_mu, init_sigma, init_mix, num_iter=niter)
# Long-format storage: one row per (run id, iteration, log-likelihood),
# sized for two runs of niter+1 iterations each.
i = 0
run_storage = np.zeros(((niter + 1) * 2, 3))
run_storage[i * (niter + 1):(i + 1) * (niter + 1), 0] = i
run_storage[i * (niter + 1):(i + 1) * (niter + 1), 1] = np.arange(niter + 1)
run_storage[i * (niter + 1):(i + 1) * (niter + 1), 2] = logliks_em
# More overlapping: second scenario with closer component means.
np.random.seed(29643)
mu = np.array([[0., 2.], [2., 0.], [3., 4.]])
xs = [mnorm.rvs(mu[k], sigma[k], size=n) for k in range(3)]
z = np.random.multinomial(1, mix, size=n).astype(bool)
x = xs[0].copy()
# Plot data cmap, norm = colors.from_levels_and_colors( levels=[0, 1, 2], colors=['magenta', 'cyan', 'green'], extend='max') fig = plt.figure() ax = fig.add_subplot(1, 1, 1) ax.scatter(x[:,0], x[:,1], c=z_ind, cmap=cmap, norm=norm) fig.savefig('./sim_data.pdf') # Estimate init_mu = np.array([[0., 0.], [1., 1.], [2., 2.]]) init_sigma = [np.identity(2) for i in range(3)] init_mix = np.array([1., 1., 1.])/3 res_em, (logliks_em, times_em) = em_alg( x, init_mu, init_sigma, init_mix, num_iter=250) res_da, (logliks_da, times_da) = em_alg( x, init_mu, init_sigma, init_mix, num_iter=250, beta_func=lambda i: 1.-np.exp(-(i+1)/5)) res_sa, (logliks_sa, times_sa) = sim_anneal( x, init_mu, init_sigma, init_mix, num_iter=250, seed=29624, temp_func=lambda i: max(1e-4, 100*.992**i)) # Save results colnames = \ 'logliks_em, times_em, logliks_da, times_da, logliks_sa, times_sa' np.savetxt( './intermediate_data/singlerun_results.csv', np.column_stack((logliks_em, times_em,
# Evaluate one simulated replicate (index j) with all three fitters and
# record how far each recovered set of means is from the truth.
# Integer cluster labels (0/1/2) derived from the one-hot matrix z.
z_ind = np.zeros(n, dtype=int)
z_ind[z[:,1]] = 1
z_ind[z[:,2]] = 2
# Run SA
# Note that some runs for small sample sizes have issues with singular
# covariance matrices, so I simply try a couple of times if there is an
# issue.
# NOTE(review): no try/except or retry loop is visible in this span —
# the retry presumably lives in an enclosing loop; confirm in the full
# file.
res_sa, _ = sim_anneal(
    x, init_mu, init_sigma, init_mix, num_iter=iter_num,
    temp_func=lambda i: max(1e-4, 100*.992**i))
# Run EM
res_em, _ = em_alg(
    x, init_mu, init_sigma, init_mix, num_iter=iter_num)
# Run DA
res_da, _ = em_alg(
    x, init_mu, init_sigma, init_mix, num_iter=iter_num,
    beta_func=lambda i: 1.-np.exp(-(i+1)/5))
# Distance between the true means `mu` and each method's fitted means
# (first element of each result), in the order SA, EM, DA.
distances = np.array([cluster_dist(mu, res_sa[0]),
                      cluster_dist(mu, res_em[0]),
                      cluster_dist(mu, res_da[0])])
# Store results
# Three rows per replicate j: (replicate id, method code, distance);
# `methods` is defined outside this view.
run_storage[(j*3):((j+1)*3), 0] = j
run_storage[(j*3):((j+1)*3), 1] = methods
run_storage[(j*3):((j+1)*3), 2] = distances
# NOTE(review): this chunk begins mid-statement — the line below is the
# tail of a colors.from_levels_and_colors(...) call opened outside this
# view — and its final sim_anneal call also continues past the view.
# It appears to duplicate the plot-and-fit section above; confirm which
# copy is current in the full file.
colors=['magenta', 'cyan', 'green'], extend='max')
# Plot the simulated data coloured by true cluster label.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x[:, 0], x[:, 1], c=z_ind, cmap=cmap, norm=norm)
fig.savefig('./sim_data.pdf')
# Estimate
# Common starting values shared by the fitters below.
init_mu = np.array([[0., 0.], [1., 1.], [2., 2.]])
init_sigma = [np.identity(2) for i in range(3)]
init_mix = np.array([1., 1., 1.]) / 3
# Plain EM.
res_em, (logliks_em, times_em) = em_alg(x, init_mu, init_sigma, init_mix,
                                        num_iter=250)
# Deterministic-annealing variant: beta_func ramps toward 1.
res_da, (logliks_da, times_da) = em_alg(x, init_mu, init_sigma, init_mix,
                                        num_iter=250,
                                        beta_func=lambda i: 1. - np.exp(-(i + 1) / 5))
# Simulated annealing; argument list continues outside this view.
res_sa, (logliks_sa, times_sa) = sim_anneal(x, init_mu, init_sigma,