def build_graph(B, K, learning_rate, D):
    # Parameters
    x = tf.placeholder(tf.float32, [None, 1, D], name="x")  # input data
    mu_T = tf.Variable(tf.random_normal([1, K, D], mean=0, stddev=0.001), name="mu_T")  # mu transpose
    phi = tf.Variable(tf.random_normal([1, K], mean=-1, stddev=0), name="phi")
    psi = tf.Variable(tf.random_normal([1, K], mean=10, stddev=0), name="psi")
    var = tf.exp(phi)  # variance, constrained to (0, inf)
    log_pi = logsoftmax(psi)  # exp(log_pi) sums to 1

    # Q2.1.2
    log_prob_x_given_z = compute_log_prob_x_given_z(x, mu_T, var, D)

    # Q2.1.3
    log_prob_z_given_x = compute_log_prob_z_given_x(log_prob_x_given_z, log_pi)
    argmaxs = tf.argmax(log_prob_z_given_x, 1)
    mu = tf.reduce_sum(mu_T, 0)

    # loss: [B, 1] reduced to a scalar
    total_loss = -tf.reduce_mean(
        reduce_logsumexp(tf.add(log_prob_x_given_z, log_pi), keep_dims=True))

    # Adam optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=0.9, beta2=0.99, epsilon=1e-5)
    train = optimizer.minimize(loss=total_loss)
    pi = tf.exp(log_pi)
    return x, mu_T, train, mu, total_loss, argmaxs, log_prob_x_given_z, pi, var
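# Several snippets in this collection call logsoftmax / reduce_logsumexp helpers
# (bare or via utils) whose source is not shown. Below is a minimal sketch of what
# such helpers typically look like, assuming the standard max-subtraction trick for
# numerical stability; the actual utils module used by these snippets may differ.
# The sketch matches the TensorFlow-style calls (e.g. logsoftmax(psi),
# reduce_logsumexp(..., keep_dims=True)); the sparse PyTorch variant of
# utils.logsoftmax used further below has a different signature.
import tensorflow as tf

def reduce_logsumexp(input_tensor, reduction_indices=1, keep_dims=False):
    """Numerically stable log(sum(exp(x))) along reduction_indices."""
    max_input = tf.reduce_max(input_tensor, reduction_indices, keep_dims=True)
    lse = tf.log(tf.reduce_sum(tf.exp(input_tensor - max_input),
                               reduction_indices, keep_dims=True)) + max_input
    if not keep_dims:
        lse = tf.squeeze(lse, squeeze_dims=[reduction_indices])
    return lse

def logsoftmax(input_tensor, reduction_indices=1):
    """Log softmax: x - logsumexp(x), so that exp of the result sums to 1."""
    return input_tensor - reduce_logsumexp(input_tensor, reduction_indices,
                                           keep_dims=True)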
def buildGraph(k, dimension, EXP=1e-5):
    tf.set_random_seed(time())
    pz = tf.Variable(tf.zeros([1, k]))
    logpz = logsoftmax(pz)  # Enforce simplex constraint over P(z)
    sigma = tf.Variable(tf.ones([k, 1]) * (-3))
    expsigma = tf.exp(sigma)  # Enforce sigma > 0
    print expsigma
    mu = tf.Variable(tf.random_normal([k, dimension], mean=0, stddev=0.01), dtype=tf.float32)
    x = tf.placeholder(tf.float32, [None, dimension])
    cost = -marginal_log_likelihood(x, logpz, mu, expsigma)
    iter_var = tf.Variable(0)
    optimizer = tf.train.AdamOptimizer(0.03, beta1=0.9, beta2=0.99, epsilon=1e-5)
    train = optimizer.minimize(cost, global_step=iter_var)
    return x, mu, cost, expsigma, logpz, train
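# buildGraph() above and train_mog() below rely on marginal_log_likelihood, which is
# not shown. This is a hedged sketch of the quantity it presumably computes: the total
# log-likelihood of an isotropic mixture of Gaussians, summed over the batch.
# Assumed shapes: x [N, D], logpz [1, K], mu [K, D], sigma2 [K, 1] (exp(sigma) treated
# here as the per-cluster variance). It reuses the reduce_logsumexp sketch above.
import numpy as np
import tensorflow as tf

def marginal_log_likelihood(x, logpz, mu, sigma2):
    d = tf.cast(tf.shape(x)[1], tf.float32)
    # Squared distance of every point to every cluster centre: [N, K]
    sq_dist = tf.reduce_sum(
        tf.square(tf.expand_dims(x, 1) - tf.expand_dims(mu, 0)), 2)
    sigma2_row = tf.reshape(sigma2, [1, -1])  # [1, K]
    # log N(x_n | mu_k, sigma2_k * I) for every point/cluster pair: [N, K]
    log_gauss = (-0.5 * d * tf.log(2 * np.pi * sigma2_row)
                 - sq_dist / (2 * sigma2_row))
    # log p(x_n) = logsumexp_k [log pi_k + log N(x_n | mu_k)], summed over n
    return tf.reduce_sum(reduce_logsumexp(log_gauss + logpz, reduction_indices=1))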
data2D = np.float32(np.load('data2D.npy'))
data = (data2D - data2D.mean()) / data2D.std()
#data = np.float32(np.load('data2D.npy'))
k = 5
num_sample = data.shape[0]
dim = data.shape[1]

tf_mean = tf.Variable(tf.random_normal([k, dim], mean=0.0, stddev=1.0, dtype=tf.float32))
#tf_mean = tf.Variable(tf.random_uniform([k, dim], minval=-3, maxval=3, dtype=tf.float32))
tf_covariance = tf.Variable(0.5 * tf.exp(tf.random_normal([k], mean=0.0, stddev=1.0, dtype=tf.float32)))
#phi = tf.Variable(tf.random_normal([1, k], mean=0.0, stddev=1.0, dtype=tf.float32))
phi = tf.Variable(tf.truncated_normal([1, k], mean=0.0, stddev=1.0, dtype=tf.float32))
log_pi = utils.logsoftmax(phi)

#tf_data = tf.Variable(data)
tf_data = tf.placeholder(tf.float32, shape=(num_sample, dim))
tf_expanded_data = tf.expand_dims(tf_data, 0)
tf_expanded_mean = tf.expand_dims(tf_mean, 1)
tf_sub = tf.sub(tf_expanded_data, tf_expanded_mean)
tf_sub_square = tf.square(tf_sub)
tf_sub_square_sum = tf.reduce_sum(tf_sub_square, 2, True)
tf_sub_square_sum_02 = tf.squeeze(tf.transpose(tf_sub_square_sum))
tf_index = (-0.5) * tf.div(tf_sub_square_sum_02, tf_covariance)
tf_log_second_term = tf_index
tf_log_first_term = (-0.5 * dim) * tf.log(2 * math.pi * tf_covariance)
def train_mog(data, k, EXP=1e-5):
    '''
    Trains a mixture-of-Gaussians model on zero-mean, unit-variance normalized data.

    Args:
        data: Data (numpy array)
        k: Number of clusters
    Returns:
        Optimal cluster parameters
    '''
    data_len = len(data)
    assert (data_len > 0), "Dataset is empty"
    data_d = len(data[0])

    tf.set_random_seed(time())
    pz = tf.Variable(tf.zeros([1, k]))
    logpz = logsoftmax(pz)  # Enforce simplex constraint over P(z)
    sigma = tf.Variable(tf.ones([k, 1]) * (-3))
    expsigma = tf.exp(sigma)  # Enforce sigma > 0
    mu = tf.Variable(tf.random_normal([k, data_d], mean=0, stddev=0.01))
    x = tf.placeholder(tf.float32, [None, data_d])
    cost = -marginal_log_likelihood(x, logpz, mu, expsigma)
    iter_var = tf.Variable(0)
    optimizer = tf.train.AdamOptimizer(0.03, beta1=0.9, beta2=0.99, epsilon=1e-5)
    train = optimizer.minimize(cost, global_step=iter_var)

    sess = tf.Session()
    sess.run(tf.initialize_all_variables())
    with sess.as_default():
        costs = []
        clusters = []
        best_cost = float('inf')
        last_cost = float('inf')
        print "------------------"
        print "P(z): ", tf.exp(logpz).eval()
        print "Sigma: ", expsigma.eval().reshape((1, k))
        print "Mu: ", mu.eval()
        print "------------------"
        try:
            while True:
                iter_cost = sess.run([cost, train], feed_dict={x: data})[0]
                iter = iter_var.eval()
                costs.append(iter_cost)
                if iter % 100 == 0:
                    print "Iteration:", iter
                    print "Log Likelihood:", -iter_cost
                if iter_cost < best_cost:
                    best_cost = iter_cost
                    clusters = [logpz.eval(), mu.eval(), expsigma.eval()]
                if iter > 5000 or abs(iter_cost - last_cost) < EXP:
                    print "Converged!"
                    break
                else:
                    last_cost = iter_cost
        except KeyboardInterrupt:
            if len(clusters) == 0:
                clusters = [logpz.eval(), mu.eval(), expsigma.eval()]
    return clusters, costs
ITFP = 1.1
ITFS = 20
for v, var in tqdm.tqdm(enumerate(VARS)):
    for r in range(REPS):
        indices, values = sample(6000, size, var=var)
        dns = dense(indices, values, size)
        gold = undense(torch.softmax(dns, dim=1), indices, size)
        naive = softmax_naive(indices, values, size)
        pnorm = utils.logsoftmax(indices, values, size, max_method='pnorm', p=PNP)
        pnorf = utils.logsoftmax(indices, values, size, max_method='pnorm', p=PNFP)
        iters = utils.logsoftmax(indices, values, size, max_method='iteration', p=ITP, its=ITS)
        iterf = utils.logsoftmax(indices, values,
def _cost(self, weights, data, labels):
    return -np.sum(logsoftmax(self._process_layers(weights, data)) * labels) / self.batch_size
def mog():
    # Load the data
    with np.load('mog_purchases.npz') as datafile:
        data = datafile[datafile.keys()[0]]

    # Set constants.
    K = 3
    DATASET_SIZE, DATA_DIM = data.shape
    LEARNINGRATE = 0.05
    ITERATIONS = 10000

    # Initialize tf graph.
    graph = tf.Graph()
    with graph.as_default():
        # Load data into tf.
        tf_data = tf.cast(tf.constant(data), tf.float32)
        # Initialize mu array.
        tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32, stddev=1.0))
        tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                 stddev=1.0 / np.sqrt(DATA_DIM)))
        tf_sig_sq = tf.exp(tf_phi)
        tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                 stddev=1.0 / np.sqrt(DATA_DIM)))
        tf_pi = tf.exp(utils.logsoftmax(tf_psi))
        ed = tf_eucl_dist(tf_data, tf_mu)
        loss = -tf.reduce_sum(utils.reduce_logsumexp(
            tf_log_pdf_clust(tf_data, tf_mu, tf_sig_sq, DATA_DIM) + tf.log(tf_pi),
            reduction_indices=1))
        posterior = tf.exp(log_posterior(tf_data, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
        cluster_hard_assignment = tf.argmax(posterior, 1)
        weight = tf.constant([[0, 0.5, 1.0]])  # TODO: Replace this with linspace as func of K
        cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior), reduction_indices=1)
        optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99,
                                           epsilon=1e-5).minimize(loss)

    # Run session.
    with tf.Session(graph=graph) as session:
        losses = np.zeros(ITERATIONS, dtype=np.float32)
        tf.initialize_all_variables().run()
        #pdb.set_trace()
        for i in range(ITERATIONS):
            mu, sig_sq, psi, pi, ca, ca_soft, post = session.run(
                [tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment,
                 cluster_soft_assignment, posterior])
            _, l, m = session.run([optimizer, loss, tf_mu])
            #l = session.run([loss])
            #m = session.run([tf_mu])
            #losses[i] = l
            if i % 100 == 0:
                print "Loss at iteration %d: " % (i), l

        print "Mu:"
        print mu
        print "Sigma:"
        print sig_sq
        print "Pi:"
        print pi
        print "Posterior:"
        print post
        print "Cluster hard assignment:"
        print ca

        red = [1, 0, 0]
        green = [0, 1, 0]
        blue = [0, 0, 1]
        colours = [red, green, blue]
        colour_list = [colours[ca[i]] for i in range(DATASET_SIZE)]

        # Plot data points labelled by the closest mean
        plt.scatter(data[:, 0], data[:, 1], c=colour_list, marker='.')
        # Plot mean
        plt.scatter(m[:, 0], m[:, 1], marker='h')
        plt.savefig("purchase_kmeans.png")
        #plt.show()
        print m

        down_dim = 2
        mu_dim, top_ind = mog_dim_down(m, sig_sq, down_dim)
        #pdb.set_trace()
        data_2d = np.concatenate((data[:, top_ind[0]][:, None],
                                  data[:, top_ind[1]][:, None]), axis=1)
        mu_2d = np.concatenate((m[:, top_ind[0]][:, None],
                                m[:, top_ind[1]][:, None]), axis=1)
        dicts_2d = {'2d_data': data_2d, 'mu': mu_2d}
        np.savez_compressed('purchases_2d', data_2d)
        np.savez_compressed('mu_2d', mu_2d)

        # Plot soft assignment scatterplots
        # TODO: Maybe redo it so that C = C1*P(z=1|x) + C2*P(z=2|x) + C3*P(z=3|x),
        # where C1 = Red, C2 = Green, C3 = Blue. Right now using a colourmap.
        print "Cluster soft assignment:"
        print ca_soft
        print "Top dimensions: %d %d" % (top_ind[0], top_ind[1])
        plt.figure()
        plt.scatter(data[:, top_ind[0]], data[:, top_ind[1]], c=ca_soft, cmap='jet', marker='.')
        plt.scatter(m[:, top_ind[0]], m[:, top_ind[1]], marker='h')
        plt.title("Soft Assignment to Gaussian Cluster")  # TODO: Add axis labels
        plt.savefig("purchase_mog.png")
        #plt.show()

    return mu, sig_sq
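# mog() above and the MoG snippets below rely on tf_log_pdf_clust and log_posterior,
# which are not shown. These are hedged sketches of what they presumably compute: the
# per-cluster log density of an isotropic Gaussian, and the log posterior
# log P(z = k | x) obtained by subtracting a log-sum-exp normaliser.
# Assumed shapes: data [N, D], mu [K, D], sig_sq and pi [1, K].
import numpy as np
import tensorflow as tf

def tf_log_pdf_clust(data, mu, sig_sq, dim):
    # Squared Euclidean distance of each point to each cluster mean: [N, K]
    sq_dist = tf.reduce_sum(
        tf.square(tf.expand_dims(data, 1) - tf.expand_dims(mu, 0)), 2)
    # log N(x | mu_k, sig_sq_k * I): [N, K]
    return -0.5 * dim * tf.log(2 * np.pi * sig_sq) - sq_dist / (2 * sig_sq)

def log_posterior(data, mu, sig_sq, pi, dim):
    # log P(z = k | x) = log pi_k + log N(x | mu_k) - logsumexp_j [log pi_j + log N(x | mu_j)]
    log_joint = tf_log_pdf_clust(data, mu, sig_sq, dim) + tf.log(pi)
    max_log = tf.reduce_max(log_joint, 1, keep_dims=True)
    log_norm = max_log + tf.log(
        tf.reduce_sum(tf.exp(log_joint - max_log), 1, keep_dims=True))
    return log_joint - log_norm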
def main2_2_3(data):
    # Hold out 1/3 of the data for validation and for each value of K = 1, 2, 3, 4, 5, train a MoG
    # model. For each K, compute and report the loss function for the validation data and explain
    # which value of K is best. Include a 2D scatter plot of data points colored by their cluster
    # assignments.

    # Load the data
    data2D = np.load(data)

    # Set constants.
    DATASET_SIZE, DATA_DIM = data2D.shape
    LEARNINGRATE = 0.01
    ITERATIONS = 750
    Ks = range(1, 6)

    third = DATASET_SIZE / 3
    val_data = data2D[:third]
    train_data = data2D[third:]

    for K in Ks:
        # Initialize tf graph.
        graph = tf.Graph()
        with graph.as_default():
            # Training
            # Load data into tf.
            tf_data2D_train = tf.cast(tf.constant(train_data), tf.float32)
            # Initialize mu array.
            tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32, stddev=1.0))
            tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                     stddev=1.0 / np.sqrt(DATA_DIM)))
            tf_sig_sq = tf.exp(tf_phi)
            tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                     stddev=1.0 / np.sqrt(DATA_DIM)))
            # tf_pi = tf.nn.softmax(tf_psi)  # TODO: Use the utils function instead of tf.nn.softmax
            tf_pi = tf.exp(utils.logsoftmax(tf_psi))
            ed = tf_eucl_dist(tf_data2D_train, tf_mu)
            loss = -tf.reduce_sum(utils.reduce_logsumexp(
                tf_log_pdf_clust(tf_data2D_train, tf_mu, tf_sig_sq, DATA_DIM) + tf.log(tf_pi),
                reduction_indices=1))
            optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99,
                                               epsilon=1e-5).minimize(loss)

            # Validation
            # Load data into tf.
            tf_data2D_val = tf.cast(tf.constant(val_data), tf.float32)
            loss_v = -tf.reduce_sum(utils.reduce_logsumexp(
                tf_log_pdf_clust(tf_data2D_val, tf_mu, tf_sig_sq, DATA_DIM) + tf.log(tf_pi),
                reduction_indices=1))
            posterior = tf.exp(log_posterior(tf_data2D_val, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
            cluster_hard_assignment = tf.argmax(posterior, 1)
            weight = tf.cast(tf.constant(np.linspace(0.0, 1.0, K)), tf.float32)
            cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior), reduction_indices=1)

        # Run session.
        with tf.Session(graph=graph) as session:
            losses = np.zeros(ITERATIONS, dtype=np.float32)
            tf.initialize_all_variables().run()
            for i in range(ITERATIONS):
                mu, sig_sq, psi, pi, ca, ca_soft, post = session.run(
                    [tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment,
                     cluster_soft_assignment, posterior])
                _, l, l_v, m = session.run([optimizer, loss, loss_v, tf_mu])
                losses[i] = l
                if i % 10 == 0:
                    print "Loss at iteration %d: " % (i), l_v

            print "Mu:"
            print mu
            print "Sigma:"
            print sig_sq
            print "Pi:"
            print pi
            print "Posterior:"
            print post
            print "Cluster hard assignment:"
            print ca

            red = [1, 0, 0]
            green = [0, 1, 0]
            blue = [0, 0, 1]
            cyan = [0, 1, 1]
            yellow = [1, 1, 0]
            colours = [red, green, blue, cyan, yellow]
            colour_list = [colours[ca[i]] for i in range(ca.shape[0])]
            # print colour_list

            # Plot data points labelled by the closest mean
            plt.figure()
            plt.scatter(val_data[:, 0], val_data[:, 1], c=colour_list, marker='.')
            # Plot mean
            plt.scatter(m[:, 0], m[:, 1], marker='h', s=200)
            plt.show()
            print m

            # Plot soft assignment scatterplots
            print "Cluster soft assignment:"
            print ca_soft
            plt.figure()
            plt.scatter(val_data[:, 0], val_data[:, 1], c=ca_soft, marker='.')
            plt.scatter(m[:, 0], m[:, 1], marker='h', s=200)
            plt.title("Soft Assignment to Gaussian Cluster")
            plt.show()

    return
def mog_k3():
    # Load the data
    data2D = np.load("data2D.npy")

    # Set constants.
    K = 3
    DATASET_SIZE, DATA_DIM = data2D.shape
    LEARNINGRATE = 0.01
    ITERATIONS = 750

    # Initialize tf graph.
    graph = tf.Graph()
    with graph.as_default():
        # Load data into tf.
        tf_data2D = tf.cast(tf.constant(data2D), tf.float32)
        # Initialize mu array.
        tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32, stddev=1.0))
        tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                 stddev=1.0 / np.sqrt(DATA_DIM)))
        tf_sig_sq = tf.exp(tf_phi)
        tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,
                                                 stddev=1.0 / np.sqrt(DATA_DIM)))
        tf_pi = tf.exp(utils.logsoftmax(tf_psi))
        ed = tf_eucl_dist(tf_data2D, tf_mu)
        loss = -tf.reduce_sum(utils.reduce_logsumexp(
            tf_log_pdf_clust(tf_data2D, tf_mu, tf_sig_sq, DATA_DIM) + tf.log(tf_pi),
            reduction_indices=1))
        posterior = tf.exp(log_posterior(tf_data2D, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
        cluster_hard_assignment = tf.argmax(posterior, 1)
        weight = tf.constant([[0, 0.5, 1.0]])  # TODO: Replace this with linspace as func of K
        cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior), reduction_indices=1)
        optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99,
                                           epsilon=1e-5).minimize(loss)

    # Run session.
    with tf.Session(graph=graph) as session:
        losses = np.zeros(ITERATIONS, dtype=np.float32)
        tf.initialize_all_variables().run()
        for i in range(ITERATIONS):
            mu, sig_sq, psi, pi, ca, ca_soft, post = session.run(
                [tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment,
                 cluster_soft_assignment, posterior])
            _, l, m = session.run([optimizer, loss, tf_mu])
            losses[i] = l
            if i % 100 == 0:
                print "Loss at iteration %d: " % (i), l

        print "Mu:"
        print mu
        print "Sigma:"
        print sig_sq
        print "Pi:"
        print pi
        print "Posterior:"
        print post
        print "Cluster hard assignment:"
        print ca

        red = [1, 0, 0]
        green = [0, 1, 0]
        blue = [0, 0, 1]
        colours = [red, green, blue]
        colour_list = [colours[ca[i]] for i in range(DATASET_SIZE)]

        # Plot data points labelled by the closest mean
        plt.scatter(data2D[:, 0], data2D[:, 1], c=colour_list, marker='.')
        # Plot mean
        plt.scatter(m[:, 0], m[:, 1], marker='h')
        plt.show()
        print m

        # Plot soft assignment scatterplots
        # TODO: Maybe redo it so that C = C1*P(z=1|x) + C2*P(z=2|x) + C3*P(z=3|x),
        # where C1 = Red, C2 = Green, C3 = Blue. Right now using colourmap 'viridis'.
        print "Cluster soft assignment:"
        print ca_soft
        plt.figure()
        plt.scatter(data2D[:, 0], data2D[:, 1], c=ca_soft, cmap='viridis', marker='.')
        plt.scatter(m[:, 0], m[:, 1], marker='h')
        plt.title("Soft Assignment to Gaussian Cluster")  # TODO: Add axis labels
        plt.show()

    return
def initialize_pi(shape):
    temp = tf.ones(shape)
    log = logsoftmax(temp)
    return tf.exp(log)