Example #1
def build_graph(B, K, learning_rate, D):

    # Parameters
    x = tf.placeholder(tf.float32, [None, 1, D], name="x")  # input data
    mu_T = tf.Variable(tf.random_normal([1, K, D], mean=0, stddev=0.001),
                       name="mu_T")  # mu transpose
    phi = tf.Variable(tf.random_normal([1, K], mean=-1, stddev=0),
                      name="phi")  # stddev=0 makes this a constant init at -1
    psi = tf.Variable(tf.random_normal([1, K], mean=10, stddev=0),
                      name="psi")  # stddev=0 makes this a constant init at 10
    var = tf.exp(phi)  # variance, constrained to (0, inf)
    log_pi = logsoftmax(psi)  # log mixing weights: sum(exp(log_pi)) = 1

    #Q2.1.2
    log_prob_x_given_z = compute_log_prob_x_given_z(x, mu_T, var, D)
    #Q2.1.3
    log_prob_z_given_x = compute_log_prob_z_given_x(log_prob_x_given_z, log_pi)

    argmaxs = tf.argmax(log_prob_z_given_x, 1)  # hard cluster assignments
    mu = tf.reduce_sum(mu_T, 0)  # drop the leading singleton axis: [K, D]

    # Loss: negative mean marginal log-likelihood, -mean_B log sum_k P(x|z=k) P(z=k)
    total_loss = -tf.reduce_mean(
        reduce_logsumexp(tf.add(log_prob_x_given_z, log_pi), keep_dims=True))

    # Adam Optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=0.9,
                                       beta2=0.99,
                                       epsilon=1e-5)
    train = optimizer.minimize(loss=total_loss)

    pi = tf.exp(log_pi)
    return x, mu_T, train, mu, total_loss, argmaxs, log_prob_x_given_z, pi, var
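Note: most of the TensorFlow examples on this page import logsoftmax (and reduce_logsumexp) from a course-style utils module that the listing does not include (Example #5 below uses an unrelated sparse PyTorch logsoftmax). A minimal numerically stable sketch of what those helpers typically look like, assuming TF1-era reduction_indices/keep_dims arguments:

import tensorflow as tf

def reduce_logsumexp(input_tensor, reduction_indices=1, keep_dims=False):
    # Stable log(sum(exp(x))): subtract the per-row max before exponentiating.
    max_val = tf.reduce_max(input_tensor, reduction_indices, keep_dims=True)
    lse = tf.log(tf.reduce_sum(tf.exp(input_tensor - max_val),
                               reduction_indices, keep_dims=True)) + max_val
    if not keep_dims:
        lse = tf.squeeze(lse, [reduction_indices])
    return lse

def logsoftmax(input_tensor, reduction_indices=1):
    # log softmax(x) = x - logsumexp(x); exp of the result sums to 1 along the axis.
    return input_tensor - reduce_logsumexp(input_tensor, reduction_indices,
                                           keep_dims=True)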
Example #2
def buildGraph(k, dimension, EXP=1e-5):

    tf.set_random_seed(time())  # requires: from time import time

    pz = tf.Variable(tf.zeros([1, k]))
    logpz = logsoftmax(pz)  # Enforce simplex constraint over P(z)

    sigma = tf.Variable(tf.ones([k, 1]) * (-3))
    expsigma = tf.exp(sigma)  # Enforce sigma > 0

    mu = tf.Variable(tf.random_normal([k, dimension], mean=0, stddev=0.01),
                     dtype=tf.float32)
    x = tf.placeholder(tf.float32, [None, dimension])

    cost = -marginal_log_likelihood(x, logpz, mu, expsigma)
    iter_var = tf.Variable(0)
    optimizer = tf.train.AdamOptimizer(0.03,
                                       beta1=0.9,
                                       beta2=0.99,
                                       epsilon=1e-5)
    train = optimizer.minimize(cost, global_step=iter_var)

    return x, mu, cost, expsigma, logpz, train
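A hypothetical driver for buildGraph (not part of the original listing), assuming data2D.npy and the normalization used in Example #3 below, and that the marginal_log_likelihood helper buildGraph depends on is in scope:

import numpy as np
import tensorflow as tf

x, mu, cost, expsigma, logpz, train = buildGraph(k=3, dimension=2)

raw = np.float32(np.load('data2D.npy'))
data = (raw - raw.mean()) / raw.std()

sess = tf.Session()
sess.run(tf.initialize_all_variables())
for step in range(500):
    c, _ = sess.run([cost, train], feed_dict={x: data})
    if step % 100 == 0:
        print "step", step, "negative log-likelihood:", c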

Example #3

data2D = np.float32(np.load('data2D.npy'))
data = (data2D - data2D.mean()) / data2D.std()  # normalize to zero mean, unit variance
#data = np.float32(np.load('data2D.npy'))  # (unnormalized alternative)
k = 5

num_sample = data.shape[0]
dim = data.shape[1]

tf_mean = tf.Variable(tf.random_normal([k, dim], mean=0.0, stddev=1.0, dtype=tf.float32))
#tf_mean = tf.Variable(tf.random_uniform([k, dim], minval=-3, maxval=3, dtype=tf.float32))
tf_covariance = tf.Variable(0.5 * tf.exp(tf.random_normal([k], mean=0.0, stddev=1.0, dtype=tf.float32)))
#phi = tf.Variable(tf.random_normal([1, k], mean=0.0, stddev=1.0, dtype=tf.float32))
phi = tf.Variable(tf.truncated_normal([1, k], mean=0.0, stddev=1.0, dtype=tf.float32))
log_pi = utils.logsoftmax(phi)

#tf_data = tf.Variable(data)
tf_data = tf.placeholder(tf.float32, shape=(num_sample, dim))

tf_expanded_data = tf.expand_dims(tf_data, 0)   # [1, N, D]
tf_expanded_mean = tf.expand_dims(tf_mean, 1)   # [K, 1, D]

tf_sub = tf.sub(tf_expanded_data, tf_expanded_mean)        # [K, N, D]
tf_sub_square = tf.square(tf_sub)
tf_sub_square_sum = tf.reduce_sum(tf_sub_square, 2, True)  # [K, N, 1]
tf_sub_square_sum_02 = tf.squeeze(tf.transpose(tf_sub_square_sum))  # [N, K]
tf_index = (-0.5) * tf.div(tf_sub_square_sum_02, tf_covariance)
tf_log_second_term = tf_index  # -||x - mu_k||^2 / (2 sigma_k^2)
tf_log_first_term = (-0.5 * dim) * tf.log(2 * math.pi * tf_covariance)  # -(D/2) log(2 pi sigma_k^2)
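The fragment stops after computing the two pieces of the isotropic Gaussian log-density. Under the same naming it would presumably continue by combining them with log P(z) into the marginal log-likelihood loss; a sketch (not from the original listing):

# log N(x | mu_k, sigma_k^2 I) = -(D/2) log(2 pi sigma_k^2) - ||x - mu_k||^2 / (2 sigma_k^2)
tf_log_pdf = tf_log_first_term + tf_log_second_term   # [N, K]
tf_log_joint = tf_log_pdf + log_pi                    # add log P(z), broadcast over N
loss = -tf.reduce_sum(utils.reduce_logsumexp(tf_log_joint, reduction_indices=1))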
Example #4
def train_mog(data, k, EXP=1e-5):
    '''
        Trains a mixture-of-Gaussians model on zero-mean,
        unit-variance normalized data.
    Args:
        data: Data (numpy array)
        k: Number of clusters

    Returns:
        Best cluster parameters found and the per-iteration costs
    '''
    data_len = len(data)
    assert (data_len > 0), "Dataset is empty"
    data_d = len(data[0])
    tf.set_random_seed(time())

    pz = tf.Variable(tf.zeros([1, k]))
    logpz = logsoftmax(pz)  # Enforce simplex constraint over P(z)

    sigma = tf.Variable(tf.ones([k, 1]) * (-3))
    expsigma = tf.exp(sigma)  # Enforce sigma > 0

    mu = tf.Variable(tf.random_normal([k, data_d], mean=0, stddev=0.01))
    x = tf.placeholder(tf.float32, [None, len(data[0])])

    cost = -marginal_log_likelihood(x, logpz, mu, expsigma)

    iter_var = tf.Variable(0)
    optimizer = tf.train.AdamOptimizer(0.03,
                                       beta1=0.9,
                                       beta2=0.99,
                                       epsilon=1e-5)
    train = optimizer.minimize(cost, global_step=iter_var)

    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    with sess.as_default():
        costs = []
        clusters = []  # best parameters seen so far; also guards the interrupt handler below
        best_cost = float('inf')
        last_cost = float('inf')
        print "------------------"
        print "P(x):  ", tf.exp(logpz).eval()
        print "Sigma: ", expsigma.eval().reshape((1, k))
        print "Mu:    ", mu.eval()
        print "------------------"
        try:
            while True:
                iter_cost = sess.run([cost, train], feed_dict={x: data})[0]

                iter = iter_var.eval()
                costs.append(iter_cost)
                if iter % 100 == 0:
                    print "Iteration:", iter
                    print "Log Likelihood:", -iter_cost

                if iter_cost < best_cost:
                    best_cost = iter_cost
                    clusters = [logpz.eval(), mu.eval(), expsigma.eval()]

                if iter > 5000 or abs(iter_cost - last_cost) < EXP:
                    print "Stopping: converged or iteration limit reached"
                    break
                else:
                    last_cost = iter_cost

        except KeyboardInterrupt:
            if len(clusters) == 0:
                clusters = [logpz.eval(), mu.eval(), expsigma.eval()]

    return clusters, costs
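A hypothetical call to train_mog (not in the original listing), reusing the data2D.npy normalization from Example #3:

import numpy as np

raw = np.float32(np.load('data2D.npy'))
normalized = (raw - raw.mean()) / raw.std()  # zero mean, unit variance, as the docstring expects

clusters, costs = train_mog(normalized, k=3)
log_pz, mu, expsigma = clusters
print "Mixing proportions:", np.exp(log_pz)
print "Means:", mu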
Example #5
File: plots.py Project: pbloem/blog
ITFP = 1.1
ITFS = 20

for v, var in tqdm.tqdm(enumerate(VARS)):
    for r in range(REPS):

        indices, values = sample(6000, size, var=var)
        dns = dense(indices, values, size)

        gold = undense(torch.softmax(dns, dim=1), indices, size)

        naive = softmax_naive(indices, values, size)
        pnorm = utils.logsoftmax(indices,
                                 values,
                                 size,
                                 max_method='pnorm',
                                 p=PNP)
        pnorf = utils.logsoftmax(indices,
                                 values,
                                 size,
                                 max_method='pnorm',
                                 p=PNFP)
        iters = utils.logsoftmax(indices,
                                 values,
                                 size,
                                 max_method='iteration',
                                 p=ITP,
                                 its=ITS)
        iterf = utils.logsoftmax(indices,
                                 values,
                                 size,
                                 max_method='iteration',
                                 p=ITFP,
                                 its=ITFS)  # listing truncated here; arguments inferred from the ITFP/ITFS constants above
Example #6
def _cost(self, weights, data, labels):
    # Average cross-entropy of the softmax outputs against one-hot labels.
    return -np.sum(logsoftmax(self._process_layers(weights, data)) *
                   labels) / self.batch_size
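With one-hot labels, -sum(logsoftmax(z) * y) picks out the negative log-probability of the true class. A small self-contained check, using a hypothetical NumPy logsoftmax (not the one from the listing):

import numpy as np

def logsoftmax(z):
    # stable log-softmax over the last axis
    z = z - z.max(axis=-1, keepdims=True)
    return z - np.log(np.exp(z).sum(axis=-1, keepdims=True))

logits = np.array([[2.0, 0.5, -1.0]])
labels = np.array([[1.0, 0.0, 0.0]])       # one-hot: true class is 0
ce = -np.sum(logsoftmax(logits) * labels)  # equals -log P(class 0)
print ce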
Example #9
def mog():
    # Load the data
    with np.load('mog_purchases.npz') as datafile:
        data = datafile[datafile.keys()[0]]

    # Set constants.
    K = 3
    DATASET_SIZE, DATA_DIM  = data.shape
    LEARNINGRATE = 0.05
    ITERATIONS = 10000
    
    # Initialize tf graph.
    graph = tf.Graph()
    with graph.as_default():
        # Load data into tf.
        tf_data = tf.cast(tf.constant(data), tf.float32)
        
        # Initialize mu array.
        tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32, stddev=1.0))
        tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0, stddev=1.0/np.sqrt(DATA_DIM)))
        tf_sig_sq = tf.exp(tf_phi)
        tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,stddev=1.0/np.sqrt(DATA_DIM)))
        tf_pi = tf.exp(utils.logsoftmax(tf_psi))
    
        ed = tf_eucl_dist(tf_data, tf_mu)
        loss = -tf.reduce_sum(utils.reduce_logsumexp(tf_log_pdf_clust(tf_data,tf_mu,tf_sig_sq, DATA_DIM)+tf.log(tf_pi), reduction_indices=1))
        posterior = tf.exp(log_posterior(tf_data, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
        cluster_hard_assignment = tf.argmax(posterior, 1)
        weight = tf.constant([[0, 0.5, 1.0]]) # TODO: Replace this with linspace as func of K
        cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior), reduction_indices=1)
        optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99, epsilon=1e-5).minimize(loss)

    # Run session.
    with tf.Session(graph=graph) as session:
        
        losses = np.zeros(ITERATIONS, dtype=np.float32)
        tf.initialize_all_variables().run()
        #pdb.set_trace()

        for i in range(ITERATIONS):
            mu, sig_sq, psi, pi, ca, ca_soft, post = session.run([tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment, cluster_soft_assignment, posterior])
            _, l, m = session.run([optimizer, loss, tf_mu])
            losses[i] = l
            if i % 100 == 0:
                print "Loss at iteration %d:" % i, l
            
        print "Mu:"
        print mu
        print "Sigma:"
        print sig_sq
        print "Pi:"
        print pi
        print "Posterior:"
        print post
        print "Cluster hard assignment:"
        print ca
        red = [1, 0, 0]
        green = [0, 1, 0]
        blue = [0, 0, 1]
        colours = [red, green, blue]
        colour_list = [colours[ca[i]] for i in range(DATASET_SIZE)]
        
        # Plot data points labelled by the closest mean  
        plt.scatter(data[:,0], data[:,1], c=colour_list, marker='.')
        # Plot mean
        plt.scatter(m[:,0], m[:,1], marker='h')
        plt.savefig("purchase_kmeans.png")
        #plt.show()
        print m
        
        down_dim = 2
        mu_dim, top_ind = mog_dim_down(m, sig_sq, down_dim)
        data_2d = np.concatenate((data[:, top_ind[0]][:, None],
                                  data[:, top_ind[1]][:, None]), axis=1)
        mu_2d = np.concatenate((m[:, top_ind[0]][:, None],
                                m[:, top_ind[1]][:, None]), axis=1)
        dicts_2d = {'2d_data': data_2d, 'mu': mu_2d}

        np.savez_compressed('purchases_2d', data_2d)
        np.savez_compressed('mu_2d', mu_2d)
        # Plot soft assignment scatterplots
        # TODO: Maybe redo it so that C = C1*P(z=1|x) + C2*P(z=2|x) + C3*P(z=3|x),
        # where C1 = Red, C2 = Green, C3 = Blue. Right now using colourmap 'jet'.
        print "Cluster soft assignment:"
        print ca_soft
        print "Top dimensions: %d %d" % (top_ind[0], top_ind[1])
        plt.figure()
        plt.scatter(data[:,top_ind[0]], data[:,top_ind[1]], c=ca_soft, cmap='jet', marker='.')
        plt.scatter(m[:,top_ind[0]], m[:,top_ind[1]], marker='h')
        plt.title("Soft Assignment to Gaussian Cluster")
        # TODO: Add plot title, axis labels
        plt.savefig("purchase_mog.png")

        #plt.show()
    
    return mu, sig_sq
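tf_log_pdf_clust and log_posterior are called throughout Examples #9-#11 but never shown. Under the listing's naming they presumably look something like this (a sketch, assuming isotropic Gaussians and the utils helpers sketched after Example #1):

import numpy as np
import tensorflow as tf

def tf_log_pdf_clust(x, mu, sig_sq, dim):
    # [N, K] matrix of log N(x_n | mu_k, sig_sq_k * I)
    sq_dist = tf.reduce_sum(tf.square(tf.expand_dims(x, 1) -
                                      tf.expand_dims(mu, 0)), 2)  # [N, K]
    return -0.5 * dim * tf.log(2 * np.pi * sig_sq) - sq_dist / (2 * sig_sq)

def log_posterior(x, mu, sig_sq, pi, dim):
    # Bayes' rule in log space: log P(z|x) = log P(x|z) + log P(z) - log P(x)
    log_joint = tf_log_pdf_clust(x, mu, sig_sq, dim) + tf.log(pi)
    return log_joint - utils.reduce_logsumexp(log_joint, reduction_indices=1,
                                              keep_dims=True)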
Example #10
def main2_2_3(data):
    # Hold out 1/3 of the data for validation and, for each value of K = 1, 2, 3, 4, 5,
    # train a MoG model. For each K, compute and report the loss function for the
    # validation data and explain which value of K is best. Include a 2D scatter plot
    # of data points coloured by their cluster assignments.

    # Load the data
    data2D = np.load(data)

    # Set constants.
    DATASET_SIZE, DATA_DIM  = data2D.shape
    LEARNINGRATE = 0.01
    ITERATIONS = 750
    
    Ks = range(1, 6)
    
    third = DATASET_SIZE / 3
    val_data = data2D[:third]
    train_data = data2D[third:]
    
    for K in Ks:
        # Initialize tf graph.
        graph = tf.Graph()
        with graph.as_default():
            # Training
            # Load data into tf.
            tf_data2D_train = tf.cast(tf.constant(train_data), tf.float32)

            # Initialize mu array.
            tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32, stddev=1.0))
            tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0, stddev=1.0/np.sqrt(DATA_DIM)))
            tf_sig_sq = tf.exp(tf_phi)
            tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,stddev=1.0/np.sqrt(DATA_DIM)))
    #         tf_pi = tf.nn.softmax(tf_psi) # TODO: Use the utils function instead of the tf.nn.softmax
            tf_pi = tf.exp(utils.logsoftmax(tf_psi))

            ed = tf_eucl_dist(tf_data2D_train, tf_mu)
            loss = -tf.reduce_sum(utils.reduce_logsumexp(tf_log_pdf_clust(tf_data2D_train,tf_mu,tf_sig_sq, DATA_DIM)+tf.log(tf_pi), reduction_indices=1))
            
            optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99, epsilon=1e-5).minimize(loss)
            
            # Validation
            # Load data into tf.
            tf_data2D_val = tf.cast(tf.constant(val_data), tf.float32)
            loss_v = -tf.reduce_sum(utils.reduce_logsumexp(tf_log_pdf_clust(tf_data2D_val,tf_mu,tf_sig_sq, DATA_DIM)+tf.log(tf_pi), reduction_indices=1))
            posterior = tf.exp(log_posterior(tf_data2D_val, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
            cluster_hard_assignment = tf.argmax(posterior, 1)
            weight = tf.cast(tf.constant(np.linspace(0.0, 1.0, K)), tf.float32)
            cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior), reduction_indices=1)
            
        # Run session.
        with tf.Session(graph=graph) as session:

            losses = np.zeros(ITERATIONS, dtype=np.float32)
            tf.initialize_all_variables().run()

            for i in range(ITERATIONS):
                mu, sig_sq, psi, pi, ca, ca_soft, post = session.run([tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment, cluster_soft_assignment, posterior])
                _, l, l_v, m = session.run([optimizer, loss, loss_v, tf_mu])
                losses[i] = l
                if i % 10 == 0:
                    print "Validation loss at iteration %d:" % i, l_v

            print "Mu:"
            print mu
            print "Sigma:"
            print sig_sq
            print "Pi:"
            print pi
            print "Posterior:"
            print post
            print "Cluster hard assignment:"
            print ca
            
            red = [1, 0, 0]
            green = [0, 1, 0]
            blue = [0, 0, 1]
            cyan = [0, 1, 1]
            yellow = [1, 1, 0]
            colours = [red, green, blue, cyan, yellow]
            
            colour_list = [colours[ca[i]] for i in range(ca.shape[0])]
    #         print colour_list

            # Plot data points labelled by the closest mean  
            plt.figure()
            plt.scatter(val_data[:,0], val_data[:,1], c=colour_list, marker='.')
            # Plot mean
            plt.scatter(m[:,0], m[:,1], marker='h', s=200)
            plt.show()
            print m

            # Plot soft assignment scatterplots
            print "Cluster soft assignment:"
            print ca_soft
            plt.figure()
            plt.scatter(val_data[:,0], val_data[:,1], c=ca_soft, marker='.')
            plt.scatter(m[:,0], m[:,1], marker='h', s=200)
            plt.title("Soft Assignment to Gaussian Cluster")
            plt.show()

    return
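main2_2_3 prints the per-K validation losses but leaves the comparison to the reader. A hypothetical wrapper (train_one_k is not in the listing) that picks K by final validation loss might look like:

val_losses = {}
for K in range(1, 6):
    # hypothetical helper: trains a MoG with K clusters and returns
    # the final validation loss instead of plotting
    val_losses[K] = train_one_k(K, train_data, val_data)
best_K = min(val_losses, key=val_losses.get)
print "Best K by validation loss:", best_K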
Example #11
def mog_k3():
    # Load the data
    data2D = np.load("data2D.npy")

    # Set constants.
    K = 3
    DATASET_SIZE, DATA_DIM  = data2D.shape
    LEARNINGRATE = 0.01
    ITERATIONS = 750
    
    # Initialize tf graph.
    graph = tf.Graph()
    with graph.as_default():
        # Load data into tf.
        tf_data2D = tf.cast(tf.constant(data2D), tf.float32)
        
        # Initialize mu array.
        tf_mu = tf.Variable(tf.truncated_normal([K, DATA_DIM], dtype=tf.float32, stddev=1.0))
        tf_phi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0, stddev=1.0/np.sqrt(DATA_DIM)))
        tf_sig_sq = tf.exp(tf_phi)
        tf_psi = tf.Variable(tf.truncated_normal([1, K], dtype=tf.float32, mean=1.0,stddev=1.0/np.sqrt(DATA_DIM)))
        tf_pi = tf.exp(utils.logsoftmax(tf_psi))
    
        ed = tf_eucl_dist(tf_data2D, tf_mu)
        loss = -tf.reduce_sum(utils.reduce_logsumexp(tf_log_pdf_clust(tf_data2D,tf_mu,tf_sig_sq, DATA_DIM)+tf.log(tf_pi), reduction_indices=1))
        posterior = tf.exp(log_posterior(tf_data2D, tf_mu, tf_sig_sq, tf_pi, DATA_DIM))
        cluster_hard_assignment = tf.argmax(posterior, 1)
        weight = tf.constant([[0, 0.5, 1.0]]) # TODO: Replace this with linspace as func of K
        cluster_soft_assignment = tf.reduce_sum(tf.mul(weight, posterior), reduction_indices=1)
        optimizer = tf.train.AdamOptimizer(LEARNINGRATE, beta1=0.9, beta2=0.99, epsilon=1e-5).minimize(loss)

    # Run session.
    with tf.Session(graph=graph) as session:
        
        losses = np.zeros(ITERATIONS, dtype=np.float32)
        tf.initialize_all_variables().run()

        for i in range(ITERATIONS):
            mu, sig_sq, psi, pi, ca, ca_soft, post = session.run([tf_mu, tf_sig_sq, tf_psi, tf_pi, cluster_hard_assignment, cluster_soft_assignment, posterior])
            _, l, m = session.run([optimizer, loss, tf_mu])
            losses[i] = l
            if i % 100 == 0:
                print "Loss at iteration %d: " % (i), l 
            
        print "Mu:"
        print mu
        print "Sigma:"
        print sig_sq
        print "Pi:"
        print pi
        print "Posterior:"
        print post
        print "Cluster hard assignment:"
        print ca
        red = [1, 0, 0]
        green = [0, 1, 0]
        blue = [0, 0, 1]
        colours = [red, green, blue]
        colour_list = [colours[ca[i]] for i in range(DATASET_SIZE)]
        
        # Plot data points labelled by the closest mean  
        plt.scatter(data2D[:,0], data2D[:,1], c=colour_list, marker='.')
        # Plot mean
        plt.scatter(m[:,0], m[:,1], marker='h')
        plt.show()
        print m
        
        # Plot soft assignment scatterplots
        # TODO: Maybe redo it so that C = C1*P(z=1|x) + C2*P(z=2|x) + C3*P(z=3|x),
        # where C1 = Red, C2 = Green, C3 = Blue. Right now using colourmap 'viridis'.
        print "Cluster soft assignment:"
        print ca_soft
        plt.figure()
        plt.scatter(data2D[:,0], data2D[:,1], c=ca_soft, cmap='viridis', marker='.')
        plt.scatter(m[:,0], m[:,1], marker='h')
        plt.title("Soft Assignment to Gaussian Cluster")
        # TODO: Add plot title, axis labels
        plt.show()
    
    return
Example #12
def initialize_pi(shape):
    # logsoftmax of a constant vector is uniform, so this returns a uniform
    # probability vector, e.g. each entry 1/K for shape [1, K].
    temp = tf.ones(shape)
    log = logsoftmax(temp)
    return tf.exp(log)
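For instance, a quick check in a TF1 session (assumed, as in the other examples):

sess = tf.Session()
print sess.run(initialize_pi([1, 4]))  # [[0.25 0.25 0.25 0.25]]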