def mmsb(N, K, data):
  # sparsity
  rho = 0.3

  # MODEL
  # shared probability of belonging to each of the K blocks
  gamma = Dirichlet(concentration=tf.ones([K]))
  # block connectivity
  Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
  # one-hot block membership for each of the N nodes
  Z = Multinomial(total_count=1.0, probs=gamma, sample_shape=N)
  # adjacency
  X = Bernoulli(probs=(1 - rho) * tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

  # INFERENCE (EM algorithm)
  qgamma = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([K]))))
  qPi = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, K]))))
  qZ = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, K]))))
  # (Alternative: variational inference with Normal(loc, softplus(scale))
  # posteriors for gamma, Pi, Z and ed.KLqp in place of PointMass/ed.MAP.)

  inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: data})

  n_iter = 6000
  inference.initialize(optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
                       n_iter=n_iter)
  tf.global_variables_initializer().run()
  for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
  inference.finalize()

  print('qgamma after: ', qgamma.mean().eval())
  return qZ.mean().eval(), qPi.eval()
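# A minimal usage sketch for mmsb() above (an addition, not original code):
# builds a synthetic two-block graph and recovers block assignments. Assumes
# Edward 1.x with the same imports as the surrounding snippets.
import edward as ed
import numpy as np

sess = ed.get_session()  # mmsb() runs TensorFlow ops in the default session
N, K = 40, 2
# dense within-block, sparse between-block connection probabilities
block_probs = np.kron(np.array([[0.8, 0.05], [0.05, 0.8]]),
                      np.ones([N // K, N // K]))
X_synth = (np.random.rand(N, N) < block_probs).astype(np.float32)
Z_est, Pi_est = mmsb(N, K, X_synth)
print(Z_est.argmax(axis=1))  # estimated block index for each node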
def run(self, adj_mat, n_iter=1000):
  assert adj_mat.shape[0] == adj_mat.shape[1]
  n_node = adj_mat.shape[0]

  # model
  gamma = Dirichlet(concentration=tf.ones([self.n_cluster]))
  Pi = Beta(concentration0=tf.ones([self.n_cluster, self.n_cluster]),
            concentration1=tf.ones([self.n_cluster, self.n_cluster]))
  Z = Multinomial(total_count=1., probs=gamma, sample_shape=n_node)
  X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

  # inference (point estimation)
  qgamma = PointMass(params=tf.nn.softmax(
      tf.Variable(tf.random_normal([self.n_cluster]))))
  qPi = PointMass(params=tf.nn.sigmoid(
      tf.Variable(tf.random_normal([self.n_cluster, self.n_cluster]))))
  qZ = PointMass(params=tf.nn.softmax(
      tf.Variable(tf.random_normal([n_node, self.n_cluster]))))

  # MAP estimation
  inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: adj_mat})
  inference.initialize(n_iter=n_iter)

  tf.global_variables_initializer().run()
  for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
  inference.finalize()

  return qZ.mean().eval().argmax(axis=1)
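# Hedged usage note for run() above: the enclosing class is not shown in the
# snippet; any host object exposing self.n_cluster works. A hypothetical
# wrapper (the class name and constructor are assumptions):
class MMSBClustering(object):
  def __init__(self, n_cluster):
    self.n_cluster = n_cluster

  run = run  # bind the method defined above

# labels = MMSBClustering(n_cluster=2).run(adj_mat, n_iter=500)
# 'labels' holds one cluster index per node (argmax of qZ).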
def test_normalnormal_run(self):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=mu, scale=1.0, sample_shape=50)

    qmu = PointMass(params=tf.Variable(1.0))

    # analytic solution: N(loc=0.0, scale=sqrt(1/51) ~= 0.140)
    inference = ed.MAP({mu: qmu}, data={x: x_data})
    inference.run(n_iter=1000)

    self.assertAllClose(qmu.mean().eval(), 0)
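# Why 0 is the right answer: with prior mu ~ N(0, 1) and likelihood
# x_i ~ N(mu, 1) for n = 50 observations that are all zero, the posterior is
# mu | x ~ N(sum(x_i) / (n + 1), 1 / (n + 1)) = N(0, 1/51), whose mode (the
# MAP point) is exactly 0 -- matching the scale ~0.140 quoted above.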
def test_normalnormal_run(self):
  with self.test_session() as sess:
    x_data = np.array([0.0] * 50, dtype=np.float32)

    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=tf.ones(50) * mu, scale=1.0)

    qmu = PointMass(params=tf.Variable(1.0))

    # analytic solution: N(loc=0.0, scale=sqrt(1/51) ~= 0.140)
    inference = ed.MAP({mu: qmu}, data={x: x_data})
    inference.run(n_iter=1000)

    self.assertAllClose(qmu.mean().eval(), 0)
def main(_):
  ed.set_seed(42)

  # DATA
  X_data, Z_true = karate("~/data")
  N = X_data.shape[0]  # number of vertices
  K = 2  # number of clusters

  # MODEL
  gamma = Dirichlet(concentration=tf.ones([K]))
  Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
  Z = Multinomial(total_count=1.0, probs=gamma, sample_shape=N)
  X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

  # INFERENCE (EM algorithm)
  qgamma = PointMass(tf.nn.softmax(tf.get_variable("qgamma/params", [K])))
  qPi = PointMass(tf.nn.sigmoid(tf.get_variable("qPi/params", [K, K])))
  qZ = PointMass(tf.nn.softmax(tf.get_variable("qZ/params", [N, K])))

  inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: X_data})
  inference.initialize(n_iter=250)

  tf.global_variables_initializer().run()
  for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

  # CRITICISM
  Z_pred = qZ.mean().eval().argmax(axis=1)
  print("Result (label flip can happen):")
  print("Predicted")
  print(Z_pred)
  print("True")
  print(Z_true)
  print("Adjusted Rand Index =", adjusted_rand_score(Z_pred, Z_true))
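# Assumed imports for main() above (the snippet's header is not shown; these
# are plausible sources, flagged as assumptions):
#   from observations import karate            # Zachary's karate club network
#   from sklearn.metrics import adjusted_rand_score
#   from edward.models import Bernoulli, Beta, Dirichlet, Multinomial, PointMass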
def test_normalnormal_regularization(self):
  with self.test_session() as sess:
    x_data = np.array([5.0] * 50, dtype=np.float32)

    mu = Normal(loc=0.0, scale=1.0)
    x = Normal(loc=mu, scale=1.0, sample_shape=50)

    qmu = PointMass(params=tf.Variable(1.0))
    inference = ed.MAP({mu: qmu}, data={x: x_data})
    inference.run(n_iter=1000)
    mu_val = qmu.mean().eval()

    # regularized solution
    regularizer = tf.contrib.layers.l2_regularizer(scale=1.0)
    mu_reg = tf.get_variable("mu_reg", shape=[], regularizer=regularizer)
    x_reg = Normal(loc=mu_reg, scale=1.0, sample_shape=50)

    inference_reg = ed.MAP(None, data={x_reg: x_data})
    inference_reg.run(n_iter=1000)

    mu_reg_val = mu_reg.eval()
    self.assertAllClose(mu_val, mu_reg_val)
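# Why the two estimates coincide: MAP under the N(0, 1) prior adds
# -log p(mu) = mu**2 / 2 + const to the objective, while
# tf.contrib.layers.l2_regularizer(scale=1.0) contributes
# scale * sum(mu**2) / 2, i.e. the identical penalty. Both yield
# mu_hat = sum(x_i) / (n + 1) = 250 / 51 ~= 4.90 for the 50 observations
# of 5.0 used here.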
def bayes_mult_cmd(table_file, metadata_file, formula, output_file):
  # DATA
  metadata = pd.read_table(metadata_file, index_col=0)
  G_data = dmatrix(formula, metadata, return_type='dataframe')
  table = load_table(table_file)

  # basic filtering parameters
  soil_filter = lambda val, id_, md: id_ in metadata.index
  read_filter = lambda val, id_, md: np.sum(val) > 10
  sample_filter = lambda val, id_, md: np.sum(val) > 1000

  table = table.filter(soil_filter, axis='sample')
  table = table.filter(sample_filter, axis='sample')
  table = table.filter(read_filter, axis='observation')
  print(table.shape)

  y_data = pd.DataFrame(np.array(table.matrix_data.todense()).T,
                        index=table.ids(axis='sample'),
                        columns=table.ids(axis='observation'))
  y_data, G_data = y_data.align(G_data, axis=0, join='inner')

  psi = _gram_schmidt_basis(y_data.shape[1])
  G_data = G_data.values
  y_data = y_data.values
  N, D = y_data.shape
  p = G_data.shape[1]  # number of covariates
  r = G_data.shape[1]  # rank of covariance matrix

  psi = tf.convert_to_tensor(psi, dtype=tf.float32)
  n = tf.convert_to_tensor(y_data.sum(axis=1), dtype=tf.float32)

  # workaround to make the Multinomial sampleable
  def _sample_n(self, n=1, seed=None):
    # define a Python function which returns samples as a NumPy array
    def np_sample(p, n):
      return multinomial.rvs(p=p, n=n, random_state=seed).astype(np.float32)

    # wrap the Python function as a TensorFlow op
    val = tf.py_func(np_sample, [self.probs, n], [tf.float32])[0]
    # set shape from unknown shape
    batch_event_shape = self.batch_shape.concatenate(self.event_shape)
    shape = tf.concat(
        [tf.expand_dims(n, 0), tf.convert_to_tensor(batch_event_shape)], 0)
    val = tf.reshape(val, shape)
    return val

  Multinomial._sample_n = _sample_n

  # dummy variable for gradient
  G = tf.placeholder(tf.float32, [N, p])

  # MODEL
  b = Exponential(rate=1.0)
  B = Normal(loc=tf.zeros([p, D - 1]), scale=tf.ones([p, D - 1]))

  # factorization of the covariance matrix
  # http://edwardlib.org/tutorials/klqp
  l = Exponential(rate=1.0)
  L = Normal(loc=tf.zeros([p, D - 1]), scale=tf.ones([p, D - 1]))
  z = Normal(loc=tf.zeros([N, p]), scale=tf.ones([N, p]))

  # Cholesky trick to get a multivariate normal
  v = tf.matmul(G, B) + tf.matmul(z, L)

  # get CLR-transformed values
  eta = tf.matmul(v, psi)
  Y = Multinomial(total_count=n, logits=eta)

  # INFERENCE
  T = 100000  # the number of mixin samples from MCMC sampling
  qb = PointMass(params=tf.Variable(tf.random_normal([])))
  qB = PointMass(params=tf.Variable(tf.random_normal([p, D - 1])))
  qz = Empirical(params=tf.Variable(tf.random_normal([T, N, p])))
  ql = PointMass(params=tf.Variable(tf.random_normal([])))
  qL = PointMass(params=tf.Variable(tf.random_normal([p, D - 1])))

  # imputation (E-step)
  inference_z = ed.SGLD({z: qz},
                        data={G: G_data, Y: y_data, B: qB, L: qL})
  # maximization (M-step)
  inference_BL = ed.MAP({B: qB, L: qL, b: qb, l: ql},
                        data={G: G_data, Y: y_data, z: qz})

  inference_z.initialize(step_size=1e-10)
  inference_BL.initialize(n_iter=1000)

  sess = ed.get_session()
  saver = tf.train.Saver()
  tf.global_variables_initializer().run()

  for i in range(inference_BL.n_iter):
    inference_z.update()  # E-step: compute the expectation of z
    info_dict = inference_BL.update()  # M-step
    inference_BL.print_progress(info_dict)

  save_path = saver.save(sess, output_file)
  print("Model saved in file: %s" % save_path)
  pickle.dump({'qB': sess.run(qB.mean()),
               'qL': sess.run(qL.mean()),
               'qz': sess.run(qz.mean())},
              open(output_file + '.params.pickle', 'wb'))
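# Hedged usage sketch for bayes_mult_cmd() (the file names and formula are
# placeholders, not from the original). Assumed imports: biom's load_table,
# patsy's dmatrix, scipy.stats.multinomial, pandas/numpy/pickle, and the
# Edward models used above.
#
#   bayes_mult_cmd(table_file='otu_table.biom',
#                  metadata_file='metadata.txt',
#                  formula='C(treatment)',
#                  output_file='model.ckpt')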
gamma = Dirichlet(concentration=tf.ones([K]))
Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K]))
Z = Multinomial(total_count=1., probs=gamma, sample_shape=N)
X = Bernoulli(probs=tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z))))

# INFERENCE (EM algorithm)
qgamma = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([K]))))
qPi = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, K]))))
qZ = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, K]))))

inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: X_data})

n_iter = 100
inference.initialize(n_iter=n_iter)
tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
inference.finalize()

# CRITICISM
Z_pred = qZ.mean().eval().argmax(axis=1)
print("Result (label flip can happen):")
print("Predicted")
print(Z_pred)
print("True")
print(Z_true)
print("Adjusted Rand Index =", adjusted_rand_score(Z_pred, Z_true))
t = Multinomial(total_count=1., probs=pi, sample_shape=N)
alpha = Beta(concentration0=tf.ones([K, 2]), concentration1=tf.ones([K, 2]))
X = Bernoulli(probs=tf.matmul(alpha, tf.transpose(t)))

# inference
qpi = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([2]))))
qt = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, 2]))))
qalpha = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, 2]))))

inference = ed.MAP({pi: qpi, t: qt, alpha: qalpha}, data={X: x_data})
inference.run(n_iter=5000)

# criticism
t_pred = qt.mean().eval().argmax(axis=1)
accuracy = (N - np.count_nonzero(t_pred - t_true)) / N
t_prob = qt.mean().eval()[:, 1]
auc = roc_auc_score(t_true, t_prob)

# a label flip may occur
if auc < 0.5:
  t_pred = 1 - t_pred
  accuracy = 1. - accuracy
  auc = 1. - auc

print('t_pred')
print(t_pred)
print('t_true')
print(t_true)
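# Why the AUC test above is a valid flip detector: mixture components are
# exchangeable, so the fitted cluster indices may be the mirror image of
# t_true. An AUC below 0.5 signals exactly that, and relabeling maps
# accuracy -> 1 - accuracy and AUC -> 1 - AUC, as the branch above does.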
inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: X_data})

n_iter = 250
inference.initialize(n_iter=n_iter)
tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
inference.finalize()

# CRITICISM
Z_pred = qZ.mean().eval().argmax(axis=1)
pi_pred = qPi.mean().eval()

X_pred = np.array(X.mean().eval() > 0.5, dtype=int)
cnt = N * N
correct = np.sum(X_data == X_pred)
plt.subplot(211)
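# The counters above support a simple reconstruction check (an added note,
# not original code): the fraction of correctly predicted adjacency entries.
# print("edge reconstruction accuracy:", correct / float(cnt))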