def main(_): # data J = 8 data_y = np.array([28, 8, -3, 7, -1, 1, 18, 12]) data_sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18]) # model definition mu = Normal(0., 10.) logtau = Normal(5., 1.) theta_prime = Normal(tf.zeros(J), tf.ones(J)) sigma = tf.placeholder(tf.float32, J) y = Normal(mu + tf.exp(logtau) * theta_prime, sigma * tf.ones([J])) data = {y: data_y, sigma: data_sigma} # ed.KLqp inference with tf.variable_scope('q_logtau'): q_logtau = Normal(tf.get_variable('loc', []), tf.nn.softplus(tf.get_variable('scale', []))) with tf.variable_scope('q_mu'): q_mu = Normal(tf.get_variable('loc', []), tf.nn.softplus(tf.get_variable('scale', []))) with tf.variable_scope('q_theta_prime'): q_theta_prime = Normal(tf.get_variable('loc', [J]), tf.nn.softplus(tf.get_variable('scale', [J]))) inference = ed.KLqp({logtau: q_logtau, mu: q_mu, theta_prime: q_theta_prime}, data=data) inference.run(n_samples=15, n_iter=60000) print("==== ed.KLqp inference ====") print("E[mu] = %f" % (q_mu.mean().eval())) print("E[logtau] = %f" % (q_logtau.mean().eval())) print("E[theta_prime]=") print((q_theta_prime.mean().eval())) print("==== end ed.KLqp inference ====") print("") print("") # HMC inference S = 400000 burn = S // 2 hq_logtau = Empirical(tf.get_variable('hq_logtau', [S])) hq_mu = Empirical(tf.get_variable('hq_mu', [S])) hq_theta_prime = Empirical(tf.get_variable('hq_thetaprime', [S, J])) inference = ed.HMC({logtau: hq_logtau, mu: hq_mu, theta_prime: hq_theta_prime}, data=data) inference.run() print("==== ed.HMC inference ====") print("E[mu] = %f" % (hq_mu.params.eval()[burn:].mean())) print("E[logtau] = %f" % (hq_logtau.params.eval()[burn:].mean())) print("E[theta_prime]=") print(hq_theta_prime.params.eval()[burn:, ].mean(0)) print("==== end ed.HMC inference ====") print("") print("")
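# These snippets assume a common header that is not shown; a minimal sketch,
# assuming Edward 1.x on TensorFlow 1.x (the API the code above corresponds to):
import numpy as np
import tensorflow as tf
import edward as ed
from edward.models import Bernoulli, Empirical, Normal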
def main(_): ed.set_seed(42) N = 5000 # number of data points D = 10 # number of features # DATA w_true = np.random.randn(D) X_data = np.random.randn(N, D) p = expit(np.dot(X_data, w_true)) y_data = np.array([np.random.binomial(1, i) for i in p]) # MODEL X = tf.placeholder(tf.float32, [N, D]) w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) y = Bernoulli(logits=ed.dot(X, w)) # INFERENCE qw = Normal(loc=tf.get_variable("qw/loc", [D]), scale=tf.nn.softplus(tf.get_variable("qw/scale", [D]))) inference = IWVI({w: qw}, data={X: X_data, y: y_data}) inference.run(K=5, n_iter=1000) # CRITICISM print("Mean squared error in true values to inferred posterior mean:") print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
def _test_normal_normal(self, Inference, default, *args, **kwargs): with self.test_session() as sess: x_data = np.array([0.0] * 50, dtype=np.float32) mu = Normal(loc=0.0, scale=1.0) x = Normal(loc=mu, scale=1.0, sample_shape=50) if not default: qmu_loc = tf.Variable(tf.random_normal([])) qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) qmu = Normal(loc=qmu_loc, scale=qmu_scale) # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140) inference = Inference({mu: qmu}, data={x: x_data}) else: inference = Inference([mu], data={x: x_data}) qmu = inference.latent_vars[mu] inference.run(*args, **kwargs) self.assertAllClose(qmu.mean().eval(), 0, rtol=0.1, atol=0.6) self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51), rtol=0.15, atol=0.5) variables = tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope='optimizer') old_t, old_variables = sess.run([inference.t, variables]) self.assertEqual(old_t, inference.n_iter) sess.run(inference.reset) new_t, new_variables = sess.run([inference.t, variables]) self.assertEqual(new_t, 0) self.assertNotEqual(old_variables, new_variables)
def _test_normal_normal(self, Inference, default, *args, **kwargs): with self.test_session() as sess: x_data = np.array([0.0] * 50, dtype=np.float32) mu = Normal(loc=0.0, scale=1.0) x = Normal(loc=mu, scale=1.0, sample_shape=50) if not default: qmu_loc = tf.Variable(tf.random_normal([])) qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) qmu = Normal(loc=qmu_loc, scale=qmu_scale) # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140) inference = Inference({mu: qmu}, data={x: x_data}) else: inference = Inference([mu], data={x: x_data}) qmu = inference.latent_vars[mu] inference.run(*args, **kwargs) self.assertAllClose(qmu.mean().eval(), 0, rtol=0.15, atol=0.5) self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51), rtol=0.15, atol=0.5) variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='optimizer') old_t, old_variables = sess.run([inference.t, variables]) self.assertEqual(old_t, inference.n_iter) sess.run(inference.reset) new_t, new_variables = sess.run([inference.t, variables]) self.assertEqual(new_t, 0) self.assertNotEqual(old_variables, new_variables)
def test_normalnormal_run(self): with self.test_session() as sess: x_data = np.array([0.0] * 50, dtype=np.float32) mu = Normal(loc=0.0, scale=1.0) x = Normal(loc=tf.ones(50) * mu, scale=1.0) qmu_loc = tf.Variable(tf.random_normal([])) qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) qmu = Normal(loc=qmu_loc, scale=qmu_scale) # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140) n_iter = 5000 inference = ed.KLqp({mu: qmu}, data={x: x_data}) inference.run(n_iter=n_iter) self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1) self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51), rtol=1e-1, atol=1e-1) variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='optimizer') old_t, old_variables = sess.run([inference.t, variables]) self.assertEqual(old_t, n_iter) sess.run(inference.reset) new_t, new_variables = sess.run([inference.t, variables]) self.assertEqual(new_t, 0) self.assertNotEqual(old_variables, new_variables)
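# Quick sanity check of the "analytic solution" quoted in the test comments:
# with a N(0, 1) prior on mu and 50 observations x_i ~ N(mu, 1) that are all
# zero, conjugacy gives a posterior N(0, 1/51), i.e. scale ~= 0.140.
prior_var, lik_var, n, xbar = 1.0, 1.0, 50, 0.0
post_var = 1.0 / (1.0 / prior_var + n / lik_var)  # = 1/51
post_mean = post_var * (n * xbar / lik_var)       # = 0.0
print(post_mean, post_var ** 0.5)                 # 0.0 0.1400...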
def main(_): ed.set_seed(142) # DATA x_train = build_toy_dataset(FLAGS.N, FLAGS.D, FLAGS.K) # MODEL w = Normal(loc=0.0, scale=10.0, sample_shape=[FLAGS.D, FLAGS.K]) z = Normal(loc=0.0, scale=1.0, sample_shape=[FLAGS.M, FLAGS.K]) x = Normal(loc=tf.matmul(w, z, transpose_b=True), scale=tf.ones([FLAGS.D, FLAGS.M])) # INFERENCE qw_variables = [tf.get_variable("qw/loc", [FLAGS.D, FLAGS.K]), tf.get_variable("qw/scale", [FLAGS.D, FLAGS.K])] qw = Normal(loc=qw_variables[0], scale=tf.nn.softplus(qw_variables[1])) qz_variables = [tf.get_variable("qz/loc", [FLAGS.N, FLAGS.K]), tf.get_variable("qz/scale", [FLAGS.N, FLAGS.K])] idx_ph = tf.placeholder(tf.int32, FLAGS.M) qz = Normal(loc=tf.gather(qz_variables[0], idx_ph), scale=tf.nn.softplus(tf.gather(qz_variables[1], idx_ph))) x_ph = tf.placeholder(tf.float32, [FLAGS.D, FLAGS.M]) inference_w = ed.KLqp({w: qw}, data={x: x_ph, z: qz}) inference_z = ed.KLqp({z: qz}, data={x: x_ph, w: qw}) scale_factor = float(FLAGS.N) / FLAGS.M inference_w.initialize(scale={x: scale_factor, z: scale_factor}, var_list=qz_variables, n_samples=5) inference_z.initialize(scale={x: scale_factor, z: scale_factor}, var_list=qw_variables, n_samples=5) sess = ed.get_session() tf.global_variables_initializer().run() for _ in range(inference_w.n_iter): x_batch, idx_batch = next_batch(x_train, FLAGS.M) for _ in range(5): inference_z.update(feed_dict={x_ph: x_batch, idx_ph: idx_batch}) info_dict = inference_w.update(feed_dict={x_ph: x_batch, idx_ph: idx_batch}) inference_w.print_progress(info_dict) t = info_dict['t'] if t % 100 == 0: print("\nInferred principal axes:") print(sess.run(qw.mean()))
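# `next_batch` is defined outside this snippet; a minimal sketch of what it
# might look like (hypothetical helper), assuming x_train has shape [D, N] and
# a batch is M randomly chosen columns together with their indices:
def next_batch(x_train, M):
  idx = np.random.choice(x_train.shape[1], M, replace=False)
  return x_train[:, idx].astype(np.float32), idx.astype(np.int32)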
def test_normalnormal_run(self): with self.test_session() as sess: x_data = np.array([0.0] * 50, dtype=np.float32) mu = Normal(loc=0.0, scale=1.0) x = Normal(loc=tf.ones(50) * mu, scale=1.0) qmu_loc = tf.Variable(tf.random_normal([])) qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) qmu = Normal(loc=qmu_loc, scale=qmu_scale) # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140) inference = ed.KLpq({mu: qmu}, data={x: x_data}) inference.run(n_samples=25, n_iter=100) self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1) self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51), rtol=1e-1, atol=1e-1)
def test_normalnormal_run(self): with self.test_session() as sess: x_data = np.array([0.0] * 50, dtype=np.float32) mu = Normal(mu=0.0, sigma=1.0) x = Normal(mu=tf.ones(50) * mu, sigma=1.0) qmu_mu = tf.Variable(tf.random_normal([])) qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([]))) qmu = Normal(mu=qmu_mu, sigma=qmu_sigma) # analytic solution: N(mu=0.0, sigma=\sqrt{1/51}=0.140) inference = ed.KLpq({mu: qmu}, data={x: x_data}) inference.run(n_samples=25, n_iter=100) self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1) self.assertAllClose(qmu.std().eval(), np.sqrt(1 / 51), rtol=1e-1, atol=1e-1)
def test_normal_run(self): def ratio_estimator(data, local_vars, global_vars): """Use the optimal ratio estimator, r(z) = log p(z). We add a TensorFlow variable as the algorithm assumes that the function has parameters to optimize.""" w = tf.get_variable("w", []) return z.log_prob(local_vars[z]) + w with self.test_session() as sess: z = Normal(loc=5.0, scale=1.0) qz = Normal(loc=tf.Variable(tf.random_normal([])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([])))) inference = ed.ImplicitKLqp({z: qz}, discriminator=ratio_estimator) inference.run(n_iter=200) self.assertAllClose(qz.mean().eval(), 5.0, atol=1.0)
def probabilistic_pca_example(): ed.set_seed(142) N = 5000 # Number of data points. D = 2 # Data dimensionality. K = 1 # Latent dimensionality. x_train = build_toy_dataset(N, D, K) plt.scatter(x_train[0, :], x_train[1, :], color='blue', alpha=0.1) plt.axis([-10, 10, -10, 10]) plt.title('Simulated data set') plt.show() #-------------------- # Model. w = Normal(loc=tf.zeros([D, K]), scale=2.0 * tf.ones([D, K])) z = Normal(loc=tf.zeros([N, K]), scale=tf.ones([N, K])) x = Normal(loc=tf.matmul(w, z, transpose_b=True), scale=tf.ones([D, N])) #-------------------- # Inference. qw = Normal(loc=tf.get_variable('qw/loc', [D, K]), scale=tf.nn.softplus(tf.get_variable('qw/scale', [D, K]))) qz = Normal(loc=tf.get_variable('qz/loc', [N, K]), scale=tf.nn.softplus(tf.get_variable('qz/scale', [N, K]))) inference = ed.KLqp({w: qw, z: qz}, data={x: x_train}) inference.run(n_iter=500, n_print=100, n_samples=10) #-------------------- # Criticism. sess = ed.get_session() print('Inferred principal axes:') print(sess.run(qw.mean())) # Build and then generate data from the posterior predictive distribution. x_post = ed.copy(x, {w: qw, z: qz}) x_gen = sess.run(x_post) plt.scatter(x_gen[0, :], x_gen[1, :], color='red', alpha=0.1) plt.axis([-10, 10, -10, 10]) plt.title('Data generated from model') plt.show()
def test_normalnormal_run(self): with self.test_session() as sess: x_data = np.array([0.0] * 50, dtype=np.float32) mu = Normal(loc=0.0, scale=1.0) x = Normal(loc=tf.ones(50) * mu, scale=1.0) qmu_loc = tf.Variable(tf.random_normal([])) qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) qmu = Normal(loc=qmu_loc, scale=qmu_scale) # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140) inference = ed.KLqp({mu: qmu}, data={x: x_data}) inference.run(n_iter=5000) self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1) self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51), rtol=1e-1, atol=1e-1)
D])))) inference = ed.KLqp({mu: qmu, sigma: qsigma}, data={x: x_train}) inference.initialize(n_samples=20, n_iter=4000) sess = ed.get_session() init = tf.global_variables_initializer() init.run() for _ in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict) t = info_dict['t'] if t % inference.n_print == 0: print("Inferred cluster means:") print(sess.run(qmu.mean())) # Calculate likelihood for each data point and cluster assignment, # averaged over many posterior samples. ``x_post`` has shape (N, 100, K, D). mu_sample = qmu.sample(100) sigma_sample = qsigma.sample(100) x_post = Normal(mu=tf.ones([N, 1, 1, 1]) * mu_sample, sigma=tf.ones([N, 1, 1, 1]) * sigma_sample) x_broadcasted = tf.tile(tf.reshape(x_train, [N, 1, 1, D]), [1, 100, K, 1]) # Sum over latent dimension, then average over posterior samples. # ``log_liks`` ends up with shape (N, K). log_liks = x_post.log_prob(x_broadcasted) log_liks = tf.reduce_sum(log_liks, 3) log_liks = tf.reduce_mean(log_liks, 1)
tf.Variable(tf.random_normal([1])))) qmu = Normal(loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([1])))) latent_vars = { overall_mu: qmu, lnvar_students: qlnvarstudents, lnvar_questions: qlnvarquestions, student_etas: qstudents, question_etas: qquestions } data = {outcomes: obs} inference = ed.KLqp(latent_vars, data) inference.initialize(n_print=2, n_iter=50) qstudents_mean = qstudents.mean() qquestions_mean = qquestions.mean() init = tf.global_variables_initializer() init.run() f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) ax1.set_ylim([-3.0, 3.0]) ax2.set_ylim([-3.0, 3.0]) ax1.set_xlim([-3.0, 3.0]) ax2.set_xlim([-3.0, 3.0]) for t in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict)
# q(z|x,t,y) inpt2 = tf.concat([x_ph, qy], 1) hqz = fc_net(inpt2, (nh - 1) * [h], [], 'qz_xty_shared', lamba=lamba, activation=activation) muq_t0, sigmaq_t0 = fc_net(hqz, [h], [[d, None], [d, tf.nn.softplus]], 'qz_xt0', lamba=lamba, activation=activation) muq_t1, sigmaq_t1 = fc_net(hqz, [h], [[d, None], [d, tf.nn.softplus]], 'qz_xt1', lamba=lamba, activation=activation) muq = qt * muq_t1 + (1. - qt) * muq_t0 sigmaq = qt * sigmaq_t1 + (1. - qt) * sigmaq_t0 qz = Normal(loc=muq, scale=sigmaq) # Create data dictionary for edward data = {x1: x_ph_bin, x2: x_ph_cont, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph} # sample posterior predictive for p(y|z,t) y_post = ed.copy(y, {z: qz, t: t_ph}, scope='y_post') # crude approximation of the above y_post_mean = ed.copy(y, {z: qz.mean(), t: t_ph}, scope='y_post_mean') # construct a deterministic version (i.e. use the mean of the approximate posterior) of the lower bound # for early stopping according to a validation set y_post_eval = ed.copy(y, {z: qz.mean(), qt: t_ph, qy: y_ph, t: t_ph}, scope='y_post_eval') x1_post_eval = ed.copy(x1, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='x1_post_eval') x2_post_eval = ed.copy(x2, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='x2_post_eval') t_post_eval = ed.copy(t, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='t_post_eval') # losses logp_valid = tf.reduce_mean(tf.reduce_sum(y_post_eval.log_prob(y_ph) + t_post_eval.log_prob(t_ph), axis=1) + tf.reduce_sum(x1_post_eval.log_prob(x_ph_bin), axis=1) + tf.reduce_sum(x2_post_eval.log_prob(x_ph_cont), axis=1) + tf.reduce_sum(z.log_prob(qz.mean()) - qz.log_prob(qz.mean()), axis=1)) inference = ed.KLqp({z: qz}, data) optimizer = tf.train.AdamOptimizer(learning_rate=lr) inference.initialize(optimizer=optimizer) # saver and initializer before experiment
def save(arr,xdata,ydata): tf.reset_default_graph() trainSetNumber = round(FLAGS.T* 0.8) x_train = xdata[:trainSetNumber] y_train = ydata[:trainSetNumber] x_test = xdata[trainSetNumber:] y_test = ydata[trainSetNumber:] x_train = np.asarray(x_train) x_test = np.asarray(x_test) x_train = np.asarray(x_train) x_test = np.asarray(x_test) # print(x_test) # print(y_test) pos = 0 name = arr[pos] pos +=1 H1 = int(arr[pos]) pos+=1 H2 = int(arr[pos]) pos+=1 param1 = float(arr[pos]) pos += 1 param2 = float(arr[pos]) graph1 = tf.Graph() with graph1.as_default(): with tf.name_scope("model"): W_0 = Normal(loc=tf.zeros([FLAGS.D, H1]), scale=param1*tf.ones([FLAGS.D,H1 ]),name="W_0") W_1 = Normal(loc=tf.zeros([H1, H2]), scale=param2*tf.ones([H1, H2]), name="W_1") W_2 = Normal(loc=tf.zeros([H2, FLAGS.O]), scale=param2*tf.ones([H2, FLAGS.O]), name="W_2") b_0 = Normal(loc=tf.zeros(H1), scale=param1 *tf.ones(H1), name="b_0") b_1 = Normal(loc=tf.zeros(H2), scale=param2* tf.ones(H2), name="b_1") b_2 = Normal(loc=tf.zeros(FLAGS.O), scale=param2* tf.ones(FLAGS.O), name="b_2") X = tf.placeholder(tf.float32, [trainSetNumber, FLAGS.D], name="X") y = Normal(loc=neural_network(x_train,W_0, W_1, W_2, b_0, b_1, b_2, trainSetNumber), scale=0.1*tf.ones([trainSetNumber,FLAGS.O]), name="y") with tf.variable_scope("posterior",reuse=tf.AUTO_REUSE): with tf.variable_scope("qW_0",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [FLAGS.D, H1]) scale = param1*tf.nn.softplus(tf.get_variable("scale", [FLAGS.D, H1])) qW_0 = Normal(loc=loc, scale=scale) with tf.variable_scope("qW_1",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [H1, H2]) scale = param2*tf.nn.softplus(tf.get_variable("scale", [H1, H2])) qW_1 = Normal(loc=loc, scale=scale) with tf.variable_scope("qW_2",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [H2, FLAGS.O]) scale = param2*tf.nn.softplus(tf.get_variable("scale", [H2, FLAGS.O])) qW_2 = Normal(loc=loc, scale=scale) with tf.variable_scope("qb_0",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [H1]) scale =param1 * tf.nn.softplus(tf.get_variable("scale", [H1])) qb_0 = Normal(loc=loc, scale=scale) with tf.variable_scope("qb_1",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [H2]) scale =param2 * tf.nn.softplus(tf.get_variable("scale", [H2])) qb_1 = Normal(loc=loc, scale=scale) with tf.variable_scope("qb_2",reuse=tf.AUTO_REUSE): loc = tf.get_variable("loc", [FLAGS.O]) scale =param2 * tf.nn.softplus(tf.get_variable("scale", [FLAGS.O])) qb_2 = Normal(loc=loc, scale=scale) #inference with tf.Session(graph=graph1) as sess: # Set up the inference method, mapping the prior to the posterior variables inference = ed.KLqp({W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}, data={X: x_train, y: y_train}) # Set up the adam optimizer global_step = tf.Variable(0, trainable=False) starter_learning_rate = 0.1 learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,100, 0.3, staircase=True) optimizer = tf.train.AdamOptimizer(learning_rate) # Run the inference method pos += 1 iter1 = arr[pos] inference.run(n_iter=iter1,optimizer=optimizer ,n_samples=5) #Run the test data through the neural network infered = neural_network(x_test, qW_0, qW_1, qW_2, qb_0, qb_1, qb_2, len(x_test)) inferedList = infered.eval() #Accuracy checks on the data (The test data) # In order to work with PPC and other metrics, it must be a random variables # Normal creates this random varaibles by sampling from the poterior with a normal distribution NormalTest =Normal(loc=neural_network(x_test, qW_0, qW_1, qW_2, 
qb_0, qb_1, qb_2,len(x_test)), scale=0.1*tf.ones([len(x_test),FLAGS.O]), name="y_other") NormalTestList = NormalTest.eval() # Change the graph so that the posterior point to the output y_post = ed.copy(NormalTest, {W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}) X = tf.placeholder(tf.float32, [len(x_test), FLAGS.D], name="X") y_test_tensor = tf.convert_to_tensor(y_test) MSE = ed.evaluate('mean_squared_error', data={X: x_test, NormalTest: y_test_tensor}) MAE =ed.evaluate('mean_absolute_error', data={X: x_test, NormalTest: y_test_tensor}) # PPC calculation PPCMean = ed.ppc(lambda xs, zs: tf.reduce_mean(xs[y_post]), data={y_post: y_test, X:x_test}, latent_vars={W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}, n_samples=5) # Change the graph again, this is done to do epistemic uncertainty calculations posterior = ed.copy(NormalTest, dict_swap={W_0: qW_0.mean(), b_0: qb_0.mean(),W_1: qW_1.mean(), b_1: qb_1.mean(),W_2: qW_2.mean(), b_2: qb_2.mean()}) Y_post1 = sess.run(posterior.sample(len(x_test)), feed_dict={X: x_test, posterior: y_test}) mean_prob_over_samples=np.mean(Y_post1, axis=0) ## prediction means prediction_variances = np.apply_along_axis(predictive_entropy, axis=1, arr=mean_prob_over_samples) # Run analysis on test data, to see how many records were correct classes, actualClass, cor, firsts, seconds, thirds, fails, perCorrect = Analysis(inferedList, y_test) # Save the model through TF saver saver = tf.train.Saver() dir_path = os.path.dirname(os.path.realpath(__file__)) save_path = saver.save(sess, dir_path +"/"+name+"/model.ckpt") print("Model saved in path: %s" % save_path) file = open(dir_path+"/"+name +"/"+name+".csv",'w') file.write("MSE = " + str(MSE)) file.write("\nMAE = " + str(MAE)) file.write("\nPPC mean = " + str(PPCMean)) file.write("; Predicted First;Predicted Second; Predicted Third; Predicted Fail \n") classNames = ['First','Second', 'Third', 'Fail'] for x in range(len(firsts)): file.write(classNames[x] + ";" + str(firsts[x]) + ";" + str(seconds[x])+ ";" + str(thirds[x])+ ";" + str(fails[x]) + "\n") file.write("Num;Class 1;Class 2;Class 3;Class 4;Epi;Predicted Class;Correct Class\n ") for x in range(len(inferedList)): line = str(x) for i in range(len(inferedList[x])): line += ";" + str(round(inferedList[x][i],2)) line += ";" + str(round(prediction_variances[x],2)) + ";" + str(classes[x]+1) + ";" + str(actualClass[x]+1) + "\n" file.write(line) file.close() return perCorrect
sess = ed.get_session() tf.global_variables_initializer().run() i = 0 for _ in range(inference.n_iter): X_batch, y_batch, i = next_batch(M, i) for _ in range(5): info_dict_d = inference.update(variables="Disc", feed_dict={ X: X_batch, y_ph: y_batch }) info_dict = inference.update(variables="Gen", feed_dict={ X: X_batch, y_ph: y_batch }) info_dict['loss_d'] = info_dict_d['loss_d'] info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration t = info_dict['t'] inference.print_progress(info_dict) if t == 1 or t % inference.n_print == 0: # Check inferred posterior parameters. mean, std = sess.run([qw.mean(), qw.stddev()]) print("\nInferred mean & std:") print(mean) print(std)
scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0) # Create data dictionary for edward data = { x1: x_ph_bin, x2: x_ph_cont, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph } # sample posterior predictive for p(y|z,t) y_post = ed.copy(y, {z: qz, t: t_ph}, scope='y_post') # crude approximation of the above y_post_mean = ed.copy(y, {z: qz.mean(), t: t_ph}, scope='y_post_mean') # construct a deterministic version (i.e. use the mean of the approximate posterior) of the lower bound # for early stopping according to a validation set y_post_eval = ed.copy(y, { z: qz.mean(), qt: t_ph, qy: y_ph, t: t_ph }, scope='y_post_eval') x1_post_eval = ed.copy(x1, { z: qz.mean(), qt: t_ph, qy: y_ph }, scope='x1_post_eval')
def main(_): def ratio_estimator(data, local_vars, global_vars): """Takes as input a dict of data x, local variable samples z, and global variable samples beta; outputs real values of shape (x.shape[0] + z.shape[0],). In this example, there are no local variables. """ # data[y] has shape (M,); global_vars[w] has shape (D,) # we concatenate w to each data point y, so input has shape (M, 1 + D) input = tf.concat([ tf.reshape(data[y], [FLAGS.M, 1]), tf.tile(tf.reshape(global_vars[w], [1, FLAGS.D]), [FLAGS.M, 1]) ], 1) hidden = tf.layers.dense(input, 64, activation=tf.nn.relu) output = tf.layers.dense(hidden, 1, activation=None) return output ed.set_seed(42) # DATA w_true = np.ones(FLAGS.D) * 5.0 X_train, y_train = build_toy_dataset(FLAGS.N, w_true) X_test, y_test = build_toy_dataset(FLAGS.N, w_true) data = generator([X_train, y_train], FLAGS.M) # MODEL X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D]) y_ph = tf.placeholder(tf.float32, [FLAGS.M]) w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M)) # INFERENCE qw = Normal(loc=tf.get_variable("qw/loc", [FLAGS.D]) + 1.0, scale=tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D]))) inference = ed.ImplicitKLqp({w: qw}, data={y: y_ph}, discriminator=ratio_estimator, global_vars={w: qw}) inference.initialize(n_iter=5000, n_print=100, scale={y: float(FLAGS.N) / FLAGS.M}) sess = ed.get_session() tf.global_variables_initializer().run() for _ in range(inference.n_iter): X_batch, y_batch = next(data) for _ in range(5): info_dict_d = inference.update(variables="Disc", feed_dict={ X: X_batch, y_ph: y_batch }) info_dict = inference.update(variables="Gen", feed_dict={ X: X_batch, y_ph: y_batch }) info_dict['loss_d'] = info_dict_d['loss_d'] info_dict[ 't'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration t = info_dict['t'] inference.print_progress(info_dict) if t == 1 or t % inference.n_print == 0: # Check inferred posterior parameters. mean, std = sess.run([qw.mean(), qw.stddev()]) print("\nInferred mean & std:") print(mean) print(std)
# VISUALIZATION def visualise(X_data, y_data, w, b, ax, n_samples=10): w_samples = w.sample(n_samples)[:, 0].eval() b_samples = b.sample(n_samples).eval() ax.scatter(X_data[:, 0], y_data) # Note: only the 1st input dimension is plotted. inputs = np.linspace(-8, 8, num=400) for ns in range(n_samples): output = inputs * w_samples[ns] + b_samples[ns] ax.plot(inputs, output) fig = plt.figure() ax1 = fig.add_subplot(1, 2, 1) ax2 = fig.add_subplot(1, 2, 2) visualise(X_train, y_train, w, b, ax1) # Models sampled from the prior visualise(X_train, y_train, qw, qb, ax2) # Models sampled from the posterior plt.show() # EXPLORE THE LEARNED MODEL print('Point estimate for STD of weights:', w_prior_std.eval()) # Retrieve the means and STDs of the estimated regression coefficients w_est_mean = qw.mean().eval() w_est_std = qw.stddev().eval() print('Correlation between estimated and true weights: ', np.corrcoef(w_est_mean, w_true)[0, 1])
def cevae_tf(X, T, Y, n_epochs=100, early_stop = 10, d_cevae=20): T, Y = T.reshape((-1,1)), Y.reshape((-1,1)) args = dict() args['earl'] = early_stop args['lr'] = 0.001 args['opt'] = 'adam' args['epochs'] = n_epochs args['print_every'] = 10 args['true_post'] = True M = None # batch size during training d = d_cevae # latent dimension lamba = 1e-4 # weight decay nh, h = 3, 200 # number and size of hidden layers contfeats = list(range(X.shape[1])) # all continuous binfeats = [] # need for early stopping xtr, xva, ttr, tva, ytr, yva = train_test_split(X, T, Y) # zero mean, unit variance for y during training ym, ys = np.mean(Y), np.std(Y) ytr, yva = (ytr - ym) / ys, (yva - ym) / ys best_logpvalid = - np.inf with tf.Graph().as_default(): sess = tf.InteractiveSession() ed.set_seed(1) np.random.seed(1) tf.set_random_seed(1) # x_ph_bin = tf.placeholder(tf.float32, [M, len(binfeats)], name='x_bin') # binary inputs x_ph_cont = tf.placeholder(tf.float32, [M, len(contfeats)], name='x_cont') # continuous inputs t_ph = tf.placeholder(tf.float32, [M, 1]) y_ph = tf.placeholder(tf.float32, [M, 1]) # x_ph = tf.concat([x_ph_bin, x_ph_cont], 1) x_ph = x_ph_cont activation = tf.nn.elu # CEVAE model (decoder) # p(z) z = Normal(loc=tf.zeros([tf.shape(x_ph)[0], d]), scale=tf.ones([tf.shape(x_ph)[0], d])) # p(x|z) hx = fc_net(z, (nh - 1) * [h], [], 'px_z_shared', lamba=lamba, activation=activation) # logits = fc_net(hx, [h], [[len(binfeats), None]], 'px_z_bin', lamba=lamba, activation=activation) # x1 = Bernoulli(logits=logits, dtype=tf.float32, name='bernoulli_px_z') mu, sigma = fc_net(hx, [h], [[len(contfeats), None], [len(contfeats), tf.nn.softplus]], 'px_z_cont', lamba=lamba, activation=activation) x2 = Normal(loc=mu, scale=sigma, name='gaussian_px_z') # p(t|z) logits = fc_net(z, [h], [[1, None]], 'pt_z', lamba=lamba, activation=activation) t = Bernoulli(logits=logits, dtype=tf.float32) # p(y|t,z) mu2_t0 = fc_net(z, nh * [h], [[1, None]], 'py_t0z', lamba=lamba, activation=activation) mu2_t1 = fc_net(z, nh * [h], [[1, None]], 'py_t1z', lamba=lamba, activation=activation) y = Normal(loc=t * mu2_t1 + (1. - t) * mu2_t0, scale=tf.ones_like(mu2_t0)) # CEVAE variational approximation (encoder) # q(t|x) logits_t = fc_net(x_ph, [d], [[1, None]], 'qt', lamba=lamba, activation=activation) qt = Bernoulli(logits=logits_t, dtype=tf.float32) # q(y|x,t) hqy = fc_net(x_ph, (nh - 1) * [h], [], 'qy_xt_shared', lamba=lamba, activation=activation) mu_qy_t0 = fc_net(hqy, [h], [[1, None]], 'qy_xt0', lamba=lamba, activation=activation) mu_qy_t1 = fc_net(hqy, [h], [[1, None]], 'qy_xt1', lamba=lamba, activation=activation) qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0, scale=tf.ones_like(mu_qy_t0)) # q(z|x,t,y) inpt2 = tf.concat([x_ph, qy], 1) hqz = fc_net(inpt2, (nh - 1) * [h], [], 'qz_xty_shared', lamba=lamba, activation=activation) muq_t0, sigmaq_t0 = fc_net(hqz, [h], [[d, None], [d, tf.nn.softplus]], 'qz_xt0', lamba=lamba, activation=activation) muq_t1, sigmaq_t1 = fc_net(hqz, [h], [[d, None], [d, tf.nn.softplus]], 'qz_xt1', lamba=lamba, activation=activation) qz = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0, scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0) # Create data dictionary for edward data = {x2: x_ph_cont, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph} # sample posterior predictive for p(y|z,t) y_post = ed.copy(y, {z: qz, t: t_ph}, scope='y_post') # crude approximation of the above y_post_mean = ed.copy(y, {z: qz.mean(), t: t_ph}, scope='y_post_mean') # construct a deterministic version (i.e. 
use the mean of the approximate posterior) of the lower bound # for early stopping according to a validation set y_post_eval = ed.copy(y, {z: qz.mean(), qt: t_ph, qy: y_ph, t: t_ph}, scope='y_post_eval') # x1_post_eval = ed.copy(x1, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='x1_post_eval') x2_post_eval = ed.copy(x2, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='x2_post_eval') t_post_eval = ed.copy(t, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='t_post_eval') logp_valid = tf.reduce_mean(tf.reduce_sum(y_post_eval.log_prob(y_ph) + t_post_eval.log_prob(t_ph), axis=1) + tf.reduce_sum(x2_post_eval.log_prob(x_ph_cont), axis=1) + tf.reduce_sum(z.log_prob(qz.mean()) - qz.log_prob(qz.mean()), axis=1)) inference = ed.KLqp({z: qz}, data) optimizer = tf.train.AdamOptimizer(learning_rate=args['lr']) inference.initialize(optimizer=optimizer) saver = tf.train.Saver(tf.contrib.slim.get_variables()) tf.global_variables_initializer().run() n_epoch, n_iter_per_epoch, idx = args['epochs'], 10 * int(xtr.shape[0] / 100), np.arange(xtr.shape[0]) # # dictionaries needed for evaluation t0, t1 = np.zeros((X.shape[0], 1)), np.ones((X.shape[0], 1)) # tr0t, tr1t = np.zeros((xte.shape[0], 1)), np.ones((xte.shape[0], 1)) f1 = {x_ph_cont: X, t_ph: t1} f0 = {x_ph_cont: X, t_ph: t0} # f1t = {x_ph_bin: xte[:, 0:len(binfeats)], x_ph_cont: xte[:, len(binfeats):], t_ph: tr1t} # f0t = {x_ph_bin: xte[:, 0:len(binfeats)], x_ph_cont: xte[:, len(binfeats):], t_ph: tr0t} for epoch in range(n_epoch): avg_loss = 0.0 widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()] pbar = ProgressBar(n_iter_per_epoch, widgets=widgets) pbar.start() np.random.shuffle(idx) for j in range(n_iter_per_epoch): # print('j', j) # pbar.update(j) batch = np.random.choice(idx, 100) x_train, y_train, t_train = xtr[batch], ytr[batch], ttr[batch] info_dict = inference.update(feed_dict={x_ph_cont: x_train, t_ph: t_train, y_ph: y_train}) avg_loss += info_dict['loss'] avg_loss = avg_loss / n_iter_per_epoch avg_loss = avg_loss / 100 if epoch % args['earl'] == 0 or epoch == (n_epoch - 1): logpvalid = sess.run(logp_valid, feed_dict={x_ph_cont: xva, t_ph: tva, y_ph: yva}) if logpvalid >= best_logpvalid: print('Improved validation bound, old: {:0.3f}, new: {:0.3f}'.format(best_logpvalid, logpvalid)) best_logpvalid = logpvalid saver.save(sess, 'data/cevae_models/dlvm') saver.restore(sess, 'data/cevae_models/dlvm') y0, y1 = get_y0_y1(sess, y_post, f0, f1, shape=Y.shape, L=100) y0, y1 = y0 * ys + ym, y1 * ys + ym sess.close() return y0.reshape((-1)), y1.reshape((-1))
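# `get_y0_y1` comes from the CEVAE codebase and is not shown here; a rough
# sketch of one plausible implementation, assuming it averages L posterior
# predictive draws of y under t forced to 0 and to 1 (details may differ):
def get_y0_y1(sess, y_post, f0, f1, shape=(), L=1):
  y0s, y1s = [], []
  for _ in range(L):
    y0s.append(sess.run(y_post, feed_dict=f0))  # one draw with t = 0
    y1s.append(sess.run(y_post, feed_dict=f1))  # one draw with t = 1
  return (np.mean(y0s, axis=0).reshape(shape),
          np.mean(y1s, axis=0).reshape(shape))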
qmu = Normal( loc=tf.Variable(tf.random_normal([1])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([1])))) latent_vars = { overall_mu: qmu, lnvar_students: qlnvarstudents, lnvar_questions: qlnvarquestions, student_etas: qstudents, question_etas: qquestions } data = {outcomes: obs} inference = ed.KLqp(latent_vars, data) inference.initialize(n_print=2, n_iter=50) qstudents_mean = qstudents.mean() qquestions_mean = qquestions.mean() tf.global_variables_initializer().run() f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) ax1.set_ylim([-3.0, 3.0]) ax2.set_ylim([-3.0, 3.0]) ax1.set_xlim([-3.0, 3.0]) ax2.set_xlim([-3.0, 3.0]) for t in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict) if t % inference.n_print == 0:
def main(_): ed.set_seed(42) # DATA data, true_s_etas, true_q_etas = build_toy_dataset(FLAGS.n_students, FLAGS.n_questions, FLAGS.n_obs) obs = data['outcomes'].values student_ids = data['student_id'].values.astype(int) question_ids = data['question_id'].values.astype(int) # MODEL lnvar_students = Normal(loc=0.0, scale=1.0) lnvar_questions = Normal(loc=0.0, scale=1.0) sigma_students = tf.sqrt(tf.exp(lnvar_students)) sigma_questions = tf.sqrt(tf.exp(lnvar_questions)) overall_mu = Normal(loc=tf.zeros(1), scale=tf.ones(1)) student_etas = Normal(loc=0.0, scale=sigma_students, sample_shape=FLAGS.n_students) question_etas = Normal(loc=0.0, scale=sigma_questions, sample_shape=FLAGS.n_questions) observation_logodds = (tf.gather(student_etas, student_ids) + tf.gather(question_etas, question_ids) + overall_mu) outcomes = Bernoulli(logits=observation_logodds) # INFERENCE qstudents = Normal(loc=tf.get_variable("qstudents/loc", [FLAGS.n_students]), scale=tf.nn.softplus( tf.get_variable("qstudents/scale", [FLAGS.n_students]))) qquestions = Normal(loc=tf.get_variable("qquestions/loc", [FLAGS.n_questions]), scale=tf.nn.softplus( tf.get_variable("qquestions/scale", [FLAGS.n_questions]))) qlnvarstudents = Normal(loc=tf.get_variable("qlnvarstudents/loc", []), scale=tf.nn.softplus( tf.get_variable("qlnvarstudents/scale", []))) qlnvarquestions = Normal(loc=tf.get_variable("qlnvarquestions/loc", []), scale=tf.nn.softplus( tf.get_variable("qlnvarquestions/scale", []))) qmu = Normal(loc=tf.get_variable("qmu/loc", [1]), scale=tf.nn.softplus(tf.get_variable("qmu/scale", [1]))) latent_vars = { overall_mu: qmu, lnvar_students: qlnvarstudents, lnvar_questions: qlnvarquestions, student_etas: qstudents, question_etas: qquestions } data = {outcomes: obs} inference = ed.KLqp(latent_vars, data) inference.initialize(n_print=2, n_iter=50) qstudents_mean = qstudents.mean() qquestions_mean = qquestions.mean() tf.global_variables_initializer().run() f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) ax1.set_ylim([-3.0, 3.0]) ax2.set_ylim([-3.0, 3.0]) ax1.set_xlim([-3.0, 3.0]) ax2.set_xlim([-3.0, 3.0]) for t in range(inference.n_iter): info_dict = inference.update() inference.print_progress(info_dict) if t % inference.n_print == 0: # CRITICISM ax1.clear() ax2.clear() ax1.set_ylim([-3.0, 3.0]) ax2.set_ylim([-3.0, 3.0]) ax1.set_xlim([-3.0, 3.0]) ax2.set_xlim([-3.0, 3.0]) ax1.set_title('Student Intercepts') ax2.set_title('Question Intercepts') ax1.set_xlabel('True Student Random Intercepts') ax1.set_ylabel('Estimated Student Random Intercepts') ax2.set_xlabel('True Question Random Intercepts') ax2.set_ylabel('Estimated Question Random Intercepts') ax1.scatter(true_s_etas, qstudents_mean.eval(), s=0.05) ax2.scatter(true_q_etas, qquestions_mean.eval(), s=0.05) plt.draw() plt.pause(2.0 / 60.0)
scale=tf.nn.softplus(tf.Variable(5.0)), ) q_inv_softplus_sigma = Normal( loc=tf.Variable(0.0), scale=tf.nn.softplus(tf.Variable(1.0)), ) # Inference arguments latent_vars = {mu: q_mu, inv_softplus_sigma: q_inv_softplus_sigma} data = {y: y_train} # Inference inference = ed.KLqp(latent_vars, data) inference.run(n_samples=5, n_iter=2500) print(q_mu.mean().eval()) print(q_inv_softplus_sigma.mean().eval()) # Empirical Model with Sampler # Posterior distribution families q_mu = Empirical(params=tf.Variable(tf.random_normal([2000]))) q_inv_softplus_sigma = Empirical(params=tf.Variable(tf.random_normal([2000]))) # Inference arguments latent_vars = {mu: q_mu, inv_softplus_sigma: q_inv_softplus_sigma} data = {y: y_train} # Inference inference = ed.HMC(latent_vars, data) inference.run(step_size=0.003, n_steps=5)
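# The Empirical variables above store the whole HMC chain (2000 states); as in
# the eight-schools snippet earlier, a common follow-up is to drop a burn-in
# prefix before summarizing the posterior:
mu_chain = q_mu.params.eval()
isp_sigma_chain = q_inv_softplus_sigma.params.eval()
burn = mu_chain.shape[0] // 2
print(mu_chain[burn:].mean(), mu_chain[burn:].std())
print(isp_sigma_chain[burn:].mean(), isp_sigma_chain[burn:].std())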
# Create data dictionary for edward data = {xi1: xi_ph_bin, xi2: xi_ph_cont, yi: yi_ph, qti: ti_ph, ti: ti_ph, qyi: yi_ph, xj1: xj_ph_bin, xj2: xj_ph_cont, yj: yj_ph, qtj: tj_ph, tj: tj_ph, qyj: yj_ph} # sample posterior predictive for p(y|z,t) yi_post = ed.copy(yi, {zi: qzi, ti: ti_ph, zj: qzj, tj: tj_ph}, scope='yi_post') yj_post = ed.copy(yj, {zi: qzi, ti: ti_ph, zj: qzj, tj: tj_ph}, scope='yj_post') # crude approximation of the above, why not mean on ti or tj? # yi_post_mean = ed.copy(yi, {zi: qzi.mean(), ti: ti_ph, zj:qzj.mean(), tj: tj_ph}, scope='yi_post_mean') # yj_post_mean = ed.copy(yj, {zi: qzi.mean(), ti: ti_ph, zj:qzj.mean(), tj: tj_ph}, scope='yj_post_mean') # construct a deterministic version (i.e. use the mean of the approximate posterior) of the lower bound # for early stopping according to a validation set yi_post_eval = ed.copy(yi, {zi: qzi.mean(), qti: ti_ph, qyi: yi_ph, ti: ti_ph}, scope='yi_post_eval') yj_post_eval = ed.copy(yj, {zj: qzj.mean(), qtj: tj_ph, qyj: yj_ph, tj: tj_ph}, scope='yj_post_eval') xi1_post_eval = ed.copy(xi1, {zi: qzi.mean(), qti: ti_ph, qyi: yi_ph}, scope='xi1_post_eval') xi2_post_eval = ed.copy(xi2, {zi: qzi.mean(), qti: ti_ph, qyi: yi_ph}, scope='xi2_post_eval') xj1_post_eval = ed.copy(xj1, {zj: qzj.mean(), qtj: tj_ph, qyj: yj_ph}, scope='xj1_post_eval') xj2_post_eval = ed.copy(xj2, {zj: qzj.mean(), qtj: tj_ph, qyj: yj_ph}, scope='xj2_post_eval') ti_post_eval = ed.copy(ti, {zi: qzi.mean(), qti: ti_ph, qyi: yi_ph}, scope='ti_post_eval') tj_post_eval = ed.copy(tj, {zj: qzj.mean(), qtj: tj_ph, qyj: yj_ph}, scope='tj_post_eval') logp_valid = tf.reduce_mean(tf.reduce_sum(yi_post_eval.log_prob(yi_ph) + ti_post_eval.log_prob(ti_ph), axis=1) + tf.reduce_sum(xi1_post_eval.log_prob(xi_ph_bin), axis=1) + tf.reduce_sum(xi2_post_eval.log_prob(xi_ph_cont), axis=1) + tf.reduce_sum(zi.log_prob(qzi.mean()) - qzi.log_prob(qzi.mean()), axis=1)
qmesh_x1, qmesh_x2 = np.meshgrid( np.linspace(qminmax[0], qminmax[1] - qcellres[0], (qminmax[1] - qminmax[0]) // qcellres[0]), np.linspace(qminmax[2], qminmax[3] - qcellres[1], (qminmax[3] - qminmax[2]) // qcellres[1])) gamma_mesh_x1, gamma_mesh_x2 = np.meshgrid( np.linspace(cell_minmax[0], cell_minmax[1] - cell_res[0], (cell_minmax[1] - cell_minmax[0]) // cell_res[0]), np.linspace(cell_minmax[2], cell_minmax[3] - cell_res[1], (cell_minmax[3] - cell_minmax[2]) // cell_res[1])) X_q = calc_grid_v2(qcellres, qminmax, method='grid', X=None) X_q_tf = tf.constant(X_q, dtype=tfdt) X_q_features = rbf_kernel( X_q_tf, qhinge_grid.mean(), qgamma.bijector.forward(qgamma.distribution.mean()), tfdt) # Running inference for t in range(inference.n_iter): if t % 10 == 0: print("\nsaving {}".format(t)) qgamma_eval = qgamma.bijector.forward( qgamma.distribution.mean()).eval() qgamma_var_eval = qgamma.distribution.variance().eval() qhinge_grid_eval = qhinge_grid.mean().eval() post_mu = tf.matmul(X_q_features, qw.mean()) post_var = tf.reduce_sum(tf.square(X_q_features) * tf.transpose(qw.variance()), axis=1, keepdims=True)
N = 5000 # number of data points D = 2 # data dimensionality K = 1 # latent dimensionality # DATA x_train = build_toy_dataset(N, D, K) # MODEL w = Normal(mu=tf.zeros([D, K]), sigma=10.0 * tf.ones([D, K])) z = Normal(mu=tf.zeros([N, K]), sigma=tf.ones([N, K])) x = Normal(mu=tf.matmul(w, z, transpose_b=True), sigma=tf.ones([D, N])) # INFERENCE qw = Normal(mu=tf.Variable(tf.random_normal([D, K])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, K])))) qz = Normal(mu=tf.Variable(tf.random_normal([N, K])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([N, K])))) inference = ed.KLqp({w: qw, z: qz}, data={x: x_train}) inference.run(n_iter=500, n_print=100, n_samples=10) sess = ed.get_session() print("Inferred principal axes:") print(sess.run(qw.mean()))
tf.Variable(tf.random_normal([n_dept])))) latent_vars = {eta_s: q_eta_s, eta_d: q_eta_d, eta_dept: q_eta_dept} data = { y: y_train, s_ph: s_train, d_ph: d_train, dept_ph: dept_train, service_ph: service_train } inference = ed.KLqp(latent_vars, data) # COMMAND ---------- yhat_test = ed.copy(yhat, { eta_s: q_eta_s.mean(), eta_d: q_eta_d.mean(), eta_dept: q_eta_dept.mean() }) # COMMAND ---------- inference.initialize(n_print=2000, n_iter=10000) tf.global_variables_initializer().run() for _ in range(inference.n_iter): # Update and print progress of algorithm. info_dict = inference.update() inference.print_progress(info_dict) t = info_dict['t']
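# One way to use the `yhat_test` copy above after training: evaluate the
# plug-in predictions against the observed outcomes. A sketch, assuming yhat is
# an ordinary tensor of predicted means and the training arrays from the data
# dict are still in scope:
yhat_vals = yhat_test.eval(feed_dict={s_ph: s_train, d_ph: d_train,
                                      dept_ph: dept_train,
                                      service_ph: service_train})
print("Train MSE:", np.mean(np.square(y_train - yhat_vals)))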
grads = tf.gradients(loss, [v._ref() for v in var_list]) grads_and_vars = list(zip(grads, var_list)) return loss, grads_and_vars ed.set_seed(42) N = 5000 # number of data points D = 10 # number of features # DATA w_true = np.random.randn(D) X_data = np.random.randn(N, D) p = expit(np.dot(X_data, w_true)) y_data = np.array([np.random.binomial(1, i) for i in p]) # MODEL X = tf.placeholder(tf.float32, [N, D]) w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) y = Bernoulli(logits=ed.dot(X, w)) # INFERENCE qw = Normal(loc=tf.Variable(tf.random_normal([D])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([D])))) inference = IWVI({w: qw}, data={X: X_data, y: y_data}) inference.run(K=5, n_iter=1000) # CRITICISM print("Mean squared error in true values to inferred posterior mean:") print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
ed.set_seed(142) N = 5000 # number of data points D = 2 # data dimensionality K = 1 # latent dimensionality # DATA x_train = build_toy_dataset(N, D, K) # MODEL w = Normal(mu=tf.zeros([D, K]), sigma=2.0 * tf.ones([D, K])) z = Normal(mu=tf.zeros([N, K]), sigma=tf.ones([N, K])) x = Normal(mu=tf.matmul(w, z, transpose_b=True), sigma=tf.ones([D, N])) # INFERENCE qw = Normal(mu=tf.Variable(tf.random_normal([D, K])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, K])))) qz = Normal(mu=tf.Variable(tf.random_normal([N, K])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([N, K])))) inference = ed.KLqp({w: qw, z: qz}, data={x: x_train}) inference.run(n_iter=500, n_print=100, n_samples=10) sess = ed.get_session() print("Inferred principal axes:") print(sess.run(qw.mean()))
N = 40 # num data points D = 1 # num features ed.set_seed(42) X_train, y_train = build_toy_dataset(N) X_test, y_test = build_toy_dataset(N) X = ed.placeholder(tf.float32, [N, D], name='X') beta = Normal(mu=tf.zeros(D), sigma=tf.ones(D), name='beta') y = Normal(mu=ed.dot(X, beta), sigma=tf.ones(N), name='y') qmu_mu = tf.Variable(tf.random_normal([D])) qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D]))) qbeta = Normal(mu=qmu_mu, sigma=qmu_sigma, name='qbeta') data = {X: X_train, y: y_train} inference = ed.MFVI({beta: qbeta}, data) inference.initialize(logdir='train') sess = ed.get_session() for t in range(501): _, loss = sess.run([inference.train, inference.loss], {X: data[X]}) inference.print_progress(t, loss) y_post = ed.copy(y, {beta: qbeta.mean()}) # This is equivalent to # y_post = Normal(mu=ed.dot(X, qbeta.mean()), sigma=tf.ones(N)) print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D)) b = Normal(mu=tf.zeros(1), sigma=tf.ones(1)) y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N)) # INFERENCE qw = Normal(mu=tf.Variable(tf.random_normal([D])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D])))) qb = Normal(mu=tf.Variable(tf.random_normal([1])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1])))) data = {X: X_train, y: y_train} inference = ed.KLqp({w: qw, b: qb}, data) inference.run() # CRITICISM y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()}) # This is equivalent to # y_post = Normal(mu=ed.dot(X, qw.mean()) + qb.mean(), sigma=tf.ones(N)) print("Mean squared error on test data:") print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) print("Displaying prior predictive samples.") n_prior_samples = 10 w_prior = w.sample(n_prior_samples).eval() b_prior = b.sample(n_prior_samples).eval() plt.scatter(X_train, y_train) inputs = np.linspace(-1, 10, num=400, dtype=np.float32)
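# The snippet breaks off after building `inputs`; a plausible continuation that
# mirrors the `visualise` helper in the other regression example, assuming a
# single input feature (D = 1):
for ns in range(n_prior_samples):
  output = inputs * w_prior[ns] + b_prior[ns]
  plt.plot(inputs, output)
plt.show()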
# DATA nsubj = 200 nitem = 25 trait_true = np.random.normal(size=[nsubj, 1]) thresh_true = np.random.normal(size=[1, nitem]) X_data = np.random.binomial(1, expit(trait_true - thresh_true)) # MODEL trait = Normal(mu=tf.zeros([nsubj, 1]), sigma=tf.ones([nsubj, 1])) thresh = Normal(mu=tf.zeros([1, nitem]), sigma=tf.ones([1, nitem])) X = Bernoulli(logits=trait - thresh) # INFERENCE q_trait = Normal(mu=tf.Variable(tf.random_normal([nsubj, 1])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([nsubj, 1])))) q_thresh = Normal(mu=tf.Variable(tf.random_normal([1, nitem])), sigma=tf.nn.softplus( tf.Variable(tf.random_normal([1, nitem])))) inference = ed.KLqp({trait: q_trait, thresh: q_thresh}, data={X: X_data}) inference.run(n_iter=2500, n_samples=10) # CRITICISM # Check that the inferred posterior mean captures the true traits. plt.scatter(trait_true, q_trait.mean().eval()) plt.show() print("MSE between true traits and inferred posterior mean:") print(np.mean(np.square(trait_true - q_trait.mean().eval())))
#print("mu: ", qmu.value().eval()) #print("beta:\n", qbeta.value().eval()) Cb, Sb, taub = map_MOU(X.T, verbose=2) Cb[np.eye(d, dtype=bool)] = taub # VI print("setting up variational distributions") qmu = Normal(loc=tf.Variable(tf.random_normal([d])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([d])))) qbeta = Normal(loc=tf.Variable(tf.random_normal([d, d])), scale=tf.nn.softplus(tf.Variable(tf.random_normal([d,d])))) print("constructing inference object") %time inference_vb = ed.KLqp({beta: qbeta, mu: qmu}, data={xt: xt_true for xt, xt_true in zip(x, X)}) print("running VB inference") inference_vb.run() Cvb = qbeta.mean().eval() pp = qbeta.cdf(0.).eval() Cvb_filt = Cvb.copy() Cvb_filt[pp<0.05] = 0 off_diag_mask = [np.logical_not(np.eye(d, dtype=bool))] print(pearsonr(C[off_diag_mask], Cvb[off_diag_mask])) print(pearsonr(C[off_diag_mask], Cvb_filt[off_diag_mask])) plt.figure() plt.subplot(121) plt.scatter(C[off_diag_mask], Cvb[off_diag_mask]) plt.subplot(122) plt.scatter(C[off_diag_mask], Cvb_filt[off_diag_mask]) plt.figure() plt.subplot(131) sns.heatmap(C)
def learn_separated(args, train_set, test_set, anlysis_flag=False): # Parameters n_hidd = 1000 # number of hidden units per layer n_epoch = args.n_epoch learning_rate = 0.001 batch_size = 128 hidden_layer = get_fc_layer_fn(l2_reg_scale=1e-4, depth=1) out_layer = get_fc_layer_fn(l2_reg_scale=1e-4) x_train, t_train, y_train = train_set['X'], train_set['T'], train_set['Y'] n_train = x_train.shape[0] x_dim = x_train.shape[1] batch_size = min(batch_size, n_train) # ------ Define Graph ---------------------# tf.reset_default_graph() # ------ Define Inputs ---------------------# # define placeholder which will receive data batches x_ph = tf.placeholder(tf.float32, [None, x_dim]) t_ph = tf.placeholder(tf.float32, [None, 1]) y_ph = tf.placeholder(tf.float32, [None, 1]) n_ph = tf.shape(x_ph)[0] # number of samples fed to placeholders # ------ Define generative model /decoder-----------------------# if anlysis_flag: z_t_dim = 1 z_y_dim = 1 else: # z_x_dim = 1 z_t_dim = 2 z_y_dim = 3 # latent_dims = (z_x_dim, z_t_dim, z_y_dim) latent_dims = (z_t_dim, z_y_dim) # prior over latent variables: # p(zx) - # zx = Normal(loc=tf.zeros([n_ph, z_x_dim]), scale=tf.ones([n_ph, z_x_dim])) # p(zt) - zt = Normal(loc=tf.zeros([n_ph, z_t_dim]), scale=tf.ones([n_ph, z_t_dim])) # p(zy) - zy = Normal(loc=tf.zeros([n_ph, z_y_dim]), scale=tf.ones([n_ph, z_y_dim])) z = tf.concat([zt, zy], axis=1) # p(x|z) - likelihood of proxy X # z = tf.concat([zx, zt, zy], axis=1) hidden = hidden_layer(z, n_hidd, tf.nn.elu) x = Normal(loc=out_layer(hidden, x_dim, None), scale=out_layer(hidden, x_dim, tf.nn.softplus), name='gaussian_px_z') # p(t|zt) if args.model_type == 'separated_with_confounder': hidden = hidden_layer(z, n_hidd, tf.nn.elu) else: hidden = hidden_layer(zt, n_hidd, tf.nn.elu) probs = out_layer(hidden, 1, tf.nn.sigmoid) # output in [0,1] t = Bernoulli(probs=probs, dtype=tf.float32, name='bernoulli_pt_z') # p(y|t,zy) hidden = hidden_layer(zy, n_hidd, tf.nn.elu) # shared hidden layer mu_y_t0 = out_layer(hidden, 1, None) mu_y_t1 = out_layer(hidden, 1, None) # y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0, scale=tf.ones_like(mu_y_t0)) sigma_y_t0 = out_layer(hidden, 1, tf.nn.softplus) sigma_y_t1 = out_layer(hidden, 1, tf.nn.softplus) y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0, scale=t * sigma_y_t1 + (1. - t) * sigma_y_t0) # ------ Define inference model - CEVAE variational approximation (encoder) # q(t|x) hqt = hidden_layer(x_ph, n_hidd, tf.nn.elu) probs_t = out_layer(hqt, 1, tf.nn.sigmoid) # output in [0,1] qt = Bernoulli(probs=probs_t, dtype=tf.float32) # q(y|x,t) hqy = hidden_layer(x_ph, n_hidd, tf.nn.elu) # shared hidden layer mu_qy_t0 = out_layer(hqy, 1, None) mu_qy_t1 = out_layer(hqy, 1, tf.nn.elu) sigma_qy_t1 = out_layer(hqy, 1, tf.nn.softplus) sigma_qy_t0 = out_layer(hqy, 1, tf.nn.softplus) # qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0, scale=tf.ones_like(mu_qy_t0)) qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0, scale=qt * sigma_qy_t1 + (1. - qt) * sigma_qy_t0) # # q(z_x|x,t,y) # inpt2 = tf.concat([x_ph, qy], axis=1) # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu) # shared hidden layer # muq_t0 = out_layer(hqz, z_x_dim, None) # sigmaq_t0 = out_layer(hqz, z_x_dim, tf.nn.softplus) # muq_t1 = out_layer(hqz, z_x_dim, None) # sigmaq_t1 = out_layer(hqz, z_x_dim, tf.nn.softplus) # qzx = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0, # scale=qt * sigmaq_t1 + (1. 
- qt) * sigmaq_t0) # shared hidden layer inpt2 = tf.concat([x_ph, qy], axis=1) hqz = out_layer(inpt2, n_hidd, tf.nn.elu) # q(zt|x,t,y) muq_t0 = out_layer(hqz, z_t_dim, None) sigmaq_t0 = out_layer(hqz, z_t_dim, tf.nn.softplus) muq_t1 = out_layer(hqz, z_t_dim, None) sigmaq_t1 = out_layer(hqz, z_t_dim, tf.nn.softplus) qzt = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0, scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0) # q(zy|x,t,y) # inpt2 = tf.concat([x_ph, qy], axis=1) # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu) # shared hidden layer muq_t0 = out_layer(hqz, z_y_dim, None) sigmaq_t0 = out_layer(hqz, z_y_dim, tf.nn.softplus) muq_t1 = out_layer(hqz, z_y_dim, None) sigmaq_t1 = out_layer(hqz, z_y_dim, tf.nn.softplus) qzy = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0, scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0) # end graph def # ------ Criticism / evaluation graph: zy_learned = ed.copy(qzy, {x: x_ph}) zt_learned = ed.copy(qzt, {x: x_ph}) # sample posterior predictive for p(y|z_y,t) y_post = ed.copy(y, {zy: qzy, t: t_ph}, scope='y_post') # crude approximation of the above y_post_mean = ed.copy(y, {zy: qzy.mean(), t: t_ph}, scope='y_post_mean') # ------ Training - Run variational inference # Create data dictionary for edward data = {x: x_ph, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph} batch_size = min(batch_size, n_train) n_iter_per_epoch = n_train // batch_size inference = ed.KLqp({zt: qzt, zy: qzy}, data=data) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) data_scaling = n_train / batch_size # to scale likelihood againt prior inference.initialize(optimizer=optimizer, n_samples=5, n_iter=n_iter_per_epoch * n_epoch, scale={ x: data_scaling, t: data_scaling, y: data_scaling }) with tf.Session() as sess: tf.global_variables_initializer().run() for epoch in range(n_epoch): train_generator = batch_generator(np.random.permutation(n_train), batch_size) avg_loss = 0.0 for j in range(n_iter_per_epoch): # Take batch: idx = next(train_generator) x_b, t_b, y_b = x_train[idx], t_train[idx], y_train[idx] info_dict = inference.update(feed_dict={ x_ph: x_b, t_ph: t_b, y_ph: y_b }) inference.print_progress(info_dict) avg_loss += info_dict['loss'] avg_loss = avg_loss / n_iter_per_epoch avg_loss = avg_loss / batch_size # print('Epoch {}, avg loss {}'.format(epoch, avg_loss)) # ------ Evaluation - x_test = test_set['X'] H_test = test_set['H'] z_y_test = sess.run(zy_learned.mean(), feed_dict={x_ph: x_test}) z_t_test = sess.run(zt_learned.mean(), feed_dict={x_ph: x_test}) z_y_train = sess.run(zy_learned.mean(), feed_dict={x_ph: x_train}) if args.show_plots: treat_probs = sess.run(qt.mean(), feed_dict={x_ph: x_test}) plt.scatter(z_t_test.flatten(), treat_probs.flatten(), label='Estimated Treatment Probability') plt.legend() plt.xlabel(r'$z_t$') plt.ylabel('Probability') plt.show() # plt.scatter(x_test[:, 1].flatten(), z_y_test.flatten()) # plt.xlabel('X_1') # plt.ylabel('z_y') # plt.show() # plt.scatter(H_test.flatten(), z_y_test.flatten()) plt.xlabel('H') plt.ylabel(r'$z_y$', fontsize=16) plt.show() plt.scatter(test_set['W'].flatten(), z_t_test.flatten()) plt.xlabel('W') plt.ylabel(r'$z_t$') plt.show() # CATE estimation: if args.estimation_type == 'approx_posterior': forced_t = np.ones((args.n_test, 1)) est_y0 = sess.run(y_post.mean(), feed_dict={ x_ph: x_test, t_ph: 0 * forced_t }) est_y1 = sess.run(y_post.mean(), feed_dict={ x_ph: x_test, t_ph: forced_t }) # std_y1 = sess.run(y_post.stddev(), feed_dict={x_ph: x_test, t_ph: forced_t}) elif args.estimation_type == 'latent_matching': est_y0, est_y1 = 
matching_estimate(z_y_train, t_train, y_train, z_y_test, args.n_neighbours) else: raise ValueError('Unrecognised estimation_type') return evalaute_effect_estimate( est_y0, est_y1, test_set, args, model_name='Separated CEVAE - Latent dims: ' + str(latent_dims), estimation_type=args.estimation_type)
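# `matching_estimate` is not shown; a rough sketch of one plausible
# nearest-neighbour implementation in the learned z_y space (hypothetical; the
# original may differ):
from sklearn.neighbors import NearestNeighbors

def matching_estimate(z_train, t_train, y_train, z_test, n_neighbours):
  t_train = t_train.flatten()
  estimates = []
  for t_val in (0, 1):
    mask = t_train == t_val
    nn = NearestNeighbors(n_neighbors=n_neighbours).fit(z_train[mask])
    _, idx = nn.kneighbors(z_test)
    # average the outcomes of the matched neighbours with treatment t_val
    estimates.append(y_train[mask][idx].mean(axis=1))
  return estimates[0], estimates[1]  # est_y0, est_y1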
D = 10 # number of features # DATA coeff = np.random.randn(D) X_train, y_train = build_toy_dataset(N, coeff) X_test, y_test = build_toy_dataset(N, coeff) # MODEL X = tf.placeholder(tf.float32, [N, D]) w = Normal(mu=tf.zeros(D), sigma=tf.ones(D)) b = Normal(mu=tf.zeros(1), sigma=tf.ones(1)) y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N)) # INFERENCE qw = Normal(mu=tf.Variable(tf.random_normal([D])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D])))) qb = Normal(mu=tf.Variable(tf.random_normal([1])), sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1])))) data = {X: X_train, y: y_train} inference = ed.KLqp({w: qw, b: qb}, data) inference.run(n_samples=5, n_iter=250) # CRITICISM y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()}) # This is equivalent to # y_post = Normal(mu=ed.dot(X, qw.mean()) + qb.mean(), sigma=tf.ones(N)) print("Mean squared error on test data:") print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))
muq_t0, sigmaq_t0 = fullyConnect_net(x_ph, [h], [[d, None], [d, tf.nn.softplus]], 'qz_xt0', lamba=lamba, activation=activation) qz = Normal(loc=muq_t0, scale=sigmaq_t0) # Sampling posterior predictive from p(y|z,t) y_post = ed.copy(y, {z: qz, t: t_ph}, scope='y_post') t_post = ed.copy(t, {z: qz, y: y_ph}, scope='t_post') # for early stopping according to a validation set y_post_eval = ed.copy(y, { z: qz.mean(), y: y_ph, t: t_ph }, scope='y_post_eval') t_post_eval = ed.copy(t, {z: qz.mean(), y: y_ph}, scope='t_post_eval') log_valid = tf.reduce_mean( tf.reduce_sum(y_post_eval.log_prob(y_ph) + t_post_eval.log_prob(t_ph), axis=1) + tf.reduce_sum(z.log_prob(qz.mean()) - qz.log_prob(qz.mean()), axis=1)) tf.global_variables_initializer().run()
qw = Normal(loc=tf.Variable(tf.random_normal([D]) + 1.0), scale=tf.nn.softplus(tf.Variable(tf.random_normal([D])))) inference = ed.ImplicitKLqp( {w: qw}, data={y: y_ph}, discriminator=ratio_estimator, global_vars={w: qw}) inference.initialize(n_iter=5000, n_print=100, scale={y: float(N) / M}) sess = ed.get_session() tf.global_variables_initializer().run() for _ in range(inference.n_iter): X_batch, y_batch = next(data) for _ in range(5): info_dict_d = inference.update( variables="Disc", feed_dict={X: X_batch, y_ph: y_batch}) info_dict = inference.update( variables="Gen", feed_dict={X: X_batch, y_ph: y_batch}) info_dict['loss_d'] = info_dict_d['loss_d'] info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration t = info_dict['t'] inference.print_progress(info_dict) if t == 1 or t % inference.n_print == 0: # Check inferred posterior parameters. mean, std = sess.run([qw.mean(), qw.stddev()]) print("\nInferred mean & std:") print(mean) print(std)