Example #1
def main(_):
  # data
  J = 8
  data_y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
  data_sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])

  # model definition
  mu = Normal(0., 10.)
  logtau = Normal(5., 1.)
  theta_prime = Normal(tf.zeros(J), tf.ones(J))
  sigma = tf.placeholder(tf.float32, J)
  y = Normal(mu + tf.exp(logtau) * theta_prime, sigma * tf.ones([J]))

  data = {y: data_y, sigma: data_sigma}

  # ed.KLqp inference
  with tf.variable_scope('q_logtau'):
    q_logtau = Normal(tf.get_variable('loc', []),
                      tf.nn.softplus(tf.get_variable('scale', [])))

  with tf.variable_scope('q_mu'):
    q_mu = Normal(tf.get_variable('loc', []),
                  tf.nn.softplus(tf.get_variable('scale', [])))

  with tf.variable_scope('q_theta_prime'):
    q_theta_prime = Normal(tf.get_variable('loc', [J]),
                           tf.nn.softplus(tf.get_variable('scale', [J])))

  inference = ed.KLqp({logtau: q_logtau, mu: q_mu,
                      theta_prime: q_theta_prime}, data=data)
  inference.run(n_samples=15, n_iter=60000)
  print("====  ed.KLqp inference ====")
  print("E[mu] = %f" % (q_mu.mean().eval()))
  print("E[logtau] = %f" % (q_logtau.mean().eval()))
  print("E[theta_prime]=")
  print((q_theta_prime.mean().eval()))
  print("====  end ed.KLqp inference ====")
  print("")
  print("")

  # HMC inference
  S = 400000
  burn = S // 2

  hq_logtau = Empirical(tf.get_variable('hq_logtau', [S]))
  hq_mu = Empirical(tf.get_variable('hq_mu', [S]))
  hq_theta_prime = Empirical(tf.get_variable('hq_thetaprime', [S, J]))

  inference = ed.HMC({logtau: hq_logtau, mu: hq_mu,
                     theta_prime: hq_theta_prime}, data=data)
  inference.run()

  print("====  ed.HMC inference ====")
  print("E[mu] = %f" % (hq_mu.params.eval()[burn:].mean()))
  print("E[logtau] = %f" % (hq_logtau.params.eval()[burn:].mean()))
  print("E[theta_prime]=")
  print(hq_theta_prime.params.eval()[burn:, ].mean(0))
  print("====  end ed.HMC inference ====")
  print("")
  print("")
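The eight-schools model above is written in a non-centered form, so the per-school effects enter as theta = mu + exp(logtau) * theta_prime rather than as a latent variable of their own. As a minimal post-processing sketch (an addition, assuming it runs directly after the HMC block above, inside the same session and with numpy imported as np), the effects can be recovered from the post-burn-in draws:

  # Recover E[theta | data] via theta = mu + exp(logtau) * theta_prime,
  # using the post-burn-in HMC draws of the non-centered parameters.
  mu_draws = hq_mu.params.eval()[burn:]                    # shape (S - burn,)
  logtau_draws = hq_logtau.params.eval()[burn:]            # shape (S - burn,)
  theta_prime_draws = hq_theta_prime.params.eval()[burn:]  # shape (S - burn, J)
  theta_draws = (mu_draws[:, np.newaxis] +
                 np.exp(logtau_draws)[:, np.newaxis] * theta_prime_draws)
  print("E[theta] =", theta_draws.mean(axis=0))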
Example #2
def main(_):
    ed.set_seed(42)
    N = 5000  # number of data points
    D = 10  # number of features

    # DATA
    w_true = np.random.randn(D)
    X_data = np.random.randn(N, D)
    p = expit(np.dot(X_data, w_true))
    y_data = np.array([np.random.binomial(1, i) for i in p])

    # MODEL
    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
    y = Bernoulli(logits=ed.dot(X, w))

    # INFERENCE
    qw = Normal(loc=tf.get_variable("qw/loc", [D]),
                scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))

    inference = IWVI({w: qw}, data={X: X_data, y: y_data})
    inference.run(K=5, n_iter=1000)

    # CRITICISM
    print("Mean squared error in true values to inferred posterior mean:")
    print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
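As a small follow-up sketch (an addition, assuming it is placed right after the criticism step above and that Edward's built-in 'binary_accuracy' metric is available), the fit can also be judged on predictive accuracy by swapping the prior weights for the learned approximation:

    # Posterior predictive: replace the prior w with the fitted qw, then score
    # one predictive draw of y against the observed labels.
    y_post = ed.copy(y, {w: qw})
    print("Binary accuracy on training data:")
    print(ed.evaluate('binary_accuracy', data={X: X_data, y_post: y_data}))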
Example #3
  def _test_normal_normal(self, Inference, default, *args, **kwargs):
    with self.test_session() as sess:
      x_data = np.array([0.0] * 50, dtype=np.float32)

      mu = Normal(loc=0.0, scale=1.0)
      x = Normal(loc=mu, scale=1.0, sample_shape=50)

      if not default:
        qmu_loc = tf.Variable(tf.random_normal([]))
        qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
        qmu = Normal(loc=qmu_loc, scale=qmu_scale)

        # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
        inference = Inference({mu: qmu}, data={x: x_data})
      else:
        inference = Inference([mu], data={x: x_data})
        qmu = inference.latent_vars[mu]
      inference.run(*args, **kwargs)

      self.assertAllClose(qmu.mean().eval(), 0, rtol=0.1, atol=0.6)
      self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                          rtol=0.15, atol=0.5)

      variables = tf.get_collection(
          tf.GraphKeys.GLOBAL_VARIABLES, scope='optimizer')
      old_t, old_variables = sess.run([inference.t, variables])
      self.assertEqual(old_t, inference.n_iter)
      sess.run(inference.reset)
      new_t, new_variables = sess.run([inference.t, variables])
      self.assertEqual(new_t, 0)
      self.assertNotEqual(old_variables, new_variables)
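The "analytic solution" comment in this test comes from the standard conjugate normal-normal update; a short worked sketch of that arithmetic (an addition, in plain numpy):

# Prior mu ~ N(0, 1) and likelihood x_i ~ N(mu, 1) with n = 50 observations of 0.0.
# Posterior precision = 1 + n = 51 and posterior mean = sum(x) / 51 = 0, so the
# exact posterior is N(0, sqrt(1 / 51)) ~= N(0, 0.140), matching the assertions.
import numpy as np
n = 50
print(np.sqrt(1.0 / (1.0 + n)))  # ~0.140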
Example #4
File: iwvi.py  Project: JoyceYa/edward
def main(_):
  ed.set_seed(42)
  N = 5000  # number of data points
  D = 10  # number of features

  # DATA
  w_true = np.random.randn(D)
  X_data = np.random.randn(N, D)
  p = expit(np.dot(X_data, w_true))
  y_data = np.array([np.random.binomial(1, i) for i in p])

  # MODEL
  X = tf.placeholder(tf.float32, [N, D])
  w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
  y = Bernoulli(logits=ed.dot(X, w))

  # INFERENCE
  qw = Normal(loc=tf.get_variable("qw/loc", [D]),
              scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))

  inference = IWVI({w: qw}, data={X: X_data, y: y_data})
  inference.run(K=5, n_iter=1000)

  # CRITICISM
  print("Mean squared error in true values to inferred posterior mean:")
  print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
Example #5
    def _test_normal_normal(self, Inference, default, *args, **kwargs):
        with self.test_session() as sess:
            x_data = np.array([0.0] * 50, dtype=np.float32)

            mu = Normal(loc=0.0, scale=1.0)
            x = Normal(loc=mu, scale=1.0, sample_shape=50)

            if not default:
                qmu_loc = tf.Variable(tf.random_normal([]))
                qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
                qmu = Normal(loc=qmu_loc, scale=qmu_scale)

                # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
                inference = Inference({mu: qmu}, data={x: x_data})
            else:
                inference = Inference([mu], data={x: x_data})
                qmu = inference.latent_vars[mu]
            inference.run(*args, **kwargs)

            self.assertAllClose(qmu.mean().eval(), 0, rtol=0.15, atol=0.5)
            self.assertAllClose(qmu.stddev().eval(),
                                np.sqrt(1 / 51),
                                rtol=0.15,
                                atol=0.5)

            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope='optimizer')
            old_t, old_variables = sess.run([inference.t, variables])
            self.assertEqual(old_t, inference.n_iter)
            sess.run(inference.reset)
            new_t, new_variables = sess.run([inference.t, variables])
            self.assertEqual(new_t, 0)
            self.assertNotEqual(old_variables, new_variables)
Example #6
    def test_normalnormal_run(self):
        with self.test_session() as sess:
            x_data = np.array([0.0] * 50, dtype=np.float32)

            mu = Normal(loc=0.0, scale=1.0)
            x = Normal(loc=tf.ones(50) * mu, scale=1.0)

            qmu_loc = tf.Variable(tf.random_normal([]))
            qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
            qmu = Normal(loc=qmu_loc, scale=qmu_scale)

            # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
            n_iter = 5000
            inference = ed.KLqp({mu: qmu}, data={x: x_data})
            inference.run(n_iter=n_iter)

            self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
            self.assertAllClose(qmu.stddev().eval(),
                                np.sqrt(1 / 51),
                                rtol=1e-1,
                                atol=1e-1)

            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                          scope='optimizer')
            old_t, old_variables = sess.run([inference.t, variables])
            self.assertEqual(old_t, n_iter)
            sess.run(inference.reset)
            new_t, new_variables = sess.run([inference.t, variables])
            self.assertEqual(new_t, 0)
            self.assertNotEqual(old_variables, new_variables)
Example #7
def main(_):
  ed.set_seed(142)

  # DATA
  x_train = build_toy_dataset(FLAGS.N, FLAGS.D, FLAGS.K)

  # MODEL
  w = Normal(loc=0.0, scale=10.0, sample_shape=[FLAGS.D, FLAGS.K])
  z = Normal(loc=0.0, scale=1.0, sample_shape=[FLAGS.M, FLAGS.K])
  x = Normal(loc=tf.matmul(w, z, transpose_b=True),
             scale=tf.ones([FLAGS.D, FLAGS.M]))

  # INFERENCE
  qw_variables = [tf.get_variable("qw/loc", [FLAGS.D, FLAGS.K]),
                  tf.get_variable("qw/scale", [FLAGS.D, FLAGS.K])]
  qw = Normal(loc=qw_variables[0], scale=tf.nn.softplus(qw_variables[1]))

  qz_variables = [tf.get_variable("qz/loc", [FLAGS.N, FLAGS.K]),
                  tf.get_variable("qz/scale", [FLAGS.N, FLAGS.K])]
  idx_ph = tf.placeholder(tf.int32, FLAGS.M)
  qz = Normal(loc=tf.gather(qz_variables[0], idx_ph),
              scale=tf.nn.softplus(tf.gather(qz_variables[1], idx_ph)))

  x_ph = tf.placeholder(tf.float32, [FLAGS.D, FLAGS.M])
  inference_w = ed.KLqp({w: qw}, data={x: x_ph, z: qz})
  inference_z = ed.KLqp({z: qz}, data={x: x_ph, w: qw})

  scale_factor = float(FLAGS.N) / FLAGS.M
  inference_w.initialize(scale={x: scale_factor, z: scale_factor},
                         var_list=qz_variables,
                         n_samples=5)
  inference_z.initialize(scale={x: scale_factor, z: scale_factor},
                         var_list=qw_variables,
                         n_samples=5)

  sess = ed.get_session()
  tf.global_variables_initializer().run()
  for _ in range(inference_w.n_iter):
    x_batch, idx_batch = next_batch(x_train, FLAGS.M)
    for _ in range(5):
      inference_z.update(feed_dict={x_ph: x_batch, idx_ph: idx_batch})

    info_dict = inference_w.update(feed_dict={x_ph: x_batch, idx_ph: idx_batch})
    inference_w.print_progress(info_dict)

    t = info_dict['t']
    if t % 100 == 0:
      print("\nInferred principal axes:")
      print(sess.run(qw.mean()))
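The example above calls a next_batch helper that is not shown in the snippet. A hypothetical sketch of a compatible implementation (the body below is an assumption, inferred only from the [D, M] data placeholder and the row indices used to gather qz):

def next_batch(x_train, M):
  """Hypothetical helper: sample M data points (columns of x_train) and
  return them together with their indices for gathering rows of qz."""
  idx_batch = np.random.choice(x_train.shape[1], M, replace=False)
  return x_train[:, idx_batch], idx_batch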
Example #8
  def test_normalnormal_run(self):
    with self.test_session() as sess:
      x_data = np.array([0.0] * 50, dtype=np.float32)

      mu = Normal(loc=0.0, scale=1.0)
      x = Normal(loc=tf.ones(50) * mu, scale=1.0)

      qmu_loc = tf.Variable(tf.random_normal([]))
      qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
      qmu = Normal(loc=qmu_loc, scale=qmu_scale)

      # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
      inference = ed.KLpq({mu: qmu}, data={x: x_data})
      inference.run(n_samples=25, n_iter=100)

      self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
      self.assertAllClose(qmu.stddev().eval(), np.sqrt(1 / 51),
                          rtol=1e-1, atol=1e-1)
Example #9
  def test_normalnormal_run(self):
    with self.test_session() as sess:
      x_data = np.array([0.0] * 50, dtype=np.float32)

      mu = Normal(mu=0.0, sigma=1.0)
      x = Normal(mu=tf.ones(50) * mu, sigma=1.0)

      qmu_mu = tf.Variable(tf.random_normal([]))
      qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([])))
      qmu = Normal(mu=qmu_mu, sigma=qmu_sigma)

      # analytic solution: N(mu=0.0, sigma=\sqrt{1/51}=0.140)
      inference = ed.KLpq({mu: qmu}, data={x: x_data})
      inference.run(n_samples=25, n_iter=100)

      self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
      self.assertAllClose(qmu.std().eval(), np.sqrt(1 / 51),
                          rtol=1e-1, atol=1e-1)
Example #10
  def test_normal_run(self):
    def ratio_estimator(data, local_vars, global_vars):
      """Use the optimal ratio estimator, r(z) = log p(z). We add a
      TensorFlow variable as the algorithm assumes that the function
      has parameters to optimize."""
      w = tf.get_variable("w", [])
      return z.log_prob(local_vars[z]) + w

    with self.test_session() as sess:
      z = Normal(loc=5.0, scale=1.0)

      qz = Normal(loc=tf.Variable(tf.random_normal([])),
                  scale=tf.nn.softplus(tf.Variable(tf.random_normal([]))))

      inference = ed.ImplicitKLqp({z: qz}, discriminator=ratio_estimator)
      inference.run(n_iter=200)

      self.assertAllClose(qz.mean().eval(), 5.0, atol=1.0)
Example #11
def probabilistic_pca_example():
    ed.set_seed(142)

    N = 5000  # Number of data points.
    D = 2  # Data dimensionality.
    K = 1  # Latent dimensionality.

    x_train = build_toy_dataset(N, D, K)

    plt.scatter(x_train[0, :], x_train[1, :], color='blue', alpha=0.1)
    plt.axis([-10, 10, -10, 10])
    plt.title('Simulated data set')
    plt.show()

    #--------------------
    # Model.
    w = Normal(loc=tf.zeros([D, K]), scale=2.0 * tf.ones([D, K]))
    z = Normal(loc=tf.zeros([N, K]), scale=tf.ones([N, K]))
    x = Normal(loc=tf.matmul(w, z, transpose_b=True), scale=tf.ones([D, N]))

    #--------------------
    # Inference.
    qw = Normal(loc=tf.get_variable('qw/loc', [D, K]),
                scale=tf.nn.softplus(tf.get_variable('qw/scale', [D, K])))
    qz = Normal(loc=tf.get_variable('qz/loc', [N, K]),
                scale=tf.nn.softplus(tf.get_variable('qz/scale', [N, K])))

    inference = ed.KLqp({w: qw, z: qz}, data={x: x_train})
    inference.run(n_iter=500, n_print=100, n_samples=10)

    #--------------------
    # Criticism.
    sess = ed.get_session()
    print('Inferred principal axes:')
    print(sess.run(qw.mean()))

    # Build and then generate data from the posterior predictive distribution.
    x_post = ed.copy(x, {w: qw, z: qz})
    x_gen = sess.run(x_post)

    plt.scatter(x_gen[0, :], x_gen[1, :], color='red', alpha=0.1)
    plt.axis([-10, 10, -10, 10])
    plt.title('Data generated from model')
    plt.show()
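The build_toy_dataset helper is not included in this snippet. A hypothetical version consistent with the model above, where the data is a D x N matrix generated from a K-dimensional latent code (the body is an assumption):

def build_toy_dataset(N, D, K, sigma=1.0):
    """Hypothetical: simulate x = w z + noise, with w of shape [D, K] and
    z of shape [K, N], returning an array of shape [D, N]."""
    w = np.random.normal(0.0, 2.0, size=(D, K))
    z = np.random.normal(0.0, 1.0, size=(K, N))
    return np.random.normal(np.dot(w, z), sigma)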
Example #12
    def test_normal_run(self):
        def ratio_estimator(data, local_vars, global_vars):
            """Use the optimal ratio estimator, r(z) = log p(z). We add a
      TensorFlow variable as the algorithm assumes that the function
      has parameters to optimize."""
            w = tf.get_variable("w", [])
            return z.log_prob(local_vars[z]) + w

        with self.test_session() as sess:
            z = Normal(loc=5.0, scale=1.0)

            qz = Normal(loc=tf.Variable(tf.random_normal([])),
                        scale=tf.nn.softplus(tf.Variable(tf.random_normal(
                            []))))

            inference = ed.ImplicitKLqp({z: qz}, discriminator=ratio_estimator)
            inference.run(n_iter=200)

            self.assertAllClose(qz.mean().eval(), 5.0, atol=1.0)
Example #13
File: test_klqp.py  Project: ylfzr/edward
    def test_normalnormal_run(self):
        with self.test_session() as sess:
            x_data = np.array([0.0] * 50, dtype=np.float32)

            mu = Normal(loc=0.0, scale=1.0)
            x = Normal(loc=tf.ones(50) * mu, scale=1.0)

            qmu_loc = tf.Variable(tf.random_normal([]))
            qmu_scale = tf.nn.softplus(tf.Variable(tf.random_normal([])))
            qmu = Normal(loc=qmu_loc, scale=qmu_scale)

            # analytic solution: N(loc=0.0, scale=\sqrt{1/51}=0.140)
            inference = ed.KLqp({mu: qmu}, data={x: x_data})
            inference.run(n_iter=5000)

            self.assertAllClose(qmu.mean().eval(), 0, rtol=1e-1, atol=1e-1)
            self.assertAllClose(qmu.stddev().eval(),
                                np.sqrt(1 / 51),
                                rtol=1e-1,
                                atol=1e-1)
Example #14
                                                                        D]))))

inference = ed.KLqp({mu: qmu, sigma: qsigma}, data={x: x_train})
inference.initialize(n_samples=20, n_iter=4000)

sess = ed.get_session()
init = tf.global_variables_initializer()
init.run()

for _ in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
    t = info_dict['t']
    if t % inference.n_print == 0:
        print("Inferred cluster means:")
        print(sess.run(qmu.mean()))

# Calculate likelihood for each data point and cluster assignment,
# averaged over many posterior samples. ``x_post`` has shape (N, 100, K, D).
mu_sample = qmu.sample(100)
sigma_sample = qsigma.sample(100)
x_post = Normal(mu=tf.ones([N, 1, 1, 1]) * mu_sample,
                sigma=tf.ones([N, 1, 1, 1]) * sigma_sample)
x_broadcasted = tf.tile(tf.reshape(x_train, [N, 1, 1, D]), [1, 100, K, 1])

# Sum over latent dimension, then average over posterior samples.
# ``log_liks`` ends up with shape (N, K).
log_liks = x_post.log_prob(x_broadcasted)
log_liks = tf.reduce_sum(log_liks, 3)
log_liks = tf.reduce_mean(log_liks, 1)
Example #15
File: irt.py  Project: ylfzr/edward
                             tf.Variable(tf.random_normal([1]))))
qmu = Normal(loc=tf.Variable(tf.random_normal([1])),
             scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

latent_vars = {
    overall_mu: qmu,
    lnvar_students: qlnvarstudents,
    lnvar_questions: qlnvarquestions,
    student_etas: qstudents,
    question_etas: qquestions
}
data = {outcomes: obs}
inference = ed.KLqp(latent_vars, data)
inference.initialize(n_print=2, n_iter=50)

qstudents_mean = qstudents.mean()
qquestions_mean = qquestions.mean()

init = tf.global_variables_initializer()
init.run()

f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
ax1.set_ylim([-3.0, 3.0])
ax2.set_ylim([-3.0, 3.0])
ax1.set_xlim([-3.0, 3.0])
ax2.set_xlim([-3.0, 3.0])

for t in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)
Example #16
 # q(z|x,t,y)
 inpt2 = tf.concat([x_ph, qy], 1)
 hqz = fc_net(inpt2, (nh - 1) * [h], [], 'qz_xty_shared', lamba=lamba, activation=activation)
 muq_t0, sigmaq_t0 = fc_net(hqz, [h], [[d, None], [d, tf.nn.softplus]], 'qz_xt0', lamba=lamba,
                            activation=activation)
 muq_t1, sigmaq_t1 = fc_net(hqz, [h], [[d, None], [d, tf.nn.softplus]], 'qz_xt1', lamba=lamba,
                            activation=activation)
 muq = qt * muq_t1 + (1. - qt) * muq_t0
 sigmaq = qt * sigmaq_t1 + (1. - qt) * sigmaq_t0
 qz = Normal(loc=muq, scale=sigmaq)
 # Create data dictionary for edward
 data = {x1: x_ph_bin, x2: x_ph_cont, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph}
 # sample posterior predictive for p(y|z,t)
 y_post = ed.copy(y, {z: qz, t: t_ph}, scope='y_post')
 # crude approximation of the above
 y_post_mean = ed.copy(y, {z: qz.mean(), t: t_ph}, scope='y_post_mean')
 # construct a deterministic version (i.e. use the mean of the approximate posterior) of the lower bound
 # for early stopping according to a validation set
 y_post_eval = ed.copy(y, {z: qz.mean(), qt: t_ph, qy: y_ph, t: t_ph}, scope='y_post_eval')
 x1_post_eval = ed.copy(x1, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='x1_post_eval')
 x2_post_eval = ed.copy(x2, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='x2_post_eval')
 t_post_eval = ed.copy(t, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='t_post_eval')
 # losses
 logp_valid = tf.reduce_mean(tf.reduce_sum(y_post_eval.log_prob(y_ph) + t_post_eval.log_prob(t_ph), axis=1) +
                             tf.reduce_sum(x1_post_eval.log_prob(x_ph_bin), axis=1) +
                             tf.reduce_sum(x2_post_eval.log_prob(x_ph_cont), axis=1) +
                             tf.reduce_sum(z.log_prob(qz.mean()) - qz.log_prob(qz.mean()), axis=1))
 inference = ed.KLqp({z: qz}, data)
 optimizer = tf.train.AdamOptimizer(learning_rate=lr)
 inference.initialize(optimizer=optimizer)
 # saver and initializer before experiment
Example #17
def save(arr,xdata,ydata):
	tf.reset_default_graph()

	trainSetNumber = round(FLAGS.T* 0.8)

	x_train = xdata[:trainSetNumber]
	y_train = ydata[:trainSetNumber]
	x_test = xdata[trainSetNumber:]
	y_test = ydata[trainSetNumber:]

	x_train = np.asarray(x_train)
	x_test = np.asarray(x_test)

	# print(x_test)
	# print(y_test)
	pos = 0
	name = arr[pos]
	pos +=1
	H1 = int(arr[pos])
	pos+=1
	H2 = int(arr[pos])
	pos+=1
	param1 = float(arr[pos])
	pos += 1
	param2 = float(arr[pos])

	graph1 = tf.Graph()
	with graph1.as_default():
		with tf.name_scope("model"):
			W_0 = Normal(loc=tf.zeros([FLAGS.D, H1]), scale=param1*tf.ones([FLAGS.D,H1 ]),name="W_0")
			W_1 = Normal(loc=tf.zeros([H1, H2]), scale=param2*tf.ones([H1, H2]), name="W_1")
			W_2 = Normal(loc=tf.zeros([H2, FLAGS.O]), scale=param2*tf.ones([H2, FLAGS.O]), name="W_2")
			b_0 = Normal(loc=tf.zeros(H1), scale=param1 *tf.ones(H1), name="b_0")
			b_1 = Normal(loc=tf.zeros(H2), scale=param2* tf.ones(H2), name="b_1")
			b_2 = Normal(loc=tf.zeros(FLAGS.O), scale=param2* tf.ones(FLAGS.O), name="b_2")

			X = tf.placeholder(tf.float32, [trainSetNumber, FLAGS.D], name="X")
			y = Normal(loc=neural_network(x_train,W_0, W_1, W_2, b_0, b_1, b_2, trainSetNumber), scale=0.1*tf.ones([trainSetNumber,FLAGS.O]), name="y")
		
		with tf.variable_scope("posterior",reuse=tf.AUTO_REUSE):
			with tf.variable_scope("qW_0",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [FLAGS.D, H1])
			    scale = param1*tf.nn.softplus(tf.get_variable("scale", [FLAGS.D, H1]))
			    qW_0 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qW_1",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [H1, H2])
			    scale = param2*tf.nn.softplus(tf.get_variable("scale", [H1, H2]))
			    qW_1 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qW_2",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [H2, FLAGS.O])
			    scale = param2*tf.nn.softplus(tf.get_variable("scale", [H2, FLAGS.O]))
			    qW_2 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qb_0",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [H1])
			    scale =param1 * tf.nn.softplus(tf.get_variable("scale", [H1]))
			    qb_0 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qb_1",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [H2])
			    scale =param2 * tf.nn.softplus(tf.get_variable("scale", [H2]))
			    qb_1 = Normal(loc=loc, scale=scale)
			with tf.variable_scope("qb_2",reuse=tf.AUTO_REUSE):
			    loc = tf.get_variable("loc", [FLAGS.O])
			    scale =param2 * tf.nn.softplus(tf.get_variable("scale", [FLAGS.O]))
			    qb_2 = Normal(loc=loc, scale=scale)
	#inference
	with tf.Session(graph=graph1) as sess:
		# Set up the inference method, mapping the prior to the posterior variables
		inference = ed.KLqp({W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}, data={X: x_train, y: y_train})
		# Set up the adam optimizer
		global_step = tf.Variable(0, trainable=False)
		starter_learning_rate = 0.1
		learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,100, 0.3, staircase=True)
		optimizer = tf.train.AdamOptimizer(learning_rate)

		# Run the inference method
		pos += 1
		iter1 = int(arr[pos])
		inference.run(n_iter=iter1,optimizer=optimizer ,n_samples=5)

		#Run the test data through the neural network
		infered = neural_network(x_test, qW_0, qW_1, qW_2, qb_0, qb_1, qb_2, len(x_test))
		inferedList = infered.eval()

		#Accuracy checks on the data (The test data)
		# In order to work with PPC and other metrics, it must be a random variable
		# Normal creates this random variable by sampling from the posterior with a normal distribution
		NormalTest =Normal(loc=neural_network(x_test, qW_0, qW_1, qW_2, qb_0, qb_1, qb_2,len(x_test)), scale=0.1*tf.ones([len(x_test),FLAGS.O]), name="y_other") 
		NormalTestList = NormalTest.eval()
		
		# Change the graph so that the posterior point to the output
		y_post = ed.copy(NormalTest, {W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2})
		X = tf.placeholder(tf.float32, [len(x_test), FLAGS.D], name="X")
		y_test_tensor = tf.convert_to_tensor(y_test)
		MSE = ed.evaluate('mean_squared_error', data={X: x_test, NormalTest: y_test_tensor})
		MAE =ed.evaluate('mean_absolute_error', data={X: x_test, NormalTest: y_test_tensor})
		# PPC calculation
		PPCMean = ed.ppc(lambda xs, zs: tf.reduce_mean(xs[y_post]), data={y_post:  y_test, X:x_test}, latent_vars={W_0: qW_0, b_0: qb_0,W_1: qW_1, b_1: qb_1,W_2: qW_2, b_2: qb_2}, n_samples=5)
		# Change the graph again, this is done to do epistemic uncertainty calculations
		posterior = ed.copy(NormalTest, dict_swap={W_0: qW_0.mean(), b_0: qb_0.mean(),W_1: qW_1.mean(), b_1: qb_1.mean(),W_2: qW_2.mean(), b_2: qb_2.mean()})
		Y_post1 = sess.run(posterior.sample(len(x_test)), feed_dict={X: x_test, posterior: y_test})
		mean_prob_over_samples=np.mean(Y_post1, axis=0) ## prediction means
		prediction_variances = np.apply_along_axis(predictive_entropy, axis=1, arr=mean_prob_over_samples)
		
		# Run analysis on test data, to see how many records were correct
		classes, actualClass, cor, firsts, seconds, thirds, fails, perCorrect = Analysis(inferedList, y_test)
		# Save the model through TF saver
		saver = tf.train.Saver()
		dir_path = os.path.dirname(os.path.realpath(__file__))
		save_path = saver.save(sess, dir_path +"/"+name+"/model.ckpt")
		print("Model saved in path: %s" % save_path)

		file = open(dir_path+"/"+name +"/"+name+".csv",'w')
		file.write("MSE = " + str(MSE))
		file.write("\nMAE = " + str(MAE))
		file.write("\nPPC mean = " + str(PPCMean))
		file.write("; Predicted First;Predicted Second; Predicted Third; Predicted Fail \n")
		classNames = ['First','Second', 'Third', 'Fail']
		for x in range(len(firsts)):
			file.write(classNames[x] + ";" + str(firsts[x]) + ";" + str(seconds[x])+ ";" + str(thirds[x])+ ";" + str(fails[x]) + "\n")
		file.write("Num;Class 1;Class 2;Class 3;Class 4;Epi;Predicted Class;Correct Class\n ")
		for x in range(len(inferedList)):
			line = str(x) 
			for i in range(len(inferedList[x])):
				line += ";" + str(round(inferedList[x][i],2))
			line += ";" + str(round(prediction_variances[x],2)) + ";" + str(classes[x]+1) + ";" + str(actualClass[x]+1) + "\n"
			file.write(line) 
		file.close()

		return perCorrect
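The neural_network helper used throughout this example is not shown. A hypothetical two-hidden-layer version that matches how it is called here (input data, three weight matrices, three bias vectors, and the batch size); the body and the tanh nonlinearity are assumptions:

def neural_network(x, W_0, W_1, W_2, b_0, b_1, b_2, n):
    """Hypothetical: feed-forward network returning an [n, FLAGS.O] tensor."""
    x = tf.cast(tf.reshape(x, [n, FLAGS.D]), tf.float32)
    h = tf.nn.tanh(tf.matmul(x, W_0) + b_0)
    h = tf.nn.tanh(tf.matmul(h, W_1) + b_1)
    return tf.matmul(h, W_2) + b_2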
Example #18
sess = ed.get_session()
tf.global_variables_initializer().run()

i = 0
for _ in range(inference.n_iter):
    X_batch, y_batch, i = next_batch(M, i)
    for _ in range(5):
        info_dict_d = inference.update(variables="Disc",
                                       feed_dict={
                                           X: X_batch,
                                           y_ph: y_batch
                                       })

    info_dict = inference.update(variables="Gen",
                                 feed_dict={
                                     X: X_batch,
                                     y_ph: y_batch
                                 })
    info_dict['loss_d'] = info_dict_d['loss_d']
    info_dict['t'] = info_dict['t'] // 6  # say set of 6 updates is 1 iteration

    t = info_dict['t']
    inference.print_progress(info_dict)
    if t == 1 or t % inference.n_print == 0:
        # Check inferred posterior parameters.
        mean, std = sess.run([qw.mean(), qw.std()])
        print("\nInferred mean & std:")
        print(mean)
        print(std)
Example #19
                    scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

        # Create data dictionary for edward
        data = {
            x1: x_ph_bin,
            x2: x_ph_cont,
            y: y_ph,
            qt: t_ph,
            t: t_ph,
            qy: y_ph
        }

        # sample posterior predictive for p(y|z,t)
        y_post = ed.copy(y, {z: qz, t: t_ph}, scope='y_post')
        # crude approximation of the above
        y_post_mean = ed.copy(y, {z: qz.mean(), t: t_ph}, scope='y_post_mean')
        # construct a deterministic version (i.e. use the mean of the approximate posterior) of the lower bound
        # for early stopping according to a validation set
        y_post_eval = ed.copy(y, {
            z: qz.mean(),
            qt: t_ph,
            qy: y_ph,
            t: t_ph
        },
                              scope='y_post_eval')
        x1_post_eval = ed.copy(x1, {
            z: qz.mean(),
            qt: t_ph,
            qy: y_ph
        },
                               scope='x1_post_eval')
Example #20
def main(_):
    def ratio_estimator(data, local_vars, global_vars):
        """Takes as input a dict of data x, local variable samples z, and
    global variable samples beta; outputs real values of shape
    (x.shape[0] + z.shape[0],). In this example, there are no local
    variables.
    """
        # data[y] has shape (M,); global_vars[w] has shape (D,)
        # we concatenate w to each data point y, so input has shape (M, 1 + D)
        input = tf.concat([
            tf.reshape(data[y], [FLAGS.M, 1]),
            tf.tile(tf.reshape(global_vars[w], [1, FLAGS.D]), [FLAGS.M, 1])
        ], 1)
        hidden = tf.layers.dense(input, 64, activation=tf.nn.relu)
        output = tf.layers.dense(hidden, 1, activation=None)
        return output

    ed.set_seed(42)

    # DATA
    w_true = np.ones(FLAGS.D) * 5.0
    X_train, y_train = build_toy_dataset(FLAGS.N, w_true)
    X_test, y_test = build_toy_dataset(FLAGS.N, w_true)
    data = generator([X_train, y_train], FLAGS.M)

    # MODEL
    X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D])
    y_ph = tf.placeholder(tf.float32, [FLAGS.M])
    w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D))
    y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M))

    # INFERENCE
    qw = Normal(loc=tf.get_variable("qw/loc", [FLAGS.D]) + 1.0,
                scale=tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D])))

    inference = ed.ImplicitKLqp({w: qw},
                                data={y: y_ph},
                                discriminator=ratio_estimator,
                                global_vars={w: qw})
    inference.initialize(n_iter=5000,
                         n_print=100,
                         scale={y: float(FLAGS.N) / FLAGS.M})

    sess = ed.get_session()
    tf.global_variables_initializer().run()

    for _ in range(inference.n_iter):
        X_batch, y_batch = next(data)
        for _ in range(5):
            info_dict_d = inference.update(variables="Disc",
                                           feed_dict={
                                               X: X_batch,
                                               y_ph: y_batch
                                           })

        info_dict = inference.update(variables="Gen",
                                     feed_dict={
                                         X: X_batch,
                                         y_ph: y_batch
                                     })
        info_dict['loss_d'] = info_dict_d['loss_d']
        info_dict[
            't'] = info_dict['t'] // 6  # say set of 6 updates is 1 iteration

        t = info_dict['t']
        inference.print_progress(info_dict)
        if t == 1 or t % inference.n_print == 0:
            # Check inferred posterior parameters.
            mean, std = sess.run([qw.mean(), qw.stddev()])
            print("\nInferred mean & std:")
            print(mean)
            print(std)
Example #21
# VISUALIZATION
def visualise(X_data, y_data, w, b, ax, n_samples=10):
    w_samples = w.sample(n_samples)[:, 0].eval()
    b_samples = b.sample(n_samples).eval()
    ax.scatter(X_data[:, 0],
               y_data)  # Note, only the 1st input dimension is plotted.
    inputs = np.linspace(-8, 8, num=400)
    for ns in range(n_samples):
        output = inputs * w_samples[ns] + b_samples[ns]
        ax.plot(inputs, output)


fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)

visualise(X_train, y_train, w, b, ax1)  # Models sampled from the prior
visualise(X_train, y_train, qw, qb, ax2)  # Models sampled from the posterior
plt.show()

# EXPLORE THE LEARNED MODEL

print('Point estimate for STD of weights:', w_prior_std.eval())

# Retrieve the means and STDs of the estimated regression coefficients
w_est_mean = qw.mean().eval()
w_est_std = qw.stddev().eval()
print('Correlation between estimated and true weights: ',
      np.corrcoef(w_est_mean, w_true)[0, 1])
Example #22
def cevae_tf(X, T, Y, n_epochs=100, early_stop = 10, d_cevae=20):

    T, Y = T.reshape((-1,1)), Y.reshape((-1,1))
    args = dict()
    args['earl'] = early_stop
    args['lr'] = 0.001
    args['opt'] = 'adam'
    args['epochs'] = n_epochs
    args['print_every'] = 10
    args['true_post'] = True

    M = None  # batch size during training
    d = d_cevae  # latent dimension
    lamba = 1e-4  # weight decay
    nh, h = 3, 200  # number and size of hidden layers

    contfeats = list(range(X.shape[1])) # all continuous
    binfeats = []
    
    # need for early stopping
    xtr, xva, ttr, tva, ytr, yva = train_test_split(X, T, Y)

    # zero mean, unit variance for y during training
    ym, ys = np.mean(Y), np.std(Y)
    ytr, yva = (ytr - ym) / ys, (yva - ym) / ys
    best_logpvalid = - np.inf

    with tf.Graph().as_default():
        sess = tf.InteractiveSession()

        ed.set_seed(1)
        np.random.seed(1)
        tf.set_random_seed(1)

        # x_ph_bin = tf.placeholder(tf.float32, [M, len(binfeats)], name='x_bin')  # binary inputs
        x_ph_cont = tf.placeholder(tf.float32, [M, len(contfeats)], name='x_cont')  # continuous inputs
        t_ph = tf.placeholder(tf.float32, [M, 1])
        y_ph = tf.placeholder(tf.float32, [M, 1])

        # x_ph = tf.concat([x_ph_bin, x_ph_cont], 1)
        x_ph = x_ph_cont
        activation = tf.nn.elu

        # CEVAE model (decoder)
        # p(z)
        z = Normal(loc=tf.zeros([tf.shape(x_ph)[0], d]), scale=tf.ones([tf.shape(x_ph)[0], d]))

        # p(x|z)
        hx = fc_net(z, (nh - 1) * [h], [], 'px_z_shared', lamba=lamba, activation=activation)
        # logits = fc_net(hx, [h], [[len(binfeats), None]], 'px_z_bin', lamba=lamba, activation=activation)
        # x1 = Bernoulli(logits=logits, dtype=tf.float32, name='bernoulli_px_z')

        mu, sigma = fc_net(hx, [h], [[len(contfeats), None], [len(contfeats), tf.nn.softplus]], 'px_z_cont', lamba=lamba,
                        activation=activation)
        x2 = Normal(loc=mu, scale=sigma, name='gaussian_px_z')

        # p(t|z)
        logits = fc_net(z, [h], [[1, None]], 'pt_z', lamba=lamba, activation=activation)
        t = Bernoulli(logits=logits, dtype=tf.float32)

        # p(y|t,z)
        mu2_t0 = fc_net(z, nh * [h], [[1, None]], 'py_t0z', lamba=lamba, activation=activation)
        mu2_t1 = fc_net(z, nh * [h], [[1, None]], 'py_t1z', lamba=lamba, activation=activation)
        y = Normal(loc=t * mu2_t1 + (1. - t) * mu2_t0, scale=tf.ones_like(mu2_t0))

        # CEVAE variational approximation (encoder)
        # q(t|x)
        logits_t = fc_net(x_ph, [d], [[1, None]], 'qt', lamba=lamba, activation=activation)
        qt = Bernoulli(logits=logits_t, dtype=tf.float32)
        # q(y|x,t)
        hqy = fc_net(x_ph, (nh - 1) * [h], [], 'qy_xt_shared', lamba=lamba, activation=activation)
        mu_qy_t0 = fc_net(hqy, [h], [[1, None]], 'qy_xt0', lamba=lamba, activation=activation)
        mu_qy_t1 = fc_net(hqy, [h], [[1, None]], 'qy_xt1', lamba=lamba, activation=activation)
        qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0, scale=tf.ones_like(mu_qy_t0))
        # q(z|x,t,y)
        inpt2 = tf.concat([x_ph, qy], 1)
        hqz = fc_net(inpt2, (nh - 1) * [h], [], 'qz_xty_shared', lamba=lamba, activation=activation)
        muq_t0, sigmaq_t0 = fc_net(hqz, [h], [[d, None], [d, tf.nn.softplus]], 'qz_xt0', lamba=lamba,
                                activation=activation)
        muq_t1, sigmaq_t1 = fc_net(hqz, [h], [[d, None], [d, tf.nn.softplus]], 'qz_xt1', lamba=lamba,
                                activation=activation)
        qz = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0, scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

        # Create data dictionary for edward
        data = {x2: x_ph_cont, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph}

        # sample posterior predictive for p(y|z,t)
        y_post = ed.copy(y, {z: qz, t: t_ph}, scope='y_post')
        # crude approximation of the above
        y_post_mean = ed.copy(y, {z: qz.mean(), t: t_ph}, scope='y_post_mean')
        # construct a deterministic version (i.e. use the mean of the approximate posterior) of the lower bound
        # for early stopping according to a validation set
        y_post_eval = ed.copy(y, {z: qz.mean(), qt: t_ph, qy: y_ph, t: t_ph}, scope='y_post_eval')
        # x1_post_eval = ed.copy(x1, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='x1_post_eval')
        x2_post_eval = ed.copy(x2, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='x2_post_eval')
        t_post_eval = ed.copy(t, {z: qz.mean(), qt: t_ph, qy: y_ph}, scope='t_post_eval')
        logp_valid = tf.reduce_mean(tf.reduce_sum(y_post_eval.log_prob(y_ph) + t_post_eval.log_prob(t_ph), axis=1) +
                                    tf.reduce_sum(x2_post_eval.log_prob(x_ph_cont), axis=1) +
                                    tf.reduce_sum(z.log_prob(qz.mean()) - qz.log_prob(qz.mean()), axis=1))

        inference = ed.KLqp({z: qz}, data)
        optimizer = tf.train.AdamOptimizer(learning_rate=args['lr'])
        inference.initialize(optimizer=optimizer)

        saver = tf.train.Saver(tf.contrib.slim.get_variables())
        tf.global_variables_initializer().run()

        n_epoch, n_iter_per_epoch, idx = args['epochs'], 10 * int(xtr.shape[0] / 100), np.arange(xtr.shape[0])

        # # dictionaries needed for evaluation
        t0, t1 = np.zeros((X.shape[0], 1)), np.ones((X.shape[0], 1))
        # tr0t, tr1t = np.zeros((xte.shape[0], 1)), np.ones((xte.shape[0], 1))
        f1 = {x_ph_cont: X, t_ph: t1}
        f0 = {x_ph_cont: X, t_ph: t0}
        # f1t = {x_ph_bin: xte[:, 0:len(binfeats)], x_ph_cont: xte[:, len(binfeats):], t_ph: tr1t}
        # f0t = {x_ph_bin: xte[:, 0:len(binfeats)], x_ph_cont: xte[:, len(binfeats):], t_ph: tr0t}

        for epoch in range(n_epoch):
            avg_loss = 0.0

            
            widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
            pbar = ProgressBar(n_iter_per_epoch, widgets=widgets)
            pbar.start()
            np.random.shuffle(idx)
            for j in range(n_iter_per_epoch):
                # print('j', j)
                # pbar.update(j)
                batch = np.random.choice(idx, 100)
                x_train, y_train, t_train = xtr[batch], ytr[batch], ttr[batch]
                info_dict = inference.update(feed_dict={x_ph_cont: x_train,
                                                        t_ph: t_train, y_ph: y_train})
                avg_loss += info_dict['loss']

            avg_loss = avg_loss / n_iter_per_epoch
            avg_loss = avg_loss / 100

            if epoch % args['earl'] == 0 or epoch == (n_epoch - 1):
                logpvalid = sess.run(logp_valid, feed_dict={x_ph_cont: xva,
                                                            t_ph: tva, y_ph: yva})
                if logpvalid >= best_logpvalid:
                    print('Improved validation bound, old: {:0.3f}, new: {:0.3f}'.format(best_logpvalid, logpvalid))
                    best_logpvalid = logpvalid
                    saver.save(sess, 'data/cevae_models/dlvm')


        saver.restore(sess, 'data/cevae_models/dlvm')
        y0, y1 = get_y0_y1(sess, y_post, f0, f1, shape=Y.shape, L=100)
        y0, y1 = y0 * ys + ym, y1 * ys + ym
        
        sess.close()

    return y0.reshape((-1)), y1.reshape((-1))
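The get_y0_y1 helper called near the end is not shown. A hypothetical implementation consistent with its call site, averaging L posterior-predictive draws of y_post under the t=0 and t=1 feed dictionaries (the body is an assumption):

def get_y0_y1(sess, y_post, f0, f1, shape, L=100):
    """Hypothetical: Monte Carlo estimates of E[y | x, t=0] and E[y | x, t=1]."""
    y0, y1 = np.zeros(shape), np.zeros(shape)
    for _ in range(L):
        y0 += sess.run(y_post, feed_dict=f0) / L
        y1 += sess.run(y_post, feed_dict=f1) / L
    return y0, y1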
Example #23
File: irt.py  Project: wujsAct/edward
qmu = Normal(
    loc=tf.Variable(tf.random_normal([1])),
    scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

latent_vars = {
    overall_mu: qmu,
    lnvar_students: qlnvarstudents,
    lnvar_questions: qlnvarquestions,
    student_etas: qstudents,
    question_etas: qquestions
}
data = {outcomes: obs}
inference = ed.KLqp(latent_vars, data)
inference.initialize(n_print=2, n_iter=50)

qstudents_mean = qstudents.mean()
qquestions_mean = qquestions.mean()

tf.global_variables_initializer().run()

f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
ax1.set_ylim([-3.0, 3.0])
ax2.set_ylim([-3.0, 3.0])
ax1.set_xlim([-3.0, 3.0])
ax2.set_xlim([-3.0, 3.0])

for t in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)

  if t % inference.n_print == 0:
Example #24
def main(_):
  def ratio_estimator(data, local_vars, global_vars):
    """Takes as input a dict of data x, local variable samples z, and
    global variable samples beta; outputs real values of shape
    (x.shape[0] + z.shape[0],). In this example, there are no local
    variables.
    """
    # data[y] has shape (M,); global_vars[w] has shape (D,)
    # we concatenate w to each data point y, so input has shape (M, 1 + D)
    input = tf.concat([
        tf.reshape(data[y], [FLAGS.M, 1]),
        tf.tile(tf.reshape(global_vars[w], [1, FLAGS.D]), [FLAGS.M, 1])], 1)
    hidden = tf.layers.dense(input, 64, activation=tf.nn.relu)
    output = tf.layers.dense(hidden, 1, activation=None)
    return output

  ed.set_seed(42)

  # DATA
  w_true = np.ones(FLAGS.D) * 5.0
  X_train, y_train = build_toy_dataset(FLAGS.N, w_true)
  X_test, y_test = build_toy_dataset(FLAGS.N, w_true)
  data = generator([X_train, y_train], FLAGS.M)

  # MODEL
  X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D])
  y_ph = tf.placeholder(tf.float32, [FLAGS.M])
  w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D))
  y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M))

  # INFERENCE
  qw = Normal(loc=tf.get_variable("qw/loc", [FLAGS.D]) + 1.0,
              scale=tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D])))

  inference = ed.ImplicitKLqp(
      {w: qw}, data={y: y_ph},
      discriminator=ratio_estimator, global_vars={w: qw})
  inference.initialize(n_iter=5000, n_print=100,
                       scale={y: float(FLAGS.N) / FLAGS.M})

  sess = ed.get_session()
  tf.global_variables_initializer().run()

  for _ in range(inference.n_iter):
    X_batch, y_batch = next(data)
    for _ in range(5):
      info_dict_d = inference.update(
          variables="Disc", feed_dict={X: X_batch, y_ph: y_batch})

    info_dict = inference.update(
        variables="Gen", feed_dict={X: X_batch, y_ph: y_batch})
    info_dict['loss_d'] = info_dict_d['loss_d']
    info_dict['t'] = info_dict['t'] // 6  # say set of 6 updates is 1 iteration

    t = info_dict['t']
    inference.print_progress(info_dict)
    if t == 1 or t % inference.n_print == 0:
      # Check inferred posterior parameters.
      mean, std = sess.run([qw.mean(), qw.stddev()])
      print("\nInferred mean & std:")
      print(mean)
      print(std)
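This example assumes build_toy_dataset and generator helpers that are not shown. Hypothetical versions consistent with how they are used here, a linear-regression simulator with known weights and an endless iterator over size-M minibatches (both bodies are assumptions):

def build_toy_dataset(N, w, noise_std=1.0):
  """Hypothetical: y = X w + Gaussian noise with X drawn i.i.d. standard normal."""
  X = np.random.randn(N, len(w)).astype(np.float32)
  y = np.dot(X, w) + np.random.normal(0.0, noise_std, size=N)
  return X, y.astype(np.float32)


def generator(arrays, M):
  """Hypothetical: yield aligned random minibatches of size M, forever."""
  X, y = arrays
  while True:
    idx = np.random.choice(X.shape[0], M, replace=False)
    yield X[idx], y[idx]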
Example #25
def main(_):
    ed.set_seed(42)

    # DATA
    data, true_s_etas, true_q_etas = build_toy_dataset(FLAGS.n_students,
                                                       FLAGS.n_questions,
                                                       FLAGS.n_obs)
    obs = data['outcomes'].values
    student_ids = data['student_id'].values.astype(int)
    question_ids = data['question_id'].values.astype(int)

    # MODEL
    lnvar_students = Normal(loc=0.0, scale=1.0)
    lnvar_questions = Normal(loc=0.0, scale=1.0)

    sigma_students = tf.sqrt(tf.exp(lnvar_students))
    sigma_questions = tf.sqrt(tf.exp(lnvar_questions))

    overall_mu = Normal(loc=tf.zeros(1), scale=tf.ones(1))

    student_etas = Normal(loc=0.0,
                          scale=sigma_students,
                          sample_shape=FLAGS.n_students)
    question_etas = Normal(loc=0.0,
                           scale=sigma_questions,
                           sample_shape=FLAGS.n_questions)

    observation_logodds = (tf.gather(student_etas, student_ids) +
                           tf.gather(question_etas, question_ids) + overall_mu)
    outcomes = Bernoulli(logits=observation_logodds)

    # INFERENCE
    qstudents = Normal(loc=tf.get_variable("qstudents/loc",
                                           [FLAGS.n_students]),
                       scale=tf.nn.softplus(
                           tf.get_variable("qstudents/scale",
                                           [FLAGS.n_students])))
    qquestions = Normal(loc=tf.get_variable("qquestions/loc",
                                            [FLAGS.n_questions]),
                        scale=tf.nn.softplus(
                            tf.get_variable("qquestions/scale",
                                            [FLAGS.n_questions])))
    qlnvarstudents = Normal(loc=tf.get_variable("qlnvarstudents/loc", []),
                            scale=tf.nn.softplus(
                                tf.get_variable("qlnvarstudents/scale", [])))
    qlnvarquestions = Normal(loc=tf.get_variable("qlnvarquestions/loc", []),
                             scale=tf.nn.softplus(
                                 tf.get_variable("qlnvarquestions/scale", [])))
    qmu = Normal(loc=tf.get_variable("qmu/loc", [1]),
                 scale=tf.nn.softplus(tf.get_variable("qmu/scale", [1])))

    latent_vars = {
        overall_mu: qmu,
        lnvar_students: qlnvarstudents,
        lnvar_questions: qlnvarquestions,
        student_etas: qstudents,
        question_etas: qquestions
    }
    data = {outcomes: obs}
    inference = ed.KLqp(latent_vars, data)
    inference.initialize(n_print=2, n_iter=50)

    qstudents_mean = qstudents.mean()
    qquestions_mean = qquestions.mean()

    tf.global_variables_initializer().run()

    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
    ax1.set_ylim([-3.0, 3.0])
    ax2.set_ylim([-3.0, 3.0])
    ax1.set_xlim([-3.0, 3.0])
    ax2.set_xlim([-3.0, 3.0])

    for t in range(inference.n_iter):
        info_dict = inference.update()
        inference.print_progress(info_dict)

        if t % inference.n_print == 0:
            # CRITICISM
            ax1.clear()
            ax2.clear()
            ax1.set_ylim([-3.0, 3.0])
            ax2.set_ylim([-3.0, 3.0])
            ax1.set_xlim([-3.0, 3.0])
            ax2.set_xlim([-3.0, 3.0])

            ax1.set_title('Student Intercepts')
            ax2.set_title('Question Intercepts')
            ax1.set_xlabel('True Student Random Intercepts')
            ax1.set_ylabel('Estimated Student Random Intercepts')
            ax2.set_xlabel('True Question Random Intercepts')
            ax2.set_ylabel('Estimated Question Random Intercepts')

            ax1.scatter(true_s_etas, qstudents_mean.eval(), s=0.05)
            ax2.scatter(true_q_etas, qquestions_mean.eval(), s=0.05)
            plt.draw()
            plt.pause(2.0 / 60.0)
Example #26
    scale=tf.Variable(5.0),
)
q_inv_softplus_sigma = Normal(
    loc=tf.Variable(0.0),
    scale=tf.Variable(1.0),
)

# Inference arguments
latent_vars = {mu: q_mu, inv_softplus_sigma: q_inv_softplus_sigma}
data = {y: y_train}

# Inference
inference = ed.KLqp(latent_vars, data)
inference.run(n_samples=5, n_iter=2500)

print(q_mu.mean().eval())
print(q_inv_softplus_sigma.mean().eval())

# Empirical Model with Sampler

# Posterior distribution families
q_mu = Empirical(params=tf.Variable(tf.random_normal([2000])))
q_inv_softplus_sigma = Empirical(params=tf.Variable(tf.random_normal([2000])))

# Inference arguments
latent_vars = {mu: q_mu, inv_softplus_sigma: q_inv_softplus_sigma}
data = {y: y_train}

# Inference
inference = ed.HMC(latent_vars, data)
inference.run(step_size=0.003, n_steps=5)
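Because the model above works with an unconstrained inv_softplus_sigma, the fitted values are not on the scale of the data. A small follow-up sketch (an addition, assuming the HMC variables above are still in scope; softplus of the posterior mean is only a rough point summary):

# Map the unconstrained estimate back to a positive scale parameter.
sigma_hat = tf.nn.softplus(q_inv_softplus_sigma.mean()).eval()
print("Approximate point estimate of sigma:", sigma_hat)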
Example #27
        # Create data dictionary for edward
        data = {xi1: xi_ph_bin, xi2: xi_ph_cont, yi: yi_ph, qti: ti_ph, ti: ti_ph, qyi: yi_ph,
        xj1: xj_ph_bin, xj2: xj_ph_cont, yj: yj_ph, qtj: tj_ph, tj: tj_ph, qyj: yj_ph}

        # sample posterior predictive for p(y|z,t)
        yi_post = ed.copy(yi, {zi: qzi, ti: ti_ph, zj: qzj, tj: tj_ph}, scope='yi_post')
        yj_post = ed.copy(yj, {zi: qzi, ti: ti_ph, zj: qzj, tj: tj_ph}, scope='yj_post')
        
        # crude approximation of the above, why not mean on ti or tj?
        # yi_post_mean = ed.copy(yi, {zi: qzi.mean(), ti: ti_ph, zj:qzj.mean(), tj: tj_ph}, scope='yi_post_mean')
        # yj_post_mean = ed.copy(yj, {zi: qzi.mean(), ti: ti_ph, zj:qzj.mean(), tj: tj_ph}, scope='yj_post_mean')

        # construct a deterministic version (i.e. use the mean of the approximate posterior) of the lower bound
        # for early stopping according to a validation set

        yi_post_eval = ed.copy(yi, {zi: qzi.mean(), qti: ti_ph, qyi: yi_ph, ti: ti_ph}, scope='yi_post_eval')
        yj_post_eval = ed.copy(yj, {zj: qzj.mean(), qtj: tj_ph, qyj: yj_ph, tj: tj_ph}, scope='yj_post_eval')

        xi1_post_eval = ed.copy(xi1, {zi: qzi.mean(), qti: ti_ph, qyi: yi_ph}, scope='xi1_post_eval')
        xi2_post_eval = ed.copy(xi2, {zi: qzi.mean(), qti: ti_ph, qyi: yi_ph}, scope='xi2_post_eval')

        xj1_post_eval = ed.copy(xj1, {zj: qzj.mean(), qtj: tj_ph, qyj: yj_ph}, scope='xj1_post_eval')
        xj2_post_eval = ed.copy(xj2, {zj: qzj.mean(), qtj: tj_ph, qyj: yj_ph}, scope='xj2_post_eval')
        
        ti_post_eval = ed.copy(ti, {zi: qzi.mean(), qti: ti_ph, qyi: yi_ph}, scope='ti_post_eval')
        tj_post_eval = ed.copy(tj, {zj: qzj.mean(), qtj: tj_ph, qyj: yj_ph}, scope='tj_post_eval')

        logp_valid = tf.reduce_mean(tf.reduce_sum(yi_post_eval.log_prob(yi_ph) + ti_post_eval.log_prob(ti_ph), axis=1) +
                                    tf.reduce_sum(xi1_post_eval.log_prob(xi_ph_bin), axis=1) +
                                    tf.reduce_sum(xi2_post_eval.log_prob(xi_ph_cont), axis=1) +
                                    tf.reduce_sum(zi.log_prob(qzi.mean()) - qzi.log_prob(qzi.mean()), axis=1)
Example #28
    qmesh_x1, qmesh_x2 = np.meshgrid(
        np.linspace(qminmax[0], qminmax[1] - qcellres[0],
                    (qminmax[1] - qminmax[0]) // qcellres[0]),
        np.linspace(qminmax[2], qminmax[3] - qcellres[1],
                    (qminmax[3] - qminmax[2]) // qcellres[1]))

    gamma_mesh_x1, gamma_mesh_x2 = np.meshgrid(
        np.linspace(cell_minmax[0], cell_minmax[1] - cell_res[0],
                    (cell_minmax[1] - cell_minmax[0]) // cell_res[0]),
        np.linspace(cell_minmax[2], cell_minmax[3] - cell_res[1],
                    (cell_minmax[3] - cell_minmax[2]) // cell_res[1]))

    X_q = calc_grid_v2(qcellres, qminmax, method='grid', X=None)
    X_q_tf = tf.constant(X_q, dtype=tfdt)
    X_q_features = rbf_kernel(
        X_q_tf, qhinge_grid.mean(),
        qgamma.bijector.forward(qgamma.distribution.mean()), tfdt)

    # Running inference
    for t in range(inference.n_iter):
        if t % 10 == 0:
            print("\nsaving {}".format(t))
            qgamma_eval = qgamma.bijector.forward(
                qgamma.distribution.mean()).eval()
            qgamma_var_eval = qgamma.distribution.variance().eval()
            qhinge_grid_eval = qhinge_grid.mean().eval()
            post_mu = tf.matmul(X_q_features, qw.mean())
            post_var = tf.reduce_sum(tf.square(X_q_features) *
                                     tf.transpose(qw.variance()),
                                     axis=1,
                                     keepdims=True)
Example #29
N = 5000  # number of data points
D = 2  # data dimensionality
K = 1  # latent dimensionality

# DATA

x_train = build_toy_dataset(N, D, K)

# MODEL

w = Normal(mu=tf.zeros([D, K]), sigma=10.0 * tf.ones([D, K]))
z = Normal(mu=tf.zeros([N, K]), sigma=tf.ones([N, K]))
x = Normal(mu=tf.matmul(w, z, transpose_b=True), sigma=tf.ones([D, N]))

# INFERENCE

qw = Normal(mu=tf.Variable(tf.random_normal([D, K])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, K]))))
qz = Normal(mu=tf.Variable(tf.random_normal([N, K])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([N, K]))))

inference = ed.KLqp({w: qw, z: qz}, data={x: x_train})

init = tf.initialize_all_variables()
inference.run(n_iter=500, n_print=100, n_samples=10)

sess = ed.get_session()
print("Inferred principal axes:")
print(sess.run(qw.mean()))
Example #30
File: irt.py  Project: JoyceYa/edward
def main(_):
  ed.set_seed(42)

  # DATA
  data, true_s_etas, true_q_etas = build_toy_dataset(
      FLAGS.n_students, FLAGS.n_questions, FLAGS.n_obs)
  obs = data['outcomes'].values
  student_ids = data['student_id'].values.astype(int)
  question_ids = data['question_id'].values.astype(int)

  # MODEL
  lnvar_students = Normal(loc=0.0, scale=1.0)
  lnvar_questions = Normal(loc=0.0, scale=1.0)

  sigma_students = tf.sqrt(tf.exp(lnvar_students))
  sigma_questions = tf.sqrt(tf.exp(lnvar_questions))

  overall_mu = Normal(loc=tf.zeros(1), scale=tf.ones(1))

  student_etas = Normal(loc=0.0, scale=sigma_students,
                        sample_shape=FLAGS.n_students)
  question_etas = Normal(loc=0.0, scale=sigma_questions,
                         sample_shape=FLAGS.n_questions)

  observation_logodds = (tf.gather(student_etas, student_ids) +
                         tf.gather(question_etas, question_ids) +
                         overall_mu)
  outcomes = Bernoulli(logits=observation_logodds)

  # INFERENCE
  qstudents = Normal(
      loc=tf.get_variable("qstudents/loc", [FLAGS.n_students]),
      scale=tf.nn.softplus(
          tf.get_variable("qstudents/scale", [FLAGS.n_students])))
  qquestions = Normal(
      loc=tf.get_variable("qquestions/loc", [FLAGS.n_questions]),
      scale=tf.nn.softplus(
          tf.get_variable("qquestions/scale", [FLAGS.n_questions])))
  qlnvarstudents = Normal(
      loc=tf.get_variable("qlnvarstudents/loc", []),
      scale=tf.nn.softplus(
          tf.get_variable("qlnvarstudents/scale", [])))
  qlnvarquestions = Normal(
      loc=tf.get_variable("qlnvarquestions/loc", []),
      scale=tf.nn.softplus(
          tf.get_variable("qlnvarquestions/scale", [])))
  qmu = Normal(
      loc=tf.get_variable("qmu/loc", [1]),
      scale=tf.nn.softplus(
          tf.get_variable("qmu/scale", [1])))

  latent_vars = {
      overall_mu: qmu,
      lnvar_students: qlnvarstudents,
      lnvar_questions: qlnvarquestions,
      student_etas: qstudents,
      question_etas: qquestions
  }
  data = {outcomes: obs}
  inference = ed.KLqp(latent_vars, data)
  inference.initialize(n_print=2, n_iter=50)

  qstudents_mean = qstudents.mean()
  qquestions_mean = qquestions.mean()

  tf.global_variables_initializer().run()

  f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
  ax1.set_ylim([-3.0, 3.0])
  ax2.set_ylim([-3.0, 3.0])
  ax1.set_xlim([-3.0, 3.0])
  ax2.set_xlim([-3.0, 3.0])

  for t in range(inference.n_iter):
    info_dict = inference.update()
    inference.print_progress(info_dict)

    if t % inference.n_print == 0:
      # CRITICISM
      ax1.clear()
      ax2.clear()
      ax1.set_ylim([-3.0, 3.0])
      ax2.set_ylim([-3.0, 3.0])
      ax1.set_xlim([-3.0, 3.0])
      ax2.set_xlim([-3.0, 3.0])

      ax1.set_title('Student Intercepts')
      ax2.set_title('Question Intercepts')
      ax1.set_xlabel('True Student Random Intercepts')
      ax1.set_ylabel('Estimated Student Random Intercepts')
      ax2.set_xlabel('True Question Random Intercepts')
      ax2.set_ylabel('Estimated Question Random Intercepts')

      ax1.scatter(true_s_etas, qstudents_mean.eval(), s=0.05)
      ax2.scatter(true_q_etas, qquestions_mean.eval(), s=0.05)
      plt.draw()
      plt.pause(2.0 / 60.0)
Example #31
                        tf.Variable(tf.random_normal([n_dept]))))

latent_vars = {eta_s: q_eta_s, eta_d: q_eta_d, eta_dept: q_eta_dept}
data = {
    y: y_train,
    s_ph: s_train,
    d_ph: d_train,
    dept_ph: dept_train,
    service_ph: service_train
}
inference = ed.KLqp(latent_vars, data)

# COMMAND ----------

yhat_test = ed.copy(yhat, {
    eta_s: q_eta_s.mean(),
    eta_d: q_eta_d.mean(),
    eta_dept: q_eta_dept.mean()
})

# COMMAND ----------

inference.initialize(n_print=2000, n_iter=10000)
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
    # Update and print progress of algorithm.
    info_dict = inference.update()
    inference.print_progress(info_dict)

    t = info_dict['t']
Example #32
File: iwvi.py  Project: ekostem/edward
    grads = tf.gradients(loss, [v._ref() for v in var_list])
    grads_and_vars = list(zip(grads, var_list))
    return loss, grads_and_vars


ed.set_seed(42)
N = 5000  # number of data points
D = 10  # number of features

# DATA
w_true = np.random.randn(D)
X_data = np.random.randn(N, D)
p = expit(np.dot(X_data, w_true))
y_data = np.array([np.random.binomial(1, i) for i in p])

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
y = Bernoulli(logits=ed.dot(X, w))

# INFERENCE
qw = Normal(loc=tf.Variable(tf.random_normal([D])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))

inference = IWVI({w: qw}, data={X: X_data, y: y_data})
inference.run(K=5, n_iter=1000)

# CRITICISM
print("Mean squared error in true values to inferred posterior mean:")
print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
Example #33
ed.set_seed(142)

N = 5000  # number of data points
D = 2  # data dimensionality
K = 1  # latent dimensionality

# DATA

x_train = build_toy_dataset(N, D, K)

# MODEL

w = Normal(mu=tf.zeros([D, K]), sigma=2.0 * tf.ones([D, K]))
z = Normal(mu=tf.zeros([N, K]), sigma=tf.ones([N, K]))
x = Normal(mu=tf.matmul(w, z, transpose_b=True), sigma=tf.ones([D, N]))

# INFERENCE

qw = Normal(mu=tf.Variable(tf.random_normal([D, K])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D, K]))))
qz = Normal(mu=tf.Variable(tf.random_normal([N, K])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([N, K]))))

inference = ed.KLqp({w: qw, z: qz}, data={x: x_train})
inference.run(n_iter=500, n_print=100, n_samples=10)

sess = ed.get_session()
print("Inferred principal axes:")
print(sess.run(qw.mean()))
Example #34
N = 40  # num data points
D = 1  # num features

ed.set_seed(42)
X_train, y_train = build_toy_dataset(N)
X_test, y_test = build_toy_dataset(N)

X = ed.placeholder(tf.float32, [N, D], name='X')
beta = Normal(mu=tf.zeros(D), sigma=tf.ones(D), name='beta')
y = Normal(mu=ed.dot(X, beta), sigma=tf.ones(N), name='y')

qmu_mu = tf.Variable(tf.random_normal([D]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D])))
qbeta = Normal(mu=qmu_mu, sigma=qmu_sigma, name='qbeta')

data = {X: X_train, y: y_train}
inference = ed.MFVI({beta: qbeta}, data)
inference.initialize(logdir='train')

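# Manual training loop: each step runs the optimizer op and fetches the current loss value.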
sess = ed.get_session()
for t in range(501):
    _, loss = sess.run([inference.train, inference.loss], {X: data[X]})
    inference.print_progress(t, loss)

y_post = ed.copy(y, {beta: qbeta.mean()})
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qbeta.mean()), sigma=tf.ones(N))

print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))
Example #35
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {X: X_train, y: y_train}
inference = ed.KLqp({w: qw, b: qb}, data)
inference.run()

# CRITICISM
y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()})
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qw.mean()) + qb.mean(), sigma=tf.ones(N))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

print("Displaying prior predictive samples.")
n_prior_samples = 10

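# Draw weight and bias samples from the prior to visualize prior predictive regression lines over a grid of inputs.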
w_prior = w.sample(n_prior_samples).eval()
b_prior = b.sample(n_prior_samples).eval()

plt.scatter(X_train, y_train)

inputs = np.linspace(-1, 10, num=400, dtype=np.float32)
Example #36
# DATA
nsubj = 200
nitem = 25
trait_true = np.random.normal(size=[nsubj, 1])
thresh_true = np.random.normal(size=[1, nitem])
X_data = np.random.binomial(1, expit(trait_true - thresh_true))

# MODEL
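# One-parameter IRT (Rasch) model: P(correct) = sigmoid(trait - threshold) for each subject-item pair.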
trait = Normal(mu=tf.zeros([nsubj, 1]), sigma=tf.ones([nsubj, 1]))
thresh = Normal(mu=tf.zeros([1, nitem]), sigma=tf.ones([1, nitem]))
X = Bernoulli(logits=trait - thresh)

# INFERENCE
q_trait = Normal(mu=tf.Variable(tf.random_normal([nsubj, 1])),
                 sigma=tf.nn.softplus(tf.Variable(tf.random_normal([nsubj,
                                                                    1]))))
q_thresh = Normal(mu=tf.Variable(tf.random_normal([1, nitem])),
                  sigma=tf.nn.softplus(
                      tf.Variable(tf.random_normal([1, nitem]))))

inference = ed.KLqp({trait: q_trait, thresh: q_thresh}, data={X: X_data})
inference.run(n_iter=2500, n_samples=10)

# CRITICISM
# Check that the inferred posterior mean captures the true traits.
plt.scatter(trait_true, q_trait.mean().eval())
plt.show()

print("MSE between true traits and inferred posterior mean:")
print(np.mean(np.square(trait_true - q_trait.mean().eval())))
Example #38
#print("mu: ", qmu.value().eval())
#print("beta:\n", qbeta.value().eval())

Cb, Sb, taub = map_MOU(X.T, verbose=2)
Cb[np.eye(d, dtype=bool)] = taub

# VI
print("setting up variational distributions")
qmu = Normal(loc=tf.Variable(tf.random_normal([d])),
             scale=tf.nn.softplus(tf.Variable(tf.random_normal([d]))))
qbeta = Normal(loc=tf.Variable(tf.random_normal([d, d])),
               scale=tf.nn.softplus(tf.Variable(tf.random_normal([d, d]))))
print("constructing inference object")
%time inference_vb = ed.KLqp({beta: qbeta, mu: qmu}, data={xt: xt_true for xt, xt_true in zip(x, X)})
print("running VB inference")
inference_vb.run()

Cvb = qbeta.mean().eval()

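# pp[i, j] is the posterior probability that weight (i, j) is negative; entries with pp < 0.05 are zeroed in the filtered estimate.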
pp = qbeta.cdf(0.).eval()
Cvb_filt = Cvb.copy()
Cvb_filt[pp<0.05] = 0
off_diag_mask = np.logical_not(np.eye(d, dtype=bool))
print(pearsonr(C[off_diag_mask], Cvb[off_diag_mask]))
print(pearsonr(C[off_diag_mask], Cvb_filt[off_diag_mask]))
plt.figure()
plt.subplot(121)
plt.scatter(C[off_diag_mask], Cvb[off_diag_mask])
plt.subplot(122)
plt.scatter(C[off_diag_mask], Cvb_filt[off_diag_mask])
plt.figure()
plt.subplot(131)
sns.heatmap(C)
Example #39
def learn_separated(args, train_set, test_set, analysis_flag=False):

    # Parameters
    n_hidd = 1000  # number of hidden units per layer
    n_epoch = args.n_epoch
    learning_rate = 0.001
    batch_size = 128

    hidden_layer = get_fc_layer_fn(l2_reg_scale=1e-4, depth=1)
    out_layer = get_fc_layer_fn(l2_reg_scale=1e-4)

    x_train, t_train, y_train = train_set['X'], train_set['T'], train_set['Y']

    n_train = x_train.shape[0]
    x_dim = x_train.shape[1]

    batch_size = min(batch_size, n_train)

    # ------ Define Graph ---------------------#
    tf.reset_default_graph()
    # ------ Define Inputs ---------------------#
    # define placeholder which will receive data batches
    x_ph = tf.placeholder(tf.float32, [None, x_dim])
    t_ph = tf.placeholder(tf.float32, [None, 1])
    y_ph = tf.placeholder(tf.float32, [None, 1])

    n_ph = tf.shape(x_ph)[0]  # number of samples fed to placeholders

    # ------ Define generative model /decoder-----------------------#

    if analysis_flag:
        z_t_dim = 1
        z_y_dim = 1
    else:
        # z_x_dim = 1
        z_t_dim = 2
        z_y_dim = 3
    # latent_dims = (z_x_dim, z_t_dim, z_y_dim)
    latent_dims = (z_t_dim, z_y_dim)
    # prior over latent variables:
    # p(zx) -
    # zx = Normal(loc=tf.zeros([n_ph, z_x_dim]), scale=tf.ones([n_ph, z_x_dim]))
    # p(zt) -
    zt = Normal(loc=tf.zeros([n_ph, z_t_dim]), scale=tf.ones([n_ph, z_t_dim]))
    # p(zy) -
    zy = Normal(loc=tf.zeros([n_ph, z_y_dim]), scale=tf.ones([n_ph, z_y_dim]))
    z = tf.concat([zt, zy], axis=1)

    # p(x|z) - likelihood of proxy X
    # z = tf.concat([zx, zt, zy], axis=1)

    hidden = hidden_layer(z, n_hidd, tf.nn.elu)
    x = Normal(loc=out_layer(hidden, x_dim, None),
               scale=out_layer(hidden, x_dim, tf.nn.softplus),
               name='gaussian_px_z')

    # p(t|zt)
    if args.model_type == 'separated_with_confounder':
        hidden = hidden_layer(z, n_hidd, tf.nn.elu)
    else:
        hidden = hidden_layer(zt, n_hidd, tf.nn.elu)
    probs = out_layer(hidden, 1, tf.nn.sigmoid)  # output in [0,1]
    t = Bernoulli(probs=probs, dtype=tf.float32, name='bernoulli_pt_z')

    # p(y|t,zy)
    hidden = hidden_layer(zy, n_hidd, tf.nn.elu)  # shared hidden layer
    mu_y_t0 = out_layer(hidden, 1, None)
    mu_y_t1 = out_layer(hidden, 1, None)
    # y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0, scale=tf.ones_like(mu_y_t0))
    sigma_y_t0 = out_layer(hidden, 1, tf.nn.softplus)
    sigma_y_t1 = out_layer(hidden, 1, tf.nn.softplus)
    y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0,
               scale=t * sigma_y_t1 + (1. - t) * sigma_y_t0)

    # ------ Define inference model - CEVAE variational approximation (encoder)

    # q(t|x)
    hqt = hidden_layer(x_ph, n_hidd, tf.nn.elu)
    probs_t = out_layer(hqt, 1, tf.nn.sigmoid)  # output in [0,1]
    qt = Bernoulli(probs=probs_t, dtype=tf.float32)

    # q(y|x,t)
    hqy = hidden_layer(x_ph, n_hidd, tf.nn.elu)  # shared hidden layer
    mu_qy_t0 = out_layer(hqy, 1, None)
    mu_qy_t1 = out_layer(hqy, 1, tf.nn.elu)
    sigma_qy_t1 = out_layer(hqy, 1, tf.nn.softplus)
    sigma_qy_t0 = out_layer(hqy, 1, tf.nn.softplus)
    # qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0, scale=tf.ones_like(mu_qy_t0))
    qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0,
                scale=qt * sigma_qy_t1 + (1. - qt) * sigma_qy_t0)

    # # q(z_x|x,t,y)
    # inpt2 = tf.concat([x_ph, qy], axis=1)
    # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu)  # shared hidden layer
    # muq_t0 = out_layer(hqz, z_x_dim, None)
    # sigmaq_t0 = out_layer(hqz, z_x_dim, tf.nn.softplus)
    # muq_t1 = out_layer(hqz, z_x_dim, None)
    # sigmaq_t1 = out_layer(hqz, z_x_dim, tf.nn.softplus)
    # qzx = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
    #             scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # shared hidden layer
    inpt2 = tf.concat([x_ph, qy], axis=1)
    hqz = out_layer(inpt2, n_hidd, tf.nn.elu)

    # q(zt|x,t,y)
    muq_t0 = out_layer(hqz, z_t_dim, None)
    sigmaq_t0 = out_layer(hqz, z_t_dim, tf.nn.softplus)
    muq_t1 = out_layer(hqz, z_t_dim, None)
    sigmaq_t1 = out_layer(hqz, z_t_dim, tf.nn.softplus)
    qzt = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
                 scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # q(zy|x,t,y)
    # inpt2 = tf.concat([x_ph, qy], axis=1)
    # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu)  # shared hidden layer
    muq_t0 = out_layer(hqz, z_y_dim, None)
    sigmaq_t0 = out_layer(hqz, z_y_dim, tf.nn.softplus)
    muq_t1 = out_layer(hqz, z_y_dim, None)
    sigmaq_t1 = out_layer(hqz, z_y_dim, tf.nn.softplus)
    qzy = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
                 scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # end graph def

    # ------ Criticism / evaluation graph:
    zy_learned = ed.copy(qzy, {x: x_ph})
    zt_learned = ed.copy(qzt, {x: x_ph})

    # sample posterior predictive for p(y|z_y,t)
    y_post = ed.copy(y, {zy: qzy, t: t_ph}, scope='y_post')
    # crude approximation of the above
    y_post_mean = ed.copy(y, {zy: qzy.mean(), t: t_ph}, scope='y_post_mean')

    # ------ Training -  Run variational inference

    # Create data dictionary for edward
    data = {x: x_ph, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph}
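    # Binding qt and qy to the observed t and y makes the encoders for z condition on the true treatment and outcome during training.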

    batch_size = min(batch_size, n_train)
    n_iter_per_epoch = n_train // batch_size

    inference = ed.KLqp({zt: qzt, zy: qzy}, data=data)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    data_scaling = n_train / batch_size  # to scale the minibatch likelihood against the prior
    inference.initialize(optimizer=optimizer,
                         n_samples=5,
                         n_iter=n_iter_per_epoch * n_epoch,
                         scale={
                             x: data_scaling,
                             t: data_scaling,
                             y: data_scaling
                         })

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for epoch in range(n_epoch):
            train_generator = batch_generator(np.random.permutation(n_train),
                                              batch_size)
            avg_loss = 0.0
            for j in range(n_iter_per_epoch):
                # Take batch:
                idx = next(train_generator)
                x_b, t_b, y_b = x_train[idx], t_train[idx], y_train[idx]
                info_dict = inference.update(feed_dict={
                    x_ph: x_b,
                    t_ph: t_b,
                    y_ph: y_b
                })
                inference.print_progress(info_dict)
                avg_loss += info_dict['loss']
            avg_loss = avg_loss / n_iter_per_epoch
            avg_loss = avg_loss / batch_size
            # print('Epoch {}, avg loss {}'.format(epoch, avg_loss))

        # ------ Evaluation -
        x_test = test_set['X']
        H_test = test_set['H']

        z_y_test = sess.run(zy_learned.mean(), feed_dict={x_ph: x_test})
        z_t_test = sess.run(zt_learned.mean(), feed_dict={x_ph: x_test})
        z_y_train = sess.run(zy_learned.mean(), feed_dict={x_ph: x_train})

        if args.show_plots:
            treat_probs = sess.run(qt.mean(), feed_dict={x_ph: x_test})
            plt.scatter(z_t_test.flatten(),
                        treat_probs.flatten(),
                        label='Estimated Treatment Probability')
            plt.legend()
            plt.xlabel(r'$z_t$')
            plt.ylabel('Probability')
            plt.show()

            # plt.scatter(x_test[:, 1].flatten(), z_y_test.flatten())
            # plt.xlabel('X_1')
            # plt.ylabel('z_y')
            # plt.show()
            #
            plt.scatter(H_test.flatten(), z_y_test.flatten())
            plt.xlabel('H')
            plt.ylabel(r'$z_y$', fontsize=16)
            plt.show()

            plt.scatter(test_set['W'].flatten(), z_t_test.flatten())
            plt.xlabel('W')
            plt.ylabel(r'$z_t$')
            plt.show()

        # CATE estimation:
        if args.estimation_type == 'approx_posterior':
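            # Posterior predictive mean of y with the treatment forced to 0 and then to 1 for every test unit.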
            forced_t = np.ones((args.n_test, 1))
            est_y0 = sess.run(y_post.mean(),
                              feed_dict={
                                  x_ph: x_test,
                                  t_ph: 0 * forced_t
                              })
            est_y1 = sess.run(y_post.mean(),
                              feed_dict={
                                  x_ph: x_test,
                                  t_ph: forced_t
                              })
            # std_y1 = sess.run(y_post.stddev(), feed_dict={x_ph: x_test, t_ph: forced_t})
        elif args.estimation_type == 'latent_matching':
            est_y0, est_y1 = matching_estimate(z_y_train, t_train, y_train,
                                               z_y_test, args.n_neighbours)
        else:
            raise ValueError('Unrecognised estimation_type')

        return evalaute_effect_estimate(
            est_y0,
            est_y1,
            test_set,
            args,
            model_name='Separated CEVAE - Latent dims:  ' + str(latent_dims),
            estimation_type=args.estimation_type)
Example #40
N = 40  # number of data points (value assumed; the original definition was cut off)
D = 10  # number of features

# DATA
coeff = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, coeff)
X_test, y_test = build_toy_dataset(N, coeff)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {X: X_train, y: y_train}
inference = ed.KLqp({w: qw, b: qb}, data)
inference.run(n_samples=5, n_iter=250)

# CRITICISM
y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()})
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qw.mean()) + qb.mean(), sigma=tf.ones(N))

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))
Example #41
        muq_t0, sigmaq_t0 = fullyConnect_net(x_ph, [h],
                                             [[d, None], [d, tf.nn.softplus]],
                                             'qz_xt0',
                                             lamba=lamba,
                                             activation=activation)

        qz = Normal(loc=muq_t0, scale=sigmaq_t0)

        # Sampling posterior predictive from p(y|z,t)
        y_post = ed.copy(y, {z: qz, t: t_ph}, scope='y_post')
        t_post = ed.copy(t, {z: qz, y: y_ph}, scope='t_post')

        # for early stopping according to a validation set
        y_post_eval = ed.copy(y, {
            z: qz.mean(),
            y: y_ph,
            t: t_ph
        },
                              scope='y_post_eval')

        t_post_eval = ed.copy(t, {z: qz.mean(), y: y_ph}, scope='t_post_eval')

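        # Point-estimate proxy for the validation ELBO: reconstruction log-probabilities of y and t at the posterior mean of z, plus log p(z) - log q(z) evaluated there.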
        log_valid = tf.reduce_mean(
            tf.reduce_sum(y_post_eval.log_prob(y_ph) +
                          t_post_eval.log_prob(t_ph),
                          axis=1) +
            tf.reduce_sum(z.log_prob(qz.mean()) - qz.log_prob(qz.mean()),
                          axis=1))

        tf.global_variables_initializer().run()
Example #42
qw = Normal(loc=tf.Variable(tf.random_normal([D]) + 1.0),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))

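# ImplicitKLqp trains a ratio estimator (discriminator) alongside the variational approximation; the loop below performs 5 discriminator updates per generator update.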
inference = ed.ImplicitKLqp(
    {w: qw}, data={y: y_ph},
    discriminator=ratio_estimator, global_vars={w: qw})
inference.initialize(n_iter=5000, n_print=100, scale={y: float(N) / M})

sess = ed.get_session()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
  X_batch, y_batch = next(data)
  for _ in range(5):
    info_dict_d = inference.update(
        variables="Disc", feed_dict={X: X_batch, y_ph: y_batch})

  info_dict = inference.update(
      variables="Gen", feed_dict={X: X_batch, y_ph: y_batch})
  info_dict['loss_d'] = info_dict_d['loss_d']
  info_dict['t'] = info_dict['t'] // 6  # count each set of 6 updates (5 discriminator + 1 generator) as one iteration

  t = info_dict['t']
  inference.print_progress(info_dict)
  if t == 1 or t % inference.n_print == 0:
    # Check inferred posterior parameters.
    mean, std = sess.run([qw.mean(), qw.stddev()])
    print("\nInferred mean & std:")
    print(mean)
    print(std)