def lstm_cell_1(x, h, c, name=None, reuse=False):
    """LSTM returning hidden state and content cell at a specific timestep."""
    nin = x.shape[-1].value
    nout = h.shape[-1].value
    with tf.variable_scope(name, default_name="lstm_1",
                           values=[x, h, c], reuse=reuse):
        wx = get_variable_wrap("kernel/input", [nin, nout * 4],
                               dtype=tf.float32,
                               initializer=tf.orthogonal_initializer(1.0))
        wh = get_variable_wrap("kernel/hidden", [nout, nout * 4],
                               dtype=tf.float32,
                               initializer=tf.orthogonal_initializer(1.0))
        b = get_variable_wrap("bias", [nout * 4], dtype=tf.float32,
                              initializer=tf.constant_initializer(0.0))

        z = ed.dot(x, wx) + ed.dot(h, wh) + b
        i, f, o, u = tf.split(z, 4, axis=0)
        i = tf.sigmoid(i)
        f = tf.sigmoid(f + 1.0)
        o = tf.sigmoid(o)
        u = tf.tanh(u)
        c = f * c + i * u
        h = o * tf.tanh(c)
        return h, c
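# Hypothetical usage sketch (not from the original source): unroll lstm_cell_1
# over T timesteps of a toy [T, nin] sequence, creating the weights on the
# first step and reusing them afterwards.  It assumes lstm_cell_1 and
# get_variable_wrap from above are in scope, and that the cell is fed 1-D
# tensors, which is what ed.dot and the axis=0 split expect.
T, nin, nout = 10, 3, 8
xs = tf.unstack(tf.zeros([T, nin]))  # stand-in sequence of T input vectors
h = tf.zeros(nout)
c = tf.zeros(nout)
states = []
for t, x_t in enumerate(xs):
    h, c = lstm_cell_1(x_t, h, c, name="lstm_1", reuse=(t > 0))
    states.append(h)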
def main(_):
    ed.set_seed(42)

    N = 5000  # number of data points
    D = 10  # number of features

    # DATA
    w_true = np.random.randn(D)
    X_data = np.random.randn(N, D)
    p = expit(np.dot(X_data, w_true))
    y_data = np.array([np.random.binomial(1, i) for i in p])

    # MODEL
    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
    y = Bernoulli(logits=ed.dot(X, w))

    # INFERENCE
    qw = Normal(loc=tf.get_variable("qw/loc", [D]),
                scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))

    inference = IWVI({w: qw}, data={X: X_data, y: y_data})
    inference.run(K=5, n_iter=1000)

    # CRITICISM
    print("Mean squared error in true values to inferred posterior mean:")
    print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
def bayesian_linear_regression():
    # underlying model params
    N = 5000  # number of data points
    D = 100  # number of features
    noise_std = .1

    # Generate simulated data
    w_true = np.random.randn(D)
    X_train, y_train = build_lin_reg_toy_dataset(N, w_true, noise_std)
    X_test, y_test = build_lin_reg_toy_dataset(N, w_true, noise_std)

    # Set up edward model
    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
    b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    log_sd = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    y = Normal(loc=ed.dot(X, w) + b, scale=tf.exp(log_sd))

    # Inference in edward
    qw = Normal(loc=tf.get_variable("qw/loc", [D]),
                scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))
    qb = Normal(loc=tf.get_variable("qb/loc", [1]),
                scale=tf.nn.softplus(tf.get_variable("qb/scale", [1])))
    qlog_sd = Normal(loc=tf.get_variable("qlog_sd/loc", [1]),
                     scale=tf.nn.softplus(tf.get_variable("qlog_sd/scale", [1])))
    inference = ed.KLqp({w: qw, b: qb, log_sd: qlog_sd},
                        data={X: X_train, y: y_train})
    inference.run(n_iter=1000)

    # Drop into the debugger to inspect the fitted factors (requires `import pdb`).
    pdb.set_trace()
def main():
    (X_train, y_train, X_test, y_test,
     train_filenames, test_filenames) = prepare_scutfbp5500(
        feat_layers=["conv4_1", "conv5_1"])

    # Print the array shapes, not the arrays themselves.
    print('Shape of X_train: {0}'.format(np.shape(X_train)))
    print('Shape of X_test: {0}'.format(np.shape(X_test)))
    print('Shape of y_train: {0}'.format(np.shape(y_train)))
    print('Shape of y_test: {0}'.format(np.shape(y_test)))

    N = 3300
    D = len(X_train[0])

    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
    b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))

    qw = Normal(loc=tf.get_variable("qw/loc", [D]),
                scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))
    qb = Normal(loc=tf.get_variable("qb/loc", [1]),
                scale=tf.nn.softplus(tf.get_variable("qb/scale", [1])))

    inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
    inference.run(n_samples=3300, n_iter=250)

    y_post = ed.copy(y, {w: qw, b: qb})

    print("Mean squared error on test data:")
    print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))

    print("Mean absolute error on test data:")
    print(ed.evaluate('mean_absolute_error', data={X: X_test, y_post: y_test}))
def log_prob(self, xs, zs):
    x, y = xs['x'], xs['y']
    w, b = zs['w'], zs['b']
    log_prior = tf.reduce_sum(norm.logpdf(w, 0.0, self.prior_std))
    log_prior += tf.reduce_sum(norm.logpdf(b, 0.0, self.prior_std))
    log_lik = tf.reduce_sum(norm.logpdf(y, ed.dot(x, w) + b, self.lik_std))
    return log_lik + log_prior
def log_prob(self, xs, zs):
    x, y = xs['x'], xs['y']
    w, b = zs['w'], zs['b']
    log_prior = tf.reduce_sum(norm.logpdf(w, 0.0, self.prior_std))
    log_prior += tf.reduce_sum(norm.logpdf(b, 0.0, self.prior_std))
    log_lik = tf.reduce_sum(
        bernoulli.logpmf(y, p=self.inv_link(ed.dot(x, w) + b)))
    return log_lik + log_prior
def log_prob(self, xs, zs): """Return scalar, the log joint density log p(xs, zs).""" x, y = xs['x'], xs['y'] w, b = zs['w'], zs['b'] log_prior = tf.reduce_sum(norm.logpdf(w, 0.0, self.prior_std)) log_prior += tf.reduce_sum(norm.logpdf(b, 0.0, self.prior_std)) log_lik = tf.reduce_sum(norm.logpdf(y, ed.dot(x, w) + b, self.lik_std)) return log_lik + log_prior
def log_prob(self, xs, zs):
    x, y = xs['x'], xs['y']
    w, b = zs['w'], zs['b']
    log_prior = tf.reduce_sum(norm.logpdf(w, 0.0, self.prior_std))
    log_prior += tf.reduce_sum(norm.logpdf(b, 0.0, self.prior_std))
    log_lik = tf.reduce_sum(
        bernoulli.logpmf(y, p=self.inv_link(ed.dot(x, w) + b)))
    return log_lik + log_prior
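# The two Bernoulli variants above reference self.inv_link without showing it;
# a minimal, assumed configuration is the logistic sigmoid, which turns the
# linear predictor ed.dot(x, w) + b into a probability in (0, 1).  The class
# name and defaults below are illustrative, not from the source.
class BayesianLogisticRegression(object):
    def __init__(self, prior_std=1.0, inv_link=tf.sigmoid):
        self.prior_std = prior_std
        self.inv_link = inv_link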
def _test_linear_regression(self, default, dtype): def build_toy_dataset(N, w, noise_std=0.1): D = len(w) x = np.random.randn(N, D) y = np.dot(x, w) + np.random.normal(0, noise_std, size=N) return x, y with self.test_session() as sess: N = 40 # number of data points D = 10 # number of features w_true = np.random.randn(D) X_train, y_train = build_toy_dataset(N, w_true) X_test, y_test = build_toy_dataset(N, w_true) X = tf.placeholder(dtype, [N, D]) w = Normal(loc=tf.zeros(D, dtype=dtype), scale=tf.ones(D, dtype=dtype)) b = Normal(loc=tf.zeros(1, dtype=dtype), scale=tf.ones(1, dtype=dtype)) y = Normal(loc=ed.dot(X, w) + b, scale=0.1 * tf.ones(N, dtype=dtype)) n_samples = 2000 if not default: qw = Empirical( tf.Variable(tf.zeros([n_samples, D], dtype=dtype))) qb = Empirical( tf.Variable(tf.zeros([n_samples, 1], dtype=dtype))) inference = ed.SGLD({ w: qw, b: qb }, data={ X: X_train, y: y_train }) else: inference = ed.SGLD([w, b], data={X: X_train, y: y_train}) qw = inference.latent_vars[w] qb = inference.latent_vars[b] inference.run(step_size=0.001) self.assertAllClose(qw.mean().eval(), w_true, rtol=5e-1, atol=5e-1) self.assertAllClose(qb.mean().eval(), [0.0], rtol=5e-1, atol=5e-1) old_t, old_n_accept = sess.run([inference.t, inference.n_accept]) if not default: self.assertEqual(old_t, n_samples) else: self.assertEqual(old_t, 1e4) self.assertGreater(old_n_accept, 0.1) sess.run(inference.reset) new_t, new_n_accept = sess.run([inference.t, inference.n_accept]) self.assertEqual(new_t, 0) self.assertEqual(new_n_accept, 0)
def train(self, n_iter=1000): D = len(self.team_num_map.keys()) N = self.xs.shape[0] with tf.name_scope('model'): self.X = tf.placeholder(tf.float32, [N, D]) self.w1 = Normal(loc=tf.zeros(D), scale=tf.ones(D)) # self.b1 = Normal(loc=tf.zeros(1), scale=tf.ones(1)) self.y1 = Poisson(rate=tf.exp(ed.dot(self.X, self.w1))) with tf.name_scope('posterior'): if self.inf_type == 'Var': self.qw1 = Normal(loc=tf.get_variable("qw1_ll/loc", [D]), scale=tf.nn.softplus( tf.get_variable("qw1_ll/scale", [D]))) # self.qb1 = Normal(loc=tf.get_variable("qb1/loc", [1]), # scale=tf.nn.softplus(tf.get_variable("qb1/scale", # [1]))) elif self.inf_type == 'MAP': self.qw1 = PointMass( Normal(loc=tf.get_variable("qw1_ll/loc", [D]), scale=tf.nn.softplus( tf.get_variable("qw1_ll/scale", [D])))) if self.inf_type == 'Var': inference = ed.ReparameterizationKLqp({self.w1: self.qw1}, data={ self.X: self.xs, self.y1: self.ys }) elif self.inf_type == 'MAP': inference = ed.MAP({self.w1: self.qw1}, data={ self.X: self.xs, self.y1: self.ys }) inference.initialize(optimizer=tf.train.AdamOptimizer( learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08), n_iter=n_iter) tf.global_variables_initializer().run() self.loss = np.empty(n_iter, dtype=np.float32) for i in range(n_iter): info_dict = inference.update() self.loss[i] = info_dict["loss"] inference.print_progress(info_dict) self._trained = True graph = tf.get_default_graph() self.team_skill = graph.get_tensor_by_name("qw1_ll/loc:0").eval() self.perf_variance = graph.get_tensor_by_name("qw1_ll/scale:0").eval() # self.bias = (graph.get_tensor_by_name("qb1/loc:0").eval(), # graph.get_tensor_by_name("qb2/loc:0").eval()) self.y_post = ed.copy(self.y1, {self.w1: self.qw1}) return
def encode_z(hprev, L, name=None, reuse=False): # input: hprev should change to [#batch, dim] #hprev = tf.expand_dims(hprev, 0) #hidden_dim = 15 #with tf.variable_scope("prior"): # prior = fc_act(hprev, hidden_dim, act=tf.nn.relu, name="fc_prior") #with tf.variable_scope("prior_mu"): # prior_mu = fc_act(prior, L, name="fc_prior_mu") #with tf.variable_scope("prior_sigma"): # prior_sigma = fc_act(prior, L, act=tf.nn.softplus, name="fc_prior_sigma") #zt = Normal(loc=tf.squeeze(prior_mu, 0), scale = tf.squeeze(prior_sigma, 0)) #AR1 cell using difussion process: z_t = z_t-1 + eta #zt = Normal(hprev, 0.1) # NN for encoding ht -> mu_zt, sigma_zt H = hprev.shape[0] with tf.variable_scope(name, default_name="encode_z", reuse=reuse): Whz_mean = get_variable_wrap("Wmean", [H, L], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) bhz_mean = get_variable_wrap("bmean", [L], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) Whz_cov = get_variable_wrap("Wvar", [H, L], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) bhz_cov = get_variable_wrap("bvar", [L], dtype=tf.float32, initializer=tf.constant_initializer(0.0)) #Whz_mean = tf.Variable(np.zeros([H, L]), dtype=tf.float32) #bhz_mean = tf.Variable(np.zeros(L), dtype=tf.float32) #Whz_cov = tf.Variable(np.zeros([H, L]), dtype=tf.float32) #bhz_cov = tf.Variable(np.zeros(L), dtype=tf.float32) zt = Normal(loc=ed.dot(hprev, Whz_mean) + bhz_mean, scale=tf.nn.softplus(ed.dot(hprev, Whz_cov) + bhz_cov)) return zt
def rnn_cell(hprev, zt, name=None, reuse=False):
    """basic RNN returning next hidden state at a specific timestep."""
    nin = zt.shape[-1].value
    nout = hprev.shape[-1].value
    with tf.variable_scope(name, default_name="rnn",
                           values=[hprev, zt], reuse=reuse):
        wz = get_variable_wrap("kernel/input", [nin, nout], dtype=tf.float32,
                               initializer=tf.random_normal_initializer(0, 0.01))
        wh = get_variable_wrap("kernel/hidden", [nout, nout], dtype=tf.float32,
                               initializer=tf.random_normal_initializer(0, 0.01))
        bh = get_variable_wrap("bias", [nout], dtype=tf.float32,
                               initializer=tf.random_normal_initializer(0, 0.01))
        return tf.tanh(ed.dot(hprev, wh) + ed.dot(zt, wz) + bh)
def _test_linear_regression(self, default, dtype): def build_toy_dataset(N, w, noise_std=0.1): D = len(w) x = np.random.randn(N, D) y = np.dot(x, w) + np.random.normal(0, noise_std, size=N) return x, y with self.test_session() as sess: N = 40 # number of data points D = 10 # number of features w_true = np.random.randn(D) X_train, y_train = build_toy_dataset(N, w_true) X_test, y_test = build_toy_dataset(N, w_true) X = tf.placeholder(dtype, [N, D]) w = Normal(loc=tf.zeros(D, dtype=dtype), scale=tf.ones(D, dtype=dtype)) b = Normal(loc=tf.zeros(1, dtype=dtype), scale=tf.ones(1, dtype=dtype)) y = Normal(loc=ed.dot(X, w) + b, scale=0.1 * tf.ones(N, dtype=dtype)) proposal_w = Normal(loc=w, scale=0.5 * tf.ones(D, dtype=dtype)) proposal_b = Normal(loc=b, scale=0.5 * tf.ones(1, dtype=dtype)) n_samples = 2000 if not default: qw = Empirical(tf.Variable(tf.zeros([n_samples, D], dtype=dtype))) qb = Empirical(tf.Variable(tf.zeros([n_samples, 1], dtype=dtype))) inference = ed.ReplicaExchangeMC( {w: qw, b: qb}, {w: proposal_w, b: proposal_b}, data={X: X_train, y: y_train}) else: inference = ed.ReplicaExchangeMC( [w, b], {w: proposal_w, b: proposal_b}, data={X: X_train, y: y_train}) qw = inference.latent_vars[w] qb = inference.latent_vars[b] inference.run() self.assertAllClose(qw.mean().eval(), w_true, rtol=5e-1, atol=5e-1) self.assertAllClose(qb.mean().eval(), [0.0], rtol=5e-1, atol=5e-1) old_t, old_n_accept = sess.run([inference.t, inference.n_accept]) if not default: self.assertEqual(old_t, n_samples) else: self.assertEqual(old_t, 1e4) self.assertGreater(old_n_accept, 0.1) sess.run(inference.reset) new_t, new_n_accept = sess.run([inference.t, inference.n_accept]) self.assertEqual(new_t, 0) self.assertEqual(new_n_accept, 0)
def _setup(self):
    N = 250  # number of data points
    D = 5  # number of features

    # DATA
    w_true = np.ones(D) * 5.0
    X_train, y_train = build_toy_dataset(N, w_true)

    # MODEL
    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
    b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
    y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

    return N, D, w_true, X_train, y_train, X, w, b, y
def _setup(self):
    N = 250  # number of data points
    D = 5  # number of features

    # DATA
    w_true = np.ones(D) * 5.0
    X_train, y_train = build_toy_dataset(N, w_true)

    # MODEL
    X = tf.placeholder(tf.float32, [N, D])
    w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
    b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
    y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))

    return N, D, w_true, X_train, y_train, X, w, b, y
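# build_toy_dataset is referenced but not defined in the two _setup variants
# above; a plausible definition, consistent with the inline versions used by
# the test snippets elsewhere in this collection (linear response X.w plus
# Gaussian noise), would be:
def build_toy_dataset(N, w, noise_std=0.1):
    D = len(w)
    x = np.random.randn(N, D).astype(np.float32)
    y = np.dot(x, w) + np.random.normal(0, noise_std, size=N)
    return x, y.astype(np.float32)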
def train(self, n_iter=1000): D = len(self.team_num_map.keys()) N = self.xs.shape[0] with tf.name_scope('model'): self.X = tf.placeholder(tf.float32, [N, D]) self.w = Normal(loc=tf.zeros(D), scale=tf.ones(D)) self.b = Normal(loc=tf.zeros(1), scale=tf.ones(1)) self.y = Normal(loc=ed.dot(self.X, self.w) + self.b, scale=tf.ones(N)) with tf.name_scope('posterior'): self.qw = Normal(loc=tf.get_variable("qw/loc", [D]), scale=tf.nn.softplus( tf.get_variable("qw/scale", [D]))) self.qb = Normal(loc=tf.get_variable("qb/loc", [1]), scale=tf.nn.softplus( tf.get_variable("qb/scale", [1]))) inference = ed.ReparameterizationKLqp( { self.w: self.qw, self.b: self.qb }, data={ self.X: self.xs, self.y: self.ys }) inference.initialize(optimizer=tf.train.AdamOptimizer( learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08), n_iter=n_iter) tf.global_variables_initializer().run() # inference.run() self.loss = np.empty(n_iter, dtype=np.float32) for i in range(n_iter): info_dict = inference.update() self.loss[i] = info_dict["loss"] inference.print_progress(info_dict) self._trained = True graph = tf.get_default_graph() self.team_skill = graph.get_tensor_by_name("qw/loc:0").eval() self.bias = graph.get_tensor_by_name("qb/loc:0").eval() self.y_post = ed.copy(self.y, {self.w: self.qw, self.b: self.qb}) return
def main(_): ed.set_seed(FLAGS.seed) ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data() N, D = Xtrain.shape N_test, D_test = Xtest.shape weights, q_components = [], [] g = tf.Graph() with g.as_default(): tf.set_random_seed(FLAGS.seed) sess = tf.InteractiveSession() with sess.as_default(): # MODEL w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D)) X = tf.placeholder(tf.float32, [N, D]) y = Bernoulli(logits=ed.dot(X, w)) X_test = tf.placeholder( tf.float32, [N_test, D_test ]) # TODO why are these test variables necessary? y_test = Bernoulli(logits=ed.dot(X_test, w)) iter = 42 # TODO qw = construct_multivariatenormaldiag([D], iter, 'qw') inference = ed.KLqp({w: qw}, data={X: Xtrain, y: ytrain}) tf.global_variables_initializer().run() inference.run(n_iter=FLAGS.LMO_iter) x_post = ed.copy(y, {w: qw}) x_post_t = ed.copy(y_test, {w: qw}) print( 'log-likelihood train ', ed.evaluate('log_likelihood', data={ x_post: ytrain, X: Xtrain })) print( 'log-likelihood test ', ed.evaluate('log_likelihood', data={ x_post_t: ytest, X_test: Xtest })) print( 'binary_accuracy train ', ed.evaluate('binary_accuracy', data={ x_post: ytrain, X: Xtrain })) print( 'binary_accuracy test ', ed.evaluate('binary_accuracy', data={ x_post_t: ytest, X_test: Xtest }))
def get_prediction_tf(self, X, Z):
    w, s, b = Z['w'], Z['s'], Z['b']
    return s * tf.tanh(ed.dot(X, w) + b)
D = 10
w_true = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, w_true)
X_test, y_test = build_toy_dataset(N, w_true)

# Bayesian regression:
#   p(w) ~ N(0, sigma_w^2 * I)
#   p(b) ~ N(0, sigma_b^2)
#   p(y|x,w,b) ~ N(x'w + b, sigma_y^2)
#   p(Y|x,w,b) ~ Prod[ N(x'w + b, sigma_y^2) ]

# Build the compute graphs matching our Bayesian regression model:
X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))  # include the bias b, as in the model above

# Variational approximations - fully factorized w and b graphs.
# A mean-field Normal family: still an approximation, since the exact
# posterior over (w, b) is a correlated multivariate Normal.
qw = Normal(loc=tf.Variable(tf.random_normal([D])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(loc=tf.Variable(tf.random_normal([1])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

# Run Kullback-Leibler divergence inference
# Minimize KL[q(w) || p(w|X)] where
#   q(w) ~ Normal
#   p(w|X) = p(X|w)p(w) / p(X)
inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(n_samples=5, n_iter=250)
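# A possible continuation (a sketch mirroring the criticism step used in
# other snippets in this collection): form the posterior predictive with
# ed.copy and score it on the held-out split.
y_post = ed.copy(y, {w: qw, b: qb})
print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))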
def main(_): ed.set_seed(FLAGS.seed) # setting up output directory outdir = FLAGS.outdir if '~' in outdir: outdir = os.path.expanduser(outdir) os.makedirs(outdir, exist_ok=True) is_vector = FLAGS.base_dist in ['mvnormal', 'mvlaplace'] ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data() N, D = Xtrain.shape N_test, D_test = Xtest.shape assert D_test == D, 'Test dimension %d different than train %d' % (D_test, D) logger.info('D = %d, Ntrain = %d, Ntest = %d' % (D, N, N_test)) # Solution components weights, q_params = [], [] # L-continous gradient estimate lipschitz_estimate = None # Metrics to log times_filename = os.path.join(outdir, 'times.csv') open(times_filename, 'w').close() # (mean, +- std) elbos_filename = os.path.join(outdir, 'elbos.csv') logger.info('saving elbos to, %s' % elbos_filename) open(elbos_filename, 'w').close() rocs_filename = os.path.join(outdir, 'roc.csv') logger.info('saving rocs to, %s' % rocs_filename) open(rocs_filename, 'w').close() gap_filename = os.path.join(outdir, 'gap.csv') open(gap_filename, 'w').close() step_filename = os.path.join(outdir, 'steps.csv') open(step_filename, 'w').close() # (mean, std) ll_train_filename = os.path.join(outdir, 'll_train.csv') open(ll_train_filename, 'w').close() ll_test_filename = os.path.join(outdir, 'll_test.csv') open(ll_test_filename, 'w').close() # (bin_ac_train, bin_ac_test) bin_ac_filename = os.path.join(outdir, 'bin_ac.csv') open(bin_ac_filename, 'w').close() # 'adafw', 'ada_afw', 'ada_pfw' if FLAGS.fw_variant.startswith('ada'): lipschitz_filename = os.path.join(outdir, 'lipschitz.csv') open(lipschitz_filename, 'w').close() iter_info_filename = os.path.join(outdir, 'iter_info.txt') open(iter_info_filename, 'w').close() for t in range(FLAGS.n_fw_iter): g = tf.Graph() with g.as_default(): sess = tf.InteractiveSession() with sess.as_default(): tf.set_random_seed(FLAGS.seed) # Build Model w = Normal(loc=tf.zeros(D, tf.float32), scale=tf.ones(D, tf.float32)) X = tf.placeholder(tf.float32, [None, D]) y = Bernoulli(logits=ed.dot(X, w)) p_joint = blr_utils.Joint(Xtrain, ytrain, sess, FLAGS.n_monte_carlo_samples, logger) # vectorized Model evaluations n_test_samples = 100 W = tf.placeholder(tf.float32, [n_test_samples, D]) y_data = tf.placeholder(tf.float32, [None]) # N -> (N, n_test) y_data_matrix = tf.tile(tf.expand_dims(y_data, 1), (1, n_test_samples)) pred_logits = tf.matmul(X, tf.transpose(W)) # (N, n_test) ypred = tf.sigmoid(tf.reduce_mean(pred_logits, axis=1)) pY = Bernoulli(logits=pred_logits) # (N, n_test) log_likelihoods = pY.log_prob(y_data_matrix) # (N, n_test) log_likelihood_expectation = tf.reduce_mean(log_likelihoods, axis=1) # (N, ) ll_mean, ll_std = tf.nn.moments(log_likelihood_expectation, axes=[0]) if t == 0: fw_iterates = {} else: # Current solution prev_components = [ coreutils.base_loc_scale(FLAGS.base_dist, c['loc'], c['scale'], multivariate=is_vector) for c in q_params ] qtw_prev = coreutils.get_mixture(weights, prev_components) fw_iterates = {w: qtw_prev} # s is the solution to LMO, random initialization s = coreutils.construct_base(FLAGS.base_dist, [D], t, 's', multivariate=is_vector) sess.run(tf.global_variables_initializer()) total_time = 0. 
inference_time_start = time.time() # Run relbo to solve LMO problem # If the first atom is being selected through running LMO # it is equivalent to running vi on a uniform prior # Since uniform is not in our variational family try # only random element (without LMO inference) as initial iterate if FLAGS.iter0 == 'vi' or t > 0: inference = relbo.KLqp({w: s}, fw_iterates=fw_iterates, data={ X: Xtrain, y: ytrain }, fw_iter=t) inference.run(n_iter=FLAGS.LMO_iter) inference_time_end = time.time() # compute only step size selection time #total_time += float(inference_time_end - inference_time_start) loc_s = s.mean().eval() scale_s = s.stddev().eval() # Evaluate the next step step_result = {} if t == 0: # Initialization, q_0 q_params.append({'loc': loc_s, 'scale': scale_s}) weights.append(1.) if FLAGS.fw_variant.startswith('ada'): lipschitz_estimate = opt.adafw_linit(s, p_joint) step_type = 'init' elif FLAGS.fw_variant == 'fixed': start_step_time = time.time() step_result = opt.fixed(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t) end_step_time = time.time() total_time += float(end_step_time - start_step_time) elif FLAGS.fw_variant == 'adafw': start_step_time = time.time() step_result = opt.adaptive_fw(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t, lipschitz_estimate) end_step_time = time.time() total_time += float(end_step_time - start_step_time) step_type = step_result['step_type'] if step_type == 'adaptive': lipschitz_estimate = step_result['l_estimate'] elif FLAGS.fw_variant == 'ada_pfw': start_step_time = time.time() step_result = opt.adaptive_pfw(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t, lipschitz_estimate) end_step_time = time.time() total_time += float(end_step_time - start_step_time) step_type = step_result['step_type'] if step_type in ['adaptive', 'drop']: lipschitz_estimate = step_result['l_estimate'] elif FLAGS.fw_variant == 'ada_afw': start_step_time = time.time() step_result = opt.adaptive_afw(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t, lipschitz_estimate) end_step_time = time.time() total_time += float(end_step_time - start_step_time) step_type = step_result['step_type'] if step_type in ['adaptive', 'away', 'drop']: lipschitz_estimate = step_result['l_estimate'] elif FLAGS.fw_variant == 'line_search': start_step_time = time.time() step_result = opt.line_search_dkl(weights, q_params, qtw_prev, loc_s, scale_s, s, p_joint, t) end_step_time = time.time() total_time += float(end_step_time - start_step_time) step_type = step_result['step_type'] else: raise NotImplementedError( 'Step size variant %s not implemented' % FLAGS.fw_variant) if t == 0: gamma = 1. 
new_components = [s] else: q_params = step_result['params'] weights = step_result['weights'] gamma = step_result['gamma'] new_components = [ coreutils.base_loc_scale(FLAGS.base_dist, c['loc'], c['scale'], multivariate=is_vector) for c in q_params ] qtw_new = coreutils.get_mixture(weights, new_components) # Log metrics for current iteration logger.info('total time %f' % total_time) append_to_file(times_filename, total_time) elbo_t = elbo(qtw_new, p_joint, return_std=False) # testing elbo directory from KLqp elbo_loss = elboModel.KLqp({w: qtw_new}, data={ X: Xtrain, y: ytrain }) res_update = elbo_loss.run() logger.info("iter, %d, elbo, %.2f loss %.2f" % (t, elbo_t, res_update['loss'])) append_to_file(elbos_filename, "%f,%f" % (elbo_t, res_update['loss'])) logger.info('iter %d, gamma %.4f' % (t, gamma)) append_to_file(step_filename, gamma) if t > 0: gap_t = step_result['gap'] logger.info('iter %d, gap %.4f' % (t, gap_t)) append_to_file(gap_filename, gap_t) if FLAGS.fw_variant.startswith('ada'): append_to_file(lipschitz_filename, lipschitz_estimate) append_to_file(iter_info_filename, step_type) logger.info('lt = %.5f, iter_type = %s' % (lipschitz_estimate, step_type)) # get weight samples to evaluate expectations w_samples = qtw_new.sample([n_test_samples]).eval() ll_train_mean, ll_train_std = sess.run([ll_mean, ll_std], feed_dict={ W: w_samples, X: Xtrain, y_data: ytrain }) logger.info("iter, %d, train ll, %.2f +/- %.2f" % (t, ll_train_mean, ll_train_std)) append_to_file(ll_train_filename, "%f,%f" % (ll_train_mean, ll_train_std)) ll_test_mean, ll_test_std, y_test_pred = sess.run( [ll_mean, ll_std, ypred], feed_dict={ W: w_samples, X: Xtest, y_data: ytest }) logger.info("iter, %d, test ll, %.2f +/- %.2f" % (t, ll_test_mean, ll_test_std)) append_to_file(ll_test_filename, "%f,%f" % (ll_test_mean, ll_test_std)) roc_score = roc_auc_score(ytest, y_test_pred) logger.info("iter %d, roc %.4f" % (t, roc_score)) append_to_file(rocs_filename, roc_score) y_post = ed.copy(y, {w: qtw_new}) # eq. to y = Bernoulli(logits=ed.dot(X, qtw_new)) ed_train_ll = ed.evaluate('log_likelihood', data={ X: Xtrain, y_post: ytrain, }) ed_test_ll = ed.evaluate('log_likelihood', data={ X: Xtest, y_post: ytest, }) logger.info("edward train ll %.2f test ll %.2f" % (ed_train_ll, ed_test_ll)) bin_ac_train = ed.evaluate('binary_accuracy', data={ X: Xtrain, y_post: ytrain, }) bin_ac_test = ed.evaluate('binary_accuracy', data={ X: Xtest, y_post: ytest, }) append_to_file(bin_ac_filename, "%f,%f" % (bin_ac_train, bin_ac_test)) logger.info( "edward binary accuracy train ll %.2f test ll %.2f" % (bin_ac_train, bin_ac_test)) mse_test = ed.evaluate('mean_squared_error', data={ X: Xtest, y_post: ytest, }) logger.info("edward mse test ll %.2f" % (mse_test)) sess.close() tf.reset_default_graph()
weights = Normal(loc=tf.zeros(P), scale=tf.ones(P))  # coefficients in the model
pi = Beta(concentration0=1.0, concentration1=1.0)  # conjugate Beta prior on the Bernoulli inclusion probability
G_mask = Bernoulli(probs=tf.ones(P) * pi)  # Bernoulli prior for inclusion of SNPs
eps_nm = Normal(loc=tf.zeros([N]), scale=tf.ones([N]))  # Gaussian noise, epsilon in the model


def hadamard_product(X, b):
    """X and b are expected to be tensorflow objects; X has shape (N, P) and
    b has shape (P,).  b is tiled to shape (N, P), then the element-wise
    product X * b_tiled of shape (N, P) is returned.
    """
    N, P = list(map(int, X.shape))
    bmat = tf.reshape(tf.tile(b, [N]), [N, P])
    return tf.multiply(X, tf.to_float(bmat))


x_nm = Normal(
    loc=ed.dot(hadamard_product(G, G_mask), weights) + eps_nm,
    scale=tf.nn.softplus(tf.ones([N])))

# Softplus keeps the variational scale parameters positive.
qe = Normal(loc=tf.get_variable("qe/loc", [N]),
            scale=tf.nn.softplus(tf.get_variable("qe/scale", [N])))
qw = Normal(loc=tf.get_variable("qw/loc", [P]),
            scale=tf.nn.softplus(tf.get_variable("qw/scale", [P])))
qgm = Bernoulli(logits=tf.get_variable("qgm/logits", [P]))
qpi = Beta(
    tf.nn.softplus(tf.Variable(tf.random_normal([]))),
    tf.nn.softplus(tf.Variable(tf.random_normal([]))))

inference = ed.KLqp({weights: qw, G_mask: qgm, eps_nm: qe, pi: qpi},
                    data={G: x_train, x_nm: y_train})
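# Possible follow-up (a sketch, not in the original snippet): run the
# approximation and read off each SNP's posterior inclusion probability from
# the Bernoulli factor, whose mean equals that probability.
inference.run(n_iter=1000)
inclusion_probs = qgm.mean().eval()
print(inclusion_probs)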
    X = (X - 4.0) / 4.0
    X = X.reshape((N, D))
    return X, y


ed.set_seed(42)

N = 40  # number of data points
D = 1  # number of features

X_train, y_train = build_toy_dataset(N)

X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))
b = Normal(loc=tf.zeros([]), scale=1.0 * tf.ones([]))
y = Bernoulli(logits=ed.dot(X, w) + b)

# inference
T = 5000
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T])))
inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.initialize(n_print=10, step_size=0.6)

tf.global_variables_initializer().run()

# criticism & set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
ed.set_seed(42)

N = 40  # number of data points
D = 10  # number of features

# DATA
coeff = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, coeff)
X_test, y_test = build_toy_dataset(N, coeff)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {X: X_train, y: y_train}
inference = ed.KLqp({w: qw, b: qb}, data)
inference.run(n_samples=5, n_iter=250)

# CRITICISM
y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()})
# This is equivalent to
# y_post = Normal(mu=ed.dot(X, qw.mean()) + qb.mean(), sigma=tf.ones(N))
                        np.linspace(6, 8, num=N // 2)])  # integer count for np.linspace
    y = 5.0 * X + norm.rvs(0, noise_std, size=N)
    X = X.reshape((N, 1))
    return X.astype(np.float32), y.astype(np.float32)


N = 40  # num data points
p = 1  # num features

ed.set_seed(42)

X_data, y_data = build_toy_dataset(N)
X = X_data

beta = Normal(mu=tf.zeros(p), sigma=tf.ones(p))
y = Normal(mu=ed.dot(X, beta), sigma=tf.ones(N))

qmu_mu = tf.Variable(tf.random_normal([p]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([p])))
qbeta = Normal(mu=qmu_mu, sigma=qmu_sigma)

data = {y: y_data}
inference = ed.MFVI({beta: qbeta}, data)
inference.initialize()

sess = ed.get_session()
for t in range(501):
    _, loss = sess.run([inference.train, inference.loss])
    inference.print_progress(t, loss)
N = 500  # number of data points
M = 50  # batch size during training
D = 2  # number of features

# DATA
w_true = np.ones(D) * 5.0
X_train, y_train = build_toy_dataset(N, w_true)
X_test, y_test = build_toy_dataset(N, w_true)
data = generator([X_train, y_train], M)

# MODEL
X = tf.placeholder(tf.float32, [M, D])
y_ph = tf.placeholder(tf.float32, [M])
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
y = Normal(loc=ed.dot(X, w), scale=tf.ones(M))

# INFERENCE
qw = Normal(loc=tf.Variable(tf.random_normal([D]) + 1.0),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))

inference = ed.ImplicitKLqp(
    {w: qw}, data={y: y_ph},
    discriminator=ratio_estimator, global_vars={w: qw})
inference.initialize(n_iter=5000, n_print=100, scale={y: float(N) / M})

sess = ed.get_session()
tf.global_variables_initializer().run()

for _ in range(inference.n_iter):
    X_batch, y_batch = next(data)
def main(_): def ratio_estimator(data, local_vars, global_vars): """Takes as input a dict of data x, local variable samples z, and global variable samples beta; outputs real values of shape (x.shape[0] + z.shape[0],). In this example, there are no local variables. """ # data[y] has shape (M,); global_vars[w] has shape (D,) # we concatenate w to each data point y, so input has shape (M, 1 + D) input = tf.concat([ tf.reshape(data[y], [FLAGS.M, 1]), tf.tile(tf.reshape(global_vars[w], [1, FLAGS.D]), [FLAGS.M, 1])], 1) hidden = tf.layers.dense(input, 64, activation=tf.nn.relu) output = tf.layers.dense(hidden, 1, activation=None) return output ed.set_seed(42) # DATA w_true = np.ones(FLAGS.D) * 5.0 X_train, y_train = build_toy_dataset(FLAGS.N, w_true) X_test, y_test = build_toy_dataset(FLAGS.N, w_true) data = generator([X_train, y_train], FLAGS.M) # MODEL X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D]) y_ph = tf.placeholder(tf.float32, [FLAGS.M]) w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M)) # INFERENCE qw = Normal(loc=tf.get_variable("qw/loc", [FLAGS.D]) + 1.0, scale=tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D]))) inference = ed.ImplicitKLqp( {w: qw}, data={y: y_ph}, discriminator=ratio_estimator, global_vars={w: qw}) inference.initialize(n_iter=5000, n_print=100, scale={y: float(FLAGS.N) / FLAGS.M}) sess = ed.get_session() tf.global_variables_initializer().run() for _ in range(inference.n_iter): X_batch, y_batch = next(data) for _ in range(5): info_dict_d = inference.update( variables="Disc", feed_dict={X: X_batch, y_ph: y_batch}) info_dict = inference.update( variables="Gen", feed_dict={X: X_batch, y_ph: y_batch}) info_dict['loss_d'] = info_dict_d['loss_d'] info_dict['t'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration t = info_dict['t'] inference.print_progress(info_dict) if t == 1 or t % inference.n_print == 0: # Check inferred posterior parameters. mean, std = sess.run([qw.mean(), qw.stddev()]) print("\nInferred mean & std:") print(mean) print(std)
    return X, y


ed.set_seed(42)

N = 40  # number of data points
D = 1  # number of features

# DATA
X_train, y_train = build_toy_dataset(N)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=3.0 * tf.ones(D))
b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([]))
y = Bernoulli(logits=ed.dot(X, w) + b)

# INFERENCE
T = 5000  # number of samples
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T])))
inference = ed.HMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.initialize(n_print=10, step_size=0.6)

tf.global_variables_initializer().run()

# Set up figure.
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
def get_prediction_tf(self, X, Z):
    return ed.dot(X, Z['w'])
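# The snippets in this collection all lean on ed.dot for matrix-vector
# products; a standalone sanity check of its semantics (assuming Edward 1.x
# on TensorFlow 1.x).  ed.dot(M, v) is the matrix-vector product, equivalent
# here to tf.einsum('ij,j->i', M, v); it exists because tf.matmul expects
# both arguments to be matrices.
import numpy as np
import tensorflow as tf
import edward as ed

M = tf.constant(np.arange(6.0).reshape(2, 3), dtype=tf.float32)
v = tf.constant([1.0, 0.0, 2.0])
with tf.Session() as sess:
    print(sess.run(ed.dot(M, v)))                # [ 4. 13.]
    print(sess.run(tf.einsum('ij,j->i', M, v)))  # same values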
def get_prediction_tf(self, X, Z):
    return ed.dot(X, Z['w1']) + ed.dot(X**2, Z['w2'])
book = xlrd.open_workbook(DATA_FILE, encoding_override="utf-8")
sheet = book.sheet_by_index(0)
data = np.asarray([sheet.row_values(i) for i in range(1, sheet.nrows)])
n_samples = sheet.nrows - 1

# 2. create placeholders
x = tf.placeholder(tf.float32, shape=[n_samples, 1], name="x")
y_ph = tf.placeholder(tf.float32, shape=[n_samples], name="y")

# 3. create weight and bias priors (variables named w and b)
w = Normal(loc=tf.zeros(1), scale=tf.ones(1))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))

# 4. predict Y (number of thefts) from the number of fires
y = Normal(loc=ed.dot(x, w) + b, scale=tf.ones(n_samples))

qw = Normal(loc=tf.Variable(tf.random_normal([1])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))
qb = Normal(loc=tf.Variable(tf.random_normal([1])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

sess = ed.get_session()
tf.global_variables_initializer().run()

a = np.reshape(data.T[0], (n_samples, 1))  # avoid hard-coding the row count
inference = ed.KLqp({w: qw, b: qb}, data={x: a, y_ph: data.T[1]})
inference.run(n_samples=2, n_iter=150)  # run() also handles initialization
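# Possible follow-up (a sketch): pull the posterior means out of the
# variational factors and report the fitted fires-to-thefts regression line.
w_mean, b_mean = sess.run([qw.mean(), qb.mean()])
print("fitted line: thefts ~= %.3f * fires + %.3f" % (w_mean[0], b_mean[0]))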
                        np.linspace(6, 8, num=N // 2)])  # integer count for np.linspace
    y = 5.0 * X + norm.rvs(0, noise_std, size=N)
    X = X.reshape((N, 1))
    return X.astype(np.float32), y.astype(np.float32)


N = 40  # num data points
D = 1  # num features

ed.set_seed(42)

X_train, y_train = build_toy_dataset(N)
X_test, y_test = build_toy_dataset(N)

X = ed.placeholder(tf.float32, [N, D], name='X')
beta = Normal(mu=tf.zeros(D), sigma=tf.ones(D), name='beta')
y = Normal(mu=ed.dot(X, beta), sigma=tf.ones(N), name='y')

qmu_mu = tf.Variable(tf.random_normal([D]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D])))
qbeta = Normal(mu=qmu_mu, sigma=qmu_sigma, name='qbeta')

data = {X: X_train, y: y_train}
inference = ed.MFVI({beta: qbeta}, data)
inference.initialize(logdir='train')

sess = ed.get_session()
for t in range(501):
    _, loss = sess.run([inference.train, inference.loss], {X: data[X]})
    inference.print_progress(t, loss)

y_post = ed.copy(y, {beta: qbeta.mean()})
import edward as ed  # needed for ed.dot and ed.KLqp below
import numpy as np
import tensorflow as tf
from edward.models import Normal

feature_nd = np.genfromtxt(
    'C:\\Users\\Administrator\\Desktop\\数学建模\\mlp_regression_train.csv',
    dtype=float, delimiter=',')
n_samples = np.shape(feature_nd)[0]
feature = feature_nd[:, :np.shape(feature_nd)[1] - 1]
feature = np.float32(feature)
price = feature_nd[:, -1]
price = np.float32(price)

X = tf.placeholder(tf.float32, [np.shape(feature)[0], 11])
w = Normal(loc=tf.zeros(11), scale=tf.ones(11))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(np.shape(feature)[0]))

qw = Normal(loc=tf.Variable(tf.random_normal([11])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([11]))))
qb = Normal(loc=tf.Variable(tf.random_normal([1])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

inference = ed.KLqp({w: qw, b: qb}, data={X: feature, y: price})
inference.run(n_samples=11, n_iter=250)
print("debug")

test_all = np.genfromtxt(
    'C:\\Users\\Administrator\\Desktop\\数学建模\\mlp_regression_val.csv',
    dtype=float, delimiter=',')
test_feature = test_all[:, :np.shape(test_all)[1] - 1]
test_feature = np.float32(test_feature)
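# Possible continuation (a sketch, not in the original fragment): point
# predictions for the held-out features using the variational means.
y_pred = ed.get_session().run(
    ed.dot(tf.constant(test_feature), qw.mean()) + qb.mean())
print(y_pred[:5])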
# tf.Variable(tf.random_normal([20, 1]), name="scale"))) # with tf.name_scope("qb_2"): # qb_2 = Normal(loc=tf.Variable(tf.random_normal([1]), name="loc"), # scale=tf.nn.softplus( # tf.Variable(tf.random_normal([1]), name="scale"))) # # inference = ed.KLqp({W_0: qW_0, b_0: qb_0, # W_1: qW_1, b_1: qb_1, # W_2: qW_2, b_2: qb_2}, data={X: train, y: label[:,1].reshape(-1, 1)}) #inference.run(logdir='log') # MODEL X = tf.placeholder(tf.float32, [N, D]) w = Normal(loc=tf.zeros(D), scale=3.0 * tf.ones(D)) b = Normal(loc=tf.zeros([]), scale=3.0 * tf.ones([])) y = Bernoulli(logits=ed.dot(X, w) + b) # INFERENCE qw_loc = tf.Variable(tf.random_normal([D])) qw_scale = tf.nn.softplus(tf.Variable(tf.random_normal([D]))) qb_loc = tf.Variable(tf.random_normal([]) + 10) qb_scale = tf.nn.softplus(tf.Variable(tf.random_normal([]))) qw = Normal(loc=qw_loc, scale=qw_scale) qb = Normal(loc=qb_loc, scale=qb_scale) inference = ed.KLqp({w: qw, b: qb}, data={X: train, y: label[:, 1]}) inference.initialize(n_print=100, n_iter=10000, n_samples=5) tf.global_variables_initializer().run()
def rnn_cell(hprev, xt):
    return tf.tanh(ed.dot(hprev, Wh) + ed.dot(xt, Wx) + bh)
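# The one-line cell above assumes globally defined parameters Wh, Wx and bh;
# a shape-compatible sketch, where H and Dx are assumed sizes, since ed.dot
# expects hprev and xt to be 1-D tensors of length H and Dx respectively:
H, Dx = 10, 2
Wh = tf.Variable(tf.random_normal([H, H], stddev=0.01))
Wx = tf.Variable(tf.random_normal([Dx, H], stddev=0.01))
bh = tf.Variable(tf.zeros(H))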
def fwd_infer(x):
    h = tf.nn.relu(ed.dot(x, qW_0.sample()) + qb_0.sample())
    h = tf.nn.relu(ed.dot(h, qW_1.sample()) + qb_1.sample())
    h = tf.nn.sigmoid(ed.dot(h, qW_2.sample()) + qb_2.sample())
    return h
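# Sketch of one assumed usage: average several stochastic forward passes
# through fwd_infer to Monte Carlo-estimate the posterior predictive for one
# input.  Because ed.dot needs a 1-D argument, x_single is a single feature
# vector (a float32 tensor of shape [D]); the name is hypothetical.
n_samples = 50
p_mean = tf.reduce_mean(
    tf.stack([fwd_infer(x_single) for _ in range(n_samples)]), axis=0)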
qi_mu = tf.Variable(tf.random_normal([1]))
qi_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([1])))
qi = Normal(loc=qi_mu, scale=qi_sigma)

# qw_mu = tf.expand_dims(tf.convert_to_tensor(beta0[0].astype(np.float32)), 1)
qw_mu = tf.Variable(tf.random_normal([D]))
qw_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D])))
qw = Normal(loc=qw_mu, scale=qw_sigma)

# qb_mu = tf.Variable(tf.random_normal([Db, 1]))
qb_mu = tf.Variable(tf.random_normal([Db]))
# force the random coeff to be zero-distributed
# qb_mu = qb_mu - tf.reduce_mean(qb_mu)
qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([Db])))
qb = Normal(loc=qb_mu, scale=qb_sigma)

yhat = ed.dot(Xnew, Wf) + ed.dot(Znew, Wb) + Ib
y = Normal(loc=yhat, scale=tf.ones(N))

sess = ed.get_session()
inference = ed.KLqp({Wf: qw, Wb: qb, Ib: qi},
                    data={y: y_train, Xnew: x_train, Znew: z_train})
# optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
    D = len(w)
    x = np.random.randn(N, D)
    y = np.dot(x, w) + np.random.normal(0, noise_std, size=N)
    return x, y


N = 40  # number of data points
D = 10  # number of features

w_true = np.random.randn(D)
x_train, y_train = build_toy_dataset(N, w_true)
x_test, y_test = build_toy_dataset(N, w_true)

x = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(x, w) + b, scale=tf.ones(N))

qw = Normal(loc=tf.get_variable("qw/loc", [D]),
            scale=tf.nn.softplus(tf.get_variable("qw/scale", [D])))
qb = Normal(loc=tf.get_variable("qb/loc", [1]),
            scale=tf.nn.softplus(tf.get_variable("qb/scale", [1])))

inference = ed.KLpq({w: qw, b: qb}, data={x: x_train, y: y_train})
inference.run(n_samples=5, n_iter=250)

y_post = ed.copy(y, {w: qw, b: qb})

print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={x: x_test, y_post: y_test}))
    grads = tf.gradients(loss, [v._ref() for v in var_list])
    grads_and_vars = list(zip(grads, var_list))
    return loss, grads_and_vars


ed.set_seed(42)

N = 5000  # number of data points
D = 10  # number of features

# DATA
w_true = np.random.randn(D)
X_data = np.random.randn(N, D)
p = expit(np.dot(X_data, w_true))
y_data = np.array([np.random.binomial(1, i) for i in p])

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
y = Bernoulli(logits=ed.dot(X, w))

# INFERENCE
qw = Normal(loc=tf.Variable(tf.random_normal([D])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))

inference = IWVI({w: qw}, data={X: X_data, y: y_data})
inference.run(K=5, n_iter=1000)

# CRITICISM
print("Mean squared error in true values to inferred posterior mean:")
print(tf.reduce_mean(tf.square(w_true - qw.mean())).eval())
def main(_): def ratio_estimator(data, local_vars, global_vars): """Takes as input a dict of data x, local variable samples z, and global variable samples beta; outputs real values of shape (x.shape[0] + z.shape[0],). In this example, there are no local variables. """ # data[y] has shape (M,); global_vars[w] has shape (D,) # we concatenate w to each data point y, so input has shape (M, 1 + D) input = tf.concat([ tf.reshape(data[y], [FLAGS.M, 1]), tf.tile(tf.reshape(global_vars[w], [1, FLAGS.D]), [FLAGS.M, 1]) ], 1) hidden = tf.layers.dense(input, 64, activation=tf.nn.relu) output = tf.layers.dense(hidden, 1, activation=None) return output ed.set_seed(42) # DATA w_true = np.ones(FLAGS.D) * 5.0 X_train, y_train = build_toy_dataset(FLAGS.N, w_true) X_test, y_test = build_toy_dataset(FLAGS.N, w_true) data = generator([X_train, y_train], FLAGS.M) # MODEL X = tf.placeholder(tf.float32, [FLAGS.M, FLAGS.D]) y_ph = tf.placeholder(tf.float32, [FLAGS.M]) w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) y = Normal(loc=ed.dot(X, w), scale=tf.ones(FLAGS.M)) # INFERENCE qw = Normal(loc=tf.get_variable("qw/loc", [FLAGS.D]) + 1.0, scale=tf.nn.softplus(tf.get_variable("qw/scale", [FLAGS.D]))) inference = ed.ImplicitKLqp({w: qw}, data={y: y_ph}, discriminator=ratio_estimator, global_vars={w: qw}) inference.initialize(n_iter=5000, n_print=100, scale={y: float(FLAGS.N) / FLAGS.M}) sess = ed.get_session() tf.global_variables_initializer().run() for _ in range(inference.n_iter): X_batch, y_batch = next(data) for _ in range(5): info_dict_d = inference.update(variables="Disc", feed_dict={ X: X_batch, y_ph: y_batch }) info_dict = inference.update(variables="Gen", feed_dict={ X: X_batch, y_ph: y_batch }) info_dict['loss_d'] = info_dict_d['loss_d'] info_dict[ 't'] = info_dict['t'] // 6 # say set of 6 updates is 1 iteration t = info_dict['t'] inference.print_progress(info_dict) if t == 1 or t % inference.n_print == 0: # Check inferred posterior parameters. mean, std = sess.run([qw.mean(), qw.stddev()]) print("\nInferred mean & std:") print(mean) print(std)
    return x, y


ed.set_seed(123)

N = 40
D = 10

w_true = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, w_true)
X_test, y_test = build_toy_dataset(N, w_true)

X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# Inference
qw = Normal(mu=tf.Variable(tf.random_normal([D])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

inference = ed.KLqp({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(n_samples=3, n_iter=1000)

# Criticism
y_post = ed.copy(y, {w: qw, b: qb})
def build_toy_dataset(N, noise_std=0.1):
    X = np.concatenate([np.linspace(0, 2, num=N // 2),
                        np.linspace(6, 8, num=N // 2)])  # np.linspace needs an integer count
    y = 5.0 * X + np.random.normal(0, noise_std, size=N)
    X = X.reshape((N, 1))
    return X, y


ed.set_seed(42)

N = 40  # num data points
D = 1  # num features

# DATA
X_data, y_data = build_toy_dataset(N)

# MODEL
X = tf.cast(X_data, tf.float32)
w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
b = Normal(loc=tf.zeros(1), scale=tf.ones(1))
y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(N))

# INFERENCE
qw = Normal(loc=tf.Variable(tf.random_normal([D])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(loc=tf.Variable(tf.random_normal([1])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

inference = ed.KLqp({w: qw, b: qb}, data={y: y_data})
inference.run()
ed.set_seed(42)

N = 500  # number of data points
M = 50  # batch size during training
D = 2  # number of features

# DATA
w_true = np.ones(D) * 5.0
X_train, y_train = build_toy_dataset(N, w_true)
X_test, y_test = build_toy_dataset(N, w_true)

# MODEL
X = tf.placeholder(tf.float32, [M, D])
y_ph = tf.placeholder(tf.float32, [M])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
y = Normal(mu=ed.dot(X, w), sigma=tf.ones(M))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D]) + 1.0),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))

inference = ed.ImplicitKLqp({w: qw}, data={y: y_ph},
                            discriminator=ratio_estimator,
                            global_vars={w: qw})
inference.initialize(n_iter=5000, n_print=100, scale={y: float(N) / M})

sess = ed.get_session()
tf.global_variables_initializer().run()

i = 0
def main(_): ed.set_seed(42) # DATA X_train, y_train = build_toy_dataset(FLAGS.N) X_test, y_test = build_toy_dataset(FLAGS.N) # MODEL X = tf.placeholder(tf.float32, [FLAGS.N, FLAGS.D]) w = Normal(loc=tf.zeros(FLAGS.D), scale=tf.ones(FLAGS.D)) b = Normal(loc=tf.zeros(1), scale=tf.ones(1)) y = Normal(loc=ed.dot(X, w) + b, scale=tf.ones(FLAGS.N)) # INFERENCE qw = Empirical(params=tf.get_variable("qw/params", [FLAGS.T, FLAGS.D])) qb = Empirical(params=tf.get_variable("qb/params", [FLAGS.T, 1])) inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train}) inference.run(step_size=1e-3) # CRITICISM # Plot posterior samples. sns.jointplot(qb.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride], qw.params.eval()[FLAGS.nburn:FLAGS.T:FLAGS.stride]) plt.show() # Posterior predictive checks. y_post = ed.copy(y, {w: qw, b: qb}) # This is equivalent to # y_post = Normal(loc=ed.dot(X, qw) + qb, scale=tf.ones(FLAGS.N)) print("Mean squared error on test data:") print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test})) print("Displaying prior predictive samples.") n_prior_samples = 10 w_prior = w.sample(n_prior_samples).eval() b_prior = b.sample(n_prior_samples).eval() plt.scatter(X_train, y_train) inputs = np.linspace(-1, 10, num=400) for ns in range(n_prior_samples): output = inputs * w_prior[ns] + b_prior[ns] plt.plot(inputs, output) plt.show() print("Displaying posterior predictive samples.") n_posterior_samples = 10 w_post = qw.sample(n_posterior_samples).eval() b_post = qb.sample(n_posterior_samples).eval() plt.scatter(X_train, y_train) inputs = np.linspace(-1, 10, num=400) for ns in range(n_posterior_samples): output = inputs * w_post[ns] + b_post[ns] plt.plot(inputs, output) plt.show()
    x = (x - 4.0) / 4.0
    x = x.reshape((N, D))
    return x, y


ed.set_seed(42)

N = 40  # number of data points
D = 1  # number of features

x_train, y_train = build_toy_dataset(N)

x = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=3.0 * tf.ones(D))
b = Normal(mu=tf.zeros([]), sigma=3.0 * tf.ones([]))
y = Bernoulli(logits=ed.dot(x, w) + b)

qw_mu = tf.Variable(tf.random_normal([D]))
qw_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D])))
qb_mu = tf.Variable(tf.random_normal([]) + 10)
qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qw = Normal(mu=qw_mu, sigma=qw_sigma)
qb = Normal(mu=qb_mu, sigma=qb_sigma)

sess = ed.get_session()
data = {x: x_train, y: y_train}
inference = ed.KLqp({w: qw, b: qb}, data)
inference.initialize(n_print=10, n_iter=600)

init = tf.global_variables_initializer()