np.random.seed(1)
tf.set_random_seed(1)

x_ph_bin = tf.placeholder(tf.float32, [M, len(binfeats)], name='x_bin')    # binary inputs
x_ph_cont = tf.placeholder(tf.float32, [M, len(contfeats)], name='x_cont')  # continuous inputs
t_ph = tf.placeholder(tf.float32, [M, 1])
y_ph = tf.placeholder(tf.float32, [M, 1])

x_ph = tf.concat([x_ph_bin, x_ph_cont], 1)
activation = tf.nn.elu

# CEVAE model (decoder)
# p(z)
z = Normal(loc=tf.zeros([tf.shape(x_ph)[0], d]),
           scale=tf.ones([tf.shape(x_ph)[0], d]))

# p(x|z)
hx = fc_net(z, (nh - 1) * [h], [], 'px_z_shared',
            lamba=lamba, activation=activation)
logits = fc_net(hx, [h], [[len(binfeats), None]], 'px_z_bin',
                lamba=lamba, activation=activation)
x1 = Bernoulli(logits=logits, dtype=tf.float32, name='bernoulli_px_z')

mu, sigma = fc_net(hx, [h], [[len(contfeats), None], [len(contfeats), tf.nn.softplus]],
def line_search_dkl(weights, locs, diags, mu_s, cov_s, x, k):
    def softplus(v):
        # log(1 + exp(v)) is the softplus transform (previously misnamed softmax).
        return np.log(1 + np.exp(v))

    N_samples = 10
    weights = [weights]
    qt_comps = [
        Normal(loc=tf.convert_to_tensor(locs[i]),
               scale=tf.convert_to_tensor(diags[i])) for i in range(len(locs))
    ]
    qt = Mixture(cat=Categorical(probs=tf.convert_to_tensor(weights)),
                 components=qt_comps,
                 sample_shape=N)
    qt = InfiniteMixtureScipy(stats.multivariate_normal)
    qt.weights = weights[0]
    qt.params = list(
        zip([[l] for l in locs], [[softplus(np.dot(d, d))] for d in diags]))

    sample_q = qt.sample_n(N_samples)

    s = stats.multivariate_normal([mu_s],
                                  np.dot(np.array([cov_s]), np.array([cov_s])))
    sample_s = s.rvs(N_samples)

    new_locs = copy.copy(locs)
    new_diags = copy.copy(diags)
    new_locs.append([mu_s])
    new_diags.append([cov_s])

    gamma = 2. / (k + 2.)
    n_steps = 10
    prog_bar = ed.util.Progbar(n_steps)
    for it in range(n_steps):
        print("line_search iter %d, %.5f" % (it, gamma))
        new_weights = copy.copy(weights)
        new_weights[0] = [(1. - gamma) * w for w in new_weights[0]]
        new_weights[0].append(gamma)

        q_next = InfiniteMixtureScipy(stats.multivariate_normal)
        q_next.weights = new_weights[0]
        q_next.params = list(
            zip([[l] for l in new_locs], [[np.dot(d, d)] for d in new_diags]))

        def px_qx_ratio_log_prob(v):
            Lambda = 1.
            ret = x.log_prob([v]).eval()[0] - q_next.log_prob(v)
            ret /= Lambda
            return ret

        rez_s = [
            px_qx_ratio_log_prob(sample_s[ss]) for ss in range(len(sample_s))
        ]
        rez_q = [
            px_qx_ratio_log_prob(sample_q[ss]) for ss in range(len(sample_q))
        ]

        gamma = gamma + 0.1 * (sum(rez_s) - sum(rez_q)) / (N_samples * (it + 1.))
        if gamma >= 1 or gamma <= 0:
            gamma = max(min(gamma, 1.), 0.)
            break
    return gamma
def construct_normal(dims, iter, name='', sample_shape=N):
    loc = tf.get_variable(name + "_loc%d" % iter,
                          initializer=tf.random_normal(dims) + np.random.normal())
    scale = tf.get_variable(name + "_scale%d" % iter,
                            initializer=tf.random_normal(dims))
    # Use the sample_shape argument rather than the hard-coded global N.
    return Normal(loc=loc, scale=tf.nn.softplus(scale), sample_shape=sample_shape)
def generative_adversarial_network_example():
    ed.set_seed(42)

    N = 40  # Number of data points.
    D = 1   # Number of features.

    X_train, y_train = build_toy_dataset(N)

    # --------------------
    # Model.
    with tf.name_scope('model'):
        W_0 = Normal(loc=tf.zeros([D, 10]), scale=tf.ones([D, 10]), name='W_0')
        W_1 = Normal(loc=tf.zeros([10, 10]), scale=tf.ones([10, 10]), name='W_1')
        W_2 = Normal(loc=tf.zeros([10, 1]), scale=tf.ones([10, 1]), name='W_2')
        b_0 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name='b_0')
        b_1 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name='b_1')
        b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name='b_2')

        X = tf.placeholder(tf.float32, [N, D], name='X')
        y = Normal(loc=neural_network(X, W_0, W_1, W_2, b_0, b_1, b_2),
                   scale=0.1 * tf.ones(N), name='y')

    # --------------------
    # Inference.
    with tf.variable_scope('posterior'):
        with tf.variable_scope('qW_0'):
            loc = tf.get_variable('loc', [D, 10])
            scale = tf.nn.softplus(tf.get_variable('scale', [D, 10]))
            qW_0 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qW_1'):
            loc = tf.get_variable('loc', [10, 10])
            scale = tf.nn.softplus(tf.get_variable('scale', [10, 10]))
            qW_1 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qW_2'):
            loc = tf.get_variable('loc', [10, 1])
            scale = tf.nn.softplus(tf.get_variable('scale', [10, 1]))
            qW_2 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qb_0'):
            loc = tf.get_variable('loc', [10])
            scale = tf.nn.softplus(tf.get_variable('scale', [10]))
            qb_0 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qb_1'):
            loc = tf.get_variable('loc', [10])
            scale = tf.nn.softplus(tf.get_variable('scale', [10]))
            qb_1 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qb_2'):
            loc = tf.get_variable('loc', [1])
            scale = tf.nn.softplus(tf.get_variable('scale', [1]))
            qb_2 = Normal(loc=loc, scale=scale)

    inference = ed.KLqp(
        {W_0: qW_0, b_0: qb_0,
         W_1: qW_1, b_1: qb_1,
         W_2: qW_2, b_2: qb_2},
        data={X: X_train, y: y_train})
    inference.run(logdir='log')
import edward as ed
import tensorflow as tf

from edward.models import Variational, Normal
from edward.stats import multivariate_normal
from edward.util import get_dims


class NormalPosterior:
    """p(x, z) = p(z) = p(z | x) = Normal(z; mu, Sigma)"""

    def __init__(self, mu, Sigma):
        self.mu = mu
        self.Sigma = Sigma
        self.num_vars = get_dims(mu)[0]

    def log_prob(self, xs, zs):
        return multivariate_normal.logpdf(zs, self.mu, self.Sigma)


ed.set_seed(42)
mu = tf.constant([1.0, 1.0])
Sigma = tf.constant([[1.0, 0.1], [0.1, 1.0]])
model = NormalPosterior(mu, Sigma)
variational = Variational()
variational.add(Normal(model.num_vars))

inference = ed.MFVI(model, variational)
inference.run(n_iter=10000)
import edward as ed
import tensorflow as tf
from edward.models import Categorical, Normal
from tensorflow.examples.tutorials.mnist import input_data

# Use the TensorFlow method to download and/or load the data.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Parameters.
N = 256   # number of images in a minibatch.
D = 784   # number of features.
K = 10    # number of classes.

# Create a placeholder to hold the data (in minibatches) in a TensorFlow graph.
x = tf.placeholder(tf.float32, [None, D])
# Normal(0, 1) priors for the variables. Note that the syntax assumes TensorFlow 1.1.
w = Normal(loc=tf.zeros([D, K]), scale=tf.ones([D, K]))
b = Normal(loc=tf.zeros(K), scale=tf.ones(K))
# Categorical likelihood for classification.
y = Categorical(tf.matmul(x, w) + b)

# Construct q(w) and q(b); in this case we assume Normal distributions.
qw = Normal(loc=tf.Variable(tf.random_normal([D, K])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, K]))))
qb = Normal(loc=tf.Variable(tf.random_normal([K])),
            scale=tf.nn.softplus(tf.Variable(tf.random_normal([K]))))

# We use a placeholder for the labels in anticipation of the training data.
y_ph = tf.placeholder(tf.int32, [N])
# Define the VI inference technique, i.e. minimise the KL divergence between q and p.
inference = ed.KLqp({w: qw, b: qb}, data={y: y_ph})
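# --- Illustrative continuation, not part of the original snippet ----------------
# One possible way to finish this example: run stochastic variational inference on
# minibatches, rescaling the likelihood so each minibatch stands in for the full
# training set. The epoch count and print frequency below are arbitrary choices.
import numpy as np

n_batches = mnist.train.num_examples // N
inference.initialize(n_iter=100 * n_batches, n_print=100,
                     scale={y: float(mnist.train.num_examples) / N})

sess = ed.get_session()
tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
    X_batch, Y_batch = mnist.train.next_batch(N)
    Y_batch = np.argmax(Y_batch, axis=1)  # one-hot labels -> integer class ids
    info_dict = inference.update(feed_dict={x: X_batch, y_ph: Y_batch})
    inference.print_progress(info_dict)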
def define_variational_distribution(N):
    qf = Normal(loc=tf.Variable(tf.random_normal([N])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([N]))))
    return qf
def main(_): # Generate data true_mu = np.array([-1.0, 0.0, 1.0], np.float32) * 10 true_sigmasq = np.array([1.0**2, 2.0**2, 3.0**2], np.float32) true_pi = np.array([0.2, 0.3, 0.5], np.float32) N = 10000 K = len(true_mu) true_z = np.random.choice(np.arange(K), size=N, p=true_pi) x_data = true_mu[true_z] + np.random.randn(N) * np.sqrt( true_sigmasq[true_z]) # Prior hyperparameters pi_alpha = np.ones(K, dtype=np.float32) mu_sigma = np.std(true_mu) sigmasq_alpha = 1.0 sigmasq_beta = 2.0 # Model pi = Dirichlet(pi_alpha) mu = Normal(0.0, mu_sigma, sample_shape=K) sigmasq = InverseGamma(sigmasq_alpha, sigmasq_beta, sample_shape=K) x = ParamMixture(pi, { 'loc': mu, 'scale': tf.sqrt(sigmasq) }, Normal, sample_shape=N) z = x.cat # Conditionals mu_cond = ed.complete_conditional(mu) sigmasq_cond = ed.complete_conditional(sigmasq) pi_cond = ed.complete_conditional(pi) z_cond = ed.complete_conditional(z) sess = ed.get_session() # Initialize randomly pi_est, mu_est, sigmasq_est, z_est = sess.run([pi, mu, sigmasq, z]) print('Initial parameters:') print('pi:', pi_est) print('mu:', mu_est) print('sigmasq:', sigmasq_est) print() # Gibbs sampler cond_dict = { pi: pi_est, mu: mu_est, sigmasq: sigmasq_est, z: z_est, x: x_data } t0 = time() T = 500 for t in range(T): z_est = sess.run(z_cond, cond_dict) cond_dict[z] = z_est pi_est, mu_est = sess.run([pi_cond, mu_cond], cond_dict) cond_dict[pi] = pi_est cond_dict[mu] = mu_est sigmasq_est = sess.run(sigmasq_cond, cond_dict) cond_dict[sigmasq] = sigmasq_est print('took %.3f seconds to run %d iterations' % (time() - t0, T)) print() print('Final sample for parameters::') print('pi:', pi_est) print('mu:', mu_est) print('sigmasq:', sigmasq_est) print() print() print('True parameters:') print('pi:', true_pi) print('mu:', true_mu) print('sigmasq:', true_sigmasq) print() plt.figure(figsize=[10, 10]) plt.subplot(2, 1, 1) plt.hist(x_data, 50) plt.title('Empirical Distribution of $x$') plt.xlabel('$x$') plt.ylabel('frequency') xl = plt.xlim() plt.subplot(2, 1, 2) plt.hist(sess.run(x, {pi: pi_est, mu: mu_est, sigmasq: sigmasq_est}), 50) plt.title("Predictive distribution $p(x \mid \mathrm{inferred }\ " "\pi, \mu, \sigma^2)$") plt.xlabel('$x$') plt.ylabel('frequency') plt.xlim(xl) plt.show()
import edward as ed
import tensorflow as tf

from edward.models import Normal
from edward.stats import multivariate_normal
from edward.util import get_dims


class NormalPosterior:
    """p(x, z) = p(z) = p(z | x) = Normal(z; mu, sigma)"""

    def __init__(self, mu, sigma):
        self.mu = mu
        self.sigma = sigma
        self.n_vars = get_dims(mu)[0]

    def log_prob(self, xs, zs):
        return multivariate_normal.logpdf(zs['z'], self.mu, self.sigma)


ed.set_seed(42)
mu = tf.constant([1.0, 1.0])
sigma = tf.constant([[1.0, 0.1], [0.1, 1.0]])
model = NormalPosterior(mu, sigma)

qz_mu = tf.Variable(tf.random_normal([model.n_vars]))
qz_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([model.n_vars])))
qz = Normal(mu=qz_mu, sigma=qz_sigma)

inference = ed.MFVI({'z': qz}, model_wrapper=model)
inference.run(n_iter=300)
    y = y.astype(np.float32)
    return X, y


ed.set_seed(42)

N = 40  # number of data points
D = 1   # number of features

# DATA
X_train, y_train = build_toy_dataset(N)
X_test, y_test = build_toy_dataset(N)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
T = 5000     # Number of samples.
nburn = 100  # Number of burn-in samples.
stride = 10  # Frequency with which to plot samples.
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T, 1])))

inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(step_size=1e-3)

# CRITICISM
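# --- Illustrative sketch, not part of the original snippet ----------------------
# The CRITICISM section above is left empty; mirroring the other regression
# examples in this collection, one could form the posterior predictive with
# ed.copy and score it on the held-out data with ed.evaluate.
y_post = ed.copy(y, {w: qw, b: qb})
print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={X: X_test, y_post: y_test}))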
def make_bayes_net(self, topk=np.inf, filename='', visualize_graph=False): N, D = self.design_matrix.shape num_discrete_variables = self.num_discrete_variables discrete_variable_idxs = tuple(np.arange(num_discrete_variables)) discrete_variable_outs = [ dict(zip(*np.unique(self.design_matrix[:, idx], return_index=True))) for idx in discrete_variable_idxs ] discrete_variable_outs_size = [ len(out) for out in discrete_variable_outs ] discrete_variable_prior_pi = [ tf.convert_to_tensor( Dirichlet(concentration=tf.ones( [discrete_variable_outs_size[idx]]), name='Dirichlet_d_pi_' + str(idx))) for idx in range(num_discrete_variables) ] discrete_variable_vars = [ tf.convert_to_tensor( Categorical(logits=tf.tile( tf.expand_dims(discrete_variable_prior_pi[idx], axis=0), [N, discrete_variable_outs_size[idx]]), name='Categorical_d_' + str(idx))) for idx in range(num_discrete_variables) ] continus_variable_prior_w = dict() continus_variable_prior_b = dict() continus_variable_prior_sigma = dict() continus_variable_vars = dict() tmp_idx = 1 for idx in self.continus_variable_idxs: if tmp_idx % 100 == 0: print(tmp_idx) tmp_idx += 1 discrete_pars = np.where(self.network[discrete_variable_idxs, idx])[0] discrete_par_size = [ discrete_variable_outs_size[par] for par in discrete_pars ] if len(discrete_par_size) == 0: discrete_par_vars = tf.zeros([N, 0]) elif len(discrete_par_size) == 1: discrete_par_vars = tf.expand_dims( discrete_variable_vars[discrete_pars[0]], axis=1) else: discrete_par_vars = tf.stack( [discrete_variable_vars[par] for par in discrete_pars], axis=1) continus_pars = self.continus_variable_idxs[np.where( self.network[self.continus_variable_idxs, idx])[0]] if topk < len(continus_pars): continus_pars = list( map( lambda x: x[0], sorted(zip(continus_pars, self.network[continus_pars, :].sum(1)), key=lambda x: x[1], reverse=True)[:topk])) continus_par_size = len(continus_pars) if continus_par_size == 0: continus_par_vars = tf.zeros([N, 0]) elif continus_par_size == 1: continus_par_vars = tf.expand_dims( continus_variable_vars[continus_pars[0]], axis=1) else: continus_par_vars = tf.stack( [continus_variable_vars[par] for par in continus_pars], axis=1) continus_variable_prior_w[idx] = tf.convert_to_tensor( Normal(loc=tf.zeros(discrete_par_size + [continus_par_size]), scale=tf.ones(discrete_par_size + [continus_par_size]), name='Normal_c_w_' + str(idx))) continus_variable_prior_b[idx] = tf.convert_to_tensor( Normal(loc=tf.zeros(discrete_par_size), scale=tf.ones(discrete_par_size), name='Normal_c_b_' + str(idx))) continus_variable_prior_sigma[idx] = tf.convert_to_tensor( Normal(loc=tf.zeros([1]), scale=tf.ones([1]), name='Normal_c_sigma_' + str(idx))) continus_variable_vars[idx] = tf.convert_to_tensor(Normal(loc=tf.add_n([tf.reduce_sum(tf.multiply(continus_par_vars, tf.gather_nd(continus_variable_prior_w[idx], discrete_par_vars)), axis=1), tf.gather_nd(continus_variable_prior_b[idx], discrete_par_vars)]), \ scale=continus_variable_prior_sigma[idx], name='Normal_c_'+str(idx))) for i in range(num_discrete_variables): tf.add_to_collection('d_pi', discrete_variable_prior_pi[i]) tf.add_to_collection('d', discrete_variable_vars[i]) for i in self.continus_variable_idxs: tf.add_to_collection('c_w', continus_variable_prior_w[i]) tf.add_to_collection('c_b', continus_variable_prior_b[i]) tf.add_to_collection('c_sigma', continus_variable_prior_sigma[i]) tf.add_to_collection('c', continus_variable_vars[i]) filename = '_'.join( [self.data_filename, self.name, filename, 'bayes_net.meta']) tf.train.export_meta_graph( 
        filename, as_text=True,
        collection_list=['d_pi', 'd', 'c_w', 'c_b', 'c_sigma', 'c'])
    if visualize_graph:  # for TensorBoard; run: tensorboard --logdir=.
        sess = tf.Session()
        tf.summary.FileWriter(filename + '_tensorboard', sess.graph)
    os.makedirs(out_model)

# DATA. MNIST batches are fed at training time.
# (x_train, _), (x_test, _) = mnist(data_dir)
x_data = np.load("animeface-character-dataset/anime.npy")
x_train = x_data
x_train_generator = generator(x_train, M)
x_ph = tf.placeholder(tf.float32, [M, 128 * 128 * nch])

# MODEL
with tf.variable_scope("Gen"):
    # eps = Uniform(low=tf.zeros([M, D]) - 1.0, high=tf.ones([M, D]))
    eps = Normal(loc=tf.zeros([M, D]), scale=tf.ones([M, D]))
    x = generative_network(eps)

# INFERENCE
# optimizer = tf.train.RMSPropOptimizer(learning_rate=5e-5)
# optimizer_d = tf.train.RMSPropOptimizer(learning_rate=5e-6)
optimizer = tf.train.AdamOptimizer(learning_rate=5e-5)
optimizer_d = tf.train.AdamOptimizer(learning_rate=5e-5)

inference = ed.GANInference(data={x: x_ph},
                            discriminator=discriminative_network)
inference.initialize(optimizer=optimizer, optimizer_d=optimizer_d)
# n_iter=15000, n_print=1000)
# n_iter=15000, n_print=1000, clip=0.01, penalty=None)
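# --- Illustrative training loop, not part of the original snippet ---------------
# ed.GANInference alternates generator and discriminator updates inside update();
# we only need to feed batches of real images. This assumes `generator(x_train, M)`
# above yields batches shaped [M, 128*128*nch].
sess = ed.get_session()
tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
    x_batch = next(x_train_generator)
    info_dict = inference.update(feed_dict={x_ph: x_batch})
    inference.print_progress(info_dict)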
def main(): try: if not ('.csv' in args.input): raise Exception('input_format') if not ('.pkl' in args.output): raise Exception('output_format') with open(args.input, 'rb') as input: # DATA reader = csv.reader(input, delimiter=';') reader.next() n = 0 xn = [] for track in reader: print('Track {}'.format(n)) track = format_track(track[0]) xn.append(track) n += 1 xn = np.asarray(xn) # N x D xn = xn.T # D x N D = len(xn) N = len(xn[0]) # MODEL ds = tf.contrib.distributions sigma = ed.models.Gamma(1.0, 1.0) alpha = ed.models.Gamma(tf.ones([K]), tf.ones([K])) w = Normal(mu=tf.zeros([D, K]), sigma=tf.reshape(tf.tile(alpha, [D]), [D, K])) z = Normal(mu=tf.zeros([K, N]), sigma=tf.ones([K, N])) mu = Normal(mu=tf.zeros([D]), sigma=tf.ones([D])) x = Normal(mu=tf.matmul(w, z) + tf.transpose(tf.reshape(tf.tile(mu, [N]), [N, D])), sigma=sigma * tf.ones([D, N])) # INFERENCE qalpha = ed.models.TransformedDistribution( distribution=ed.models.NormalWithSoftplusSigma( mu=tf.Variable(tf.random_normal([K])), sigma=tf.Variable(tf.random_normal([K]))), bijector=ds.bijector.Exp(), name='qalpha') qw = Normal(mu=tf.Variable(tf.random_normal([D, K])), sigma=tf.nn.softplus( tf.Variable(tf.random_normal([D, K])))) qz = Normal(mu=tf.Variable(tf.random_normal([K, N])), sigma=tf.nn.softplus( tf.Variable(tf.random_normal([K, N])))) data_mean = np.mean(xn, axis=1).astype(np.float32, copy=False) qmu = Normal(mu=tf.Variable(data_mean + tf.random_normal([D])), sigma=tf.nn.softplus( tf.Variable(tf.random_normal([D])))) qsigma = ed.models.TransformedDistribution( distribution=ed.models.NormalWithSoftplusSigma( mu=tf.Variable(0.0), sigma=tf.Variable(1.0)), bijector=ds.bijector.Exp(), name='qsigma') inference = ed.KLqp( { alpha: qalpha, w: qw, z: qz, mu: qmu, sigma: qsigma }, data={x: xn}) inference.run(n_iter=N_ITERS, n_samples=N_SAMPLES) alphas = tf.exp(qalpha.distribution.mean()).eval() alphas.sort() # mean_alphas = np.mean(alphas) print('Alphas: {}'.format(alphas)) points = qz.eval() xn_new = [] for i in range(len(alphas)): # if alphas[i] > (mean_alphas * 1.2): xn_new.append(points[i]) xn_new = np.asarray(xn_new).T # Normalization maxs = np.max(xn_new, axis=0) mins = np.min(xn_new, axis=0) rng = maxs - mins high = 100.0 low = 0.0 xn_new = high - (((high - low) * (maxs - xn_new)) / rng) print('New points: {}'.format(xn_new)) print('Number of points: {}'.format(len(xn_new))) print('Point dimensions: {}'.format(len(xn_new[0]))) with open(args.output, 'w') as output: pkl.dump({'xn': np.array(xn_new)}, output) except IOError: print('File not found!') except Exception as e: if e.args[0] == 'input_format': print('Input must be a CSV file') elif e.args[0] == 'output_format': print('Output must be a PKL file') else: print('Unexpected error: {}'.format(sys.exc_info()[0])) raise
def test_model_wrapper(self):
    tf.InteractiveSession()
    model = NormalNormal()
    qmu = Normal(mu=tf.Variable(0.0), sigma=tf.constant(1.0))
    ed.Inference({'mu': qmu}, model_wrapper=model)
ed.set_seed(42)

N = 40  # number of data points
D = 1   # number of features

x_train, y_train = build_toy_dataset(N)

model = LinearModel()

qw_mu = tf.Variable(tf.random_normal([D]))
qw_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D])))
qb_mu = tf.Variable(tf.random_normal([]))
qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([])))
qw = Normal(mu=qw_mu, sigma=qw_sigma)
qb = Normal(mu=qb_mu, sigma=qb_sigma)

# Set up figure.
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

sess = ed.get_session()
data = {'x': x_train, 'y': y_train}
inference = ed.KLqp({'w': qw, 'b': qb}, data, model)
inference.initialize(n_samples=5, n_iter=250, n_print=5)

init = tf.initialize_all_variables()
init.run()
period_pre = tf.Variable(np.log(np.exp(7.0 * len_init) - 1), dtype=tf.float32)
period_len_pre = tf.Variable(1.0)
period_var_pre = tf.Variable(np.log(np.exp(0.5) - 1), dtype=tf.float32)
# period = tf.nn.softplus(period_pre)
period_length = tf.nn.softplus(period_len_pre)

Kuu = kernelfx(xu, xu)

fu_loc = tf.zeros((p, m))
fu_scale = tf.cast(tf.cholesky(Kuu + offset * tf.eye(m, dtype=tf.float64),
                               name='fu_scale'),
                   dtype=tf.float32)

u = MultivariateNormalTriL(loc=fu_loc, scale_tril=fu_scale, name='pu')

x_var = Normal(loc=tf.zeros((M, Q)), scale=1.0, name='x_var')
idx_ph = tf.placeholder(tf.int32, M)
z = tf.constant(z_init, dtype=tf.float32)
x = tf.concat([x_var, tf.gather(z, idx_ph)], 1, name='x')
print(x.shape)

Kfu = kernelfx(x, xu)
Kff = kernelfx(x, x)

Kuuinv = tf.matrix_inverse(Kuu + offset * tf.eye(m, dtype=tf.float64))
KfuKuuinv = tf.matmul(Kfu, Kuuinv)
KffKuuinvU = [
# Build graph for prior distributions # if str(sys.argv[5]) == 'laplace': W_0 = Laplace(loc=tf.zeros([D, n_hidden]), scale=std**2 / D * tf.ones([D, n_hidden])) W_1 = Laplace(loc=tf.zeros([n_hidden, n_hidden]), scale=std**2 / n_hidden * tf.ones([n_hidden, n_hidden])) W_2 = Laplace(loc=tf.zeros([n_hidden, K]), scale=std**2 / n_hidden * tf.ones([n_hidden, K])) b_0 = Laplace(loc=tf.zeros(n_hidden), scale=std**2 / D * tf.ones(n_hidden)) b_1 = Laplace(loc=tf.zeros(n_hidden), scale=std**2 / n_hidden * tf.ones(n_hidden)) b_2 = Laplace(loc=tf.zeros(K), scale=std**2 / n_hidden * tf.ones(K)) if str(sys.argv[5]) == 'normal': W_0 = Normal(loc=tf.zeros([D, n_hidden]), scale=std * D**(-.5) * tf.ones([D, n_hidden])) W_1 = Normal(loc=tf.zeros([n_hidden, n_hidden]), scale=std * n_hidden**(-.5) * tf.ones([n_hidden, n_hidden])) W_2 = Normal(loc=tf.zeros([n_hidden, K]), scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K])) b_0 = Normal(loc=tf.zeros(n_hidden), scale=std * D**(-.5) * tf.ones(n_hidden)) b_1 = Normal(loc=tf.zeros(n_hidden), scale=std * n_hidden**(-.5) * tf.ones(n_hidden)) b_2 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K)) if str(sys.argv[5]) == 'T': W_0 = StudentT(df=df * tf.ones([D, n_hidden]), loc=tf.zeros([D, n_hidden]), scale=std**2 / D * tf.ones([D, n_hidden])) W_1 = StudentT(df=df * tf.ones([n_hidden, n_hidden]),
import edward as ed
import tensorflow as tf
from edward.models import Categorical, Normal
from tensorflow.examples.tutorials.mnist import input_data

# Use the TensorFlow method to download and/or load the data.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Parameters.
N = 128   # number of images in a minibatch.
D = 784   # number of features.
K = 10    # number of classes.

# Create a placeholder to hold the data (in minibatches) in a TensorFlow graph.
x = tf.placeholder(tf.float32, [None, D])

# Normal(0, 1) priors for the variables. Note that the syntax assumes TensorFlow 1.1.
w1 = Normal(loc=tf.zeros([D, 256]), scale=tf.ones([D, 256]))
b1 = Normal(loc=tf.zeros(256), scale=tf.ones(256))
l1 = tf.nn.leaky_relu(tf.matmul(x, w1) + b1)

w2 = Normal(loc=tf.zeros([256, 256]), scale=tf.ones([256, 256]))
b2 = Normal(loc=tf.zeros(256), scale=tf.ones(256))
l2 = tf.nn.leaky_relu(tf.matmul(l1, w2) + b2)

w3 = Normal(loc=tf.zeros([256, K]), scale=tf.ones([256, K]))
b3 = Normal(loc=tf.zeros(K), scale=tf.ones(K))

# Categorical likelihood for classification.
y = Categorical(tf.matmul(l2, w3) + b3)

# Construct q(w) and q(b); in this case we assume Normal distributions.
qw1 = Normal(loc=tf.Variable(tf.random_normal([D, 256])),
ed.set_seed(42)

n_students = 50000
n_questions = 2000
n_obs = 200000

# DATA
data, true_s_etas, true_q_etas = build_toy_dataset(n_students, n_questions,
                                                   n_obs)
obs = data['outcomes'].values
student_ids = data['student_id'].values.astype(int)
question_ids = data['question_id'].values.astype(int)

# MODEL
lnvar_students = Normal(loc=tf.zeros(1), scale=tf.ones(1))
lnvar_questions = Normal(loc=tf.zeros(1), scale=tf.ones(1))

sigma_students = tf.sqrt(tf.exp(lnvar_students))
sigma_questions = tf.sqrt(tf.exp(lnvar_questions))

overall_mu = Normal(loc=tf.zeros(1), scale=tf.ones(1))

student_etas = Normal(loc=tf.zeros(n_students),
                      scale=sigma_students * tf.ones(n_students))
question_etas = Normal(loc=tf.zeros(n_questions),
                       scale=sigma_questions * tf.ones(n_questions))

observation_logodds = tf.gather(student_etas, student_ids) + \
    tf.gather(question_etas, question_ids) + \
    overall_mu
# M: nb_datapoints # N: nb_components hpos = tf.reshape(hpos, (1, 2)) euclidean_distance = tf.square( tf.subtract( gpos, # shape=(M, 2) tf.expand_dims(hpos, axis=1) # shape=(N, 1, 2) ) # shape=(N, M, 2) ) distance_factor = tf.divide(1., euclidean_distance) # shape=(N, M, 2) mean = tf.reduce_sum(distance_factor, axis=(0, )) # shape=(M, 2) return mean # (x, y) ~ Normal([0.5, 0.5], [0.5, 0.5]) galaxies_pos = Normal(mu=tf.fill([nb_datapoints, nb_features], 0.5), sigma=tf.fill([nb_datapoints, nb_features], POS_STD)) # latent variable z mu = Normal(mu=tf.fill([nb_components, nb_features], 0.5), sigma=tf.fill([nb_components, nb_features], POS_STD)) sigma = InverseGamma(alpha=tf.ones([nb_components, nb_features]), beta=tf.ones([nb_components, nb_features])) cat = Categorical(logits=tf.zeros([nb_datapoints, nb_components])) components = [ MultivariateNormalDiag(mu=calculte_mean_from_distance_factor( galaxies_pos, mu[k]), diag_stdev=tf.ones([nb_datapoints, 1]) * sigma[k]) for k in range(nb_components) ] x = Mixture(cat=cat, components=components)
def define_ard_variational_distribution(D):
    qgamma = Normal(loc=tf.Variable(tf.random_normal([D])),
                    scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
    return qgamma
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding="SAME")


x = tf.placeholder(tf.float32, shape=[N, 784], name="x_placeholder")
# y_ = tf.placeholder("float", shape=[None, 10])
y_ = tf.placeholder(tf.int32, [N], name="y_placeholder")

x_image = tf.reshape(x, [-1, 28, 28, 1])

with tf.name_scope("model"):
    W_conv1 = Normal(loc=tf.zeros([5, 5, 1, 32]),
                     scale=tf.ones([5, 5, 1, 32]),
                     name="W_conv1")
    b_conv1 = Normal(loc=tf.zeros([32]), scale=tf.ones([32]), name="b_conv1")
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    # h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1.value()) + b_conv1.value()) may be necessary
    h_pool1 = max_pool_2x2(h_conv1)

    W_conv2 = Normal(loc=tf.zeros([5, 5, 32, 64]),
                     scale=tf.ones([5, 5, 32, 64]),
                     name="W_conv2")
    b_conv2 = Normal(loc=tf.zeros([64]), scale=tf.ones([64]), name="b_conv2")
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    W_fc1 = Normal(loc=tf.zeros([7 * 7 * 64, 64]),
                   scale=tf.ones([7 * 7 * 64, 64]),
ed.set_seed(142)

N = 5000  # number of data points
M = 100   # minibatch size
D = 2     # data dimensionality
K = 1     # latent dimensionality

# DATA
x_train = build_toy_dataset(N, D, K)

# MODEL
w = Normal(mu=tf.zeros([D, K]), sigma=10.0 * tf.ones([D, K]))
z = Normal(mu=tf.zeros([M, K]), sigma=tf.ones([M, K]))
x = Normal(mu=tf.matmul(w, z, transpose_b=True), sigma=tf.ones([D, M]))

# INFERENCE
qw_variables = [
    tf.Variable(tf.random_normal([D, K])),
    tf.Variable(tf.random_normal([D, K]))
]
qw = Normal(mu=qw_variables[0], sigma=tf.nn.softplus(qw_variables[1]))

qz_variables = [
    tf.Variable(tf.random_normal([N, K])),
    tf.Variable(tf.random_normal([N, K]))
]
def train(self, X_train, y_train, X_val, is_print=True): ''' set up BNN and run HMC inference ''' def neural_network(X): # set up the BNN structure using tf if self.activation_fn == 'relu': h = tf.maximum(tf.matmul(X, W_0) + b_0, 0) # relu elif self.activation_fn == 'Lrelu': a = 0.2 h = tf.maximum( tf.matmul(X, W_0) + b_0, a * (tf.matmul(X, W_0) + b_0)) # leakly relu elif self.activation_fn == 'erf': h = tf.erf(tf.matmul(X, W_0) + b_0) elif self.activation_fn == 'tanh': h = tf.tanh(tf.matmul(X, W_0) + b_0) # h = tf.tanh(1.23*tf.matmul(X, W_0) + b_0) # add 1.23 for close to GP erf elif self.activation_fn == 'sigmoid': h = tf.sigmoid(tf.matmul(X, W_0) + b_0) elif self.activation_fn == 'softplus': self.c = 2. # if this is bigger -> relu behaviour, but less 'soft' h = tf.divide( tf.log( tf.exp(tf.multiply(tf.matmul(X, W_0) + b_0, c)) + 1), c) elif self.activation_fn == 'rbf': self.beta_2 = 1 / (2 * self.g_var) h = tf.exp(-self.beta_2 * tf.square(X - W_0)) h = tf.matmul(h, W_1) #+ b_1 return tf.reshape(h, [-1]) def neural_network_deep(X): # set up the BNN structure using tf if self.activation_fn == 'relu': h1 = tf.maximum(tf.matmul(X, W_0) + b_0, 0) # relu h = tf.maximum(tf.matmul(h1, W_1) + b_1, 0) # relu elif self.activation_fn == 'Lrelu': a = 0.2 h1 = tf.maximum( tf.matmul(X, W_0) + b_0, a * (tf.matmul(X, W_0) + b_0)) # leakly relu h = tf.maximum( tf.matmul(h1, W_1) + b_1, a * (tf.matmul(h1, W_1) + b_1)) # leakly relu elif self.activation_fn == 'erf': h1 = tf.erf(tf.matmul(X, W_0) + b_0) h = tf.erf(tf.matmul(h1, W_1) + b_1) else: raise Exception('tp: activation not implemented') h = tf.matmul(h, W_2) #+ b_2 return tf.reshape(h, [-1]) if self.activation_fn == 'relu' or self.activation_fn == 'softplus' or self.activation_fn == 'Lrelu': init_stddev_0_w = np.sqrt(self.w_0_var) # /d_in init_stddev_0_b = np.sqrt(self.b_0_var) # /d_in init_stddev_1_w = 1.0 / np.sqrt( self.hidden_size) #*np.sqrt(10) # 2nd layer init. dist elif self.activation_fn == 'tanh' or self.activation_fn == 'erf': init_stddev_0_w = np.sqrt( self.w_0_var) # 1st layer init. dist for weights init_stddev_0_b = np.sqrt(self.b_0_var) # for bias init_stddev_1_w = 1.0 / np.sqrt( self.hidden_size) # 2nd layer init. dist elif self.activation_fn == 'rbf': init_stddev_0_w = np.sqrt(self.u_var) # centres = sig_u init_stddev_0_b = np.sqrt(self.g_var) # fixed /beta init_stddev_1_w = 1.0 / np.sqrt( self.hidden_size) # 2nd layer init. 
dist n = X_train.shape[0] X_dim = X_train.shape[1] y_dim = 1 #y_train.shape[1] with tf.name_scope("model"): W_0 = Normal(loc=tf.zeros([X_dim, self.hidden_size]), scale=init_stddev_0_w * tf.ones([X_dim, self.hidden_size]), name="W_0") if self.deep_NN == False: W_1 = Normal(loc=tf.zeros([self.hidden_size, y_dim]), scale=init_stddev_1_w * tf.ones([self.hidden_size, y_dim]), name="W_1") b_0 = Normal(loc=tf.zeros(self.hidden_size), scale=init_stddev_0_b * tf.ones(self.hidden_size), name="b_0") b_1 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_1") else: W_1 = Normal( loc=tf.zeros([self.hidden_size, self.hidden_size]), scale=init_stddev_1_w * tf.ones([self.hidden_size, y_dim]), name="W_1") b_0 = Normal(loc=tf.zeros(self.hidden_size), scale=init_stddev_0_b * tf.ones(self.hidden_size), name="b_0") W_2 = Normal(loc=tf.zeros([self.hidden_size, y_dim]), scale=init_stddev_1_w * tf.ones([self.hidden_size, y_dim]), name="W_2") b_1 = Normal(loc=tf.zeros(self.hidden_size), scale=init_stddev_1_w * tf.ones(self.hidden_size), name="b_1") b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_2") X = tf.placeholder(tf.float32, [n, X_dim], name="X") if self.deep_NN == False: y = Normal(loc=neural_network(X), scale=np.sqrt(self.data_noise) * tf.ones(n), name="y") else: y = Normal(loc=neural_network_deep(X), scale=np.sqrt(self.data_noise) * tf.ones(n), name="y") # inference if self.deep_NN == False: qW_0 = Empirical( tf.Variable(tf.zeros([self.n_samples, X_dim, self.hidden_size]))) qW_1 = Empirical( tf.Variable(tf.zeros([self.n_samples, self.hidden_size, y_dim]))) qb_0 = Empirical( tf.Variable(tf.zeros([self.n_samples, self.hidden_size]))) qb_1 = Empirical(tf.Variable(tf.zeros([self.n_samples, y_dim]))) else: qW_0 = Empirical( tf.Variable(tf.zeros([self.n_samples, X_dim, self.hidden_size]))) qW_1 = Empirical( tf.Variable( tf.zeros( [self.n_samples, self.hidden_size, self.hidden_size]))) qW_2 = Empirical( tf.Variable(tf.zeros([self.n_samples, self.hidden_size, y_dim]))) qb_0 = Empirical( tf.Variable(tf.zeros([self.n_samples, self.hidden_size]))) qb_1 = Empirical( tf.Variable(tf.zeros([self.n_samples, self.hidden_size]))) qb_2 = Empirical(tf.Variable(tf.zeros([self.n_samples, y_dim]))) # get some priors ### !!! TODO, turn this into a proper function # X_pred = X_val.astype(np.float32).reshape((X_val.shape[0], 1)) # self.y_priors = tf.stack([nn_predict(X_pred, W_0.sample(), W_1.sample(),b_0.sample(), b_1.sample()) # for _ in range(10)]) # Neal 2012 # Too large a stepsize will result in a very low acceptance rate for states # proposed by simulating trajectories. Too small a stepsize will either waste # computation time, by the same factor as the stepsize is too small, or (worse) # will lead to slow exploration by a random walk, # https://stats.stackexchange.com/questions/304942/how-to-set-step-size-in-hamiltonian-monte-carlo # If ϵ is too large, then there will be large discretisation error and low acceptance, if ϵ # is too small then more expensive leapfrog steps will be required to move large distances. # Ideally we want the largest possible value of ϵ # that gives reasonable acceptance probability. Unfortunately this may vary for different values of the target variable. # A simple heuristic to set this may be to do a preliminary run with fixed L, # gradually increasing ϵ until the acceptance probability is at an appropriate level. # Setting the trajectory length by trial and error therefore seems necessary. 
# For a problem thought to be fairly difficult, a trajectory with L = 100 might be a # suitable starting point. If preliminary runs (with a suitable ε; see above) show that HMC # reaches a nearly independent point after only one iteration, a smaller value of L might be # tried next. (Unless these “preliminary” runs are actually sufficient, in which case there is # of course no need to do more runs.) If instead there is high autocorrelation in the run # with L = 100, runs with L = 1000 might be tried next # It may also be advisable to randomly sample ϵ # and L form suitable ranges to avoid the possibility of having paths that are close to periodic as this would slow mixing. if self.deep_NN == False: inference = ed.HMC({ W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1 }, data={ X: X_train, y: y_train.ravel() }) else: inference = ed.HMC( { W_0: qW_0, b_0: qb_0, W_1: qW_1, b_1: qb_1, W_2: qW_2, b_2: qb_2 }, data={ X: X_train, y: y_train.ravel() }) inference.run(step_size=self.step_size, n_steps=self.n_steps) # logdir='log' # drop first chunk of burn in samples if self.deep_NN == False: self.qW_0_keep = qW_0.params[self.burn_in:].eval() self.qW_1_keep = qW_1.params[self.burn_in:].eval() self.qb_0_keep = qb_0.params[self.burn_in:].eval() self.qb_1_keep = qb_1.params[self.burn_in:].eval() else: self.qW_0_keep = qW_0.params[self.burn_in:].eval() self.qW_1_keep = qW_1.params[self.burn_in:].eval() self.qb_0_keep = qb_0.params[self.burn_in:].eval() self.qW_2_keep = qW_2.params[self.burn_in:].eval() self.qb_1_keep = qb_1.params[self.burn_in:].eval() self.qb_2_keep = qb_2.params[self.burn_in:].eval() return
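# --- Illustrative sketch, not part of the original class -------------------------
# The comments above describe the usual heuristic for tuning HMC: do preliminary
# runs with a fixed number of leapfrog steps and increase the step size until the
# acceptance rate is still reasonable. A minimal, self-contained version of that
# loop (assuming Edward's HMC reports an 'accept_rate' entry from update()) might
# look like this:
import numpy as np
import tensorflow as tf
import edward as ed
from edward.models import Empirical, Normal

ed.set_seed(42)
x_data = np.array([0.3, -0.2, 0.1, 0.4], dtype=np.float32)  # toy observations

mu = Normal(loc=0.0, scale=1.0)                # prior on the mean
x = Normal(loc=mu, scale=0.5, sample_shape=4)  # likelihood

for eps in [1e-3, 1e-2, 1e-1, 0.5]:            # candidate step sizes
    qmu = Empirical(params=tf.Variable(tf.zeros(500)))
    inference = ed.HMC({mu: qmu}, data={x: x_data})
    inference.initialize(step_size=eps, n_steps=10, n_iter=500)
    sess = ed.get_session()
    tf.global_variables_initializer().run()
    accept = 0.0
    for _ in range(inference.n_iter):
        accept += inference.update()['accept_rate']
    print('step_size %.3f -> mean acceptance %.2f' % (eps, accept / inference.n_iter))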
s_ph = tf.placeholder(tf.int32, [None])          # student ID (categorical)
d_ph = tf.placeholder(tf.int32, [None])          # instructor ID (categorical)
dept_ph = tf.placeholder(tf.int32, [None])       # department ID (categorical)
service_ph = tf.placeholder(tf.float32, [None])  # binary indicator, fixed effect

# Parameters for the fixed effects.
mu = tf.Variable(tf.random_normal([]))           # Bf
service = tf.Variable(tf.random_normal([]))      # beta

# Parameters for the random-effect intercepts.
sigma_s = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([]))))     # variance of the student effect Bs
sigma_d = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([]))))     # variance of the instructor effect Bd
sigma_dept = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([]))))  # variance of the department effect Bdept

# Random-effect intercepts.
eta_s = Normal(loc=tf.zeros(n_s), scale=sigma_s * tf.ones(n_s))
eta_d = Normal(loc=tf.zeros(n_d), scale=sigma_d * tf.ones(n_d))
eta_dept = Normal(loc=tf.zeros(n_dept), scale=sigma_dept * tf.ones(n_dept))

# Random effects plus fixed effects.
yhat = tf.gather(eta_s, s_ph) + \
    tf.gather(eta_d, d_ph) + \
    tf.gather(eta_dept, dept_ph) + \
    mu + service * service_ph
# Here tf.gather picks out the per-observation effect for each sampled index;
# see https://blog.csdn.net/guotong1988/article/details/53172882
y = Normal(loc=yhat, scale=tf.ones(n_obs))

q_eta_s = Normal(loc=tf.get_variable("q_eta_s/loc", [n_s]),
                 scale=tf.nn.softplus(tf.get_variable("q_eta_s/scale", [n_s])))
q_eta_d = Normal(loc=tf.get_variable("q_eta_d/loc", [n_d]),
                 scale=tf.nn.softplus(tf.get_variable("q_eta_d/scale", [n_d])))
q_eta_dept = Normal(loc=tf.get_variable("q_eta_dept/loc", [n_dept]),
plt.axis([-3, 3, -3, 3])
plt.title("Simulated dataset")
plt.show()

K = 2
D = 2
model = MixtureGaussian(K, D)

qpi_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K])))
qmu_mu = tf.Variable(tf.random_normal([K * D]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))
qsigma_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))
qsigma_beta = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))

qpi = Dirichlet(alpha=qpi_alpha)
qmu = Normal(mu=qmu_mu, sigma=qmu_sigma)
qsigma = InverseGamma(alpha=qsigma_alpha, beta=qsigma_beta)

data = {'x': x_train}
inference = ed.MFVI({'pi': qpi, 'mu': qmu, 'sigma': qsigma}, data, model)
inference.run(n_iter=2500, n_samples=10, n_minibatch=20)

# Average per-cluster and per-data point likelihood over many posterior samples.
log_liks = []
for s in range(100):
    zrep = {
        'pi': qpi.sample(()),
        'mu': qmu.sample(()),
        'sigma': qsigma.sample(())
    }
    log_liks += [model.predict(data, zrep)]
def main(argv): del argv x_train, components = build_toy_dataset(N) n_examples, n_features = x_train.shape # save the target outdir = setup_outdir() np.savez(os.path.join(outdir, 'target_dist.npz'), pi=pi, mus=mus, stds=stds) weights, comps = [], [] elbos = [] relbo_vals = [] times = [] for iter in range(FLAGS.n_fw_iter): g = tf.Graph() with g.as_default(): tf.set_random_seed(FLAGS.seed) sess = tf.InteractiveSession() with sess.as_default(): # build model xcomps = [ Normal(loc=tf.convert_to_tensor(mus[i]), scale=tf.convert_to_tensor(stds[i])) for i in range(len(mus)) ] x = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi)), components=xcomps, sample_shape=N) qx = construct_normal([n_features], iter, 'qx') if iter > 0: qtx = Mixture( cat=Categorical(probs=tf.convert_to_tensor(weights)), components=[ Normal( loc=c['loc'][0], #scale_diag=tf.nn.softplus(c['scale_diag'])) for c in comps], sample_shape=N) scale=c['scale_diag'][0]) for c in comps ], sample_shape=N) fw_iterates = {x: qtx} else: fw_iterates = {} sess.run(tf.global_variables_initializer()) total_time = 0 start_inference_time = time.time() inference = relbo.KLqp({x: qx}, fw_iterates=fw_iterates, fw_iter=iter) inference.run(n_iter=FLAGS.LMO_iter) end_inference_time = time.time() total_time += end_inference_time - start_inference_time if iter > 0: relbo_vals.append(-utils.compute_relbo( qx, fw_iterates[x], x, np.log(iter + 1))) if iter == 0: gamma = 1. elif iter > 0 and FLAGS.fw_variant == 'fixed': gamma = 2. / (iter + 2.) elif iter > 0 and FLAGS.fw_variant == 'line_search': start_line_search_time = time.time() gamma = line_search_dkl(weights, [c['loc'] for c in comps], [c['scale_diag'] for c in comps], qx.loc.eval(), qx.stddev().eval(), x, iter) end_line_search_time = time.time() total_time += end_line_search_time - start_line_search_time elif iter > 0 and FLAGS.fw_variant == 'fc': gamma = 2. / (iter + 2.) 
comps.append({ 'loc': qx.mean().eval(), 'scale_diag': qx.stddev().eval() }) weights = utils.update_weights(weights, gamma, iter) print("weights", weights) print("comps", [c['loc'] for c in comps]) print("scale_diags", [c['scale_diag'] for c in comps]) q_latest = Mixture( cat=Categorical(probs=tf.convert_to_tensor(weights)), components=[MultivariateNormalDiag(**c) for c in comps], sample_shape=N) if FLAGS.fw_variant == "fc": start_fc_time = time.time() weights = fully_corrective(q_latest, x) weights = list(weights) for i in reversed(range(len(weights))): w = weights[i] if w == 0: del weights[i] del comps[i] weights = np.array(weights) end_fc_time = time.time() total_time += end_fc_time - start_fc_time q_latest = Mixture( cat=Categorical(probs=tf.convert_to_tensor(weights)), components=[MultivariateNormalDiag(**c) for c in comps], sample_shape=N) elbos.append(elbo(q_latest, x)) outdir = setup_outdir() print("total time", total_time) times.append(float(total_time)) utils.save_times(os.path.join(outdir, 'times.csv'), times) elbos_filename = os.path.join(outdir, 'elbos.csv') logger.info("iter, %d, elbo, %.2f +/- %.2f" % (iter, *elbos[-1])) np.savetxt(elbos_filename, elbos, delimiter=',') logger.info("saving elbos to, %s" % elbos_filename) relbos_filename = os.path.join(outdir, 'relbos.csv') np.savetxt(relbos_filename, relbo_vals, delimiter=',') logger.info("saving relbo values to, %s" % relbos_filename) for_serialization = { 'locs': np.array([c['loc'] for c in comps]), 'scale_diags': np.array([c['scale_diag'] for c in comps]) } qt_outfile = os.path.join(outdir, 'qt_iter%d.npz' % iter) np.savez(qt_outfile, weights=weights, **for_serialization) np.savez(os.path.join(outdir, 'qt_latest.npz'), weights=weights, **for_serialization) logger.info("saving qt to, %s" % qt_outfile) tf.reset_default_graph()
    return x, y


ed.set_seed(42)

N = 40  # number of data points
D = 10  # number of features

# DATA
coeff = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, coeff)
X_test, y_test = build_toy_dataset(N, coeff)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {X: X_train, y: y_train}
inference = ed.KLqp({w: qw, b: qb}, data)
inference.run(n_samples=5, n_iter=250)

# CRITICISM
y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()})
from scipy.stats import norm


def build_toy_dataset(N, noise_std=0.1):
    # Use integer division so np.linspace receives an integer sample count.
    X = np.concatenate(
        [np.linspace(0, 2, num=N // 2), np.linspace(6, 8, num=N // 2)])
    y = 5.0 * X + norm.rvs(0, noise_std, size=N)
    X = X.reshape((N, 1))
    return X.astype(np.float32), y.astype(np.float32)


N = 40  # num data points
p = 1   # num features

ed.set_seed(42)

X_data, y_data = build_toy_dataset(N)
X = X_data

beta = Normal(mu=tf.zeros(p), sigma=tf.ones(p))
y = Normal(mu=ed.dot(X, beta), sigma=tf.ones(N))

qmu_mu = tf.Variable(tf.random_normal([p]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([p])))
qbeta = Normal(mu=qmu_mu, sigma=qmu_sigma)

data = {y: y_data}
inference = ed.MFVI({beta: qbeta}, data)
inference.run(n_iter=500)
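# --- Illustrative check, not part of the original snippet -----------------------
# After inference, the fitted variational mean should sit near the true slope of
# 5.0 used in build_toy_dataset; the learned parameters can be read straight from
# the variational variables defined above.
sess = ed.get_session()
print("posterior mean of beta:", sess.run(qmu_mu))
print("posterior stddev of beta:", sess.run(qmu_sigma))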
def main(_): # setting up output directory outdir = os.path.expanduser(FLAGS.outdir) os.makedirs(outdir, exist_ok=True) N, M, D, R_true, I_train, I_test = get_data() debug('N, M, D', N, M, D) # Solution components weights, qUVt_components = [], [] # Files to log metrics times_filename = os.path.join(outdir, 'times.csv') mse_train_filename = os.path.join(outdir, 'mse_train.csv') mse_test_filename = os.path.join(outdir, 'mse_test.csv') ll_test_filename = os.path.join(outdir, 'll_test.csv') ll_train_filename = os.path.join(outdir, 'll_train.csv') elbos_filename = os.path.join(outdir, 'elbos.csv') gap_filename = os.path.join(outdir, 'gap.csv') step_filename = os.path.join(outdir, 'steps.csv') # 'adafw', 'ada_afw', 'ada_pfw' if FLAGS.fw_variant.startswith('ada'): lipschitz_filename = os.path.join(outdir, 'lipschitz.csv') iter_info_filename = os.path.join(outdir, 'iter_info.txt') start = 0 if FLAGS.restore: #start = 50 #qUVt_components = get_random_components(D, N, M, start) #weights = np.random.dirichlet([1.] * start).astype(np.float32) #lipschitz_estimate = opt.adafw_linit() parameters = np.load(os.path.join(outdir, 'qt_latest.npz')) weights = list(parameters['weights']) start = parameters['fw_iter'] qUVt_components = list(parameters['comps']) assert len(weights) == len(qUVt_components), "Inconsistent storage" # get lipschitz estimate from the file, could've stored it # in params but that would mean different saved file for # adaptive variants if FLAGS.fw_variant.startswith('ada'): lipschitz_filename = os.path.join(outdir, 'lipschitz.csv') if not os.path.isfile(lipschitz_filename): raise ValueError("Inconsistent storage") with open(lipschitz_filename, 'r') as f: l = f.readlines() lipschitz_estimate = float(l[-1].strip()) else: # empty the files present in the folder already open(times_filename, 'w').close() open(mse_train_filename, 'w').close() open(mse_test_filename, 'w').close() open(ll_test_filename, 'w').close() open(ll_train_filename, 'w').close() open(elbos_filename, 'w').close() open(gap_filename, 'w').close() open(step_filename, 'w').close() # 'adafw', 'ada_afw', 'ada_pfw' if FLAGS.fw_variant.startswith('ada'): open(lipschitz_filename, 'w').close() open(iter_info_filename, 'w').close() for t in range(start, start + FLAGS.n_fw_iter): g = tf.Graph() with g.as_default(): tf.set_random_seed(FLAGS.seed) sess = tf.InteractiveSession() with sess.as_default(): # MODEL I = tf.placeholder(tf.float32, [N, M]) scale_uv = tf.concat( [tf.ones([D, N]), tf.ones([D, M])], axis=1) mean_uv = tf.concat( [tf.zeros([D, N]), tf.zeros([D, M])], axis=1) UV = Normal(loc=mean_uv, scale=scale_uv) R = Normal(loc=tf.matmul(tf.transpose(UV[:, :N]), UV[:, N:]), scale=tf.ones([N, M])) # generator dist. 
for matrix R_mask = R * I # generated masked matrix p_joint = Joint(R_true, I_train, sess, D, N, M) if t == 0: fw_iterates = {} else: # Current solution prev_components = [ coreutils.base_loc_scale('mvn0', c['loc'], c['scale'], multivariate=False) for c in qUVt_components ] qUV_prev = coreutils.get_mixture(weights, prev_components) fw_iterates = {UV: qUV_prev} # LMO (via relbo INFERENCE) mean_suv = tf.concat([ tf.get_variable("qU/loc", [D, N]), tf.get_variable("qV/loc", [D, M]) ], axis=1) scale_suv = tf.concat([ tf.nn.softplus(tf.get_variable("qU/scale", [D, N])), tf.nn.softplus(tf.get_variable("qV/scale", [D, M])) ], axis=1) sUV = Normal(loc=mean_suv, scale=scale_suv) #inference = relbo.KLqp({UV: sUV}, data={R: R_true, I: I_train}, inference = relbo.KLqp({UV: sUV}, data={ R_mask: R_true, I: I_train }, fw_iterates=fw_iterates, fw_iter=t) inference.run(n_iter=FLAGS.LMO_iter) loc_s = sUV.mean().eval() scale_s = sUV.stddev().eval() # sUV is batched distrbution, there are issues making # Mixture with batch distributions. mvn0 # with event size (D, N + M) and batch size () # NOTE log_prob(sample) still returns tensor # mvn and multivariatenormaldiag work for 1-D not 2-D shapes sUV_mv = coreutils.base_loc_scale('mvn0', loc_s, scale_s, multivariate=False) # TODO send sUV or sUV_mv as argument to step size? sample # works the same way. same with log_prob total_time = 0. data = {R: R_true, I: I_train} if t == 0: gamma = 1. lipschitz_estimate = opt.adafw_linit() step_type = 'init' elif FLAGS.fw_variant == 'fixed': start_step_time = time.time() step_result = opt.fixed(weights, qUVt_components, qUV_prev, loc_s, scale_s, sUV, p_joint, data, t) end_step_time = time.time() total_time += float(end_step_time - start_step_time) elif FLAGS.fw_variant == 'line_search': start_step_time = time.time() step_result = opt.line_search_dkl(weights, qUVt_components, qUV_prev, loc_s, scale_s, sUV, p_joint, data, t) end_step_time = time.time() total_time += float(end_step_time - start_step_time) elif FLAGS.fw_variant == 'adafw': start_step_time = time.time() step_result = opt.adaptive_fw(weights, qUVt_components, qUV_prev, loc_s, scale_s, sUV, p_joint, data, t, lipschitz_estimate) end_step_time = time.time() total_time += float(end_step_time - start_step_time) step_type = step_result['step_type'] if step_type == 'adaptive': lipschitz_estimate = step_result['l_estimate'] elif FLAGS.fw_variant == 'ada_pfw': start_step_time = time.time() step_result = opt.adaptive_pfw(weights, qUVt_components, qUV_prev, loc_s, scale_s, sUV, p_joint, data, t, lipschitz_estimate) end_step_time = time.time() total_time += float(end_step_time - start_step_time) step_type = step_result['step_type'] if step_type in ['adaptive', 'drop']: lipschitz_estimate = step_result['l_estimate'] elif FLAGS.fw_variant == 'ada_afw': start_step_time = time.time() step_result = opt.adaptive_pfw(weights, qUVt_components, qUV_prev, loc_s, scale_s, sUV, p_joint, data, t, lipschitz_estimate) end_step_time = time.time() total_time += float(end_step_time - start_step_time) step_type = step_result['step_type'] if step_type in ['adaptive', 'away', 'drop']: lipschitz_estimate = step_result['l_estimate'] if t == 0: gamma = 1. 
weights.append(gamma) qUVt_components.append({'loc': loc_s, 'scale': scale_s}) new_components = [sUV_mv] else: qUVt_components = step_result['params'] weights = step_result['weights'] gamma = step_result['gamma'] new_components = [ coreutils.base_loc_scale('mvn0', c['loc'], c['scale'], multivariate=False) for c in qUVt_components ] qUV_new = coreutils.get_mixture(weights, new_components) #qR = Normal( # loc=tf.matmul( # tf.transpose(qUV_new[:, :N]), qUV_new[:, N:]), # scale=tf.ones([N, M])) qR = ed.copy(R, {UV: qUV_new}) cR = ed.copy(R_mask, {UV: qUV_new}) # reconstructed matrix # Log metrics for current iteration logger.info('total time %f' % total_time) append_to_file(times_filename, total_time) logger.info('iter %d, gamma %.4f' % (t, gamma)) append_to_file(step_filename, gamma) if t > 0: gap_t = step_result['gap'] logger.info('iter %d, gap %.4f' % (t, gap_t)) append_to_file(gap_filename, gap_t) # CRITICISM if FLAGS.fw_variant.startswith('ada'): append_to_file(lipschitz_filename, lipschitz_estimate) append_to_file(iter_info_filename, step_type) logger.info('lt = %.5f, iter_type = %s' % (lipschitz_estimate, step_type)) test_mse = ed.evaluate('mean_squared_error', data={ cR: R_true, I: I_test }) logger.info("iter %d ed test mse %.5f" % (t, test_mse)) append_to_file(mse_test_filename, test_mse) train_mse = ed.evaluate('mean_squared_error', data={ cR: R_true, I: I_train }) logger.info("iter %d ed train mse %.5f" % (t, train_mse)) append_to_file(mse_train_filename, train_mse) # very slow #train_ll = log_likelihood(qUV_new, R_true, I_train, sess, D, N, # M) train_ll = ed.evaluate('log_lik', data={ qR: R_true.astype(np.float32), I: I_train }) logger.info("iter %d train log lik %.5f" % (t, train_ll)) append_to_file(ll_train_filename, train_ll) #test_ll = log_likelihood(qUV_new, R_true, I_test, sess, D, N, M) test_ll = ed.evaluate('log_lik', data={ qR: R_true.astype(np.float32), I: I_test }) logger.info("iter %d test log lik %.5f" % (t, test_ll)) append_to_file(ll_test_filename, test_ll) # elbo_loss might be meaningless elbo_loss = elboModel.KLqp({UV: qUV_new}, data={ R: R_true, I: I_train }) elbo_t = elbo(qUV_new, p_joint) res_update = elbo_loss.run() logger.info('iter %d -elbo loss %.2f or %.2f' % (t, res_update['loss'], elbo_t)) append_to_file(elbos_filename, "%f,%f" % (elbo_t, res_update['loss'])) # serialize the current iterate np.savez(os.path.join(outdir, 'qt_latest.npz'), weights=weights, comps=qUVt_components, fw_iter=t + 1) sess.close() tf.reset_default_graph()