Example #1
def log_joint(observed):
    model, _, _ = module.p_Y_Xw(observed, X, DROP_RATE,
                                 n_basis, net_sizes,
                                 n_samples, task)
    log_py_xw = model.local_log_prob('y')
    log_j = zs.log_mean_exp(log_py_xw, 0) * N
    if w_names:
        log_pws = model.local_log_prob(w_names)
        log_j += tf.add_n(log_pws)
    return log_j
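Example #1 averages the per-particle likelihoods in log space with zs.log_mean_exp before scaling by the data-set size N. A minimal NumPy sketch of that reduction (a stand-in for ZhuSuan's implementation, shown only to make the numerics explicit):

import numpy as np

def log_mean_exp(a, axis=0):
    # numerically stable log(mean(exp(a))) along `axis`
    a_max = np.max(a, axis=axis, keepdims=True)
    out = np.log(np.mean(np.exp(a - a_max), axis=axis, keepdims=True)) + a_max
    return np.squeeze(out, axis=axis)

# log p(y | x, w) for 4 particles and 3 data points (toy values)
log_py_xw = np.log([[0.2, 0.5, 0.1],
                    [0.3, 0.4, 0.2],
                    [0.1, 0.6, 0.3],
                    [0.4, 0.5, 0.4]])
print(log_mean_exp(log_py_xw, 0))  # log of the per-point average likelihood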
Example #2
def log_likelihood(log_py_xw, std_y_train):
    """ Log Likelihood.
    :param log_py_xw: [n_particles, batch_size] or [batch_size]
    :param std_y_train: float
    :return: tensor of shape []. Log likelihood.
    """
    rank = len(log_py_xw.get_shape())
    if rank == 1:
        log_py_xw = tf.expand_dims(log_py_xw, 0)
    ll = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - tf.log(std_y_train)
    return ll
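Subtracting tf.log(std_y_train) converts a log density evaluated on standardized targets back to the original y scale (a change-of-variables Jacobian, since y was divided by std_y_train). A self-contained check of that identity with toy numbers, assuming SciPy is available:

import numpy as np
from scipy.stats import norm

std_y = 9.2                              # toy value standing in for std_y_train
y, mu, sigma = 25.0, 22.0, 4.6           # a raw target and a predictive Gaussian
lp_raw = norm.logpdf(y, mu, sigma)
lp_std = norm.logpdf(y / std_y, mu / std_y, sigma / std_y)
print(np.allclose(lp_raw, lp_std - np.log(std_y)))   # True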
Example #3
    def forward(self, observed, reduce_mean=True):
        nodes_q = self.variational(observed).nodes

        _v_inputs = {k: v.tensor for k, v in nodes_q.items()}
        _observed = {**_v_inputs, **observed}

        nodes_p = self.generator(_observed).nodes

        logpxz = self.log_joint(nodes_p)
        logqz = self.log_joint(nodes_q)
        lower_bound = logpxz - logqz

        if self._axis is not None:
            lower_bound = log_mean_exp(lower_bound, self._axis)

        if reduce_mean:
            return fluid.layers.reduce_mean(-lower_bound)
        else:
            return -lower_bound
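In this forward pass the per-particle bound is log p(x, z) - log q(z|x); when self._axis is set, log_mean_exp over the particle axis turns it into an importance-weighted bound. A schematic NumPy version with random stand-in values:

import numpy as np

def log_mean_exp(a, axis):
    a_max = np.max(a, axis=axis, keepdims=True)
    out = np.log(np.mean(np.exp(a - a_max), axis=axis, keepdims=True)) + a_max
    return np.squeeze(out, axis=axis)

logpxz = np.random.randn(5, 2) - 1.0    # log p(x, z) for 5 particles, 2 data points
logqz = np.random.randn(5, 2) - 1.5     # log q(z | x) for the same samples
per_particle = logpxz - logqz           # what forward() computes before reduction
iw_bound = log_mean_exp(per_particle, 0)
print(iw_bound.shape)                   # (2,): one bound value per data point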
Example #4
def main(hps):
    tf.set_random_seed(hps.seed)
    np.random.seed(hps.seed)

    # Load data
    data_path = os.path.join(hps.data_dir, hps.dataset + '.data')
    data_func = dataset.data_dict()[hps.dataset]
    x_train, y_train, x_valid, y_valid, x_test, y_test = data_func(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    n_train, x_dim = x_train.shape
    x_train, x_test, mean_x_train, std_x_train = dataset.standardize(
        x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = hps.layers 

    # Build the computation graph
    x = tf.placeholder(tf.float32, shape=[None, x_dim])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [x_dim] + n_hiddens + [1]
    w_names = ["w" + str(i) for i in range(len(layer_sizes) - 1)]

    meta_model = build_model(x, layer_sizes, hps.n_particles, hps.fix_variance)

    def log_joint(bn):
        log_pws = bn.cond_log_prob(w_names)
        log_py_xw = bn.cond_log_prob('y')
        return tf.add_n(log_pws) + tf.reduce_mean(log_py_xw, 1) * n_train

    meta_model.log_joint = log_joint

    latent = {}
    for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        buf = tf.get_variable(
            'buf_'+str(i),
            initializer=init_bnn_weight(hps.n_particles, n_in, n_out))
        latent['w'+str(i)] = buf

    hmc = zs.HMC(step_size=hps.lr, n_leapfrogs=10, adapt_step_size=True)
    sample_op, hmc_info = hmc.sample(meta_model, observed={'y': y}, latent=latent)

    var_bn = meta_model.observe(**latent)
    log_joint = var_bn.log_joint()
    optimizer = tf.train.AdamOptimizer(learning_rate=hps.lr)
    global_step = tf.get_variable(
        'global_step', initializer=0, trainable=False)
    opt_op = optimizer.minimize(
        -log_joint, var_list=[var_bn.y_logstd], global_step=global_step)

    # prediction: rmse & log likelihood
    y_mean = var_bn["y_mean"]
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = var_bn.cond_log_prob("y")
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)
    ystd_avg = var_bn.y_logstd

    # Define training/evaluation parameters
    epochs = hps.n_epoch
    batch_size = hps.batch_size
    iters = int(np.ceil(x_train.shape[0] / float(batch_size)))
    test_freq = hps.test_freq

    # Run the inference
    dump_buf = []
    with wrapped_supervisor.create_sv(hps, global_step=global_step) as sv:
        sess = sv.sess_
        for epoch in range(1, epochs + 1):
            lbs = []
            perm = np.arange(x_train.shape[0])
            np.random.shuffle(perm)
            x_train = x_train[perm]
            y_train = y_train[perm]
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, _, accr = sess.run(
                    [sample_op, opt_op, hmc_info.acceptance_rate],
                    feed_dict={x: x_batch, y: y_batch})
                lbs.append(accr)
            if epoch % 10 == 0:
                print('Epoch {}: Acceptance rate = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_rmse, test_ll = sess.run(
                    [rmse, log_likelihood],
                    feed_dict={x: x_test, y: y_test})
                print('>> TEST')
                print('>> Test rmse = {}, log_likelihood = {}'
                      .format(test_rmse, test_ll))

            if epoch > epochs // 3 and epoch % hps.dump_freq == 0:
                dump_buf.append(sess.run(var_bn['y_mean'], {x: x_test, y: y_test}))

        if len(hps.dump_pred_dir) > 0:
            pred_out = sess.run([var_bn['y_mean'], var_bn.y_logstd], {x: x_test, y: y_test})
            pred_out[0] = np.concatenate(dump_buf, axis=0)
            pred_out[0] = pred_out[0] * std_y_train + mean_y_train
            pred_out[1] = np.exp(pred_out[1])
            f = lambda a, b: [a*std_x_train + mean_x_train, b*std_y_train + mean_y_train]
            todump = pred_out + f(x_test, y_test) + f(x_train, y_train)
            import pickle
            with open(hps.dump_pred_dir, 'wb') as fout:
                pickle.dump(todump, fout)
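zs.HMC above alternates gradient-based leapfrog steps with a Metropolis-Hastings correction, and the acceptance rate printed each epoch comes from that correction. A schematic NumPy transition for a single flat parameter vector, illustrative only and much simpler than ZhuSuan's adaptive implementation:

import numpy as np

def hmc_step(w, log_prob, grad_log_prob, step_size=0.1, n_leapfrogs=10):
    # one HMC transition: resample momentum, run leapfrog, accept or reject
    p = np.random.randn(*w.shape)
    w_new, p_new = w.copy(), p.copy()
    p_new += 0.5 * step_size * grad_log_prob(w_new)      # half momentum step
    for _ in range(n_leapfrogs):
        w_new += step_size * p_new                       # full position step
        p_new += step_size * grad_log_prob(w_new)        # full momentum step
    p_new -= 0.5 * step_size * grad_log_prob(w_new)      # undo the extra half step
    log_accept = (log_prob(w_new) - 0.5 * np.sum(p_new ** 2)) \
        - (log_prob(w) - 0.5 * np.sum(p ** 2))
    if np.log(np.random.rand()) < log_accept:
        return w_new, True
    return w, False

# toy target: a standard normal in 3 dimensions
w = np.zeros(3)
w, accepted = hmc_step(w, lambda v: -0.5 * np.sum(v ** 2), lambda v: -v)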
Example #5
    lower_bound = zs.variational.elbo(
        log_joint, observed={'y': y_obs}, latent=latent, axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    learning_rate_ph = tf.placeholder(tf.float32, shape=[])
    optimizer = tf.train.AdamOptimizer(learning_rate_ph)
    infer_op = optimizer.minimize(cost)

    # prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y_obs})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y) ** 2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    params = tf.trainable_variables()
    for i in params:
        print(i.name, i.get_shape())

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            time_epoch = -time.time()
            if epoch % anneal_lr_freq == 0:
                learning_rate *= anneal_lr_rate
            lbs = []
            for t in range(iters):
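The cost above comes from lower_bound.sgvb(), i.e. the reparameterization (SGVB) estimator: each Gaussian weight is sampled as mean + exp(logstd) * noise, so the ELBO stays differentiable with respect to the variational parameters. A NumPy sketch of that sampling and the matching log q(w) term, with shapes following the mean-field variational used in these examples:

import numpy as np

n_particles, n_out, n_in = 10, 50, 13
w_mean = np.zeros((1, n_out, n_in + 1))
w_logstd = np.full((1, n_out, n_in + 1), -1.0)
eps = np.random.randn(n_particles, n_out, n_in + 1)
w_samples = w_mean + np.exp(w_logstd) * eps       # reparameterized draw of w
# diagonal-Gaussian log q(w), summed over each weight matrix
log_qw = np.sum(-0.5 * ((w_samples - w_mean) / np.exp(w_logstd)) ** 2
                - w_logstd - 0.5 * np.log(2 * np.pi), axis=(1, 2))
print(w_samples.shape, log_qw.shape)              # (10, 50, 14) (10,)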
Example #6
    log_qWs = [log_qW / x_train.shape[0] for log_qW in log_qWs]
    W_dict = dict(zip(W_names, zip(qW_samples, log_qWs)))
    # W_dict maps each weight name to its (sample, log_prob) pair; it is the latent input to the ELBO
    lower_bound = zs.variational.elbo(log_joint, {'y': y_obs}, W_dict, axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    # Predictions
    model, h_pred = var_dropout(dict(zip(W_names, qW_samples)), x_obs, n,
                                net_size, n_particles, is_training)
    h_pred = tf.reduce_mean(tf.nn.softmax(h_pred), 0)
    y_pred = tf.argmax(h_pred, 1, output_type=tf.int32)
    acc = tf.reduce_mean(tf.cast(tf.equal(y_pred, y), tf.float32))

    log_py_xw = model.local_log_prob('y')
    log_likelihood = zs.log_mean_exp(log_py_xw, 0)

    optimizer = tf.train.AdamOptimizer(learning_rate_ph, epsilon=1e-4)
    infer = optimizer.minimize(cost)

    params = tf.trainable_variables()
    for i in params:
        print('variable name = {}, shape = {}'.format(i.name, i.get_shape()))

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            if epoch % anneal_lr_freq == 0:
                learning_rate *= anneal_lr_rate
            time_epoch = -time.time()
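The prediction block in this example averages class probabilities over particles before taking the argmax. The same computation in plain NumPy with random stand-in logits:

import numpy as np

def softmax(z, axis=-1):
    z = z - z.max(axis=axis, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=axis, keepdims=True)

h_pred = np.random.randn(4, 3, 10)          # [n_particles, batch, n_classes]
y = np.array([2, 7, 7])
probs = softmax(h_pred).mean(axis=0)        # average probabilities over particles
y_pred = probs.argmax(axis=1)               # counterpart of tf.argmax(h_pred, 1)
acc = np.mean(y_pred == y)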
Example #7
def main():
    np.random.seed(1234)
    tf.set_random_seed(1237)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    @zs.reuse('model')
    def bayesianNN(observed, x, n_x, layer_sizes, n_particles):
        with zs.BayesianNet(observed=observed) as model:
            ws = []
            for i, (n_in,
                    n_out) in enumerate(zip(layer_sizes[:-1],
                                            layer_sizes[1:])):
                w_mu = tf.zeros([1, n_out, n_in + 1])
                ws.append(
                    zs.Normal('w' + str(i),
                              w_mu,
                              std=1.,
                              n_samples=n_particles,
                              group_event_ndims=2))

            # forward
            ly_x = tf.expand_dims(
                tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1]), 3)
            for i in range(len(ws)):
                w = tf.tile(ws[i], [1, tf.shape(x)[0], 1, 1])
                ly_x = tf.concat(
                    [ly_x, tf.ones([n_particles,
                                    tf.shape(x)[0], 1, 1])], 2)
                ly_x = tf.matmul(w, ly_x) / \
                    tf.sqrt(tf.to_float(tf.shape(ly_x)[2]))
                if i < len(ws) - 1:
                    ly_x = tf.nn.relu(ly_x)

            y_mean = tf.squeeze(ly_x, [2, 3])
            y_logstd = tf.get_variable('y_logstd',
                                       shape=[],
                                       initializer=tf.constant_initializer(0.))
            y = zs.Normal('y', y_mean, logstd=y_logstd)

        return model, y_mean

    def mean_field_variational(layer_sizes, n_particles):
        with zs.BayesianNet() as variational:
            ws = []
            for i, (n_in,
                    n_out) in enumerate(zip(layer_sizes[:-1],
                                            layer_sizes[1:])):
                w_mean = tf.get_variable(
                    'w_mean_' + str(i),
                    shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                w_logstd = tf.get_variable(
                    'w_logstd_' + str(i),
                    shape=[1, n_out, n_in + 1],
                    initializer=tf.constant_initializer(0.))
                ws.append(
                    zs.Normal('w' + str(i),
                              w_mean,
                              logstd=w_logstd,
                              n_samples=n_particles,
                              group_event_ndims=2))
        return variational

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x = tf.placeholder(tf.float32, shape=[None, n_x])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [n_x] + n_hiddens + [1]
    w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

    def log_joint(observed):
        model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
        log_pws = model.local_log_prob(w_names)
        log_py_xw = model.local_log_prob('y')
        return tf.add_n(log_pws) + log_py_xw * N

    variational = mean_field_variational(layer_sizes, n_particles)
    qw_outputs = variational.query(w_names, outputs=True, local_log_prob=True)
    latent = dict(zip(w_names, qw_outputs))
    y_obs = tf.tile(tf.expand_dims(y, 0), [n_particles, 1])
    lower_bound = tf.reduce_mean(
        zs.sgvb(log_joint, {'y': y_obs}, latent, axis=0))

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    grads = optimizer.compute_gradients(-lower_bound)
    infer = optimizer.apply_gradients(grads)

    # prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y_obs})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y)**2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer, lower_bound],
                                 feed_dict={
                                     n_particles: lb_samples,
                                     x: x_batch,
                                     y: y_batch
                                 })
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_lb, test_rmse, test_ll = sess.run(
                    [lower_bound, rmse, log_likelihood],
                    feed_dict={
                        n_particles: ll_samples,
                        x: x_test,
                        y: y_test
                    })
                print('>> TEST')
                print('>> lower bound = {}, rmse = {}, log_likelihood = {}'.
                      format(test_lb, test_rmse, test_ll))
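The forward pass in bayesianNN keeps a particle axis throughout and folds each layer's bias into its weight matrix by appending a column of ones to the input. A NumPy walk-through of the shapes for a single layer, with random values in place of the sampled weights:

import numpy as np

n_particles, batch, n_in, n_out = 10, 8, 13, 50
w = np.random.randn(n_particles, n_out, n_in + 1)        # one sampled weight matrix per particle
h = np.random.randn(batch, n_in)                         # layer input

h_tiled = np.tile(h[None, :, :, None], (n_particles, 1, 1, 1))    # [K, B, n_in, 1]
h_tiled = np.concatenate(
    [h_tiled, np.ones((n_particles, batch, 1, 1))], axis=2)       # bias input of ones
w_tiled = np.tile(w[:, None, :, :], (1, batch, 1, 1))             # [K, B, n_out, n_in+1]
out = np.matmul(w_tiled, h_tiled) / np.sqrt(n_in + 1)             # [K, B, n_out, 1]
print(out.shape)                                                  # (10, 8, 50, 1)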
Example #8
def main():
    # tf.set_random_seed(1237)
    # np.random.seed(1234)
    hps = parser.parse_args()

    # Load data
    data_path = os.path.join(conf.data_dir, hps.dataset + '.data')
    data_func = getattr(dataset, 'load_uci_' + hps.dataset)
    x_train, y_train, x_valid, y_valid, x_test, y_test = data_func(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    n_train, n_covariates = x_train.shape
    hps.dtype = getattr(tf, hps.dtype)

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Build model
    kernel = RBFKernel(n_covariates)
    x_ph = tf.placeholder(hps.dtype, [None, n_covariates], 'x')
    y_ph = tf.placeholder(hps.dtype, [None], 'y')
    z_pos = tf.get_variable('z/pos', [hps.n_z, n_covariates],
                            hps.dtype,
                            initializer=tf.random_uniform_initializer(-1, 1))
    n_particles_ph = tf.placeholder(tf.int32, [], 'n_particles')
    batch_size = tf.cast(tf.shape(x_ph)[0], hps.dtype)

    model = build_model(hps, kernel, z_pos, x_ph, n_particles_ph)
    variational = build_variational(hps, kernel, z_pos, x_ph, n_particles_ph)

    # ELBO = E_q log (p(y|fx)p(fx|fz)p(fz) / p(fx|fz)q(fz))
    # So we remove p(fx|fz) in both log_joint and latent
    def log_joint(bn):
        prior, log_py_given_fx = bn.cond_log_prob(['fz', 'y'])
        return prior + log_py_given_fx / batch_size * n_train

    model.log_joint = log_joint

    [var_fz, var_fx] = variational.query(['fz', 'fx'],
                                         outputs=True,
                                         local_log_prob=True)
    var_fx = (var_fx[0], tf.zeros_like(var_fx[1]))
    lower_bound = zs.variational.elbo(model,
                                      observed={'y': y_ph},
                                      latent={
                                          'fz': var_fz,
                                          'fx': var_fx
                                      },
                                      axis=0)
    cost = lower_bound.sgvb()
    optimizer = tf.train.AdamOptimizer(learning_rate=hps.lr)
    infer_op = optimizer.minimize(cost)

    # Prediction ops
    model = model.observe(fx=var_fx[0], y=y_ph)
    log_likelihood = model.cond_log_prob('y')
    std_y_train = tf.cast(std_y_train, hps.dtype)
    log_likelihood = zs.log_mean_exp(log_likelihood, 0) / batch_size - \
        tf.log(std_y_train)
    y_pred_mean = tf.reduce_mean(model['y'].distribution.mean, axis=0)
    pred_mse = tf.reduce_mean((y_pred_mean - y_ph)**2) * std_y_train**2

    def infer_step(sess, x_batch, y_batch):
        fd = {x_ph: x_batch, y_ph: y_batch, n_particles_ph: hps.n_particles}
        return sess.run([infer_op, lower_bound], fd)[1]

    def predict_step(sess, x_batch, y_batch):
        fd = {
            x_ph: x_batch,
            y_ph: y_batch,
            n_particles_ph: hps.n_particles_test
        }
        return sess.run([log_likelihood, pred_mse], fd)

    iters = int(np.ceil(x_train.shape[0] / float(hps.batch_size)))
    test_freq = 100
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, hps.n_epoch + 1):
            lbs = []
            indices = np.arange(x_train.shape[0])
            np.random.shuffle(indices)
            x_train = x_train[indices]
            y_train = y_train[indices]
            for t in range(iters):
                lb = infer_step(
                    sess, x_train[t * hps.batch_size:(t + 1) * hps.batch_size],
                    y_train[t * hps.batch_size:(t + 1) * hps.batch_size])
                lbs.append(lb)
            if 10 * epoch % test_freq == 0:
                print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))
            if epoch % test_freq == 0:
                test_lls = []
                test_mses = []
                for t in range(0, x_test.shape[0], hps.batch_size):
                    ll, mse = predict_step(sess, x_test[t:t + hps.batch_size],
                                           y_test[t:t + hps.batch_size])
                    test_lls.append(ll)
                    test_mses.append(mse)
                print('>> TEST')
                print('>> Test log likelihood = {}, rmse = {}'.format(
                    np.mean(test_lls), np.sqrt(np.mean(test_mses))))
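RBFKernel here is imported from elsewhere in the project, so its exact parameterization is an assumption; a generic squared-exponential kernel of the kind it presumably implements looks like this in NumPy:

import numpy as np

def rbf_kernel(x1, x2, lengthscale=1.0, variance=1.0):
    # k(x, x') = s^2 * exp(-||x - x'||^2 / (2 * l^2))
    sq_dist = np.sum(x1 ** 2, 1)[:, None] + np.sum(x2 ** 2, 1)[None, :] \
        - 2.0 * x1 @ x2.T
    return variance * np.exp(-0.5 * sq_dist / lengthscale ** 2)

x = np.random.randn(5, 3)
z = np.random.uniform(-1, 1, size=(2, 3))   # inducing inputs, like z_pos above
print(rbf_kernel(x, z).shape)               # (5, 2)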
Example #9
def main():
    tf.set_random_seed(1237)
    np.random.seed(2345)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, "housing.data")
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    n_train, x_dim = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name="n_particles")
    x = tf.placeholder(tf.float32, shape=[None, x_dim])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [x_dim] + n_hiddens + [1]
    w_names = ["w" + str(i) for i in range(len(layer_sizes) - 1)]

    model = build_bnn(x, layer_sizes, n_particles)
    variational = build_mean_field_variational(layer_sizes, n_particles)

    def log_joint(bn):
        log_pws = bn.cond_log_prob(w_names)
        log_py_xw = bn.cond_log_prob('y')
        return tf.add_n(log_pws) + tf.reduce_mean(log_py_xw, 1) * n_train

    model.log_joint = log_joint

    lower_bound = zs.variational.elbo(model, {'y': y},
                                      variational=variational,
                                      axis=0)
    cost = lower_bound.sgvb()

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    infer_op = optimizer.minimize(cost)

    # prediction: rmse & log likelihood
    y_mean = lower_bound.bn["y_mean"]
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y)**2)) * std_y_train
    log_py_xw = lower_bound.bn.cond_log_prob("y")
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw,
                                                    0)) - tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = (n_train - 1) // batch_size + 1
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            perm = np.random.permutation(x_train.shape[0])
            x_train = x_train[perm, :]
            y_train = y_train[perm]
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={
                                     n_particles: lb_samples,
                                     x: x_batch,
                                     y: y_batch
                                 })
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_rmse, test_ll = sess.run([rmse, log_likelihood],
                                              feed_dict={
                                                  n_particles: ll_samples,
                                                  x: x_test,
                                                  y: y_test
                                              })
                print('>> TEST')
                print('>> Test rmse = {}, log_likelihood = {}'.format(
                    test_rmse, test_ll))
Example #10
def main():
    tf.set_random_seed(1237)
    np.random.seed(1234)

    # Load UCI Boston housing data
    data_path = os.path.join(conf.data_dir, 'housing.data')
    x_train, y_train, x_valid, y_valid, x_test, y_test = \
        dataset.load_uci_boston_housing(data_path)
    x_train = np.vstack([x_train, x_valid])
    y_train = np.hstack([y_train, y_valid])
    N, n_x = x_train.shape

    # Standardize data
    x_train, x_test, _, _ = dataset.standardize(x_train, x_test)
    y_train, y_test, mean_y_train, std_y_train = dataset.standardize(
        y_train, y_test)

    # Define model parameters
    n_hiddens = [50]

    # Build the computation graph
    n_particles = tf.placeholder(tf.int32, shape=[], name='n_particles')
    x = tf.placeholder(tf.float32, shape=[None, n_x])
    y = tf.placeholder(tf.float32, shape=[None])
    layer_sizes = [n_x] + n_hiddens + [1]
    w_names = ['w' + str(i) for i in range(len(layer_sizes) - 1)]

    def log_joint(observed):
        model, _ = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
        log_pws = model.local_log_prob(w_names)
        log_py_xw = model.local_log_prob('y')
        return tf.add_n(log_pws) + log_py_xw * N

    variational = mean_field_variational(layer_sizes, n_particles)
    qw_outputs = variational.query(w_names, outputs=True, local_log_prob=True)
    latent = dict(zip(w_names, qw_outputs))
    lower_bound = zs.variational.elbo(log_joint,
                                      observed={'y': y},
                                      latent=latent,
                                      axis=0)
    cost = tf.reduce_mean(lower_bound.sgvb())
    lower_bound = tf.reduce_mean(lower_bound)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    infer_op = optimizer.minimize(cost)

    # prediction: rmse & log likelihood
    observed = dict((w_name, latent[w_name][0]) for w_name in w_names)
    observed.update({'y': y})
    model, y_mean = bayesianNN(observed, x, n_x, layer_sizes, n_particles)
    y_pred = tf.reduce_mean(y_mean, 0)
    rmse = tf.sqrt(tf.reduce_mean((y_pred - y)**2)) * std_y_train
    log_py_xw = model.local_log_prob('y')
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0)) - \
        tf.log(std_y_train)

    # Define training/evaluation parameters
    lb_samples = 10
    ll_samples = 5000
    epochs = 500
    batch_size = 10
    iters = int(np.floor(x_train.shape[0] / float(batch_size)))
    test_freq = 10

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(1, epochs + 1):
            lbs = []
            for t in range(iters):
                x_batch = x_train[t * batch_size:(t + 1) * batch_size]
                y_batch = y_train[t * batch_size:(t + 1) * batch_size]
                _, lb = sess.run([infer_op, lower_bound],
                                 feed_dict={
                                     n_particles: lb_samples,
                                     x: x_batch,
                                     y: y_batch
                                 })
                lbs.append(lb)
            print('Epoch {}: Lower bound = {}'.format(epoch, np.mean(lbs)))

            if epoch % test_freq == 0:
                test_lb, test_rmse, test_ll = sess.run(
                    [lower_bound, rmse, log_likelihood],
                    feed_dict={
                        n_particles: ll_samples,
                        x: x_test,
                        y: y_test
                    })
                print('>> TEST')
                print(
                    '>> Test lower bound = {}, rmse = {}, log_likelihood = {}'.
                    format(test_lb, test_rmse, test_ll))
Example #11
    def __init__(self, hps, S):

        # Build the computation graph
        x = tf.placeholder(tf.float32, shape=[None, S.x_dim])
        if hps.regression:
            y = tf.placeholder(tf.float32, shape=[None])
            layer_sizes = [S.x_dim] + hps.layers + [1]
        else:
            y = tf.placeholder(tf.int32, shape=[None])
            layer_sizes = [S.x_dim] + hps.layers + [S.y_dim]

        # ===== MODEL & VARIATIONAL =====
        svgd_latent = dict()
        svgd_variables = dict()
        if hps.regression:
            # observation noise
            std_raw = tf.get_variable('std_raw',
                                      shape=[hps.n_particles, 1],
                                      initializer=tf.constant_initializer(
                                          inv_softplus(0.5)))
            svgd_variables['y_std'] = std_raw
            y_std_sym = tf.nn.softplus(std_raw)
            if hps.fix_variance > 0:
                y_std_sym = tf.clip_by_value(y_std_sym, hps.fix_variance,
                                             hps.fix_variance + 1e-5)
            svgd_latent['y_std'] = y_std_sym
        # weight variance
        if hps.model_spec == 'lq':
            w_std_raw = tf.get_variable('w_std_raw',
                                        shape=[
                                            hps.n_particles,
                                        ],
                                        initializer=tf.constant_initializer(
                                            inv_softplus(0.5)))
            svgd_variables['w_scale'] = w_std_raw
            w_std_sym = tf.nn.softplus(w_std_raw)
            svgd_latent['w_scale'] = w_std_sym

        meta_model = build_model(x, layer_sizes, hps.n_particles,
                                 hps.model_spec, hps.regression,
                                 hps.logits_w_sd)

        def log_joint(bn):
            rv_names = ["w" + str(i) for i in range(len(layer_sizes) - 1)]
            if hps.regression:
                rv_names += ['y_std']
            if hps.model_spec == 'lq':
                rv_names.append('w_scale')
            log_pws = bn.cond_log_prob(rv_names)
            log_py_xw = bn.cond_log_prob('y')
            return tf.add_n(log_pws) + tf.reduce_mean(log_py_xw, 1) * S.n_train

        meta_model.log_joint = log_joint

        # variational: w
        for i, (n_in,
                n_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
            buf = tf.get_variable('buf_' + str(i),
                                  initializer=init_bnn_weight(
                                      hps.n_particles, n_in, n_out))
            svgd_latent['w' + str(i)] = svgd_variables['w' + str(i)] = buf

        grad_and_var_w, var_bn = stein_variational_gradient_stationary(
            meta_model, {'y': y},
            svgd_latent,
            variables=svgd_variables,
            method=hps.psvi_method)

        optimizer_class = {
            'adam': tf.train.AdamOptimizer,
            'adagrad': tf.train.AdagradOptimizer
        }[hps.optimizer]
        optimizer = optimizer_class(learning_rate=hps.lr)
        global_step = tf.get_variable('global_step',
                                      initializer=0,
                                      trainable=False)
        infer_op = optimizer.apply_gradients([(-g, v)
                                              for g, v in grad_and_var_w],
                                             global_step=global_step)

        # prediction: rmse & log likelihood
        log_py_xw = var_bn.cond_log_prob("y")
        log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0))
        if hps.regression:
            y_mean = var_bn["y_mean"]
            y_pred = tf.reduce_mean(y_mean, 0)
            rmse = tf.sqrt(tf.reduce_mean((y_pred - y)**2)) * S.std_y_train
            log_likelihood -= tf.log(S.std_y_train)
            ystd_avg = tf.reduce_mean(y_std_sym)
        else:
            y_pred = tf.reduce_mean(tf.exp(var_bn['y_mean']), axis=0)
            rmse = 1 - tf.reduce_mean(tf.to_float(tf.nn.in_top_k(y_pred, y,
                                                                 1)))
            ystd_avg = tf.constant(-1.)

        self.__dict__.update(locals())
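stein_variational_gradient_stationary returns per-particle gradients in the spirit of Stein variational gradient descent (SVGD). Its exact variants are repository-specific; the generic SVGD direction with an RBF kernel and a median-heuristic bandwidth is sketched below in NumPy:

import numpy as np

def rbf(theta):
    # pairwise kernel k(x_j, x_i) and its gradient with respect to x_j
    diff = theta[:, None, :] - theta[None, :, :]
    sq_dist = np.sum(diff ** 2, axis=-1)
    h = np.median(sq_dist) / np.log(theta.shape[0] + 1.0) + 1e-8
    k = np.exp(-sq_dist / h)
    dk = -(2.0 / h) * k[:, :, None] * diff
    return k, dk

def svgd_direction(theta, grad_logp):
    # phi(x_i) = mean_j [ k(x_j, x_i) grad log p(x_j) + grad_{x_j} k(x_j, x_i) ]
    k, dk = rbf(theta)
    return (k @ grad_logp + dk.sum(axis=0)) / theta.shape[0]

# toy target: standard normal; particles are pulled toward high density and repel each other
theta = np.random.randn(20, 2) * 3.0
for _ in range(200):
    theta += 0.1 * svgd_direction(theta, -theta)   # grad log N(0, I) at theta is -theta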
Example #12
    def __init__(self, hps, S):

        # Build the computation graph
        x = tf.placeholder(tf.float32, shape=[None, S.x_dim])
        if hps.regression:
            y = tf.placeholder(tf.float32, shape=[None])
            layer_sizes = [S.x_dim] + hps.layers + [1]
        else:
            y = tf.placeholder(tf.int32, shape=[None])
            layer_sizes = [S.x_dim] + hps.layers + [S.y_dim]

        # ===== MODEL & VARIATIONAL =====
        svgd_latent = dict()
        svgd_variables = dict()

        if hps.regression:
            std_raw = tf.get_variable('std_raw',
                                      shape=[hps.n_particles, 1],
                                      initializer=tf.constant_initializer(
                                          inv_softplus(0.5)))
            svgd_variables['y_std'] = std_raw
            y_std_sym = tf.nn.softplus(std_raw)
            if hps.fix_variance > 0:
                y_std_sym = tf.clip_by_value(y_std_sym, hps.fix_variance,
                                             hps.fix_variance + 1e-5)
            svgd_latent['y_std'] = y_std_sym

        real_batch_size = tf.shape(x)[0]
        inp, x_extra = add_perturb_input(x,
                                         hps.extra_batch_size,
                                         S.n_train,
                                         ptb_type=hps.ptb_type,
                                         ptb_scale=hps.ptb_scale)
        meta_model = build_model(inp, layer_sizes, hps.n_particles,
                                 real_batch_size, hps.regression,
                                 hps.logits_w_sd)

        def log_likelihood_fn(bn):
            log_py_xw = bn.cond_log_prob('y')
            return tf.reduce_mean(log_py_xw, 1) * S.n_train

        meta_model.log_joint = log_likelihood_fn

        # variational: w
        for i, (n_in,
                n_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
            buf = tf.get_variable('buf_' + str(i),
                                  initializer=init_bnn_weight(
                                      hps.n_particles, n_in, n_out))
            svgd_latent['w' + str(i)] = svgd_variables['w' + str(i)] = buf

        # combined
        observed_bn_ = {
            'y': y
        }
        observed_bn_.update(svgd_latent)
        var_bn = meta_model.observe(**observed_bn_)
        log_likelihood = var_bn.log_joint()
        fval_all = var_bn['y_mean_all']

        log_joint_svgd = log_likelihood

        # ===== PRIOR GRADIENT =====
        fv_all_prior = tf.concat([
            meta_model.observe()['y_mean_all']
            for i in range(hps.mm_n_particles // hps.n_particles)
        ],
                                 axis=0)
        pg_indices = tf.concat([
            tf.range(hps.n_mm_sample // 2),
            tf.range(
                tf.shape(fval_all)[1] - hps.n_mm_sample // 2,
                tf.shape(fval_all)[1])
        ],
                               axis=0)
        prior_fval = tf.gather(fv_all_prior, pg_indices, axis=1)
        var_fval = tf.to_double(tf.gather(fval_all, pg_indices, axis=1))
        if not hps.regression:
            prior_fval = merge_last_axes(prior_fval, 1)
            var_fval = merge_last_axes(var_fval, 1)
        hpmean, hpcov = reduce_moments_ax0(prior_fval)
        hpprec = matrix_inverse(hpcov, hps.mm_jitter)
        log_joint_svgd += tf.to_float(mvn_log_prob(var_fval, hpprec, hpmean))

        # ===== SVGD-F GRADIENT =====
        svgd_grad, _ = svgd._svgd_stationary(hps.n_particles,
                                             log_joint_svgd, [fval_all],
                                             svgd.rbf_kernel,
                                             additional_grad=None,
                                             method=hps.psvi_method)[0]

        # ===== INFER OP =====
        optimizer_class = {
            'adam': tf.train.AdamOptimizer,
            'adagrad': tf.train.AdagradOptimizer
        }[hps.optimizer]
        global_step = tf.get_variable('global_step',
                                      initializer=0,
                                      trainable=False)
        if hps.lr_decay:
            lr_sym = tf.train.exponential_decay(hps.lr,
                                                global_step,
                                                10000,
                                                0.2,
                                                staircase=True)
        else:
            lr_sym = hps.lr
        optimizer = optimizer_class(learning_rate=lr_sym)
        targ = tf.stop_gradient(svgd_grad + fval_all)
        infer_op = optimizer.minimize(tf.reduce_mean((targ - fval_all)**2),
                                      global_step=global_step)

        if hps.regression:
            # the target above doesn't include std. Use MAP
            with tf.control_dependencies([infer_op]):
                infer_op = optimizer_class(learning_rate=lr_sym).minimize(
                    -(log_likelihood + var_bn.cond_log_prob('y_std')),
                    var_list=[std_raw])

        # prediction: rmse & log likelihood
        log_py_xw = var_bn.cond_log_prob("y")
        log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0))
        if hps.regression:
            log_likelihood -= tf.log(S.std_y_train)
            y_pred = tf.reduce_mean(var_bn['y_mean_sup'], 0)
            rmse = tf.sqrt(tf.reduce_mean((y_pred - y)**2)) * S.std_y_train
            ystd_avg = tf.reduce_mean(y_std_sym)
        else:
            y_pred = tf.reduce_mean(tf.exp(var_bn['y_mean_sup']), axis=0)
            rmse = 1 - tf.reduce_mean(tf.to_float(tf.nn.in_top_k(y_pred, y,
                                                                 1)))
            ystd_avg = tf.constant(-1.)

        self.__dict__.update(locals())
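reduce_moments_ax0, matrix_inverse and mvn_log_prob are repository helpers, so the sketch below is only a plausible reading of what they compute: moment-match a Gaussian to function values drawn from the prior, then score the current particles under it (the prior-gradient term added to log_joint_svgd above):

import numpy as np

prior_fval = np.random.randn(200, 6)     # prior function draws at 6 measurement points
var_fval = np.random.randn(10, 6)        # current particles' function values

hpmean = prior_fval.mean(axis=0)
hpcov = np.cov(prior_fval, rowvar=False)
hpprec = np.linalg.inv(hpcov + 1e-4 * np.eye(6))     # jitter plays the role of hps.mm_jitter

diff = var_fval - hpmean
_, logdet_prec = np.linalg.slogdet(hpprec)
log_prob = 0.5 * logdet_prec \
    - 0.5 * np.einsum('ni,ij,nj->n', diff, hpprec, diff) \
    - 0.5 * 6 * np.log(2 * np.pi)        # one log density per particle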
Example #13
File: nnet.py  Project: strategist922/fpovi
def build_bnn(x_ph, y_ph, weight_ph, n_train_ph, hps):

    inp, n_supervised = inplace_perturb(x_ph, hps.interp_batch_size,
                                        n_train_ph)
    layer_sizes = [x_ph.get_shape().as_list()[1]] + hps.layer_sizes + \
      [y_ph.get_shape().as_list()[1]]
    out_mask = weight_ph[None, :n_supervised, :]

    # ============== MODEL =======================
    weight_sd = np.sqrt(hps.prior_variance)
    meta_model = bnn_meta_model(inp, layer_sizes, hps.n_particles,
                                n_supervised, weight_sd)

    def log_likelihood_fn(bn):
        log_py_xw = bn.cond_log_prob('y')
        assert len(log_py_xw.get_shape().as_list()) == 3  # [nPar, nBa, nOut]
        log_py_xw = tf.reduce_sum(log_py_xw * out_mask, axis=-1)
        return tf.reduce_mean(log_py_xw, 1) * n_train_ph

    meta_model.log_joint = log_likelihood_fn

    # ============== VARIATIONAL ==================
    svgd_latent = dict()
    svgd_variables = dict()

    if hps.use_sigma_exp_transform:
        sigma_transform = tfd.bijectors.Exp()
    else:
        sigma_transform = tfd.bijectors.Softplus()

    std_raw = tf.get_variable('std_raw',
                              shape=[hps.n_particles, 1, layer_sizes[-1]],
                              initializer=tf.zeros_initializer())
    svgd_variables['y_std'] = std_raw
    y_std_sym = sigma_transform.forward(
        std_raw + sigma_transform.inverse(hps.noise_sigma))
    svgd_latent['y_std'] = y_std_sym

    # w
    for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        w_init = init_bnn_weight(hps.n_particles, n_in, n_out) * weight_sd
        buf = tf.get_variable('buf_' + str(i), initializer=w_init)
        svgd_latent['w' + str(i)] = svgd_variables['w' + str(i)] = buf

    # combined
    observed_bn_ = {'y': y_ph[:n_supervised]}
    observed_bn_.update(svgd_latent)
    var_bn = meta_model.observe(**observed_bn_)
    log_likelihood = var_bn.log_joint()

    fval_all = var_bn['y_mean_all']
    log_joint_svgd = log_likelihood

    # ===== PRIOR GRADIENT =====
    param_names = [name for name in svgd_variables if name.startswith('w')]
    log_prior = var_bn.cond_log_prob(param_names)
    hpv = []
    for i in range(hps.mm_n_particles // hps.n_particles):
        temp_bn = meta_model.observe()
        hpv.append(temp_bn['y_mean_all'])
    hpv = tf.concat(hpv, axis=0)

    n_mms = hps.n_mm_sample // 2
    hp_val = tf.concat([hpv[:, :n_mms], hpv[:, -n_mms:]], axis=1)
    mm_fval = tf.to_double(
        tf.concat([fval_all[:, :n_mms], fval_all[:, -n_mms:]], axis=1))

    hp_val = merge_last_axes(hp_val, 1)
    mm_fval = merge_last_axes(mm_fval, 1)
    hpmean, hpcov = reduce_moments_ax0(hp_val)
    hpprec = matrix_inverse(hpcov, hps.mm_jitter)
    pd = tf.to_float(mvn_log_prob(mm_fval, hpprec, hpmean)) / tf.to_float(
        hps.n_mm_sample)
    log_joint_svgd += pd

    # ===== SVGD-F GRADIENT =====
    svgd_grad, _ = svgd._svgd_stationary(hps.n_particles, log_joint_svgd,
                                         [fval_all], svgd.rbf_kernel)[0]

    optimizer = tf.train.AdamOptimizer(learning_rate=hps.lr)
    global_step = tf.get_variable('global_step',
                                  initializer=0,
                                  trainable=False)
    targ = tf.stop_gradient(svgd_grad + fval_all)
    infer_op = optimizer.minimize(tf.reduce_mean((targ - fval_all)**2),
                                  global_step=global_step)

    if getattr(hps, "infer_noise_sigma", False):
        with tf.control_dependencies([infer_op]):
            infer_op = tf.train.AdamOptimizer(learning_rate=hps.lr).minimize(
                -(log_likelihood + var_bn.cond_log_prob('y_std')),
                var_list=[std_raw])

    log_py_xw = tf.reduce_sum(var_bn.cond_log_prob("y") * out_mask, axis=-1)
    log_likelihood = tf.reduce_mean(zs.log_mean_exp(log_py_xw, 0))
    y_pred = tf.reduce_mean(var_bn['y_mean_sup'], axis=0)
    rmse = tf.sqrt(
        tf.reduce_mean(
            (y_pred - y_ph[:n_supervised])**2 * weight_ph[:n_supervised]))

    logs = {
        'rmse': rmse,
        'log_likelihood': log_likelihood,
        'mean_std': tf.reduce_mean(y_std_sym),
        'std_first': y_std_sym[0, 0, 0],
        'std_last': y_std_sym[0, 0, -1]
    }
    for k in logs:
        tf.summary.scalar(k, logs[k])

    return infer_op, var_bn['y_mean_sup'], locals()
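y_std in this example is parameterized through sigma_transform with a shift so that the zero-initialized std_raw starts exactly at hps.noise_sigma. A NumPy check of that identity for the Softplus case (the Exp case works the same way):

import numpy as np

def softplus(x):
    return np.log1p(np.exp(x))

def inv_softplus(y):
    return np.log(np.expm1(y))

noise_sigma = 0.1                        # stands in for hps.noise_sigma
std_raw = np.zeros((4, 1, 1))            # zero-initialized, as above
y_std = softplus(std_raw + inv_softplus(noise_sigma))
print(np.allclose(y_std, noise_sigma))   # True: training starts at the target noise std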