def _test(p, n):
    """Check ``Bernoulli.log_prob`` against scipy's Bernoulli log-pmf.

    Draws ``n`` samples from ``Bernoulli(p=p)``, evaluates them, and asserts
    that the random variable's log-probabilities match
    ``stats.bernoulli.logpmf`` on the same draws.

    Args:
        p: success probability; must expose ``.eval()`` (it is evaluated to
            obtain the value handed to scipy).
        n: sample size argument forwarded to ``rv.sample``.
    """
    bernoulli_rv = Bernoulli(p=p)
    draws = bernoulli_rv.sample(n).eval()
    draws_tf = tf.constant(draws, dtype=tf.float32)
    p_value = p.eval()
    actual = bernoulli_rv.log_prob(draws_tf).eval()
    expected = stats.bernoulli.logpmf(draws, p_value)
    assert np.allclose(actual, expected)
示例#2
0
class Joint:
    '''Vectorized joint log-density for logistic regression weights.

    Computes log p(y, w | X) = log [ p(y | X, w) * p(w) ] for a whole batch
    of weight samples w at once (one column of the logits per sample), with a
    Bernoulli likelihood and a standard Normal prior on the weights.
    '''
    def __init__(self, Xtrain, ytrain, sess, n_samples=1000):
        """Build the likelihood and prior graph pieces.

        Args:
            Xtrain: array with a two-element ``.shape`` (N, D).
            ytrain: array indexed as ``ytrain[:, np.newaxis]`` in `log_prob`,
                i.e. one label per training row.
            sess: TensorFlow session used to evaluate every tensor.
            n_samples: number of weight samples evaluated per call. Defaults
                to 1000, the previously hard-coded value; callers such as
                elbo / fully-corrective code must pass matching batches.
        """
        self.Xtrain = Xtrain
        self.ytrain = ytrain
        self.sess = sess

        self.n_samples = n_samples
        N, D = Xtrain.shape
        # Weights are fed column-wise: one column per sample.
        self.w = tf.placeholder(tf.float32, [D, self.n_samples])
        self.X = tf.placeholder(tf.float32, [N, D])
        # Logits have shape (N, n_samples).
        self.y = Bernoulli(logits=tf.matmul(self.X, self.w))
        self.prior = Normal(loc=tf.zeros([self.n_samples, D]),
                            scale=1.0 *
                            tf.ones([self.n_samples, D]))  # TODO hard coded

    def log_prob(self, samples):
        """Return the joint log-density of each weight sample.

        Args:
            samples: array of shape (n_samples, D); it is transposed before
                being fed to the (D, n_samples) weight placeholder.

        Returns:
            numpy array of shape (n_samples,): the likelihood summed over the
            training rows plus the prior summed over the D weights.
        """
        # Replicate the labels so they line up with the (N, n_samples) logits.
        copied_ytrain = np.repeat(self.ytrain[:, np.newaxis],
                                  self.n_samples,
                                  axis=1)
        per_sample = self.sess.run(self.y.log_prob(copied_ytrain),
                                   feed_dict={
                                       self.X: self.Xtrain,
                                       self.w: samples.T
                                   }).astype(np.float32)
        lik = np.sum(per_sample, axis=0)
        # Evaluate with the session we were given rather than relying on an
        # ambient default session (what `.eval()` would require).
        prior_per_weight = self.sess.run(self.prior.log_prob(samples))
        prior = np.sum(prior_per_weight, axis=1)
        return lik + prior
def _test(p, n):
  """Assert that Bernoulli(p=p).log_prob agrees with scipy's logpmf.

  ``n`` is forwarded to ``sample``; ``p`` must expose ``.eval()`` so its
  value can be handed to ``stats.bernoulli.logpmf``.
  """
  bernoulli_rv = Bernoulli(p=p)
  sampled = bernoulli_rv.sample(n)
  draws = sampled.eval()
  draws_tf = tf.constant(draws, dtype=tf.float32)
  p_value = p.eval()
  log_prob_edward = bernoulli_rv.log_prob(draws_tf).eval()
  log_prob_scipy = stats.bernoulli.logpmf(draws, p_value)
  assert np.allclose(log_prob_edward, log_prob_scipy)
class Joint:
    '''Wrapper to handle calculating the joint probability of data

    log p(y, w | X) = log [ p(y | X, w) * p(w) ]
    '''
    def __init__(self, X, y, sess, n_samples, logger=None):
        """Initialize the distribution.

            Constructs the graph for evaluation of joint probabilities
            of data X and weights (latent vars) w

            Args:
                X:  [N x D] data
                y:  [N] target variable (tiled to [N x n_samples] internally)
                sess: tensorflow session
                n_samples: number of monte carlo samples to compute expectation
                logger: optional logger; stored as ``self.logger`` (None when
                    not supplied)
        """
        self.sess = sess
        self.n_samples = n_samples
        # Always define the attribute so later reads cannot raise
        # AttributeError when no logger was passed.
        self.logger = logger
        # (N, ) -> (N, n_samples)
        y_matrix = np.repeat(y[:, np.newaxis], self.n_samples, axis=1)

        # Define the model graph
        N, D = X.shape
        self.X = tf.convert_to_tensor(X, dtype=tf.float32)
        self.Y = tf.convert_to_tensor(y_matrix, dtype=tf.float32)
        # Variable holding the current batch of weight samples; overwritten
        # by every call to `log_prob`.
        self.W = tf.get_variable('samples', (self.n_samples, D),
                                 tf.float32,
                                 initializer=tf.zeros_initializer())
        # (N, n_samples)
        self.py = Bernoulli(logits=tf.matmul(self.X, tf.transpose(self.W)))
        self.w_prior = Normal(loc=tf.zeros([self.n_samples, D], tf.float32),
                              scale=tf.ones([self.n_samples, D], tf.float32))
        # to get prior log probability would be summed across the D features
        # [n_samples D] -> [n_samples]
        self.prior = tf.reduce_sum(self.w_prior.log_prob(self.W), axis=1)
        log_likelihoods = self.py.log_prob(self.Y)  # (N, n_samples)
        self.ll = tf.reduce_sum(log_likelihoods, axis=0)  # (n_samples, )
        self.joint = self.ll + self.prior

    def log_prob(self, samples):
        """Log probability of samples.

        Since X is already given, ``samples`` are always samples of w
        (whether they come from the target distribution, a base distribution
        of the approximation, or an individual atom).

        The samples are assigned to the ``W`` variable through the session;
        the returned value is the graph tensor ``self.joint`` (not a numpy
        array), so it reflects whatever ``W`` holds when it is evaluated.

        Args:
            samples: [self.n_samples x D] tensor
        Returns:
            [self.n_samples, ] tensor of the joint log probability of
            samples, X, y
        """
        assert samples.shape[
            0] == self.n_samples, 'Different number of samples'
        self.sess.run(self.W.assign(samples))
        return self.joint
def _test(shape, n):
    """Compare ``Bernoulli.log_prob`` with a per-coordinate scipy sum.

    Builds a Bernoulli random variable of the given ``shape`` with every
    probability equal to 0.5, draws ``n`` samples, and asserts that
    ``log_prob`` equals the sum over the first ``shape[0]`` coordinates of
    ``stats.bernoulli.logpmf``.
    """
    bernoulli_rv = Bernoulli(shape, p=tf.zeros(shape)+0.5)
    sampled = bernoulli_rv.sample(n)

    draws = sampled.eval()
    draws_tf = tf.constant(draws, dtype=tf.float32)
    probs = bernoulli_rv.p.eval()
    val_ed = bernoulli_rv.log_prob(draws_tf).eval()
    # Accumulate the reference value coordinate by coordinate.
    val_true = sum(
        (stats.bernoulli.logpmf(draws[:, idx], probs[idx])
         for idx in range(shape[0])),
        0.0)

    assert np.allclose(val_ed, val_true)
def _test(shape, n):
    """Compare ``Bernoulli.log_prob`` with a per-coordinate scipy sum.

    Same check as the session-free variant, but every evaluation happens
    with the module-level ``sess`` installed as the default session.
    """
    bernoulli_rv = Bernoulli(shape, p=tf.zeros(shape)+0.5)
    sampled = bernoulli_rv.sample(n)
    with sess.as_default():
        draws = sampled.eval()
        draws_tf = tf.constant(draws, dtype=tf.float32)
        probs = bernoulli_rv.p.eval()
        val_ed = bernoulli_rv.log_prob(draws_tf).eval()
        # Reference value: sum of scipy log-pmfs over the first axis of shape.
        val_true = sum(
            (stats.bernoulli.logpmf(draws[:, idx], probs[idx])
             for idx in range(shape[0])),
            0.0)

        assert np.allclose(val_ed, val_true)
def main(_):
    """Run the iterative mixture-building experiment and log its metrics.

    Reads its configuration from ``FLAGS`` (seed, outdir, base_dist,
    fw_variant, n_fw_iter, n_monte_carlo_samples, LMO_iter, iter0). For each
    of ``FLAGS.n_fw_iter`` iterations it builds a fresh graph and session,
    fits a new component ``s`` with ``relbo.KLqp`` (the "LMO" step), picks a
    step via the ``opt`` routine selected by ``FLAGS.fw_variant``, and then
    writes ELBO, step size, gap, log-likelihood, ROC and accuracy metrics to
    CSV files under the output directory.

    Args:
        _: unused positional argument.

    Raises:
        NotImplementedError: if ``FLAGS.fw_variant`` is not one of the
            handled variants (only checked for iterations after the first).
    """
    ed.set_seed(FLAGS.seed)
    # setting up output directory
    outdir = FLAGS.outdir
    if '~' in outdir: outdir = os.path.expanduser(outdir)
    os.makedirs(outdir, exist_ok=True)

    is_vector = FLAGS.base_dist in ['mvnormal', 'mvlaplace']

    ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data()
    N, D = Xtrain.shape
    N_test, D_test = Xtest.shape
    assert D_test == D, 'Test dimension %d different than train %d' % (D_test,
                                                                       D)
    logger.info('D = %d, Ntrain = %d, Ntest = %d' % (D, N, N_test))

    # Solution components
    weights, q_params = [], []
    # L-continuous gradient estimate
    lipschitz_estimate = None

    # Metrics to log
    # Each `open(..., 'w').close()` below creates the file or truncates an
    # existing one, so every run starts with empty metric files.
    times_filename = os.path.join(outdir, 'times.csv')
    open(times_filename, 'w').close()

    # (mean, +- std)
    elbos_filename = os.path.join(outdir, 'elbos.csv')
    logger.info('saving elbos to, %s' % elbos_filename)
    open(elbos_filename, 'w').close()

    rocs_filename = os.path.join(outdir, 'roc.csv')
    logger.info('saving rocs to, %s' % rocs_filename)
    open(rocs_filename, 'w').close()

    gap_filename = os.path.join(outdir, 'gap.csv')
    open(gap_filename, 'w').close()

    step_filename = os.path.join(outdir, 'steps.csv')
    open(step_filename, 'w').close()

    # (mean, std)
    ll_train_filename = os.path.join(outdir, 'll_train.csv')
    open(ll_train_filename, 'w').close()
    ll_test_filename = os.path.join(outdir, 'll_test.csv')
    open(ll_test_filename, 'w').close()

    # (bin_ac_train, bin_ac_test)
    bin_ac_filename = os.path.join(outdir, 'bin_ac.csv')
    open(bin_ac_filename, 'w').close()

    # 'adafw', 'ada_afw', 'ada_pfw'
    if FLAGS.fw_variant.startswith('ada'):
        lipschitz_filename = os.path.join(outdir, 'lipschitz.csv')
        open(lipschitz_filename, 'w').close()

        iter_info_filename = os.path.join(outdir, 'iter_info.txt')
        open(iter_info_filename, 'w').close()

    for t in range(FLAGS.n_fw_iter):
        # A fresh graph and session per iteration; only the Python-side
        # state (weights, q_params, lipschitz_estimate) carries over.
        g = tf.Graph()
        with g.as_default():
            sess = tf.InteractiveSession()
            with sess.as_default():
                tf.set_random_seed(FLAGS.seed)

                # Build Model
                w = Normal(loc=tf.zeros(D, tf.float32),
                           scale=tf.ones(D, tf.float32))

                X = tf.placeholder(tf.float32, [None, D])
                y = Bernoulli(logits=ed.dot(X, w))

                p_joint = blr_utils.Joint(Xtrain, ytrain, sess,
                                          FLAGS.n_monte_carlo_samples, logger)

                # vectorized Model evaluations
                n_test_samples = 100
                W = tf.placeholder(tf.float32, [n_test_samples, D])
                y_data = tf.placeholder(tf.float32, [None])  # N -> (N, n_test)
                y_data_matrix = tf.tile(tf.expand_dims(y_data, 1),
                                        (1, n_test_samples))
                pred_logits = tf.matmul(X, tf.transpose(W))  # (N, n_test)
                # Prediction: sigmoid of the logits averaged over the samples.
                ypred = tf.sigmoid(tf.reduce_mean(pred_logits, axis=1))
                pY = Bernoulli(logits=pred_logits)  # (N, n_test)
                log_likelihoods = pY.log_prob(y_data_matrix)  # (N, n_test)
                log_likelihood_expectation = tf.reduce_mean(log_likelihoods,
                                                            axis=1)  # (N, )
                # Mean and variance (as returned by tf.nn.moments) across the
                # N data points; the second value is logged as "std" below.
                ll_mean, ll_std = tf.nn.moments(log_likelihood_expectation,
                                                axes=[0])

                if t == 0:
                    fw_iterates = {}
                else:
                    # Current solution
                    prev_components = [
                        coreutils.base_loc_scale(FLAGS.base_dist,
                                                 c['loc'],
                                                 c['scale'],
                                                 multivariate=is_vector)
                        for c in q_params
                    ]
                    qtw_prev = coreutils.get_mixture(weights, prev_components)
                    fw_iterates = {w: qtw_prev}

                # s is the solution to LMO, random initialization
                s = coreutils.construct_base(FLAGS.base_dist, [D],
                                             t,
                                             's',
                                             multivariate=is_vector)

                sess.run(tf.global_variables_initializer())

                total_time = 0.
                inference_time_start = time.time()
                # Run relbo to solve LMO problem
                # If the first atom is being selected through running LMO
                # it is equivalent to running vi on a uniform prior
                # Since uniform is not in our variational family try
                # only random element (without LMO inference) as initial iterate
                if FLAGS.iter0 == 'vi' or t > 0:
                    inference = relbo.KLqp({w: s},
                                           fw_iterates=fw_iterates,
                                           data={
                                               X: Xtrain,
                                               y: ytrain
                                           },
                                           fw_iter=t)
                    inference.run(n_iter=FLAGS.LMO_iter)
                # NOTE: the inference timestamps are only consumed by the
                # commented-out line below; total_time counts step selection
                # only.
                inference_time_end = time.time()
                # compute only step size selection time
                #total_time += float(inference_time_end - inference_time_start)

                loc_s = s.mean().eval()
                scale_s = s.stddev().eval()

                # Evaluate the next step
                step_result = {}
                if t == 0:
                    # Initialization, q_0
                    q_params.append({'loc': loc_s, 'scale': scale_s})
                    weights.append(1.)
                    if FLAGS.fw_variant.startswith('ada'):
                        lipschitz_estimate = opt.adafw_linit(s, p_joint)
                    step_type = 'init'
                elif FLAGS.fw_variant == 'fixed':
                    # NOTE(review): this branch does not assign step_type;
                    # step_type is only read below when fw_variant starts
                    # with 'ada', so it is never needed here.
                    start_step_time = time.time()
                    step_result = opt.fixed(weights, q_params, qtw_prev, loc_s,
                                            scale_s, s, p_joint, t)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                elif FLAGS.fw_variant == 'adafw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_fw(weights, q_params, qtw_prev,
                                                  loc_s, scale_s, s, p_joint,
                                                  t, lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                    # The Lipschitz estimate is refreshed only for the step
                    # types listed in each branch.
                    if step_type == 'adaptive':
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'ada_pfw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_pfw(weights, q_params, qtw_prev,
                                                   loc_s, scale_s, s, p_joint,
                                                   t, lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                    if step_type in ['adaptive', 'drop']:
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'ada_afw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_afw(weights, q_params, qtw_prev,
                                                   loc_s, scale_s, s, p_joint,
                                                   t, lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                    if step_type in ['adaptive', 'away', 'drop']:
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'line_search':
                    start_step_time = time.time()
                    step_result = opt.line_search_dkl(weights, q_params,
                                                      qtw_prev, loc_s, scale_s,
                                                      s, p_joint, t)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                    step_type = step_result['step_type']
                else:
                    raise NotImplementedError(
                        'Step size variant %s not implemented' %
                        FLAGS.fw_variant)

                if t == 0:
                    gamma = 1.
                    new_components = [s]
                else:
                    # Adopt the updated solution returned by the step routine
                    # and rebuild its components in the current graph.
                    q_params = step_result['params']
                    weights = step_result['weights']
                    gamma = step_result['gamma']
                    new_components = [
                        coreutils.base_loc_scale(FLAGS.base_dist,
                                                 c['loc'],
                                                 c['scale'],
                                                 multivariate=is_vector)
                        for c in q_params
                    ]
                qtw_new = coreutils.get_mixture(weights, new_components)

                # Log metrics for current iteration
                logger.info('total time %f' % total_time)
                append_to_file(times_filename, total_time)

                elbo_t = elbo(qtw_new, p_joint, return_std=False)
                # testing elbo directory from KLqp
                elbo_loss = elboModel.KLqp({w: qtw_new},
                                           data={
                                               X: Xtrain,
                                               y: ytrain
                                           })
                res_update = elbo_loss.run()

                logger.info("iter, %d, elbo, %.2f loss %.2f" %
                            (t, elbo_t, res_update['loss']))
                append_to_file(elbos_filename,
                               "%f,%f" % (elbo_t, res_update['loss']))

                logger.info('iter %d, gamma %.4f' % (t, gamma))
                append_to_file(step_filename, gamma)

                # No gap is available for the initial iterate (step_result is
                # empty at t == 0).
                if t > 0:
                    gap_t = step_result['gap']
                    logger.info('iter %d, gap %.4f' % (t, gap_t))
                    append_to_file(gap_filename, gap_t)

                if FLAGS.fw_variant.startswith('ada'):
                    append_to_file(lipschitz_filename, lipschitz_estimate)
                    append_to_file(iter_info_filename, step_type)
                    logger.info('lt = %.5f, iter_type = %s' %
                                (lipschitz_estimate, step_type))

                # get weight samples to evaluate expectations
                w_samples = qtw_new.sample([n_test_samples]).eval()
                ll_train_mean, ll_train_std = sess.run([ll_mean, ll_std],
                                                       feed_dict={
                                                           W: w_samples,
                                                           X: Xtrain,
                                                           y_data: ytrain
                                                       })
                logger.info("iter, %d, train ll, %.2f +/- %.2f" %
                            (t, ll_train_mean, ll_train_std))
                append_to_file(ll_train_filename,
                               "%f,%f" % (ll_train_mean, ll_train_std))

                # The same weight samples are reused for the test metrics.
                ll_test_mean, ll_test_std, y_test_pred = sess.run(
                    [ll_mean, ll_std, ypred],
                    feed_dict={
                        W: w_samples,
                        X: Xtest,
                        y_data: ytest
                    })
                logger.info("iter, %d, test ll, %.2f +/- %.2f" %
                            (t, ll_test_mean, ll_test_std))
                append_to_file(ll_test_filename,
                               "%f,%f" % (ll_test_mean, ll_test_std))

                roc_score = roc_auc_score(ytest, y_test_pred)
                logger.info("iter %d, roc %.4f" % (t, roc_score))
                append_to_file(rocs_filename, roc_score)

                y_post = ed.copy(y, {w: qtw_new})
                # eq. to y = Bernoulli(logits=ed.dot(X, qtw_new))

                # Cross-check with Edward's own evaluation metrics.
                ed_train_ll = ed.evaluate('log_likelihood',
                                          data={
                                              X: Xtrain,
                                              y_post: ytrain,
                                          })
                ed_test_ll = ed.evaluate('log_likelihood',
                                         data={
                                             X: Xtest,
                                             y_post: ytest,
                                         })
                logger.info("edward train ll %.2f test ll %.2f" %
                            (ed_train_ll, ed_test_ll))

                bin_ac_train = ed.evaluate('binary_accuracy',
                                           data={
                                               X: Xtrain,
                                               y_post: ytrain,
                                           })
                bin_ac_test = ed.evaluate('binary_accuracy',
                                          data={
                                              X: Xtest,
                                              y_post: ytest,
                                          })
                append_to_file(bin_ac_filename,
                               "%f,%f" % (bin_ac_train, bin_ac_test))
                logger.info(
                    "edward binary accuracy train ll %.2f test ll %.2f" %
                    (bin_ac_train, bin_ac_test))

                mse_test = ed.evaluate('mean_squared_error',
                                       data={
                                           X: Xtest,
                                           y_post: ytest,
                                       })
                logger.info("edward mse test ll %.2f" % (mse_test))

            sess.close()
        tf.reset_default_graph()
示例#8
0
def main(_):
    """Run the iterative mixture-building loop and save metrics to CSV.

    For each of ``FLAGS.n_fw_iter`` iterations a fresh graph and session are
    created, a new component ``qw`` is fitted with ``relbo.KLqp``, the
    mixture weights are updated with step size ``2 / (iter + 2)`` (optionally
    replaced by the fully-corrective or line-search variants chosen via
    ``FLAGS.fw_variant``), and log-likelihood, ROC, duality-gap and timing
    results are written under the output directory.

    Args:
        _: unused positional argument.
    """
    outdir = setup_outdir()
    ed.set_seed(FLAGS.seed)

    ((Xtrain, ytrain), (Xtest, ytest)) = blr_utils.get_data()
    N, D = Xtrain.shape
    N_test, D_test = Xtest.shape

    print("Xtrain")
    print(Xtrain)
    print(Xtrain.shape)

    # For synthetic experiments, additionally draw posterior samples with
    # HMC. NOTE(review): `empirical_samples` is only referenced by the
    # commented-out plotting code below.
    if 'synthetic' in FLAGS.exp:
        w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))
        X = tf.placeholder(tf.float32, [N, D])
        y = Bernoulli(logits=ed.dot(X, w))

        #n_posterior_samples = 100000
        n_posterior_samples = 10
        qw_empirical = Empirical(
            params=tf.get_variable("qw/params", [n_posterior_samples, D]))
        inference = ed.HMC({w: qw_empirical}, data={X: Xtrain, y: ytrain})
        inference.initialize(n_print=10, step_size=0.6)

        tf.global_variables_initializer().run()
        inference.run()

        empirical_samples = qw_empirical.sample(50).eval()
        #fig, ax = plt.subplots()
        #ax.scatter(posterior_samples[:,0], posterior_samples[:,1])
        #plt.show()

    # State carried across iterations (the graph itself is rebuilt each time).
    weights, q_components = [], []
    ll_trains, ll_tests, bin_ac_trains, bin_ac_tests, elbos, rocs, gaps = [], [], [], [], [], [], []
    total_time, times = 0., []
    # NOTE: the loop variable `iter` shadows the builtin of the same name
    # for the rest of this function.
    for iter in range(0, FLAGS.n_fw_iter):
        print("iter %d" % iter)
        g = tf.Graph()
        with g.as_default():
            sess = tf.InteractiveSession()
            with sess.as_default():
                tf.set_random_seed(FLAGS.seed)
                # MODEL
                w = Normal(loc=tf.zeros(D), scale=1.0 * tf.ones(D))

                X = tf.placeholder(tf.float32, [N, D])
                y = Bernoulli(logits=ed.dot(X, w))

                X_test = tf.placeholder(tf.float32, [N_test, D_test])
                y_test = Bernoulli(logits=ed.dot(X_test, w))

                # Fit the next component; the timed region covers building
                # the inference object, variable init and the run.
                qw = construct_base_dist([D], iter, 'qw')
                inference_time_start = time.time()
                inference = relbo.KLqp({w: qw},
                                       fw_iterates=get_fw_iterates(
                                           weights, w, q_components),
                                       data={
                                           X: Xtrain,
                                           y: ytrain
                                       },
                                       fw_iter=iter)
                tf.global_variables_initializer().run()
                inference.run(n_iter=FLAGS.LMO_iter)
                inference_time_end = time.time()
                total_time += float(inference_time_end - inference_time_start)

                joint = Joint(Xtrain, ytrain, sess)
                # The duality gap needs a previous mixture, so it is skipped
                # on the first iteration.
                if iter > 0:
                    qtw_prev = build_mixture(weights, q_components)
                    gap = compute_duality_gap(joint, qtw_prev, qw)
                    gaps.append(gap)
                    np.savetxt(os.path.join(outdir, "gaps.csv"),
                               gaps,
                               delimiter=',')
                    print("duality gap", gap)

                # update weights
                # Shrink the existing weights by (1 - gamma) and give the new
                # component weight gamma.
                gamma = 2. / (iter + 2.)
                weights = [(1. - gamma) * w for w in weights]
                weights.append(gamma)

                # update components
                q_components = update_iterate(q_components, qw)

                if len(q_components) > 1 and FLAGS.fw_variant == 'fc':
                    print("running fully corrective")
                    # overwrite the weights
                    weights = fully_corrective(
                        build_mixture(weights, q_components), joint)

                    # Always-on branch: drop components whose weight is
                    # exactly zero.
                    if True:
                        # remove inactivate iterates
                        weights = list(weights)
                        # Walk backwards so deletions do not shift the
                        # indices still to be visited.
                        for i in reversed(range(len(weights))):
                            if weights[i] == 0:
                                del weights[i]
                                del q_components[i]
                        weights = np.array(
                            weights
                        )  # TODO type acrobatics to make elements deletable
                elif len(q_components
                         ) > 1 and FLAGS.fw_variant == 'line_search':
                    print("running line search")
                    # Line search between the previous mixture (everything
                    # but the last component) and the new component qw.
                    weights = line_search(
                        build_mixture(weights[:-1], q_components[:-1]), qw,
                        joint)

                qtw_new = build_mixture(weights, q_components)

                # Disabled debug output of the component parameters.
                if False:
                    for i, comp in enumerate(qtw_new.components):
                        print("component", i, "\tmean",
                              comp.mean().eval(), "\tstddev",
                              comp.stddev().eval())

                # Log-likelihoods averaged over 50 weight draws from the new
                # mixture, then summarized by mean / std.
                train_lls = [
                    sess.run(y.log_prob(ytrain),
                             feed_dict={
                                 X: Xtrain,
                                 w: qtw_new.sample().eval()
                             }) for _ in range(50)
                ]
                train_lls = np.mean(train_lls, axis=0)
                ll_trains.append((np.mean(train_lls), np.std(train_lls)))

                test_lls = [
                    sess.run(y_test.log_prob(ytest),
                             feed_dict={
                                 X_test: Xtest,
                                 w: qtw_new.sample().eval()
                             }) for _ in range(50)
                ]
                test_lls = np.mean(test_lls, axis=0)
                ll_tests.append((np.mean(test_lls), np.std(test_lls)))

                # Test predictions: sigmoid of the logits averaged over 50
                # weight draws.
                logits = np.mean([
                    np.dot(Xtest,
                           qtw_new.sample().eval()) for _ in range(50)
                ],
                                 axis=0)
                ypred = tf.sigmoid(logits).eval()
                roc_score = roc_auc_score(ytest, ypred)
                rocs.append(roc_score)

                print('roc_score', roc_score)
                print('ytrain', np.mean(train_lls), np.std(train_lls))
                print('ytest', np.mean(test_lls), np.std(test_lls))

                # Scatter plot of predictions ordered by true label, one PDF
                # per iteration.
                order = np.argsort(ytest)
                plt.scatter(range(len(ypred)), ypred[order], c=ytest[order])
                plt.savefig(os.path.join(outdir, 'ypred%d.pdf' % iter))
                plt.close()

                # The CSV files are rewritten in full every iteration with
                # the accumulated history.
                np.savetxt(os.path.join(outdir, "train_lls.csv"),
                           ll_trains,
                           delimiter=',')
                np.savetxt(os.path.join(outdir, "test_lls.csv"),
                           ll_tests,
                           delimiter=',')
                np.savetxt(os.path.join(outdir, "rocs.csv"),
                           rocs,
                           delimiter=',')

                x_post = ed.copy(y, {w: qtw_new})
                x_post_t = ed.copy(y_test, {w: qtw_new})

                print(
                    'log lik train',
                    ed.evaluate('log_likelihood',
                                data={
                                    x_post: ytrain,
                                    X: Xtrain
                                }))
                print(
                    'log lik test',
                    ed.evaluate('log_likelihood',
                                data={
                                    x_post_t: ytest,
                                    X_test: Xtest
                                }))

                #ll_train = ed.evaluate('log_likelihood', data={x_post: ytrain, X:Xtrain})
                #ll_test = ed.evaluate('log_likelihood', data={x_post_t: ytest, X_test:Xtest})
                bin_ac_train = ed.evaluate('binary_accuracy',
                                           data={
                                               x_post: ytrain,
                                               X: Xtrain
                                           })
                bin_ac_test = ed.evaluate('binary_accuracy',
                                          data={
                                              x_post_t: ytest,
                                              X_test: Xtest
                                          })
                print('binary accuracy train', bin_ac_train)
                print('binary accuracy test', bin_ac_test)
                #latest_elbo = elbo(qtw_new, joint, w)

                #foo = ed.KLqp({w: qtw_new}, data={X: Xtrain, y: ytrain})
                #op = myloss(foo)
                #print("myloss", sess.run(op[0], feed_dict={X: Xtrain, y: ytrain}), sess.run(op[1], feed_dict={X: Xtrain, y: ytrain}))

                #append_and_save(ll_trains, ll_train, "loglik_train.csv", np.savetxt)
                #append_and_save(ll_tests, ll_train, "loglik_test.csv", np.savetxt) #append_and_save(bin_ac_trains, bin_ac_train, "bin_acc_train.csv", np.savetxt) #append_and_save(bin_ac_tests, bin_ac_test, "bin_acc_test.csv", np.savetxt)
                ##append_and_save(elbos, latest_elbo, "elbo.csv", np.savetxt)

                #print('log-likelihood train ', ll_train)
                #print('log-likelihood test ', ll_test)
                #print('binary_accuracy train ', bin_ac_train)
                #print('binary_accuracy test ', bin_ac_test)
                #print('elbo', latest_elbo)
                # Cumulative inference time up to and including this
                # iteration. NOTE(review): setup_outdir() is called again
                # here instead of reusing `outdir` - confirm it is idempotent.
                times.append(total_time)
                np.savetxt(os.path.join(setup_outdir(), 'times.csv'), times)

        tf.reset_default_graph()
 def _test(self, probs, n):
     """Assert that the Bernoulli random variable and ``ds.Bernoulli``
     assign equal log-probabilities to ``n`` samples drawn from the former.
     """
     edward_rv = Bernoulli(probs)
     reference = ds.Bernoulli(probs)
     draws = edward_rv.sample(n).eval()
     actual = edward_rv.log_prob(draws).eval()
     expected = reference.log_prob(draws).eval()
     self.assertAllEqual(actual, expected)
示例#10
0
 def _test(self, probs, n):
   """Check log_prob of the Bernoulli random variable against ds.Bernoulli
   on ``n`` samples drawn from the random variable.
   """
   edward_rv = Bernoulli(probs)
   reference = ds.Bernoulli(probs)
   draws = edward_rv.sample(n).eval()
   log_prob_rv = edward_rv.log_prob(draws).eval()
   log_prob_ref = reference.log_prob(draws).eval()
   self.assertAllEqual(log_prob_rv, log_prob_ref)
示例#11
0
class bern_emb_model():
    """Bernoulli embedding model assembled as a TensorFlow graph.

    Holds two [d.L, K] variables, ``rho`` (embedding vectors) and ``alpha``
    (context vectors), with a Normal(0, sig) prior on both. The loss is the
    negative of the minibatch-scaled Bernoulli log-likelihood (positive
    targets plus negative samples) plus the log-prior, minimized with Adam.
    """
    def __init__(self, d, K, sig, sess, logdir):
        """Build the graph, initialize variables and set up summaries.

        Args:
            d: data object; the attributes read here are ``cs``,
                ``n_minibatch``, ``L``, ``unigram``, ``ns`` and ``N``.
            K: embedding dimension.
            sig: scale of the Normal prior on ``rho`` and ``alpha``.
            sess: TensorFlow session used for initialization and evaluation.
            logdir: directory handed to the summary ``FileWriter``.
        """
        self.K = K
        self.sig = sig
        self.sess = sess
        self.logdir = logdir

        # Half of the context size. Floor division keeps this an integer on
        # Python 3 as well (plain `/` would yield a float there, which is not
        # a valid tile multiple or int32 offset); for an integer `d.cs` the
        # value is the same as Python 2's `d.cs / 2`.
        half_cs = d.cs // 2

        with tf.name_scope('model'):
            # Data Placeholder
            with tf.name_scope('input'):
                self.placeholders = tf.placeholder(tf.int32)
                self.words = self.placeholders

            # Index Masks
            with tf.name_scope('context_mask'):
                # Positions of the target words inside the fed word window.
                self.p_mask = tf.cast(
                    tf.range(half_cs, d.n_minibatch + half_cs), tf.int32)
                rows = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, half_cs), [0]),
                            [d.n_minibatch, 1]), tf.int32)
                columns = tf.cast(
                    tf.tile(tf.expand_dims(tf.range(0, d.n_minibatch), [1]),
                            [1, half_cs]), tf.int32)
                # Context positions: half_cs before and half_cs after each
                # target (the +1 skips the target itself).
                self.ctx_mask = tf.concat(
                    [rows + columns, rows + columns + half_cs + 1], 1)

            with tf.name_scope('embeddings'):
                # Embedding vectors
                self.rho = tf.Variable(tf.random_normal([d.L, self.K]) /
                                       self.K,
                                       name='rho')

                # Context vectors
                self.alpha = tf.Variable(tf.random_normal([d.L, self.K]) /
                                         self.K,
                                         name='alpha')

                with tf.name_scope('priors'):
                    prior = Normal(loc=0.0, scale=self.sig)
                    self.log_prior = tf.reduce_sum(
                        prior.log_prob(self.rho) + prior.log_prob(self.alpha))

            with tf.name_scope('natural_param'):
                # Target and Context Indices
                with tf.name_scope('target_word'):
                    self.p_idx = tf.gather(self.words, self.p_mask)
                    self.p_rho = tf.squeeze(tf.gather(self.rho, self.p_idx))

                # Negative samples
                with tf.name_scope('negative_samples'):
                    # Draw d.ns negative word indices per minibatch row from
                    # the unigram distribution.
                    unigram_logits = tf.tile(
                        tf.expand_dims(tf.log(tf.constant(d.unigram)), [0]),
                        [d.n_minibatch, 1])
                    self.n_idx = tf.multinomial(unigram_logits, d.ns)
                    self.n_rho = tf.gather(self.rho, self.n_idx)

                with tf.name_scope('context'):
                    self.ctx_idx = tf.squeeze(
                        tf.gather(self.words, self.ctx_mask))
                    self.ctx_alphas = tf.gather(self.alpha, self.ctx_idx)

                # Natural parameter
                # Inner product of each (positive / negative) embedding with
                # the sum of its context vectors.
                ctx_sum = tf.reduce_sum(self.ctx_alphas, [1])
                self.p_eta = tf.expand_dims(
                    tf.reduce_sum(tf.multiply(self.p_rho, ctx_sum), -1), 1)
                self.n_eta = tf.reduce_sum(
                    tf.multiply(
                        self.n_rho,
                        tf.tile(tf.expand_dims(ctx_sum, 1), [1, d.ns, 1])), -1)

            # Conditional likelihood
            self.y_pos = Bernoulli(logits=self.p_eta)
            self.y_neg = Bernoulli(logits=self.n_eta)

            self.ll_pos = tf.reduce_sum(self.y_pos.log_prob(1.0))
            self.ll_neg = tf.reduce_sum(self.y_neg.log_prob(0.0))

            self.log_likelihood = self.ll_pos + self.ll_neg

            # Rescale the minibatch likelihood by N / n_minibatch.
            scale = 1.0 * d.N / d.n_minibatch
            self.loss = -(scale * self.log_likelihood + self.log_prior)

            # Training
            optimizer = tf.train.AdamOptimizer()
            self.train = optimizer.minimize(self.loss)
            with self.sess.as_default():
                tf.global_variables_initializer().run()
            variable_summaries('rho', self.rho)
            variable_summaries('alpha', self.alpha)
            with tf.name_scope('objective'):
                tf.summary.scalar('loss', self.loss)
                tf.summary.scalar('priors', self.log_prior)
                tf.summary.scalar('ll_pos', self.ll_pos)
                tf.summary.scalar('ll_neg', self.ll_neg)
            self.summaries = tf.summary.merge_all()
            self.train_writer = tf.summary.FileWriter(self.logdir,
                                                      self.sess.graph)
            self.saver = tf.train.Saver()
            # Register both embedding matrices with the embedding projector.
            config = projector.ProjectorConfig()

            alpha = config.embeddings.add()
            alpha.tensor_name = 'model/embeddings/alpha'
            alpha.metadata_path = '../vocab.tsv'
            rho = config.embeddings.add()
            rho.tensor_name = 'model/embeddings/rho'
            rho.metadata_path = '../vocab.tsv'
            projector.visualize_embeddings(self.train_writer, config)

    def dump(self, fname):
        """Append a pickle of the current ``rho`` and ``alpha`` to ``fname``.

        The file is opened in binary append mode (pickle writes bytes, so a
        text-mode handle fails on Python 3) and closed once the dump is done.
        """
        with self.sess.as_default():
            dat = {'rho': self.rho.eval(), 'alpha': self.alpha.eval()}
        with open(fname, "ab+") as out_file:
            pickle.dump(dat, out_file)

    def plot_params(self, dir_name, labels):
        """Plot 2-D t-SNE projections of ``alpha`` and ``rho``.

        Only the first ``len(labels)`` rows of each matrix are projected; the
        figures are written to ``dir_name + '/alpha.eps'`` and
        ``dir_name + '/rho.eps'`` via ``plot_with_labels``.
        """
        plot_only = len(labels)

        with self.sess.as_default():
            tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
            low_dim_embs_alpha2 = tsne.fit_transform(
                self.alpha.eval()[:plot_only])
            plot_with_labels(low_dim_embs_alpha2[:plot_only],
                             labels[:plot_only], dir_name + '/alpha.eps')

            tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
            low_dim_embs_rho2 = tsne.fit_transform(self.rho.eval()[:plot_only])
            plot_with_labels(low_dim_embs_rho2[:plot_only], labels[:plot_only],
                             dir_name + '/rho.eps')