Example #1
        np.random.seed(1)
        tf.set_random_seed(1)

        x_ph_bin = tf.placeholder(tf.float32, [M, len(binfeats)],
                                  name='x_bin')  # binary inputs
        x_ph_cont = tf.placeholder(tf.float32, [M, len(contfeats)],
                                   name='x_cont')  # continuous inputs
        t_ph = tf.placeholder(tf.float32, [M, 1])
        y_ph = tf.placeholder(tf.float32, [M, 1])

        x_ph = tf.concat([x_ph_bin, x_ph_cont], 1)
        activation = tf.nn.elu

        # CEVAE model (decoder)
        # p(z)
        z = Normal(loc=tf.zeros([tf.shape(x_ph)[0], d]),
                   scale=tf.ones([tf.shape(x_ph)[0], d]))

        # p(x|z)
        hx = fc_net(z, (nh - 1) * [h], [],
                    'px_z_shared',
                    lamba=lamba,
                    activation=activation)
        logits = fc_net(hx, [h], [[len(binfeats), None]],
                        'px_z_bin',
                        lamba=lamba,
                        activation=activation)
        x1 = Bernoulli(logits=logits, dtype=tf.float32, name='bernoulli_px_z')

        mu, sigma = fc_net(
            hx, [h],
            [[len(contfeats), None], [len(contfeats), tf.nn.softplus]],
Example #2
def line_search_dkl(weights, locs, diags, mu_s, cov_s, x, k):
    def softplus(v):
        # log(1 + exp(v)); renamed from "softmax", which this function is not
        return np.log(1 + np.exp(v))

    N_samples = 10

    weights = [weights]

    qt_comps = [
        Normal(loc=tf.convert_to_tensor(locs[i]),
               scale=tf.convert_to_tensor(diags[i])) for i in range(len(locs))
    ]

    qt = Mixture(cat=Categorical(probs=tf.convert_to_tensor(weights)),
                 components=qt_comps,
                 sample_shape=N)

    qt = InfiniteMixtureScipy(stats.multivariate_normal)
    qt.weights = weights[0]
    qt.params = list(
        zip([[l] for l in locs], [[softplus(np.dot(d, d))] for d in diags]))

    sample_q = qt.sample_n(N_samples)

    s = stats.multivariate_normal([mu_s],
                                  np.dot(np.array([cov_s]), np.array([cov_s])))
    sample_s = s.rvs(N_samples)

    new_locs = copy.copy(locs)
    new_diags = copy.copy(diags)
    new_locs.append([mu_s])
    new_diags.append([cov_s])

    gamma = 2. / (k + 2.)
    n_steps = 10
    prog_bar = ed.util.Progbar(n_steps)
    for it in range(n_steps):
        print("line_search iter %d, %.5f" % (it, gamma))
        new_weights = copy.copy(weights)
        new_weights[0] = [(1. - gamma) * w for w in new_weights[0]]
        new_weights[0].append(gamma)

        q_next = InfiniteMixtureScipy(stats.multivariate_normal)
        q_next.weights = new_weights[0]
        q_next.params = list(
            zip([[l] for l in new_locs], [[np.dot(d, d)] for d in new_diags]))

        def px_qx_ratio_log_prob(v):
            Lambda = 1.
            ret = x.log_prob([v]).eval()[0] - q_next.log_prob(v)
            ret /= Lambda
            return ret

        rez_s = [
            px_qx_ratio_log_prob(sample_s[ss]) for ss in range(len(sample_s))
        ]

        rez_q = [
            px_qx_ratio_log_prob(sample_q[ss]) for ss in range(len(sample_q))
        ]

        gamma = gamma + 0.1 * (sum(rez_s) - sum(rez_q)) / (N_samples *
                                                           (it + 1.))

        if gamma >= 1 or gamma <= 0:
            gamma = max(min(gamma, 1.), 0.)
            break
    return gamma
Example #3
def construct_normal(dims, iter, name='', sample_shape=N):
    loc = tf.get_variable(name + "_loc%d" % iter,
                          initializer=tf.random_normal(dims) + np.random.normal())
    scale = tf.get_variable(name + "_scale%d" % iter,
                            initializer=tf.random_normal(dims))
    return Normal(loc=loc, scale=tf.nn.softplus(scale), sample_shape=sample_shape)
Example #4
def generative_adversarial_network_example():
    ed.set_seed(42)

    N = 40  # Number of data points.
    D = 1  # Number of features.

    X_train, y_train = build_toy_dataset(N)

    #--------------------
    # Model.
    with tf.name_scope('model'):
        W_0 = Normal(loc=tf.zeros([D, 10]), scale=tf.ones([D, 10]), name='W_0')
        W_1 = Normal(loc=tf.zeros([10, 10]),
                     scale=tf.ones([10, 10]),
                     name='W_1')
        W_2 = Normal(loc=tf.zeros([10, 1]), scale=tf.ones([10, 1]), name='W_2')
        b_0 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name='b_0')
        b_1 = Normal(loc=tf.zeros(10), scale=tf.ones(10), name='b_1')
        b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name='b_2')

        X = tf.placeholder(tf.float32, [N, D], name='X')
        y = Normal(loc=neural_network(X, W_0, W_1, W_2, b_0, b_1, b_2),
                   scale=0.1 * tf.ones(N),
                   name='y')

    #--------------------
    # Inference.
    with tf.variable_scope('posterior'):
        with tf.variable_scope('qW_0'):
            loc = tf.get_variable('loc', [D, 10])
            scale = tf.nn.softplus(tf.get_variable('scale', [D, 10]))
            qW_0 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qW_1'):
            loc = tf.get_variable('loc', [10, 10])
            scale = tf.nn.softplus(tf.get_variable('scale', [10, 10]))
            qW_1 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qW_2'):
            loc = tf.get_variable('loc', [10, 1])
            scale = tf.nn.softplus(tf.get_variable('scale', [10, 1]))
            qW_2 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qb_0'):
            loc = tf.get_variable('loc', [10])
            scale = tf.nn.softplus(tf.get_variable('scale', [10]))
            qb_0 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qb_1'):
            loc = tf.get_variable('loc', [10])
            scale = tf.nn.softplus(tf.get_variable('scale', [10]))
            qb_1 = Normal(loc=loc, scale=scale)
        with tf.variable_scope('qb_2'):
            loc = tf.get_variable('loc', [1])
            scale = tf.nn.softplus(tf.get_variable('scale', [1]))
            qb_2 = Normal(loc=loc, scale=scale)

    inference = ed.KLqp(
        {
            W_0: qW_0,
            b_0: qb_0,
            W_1: qW_1,
            b_1: qb_1,
            W_2: qW_2,
            b_2: qb_2
        },
        data={
            X: X_train,
            y: y_train
        })
    inference.run(logdir='log')
Example #5
import edward as ed
import tensorflow as tf

from edward.models import Variational, Normal
from edward.stats import multivariate_normal
from edward.util import get_dims


class NormalPosterior:
    """
    p(x, z) = p(z) = p(z | x) = Normal(z; mu, Sigma)
    """
    def __init__(self, mu, Sigma):
        self.mu = mu
        self.Sigma = Sigma
        self.num_vars = get_dims(mu)[0]

    def log_prob(self, xs, zs):
        return multivariate_normal.logpdf(zs, self.mu, self.Sigma)


ed.set_seed(42)
mu = tf.constant([1.0, 1.0])
Sigma = tf.constant([[1.0, 0.1], [0.1, 1.0]])
model = NormalPosterior(mu, Sigma)
variational = Variational()
variational.add(Normal(model.num_vars))

inference = ed.MFVI(model, variational)
inference.run(n_iter=10000)
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

from edward.models import Categorical, Normal
import edward as ed

# Use the TensorFlow method to download and/or load the data.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# parameters
N = 256   # number of images in a minibatch.
D = 784   # number of features.
K = 10    # number of classes.


# Create a placeholder to hold the data (in minibatches) in a TensorFlow graph.
x = tf.placeholder(tf.float32, [None, D])
# Normal(0,1) priors for the variables. Note that the syntax assumes TensorFlow 1.1.
w = Normal(loc=tf.zeros([D, K]), scale=tf.ones([D, K]))
b = Normal(loc=tf.zeros(K), scale=tf.ones(K))
# Categorical likelihood for classification.
y = Categorical(logits=tf.matmul(x, w) + b)

# Construct q(w) and q(b); in this case we assume Normal distributions.
qw = Normal(loc=tf.Variable(tf.random_normal([D, K])),
              scale=tf.nn.softplus(tf.Variable(tf.random_normal([D, K]))))
qb = Normal(loc=tf.Variable(tf.random_normal([K])),
              scale=tf.nn.softplus(tf.Variable(tf.random_normal([K]))))

# We use a placeholder for the labels in anticipation of the training data.
y_ph = tf.placeholder(tf.int32, [N])
# Define the VI inference technique, i.e. minimise the KL divergence between q and p.
inference = ed.KLqp({w: qw, b: qb}, data={y:y_ph})
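Below is a minimal sketch, not part of the original snippet, of how this minibatch KLqp inference is typically initialized and run on MNIST. The likelihood scaling factor, the mnist.train.next_batch feed, and the iteration counts follow the common Edward tutorial pattern and are assumptions here rather than the original author's code.

import numpy as np

# Scale the likelihood so each minibatch stands in for the full training set.
inference.initialize(n_iter=5000, n_print=100,
                     scale={y: float(mnist.train.num_examples) / N})
sess = ed.get_session()
tf.global_variables_initializer().run()
for _ in range(inference.n_iter):
    X_batch, Y_batch = mnist.train.next_batch(N)
    Y_batch = np.argmax(Y_batch, axis=1)  # Categorical expects integer class labels
    info_dict = inference.update(feed_dict={x: X_batch, y_ph: Y_batch})
    inference.print_progress(info_dict)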
Example #7
def define_variational_distribution(N):
    qf = Normal(loc=tf.Variable(tf.random_normal([N])),
                scale=tf.nn.softplus(tf.Variable(tf.random_normal([N]))))
    return qf
Example #8
def main(_):
    # Generate data
    true_mu = np.array([-1.0, 0.0, 1.0], np.float32) * 10
    true_sigmasq = np.array([1.0**2, 2.0**2, 3.0**2], np.float32)
    true_pi = np.array([0.2, 0.3, 0.5], np.float32)
    N = 10000
    K = len(true_mu)
    true_z = np.random.choice(np.arange(K), size=N, p=true_pi)
    x_data = true_mu[true_z] + np.random.randn(N) * np.sqrt(
        true_sigmasq[true_z])

    # Prior hyperparameters
    pi_alpha = np.ones(K, dtype=np.float32)
    mu_sigma = np.std(true_mu)
    sigmasq_alpha = 1.0
    sigmasq_beta = 2.0

    # Model
    pi = Dirichlet(pi_alpha)
    mu = Normal(0.0, mu_sigma, sample_shape=K)
    sigmasq = InverseGamma(sigmasq_alpha, sigmasq_beta, sample_shape=K)
    x = ParamMixture(pi, {
        'loc': mu,
        'scale': tf.sqrt(sigmasq)
    },
                     Normal,
                     sample_shape=N)
    z = x.cat

    # Conditionals
    mu_cond = ed.complete_conditional(mu)
    sigmasq_cond = ed.complete_conditional(sigmasq)
    pi_cond = ed.complete_conditional(pi)
    z_cond = ed.complete_conditional(z)

    sess = ed.get_session()

    # Initialize randomly
    pi_est, mu_est, sigmasq_est, z_est = sess.run([pi, mu, sigmasq, z])

    print('Initial parameters:')
    print('pi:', pi_est)
    print('mu:', mu_est)
    print('sigmasq:', sigmasq_est)
    print()

    # Gibbs sampler
    cond_dict = {
        pi: pi_est,
        mu: mu_est,
        sigmasq: sigmasq_est,
        z: z_est,
        x: x_data
    }
    t0 = time()
    T = 500
    for t in range(T):
        z_est = sess.run(z_cond, cond_dict)
        cond_dict[z] = z_est
        pi_est, mu_est = sess.run([pi_cond, mu_cond], cond_dict)
        cond_dict[pi] = pi_est
        cond_dict[mu] = mu_est
        sigmasq_est = sess.run(sigmasq_cond, cond_dict)
        cond_dict[sigmasq] = sigmasq_est
    print('took %.3f seconds to run %d iterations' % (time() - t0, T))

    print()
    print('Final sample for parameters:')
    print('pi:', pi_est)
    print('mu:', mu_est)
    print('sigmasq:', sigmasq_est)
    print()

    print()
    print('True parameters:')
    print('pi:', true_pi)
    print('mu:', true_mu)
    print('sigmasq:', true_sigmasq)
    print()

    plt.figure(figsize=[10, 10])
    plt.subplot(2, 1, 1)
    plt.hist(x_data, 50)
    plt.title('Empirical Distribution of $x$')
    plt.xlabel('$x$')
    plt.ylabel('frequency')
    xl = plt.xlim()
    plt.subplot(2, 1, 2)
    plt.hist(sess.run(x, {pi: pi_est, mu: mu_est, sigmasq: sigmasq_est}), 50)
    plt.title(r"Predictive distribution $p(x \mid \mathrm{inferred }\ "
              r"\pi, \mu, \sigma^2)$")
    plt.xlabel('$x$')
    plt.ylabel('frequency')
    plt.xlim(xl)
    plt.show()
Example #9
import edward as ed
import tensorflow as tf

from edward.models import Normal
from edward.stats import multivariate_normal
from edward.util import get_dims


class NormalPosterior:
  """p(x, z) = p(z) = p(z | x) = Normal(z; mu, sigma)"""
  def __init__(self, mu, sigma):
    self.mu = mu
    self.sigma = sigma
    self.n_vars = get_dims(mu)[0]

  def log_prob(self, xs, zs):
    return multivariate_normal.logpdf(zs['z'], self.mu, self.sigma)


ed.set_seed(42)
mu = tf.constant([1.0, 1.0])
sigma = tf.constant([[1.0, 0.1],
                     [0.1, 1.0]])
model = NormalPosterior(mu, sigma)

qz_mu = tf.Variable(tf.random_normal([model.n_vars]))
qz_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([model.n_vars])))
qz = Normal(mu=qz_mu, sigma=qz_sigma)

inference = ed.MFVI({'z': qz}, model_wrapper=model)
inference.run(n_iter=300)
  y = y.astype(np.float32)
  return X, y


ed.set_seed(42)

N = 40  # number of data points
D = 1  # number of features

# DATA
X_train, y_train = build_toy_dataset(N)
X_test, y_test = build_toy_dataset(N)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
T = 5000                        # Number of samples.
nburn = 100                     # Number of burn-in samples.
stride = 10                    # Frequency with which to plot samples.
qw = Empirical(params=tf.Variable(tf.random_normal([T, D])))
qb = Empirical(params=tf.Variable(tf.random_normal([T, 1])))

inference = ed.SGHMC({w: qw, b: qb}, data={X: X_train, y: y_train})
inference.run(step_size=1e-3)


# CRITICISM
Example #11
 def make_bayes_net(self, topk=np.inf, filename='', visualize_graph=False):
     N, D = self.design_matrix.shape
     num_discrete_variables = self.num_discrete_variables
     discrete_variable_idxs = tuple(np.arange(num_discrete_variables))
     discrete_variable_outs = [
         dict(zip(*np.unique(self.design_matrix[:,
                                                idx], return_index=True)))
         for idx in discrete_variable_idxs
     ]
     discrete_variable_outs_size = [
         len(out) for out in discrete_variable_outs
     ]
     discrete_variable_prior_pi = [
         tf.convert_to_tensor(
             Dirichlet(concentration=tf.ones(
                 [discrete_variable_outs_size[idx]]),
                       name='Dirichlet_d_pi_' + str(idx)))
         for idx in range(num_discrete_variables)
     ]
     discrete_variable_vars = [
         tf.convert_to_tensor(
             Categorical(logits=tf.tile(
                 tf.expand_dims(discrete_variable_prior_pi[idx], axis=0),
                 [N, discrete_variable_outs_size[idx]]),
                         name='Categorical_d_' + str(idx)))
         for idx in range(num_discrete_variables)
     ]
     continus_variable_prior_w = dict()
     continus_variable_prior_b = dict()
     continus_variable_prior_sigma = dict()
     continus_variable_vars = dict()
     tmp_idx = 1
     for idx in self.continus_variable_idxs:
         if tmp_idx % 100 == 0: print(tmp_idx)
         tmp_idx += 1
         discrete_pars = np.where(self.network[discrete_variable_idxs,
                                               idx])[0]
         discrete_par_size = [
             discrete_variable_outs_size[par] for par in discrete_pars
         ]
         if len(discrete_par_size) == 0:
             discrete_par_vars = tf.zeros([N, 0])
         elif len(discrete_par_size) == 1:
             discrete_par_vars = tf.expand_dims(
                 discrete_variable_vars[discrete_pars[0]], axis=1)
         else:
             discrete_par_vars = tf.stack(
                 [discrete_variable_vars[par] for par in discrete_pars],
                 axis=1)
         continus_pars = self.continus_variable_idxs[np.where(
             self.network[self.continus_variable_idxs, idx])[0]]
         if topk < len(continus_pars):
             continus_pars = list(
                 map(
                     lambda x: x[0],
                     sorted(zip(continus_pars,
                                self.network[continus_pars, :].sum(1)),
                            key=lambda x: x[1],
                            reverse=True)[:topk]))
         continus_par_size = len(continus_pars)
         if continus_par_size == 0: continus_par_vars = tf.zeros([N, 0])
         elif continus_par_size == 1:
             continus_par_vars = tf.expand_dims(
                 continus_variable_vars[continus_pars[0]], axis=1)
         else:
             continus_par_vars = tf.stack(
                 [continus_variable_vars[par] for par in continus_pars],
                 axis=1)
         continus_variable_prior_w[idx] = tf.convert_to_tensor(
             Normal(loc=tf.zeros(discrete_par_size + [continus_par_size]),
                    scale=tf.ones(discrete_par_size + [continus_par_size]),
                    name='Normal_c_w_' + str(idx)))
         continus_variable_prior_b[idx] = tf.convert_to_tensor(
             Normal(loc=tf.zeros(discrete_par_size),
                    scale=tf.ones(discrete_par_size),
                    name='Normal_c_b_' + str(idx)))
         continus_variable_prior_sigma[idx] = tf.convert_to_tensor(
             Normal(loc=tf.zeros([1]),
                    scale=tf.ones([1]),
                    name='Normal_c_sigma_' + str(idx)))
         continus_variable_vars[idx] = tf.convert_to_tensor(Normal(loc=tf.add_n([tf.reduce_sum(tf.multiply(continus_par_vars, tf.gather_nd(continus_variable_prior_w[idx], discrete_par_vars)), axis=1), tf.gather_nd(continus_variable_prior_b[idx], discrete_par_vars)]), \
                                             scale=continus_variable_prior_sigma[idx], name='Normal_c_'+str(idx)))
     for i in range(num_discrete_variables):
         tf.add_to_collection('d_pi', discrete_variable_prior_pi[i])
         tf.add_to_collection('d', discrete_variable_vars[i])
     for i in self.continus_variable_idxs:
         tf.add_to_collection('c_w', continus_variable_prior_w[i])
         tf.add_to_collection('c_b', continus_variable_prior_b[i])
         tf.add_to_collection('c_sigma', continus_variable_prior_sigma[i])
         tf.add_to_collection('c', continus_variable_vars[i])
     filename = '_'.join(
         [self.data_filename, self.name, filename, 'bayes_net.meta'])
     tf.train.export_meta_graph(
         filename,
         as_text=True,
         collection_list=['d_pi', 'd', 'c_w', 'c_b', 'c_sigma', 'c'])
     if visualize_graph:
          # for TensorBoard; run: tensorboard --logdir=.
          sess = tf.Session()
          tf.summary.FileWriter(filename + '_tensorboard', sess.graph)
Example #12
	os.makedirs(out_model)


# DATA. MNIST batches are fed at training time.

#(x_train, _), (x_test, _) = mnist(data_dir)
x_data=np.load("animeface-character-dataset/anime.npy")
x_train=x_data

x_train_generator = generator(x_train, M)
x_ph = tf.placeholder(tf.float32, [M, 128*128*nch])

# MODEL
with tf.variable_scope("Gen"):
	#eps = Uniform(low=tf.zeros([M, D]) - 1.0, high=tf.ones([M, D]))
	eps = Normal(loc=tf.zeros([M, D]), scale=tf.ones([M, D]))
	x = generative_network(eps)

# INFERENCE
#optimizer = tf.train.RMSPropOptimizer(learning_rate=5e-5)
#optimizer_d = tf.train.RMSPropOptimizer(learning_rate=5e-6)
optimizer = tf.train.AdamOptimizer(learning_rate=5e-5)
optimizer_d = tf.train.AdamOptimizer(learning_rate=5e-5)

inference = ed.GANInference(
		data={x: x_ph}, discriminator=discriminative_network)
inference.initialize(
		optimizer=optimizer, optimizer_d=optimizer_d)
		#n_iter=15000, n_print=1000)
#		n_iter=15000, n_print=1000, clip=0.01, penalty=None)
Example #13
def main():
    try:
        if not ('.csv' in args.input): raise Exception('input_format')
        if not ('.pkl' in args.output): raise Exception('output_format')

        with open(args.input, 'rb') as input:

            # DATA
            reader = csv.reader(input, delimiter=';')
            next(reader)  # advance past the first row
            n = 0
            xn = []
            for track in reader:
                print('Track {}'.format(n))
                track = format_track(track[0])
                xn.append(track)
                n += 1
            xn = np.asarray(xn)  # N x D
            xn = xn.T  # D x N

            D = len(xn)
            N = len(xn[0])

            # MODEL
            ds = tf.contrib.distributions
            sigma = ed.models.Gamma(1.0, 1.0)

            alpha = ed.models.Gamma(tf.ones([K]), tf.ones([K]))
            w = Normal(mu=tf.zeros([D, K]),
                       sigma=tf.reshape(tf.tile(alpha, [D]), [D, K]))
            z = Normal(mu=tf.zeros([K, N]), sigma=tf.ones([K, N]))
            mu = Normal(mu=tf.zeros([D]), sigma=tf.ones([D]))
            x = Normal(mu=tf.matmul(w, z) +
                       tf.transpose(tf.reshape(tf.tile(mu, [N]), [N, D])),
                       sigma=sigma * tf.ones([D, N]))

            # INFERENCE
            qalpha = ed.models.TransformedDistribution(
                distribution=ed.models.NormalWithSoftplusSigma(
                    mu=tf.Variable(tf.random_normal([K])),
                    sigma=tf.Variable(tf.random_normal([K]))),
                bijector=ds.bijector.Exp(),
                name='qalpha')

            qw = Normal(mu=tf.Variable(tf.random_normal([D, K])),
                        sigma=tf.nn.softplus(
                            tf.Variable(tf.random_normal([D, K]))))
            qz = Normal(mu=tf.Variable(tf.random_normal([K, N])),
                        sigma=tf.nn.softplus(
                            tf.Variable(tf.random_normal([K, N]))))

            data_mean = np.mean(xn, axis=1).astype(np.float32, copy=False)

            qmu = Normal(mu=tf.Variable(data_mean + tf.random_normal([D])),
                         sigma=tf.nn.softplus(
                             tf.Variable(tf.random_normal([D]))))

            qsigma = ed.models.TransformedDistribution(
                distribution=ed.models.NormalWithSoftplusSigma(
                    mu=tf.Variable(0.0), sigma=tf.Variable(1.0)),
                bijector=ds.bijector.Exp(),
                name='qsigma')

            inference = ed.KLqp(
                {
                    alpha: qalpha,
                    w: qw,
                    z: qz,
                    mu: qmu,
                    sigma: qsigma
                },
                data={x: xn})
            inference.run(n_iter=N_ITERS, n_samples=N_SAMPLES)

            alphas = tf.exp(qalpha.distribution.mean()).eval()
            alphas.sort()
            # mean_alphas = np.mean(alphas)
            print('Alphas: {}'.format(alphas))

            points = qz.eval()
            xn_new = []
            for i in range(len(alphas)):
                # if alphas[i] > (mean_alphas * 1.2):
                xn_new.append(points[i])
            xn_new = np.asarray(xn_new).T

            # Normalization
            maxs = np.max(xn_new, axis=0)
            mins = np.min(xn_new, axis=0)
            rng = maxs - mins
            high = 100.0
            low = 0.0
            xn_new = high - (((high - low) * (maxs - xn_new)) / rng)

            print('New points: {}'.format(xn_new))
            print('Number of points: {}'.format(len(xn_new)))
            print('Point dimensions: {}'.format(len(xn_new[0])))

            with open(args.output, 'w') as output:
                pkl.dump({'xn': np.array(xn_new)}, output)

    except IOError:
        print('File not found!')
    except Exception as e:
        if e.args[0] == 'input_format': print('Input must be a CSV file')
        elif e.args[0] == 'output_format': print('Output must be a PKL file')
        else:
            print('Unexpected error: {}'.format(sys.exc_info()[0]))
            raise
Example #14
    def test_model_wrapper(self):
        tf.InteractiveSession()
        model = NormalNormal()
        qmu = Normal(mu=tf.Variable(0.0), sigma=tf.constant(1.0))

        ed.Inference({'mu': qmu}, model_wrapper=model)
ed.set_seed(42)

N = 40  # number of data points
D = 1  # number of features

x_train, y_train = build_toy_dataset(N)

model = LinearModel()

qw_mu = tf.Variable(tf.random_normal([D]))
qw_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([D])))
qb_mu = tf.Variable(tf.random_normal([]))
qb_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([])))

qw = Normal(mu=qw_mu, sigma=qw_sigma)
qb = Normal(mu=qb_mu, sigma=qb_sigma)

# Set up figure
fig = plt.figure(figsize=(8, 8), facecolor='white')
ax = fig.add_subplot(111, frameon=False)
plt.ion()
plt.show(block=False)

sess = ed.get_session()
data = {'x': x_train, 'y': y_train}
inference = ed.KLqp({'w': qw, 'b': qb}, data, model)
inference.initialize(n_samples=5, n_iter=250, n_print=5)

init = tf.initialize_all_variables()
init.run()
Example #16
period_pre = tf.Variable(np.log(np.exp(7.0 * len_init) - 1), dtype=tf.float32)
period_len_pre = tf.Variable(1.0)
period_var_pre = tf.Variable(np.log(np.exp(0.5) - 1), dtype=tf.float32)  #

period = tf.nn.softplus(period_pre)
period_length = tf.nn.softplus(period_len_pre)

Kuu = kernelfx(xu, xu)

fu_loc = tf.zeros((p, m))
fu_scale = tf.cast(tf.cholesky(Kuu + offset * tf.eye(m, dtype=tf.float64),
                               name='fu_scale'),
                   dtype=tf.float32)

u = MultivariateNormalTriL(loc=fu_loc, scale_tril=fu_scale, name='pu')
x_var = Normal(loc=tf.zeros((M, Q)), scale=1.0, name='x_var')

idx_ph = tf.placeholder(tf.int32, M)

z = tf.constant(z_init, dtype=tf.float32)
x = tf.concat([x_var, tf.gather(z, idx_ph)], 1, name='x')

print(x.shape)

Kfu = kernelfx(x, xu)

Kff = kernelfx(x, x)

Kuuinv = tf.matrix_inverse(Kuu + offset * tf.eye(m, dtype=tf.float64))
KfuKuuinv = tf.matmul(Kfu, Kuuinv)
KffKuuinvU = [
Example #17
# Build graph for prior distributions
#
if str(sys.argv[5]) == 'laplace':
    W_0 = Laplace(loc=tf.zeros([D, n_hidden]),
                  scale=std**2 / D * tf.ones([D, n_hidden]))
    W_1 = Laplace(loc=tf.zeros([n_hidden, n_hidden]),
                  scale=std**2 / n_hidden * tf.ones([n_hidden, n_hidden]))
    W_2 = Laplace(loc=tf.zeros([n_hidden, K]),
                  scale=std**2 / n_hidden * tf.ones([n_hidden, K]))
    b_0 = Laplace(loc=tf.zeros(n_hidden), scale=std**2 / D * tf.ones(n_hidden))
    b_1 = Laplace(loc=tf.zeros(n_hidden),
                  scale=std**2 / n_hidden * tf.ones(n_hidden))
    b_2 = Laplace(loc=tf.zeros(K), scale=std**2 / n_hidden * tf.ones(K))

if str(sys.argv[5]) == 'normal':
    W_0 = Normal(loc=tf.zeros([D, n_hidden]),
                 scale=std * D**(-.5) * tf.ones([D, n_hidden]))
    W_1 = Normal(loc=tf.zeros([n_hidden, n_hidden]),
                 scale=std * n_hidden**(-.5) * tf.ones([n_hidden, n_hidden]))
    W_2 = Normal(loc=tf.zeros([n_hidden, K]),
                 scale=std * n_hidden**(-.5) * tf.ones([n_hidden, K]))
    b_0 = Normal(loc=tf.zeros(n_hidden),
                 scale=std * D**(-.5) * tf.ones(n_hidden))
    b_1 = Normal(loc=tf.zeros(n_hidden),
                 scale=std * n_hidden**(-.5) * tf.ones(n_hidden))
    b_2 = Normal(loc=tf.zeros(K), scale=std * n_hidden**(-.5) * tf.ones(K))

if str(sys.argv[5]) == 'T':
    W_0 = StudentT(df=df * tf.ones([D, n_hidden]),
                   loc=tf.zeros([D, n_hidden]),
                   scale=std**2 / D * tf.ones([D, n_hidden]))
    W_1 = StudentT(df=df * tf.ones([n_hidden, n_hidden]),
from tensorflow.examples.tutorials.mnist import input_data
from edward.models import Categorical, Normal
import edward as ed
import tensorflow as tf

# Use the TensorFlow method to download and/or load the data.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# parameters
N = 128  # number of images in a minibatch.
D = 784  # number of features.
K = 10  # number of classes.

# Create a placeholder to hold the data (in minibatches) in a TensorFlow graph.
x = tf.placeholder(tf.float32, [None, D])
# Normal(0,1) priors for the variables. Note that the syntax assumes TensorFlow 1.1.
w1 = Normal(loc=tf.zeros([D, 256]), scale=tf.ones([D, 256]))
b1 = Normal(loc=tf.zeros(256), scale=tf.ones(256))
l1 = tf.nn.leaky_relu(tf.matmul(x, w1) + b1)

w2 = Normal(loc=tf.zeros([256, 256]), scale=tf.ones([256, 256]))
b2 = Normal(loc=tf.zeros(256), scale=tf.ones(256))
l2 = tf.nn.leaky_relu(tf.matmul(l1, w2) + b2)

w3 = Normal(loc=tf.zeros([256, K]), scale=tf.ones([256, K]))
b3 = Normal(loc=tf.zeros(K), scale=tf.ones(K))

# Categorical likelihood for classification.
y = Categorical(logits=tf.matmul(l2, w3) + b3)

# Construct q(w) and q(b); in this case we assume Normal distributions.
qw1 = Normal(loc=tf.Variable(tf.random_normal([D, 256])),
Example #19
ed.set_seed(42)

n_students = 50000
n_questions = 2000
n_obs = 200000

# DATA
data, true_s_etas, true_q_etas = build_toy_dataset(n_students, n_questions,
                                                   n_obs)
obs = data['outcomes'].values
student_ids = data['student_id'].values.astype(int)
question_ids = data['question_id'].values.astype(int)

# MODEL
lnvar_students = Normal(loc=tf.zeros(1), scale=tf.ones(1))
lnvar_questions = Normal(loc=tf.zeros(1), scale=tf.ones(1))

sigma_students = tf.sqrt(tf.exp(lnvar_students))
sigma_questions = tf.sqrt(tf.exp(lnvar_questions))

overall_mu = Normal(loc=tf.zeros(1), scale=tf.ones(1))

student_etas = Normal(loc=tf.zeros(n_students),
                      scale=sigma_students * tf.ones(n_students))
question_etas = Normal(loc=tf.zeros(n_questions),
                       scale=sigma_questions * tf.ones(n_questions))

observation_logodds = tf.gather(student_etas, student_ids) + \
    tf.gather(question_etas, question_ids) + \
    overall_mu
Example #20
    # M: nb_datapoints
    # N: nb_components
    hpos = tf.reshape(hpos, (1, 2))
    euclidean_distance = tf.square(
        tf.subtract(
            gpos,  # shape=(M, 2)
            tf.expand_dims(hpos, axis=1)  # shape=(N, 1, 2)
        )  # shape=(N, M, 2)
    )
    distance_factor = tf.divide(1., euclidean_distance)  # shape=(N, M, 2)
    mean = tf.reduce_sum(distance_factor, axis=(0, ))  # shape=(M, 2)
    return mean


# (x, y) ~ Normal([0.5, 0.5], [0.5, 0.5])
galaxies_pos = Normal(mu=tf.fill([nb_datapoints, nb_features], 0.5),
                      sigma=tf.fill([nb_datapoints, nb_features], POS_STD))

# latent variable z
mu = Normal(mu=tf.fill([nb_components, nb_features], 0.5),
            sigma=tf.fill([nb_components, nb_features], POS_STD))
sigma = InverseGamma(alpha=tf.ones([nb_components, nb_features]),
                     beta=tf.ones([nb_components, nb_features]))
cat = Categorical(logits=tf.zeros([nb_datapoints, nb_components]))
components = [
    MultivariateNormalDiag(mu=calculte_mean_from_distance_factor(
        galaxies_pos, mu[k]),
                           diag_stdev=tf.ones([nb_datapoints, 1]) * sigma[k])
    for k in range(nb_components)
]
x = Mixture(cat=cat, components=components)
Example #21
def define_ard_variational_distribution(D):
    qgamma = Normal(loc=tf.Variable(tf.random_normal([D])),
                    scale=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
    return qgamma
Example #22
def max_pool_2x2(x):
    return tf.nn.max_pool(x,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding="SAME")


x = tf.placeholder(tf.float32, shape=[N, 784], name="x_placeholder")
#y_ = tf.placeholder("float", shape = [None, 10])
y_ = tf.placeholder(tf.int32, [N], name="y_placeholder")

x_image = tf.reshape(x, [-1, 28, 28, 1])

with tf.name_scope("model"):
    W_conv1 = Normal(loc=tf.zeros([5, 5, 1, 32]),
                     scale=tf.ones([5, 5, 1, 32]),
                     name="W_conv1")
    b_conv1 = Normal(loc=tf.zeros([32]), scale=tf.ones([32]), name="b_conv1")
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    # h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1.value()) + b_conv1.value() )    may be necessary
    h_pool1 = max_pool_2x2(h_conv1)

    W_conv2 = Normal(loc=tf.zeros([5, 5, 32, 64]),
                     scale=tf.ones([5, 5, 32, 64]),
                     name="W_conv2")
    b_conv2 = Normal(loc=tf.zeros([64]), scale=tf.ones([64]), name="b_conv2")
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    W_fc1 = Normal(loc=tf.zeros([7 * 7 * 64, 64]),
                   scale=tf.ones([7 * 7 * 64, 64]),
Example #23

ed.set_seed(142)

N = 5000  # number of data points
M = 100  # minibatch size
D = 2  # data dimensionality
K = 1  # latent dimensionality

# DATA

x_train = build_toy_dataset(N, D, K)

# MODEL

w = Normal(mu=tf.zeros([D, K]), sigma=10.0 * tf.ones([D, K]))
z = Normal(mu=tf.zeros([M, K]), sigma=tf.ones([M, K]))
x = Normal(mu=tf.matmul(w, z, transpose_b=True), sigma=tf.ones([D, M]))

# INFERENCE

qw_variables = [
    tf.Variable(tf.random_normal([D, K])),
    tf.Variable(tf.random_normal([D, K]))
]
qw = Normal(mu=qw_variables[0], sigma=tf.nn.softplus(qw_variables[1]))

qz_variables = [
    tf.Variable(tf.random_normal([N, K])),
    tf.Variable(tf.random_normal([N, K]))
]
    def train(self, X_train, y_train, X_val, is_print=True):
        ''' set up BNN and run HMC inference '''
        def neural_network(X):
            # set up the BNN structure using tf

            if self.activation_fn == 'relu':
                h = tf.maximum(tf.matmul(X, W_0) + b_0, 0)  # relu
            elif self.activation_fn == 'Lrelu':
                a = 0.2
                h = tf.maximum(
                    tf.matmul(X, W_0) + b_0,
                    a * (tf.matmul(X, W_0) + b_0))  # leaky relu
            elif self.activation_fn == 'erf':
                h = tf.erf(tf.matmul(X, W_0) + b_0)
            elif self.activation_fn == 'tanh':
                h = tf.tanh(tf.matmul(X, W_0) + b_0)
                # h = tf.tanh(1.23*tf.matmul(X, W_0) + b_0) # add 1.23 for close to GP erf
            elif self.activation_fn == 'sigmoid':
                h = tf.sigmoid(tf.matmul(X, W_0) + b_0)
            elif self.activation_fn == 'softplus':
                self.c = 2.  # if this is bigger -> relu behaviour, but less 'soft'
                h = tf.divide(
                    tf.log(
                        tf.exp(tf.multiply(tf.matmul(X, W_0) + b_0, self.c)) + 1),
                    self.c)
            elif self.activation_fn == 'rbf':
                self.beta_2 = 1 / (2 * self.g_var)
                h = tf.exp(-self.beta_2 * tf.square(X - W_0))

            h = tf.matmul(h, W_1)  #+ b_1
            return tf.reshape(h, [-1])

        def neural_network_deep(X):
            # set up the BNN structure using tf

            if self.activation_fn == 'relu':
                h1 = tf.maximum(tf.matmul(X, W_0) + b_0, 0)  # relu
                h = tf.maximum(tf.matmul(h1, W_1) + b_1, 0)  # relu
            elif self.activation_fn == 'Lrelu':
                a = 0.2
                h1 = tf.maximum(
                    tf.matmul(X, W_0) + b_0,
                    a * (tf.matmul(X, W_0) + b_0))  # leaky relu
                h = tf.maximum(
                    tf.matmul(h1, W_1) + b_1,
                    a * (tf.matmul(h1, W_1) + b_1))  # leaky relu
            elif self.activation_fn == 'erf':
                h1 = tf.erf(tf.matmul(X, W_0) + b_0)
                h = tf.erf(tf.matmul(h1, W_1) + b_1)
            else:
                raise Exception('tp: activation not implemented')

            h = tf.matmul(h, W_2)  #+ b_2
            return tf.reshape(h, [-1])

        if self.activation_fn == 'relu' or self.activation_fn == 'softplus' or self.activation_fn == 'Lrelu':
            init_stddev_0_w = np.sqrt(self.w_0_var)  # /d_in
            init_stddev_0_b = np.sqrt(self.b_0_var)  # /d_in
            init_stddev_1_w = 1.0 / np.sqrt(
                self.hidden_size)  #*np.sqrt(10) # 2nd layer init. dist
        elif self.activation_fn == 'tanh' or self.activation_fn == 'erf':
            init_stddev_0_w = np.sqrt(
                self.w_0_var)  # 1st layer init. dist for weights
            init_stddev_0_b = np.sqrt(self.b_0_var)  # for bias
            init_stddev_1_w = 1.0 / np.sqrt(
                self.hidden_size)  # 2nd layer init. dist
        elif self.activation_fn == 'rbf':
            init_stddev_0_w = np.sqrt(self.u_var)  # centres = sig_u
            init_stddev_0_b = np.sqrt(self.g_var)  # fixed /beta
            init_stddev_1_w = 1.0 / np.sqrt(
                self.hidden_size)  # 2nd layer init. dist

        n = X_train.shape[0]
        X_dim = X_train.shape[1]
        y_dim = 1  #y_train.shape[1]

        with tf.name_scope("model"):
            W_0 = Normal(loc=tf.zeros([X_dim, self.hidden_size]),
                         scale=init_stddev_0_w *
                         tf.ones([X_dim, self.hidden_size]),
                         name="W_0")
            if self.deep_NN == False:
                W_1 = Normal(loc=tf.zeros([self.hidden_size, y_dim]),
                             scale=init_stddev_1_w *
                             tf.ones([self.hidden_size, y_dim]),
                             name="W_1")
                b_0 = Normal(loc=tf.zeros(self.hidden_size),
                             scale=init_stddev_0_b * tf.ones(self.hidden_size),
                             name="b_0")
                b_1 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_1")
            else:
                W_1 = Normal(
                    loc=tf.zeros([self.hidden_size, self.hidden_size]),
                    scale=init_stddev_1_w *
                    tf.ones([self.hidden_size, self.hidden_size]),
                    name="W_1")
                b_0 = Normal(loc=tf.zeros(self.hidden_size),
                             scale=init_stddev_0_b * tf.ones(self.hidden_size),
                             name="b_0")
                W_2 = Normal(loc=tf.zeros([self.hidden_size, y_dim]),
                             scale=init_stddev_1_w *
                             tf.ones([self.hidden_size, y_dim]),
                             name="W_2")
                b_1 = Normal(loc=tf.zeros(self.hidden_size),
                             scale=init_stddev_1_w * tf.ones(self.hidden_size),
                             name="b_1")
                b_2 = Normal(loc=tf.zeros(1), scale=tf.ones(1), name="b_2")

            X = tf.placeholder(tf.float32, [n, X_dim], name="X")
            if self.deep_NN == False:
                y = Normal(loc=neural_network(X),
                           scale=np.sqrt(self.data_noise) * tf.ones(n),
                           name="y")
            else:
                y = Normal(loc=neural_network_deep(X),
                           scale=np.sqrt(self.data_noise) * tf.ones(n),
                           name="y")
        # inference
        if self.deep_NN == False:
            qW_0 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, X_dim,
                                      self.hidden_size])))
            qW_1 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size,
                                      y_dim])))
            qb_0 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size])))
            qb_1 = Empirical(tf.Variable(tf.zeros([self.n_samples, y_dim])))
        else:
            qW_0 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, X_dim,
                                      self.hidden_size])))
            qW_1 = Empirical(
                tf.Variable(
                    tf.zeros(
                        [self.n_samples, self.hidden_size, self.hidden_size])))
            qW_2 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size,
                                      y_dim])))
            qb_0 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size])))
            qb_1 = Empirical(
                tf.Variable(tf.zeros([self.n_samples, self.hidden_size])))
            qb_2 = Empirical(tf.Variable(tf.zeros([self.n_samples, y_dim])))

        # get some priors
        ### !!! TODO, turn this into a proper function
        # X_pred = X_val.astype(np.float32).reshape((X_val.shape[0], 1))
        # self.y_priors = tf.stack([nn_predict(X_pred, W_0.sample(), W_1.sample(),b_0.sample(), b_1.sample())
        # 	for _ in range(10)])

        # Neal 2012
        # Too large a stepsize will result in a very low acceptance rate for states
        # proposed by simulating trajectories. Too small a stepsize will either waste
        # computation time, by the same factor as the stepsize is too small, or (worse)
        # will lead to slow exploration by a random walk,

        # https://stats.stackexchange.com/questions/304942/how-to-set-step-size-in-hamiltonian-monte-carlo
        # If ϵ is too large, then there will be large discretisation error and low acceptance, if ϵ
        # is too small then more expensive leapfrog steps will be required to move large distances.
        # Ideally we want the largest possible value of ϵ
        # that gives reasonable acceptance probability. Unfortunately this may vary for different values of the target variable.
        # A simple heuristic to set this may be to do a preliminary run with fixed L,
        # gradually increasing ϵ until the acceptance probability is at an appropriate level.

        # Setting the trajectory length by trial and error therefore seems necessary.
        # For a problem thought to be fairly difficult, a trajectory with L = 100 might be a
        # suitable starting point. If preliminary runs (with a suitable ε; see above) show that HMC
        # reaches a nearly independent point after only one iteration, a smaller value of L might be
        # tried next. (Unless these “preliminary” runs are actually sufficient, in which case there is
        # of course no need to do more runs.) If instead there is high autocorrelation in the run
        # with L = 100, runs with L = 1000 might be tried next
        # It may also be advisable to randomly sample ϵ
        # and L from suitable ranges to avoid the possibility of having paths that are close to periodic as this would slow mixing.
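
        # Hedged illustration (not part of the original script) of the advice in
        # the comments above: a small preliminary-run heuristic that tries a few
        # step sizes and keeps the largest one whose average HMC acceptance rate
        # stays above a rule-of-thumb target (0.65). The build_hmc argument is an
        # assumed callable returning a freshly constructed ed.HMC inference for
        # this model; the helper is illustrative only and is not called below.
        def _tune_step_size(build_hmc, candidates=(1e-4, 3e-4, 1e-3, 3e-3),
                            n_trial_iter=100, target_accept=0.65):
            best = candidates[0]
            for eps in candidates:
                hmc = build_hmc()
                hmc.initialize(step_size=eps, n_steps=self.n_steps)
                tf.global_variables_initializer().run()
                rates = [hmc.update()['accept_rate']
                         for _ in range(n_trial_iter)]
                if np.mean(rates) >= target_accept:
                    best = eps  # largest step size still accepted often enough
            return best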

        if self.deep_NN == False:
            inference = ed.HMC({
                W_0: qW_0,
                b_0: qb_0,
                W_1: qW_1,
                b_1: qb_1
            },
                               data={
                                   X: X_train,
                                   y: y_train.ravel()
                               })
        else:
            inference = ed.HMC(
                {
                    W_0: qW_0,
                    b_0: qb_0,
                    W_1: qW_1,
                    b_1: qb_1,
                    W_2: qW_2,
                    b_2: qb_2
                },
                data={
                    X: X_train,
                    y: y_train.ravel()
                })
        inference.run(step_size=self.step_size,
                      n_steps=self.n_steps)  # logdir='log'

        # drop first chunk of burn in samples
        if self.deep_NN == False:
            self.qW_0_keep = qW_0.params[self.burn_in:].eval()
            self.qW_1_keep = qW_1.params[self.burn_in:].eval()
            self.qb_0_keep = qb_0.params[self.burn_in:].eval()
            self.qb_1_keep = qb_1.params[self.burn_in:].eval()
        else:
            self.qW_0_keep = qW_0.params[self.burn_in:].eval()
            self.qW_1_keep = qW_1.params[self.burn_in:].eval()
            self.qb_0_keep = qb_0.params[self.burn_in:].eval()
            self.qW_2_keep = qW_2.params[self.burn_in:].eval()
            self.qb_1_keep = qb_1.params[self.burn_in:].eval()
            self.qb_2_keep = qb_2.params[self.burn_in:].eval()

        return
Example #25
s_ph = tf.placeholder(tf.int32, [None])  # student ID (categorical)
d_ph = tf.placeholder(tf.int32, [None])  # instructor ID (categorical)
dept_ph = tf.placeholder(tf.int32, [None])  # department ID (categorical)
service_ph = tf.placeholder(tf.float32, [None])  # binary indicator, fixed effect

# fixed-effect parameters
mu = tf.Variable(tf.random_normal([]))  #Bf
service = tf.Variable(tf.random_normal([]))  #beta

# parameters of the random-intercept terms
sigma_s = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([]))))  # variance of the student effect Bs
sigma_d = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([]))))  # variance of the instructor effect Bd
sigma_dept = tf.sqrt(tf.exp(tf.Variable(tf.random_normal([]))))  # variance of the department effect Bdept

# random intercepts
eta_s = Normal(loc=tf.zeros(n_s), scale=sigma_s * tf.ones(n_s))
eta_d = Normal(loc=tf.zeros(n_d), scale=sigma_d * tf.ones(n_d))
eta_dept = Normal(loc=tf.zeros(n_dept), scale=sigma_dept * tf.ones(n_dept))

# random-effect terms + fixed-effect terms
yhat = tf.gather(eta_s, s_ph) + \
    tf.gather(eta_d, d_ph) + \
    tf.gather(eta_dept, dept_ph) + \
    mu + service * service_ph  # tf.gather selects the per-group effect for each observation; see https://blog.csdn.net/guotong1988/article/details/53172882
y = Normal(loc=yhat, scale=tf.ones(n_obs))

q_eta_s = Normal(loc=tf.get_variable("q_eta_s/loc", [n_s]),
                 scale=tf.nn.softplus(tf.get_variable("q_eta_s/scale", [n_s])))
q_eta_d = Normal(loc=tf.get_variable("q_eta_d/loc", [n_d]),
                 scale=tf.nn.softplus(tf.get_variable("q_eta_d/scale", [n_d])))
q_eta_dept = Normal(loc=tf.get_variable("q_eta_dept/loc", [n_dept]),
Example #26
plt.axis([-3, 3, -3, 3])
plt.title("Simulated dataset")
plt.show()

K = 2
D = 2
model = MixtureGaussian(K, D)

qpi_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K])))
qmu_mu = tf.Variable(tf.random_normal([K * D]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))
qsigma_alpha = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))
qsigma_beta = tf.nn.softplus(tf.Variable(tf.random_normal([K * D])))

qpi = Dirichlet(alpha=qpi_alpha)
qmu = Normal(mu=qmu_mu, sigma=qmu_sigma)
qsigma = InverseGamma(alpha=qsigma_alpha, beta=qsigma_beta)

data = {'x': x_train}
inference = ed.MFVI({'pi': qpi, 'mu': qmu, 'sigma': qsigma}, data, model)
inference.run(n_iter=2500, n_samples=10, n_minibatch=20)

# Average per-cluster and per-data point likelihood over many posterior samples.
log_liks = []
for s in range(100):
    zrep = {
        'pi': qpi.sample(()),
        'mu': qmu.sample(()),
        'sigma': qsigma.sample(())
    }
    log_liks += [model.predict(data, zrep)]
Example #27
def main(argv):
    del argv

    x_train, components = build_toy_dataset(N)
    n_examples, n_features = x_train.shape

    # save the target
    outdir = setup_outdir()
    np.savez(os.path.join(outdir, 'target_dist.npz'),
             pi=pi,
             mus=mus,
             stds=stds)

    weights, comps = [], []
    elbos = []
    relbo_vals = []
    times = []
    for iter in range(FLAGS.n_fw_iter):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(FLAGS.seed)
            sess = tf.InteractiveSession()
            with sess.as_default():
                # build model
                xcomps = [
                    Normal(loc=tf.convert_to_tensor(mus[i]),
                           scale=tf.convert_to_tensor(stds[i]))
                    for i in range(len(mus))
                ]
                x = Mixture(cat=Categorical(probs=tf.convert_to_tensor(pi)),
                            components=xcomps,
                            sample_shape=N)

                qx = construct_normal([n_features], iter, 'qx')
                if iter > 0:
                    qtx = Mixture(
                        cat=Categorical(probs=tf.convert_to_tensor(weights)),
                        components=[
                            Normal(
                                loc=c['loc'][0],
                                #scale_diag=tf.nn.softplus(c['scale_diag'])) for c in comps], sample_shape=N)
                                scale=c['scale_diag'][0]) for c in comps
                        ],
                        sample_shape=N)
                    fw_iterates = {x: qtx}
                else:
                    fw_iterates = {}

                sess.run(tf.global_variables_initializer())

                total_time = 0
                start_inference_time = time.time()
                inference = relbo.KLqp({x: qx},
                                       fw_iterates=fw_iterates,
                                       fw_iter=iter)
                inference.run(n_iter=FLAGS.LMO_iter)
                end_inference_time = time.time()

                total_time += end_inference_time - start_inference_time

                if iter > 0:
                    relbo_vals.append(-utils.compute_relbo(
                        qx, fw_iterates[x], x, np.log(iter + 1)))

                if iter == 0:
                    gamma = 1.
                elif iter > 0 and FLAGS.fw_variant == 'fixed':
                    gamma = 2. / (iter + 2.)
                elif iter > 0 and FLAGS.fw_variant == 'line_search':
                    start_line_search_time = time.time()
                    gamma = line_search_dkl(weights, [c['loc'] for c in comps],
                                            [c['scale_diag'] for c in comps],
                                            qx.loc.eval(),
                                            qx.stddev().eval(), x, iter)
                    end_line_search_time = time.time()
                    total_time += end_line_search_time - start_line_search_time
                elif iter > 0 and FLAGS.fw_variant == 'fc':
                    gamma = 2. / (iter + 2.)

                comps.append({
                    'loc': qx.mean().eval(),
                    'scale_diag': qx.stddev().eval()
                })
                weights = utils.update_weights(weights, gamma, iter)

                print("weights", weights)
                print("comps", [c['loc'] for c in comps])
                print("scale_diags", [c['scale_diag'] for c in comps])

                q_latest = Mixture(
                    cat=Categorical(probs=tf.convert_to_tensor(weights)),
                    components=[MultivariateNormalDiag(**c) for c in comps],
                    sample_shape=N)

                if FLAGS.fw_variant == "fc":
                    start_fc_time = time.time()
                    weights = fully_corrective(q_latest, x)
                    weights = list(weights)
                    for i in reversed(range(len(weights))):
                        w = weights[i]
                        if w == 0:
                            del weights[i]
                            del comps[i]
                    weights = np.array(weights)
                    end_fc_time = time.time()
                    total_time += end_fc_time - start_fc_time

                q_latest = Mixture(
                    cat=Categorical(probs=tf.convert_to_tensor(weights)),
                    components=[MultivariateNormalDiag(**c) for c in comps],
                    sample_shape=N)

                elbos.append(elbo(q_latest, x))

                outdir = setup_outdir()

                print("total time", total_time)
                times.append(float(total_time))
                utils.save_times(os.path.join(outdir, 'times.csv'), times)

                elbos_filename = os.path.join(outdir, 'elbos.csv')
                logger.info("iter, %d, elbo, %.2f +/- %.2f" %
                            (iter, *elbos[-1]))
                np.savetxt(elbos_filename, elbos, delimiter=',')
                logger.info("saving elbos to, %s" % elbos_filename)

                relbos_filename = os.path.join(outdir, 'relbos.csv')
                np.savetxt(relbos_filename, relbo_vals, delimiter=',')
                logger.info("saving relbo values to, %s" % relbos_filename)

                for_serialization = {
                    'locs': np.array([c['loc'] for c in comps]),
                    'scale_diags': np.array([c['scale_diag'] for c in comps])
                }
                qt_outfile = os.path.join(outdir, 'qt_iter%d.npz' % iter)
                np.savez(qt_outfile, weights=weights, **for_serialization)
                np.savez(os.path.join(outdir, 'qt_latest.npz'),
                         weights=weights,
                         **for_serialization)
                logger.info("saving qt to, %s" % qt_outfile)
        tf.reset_default_graph()
Example #28
  return x, y


ed.set_seed(42)

N = 40  # number of data points
D = 10  # number of features

# DATA
coeff = np.random.randn(D)
X_train, y_train = build_toy_dataset(N, coeff)
X_test, y_test = build_toy_dataset(N, coeff)

# MODEL
X = tf.placeholder(tf.float32, [N, D])
w = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
y = Normal(mu=ed.dot(X, w) + b, sigma=tf.ones(N))

# INFERENCE
qw = Normal(mu=tf.Variable(tf.random_normal([D])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([D]))))
qb = Normal(mu=tf.Variable(tf.random_normal([1])),
            sigma=tf.nn.softplus(tf.Variable(tf.random_normal([1]))))

data = {X: X_train, y: y_train}
inference = ed.KLqp({w: qw, b: qb}, data)
inference.run(n_samples=5, n_iter=250)

# CRITICISM
y_post = ed.copy(y, {w: qw.mean(), b: qb.mean()})
from scipy.stats import norm


def build_toy_dataset(N, noise_std=0.1):
    X = np.concatenate(
        [np.linspace(0, 2, num=N // 2),
         np.linspace(6, 8, num=N // 2)])
    y = 5.0 * X + norm.rvs(0, noise_std, size=N)
    X = X.reshape((N, 1))
    return X.astype(np.float32), y.astype(np.float32)


N = 40  # num data points
p = 1  # num features

ed.set_seed(42)

X_data, y_data = build_toy_dataset(N)

X = X_data
beta = Normal(mu=tf.zeros(p), sigma=tf.ones(p))
y = Normal(mu=ed.dot(X, beta), sigma=tf.ones(N))

qmu_mu = tf.Variable(tf.random_normal([p]))
qmu_sigma = tf.nn.softplus(tf.Variable(tf.random_normal([p])))
qbeta = Normal(mu=qmu_mu, sigma=qmu_sigma)

data = {y: y_data}
inference = ed.MFVI({beta: qbeta}, data)
inference.run(n_iter=500)
Example #30
def main(_):
    # setting up output directory
    outdir = os.path.expanduser(FLAGS.outdir)
    os.makedirs(outdir, exist_ok=True)

    N, M, D, R_true, I_train, I_test = get_data()
    debug('N, M, D', N, M, D)

    # Solution components
    weights, qUVt_components = [], []

    # Files to log metrics
    times_filename = os.path.join(outdir, 'times.csv')
    mse_train_filename = os.path.join(outdir, 'mse_train.csv')
    mse_test_filename = os.path.join(outdir, 'mse_test.csv')
    ll_test_filename = os.path.join(outdir, 'll_test.csv')
    ll_train_filename = os.path.join(outdir, 'll_train.csv')
    elbos_filename = os.path.join(outdir, 'elbos.csv')
    gap_filename = os.path.join(outdir, 'gap.csv')
    step_filename = os.path.join(outdir, 'steps.csv')
    # 'adafw', 'ada_afw', 'ada_pfw'
    if FLAGS.fw_variant.startswith('ada'):
        lipschitz_filename = os.path.join(outdir, 'lipschitz.csv')
        iter_info_filename = os.path.join(outdir, 'iter_info.txt')

    start = 0
    if FLAGS.restore:
        #start = 50
        #qUVt_components = get_random_components(D, N, M, start)
        #weights = np.random.dirichlet([1.] * start).astype(np.float32)
        #lipschitz_estimate = opt.adafw_linit()
        parameters = np.load(os.path.join(outdir, 'qt_latest.npz'))
        weights = list(parameters['weights'])
        start = parameters['fw_iter']
        qUVt_components = list(parameters['comps'])
        assert len(weights) == len(qUVt_components), "Inconsistent storage"
        # get lipschitz estimate from the file, could've stored it
        # in params but that would mean different saved file for
        # adaptive variants
        if FLAGS.fw_variant.startswith('ada'):
            lipschitz_filename = os.path.join(outdir, 'lipschitz.csv')
            if not os.path.isfile(lipschitz_filename):
                raise ValueError("Inconsistent storage")
            with open(lipschitz_filename, 'r') as f:
                l = f.readlines()
                lipschitz_estimate = float(l[-1].strip())
    else:
        # empty the files present in the folder already
        open(times_filename, 'w').close()
        open(mse_train_filename, 'w').close()
        open(mse_test_filename, 'w').close()
        open(ll_test_filename, 'w').close()
        open(ll_train_filename, 'w').close()
        open(elbos_filename, 'w').close()
        open(gap_filename, 'w').close()
        open(step_filename, 'w').close()
        # 'adafw', 'ada_afw', 'ada_pfw'
        if FLAGS.fw_variant.startswith('ada'):
            open(lipschitz_filename, 'w').close()
            open(iter_info_filename, 'w').close()

    for t in range(start, start + FLAGS.n_fw_iter):
        g = tf.Graph()
        with g.as_default():
            tf.set_random_seed(FLAGS.seed)
            sess = tf.InteractiveSession()
            with sess.as_default():
                # MODEL
                I = tf.placeholder(tf.float32, [N, M])

                scale_uv = tf.concat(
                    [tf.ones([D, N]), tf.ones([D, M])], axis=1)
                mean_uv = tf.concat(
                    [tf.zeros([D, N]), tf.zeros([D, M])], axis=1)

                UV = Normal(loc=mean_uv, scale=scale_uv)
                R = Normal(loc=tf.matmul(tf.transpose(UV[:, :N]), UV[:, N:]),
                           scale=tf.ones([N, M]))  # generator dist. for matrix
                R_mask = R * I  # generated masked matrix

                p_joint = Joint(R_true, I_train, sess, D, N, M)

                if t == 0:
                    fw_iterates = {}
                else:
                    # Current solution
                    prev_components = [
                        coreutils.base_loc_scale('mvn0',
                                                 c['loc'],
                                                 c['scale'],
                                                 multivariate=False)
                        for c in qUVt_components
                    ]
                    qUV_prev = coreutils.get_mixture(weights, prev_components)
                    fw_iterates = {UV: qUV_prev}

                # LMO (via relbo INFERENCE)
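                # Fit a single diagonal-Gaussian component sUV by optimizing
                # the RELBO, with the current mixture iterate passed in via
                # fw_iterates; this plays the role of the FW linear
                # minimization oracle.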
                mean_suv = tf.concat([
                    tf.get_variable("qU/loc", [D, N]),
                    tf.get_variable("qV/loc", [D, M])
                ],
                                     axis=1)
                scale_suv = tf.concat([
                    tf.nn.softplus(tf.get_variable("qU/scale", [D, N])),
                    tf.nn.softplus(tf.get_variable("qV/scale", [D, M]))
                ],
                                      axis=1)

                sUV = Normal(loc=mean_suv, scale=scale_suv)

                #inference = relbo.KLqp({UV: sUV}, data={R: R_true, I: I_train},
                inference = relbo.KLqp({UV: sUV},
                                       data={
                                           R_mask: R_true,
                                           I: I_train
                                       },
                                       fw_iterates=fw_iterates,
                                       fw_iter=t)
                inference.run(n_iter=FLAGS.LMO_iter)

                loc_s = sUV.mean().eval()
                scale_s = sUV.stddev().eval()
                # sUV is a batched distribution, and building a Mixture from
                # batch distributions is problematic, so wrap it as an mvn0
                # with event shape (D, N + M) and batch shape ().
                # NOTE: log_prob(sample) still returns a tensor;
                # mvn and MultivariateNormalDiag handle 1-D but not 2-D shapes.
                sUV_mv = coreutils.base_loc_scale('mvn0',
                                                  loc_s,
                                                  scale_s,
                                                  multivariate=False)
                # TODO: pass sUV or sUV_mv to the step-size routine? sample()
                # and log_prob() behave the same for either.

                total_time = 0.
                data = {R: R_true, I: I_train}
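                # Choose the mixture weight (step size) gamma for the new
                # component: 'fixed' uses a deterministic schedule,
                # 'line_search' searches gamma directly, and the 'ada*'
                # variants backtrack on a local Lipschitz estimate.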
                if t == 0:
                    gamma = 1.
                    lipschitz_estimate = opt.adafw_linit()
                    step_type = 'init'
                elif FLAGS.fw_variant == 'fixed':
                    start_step_time = time.time()
                    step_result = opt.fixed(weights, qUVt_components, qUV_prev,
                                            loc_s, scale_s, sUV, p_joint, data,
                                            t)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                elif FLAGS.fw_variant == 'line_search':
                    start_step_time = time.time()
                    step_result = opt.line_search_dkl(weights, qUVt_components,
                                                      qUV_prev, loc_s, scale_s,
                                                      sUV, p_joint, data, t)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)
                elif FLAGS.fw_variant == 'adafw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_fw(weights, qUVt_components,
                                                  qUV_prev, loc_s, scale_s,
                                                  sUV, p_joint, data, t,
                                                  lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)

                    step_type = step_result['step_type']
                    if step_type == 'adaptive':
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'ada_pfw':
                    start_step_time = time.time()
                    step_result = opt.adaptive_pfw(weights, qUVt_components,
                                                   qUV_prev, loc_s, scale_s,
                                                   sUV, p_joint, data, t,
                                                   lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)

                    step_type = step_result['step_type']
                    if step_type in ['adaptive', 'drop']:
                        lipschitz_estimate = step_result['l_estimate']
                elif FLAGS.fw_variant == 'ada_afw':
                    start_step_time = time.time()
                    # NOTE: this branch originally called opt.adaptive_pfw as
                    # well; the away-step variant presumably belongs with
                    # opt.adaptive_afw (assumed to exist alongside adaptive_pfw)
                    step_result = opt.adaptive_afw(weights, qUVt_components,
                                                   qUV_prev, loc_s, scale_s,
                                                   sUV, p_joint, data, t,
                                                   lipschitz_estimate)
                    end_step_time = time.time()
                    total_time += float(end_step_time - start_step_time)

                    step_type = step_result['step_type']
                    if step_type in ['adaptive', 'away', 'drop']:
                        lipschitz_estimate = step_result['l_estimate']

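                # Update the mixture iterate: at t == 0 the LMO solution is the
                # whole mixture; afterwards the step routine returns the
                # updated weights and component parameters.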
                if t == 0:
                    gamma = 1.
                    weights.append(gamma)
                    qUVt_components.append({'loc': loc_s, 'scale': scale_s})
                    new_components = [sUV_mv]
                else:
                    qUVt_components = step_result['params']
                    weights = step_result['weights']
                    gamma = step_result['gamma']
                    new_components = [
                        coreutils.base_loc_scale('mvn0',
                                                 c['loc'],
                                                 c['scale'],
                                                 multivariate=False)
                        for c in qUVt_components
                    ]

                qUV_new = coreutils.get_mixture(weights, new_components)

                #qR = Normal(
                #    loc=tf.matmul(
                #        tf.transpose(qUV_new[:, :N]), qUV_new[:, N:]),
                #    scale=tf.ones([N, M]))
                qR = ed.copy(R, {UV: qUV_new})
                cR = ed.copy(R_mask, {UV: qUV_new})  # reconstructed matrix
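                # cR (masked reconstruction) is used for the MSE metrics below,
                # while qR is used for the held-out log-likelihoods.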

                # Log metrics for current iteration
                logger.info('total time %f' % total_time)
                append_to_file(times_filename, total_time)

                logger.info('iter %d, gamma %.4f' % (t, gamma))
                append_to_file(step_filename, gamma)

                if t > 0:
                    gap_t = step_result['gap']
                    logger.info('iter %d, gap %.4f' % (t, gap_t))
                    append_to_file(gap_filename, gap_t)

                # CRITICISM
                if FLAGS.fw_variant.startswith('ada'):
                    append_to_file(lipschitz_filename, lipschitz_estimate)
                    append_to_file(iter_info_filename, step_type)
                    logger.info('lt = %.5f, iter_type = %s' %
                                (lipschitz_estimate, step_type))

                test_mse = ed.evaluate('mean_squared_error',
                                       data={
                                           cR: R_true,
                                           I: I_test
                                       })
                logger.info("iter %d ed test mse %.5f" % (t, test_mse))
                append_to_file(mse_test_filename, test_mse)

                train_mse = ed.evaluate('mean_squared_error',
                                        data={
                                            cR: R_true,
                                            I: I_train
                                        })
                logger.info("iter %d ed train mse %.5f" % (t, train_mse))
                append_to_file(mse_train_filename, train_mse)

                # very slow
                #train_ll = log_likelihood(qUV_new, R_true, I_train, sess, D, N,
                #                          M)
                train_ll = ed.evaluate('log_lik',
                                       data={
                                           qR: R_true.astype(np.float32),
                                           I: I_train
                                       })
                logger.info("iter %d train log lik %.5f" % (t, train_ll))
                append_to_file(ll_train_filename, train_ll)

                #test_ll = log_likelihood(qUV_new, R_true, I_test, sess, D, N, M)
                test_ll = ed.evaluate('log_lik',
                                      data={
                                          qR: R_true.astype(np.float32),
                                          I: I_test
                                      })
                logger.info("iter %d test log lik %.5f" % (t, test_ll))
                append_to_file(ll_test_filename, test_ll)

                # the KLqp elbo_loss might be meaningless here; log it
                # alongside the direct elbo() estimate
                elbo_loss = elboModel.KLqp({UV: qUV_new},
                                           data={
                                               R: R_true,
                                               I: I_train
                                           })
                elbo_t = elbo(qUV_new, p_joint)
                res_update = elbo_loss.run()
                logger.info('iter %d -elbo loss %.2f or %.2f' %
                            (t, res_update['loss'], elbo_t))
                append_to_file(elbos_filename,
                               "%f,%f" % (elbo_t, res_update['loss']))

                # serialize the current iterate
                np.savez(os.path.join(outdir, 'qt_latest.npz'),
                         weights=weights,
                         comps=qUVt_components,
                         fw_iter=t + 1)

                sess.close()
        tf.reset_default_graph()