def vae(observed, n, dim_x, dim_z, n_particles):
    '''decoder: z-->x'''
    with zs.BayesianNet(observed=observed) as model:
        pai = tf.get_variable('pai', shape=[dim_z], dtype=tf.float32,
                              trainable=True,
                              initializer=tf.constant_initializer(1.0))
        n_pai = tf.tile(tf.expand_dims(pai, 0), [n, 1])
        z = zs.OnehotCategorical('z', logits=n_pai, dtype=tf.float32,
                                 n_samples=n_particles)
        # z.tensor shape: [n_particles, n, dim_z]
        mu = tf.get_variable('mu', shape=[dim_z, dim_x], dtype=tf.float32,
                             initializer=tf.random_uniform_initializer(0, 1))
        log_sigma = tf.get_variable(
            'log_sigma', shape=[dim_z, dim_x], dtype=tf.float32,
            initializer=tf.random_uniform_initializer(-3, -2))
        # each one-hot z picks one row of mu / log_sigma
        x_mean = tf.reshape(tf.matmul(tf.reshape(z, [-1, dim_z]), mu),
                            [n_particles, n, dim_x])
        x_logstd = tf.reshape(tf.matmul(tf.reshape(z, [-1, dim_z]), log_sigma),
                              [n_particles, n, dim_x])
        x = zs.Normal('x', mean=x_mean, logstd=x_logstd, group_event_ndims=1)
    return model, x.tensor, z.tensor
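# A minimal usage sketch (not from the source): drawing samples from the
# generative model above with nothing observed. The batch size, dimensions
# and session setup are illustrative assumptions.
def sample_from_prior():
    model, x_gen, z_gen = vae({}, n=64, dim_x=784, dim_z=10, n_particles=1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # returns an array of shape [n_particles, n, dim_x] = [1, 64, 784]
        return sess.run(x_gen)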
def bayesianNN(observed, x, n_x, layer_sizes, n_particles):
    with zs.BayesianNet(observed=observed) as model:
        ws = []
        for i, (n_in, n_out) in enumerate(zip(layer_sizes[:-1],
                                              layer_sizes[1:])):
            w_mu = tf.zeros([1, n_out, n_in + 1])
            ws.append(zs.Normal('w' + str(i), w_mu, std=1.,
                                n_samples=n_particles, group_ndims=2))

        # forward pass: append a bias unit, multiply by the sampled weights
        ly_x = tf.expand_dims(
            tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1]), 3)
        for i in range(len(ws)):
            w = tf.tile(ws[i], [1, tf.shape(x)[0], 1, 1])
            ly_x = tf.concat(
                [ly_x, tf.ones([n_particles, tf.shape(x)[0], 1, 1])], 2)
            ly_x = tf.matmul(w, ly_x) / \
                tf.sqrt(tf.to_float(tf.shape(ly_x)[2]))
            if i < len(ws) - 1:
                ly_x = tf.nn.relu(ly_x)

        y_mean = tf.squeeze(ly_x, [3])
        y = zs.OnehotCategorical('y', y_mean, dtype=tf.float32)
    return model, y_mean
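# Hedged sketch (not in the source): turning the per-particle class scores
# returned above into a Monte Carlo predictive distribution.
def predict_mc(y_mean):
    # y_mean: [n_particles, batch_size, n_classes]
    probs = tf.nn.softmax(y_mean)          # per-particle class probabilities
    return tf.reduce_mean(probs, axis=0)   # average over particles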
def q_net(x, n_x, n_z, n_hidden=8):
    with zs.BayesianNet() as variational:
        lz_x = layers.fully_connected(tf.to_float(x), n_hidden)
        lz_x = layers.fully_connected(lz_x, n_hidden)
        log_z_pi = layers.fully_connected(lz_x, n_z, activation_fn=None)
        z = zs.OnehotCategorical('z', log_z_pi)
    return variational
def unlabeled_proposal(x, n_y, n_z, n_particles):
    with zs.BayesianNet() as proposal:
        y_logits = qy_x(x, n_y)
        y = zs.OnehotCategorical('y', y_logits, n_samples=n_particles)
        x_tiled = tf.tile(tf.expand_dims(x, 0), [n_particles, 1, 1])
        z_mean, z_logstd = qz_xy(x_tiled, y, n_z)
        z = zs.Normal('z', z_mean, logstd=z_logstd, group_ndims=1,
                      is_reparameterized=False)
    return proposal
def M2(observed, n, n_x, n_y, n_z, n_particles):
    with zs.BayesianNet(observed=observed) as model:
        z_mean = tf.zeros([n, n_z])
        z = zs.Normal('z', z_mean, std=1., n_samples=n_particles,
                      group_ndims=1)
        y_logits = tf.zeros([n, n_y])
        y = zs.OnehotCategorical('y', y_logits, n_samples=n_particles)
        lx_zy = layers.fully_connected(tf.concat([z, tf.to_float(y)], 2), 500)
        lx_zy = layers.fully_connected(lx_zy, 500)
        x_logits = layers.fully_connected(lx_zy, n_x, activation_fn=None)
        x = zs.Bernoulli('x', x_logits, group_ndims=1)
    return model
def gmm(observed, n, n_x, n_z):
    with zs.BayesianNet(observed=observed) as model:
        log_pi = tf.get_variable(
            'log_pi', n_z,
            initializer=tf.truncated_normal_initializer(mean=1., stddev=0.5),
            regularizer=var_regularizer(1.0))
        mu = tf.get_variable(
            'mu', [n_x, n_z],
            initializer=tf.orthogonal_initializer(gain=4.0))  # try uniform init
        log_sigma = tf.get_variable(
            'log_sigma', [n_x, n_z],
            initializer=tf.truncated_normal_initializer(stddev=0.5),
            regularizer=l1_regularizer(0.01))  # try without l1 regularizer
        z = zs.OnehotCategorical('z', log_pi, n_samples=n)
        x_mean = tf.matmul(tf.to_float(z.tensor), tf.transpose(mu))
        x_logstd = tf.matmul(tf.to_float(z.tensor), tf.transpose(log_sigma))
        x = zs.Normal('x', x_mean, x_logstd, group_event_ndims=1)
    return model, x.tensor, z.tensor
def q_net(x, dim_z, n_particles):
    '''encoder: x-->z'''
    with zs.BayesianNet() as variational:
        lz_x = layers.fully_connected(
            tf.to_float(x), 256,
            weights_initializer=tf.contrib.layers.xavier_initializer())
        z_logits = layers.fully_connected(
            lz_x, dim_z, activation_fn=None,
            weights_initializer=tf.contrib.layers.xavier_initializer())
        z = zs.OnehotCategorical('z', logits=z_logits, dtype=tf.float32,
                                 n_samples=n_particles)
    return variational, z_logits
def var_dropout(observed, x, n, net_size, n_particles, is_training):
    with zs.BayesianNet(observed=observed) as model:
        h = x
        normalizer_params = {'is_training': is_training,
                             'updates_collections': None}
        for i, (n_in, n_out) in enumerate(zip(net_size[:-1], net_size[1:])):
            eps_mean = tf.ones([n, n_in])
            eps = zs.Normal('layer' + str(i) + '/eps', eps_mean, std=1.,
                            n_samples=n_particles, group_event_ndims=1)
            h = layers.fully_connected(
                h * eps, n_out, normalizer_fn=layers.batch_norm,
                normalizer_params=normalizer_params)
            if i < len(net_size) - 2:
                h = tf.nn.relu(h)
        y = zs.OnehotCategorical('y', h)
    return model, h
def q_net(observed, x, n_z, n_k, tau, n_particles, relaxed=False):
    with zs.BayesianNet(observed=observed) as variational:
        lz_x = tf.layers.dense(tf.to_float(x), 200, activation=tf.tanh)
        lz_x = tf.layers.dense(lz_x, 200, activation=tf.tanh)
        z_logits = tf.layers.dense(lz_x, n_z * n_k)
        z_stacked_logits = tf.reshape(z_logits, [-1, n_z, n_k])
        if relaxed:
            z = zs.ExpConcrete('z', tau, z_stacked_logits,
                               n_samples=n_particles, group_ndims=1)
        else:
            z = zs.OnehotCategorical('z', z_stacked_logits,
                                     n_samples=n_particles, group_ndims=1,
                                     dtype=tf.float32)
    return variational
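# Hedged sketch: the temperature-annealing schedule commonly paired with the
# Concrete/Gumbel-Softmax relaxation (Jang et al., 2017); feed the result into
# the `tau` argument of q_net above. The constants are illustrative
# hyperparameters, not values from the source.
import math

def anneal_tau(step, tau0=1.0, anneal_rate=1e-4, tau_min=0.5):
    # start soft (high tau), sharpen towards one-hot as training proceeds
    return max(tau_min, tau0 * math.exp(-anneal_rate * step))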
def p_Y_Xw(observed, X, drop_rate, n_basis, net_sizes, n_samples, task):
    with zs.BayesianNet(observed=observed) as model:
        f = tf.expand_dims(tf.tile(tf.expand_dims(X, 0), [n_samples, 1, 1]), 2)
        for i in range(len(net_sizes) - 1):
            f = tf.layers.dense(f, net_sizes[i + 1])
            w_shape = [1, 1, net_sizes[i + 1]]
            w_u = tf.random_uniform(tf.concat([[n_samples], w_shape], 0), 0, 1)
            # Bernoulli dropout mask with keep probability 1 - drop_rate
            f = f * tf.cast(tf.less(w_u, 1 - drop_rate), tf.float32)
            if i < len(net_sizes) - 2:
                f = tf.nn.relu(f)
        f = tf.squeeze(f, [2])
        if task == "regression":
            y_logstd = tf.get_variable(
                'y_logstd', shape=[],
                initializer=tf.constant_initializer(0.))
            y = zs.Normal('y', f, logstd=y_logstd, group_ndims=1)
        elif task == "classification":
            y = zs.OnehotCategorical('y', f)
    return model, f, None
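# Hedged sketch (not in the source): MC-dropout style predictive statistics
# computed across the n_samples axis of the `f` returned above.
def mc_dropout_stats(f):
    # f: [n_samples, batch_size, n_out]
    mean, var = tf.nn.moments(f, axes=[0])
    return mean, var  # predictive mean and per-output variance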
def vae(observed, x_dim, z_dim, n, n_particles=1):
    with zs.BayesianNet(observed=observed) as model:
        y = zs.OnehotCategorical('y', logits=tf.ones([n, 10]),
                                 group_ndims=1, n_samples=n_particles)
        y = tf.to_float(y)
        y = tf.reshape(y, (1, n, 10))
        z_mean = tf.zeros([n, z_dim])
        z = zs.Normal('z', z_mean, std=1., group_ndims=1,
                      n_samples=n_particles)
        z = tf.concat([z, y], 2)
        lx_z = tf.layers.dense(z, 500, activation=tf.nn.relu)
        lx_z = tf.layers.dense(lx_z, 500, activation=tf.nn.relu)
        x_logits = tf.layers.dense(lx_z, x_dim)
        x_mean = zs.Implicit("x_mean", tf.sigmoid(x_logits), group_ndims=1)
        x = zs.Bernoulli('x', x_logits, group_ndims=1)
    return model
def q_net(observed, x, n_z, n_k, tau, n_particles, relaxed=False):
    with zs.BayesianNet(observed=observed) as variational:
        lz_x = layers.fully_connected(tf.to_float(x), 200,
                                      activation_fn=tf.tanh)
        lz_x = layers.fully_connected(lz_x, 200, activation_fn=tf.tanh)
        z_logits = layers.fully_connected(lz_x, n_z * n_k, activation_fn=None)
        # -1 infers the batch size from x
        z_stacked_logits = tf.reshape(z_logits, [-1, n_z, n_k])
        if relaxed:
            z = zs.ExpConcrete('z', tau, z_stacked_logits,
                               n_samples=n_particles, group_event_ndims=1)
        else:
            z = zs.OnehotCategorical('z', z_stacked_logits, dtype=tf.float32,
                                     n_samples=n_particles,
                                     group_event_ndims=1)
    return variational
def vae(observed, batch_size, n_x, n_h, n_z, n_particles):
    with zs.BayesianNet(observed=observed) as model:
        log_pi = tf.get_variable('log_pi', n_z,
                                 initializer=tf.zeros_initializer())
        mu = tf.get_variable('mu', [n_h, n_z],
                             initializer=tf.random_uniform_initializer(-1, 1))
        log_sigma = tf.get_variable(
            'log_sigma', [n_h, n_z],
            initializer=tf.random_normal_initializer(0, 0.1))
        n_log_pi = tf.tile(tf.expand_dims(log_pi, 0),
                           [batch_size, 1])  # (batch_size, n_z)
        z = zs.OnehotCategorical('z', n_log_pi, n_samples=n_particles,
                                 group_event_ndims=0)
        # z.tensor: (n_particles, batch_size, n_z), one-hot with val_shape [n_z]
        z_tensor = tf.reshape(z.tensor, [-1, n_z])
        h_mean = tf.matmul(tf.to_float(z_tensor),
                           tf.transpose(mu))  # (n_particles * batch_size, n_h)
        h_logstd = tf.matmul(tf.to_float(z_tensor), tf.transpose(log_sigma))
        h_mean = tf.reshape(
            h_mean, [-1, batch_size, n_h])  # (n_particles, batch_size, n_h)
        h_logstd = tf.reshape(h_logstd, [-1, batch_size, n_h])
        # With group_event_ndims=1 this acts as a multivariate Normal:
        # log_prob() has shape batch_shape[:-1] = (n_particles, batch_size).
        # See zhusuan's Basic Concepts.
        h = zs.Normal('h', h_mean, h_logstd, group_event_ndims=1)
        lx_h = layers.fully_connected(h, 500)
        lx_h = layers.fully_connected(lx_h, 500)
        x_logits = layers.fully_connected(
            lx_h, n_x, activation_fn=None)  # (n_particles, batch_size, n_x)
        # the n_x = 784 pixels are grouped as one event
        x = zs.Bernoulli('x', x_logits, group_event_ndims=1)
    return model, x_logits, z.tensor
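# Hedged probe (not in the source), assuming zhusuan's
# BayesianNet.local_log_prob API: with group_event_ndims=1 the last axis is
# summed out, so per-node log densities come back per particle and per
# datapoint rather than per pixel.
# model, x_logits, z_tensor = vae({'x': x_obs}, batch_size, n_x, n_h, n_z,
#                                 n_particles)
# log_px = model.local_log_prob('x')  # shape: (n_particles, batch_size)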
def vae(observed, n, n_x, n_z, n_k, tau, n_particles, relaxed=False):
    with zs.BayesianNet(observed=observed) as model:
        z_stacked_logits = tf.zeros([n, n_z, n_k])
        if relaxed:
            # ExpConcrete samples live in log space; exponentiate back
            z = zs.ExpConcrete('z', tau, z_stacked_logits,
                               n_samples=n_particles, group_event_ndims=1)
            z = tf.exp(tf.reshape(z, [n_particles, n, n_z * n_k]))
        else:
            z = zs.OnehotCategorical('z', z_stacked_logits, dtype=tf.float32,
                                     n_samples=n_particles,
                                     group_event_ndims=1)
            z = tf.reshape(z, [n_particles, n, n_z * n_k])
        lx_z = layers.fully_connected(z, 200, activation_fn=tf.tanh)
        lx_z = layers.fully_connected(lx_z, 200, activation_fn=tf.tanh)
        x_logits = layers.fully_connected(lx_z, n_x, activation_fn=None)
        x = zs.Bernoulli('x', x_logits, group_event_ndims=1)
    return model
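# Hedged training sketch, assuming zhusuan's variational API
# (zs.variational.elbo, BayesianNet.query); `x_obs` is an illustrative
# placeholder of binarized inputs. In the relaxed case the ExpConcrete
# samples are reparameterizable, so the SGVB estimator applies.
def build_relaxed_cost(x_obs, n, n_x, n_z, n_k, tau, n_particles):
    def log_joint(observed):
        model = vae(observed, n, n_x, n_z, n_k, tau, n_particles,
                    relaxed=True)
        log_pz, log_px_z = model.local_log_prob(['z', 'x'])
        return log_pz + log_px_z

    variational = q_net({}, x_obs, n_z, n_k, tau, n_particles, relaxed=True)
    qz_samples, log_qz = variational.query('z', outputs=True,
                                           local_log_prob=True)
    lower_bound = zs.variational.elbo(log_joint, observed={'x': x_obs},
                                      latent={'z': [qz_samples, log_qz]},
                                      axis=0)
    return tf.reduce_mean(-lower_bound.sgvb())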
def p_Y_Xw(observed, X, drop_rate, n_basis, net_sizes, n_samples, task):
    with zs.BayesianNet(observed=observed) as model:
        f = tf.expand_dims(tf.tile(tf.expand_dims(X, 0), [n_samples, 1, 1]), 2)
        for i in range(len(net_sizes) - 1):
            w_mu = tf.zeros([1, net_sizes[i] + 1, net_sizes[i + 1]])
            w = zs.Normal('w' + str(i), w_mu, std=1., n_samples=n_samples,
                          group_ndims=2)
            w = tf.tile(w, [1, tf.shape(X)[0], 1, 1])
            # append a constant 1 for the bias term
            f = tf.concat([f, tf.ones([n_samples, tf.shape(X)[0], 1, 1])], 3)
            f = tf.matmul(f, w) / tf.sqrt(net_sizes[i] + 1.)
            if i < len(net_sizes) - 2:
                f = tf.nn.relu(f)
        f = tf.squeeze(f, [2])
        if task == "regression":
            y_logstd = tf.get_variable(
                'y_logstd', shape=[],
                initializer=tf.constant_initializer(0.))
            y = zs.Normal('y', f, logstd=y_logstd, group_ndims=1)
        elif task == "classification":
            y = zs.OnehotCategorical('y', f)
    return model, f, None
def px_z_y(self, observed, captions=None, lengths=None, gen_mode=False,
           n_x=None):
    """Decoder p(x|z, y).

    Args:
        observed: observed nodes for q, parametrized by the encoder;
            used during training.

    Returns:
        model: zhusuan model object, can be used for getting probabilities.
    """
    if captions is not None and lengths is not None:
        self.captions = captions
        self.lengths = lengths
    if n_x is None:
        n_x = tf.shape(self.images_fv)[0]
    with zs.BayesianNet(observed) as model:
        z_mean = tf.zeros([n_x, self.params.latent_size])
        z = zs.Normal('z', mean=z_mean, std=self.params.std,
                      group_ndims=1, n_samples=self.params.gen_z_samples)
        tf.summary.histogram("distributions/z", z)
        y_logits = tf.zeros([n_x, self.n_classes])
        y = zs.OnehotCategorical('y', y_logits,
                                 n_samples=self.params.gen_z_samples)
        with tf.variable_scope("net"):
            embedding = tf.get_variable(
                "dec_embeddings",
                [self.data_dict.vocab_size, self.params.embed_size],
                dtype=tf.float32)
            # word dropout: replace dropped tokens with <UNK>
            before = tf.reshape(self.captions, [-1])
            word_drop_keep = self.params.word_dropout_keep
            if gen_mode:
                word_drop_keep = 1.0
            captions = tf.nn.dropout(tf.to_float(self.captions),
                                     word_drop_keep)
            after = tf.reshape(tf.to_int32(captions), [-1])
            mask_after = tf.to_int32(tf.not_equal(before, after))
            to_unk = mask_after * self.data_dict.word2idx['<UNK>']
            captions = tf.reshape(tf.add(after, to_unk),
                                  [tf.shape(self.images_fv)[0], -1])
            vect_inputs = tf.nn.embedding_lookup(embedding, captions)
            dec_lstm_drop = self.params.dec_lstm_drop
            if gen_mode:
                dec_lstm_drop = 1.0
            cell_0 = make_rnn_cell([self.params.decoder_hidden],
                                   base_cell=tf.contrib.rnn.LSTMCell,
                                   dropout_keep_prob=dec_lstm_drop)
            # run this cell once to get the initial state
            added_shape = (self.params.gen_z_samples * self.params.n_classes
                           + self.params.embed_size)
            c = h = tf.layers.dense(self.images_fv,
                                    self.params.decoder_hidden,
                                    name='dec_init_map')
            initial_state0 = (tf.nn.rnn_cell.LSTMStateTuple(c, h),)
            # vector z concatenated with y, mapped into the embedding dim
            z = tf.concat([z, tf.to_float(y)], 2)
            z = tf.reshape(
                z, [n_x, (self.params.latent_size + self.n_classes)
                    * self.params.gen_z_samples])
            z_dec = layers.dense(z, added_shape, name='z_rnn')
            _, z_state = cell_0(z_dec, initial_state0)
            initial_state = rnn_placeholders(z_state)
            # concat y with the word embeddings at every timestep
            y_re = tf.to_float(tf.reshape(
                y, [tf.shape(self.images_fv)[0],
                    self.params.gen_z_samples * self.params.n_classes]))
            y = tf.tile(tf.expand_dims(y_re, 1),
                        [1, tf.shape(vect_inputs)[1], 1])
            vect_inputs = tf.concat([vect_inputs, y], 2)
            # captions LSTM
            outputs, final_state = tf.nn.dynamic_rnn(
                cell_0, inputs=vect_inputs, sequence_length=self.lengths,
                initial_state=initial_state, swap_memory=True,
                dtype=tf.float32)
            # outputs: [batch_size, seq_length, self.params.decoder_hidden]
            if gen_mode:
                # only interested in the last output
                outputs = outputs[:, -1, :]
            outputs_r = tf.reshape(outputs, [-1, cell_0.output_size])
            x_logits = tf.layers.dense(outputs_r,
                                       units=self.data_dict.vocab_size,
                                       name='rnn_logits')
            x_logits_r = tf.reshape(
                x_logits, [tf.shape(outputs)[0], tf.shape(outputs)[1], -1])
            x = zs.Categorical('x', x_logits_r, group_ndims=1)
            # for generation
            sample = None
            if gen_mode:
                if self.params.sample_gen == 'sample':
                    sample = tf.multinomial(
                        x_logits / self.params.temperature, 1)[0][0]
                elif self.params.sample_gen == 'beam_search':
                    sample = tf.nn.softmax(x_logits)
                else:
                    sample = tf.nn.softmax(x_logits)
    return model, x_logits, (initial_state, final_state, sample)
def vae(observed, n, n_particles, is_training, dim_h=40, dim_z=10,
        dim_x=784):
    '''decoder: z-->h-->x

    n: batch_size
    dim_z: K = 10
    dim_h: D = 40
    dim_x: 784
    '''
    with zs.BayesianNet(observed=observed) as model:
        pai = tf.get_variable('pai', shape=[dim_z], dtype=tf.float32,
                              trainable=True,
                              initializer=tf.constant_initializer(1.0))
        n_pai = tf.tile(tf.expand_dims(pai, 0), [n, 1])
        z = zs.OnehotCategorical('z', logits=n_pai, dtype=tf.float32,
                                 n_samples=n_particles)
        mu = tf.get_variable('mu', shape=[dim_z, dim_h], dtype=tf.float32,
                             initializer=tf.random_uniform_initializer(-1, 1))
        log_sigma = tf.get_variable(
            'log_sigma', shape=[dim_z, dim_h], dtype=tf.float32,
            initializer=tf.random_uniform_initializer(-3, -2))
        h_mean = tf.reshape(tf.matmul(tf.reshape(z, [-1, dim_z]), mu),
                            [n_particles, -1, dim_h])  # [n_particles, n, dim_h]
        h_logstd = tf.reshape(tf.matmul(tf.reshape(z, [-1, dim_z]), log_sigma),
                              [n_particles, -1, dim_h])
        h = zs.Normal('h', mean=h_mean, logstd=h_logstd, group_event_ndims=1)
        lx_h = layers.fully_connected(h, 512)
        lx_h = layers.fully_connected(lx_h, 512)
        x_logits = layers.fully_connected(
            lx_h, dim_x, activation_fn=None)  # log odds of each pixel being 1
        x = zs.Bernoulli('x', x_logits, group_event_ndims=1)
    return model, x_logits, h, z.tensor