def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, shape=[None, 48, 48, 1])
        bn_flag = tf.placeholder_with_default(tf.zeros(shape=[]), shape=[])
        x_tilde, z_e_x, z_q_x, z_i_x, z_nst_q_x, z_emb = create_vqvae(
            images, bn_flag)
        rec_loss = tf.reduce_mean(BernoulliCrossEntropyCost(x_tilde, images))
        vq_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(z_e_x) - z_nst_q_x))
        commit_loss = tf.reduce_mean(
            tf.square(z_e_x - tf.stop_gradient(z_nst_q_x)))
        beta = 0.25
        loss = rec_loss + vq_loss + beta * commit_loss

        params = get_params_dict()
        grads = tf.gradients(loss, params.values())
        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

        things_names = ["images", "bn_flag",
                        "x_tilde", "z_e_x", "z_q_x", "z_i_x", "z_emb",
                        "loss", "rec_loss", "train_step"]
        things_tf = [eval(name) for name in things_names]
        for tn, tt in zip(things_names, things_tf):
            graph.add_to_collection(tn, tt)
        train_model = namedtuple('Model', things_names)(*things_tf)
        return graph, train_model
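# For reference: the three terms above match the VQ-VAE objective of
# van den Oord et al. (2017), with BernoulliCrossEntropyCost standing in
# for the -log p(x | z_q(x)) reconstruction term,
#   L = -log p(x | z_q(x)) + || sg[z_e(x)] - e ||^2 + beta * || z_e(x) - sg[e] ||^2
# where sg[.] is stop_gradient, e is the nearest codebook vector, and
# beta = 0.25 is the commitment weight: the codebook term only moves the
# embeddings, the commitment term only moves the encoder.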
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, shape=[None, 12, 6, 1])
        labels = tf.placeholder(tf.float32, shape=[None, 1])
        x_tilde = create_pixel_cnn(images, labels)
        loss = tf.reduce_mean(
            CategoricalCrossEntropyLinearIndexCost(x_tilde, images))
        #loss = tf.reduce_mean(BernoulliCrossEntropyCost(x_tilde, images))
        #loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=x_tilde, labels=images))
        #loss = tf.reduce_mean((x_tilde - images) ** 2)

        params = get_params_dict()
        grads = tf.gradients(loss, params.values())
        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

        things_names = ["images", "labels", "x_tilde", "loss", "train_step"]
        things_tf = [eval(name) for name in things_names]
        for tn, tt in zip(things_names, things_tf):
            graph.add_to_collection(tn, tt)
        train_model = namedtuple('Model', things_names)(*things_tf)
        return graph, train_model
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, shape=[None, batch_size, 1])
        inputs_tm1 = inputs[:-1]
        inputs_t = inputs[1:]
        init_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        pred, hiddens, cells = create_model(inputs_tm1, inputs_t,
                                            init_hidden, init_cell)
        rec_loss = tf.reduce_mean(tf.square(pred - inputs_t))
        loss = rec_loss

        params = get_params_dict()
        grads = tf.gradients(loss, params.values())
        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

        things_names = ["inputs", "inputs_tm1", "inputs_t",
                        "init_hidden", "init_cell",
                        "hiddens", "cells", "pred",
                        "loss", "rec_loss", "train_step"]
        things_tf = [inputs, inputs_tm1, inputs_t,
                     init_hidden, init_cell,
                     hiddens, cells, pred,
                     loss, rec_loss, train_step]
        assert len(things_names) == len(things_tf)
        for tn, tt in zip(things_names, things_tf):
            graph.add_to_collection(tn, tt)
        train_model = namedtuple('Model', things_names)(*things_tf)
        return graph, train_model
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, shape=[None, batch_size, n_inputs])
        targets = tf.placeholder(tf.float32, shape=[None, batch_size, n_inputs])
        init_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_q_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_q_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        r = create_model(inputs, init_hidden, init_cell,
                         init_q_hidden, init_q_cell)

        # reconstruction loss
        rec_loss = tf.reduce_mean(
            BernoulliCrossEntropyCost(tf.reshape(r.pred_sig, (-1, 1)),
                                      tf.reshape(targets, (-1, 1))))

        # h2h embedding losses
        alpha = 1.
        beta = 0.25
        vq_h_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(r.q_nvq_hiddens) - r.q_nst_hiddens))
        commit_h_loss = tf.reduce_mean(
            tf.square(r.q_nvq_hiddens - tf.stop_gradient(r.q_nst_hiddens)))

        # output embedding losses
        vq_o_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(r.q_nvq_out) - r.q_nst_out))
        commit_o_loss = tf.reduce_mean(
            tf.square(r.q_nvq_out - tf.stop_gradient(r.q_nst_out)))

        loss_r = rec_loss
        loss_h = alpha * vq_h_loss + beta * commit_h_loss
        loss_o = alpha * vq_o_loss + beta * commit_o_loss
        loss = loss_r + loss_h + loss_o

        params = get_params_dict()
        grads = tf.gradients(loss, params.values())
        learning_rate = 0.0001
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        grads = [tf.clip_by_value(g, -10., 10.) if g is not None else None
                 for g in grads]
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

        things_names = ["inputs", "targets",
                        "init_hidden", "init_cell",
                        "init_q_hidden", "init_q_cell",
                        "loss", "rec_loss", "train_step"]
        things_tf = [eval(name) for name in things_names]
        things_names += r._asdict().keys()
        things_tf += r._asdict().values()
        train_model = namedtuple('Model', things_names)(*things_tf)
        for tn, tt in zip(things_names, things_tf):
            graph.add_to_collection(tn, tt)
        return graph, train_model
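# Sketch (an assumption, not what the training graphs here do): the graphs in
# this file clip each gradient element-wise into [-10, 10] with
# tf.clip_by_value. An alternative is to rescale the whole gradient list by
# its global norm; tf.clip_by_global_norm ignores None entries, so the same
# None-preserving list can be passed straight through.
def clip_grads_by_global_norm(grads, clip_norm=10.):
    # returns the gradient list rescaled so its joint L2 norm is at most clip_norm
    clipped, _ = tf.clip_by_global_norm(grads, clip_norm)
    return clipped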
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, shape=[None, cut_len, 257, 1])
        bn_flag = tf.placeholder_with_default(tf.zeros(shape=[]), shape=[])
        x_tilde, z_e_x, z_q_x, z_i_x, z_emb = create_vqvae(images, bn_flag)
        rec_loss = tf.reduce_mean(BernoulliCrossEntropyCost(x_tilde, images))
        vq_loss = tf.reduce_mean(tf.square(tf.stop_gradient(z_e_x) - z_q_x))
        commit_loss = tf.reduce_mean(tf.square(z_e_x - tf.stop_gradient(z_q_x)))
        #rec_loss = tf.reduce_mean(tf.reduce_sum(BernoulliCrossEntropyCost(x_tilde, images), axis=[1, 2]))
        #vq_loss = tf.reduce_mean(tf.reduce_sum(tf.square(tf.stop_gradient(z_e_x) - z_q_x), axis=[1, 2, 3]))
        #commit_loss = tf.reduce_mean(tf.reduce_sum(tf.square(z_e_x - tf.stop_gradient(z_q_x)), axis=[1, 2, 3]))
        beta = 0.25
        loss = rec_loss + vq_loss + beta * commit_loss

        params = get_params_dict()
        enc_params = [params[k] for k in params.keys() if "enc" in k]
        dec_params = [params[k] for k in params.keys() if "dec" in k]
        emb_params = [params[k] for k in params.keys() if "embed" in k]

        dec_grads = list(zip(tf.gradients(loss, dec_params), dec_params))
        # scaled loss by alpha, but crank up vq loss grad
        # like having a higher lr only on embeds
        embed_grads = list(zip(tf.gradients(vq_loss, emb_params), emb_params))
        grad_z = tf.gradients(rec_loss, z_q_x)
        enc_grads = [(tf.gradients(z_e_x, p, grad_z)[0]
                      + tf.gradients(beta * commit_loss, p)[0], p)
                     for p in enc_params]

        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        train_step = optimizer.apply_gradients(dec_grads + enc_grads + embed_grads)

        things_names = ["images", "bn_flag",
                        "x_tilde", "z_e_x", "z_q_x", "z_i_x", "z_emb",
                        "loss", "rec_loss", "train_step"]
        things_tf = [images, bn_flag,
                     x_tilde, z_e_x, z_q_x, z_i_x, z_emb,
                     loss, rec_loss, train_step]
        assert len(things_names) == len(things_tf)
        for tn, tt in zip(things_names, things_tf):
            graph.add_to_collection(tn, tt)
        train_model = namedtuple('Model', things_names)(*things_tf)
        return graph, train_model
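# Sketch (an assumption about an alternative wiring, not how create_vqvae is
# necessarily built): the manual gradient routing above, pushing grad_z from
# rec_loss back through z_e_x per encoder parameter, implements the
# straight-through estimator. The same effect is often written as a single
# surrogate tensor fed to the decoder, so one tf.gradients(loss, params) call
# covers encoder and decoder, and only the codebook needs the separate vq_loss.
import tensorflow as tf

def straight_through(z_e, z_q):
    # forward pass: returns the quantized codes z_q
    # backward pass: the stop_gradient term contributes nothing, so the
    # decoder gradient is copied straight onto the encoder output z_e
    return z_e + tf.stop_gradient(z_q - z_e)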
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, shape=[None, batch_size, 1])
        inputs_tm1 = inputs[:-1]
        inputs_t = inputs[1:]
        init_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_q_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_q_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        r = create_model(inputs_tm1, inputs_t, init_hidden, init_cell,
                         init_q_hidden, init_q_cell)
        (pred_sm, pred, hiddens, cells, q_hiddens, q_cells,
         q_nst_hiddens, q_nvq_hiddens, i_hiddens, oh_tm1) = r
        rec_loss = tf.reduce_mean(
            CategoricalCrossEntropyIndexCost(pred_sm, inputs_t))
        alpha = 1.
        beta = 0.25
        vq_h_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(q_nvq_hiddens) - q_nst_hiddens))
        commit_h_loss = tf.reduce_mean(
            tf.square(q_nvq_hiddens - tf.stop_gradient(q_nst_hiddens)))
        loss = rec_loss + alpha * vq_h_loss + beta * commit_h_loss

        params = get_params_dict()
        grads = tf.gradients(loss, params.values())
        learning_rate = 0.0001
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        grads = [tf.clip_by_value(g, -10., 10.) if g is not None else None
                 for g in grads]
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

        things_names = ["inputs", "inputs_tm1", "inputs_t",
                        "init_hidden", "init_cell",
                        "init_q_hidden", "init_q_cell",
                        "hiddens", "cells",
                        "q_hiddens", "q_cells",
                        "q_nvq_hiddens", "i_hiddens",
                        "pred", "pred_sm", "oh_tm1",
                        "loss", "rec_loss", "train_step"]
        things_tf = [inputs, inputs_tm1, inputs_t,
                     init_hidden, init_cell,
                     init_q_hidden, init_q_cell,
                     hiddens, cells,
                     q_hiddens, q_cells,
                     q_nvq_hiddens, i_hiddens,
                     pred, pred_sm, oh_tm1,
                     loss, rec_loss, train_step]
        assert len(things_names) == len(things_tf)
        for tn, tt in zip(things_names, things_tf):
            graph.add_to_collection(tn, tt)
        train_model = namedtuple('Model', things_names)(*things_tf)
        return graph, train_model
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        # this placeholder must have a fully specified (static) shape
        inputs = tf.placeholder(tf.float32,
                                shape=[word_length_limit, batch_size, 1])
        #outputs = tf.placeholder(tf.float32, shape=[word_length_limit + 1, batch_size, 1])
        #outputs_masks = tf.placeholder(tf.float32, shape=[word_length_limit + 1, batch_size])
        # these do not - the leading (time) dimension can stay dynamic
        outputs = tf.placeholder(tf.float32, shape=[None, batch_size, 1])
        outputs_masks = tf.placeholder(tf.float32, shape=[None, batch_size])
        pred_logits, enc_atts, dec_atts = create_model(inputs, outputs)
        enc_atts_0 = enc_atts[0]
        enc_atts_1 = enc_atts[1]
        enc_atts_2 = enc_atts[2]
        dec_atts_0 = dec_atts[0]
        dec_atts_1 = dec_atts[1]
        dec_atts_2 = dec_atts[2]
        loss_i = CategoricalCrossEntropyLinearIndexCost(
            pred_logits[:-1], outputs[1:])
        loss = tf.reduce_sum(outputs_masks[:-1] * loss_i) / tf.reduce_sum(
            outputs_masks[1:])

        params = get_params_dict()
        grads = tf.gradients(loss, params.values())
        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

        things_names = ["inputs", "outputs", "outputs_masks",
                        "pred_logits",
                        "enc_atts_0", "enc_atts_1", "enc_atts_2",
                        "dec_atts_0", "dec_atts_1", "dec_atts_2",
                        "loss", "train_step"]
        things_tf = [eval(tn) for tn in things_names]
        assert len(things_names) == len(things_tf)
        for tn, tt in zip(things_names, things_tf):
            graph.add_to_collection(tn, tt)
        train_model = namedtuple('Model', things_names)(*things_tf)
        return graph, train_model
def step(inp_t, h_tm1):
    output, state = SimpleRNNCell([inp_t], [3], h_tm1, h_dim, 20,
                                  random_state=random_state,
                                  name="l1")
    h = state[0]
    return output, h

o = scan(step, [inputs], [None, init_h])
loss = tf.reduce_mean(o[0])
h_o = o[1]

params_dict = get_params_dict()
params = params_dict.values()
grads = tf.gradients(loss, params)
learning_rate = 0.0002
opt = tf.train.AdamOptimizer(learning_rate=learning_rate, use_locking=True)
updates = opt.apply_gradients(zip(grads, params))

inputs_np = random_state.randn(33, n_batch, 3)
init_h_np = np.zeros((n_batch, h_dim))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {inputs: inputs_np,
            init_h: init_h_np}
    outs = [loss, updates, h_o]
    lop = sess.run(outs, feed)
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        # vqvae part
        # define all the vqvae inputs and outputs
        vqvae_inputs = tf.placeholder(tf.float32,
                                      shape=[None,
                                             train_audio[0].shape[0],
                                             train_audio[0].shape[1],
                                             train_audio[0].shape[2]])
        bn_flag = tf.placeholder_with_default(tf.zeros(shape=[]), shape=[])
        x_tilde, z_e_x, z_q_x, z_i_x, z_nst_q_x, z_emb = create_vqvae(
            vqvae_inputs, bn_flag)
        #rec_loss = tf.reduce_mean(BernoulliCrossEntropyCost(x_tilde, images))
        vqvae_rec_loss = tf.reduce_mean(tf.square(x_tilde - vqvae_inputs))
        vqvae_vq_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(z_e_x) - z_nst_q_x))
        vqvae_commit_loss = tf.reduce_mean(
            tf.square(z_e_x - tf.stop_gradient(z_nst_q_x)))
        vqvae_alpha = 1.
        vqvae_beta = 0.25
        vqvae_loss = (vqvae_rec_loss
                      + vqvae_alpha * vqvae_vq_loss
                      + vqvae_beta * vqvae_commit_loss)

        vqvae_params = get_params_dict()
        # get vqvae keys now, dict is *dynamic* and shared
        vqvae_params_keys = [k for k in vqvae_params.keys()]
        vqvae_grads = tf.gradients(vqvae_loss, vqvae_params.values())
        learning_rate = 0.0002
        vqvae_optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(vqvae_grads) == len(vqvae_params)
        j = [(g, p) for g, p in zip(vqvae_grads, vqvae_params.values())]
        vqvae_train_step = vqvae_optimizer.apply_gradients(j)

        # rnn part
        # ultimately we will use 2 calls to feed_dict to make lookup mappings easier, but could do it like this
        #rnn_inputs = tf.cast(tf.stop_gradient(tf.transpose(z_i_x, (2, 0, 1))), tf.float32)
        rnn_inputs = tf.placeholder(tf.float32, shape=[None, rnn_batch_size, 1])
        rnn_inputs_tm1 = rnn_inputs[:-1]
        rnn_inputs_t = rnn_inputs[1:]
        init_hidden = tf.placeholder(tf.float32, shape=[rnn_batch_size, n_hid])
        init_cell = tf.placeholder(tf.float32, shape=[rnn_batch_size, n_hid])
        init_q_hidden = tf.placeholder(tf.float32, shape=[rnn_batch_size, n_hid])
        init_q_cell = tf.placeholder(tf.float32, shape=[rnn_batch_size, n_hid])
        r = create_vqrnn(rnn_inputs_tm1, rnn_inputs_t,
                         init_hidden, init_cell,
                         init_q_hidden, init_q_cell)
        (pred_sm, pred, hiddens, cells, q_hiddens, q_cells,
         q_nst_hiddens, q_nvq_hiddens, i_hiddens, oh_tm1) = r
        rnn_rec_loss = tf.reduce_mean(
            CategoricalCrossEntropyIndexCost(pred_sm, rnn_inputs_t))
        #rnn_rec_loss = tf.reduce_mean(CategoricalCrossEntropyLinearIndexCost(pred, rnn_inputs_t))
        rnn_alpha = 1.
        rnn_beta = 0.25
        rnn_vq_h_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(q_nvq_hiddens) - q_nst_hiddens))
        rnn_commit_h_loss = tf.reduce_mean(
            tf.square(q_nvq_hiddens - tf.stop_gradient(q_nst_hiddens)))
        rnn_loss = (rnn_rec_loss
                    + rnn_alpha * rnn_vq_h_loss
                    + rnn_beta * rnn_commit_h_loss)

        rnn_params = {k: v for k, v in get_params_dict().items()
                      if k not in vqvae_params_keys}
        rnn_grads = tf.gradients(rnn_loss, rnn_params.values())
        learning_rate = 0.0001
        rnn_optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(rnn_grads) == len(rnn_params)
        rnn_grads = [tf.clip_by_value(g, -10., 10.) if g is not None else None
                     for g in rnn_grads]
        j = [(g, p) for g, p in zip(rnn_grads, rnn_params.values())]
        rnn_train_step = rnn_optimizer.apply_gradients(j)

        things_names = ["vqvae_inputs", "bn_flag",
                        "x_tilde", "z_e_x", "z_q_x", "z_i_x", "z_emb",
                        "vqvae_loss", "vqvae_rec_loss", "vqvae_train_step",
                        "rnn_inputs", "rnn_inputs_tm1", "rnn_inputs_t",
                        "init_hidden", "init_cell",
                        "init_q_hidden", "init_q_cell",
                        "hiddens", "cells",
                        "q_hiddens", "q_cells",
                        "q_nvq_hiddens", "i_hiddens",
                        "pred", "pred_sm", "oh_tm1",
                        "rnn_loss", "rnn_rec_loss", "rnn_train_step"]
        things_tf = [eval(name) for name in things_names]
        for tn, tt in zip(things_names, things_tf):
            graph.add_to_collection(tn, tt)
        train_model = namedtuple('Model', things_names)(*things_tf)
        return graph, train_model