def __init__(self, nspecies, n_hidden_precisions, inputs=None, hidden_activation=tf.nn.tanh):
    '''Initialize neural precisions layers'''
    self.nspecies = nspecies
    if inputs is None:
        inputs = self.nspecies + 1
    inp = Dense(n_hidden_precisions, activation=hidden_activation, use_bias=True,
                name="prec_hidden", input_shape=(inputs,))
    act_layer = Dense(4, activation=tf.nn.sigmoid, name="prec_act", bias_constraint=NonNeg())
    deg_layer = Dense(4, activation=tf.nn.sigmoid, name="prec_deg", bias_constraint=NonNeg())
    self.act = Sequential([inp, act_layer])
    self.deg = Sequential([inp, deg_layer])
    for layer in [inp, act_layer, deg_layer]:
        weights, bias = layer.weights
        variable_summaries(weights, layer.name + "_kernel", False)
        variable_summaries(bias, layer.name + "_bias", False)
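# Every snippet in this file calls variable_summaries(), but the helper itself is never shown and its
# signature varies between projects. Below is a minimal sketch of the canonical TensorFlow 1.x
# tutorial-style helper, given as an assumption; the extra positional/keyword arguments used by some
# snippets (names, collections, families, histogram flags) are project-specific and not modeled here.
import tensorflow as tf

def variable_summaries(var, name='summaries', collections=None):
    """Attach mean/stddev/min/max scalars and a histogram of a tensor for TensorBoard."""
    with tf.name_scope(name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean, collections=collections)
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev, collections=collections)
        tf.summary.scalar('max', tf.reduce_max(var), collections=collections)
        tf.summary.scalar('min', tf.reduce_min(var), collections=collections)
        tf.summary.histogram('histogram', var, collections=collections)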
def _linear_layer(self, input, dim_0, dim_1, name='', out_layer=False, lap=0):
    '''
    Builds a linear layer
    :param input: (tensor)
    :param dim_0: (int)
    :param dim_1: (int)
    :param name: (str) name of the layer
    :param out_layer: (Boolean) if True, no activation
    :return: (tensor)
    '''
    with tf.name_scope(name):
        with tf.name_scope('Weights'):
            W = tf.Variable(tf.truncated_normal([dim_0, dim_1], stddev=0.1), name='W')
            variable_summaries(W, ['train'], family='Lap_{}'.format(lap))
        b = tf.Variable(tf.zeros([1, dim_1]), name='Bias_hidden')
        variable_summaries(b, ['train'], family='Lap_{}'.format(lap))
        out_matmul = tf.matmul(input, W, name='Matmul')
        out = tf.add(out_matmul, b, name='Add')
        tf.summary.histogram('pre_activations', out, collections=['train'], family='Lap_{}'.format(lap))
        if not out_layer:
            out = tf.nn.relu(out, name='Relu')
            tf.summary.histogram('post_activations', out, collections=['train'], family='Lap_{}'.format(lap))
    return out
def build_train_step(self, dreg, encoder, objective, opt_func):
    '''Returns a computation that is run in the tensorflow session.'''
    # This path is for b_use_correct_iwae_gradients = True. For False, we would just
    # want to return opt_func.minimize(objective.vae_cost)
    trainable_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    if dreg:
        grads = self.create_dreg_gradients(encoder, objective, trainable_params)
        print("Set up Doubly Reparameterized Gradient (dreg)")
    else:
        # ... so, get list of params we will change with grad and ...
        # ... compute the VAE elbo gradient, using special stop grad function to prevent propagating gradients
        # through ws (ie just a copy)
        grads = tf.gradients(objective.vae_cost, trainable_params)
        print("Set up non-dreg gradient")
    # grads = [tf.clip_by_value(g, -0.1, 0.1) for g in iwae_grads]
    if self.tb_gradients:
        with tf.name_scope('Gradients'):
            for p, g in zip(trainable_params, grads):
                variable_summaries(g, p.name.split(':')[0], self.plot_histograms)
    # TODO(dacart): check if this should go above "optimizer =" or be deleted.
    # clipped_grads = [tf.clip_by_norm(g, 1.0) for g in grads]
    # This depends on update rule implemented in AdamOptimizer:
    optimizer = opt_func.apply_gradients(zip(grads, trainable_params))
    return optimizer
def masked_softmax_cross_entropy(preds, labels, mask):
    """Softmax cross-entropy loss with masking."""
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    loss *= mask
    fin_loss = tf.reduce_mean(loss)
    variable_summaries(fin_loss)
    return fin_loss
def masked_accuracy(preds, labels, mask):
    """Accuracy with masking."""
    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    accuracy_all *= mask
    accuracy = tf.reduce_mean(accuracy_all)
    variable_summaries(accuracy)
    return accuracy
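# A minimal usage sketch for the two masked ops above, assuming TensorFlow 1.x. The feature/class
# sizes, the softmax-regression weights, and the 0.01 learning rate are illustrative assumptions,
# not taken from the original code.
import tensorflow as tf

features = tf.placeholder(tf.float32, shape=(None, 16), name='features')
labels = tf.placeholder(tf.float32, shape=(None, 7), name='labels')        # one-hot targets
train_mask = tf.placeholder(tf.int32, shape=(None,), name='train_mask')    # 1 = example belongs to the split

W = tf.Variable(tf.zeros([16, 7]), name='W')   # hypothetical single-layer classifier
logits = tf.matmul(features, W)

loss = masked_softmax_cross_entropy(logits, labels, train_mask)
accuracy = masked_accuracy(logits, labels, train_mask)
train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)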
def one2oneLayer(self, input, namespace):
    with tf.name_scope(namespace):
        shape = X_SHAPE
        weights = tf.Variable(tf.ones(shape), name='weights')
        bias = tf.Variable(tf.zeros(shape), name='bias')
        out = tf.multiply(input, weights) + bias
        # out = tf.identity(out, name="out")
        with tf.name_scope('summary'):
            variable_summaries(weights, name='weights')
            variable_summaries(bias, name='bias')
    return out
def build_ff_neural_net(nn_input, n_inputs, hidden_layers, nonlinearity, scope_name,
                        variable_name, collect_summary, logit_weights=None,
                        initializer=layers.xavier_initializer(), dropout=False):
    assert len(hidden_layers) == len(nonlinearity)
    name_scope = '%s/%s' % (scope_name, variable_name)
    h = nn_input
    n_hiddens = n_inputs
    n_hiddens_next = hidden_layers[0]
    for i in range(len(hidden_layers)):
        w = get_scope_variable(scope_name, "%s/layer%d/weights" % (variable_name, i),
                               shape=(n_hiddens, n_hiddens_next), initializer=initializer)
        b = get_scope_variable(scope_name, "%s/layer%d/biases" % (variable_name, i),
                               shape=(n_hiddens_next,), initializer=initializer)
        if collect_summary:
            with tf.name_scope(name_scope + '/layer%d' % i):
                with tf.name_scope('weights'):
                    variable_summaries(w)
                with tf.name_scope('biases'):
                    variable_summaries(b)
                with tf.name_scope('Wx_plus_b'):
                    pre_h = tf.matmul(h, w) + b
                    # Yunfei: dropout option is useless now
                    if dropout:
                        # if i == 0:
                        #     pre_h = tf.nn.dropout(tf.matmul(h, w), keep_prob=0.8) + b
                        # else:
                        pre_h = tf.nn.dropout(tf.matmul(h, w), keep_prob=dropout) + b
                    tf.summary.histogram('pre_activations', pre_h)
                h = nonlinearity[i](pre_h, name='activation')
                tf.summary.histogram('activations', h)
        else:
            pre_h = tf.matmul(h, w) + b
            h = nonlinearity[i](pre_h, name='activation')
        n_hiddens = hidden_layers[i]
        if i + 1 < len(hidden_layers):
            n_hiddens_next = hidden_layers[i + 1]
        if logit_weights is not None and i == len(hidden_layers) - 2:
            h *= logit_weights
    return h
def initialize_training(self): #optimizer = tf.train.GradientDescentOptimizer(self.eta) optimizer = tf.train.AdamOptimizer() self.train = optimizer.minimize(self.loss) self.sess = tf.Session() with self.sess.as_default(): tf.global_variables_initializer().run() #if self.hierarchical: # for t in range(4): # variable_summaries(self.states[t]+'_diff', tf.abs(self.rho # - self.geo_rho[self.states[t]])) if self.amortized: variable_summaries('phi', self.phi) variable_summaries('rho', self.rho) variable_summaries('alpha', self.alpha) for t in range(self.n_states): variable_summaries( self.states[t] + '_rho', neural_network(self.rho, self.phi, self.K, t, self.H0, self.resnet)) variable_summaries( self.states[t] + '_diff', tf.abs(self.rho - neural_network( self.rho, self.phi, self.K, t, self.H0, self.resnet))) with tf.name_scope('objective'): tf.summary.scalar('loss', self.loss) tf.summary.scalar('priors', self.log_prior) tf.summary.scalar('ll_pos', self.ll_pos) tf.summary.scalar('ll_neg', self.ll_neg) self.summaries = tf.summary.merge_all() self.train_writer = tf.summary.FileWriter(self.logdir, self.sess.graph) self.saver = tf.train.Saver() config = projector.ProjectorConfig() alpha = config.embeddings.add() alpha.tensor_name = 'model/embeddings/alpha' alpha.metadata_path = '../vocab.tsv' if self.amortized: phi = config.embeddings.add() phi.tensor_name = 'model/embeddings/phi' phi.metadata_path = '../states.tsv' rho = config.embeddings.add() rho.tensor_name = 'model/embeddings/rho' rho.metadata_path = '../vocab.tsv' for state in self.states: rho = config.embeddings.add() rho.tensor_name = 'model/embeddings/' + state + '_rho' rho.metadata_path = '../vocab.tsv' projector.visualize_embeddings(self.train_writer, config)
def model(self, X, Y):
    feature = int(np.prod(X.get_shape()[1:]))
    classes = int(np.prod(Y.get_shape()[1:]))
    keep_prob = tf.placeholder(tf.float32)

    # linear layer
    W = weight_variable([feature, classes])
    b = bias_variable([classes])

    # loss
    logits = tf.matmul(X, W) + b
    entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits, name='loss')
    loss = tf.reduce_mean(entropy)
    variable_summaries(loss, 'loss')
    return logits, loss, keep_prob, "linear"
def ConvNet(x, input_shape, filters_out=64, n_classes=10, non_linearity='relu'):
    # Basic CNN from Cleverhans MNIST tutorial:
    # https://github.com/mmarius/cleverhans/blob/master/cleverhans_tutorials/tutorial_models.py#L155
    h = x
    input_shape = list(input_shape)
    h, output_shape = l.conv2d(h, kernel_size=8, stride=2, filters_in=input_shape[-1],
                               filters_out=filters_out, padding='SAME', name='conv1')
    h = l.non_linearity(h, name=non_linearity)
    h, output_shape = l.conv2d(h, kernel_size=6, stride=2, filters_in=output_shape[-1],
                               filters_out=filters_out * 2, padding='VALID', name='conv2')
    h = l.non_linearity(h, name=non_linearity)
    h, output_shape = l.conv2d(h, kernel_size=5, stride=1, filters_in=output_shape[-1],
                               filters_out=filters_out * 2, padding='VALID', name='conv3')
    h = l.non_linearity(h, name=non_linearity)
    h, output_shape = l.flatten(input_shape=output_shape, x=h)
    logits, output_shape = l.linear(input_shape=output_shape, n_hidden=n_classes, x=h, name='output-layer')
    utils.variable_summaries(logits, name='unscaled-logits-output-layer')
    return logits
def model(self, X, Y):
    feature = int(np.prod(X.get_shape()[1:]))
    classes = int(np.prod(Y.get_shape()[1:]))
    x_image = tf.reshape(X, [-1, feature, 1, 1])

    # 1st conv layer
    with tf.name_scope('conv1'):
        W = weight_variable([5, 1, 1, 32])
        b = bias_variable([32])
        h = tf.nn.relu(conv2d(x_image, W) + b)
        conv1 = max_pool_2x2(h)

    # 2nd conv layer
    with tf.name_scope('conv2'):
        W = weight_variable([5, 1, 32, 64])
        b = bias_variable([64])
        conv2 = tf.nn.relu(conv2d(conv1, W) + b)

    keep_prob = tf.placeholder(tf.float32)

    # 1st fc layer
    with tf.name_scope('fc1'):
        shape = int(np.prod(conv2.get_shape()[1:]))
        W = weight_variable([shape, 1024])
        b = bias_variable([1024])
        conv2_flat = tf.reshape(conv2, [-1, shape])
        h = tf.nn.relu(tf.matmul(conv2_flat, W) + b)
        fc1 = tf.nn.dropout(h, keep_prob)

    # 2nd fc layer
    with tf.name_scope('fc2'):
        W = weight_variable([1024, classes])
        b = bias_variable([classes])
        logits = tf.matmul(fc1, W) + b

    entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits, name='loss')
    loss = tf.reduce_mean(entropy)
    variable_summaries(loss, 'loss')
    return logits, loss, keep_prob, 'cnn'
def add_linear_output_layer(self, last_hidden_layer, ground_truth, corpus_tag, task_tag, loss_weight=1):
    # returns loss op
    with tf.variable_scope("output_layer_%s" % task_tag) as layer_scope:
        last_out = fully_connected(last_hidden_layer, 1, activation_fn=tf.identity,
                                   weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                                   scope=layer_scope)
        self.predictions = last_out
    with tf.name_scope("%s_loss_%s" % (corpus_tag, task_tag)):
        loss = loss_weight * tf.reduce_mean(tf.squared_difference(last_out, ground_truth))
        utils.variable_summaries(loss, "loss", corpus_tag)
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    with tf.name_scope('%s_accuracy_%s' % (corpus_tag, task_tag)):
        accuracy, _ = streaming_mean_relative_error(last_out, ground_truth, ground_truth,
                                                    name="acc_%s" % corpus_tag,
                                                    updates_collections=tf.GraphKeys.UPDATE_OPS)
        accuracy = 1 - accuracy
        utils.variable_summaries(accuracy, "accuracy", corpus_tag)
        updates_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.calculate_accuracy_op = control_flow_ops.with_dependencies(updates_op, accuracy)
def encode(self):
    encoded_output, encoded_state = utils.encode_seq(
        input_seq=self.q1, seq_len=self.len1, word_embeddings=self.word_embeddings,
        num_neurons=self.num_neurons)  # [batch_size, 2*num_neurons]
    with tf.variable_scope("variational_inference"):  # Variational inference
        mean = utils.linear(encoded_state, self.hidden_size, scope='mean')        # [batch_size, n_hidden]
        logsigm = utils.linear(encoded_state, self.hidden_size, scope='logsigm')  # [batch_size, n_hidden]
        self.mean, self.logsigm = mean, logsigm
        # Gaussian multivariate: kld(z, N(0,1)) = -0.5 * [sum_d(logsigma) + d - sum_d(sigma) - mu_T*mu]
        klds = -0.5 * (tf.reduce_sum(logsigm, 1)
                       + tf.cast(tf.shape(mean)[1], tf.float32)
                       - tf.reduce_sum(tf.exp(logsigm), 1)
                       - tf.reduce_sum(tf.square(mean), 1))  # KLD(q(z|x), N(0,1)), tensor [batch_size]
        utils.variable_summaries('klds', klds)  # posterior distribution close to prior N(0,1)
        self.kld = tf.reduce_mean(klds, 0)  # mean over batches: scalar
        h_ = tf.get_variable("GO", [1, self.hidden_size], initializer=self.initializer)
        h_ = tf.tile(h_, [self.batch_size, 1])  # trainable tensor: decoder init_state[1]
        eps = tf.random_normal((self.batch_size, self.hidden_size), 0, 1)
        self.doc_vec = tf.multiply(tf.exp(logsigm), eps) + mean  # sample from latent intent space: decoder init_state[0]
        self.doc_vec = self.doc_vec, h_  # tuple state (Z, h)
def add_classification_output_layer(self, last_hidden_layer, gt_labels, num_classes, corpus_tag,
                                    task_tag, loss_weight=1):
    # returns loss op
    with tf.variable_scope("output_layer_%s" % task_tag) as layer_scope:
        last_out = fully_connected(last_hidden_layer, num_classes, activation_fn=tf.identity,
                                   weights_regularizer=l1_l2_regularizer(self.l1_reg, self.l2_reg),
                                   scope=layer_scope)
        self.predictions = tf.nn.softmax(last_out)
    with tf.name_scope("%s_loss_%s" % (corpus_tag, task_tag)):
        loss = loss_weight * tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=last_out, labels=gt_labels))
        utils.variable_summaries(loss, "loss", corpus_tag)
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
    with tf.name_scope('%s_accuracy_%s' % (corpus_tag, task_tag)):
        # correct_prediction = tf.equal(tf.argmax(last_out, 1), gt_labels)
        # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) * 100
        accuracy, _ = streaming_accuracy(tf.argmax(last_out, 1), gt_labels,
                                         name="acc_%s" % corpus_tag,
                                         updates_collections=tf.GraphKeys.UPDATE_OPS)
        utils.variable_summaries(accuracy, "accuracy", corpus_tag)
        updates_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        self.calculate_accuracy_op = control_flow_ops.with_dependencies(updates_op, accuracy)
def policy_model(x, stochastic=0.0, collect_summary=False):
    assert (training_layers[0].shape[1] == x.shape[1])
    h = x
    for i, layer in enumerate(training_layers[1:]):
        w = layer.W
        b = layer.b
        pre_h = tf.matmul(h, w) + b
        h = layer.nonlinearity(pre_h, name='policy_out')
        if collect_summary:
            with tf.name_scope(scope_name + '/observation'):
                variable_summaries(x)
            with tf.name_scope(scope_name + '/layer%d' % i):
                with tf.name_scope('weights'):
                    variable_summaries(w)
                with tf.name_scope('biases'):
                    variable_summaries(b)
                with tf.name_scope('Wx_plus_b'):
                    tf.summary.histogram('pre_activations', pre_h)
                tf.summary.histogram('activations', h)
    std = training_policy._l_std_param.param
    h += stochastic * tf.random_normal(shape=(tf.shape(x)[0], n_actions)) * tf.exp(std)
    return h
def SimpleNet1(x, input_shape, neurons=1024, n_classes=10, non_linearity='relu', create_summaries=True):
    h = x
    h, output_shape = l.flatten(input_shape, h)
    h, output_shape = l.linear(output_shape, neurons, h, name='linear1')
    if create_summaries:
        utils.variable_summaries(h, name='linear-comb-hidden-layer')
    h = l.non_linearity(h, name=non_linearity)
    if create_summaries:
        utils.variable_summaries(h, name='activation-hidden-layer')
        sparsity = tf.nn.zero_fraction(h, name='activation-hidden-layer-sparsity')
        tf.summary.scalar(sparsity.op.name, sparsity)
    logits, output_shape = l.linear(output_shape, n_classes, h, name='output')
    if create_summaries:
        utils.variable_summaries(logits, name='unscaled-logits-output-layer')
    return logits
def build(self): self.init_variables() ## batchsize x 5 x labelemb self.yemb = tf.nn.embedding_lookup(self.labelemb, self.ys_, name='yemb') ## batchsize x 10 x labelemb self.negemb = tf.nn.embedding_lookup(self.labelemb, self.negsamples, name='negemb') # rnnin = [tf.zeros(shape=(tf.shape(yemb)[0], 1)) for i in range(5)] log.info('input label embedding-{}'.format(self.yemb.get_shape())) log.info('negative sample embedding-{}'.format( self.negemb.get_shape())) rnnin = [self.inputs for i in range(self.numfuncs)] rnnout, rnn_final_states = tf.nn.static_rnn(self.lstmcell, rnnin, dtype=tf.float32) #initial_state=self.inputs #) # log.info('rnnout shape {}'.format(rnnout.get_shape())) rflat = tf.reshape(rnnout, shape=[-1, self.lstm_statesize]) # batchsize*5 x labeldim self.output = tf.nn.l2_normalize(tf.nn.softplus( tf.matmul(rflat, self.output_weights) + self.output_bias, name='yhat'), axis=1) log.info('final decoder out shape {}'.format(self.output.get_shape())) # ipdb.set_trace() self.transformed_y = tf.nn.l2_normalize(tf.matmul( tf.reshape(self.yemb, shape=[-1, self.label_dimensions]), self.ytransform), axis=1) variable_summaries(self.transformed_y) # batch size*10 x labeldim self.transformed_negsamples = tf.nn.l2_normalize(tf.matmul( tf.reshape(self.negemb, shape=[-1, self.label_dimensions]), self.ytransform), axis=1) variable_summaries(self.ytransform) # batchsize *5 x 1 self.cosinesim_pos = tf.reduce_sum(tf.multiply(self.output, self.transformed_y), axis=1) # batchsize *5 x batchsize*10 self.cosinesim_neg = tf.matmul( self.output, tf.transpose(self.transformed_negsamples)) # batchsize *5 x 1 self.min_neg_dist = tf.reduce_min(self.cosinesim_neg, axis=1) self.loss = tf.reduce_mean( tf.exp(self.cosinesim_pos, name='posdist') / (tf.exp(self.min_neg_dist, name='negdist') + tf.constant(1e-3)), name='loss') tf.summary.scalar('loss', self.loss) self.optimizer = tf.train.AdamOptimizer( learning_rate=self.learning_rate) self.train = self.optimizer.minimize(self.loss) self.summary = tf.summary.merge_all() # self.predictions, self.precision, self.recall, self.f1 = self.make_prediction() self.predictions = self.make_prediction() return self
def negQ(self, x, y, reuse=False): """Architecture of the neural network""" print('x shape', x.get_shape()) print('y shape', y.get_shape()) szs = self.layers_dim assert (len(szs) >= 1) fc = tflearn.fully_connected bn = tflearn.batch_normalization lrelu = tflearn.activations.leaky_relu if reuse: tf.get_variable_scope().reuse_variables() nLayers = len(szs) us = [] zs = [] z_zs = [] z_ys = [] z_us = [] reg = 'L2' prevU = x for i in range(nLayers): with tf.variable_scope('u' + str(i), reuse=reuse) as s: u = fc(prevU, szs[i], reuse=reuse, scope=s, regularizer=reg) if i < nLayers - 1: u = tf.nn.relu(u) if FLAGS.icnn_bn: u = bn(u, reuse=reuse, scope=s, name='bn') variable_summaries(u, suffix='u{}'.format(i)) us.append(u) prevU = u prevU, prevZ = x, y for i in range(nLayers + 1): sz = szs[i] if i < nLayers else 1 z_add = [] if i > 0: with tf.variable_scope('z{}_zu_u'.format(i), reuse=reuse) as s: zu_u = fc(prevU, szs[i - 1], reuse=reuse, scope=s, activation='relu', bias=True, regularizer=reg, bias_init=tf.constant_initializer(1.)) variable_summaries(zu_u, suffix='zu_u{}'.format(i)) with tf.variable_scope('z{}_zu_proj'.format(i), reuse=reuse) as s: z_zu = fc(tf.multiply(prevZ, zu_u), sz, reuse=reuse, scope=s, bias=False, regularizer=reg) variable_summaries(z_zu, suffix='z_zu{}'.format(i)) z_zs.append(z_zu) z_add.append(z_zu) with tf.variable_scope('z{}_yu_u'.format(i), reuse=reuse) as s: yu_u = fc(prevU, self.dimA, reuse=reuse, scope=s, bias=True, regularizer=reg, bias_init=tf.constant_initializer(1.)) variable_summaries(yu_u, suffix='yu_u{}'.format(i)) with tf.variable_scope('z{}_yu'.format(i), reuse=reuse) as s: z_yu = fc(tf.multiply(y, yu_u), sz, reuse=reuse, scope=s, bias=False, regularizer=reg) z_ys.append(z_yu) variable_summaries(z_yu, suffix='z_yu{}'.format(i)) z_add.append(z_yu) with tf.variable_scope('z{}_u'.format(i), reuse=reuse) as s: z_u = fc(prevU, sz, reuse=reuse, scope=s, bias=True, regularizer=reg, bias_init=tf.constant_initializer(0.)) variable_summaries(z_u, suffix='z_u{}'.format(i)) z_us.append(z_u) z_add.append(z_u) z = tf.add_n(z_add) variable_summaries(z, suffix='z{}_preact'.format(i)) if i < nLayers: # z = tf.nn.relu(z) z = lrelu(z, alpha=FLAGS.lrelu) variable_summaries(z, suffix='z{}_act'.format(i)) zs.append(z) prevU = us[i] if i < nLayers else None prevZ = z print('z shape', z.get_shape()) z = tf.reshape(z, [-1], name='energies') return z
def train(ARGS): # Define helper function for evaluating on test data during training def eval(epoch): from train_utils import clean_eval test_accuracy, test_loss, _ = clean_eval(sess, x, y, is_training, testloader, n_classes, logits, preds) # Write tensorboard summary acc_summary = tf.Summary() acc_summary.value.add(tag='Evaluation/accuracy/test', simple_value=test_accuracy) writer_test.add_summary(acc_summary, epoch) # Write tensorboard summary err_summary = tf.Summary() err_summary.value.add(tag='Evaluation/error/test', simple_value=1.0 - test_accuracy) writer_test.add_summary(err_summary, epoch) # Write tensorboard summary loss_summary = tf.Summary() loss_summary.value.add(tag='Evaluation/loss/test', simple_value=test_loss) writer_test.add_summary(loss_summary, epoch) # Define helper function for evaluating on adversarial test data during training def adv_eval(epoch): from train_utils import adversarial_eval adv_accuracy, adv_loss = adversarial_eval(sess, x, y, is_training, adv_testloader, n_classes, preds, adv_preds, eval_all=True) # Write tensorboard summary acc_summary = tf.Summary() acc_summary.value.add(tag='Evaluation/adversarial-accuracy/test', simple_value=adv_accuracy) writer_test.add_summary(acc_summary, epoch) # Write tensorboard summary err_summary = tf.Summary() err_summary.value.add(tag='Evaluation/adversarial-error/test', simple_value=1.0 - adv_accuracy) writer_test.add_summary(err_summary, epoch) # Write tensorboard summary loss_summary = tf.Summary() loss_summary.value.add(tag='Evaluation/adversarial-loss/test', simple_value=adv_loss) writer_test.add_summary(loss_summary, epoch) # Define computational graph with tf.Graph().as_default() as g: # Define placeholders with tf.device('/gpu:0'): with tf.name_scope('Placeholders'): x = tf.placeholder(dtype=tf.float32, shape=input_shape, name='inputs') x_pair1 = tf.placeholder(dtype=tf.float32, shape=input_shape, name='x-pair1') x_pair2 = tf.placeholder(dtype=tf.float32, shape=input_shape, name='x-pair2') y = tf.placeholder(dtype=tf.float32, shape=(None, n_classes), name='labels') is_training = tf.placeholder_with_default(True, shape=(), name='is-training') # Define TF session config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True) config.gpu_options.allow_growth = True sess = tf.Session(graph=g, config=config) # Define model with tf.name_scope('Model'): with tf.device('/gpu:0'): model = Model(nb_classes=n_classes, input_shape=input_shape, is_training=is_training) # Define forward-pass with tf.name_scope('Logits'): logits = model.get_logits(x) with tf.name_scope('Probs'): preds = tf.nn.softmax(logits) with tf.name_scope('Accuracy'): ground_truth = tf.argmax(y, axis=1) predicted_label = tf.argmax(preds, axis=1) correct_prediction = tf.equal(predicted_label, ground_truth) acc = tf.reduce_mean(tf.to_float(correct_prediction), name='accuracy') tf.add_to_collection('accuracies', acc) err = tf.identity(1.0 - acc, name='error') tf.add_to_collection('accuracies', err) # Define losses with tf.name_scope('Losses'): ce_loss, wd_loss, clp_loss, lsq_loss, at_loss, alp_loss = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 adv_logits = None if ARGS.ct: with tf.name_scope('Cross-Entropy-Loss'): ce_loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=logits, labels=y), name='cross-entropy-loss') tf.add_to_collection('losses', ce_loss) if ARGS.at: with tf.name_scope('Adversarial-Cross-Entropy-Loss'): at_loss, adv_logits = get_at_loss( sess, x, y, model, ARGS.eps, ARGS.eps_iter, ARGS.nb_iter) at_loss = tf.identity(at_loss, 
name='at-loss') tf.add_to_collection('losses', at_loss) with tf.name_scope('Regularizers'): if ARGS.wd: with tf.name_scope('Weight-Decay'): for var in tf.trainable_variables(): if 'beta' in var.op.name: # Do not regularize bias of batch normalization continue # print('regularizing: ', var.op.name) wd_loss += tf.nn.l2_loss(var) reg_loss = tf.identity(wd_loss, name='wd-loss') tf.add_to_collection('losses', reg_loss) if ARGS.alp: with tf.name_scope('Adversarial-Logit-Pairing'): alp_loss = get_alp_loss( sess, x, y, logits, adv_logits, model, ARGS.eps, ARGS.eps_iter, ARGS.nb_iter) alp_loss = tf.identity(alp_loss, name='alp-loss') tf.add_to_collection('losses', alp_loss) if ARGS.clp: with tf.name_scope('Clean-Logit-Pairing'): clp_loss = get_clp_loss( x_pair1, x_pair2, model) clp_loss = tf.identity(clp_loss, name='clp-loss') tf.add_to_collection('losses', clp_loss) if ARGS.lsq: with tf.name_scope('Logit-Squeezing'): lsq_loss = get_lsq_loss(x, model) lsq_loss = tf.identity(lsq_loss, name='lsq-loss') tf.add_to_collection('losses', lsq_loss) with tf.name_scope('Total-Loss'): # Define objective function total_loss = (ARGS.ct_lambda * ce_loss) + ( ARGS.at_lambda * at_loss) + (ARGS.wd_lambda * wd_loss) + ( ARGS.clp_lambda * clp_loss) + (ARGS.lsq_lambda * lsq_loss) + ( ARGS.alp_lambda * alp_loss) total_loss = tf.identity(total_loss, name='total-loss') tf.add_to_collection('losses', total_loss) # Define PGD adversary with tf.name_scope('PGD-Attacker'): pgd_params = { 'ord': np.inf, 'y': y, 'eps': ARGS.eps / 255, 'eps_iter': ARGS.eps_iter / 255, 'nb_iter': ARGS.nb_iter, 'rand_init': True, 'rand_minmax': ARGS.eps / 255, 'clip_min': 0., 'clip_max': 1., 'sanity_checks': True } pgd = ProjectedGradientDescent(model, sess=sess) adv_x = pgd.generate(x, **pgd_params) with tf.name_scope('Logits'): adv_logits = model.get_logits(adv_x) with tf.name_scope('Probs'): adv_preds = tf.nn.softmax(adv_logits) # Define optimizer with tf.device('/gpu:0'): with tf.name_scope('Optimizer'): # Define global step variable global_step = tf.get_variable( name='global_step', shape=[], # scalar dtype=tf.float32, initializer=tf.zeros_initializer(), trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=ARGS.lr, beta1=0.9, beta2=0.999, epsilon=1e-6, use_locking=False, name='Adam') trainable_vars = tf.trainable_variables() update_bn_ops = tf.get_collection( tf.GraphKeys.UPDATE_OPS ) # this collection stores the moving_mean and moving_variance ops # for batch normalization with tf.control_dependencies(update_bn_ops): grads_and_vars = optimizer.compute_gradients( total_loss, trainable_vars) train_step = optimizer.apply_gradients( grads_and_vars, global_step=global_step) # Add Tensorboard summaries with tf.device('/gpu:0'): # Create file writers writer_train = tf.summary.FileWriter(ARGS.log_dir + '/train', graph=g) writer_test = tf.summary.FileWriter(ARGS.log_dir + '/test') # Add summary for input images with tf.name_scope('Image-Summaries'): # Create image summary ops tf.summary.image('input', x, max_outputs=2, collections=['training']) # Add summaries for the training losses losses = tf.get_collection('losses') for entry in losses: tf.summary.scalar(entry.name, entry, collections=['training']) # Add summaries for the training accuracies accs = tf.get_collection('accuracies') for entry in accs: tf.summary.scalar(entry.name, entry, collections=['training']) # Add summaries for all trainable vars for var in trainable_vars: tf.summary.histogram(var.op.name, var, collections=['training']) var_norm = tf.norm(var, ord='euclidean') 
tf.summary.scalar(var.op.name + '/l2norm', var_norm, collections=['training']) # Add summaries for variable gradients for grad, var in grads_and_vars: if grad is not None: tf.summary.histogram(var.op.name + '/gradients', grad, collections=['training']) grad_norm = tf.norm(grad, ord='euclidean') tf.summary.scalar(var.op.name + '/gradients/l2norm', grad_norm, collections=['training']) # Add summaries for the logits and model predictions with tf.name_scope('Logits-Summaries'): variable_summaries(tf.identity(logits, name='logits'), name='logits', collections=['training', 'test'], histo=True) with tf.name_scope('Predictions-Summaries'): variable_summaries(tf.identity(preds, name='predictions'), name='predictions', collections=['training', 'test'], histo=True) # Initialize all variables with sess.as_default(): tf.global_variables_initializer().run() # Collect training params train_params = { 'epochs': ARGS.epochs, 'eval_step': ARGS.eval_step, 'adv_eval_step': ARGS.adv_eval_step, 'n_classes': n_classes, 'clp': ARGS.clp } # Start training loop model_train(sess, x, y, x_pair1, x_pair2, is_training, trainloader, train_step, args=train_params, evaluate=eval, adv_evaluate=adv_eval, writer_train=writer_train) # Save the trained model if ARGS.save: save_path = os.path.join(ARGS.save_dir, ARGS.filename) saver = tf.train.Saver(var_list=tf.global_variables()) saver.save(sess, save_path) print("Saved model at {:s}".format(str(ARGS.save_dir)))
def main(_): # Set TF random seed to improve reproducibility tf.set_random_seed(1234) np.random.seed(1234) with tf.device('/cpu:0'): # Get time stamp experiment_ts = strftime("%H-%M-%S", localtime()) # Create log and checkpoint dir for the current experiment FLAGS.log_dir += '/{:s}/{:s}'.format(FLAGS.optimizer, experiment_ts) utils.create_dir(FLAGS.log_dir) # Get training data X_train, Y_train, X_test, Y_test, X_val, Y_val = load_mnist(FLAGS.data_dir) # Adding validation data to training data (60.000 training data) X_train = np.append(X_train, X_val, axis=0) Y_train = np.append(Y_train, Y_val, axis=0) print('X_train shape: ', X_train.shape) print('X_test shape: ', X_test.shape) # Repeat training for all specified models for method in FLAGS.methods: # Create log dir log_dir = FLAGS.log_dir + '/{:s}'.format(method) utils.create_dir(log_dir) with tf.Graph().as_default() as g: with tf.device(FLAGS.device): # Define placeholders for inputs with tf.name_scope('Inputs'): # Inputs x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1), name='X') y = tf.placeholder(tf.float32, shape=(None, 10), name='y') with tf.variable_scope('model') as scope: with tf.name_scope('Training-Graph'): # Define boundaries for learning rate decay boundaries = None # boundaries = [500.0, 800.0] # Build the model loss, train_op, global_step, grads_and_vars, optimizer, perturbation, x_adv = build_training_graph( x, y, FLAGS.learning_rate, method, FLAGS.optimizer, boundaries) with tf.device('/cpu:0'): # Add summaries for the training losses with tf.name_scope('Loss-Summaries'): losses = tf.get_collection('losses') for entry in losses: tf.summary.scalar(entry.op.name, entry) # Add histograms for all trainable variables and gradients with tf.name_scope('Trainable-Variable-Summaries'): for var in tf.trainable_variables(): utils.variable_summaries(var) for grad, var in grads_and_vars: if grad is not None: tf.summary.histogram(var.op.name + '/gradients', grad) grad_norm = tf.norm(grad, ord='euclidean') tf.summary.scalar(var.op.name + '/gradients/l2norm', grad_norm) with tf.name_scope('Eval-Graph'): # Create ops used for evaluating the model on test data losses_eval, fgsm_perturbation, fgsm_x_adv = build_eval_graph(x, y, scope) with tf.name_scope('Image-Summary'): # Create image summary op for clean images tf.summary.image('training-images', x, max_outputs=2) if perturbation is not None and x_adv is not None: # Create image summary op for FGSM adversarial images tf.summary.image('fgsm-adversarial-training-perturbations', perturbation, max_outputs=2) # Create image summary op for FGSM adversarial images tf.summary.image('fgsm-adversarial-training-images', x_adv, max_outputs=2) # Create init op with tf.name_scope('Initializer'): init_op = tf.global_variables_initializer() # Create file writer for TensorBoard with tf.device('/cpu:0'): writer_train = tf.summary.FileWriter(log_dir + '/train', graph=g) writer_test = tf.summary.FileWriter(log_dir + '/test') # Merge all the summaries merged = tf.summary.merge_all() # Create tf session config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True) config.gpu_options.allow_growth = True with tf.Session(graph=g, config=config) as sess: # Initialize all variables sess.run(init_op) print('\nStart training for {:d} epochs.'.format(FLAGS.epochs)) # Compute number of batches batches = int(math.ceil(float(len(X_train)) / FLAGS.batch_size)) print('Performing {:d} updates per epoch.'.format(batches)) print('\nEvaluate on test data...') evaluate(sess, x, y, fgsm_perturbation, 
fgsm_x_adv, X_test, Y_test, -1, FLAGS.batch_size, losses_eval, writer_test) # Training loop for epoch in range(FLAGS.epochs): print('\nStart of epoch {:d}...'.format(epoch + 1)) # Write summaries for optimizer parameters # TODO: For adaptive learning rate methods this does not log the adapted learning rate if FLAGS.optimizer == 'vanilla': learning_rate_val = optimizer._learning_rate elif FLAGS.optimizer == 'momentum': learning_rate_val = optimizer._learning_rate_tensor elif FLAGS.optimizer == 'adagrad': learning_rate_val = optimizer._learning_rate_tensor elif FLAGS.optimizer == 'adam': learning_rate_val = optimizer._lr_t else: raise NotImplementedError learning_rate_val = sess.run(learning_rate_val) summary = tf.Summary() summary.value.add(tag='Optimizer/{:s}'.format('learning-rate'), simple_value=learning_rate_val) writer_train.add_summary(summary, epoch) # Shuffle training data indices = list(range(len(X_train))) np.random.shuffle(indices) # Iterate through the training data in batches sum_batch_loss = 0.0 for batch in range(batches): # Compute batch start and end indices start, end = batch_indices( batch, len(X_train), FLAGS.batch_size) feed_dict = {x: X_train[indices[start:end]], y: Y_train[indices[start:end]]} # Perform a single step of stochastic gradient descent batch_summaries, _, batch_loss, step = sess.run([merged, train_op, loss, global_step], feed_dict=feed_dict) # Accumulate the loss sum_batch_loss += batch_loss # Write tensorboard summaries writer_train.add_summary(batch_summaries, step) print('Epoch: {:d}, Cross-Entropy-Loss (training data): {:.4f}'.format(epoch + 1, sum_batch_loss / batches)) # Evaluate on test data if epoch % FLAGS.eval_step == 0 or epoch + 1 == FLAGS.epochs: print('\nEvaluate on test data...') evaluate(sess, x, y, fgsm_perturbation, fgsm_x_adv, X_test, Y_test, epoch, FLAGS.batch_size, losses_eval, writer_test) print('\nPerformed {:.2f} training iterations.'.format(sess.run(global_step)))
def build_training_graph(x, y, learning_rate, method, optimizer, boundaries): print('\nBuilding training graph for method {:s}'.format(method)) print('Using optimizer {:s}'.format(optimizer)) # Define global step variable global_step = tf.get_variable( name='global_step', shape=[], # scalar dtype=tf.float32, initializer=tf.zeros_initializer(), trainable=False ) # Build the network with tf.name_scope('Logits'): logits = train_utils.forward(x, create_summaries=True) # Build the network with tf.name_scope('Predictions'): predictions = layers.softmax(logits) utils.variable_summaries(predictions, name='softmax-predictions') # Create an op for the loss with tf.name_scope('Cross-Entropy-Loss'): ce_loss = layers.cross_entropy_loss(logits, y) tf.add_to_collection('losses', ce_loss) with tf.variable_scope(tf.get_variable_scope(), reuse=True): if method == 'random': with tf.name_scope('RND-Adversarial-Training'): rnd_loss, perturbation, x_adv = train_utils.random_loss(x, y, ord='l2', epsilon=3.0) additional_loss = rnd_loss tf.add_to_collection('losses', rnd_loss) elif method == 'advt': with tf.name_scope('FGSM-Adversarial-Training'): advt_loss, perturbation, x_adv = train_utils.adversarial_loss(x, y, ord='l2', epsilon=3.0) additional_loss = advt_loss tf.add_to_collection('losses', advt_loss) else: perturbation = None x_adv = None # Create an op for the total loss with tf.name_scope('Loss'): if method == 'advt' or method == 'random': loss = (ce_loss + additional_loss) / 2 loss = tf.identity(loss, name='total-loss') tf.add_to_collection('losses', loss) else: loss = ce_loss loss = tf.identity(loss, name='total-loss') tf.add_to_collection('losses', loss) # Create the optimizer with tf.name_scope('Optimizer'): # Implement additional learning rate decay if boundaries is not None: print('Using piecewise constant learning rate decay with boundaries {0}'.format(boundaries)) values = [0.1, 0.05, 0.025] learning_rate = tf.train.piecewise_constant(global_step, boundaries, values) else: learning_rate = tf.constant(learning_rate) if optimizer == 'vanilla': optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) elif optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=.5) elif optimizer == 'adagrad': optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=.1) elif optimizer == 'adam': optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) else: raise NotImplementedError trainable_vars = tf.trainable_variables() grads_and_vars = optimizer.compute_gradients(loss, trainable_vars) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) return loss, train_op, global_step, grads_and_vars, optimizer, perturbation, x_adv
def train_and_test(X_train, Y_train, X_test, Y_test, batch_size=1000, learning_rate=0.5, n_epochs=1000): batch_size = min(min(len(X_train), len(X_test)), batch_size) D = len(X_train[0]) num_class = len(np.unique(np.append(Y_train, Y_test))) x = tf.placeholder(tf.float32, [batch_size, D]) y = tf.placeholder(tf.float32, [batch_size, num_class]) W = tf.Variable(tf.zeros([D, num_class])) b = tf.Variable(tf.zeros([num_class])) pred_y = tf.matmul(x, W) + b cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred_y)) train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy) print 'Optimization starting!' tf.summary.histogram('y', y) variable_summaries(W) variable_summaries(b) tf.summary.histogram('pred_y', pred_y) tf.summary.scalar('cross_entropy', cross_entropy) merged = tf.summary.merge_all() with tf.Session() as sess: train_writer = tf.summary.FileWriter(summaries_dir + '/train', sess.graph) test_writer = tf.summary.FileWriter(summaries_dir + '/test') tf.global_variables_initializer().run() n_batches = int(len(X_train) / batch_size) for iter in range(n_epochs): # train the model n_epochs times # print iter total_loss = 0 for j in range(n_batches): # print j X_batch, Y_batch = _get_batch_data(X_train, Y_train, batch_size, j) Y_batch = generate_one_hot_num_array(Y_batch, num_class) curr_step, curr_entropy, summary = sess.run( [train_step, cross_entropy, merged], feed_dict={ x: X_batch, y: Y_batch }) train_writer.add_summary(summary, iter * n_batches + j) total_loss += curr_entropy if j % 100 == 0: print 'Average loss epoch {0} {1}: {2}'.format( iter, j, total_loss / (j + 1)) print 'Average loss epoch {0}: {1}'.format(iter, total_loss / n_batches) print 'Optimization Finished!' # should be around 0.35 after 25 epochs # test the model preds = tf.nn.softmax(pred_y) correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(y, 1)) accuracy = tf.reduce_sum(tf.cast( correct_preds, tf.float32)) # need numpy.count_nonzero(boolarr) :( n_batches = int(len(X_test) / batch_size) total_correct_preds = 0 for iter in range(n_batches): X_batch, Y_batch = _get_batch_data(X_test, Y_test, batch_size, iter) Y_batch = generate_one_hot_num_array(Y_batch, num_class) accuracy_batch, summary = sess.run([accuracy, merged], feed_dict={ x: X_batch, y: Y_batch }) test_writer.add_summary(summary, iter) total_correct_preds += accuracy_batch print 'Accuracy {0}'.format(total_correct_preds / len(X_test)) train_writer.close() test_writer.close()
def build_q_global_cond(PARAMETERS, devs, conds, verbose, kernel_regularizer=None, use_bias=False, stop_grad=False): # make a distribution that has "log_prob(theta)" and "sample()" q_global_cond = ChainedDistribution(name="q_global_cond") if not hasattr(PARAMETERS, "g_c"): print("- Found no global conditional params") return q_global_cond distribution_descriptions = PARAMETERS.g_c for distribution_name in distribution_descriptions.list_of_params: description = getattr(distribution_descriptions, distribution_name) conditioning = description.defaults['c'] # <-- not a tensor if verbose: print("build_q_global_cond::%s" % distribution_name) params = OrderedDict() for free_name, constrained_name, free_to_constrained in zip( description.free_params, description.params, description.free_to_constrained): to_concat = [] if conditioning is not None: # collect tensors to concat if verbose: print("- Conditioning parameter %s.%s" % (distribution_name, free_name)) if conditioning['treatments']: to_concat.append(conds) if conditioning['devices']: to_concat.append(devs) mlp_inp = tf.concat(to_concat, axis=1) # map sample from prior with conditioning informtion through 1-layer NN tf_free_param = tf.layers.dense( mlp_inp, units=1, use_bias=use_bias, name='%s_%s' % (distribution_name, free_name), kernel_regularizer=kernel_regularizer) if stop_grad: tf_free_param = tf.stop_gradient(tf_free_param) name = os.path.split(tf_free_param.name)[0] variable_summaries( tf.get_default_graph().get_tensor_by_name(name + '/kernel:0'), 'nn_weights_%s' % name) tf_constrained_param = constrain_parameter(tf_free_param, free_to_constrained, distribution_name, constrained_name) params[free_name] = tf_free_param params[constrained_name] = tf_constrained_param for other_param_name, other_param_value in description.other_params.items( ): params[other_param_name] = other_param_value new_distribution = description.class_type(wait_for_assigned=True, variable=True) new_distribution.assign_free_and_constrained(**params) q_global_cond.add_distribution(distribution_name, new_distribution) return q_global_cond
def set_up(self): spec = load_config_file(self.args.yaml) # spec is a dict of dicts of dicts # Import the correct model self.params_dict = apply_defaults(spec["params"]) # time some things, like epoch time start_time = time.time() # ---------------------------------------- # # DEFINE XVAL DATASETS # # ---------------------------------------- # # Create self.dataset_pair: DatasetPair containing train and val Datasets. self._prepare_data(spec["data"]) # Number of instances to put in a training batch. self.n_batch = min(self.params_dict['n_batch'], self.dataset_pair.n_train) # This is already a model object because of the use of "!!python/object:... in the yaml file. model = self.params_dict["model"] # Set various attributes of the model model.init_with_params(self.params_dict, self.procdata.relevance_vectors) # Import priors from YAML parameters = Parameters() parameters.load(self.params_dict) print("----------------------------------------------") if self.args.verbose: print("parameters:") parameters.pretty_print() n_vals = LocalAndGlobal.from_list(parameters.get_parameter_counts()) self.n_theta = n_vals.sum() # TENSORFLOW PARTS # self.placeholders = Placeholders(self.dataset_pair, n_vals) # feed_dicts are used to supply placeholders, these are for the entire train/val dataset, there is a batch one below. self._create_feed_dicts() # time-series of species differences: x_delta_obs is BATCH x (nTimes-1) x nSpecies x_delta_obs = self.placeholders.x_obs[:, 1:, :] - self.placeholders.x_obs[:, :-1, :] # DEFINE THE ENCODER NN: for LOCAL PARAMETERS print("Set up encoder") self.encoder = Encoder(self.args.verbose, parameters, self.placeholders, x_delta_obs) # DEFINE THE DECODER NN print("Set up decoder") self.decoder = Decoder(self.args.verbose, self.params_dict, self.placeholders, self.dataset_pair.times, self.encoder) # DEFINE THE OBJECTIVE and GRADIENTS # likelihood p (x | theta) print("Set up objective") self.objective = Objective(self.encoder, self.decoder, model, self.placeholders) # SET-UP tensorflow LEARNING/OPTIMIZER self.training_stepper = TrainingStepper(self.args.dreg, self.encoder, self.objective, self.params_dict) time_interval = time.time() - start_time print("Time before sess: %g" % time_interval) # TENSORBOARD VISUALIZATION # ts_to_vis = 1 self.encoder.q.attach_summaries() # global and local parameters of q distribution unnormed_iw = self.objective.log_unnormalized_iws[ts_to_vis, :] self_normed_iw = self.objective.normalized_iws[ts_to_vis, :] # not in log space with tf.name_scope('IWS'): variable_summaries(unnormed_iw, 'iws_unn_log') variable_summaries(self_normed_iw, 'iws_normed') tf.summary.scalar('nonzeros', tf.count_nonzero(self_normed_iw)) #print(tf.shape(log_p_observations)) with tf.name_scope('ELBO'): tf.summary.scalar('log_p', tf.reduce_mean(self.training_stepper.logsumexp_log_p)) # [batch, 1] tf.summary.scalar('log_prior', tf.reduce_mean(self.training_stepper.logsumexp_log_p_theta)) tf.summary.scalar('loq_q', tf.reduce_mean(self.training_stepper.logsumexp_log_q_theta)) tf.summary.scalar('elbo', self.objective.elbo)
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument """The `model_fn` for TPUEstimator.""" tf.logging.info("*** Features ***") for name in sorted(features.keys()): tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape)) is_training = (mode == tf.estimator.ModeKeys.TRAIN) model = segmentModel(config, is_training, features, embedding) tvars = tf.trainable_variables() initialized_variable_names = {} if init_checkpoint: (assignment_map, initialized_variable_names ) = model_utils.get_assignment_map_from_checkpoint( tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: utils.variable_summaries(var) init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) if mode == tf.estimator.ModeKeys.TRAIN: (total_loss, per_example_loss, label_ids, prediction, seq_length) = model.get_all_results() weight = tf.sequence_mask(seq_length, dtype=tf.int64) accuracy = tf.metrics.accuracy(label_ids, prediction, weights=weight) tf.summary.scalar('accuracy', accuracy[1]) l2_reg_lamda = config.l2_reg_lamda clip = 5 with tf.variable_scope('train_op'): optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) l2_loss = tf.add_n([ tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1 ]) total_loss = total_loss + l2_reg_lamda * l2_loss grads, _ = tf.clip_by_global_norm( tf.gradients(total_loss, tvars), clip) global_step = tf.train.get_or_create_global_step() train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step) logging_hook = tf.train.LoggingTensorHook( {"accuracy": accuracy[1]}, every_n_iter=100) output_spec = tf.estimator.EstimatorSpec( mode=mode, loss=total_loss, train_op=train_op, training_hooks=[logging_hook]) elif mode == tf.estimator.ModeKeys.EVAL: (total_loss, per_example_loss, label_ids, prediction, seq_length) = model.get_all_results() loss = tf.metrics.mean(per_example_loss) weight = tf.sequence_mask(seq_length, dtype=tf.int64) accuracy = tf.metrics.accuracy(label_ids, prediction, weights=weight) metrics = {"eval_loss": loss, "eval_accuracy": accuracy} output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, eval_metric_ops=metrics) else: input_ids = features["input_ids"] label_ids = features["label_ids"] (_, _, _, prediction, seq_length) = model.get_all_results() predictions = { "input_ids": input_ids, "prediction": prediction, "ground_truths": label_ids, "length": seq_length } output_spec = tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) return output_spec
def __init__(self, hyperparams, train_config): self.train_config = train_config # create placeholder self.u = tf.placeholder(tf.int32, [None]) # [B] self.i = tf.placeholder(tf.int32, [None]) # [B] self.y = tf.placeholder(tf.float32, [None]) # [B] self.w = tf.placeholder(tf.float32, [None]) # [B] self.lr = tf.placeholder(tf.float32, [], name='learning_rate') # -- create embed begin ---- user_emb_w = tf.get_variable( "user_emb_w", [hyperparams['num_users'], hyperparams['user_embed_dim']]) item_emb_w = tf.get_variable( "item_emb_w", [hyperparams['num_items'], hyperparams['item_embed_dim']]) user_b = tf.get_variable("user_b", [hyperparams['num_users']], initializer=tf.constant_initializer(0.0)) item_b = tf.get_variable("item_b", [hyperparams['num_items']], initializer=tf.constant_initializer(0.0)) # -- create embed end ---- # -- embed begin ------- u_emb = tf.nn.embedding_lookup(user_emb_w, self.u) i_emb = tf.nn.embedding_lookup(item_emb_w, self.i) u_b = tf.gather(user_b, self.u) # [B] i_b = tf.gather(item_b, self.i) # [B] # -- embed end ------- interaction = tf.reduce_sum(u_emb * i_emb, axis=-1) # [B] self.logits = interaction + u_b + i_b # [B] self.scores = tf.nn.sigmoid( self.logits) # scores is logits into sigmoid, for inference variable_summaries(self.logits, 'logits') variable_summaries(self.scores, 'scores') # return same dimension as input tensors, let x = logits, z = labels, z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) self.losses = tf.nn.sigmoid_cross_entropy_with_logits( logits=self.logits, labels=self.y) variable_summaries(self.losses, 'loss') self.loss = tf.reduce_mean(self.losses * self.w) # for training loss # global update step variable self.global_step = tf.Variable(0, trainable=False, name='global_step') # optimizer if train_config['optimizer'] == 'adam': optimizer = tf.train.AdamOptimizer(learning_rate=self.lr) elif train_config['optimizer'] == 'rmsprop': optimizer = tf.train.RMSPropOptimizer(learning_rate=self.lr) else: optimizer = tf.train.GradientDescentOptimizer( learning_rate=self.lr) # compute gradients and different update step trainable_params = tf.trainable_variables() grads = tf.gradients( self.loss, trainable_params ) # return a list of gradients (A list of `sum(dy/dx)` for each x in `xs`) clip_grads, _ = tf.clip_by_global_norm(grads, 5) clip_grads_tuples = zip(clip_grads, trainable_params) self.train_op = optimizer.apply_gradients(clip_grads_tuples, global_step=self.global_step) self.merged = tf.summary.merge_all()
def create_network(img_size, num_channels, num_classes, shape1, shape2, num_fc_layer1_output, num_fc_layer2_output, learning_rate, bn=False): # PLACEHOLDER VARIABLES x = tf.placeholder(tf.float32, shape=[None, img_size * img_size * num_channels], name='x') x_image = tf.reshape(x, [-1, img_size, img_size, num_channels]) y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true') y_true_cls = tf.argmax(y_true, dimension=1) fc_layer1_keep_prob = tf.placeholder(tf.float32, name='keep_prob') phase_train = tf.placeholder(tf.bool, name='phase_train') placeholders = { 'x': x, 'x_image': x_image, 'y_true': y_true, 'y_true_cls': y_true_cls, 'fc_layer1_keep_prob': fc_layer1_keep_prob, 'phase_train': phase_train } # CONVOLUTIONAL LAYER 1 with tf.variable_scope('conv_1'): conv_weights1 = tf.Variable(tf.truncated_normal(shape1, stddev=0.05)) conv_biases1 = tf.Variable(tf.constant(0.05, shape=[shape1[3]])) conv_layer1 = tf.nn.conv2d(input=x_image, filter=conv_weights1, strides=[1, 1, 1, 1], padding='SAME') + conv_biases1 # POOLING LAYER 1 with tf.variable_scope('pool_1'): conv_layer1 = tf.nn.max_pool(value=conv_layer1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # BATCH NORMALIZATION if bn: with tf.variable_scope('bn_1'): conv_layer1 = batch_norm(conv_layer1, phase_train) # RELU with tf.variable_scope('relu_1'): conv_layer1 = tf.nn.relu(conv_layer1) # CONVOLUTIONAL LAYER 2 with tf.variable_scope('conv_2'): conv_weights2 = tf.Variable(tf.truncated_normal(shape2, stddev=0.05)) conv_biases2 = tf.Variable(tf.constant(0.05, shape=[shape2[3]])) conv_layer2 = tf.nn.conv2d(input=conv_layer1, filter=conv_weights2, strides=[1, 1, 1, 1], padding='SAME') + conv_biases2 # POOLING LAYER 2 with tf.variable_scope('pool_2'): conv_layer2 = tf.nn.max_pool(value=conv_layer2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # BATCH NORMALIZATION if bn: with tf.variable_scope('bn_2'): conv_layer2 = batch_norm(conv_layer2, phase_train) # RELU with tf.variable_scope('relu_2'): conv_layer2 = tf.nn.relu(conv_layer2) # FLATTEN LAYER with tf.variable_scope('flatten'): layer_shape = conv_layer2.get_shape() num_features = layer_shape[1:4].num_elements() # [num_images, img_height * img_width * num_channels] layer_flat = tf.reshape(conv_layer2, [-1, num_features]) # FULLY CONNECTED LAYER 1 with tf.variable_scope('fc_1'): fc_weights1 = tf.Variable(tf.truncated_normal(shape=[num_features, num_fc_layer1_output], stddev=0.05)) variable_summaries(fc_weights1) fc_biases1 = tf.Variable(tf.constant(0.05, shape=[num_fc_layer1_output])) variable_summaries(fc_biases1) fc_layer1 = tf.matmul(layer_flat, fc_weights1) + fc_biases1 tf.summary.histogram('fc_layer1', fc_layer1) # BATCH NORMALIZATION if bn: with tf.variable_scope('fc_bn_1'): fc_layer1 = batch_norm(fc_layer1, phase_train) # RELU with tf.variable_scope('fc_relu_1'): fc_layer1 = tf.nn.relu(fc_layer1) # DROPOUT LAYER 1 with tf.variable_scope('dropout_1'): fc_layer1_dropout = tf.nn.dropout(fc_layer1, fc_layer1_keep_prob) # FULLY CONNECTED LAYER 2 with tf.variable_scope('fc_2'): fc_weights2 = tf.Variable(tf.truncated_normal(shape=[num_fc_layer1_output, num_fc_layer2_output], stddev=0.05)) variable_summaries(fc_weights2) fc_biases2 = tf.Variable(tf.constant(0.05, shape=[num_fc_layer2_output])) variable_summaries(fc_biases2) fc_layer2 = tf.matmul(fc_layer1_dropout, fc_weights2) + fc_biases2 tf.summary.histogram('fc_layer2', fc_layer2) # BATCH NORMALIZATION if bn: with tf.variable_scope('fc_bn_2'): fc_layer2 = batch_norm(fc_layer2, phase_train) # SOFTMAX 
with tf.variable_scope('softmax'): y_pred = tf.nn.softmax(fc_layer2) y_pred_cls = tf.argmax(y_pred, dimension=1) tf.summary.histogram('y_pred', y_pred) # COST FUNCTION with tf.variable_scope('cost'): cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc_layer2, labels=y_true)) # cost = tf.reduce_mean(-tf.reduce_sum(y_true * tf.log(y_pred), reduction_indices=[1])) tf.summary.histogram('cost', cost) # GRADIENT DESCENT METHOD - ADAM OPTIMIZER train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # PERFORMANCE MEASURES correct_prediction = tf.equal(y_pred_cls, y_true_cls) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tf.summary.histogram('accuracy', accuracy) return train_step, cost, accuracy, y_pred, y_pred_cls, y_true_cls, placeholders
def __init__(self, segment_model, dim_info, config, init_checkpoint, tokenizer, learning_rate, init_embedding=None): uni_embedding = None bi_embedding = None if init_embedding is not None: uni_embedding = utils.get_embedding(init_embedding, tokenizer.vocab, config.embedding_size) if "bigram_vocab" in tokenizer.__dict__: bi_embedding = utils.get_embedding(init_embedding, tokenizer.bigram_vocab, config.embedding_size) self.input_ids = tf.placeholder( dtype=tf.int64, shape=[None, None, dim_info.feature_dims['input_ids']], name='input_ids') self.input_dicts = tf.placeholder( dtype=tf.int64, shape=[None, None, dim_info.feature_dims['input_dicts']], name='input_dicts') if dim_info.label_dim == 1: self.label_ids = tf.placeholder(dtype=tf.int64, shape=[None, None], name='label_ids') else: self.label_ids = tf.placeholder( dtype=tf.int64, shape=[None, None, dim_info.label_dim], name='label_ids') self.seq_length = tf.placeholder(dtype=tf.int64, shape=[None], name='seq_length') self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob') self.learning_rate = tf.Variable(learning_rate, trainable=False) self.new_learning_rate = tf.placeholder(tf.float32, shape=[], name="new_learning_rate") features = { "input_ids": self.input_ids, "input_dicts": self.input_dicts, "label_ids": self.label_ids, "seq_length": self.seq_length } self.model = segment_model(config, features, self.dropout_keep_prob, init_embeddings={ "uni_embedding": uni_embedding, "bi_embedding": bi_embedding }) tvars = tf.trainable_variables() initialized_variable_names = {} if init_checkpoint: (assignment_map, initialized_variable_names ) = model_utils.get_assignment_map_from_checkpoint( tvars, init_checkpoint) tf.train.init_from_checkpoint(init_checkpoint, assignment_map) tf.logging.info("**** Trainable Variables ****") for var in tvars: utils.variable_summaries(var) init_string = "" if var.name in initialized_variable_names: init_string = ", *INIT_FROM_CKPT*" tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape, init_string) (loss, label_ids, prediction, seq_length) = self.model.get_all_results() l2_reg_lamda = config.l2_reg_lamda clip = 5 with tf.variable_scope('train_op'): self.lr_update = tf.assign(self.learning_rate, self.new_learning_rate) global_step = tf.train.get_or_create_global_step() optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) if l2_reg_lamda > 0: l2_loss = tf.add_n([ tf.nn.l2_loss(v) for v in tvars if (v.get_shape().ndims > 1 and "rate" not in v.name) ]) tf.logging.info("**** L2 Loss Variables ****") for var in tvars: if var.get_shape().ndims > 1 and "rate" not in var.name: tf.logging.info(" name = %s, shape = %s", var.name, var.shape) total_loss = loss + l2_reg_lamda * l2_loss else: total_loss = loss if config.clip_grad: grads, _ = tf.clip_by_global_norm( tf.gradients(total_loss, tvars), clip) train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step) else: train_op = optimizer.minimize(total_loss, global_step=global_step) self.loss = loss self.total_loss = total_loss self.seq_length = seq_length self.prediction = prediction self.train_op = train_op
def coattention_encoder(D, Q, documents_lengths, questions_lengths, hyperparameters): # D[i] = document i in the batch, Q[i] = question i in the batch with tf.name_scope("sentinels"): with tf.variable_scope("sentinel_d"): sentinel_d = bias_variable([hyperparameters.hidden_size]) variable_summaries(sentinel_d) with tf.variable_scope("sentinel_q"): sentinel_q = bias_variable([hyperparameters.hidden_size]) variable_summaries(sentinel_q) # append sentinels at the end of documents expanded_sentinel_d = tf.expand_dims(tf.expand_dims(sentinel_d, 0), 0) tiled_sentinel_d = tf.tile(expanded_sentinel_d, [hyperparameters.batch_size, 1, 1]) D = tf.concat([D, tiled_sentinel_d], axis=1) # append sentinels at the end of questions expanded_sentinel_q = tf.expand_dims(tf.expand_dims(sentinel_q, 0), 0) tiled_sentinel_q = tf.tile(expanded_sentinel_q, [hyperparameters.batch_size, 1, 1]) Q = tf.concat([Q, tiled_sentinel_q], axis=1) L = tf.matmul(D, tf.transpose(Q, perm=[0, 2, 1])) if hyperparameters.padding_mask: document_end_indices = tf.subtract(documents_lengths, 1) question_end_indices = tf.subtract(questions_lengths, 1) doc_words_mask = tf.math.cumsum(tf.one_hot( document_end_indices, hyperparameters.max_doc_len), axis=1, reverse=True) que_words_mask = tf.math.cumsum(tf.one_hot( question_end_indices, hyperparameters.max_que_len), axis=1, reverse=True) # add sentinels sentinel_mask = tf.ones([hyperparameters.batch_size, 1]) doc_words_mask = tf.concat([doc_words_mask, sentinel_mask], axis=1) que_words_mask = tf.concat([que_words_mask, sentinel_mask], axis=1) words_mask = tf.matmul(tf.expand_dims(doc_words_mask, axis=2), tf.expand_dims(que_words_mask, axis=1)) negative_padding_mask = tf.subtract(words_mask, 1) min_float_at_padding = tf.multiply( negative_padding_mask, tf.cast(-0.5 * tf.float32.min, tf.float32)) L = tf.add(L, min_float_at_padding) A_Q = tf.nn.softmax(L, axis=int(hyperparameters.softmax_axis), name="softmaxed_L") A_D = tf.nn.softmax(tf.transpose(L, perm=[0, 2, 1]), axis=int(hyperparameters.softmax_axis), name="softmaxed_L_transpose") C_Q = tf.matmul(tf.transpose(D, perm=[0, 2, 1]), A_Q) C_D_2 = tf.matmul(C_Q, A_D) C_Q_2 = tf.matmul(C_D_2, A_Q) #print('C_D_2', C_D_2.shape) #print('C_Q_2', C_Q_2.shape) concat_1 = tf.concat([tf.transpose(Q, perm=[0, 2, 1]), C_Q], 1) concat_1_1 = tf.concat([tf.transpose(Q, perm=[0, 2, 1]), C_Q, C_Q_2], 1) if int(hyperparameters.coattention) == 0: C_D = tf.matmul(tf.transpose(Q, perm=[0, 2, 1]), A_D) elif int(hyperparameters.coattention) == 1: C_D = tf.matmul(concat_1, A_D) elif int(hyperparameters.coattention) == 2: C_D = tf.matmul(concat_1_1, A_D) concat_2 = tf.concat([tf.transpose(D, perm=[0, 2, 1]), C_D], 1) concat_2 = tf.transpose(concat_2, perm=[0, 2, 1]) concat_2 = concat_2[:, :-1, :] # remove sentinels BiLSTM_outputs, BiLSTM_final_fw_state, BiLSTM_final_bw_state = dynamic_bilstm( concat_2, documents_lengths, hyperparameters) if hyperparameters.bi_lstm_encoding_dropout: BiLSTM_outputs = tf.nn.dropout(BiLSTM_outputs, keep_prob=hyperparameters.keep_prob) if (hyperparameters.squad2_vector or hyperparameters.squad2_lstm): with tf.name_scope("SQuAD_2"): if (hyperparameters.squad2_vector): impossible_encoding = bias_variable( [2 * hyperparameters.hidden_size]) variable_summaries(impossible_encoding) impossible_encoding = tf.expand_dims(tf.expand_dims( impossible_encoding, axis=0), axis=0) impossible_encoding = tf.tile( impossible_encoding, [hyperparameters.batch_size, 1, 1]) elif (hyperparameters.squad2_lstm): encodings, final_state = dynamic_lstm_with_hidden_size( 
concat_2, documents_lengths, hyperparameters, 2 * hyperparameters.hidden_size, False) impossible_encoding = encodings[:, -1] variable_summaries(impossible_encoding) impossible_encoding = tf.expand_dims(impossible_encoding, axis=1) BiLSTM_outputs = tf.concat([BiLSTM_outputs, impossible_encoding], axis=1) return L, BiLSTM_outputs
def model(self, X, Y): feature = int(np.prod(X.get_shape()[1:])) classes = int(np.prod(Y.get_shape()[1:])) x_image = tf.reshape(X, [-1, feature, 1, 1]) # 1st conv layer with tf.name_scope('conv1') as scope: w = weight_variable([5, 1, 1, 32]) b = bias_variable([32]) h = tf.nn.relu(conv2d(x_image, w) + b) conv1 = max_pool_2x2(h) # print "conv1 shape: ", h.get_shape() # print "pool1 shape: ", conv1.get_shape() # 2nd conv layer with tf.name_scope('conv2') as scope: w = weight_variable([5, 1, 32, 64]) b = bias_variable([64]) h = tf.nn.relu(conv2d(conv1, w) + b) conv2 = max_pool_2x2(h) # print "conv2 shape: ", h.get_shape() # print "pool2 shape: ", conv2.get_shape() # 3rd conv layer with tf.name_scope('conv3') as scope: w = weight_variable([5, 1, 64, 64]) b = bias_variable([64]) conv3 = tf.nn.relu(conv2d(conv2, w) + b) # print "conv3 shape: ", conv3.get_shape() # 4th conv layer with tf.name_scope('conv4') as scope: w = weight_variable([5, 1, 64, 64]) b = bias_variable([64]) conv4 = tf.nn.relu(conv2d(conv3, w) + b) # print "conv4 shape: ", conv4.get_shape() # 5th conv layer with tf.name_scope('conv5') as scope: w = weight_variable([5, 1, 64, 64]) b = bias_variable([64]) h = tf.nn.relu(conv2d(conv4, w) + b) conv5 = max_pool_2x2(h) # print "conv5 shape: ", h.get_shape() # print "pool5 shape: ", conv5.get_shape() # dropout keep_prob = tf.placeholder(tf.float32) # 1st fc layer with tf.name_scope('fc1') as scope: shape = int(np.prod(conv5.get_shape()[1:])) print('shape: ', shape) conv5_flat = tf.reshape(conv5, [-1, shape]) w = weight_variable([shape, 1024]) b = bias_variable([1024]) h = tf.nn.relu(tf.matmul(conv5_flat, w) + b) fc1 = tf.nn.dropout(h, keep_prob) # print "fc1 shape: ", fc1.get_shape() # 2nd fc layer with tf.name_scope('fc2') as scope: w = weight_variable([1024, 512]) b = bias_variable([512]) h = tf.nn.relu(tf.matmul(fc1, w) + b) fc2 = tf.nn.dropout(h, keep_prob) # print "fc2 shape: ", fc2.get_shape() # 3rd fc layer with tf.name_scope('fc3') as scope: w = weight_variable([512, classes]) b = bias_variable([classes]) logits = tf.matmul(fc2, w) + b # print "logits shape: ", logits.get_shape() entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits, name='loss') loss = tf.reduce_mean(entropy) variable_summaries(loss, 'loss') return logits, loss, keep_prob, "alex"