示例#1
0
    def rothk_penalty(self, d_real, d_fake):
        """Build the gradient-norm penalty on the discriminator outputs.

        The squared gradient norm of ``d_real`` with respect to
        ``self.gan.inputs.x`` is weighted by ``(1 - sigmoid(d_real))^2`` and
        the squared gradient norm of ``d_fake`` with respect to
        ``self.gan.uniform_sample`` is weighted by ``sigmoid(d_fake)^2``.
        The sum is scaled by ``config.rothk_lambda`` (1 when that is falsy)
        and, when ``config.rothk_decay`` is truthy, passed through the
        configured decay function.

        :param d_real: discriminator output for the real inputs.
        :param d_fake: discriminator output for the generated sample.
        :return: the penalty tensor.
        """
        settings = self.config
        fake_input = self.gan.uniform_sample
        real_input = self.gan.inputs.x

        real_grad = tf.gradients(d_real, [real_input])[0]
        fake_grad = tf.gradients(d_fake, [fake_input])[0]

        # Collapse everything after the leading dimension so the norm is
        # taken per row.
        real_grad = tf.reshape(real_grad, [self.ops.shape(real_grad)[0], -1])
        fake_grad = tf.reshape(fake_grad, [self.ops.shape(fake_grad)[0], -1])
        real_norm = tf.norm(real_grad, axis=1, keep_dims=True)
        fake_norm = tf.norm(fake_grad, axis=1, keep_dims=True)

        norm_rows = int(real_norm.get_shape()[0])
        output_rows = int(d_real.get_shape()[0])
        if norm_rows != output_rows:
            # Leading dimensions disagree: average the norms over axis 0.
            print("Condensing along batch for rothk")
            real_norm = tf.reduce_mean(real_norm, axis=0)
            fake_norm = tf.reduce_mean(fake_norm, axis=0)

        real_term = tf.square(real_norm) * tf.square(1-tf.nn.sigmoid(d_real))
        fake_term = tf.square(fake_norm) * tf.square(tf.nn.sigmoid(d_fake))
        penalty = (real_term + fake_term) * (settings.rothk_lambda or 1)

        if not settings.rothk_decay:
            return penalty

        # The penalty is passed as the first argument of the decay function
        # (the slot tf.train.exponential_decay calls "learning_rate"), so it
        # is the penalty itself that gets decayed with the global step.
        schedule = settings.decay_function or tf.train.exponential_decay
        steps = settings.decay_steps or 50000
        rate = settings.decay_rate or 0.9
        staircase = settings.decay_staircase or False
        return schedule(penalty, tf.train.get_global_step(), steps, rate, staircase)
示例#2
0
def p_norm(tensor,order):
    """Compute one or several p-norms of ``tensor`` with ``tf.norm``.

    :param tensor: the tensor handed to ``tf.norm``.
    :param order: a single number (``int`` or ``float``, e.g. ``2`` or
        ``np.inf``), or a list/tuple of values each passed as ``ord``.
    :return: one norm tensor for a scalar ``order``; for a list/tuple, a list
        with one norm tensor per entry, in the same order.
    :raises ValueError: if ``order`` is neither a number nor a list/tuple.
    """
    # isinstance() also accepts subclasses of int/float; bool is an int
    # subclass but is not a meaningful norm order, so it stays rejected.
    if isinstance(order, (int, float)) and not isinstance(order, bool):
        return tf.norm(tensor, ord=order)
    if isinstance(order, (list, tuple)):
        return [tf.norm(tensor, ord=order_item) for order_item in order]
    raise ValueError('Unrecognized order of p_norm: %s' % str(order))
示例#3
0
def cosineface_losses(embedding, labels, out_num, w_init=None, s=30., m=0.4):
    '''
    Build cosine-margin logits: s * cos(theta) for non-target classes and
    s * (cos(theta) - m) for the target class.

    :param embedding: the input embedding vectors, rank 2 (batch_size, embedding_dim)
    :param labels:  integer class ids, shape (batch_size,) or (batch_size, 1);
                    they are flattened to (batch_size,) before the one-hot mask is built
    :param out_num: output class num
    :param w_init: initializer handed to tf.get_variable for the class weights
    :param s: scalar value, default is 30
    :param m: the margin value, default is 0.4
    :return: the final calculated output, shape (batch_size, out_num); this output is
             sent into the tf.nn.softmax directly
    '''
    with tf.variable_scope('cosineface_loss'):
        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
        embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
        weights = tf.get_variable(name='embedding_weights', shape=(embedding.get_shape().as_list()[-1], out_num),
                                  initializer=w_init, dtype=tf.float32)
        weights_norm = tf.norm(weights, axis=0, keep_dims=True)
        weights = tf.div(weights, weights_norm, name='norm_weights')
        # cos_theta - m
        cos_t = tf.matmul(embedding, weights, name='cos_t')
        cos_t_m = tf.subtract(cos_t, m, name='cos_t_m')

        # tf.one_hot appends the depth axis, so (batch_size, 1) labels would
        # give a rank-3 mask that broadcasts against the rank-2 cos_t into
        # (batch_size, batch_size, out_num). Flatten first so both label
        # layouts produce a (batch_size, out_num) mask.
        labels = tf.reshape(labels, [-1])
        mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
        inv_mask = tf.subtract(1., mask, name='inverse_mask')

        output = tf.add(s * tf.multiply(cos_t, inv_mask), s * tf.multiply(cos_t_m, mask), name='cosineface_loss_output')
    return output
示例#4
0
def find_best_k(X, Z, ks=(1, 3, 5, 50)):
    """Pick the k with the lowest validation loss among the candidates.

    For every candidate ``k`` the prediction graph is rebuilt from ``X`` and
    ``Z``, the training/validation/testing losses are evaluated and printed,
    and the ``k`` with the smallest validation loss is remembered.

    Relies on names defined elsewhere in this module:
    ``calculate_euclidean_distance``, ``calculate_responsibilities``,
    ``casted_train_target``, ``trainTarget``/``validTarget``/``testTarget``
    and ``trainData``/``validData``/``testData``.

    :param X: placeholder fed with the training data.
    :param Z: placeholder fed with the data to predict for.
    :param ks: iterable of candidate neighbour counts (defaults to the
        original hard-coded list).
    :return: ``(best_k, best_valid_loss)``; ``(1, inf)`` if no candidate
        improves on infinity (e.g. ``ks`` is empty).
    """
    best_k = 1
    best_valid_loss = float("inf")
    for k in ks:
        sess = tf.InteractiveSession()
        # Every iteration opens its own session; close it when done so
        # sessions do not pile up across candidates.
        try:
            dist = calculate_euclidean_distance(X, Z)
            r = calculate_responsibilities(dist, k=k)
            prediction = tf.matmul(r, casted_train_target)

            train_loss_op = tf.norm(trainTarget - prediction)
            valid_loss_op = tf.norm(validTarget - prediction)
            test_loss_op = tf.norm(testTarget - prediction)

            valid_loss = sess.run(valid_loss_op, feed_dict={X: trainData, Z: validData})
            train_loss = sess.run(train_loss_op, feed_dict={X: trainData, Z: trainData})
            test_loss = sess.run(test_loss_op, feed_dict={X: trainData, Z: testData})
        finally:
            sess.close()

        print("Training/Validation/Testing loss for k={:d} is {:f}/{:f}/{:f}"
              .format(k, train_loss, valid_loss, test_loss))

        if valid_loss < best_valid_loss:
            best_k = k
            best_valid_loss = valid_loss

    return best_k, best_valid_loss
 def tf_summary(self):
     """Register the scalar summaries, then build the merged op and writer.

     Sets ``self.summary`` to the result of ``tf.summary.merge_all()`` and
     ``self.sw`` to a ``tf.summary.FileWriter`` on ``self.result_dir``.
     """
     scalars = (
         ('cost', self.cost),
         # 'euclidean' over the last two axes is the Frobenius norm.
         ('w_fnorm', tf.norm(self.W, ord='euclidean', axis=[-2,-1])),
         ('b_1norm', tf.norm(self.b, ord=1)),
         ('b_2norm', tf.norm(self.b, ord=2)),
     )
     for tag, value in scalars:
         tf.summary.scalar(tag, value)
     # Merged op, for saving in the epoch/iteration.
     self.summary = tf.summary.merge_all()
     self.sw = tf.summary.FileWriter(self.result_dir, self.sess.graph)
 def _l1_loss(self, hparams):
     """Sum the weighted L1 norms of the embedding and layer parameters.

     :param hparams: object providing the ``embed_l1`` and ``layer_l1``
         coefficients.
     :return: a float32 tensor of shape ``[1]`` holding the total.
     """
     def _add_group(total, params, coefficient):
         # `coefficient` is a thunk so the hparams attribute is read once
         # per parameter, right before that parameter's norm op is built.
         for param in params:
             total = tf.add(total, tf.multiply(coefficient(), tf.norm(param, ord=1)))
         return total

     penalty = tf.zeros([1], dtype=tf.float32)
     # embedding-layer L1 terms
     penalty = _add_group(penalty, self.embed_params, lambda: hparams.embed_l1)
     # remaining layer L1 terms
     penalty = _add_group(penalty, self.layer_params, lambda: hparams.layer_l1)
     return penalty
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Run both wrapped optimizers and write a blend of their results.

    The ops are chained with control dependencies in this order:

    1. op1: copy the current variables (and optimizer variables) to ``tmp``.
    2. op2: ``self.optimizer.apply_gradients`` -> x_{t+1}.
    3. op3: store x_{t+1} in ``xt_vars``.
    4. op4: reset the variables to the stored ``zt`` values.
    5. op5: ``self.optimizer2.apply_gradients`` -> z_{t+1}.
    6. Compute ``St1 = min(1, ||z_{t+1} - z_t|| / ||z_{t+1} - x_{t+1}||)``
       per variable and ``nextw = x_{t+1} + St1 * (z_{t+1} - x_{t+1})``.
    7. op6: store z_{t+1} in ``zt_vars``; op7: assign ``nextw`` to the
       variables.

    NOTE(review): ``global_step`` is handed to both optimizers; if each one
    increments it, it advances twice per call -- confirm this is intended.

    :param grads_and_vars: list of ``(gradient, variable)`` pairs (must
        support ``.copy()``).
    :param global_step: forwarded to both wrapped optimizers.
    :param name: forwarded to both wrapped optimizers.
    :return: a ``tf.no_op`` that depends on the final assignment (op7).
    """
    var_list = [ v for _,v in grads_and_vars]
    with ops.init_scope():
        zt = [self._get_or_make_slot(v, v, "zt", self._name) for _,v in grads_and_vars]
        # The loop variable must not be called `name`: that would overwrite
        # the `name` argument that is forwarded to the optimizers below.
        for _slot_name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                self._get_or_make_slot(var, var, "zt", "zt")
    self._prepare()

    def _name(post, s):
        # "scope/var:0" -> "scope/var_<post>_dontsave"
        ss = s.split(":")
        return ss[0] + "_" + post + "_dontsave"
    zt = [self.get_slot(v, "zt") for _,v in grads_and_vars]
    xt = [tf.Variable(v, name=_name("gigaxt",v.name)) for _,v in grads_and_vars]
    tmp = [tf.Variable(v, name=_name("gigatmp",v.name)) for _,v in grads_and_vars]
    xslots_list = []
    zslots_list = []
    tmpslots_list = []
    slots_vars = []
    # Mirror every variable of the first optimizer (once per slot name, as
    # in the original) so its internal state is stored/restored as well.
    for _slot_name in self.optimizer.get_slot_names():
        for var in self.optimizer.variables():
            slots_vars += [var]
            xslots_list.append(tf.Variable(var))
            zslots_list.append(self._get_or_make_slot(var, var, "zt", "zt"))
            tmpslots_list.append(tf.Variable(var, name=_name("gigaslottmp", var.name)))


    restored_vars = var_list + slots_vars
    zt_vars = zt + zslots_list
    xt_vars = xt + xslots_list
    tmp_vars = tmp + tmpslots_list
    all_grads = [ g for g, _ in grads_and_vars ]
    # store variables for resetting

    op1 = tf.group(*[tf.assign(w, v) for w,v in zip(tmp_vars, restored_vars)]) # store tmp_vars

    with tf.get_default_graph().control_dependencies([op1]):
        op2 = self.optimizer.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op2]):
            op3 = tf.group(*[tf.assign(w, v) for w,v in zip(xt_vars, restored_vars)]) # store xt^+1 in xt_vars
            with tf.get_default_graph().control_dependencies([op3]):
                op4 = tf.group(*[tf.assign(w, v) for w,v in zip(restored_vars, zt_vars)]) # restore vars to zt (different weights)
                with tf.get_default_graph().control_dependencies([op4]):
                    op5 = self.optimizer2.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name) # zt+1
                    with tf.get_default_graph().control_dependencies([op5]):
                        # After op5 the live variables hold z_{t+1}.
                        zt1_xt1 = [_restored_vars - _xt1_vars for _restored_vars, _xt1_vars in zip(restored_vars, xt_vars)]
                        St1 = [tf.minimum(1.0, tf.norm(_zt1_vars-_zt_vars) / tf.norm(_zt1_xt1)) for _zt1_vars, _zt_vars, _zt1_xt1 in zip(restored_vars, zt_vars, zt1_xt1)]
                        self.gan.add_metric('st1',tf.reduce_mean(tf.add_n(St1)/len(St1)))
                        #self.gan.add_metric('xzt1',tf.norm(xt_vars[0]-zt_vars[0]))
                        nextw = [_xt_t1 + _St1 * _zt1_xt1 for _xt_t1, _St1, _zt1_xt1 in zip(xt_vars, St1, zt1_xt1)]
                        op6 = tf.group(*[tf.assign(w, v) for w,v in zip(zt_vars, restored_vars)]) # set zt+1
                        with tf.get_default_graph().control_dependencies([op6]):
                            op7 = tf.group(*[tf.assign(w, v) for w,v in zip(restored_vars, nextw)]) # set xt+1
                            with tf.get_default_graph().control_dependencies([op7]):
                                return tf.no_op()
示例#8
0
def s_norm(tensor,order):
    """Compute norm(s) of the singular values of ``tensor``.

    :param tensor: the tensor handed to ``tf.svd``.
    :param order: a single number (``int`` or ``float``) or a list/tuple of
        values, each used as ``ord`` for ``tf.norm`` on the singular values.
    :return: ``(s, result)`` where ``s`` is the singular-value tensor and
        ``result`` is one norm tensor (scalar ``order``) or a list of norm
        tensors (list/tuple ``order``).
    :raises ValueError: if ``order`` is neither a number nor a list/tuple.
    """
    # Validate before touching the graph so a bad `order` does not leave a
    # dangling SVD op behind. bool is an int subclass but stays rejected.
    is_scalar = isinstance(order, (int, float)) and not isinstance(order, bool)
    if not is_scalar and not isinstance(order, (list, tuple)):
        raise ValueError('Unrecognized order of s_norm: %s' % str(order))

    s, _, _ = tf.svd(tensor, full_matrices=False)
    if is_scalar:
        result = tf.norm(s, ord=order)
    else:
        result = [tf.norm(s, ord=order_item) for order_item in order]
    return s, result
示例#9
0
 def __tensor_norm__(self,tensor,order):
     """Compute a norm of ``tensor`` selected by ``order``.

     :param tensor: the input tensor.
     :param order: ``'Si'`` for the infinity norm of the singular values,
         ``'S<p>'`` (e.g. ``'S1'``) for the integer p-norm of the singular
         values, otherwise an integer p given either as a string (``'2'``)
         or as an int (``2``) for the p-norm of the tensor itself.
     :return: the norm tensor.
     :raises ValueError: if the numeric part of ``order`` cannot be parsed
         by ``int()``.
     """
     # Normalise to text so integer orders work too (indexing an int with
     # order[0] raised TypeError) and so an empty string no longer raises
     # IndexError but the same ValueError as any other unparsable order.
     key = str(order)
     if key.startswith('S'):       # Schatten norms act on singular values
         s, _, _ = tf.svd(tensor, full_matrices=False)
         sub_order = np.inf if key == 'Si' else int(key[1:])
         return tf.norm(s, ord=sub_order)
     return tf.norm(tensor, ord=int(key))
示例#10
0
def image_encoder(image_feat,
                  hparams,
                  name="image_encoder",
                  save_weights_to=None,
                  make_image_summary=True):
  """A stack of self attention layers.

  Each layer applies a self-attention sub-layer followed by a
  dense-relu-dense sub-layer, both wrapped in layer pre/post-processing.
  The last-axis norm of every intermediate result is filed in the "norms"
  collection.

  Args:
    image_feat: input tensor fed to the first layer.
    hparams: hyperparameters; image-specific sizes fall back to the generic
      ones when falsy.
    name: variable scope for the whole stack.
    save_weights_to: forwarded to the attention layer.
    make_image_summary: forwarded to the attention layer.

  Returns:
    The stack output after a final `layer_preprocess`.
  """

  def _record_norm(tag, tensor):
    # File the last-axis norm of `tensor` under `tag`.
    utils.collect_named_outputs("norms", tag, tf.norm(tensor, axis=-1))

  hidden = image_feat
  image_hidden_size = hparams.image_hidden_size or hparams.hidden_size
  image_filter_size = hparams.image_filter_size or hparams.filter_size
  with tf.variable_scope(name):
    num_layers = hparams.num_encoder_layers or hparams.num_hidden_layers
    for layer in range(num_layers):
      with tf.variable_scope("layer_%d" % layer):
        with tf.variable_scope("self_attention"):
          attended = vqa_layers.multihead_attention(
              common_layers.layer_preprocess(hidden, hparams),
              None,
              None,
              hparams.attention_key_channels or image_hidden_size,
              hparams.attention_value_channels or image_hidden_size,
              image_hidden_size,
              hparams.num_heads,
              hparams.attention_dropout,
              attention_type=hparams.image_self_attention_type,
              save_weights_to=save_weights_to,
              make_image_summary=make_image_summary,
              scale_dotproduct=hparams.scale_dotproduct,
          )
          _record_norm("image_feat_self_attention_%d"%(layer), attended)
          hidden = common_layers.layer_postprocess(hidden, attended, hparams)
          _record_norm(
              "image_feat_self_attention_postprocess_%d"%(layer), hidden)
        with tf.variable_scope("ffn"):
          transformed = common_layers.dense_relu_dense(
              common_layers.layer_preprocess(hidden, hparams),
              image_filter_size,
              image_hidden_size,
              dropout=hparams.relu_dropout,
          )
          _record_norm("image_feat_ffn_%d"%(layer), transformed)
          hidden = common_layers.layer_postprocess(
              hidden, transformed, hparams)
          _record_norm("image_feat_ffn_postprocess_%d"%(layer), hidden)
    # if normalization is done in layer_preprocess, then it should also be done
    # on the output, since the output can grow very large, being the sum of
    # a whole stack of unnormalized layer outputs.
    return common_layers.layer_preprocess(hidden, hparams)
  def body(self, features):
    """Build the model body from image and question features.

    The image features are projected, combined with the question by
    `prepare_image_question_encoder`, encoded, and then decoded from a single
    learned query vector. Last-axis norms of the intermediate tensors are
    collected under "norms" and summarized.

    Args:
      features: dict with at least the keys "inputs" and "question".

    Returns:
      The decoder output with a singleton dimension inserted at axis 1.
    """
    hp = self.hparams

    def _record_norm(tag, tensor):
      # File the last-axis norm of `tensor` under `tag`.
      utils.collect_named_outputs("norms", tag, tf.norm(tensor, axis=-1))

    if hp.image_input_type != "image":
      image_feat = features["inputs"]
    else:
      # NOTE(review): eval() runs whatever string hp.image_model_fn holds;
      # keep that hyperparameter under trusted control.
      # pylint: disable=eval-used
      image_feat = vqa_layers.image_embedding(
          features["inputs"],
          model_fn=eval(hp.image_model_fn),
          trainable=hp.train_resnet,
          is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)

    image_feat = common_layers.flatten4d3d(image_feat)
    image_feat = common_layers.dense(image_feat, hp.hidden_size)
    _record_norm("image_feat_after_proj", image_feat)

    question = common_layers.flatten4d3d(features["question"])
    _record_norm("question_embedding", question)

    prepared = prepare_image_question_encoder(image_feat, question, hp)
    (encoder_input, encoder_self_attention_bias,
     encoder_decoder_attention_bias) = prepared

    keep_prob = 1.-hp.layer_prepostprocess_dropout
    encoder_input = tf.nn.dropout(encoder_input, keep_prob=keep_prob)

    encoder_output, _ = recurrent_transformer_decoder(
        encoder_input, None, encoder_self_attention_bias, None,
        hp, name="encoder")
    _record_norm("encoder_output", encoder_output)

    # scale query by sqrt(hidden_size)
    query = tf.get_variable("query", [hp.hidden_size]) * hp.hidden_size **0.5
    # [hidden_size] -> [1, 1, hidden_size], then repeat along the batch axis.
    query = tf.expand_dims(tf.expand_dims(query, axis=0), axis=0)
    batch_size = common_layers.shape_list(encoder_input)[0]
    query = tf.tile(query, [batch_size, 1, 1])
    query = tf.nn.dropout(
        query, keep_prob=1.-hp.layer_prepostprocess_dropout)

    decoder_output, _ = recurrent_transformer_decoder(
        query, encoder_output, None, encoder_decoder_attention_bias,
        hp, name="decoder")
    _record_norm("decoder_output", decoder_output)

    norm_tensors = utils.convert_collection_to_dict("norms")
    vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

    # Insert a singleton dimension at axis 1.
    return tf.expand_dims(decoder_output, axis=1)
 def project_gradient_layer(gs):
     """Normalise ``gs`` as selected by ``self.config.norm``.

     ``'softmax'`` applies a softmax; ``'euclidean'``, ``'inf'`` and
     ``'max'`` divide by the corresponding norm of the whole tensor plus
     1e-8; a value equal to ``False`` returns ``gs`` unchanged; anything
     else is passed to ``tf.norm`` as ``ord``.
     """
     mode = self.config.norm
     if mode == 'softmax':
         return tf.nn.softmax(gs)
     if mode == 'euclidean':
         scale = tf.sqrt(tf.reduce_sum(tf.square(gs)))
         return gs / (scale+1e-8)
     if mode == 'inf':
         scale = tf.norm(gs, ord=np.inf)
         return gs / (scale+1e-8)
     if mode == 'max':
         scale = tf.reduce_max(tf.abs(gs))
         return gs / (scale+1e-8)
     # Equality (not identity) on purpose: 0 disables normalisation too.
     if mode == False:
         return gs
     scale = tf.norm(gs, ord=mode)
     return gs / (scale+1e-8)
示例#13
0
 def _cross_l_loss(self):
     """Build the L1 + L2 norm penalty over the cross network parameters.

     For every tensor in ``self.cross_params`` the ``ord=1`` norm weighted by
     ``hparams.cross_l1`` and the ``ord=2`` norm weighted by
     ``hparams.cross_l2`` are added to the running total.

     Returns:
         obj: Float32 tensor of shape ``[1]`` with the accumulated penalty.
     """
     penalty = tf.zeros([1], dtype=tf.float32)
     for param in self.cross_params:
         l1_term = tf.multiply(self.hparams.cross_l1, tf.norm(param, ord=1))
         penalty = tf.add(penalty, l1_term)
         l2_term = tf.multiply(self.hparams.cross_l2, tf.norm(param, ord=2))
         penalty = tf.add(penalty, l2_term)
     return penalty
示例#14
0
def nearest(x, means, hparams):
  """Find the nearest means to elements in x.

  Args:
    x: tensor whose last dimension is `hparams.hidden_size`.
    means: rank-2 tensor with one mean per row (the one-hot depth used below
      is `hparams.v_size`).
    hparams: provides `hidden_size` and `v_size`.

  Returns:
    A one-hot tensor shaped like `x` with the last dimension replaced by
    `hparams.v_size`, marking the closest mean (in Euclidean distance) for
    every vector of `x`. No gradient flows through it.
  """
  x, means = tf.stop_gradient(x), tf.stop_gradient(means)
  x_flat = tf.reshape(x, [-1, hparams.hidden_size])
  # ||x - m||^2 = ||x||^2 + ||m||^2 - 2 x.m  -- the expansion needs the
  # *squared* norms; plain norms do not rank candidates by distance.
  x_norm_sq = tf.reduce_sum(tf.square(x_flat), axis=-1, keep_dims=True)
  means_norm_sq = tf.reduce_sum(tf.square(means), axis=-1, keep_dims=True)
  dist = x_norm_sq + tf.transpose(means_norm_sq) - 2 * tf.matmul(
      x_flat, means, transpose_b=True)
  # top_k on the negated distance selects the smallest distance.
  _, nearest_idx = tf.nn.top_k(- dist, k=1)
  nearest_hot = tf.one_hot(tf.squeeze(nearest_idx, axis=1), hparams.v_size)
  shape = common_layers.shape_list(x)
  shape[-1] = hparams.v_size
  nearest_hot = tf.reshape(nearest_hot, shape=shape)
  return tf.stop_gradient(nearest_hot)
示例#15
0
    def _make_activity_op(self, input_tensor):
        """ Creates the op for calculating the activity of a SOM
        :param input_tensor: A tensor to calculate the activity of. Must be of shape `[batch_size, dim]` where `dim` is
        the dimensionality of the SOM's weights.
        :return A handle to the newly created activity op, of shape `[batch_size, neurons]`:
        """
        with self._graph.as_default():
            with tf.name_scope("Activity"):
                # This constant controls the width of the gaussian.
                # The closer to 0 it is, the wider it is.
                c = tf.constant(self._c, dtype="float32")
                # Get the euclidean distance between each neuron and the input vectors.
                # The difference broadcasts to [batch_size, neurons, dim]; the norm must be
                # taken over the last axis only, otherwise tf.norm collapses the whole
                # tensor into a single scalar.
                dist = tf.norm(tf.subtract(
                        tf.expand_dims(self._weights, axis=0),
                        tf.expand_dims(input_tensor, axis=1)),
                    axis=2,
                    name="Distance")  # [batch_size, neurons]

                # Calculate the Gaussian of the activity. Units with distances closer to 0 will have activities
                # closer to 1.
                activity = tf.exp(tf.multiply(tf.pow(dist, 2), c), name="Gaussian")

                # Convert the activity into a softmax probability distribution
                if self._softmax_activity:
                    activity = tf.divide(tf.exp(activity),
                                         tf.expand_dims(tf.reduce_sum(tf.exp(activity), axis=1), axis=-1),
                                         name="Softmax")

                return tf.identity(activity, name="Output")
示例#16
0
def dia(model, config, scope, connectsegment, connectfeature):
	"""Add the `scope` layer to `model`, built from two upstream layers.

	Length bookkeeping entries are copied from the `connectsegment` and
	`connectfeature` layers, the top `speaker_size` entries of the segment
	input are selected, the matching feature rows are gathered and
	row-normalised, and their pairwise products are mapped to
	0.5 + 0.5 * product.

	All entries are stored in `model` under keys prefixed with `scope`;
	the same `model` dict is returned.
	"""
	def key(suffix, owner = scope):
		return '%s_%s' %(owner, suffix)

	speaker_size = config.getint('global', 'speaker_size')

	with tf.variable_scope(scope), tf.name_scope(scope):
		with tf.variable_scope('inputs'), tf.name_scope('inputs'):
			# Mirror the upstream length entries, segment layer first.
			for source, kind in ((connectsegment, 'segment'), (connectfeature, 'feature')):
				for field in ('0length', '1length', '2length'):
					model[key('in%s_%s' %(field, kind))] = model[key('out%s' %field, source)]
				model[key('maxin2length_%s' %kind)] = model[key('maxout2length', source)]
			model[key('inputs_segment')] = tf.squeeze(model[key('outputs', connectsegment)], 2, key('inputs_segment'))
			feature_major = tf.transpose(model[key('outputs', connectfeature)], [1, 0, 2])
			model[key('inputs_feature')] = tf.unstack(feature_major, name = key('inputs_feature'))
			model[key('out0length')] = model[key('in0length_feature')]
			model[key('out1length')] = speaker_size
			model[key('out2length')] = tf.stack([speaker_size for _ in xrange(model[key('out0length')])])
			model[key('maxout2length')] = speaker_size

		with tf.variable_scope('outputs'), tf.name_scope('outputs'):
			segment_t = tf.transpose(model[key('inputs_segment')], [1, 0])
			model[key('topsegmentvalues')], model[key('topsegmentindices')] = tf.nn.top_k(segment_t, speaker_size)
			index_rows = tf.unstack(model[key('topsegmentindices')])
			model[key('scores')] = [tf.gather(feature, index) for feature, index in zip(model[key('inputs_feature')], index_rows)]
			# Divide every row by its 2-norm (ord 2, axis 1, dims kept).
			model[key('normalizedscores')]  = [tf.divide(score, tf.norm(score, 2, 1, True)) for score in model[key('scores')]]
			products = tf.stack([tf.matmul(score, score, transpose_b = True) for score in model[key('normalizedscores')]], name = key('outputs'))
			model[key('outputs')] = tf.add(0.5, tf.multiply(0.5, products))

	return model
def build_arch(input, is_train, num_classes):
    """Build the capsule network graph: conv1 -> primary caps -> digit caps.

    :param input: image batch; dimension 1 is read as the (square) spatial
        size and the leading dimension is expected to be ``cfg.batch_size``.
    :param is_train: passed as ``trainable`` to every ``slim.conv2d``.
    :param num_classes: number of output capsules.
    :return: ``(output, output_len)`` where ``output`` has shape
        ``[cfg.batch_size, num_classes, 16]`` and ``output_len`` is its norm
        over the last axis.
    """
    data_size = int(input.get_shape()[1])
    # initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
    # bias_initializer = tf.constant_initializer(0.0)
    # weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)

    with slim.arg_scope([slim.conv2d], trainable=is_train):#, activation_fn=None, , , biases_initializer=bias_initializer, weights_regularizer=weights_regularizer
        with tf.variable_scope('conv1') as scope:
            output = slim.conv2d(input, num_outputs=256, kernel_size=[9, 9], stride=1, padding='VALID', scope=scope)
            # VALID 9x9, stride 1: size shrinks by kernel - 1.
            data_size = data_size-8
            assert output.get_shape() == [cfg.batch_size, data_size, data_size, 256]
            tf.logging.info('conv1 output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('primary_caps_layer') as scope:
            output = slim.conv2d(output, num_outputs=32*8, kernel_size=[9, 9], stride=2, padding='VALID', scope=scope)#, activation_fn=None
            output = tf.reshape(output, [cfg.batch_size, -1, 8])
            output = squash(output)
            # VALID 9x9, stride 2: out = ceil((in - 9 + 1) / 2). Flooring
            # (in - 8) / 2 under-counts by one whenever (in - 8) is odd and
            # would make the assertion below fail for such input sizes.
            data_size = (data_size - 8 + 1) // 2
            assert output.get_shape() == [cfg.batch_size, data_size*data_size*32, 8]
            tf.logging.info('primary capsule output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('digit_caps_layer') as scope:
            with tf.variable_scope('u') as scope:
                u_hats = vec_transform(output, num_classes, 16)
                assert u_hats.get_shape() == [cfg.batch_size, num_classes, data_size*data_size*32, 16]
                tf.logging.info('digit_caps_layer u_hats shape: {}'.format(u_hats.get_shape()))

            with tf.variable_scope('routing') as scope:
                output = dynamic_routing(u_hats)
                assert output.get_shape() == [cfg.batch_size, num_classes, 16]
                tf.logging.info('the output capsule has shape: {}'.format(output.get_shape()))

        # Length of every output capsule vector.
        output_len = tf.norm(output, axis=-1)

    return output, output_len
示例#18
0
 def build_graph(self, left, right, gt_flow):
     """Build the prediction graph and register the named output tensors.

     Creates a tensor named "prediction" and a scalar named "epe" (mean over
     all positions of the axis-1 norm of ``prediction - gt_flow``). Nothing
     is returned; the tensors are reachable by name.
     """
     net_input = self.preprocess(left, right)
     flow = self.postprocess(self.graph_structure(net_input))
     tf.identity(flow, name="prediction")
     # endpoint error
     error_norm = tf.norm(flow - gt_flow, axis=1)
     tf.reduce_mean(error_norm, name='epe')
    def step(self, inputs, states):
        """Advance every memory block by one step.

        ``states[0]`` is split into ``self._num_blocks`` blocks along axis 1;
        each block is updated with its gated candidate, renormalised to unit
        length along the last axis, and the blocks are concatenated again.

        :param inputs: the input for this step.
        :param states: list whose first element is the concatenated state.
        :return: ``(state_next, [state_next])``.
        """
        # Split the hidden state into blocks (each U, V, W are shared across blocks).
        blocks = tf.split(states[0], self._num_blocks, axis=1)
        print('state after split', blocks)

        updated_blocks = []
        for idx, block in enumerate(blocks):  # Hidden State (j)
            key = tf.expand_dims(self._keys[idx], axis=0)
            gate = self.get_gate(block, key, inputs)
            candidate = self.get_candidate(block, key, inputs, self.U, self.V, self.W, self.U_bias)

            # Equation 4: h_j <- h_j + g_j * h_j^~
            # Perform an update of the hidden state (memory).
            updated = block + tf.expand_dims(gate, -1) * candidate

            # Equation 5: h_j <- h_j / \norm{h_j}
            # Forget previous memories by normalization.
            length = tf.norm(tensor=updated, ord='euclidean', axis=-1, keep_dims=True)
            # Rows whose norm is not positive are divided by 1 instead.
            is_positive = tf.greater(length, 0.0)
            safe_length = tf.where(is_positive, length, tf.ones_like(length))
            updated_blocks.append(updated / safe_length)

        merged = tf.concat(updated_blocks, axis=1)
        return merged, [merged]
示例#20
0
  def __call__(self, codes):
    """Use codebook to find nearest neighbor for each code.

    Args:
      codes: A `float`-like `Tensor` containing the latent
        vectors to be compared to the codebook. These are rank-3 with shape
        `[batch_size, latent_size, code_size]`.

    Returns:
      nearest_codebook_entries: The 1-nearest neighbor in Euclidean distance for
        each code in the batch.
      one_hot_assignments: The one-hot vectors corresponding to the matched
        codebook entry for each code in the batch.
    """
    def _codebook_4d():
      # Codebook reshaped to broadcast against [batch, latent, 1, code_size].
      return tf.reshape(
          self.codebook, [1, 1, self.num_codes, self.code_size])

    offsets = tf.expand_dims(codes, 2) - _codebook_4d()
    distances = tf.norm(offsets, axis=3)
    assignments = tf.argmin(distances, 2)
    one_hot_assignments = tf.one_hot(assignments, depth=self.num_codes)
    # The one-hot mask zeroes every codebook entry except the selected one,
    # so summing over the codebook axis yields that entry.
    selected = tf.expand_dims(one_hot_assignments, -1) * _codebook_4d()
    nearest_codebook_entries = tf.reduce_sum(selected, axis=2)
    return nearest_codebook_entries, one_hot_assignments
  def _PerCentroidNormalization(self, unnormalized_vector):
    """Perform per-centroid normalization.

    Args:
      unnormalized_vector: [KxD] float tensor.

    Returns:
      per_centroid_normalized_vector: [KxD] float tensor, with normalized
        aggregated residuals. Some residuals may be all-zero.
      visual_words: Int tensor containing indices of visual words which are
        present for the set of features.
    """
    residuals = tf.reshape(
        unnormalized_vector,
        [self._codebook_size, self._feature_dimensionality])
    row_norms = tf.norm(residuals, axis=1)

    # A centroid counts as present when its residual norm exceeds the
    # square root of the squared-norm tolerance.
    is_present = tf.greater(row_norms, tf.sqrt(_NORM_SQUARED_TOLERANCE))
    visual_words = tf.reshape(tf.where(is_present), [-1])

    per_centroid_normalized_vector = tf.math.l2_normalize(
        residuals, axis=1, epsilon=_NORM_SQUARED_TOLERANCE)

    return per_centroid_normalized_vector, visual_words
示例#22
0
def arcface_loss(embedding, labels, out_num, w_init=None, s=64., m=0.5):
    '''
    Build angular-margin logits: s * cos(theta) for non-target classes and
    s * cos(theta + m) for the target class (with a linear fallback
    s * (cos(theta) - m * sin(m)) where cos(theta) <= cos(pi - m)).

    :param embedding: the input embedding vectors, rank 2 (batch_size, embedding_dim)
    :param labels:  integer class ids, shape (batch_size,) or (batch_size, 1);
                    they are flattened to (batch_size,) before the one-hot mask is built
    :param out_num: output class num
    :param w_init: initializer handed to tf.get_variable for the class weights
    :param s: scalar value default is 64
    :param m: the margin value, default is 0.5
    :return: the final calculated output, shape (batch_size, out_num); this output is
             sent into the tf.nn.softmax directly
    '''
    cos_m = math.cos(m)
    sin_m = math.sin(m)
    mm = sin_m * m  # issue 1
    threshold = math.cos(math.pi - m)
    with tf.variable_scope('arcface_loss'):
        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
        embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
        weights = tf.get_variable(name='embedding_weights', shape=(embedding.get_shape().as_list()[-1], out_num),
                                  initializer=w_init, dtype=tf.float32)
        weights_norm = tf.norm(weights, axis=0, keep_dims=True)
        weights = tf.div(weights, weights_norm, name='norm_weights')
        # cos(theta+m) = cos(theta)cos(m) - sin(theta)sin(m)
        cos_t = tf.matmul(embedding, weights, name='cos_t')
        cos_t2 = tf.square(cos_t, name='cos_2')
        sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
        sin_t = tf.sqrt(sin_t2, name='sin_t')
        cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')

        # this condition controls the theta+m should in range [0, pi]
        #      0<=theta+m<=pi
        #     -m<=theta<=pi-m
        cond_v = cos_t - threshold
        # relu(.) cast to bool is True exactly where cond_v > 0.
        cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)

        keep_val = s*(cos_t - mm)
        cos_mt_temp = tf.where(cond, cos_mt, keep_val)

        # tf.one_hot appends the depth axis, so (batch_size, 1) labels would
        # give a rank-3 mask that broadcasts against the rank-2 logits into
        # (batch_size, batch_size, out_num). Flatten first so both label
        # layouts produce a (batch_size, out_num) mask.
        labels = tf.reshape(labels, [-1])
        mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
        inv_mask = tf.subtract(1., mask, name='inverse_mask')

        s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t')

        output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_loss_output')
    return output
def modulus(x):
    """Return the last-axis norm of ``x`` paired with a zero channel.

    The norm over the last axis of ``x`` is computed, given a trailing axis
    of size 1, and concatenated with zeros of the same shape, so the result
    has the shape of ``x`` with the last axis replaced by size 2.
    """
    last_axis = len(x.get_shape().as_list()) - 1
    magnitude = tf.expand_dims(tf.norm(x, axis=last_axis), axis=-1)
    return tf.concat([magnitude, tf.zeros_like(magnitude)], axis=-1)
示例#24
0
    def build_model(self):
        """Build the VQA graph: placeholders, features, attention, classifier, loss.

        Creates the ``questions``/``answers``/``images`` placeholders, extracts
        depth-normalised ResNet image features and LSTM text features, combines
        them with ``self.compute_attention``, and classifies with two fully
        connected layers. Also sets ``answer_prob``, ``loss``, ``global_step``,
        ``inc``, ``lr`` and ``optimizer`` on ``self``.
        """
        print('\nBuilding Model')
        # Creating placeholders for the question and the answer
        self.questions = tf.placeholder(tf.int64, shape=[None, 15], name="question_vector")
        self.answers = tf.placeholder(tf.float32, shape=[None, self.most_freq_limit], name="answer_vector")
        self.images = tf.placeholder(tf.float32, shape=[None, 448, 448, 3], name="images_matrix")

        arg_scope = resnet_arg_scope()
        with tf.contrib.slim.arg_scope(arg_scope):
            resnet_features, _ = resnet_v2_152(self.images, reuse=tf.AUTO_REUSE)
        # L2-normalise along axis 3; the epsilon guards against division by zero.
        depth_norm = tf.norm(resnet_features, ord='euclidean', keepdims=True, axis=3) + 1e-8
        self.image_features = resnet_features/depth_norm

        with tf.variable_scope("text_features") as scope:
            if self.reuse:
                scope.reuse_variables()
            self.word_embeddings = tf.get_variable('word_embeddings',
                                              [self.vocabulary_size,
                                               self.embedding_size],
                                               initializer=tf.contrib.layers.xavier_initializer())
            word_vectors = tf.nn.embedding_lookup(self.word_embeddings, self.questions)
            len_word = self._len_seq(word_vectors)

            embedded_sentence = tf.nn.dropout(tf.nn.tanh(word_vectors, name="embedded_sentence"),
                                       keep_prob=self.dropout_prob)
            lstm = tf.nn.rnn_cell.LSTMCell(self.state_size,
                                           initializer=tf.contrib.layers.xavier_initializer())
            _, final_state = tf.nn.dynamic_rnn(lstm, embedded_sentence,
                                               sequence_length=len_word,
                                               dtype=tf.float32)
            # The final LSTM cell state serves as the question representation.
            self.text_features = final_state.c

        self.attention_features = self.compute_attention(self.image_features,
                                                         self.text_features)

        with tf.variable_scope("fully_connected") as scope:
            if self.reuse:
                scope.reuse_variables()
            self.fc1 = tf.nn.dropout(tf.nn.relu(self.fc_layer(self.attention_features, 1024, name="fc1")),
                                     keep_prob=self.dropout_prob)
            # The logits must be as wide as the answer vector they are compared
            # with in the loss below, so size fc2 from most_freq_limit rather
            # than a hard-coded 3000 (assumes fc_layer's second argument is the
            # output width -- TODO confirm).
            self.fc2 = self.fc_layer(self.fc1, self.most_freq_limit, name="fc2")

        self.answer_prob = tf.nn.softmax(self.fc2)
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.answers,
                                                                              logits=self.fc2))

        self.global_step = tf.Variable(0, name='global_step', trainable=False, dtype=tf.int32)
        self.inc = tf.assign_add(self.global_step, 1, name='increment')
        # Halve the learning rate every 10000 steps (staircase).
        self.lr = tf.train.exponential_decay(learning_rate=self.init_lr,
                                             global_step=self.global_step,
                                             decay_steps=10000,
                                             decay_rate=0.5,
                                             staircase=True)

        self.optimizer = tf.train.AdamOptimizer(self.lr, beta1=0.9, beta2=0.999, name="optim")
示例#25
0
def _uniform_unit_norm(dimension, shape, dtype, seed):
  """Returns a batch of points chosen uniformly from the unit hypersphere.

  Args:
    dimension: size of the last axis of every sampled point.
    shape: batch shape of the sample; the result has shape
      `shape + [dimension]`.
    dtype: dtype whose `as_numpy_dtype` is used for the Normal parameters.
    seed: zero-argument callable returning the seed for the sample.
  """
  # This works because the Gaussian distribution is spherically symmetric.
  to_numpy = dtype.as_numpy_dtype
  standard_normal = normal.Normal(loc=to_numpy(0.), scale=to_numpy(1.))
  # raw shape: shape + [dimension]
  sample_shape = tf.concat([shape, [dimension]], axis=0)
  raw = standard_normal.sample(sample_shape, seed=seed())
  lengths = tf.norm(raw, ord=2, axis=-1)
  return raw / lengths[..., tf.newaxis]
示例#26
0
 def monte_carlo_hypersphere_volume(dist, num_samples, radius, center):
   """Estimate a hypersphere volume by importance sampling from `dist`.

   Samples within `radius` of `center` contribute 1 / p(x); all others
   contribute zero. The mean over the sample axis is returned. `seed` is
   taken from the enclosing scope.
   """
   # https://en.wikipedia.org/wiki/Importance_sampling
   samples = dist.sample(num_samples, seed=seed)
   samples = tf.identity(samples)  # Invalidate bijector cacheing.
   inverse_prob = tf.exp(-dist.log_prob(samples))
   inside = tf.norm(samples - center, axis=-1) <= radius
   weights = tf.where(inside, inverse_prob, tf.zeros_like(inverse_prob))
   return tf.reduce_mean(weights, axis=0)
示例#27
0
def conv_block(inputs,
               num_units=None,
               size=5,
               rate=1,
               padding="SAME",
               dropout_rate=0,
               training=False,
               scope="conv_block",
               reuse=None):
    '''Weight-normalised, gated 1-D convolution block.
    Args:
      inputs: A 3-D tensor with shape of [batch, time, depth].
      num_units: An int or None. The convolution produces `num_units*2`
        channels which are then passed through `glu`. Defaults to the static
        depth of `inputs`.
      size: An int. Filter size.
      rate: An int. Dilation rate of the convolution.
      padding: `causal` (matched case-insensitively) left-pads the time axis
        by `(size - 1) * rate` and then convolves with "VALID"; any other
        value is forwarded unchanged to `tf.nn.convolution`.
      dropout_rate: A float. Dropout rate applied to `inputs`; it also scales
        the variance of the initial direction weights.
      training: A boolean. Whether or not the layer is in training mode.
      scope: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.
    Returns:
      The result of `glu` applied to the biased convolution output
      (presumably [batch, time, num_units] -- confirm against `glu`).
    '''
    in_dim = inputs.get_shape().as_list()[-1]
    if num_units is None:
        num_units = in_dim

    with tf.variable_scope(scope, reuse=reuse):
        net = tf.layers.dropout(inputs, rate=dropout_rate, training=training)

        conv_padding = padding
        if padding.lower() == "causal":
            # Pad only on the left of the time axis so that an output step
            # never sees later input steps.
            left_pad = (size - 1) * rate
            net = tf.pad(net, [[0, 0], [left_pad, 0], [0, 0]])
            conv_padding = "VALID"

        # Weight normalisation: kernel = g * V / ||V||, with the norm taken
        # over the (width, in_dim) axes for every output channel.
        direction = tf.get_variable(
            'V',
            shape=[size, in_dim, num_units*2],
            dtype=tf.float32,
            initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=(4.*(1.-dropout_rate))))  # (width, in_dim, out_dim)
        # g starts at the norm of the initial V, so the initial kernel equals
        # the initial V.
        gain = tf.get_variable(
            'g',
            dtype=tf.float32,
            initializer=tf.norm(
                direction.initialized_value(), axis=(0, 1), keep_dims=True))
        bias = tf.get_variable(
            'b',
            shape=(num_units*2,),
            dtype=tf.float32,
            initializer=tf.zeros_initializer)

        kernel = tf.nn.l2_normalize(direction, [0, 1]) * gain  # (width, in_dim, out_dim)

        conv_out = tf.nn.convolution(
            net, kernel, conv_padding, dilation_rate=[rate]) + bias
        outputs = glu(conv_out)

    return outputs
示例#28
0
 def _test_model_fn(image, normalized_image, reuse):
   """Stub model function used by the test.

   Returns a pair `(attention, feature_map)`: `attention` is the squeezed
   norm of `image` over axis 3, and `feature_map` is `image` tiled 341 times
   along axis 3 with a single all-zero channel appended.
   """
   del normalized_image, reuse  # Neither argument is used by this stub.
   dynamic_shape = tf.shape(image)
   channel_norm = tf.norm(image, axis=3)
   attention = tf.squeeze(channel_norm)
   tiled_image = tf.tile(image, [1, 1, 1, 341])
   zero_channel = tf.zeros([1, dynamic_shape[1], dynamic_shape[2], 1])
   feature_map = tf.concat([tiled_image, zero_channel], axis=3)
   return attention, feature_map
示例#29
0
def linear_mapping_weightnorm(inputs, out_dim, in_dim=None, dropout=1.0, var_scope_name="linear_mapping"):
  """Weight-normalised linear projection of the last axis of a rank-3 tensor.

  Implements weight normalisation (Salimans & Kingma, 2016): the effective
  weight is `g * V / ||V||`, applied here as `(x V) * g / ||V|| + b`.

  Args:
    inputs: rank-3 tensor; its last static dimension must be known.
    out_dim: size of the projected last axis.
    in_dim: unused; kept for interface compatibility.
    dropout: scales the variance of the initial weights,
      stddev = sqrt(dropout / input_depth).
    var_scope_name: name of the variable scope holding `V`, `g` and `b`.

  Returns:
    Tensor reshaped to [dynamic first dim of `inputs`, -1, out_dim].
  """
  with tf.variable_scope(var_scope_name):
    static_shape = inputs.get_shape().as_list()  # static shape, may contain None
    dynamic_shape = tf.shape(inputs)
    fan_in = int(static_shape[-1])

    # Direction matrix V of shape (fan_in, out_dim).
    v_initializer = tf.random_normal_initializer(
        mean=0, stddev=tf.sqrt(dropout*1.0/fan_in))
    V = tf.get_variable('V', shape=[fan_in, out_dim], dtype=tf.float32,
                        initializer=v_initializer, trainable=True)
    # Per-column norm of the initial V (shape: out_dim) seeds the gain g.
    initial_column_norm = tf.norm(V.initialized_value(), axis=0)
    g = tf.get_variable('g', dtype=tf.float32,
                        initializer=initial_column_norm, trainable=True)
    # The weight-norm bias starts at zero.
    b = tf.get_variable('b', shape=[out_dim], dtype=tf.float32,
                        initializer=tf.zeros_initializer(), trainable=True)

    assert len(static_shape) == 3
    flat = tf.reshape(inputs, [-1, static_shape[-1]])
    projected = tf.matmul(flat, V)  # x * v
    projected = tf.reshape(projected, [dynamic_shape[0], -1, out_dim])

    scaler = tf.div(g, tf.norm(V, axis=0))  # g / ||v||
    # x * v * g / ||v|| + b, broadcast over the leading axes
    return tf.reshape(scaler, [1, out_dim])*projected + tf.reshape(b, [1, out_dim])
示例#30
0
def mlp(feature, hparams, name="mlp"):
  """Stack of dense -> layer norm -> relu -> dropout layers.

  Args:
    feature: input tensor.
    hparams: object providing `num_mlp_layers`, `mlp_size` and `dropout`.
    name: variable scope name.

  Returns:
    The output of the last layer. The norm over the last axis of every dense
    output is recorded under the "norms" collection as "mlp_feature".
  """
  with tf.variable_scope(name, "mlp", values=[feature]):
    depth = hparams.num_mlp_layers
    width = hparams.mlp_size
    hidden = feature
    for _layer in range(depth):
      projected = common_layers.dense(hidden, width, activation=None)
      feature_norm = tf.norm(projected, axis=-1)
      utils.collect_named_outputs("norms", "mlp_feature", feature_norm)
      normalized = common_layers.layer_norm(projected)
      activated = tf.nn.relu(normalized)
      hidden = tf.nn.dropout(activated, keep_prob=1.-hparams.dropout)
    return hidden
示例#31
0
            # compute loss
            # out:[b, 10]
            # y:[b] => [b ,10]
            y_onehot = tf.one_hot(y, depth=10)

            # mse = mean(sum(y - out)^2)
            # [b, 10]
            loss = tf.square(y_onehot - out)
            # mean: scalar
            loss = tf.reduce_mean(loss)

        # compute gradients
        grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
        # print the norm of every gradient before clipping
        print("==before==")
        for g in grads:
            print(tf.norm(g))

        grads, _ = tf.clip_by_global_norm(grads,
                                          15)  # cap the global norm of the gradients at 15; when it is exceeded, all gradients are scaled down proportionally

        # print the norm of every gradient after clipping
        print("==after==")
        for g in grads:
            print(tf.norm(g))

        # w1 = w1 - learning_rate * w1_grad; the update must be done in place with assign,
        # otherwise the result would no longer be a Variable but a plain tensor
        w1.assign_sub(lr * grads[0])
        b1.assign_sub(lr * grads[1])
        w2.assign_sub(lr * grads[2])
        b2.assign_sub(lr * grads[3])
        w3.assign_sub(lr * grads[4])
示例#32
0
    # Hyper-parameters stored for the arcsoftmax lambda settings.
    params.dict["arcsoftmax_lambda_min"] = 10
    params.dict["arcsoftmax_lambda_base"] = 1000
    params.dict["arcsoftmax_lambda_gamma"] = 1
    params.dict["arcsoftmax_lambda_power"] = 4

    params.dict["feature_norm"] = True
    params.dict["feature_scaling_factor"] = 20

    # Test l2_scaling: after scaling, the L2 norm of every output row (axis 1)
    # is expected to equal params.feature_scaling_factor.
    from model.common import l2_scaling
    outputs, endpoints = tdnn(features,
                              params,
                              is_training=True,
                              reuse_variables=False)
    outputs = l2_scaling(outputs, params.feature_scaling_factor)
    outputs_norm = tf.norm(outputs, axis=1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        [outputs_val,
         outputs_norm_val] = sess.run([outputs, outputs_norm],
                                      feed_dict={features: features_val})
        # The norm is checked twice: recomputed in numpy from the raw outputs,
        # and as evaluated in-graph by tf.norm.
        assert np.allclose(np.sqrt(np.sum(outputs_val**2, axis=1)),
                           params.feature_scaling_factor)
        assert np.allclose(outputs_norm_val, params.feature_scaling_factor)

    # Test loss functions
    # It only works on debug mode, since the loss is asked to output weights for our numpy computation.
    from model.loss import asoftmax, additive_margin_softmax, additive_angular_margin_softmax
    from model.test_utils import compute_asoftmax, compute_amsoftmax, compute_arcsoftmax

    params.dict["global_step"] = 1
示例#33
0
def vertex_normals(vertices, faces, name=None):
    """Computes vertex normals for the given meshes.
    This function takes a batch of meshes with common topology, and calculates vertex normals for each.
    Args:
        vertices: a `Tensor` of shape [*, vertex count, 3] or [*, vertex count, 4], where * represents arbitrarily
            many leading (batch) dimensions. Only zero or one leading dimension is currently supported (see Raises).
        faces: an int32 `Tensor` of shape [face count, 3]; each value is an index into the vertex dimension of
            `vertices`, and each row defines one triangle.
        name: an optional name for the operation
    Returns:
        a `Tensor` of shape [*, vertex count, 3], which for each vertex, gives the (normalised) average of the normals of
        all faces that include that vertex
    Raises:
        AssertionError: if `vertices` (after preparation) is not of rank 2 or 3.
    """

    # This computes vertex normals, as the average of the normals of the faces each vertex is part of
    # vertices is indexed by *, vertex-index, x/y/z[/w]
    # faces is indexed by face-index, vertex-in-face
    # result is indexed by *, vertex-index, x/y/z

    with ops.name_scope(name, 'VertexNormals', [vertices, faces]) as scope:

        vertices, faces = _prepare_vertices_and_faces(vertices, faces)
        vertices = vertices[..., :3]  # drop the w-coordinate if present

        vertices_ndim = vertices.get_shape().ndims
        # normals_by_face is indexed by face-index, *, x/y/z
        normals_by_face, vertices_by_index = _get_face_normals(vertices, faces)

        face_count = tf.shape(faces)[0]
        vbi_shape = tf.shape(vertices_by_index)
        # this is the number of 'elements' in the * dimensions
        N_extra = tf.reduce_prod(vbi_shape[1:-1])

        # ** keep it simple for now; in the general case we need a flattened outer product of ranges
        assert vertices_ndim in {2, 3}
        if vertices_ndim == 2:
            extra_indices = []
        else:
            extra_indices = [
                tf.tile(_repeat_1d(tf.range(N_extra), 3), [face_count * 3])]

        # Scatter every face normal onto each of the face's three vertices as a sparse
        # [face, vertex, *, xyz] tensor; summing over the face axis then accumulates,
        # for every vertex, the normals of all faces that contain it.
        normals_by_face_and_vertex = tf.SparseTensor(
            indices=tf.cast(
                tf.stack([  # each element of this stack is repeated a number of times matching the things after, then tiled a number of times matching the things before, so that each has the same length
                    _repeat_1d(
                        tf.range(face_count, dtype=tf.int32), N_extra * 9),
                    _repeat_1d(tf.reshape(faces, [-1]), N_extra * 3)
                ] + extra_indices + [
                    tf.tile(tf.constant([0, 1, 2], dtype=tf.int32), tf.convert_to_tensor(
                        [face_count * N_extra * 3]))
                ], axis=1),
                tf.int64
            ),
            values=tf.reshape(tf.tile(normals_by_face[:, tf.newaxis, ...], [
                              1, 3] + [1] * (vertices_ndim - 1)), [-1]),
            dense_shape=tf.cast(
                tf.concat([[face_count], vbi_shape], axis=0), tf.int64)
        )  # indexed by face-index, vertex-index, *, x/y/z

        summed_normals_by_vertex = tf.sparse_reduce_sum(
            normals_by_face_and_vertex, axis=0)  # indexed by vertex-index, *, x/y/z
        # the small epsilon avoids a division by zero when the summed normal is the zero vector
        renormalised_normals_by_vertex = summed_normals_by_vertex / \
            (tf.norm(summed_normals_by_vertex, axis=-1, keepdims=True) + 1.e-12)  # ditto

        # Move the vertex axis back behind the batch axes: (vertex, *, xyz) -> (*, vertex, xyz).
        # `range` must be materialised as a list first: on Python 3 a range object cannot be
        # concatenated to a list with `+`.
        result = tf.transpose(renormalised_normals_by_vertex, list(range(
            1, vertices_ndim - 1)) + [0, vertices_ndim - 1])
        result.set_shape(vertices.get_shape())
        return result
示例#34
0
    def __init__(self,
                 sess: tf.Session,
                 predict: Union[Callable, tf.keras.Model, 'keras.Model'],
                 shape: tuple,
                 kappa: float = 0.,
                 beta: float = .1,
                 feature_range: tuple = (-1e10, 1e10),
                 gamma: float = 0.,
                 ae_model: Union[tf.keras.Model, 'keras.Model'] = None,
                 enc_model: Union[tf.keras.Model, 'keras.Model'] = None,
                 theta: float = 0.,
                 use_kdtree: bool = False,
                 learning_rate_init: float = 1e-2,
                 max_iterations: int = 1000,
                 c_init: float = 10.,
                 c_steps: int = 10,
                 eps: tuple = (1e-3, 1e-3),
                 clip: tuple = (-1000., 1000.),
                 update_num_grad: int = 1,
                 write_dir: str = None) -> None:
        """
        Initialize prototypical counterfactual method.

        Builds the whole TensorFlow graph used by the method: the variables holding the original and
        perturbed instances, the shrinkage-thresholding update, the individual loss terms, the
        gradient-descent training ops and the assignment/initialization ops.

        Parameters
        ----------
        sess
            TensorFlow session
        predict
            Keras or TensorFlow model or any other model's prediction function returning class probabilities
        shape
            Shape of input data starting with batch size
        kappa
            Confidence parameter for the attack loss term
        beta
            Regularization constant for L1 loss term
        feature_range
            Tuple with min and max ranges to allow for perturbed instances. Min and max ranges can be floats or
            numpy arrays with dimension (1x nb of features) for feature-wise ranges
        gamma
            Regularization constant for optional auto-encoder loss term
        ae_model
            Optional auto-encoder model used for loss regularization
        enc_model
            Optional encoder model used to guide instance perturbations towards a class prototype
        theta
            Constant for the prototype search loss term
        use_kdtree
            Whether to use k-d trees for the prototype loss term if no encoder is available
        learning_rate_init
            Initial learning rate of optimizer
        max_iterations
            Maximum number of iterations for finding a counterfactual
        c_init
            Initial value to scale the attack loss term
        c_steps
            Number of iterations to adjust the constant scaling the attack loss term
        eps
            If numerical gradients are used to compute dL/dx = (dL/dp) * (dp/dx), then eps[0] is used to
            calculate dL/dp and eps[1] is used for dp/dx. eps[0] and eps[1] can be a combination of float values and
            numpy arrays. For eps[0], the array dimension should be (1x nb of prediction categories) and for
            eps[1] it should be (1x nb of features)
        clip
            Tuple with min and max clip ranges for both the numerical gradients and the gradients
            obtained from the TensorFlow graph
        update_num_grad
            If numerical gradients are used, they will be updated every update_num_grad iterations
        write_dir
            Directory to write tensorboard files to
        """
        self.sess = sess
        self.predict = predict

        # check whether the model, encoder and auto-encoder are Keras or TF models
        try:
            import keras  # noqa
            is_model = isinstance(predict, (tf.keras.Model, keras.Model))
            is_ae = isinstance(ae_model, (tf.keras.Model, keras.Model))
            is_enc = isinstance(enc_model, (tf.keras.Model, keras.Model))
        except ImportError:
            # standalone keras is not installed: only check against tf.keras
            is_model = isinstance(predict, (tf.keras.Model))
            is_ae = isinstance(ae_model, (tf.keras.Model))
            is_enc = isinstance(enc_model, (tf.keras.Model))

        # infer the number of classes from a prediction on an all-zero input of the given shape
        if is_model:
            self.model = True
            self.classes = self.sess.run(
                self.predict(
                    tf.convert_to_tensor(np.zeros(shape),
                                         dtype=tf.float32))).shape[1]
        else:
            self.model = False
            self.classes = self.predict(np.zeros(shape)).shape[1]

        # NOTE: self.enc_model and self.ae_model are boolean flags;
        # the model objects themselves are stored in self.enc and self.ae below
        if is_enc:
            self.enc_model = True
        else:
            self.enc_model = False

        if is_ae:
            self.ae_model = True
        else:
            self.ae_model = False

        if use_kdtree and self.enc_model:
            logger.warning(
                'Both an encoder and k-d trees enabled. Using the encoder for the prototype loss term.'
            )

        if use_kdtree or self.enc_model:
            self.enc_or_kdtree = True
        else:
            self.enc_or_kdtree = False

        self.shape = shape
        self.kappa = kappa
        self.beta = beta
        self.gamma = gamma
        self.theta = theta
        self.ae = ae_model
        self.enc = enc_model
        self.use_kdtree = use_kdtree
        self.batch_size = shape[0]
        self.max_iterations = max_iterations
        self.c_init = c_init
        self.c_steps = c_steps
        self.update_num_grad = update_num_grad
        self.eps = eps
        self.clip = clip
        self.write_dir = write_dir

        # define tf variables for original and perturbed instances, and target labels
        self.orig = tf.Variable(np.zeros(shape), dtype=tf.float32, name='orig')
        self.adv = tf.Variable(np.zeros(shape), dtype=tf.float32, name='adv')
        self.adv_s = tf.Variable(np.zeros(shape),
                                 dtype=tf.float32,
                                 name='adv_s')
        self.target = tf.Variable(np.zeros((self.batch_size, self.classes)),
                                  dtype=tf.float32,
                                  name='target')

        # variable for target class proto
        # its shape is the encoder output shape if an encoder is used, otherwise the input shape
        if self.enc_model:
            self.shape_enc = self.enc.predict(np.zeros(shape)).shape
        else:
            self.shape_enc = shape

        self.target_proto = tf.Variable(np.zeros(self.shape_enc),
                                        dtype=tf.float32,
                                        name='target_proto')

        # define tf variable for constant used in FISTA optimization
        self.const = tf.Variable(np.zeros(self.batch_size),
                                 dtype=tf.float32,
                                 name='const')
        # created with a float initial value; it is divided as a float when computing self.zt below
        self.global_step = tf.Variable(0.0,
                                       trainable=False,
                                       name='global_step')

        # define placeholders that will be assigned to relevant variables
        self.assign_orig = tf.placeholder(tf.float32,
                                          shape,
                                          name='assign_orig')
        self.assign_adv = tf.placeholder(tf.float32, shape, name='assign_adv')
        self.assign_adv_s = tf.placeholder(tf.float32,
                                           shape,
                                           name='assign_adv_s')
        self.assign_target = tf.placeholder(tf.float32,
                                            (self.batch_size, self.classes),
                                            name='assign_target')
        self.assign_const = tf.placeholder(tf.float32, [self.batch_size],
                                           name='assign_const')
        self.assign_target_proto = tf.placeholder(tf.float32,
                                                  self.shape_enc,
                                                  name='assign_target_proto')

        # define conditions and values for element-wise shrinkage thresholding
        with tf.name_scope('shrinkage_thresholding') as scope:
            # three 0/1 masks: adv_s - orig > beta, |adv_s - orig| <= beta, adv_s - orig < -beta
            cond = [
                tf.cast(
                    tf.greater(tf.subtract(self.adv_s, self.orig), self.beta),
                    tf.float32),
                tf.cast(
                    tf.less_equal(tf.abs(tf.subtract(self.adv_s, self.orig)),
                                  self.beta), tf.float32),
                tf.cast(
                    tf.less(tf.subtract(self.adv_s, self.orig),
                            tf.negative(self.beta)), tf.float32)
            ]
            upper = tf.minimum(tf.subtract(self.adv_s, self.beta),
                               tf.cast(feature_range[1], tf.float32))
            lower = tf.maximum(tf.add(self.adv_s, self.beta),
                               tf.cast(feature_range[0], tf.float32))
            # NOTE: this rebinds self.assign_adv, which was bound to the 'assign_adv' placeholder above,
            # to the thresholded tensor; everything below that uses self.assign_adv sees this tensor
            self.assign_adv = tf.multiply(cond[0], upper) + tf.multiply(
                cond[1], self.orig) + tf.multiply(cond[2], lower)

        # perturbation update and vector projection on correct feature range set
        with tf.name_scope('perturbation_y') as scope:
            # momentum weight k / (k + 3), with k the current global step
            self.zt = tf.divide(self.global_step,
                                self.global_step + tf.cast(3, tf.float32))
            # NOTE: as for self.assign_adv, this rebinds self.assign_adv_s away from its placeholder
            self.assign_adv_s = self.assign_adv + tf.multiply(
                self.zt, self.assign_adv - self.adv)
            # map to feature space
            self.assign_adv_s = tf.minimum(
                self.assign_adv_s, tf.cast(feature_range[1], tf.float32))
            self.assign_adv_s = tf.maximum(
                self.assign_adv_s, tf.cast(feature_range[0], tf.float32))

        # assign counterfactual of step k+1 to k
        with tf.name_scope('update_adv') as scope:
            self.adv_updater = tf.assign(self.adv, self.assign_adv)
            self.adv_updater_s = tf.assign(self.adv_s, self.assign_adv_s)

        # from perturbed instance, derive deviation delta
        with tf.name_scope('update_delta') as scope:
            self.delta = self.orig - self.adv
            self.delta_s = self.orig - self.adv_s

        # define L1 and L2 loss terms; L1+L2 is later used as an optimization constraint for FISTA
        # reduce over every axis except the leading (batch) axis
        ax_sum = list(np.arange(1, len(shape)))
        with tf.name_scope('loss_l1_l2') as scope:
            self.l2 = tf.reduce_sum(tf.square(self.delta), axis=ax_sum)
            self.l2_s = tf.reduce_sum(tf.square(self.delta_s), axis=ax_sum)
            self.l1 = tf.reduce_sum(tf.abs(self.delta), axis=ax_sum)
            self.l1_s = tf.reduce_sum(tf.abs(self.delta_s), axis=ax_sum)
            self.l1_l2 = self.l2 + tf.multiply(self.l1, self.beta)
            self.l1_l2_s = self.l2_s + tf.multiply(self.l1_s, self.beta)

            # sum losses
            self.loss_l1 = tf.reduce_sum(self.l1)
            self.loss_l1_s = tf.reduce_sum(self.l1_s)
            self.loss_l2 = tf.reduce_sum(self.l2)
            self.loss_l2_s = tf.reduce_sum(self.l2_s)

        with tf.name_scope('loss_ae') as scope:
            # gamma * AE loss
            if self.ae_model:
                self.loss_ae = self.gamma * tf.square(
                    tf.norm(self.ae(self.adv) - self.adv))
                self.loss_ae_s = self.gamma * tf.square(
                    tf.norm(self.ae(self.adv_s) - self.adv_s))
            else:  # no auto-encoder available
                self.loss_ae = tf.constant(0.)
                self.loss_ae_s = tf.constant(0.)

        with tf.name_scope('loss_attack') as scope:
            if not self.model:
                # predict is not a Keras/TF model: the attack loss is a placeholder to be fed;
                # self.loss_attack_s is not defined in this branch
                self.loss_attack = tf.placeholder(tf.float32)
            elif self.c_init == 0. and self.c_steps == 1:  # prediction loss term not used
                # make predictions on perturbed instance
                self.pred_proba = self.predict(self.adv)
                self.pred_proba_s = self.predict(self.adv_s)

                self.loss_attack = tf.constant(0.)
                self.loss_attack_s = tf.constant(0.)
            else:
                # make predictions on perturbed instance
                self.pred_proba = self.predict(self.adv)
                self.pred_proba_s = self.predict(self.adv_s)

                # probability of target label prediction
                self.target_proba = tf.reduce_sum(
                    self.target * self.pred_proba, 1)
                target_proba_s = tf.reduce_sum(self.target * self.pred_proba_s,
                                               1)

                # max probability of non target label prediction
                # subtracting target * 10000 keeps the target entries out of the max
                self.nontarget_proba_max = tf.reduce_max(
                    (1 - self.target) * self.pred_proba -
                    (self.target * 10000), 1)
                nontarget_proba_max_s = tf.reduce_max(
                    (1 - self.target) * self.pred_proba_s -
                    (self.target * 10000), 1)

                # loss term f(x,d)
                loss_attack = tf.maximum(
                    0.0,
                    -self.nontarget_proba_max + self.target_proba + self.kappa)
                loss_attack_s = tf.maximum(
                    0.0, -nontarget_proba_max_s + target_proba_s + self.kappa)

                # c * f(x,d)
                self.loss_attack = tf.reduce_sum(self.const * loss_attack)
                self.loss_attack_s = tf.reduce_sum(self.const * loss_attack_s)

        with tf.name_scope('loss_prototype') as scope:
            # theta * squared norm of the difference to the target prototype, measured in the
            # encoding space if an encoder is available, else in input space when k-d trees are used
            if self.enc_model:
                self.loss_proto = self.theta * tf.square(
                    tf.norm(self.enc(self.adv) - self.target_proto))
                self.loss_proto_s = self.theta * tf.square(
                    tf.norm(self.enc(self.adv_s) - self.target_proto))
            elif self.use_kdtree:
                self.loss_proto = self.theta * tf.square(
                    tf.norm(self.adv - self.target_proto))
                self.loss_proto_s = self.theta * tf.square(
                    tf.norm(self.adv_s - self.target_proto))
            else:  # no encoder available and no k-d trees used
                self.loss_proto = tf.constant(0.)
                self.loss_proto_s = tf.constant(0.)

        with tf.name_scope('loss_combined') as scope:
            # no need for L1 term in loss to optimize when using FISTA
            if self.model:
                self.loss_opt = self.loss_attack_s + self.loss_l2_s + self.loss_ae_s + self.loss_proto_s
            else:  # separate numerical computation of loss attack gradient
                self.loss_opt = self.loss_l2_s + self.loss_ae_s + self.loss_proto_s

            # add L1 term to overall loss; this is not the loss that will be directly optimized
            self.loss_total = (self.loss_attack + self.loss_l2 + self.loss_ae +
                               tf.multiply(self.beta, self.loss_l1) +
                               self.loss_proto)

        with tf.name_scope('training') as scope:
            # learning rate decays polynomially (power 0.5) from learning_rate_init to 0 over max_iterations steps
            self.learning_rate = tf.train.polynomial_decay(learning_rate_init,
                                                           self.global_step,
                                                           self.max_iterations,
                                                           0,
                                                           power=0.5)
            optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
            # snapshot of the variable names that exist before the training ops are built;
            # used below to find the variables created while building them
            start_vars = set(x.name for x in tf.global_variables())

            # first compute, then apply grads
            self.compute_grads = optimizer.compute_gradients(
                self.loss_opt, var_list=[self.adv_s])
            # the gradient that gets applied is fed through this placeholder
            self.grad_ph = tf.placeholder(tf.float32, name='grad_adv_s')
            var = [
                tvar for tvar in tf.trainable_variables()
                if tvar.name.startswith('adv_s')
            ][-1]  # get the last in
            # case explainer is re-initialized and a new graph is created
            grad_and_var = [(self.grad_ph, var)]
            self.apply_grads = optimizer.apply_gradients(
                grad_and_var, global_step=self.global_step)
            end_vars = tf.global_variables()
            new_vars = [x for x in end_vars if x.name not in start_vars]

        # variables to initialize
        # NOTE: self.assign_adv and self.assign_adv_s here are the tensors rebound in the
        # 'shrinkage_thresholding' / 'perturbation_y' scopes above, not the original placeholders
        self.setup = []  # type: list
        self.setup.append(self.orig.assign(self.assign_orig))
        self.setup.append(self.target.assign(self.assign_target))
        self.setup.append(self.const.assign(self.assign_const))
        self.setup.append(self.adv.assign(self.assign_adv))
        self.setup.append(self.adv_s.assign(self.assign_adv_s))
        self.setup.append(self.target_proto.assign(self.assign_target_proto))

        # initializer for the step counter, the perturbed instances and the variables created in the training scope
        self.init = tf.variables_initializer(
            var_list=[self.global_step] + [self.adv_s] + [self.adv] + new_vars)

        if self.write_dir is not None:
            self.writer = tf.summary.FileWriter(write_dir,
                                                tf.get_default_graph())
            self.writer.add_graph(tf.get_default_graph())
        else:
            self.writer = None
示例#35
0
def block_Lanczos(Sigma_, B_):
    """
    block Lanczos method to approx Sigma^1/2 * B, with B matrix of N(0,1)'s.
    Used to generate multiple approximate large normal draws.

    Every column of B_ is processed as its own Lanczos run; the runs are
    stacked along the leading axis of the work tensors.

    Args:
        Sigma_: matrix that is multiplied with the Lanczos vectors
            (presumably a symmetric [n, n] covariance matrix -- confirm).
        B_: 2-D tensor; n = tf.shape(B_)[0] rows and s = tf.shape(B_)[1]
            columns.

    Returns:
        Tensor approximating Sigma^1/2 * B
        (presumably of shape [n, s] -- confirm).
    """
    n = tf.shape(B_)[0]
    s = tf.shape(B_)[1]
    # number of Lanczos iterations
    k = tf.div(n, 500) + 3

    # betas starts with one zero row and D with one zero slice, so that the
    # j - 1 look-ups in the first loop iteration (j = 1) read zeros
    betas = tf.zeros([1, s])
    alphas = tf.zeros([0, s])
    D = tf.zeros([s, n, 1])
    B_norms = tf.norm(B_, axis=0)
    # first Lanczos vector: the column-normalised B_, stored as D[:, :, 1]
    D = tf.concat([D, tf.expand_dims(tf.transpose(B_ / B_norms), 2)], 2)

    def cond(j, alphas, betas, D):
        # j starts at 1, so the body runs k times
        return j < k + 1

    #TODO: use block-CG in place of Sigma
    def body(j, alphas, betas, D):
        # current Lanczos vectors, one row per column of B_
        d_j = tf.squeeze(tf.slice(D, [0, 0, j], [-1, -1, 1]))
        # Sigma * d_j minus the previous vectors scaled by the previous betas
        d = tf.matmul(Sigma_, tf.transpose(d_j)) - (
            tf.slice(betas, [j - 1, 0], [1, -1]) *
            tf.transpose(tf.squeeze(tf.slice(D, [0, 0, j - 1], [-1, -1, 1]))))
        # new alphas: diagonal of d_j * d, appended as a new row
        alphas = tf.concat([alphas, [tf.diag_part(tf.matmul(d_j, d))]], 0)
        d = d - tf.slice(alphas, [j - 1, 0], [1, -1]) * tf.transpose(d_j)
        # new betas: column norms of the residual
        betas = tf.concat([betas, [tf.norm(d, axis=0)]], 0)
        # next Lanczos vectors: the residual divided by its norms
        D = tf.concat([
            D,
            tf.expand_dims(tf.transpose(d / tf.slice(betas, [j, 0], [1, -1])),
                           2)
        ], 2)
        return j + 1, alphas, betas, D

    j = tf.constant(1)
    # alphas, betas and D grow by concatenation on every iteration, hence the
    # fully unknown shape invariants
    j, alphas, betas, D = tf.while_loop(cond,
                                        body,
                                        loop_vars=[j, alphas, betas, D],
                                        shape_invariants=[
                                            j.get_shape(),
                                            tf.TensorShape([None, None]),
                                            tf.TensorShape([None, None]),
                                            tf.TensorShape([None, None, None])
                                        ])

    # take k slices starting at index 1, skipping the zero slice at index 0
    D_ = tf.slice(D, [0, 0, 1], [-1, -1, k])

    ##TODO replace loop
    H = tf.zeros([0, k, k])

    # NOTE(review): s is tf.shape(B_)[1], i.e. a tensor; range(s) needs a value
    # convertible to a Python int -- confirm the execution mode this is meant
    # to run under
    for ss in range(s):
        this_beta = tf.diag(tf.squeeze(tf.slice(betas, [1, ss], [k - 1, 1])))
        #build out tridiagonal H: alphas_1:k on main, betas_2:k on off
        this_H = (tf.diag(tf.squeeze(tf.slice(alphas, [0, ss], [-1, 1]))) +
                  tf.pad(this_beta, [[1, 0], [0, 1]]) +
                  tf.pad(this_beta, [[0, 1], [1, 0]]))
        H = tf.concat([H, tf.expand_dims(this_H, 0)], 0)

    # matrix square root of every H via its eigendecomposition:
    # H^(1/2) = V * diag(sqrt(E)) * V^T, with eigenvalues floored at 1e-6
    E, V = tf.self_adjoint_eig(H)
    E_sqrt = tf.zeros([0, k, k])
    #TODO: loop
    for ss in range(s):
        E_sqrt = tf.concat([
            E_sqrt,
            tf.expand_dims(
                tf.diag(
                    tf.squeeze(
                        tf.sqrt(tf.maximum(tf.slice(E, [ss, 0], [1, -1]),
                                           1e-6)))), 0)
        ], 0)
    sq_H = tf.matmul(V, tf.matmul(E_sqrt, tf.transpose(V, perm=[0, 2, 1])))

    # e1: first column of the k x k identity, repeated once per column of B_
    e1 = tf.expand_dims(
        tf.transpose(tf.tile(tf.slice(tf.eye(k), [0, 0], [-1, 1]), [1, s])), 2)
    # D_ * H^(1/2) * e1, rescaled by the original column norms of B_
    out = B_norms * tf.transpose(tf.squeeze(tf.matmul(D_, tf.matmul(sq_H,
                                                                    e1))))
    return out
示例#36
0
    def build_model(self):
        """Builds the graph: placeholders, augmentation generator, style loss and optimizer.

        Creates the latent-batch and image placeholders, runs the augmentation
        generator (and its sampler, with reused variables) on the generator
        input images, computes a style loss from the Gram matrices of ten
        activation layers of the augmented vs. real images, and sets up an
        Adam training step whose learning rate decays exponentially with the
        fed `step`.
        """
        batch = [self.batch_size]
        self.g_zbatch = tf.placeholder(tf.float32,
                                       [self.batch_size, self.z_dim], 'zbatch')
        real_dims = [self.images_height_real, self.images_width_real, 3]
        synth_dims = [self.images_height_synth, self.images_width_synth, 3]
        self.Discr_inputs_real = tf.placeholder(
            tf.float32, batch + real_dims, name='D_images_real')
        self.Discr_inputs_synth = tf.placeholder(
            tf.float32, batch + synth_dims, name='D_images_synth')
        self.Gen_inputs_imgs = tf.placeholder(
            tf.float32, batch + synth_dims, name='G_images')

        with tf.variable_scope('model') as scope:
            # sensor transformer augmentation generator; only the augmented
            # image is used below, the remaining outputs are unpacked for
            # their count only
            (img_train_aug, window_h, sigmas, scale_val,
             tx_Rval, ty_Rval, tx_Gval, ty_Gval, tx_Bval, ty_Bval,
             delta_S, A,
             Ra_sd, Rb_si, Ga_sd, Gb_si, Ba_sd, Bb_si,
             a_transl, b_transl) = self.augmentation_generator(
                 self.Gen_inputs_imgs, self.g_zbatch)
            (self.aug_img,
             self.blurSTparams,
             self.expSTparams,
             self.colorSTparams,
             self.noiseSTparams,
             self.chromabSTparams) = self.augmentation_generator_sampler(
                 self.Gen_inputs_imgs, self.g_zbatch, reuse=True)

            # activations of the augmented images ...
            (aug1_1, aug1_2, aug2_1, aug2_2, aug3_1, aug3_2, aug3_3,
             aug4_1, aug4_2, aug4_3) = self.net_synth(
                 img_train_aug, None, get_activ=True)
            # ... and of the real images, sharing the scope's variables
            scope.reuse_variables()
            (real1_1, real1_2, real2_1, real2_2, real3_1, real3_2, real3_3,
             real4_1, real4_2, real4_3) = self.net_real(
                 self.Discr_inputs_real, None, get_activ=True)

            # style loss: sum over the layers of the squared norm of the
            # difference between the Gram matrices
            layer_pairs = zip(
                (aug1_1, aug1_2, aug2_1, aug2_2, aug3_1, aug3_2, aug3_3,
                 aug4_1, aug4_2, aug4_3),
                (real1_1, real1_2, real2_1, real2_2, real3_1, real3_2, real3_3,
                 real4_1, real4_2, real4_3))
            accumulated = None
            for activ_aug, activ_real in layer_pairs:
                gram_gap = self.gram_matrix(activ_aug) - self.gram_matrix(activ_real)
                layer_term = tf.square(tf.norm(gram_gap))
                accumulated = layer_term if accumulated is None else accumulated + layer_term
            self.style_loss = tf.reduce_sum(accumulated)

            # total training loss
            self.loss_train = self.style_loss / 1e6

        with tf.variable_scope('optimizer'):
            self.step = tf.placeholder(tf.float32, [], 'step')
            # exponential decay of the learning rate with time constant tau
            lr = self.learning_rate * tf.exp(-self.step / self.tau)
            adam = tf.train.AdamOptimizer(lr)
            self.train_step = adam.minimize(self.loss_train)

        tf.summary.scalar('learning rate', lr)

        if self.log_weights:
            for var in tf.trainable_variables():
                tf.summary.histogram(var.name, var)
示例#37
0
def main():
    """Run a training experiment end to end.

    Steps, in order:

    1. Parse the command-line arguments and either reconcile them with the
       stored ``args.json`` (``--resume``) or create the experiment directory
       and store them.
    2. Build the ``tf.data`` input pipeline that yields PK batches
       (``batch_p`` identities times ``batch_k`` images each).
    3. Build the network, the head, and the loss, which is the sum of the
       losses computed on ``feature1`` .. ``feature4`` and ``fusion_layer``.
    4. Run the training loop, writing summaries, optional detailed logs and
       periodic checkpoints, and store one final checkpoint at the end.

    The process exits with status 1 if the experiment directory is non-empty
    without ``--resume`` or if ``train_set`` / ``image_root`` are missing.

    Raises:
        IOError: if ``--resume`` is given but ``args.json`` does not exist.
    """
    args = parser.parse_args()

    # We store all arguments in a json file. This has two advantages:
    # 1. We can always get back and see what exactly that experiment was
    # 2. We can resume an experiment as-is without needing to remember all flags.
    args_file = os.path.join(args.experiment_root, 'args.json')
    if args.resume:
        if not os.path.isfile(args_file):
            raise IOError('`args.json` not found in {}'.format(args_file))

        print('Loading args from {}.'.format(args_file))
        with open(args_file, 'r') as f:
            args_resumed = json.load(f)
        args_resumed['resume'] = True  # This would be overwritten.

        # When resuming, we not only want to populate the args object with the
        # values from the file, but we also want to check for some possible
        # conflicts between loaded and given arguments.
        for key, value in args.__dict__.items():
            if key in args_resumed:
                resumed_value = args_resumed[key]
                if resumed_value != value:
                    print('Warning: For the argument `{}` we are using the'
                          ' loaded value `{}`. The provided value was `{}`'
                          '.'.format(key, resumed_value, value))
                    # Only an explicit 'yes' restores the stored value; any
                    # other answer keeps the value given on the command line.
                    answer = input('Would you like to restore it?(yes/no)')
                    if answer == 'yes':
                        args.__dict__[key] = resumed_value
                        print(
                            'For the argument `{}` we are using the loaded value `{}`.'
                            .format(key, args.__dict__[key]))
                    else:
                        print(
                            'For the argument `{}` we are using the provided value `{}`.'
                            .format(key, args.__dict__[key]))
            else:
                print('Warning: A new argument was added since the last run:'
                      ' `{}`. Using the new value: `{}`.'.format(key, value))

        # Persist the reconciled arguments so that the next resume sees them.
        os.remove(args_file)
        with open(args_file, 'w') as f:
            json.dump(vars(args),
                      f,
                      ensure_ascii=False,
                      indent=2,
                      sort_keys=True)

    else:
        # If the experiment directory exists already, we bail in fear.
        if os.path.exists(args.experiment_root):
            if os.listdir(args.experiment_root):
                print('The directory {} already exists and is not empty.'
                      ' If you want to resume training, append --resume to'
                      ' your call.'.format(args.experiment_root))
                # `sys.exit` rather than the `site`-provided `exit` builtin,
                # consistent with the other exits in this function.
                sys.exit(1)
        else:
            os.makedirs(args.experiment_root)

        # Store the passed arguments for later resuming and grepping in a nice
        # and readable format.
        with open(args_file, 'w') as f:
            json.dump(vars(args),
                      f,
                      ensure_ascii=False,
                      indent=2,
                      sort_keys=True)

    log_file = os.path.join(args.experiment_root, "train")
    logging.config.dictConfig(common.get_logging_dict(log_file))
    log = logging.getLogger('train')

    # Also show all parameter values at the start, for ease of reading logs.
    log.info('Training using the following parameters:')
    for key, value in sorted(vars(args).items()):
        log.info('{}: {}'.format(key, value))

    # Check them here, so they are not required when --resume-ing.
    if not args.train_set:
        parser.print_help()
        log.error("You did not specify the `train_set` argument!")
        sys.exit(1)
    if not args.image_root:
        parser.print_help()
        log.error("You did not specify the required `image_root` argument!")
        sys.exit(1)

    # Load the data from the TxT file. see Common.load_dataset function for details
    pids, fids = common.load_dataset(args.train_set, args.image_root)
    max_fid_len = max(map(len, fids))  # We'll need this later for logfiles.

    # Setup a tf.Dataset where one "epoch" loops over all PIDS.
    # PIDS are shuffled after every epoch and continue indefinitely.
    unique_pids = np.unique(pids)
    dataset = tf.data.Dataset.from_tensor_slices(unique_pids)
    dataset = dataset.shuffle(len(unique_pids))

    # Constrain the dataset size to a multiple of the batch-size, so that
    # we don't get overlap at the end of each epoch.
    dataset = dataset.take((len(unique_pids) // args.batch_p) * args.batch_p)
    dataset = dataset.repeat(None)  # Repeat forever. Funny way of stating it.

    # For every PID, get K images.
    dataset = dataset.map(lambda pid: sample_k_fids_for_pid(
        pid, all_fids=fids, all_pids=pids, batch_k=args.batch_k
    ))  # now the dataset has been modified as [selected_fids
    # , pid] due to the return of the function 'sample_k_fids_for_pid'

    # Ungroup/flatten the batches for easy loading of the files.
    dataset = dataset.apply(tf.contrib.data.unbatch())

    # Convert filenames to actual image tensors.
    net_input_size = (args.net_input_height, args.net_input_width)
    pre_crop_size = (args.pre_crop_height, args.pre_crop_width)
    dataset = dataset.map(
        lambda fid, pid: common.fid_to_image(fid,
                                             pid,
                                             image_root=args.image_root,
                                             image_size=pre_crop_size if args.
                                             crop_augment else net_input_size),
        num_parallel_calls=args.loading_threads
    )  # now the dataset has been modified as [selected_images
    # , fid, pid] due to the return of the function 'fid_to_image'

    # Augment the data if specified by the arguments.
    if args.flip_augment:
        dataset = dataset.map(lambda im, fid, pid:
                              (tf.image.random_flip_left_right(im), fid, pid))
    if args.crop_augment:
        dataset = dataset.map(lambda im, fid, pid: (tf.random_crop(
            im, net_input_size + (3, )), fid, pid))

    # Group it back into PK batches.
    batch_size = args.batch_p * args.batch_k
    dataset = dataset.batch(batch_size)

    # Overlap producing and consuming for parallelism.
    dataset = dataset.prefetch(1)

    # Since we repeat the data infinitely, we only need a one-shot iterator.
    # NOTE: from here on `fids` and `pids` are the batch tensors, no longer
    # the full lists returned by `common.load_dataset`.
    images, fids, pids = dataset.make_one_shot_iterator().get_next()

    # Create the model and an embedding head.
    model = import_module('nets.' + args.model_name)
    head = import_module('heads.' + args.head_name)

    # Feed the image through the model. The returned `body_prefix` will be used
    # further down to load the pre-trained weights for all variables with this
    # prefix.
    endpoints, body_prefix = model.endpoints(images, is_training=True)
    if args.head_name == 'fusion':
        # The fusion head additionally receives the model name.
        with tf.name_scope('head'):
            endpoints = head.head(endpoints,
                                  args.embedding_dim,
                                  args.model_name,
                                  is_training=True)
    else:
        with tf.name_scope('head'):
            endpoints = head.head(endpoints,
                                  args.embedding_dim,
                                  is_training=True)

    # Create the loss in two steps:
    # 1. Compute all pairwise distances according to the specified metric.
    # 2. For each anchor along the first dimension, compute its loss.
    #
    # Modified loss: instead of a single loss on endpoints['emb'], one loss
    # term is computed for each of `feature1`..`feature4` and one for
    # `fusion_layer`. Only the fusion-layer term provides the logged
    # top-1 / precision@k / positive / negative distance statistics.
    # NOTE(review): the endpoint keys used below presumably only exist for
    # the 'fusion' head - confirm before using another head.
    dists1 = loss.cdist(endpoints['feature1'],
                        endpoints['feature1'],
                        metric=args.metric)
    losses1, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists1, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists2 = loss.cdist(endpoints['feature2'],
                        endpoints['feature2'],
                        metric=args.metric)
    losses2, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists2, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists3 = loss.cdist(endpoints['feature3'],
                        endpoints['feature3'],
                        metric=args.metric)
    losses3, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists3, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists4 = loss.cdist(endpoints['feature4'],
                        endpoints['feature4'],
                        metric=args.metric)
    losses4, _, _, _, _, _ = loss.LOSS_CHOICES[args.loss](
        dists4, pids, args.margin, batch_precision_at_k=args.batch_k - 1)
    dists_fu = loss.cdist(endpoints['fusion_layer'],
                          endpoints['fusion_layer'],
                          metric=args.metric)
    losses_fu, train_top1, prec_at_k, _, neg_dists, pos_dists = loss.LOSS_CHOICES[
        args.loss](dists_fu,
                   pids,
                   args.margin,
                   batch_precision_at_k=args.batch_k - 1)

    losses = losses1 + losses2 + losses3 + losses4 + losses_fu

    # Count the number of active entries, and compute the total batch loss.
    num_active = tf.reduce_sum(tf.cast(tf.greater(losses, 1e-5), tf.float32))

    # Here `losses` is the part by which the positive-pair distance exceeds
    # the negative-pair distance plus the margin.
    loss_mean = tf.reduce_mean(losses)

    # Some logging for tensorboard.
    tf.summary.histogram('loss_distribution', losses)
    tf.summary.scalar('loss', loss_mean)
    tf.summary.scalar('batch_top1', train_top1)
    tf.summary.scalar('batch_prec_at_{}'.format(args.batch_k - 1), prec_at_k)
    tf.summary.scalar('active_count', num_active)
    tf.summary.histogram('embedding_pos_dists', pos_dists)
    tf.summary.histogram('embedding_neg_dists', neg_dists)
    tf.summary.histogram('embedding_lengths',
                         tf.norm(endpoints['emb_raw'], axis=1))

    # Create the mem-mapped arrays in which we'll log all training detail in
    # addition to tensorboard, because tensorboard is annoying for detailed
    # inspection and actually discards data in histogram summaries.
    if args.detailed_logs:
        log_embs = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'embeddings'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size, args.embedding_dim))
        log_loss = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'losses'),
            dtype=np.float32,
            shape=(args.train_iterations, batch_size))
        log_fids = lb.create_or_resize_dat(
            os.path.join(args.experiment_root, 'fids'),
            dtype='S' + str(max_fid_len),
            shape=(args.train_iterations, batch_size))

    # These are collected here before we add the optimizer, because depending
    # on the optimizer, it might add extra slots, which are also global
    # variables, with the exact same prefix.
    model_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        body_prefix)

    # Define the optimizer and the learning-rate schedule.
    # Unfortunately, we get NaNs if we don't handle no-decay separately.
    # 'global_step' means the number of batches seen by the graph.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    if 0 <= args.decay_start_iteration < args.train_iterations:
        # Decay every 'lr_decay_steps' after 'decay_start_iteration'.
        learning_rate = tf.train.exponential_decay(
            args.learning_rate,
            tf.maximum(0, global_step - args.decay_start_iteration),
            args.lr_decay_steps,
            args.lr_decay_factor,
            staircase=True)
    else:
        # The case when 'decay_start_iteration' is set to -1 (no decay).
        learning_rate = args.learning_rate
    tf.summary.scalar('learning_rate', learning_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate, epsilon=1e-3)
    # Feel free to try others!
    # optimizer = tf.train.AdadeltaOptimizer(learning_rate)

    # Update_ops are used to update batchnorm stats.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = optimizer.minimize(loss_mean, global_step=global_step)

    # Define a saver for the complete model.
    checkpoint_saver = tf.train.Saver(max_to_keep=0)

    # NOTE(review): `config` is not defined in this function; it is presumably
    # a module-level `tf.ConfigProto` - confirm.
    with tf.Session(config=config) as sess:
        if args.resume:
            # In case we're resuming, simply load the full checkpoint to init.
            last_checkpoint = tf.train.latest_checkpoint(args.experiment_root)
            log.info('Restoring from checkpoint: {}'.format(last_checkpoint))
            checkpoint_saver.restore(sess, last_checkpoint)
        else:
            # But if we're starting from scratch, we may need to load some
            # variables from the pre-trained weights, and random init others.
            sess.run(tf.global_variables_initializer())
            if args.initial_checkpoint is not None:
                # Restore the pre-trained parameters of the model body.
                saver = tf.train.Saver(model_variables)
                saver.restore(sess, args.initial_checkpoint)

            # In any case, we also store this initialization as a checkpoint,
            # such that we could run exactly re-producable experiments.
            checkpoint_saver.save(sess,
                                  os.path.join(args.experiment_root,
                                               'checkpoint'),
                                  global_step=0)

        merged_summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(args.experiment_root,
                                               sess.graph)

        start_step = sess.run(global_step)
        log.info('Starting training from iteration {}.'.format(start_step))

        # Bind `step` up front: if the loop below does not execute at all
        # (e.g. resuming an already finished run) the final checkpoint save
        # would otherwise fail with a NameError.
        step = start_step

        # Finally, here comes the main-loop. This `Uninterrupt` is a handy
        # utility such that an iteration still finishes on Ctrl+C and we can
        # stop the training cleanly.
        with lb.Uninterrupt(sigs=[SIGINT, SIGTERM], verbose=True) as u:
            for i in range(start_step, args.train_iterations):

                # Compute gradients, update weights, store logs!
                start_time = time.time()
                _, summary, step, b_prec_at_k, b_embs, b_loss, b_fids = \
                    sess.run([train_op, merged_summary, global_step,
                              prec_at_k, endpoints['emb'], losses, fids])
                elapsed_time = time.time() - start_time

                # Compute the iteration speed and add it to the summary.
                # We did observe some weird spikes that we couldn't track down.
                summary2 = tf.Summary()
                summary2.value.add(tag='secs_per_iter',
                                   simple_value=elapsed_time)
                summary_writer.add_summary(summary2, step)
                summary_writer.add_summary(summary, step)

                if args.detailed_logs:
                    log_embs[i], log_loss[i], log_fids[
                        i] = b_embs, b_loss, b_fids

                # Do a huge print out of the current progress.
                seconds_todo = (args.train_iterations - step) * elapsed_time
                log.info(
                    'iter:{:6d}, loss min|avg|max: {:.3f}|{:.3f}|{:6.3f}, '
                    'batch-p@{}: {:.2%}, ETA: {} ({:.2f}s/it)'.format(
                        step, float(np.min(b_loss)), float(np.mean(b_loss)),
                        float(np.max(b_loss)), args.batch_k - 1,
                        float(b_prec_at_k),
                        timedelta(seconds=int(seconds_todo)), elapsed_time))
                sys.stdout.flush()
                sys.stderr.flush()

                # Save a checkpoint of training every so often.
                if (args.checkpoint_frequency > 0
                        and step % args.checkpoint_frequency == 0):
                    checkpoint_saver.save(sess,
                                          os.path.join(args.experiment_root,
                                                       'checkpoint'),
                                          global_step=step)

                # Stop the main-loop at the end of the step, if requested.
                if u.interrupted:
                    log.info("Interrupted on request!")
                    break

        # Store one final checkpoint. This might be redundant, but it is crucial
        # in case intermediate storing was disabled and it saves a checkpoint
        # when the process was interrupted.
        checkpoint_saver.save(sess,
                              os.path.join(args.experiment_root, 'checkpoint'),
                              global_step=step)
示例#38
0
            # Average the nll ([0]), accuracy ([1]) and penalty ([2]) entries
            # of the first three `env` items; the fourth item (env[3]) only
            # contributes the test accuracy.
            train_nll = tf.reduce_mean([env[0][0], env[1][0], env[2][0]])
            train_accuracy = tf.reduce_mean([env[0][1], env[1][1], env[2][1]])
            train_penalty = tf.reduce_mean([env[0][2], env[1][2], env[2][2]])

            test_accuracy = env[3][1]

            # Feed the values of this step to the metric callables.
            train_loss(train_nll)
            train_acc(train_accuracy)
            test_acc(test_accuracy)

            tape_src.watch(train_nll)


            # Accumulate the sum of squared norms of all trainable variables.
            # NOTE(review): the second positional argument of `tf.zeros` is
            # `dtype`, so `tf.zeros(1,1)` passes 1 as the dtype - confirm
            # this is intended.
            weight_norm = tf.zeros(1,1)
            for w in model.trainable_variables:
                weight_norm += tf.norm(w)**2

            # Total loss = nll + weighted L2 term + weighted penalty; the
            # penalty weight stays at 0.01 until `step` reaches
            # `flags.penalty_anneal_iters`.
            loss = train_nll
            loss += flags.l2_regularizer_weight * weight_norm
            penalty_weight = (flags.penalty_weight 
                if step >= flags.penalty_anneal_iters else 0.01)
            loss += penalty_weight * train_penalty
            if penalty_weight > 1.0:
                # Rescale the entire loss to keep gradients in a reasonable range
                loss /= penalty_weight
            # update weights of classifier
            grads = tape_src.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
        
    # `step % 1` is 0 for every integer step, so this prints on every step.
    if step % 1 == 0:    
        pretty_print('epoch', 'train nll', 'train acc', 'test acc')
示例#39
0
    def radial_flow_modified(self,
                             z,
                             flow_params,
                             num_flows,
                             n_latent_dim,
                             invert_condition=True):
        """Apply `num_flows` radial flow steps to `z`.

        For flow step ``k`` the parameters are sliced from `flow_params` as
        ``z0 = z0s[:, k*n_latent_dim:(k+1)*n_latent_dim]``,
        ``alpha = alphas[:, k]`` and ``beta = betas[:, k]``.

        Args:
            z: tensor to transform; the norm of ``z - z0`` is taken along
                axis 1.
            flow_params: tuple ``(z0s, alphas, betas)``.
            num_flows: number of flow steps to apply.
            n_latent_dim: width of one ``z0`` slice; also used as the
                exponent base ``n_latent_dim - 1`` in the log-determinant.
            invert_condition: when true, ``beta`` is replaced by
                ``-alpha + softplus(beta)``; otherwise it is used as is.

        Returns:
            Tuple ``(z, sum_logdet_jacobian)``: the transformed tensor and the
            per-step log-determinants summed along axis 1. When `num_flows`
            is 0, `z` is returned unchanged together with a fresh scalar
            ``tf.Variable(0.0)``.

        The debug output uses single-argument ``print(...)`` calls so that
        the method is valid on Python 3 and prints the same text on Python 2.
        """
        z0s, alphas, betas = flow_params
        print("z0s shape: {}".format(z0s.get_shape()))
        print("alphas shape: {}".format(alphas.get_shape()))
        print("betas shape: {}".format(betas.get_shape()))

        log_detjs = []
        if num_flows == 0:
            # Identity flow: f(z) = z, with a zero log-determinant.
            sum_logdet_jacobian = tf.Variable(0.0, dtype=tf.float32)
        else:
            for k in range(num_flows):
                z0 = z0s[:, k * n_latent_dim:(k + 1) * n_latent_dim]
                alpha = alphas[:, k]
                beta = betas[:, k]
                print("z0 shape {}".format(z0.get_shape()))
                print("alpha shape {}".format(alpha.get_shape()))
                print("beta shape {}".format(beta.get_shape()))
                if invert_condition:
                    # m(x)= log(1 + exp(x)) where x= w'*u. Last equation in
                    # A.2 Radial Flows.
                    m_of_beta = self.softplus(beta)
                    print("m_of_beta {}".format(m_of_beta.get_shape()))
                    print("alpha {}".format(alpha.get_shape()))
                    beta_hat = -alpha + m_of_beta
                    print("beta_hat {}".format(beta_hat.get_shape()))
                else:
                    beta_hat = beta
                    print("beta_hat {}".format(beta_hat.get_shape()))

                # Distance of each data point from z0.
                r = tf.norm((z - z0), ord='euclidean', axis=1)

                # Argument of h(.) in equation 14.
                h_alpha_r = self.get_h(r, alpha)
                print("beta_hat {}".format(beta_hat.get_shape()))
                beta_h_alpha_r = beta_hat * h_alpha_r
                print("beta_h_alpha_r {}".format(beta_h_alpha_r.get_shape()))

                # f(z) = z + beta_hat * h(alpha, r) * (z - z0); the scalar
                # factor is expanded along axis 1 to broadcast against z.
                z = z + tf.multiply(
                    (z - z0), tf.expand_dims(beta_h_alpha_r, 1))

                # Calculation of log det jacobian. The 1e-6 keeps the
                # argument of the log away from zero.
                h_derivative_alpha_r = self.get_derivative_h(r, alpha)
                beta_h_derivative_alpha_r = beta_hat * h_derivative_alpha_r
                logdet_jacobian = tf.log(
                    1e-6 + ((1.0 + beta_h_alpha_r)**(n_latent_dim - 1)) *
                    (1.0 + beta_h_alpha_r + beta_h_derivative_alpha_r * r))

                log_detjs.append(tf.expand_dims(logdet_jacobian, 1))
            # One column per flow step, summed into a single value per row.
            logdet_jacobian = tf.concat(log_detjs, axis=1)
            sum_logdet_jacobian = tf.reduce_sum(logdet_jacobian, axis=1)
        return z, sum_logdet_jacobian
示例#40
0
def unit(vector):
    """Return `vector` as a tensor divided by ``tf.norm(vector)``."""
    as_tensor = tf.convert_to_tensor(vector)
    magnitude = tf.norm(vector)
    return as_tensor / magnitude
示例#41
0
    def model_fn(self, features, labels, mode, params, config):
        """Build the generator / critic (and optional encoder) graph and
        return the estimator spec for `mode`.

        Args:
            features: mapping that provides the input batch under 'image'.
            labels: not used in this method.
            mode: one of the ``tf.estimator.ModeKeys`` values.
            params: forwarded to the network builders and spec helpers.
            config: forwarded to the spec helpers.

        Returns:
            Whatever ``prediction_estimator_spec``, ``training_estimator_spec``
            or ``evaluation_estimator_spec`` returns for the given `mode`.
        """
        image = features['image']
        batch_size = tf.shape(image)[0]

        training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Generator
        self._generator = layers.Segment(self.generative_network(params),
                                         name="generator")

        # Random code drawn uniformly from [-1, 1).
        code = tf.random_uniform(shape=(batch_size, ) + self._code_shape,
                                 minval=-1.,
                                 maxval=1.,
                                 dtype=tf.float32)

        synthetic = tf.nn.sigmoid(
            self._generator.apply(code, training=training))
        # Copy of the generated batch that blocks gradients to the generator.
        synthetic_ng = tf.stop_gradient(synthetic)

        # A single scalar mixing factor (shape=()) shared by the whole batch.
        epsilon = tf.random_uniform(shape=(),
                                    minval=0,
                                    maxval=1.,
                                    dtype=tf.float32)
        synthmix = epsilon * image + (1 - epsilon) * synthetic_ng

        # Critic
        self._critic = layers.Segment(self.critic_network(params),
                                      name="critic")

        f_synth = self._critic.apply(synthetic, training=training)
        f_synth_ng = self._critic.apply(synthetic_ng, training=training)
        f_data = self._critic.apply(image, training=training)

        # Critic output on the mixed samples and its gradient w.r.t. them.
        f_mix = self._critic.apply(synthmix, training=training)
        f_grad = tf.gradients(f_mix, synthmix)

        # Autoencoder
        if self._autoencoder:
            self._encoder = layers.Segment(self.encoder_network(params),
                                           name="encoder")

            code_ae = self._encoder.apply(synthetic, training=training)

        # Losses
        loss_wgan = tf.reduce_mean(f_data - f_synth)

        # NOTE(review): `loss_ae` is only written to the summary below; it is
        # never added to `loss` - confirm this is intended.
        loss_ae = tf.constant(0, dtype=tf.float32)
        if self._autoencoder:
            loss_ae = tf.nn.l2_loss(code - code_ae) / tf.cast(batch_size,
                                                              dtype=tf.float32)

        loss_crit = -tf.reduce_mean(f_data - f_synth_ng)

        # Penalize the deviation of the gradient norm from 1. `tf.norm` is
        # called without `axis`, so one norm is taken over the whole gradient.
        loss_lip = tf.square(tf.norm(f_grad, ord=2) - 1)

        # loss_lip = sum([tf.square(tf.nn.relu(tf.nn.l2_loss(w) - 2))
        #                 for l in self._critic.layers for w in l.variables])

        # Down-weight the adversarial terms while the penalty is large; the
        # weight itself receives no gradient.
        alpha = tf.exp(-1 * tf.stop_gradient(loss_lip))
        loss = alpha * (0.2 * loss_wgan + loss_crit) + 10 * loss_lip

        loss += sum([l for l in self._generator.losses])
        # NOTE(review): `self._classifier` is not assigned anywhere in this
        # method; possibly `self._critic` was meant - confirm.
        loss += sum([l for l in self._classifier.losses])
        # NOTE(review): `self._encoder` is only assigned above when
        # `self._autoencoder` is truthy; this presumably relies on it being
        # initialised elsewhere - confirm.
        if self._encoder:
            loss += sum([l for l in self._encoder.losses])

        if mode == tf.estimator.ModeKeys.PREDICT:
            return self.prediction_estimator_spec(image, code, synthetic,
                                                  params, config)

        tf.summary.scalar('loss/wgan', loss_wgan)
        tf.summary.scalar('loss/lip', loss_lip)
        tf.summary.scalar('loss/ae', loss_ae)

        # Configure the Training Op (for TRAIN mode)
        if mode == tf.estimator.ModeKeys.TRAIN:
            return self.training_estimator_spec(loss, image, code, synthetic,
                                                params, config)

        else:
            return self.evaluation_estimator_spec(loss, image, code, synthetic,
                                                  params, config)
 def Loss(self,Newvar_D, Newvar_lbda):
   """Return the scaled squared Euclidean distance between
   ``self.datapoint`` and ``exp(self.wass_grad((Newvar_D, Newvar_lbda)))``.
   """
   raw = self.wass_grad((Newvar_D, Newvar_lbda))
   p = tf.math.exp(raw)
   residual = self.datapoint-p
   # The arithmetic chain is kept exactly as written so that the same ops
   # are evaluated in the same order.
   return tf.norm(residual, ord='euclidean')**2*1/2*1000
def accuracy(output,label):
  """Return ``tf.norm(output - label)`` divided by ``tf.norm(label)``."""
  error_norm = tf.norm(output - label)
  label_norm = tf.norm(label)
  return error_norm/label_norm
示例#44
0
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 num_labels,
                 embed,
                 learning_rate=0.005,
                 max_gradient_norm=5.0,
                 param_da=150,
                 param_r=10,
                 model_type=0,
                 attention=True):
        """Build the text classification graph: vocabulary lookup, embedding,
        bidirectional multi-layer RNN, optional self-attention, loss and the
        gradient-descent update op.

        Args:
            num_symbols: vocabulary size; only used when `embed` is None.
            num_embed_units: embedding width; only used when `embed` is None.
            num_units: number of units of every RNN cell.
            num_layers: number of stacked cells per direction.
            num_labels: number of output classes.
            embed: initial embedding matrix, or None for a fresh variable.
            learning_rate: initial value of the (non-trainable) learning rate.
            max_gradient_norm: global-norm clipping threshold.
            param_da: first dimension of the attention matrix ``Ws1``.
            param_r: first dimension of the attention matrix ``Ws2``.
            model_type: 2 selects GRUCell, 1 BasicLSTMCell, anything else
                BasicRNNCell.
            attention: whether to use the self-attention branch.
        """

        self.texts = tf.placeholder(tf.string, (None, None),
                                    'texts')  # shape: [batch, length]

        # Placeholders for the sequence lengths and the target labels.
        self.texts_length = tf.placeholder(tf.int32, (None, ),
                                           'texts_length')  # shape: [batch]
        self.labels = tf.placeholder(tf.int64, (None, ),
                                     'labels')  # shape: [batch]

        # String -> index table; unknown strings map to UNK_ID.
        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             name="in_table",
                                             checkpoint=True)

        batch_size = tf.shape(self.texts)[0]
        # build the vocab table (string to index)
        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.global_step = tf.Variable(0, trainable=False)
        self.index_input = self.symbol2index.lookup(
            self.texts)  # shape: [batch, length]

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        # Embedded inputs.
        self.embed_input = tf.nn.embedding_lookup(
            self.embed,
            self.index_input)  #shape: [batch, length, num_embed_units]

        # Pick the cell class, then stack `num_layers` cells of `num_units`
        # units for each direction.
        if model_type == 2:
            rnn_model = GRUCell
        elif model_type == 1:
            rnn_model = BasicLSTMCell
        else:
            rnn_model = BasicRNNCell
        cell_fw = MultiRNNCell(
            [rnn_model(num_units) for i in range(num_layers)])
        cell_bw = MultiRNNCell(
            [rnn_model(num_units) for i in range(num_layers)])

        # Bidirectional RNN; forward and backward outputs are concatenated
        # along the last axis.
        outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw,
                                                          cell_bw,
                                                          self.embed_input,
                                                          self.texts_length,
                                                          dtype=tf.float32,
                                                          scope="rnn")
        H = tf.concat(outputs, 2)  # shape: (batch, length, 2*num_units)

        with tf.variable_scope('logits'):
            if attention:
                # Self-attention: A = softmax(Ws2 * tanh(Ws1 * H^T)), M = A * H.
                # NOTE(review): Ws1 is rank 2 while H is rank 3; this relies
                # on `tf.matmul` broadcasting the batch dimension - confirm
                # for the TensorFlow version in use.
                Ws1 = tf.get_variable("Ws1", [param_da, 2 * num_units])
                Ws2 = tf.get_variable("Ws2", [param_r, param_da])

                A = tf.nn.softmax(
                    tf.matmul(Ws2,
                              tf.nn.tanh(tf.matmul(Ws1, H, transpose_b=True))))
                M = tf.matmul(A, H)  # shape: [batch, param_r, 2*num_units]
                flatten_M = tf.reshape(
                    M, shape=[batch_size, param_r * 2 * num_units
                              ])  # shape: [batch, param_r*2*num_units]

                logits = tf.layers.dense(
                    flatten_M, num_labels, activation=None,
                    name='projection')  # shape: [batch, num_labels]
            else:
                # Without attention the RNN outputs are averaged over axis 1.
                M = tf.reduce_mean(H, axis=1)
                flatten_H = tf.reshape(M, shape=[batch_size, 2 * num_units])
                logits = tf.layers.dense(flatten_H,
                                         num_labels,
                                         activation=None,
                                         name='projection')

        # Additional loss: one param_r x param_r identity matrix per batch
        # item, used for the penalty ||A * A^T - I||.
        identity = tf.reshape(
            tf.tile(tf.diag(tf.ones([param_r])), [batch_size, 1]),
            [batch_size, param_r, param_r])
        if attention:
            self.penalized_term = tf.norm(
                tf.matmul(A, A, transpose_b=True) - identity)
            # Cross-entropy plus the attention penalty weighted by 0.0001.
            self.loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.labels, logits=logits),
                name='loss') + 0.0001 * self.penalized_term
        else:
            self.loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=self.labels, logits=logits),
                name='loss')
        predict_labels = tf.argmax(logits, 1, 'predict_labels')
        # Number (not ratio) of correct predictions in the batch.
        self.accuracy = tf.reduce_sum(tf.cast(
            tf.equal(self.labels, predict_labels), tf.int32),
                                      name='accuracy')

        self.params = tf.trainable_variables()

        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.loss, self.params)
        # Clip by global norm before applying the update.
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=6,
                                    pad_step_number=True)
def _mean_axis1_distance(lhs, rhs):
    """Mean of the Euclidean distances between `lhs` and `rhs`, where each
    distance is the square root of the squared differences summed on axis 1.
    """
    squared_diff = tf.pow(tf.subtract(lhs, rhs), 2)
    summed = tf.reduce_sum(squared_diff, 1, keep_dims=True)
    return tf.reduce_mean(tf.sqrt(summed))


# Hinge on the difference of the two mean distances, offset by `alpha`.
distance1 = _mean_axis1_distance(fc_featsx, fc_featsy1)
distance2 = _mean_axis1_distance(fc_featsx, fc_featsy2)
loss0 = tf.maximum(distance1 - distance2 + alpha, 0)

# Norm of the difference between the x and y1 features. `tf.norm` is called
# without `axis`, so it already yields a single value.
_diff_x_y1 = tf.subtract(fc_featsx, fc_featsy1)
loss3a = tf.reduce_mean(tf.norm(_diff_x_y1))
loss3 = loss3a

# Softmax cross-entropy of both prediction heads against the `ly1` labels.
_xent_predx = tf.nn.softmax_cross_entropy_with_logits(logits=predx,
                                                      labels=ly1)
loss4 = tf.reduce_mean(_xent_predx)
_xent_predy1 = tf.nn.softmax_cross_entropy_with_logits(logits=predy1,
                                                       labels=ly1)
loss5 = tf.reduce_mean(_xent_predy1)

# Sum of the norms of (x - duy1) and (y1 - dux).
_term_x_duy1 = tf.reduce_mean(tf.norm(tf.subtract(x, duy1)))
_term_y1_dux = tf.reduce_mean(tf.norm(tf.subtract(y1, dux)))
loss9 = _term_x_duy1 + _term_y1_dux
示例#46
0
def test_nets_and_update(env, config):
    """Check that the target-network update op copies the q-network weights.

    Builds two networks ("q_test" and "target_q_test") from a ``DQNquantie``
    model, verifies that their non-bias weights differ right after
    initialization, runs the update op, and verifies that the weights are
    identical afterwards.

    Args:
        env: environment handed to the ``DQNquantie`` constructor.
        config: configuration handed to the ``DQNquantie`` constructor.

    Raises:
        AssertionError: if the variable counts differ, if both networks start
            with the same weights, or if the weights still differ after the
            update.
    """
    tf.reset_default_graph()
    model = DQNquantie(env, config)

    # inject test data
    s = tf.ones([1, 80, 80, 4], dtype=tf.float32)
    sp = tf.ones([1, 80, 80, 4], dtype=tf.float32)

    # create q_test and target_q_test
    q_test = model.get_q_values_op(s, scope="q_test", reuse=False)
    target_q_test = model.get_q_values_op(
        sp, scope="target_q_test", reuse=False)

    # create update_op
    q_test_var_lst = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES,
        "q_test")
    target_q_test_var_lst = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES,
        "target_q_test")
    update_target_op = model.add_update_target_op("q_test", "target_q_test")

    assert len(q_test_var_lst) == len(target_q_test_var_lst), \
        "number of variables in q and target_q differ"

    def weight_distances(sess):
        """Return the Euclidean distance between each pair of non-bias variables."""
        distances = []
        for q_var, target_var in zip(q_test_var_lst, target_q_test_var_lst):
            # skip biases, since they are initialized with 0's in both nets
            if 'bias' in q_var.name:
                continue
            distances.append(sess.run(tf.norm(q_var - target_var)))
        return distances

    # main logic of the test; the context manager releases the session even
    # when one of the assertions below fails
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # check difference before the update
        distance_before_lst = weight_distances(sess)
        assert np.mean(distance_before_lst) != 0., \
            'q and taget_q initialized with the same weights'

        # perform update
        sess.run(update_target_op)

        # check difference after the update
        distance_after_lst = weight_distances(sess)
        assert np.mean(distance_after_lst) == 0., \
            'network creation and update test failed'

    print(" -- network creation and update test passed")
示例#47
0
def my_segcap(images, is_train, size, l2_reg):
    """Build a U-shaped capsule segmentation network and return its prediction.

    The network is a 1x1 convolution, two stride-2 capsule "conv" stages,
    two middle residual capsule blocks, two stride-2 capsule "deconv" stages
    with skip concatenations, and a final capsule layer whose vector length
    (norm over the last axis) is batch-normalized to form the prediction.

    Args:
        images: input tensor fed to the first ``conv`` layer.
        is_train: not read by this function.
            NOTE(review): ``is_training`` is hard-coded to True below, so this
            argument has no effect -- confirm whether that is intended.
        size: not read by this function.
        l2_reg: passed to ``conv`` as ``l2_reg_scale``.

    Returns:
        A tuple ``(predict, end_points)`` where ``end_points`` is an
        ``OrderedDict`` mapping layer names to the intermediate tensors.
    """
    is_training = True
    start_s = 2
    atom = 16
    routing = 3
    end_points = OrderedDict()

    # 1  (128 -> 128)
    L1_conv1 = conv(images,
                    filters=atom,
                    kernel_size=[1, 1],
                    l2_reg_scale=l2_reg,
                    batchnorm_istraining=is_training)
    # Insert a capsule-type axis so the tensor has the capsule layout.
    conv_prime = tf.expand_dims(L1_conv1, axis=3)  # [N, H, W, t=1, z]

    # 1/2  (128 -> 64)
    multiple = 1
    L2_cap1_1 = residual_cap_block(conv_prime, routing=routing)
    L3_cap1_2 = capsule(L2_cap1_1,
                        "conv",
                        k=3,
                        s=2,
                        t=start_s * multiple,
                        z=atom,
                        routing=routing)
    skip1 = L2_cap1_1

    # 1/4  (64 -> 32)
    multiple = 2
    L4_cap2_1 = residual_cap_block(L3_cap1_2, routing=routing)
    L5_cap2_2 = capsule(L4_cap2_1,
                        "conv",
                        k=3,
                        s=2,
                        t=start_s * multiple,
                        z=atom,
                        routing=routing)
    skip2 = L4_cap2_1

    #middle  (16 -> 16)
    L6_cap_m_1 = residual_cap_block(L5_cap2_2, routing=routing)
    L7_cap_m_2 = residual_cap_block(L6_cap_m_1, routing=routing)

    # 1/4  (32 -> 64)
    multiple = 2
    L8_u_cap2_1 = capsule(L7_cap_m_2,
                          "deconv",
                          k=3,
                          s=2,
                          t=start_s * multiple,
                          z=atom,
                          routing=routing)
    # Concatenate with the encoder skip connection along axis 3.
    u_cap_concat_2 = tf.concat([L8_u_cap2_1, skip2], axis=3)
    L9_u_cap2_3 = residual_cap_block(u_cap_concat_2, routing=routing)

    # 1/2  (64 -> 128)
    multiple = 1
    L10_u_cap3_1 = capsule(L9_u_cap2_3,
                           "deconv",
                           k=3,
                           s=2,
                           t=start_s * multiple,
                           z=atom,
                           routing=routing)
    u_cap_concat_3 = tf.concat([L10_u_cap3_1, skip1], axis=3)
    L11_u_cap3_2 = capsule(u_cap_concat_3,
                           "conv",
                           k=3,
                           s=1,
                           t=start_s * multiple,
                           z=atom * 4,
                           routing=routing)
    L12_u_cap3_3 = residual_cap_block(L11_u_cap3_2, routing=routing)
    L13_u_cap3_4 = residual_cap_block(L12_u_cap3_3, routing=routing)
    L14_u_cap3_5 = capsule(L13_u_cap3_4,
                           "conv",
                           k=3,
                           s=1,
                           t=1,
                           z=atom,
                           routing=routing)
    # L14_u_cap3_5_l_list =tf.split(L14_u_cap3_5,num_or_size_splits=atom,axis=4)
    # L14_u_cap3_5_l_add = tf.add_n(L14_u_cap3_5_l_list)
    # predict = tf.squeeze(L14_u_cap3_5_l_add, axis=[4])

    #  tf.norm defaults to the Euclidean (Frobenius) norm, i.e. the square
    #  root of the sum of the squared absolute values of the elements.
    #  With axis=-1 it is taken along the last axis of L14_u_cap3_5.
    predict = tf.norm(L14_u_cap3_5, axis=-1)
    predict = bn(predict, is_training)
    # tf.squeeze()

    # 1  (128 -> 128)
    # u_cap_concat_4=cap_out_1
    # [N, H_1, W_1, t_1, z_1] =u_cap_concat_4.get_shape()
    # u_cap_concat_4 = tf.reshape(u_cap_concat_4, [N, H_1, W_1, 1,t_1* z_1])

    # Plain (non-capsule) output layer, currently disabled
    # cap_out_4 = tf.squeeze(u_cap_concat_4, axis=3)
    # cap_out_7 =conv(cap_out_4, filters=24, kernel_size=[1,1],l2_reg_scale=l2_reg, batchnorm_istraining=is_training)
    # cap_out_8 =conv(cap_out_7, filters=1, kernel_size=[1,1],l2_reg_scale=l2_reg, batchnorm_istraining=is_training)
    # cap_out_9 = bn(cap_out_8, is_training)

    ################   end_points  ##########################
    # Exposes the intermediate layers, e.g. for visualization
    end_points['L1_conv1'] = L1_conv1  #Layer 1
    end_points['L2_cap1_1'] = L2_cap1_1  #Layer 2  skip1
    end_points['L3_cap1_2'] = L3_cap1_2  #Layer 3
    end_points['L4_cap2_1'] = L4_cap2_1  #Layer 4  skip2
    end_points['L5_cap2_2'] = L5_cap2_2  #Layer 5
    end_points['L6_cap_m_1'] = L6_cap_m_1  #Layer 6
    end_points['L7_cap_m_2'] = L7_cap_m_2  #Layer 7
    end_points['L8_u_cap2_1'] = L8_u_cap2_1  #Layer 8  skip2
    end_points['L9_u_cap2_3'] = L9_u_cap2_3  #Layer 9
    end_points['L10_u_cap3_1'] = L10_u_cap3_1  #Layer 10  skip1
    end_points['L11_u_cap3_2'] = L11_u_cap3_2  #Layer 11
    end_points['L12_u_cap3_3'] = L12_u_cap3_3  #Layer 12
    end_points['L13_u_cap3_4'] = L13_u_cap3_4  #Layer 13
    end_points['L14_u_cap3_5'] = L14_u_cap3_5  #Layer 14
    end_points['predict'] = predict  #Layer 15
    ################     end       ###########################

    return predict, end_points
示例#48
0
def robust_norm(x):
    """Return the norm of ``x`` along axis 2, rescaled by its peak magnitude.

    The input is shifted by 1e-8, divided by its largest absolute value along
    axis 2 before the norm is taken, and the result is multiplied back by
    that largest value.
    """
    shifted = x + 1e-8
    peak = tf.reduce_max(tf.abs(shifted), axis=2, keep_dims=True)
    peak_flat = tf.squeeze(peak, [2])
    return peak_flat * tf.norm(shifted / peak, axis=2)
示例#49
0
    def test_NN(self, net, record_path=None, save_name=None):
        """Evaluate ``net`` against the exact test solution.

        Computes the mean squared error and the mean row-wise relative L2
        error between the exact solution and the network prediction, prints
        both, and optionally appends one result row to
        ``record_path + "rel_errs2.csv"`` (the header row is written first if
        that file does not exist yet).

        Args:
            net: model exposing ``forward``, ``compute_residual``, ``name``
                and ``layers``.
            record_path: prefix of the CSV record file, or None to skip
                recording.
            save_name: value written to the ``save_name`` column.

        Returns:
            ``(u_test_grid, u_test_p_grid, err_test, rel_err_test,
            f_res_grid)``; ``f_res_grid`` is None unless the sampling method
            is 1 or 2.

        Raises:
            ValueError: if ``self.sampling_method`` is not 0, 1, 2 or 3.
        """
        if record_path is not None:
            record_path = record_path + "rel_errs2.csv"
            if not os.path.exists(record_path):
                # newline='' is what the csv module expects for its files
                with open(record_path, mode='w', newline='') as record:
                    fields = [
                        'Problem', 'Net_struct', 'Net_setup', 'Sample', 'L',
                        'relative_err', 'save_name'
                    ]
                    record_writer = csv.writer(record,
                                               delimiter=',',
                                               quotechar='"',
                                               quoting=csv.QUOTE_MINIMAL)
                    record_writer.writerow(fields)
        X0_dict, u_test = self.u_exact_test()
        x_tf = X0_dict["x_tf"]
        y_tf = X0_dict["y_tf"]
        t_tf = X0_dict["t_tf"]
        xi_tf = X0_dict["xi_tf"]
        target_f = tf.zeros([self.N * self.N_p_test, 1])
        if self.sampling_method == 3:
            net.h_init = tf.constant(self.h_init, dtype=tf.float32)
        u_test_p = net.forward(x_tf, y_tf, t_tf, xi_tf)
        f_res = net.compute_residual(x_tf, y_tf, t_tf, xi_tf, target_f)

        # Only sampling methods 1 and 2 provide these; default them so the
        # CSV row and the return statement never hit an unbound local.
        N_record = None
        f_res_grid = None

        if self.sampling_method == 0:
            # Map the prediction back through the stored basis V.
            u_test_p = u_test_p.numpy()
            self.V = np.load(self.path_env + "V_{}.npy".format(self.L))
            u_test_p = u_test_p @ self.V.T
            u_test_p_grid = tf.constant(u_test_p, dtype=tf.float32)
            u_test_grid = tf.constant(u_test.T, dtype=tf.float32)
        elif self.sampling_method == 1 or self.sampling_method == 2:
            N_record = [self.Nf, self.Nb, self.Nn, self.N0]
            u_test_grid = tf.reshape(u_test, (self.N_p_test, self.N))
            u_test_p_grid = tf.reshape(u_test_p, (self.N_p_test, self.N))
            f_res_grid = tf.reshape(f_res, (self.N_p_test, self.N))
        elif self.sampling_method == 3:
            u_test_p = u_test_p.numpy()
            u_test_p_grid = tf.constant(u_test_p, dtype=tf.float32)
            u_test_grid = tf.constant(u_test.T, dtype=tf.float32)
        else:
            raise ValueError("Unsupported sampling_method: {0}".format(
                self.sampling_method))

        err_grid = u_test_grid - u_test_p_grid
        err_test = tf.math.reduce_mean(tf.square(err_grid))

        # Relative L2 error per row, then averaged over rows.
        relative_err_vec = tf.norm(err_grid, axis=1) / tf.norm(u_test_grid,
                                                               axis=1)
        rel_err_test = tf.reduce_mean(relative_err_vec)
        if record_path is not None:
            list_info = [
                self.name, net.name, net.layers, N_record, self.L,
                rel_err_test.numpy(), save_name
            ]
            with open(record_path, 'a', newline='') as f:
                writer = csv.writer(f)
                writer.writerow(list_info)
        print("Test average error is: {0}\nRelative error is: {1}".format(
            err_test.numpy(), rel_err_test.numpy()))

        return u_test_grid, u_test_p_grid, err_test, rel_err_test, f_res_grid
示例#50
0
    def __init__(self,
                 name,
                 inputs,
                 targets,
                 n_classes,
                 n_features,
                 tower_setup,
                 imgs_raw=None,
                 original_labels=None,
                 activation="linear",
                 dropout=0.0,
                 batch_norm=False,
                 batch_norm_decay=BATCH_NORM_DECAY_DEFAULT,
                 l2=L2_DEFAULT,
                 negative_weighting_factor=1):
        """Build a fully connected embedding layer with a batch-hard triplet loss.

        The layer computes ``h = activation(inp @ W + b)`` and, for every row
        of ``h`` taken as anchor, selects the farthest same-class row
        (excluding the anchor itself) and the closest other-class row. The
        per-anchor loss is ``softplus(hardest_positive - hardest_negative)``
        and ``self.loss`` is the sum over all anchors.

        Args:
            name: variable scope name for the layer.
            inputs: passed to ``prepare_collapsed_input_and_dropout``.
            targets: per-row class labels, compared against the anchor's label.
            n_classes: not read in this constructor.
            n_features: number of output features (columns of ``W``).
            tower_setup: forwarded to the variable / batch-norm helpers.
            imgs_raw: optional images indexed like ``h``; when given, anchor
                and hardest positive/negative images are added as summaries.
            original_labels: optional labels stored in ``self.measures``
                together with the embedding.
            activation: name resolved through ``get_activation``.
            dropout: forwarded to ``prepare_collapsed_input_and_dropout``.
            batch_norm: whether to batch-normalize the input first.
            batch_norm_decay: decay forwarded to the batch-norm helper.
            l2: forwarded to ``create_weight_variable``.
            negative_weighting_factor: only referenced by commented-out
                variants below; currently has no effect.
        """
        super(FullyConnectedWithTripletLoss, self).__init__()
        self.measures = {}
        inp, n_features_inp = prepare_collapsed_input_and_dropout(
            inputs, dropout)
        with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
            if batch_norm:
                # Add two leading singleton axes for the batch-norm helper and
                # remove them again afterwards.
                inp = tf.expand_dims(inp, axis=0)
                inp = tf.expand_dims(inp, axis=0)
                inp = self.create_and_apply_batch_norm(inp, n_features_inp,
                                                       batch_norm_decay,
                                                       tower_setup)
                inp = tf.squeeze(inp, axis=[0, 1])
            W = self.create_weight_variable("W", [n_features_inp, n_features],
                                            l2, tower_setup)
            b = self.create_bias_variable("b", [n_features], tower_setup)
            z = tf.matmul(inp, W) + b
            h = get_activation(activation)(z)
            self.outputs = [h]

            if original_labels is not None:
                self.measures[Constants.EMBEDDING] = [h]
                self.measures[Constants.ORIGINAL_LABELS] = [original_labels]

            # Norm of the first embedding row only, as a scalar summary.
            self.add_scalar_summary(tf.norm(h[0]), "embedding_norm")
            self.summaries.append(tf.summary.histogram("embedding", h))

            size = smart_shape(h)[0]
            # Added to the distances before taking the norm.
            # NOTE(review): presumably keeps the norm's gradient finite at
            # zero distance -- confirm.
            eps = 1e-10

            # Debug helper: asserts that x has no empty dimension (and, for
            # floats, that all values are finite) and prints it. Only
            # referenced from commented-out lines below.
            def my_print(x, name):
                with tf.control_dependencies([
                        tf.assert_equal(
                            tf.reduce_all(tf.greater(tf.shape(x), 0)), True)
                ]):
                    if x.dtype in (tf.float32, tf.float64):
                        with tf.control_dependencies([
                                tf.assert_equal(tf.reduce_all(tf.is_finite(x)),
                                                True)
                        ]):
                            return tf.Print(x, [
                                tf.shape(x),
                                tf.reduce_all(tf.is_finite(x)), x
                            ],
                                            name,
                                            summarize=200)
                    else:
                        return tf.Print(x, [tf.shape(x), x], name)

            # Per-anchor loss; mapped over every row index of h below.
            def get_loss(idx):
                anchor = h[idx, :]
                anchor_class = targets[idx]

                ###### New code ######
                # Rows with the anchor's class (positives, minus the anchor
                # itself) and rows with any other class (negatives).
                class_division = tf.equal(targets, anchor_class)
                not_self_mask = tf.logical_not(
                    tf.cast(tf.one_hot(idx, depth=size), tf.bool))
                positive_output = tf.boolean_mask(
                    h, tf.logical_and(class_division, not_self_mask))
                negative_output = tf.boolean_mask(
                    h, tf.logical_not(class_division))
                # negative_output = tf.boolean_mask(h, tf.logical_and(tf.logical_not(class_division),not_self_mask))
                # positive_output = my_print(positive_output,"positive_output")
                # negative_output = my_print(negative_output, "negative_output")

                # Hardest positive: the largest anchor-positive distance.
                positive_distances = tf.abs(anchor - positive_output)
                pos_dis_val = tf.norm(positive_distances + eps, axis=1)
                hardest_positive, hardest_positive_idx = tf.nn.top_k(
                    pos_dis_val, 1)

                # Hardest negative: the smallest anchor-negative distance,
                # obtained as top_k of the negated distances.
                negative_distances = tf.abs(anchor - negative_output)
                neg_dis_val = tf.norm(negative_distances + eps, axis=1)
                minus_neg_dis_val = tf.negative(neg_dis_val)
                # minus_neg_dis_val = tf.Print(minus_neg_dis_val,[minus_neg_dis_val])
                # minus_neg_dis_val = tf.Print(minus_neg_dis_val, [minus_neg_dis_val.shape])
                minus_hardest_negative, hardest_negative_idx = tf.nn.top_k(
                    minus_neg_dis_val, 1)
                hardest_negative = tf.negative(minus_hardest_negative)

                # minus_hardest_negative, hardest_negative_idx = tf.nn.top_k(minus_neg_dis_val, negative_weighting_factor)
                # hardest_negative = tf.negative(minus_hardest_negative)
                # hardest_negative = tf.reduce_sum(hardest_negative,-1)

                ###### Old code with dynamic partition ######
                # class_division = tf.cast(tf.equal(targets, anchor_class), tf.int32)
                # not_self_mask = tf.logical_not(tf.cast(tf.one_hot(idx, depth=size), tf.bool))
                # partitioned_output = tf.dynamic_partition(h, class_division, 2)
                # positive_output = partitioned_output[1]
                # negative_output = partitioned_output[0]

                # class_division = tf.equal(targets, anchor_class)
                # not_self_mask = tf.logical_not(tf.cast(tf.one_hot(idx, depth=size),tf.bool))
                # positive_output = tf.boolean_mask(h, tf.logical_and(class_division, not_self_mask))
                # negative_output = tf.boolean_mask(h, tf.logical_not(class_division))
                #
                #
                # positive_distances = tf.abs(anchor - positive_output)
                # pos_dis_val = tf.norm(positive_distances+eps, axis=1)
                # hardest_positive_idx = tf.argmax(pos_dis_val,0)
                # pos_div_size = smart_shape(positive_output)[0]
                # pos_divider = tf.one_hot(hardest_positive_idx,pos_div_size,dtype=tf.int32)
                # hardest_positive = tf.dynamic_partition(positive_distances,pos_divider,2)[1]
                # hardest_positive_class = tf.gather(targets, hardest_positive_idx)
                # hardest_positive = tf.norm(hardest_positive+eps, axis=1)
                #
                # negative_distances = tf.abs(anchor - negative_output)
                # neg_dis_val = tf.norm(negative_distances+eps, axis=1)
                # hardest_negative_idx = tf.argmin(neg_dis_val,0)
                # neg_div_size = smart_shape(negative_output)[0]
                # neg_divider = tf.one_hot(hardest_negative_idx,neg_div_size,dtype=tf.int32)
                # hardest_negative = tf.dynamic_partition(negative_distances,neg_divider,2)[1]
                # hardest_negative_class = tf.gather(targets,hardest_negative_idx)
                # hardest_negative = tf.norm(hardest_negative+eps, axis=1)

                # hardest_positive = my_print(hardest_positive,"hardest_positive")
                # hardest_negative = my_print(hardest_negative,"hardest_negative")

                #### Next two lines should be the same
                # Soft-margin triplet loss: log(1 + exp(d_pos - d_neg)).
                loss = tf.nn.softplus(hardest_positive - hardest_negative)
                # loss = tf.nn.softplus(hardest_positive - negative_weighting_factor*hardest_negative)
                # loss = tf.log1p(tf.exp(hardest_positive - hardest_negative))

                #### Code for using a hard margin rather than a softmargin
                # margin = 1
                # loss = tf.maximum(0., margin + hardest_positive - hardest_negative)

                # Scalar placeholders so the return structure is the same
                # whether or not images were supplied.
                anchor_img = tf.zeros([], tf.float32)
                hard_pos_img = tf.zeros([], tf.float32)
                hard_neg_img = tf.zeros([], tf.float32)
                if imgs_raw is not None:
                    positive_images = tf.boolean_mask(
                        imgs_raw, tf.logical_and(class_division,
                                                 not_self_mask))
                    negative_images = tf.boolean_mask(
                        imgs_raw, tf.logical_not(class_division))
                    anchor_img = imgs_raw[idx]
                    hard_pos_img = positive_images[tf.squeeze(
                        hardest_positive_idx)]
                    hard_neg_img = negative_images[tf.squeeze(
                        hardest_negative_idx)]

                    # self.summaries.append(tf.summary.image("anchor_image", imgs_raw[idx]))
                    # positive_images = tf.squeeze(tf.boolean_mask(imgs_raw, tf.logical_and(class_division, not_self_mask)))
                    # negative_images = tf.squeeze(tf.boolean_mask(imgs_raw, tf.logical_not(class_division)))
                    # self.summaries.append(tf.summary.image("hardest_postive_image",positive_images[hardest_positive_idx]))
                    # self.summaries.append(tf.summary.image("hardest_negative_image", negative_images[hardest_negative_idx]))

                return loss, hardest_positive, hardest_negative, anchor_img, hard_pos_img, hard_neg_img

            #### Next two lines should be the same
            # Evaluate get_loss for every row index of h.
            loss, hardest_positive, hardest_negative, anchor_imgs, hard_pos_imgs, hard_neg_imgs = \
              tf.map_fn(get_loss, tf.range(0, size), dtype=(tf.float32,tf.float32,tf.float32, tf.float32, tf.float32, tf.float32))
            # loss, hardest_positive, hardest_negative = [get_loss(idx) for idx in xrange(size)]

            # The total loss and the reported distances are sums over all
            # anchors, not means.
            self.loss = tf.reduce_sum(loss)
            hardest_positive = tf.reduce_sum(hardest_positive)
            hardest_negative = tf.reduce_sum(hardest_negative)
            self.add_scalar_summary(self.loss, "loss")
            self.add_scalar_summary(hardest_positive, "hardest_positive")
            self.add_scalar_summary(hardest_negative, "hardest_negative")
            # tf.summary.image()
            self.n_features = n_features

            if imgs_raw is not None:
                self.summaries.append(
                    tf.summary.image("anchor_image", anchor_imgs))
                self.summaries.append(
                    tf.summary.image("hardest_postive_image", hard_pos_imgs))
                self.summaries.append(
                    tf.summary.image("hardest_negative_image", hard_neg_imgs))
示例#51
0
def discriminative_loss_single(prediction, correct_label, feature_dim,
                               label_shape, delta_v, delta_d, param_var,
                               param_dist, param_reg):
    """Instance-segmentation (discriminative) loss for a single image.

    Implements the loss the original comments attribute to equation (1) of
    the Discriminative Loss paper: a variance term pulling pixel embeddings
    towards their instance mean, a distance term pushing instance means
    apart, and a regularization term on the norm of the means.

    :param prediction: inference of network
    :param correct_label: instance label
    :param feature_dim: feature dimension of prediction
    :param label_shape: shape of label
    :param delta_v: cutoff variance distance
    :param delta_d: cutoff cluster distance
    :param param_var: weight for intra cluster variance
    :param param_dist: weight for inter cluster distances
    :param param_reg: weight regularization
    :return: tuple ``(loss, l_var, l_dist, l_reg)``; the three terms are
        returned already multiplied by their weights
    """
    # Flatten the label map and the embeddings to one row per pixel.
    pixel_count = label_shape[1] * label_shape[0]
    flat_labels = tf.reshape(correct_label, [pixel_count])
    flat_embed = tf.reshape(prediction, [pixel_count, feature_dim])

    # Count the instances and the pixels belonging to each of them.
    instance_labels, pixel_to_instance, pixel_counts = \
        tf.unique_with_counts(flat_labels)
    pixel_counts = tf.cast(pixel_counts, tf.float32)
    instance_count = tf.size(instance_labels)

    # Mean embedding per instance, then broadcast back to every pixel.
    embed_sums = tf.unsorted_segment_sum(flat_embed, pixel_to_instance,
                                         instance_count)
    means = tf.div(embed_sums, tf.reshape(pixel_counts, (-1, 1)))
    means_per_pixel = tf.gather(means, pixel_to_instance)

    # Variance term: squared hinge on the pixel-to-mean distance beyond
    # delta_v.
    pull = tf.norm(tf.subtract(means_per_pixel, flat_embed), axis=1)
    pull = tf.subtract(pull, delta_v)
    pull = tf.clip_by_value(pull, 0., pull)
    pull = tf.square(pull)

    var_term = tf.unsorted_segment_sum(pull, pixel_to_instance,
                                       instance_count)
    var_term = tf.div(var_term, pixel_counts)
    var_term = tf.reduce_sum(var_term)
    var_term = tf.divide(var_term, tf.cast(instance_count, tf.float32))

    # Distance term: all ordered pairs of instance means.
    means_tiled_rows = tf.tile(means, [instance_count, 1])
    means_tiled_cols = tf.tile(means, [1, instance_count])
    means_tiled_cols = tf.reshape(
        means_tiled_cols, (instance_count * instance_count, feature_dim))

    pair_diff = tf.subtract(means_tiled_cols, means_tiled_rows)

    # Drop the pairs whose difference is exactly zero (a mean paired with
    # itself).
    abs_sum = tf.reduce_sum(tf.abs(pair_diff), axis=1)
    zero = tf.zeros(1, dtype=tf.float32)
    nonzero_mask = tf.not_equal(abs_sum, zero)
    pair_diff_kept = tf.boolean_mask(pair_diff, nonzero_mask)

    # Squared hinge on how far the pair distance falls short of 2 * delta_d.
    push = tf.norm(pair_diff_kept, axis=1)
    push = tf.subtract(2. * delta_d, push)
    push = tf.clip_by_value(push, 0., push)
    push = tf.square(push)

    # NOTE(review): with a single instance no pair survives the mask, so this
    # mean would be taken over an empty tensor -- confirm how callers handle
    # that case.
    dist_term = tf.reduce_mean(push)

    # Regularization term from the original Discriminative Loss paper.
    reg_term = tf.reduce_mean(tf.norm(means, axis=1))

    # Combine the terms with the weights given in the paper.
    weighted_var = param_var * var_term
    weighted_dist = param_dist * dist_term
    weighted_reg = param_reg * reg_term

    total = 1. * (weighted_var + weighted_dist + weighted_reg)

    return total, weighted_var, weighted_dist, weighted_reg
示例#52
0
def trainModelWithCSV(run_name,
                      layer_sizes,
                      training_file_path,
                      testing_file_path,
                      initial_learning_rate,
                      learning_rate_decay,
                      num_epochs,
                      batch_size,
                      regularization_parameter,
                      save_model=False):
    """Build, train and evaluate a fully connected softmax classifier.

    Args:
        run_name: name used for the log and model directories.
        layer_sizes: sizes of all layers; the first entry is the input width
            and the last entry is the number of outputs.
        training_file_path: path passed to ``readInputsAndOutputs`` for the
            training set.
        testing_file_path: path passed to ``readInputsAndOutputs`` for the
            testing set.
        initial_learning_rate: learning rate used during epoch 0.
        learning_rate_decay: the rate used in epoch ``e`` is
            ``initial_learning_rate / (1 + learning_rate_decay * e)``.
        num_epochs: number of passes over the training set.
        batch_size: number of rows per optimizer step.
        regularization_parameter: weight of the weight-norm penalty.
        save_model: when True, save a checkpoint and ``layer_sizes.txt``
            under ``./models/<run_name>/``.

    Returns:
        Tuple ``(training_cost, testing_cost)`` evaluated on the full
        training and testing sets after training.
    """
    # Code to reset the tensorflow graph & make tensorflow release VRAM after it's done computing
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    #Importing the data from the specified paths. Can be changed to adjust for what you want to compute
    X_training, Y_training = readInputsAndOutputs(training_file_path)
    X_testing, Y_testing = readInputsAndOutputs(testing_file_path)

    #Number of batches: ceiling of m / batch_size, so a partial last batch is still used
    m = X_training.shape[0]
    num_batches = (m + batch_size - 1) // batch_size

    #Dictionaries that are used to save intermediate values of the graph
    a = dict()
    z = dict()
    weights = dict()
    biases = dict()
    a_normalized = dict()

    #Defines the model according to the layer sizes specified
    for index, layer_size in enumerate(layer_sizes):
        if index == 0:
            #Creates placeholder for the X's. Adds that value to a dictionary for easier computing afterwards
            with tf.variable_scope('input'):
                X = tf.placeholder(dtype=tf.float32,
                                   shape=[None, layer_size],
                                   name='X')
                a_normalized['a_normalized0'] = X
        else:
            #Defines computations for each layer in the model
            with tf.variable_scope('layer' + str(index)):

                #Initializes weights using Xavier Initialization
                weights['w' + str(index)] = tf.get_variable(
                    name='weights' + str(index),
                    dtype=tf.float32,
                    shape=[layer_sizes[index - 1], layer_sizes[index]],
                    initializer=tf.contrib.layers.xavier_initializer())

                #Initializes biases to 0
                biases['b' + str(index)] = tf.get_variable(
                    name='biases' + str(index),
                    dtype=tf.float32,
                    shape=[layer_sizes[index]],
                    initializer=tf.zeros_initializer())

                #Computes the linear activation
                z['z' + str(index)] = tf.matmul(
                    a_normalized['a_normalized' + str(index - 1)],
                    weights['w' + str(index)]) + biases['b' + str(index)]

                #Computes the non-linear activation for all layers except for the last one
                if index != len(layer_sizes) - 1:
                    a['a' + str(index)] = tf.nn.relu(z['z' + str(index)])
                    # NOTE(review): no `training` argument is passed to
                    # batch_normalization -- confirm the default is intended.
                    a_normalized['a_normalized' +
                                 str(index)] = tf.layers.batch_normalization(
                                     inputs=a['a' + str(index)], axis=1)

                # Activation of the last layer. Can be changed according what you want to predict
                else:
                    outputs = tf.nn.softmax(logits=z['z' + str(index)])

    #Computes the sum of frobenius norm of all the weights matrixes
    weights_squarred_sum = 0
    for index in range(1, len(layer_sizes)):
        weights_squarred_sum += tf.norm(weights["w" + str(index)],
                                        ord='fro',
                                        axis=[-2, -1])

    #Defines the cost function. Change according to last layer's activation. Additional calculations for regularization
    with tf.variable_scope('cost'):
        Y = tf.placeholder(dtype=tf.float32,
                           shape=(None, layer_sizes[len(layer_sizes) - 1]),
                           name='Y')
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=z['z' + str(len(layer_sizes) - 1)], labels=Y) +
            regularization_parameter / (2 * m) * weights_squarred_sum)

    #Defines optimizer
    with tf.variable_scope('optimizer'):
        learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(cost)

    #Object used to log cost's across runs and epochs. Used later
    with tf.variable_scope('logging'):
        tf.summary.scalar(name='cost', tensor=cost)
        summary = tf.summary.merge_all()

    #If specified, saves model. Used later
    if save_model:
        saver = tf.train.Saver()

    #Starts a session
    with tf.Session(config=config) as session:

        #Initializes all the variables (weights and biases)
        session.run(tf.global_variables_initializer())

        #Objects used to write log files for training and testing costs
        training_writer = tf.summary.FileWriter(
            "./logs/" + run_name + "/training", session.graph)
        testing_writer = tf.summary.FileWriter(
            "./logs/" + run_name + "/testing", session.graph)

        try:
            #Training loop running according to the specified number of epochs
            for epoch in range(num_epochs):
                #The learning rate is constant within an epoch
                epoch_learning_rate = initial_learning_rate / (
                    1 + learning_rate_decay * epoch)

                for batch in range(num_batches):

                    #Selecting batch to run optimizer on
                    batch_start = batch * batch_size
                    batch_end = (batch + 1) * batch_size
                    X_training_batch = X_training[batch_start:batch_end, :]
                    Y_training_batch = Y_training[batch_start:batch_end, :]

                    #Runs one step of the Adam optimizer for every batch
                    session.run(
                        [optimizer],
                        feed_dict={
                            X: X_training_batch,
                            Y: Y_training_batch,
                            learning_rate: epoch_learning_rate
                        })

                #Logs training and testing costs every 5 epochs
                if epoch % 5 == 0:
                    training_cost, training_summary = session.run(
                        [cost, summary],
                        feed_dict={
                            X: X_training,
                            Y: Y_training
                        })
                    testing_cost, testing_summary = session.run(
                        [cost, summary],
                        feed_dict={
                            X: X_testing,
                            Y: Y_testing
                        })
                    print("Epoch #" + str(epoch) + ": training cost= " +
                          str(training_cost) + " testing cost= " +
                          str(testing_cost))
                    training_writer.add_summary(training_summary, epoch)
                    testing_writer.add_summary(testing_summary, epoch)
        finally:
            #Closing flushes buffered events to disk and releases the files
            training_writer.close()
            testing_writer.close()

        #Display percentage of accurate predictions
        predictions = session.run(outputs, feed_dict={X: X_testing})
        expected = np.argmax(Y_testing, axis=1)
        predictions = np.argmax(predictions, axis=1)
        correct = int(np.sum(predictions == expected))
        print("Testing accuracy = " + str(correct / (len(predictions)) * 100) +
              "%")

        #If specified, saves model
        if save_model:
            saver.save(sess=session,
                       save_path="./models/" + run_name + "/" + run_name +
                       ".ckpt")
            with open("./models/" + run_name + "/" + "layer_sizes.txt",
                      "w+") as f:
                f.write(str(layer_sizes))

        #Final costs on the full training and testing sets
        final_training_cost = session.run(cost,
                                          feed_dict={
                                              X: X_training,
                                              Y: Y_training
                                          })
        final_testing_cost = session.run(cost,
                                         feed_dict={
                                             X: X_testing,
                                             Y: Y_testing
                                         })
        return final_training_cost, final_testing_cost
示例#53
0
 def cond(i, x, r, p):
     """Loop condition: continue while under MAX_ITER and norm(r) exceeds CG_EPS."""
     under_iteration_cap = i < MAX_ITER
     residual_above_tolerance = tf.norm(r) > CG_EPS
     return tf.logical_and(under_iteration_cap, residual_above_tolerance)
示例#54
0
def predictUsingModelWithVectors(model_path, layer_sizes, X_input):
    """Rebuild a dense classifier graph, restore its checkpoint and predict.

    The network is reconstructed from ``layer_sizes`` with the same variable
    scopes and names (``layer<i>/weights<i>``, ``layer<i>/biases<i>``) that the
    checkpoint is expected to contain, then the saved values are restored and
    the softmax outputs for ``X_input`` are reduced to class indices.

    Args:
        model_path: Checkpoint path passed to ``tf.train.Saver.restore``.
        layer_sizes: Sequence of layer widths. The first entry is the number
            of input features, the last one the number of outputs. At least
            two entries are required.
        X_input: Input rows fed to the ``[None, layer_sizes[0]]`` float32
            placeholder.

    Returns:
        ``np.argmax`` over axis 1 of the softmax outputs, i.e. one predicted
        class index per input row.

    Raises:
        ValueError: If ``layer_sizes`` has fewer than two entries, because no
            trainable layer (and therefore no output) could be built.
    """
    if len(layer_sizes) < 2:
        raise ValueError(
            'layer_sizes must contain at least an input and an output size, '
            'got %s' % str(layer_sizes))

    # Reset the TensorFlow graph and let TensorFlow grow GPU memory on demand
    # instead of reserving all of it up front.
    tf.reset_default_graph()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    output_index = len(layer_sizes) - 1

    def _affine(index, inputs):
        # Linear part of layer `index`. Must be called inside that layer's
        # variable scope so the variable names match the checkpoint.
        layer_weights = tf.get_variable(
            name='weights' + str(index),
            dtype=tf.float32,
            shape=[layer_sizes[index - 1], layer_sizes[index]],
            initializer=tf.contrib.layers.xavier_initializer())
        layer_biases = tf.get_variable(
            name='biases' + str(index),
            dtype=tf.float32,
            shape=[layer_sizes[index]],
            initializer=tf.zeros_initializer())
        return tf.matmul(inputs, layer_weights) + layer_biases

    # Placeholder for the inputs.
    with tf.variable_scope('input'):
        X = tf.placeholder(dtype=tf.float32,
                           shape=[None, layer_sizes[0]],
                           name='X')

    # Hidden layers: affine -> ReLU -> batch normalization.
    # batch_normalization is called with its default `training` argument.
    activations = X
    for index in range(1, output_index):
        with tf.variable_scope('layer' + str(index)):
            hidden = tf.nn.relu(_affine(index, activations))
            activations = tf.layers.batch_normalization(inputs=hidden, axis=1)

    # Output layer: affine -> softmax. Change the activation here if the model
    # is meant to predict something other than class probabilities.
    with tf.variable_scope('layer' + str(output_index)):
        outputs = tf.nn.softmax(logits=_affine(output_index, activations))

    saver = tf.train.Saver()

    with tf.Session(config=config) as session:
        # Load the trained values of every variable from the checkpoint; no
        # initializer is run.
        saver.restore(sess=session, save_path=model_path)

        # Compute predictions for the inputs.
        predictions = session.run(outputs, feed_dict={X: X_input})
        return np.argmax(predictions, axis=1)
示例#55
0
 def cond(i, X, R_, R, V_):
     """Return a boolean tensor: True while iteration should continue.

     Continuation requires both that ``i`` is below ``MAX_ITER`` and that the
     norm of ``R`` is still above ``CG_EPS``. ``X``, ``R_`` and ``V_`` are
     accepted but not inspected (presumably so the signature matches the loop
     variables of a ``tf.while_loop`` -- confirm against the caller).
     """
     below_iteration_cap = i < MAX_ITER
     residual_above_tolerance = tf.norm(R) > CG_EPS
     return tf.logical_and(below_iteration_cap, residual_above_tolerance)
示例#56
0
    def optimize(self, task, target):

        """
        Build the policy and value losses and their Adam training ops.

        Trainable variables are split by name: those containing 'pol_' are
        optimized against the policy loss, those containing 'val_' against the
        value loss. Hyper-parameters are read from the module-level `par` dict.

        Args:
            task: Object providing `pol_out_history`, `val_out_history`,
                `pol_r_history`, `time_mask`, `reward` and `action_array`.
                Its `logpi` attribute is overwritten here.
            target: Not used by the active code (it only appears in a
                commented-out alternative for `self.Jn`).

        Side effects:
            Stores the intermediate tensors, both losses (`Loss_pol`,
            `Loss_val`), the gradient lists (`pol_grads_and_vars`,
            `val_grads_and_vars`) and the training ops (`pol_train_op`,
            `val_train_op`) on `self`. Nothing is returned.
        """
        # Get a list of all trainable variables
        variables_names = [v for v in tf.trainable_variables()]
        pol_list = []
        val_list = []
        for v in variables_names:
            # List of variables that should be optimized for the policy network
            ind = v.name.find('pol_')
            if ind != -1:    # If the string 'pol_' is found in the name, this is a policy network variable
                pol_list.append(v)
            # List of variables that should be optimized for the value network
            ind = v.name.find('val_')
            if ind != -1:    # If the string 'val_' is found in the name, this is a value network variable
                val_list.append(v)

        '''
        Calculate the loss function dependent on the policy netwokrk parameters
        Equation (2) from Song et al., 2017
        '''
        pol_out = tf.nn.softmax(tf.stack(task.pol_out_history), 1)   # Softmax along axis 1 of the stacked policy outputs; the 1e-7 guard against log(0) is added where logs are taken below
        #pol_out = tf.stack(task.pol_out_history)
        NT = tf.stop_gradient(tf.reduce_sum(task.time_mask))   # Total # of included time points
        # Calculate J (equation 22, but with the baseline subtracted as well):
        # 1) Discard reward at time points that are to be excluded
        #reward = tf.multiply(task.reward, task.time_mask)
        external_reward = tf.stop_gradient(tf.multiply(task.reward, task.time_mask))    # This is the reward value given by the environment
        self.external_reward = external_reward

        time_mask = tf.stop_gradient(task.time_mask)

        # 2) Apply discount (Page 17, Song et al., 2017)
        # The baseline is the masked value-network output; stop_gradient keeps
        # the policy loss from back-propagating into it.
        baseline = tf.stop_gradient(tf.multiply(task.val_out_history, time_mask))
        self.baseline = baseline

        # Calculate discounted future reward per Song et al., 2017
        # Row i of Mcausal weights the rewards at steps >= i with an
        # exponential decay; entries before step i stay zero.
        Mcausal = np.zeros((par['num_time_steps'], par['num_time_steps']))
        for i in range(par['num_time_steps']):
            # Mcausal[i,i:] = 1 # No discount version
            # NOTE: the time constant is converted to steps with floor division (//).
            Mcausal[i,i:] = np.exp(-np.arange(par['num_time_steps']-i)/(par['discount_time_constant']//par['dt']))   # Add discount, 100ms (10 steps) works
        #pdb.set_trace()
        # Advantage = discounted future reward minus baseline
        advantage = tf.matmul(Mcausal, external_reward) - baseline

        '''
        # Advantage based on Nick and Greg's code
        Vt = baseline[:-1, :]   # Vt will have all baseline values but the last one
        Vtnext = baseline[1:, :]    # Vt+1 will have all baseline values but the first one
        advantage = external_reward[:-1, :] + par['discount_coef']*Vtnext - Vt
        '''
        self.advantage = advantage
        action_array = tf.stop_gradient(task.action_array)
        # 3) Multiply reward by logpi to get the first term in J (i.e. reward portion)
        # Masking pol_out with action_array and summing over axis 1 leaves one
        # probability per entry (presumably action_array is one-hot along
        # axis 1 -- confirm); 1e-7 keeps the log finite.
        logpi = tf.multiply(pol_out, action_array)
        logpi = tf.log(tf.reduce_sum(logpi, axis=1)+1e-7) #tf.log(tf.reduce_sum(logpi, axis=0))
        # logpi = logpi[:-1]     # Discard last time point for some formulations
        task.logpi = logpi
        # Sum of advantage * log-probability, normalized by (NT - 1)
        self.Jn = tf.reduce_sum(tf.multiply(advantage, task.logpi))/(NT - 1)
        #self.Jn = -tf.square(tf.stack(task.pol_out_history) - target)
        # Average Jn values to get average of J
        self.J = tf.reduce_mean(self.Jn)
        # 7) Calculate average regularization term (mentioned as Omega in equation 2)
        # Fetch the existing recurrent weight variable of the policy RNN cell
        with tf.variable_scope('pol_rnn_cell', reuse=True):
            pol_W_rnn = tf.get_variable('pol_W_rnn', dtype=tf.float64)
        # 2-norm of the recurrent weights, scaled by weight_cost / batch_train_size
        self.weight_loss_pol = par['weight_cost'] * tf.norm(pol_W_rnn, ord=2) / par['batch_train_size']
        # Mean of the squared firing rates, scaled by spike_cost
        self.spike_loss_pol = par['spike_cost'] * tf.reduce_mean(tf.reduce_mean(tf.square(tf.stack(task.pol_r_history)), axis=2))
        # The weight term is multiplied by 0, so only the activity term contributes
        self.Omega_pol = 0*self.weight_loss_pol + self.spike_loss_pol

        # Calculate entropy
        #pdb.set_trace()
        log_pol_out = tf.log(pol_out + 1e-7)               # Log of output of the policy network
        # Multiply output and its log
        entropy = tf.multiply(pol_out, log_pol_out) #size: Ntime x 3 x Nbatch size
        # Sum over all the outputs
        entropy = tf.reduce_sum(entropy, axis=1)    #size: Ntime x Nbatch size
        # Apply time mask
        entropy = tf.multiply(entropy, time_mask)
        # Sum across time
        entropy = tf.reduce_sum(entropy, axis=0)    #size: Nbatch size
        # Sum across trials, normalize by the number of included time points and flip the sign
        entropy = -1*tf.reduce_sum(entropy)/NT
        self.entropy = entropy
        self.ent_pol_out = pol_out
        self.ent_log_pol_out = log_pol_out
        self.NT = NT
        # 8) Calculate the loss function for policy network (Equation 2)
        # The entropy is stored on self but is not part of the loss.
        self.Loss_pol = -self.J + self.Omega_pol #- 0.00*self.entropy

        '''
        Calculate the loss function dependent on the value netwokrk parameters
        Equation (4) from Song et al., 2017
        '''

        # 1) Calculate En (Equation 5)
        # Sum of squared of differences averaged across all time points
        # Apply the time mask to the output of the value network
        val_out = tf.multiply(tf.stack(task.val_out_history), time_mask)
        # E will minimize advantage, except instead of Vt, which is not differentiable, we use val_out which is the differentiable variable
        #self.En = tf.square(external_reward[:-1, :] + par['discount_coef']*Vtnext - val_out[:-1, :])
        self.En = tf.square(val_out - advantage)
        # Average En values to get E
        self.E = tf.reduce_mean(self.En)
        # 2) Calculate Omega for the value network (mentioned in equation 4)
        # Set it to zero for now
        self.Omega_val = 0
        # 3) Calculate loss for the value network (Equation 4)
        self.Loss_val = self.E + self.Omega_val

        """
        Define optimizer, calculate and gradient the the value network
        """
        # The value network uses a learning rate 10x smaller than the policy network
        val_opt = tf.train.AdamOptimizer(learning_rate = par['learning_rate']/10)
        """
        Define optimizer, calculate and gradient the the policy network
        """
        pol_opt = tf.train.AdamOptimizer(learning_rate = par['learning_rate'])
        # The gradient lists are stored for inspection; the minimize() calls
        # below do not reuse them and compute the gradients again themselves.
        self.pol_grads_and_vars = pol_opt.compute_gradients(self.Loss_pol, var_list = pol_list)
        self.val_grads_and_vars = val_opt.compute_gradients(self.Loss_val, var_list = val_list)
        self.pol_train_op = pol_opt.minimize(self.Loss_pol, var_list = pol_list)
        self.val_train_op = val_opt.minimize(self.Loss_val, var_list = val_list)
    def __init__(
            self,
            time_step_spec: types.TimeStep,
            action_spec: types.BoundedTensorSpec,
            reward_network: types.Network,
            optimizer: types.Optimizer,
            observation_and_action_constraint_splitter: Optional[
                types.Splitter] = None,
            accepts_per_arm_features: bool = False,
            constraints: Iterable[constr.BaseConstraint] = (),
            # Params for training.
            error_loss_fn: types.LossFn = tf.compat.v1.losses.
        mean_squared_error,
            gradient_clipping: Optional[float] = None,
            # Params for debugging.
            debug_summaries: bool = False,
            summarize_grads_and_vars: bool = False,
            enable_summaries: bool = True,
            emit_policy_info: Tuple[Text, ...] = (),
            train_step_counter: Optional[tf.Variable] = None,
            laplacian_matrix: Optional[types.Float] = None,
            laplacian_smoothing_weight: float = 0.001,
            name: Optional[Text] = None):
        """Creates a Greedy Reward Network Prediction Agent.

        In some use cases, the actions are not independent and they are
        related to each other (e.g., when the actions are ordinal integers).
        Assuming that the relations between arms can be modeled by a graph, we
        may want to enforce that the estimated reward function is smooth over
        the graph. This implies that the estimated rewards `r_i` and `r_j` for
        two related actions `i` and `j` should be close to each other. To
        quantify this smoothness criterion we use the Laplacian matrix `L` of
        the graph over the actions. When the laplacian smoothing is enabled,
        the loss is extended to:
        ```
          Loss_new := Loss + lambda r^T * L * r,
        ```
        where `r` is the estimated reward vector for all actions. The second
        term is the laplacian smoothing regularization term and `lambda` is
        the weight that determines how strongly we enforce the regularization.
        For more details, please see:
        "Bandits on graphs and structures", Michal Valko
        https://hal.inria.fr/tel-01359757/document

        Args:
          time_step_spec: A `TimeStep` spec of the expected time_steps.
          action_spec: A nest of `BoundedTensorSpec` representing the actions.
          reward_network: A `tf_agents.network.Network` to be used by the
            agent. The network will be called with
            call(observation, step_type) and it is expected to provide a
            reward prediction for all actions.
          optimizer: The optimizer to use for training.
          observation_and_action_constraint_splitter: A function used for
            masking valid/invalid actions with each state of the environment.
            The function takes in a full observation and returns a tuple
            consisting of 1) the part of the observation intended as input to
            the bandit agent and policy, and 2) the boolean mask. This
            function should also work with a `TensorSpec` as input, and should
            output `TensorSpec` objects for the observation and mask.
          accepts_per_arm_features: (bool) Whether the policy accepts per-arm
            features.
          constraints: iterable of constraints objects that are instances of
            `tf_agents.bandits.agents.NeuralConstraint`.
          error_loss_fn: A function for computing the error loss, taking
            parameters labels, predictions, and weights (any function from
            tf.losses would work). The default is
            `tf.losses.mean_squared_error`.
          gradient_clipping: A float representing the norm length to clip
            gradients (or None for no clipping.)
          debug_summaries: A Python bool, default False. When True, debug
            summaries are gathered.
          summarize_grads_and_vars: A Python bool, default False. When True,
            gradients and network variable summaries are written during
            training.
          enable_summaries: A Python bool, default True. When False, all
            summaries (debug or otherwise) should not be written.
          emit_policy_info: (tuple of strings) what side information we want
            to get as part of the policy info. Allowed values can be found in
            `policy_utilities.PolicyInfo`.
          train_step_counter: An optional `tf.Variable` to increment every
            time the train op is run. Defaults to the `global_step`.
          laplacian_matrix: A float `Tensor` or a numpy array shaped
            `[num_actions, num_actions]`. This holds the Laplacian matrix used
            to regularize the smoothness of the estimated expected reward
            function. This only applies to problems where the actions have a
            graph structure. If `None`, the regularization is not applied.
          laplacian_smoothing_weight: A float that determines the weight of
            the regularization term. Note that this has no effect if
            `laplacian_matrix` above is `None`.
          name: Python str name of this agent. All variables in this module
            will fall under that name. Defaults to the class name.

        Raises:
          ValueError: If the action spec contains more than one action or it
            is not a bounded scalar int32 spec with minimum 0.
          InvalidArgumentError: If the Laplacian provided is not None and not
            valid.
        """
        # tf.Module is initialized explicitly with the agent name; the agent
        # base-class constructor is invoked at the end of this method.
        tf.Module.__init__(self, name=name)
        common.tf_agents_gauge.get_cell('TFABandit').set(True)
        self._observation_and_action_constraint_splitter = (
            observation_and_action_constraint_splitter)
        self._num_actions = policy_utilities.get_num_actions_from_tensor_spec(
            action_spec)
        self._accepts_per_arm_features = accepts_per_arm_features
        self._constraints = constraints

        # Make sure the network variables exist before the network is stored
        # and handed to the policy below.
        reward_network.create_variables()
        self._reward_network = reward_network
        self._optimizer = optimizer
        self._error_loss_fn = error_loss_fn
        self._gradient_clipping = gradient_clipping
        # Remember whether the reward network is a HeteroscedasticQNetwork.
        self._heteroscedastic = isinstance(
            reward_network, heteroscedastic_q_network.HeteroscedasticQNetwork)
        self._laplacian_matrix = None
        if laplacian_matrix is not None:
            self._laplacian_matrix = tf.convert_to_tensor(laplacian_matrix,
                                                          dtype=tf.float32)
            # Check the validity of the laplacian matrix: the vector of row
            # sums and the vector of column sums must both have (near) zero
            # norm.
            tf.debugging.assert_near(
                0.0, tf.norm(tf.reduce_sum(self._laplacian_matrix, 1)))
            tf.debugging.assert_near(
                0.0, tf.norm(tf.reduce_sum(self._laplacian_matrix, 0)))
        self._laplacian_smoothing_weight = laplacian_smoothing_weight

        # The same greedy policy instance is used below both as the agent's
        # policy and as its collect policy.
        policy = greedy_reward_policy.GreedyRewardPredictionPolicy(
            time_step_spec,
            action_spec,
            reward_network,
            observation_and_action_constraint_splitter,
            constraints=constraints,
            accepts_per_arm_features=accepts_per_arm_features,
            emit_policy_info=emit_policy_info)
        # With per-arm features the training data spec is the policy's
        # trajectory spec passed through drop_arm_observation; otherwise it is
        # left as None.
        training_data_spec = None
        if accepts_per_arm_features:
            training_data_spec = bandit_spec_utils.drop_arm_observation(
                policy.trajectory_spec)

        super(GreedyRewardPredictionAgent,
              self).__init__(time_step_spec,
                             action_spec,
                             policy,
                             collect_policy=policy,
                             train_sequence_length=None,
                             training_data_spec=training_data_spec,
                             debug_summaries=debug_summaries,
                             summarize_grads_and_vars=summarize_grads_and_vars,
                             enable_summaries=enable_summaries,
                             train_step_counter=train_step_counter)
def custom_loss(y_true, y_pred):
    """Return a nested-norm distance between ``y_true`` and ``y_pred``.

    The difference is first reduced with an order-1 norm taken jointly over
    axes 1 and 2, and the result is then reduced with the default norm along
    axis 1. This requires inputs of rank 4 or higher (e.g. batch, height,
    width, channels -- the intended layout is an assumption, confirm against
    the caller).
    """
    residual = y_true - y_pred
    inner_norm = tf.norm(residual, ord=1, axis=(1, 2))
    return tf.norm(inner_norm, axis=1)
示例#59
0
    # Forward pass. `net` is used below as the logits of the classifier.
    net = Net(x, weights, biases)

    # ------ Loss + Regularizer ------
    with tf.name_scope("Loss"):
        # define loss: mean softmax cross-entropy between logits `net` and labels `y`
        with tf.name_scope("cross_entropy"):
            ce_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=y))
        # define regularizer
        with tf.name_scope("regularizer"):
            with tf.name_scope("jacobians"):
                # presumably the per-example Jacobian of `net` w.r.t. `x`;
                # verify against tf_jacobian
                jacobians = tf_jacobian(net, x, batch_size)
            with tf.name_scope("regularizer_cal"):
                # For each (ind_i, ind_j) pair, take the slices of `jacobians`
                # along axis 2, compute the norm of their difference along
                # axis 1, weight it by `similarities` and sum everything up.
                regularizer = tf.reduce_sum(
                    tf.norm(tf.gather(jacobians, ind_i, axis=2) -
                            tf.gather(jacobians, ind_j, axis=2),
                            axis=1) * similarities)
        # get final loss by adding loss and regularizer (scaled by args.lambda_reg)
        customized_loss = tf.add(ce_loss, args.lambda_reg * regularizer)

    # define optimizer: despite its name, `optimizer` holds the training op
    # returned by minimize(), not the AdamOptimizer object
    with tf.name_scope("optimizer"):
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(customized_loss)

    # define accuracy: fraction of rows whose arg-max prediction equals the
    # arg-max of the label row
    with tf.name_scope("accuracy"):
        prediction = tf.nn.softmax(net)
        correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
示例#60
0
    def forward(self, x, sequence_length=None, scope="RNN"):
        """Encode ``x`` with a stacked bidirectional RNN.

        Args:
            x: Batch-major input tensor, ``(batch, time, features)``. It is
                passed unchanged to ``bidirectional_dynamic_rnn`` (default
                ``time_major=False``) or transposed to time-major and
                unstacked for ``static_bidirectional_rnn``.
            sequence_length: Optional per-example lengths forwarded to the
                RNN call.
            scope: Variable scope name; variables are shared across calls
                through ``tf.AUTO_REUSE``.

        Returns:
            Without attention, the concatenation of the last layer's final
            forward and backward hidden states. With attention, the attention
            embedding ``M`` flattened to ``(batch, r * 2 * hidden_units)``.

        Raises:
            ValueError: If ``self._rnn_cell`` is not 'lstm', 'gru' or 'rnn'.

        Side effects:
            When ``self._use_attention`` is set, stores ``H``, ``W_s1``,
            ``W_s2`` (both tiled over the batch), ``H_T``, ``A``, ``M`` and
            the penalization term ``P`` on ``self``.
        """
        rnn = tf.nn.rnn_cell
        cell_kind = self._rnn_cell.lower()
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE
                               ):  # initializer=tf.orthogonal_initializer(),
            if not self._use_dynamic:
                # `static_rnn` input: a Python list with one (batch, features)
                # tensor per time step.
                x = tf.unstack(tf.transpose(x, perm=[1, 0, 2]))
            if cell_kind == 'lstm':
                rnn_cell = rnn.LSTMCell
            elif cell_kind == 'gru':
                rnn_cell = rnn.GRUCell
            elif cell_kind == 'rnn':
                rnn_cell = rnn.BasicRNNCell
            else:
                raise ValueError("Invalid rnn_cell type.")

            def _stacked_cells():
                # One dropout-wrapped cell per layer, combined into a
                # MultiRNNCell. Called inside the "fw"/"bw" scopes below.
                layers = []
                for _ in range(self._num_layers):
                    layer_cell = rnn.DropoutWrapper(
                        rnn_cell(self._hidden_units),
                        output_keep_prob=self._dropout_keep_prob,
                        variational_recurrent=False,
                        dtype=tf.float32)
                    layers.append(layer_cell)
                return rnn.MultiRNNCell(cells=layers, state_is_tuple=True)

            # tf.nn.rnn_cell.BasicLSTMCell does not support gradient clipping,
            # hence tf.nn.rnn_cell.LSTMCell for the 'lstm' kind.
            with tf.variable_scope("fw"):
                fw_cells = _stacked_cells()
            with tf.variable_scope("bw"):
                bw_cells = _stacked_cells()

            if self._use_dynamic:
                # outputs: (output_fw, output_bw), each
                # [batch_size, max_time, cell.output_size]
                outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
                    fw_cells,
                    bw_cells,
                    x,
                    sequence_length=sequence_length,
                    dtype=tf.float32)
                outputs = tf.concat(outputs, 2)
                # output_states is (state_fw, state_bw), each a tuple with one
                # entry per layer. Indexing it as [-1][0] / [-1][1] would pick
                # layers 0 and 1 of the backward state only.
                state_fw, state_bw = output_states
            else:
                # `static_rnn` returns (outputs, output_state_fw,
                # output_state_bw); outputs is a list of per-time-step
                # tensors with forward and backward outputs depth-concatenated.
                outputs, state_fw, state_bw = tf.nn.static_bidirectional_rnn(
                    fw_cells,
                    bw_cells,
                    x,
                    dtype=tf.float32,
                    sequence_length=sequence_length)
                # Back to batch-major: (batch, time, 2 * hidden_units)
                outputs = tf.transpose(tf.stack(outputs), perm=[1, 0, 2])

            # Final hidden state of the last layer in each direction. LSTM
            # states are (c, h) tuples, so take `.h`.
            if cell_kind == 'lstm':
                out = tf.concat([state_fw[-1].h, state_bw[-1].h], 1)
            else:
                out = tf.concat([state_fw[-1], state_bw[-1]], 1)

            if self._use_attention:
                d_a = 300
                r = 2
                self.H = outputs
                # Take the batch size from the batch-major outputs: in the
                # static branch `x` is a list of time steps, so tf.shape(x)[0]
                # would be the number of time steps instead.
                batch_size = tf.shape(outputs)[0]
                initializer = tf.contrib.layers.xavier_initializer()
                with tf.variable_scope(
                        "attention"
                ):  # TODO: Nan in summary histogram for: RNN/attention/W_s2_0/grad/hist
                    # shape(W_s1) = d_a * 2u
                    self.W_s1 = tf.get_variable(
                        'W_s1',
                        shape=[d_a, 2 * self._hidden_units],
                        initializer=initializer)
                    # shape(W_s2) = r * d_a
                    self.W_s2 = tf.get_variable('W_s2',
                                                shape=[r, d_a],
                                                initializer=initializer)
                    # shape (d_a, 2u) --> shape(batch_size, d_a, 2u)
                    self.W_s1 = tf.tile(tf.expand_dims(self.W_s1, 0),
                                        [batch_size, 1, 1])
                    self.W_s2 = tf.tile(tf.expand_dims(self.W_s2, 0),
                                        [batch_size, 1, 1])
                    # attention matrix A = softmax(W_s2 * tanh(W_s1 * H^T)),
                    # shape(A) = batch_size * r * n
                    self.H_T = tf.transpose(self.H, perm=[0, 2, 1], name="H_T")
                    self.A = tf.nn.softmax(
                        tf.matmul(self.W_s2,
                                  tf.tanh(tf.matmul(self.W_s1, self.H_T)),
                                  name="A"))
                    # sentence embedding matrix M = AH, shape(M) = (batch_size, r, 2u)
                    self.M = tf.matmul(self.A, self.H, name="M")
                    out = tf.reshape(self.M, [batch_size, -1])

                with tf.variable_scope("penalization"):
                    # penalization term: squared Frobenius norm of AA^T - I,
                    # i.e. P = |AA^T - I|_F^2
                    A_T = tf.transpose(self.A, perm=[0, 2, 1], name="A_T")
                    I = tf.eye(r, r, batch_shape=[batch_size], name="I")
                    self.P = tf.square(tf.norm(tf.matmul(self.A, A_T) - I,
                                               axis=[-2, -1],
                                               ord='fro'),
                                       name="P")
        return out