Example #1
def noisy_activation(x, generic, linearized, training, alpha=1.1, c=0.5):
    """
    Implements the noisy activation with Half-Normal Noise for Hard-Saturation
    functions. See http://arxiv.org/abs/1603.00391, Algorithm 1.

    Args:

        x: Tensor which is an input to the activation function

        generic: The generic formulation of the activation function. (denoted
            as h in the paper)

        linearized: Linearization of the activation based on the first-order
            Taylor expansion around zero. (denoted as u in the paper)

        training: A boolean tensor telling whether we are in the training stage
            (when the noise is sampled) or at inference time, when the
            expectation is used instead.

        alpha: Mixing hyper-parameter. The leakage rate from the linearized
            function to the nonlinear one.

        c: Standard deviation of the sampled noise.

    """

    delta = generic(x) - linearized(x)
    d = -tf.sign(x) * tf.sign(1 - alpha)
    p = tf.Variable(1.0)
    scale = c * (tf.sigmoid(p * delta) - 0.5)  ** 2
    noise = tf.select(training, tf.abs(tf.random_normal([])), math.sqrt(2 / math.pi))
    activation = alpha * generic(x) + (1 - alpha) * linearized(x) + d * scale * noise
    return activation
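A minimal usage sketch (names hypothetical), pairing the hard-tanh nonlinearity h with its first-order Taylor expansion u(x) = x, as in the paper; it assumes the same pre-1.0 TensorFlow API that the function itself uses (tf.select):

hard_tanh = lambda v: tf.clip_by_value(v, -1.0, 1.0)  # h(x): hard saturation at +/-1
linear = lambda v: v                                  # u(x): Taylor expansion of h around zero
x = tf.placeholder(tf.float32, [None, 64])
is_training = tf.placeholder(tf.bool, [])
y = noisy_activation(x, hard_tanh, linear, is_training)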
Example #2
def ternary_operation(x):
    """Ternary operation use threshold computed with weights."""
    g = tf.compat.v1.get_default_graph()
    with g.gradient_override_map({"Sign": "Identity"}):
        threshold = _compute_threshold(x)
        x = tf.sign(tf.add(tf.sign(tf.add(x, threshold)), tf.sign(tf.add(x, -threshold))))
        return x
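The helper _compute_threshold is not shown in this snippet. As a hedged sketch only, a common choice is the 0.7 * mean(|x|) rule from Ternary Weight Networks (arXiv:1605.04711):

def _compute_threshold(x):
    # Assumed implementation, not from the original snippet: threshold at 0.7 times
    # the mean absolute weight, so that sign(x + t) + sign(x - t) lands in {-2, 0, 2}
    # and the outer sign maps it to the ternary values {-1, 0, 1}.
    return 0.7 * tf.reduce_mean(tf.abs(x))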
Example #3
def get_accuracy_loss(arg,x,y,y_):
    '''
    Note: for regression, accuracy equals the loss, but for classification the
    loss is cross-entropy, SVM loss, another surrogate loss, etc., while
    accuracy = 1 - (0-1 loss).
    '''
    with tf.name_scope("loss_and_acc") as scope:
        # loss
        if arg.softmax:
            #cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))
            diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
            cross_entropy = tf.reduce_mean(diff)
            loss = cross_entropy
            correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) # list of booleans indicating correct predictions
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        else:
            l2_loss = tf.reduce_sum( tf.reduce_mean(tf.square(y_-y), 0))
            loss = l2_loss
            y = tf.cast(tf.sign(y),tf.float32)
            y_ = tf.cast(tf.sign(y_),tf.float32)
            correct_prediction = tf.equal(y, y_) # list of booleans indicating correct predictions
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        # accuracy
        # if arg.classification:
        #     correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) # list of booleans indicating correct predictions
        #     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        # else:
        #     accuracy = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
    ##
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    return loss, accuracy
Example #4
def loss_func(logits):
    final_maps = tf.placeholder(tf.float32, shape=[None, 361])

    # final maps are originally -1 to 1. rescale them to 0 to 1 probabilities:
    final_prob_maps = final_maps * tf.constant(0.5) + tf.constant(0.5)
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=final_prob_maps, logits=logits)
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_mean')

    correct_prediction = tf.equal(tf.sign(logits), tf.sign(final_maps))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return final_maps, cross_entropy_mean, accuracy
Example #5
def loss_func(score_op):
    final_scores = tf.placeholder(tf.float32, shape=[None])

    squared_errors = tf.square(tf.reshape(score_op, [-1]) - final_scores)
    #mean_sq_err = tf.reduce_mean(squared_errors, name='mean_sq_err')
    cross_entropy_ish_loss = tf.reduce_mean(-tf.log(tf.constant(1.0) - tf.constant(0.5) * tf.abs(tf.reshape(score_op, [-1]) - final_scores), name='cross-entropy-ish-loss'))

    correct_prediction = tf.equal(tf.sign(tf.reshape(score_op, [-1])), tf.sign(final_scores))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')
    #return final_scores, mean_sq_err, accuracy, squared_errors
    return final_scores, cross_entropy_ish_loss, accuracy
Example #6
  def angular_symmetry(self, d_cutoff, d, atom_numbers, coordinates):
    """ Angular Symmetry Function """

    max_atoms = self.max_atoms
    embedding = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(embedding, atom_numbers)

    Rs = np.linspace(0., self.angular_cutoff, self.angular_length)
    ita = 3 / (Rs[1] - Rs[0])**2
    thetas = np.linspace(0., np.pi, self.angular_length)
    zeta = float(self.angular_length**2)

    ita, zeta, Rs, thetas = np.meshgrid(ita, zeta, Rs, thetas)
    zeta = tf.cast(np.reshape(zeta, (1, 1, 1, 1, -1)), tf.float32)
    ita = tf.cast(np.reshape(ita, (1, 1, 1, 1, -1)), tf.float32)
    Rs = tf.cast(np.reshape(Rs, (1, 1, 1, 1, -1)), tf.float32)
    thetas = tf.cast(np.reshape(thetas, (1, 1, 1, 1, -1)), tf.float32)
    length = zeta.get_shape().as_list()[-1]

    vector_distances = tf.stack([coordinates] * max_atoms, 1) - tf.stack(
        [coordinates] * max_atoms, 2)
    R_ij = tf.stack([d] * max_atoms, axis=3)
    R_ik = tf.stack([d] * max_atoms, axis=2)
    f_R_ij = tf.stack([d_cutoff] * max_atoms, axis=3)
    f_R_ik = tf.stack([d_cutoff] * max_atoms, axis=2)

    # Define angle theta = arccos(R_ij(Vector) dot R_ik(Vector)/R_ij(distance)/R_ik(distance))
    vector_mul = tf.reduce_sum(tf.stack([vector_distances] * max_atoms, axis=3) * \
                               tf.stack([vector_distances] * max_atoms, axis=2), axis=4)
    vector_mul = vector_mul * tf.sign(f_R_ij) * tf.sign(f_R_ik)
    theta = tf.acos(tf.math.divide(vector_mul, R_ij * R_ik + 1e-5))

    R_ij = tf.stack([R_ij] * length, axis=4)
    R_ik = tf.stack([R_ik] * length, axis=4)
    f_R_ij = tf.stack([f_R_ij] * length, axis=4)
    f_R_ik = tf.stack([f_R_ik] * length, axis=4)
    theta = tf.stack([theta] * length, axis=4)

    out_tensor = tf.pow((1. + tf.cos(theta - thetas)) / 2., zeta) * \
                 tf.exp(-ita * tf.square((R_ij + R_ik) / 2. - Rs)) * f_R_ij * f_R_ik * 2

    if self.atomic_number_differentiated:
      out_tensors = []
      for id_j, atom_type_j in enumerate(self.atom_cases):
        for atom_type_k in self.atom_cases[id_j:]:
          selected_atoms = tf.stack([atom_numbers_embedded[:, :, atom_type_j]] * max_atoms, axis=2) * \
                           tf.stack([atom_numbers_embedded[:, :, atom_type_k]] * max_atoms, axis=1)
          selected_atoms = tf.expand_dims(
              tf.expand_dims(selected_atoms, axis=1), axis=4)
          out_tensors.append(
              tf.reduce_sum(out_tensor * selected_atoms, axis=(2, 3)))
      return tf.concat(out_tensors, axis=2)
    else:
      return tf.reduce_sum(out_tensor, axis=(2, 3))
Example #7
def main():
    data, labels = input()
    #     logits=inference()
    #     loss_step=loss(logits)
    #     train_step = train(loss_step,0.001)
    #     sess=tf.Session()
    #     sess.run(tf.global_variables_initializer())
    #     print(sess.run([w,b]))
    #     labels.shape=(6000,1)
    #     for i in range(1000):
    #         sess.run(train_step,feed_dict={x_placehold:data,y_placehold:labels})
    #     wc,bc=sess.run([w,b],feed_dict={x_placehold:data,y_placehold:labels})
    #     print(wc,bc)

    labels.shape = (6000, 1)
    print(data)
    print(labels)
    print(data.shape)
    print(labels.shape)
    print(feed_dict('D'))
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    # sess.run(tf.sign(tf.matmul(x_placehold, w) + b),
    #          feed_dict=feed_dict('D'))
    sess.run(tf.sign(tf.matmul(x_placehold, w) + b) - y_placehold,
             feed_dict=feed_dict())
    sess.run(tf.square(tf.sign(tf.matmul(x_placehold, w) + b) -
                       y_placehold), feed_dict=feed_dict())
    sess.run(tf.reduce_sum(tf.square(tf.sign(tf.matmul(x_placehold, w) + b) -
                                     y_placehold)), feed_dict=feed_dict())

    logits = tf.matmul(x_placehold, w) + b
    loss_op = tf.reduce_sum(tf.square(logits - y_placehold))

    with tf.name_scope('loss'):
        tf.summary.scalar('error', loss_op)

    with tf.name_scope('w'):
        tf.summary.scalar('x', w[0, 0])
        tf.summary.scalar('y', w[1, 0])

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./train', sess.graph)

    optimizer = tf.train.GradientDescentOptimizer(0.1)

    for i in range(1000):
        sess.run(optimizer.minimize(loss_op), feed_dict=feed_dict())
        summary = sess.run(merged, feed_dict=feed_dict())
        train_writer.add_summary(summary, i)

    wc, bc = sess.run([w, b], feed_dict=feed_dict())
    print(wc, bc)
Example #8
def neural_attention(embedding_dim=384, encoding_dim=128):
    embeddings = tf.Variable(tf.random_normal([vocab_size, embedding_dim], stddev=0.22), dtype=tf.float32)
    tf.contrib.layers.apply_regularization(tf.contrib.layers.l2_regularizer(1e-4), [embeddings])

    with tf.variable_scope('encode'):
        with tf.variable_scope('X'):
            X_lens = tf.reduce_sum(tf.sign(tf.abs(X)), 1)
            embedded_X = tf.nn.embedding_lookup(embeddings, X)
            encoded_X = tf.nn.dropout(embedded_X, keep_prob)
            gru_cell = tf.contrib.rnn.core_rnn_cell.GRUCell(encoding_dim)
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(gru_cell, gru_cell, encoded_X,
                                                                     sequence_length=X_lens, dtype=tf.float32,
                                                                     swap_memory=True)
            encoded_X = tf.concat(outputs, 2)
        with tf.variable_scope('Q'):
            Q_lens = tf.reduce_sum(tf.sign(tf.abs(Q)), 1)
            embedded_Q = tf.nn.embedding_lookup(embeddings, Q)
            encoded_Q = tf.nn.dropout(embedded_Q, keep_prob)
            gru_cell = tf.contrib.rnn.core_rnn_cell.GRUCell(encoding_dim)
            outputs, output_states = tf.nn.bidirectional_dynamic_rnn(gru_cell, gru_cell, encoded_Q,
                                                                     sequence_length=Q_lens, dtype=tf.float32,
                                                                     swap_memory=True)
            encoded_Q = tf.concat(outputs, 2)

    W_q = tf.Variable(tf.random_normal([2 * encoding_dim, 4 * encoding_dim], stddev=0.22), dtype=tf.float32)
    b_q = tf.Variable(tf.random_normal([2 * encoding_dim, 1], stddev=0.22), dtype=tf.float32)
    W_d = tf.Variable(tf.random_normal([2 * encoding_dim, 6 * encoding_dim], stddev=0.22), dtype=tf.float32)
    b_d = tf.Variable(tf.random_normal([2 * encoding_dim, 1], stddev=0.22), dtype=tf.float32)
    g_q = tf.Variable(tf.random_normal([10 * encoding_dim, 2 * encoding_dim], stddev=0.22), dtype=tf.float32)
    g_d = tf.Variable(tf.random_normal([10 * encoding_dim, 2 * encoding_dim], stddev=0.22), dtype=tf.float32)

    with tf.variable_scope('attend') as scope:
        infer_gru = tf.contrib.rnn.core_rnn_cell.GRUCell(4 * encoding_dim)
        infer_state = infer_gru.zero_state(batch_size, tf.float32)
        for iter_step in range(8):
            if iter_step > 0:
                scope.reuse_variables()

            _, q_glimpse = glimpse(W_q, b_q, encoded_Q, infer_state)
            d_attention, d_glimpse = glimpse(W_d, b_d, encoded_X, tf.concat([infer_state, q_glimpse], 1 ))

            gate_concat = tf.concat([infer_state, q_glimpse, d_glimpse, q_glimpse * d_glimpse], 1)

            r_d = tf.sigmoid(tf.matmul(gate_concat, g_d))
            r_d = tf.nn.dropout(r_d, keep_prob)
            r_q = tf.sigmoid(tf.matmul(gate_concat, g_q))
            r_q = tf.nn.dropout(r_q, keep_prob)

            combined_gated_glimpse = tf.concat([r_q * q_glimpse, r_d * d_glimpse], 1)
            _, infer_state = infer_gru(combined_gated_glimpse, infer_state)

    return tf.to_float(tf.sign(tf.abs(X))) * d_attention
Example #9
 def one_bp_iteration(self, xe_v2c_pre_iter, H_sumC_to_V, H_sumV_to_C, xe_0):
     xe_tanh = tf.tanh(tf.to_double(tf.truediv(xe_v2c_pre_iter, [2.0])))
     xe_tanh = tf.to_float(xe_tanh)
     xe_tanh_temp = tf.sign(xe_tanh)
     xe_sum_log_img = tf.matmul(H_sumC_to_V, tf.multiply(tf.truediv((1 - xe_tanh_temp), [2.0]), [3.1415926]))
     xe_sum_log_real = tf.matmul(H_sumC_to_V, tf.log(1e-8 + tf.abs(xe_tanh)))
     xe_sum_log_complex = tf.complex(xe_sum_log_real, xe_sum_log_img)
     xe_product = tf.real(tf.exp(xe_sum_log_complex))
     xe_product_temp = tf.multiply(tf.sign(xe_product), -2e-7)
     xe_pd_modified = tf.add(xe_product, xe_product_temp)
     xe_v_sumc = tf.multiply(self.atanh(xe_pd_modified), [2.0])
     xe_c_sumv = tf.add(xe_0, tf.matmul(H_sumV_to_C, xe_v_sumc))
     return xe_v_sumc, xe_c_sumv
Example #10
 def _apply(self, grad, var, indices=None):
   lr = tf.cast(self._learning_rate_tensor, var.dtype.base_dtype)
   m = self.get_slot(var, "m")
   # m_t = beta1 * m + (1 - beta1) * g_t
   beta1_t = tf.cast(self._beta1_t, var.dtype.base_dtype)
   m_scaled_g_values = grad * (1 - beta1_t)
   m_t = tf.assign(m, m * beta1_t, use_locking=self._use_locking)
   with tf.control_dependencies([m_t]):
     m_t = self._assign_add(m, updates=m_scaled_g_values, indices=indices)
   # update = lr * grad, scaled by decrease_factor wherever the momentum and gradient signs disagree
   m_gathered = self._gather(m_t, indices=indices)
   ones = tf.ones_like(grad)
   update = lr * grad * tf.where(tf.equal(tf.sign(m_gathered), tf.sign(grad)), ones, ones * self._decrease_factor)
   var_update = self._assign_sub(ref=var, updates=update, indices=indices)
   return tf.group(*[var_update, m_t])
Example #11
    def _apply_dense(self, grad, var):
        lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
        alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
        beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)

        eps = 1e-7  # cap for moving average

        m = self.get_slot(var, "m")
        m_t = m.assign(tf.maximum(beta_t * m + eps, tf.abs(grad)))

        var_update = state_ops.assign_sub(var, lr_t * grad * tf.exp(
            tf.log(alpha_t) * tf.sign(grad) * tf.sign(m_t)))  # update 'ref' by subtracting 'value'
        # Create an op that groups multiple operations.
        # When this op finishes, all ops in input have finished
        return control_flow_ops.group(*[var_update, m_t])
Example #12
def retrieve_seq_length_op(data):
    """ An op to compute the length of a sequence. 0 are masked. """
    with tf.name_scope('GetLength'):
        used = tf.sign(tf.reduce_max(tf.abs(data), reduction_indices=2))
        length = tf.reduce_sum(used, reduction_indices=1)
        length = tf.cast(length, tf.int32)
    return length
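A quick illustration of the masking logic (hypothetical data, assuming a TF 1.x session): padded timesteps are all-zero feature vectors, so they contribute nothing to the length.

batch = tf.constant([[[1., 2.], [3., 4.], [0., 0.]],
                     [[5., 6.], [0., 0.], [0., 0.]]])  # [batch, time, features]
lengths = retrieve_seq_length_op(batch)
with tf.Session() as sess:
    print(sess.run(lengths))  # [2 1]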
Example #13
def proximal_step(train_op, lr):
  # Apply weight decay for the variables with l2 loss
  # If basenet weights are trained together, do not set a weight decay on the
  # conv layers of the basenet
  l2_op_list = []
  l1_op_list = []
  with tf.control_dependencies([train_op]):
    if L2_LOSS_WEIGHT > 0:
      for var in tf.get_collection(utils.WEIGHT_DECAY_KEY):
        assign_op = var.assign_add(- lr * tf.convert_to_tensor(L2_LOSS_WEIGHT) * var)
        l2_op_list.append(assign_op)
        print('\tL2 loss added: %s(strength: %f)' % (var.name, L2_LOSS_WEIGHT))

    # Apply proximal gradient for the variables with l1 lasso loss
    # Non-negative weights constraint
    if L1_LOSS_WEIGHT > 0:
      for var in tf.get_collection(utils.LASSO_KEY):
        th_t = tf.fill(tf.shape(var), tf.convert_to_tensor(L1_LOSS_WEIGHT) * lr)
        zero_t = tf.zeros(tf.shape(var))
        var_temp = var - th_t * tf.sign(var)
        assign_op = var.assign(tf.select(tf.less(var, th_t), zero_t, var_temp))
        l1_op_list.append(assign_op)
        print('\tL1 loss added: %s(strength: %f)' % (var.name, L1_LOSS_WEIGHT))

  with tf.control_dependencies(l2_op_list + l1_op_list):
    train_op = tf.no_op(name='proximal_step')

  return train_op
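For reference, the L1 branch above is the soft-thresholding proximal operator specialized to non-negative weights; a sketch of the elementwise rule it implements:

# With threshold th = lr * L1_LOSS_WEIGHT, each lasso variable w becomes
#   w <- 0        if w < th   (also zeroes any w <= 0, enforcing non-negativity)
#   w <- w - th   otherwise   (w - th * sign(w), and sign(w) = 1 once w >= th > 0)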
Example #14
File: utils.py  Project: Peratham/models
def SoftThreshold(t, threshold_ratio, name=None):
  """Soft-threshold a tensor by the mean value.

  Soft-threshold each dimension-0 vector (for a matrix this is each column) by
  the mean absolute value multiplied by the threshold_ratio factor. Here we
  soft-threshold each column, as each column corresponds to a unit in a layer.

  Args:
    t: the input tensor.
    threshold_ratio: the threshold ratio.
    name: the optional name for the returned tensor.
  Returns:
    the thresholded tensor, where each entry is soft-thresholded by
    threshold_ratio times the mean of the absolute value of each column.
  """

  assert threshold_ratio >= 0
  with tf.op_scope([t, threshold_ratio], name, "soft_thresholding") as name:
    saved_shape = tf.shape(t)
    t2 = tf.reshape(t, tf.concat(0, [tf.slice(saved_shape, [0], [1]), -1]))
    t_abs = tf.abs(t2)
    t_x = tf.sign(t2) * tf.nn.relu(t_abs -
                                   (tf.reduce_mean(t_abs, [0],
                                                   keep_dims=True) *
                                    threshold_ratio))
    return tf.reshape(t_x, saved_shape, name=name)
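As a worked check of the columnwise rule (hypothetical values), the same computation in NumPy:

import numpy as np

t = np.array([2.0, -1.0, 0.5])                 # one column of t
thresh = 0.6 * np.abs(t).mean()                # threshold_ratio * mean(|t|) = 0.7
print(np.sign(t) * np.maximum(np.abs(t) - thresh, 0.0))  # [ 1.3 -0.3  0. ]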
Example #15
def _binarizer(prebinary_codes, is_training):
  """Binarize compression logits.

  During training, add noise, as in https://arxiv.org/pdf/1611.01704.pdf. During
  eval, map [-1, 1] -> {-1, 1}.

  Args:
    prebinary_codes: Floating-point tensors corresponding to pre-binary codes.
      Shape is [batch, code_length].
    is_training: A python bool. If True, add noise. If false, binarize.

  Returns:
    Binarized codes. Shape is [batch, code_length].

  Raises:
    ValueError: If the shape of `prebinary_codes` isn't static.
  """
  if is_training:
    # In order to train codes that can be binarized during eval, we add noise as
    # in https://arxiv.org/pdf/1611.01704.pdf. Another option is to use a
    # stochastic node, as in https://arxiv.org/abs/1608.05148.
    noise = tf.random_uniform(
        prebinary_codes.shape,
        minval=-1.0,
        maxval=1.0)
    return prebinary_codes + noise
  else:
    return tf.sign(prebinary_codes)
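A small sketch of the two modes (hypothetical input), to make the train/eval asymmetry concrete:

codes = tf.constant([[0.3, -0.8, 0.1]])
train_codes = _binarizer(codes, is_training=True)   # codes + Uniform(-1, 1) noise
eval_codes = _binarizer(codes, is_training=False)   # tf.sign -> [[1., -1., 1.]]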
Example #16
	def speech_data_seq_len(self, data):
		''' Assuming one-hot char matrix is batchsize x max speech length x vocab length, return
		sequence length for each char matrix '''

		signed_data = tf.sign(tf.reduce_sum(tf.abs(data), reduction_indices=2))
		length = tf.reduce_sum(signed_data, reduction_indices=1)
		return length
Example #17
 def __call__(self, x, keep_prob=1.0, seq_length=None):  # __call__ is very efficient when the state of the instance changes frequently
   with tf.variable_scope(self.name,reuse = self.reuse) as vs:
     self.fw_cell =tf.contrib.rnn.LSTMCell(self.cell_size,state_is_tuple=True,reuse=tf.get_variable_scope().reuse)
     self.fw_cell1 =tf.contrib.rnn.LSTMCell(self.cell_size,state_is_tuple=True,reuse=tf.get_variable_scope().reuse)
     
     self.bw_cell =tf.contrib.rnn.LSTMCell(self.cell_size,state_is_tuple=True,reuse=tf.get_variable_scope().reuse)
     self.bw_cell1 =tf.contrib.rnn.LSTMCell(self.cell_size,state_is_tuple=True,reuse=tf.get_variable_scope().reuse)
 
 
     self.fw_cells = tf.contrib.rnn.MultiRNNCell([self.fw_cell,self.fw_cell1],state_is_tuple=True)
     self.bw_cells = tf.contrib.rnn.MultiRNNCell([self.bw_cell,self.bw_cell1],state_is_tuple=True)
     
     if seq_length is None:  # get the real sequence length (assuming the padding is zeros)
       used = tf.sign(tf.reduce_max(tf.abs(x),reduction_indices=2))
       seq_length = tf.cast(tf.reduce_sum(used,reduction_indices=1),tf.int32)
     
     lstm_out,_,_ =  tf.contrib.rnn.static_bidirectional_rnn(self.fw_cells,self.bw_cells,tf.unstack(tf.transpose(x,[1,0,2])),dtype=tf.float32,sequence_length=seq_length)
     
     lstm_out = tf.transpose(tf.stack(lstm_out),[1,0,2])
     print('lstm_out: ', lstm_out)
     
     #shape(lstm_out) = (self.batch_size,sequence_length,2*cell_size)
     
     #if keep_prob < 1.:
     #  lstm_out = tf.nn.dropout(lstm_out,keep_prob)
       
     if self.reuse is None:
       self.trainable_weights = vs.global_variables()
       
   self.reuse =True
   return lstm_out,seq_length
Example #18
 def encode(self, x, noise):
   x = tf.to_float(x)
   # we can't use tf.pow(..., 8.0) because of a high-error approximation
   # on TPU.  Instead we square three times.
   x = tf.sign(x) * tf.square(tf.square(tf.square(tf.abs(x) * 128.0)))
   x = _to_bfloat16_unbiased(x, noise)
   return x
Example #19
def triangle_wave(frequency):
  """Emit a triangle wave at the given frequency."""
  xs = tf.reshape(tf.range(_samples(), dtype=tf.float32), [1, _samples(), 1])
  ts = xs / FLAGS.sample_rate
  #
  # A triangle wave looks like this:
  #
  #      /\      /\
  #     /  \    /  \
  #         \  /    \  /
  #          \/      \/
  #
  # If we look at just half a period (the first four slashes in the
  # diagram above), we can see that it looks like a transformed absolute
  # value function.
  #
  # Let's start by computing the times relative to the start of each
  # half-wave pulse (each individual "mountain" or "valley", of which
  # there are four in the above diagram).
  half_pulse_index = ts * (frequency * 2)
  half_pulse_angle = half_pulse_index % 1.0  # in [0, 1]
  #
  # Now, we can see that each positive half-pulse ("mountain") has
  # amplitude given by A(z) = 0.5 - abs(z - 0.5), and then normalized:
  absolute_amplitude = (0.5 - tf.abs(half_pulse_angle - 0.5)) / 0.5
  #
  # But every other half-pulse is negative, so we should invert these.
  half_pulse_parity = tf.sign(1 - (half_pulse_index % 2.0))
  amplitude = half_pulse_parity * absolute_amplitude
  #
  # This is precisely the desired result, so we're done!
  return amplitude
Example #20
def retrieve_seq_length_op(data):
    """An op to compute the length of a sequence. 0 are masked. """
    with tf.name_scope('GetLength'):
        used = tf.sign(x=tf.reduce_max(tf.abs(data), axis=2))
        length = tf.reduce_sum(input_tensor=used, axis=1)
        length = tf.cast(x=length, dtype=tf.int32)
    return length
Example #21
 def build(self):
   """ tensorflow computation graph for transform """
   graph = tf.Graph()
   with graph.as_default():
     self.inputs = tf.placeholder(tf.float32, shape=(None, self.max_atoms, 4))
     atom_numbers = tf.cast(self.inputs[:, :, 0], tf.int32)
     flags = tf.sign(atom_numbers)
     flags = tf.cast(
         tf.expand_dims(flags, 1) * tf.expand_dims(flags, 2), tf.float32)
     coordinates = self.inputs[:, :, 1:]
     if self.coordinates_in_bohr:
       coordinates = coordinates * 0.52917721092
     d = self.distance_matrix(coordinates, flags)
     d_radial_cutoff = self.distance_cutoff(d, self.radial_cutoff, flags)
     d_angular_cutoff = self.distance_cutoff(d, self.angular_cutoff, flags)
     radial_sym = self.radial_symmetry(d_radial_cutoff, d, atom_numbers)
     angular_sym = self.angular_symmetry(d_angular_cutoff, d, atom_numbers,
                                         coordinates)
     self.outputs = tf.concat(
         [
             tf.cast(tf.expand_dims(atom_numbers, 2), tf.float32), radial_sym,
             angular_sym
         ],
         axis=2)
   return graph
Example #22
def random_sign_uniform(
    shape, minval=None, maxval=None, dtype=tf.float32, seed=None):
  """Tensor with (possibly complex) random entries from a "sign Uniform".

  Letting `Z` be a random variable equal to `-1` or `1` with equal probability,
  samples from this `Op` are distributed like

  ```
  Z * X, where X ~ Uniform[minval, maxval], if dtype is real,
  Z * (X + iY),  where X, Y ~ Uniform[minval, maxval], if dtype is complex.
  ```

  Args:
    shape:  `TensorShape` or Python list.  Shape of the returned tensor.
    minval:  `0-D` `Tensor` giving the minimum values.
    maxval:  `0-D` `Tensor` giving the maximum values.
    dtype:  `TensorFlow` `dtype` or Python dtype
    seed:  Python integer seed for the RNG.

  Returns:
    `Tensor` with desired shape and dtype.
  """
  dtype = tf.as_dtype(dtype)

  with tf.name_scope("random_sign_uniform"):
    unsigned_samples = random_uniform(
        shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed)
    if seed is not None:
      seed += 12
    signs = tf.sign(tf.random_uniform(shape, minval=-1., maxval=1., seed=seed))
    return unsigned_samples * tf.cast(signs, unsigned_samples.dtype)
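A usage sketch (hypothetical arguments): every entry has a magnitude drawn from Uniform[1, 2] and an independent random sign.

samples = random_sign_uniform([2, 3], minval=1.0, maxval=2.0, seed=42)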
Example #23
def loss(logits, labels):
    """Calculates Mean Pixel Error.
    
    Args:
      logits: Logits from inference().
      labels: Labels from distorted_inputs or inputs(). 1-D tensor
              of shape [batch_size]
    
    Returns:
      Loss tensor of type float.
    """
    
    labelValidity = tf.sign(labels, name='label_validity')
    
    minop = tf.sub(logits, labels, name='Diff_Op')
    
    absop = tf.abs(minop, name='Abs_Op')
    
    lossValues = tf.mul(labelValidity, absop, name='lossValues')
    
    loss_mean = tf.reduce_mean(lossValues, name='MeanPixelError')
    
    tf.add_to_collection('losses', loss_mean)
    
    return tf.add_n(tf.get_collection('losses'), name='total_loss'), loss_mean
Example #24
 def _non_linear_grad(cls, op, grad):
     LRP.logger.debug("Computing non-linear gradient with activation type {}".format(op.type))
     op_out = op.outputs[0]
     op_in = op.inputs[0]
     stabilizer_epsilon = cls._eps * tf.sign(op_in)
     op_in += stabilizer_epsilon
     return grad * op_out / op_in
Example #25
    def binomial_sampling(self, pr):

        """
        Binomial sampling of hidden unit activations using a rejection method.

        Basic mechanics:
            1) Extract a random number from a uniform distribution (g) and compare it with
                the unit's probability (pr)

            2) Choose 0 if pr<g, 1 otherwise. It is convenient to implement this condition using
               the relu function.

        Args:
            pr (tensor, float32): input conditional probability
            g  (np.array, float32):  uniform probability used for comparison

        Returns:
            h_sampled (tensor, float32): sampled units. The value is 1 if pr>g and 0 otherwise.
        """

        np.random.seed(self.seed)

        # sample from a Bernoulli distribution with same dimensions as input distribution
        g = tf.convert_to_tensor(np.random.uniform(size=pr.shape[1]), dtype=tf.float32)

        # sample the value of the hidden units
        h_sampled = tf.nn.relu(tf.sign(pr - g))

        return h_sampled
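The relu(sign(.)) construction can be checked on a toy input (hypothetical values): a unit fires exactly when pr > g.

h = tf.nn.relu(tf.sign(tf.constant([0.9, 0.2]) - tf.constant([0.5, 0.5])))
# sign gives [1., -1.], relu gives [1., 0.]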
Example #26
    def __init__(self,
                 length_batch,
                 features_batch,
                 labels_batch):
        self.labels_flat = tf.reshape(labels_batch, [-1])
        self.labels_one_hot = tf.one_hot(labels_batch, 26)
        self.labels_one_hot_flat = tf.reshape(self.labels_one_hot, [-1, 26])

        self.lstm = tf.nn.rnn_cell.BasicLSTMCell(128)
        self.lstm_outputs, _ = tf.nn.dynamic_rnn(
            self.lstm, features_batch, sequence_length=length_batch, time_major=False, dtype=tf.float32)
        self.flat_lstm_outputs = tf.reshape(self.lstm_outputs, [-1, 128])
        self.outputs = tflearn.fully_connected(self.flat_lstm_outputs, 26)

        # mask out padding
        self.losses = tf.nn.softmax_cross_entropy_with_logits(self.outputs, self.labels_one_hot_flat)
        self.mask = tf.to_float(tf.sign(self.labels_flat))
        self.masked_losses = self.mask * self.losses
        self.mean_loss = tf.reduce_sum(self.masked_losses / tf.reduce_sum(self.mask))

        self.predictions = tf.argmax(self.outputs, 1)
        self.accurate = tf.equal(self.predictions, self.labels_flat)
        self.accuracy = tf.reduce_sum(tf.to_float(self.accurate) * self.mask) / tf.reduce_sum(self.mask)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.mean_loss, tvars), 5.0)

        self.train = tf.train.GradientDescentOptimizer(0.1).apply_gradients(zip(grads, tvars))
Example #27
 def configurable_params_turn_on(self, args, options):
     offset = float(options["offset"]) or 0.0
     if "random" in args:
         onvalue = float(options["onvalue"]) or 1.0
         n = tf.random_uniform([1], minval=-1, maxval=1)
         n += tf.constant(offset, dtype=tf.float32)
         return (tf.sign(n) + 1) / 2 * tf.constant(onvalue, dtype=tf.float32)
Example #28
def NTanh(x,
          use_noise,
          alpha=1.05,
          c=0.5, half_normal=False):
    """
    Noisy Hard Tanh Units: NAN without learning p
    ----------------------------------------------------
    Arguments:
        x: tensorflow tensor variable, input of the function.
        use_noise: bool, whether to add noise or not to the activations, this is in particular
        useful for the test time, in order to disable the noise injection.
        c: float, standard deviation of the noise
        alpha: the leaking rate from the linearized function to the nonlinear one.
    """


    threshold = 1.0
    signs = tf.sign(x)
    delta = tf.abs(x) - threshold

    scale = c * (tf.sigmoid(delta**2) - 0.5)**2
    if alpha > 1.0 and  half_normal:
           scale *= -1.0
    zeros=tf.zeros(tf.shape(x), dtype=tf.float32, name=None)
    def noise_func() :return tf.abs(tf.random_normal(tf.shape(x), mean=0.0, stddev=1.0, dtype=tf.float32))
    def zero_func (): return zeros+ 0.797  if half_normal   else zeros
    noise=tf.cond(use_noise,noise_func,zero_func)

    eps = scale * noise + alpha * delta
    z = x - signs * eps
    test=tf.cast(tf.greater_equal(tf.abs(x) , threshold),tf.float32)
    res = test * z + (1. - test) *  HardTanh(x)


    return res
Example #29
        def __graph__():
            """Building the inference graph"""

            with tf.name_scope('input'):
                # [BATCH_SIZE, NUM_FEATURES]
                x_input = tf.placeholder(dtype=tf.float32, shape=[None, self.num_features], name='x_input')

                # [BATCH_SIZE]
                y_input = tf.placeholder(dtype=tf.uint8, shape=[None], name='y_input')

                # [BATCH_SIZE, NUM_CLASSES]
                y_onehot = tf.one_hot(indices=y_input, depth=self.num_classes, on_value=1, off_value=-1,
                                      name='y_onehot')

            learning_rate = tf.placeholder(dtype=tf.float32, name='learning_rate')

            with tf.name_scope('training_ops'):
                with tf.name_scope('weights'):
                    weight = tf.get_variable(name='weights',
                                             initializer=tf.random_normal([self.num_features, self.num_classes],
                                                                          stddev=0.01))
                    self.variable_summaries(weight)
                with tf.name_scope('biases'):
                    bias = tf.get_variable(name='biases', initializer=tf.constant([0.1], shape=[self.num_classes]))
                    self.variable_summaries(bias)
                with tf.name_scope('Wx_plus_b'):
                    output = tf.matmul(x_input, weight) + bias
                    tf.summary.histogram('pre-activations', output)

            with tf.name_scope('svm'):
                regularization = tf.reduce_mean(tf.square(weight))
                hinge_loss = tf.reduce_mean(tf.square(tf.maximum(tf.zeros([self.batch_size, self.num_classes]),
                                                                 1 - tf.cast(y_onehot, tf.float32) * output)))
                with tf.name_scope('loss'):
                    loss = regularization + self.svm_c * hinge_loss
            tf.summary.scalar('loss', loss)

            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

            with tf.name_scope('accuracy'):
                predicted_class = tf.sign(output)
                predicted_class = tf.identity(predicted_class, name='prediction')
                with tf.name_scope('correct_prediction'):
                    correct = tf.equal(tf.argmax(predicted_class, 1), tf.argmax(y_onehot, 1))
                with tf.name_scope('accuracy'):
                    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
            tf.summary.scalar('accuracy', accuracy)

            merged = tf.summary.merge_all()

            self.x_input = x_input
            self.y_input = y_input
            self.y_onehot = y_onehot
            self.learning_rate = learning_rate
            self.loss = loss
            self.optimizer = optimizer
            self.output = output
            self.predicted_class = predicted_class
            self.accuracy = accuracy
            self.merged = merged
Example #30
def create_model(bert_config, is_training, input_ids, input_mask,
                 segment_ids, labels, num_labels, use_one_hot_embeddings):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # 使用数据加载BertModel,获取对应的字embedding
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )
    # Get the corresponding embeddings; the input data is [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # vector of size [batch_size] holding the sequence length of each example in the current batch

    blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=FLAGS.lstm_size, cell_type=FLAGS.cell, num_layers=FLAGS.num_layers,
                          droupout_rate=FLAGS.droupout_rate, initializers=initializers, num_labels=num_labels,
                          seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer()
    return rst
Example #31
 def f_epsilon(self, x):
     return tf.sign(x) * tf.sqrt(tf.abs(x))
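This signed square root compresses magnitudes while preserving sign; for reference, a sketch of its inverse (not part of the original class):

def f_epsilon_inverse(y):
    # Undo sign(x) * sqrt(|x|): the sign survives, and squaring recovers |x|
    return tf.sign(y) * tf.square(y)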
Example #32
def train():
    # load data
    data, labels = load_data.extract_data('linearly_separable_data.csv')
    # creating testing and training set
    X_train, X_test, Y_train, Y_test = train_test_split(data,
                                                        labels,
                                                        test_size=0.25)

    train_data_node = tf.placeholder(tf.float32, shape=(None, 2))
    train_label_node = tf.placeholder(tf.float32, shape=(None, 1))

    # weight
    W = tf.Variable(tf.random_uniform([2, 1], -1.0, 1.0), name="W")
    b = tf.Variable(tf.zeros([1]))
    # y_value = [batch_size,1]
    y_value = tf.matmul(train_data_node, W) + b
    weight_loss = 0.5 * tf.reduce_sum(tf.square(W))
    hinge_loss = tf.reduce_sum(
        tf.maximum(tf.zeros([BATCH_SIZE, 1]), 1 - train_label_node * y_value))
    svm_loss = weight_loss + svmC * hinge_loss
    # for test
    hinge_loss_test = tf.reduce_sum(
        tf.maximum(tf.zeros([Test_Size, 1]), 1 - train_label_node * y_value))
    svm_loss_test = weight_loss + svmC * hinge_loss_test
    # train
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-3)
    grads_and_vars = optimizer.compute_gradients(svm_loss)
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    # evaluation
    predicted_class = tf.sign(y_value)
    correct_prediction = tf.equal(train_label_node, predicted_class)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # runing the training
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        print('Initialized!')
        # generate batches
        batches = load_data.batch_iter(list(zip(X_train, Y_train)), BATCH_SIZE,
                                       NUM_EPOCHS)
        # batch count
        batch_count = 0
        epoch = 1
        print("Epoch " + str(epoch) + ":")
        for batch in batches:
            batch_count += 1
            # train process
            x_batch, y_batch = zip(*batch)
            feed_dict = {train_data_node: x_batch, train_label_node: y_batch}
            _, step, losses = sess.run([train_op, global_step, svm_loss],
                                       feed_dict=feed_dict)
            # test process
            if (batch_count * BATCH_SIZE) % Train_Size == 0:
                epoch += 1
                print("Epoch " + str(epoch) + ":")
            if batch_count % EVAL_FREQUENCY == 0:
                feed_dict = {train_data_node: X_test, train_label_node: Y_test}
                step, losses, acc = sess.run(
                    [global_step, svm_loss_test, accuracy],
                    feed_dict=feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, losses, acc))
Example #33
'''
common math functions
'''
tf.abs()
tf.ceil()
tf.cos()
tf.exp()
tf.floor()
tf.inv()
tf.log()
tf.maximum()
tf.minimum()
#tf.neg()
tf.pow()
tf.round()
tf.rsqrt()
tf.sign()
tf.sin()
tf.sqrt()
tf.square()
'''
special math functions
'''
tf.digamma()
tf.erf()
tf.erfc()
tf.igamma()
tf.igammac()
tf.lbeta()
tf.lgamma()
tf.squared_difference()
'''
Example #34
# Declare vector L2 'norm' function squared
l2_norm = tf.reduce_sum(tf.square(A))

# Declare loss function
# Loss = max(0, 1-pred*actual) + alpha * L2_norm(A)^2
# L2 regularization parameter, alpha
alpha = tf.constant([0.01])
# Margin term in loss
classification_term = tf.reduce_mean(
    tf.maximum(0., tf.subtract(1., tf.multiply(model_output, y_target))))
# Put terms together
loss = tf.add(classification_term, tf.multiply(alpha, l2_norm))

# Declare prediction function
prediction = tf.sign(model_output)
accuracy = tf.reduce_mean(tf.cast(tf.equal(prediction, y_target), tf.float32))

# Declare optimizer
my_opt = tf.train.GradientDescentOptimizer(0.01)
train_step = my_opt.minimize(loss)

# Initialize variables
init = tf.global_variables_initializer()
sess.run(init)

# Training loop
loss_vec = []
train_accuracy = []
test_accuracy = []
for i in range(500):
Example #35
    def __init__(self):
        path = remote_helper.get_remote_date(
            "https://www.flyai.com/m/uncased_L-24_H-1024_A-16.zip")
        data_root = os.path.splitext(path)[0]
        bert_config_file = os.path.join(data_root, 'bert_config.json')
        bert_config = modeling.BertConfig.from_json_file(bert_config_file)
        init_checkpoint = os.path.join(data_root, 'bert_model.ckpt')
        bert_vocab_file = os.path.join(data_root, 'vocab.txt')

        self.input_ids = tf.placeholder(tf.int32,
                                        shape=[None, None],
                                        name='input_ids')
        self.input_mask = tf.placeholder(tf.int32,
                                         shape=[None, None],
                                         name='input_masks')
        self.segment_ids = tf.placeholder(tf.int32,
                                          shape=[None, None],
                                          name='segment_ids')
        self.labels = tf.placeholder(tf.int32, shape=[
            None,
        ], name="labels")

        self.is_training = tf.placeholder_with_default(False,
                                                       shape=(),
                                                       name='is_training')
        self.learning_rate = tf.placeholder_with_default(config.learning_rate,
                                                         shape=(),
                                                         name='learning_rate')

        # Build the BERT model
        with tf.name_scope('Bert'):
            model = modeling.BertModel(
                config=bert_config,
                is_training=True,
                input_ids=self.input_ids,
                input_mask=self.input_mask,
                token_type_ids=self.segment_ids,
                # Set this to True when using a TPU (faster there); set it to False on CPU or GPU (faster there).
                use_one_hot_embeddings=False)
            # This gets the output of every token, shaped [batch_size, seq_length, embedding_size]; use it for seq2seq or NER
            # output_layer = model.get_sequence_output()
            tvars = tf.trainable_variables()
            # Load the BERT model
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            # output_layer = model.get_pooled_output()  # this gets the sentence-level output
            # hidden_size = output_layer.shape[-1].value  # get the output dimension
            embedding = model.get_sequence_output()
            max_seq_length = embedding.shape[1].value

        used = tf.sign(tf.abs(self.input_ids))
        lengths = tf.reduce_sum(
            used, reduction_indices=1)  # vector of size [batch_size] with the sequence lengths of the current batch

        blstm_crf = BiLstmCrf(embedded_chars=embedding,
                              max_seq_length=max_seq_length,
                              labels=self.labels,
                              lengths=lengths,
                              is_training=self.is_training)

        self.loss, logits, trans, pred_ids = blstm_crf.add_blstm_crf_layer()

        with tf.variable_scope("predict"):
            self.pred = tf.Variable(pred_ids, name='pred')

        with tf.name_scope("train_op"):
            self.train_op = tf.train.AdamOptimizer(
                learning_rate=model_config.learning_rate).minimize(self.loss)
Example #36
 def accuracy(labels, predictions, weights):
   predictions = tf.nn.relu(tf.sign(predictions))
   return tf.metrics.accuracy(labels, predictions, weights)
Example #37
    def __init__(self, config):
        print(config)
        self.config = config
        self.lr = config["lr"]
        self.char_dim = config["char_dim"]
        self.lstm_dim = config["lstm_dim"]
        self.seg_dim = config["seg_dim"]
        self.subtype_dim = config["subtype_dim"]
        self.num_tags = config["num_tags"]
        self.num_chars = config["num_char"]
        self.num_steps = config["num_steps"]
        self.num_segs = 14
        self.num_subtypes = 51
        self.seq_nums = 8

        self.global_step = tf.Variable(0, trainable=False)
        self.best_dev_f1 = tf.Variable(0.0, trainable=False)
        self.best_test_f1 = tf.Variable(0.0, trainable=False)
        self.initializer = initializers.xavier_initializer()

        self.char_inputs = tf.placeholder(dtype=tf.int32,
                                          shape=[None, None],
                                          name="ChatInputs")
        self.seg_inputs = tf.placeholder(dtype=tf.int32,
                                         shape=[None, None],
                                         name="SegInputs")
        self.subtype_inputs = tf.placeholder(dtype=tf.int32,
                                             shape=[None, None],
                                             name="SubInputs")
        self.targets = tf.placeholder(dtype=tf.int32,
                                      shape=[None, None],
                                      name="Targets")

        self.doc_inputs = tf.placeholder(dtype=tf.int32,
                                         shape=[None, None, self.num_steps],
                                         name="doc_inputs")
        self.dropout = tf.placeholder(dtype=tf.float32, name="Dropout")
        self.char_lookup = tf.get_variable(
            name="char_embedding",
            shape=[self.num_chars, self.char_dim],
            initializer=self.initializer)

        used = tf.sign(tf.abs(self.char_inputs))
        length = tf.reduce_sum(used, reduction_indices=1)
        self.lengths = tf.cast(length, tf.int32)
        self.batch_size = tf.shape(self.char_inputs)[0]

        embedding = self.embedding_layer(self.char_inputs, self.seg_inputs,
                                         self.subtype_inputs, config)

        doc_embedding = self.doc_embedding_layer(self.doc_inputs,
                                                 self.lstm_dim, self.lengths,
                                                 config)

        lstm_inputs = tf.nn.dropout(embedding, self.dropout)

        lstm_outputs, lstm_states = self.biLSTM_layer(lstm_inputs,
                                                      self.lstm_dim,
                                                      self.lengths)
        lstm_outputs = tf.nn.dropout(lstm_outputs, self.dropout)

        sen_att_outputs = self.attention(lstm_outputs)

        doc_att_outputs = self.doc_attention(doc_embedding, lstm_states)

        gat_output = self.gate(sen_att_outputs, doc_att_outputs)

        outputs = tf.concat([embedding, gat_output], -1)
        lstm_outputs = self.LSTM_decoder(outputs, self.lstm_dim)

        # lstm_outputs = self.tag_attention(lstm_outputs)
        self.logits = self.project_layer(lstm_outputs)

        self.loss = self.loss_layer(self.logits, self.lengths)

        with tf.variable_scope("optimizer"):
            optimizer = self.config["optimizer"]
            if optimizer == "sgd":
                self.opt = tf.train.GradientDescentOptimizer(self.lr)
            elif optimizer == "adam":
                self.opt = tf.train.AdamOptimizer(self.lr)
            elif optimizer == "adgrad":
                self.opt = tf.train.AdagradOptimizer(self.lr)
            else:
                raise KeyError

            grads_vars = self.opt.compute_gradients(self.loss)
            capped_grads_vars = [[
                tf.clip_by_value(g, -self.config["clip"], self.config["clip"]),
                v
            ] for g, v in grads_vars]
            self.train_op = self.opt.apply_gradients(capped_grads_vars,
                                                     self.global_step)

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
Example #38
mnist = tf.keras.datasets.mnist  # assuming the standard Keras MNIST loader
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=5)

model.evaluate(x_test, y_test, verbose=2)

# The model ends in a softmax, so the loss takes probabilities (not logits),
# and y_true comes first
loss = tf.keras.losses.CategoricalCrossentropy()
labels = tf.one_hot(y_test, 10)
loss(labels, model(x_test))

x = tf.convert_to_tensor(x_test)
labels = tf.one_hot(y_test, 10)
with tf.GradientTape() as tape:
    tape.watch(x)
    prediction = model(x)
    loss = loss(labels, prediction)
grad = tape.gradient(loss, x)

adv_x = x + 0.05 * tf.sign(grad)
model.evaluate(adv_x, y_test, verbose=2)
Example #39
def train(discriminator, data, test_data, config):
    tf.set_random_seed(int(config['random_seed']))

    batch_size = config['batch_size']
    epsilon = config['epsilon']  # perturbation error
    class_num = config['class_num']  # number of output classes
    pgd_iter = config['pgd_iter']
    learning_rate = config['learning_rate']
    weight_decay = config['weight_decay']

    x_real = data[0]
    label = data[1]

    # Normalize to range [-1,1]
    x_real = 2. * x_real - 1.

    step_size = epsilon * 0.25

    x = x_real + tf.random_uniform(x_real.shape, -epsilon, epsilon)
    for i in range(pgd_iter):
        d_out = discriminator(x)
        d_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=d_out,
                                                    labels=tf.one_hot(
                                                        label, class_num)))

        grad_x, = tf.gradients(d_loss, x)
        x = tf.stop_gradient(x + step_size * tf.sign(grad_x))
        x = tf.clip_by_value(x, x_real - epsilon, x_real + epsilon)
        x = tf.clip_by_value(x, -1.0, 1.0)

    d_out_adv = discriminator(x)

    d_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=d_out_adv,
                                                labels=tf.one_hot(
                                                    label, class_num)))

    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               scope='discriminator')

    # Weight decay: assume weights are named 'kernel' or 'weights'
    d_decay = weight_decay * 0.5 * sum(
        tf.reduce_sum(tf.square(v)) for v in d_vars
        if (v.name.find('kernel') > 0 or v.name.find('weights') > 0))

    # SGD optimizer with different step sizes
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                           momentum=0.9)
    optimizer2 = tf.train.MomentumOptimizer(learning_rate=learning_rate * 0.1,
                                            momentum=0.9)

    d_grads = tf.gradients(d_loss + d_decay, d_vars)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(zip(d_grads, d_vars))

    # Gradient norm to evaluate convergence
    d_reg = 0.5 * sum(tf.reduce_sum(tf.square(g)) for g in d_grads)

    # build test
    acc, acc_update, acc_init = build_test(discriminator, test_data, config)
    acc_fgs, acc_update_fgs, acc_init_fgs = build_test_fgs(
        discriminator, test_data, config)
    acc_pgd, acc_update_pgd, acc_init_pgd = build_test_pgd(
        discriminator, test_data, config)

    saver = tf.train.Saver(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                          scope='discriminator'))

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        train_size = config['train_size']
        num_steps_per_epoch = int(train_size / batch_size) + 1

        for batch_idx in range(config['nsteps']):
            d_loss_out, d_reg_out, _ = sess.run([d_loss, d_reg, train_op])

            if batch_idx % num_steps_per_epoch == 0:
                test_acc = run_test(acc, acc_update, acc_init, sess, config)
                test_acc_fgs = run_test(acc_fgs, acc_update_fgs, acc_init_fgs,
                                        sess, config)
                test_acc_pgd = run_test(acc_pgd, acc_update_pgd, acc_init_pgd,
                                        sess, config)

                print(
                    'i=%d, Loss_d: %4.4f, test_acc: %.4f, fgs_acc: %.4f pgd_acc: %.4f d_reg: %.4f'
                    % (batch_idx, d_loss_out, test_acc, test_acc_fgs,
                       test_acc_pgd, d_reg_out))

        model_filename = config['model_file']
        saver.save(sess, model_filename)
Example #40
File: rbm.py  Project: peterleong/RBM
 def bernoulli(self, p):
     return tf.nn.relu(tf.sign(p - tf.random_uniform(p.shape)))
Example #41
 def __length(sequence):
     used = tf.sign(tf.reduce_max(tf.abs(sequence), 2))
     length = tf.reduce_sum(used, 1)
     length = tf.cast(length, tf.int32)
     return length
Example #42
def Transformer_match(context,
                      query,
                      context_mask,
                      query_mask,
                      num_units=None,
                      num_heads=1,
                      dropout_keep_rate=1.0,
                      causality=False,
                      scope='MultiHead_Attention_Block',
                      reuse=None,
                      residual=False,
                      normalize_output=False,
                      **kwargs):
    """Applies multihead attention.

    Args:
      context: A 3d tensor with shape of [N, T_q, C_q].
      query: A 3d tensor with shape of [N, T_k, C_k].
      num_units: A scalar. Attention size.
      dropout_keep_rate: A floating point number, the keep probability for dropout.
      causality: Boolean. If true, units that reference the future are masked.
      num_heads: An int. Number of heads.
      scope: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.

    Returns
      A 3d tensor with shape of (N, T_q, C)
    """
    if num_units is None or residual:
        num_units = context.get_shape().as_list()[-1]
    with tf.variable_scope(scope, reuse=reuse):
        # Set the fall back option for num_units

        # Linear projections
        Q = tf.layers.dense(context, num_units,
                            activation=tf.nn.relu)  # (N, T_q, C)
        K = tf.layers.dense(query, num_units,
                            activation=tf.nn.relu)  # (N, T_k, C)
        V = tf.layers.dense(query, num_units,
                            activation=tf.nn.relu)  # (N, T_k, C)

        # Split and concat
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2),
                       axis=0)  # (h*N, T_q, C/h)
        K_ = tf.concat(tf.split(K, num_heads, axis=2),
                       axis=0)  # (h*N, T_k, C/h)
        V_ = tf.concat(tf.split(V, num_heads, axis=2),
                       axis=0)  # (h*N, T_k, C/h)

        # Multiplication
        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))  # (h*N, T_q, T_k)

        # Scale
        outputs = outputs / (K_.get_shape().as_list()[-1]**0.5)

        # Key Masking, aka query
        if query_mask is None:
            query_mask = tf.sign(tf.abs(tf.reduce_sum(query,
                                                      axis=-1)))  # (N, T_k)

        mask1 = tf.tile(query_mask, [num_heads, 1])  # (h*N, T_k)
        mask1 = tf.tile(tf.expand_dims(mask1, 1),
                        [1, tf.shape(context)[1], 1])  # (h*N, T_q, T_k)

        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(mask1, 0), paddings,
                           outputs)  # (h*N, T_q, T_k)

        # Causality = Future blinding
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])  # (T_q, T_k)
            tril = tf.contrib.linalg.LinearOperatorLowerTriangular(
                diag_vals).to_dense()  # (T_q, T_k)
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])  # (h*N, T_q, T_k)

            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings,
                               outputs)  # (h*N, T_q, T_k)

        # Activation
        outputs = tf.nn.softmax(outputs)  # (h*N, T_q, T_k)

        # Query Masking  aka, context
        if context_mask is None:
            context_mask = tf.sign(tf.abs(tf.reduce_sum(context,
                                                        axis=-1)))  # (N, T_q)

        mask2 = tf.tile(context_mask, [num_heads, 1])  # (h*N, T_q)
        mask2 = tf.tile(tf.expand_dims(mask2, -1),
                        [1, 1, tf.shape(query)[1]])  # (h*N, T_q, T_k)
        outputs *= mask2  # (h*N, T_q, T_k)

        # Dropouts
        outputs = tf.nn.dropout(outputs, keep_prob=dropout_keep_rate)

        # Weighted sum
        outputs = tf.matmul(outputs, V_)  # ( h*N, T_q, C/h)

        # Restore shape
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0),
                            axis=2)  # (N, T_q, C)

        if residual:
            # Residual connection
            outputs += context

        if normalize_output:
            # Normalize
            outputs = layer_norm(outputs)  # (N, T_q, C)

    return outputs
Example #43
 def _length(self):
     mask = tf.sign(tf.reduce_max(tf.abs(self._x), 2))
     length = tf.reduce_sum(mask, 1)
     length = tf.cast(length, tf.int32)
     return mask, length
Example #44
 def sample_prob(self, probs):
     # 随机采样
     return tf.nn.relu(tf.sign(probs - tf.random_uniform(tf.shape(probs))))
Example #45
 def _length(seq):
     relevant = tf.sign(tf.abs(seq))
     length = tf.reduce_sum(relevant, reduction_indices=1)
     length = tf.cast(length, tf.int32)
     return length
Example #46
    #Check to see if we finished adding in the amount of users for training
    if amountOfUsedUsers == 0:
        break
    amountOfUsedUsers -= 1

hiddenUnits = 20
visibleUnits = len(movies_df)
vb = tf.placeholder("float", [visibleUnits])  #Number of unique movies
hb = tf.placeholder("float",
                    [hiddenUnits])  #Number of features we're going to learn
W = tf.placeholder("float", [visibleUnits, hiddenUnits])

#Phase 1: Input Processing
v0 = tf.placeholder("float", [None, visibleUnits])
_h0 = tf.nn.sigmoid(tf.matmul(v0, W) + hb)
h0 = tf.nn.relu(tf.sign(_h0 - tf.random_uniform(tf.shape(_h0))))
#Phase 2: Reconstruction
_v1 = tf.nn.sigmoid(tf.matmul(h0, tf.transpose(W)) + vb)
v1 = tf.nn.relu(tf.sign(_v1 - tf.random_uniform(tf.shape(_v1))))
h1 = tf.nn.sigmoid(tf.matmul(v1, W) + hb)

#Learning rate
alpha = 1.0
#Create the gradients
w_pos_grad = tf.matmul(tf.transpose(v0), h0)
w_neg_grad = tf.matmul(tf.transpose(v1), h1)
#Calculate the Contrastive Divergence to maximize
CD = (w_pos_grad - w_neg_grad) / tf.to_float(tf.shape(v0)[0])
#Create methods to update the weights and biases
update_w = W + alpha * CD
update_vb = vb + alpha * tf.reduce_mean(v0 - v1, 0)
Example #47
def multihead_attention(queries,
                        keys,
                        num_units=None,
                        num_heads=8,
                        dropout_rate=0,
                        is_training=True,
                        causality=False,
                        scope="multihead_attention",
                        reuse=None):
    '''Applies multihead attention.

    Args:
      queries: A 3d tensor with shape of [N, T_q, C_q].
      keys: A 3d tensor with shape of [N, T_k, C_k].
      num_units: A scalar. Attention size.
      dropout_rate: A floating point number.
      is_training: Boolean. Controller of mechanism for dropout.
      causality: Boolean. If true, units that reference the future are masked.
      num_heads: An int. Number of heads.
      scope: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.

    Returns
      A 3d tensor with shape of (N, T_q, C)
    '''
    with tf.variable_scope(scope, reuse=reuse):
        # Set the fall back option for num_units
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]

        # Linear projections
        Q = tf.layers.dense(queries, num_units,
                            activation=tf.nn.relu)  # (N, T_q, C)
        K = tf.layers.dense(keys, num_units,
                            activation=tf.nn.relu)  # (N, T_k, C)
        V = tf.layers.dense(keys, num_units,
                            activation=tf.nn.relu)  # (N, T_k, C)

        # Split and concat
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2),
                       axis=0)  # (h*N, T_q, C/h)
        K_ = tf.concat(tf.split(K, num_heads, axis=2),
                       axis=0)  # (h*N, T_k, C/h)
        V_ = tf.concat(tf.split(V, num_heads, axis=2),
                       axis=0)  # (h*N, T_k, C/h)

        # Multiplication
        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))  # (h*N, T_q, T_k)

        # Scale
        outputs = outputs / (K_.get_shape().as_list()[-1]**0.5)

        # Key Masking
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))  # (N, T_k)
        key_masks = tf.tile(key_masks, [num_heads, 1])  # (h*N, T_k)
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])  # (h*N, T_q, T_k)

        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings,
                           outputs)  # (h*N, T_q, T_k)

        # Causality = Future blinding
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])  # (T_q, T_k)
            tril = tf.contrib.linalg.LinearOperatorTriL(
                diag_vals).to_dense()  # (T_q, T_k)
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])  # (h*N, T_q, T_k)

            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings,
                               outputs)  # (h*N, T_q, T_k)

        # Activation
        outputs = tf.nn.softmax(outputs)  # (h*N, T_q, T_k)

        # Query Masking
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries,
                                                   axis=-1)))  # (N, T_q)
        query_masks = tf.tile(query_masks, [num_heads, 1])  # (h*N, T_q)
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])  # (h*N, T_q, T_k)
        outputs *= query_masks  # broadcasting. (N, T_q, C)
        # Alignments
        alignments = tf.transpose(outputs, [0, 2, 1])
        # Dropouts
        outputs = tf.layers.dropout(outputs,
                                    rate=dropout_rate,
                                    training=tf.convert_to_tensor(is_training))

        # Weighted sum
        outputs = tf.matmul(outputs, V_)  # ( h*N, T_q, C/h)

        # Restore shape
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0),
                            axis=2)  # (N, T_q, C)

        # Residual connection
        outputs += queries

        # Normalize
        outputs = normalize(outputs)  # (N, T_q, C)

    return outputs, alignments
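A usage sketch (hypothetical shapes), assuming the TF 1.x API this function targets and the surrounding module's normalize helper:

queries = tf.placeholder(tf.float32, [None, 10, 512])  # (N, T_q, C_q)
keys = tf.placeholder(tf.float32, [None, 15, 512])     # (N, T_k, C_k)
outputs, alignments = multihead_attention(queries, keys, num_units=512,
                                          num_heads=8, dropout_rate=0.1,
                                          is_training=True)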