Example #1
def conv2d(x,
           name,
           filter_size,
           in_channels,
           out_channels,
           strides,
           bias=True):
    """2D convolution."""
    with tf.variable_scope(name):
        kernel = tf.get_variable(
            name='DW',
            shape=[filter_size[0], filter_size[1], in_channels, out_channels],
            dtype=tf.float32,
            initializer=tf.initializers.glorot_uniform())
        if bias:
            b = tf.get_variable(name='bias',
                                shape=[out_channels],
                                dtype=tf.float32,
                                initializer=tf.constant_initializer(0.0))
        out = tf.nn.conv2d(x,
                           kernel, [1, strides[0], strides[1], 1],
                           padding='SAME')
        if bias:
            out = tf.nn.bias_add(out, b)
        return out
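A minimal usage sketch (not from the original source), assuming TensorFlow 1.x graph mode and that the conv2d helper above is in scope; the layer name and shapes are illustrative:

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 32, 32, 3])  # NHWC input batch
feat = conv2d(images, 'conv1', filter_size=(3, 3), in_channels=3,
              out_channels=16, strides=(1, 1))  # -> [None, 32, 32, 16] with SAME padding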
Example #2
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """Attention layer."""
    if isinstance(inputs, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        inputs = tf.transpose(inputs, [1, 0, 2])

    time_size = inputs.shape[1].value  # T value - time size of the RNN layer
    hidden_size = inputs.shape[2].value  # D value - hidden size of the RNN layer

    # Trainable parameters
    W_omega = tf.get_variable(name='W_omega',
                              initializer=tf.random_normal(
                                  [hidden_size, attention_size], stddev=0.1))
    b_omega = tf.get_variable(name='b_omega',
                              initializer=tf.random_normal([attention_size],
                                                           stddev=0.1))
    u_omega = tf.get_variable(name='u_omega',
                              initializer=tf.random_normal([attention_size, 1],
                                                           stddev=0.1))

    # Apply a fully connected layer with a non-linear activation to each of the B*T timesteps;
    #  the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
    #v = tf.tanh(tf.tensordot(inputs, W_omega, axes=1) + b_omega)
    #v = tf.sigmoid(tf.tensordot(inputs, W_omega, axes=1) + b_omega)
    # (B, T, D) dot (D, Atten)

    logging.info('attention inputs: {}'.format(inputs.shape))
    inputs_reshaped = tf.reshape(inputs, [-1, hidden_size])
    dot = tf.matmul(inputs_reshaped, W_omega)
    dot = tf.reshape(dot, [-1, time_size, attention_size])
    v = tf.sigmoid(dot + b_omega)
    logging.info(f'attention vector: {v.shape}')
    # For each timestep, its size-A vector from `v` is reduced with the `u` vector
    # (B, T, Atten) dot (Atten)
    #vu = tf.tensordot(v, u_omega, axes=1)   # (B,T) shape
    v = tf.reshape(v, [-1, attention_size])
    vu = tf.matmul(v, u_omega)  # (B,T) shape
    vu = tf.squeeze(vu, axis=-1)
    vu = tf.reshape(vu, [-1, time_size])
    logging.info(f'attention energy: {vu.shape}')
    alphas = tf.nn.softmax(vu)  # (B,T) shape also

    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
    # [batch, time] -> [batch, time, 1]
    alphas = tf.expand_dims(alphas, -1)
    # [batch, time, dim] -> [batch, dim]
    output = tf.reduce_sum(inputs * alphas, 1)

    if not return_alphas:
        return output

    return output, alphas
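A hypothetical call sketch, assuming TF 1.x and RNN outputs with a statically known time dimension (the function reads inputs.shape[1].value at graph-build time); the shapes and attention_size below are illustrative:

import tensorflow as tf

rnn_outputs = tf.placeholder(tf.float32, [None, 50, 128])  # (B, T, D)
context, alphas = attention(rnn_outputs, attention_size=64, return_alphas=True)
# context: (B, 128) attention-weighted sum over time; alphas: (B, 50, 1)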
Example #3
def tdnn(x,
         name,
         in_dim,
         context,
         out_dim,
         has_bias=True,
         method='splice_layer'):
    '''
  TDNN implementation.

  Args:
    context:
      an int of symmetric left and right context, or
      a list of context indexes, e.g. (-2, 0, 2).
    method:
      splice_layer: use column-first patch-based copy.
      splice_op: use row-first while_loop copy.
      conv1d: use conv1d as TDNN equivalence.
  '''
    if hasattr(context, '__iter__'):
        context_size = len(context)
        if method in ('splice_op', 'conv1d'):
            msg = 'Methods splice_op and conv1d do not support a context list.'
            raise ValueError(msg)
        context_list = context
    else:
        context_size = context * 2 + 1
        context_list = range(-context, context + 1)
    with tf.variable_scope(name):
        if method == 'splice_layer':
            x = splice_layer(x, 'splice', context_list)
            x = linear(x,
                       'linear', [in_dim * context_size, out_dim],
                       has_bias=has_bias)
        elif method == 'splice_op':
            x = speech_ops.splice(x, context, context)
            x = linear(x,
                       'linear', [in_dim * context_size, out_dim],
                       has_bias=has_bias)
        elif method == 'conv1d':
            kernel = tf.get_variable(
                name='DW',
                shape=[context, in_dim, out_dim],
                dtype=tf.float32,
                initializer=tf.glorot_uniform_initializer())
            x = tf.nn.conv1d(x, kernel, stride=1, padding='SAME')
            if has_bias:
                b = tf.get_variable(name='bias',
                                    shape=[out_dim],
                                    dtype=tf.float32,
                                    initializer=tf.constant_initializer(0.0))
                x = tf.nn.bias_add(x, b)
        else:
            raise ValueError('Unsupported method: %s.' % (method))
        return x
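A minimal call sketch (assumptions: TF 1.x; the tdnn helper above is in scope). Note that with method='conv1d' the context argument is used directly as the kernel width:

import tensorflow as tf

frames = tf.placeholder(tf.float32, [None, 100, 40])  # (B, T, feat_dim)
out = tdnn(frames, 'tdnn1', in_dim=40, context=5, out_dim=256, method='conv1d')
# out: (B, 100, 256) with SAME padding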
Example #4
def linear(x, names, shapes, has_bias=True):
    """Linear Layer."""
    assert len(shapes) == 2
    with tf.variable_scope(names):
        weights = tf.get_variable(name='weights',
                                  shape=shapes,
                                  initializer=tf.initializers.glorot_uniform())
        if has_bias:
            bias = tf.get_variable(
                name='bias',
                shape=shapes[1],
                initializer=tf.initializers.glorot_uniform())
            return tf.matmul(x, weights) + bias
        else:
            return tf.matmul(x, weights)
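A minimal call sketch (assumption: TF 1.x; the linear helper above is in scope):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 128])
y = linear(x, 'fc1', [128, 10])  # xW (+ b) -> (None, 10)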
Example #5
File: sub_tf.py  Project: zhjou/delta
    def get_pos_embedding_matrix(max_len, embed_dim, use_const, name):
        """
    Generate a position embedding matrix. Two variants are supported:
    a constant (untrainable) sinusoidal table and a trainable one.
    Args:
      max_len, embed_dim, use_const, name

    Return:
      pos_embed: [1, max_len, embed_dim]
    """
        # First part of the PE function: sin and cos argument
        if use_const:
            pos_embed = np.array([[
                pos / np.power(10000, (i - i % 2) / embed_dim)
                for i in range(embed_dim)
            ] for pos in range(max_len)])

            # Second part: apply sin to the even columns and cos to the odd ones.
            pos_embed[:, 0::2] = np.sin(pos_embed[:, 0::2])  # dim 2i
            pos_embed[:, 1::2] = np.cos(pos_embed[:, 1::2])  # dim 2i+1
            pos_embed = pos_embed[np.newaxis, ...]
            pos_embed = tf.cast(pos_embed, dtype=tf.float32)
        else:
            pos_embed = tf.get_variable(
                name=name,
                shape=[max_len, embed_dim],
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            pos_embed = tf.expand_dims(pos_embed, 0)

        return pos_embed
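A hypothetical call (assumptions: TF 1.x; the function above is reachable as a plain function, e.g. as a staticmethod); max_len and embed_dim are illustrative:

pos_embed = get_pos_embedding_matrix(max_len=100, embed_dim=512,
                                     use_const=True, name='pos_embed')
# pos_embed: (1, 100, 512) constant sinusoidal table, broadcastable over the batch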
Example #6
def embedding_look_up(text_inputs, vocab_size, embedding_size):
  """Embedding layer."""
  with tf.variable_scope("embedding"):
    W = tf.get_variable(
        name='W',
        initializer=tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
    embedding_chars = tf.nn.embedding_lookup(W, text_inputs)
    embedding_chars_expanded = tf.expand_dims(embedding_chars, -1)
  return embedding_chars_expanded
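A minimal call sketch (assumption: TF 1.x; the embedding_look_up helper above is in scope). The trailing dimension it adds is what a 2-D text convolution such as conv_pool in Example #8 expects:

import tensorflow as tf

token_ids = tf.placeholder(tf.int32, [None, 100])  # (B, T) token ids
embedded = embedding_look_up(token_ids, vocab_size=30000, embedding_size=256)
# embedded: (B, 100, 256, 1)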
Example #7
    def prelu_layer(self, x, name, num_parameters=1, init=0.25):
        """Parametric ReLU: max(0, x) + alpha * min(0, x) with learnable alpha."""
        if num_parameters == 1:
            shape = 1
        else:
            shape = x.get_shape()[-1]
        alpha = tf.get_variable(name,
                                shape=shape,
                                dtype=x.dtype,
                                initializer=tf.constant_initializer(init))
        return tf.maximum(0.0, x) + alpha * tf.minimum(0.0, x)
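The method never reads self, so a hypothetical standalone call (TF 1.x assumed) can pass None for it; with the default num_parameters=1 a single slope alpha is shared across all channels:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 64])
y = prelu_layer(None, x, 'prelu1')  # max(0, x) + alpha * min(0, x)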
Example #8
def conv_pool(embedded_chars_expanded, filter_sizes, embedding_size,
              num_filters, sequence_length):
    """
    Text convolution and max pooling to get a one-dimensional vector representation of the text.
    :param filter_sizes: list of convolution filter heights (in tokens).
    :return: a [batch, num_filters * len(filter_sizes)] tensor.
    """
    pooled_outputs = []
    for _, filter_size in enumerate(filter_sizes):
        with tf.variable_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.get_variable(name='W',
                                initializer=tf.truncated_normal(filter_shape,
                                                                stddev=0.1))
            b = tf.get_variable(name='b',
                                initializer=tf.constant(0.1,
                                                        shape=[num_filters]))
            conv = tf.nn.conv2d(embedded_chars_expanded,
                                W,
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)
    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)

    h_pool = tf.concat(pooled_outputs, 3)

    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
    return h_pool_flat
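A minimal call sketch (assumption: TF 1.x), feeding conv_pool the expanded embeddings produced by the embedding_look_up helper shown earlier; the filter sizes and counts are illustrative:

import tensorflow as tf

token_ids = tf.placeholder(tf.int32, [None, 100])
embedded = embedding_look_up(token_ids, vocab_size=30000, embedding_size=256)
text_vec = conv_pool(embedded, filter_sizes=[3, 4, 5], embedding_size=256,
                     num_filters=128, sequence_length=100)
# text_vec: (B, 3 * 128) = (B, 384)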
Example #9
    def logits_layer(self, x, labels):
        ''' Logits layer to further produce softmax. '''
        if labels is None:
            # serving export mode, no need for logits
            return x

        output_num = self.taskconf['classes']['num']
        logits_type = self.netconf['logits_type']
        logits_shape = [x.shape[-1].value, output_num]

        with tf.variable_scope('logits'):
            init_type = self.netconf['logits_weight_init']['type']
            if init_type == 'truncated_normal':
                stddev = self.netconf['logits_weight_init']['stddev']
                init = tf.truncated_normal_initializer(stddev=stddev)
            elif init_type == 'xavier_uniform':
                init = tf.contrib.layers.xavier_initializer(uniform=True)
            elif init_type == 'xavier_norm':
                init = tf.contrib.layers.xavier_initializer(uniform=False)
            else:
                raise ValueError('Unsupported weight init type: %s' %
                                 (init_type))

            weights = tf.get_variable(name='weights',
                                      shape=logits_shape,
                                      initializer=init)

            if logits_type == 'linear':
                bias = tf.get_variable(
                    name='bias',
                    shape=logits_shape[1],
                    initializer=tf.constant_initializer(0.0))
                return tf.matmul(x, weights) + bias
            elif logits_type == 'linear_no_bias':
                return tf.matmul(x, weights)
            elif logits_type == 'arcface':
                return self.arcface_layer(x, labels, output_num, weights)
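The method is driven by the model's config dictionaries; a sketch of the keys it reads (the concrete values below are illustrative assumptions, not from the original project):

netconf = {
    'logits_type': 'linear',  # or 'linear_no_bias' / 'arcface'
    'logits_weight_init': {'type': 'truncated_normal', 'stddev': 0.02},
}
taskconf = {'classes': {'num': 10}}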
Example #10
def arcface_loss(embedding,
                 labels,
                 out_num,
                 weights=None,
                 s=64.,
                 m=0.5,
                 limit_to_pi=True):
    '''
  https://github.com/auroua/InsightFace_TF/blob/master/losses/face_losses.py
  :param embedding: the input embedding vectors
  :param labels:  the input labels, the shape should be eg: (batch_size, 1)
  :param s: scalar value default is 64
  :param out_num: output class num
  :param weights: a tf.variable with shape (embedding.shape[-1], out_num)
                  or None to make a new one internally. default = None
  :param m: the margin value, default is 0.5
  :return: the final calculated output; this output is sent into tf.nn.softmax directly
  '''
    cos_m = math.cos(m)
    sin_m = math.sin(m)
    mm = sin_m * m  # issue 1
    threshold = math.cos(math.pi - m)
    with tf.variable_scope('arcface_loss'):
        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
        embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
        if weights is None:
            weights = tf.get_variable(
                name='weights',
                shape=[embedding.shape[-1].value, out_num],
                initializer=tf.initializers.glorot_uniform())
        weights_norm = tf.norm(weights, axis=0, keep_dims=True)
        weights = tf.div(weights, weights_norm, name='norm_weights')
        # cos(theta+m)
        cos_t = tf.matmul(embedding, weights, name='cos_t')
        cos_t2 = tf.square(cos_t, name='cos_2')
        sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
        sin_t = tf.sqrt(sin_t2, name='sin_t')
        cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m),
                                 tf.multiply(sin_t, sin_m),
                                 name='cos_mt')

        if limit_to_pi:
            # this condition controls the theta+m should in range [0, pi]
            #      0<=theta+m<=pi
            #     -m<=theta<=pi-m
            cond_v = cos_t - threshold
            cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)

            keep_val = s * (cos_t - mm)
            cos_mt_temp = tf.where(cond, cos_mt, keep_val)
        else:
            cos_mt_temp = cos_mt

        mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
        # mask = tf.squeeze(mask, 1)
        inv_mask = tf.subtract(1., mask, name='inverse_mask')

        s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t')

        output = tf.add(tf.multiply(s_cos_t, inv_mask),
                        tf.multiply(cos_mt_temp, mask),
                        name='arcface_loss_output')
    return output
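A minimal usage sketch (assumptions: TF 1.x; 1-D integer class ids, although the docstring also allows a (batch_size, 1) shape together with the commented-out squeeze); the embedding size and class count are illustrative:

import tensorflow as tf

embeddings = tf.placeholder(tf.float32, [None, 512])
labels = tf.placeholder(tf.int64, [None])
logits = arcface_loss(embeddings, labels, out_num=1000)  # margin-adjusted, scaled logits
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))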
Example #11
def main(_):
    if FLAGS.checkpoints:
        # Get the checkpoints list from flags and run some basic checks.
        checkpoints = [c.strip() for c in FLAGS.checkpoints.split(",")]
        checkpoints = [c for c in checkpoints if c]
        if not checkpoints:
            raise ValueError("No checkpoints provided for averaging.")
        if FLAGS.prefix:
            checkpoints = [FLAGS.prefix + c for c in checkpoints]
    else:
        assert FLAGS.num_last_checkpoints >= 1, "Must average at least one model"
        assert FLAGS.prefix, ("Prefix must be provided when averaging last"
                              " N checkpoints")
        checkpoint_state = tf.train.get_checkpoint_state(
            os.path.dirname(FLAGS.prefix))
        # Checkpoints are ordered from oldest to newest.
        checkpoints = checkpoint_state.all_model_checkpoint_paths[
            -FLAGS.num_last_checkpoints:]

    checkpoints = [c for c in checkpoints if checkpoint_exists(c)]
    if not checkpoints:
        if FLAGS.checkpoints:
            raise ValueError("None of the provided checkpoints exist. %s" %
                             FLAGS.checkpoints)
        else:
            raise ValueError("Could not find checkpoints at %s" %
                             os.path.dirname(FLAGS.prefix))

    # Read variables from all checkpoints and average them.
    logging.info("Reading variables and averaging checkpoints:")
    for c in checkpoints:
        logging.info("%s ", c)
    var_list = tf.train.list_variables(checkpoints[0])
    var_values, var_dtypes = {}, {}
    for (name, shape) in var_list:
        if not name.startswith("global_step"):
            var_values[name] = np.zeros(shape)
    for checkpoint in checkpoints:
        reader = tf.train.load_checkpoint(checkpoint)
        for name in var_values:
            tensor = reader.get_tensor(name)
            var_dtypes[name] = tensor.dtype
            var_values[name] += tensor
        logging.info("Read from checkpoint %s", checkpoint)
    for name in var_values:  # Average.
        var_values[name] /= len(checkpoints)

    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        tf_vars = [
            tf.get_variable(v, shape=var_values[v].shape, dtype=var_dtypes[v])
            for v in var_values
        ]
    placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
    assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]
    global_step = tf.Variable(0,
                              name="global_step",
                              trainable=False,
                              dtype=tf.int64)
    saver = tf.train.Saver(tf.all_variables())

    # Build a model consisting only of variables, set them to the average values.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for p, assign_op, (name, value) in zip(placeholders, assign_ops,
                                               six.iteritems(var_values)):
            sess.run(assign_op, {p: value})
        # Use the built saver to save the averaged checkpoint.
        saver.save(sess, FLAGS.output_path, global_step=global_step)

    logging.info("Averaged checkpoints saved in %s", FLAGS.output_path)