def conv2d(x, name, filter_size, in_channels, out_channels, strides, bias=True):
    """Apply a 2D convolution with 'SAME' padding and an optional bias.

    Args:
        x: input tensor handed to `tf.nn.conv2d`.
        name: variable scope under which the variables are created.
        filter_size: indexable pair, (kernel height, kernel width).
        in_channels: number of input channels of the kernel.
        out_channels: number of output channels of the kernel.
        strides: indexable pair of strides, placed at positions 1 and 2 of the
            4-element stride list given to `tf.nn.conv2d`.
        bias: when true, a zero-initialised bias of size `out_channels` is
            created and added to the convolution output.

    Returns:
        The convolution output tensor (with bias added when requested).
    """
    kernel_shape = [filter_size[0], filter_size[1], in_channels, out_channels]
    conv_strides = [1, strides[0], strides[1], 1]

    with tf.variable_scope(name):
        kernel = tf.get_variable(
            name='DW',
            shape=kernel_shape,
            dtype=tf.float32,
            initializer=tf.initializers.glorot_uniform())

        # The bias variable is created before the convolution op, and only
        # when requested.
        bias_var = None
        if bias:
            bias_var = tf.get_variable(
                name='bias',
                shape=[out_channels],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))

        out = tf.nn.conv2d(x, kernel, conv_strides, padding='SAME')
        if bias_var is not None:
            out = tf.nn.bias_add(out, bias_var)
        return out
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """Pool a sequence over its time axis with learned attention weights.

    Args:
        inputs: a rank-3 tensor, or a tuple of tensors which is concatenated
            along axis 2 first (e.g. forward/backward outputs of a Bi-RNN).
        attention_size: size A of the intermediate attention projection.
        time_major: when true, `inputs` is transposed with permutation
            [1, 0, 2], i.e. (T, B, D) => (B, T, D), before use.
        return_alphas: when true, the attention weights are returned as well.

    Returns:
        `output`, the weighted sum of `inputs` over axis 1, with shape (B, D).
        If `return_alphas` is true, a tuple `(output, alphas)` where `alphas`
        holds the softmax weights with a trailing unit axis, shape (B, T, 1).

    Note:
        Axes 1 and 2 of `inputs` must be statically known, because their
        `.value` is used in the reshapes below.
    """
    if isinstance(inputs, tuple):
        # Bi-RNN case: join forward and backward outputs on the feature axis.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T, B, D) => (B, T, D)
        inputs = tf.transpose(inputs, [1, 0, 2])

    num_steps = inputs.shape[1].value  # T
    feature_dim = inputs.shape[2].value  # D

    # Trainable parameters.
    W_omega = tf.get_variable(
        name='W_omega',
        initializer=tf.random_normal([feature_dim, attention_size], stddev=0.1))
    b_omega = tf.get_variable(
        name='b_omega',
        initializer=tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.get_variable(
        name='u_omega',
        initializer=tf.random_normal([attention_size, 1], stddev=0.1))

    # Fully connected layer with sigmoid activation applied to each of the
    # B*T frames: (B*T, D) x (D, A) -> (B, T, A).
    logging.info('attention inputs: {}'.format(inputs.shape))
    flat_inputs = tf.reshape(inputs, [-1, feature_dim])
    projected = tf.reshape(
        tf.matmul(flat_inputs, W_omega), [-1, num_steps, attention_size])
    hidden = tf.sigmoid(projected + b_omega)
    logging.info(f'attention vector: {hidden.shape}')

    # Reduce each frame's A-sized vector to a scalar energy with `u_omega`:
    # (B*T, A) x (A, 1) -> (B*T, 1) -> (B*T,) -> (B, T).
    flat_hidden = tf.reshape(hidden, [-1, attention_size])
    energy = tf.matmul(flat_hidden, u_omega)
    energy = tf.squeeze(energy, axis=-1)
    energy = tf.reshape(energy, [-1, num_steps])
    logging.info(f'attention energe: {energy.shape}')

    # Normalise over time, then add a trailing axis so the weights broadcast
    # against (B, T, D).
    alphas = tf.expand_dims(tf.nn.softmax(energy), -1)

    # (B, T, D) * (B, T, 1), summed over time -> (B, D).
    output = tf.reduce_sum(inputs * alphas, 1)

    if return_alphas:
        return output, alphas
    return output
def tdnn(x, name, in_dim, context, out_dim, has_bias=True, method='splice_layer'):
    """TDNN (time-delay neural network) layer.

    Args:
        x: input tensor.
        name: variable scope under which the layer's variables are created.
        in_dim: feature dimension of a single input frame.
        context: an int giving the left and right context (the layer then
            sees `2 * context + 1` frames), or an iterable of context
            indexes, e.g. (-2, 0, 2).
        out_dim: output feature dimension.
        has_bias: whether a bias term is added.
        method: one of
            'splice_layer': use column-first patch-based copy.
            'splice_op': use row-first while_loop copy.
            'conv1d': use conv1d as TDNN equivalence.

    Returns:
        The layer output tensor.

    Raises:
        ValueError: if `context` is an iterable and `method` is 'splice_op'
            or 'conv1d', or if `method` is not one of the supported values.
    """
    if hasattr(context, '__iter__'):
        context_size = len(context)
        if method in ('splice_op', 'conv1d'):
            msg = 'Method splice_op and conv1d does not support context list.'
            raise ValueError(msg)
        context_list = context
    else:
        # Symmetric context: `context` frames on each side plus the centre.
        context_size = context * 2 + 1
        context_list = range(-context, context + 1)

    with tf.variable_scope(name):
        if method == 'splice_layer':
            x = splice_layer(x, 'splice', context_list)
            x = linear(x,
                       'linear', [in_dim * context_size, out_dim],
                       has_bias=has_bias)
        elif method == 'splice_op':
            x = speech_ops.splice(x, context, context)
            x = linear(x,
                       'linear', [in_dim * context_size, out_dim],
                       has_bias=has_bias)
        elif method == 'conv1d':
            # The kernel must span the whole window (`context_size` frames),
            # not just the half-width `context`; otherwise this branch covers
            # fewer frames than the splice-based methods and is not
            # equivalent to them.
            # NOTE: this changes the shape of 'DW' relative to the previous
            # [context, in_dim, out_dim]; checkpoints trained with the old
            # width will not restore into this variable.
            kernel = tf.get_variable(
                name='DW',
                shape=[context_size, in_dim, out_dim],
                dtype=tf.float32,
                initializer=tf.glorot_uniform_initializer())
            x = tf.nn.conv1d(x, kernel, stride=1, padding='SAME')
            if has_bias:
                b = tf.get_variable(
                    name='bias',
                    shape=[out_dim],
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(0.0))
                x = tf.nn.bias_add(x, b)
        else:
            raise ValueError('Unsupported method: %s.' % (method))
        return x
def linear(x, names, shapes, has_bias=True):
    """Linear (fully connected) layer: `x @ weights`, optionally plus a bias.

    Args:
        x: input tensor, used as the left operand of `tf.matmul`.
        names: variable scope under which the variables are created.
        shapes: two-element sequence giving the weight shape; its second
            entry is also used as the bias shape.
        has_bias: whether a bias variable is created and added.

    Returns:
        `tf.matmul(x, weights)`, plus the bias when `has_bias` is true.
    """
    assert len(shapes) == 2

    with tf.variable_scope(names):
        weights = tf.get_variable(
            name='weights',
            shape=shapes,
            initializer=tf.initializers.glorot_uniform())

        if not has_bias:
            return tf.matmul(x, weights)

        # The bias uses the same Glorot-uniform initializer as the weights.
        bias = tf.get_variable(
            name='bias',
            shape=shapes[1],
            initializer=tf.initializers.glorot_uniform())
        return tf.matmul(x, weights) + bias
def get_pos_embedding_matrix(max_len, embed_dim, use_const, name):
    """Build a position embedding matrix, either constant or trainable.

    Args:
        max_len: number of positions.
        embed_dim: embedding dimension per position.
        use_const: when true, build the fixed sinusoidal table; otherwise
            create a trainable variable.
        name: name of the trainable variable (only used when `use_const` is
            false).

    Returns:
        pos_embed: a float32 tensor of shape [1, max_len, embed_dim] (a
        leading unit axis is added in both branches).
    """
    if not use_const:
        table = tf.get_variable(
            name=name,
            shape=[max_len, embed_dim],
            initializer=tf.random_uniform_initializer(-0.1, 0.1))
        return tf.expand_dims(table, 0)

    # First part of the PE function: the sin/cos argument. Columns 2i and
    # 2i+1 share the denominator 10000^(2i / embed_dim), which depends on the
    # column only, so it is computed once per column.
    denominators = [
        np.power(10000, (col - col % 2) / embed_dim) for col in range(embed_dim)
    ]
    angles = np.array(
        [[pos / denom for denom in denominators] for pos in range(max_len)])

    # Second part: sine on even columns, cosine on odd columns.
    angles[:, 0::2] = np.sin(angles[:, 0::2])  # dim 2i
    angles[:, 1::2] = np.cos(angles[:, 1::2])  # dim 2i+1

    # Add the leading unit axis and convert to a float32 tensor.
    return tf.cast(angles[np.newaxis, ...], dtype=tf.float32)
def embedding_look_up(text_inputs, vocab_size, embedding_size):
    """Embedding layer: look up ids and append a trailing unit axis.

    Args:
        text_inputs: ids passed to `tf.nn.embedding_lookup`.
        vocab_size: number of rows of the embedding table.
        embedding_size: number of columns of the embedding table.

    Returns:
        The looked-up embeddings with an extra last axis of size 1.
    """
    with tf.variable_scope("embedding"):
        # Table initialised uniformly in [-1, 1).
        initial_table = tf.random_uniform([vocab_size, embedding_size], -1.0,
                                          1.0)
        W = tf.get_variable(name='W', initializer=initial_table)
        looked_up = tf.nn.embedding_lookup(W, text_inputs)
        return tf.expand_dims(looked_up, -1)
def prelu_layer(self, x, name, num_parameters=1, init=0.25):
    """Parametric ReLU: `max(0, x) + alpha * min(0, x)` with learnable alpha.

    Args:
        x: input tensor.
        name: name of the `alpha` variable.
        num_parameters: when 1, a single shared slope is learned; for any
            other value one slope per entry of the last axis of `x` is used.
        init: constant initial value of `alpha`.

    Returns:
        A tensor with the activation applied to `x`.
    """
    alpha_shape = 1 if num_parameters == 1 else x.get_shape()[-1]
    alpha = tf.get_variable(
        name,
        shape=alpha_shape,
        dtype=x.dtype,
        initializer=tf.constant_initializer(init))

    positive_part = tf.maximum(0.0, x)
    negative_part = alpha * tf.minimum(0.0, x)
    return positive_part + negative_part
def conv_pool(embedded_chars_expanded, filter_sizes, embedding_size,
              num_filters, sequence_length):
    """Text convolution plus max-pooling, producing one flat vector per example.

    For every filter size a convolution ('VALID' padding), bias add, ReLU and
    a max-pool over `sequence_length - filter_size + 1` positions are applied;
    the pooled results are concatenated on axis 3 and flattened.

    :param embedded_chars_expanded: input to `tf.nn.conv2d`; the filters have
        a single input channel.
    :param filter_sizes: sized iterable of filter heights.
    :param embedding_size: filter width.
    :param num_filters: number of filters per filter size.
    :param sequence_length: used to size the pooling window.
    :return: tensor reshaped to [-1, num_filters * len(filter_sizes)].
    """
    per_size_features = []
    for height in filter_sizes:
        with tf.variable_scope("conv-maxpool-%s" % height):
            # Convolution layer.
            kernel_shape = [height, embedding_size, 1, num_filters]
            W = tf.get_variable(
                name='W',
                initializer=tf.truncated_normal(kernel_shape, stddev=0.1))
            b = tf.get_variable(
                name='b', initializer=tf.constant(0.1, shape=[num_filters]))
            conv = tf.nn.conv2d(
                embedded_chars_expanded,
                W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")

            # Non-linearity.
            activated = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

            # Max-pool across all positions left after the VALID convolution.
            pool_window = [1, sequence_length - height + 1, 1, 1]
            per_size_features.append(
                tf.nn.max_pool(
                    activated,
                    ksize=pool_window,
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool"))

    # Combine all the pooled features.
    total_filters = num_filters * len(filter_sizes)
    merged = tf.concat(per_size_features, 3)
    return tf.reshape(merged, [-1, total_filters])
def logits_layer(self, x, labels):
    """Logits layer to further produce softmax.

    Reads `self.taskconf['classes']['num']` for the number of outputs and
    `self.netconf['logits_type']` / `self.netconf['logits_weight_init']` for
    the layer type and weight initialisation.

    Args:
        x: input tensor; its last dimension must be statically known.
        labels: labels, forwarded to `self.arcface_layer` for the 'arcface'
            type. When None, `x` is returned unchanged.

    Returns:
        The logits tensor, or `x` itself when `labels` is None.

    Raises:
        ValueError: if the configured weight init type or logits type is not
            supported.
    """
    if labels is None:
        # serving export mode, no need for logits
        return x

    output_num = self.taskconf['classes']['num']
    logits_type = self.netconf['logits_type']
    logits_shape = [x.shape[-1].value, output_num]

    with tf.variable_scope('logits'):
        init_type = self.netconf['logits_weight_init']['type']
        if init_type == 'truncated_normal':
            stddev = self.netconf['logits_weight_init']['stddev']
            init = tf.truncated_normal_initializer(stddev=stddev)
        elif init_type == 'xavier_uniform':
            init = tf.contrib.layers.xavier_initializer(uniform=True)
        elif init_type == 'xavier_norm':
            init = tf.contrib.layers.xavier_initializer(uniform=False)
        else:
            raise ValueError('Unsupported weight init type: %s' % (init_type))

        weights = tf.get_variable(
            name='weights', shape=logits_shape, initializer=init)

        if logits_type == 'linear':
            bias = tf.get_variable(
                name='bias',
                shape=logits_shape[1],
                initializer=tf.constant_initializer(0.0))
            return tf.matmul(x, weights) + bias
        elif logits_type == 'linear_no_bias':
            return tf.matmul(x, weights)
        elif logits_type == 'arcface':
            return self.arcface_layer(x, labels, output_num, weights)
        else:
            # Fail loudly instead of silently returning None, which would
            # only surface later as an unrelated error downstream.
            raise ValueError('Unsupported logits type: %s' % (logits_type))
def arcface_loss(embedding,
                 labels,
                 out_num,
                 weights=None,
                 s=64.,
                 m=0.5,
                 limit_to_pi=True):
    """Compute ArcFace (additive angular margin) logits.

    Adapted from
    https://github.com/auroua/InsightFace_TF/blob/master/losses/face_losses.py

    :param embedding: the input embedding vectors; L2-normalised along
        axis 1 inside this function.
    :param labels: the input labels, fed to `tf.one_hot`.
        NOTE(review): the one-hot mask is not squeezed, so labels of shape
        (batch_size,) appear to be required for the mask to line up with the
        (batch_size, out_num) logits; the upstream code documents
        (batch_size, 1) together with a squeeze that is disabled here.
        Confirm against callers.
    :param out_num: output class num
    :param weights: a tf.variable with shape (embedding.shape[-1], out_num),
        or None to make a new one internally. default = None
    :param s: scalar value default is 64
    :param m: the margin value, default is 0.5
    :param limit_to_pi: when True, entries whose cosine does not exceed
        cos(pi - m) use the fallback `s * (cos_t - sin(m) * m)` instead of
        `s * cos(theta + m)`.
    :return: the final calculated output, to be sent into softmax directly:
        `s * cos_t` for non-target classes and the margin-adjusted value at
        the target class.
    """
    cos_m = math.cos(m)
    sin_m = math.sin(m)
    mm = sin_m * m  # issue 1
    threshold = math.cos(math.pi - m)

    with tf.variable_scope('arcface_loss'):
        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
        embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
        if weights is None:
            # Fixed: the initializer was misspelled
            # (`tf.initializer.glorot_unifrom`), which raised AttributeError
            # whenever no weights were passed in.
            weights = tf.get_variable(
                name='weights',
                shape=[embedding.shape[-1].value, out_num],
                initializer=tf.initializers.glorot_uniform())
        weights_norm = tf.norm(weights, axis=0, keep_dims=True)
        weights = tf.div(weights, weights_norm, name='norm_weights')

        # cos(theta + m) = cos(theta) * cos(m) - sin(theta) * sin(m)
        cos_t = tf.matmul(embedding, weights, name='cos_t')
        cos_t2 = tf.square(cos_t, name='cos_2')
        sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
        sin_t = tf.sqrt(sin_t2, name='sin_t')
        cos_mt = s * tf.subtract(
            tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')

        if limit_to_pi:
            # this condition controls the theta+m should in range [0, pi]
            #   0 <= theta + m <= pi
            #   -m <= theta <= pi - m
            cond_v = cos_t - threshold
            # relu(...) is positive exactly where cos_t > threshold, so the
            # boolean cast selects the entries where the margin may be applied.
            cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)
            keep_val = s * (cos_t - mm)
            cos_mt_temp = tf.where(cond, cos_mt, keep_val)
        else:
            cos_mt_temp = cos_mt

        mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
        # mask = tf.squeeze(mask, 1)
        inv_mask = tf.subtract(1., mask, name='inverse_mask')

        s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t')

        # Plain scaled cosine everywhere except the target class, which gets
        # the margin-adjusted value.
        output = tf.add(
            tf.multiply(s_cos_t, inv_mask),
            tf.multiply(cos_mt_temp, mask),
            name='arcface_loss_output')
    return output
def main(_):
    """Average the variables of several checkpoints and save the result.

    The checkpoints are taken either from the comma-separated
    `FLAGS.checkpoints` (each optionally prefixed with `FLAGS.prefix`), or,
    when that flag is empty, from the last `FLAGS.num_last_checkpoints`
    entries recorded in the checkpoint state of the directory containing
    `FLAGS.prefix`. Every variable except those whose name starts with
    "global_step" is averaged, and the result is written to
    `FLAGS.output_path`.

    Raises:
        ValueError: if no checkpoints are given, or none of them can be found.
    """
    if FLAGS.checkpoints:
        # Get the checkpoints list from flags and run some basic checks.
        checkpoints = [c.strip() for c in FLAGS.checkpoints.split(",")]
        checkpoints = [c for c in checkpoints if c]
        if not checkpoints:
            raise ValueError("No checkpoints provided for averaging.")
        if FLAGS.prefix:
            checkpoints = [FLAGS.prefix + c for c in checkpoints]
    else:
        assert FLAGS.num_last_checkpoints >= 1, "Must average at least one model"
        assert FLAGS.prefix, ("Prefix must be provided when averaging last"
                              " N checkpoints")
        checkpoint_dir = os.path.dirname(FLAGS.prefix)
        checkpoint_state = tf.train.get_checkpoint_state(checkpoint_dir)
        if checkpoint_state is None:
            # No checkpoint state could be loaded for this directory; report
            # it the same way as an empty checkpoint list instead of failing
            # with an AttributeError on None.
            raise ValueError("Could not find checkpoints at %s" % checkpoint_dir)
        # Checkpoints are ordered from oldest to newest.
        checkpoints = checkpoint_state.all_model_checkpoint_paths[
            -FLAGS.num_last_checkpoints:]

    checkpoints = [c for c in checkpoints if checkpoint_exists(c)]
    if not checkpoints:
        if FLAGS.checkpoints:
            raise ValueError("None of the provided checkpoints exist. %s" %
                             FLAGS.checkpoints)
        else:
            raise ValueError("Could not find checkpoints at %s" %
                             os.path.dirname(FLAGS.prefix))

    # Read variables from all checkpoints and average them.
    logging.info("Reading variables and averaging checkpoints:")
    for c in checkpoints:
        logging.info("%s ", c)

    # The variable list of the first checkpoint defines what gets averaged.
    var_list = tf.train.list_variables(checkpoints[0])
    var_values, var_dtypes = {}, {}
    for (name, shape) in var_list:
        if not name.startswith("global_step"):
            var_values[name] = np.zeros(shape)

    for checkpoint in checkpoints:
        reader = tf.train.load_checkpoint(checkpoint)
        for name in var_values:
            tensor = reader.get_tensor(name)
            var_dtypes[name] = tensor.dtype
            var_values[name] += tensor
        logging.info("Read from checkpoint %s", checkpoint)

    for name in var_values:  # Average.
        var_values[name] /= len(checkpoints)

    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        tf_vars = [
            tf.get_variable(v, shape=var_values[v].shape, dtype=var_dtypes[v])
            for v in var_values
        ]
    placeholders = [tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars]
    assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]
    global_step = tf.Variable(
        0, name="global_step", trainable=False, dtype=tf.int64)
    # tf.global_variables() replaces the deprecated tf.all_variables().
    saver = tf.train.Saver(tf.global_variables())

    # Build a model consisting only of variables, set them to the average
    # values.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # `tf_vars` was built by iterating `var_values`, so the placeholders,
        # assign ops and averaged values line up in the same order.
        for p, assign_op, value in zip(placeholders, assign_ops,
                                       var_values.values()):
            sess.run(assign_op, {p: value})
        # Use the built saver to save the averaged checkpoint.
        saver.save(sess, FLAGS.output_path, global_step=global_step)

    logging.info("Averaged checkpoints saved in %s", FLAGS.output_path)