def conv_block(x, shortcut, filter_width=3, filter_channel=64,
               is_training=True, scope=None):
    x_dim = x.get_shape()[2]
    with tf.variable_scope(scope or "conv_block"):
        # Convolution 1 step
        W1 = tf.get_variable(name="W1",
                             shape=[filter_width, x_dim, filter_channel],
                             initializer=initializers.get("glorot_uniform"))
        x = tf.nn.conv1d(x, W1, stride=1, padding="SAME")
        x = layers.dropout(x, keep_prob=0.7, is_training=is_training)
        x = tf.nn.relu(x)

        # Convolution 2 step: the first convolution changed the channel
        # dimension to filter_channel, so W2's input-channel axis must be
        # filter_channel rather than the original x_dim.
        W2 = tf.get_variable(name="W2",
                             shape=[filter_width, filter_channel, filter_channel],
                             initializer=initializers.get("glorot_uniform"))
        x = tf.nn.conv1d(x, W2, stride=1, padding="SAME")
        x = layers.dropout(x, keep_prob=0.7, is_training=is_training)
        x = tf.nn.relu(x)

        # Residual connection
        if shortcut is not None:
            return shortcut + x
        return x
def __init__(self, W_regularizer=None, b_regularizer=None,
             W_constraint=None, b_constraint=None,
             bias=True, **kwargs):
    """
    Keras Layer that implements an Attention mechanism for temporal data.
    Supports Masking.
    Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756]

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.

    Just put it on top of an RNN layer (GRU/LSTM/SimpleRNN) with
    return_sequences=True. The dimensions are inferred from the output
    shape of the RNN.

    Note: the layer has been tested with Keras 2.0.6.

    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(Attention())
        # next add a Dense layer (for classification/regression) or whatever...
    """
    self.supports_masking = True
    self.init = initializers.get('glorot_uniform')

    self.W_regularizer = regularizers.get(W_regularizer)
    self.b_regularizer = regularizers.get(b_regularizer)

    self.W_constraint = constraints.get(W_constraint)
    self.b_constraint = constraints.get(b_constraint)

    self.bias = bias
    super(Attention, self).__init__(**kwargs)
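# A minimal sketch (an assumption, not this layer's actual build/call, which
# is not shown here) of the forward pass such a layer implements, following
# Raffel et al.: score each timestep with a learned vector W (plus optional
# bias b), softmax over steps, and return the attention-weighted sum.
import numpy as np

def feed_forward_attention(x, W, b=None):
    # x: (samples, steps, features); W: (features,); b: (steps,) or None
    e = np.tanh(x @ W + (b if b is not None else 0.0))  # (samples, steps)
    a = np.exp(e - e.max(axis=1, keepdims=True))
    a = a / a.sum(axis=1, keepdims=True)                # softmax over steps
    return (x * a[..., None]).sum(axis=1)               # (samples, features)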
def apply(self, is_train, x, mask=None):
    return fully_connected(x, x.shape.as_list()[-1],
                           use_bias=self.bias,
                           activation=activations.get(self.activation),
                           kernel_initializer=_wrap_init(
                               initializers.get(self.w_init)))
def __init__(self,
             filters,
             kernel_size,
             strides=(1, 1),
             padding='valid',
             data_format=None,
             depth_multiplier=1,
             activation=None,
             use_bias=False,
             depthwise_initializer='glorot_uniform',
             bias_initializer='zeros',
             depthwise_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             depthwise_constraint=None,
             bias_constraint=None,
             **kwargs):
    super(DepthWiseConv2D, self).__init__(filters=filters,
                                          kernel_size=kernel_size,
                                          strides=strides,
                                          padding=padding,
                                          data_format=data_format,
                                          activation=activation,
                                          use_bias=use_bias,
                                          bias_regularizer=bias_regularizer,
                                          activity_regularizer=activity_regularizer,
                                          bias_constraint=bias_constraint,
                                          **kwargs)
    self.depth_multiplier = depth_multiplier
    self.depthwise_initializer = initializers.get(depthwise_initializer)
    self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
    self.depthwise_constraint = constraints.get(depthwise_constraint)
    # bias_initializer was accepted but silently dropped; store it so the
    # constructor argument actually takes effect (as in Keras' DepthwiseConv2D)
    self.bias_initializer = initializers.get(bias_initializer)
def apply(self, is_train, x, mask=None):
    bias = (self.bias is None) or self.bias  # for backwards compat
    return fully_connected(x, self.n_out,
                           use_bias=bias,
                           activation=get_keras_activation(self.activation),
                           kernel_initializer=_wrap_init(
                               initializers.get(self.w_init)))
def tri_linear(x, keys):
    with tf.variable_scope("tri_linear_attn"):
        key_w = tf.get_variable("key_w", shape=x.shape.as_list()[-1],
                                initializer=initializers.get("glorot_uniform"),
                                dtype=tf.float32)
        key_logits = tf.tensordot(keys, key_w, axes=[[2], [0]])  # (batch, key_len)

        x_w = tf.get_variable("input_w", shape=x.shape.as_list()[-1],
                              initializer=initializers.get("glorot_uniform"),
                              dtype=tf.float32)
        x_logits = tf.tensordot(x, x_w, axes=[[2], [0]])  # (batch, x_len)

        dot_w = tf.get_variable("dot_w", shape=x.shape.as_list()[-1],
                                initializer=initializers.get("glorot_uniform"),
                                dtype=tf.float32)
        x_dots = x * tf.expand_dims(tf.expand_dims(dot_w, 0), 0)
        dot_logits = tf.matmul(x_dots, keys, transpose_b=True)  # (batch, x_len, key_len)

        return (dot_logits + tf.expand_dims(key_logits, 1)
                + tf.expand_dims(x_logits, 2))
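# A minimal usage sketch (hypothetical shapes, TF1-style graph): given context
# vectors of shape (batch, x_len, dim) and keys of shape (batch, key_len, dim),
# tri_linear returns trilinear similarity logits of shape (batch, x_len, key_len),
# as used in BiDAF-style attention.
context_ph = tf.placeholder(tf.float32, [None, 40, 128])
keys_ph = tf.placeholder(tf.float32, [None, 25, 128])
attn_logits = tri_linear(context_ph, keys_ph)  # (batch, 40, 25)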
def vdcnn(x, filter_width=3, init_channel=64, num_layers=[2, 2, 2, 2],
          use_shortcut=False, k=8, is_training=True, scope=None):
    layers = []  # note: shadows the `layers` module within this function
    x_dim = x.get_shape()[2]

    with tf.variable_scope("temp_conv"):
        filter_shape = [filter_width, x_dim, init_channel]
        W = tf.get_variable(name='temp_1', shape=filter_shape,
                            initializer=initializers.get("glorot_uniform"))
        x = tf.nn.conv1d(x, W, stride=1, padding="SAME")
        layers.append(x)

    now_channel_size = init_channel
    for i, num_layer in enumerate(num_layers):
        for j in range(num_layer):
            with tf.variable_scope("%d_layer_%d_cnn" % (i, j)) as scope:
                shortcut = None
                if use_shortcut and i < len(num_layers) - 1:
                    shortcut = layers[-1]
                conv_ = conv_block(layers[-1], shortcut, filter_width,
                                   now_channel_size, is_training, scope)
                layers.append(conv_)
        if i == len(num_layers) - 1:
            break
        with tf.variable_scope("%d_layer_pool" % i) as scope:
            shortcut = None
            if use_shortcut:
                shortcut = layers[-1]
            pool_ = pool_block(layers[-1], shortcut, filter_width, scope)
            layers.append(pool_)
        now_channel_size *= 2

    # k-max pooling over time, then flatten
    k_pooled = tf.nn.top_k(tf.transpose(layers[-1], [0, 2, 1]), k=k,
                           name='k_pool', sorted=False)[0]
    flatten = tf.reshape(k_pooled, (-1, now_channel_size * k))
    return flatten
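# A minimal usage sketch (hypothetical shapes; assumes pool_block is defined
# alongside conv_block above): embed a character sequence, run the VDCNN
# encoder, and take the k-max pooled features. With init_channel=64 and three
# pooling stages the final channel size is 512, so the output is (batch, 512 * 8).
char_ids = tf.placeholder(tf.int32, [None, 256])
vdcnn_embedder = tf.get_variable('vdcnn_embed', (70, 16))       # vocab 70, dim 16
embedded = tf.nn.embedding_lookup(vdcnn_embedder, char_ids)     # (batch, 256, 16)
features = vdcnn(embedded, init_channel=64, num_layers=[2, 2, 2, 2],
                 k=8, is_training=True)                          # (batch, 4096)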
def dilated_causal_conv(x, filter_width=3, dilates=[1, 2, 4], scope=None):
    x_dim = x.get_shape()[-1].value
    with tf.variable_scope(scope or 'dilated_causal_conv'):
        conved = x
        for idx, dilate in enumerate(dilates):
            W = tf.get_variable(name='conv_filter_{}'.format(idx),
                                shape=[filter_width, x_dim, x_dim],
                                initializer=initializers.get('glorot_uniform'))
            W_norm = tf.nn.l2_normalize(W, [1, 2])
            conved = tf.nn.convolution(input=conved,
                                       filter=W_norm,
                                       padding='SAME',
                                       strides=[1],
                                       dilation_rate=[dilate],
                                       name='conved_{}'.format(idx))
        return conved
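# Note: padding='SAME' above pads symmetrically, so each output position can
# also see future timesteps. A strictly causal variant (a sketch, not the
# original code) left-pads by (filter_width - 1) * dilate and uses VALID
# padding, so the output at time t depends only on inputs up to t:
def causal_conv1d(x, W, dilate, filter_width):
    pad = (filter_width - 1) * dilate
    x_padded = tf.pad(x, [[0, 0], [pad, 0], [0, 0]])  # pad the past only
    return tf.nn.convolution(input=x_padded, filter=W,
                             padding='VALID', strides=[1],
                             dilation_rate=[dilate])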
def shallow_wide_cnn(x, filter_widths, filter_channel):
    layers = []
    x_dim = x.get_shape()[2].value
    for idx, filter_width in enumerate(filter_widths):
        with tf.variable_scope('filter_{}'.format(idx)):
            W = tf.get_variable(name='conv_W',
                                shape=[filter_width, x_dim, filter_channel],
                                initializer=initializers.get("glorot_uniform"))
            conved = tf.nn.conv1d(value=x, filters=W, stride=1, padding='VALID')
            pooled = tf.reduce_max(conved, axis=1)  # max-over-time pooling
            layers.append(pooled)
    return tf.concat(layers, axis=1)
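# A minimal usage sketch (hypothetical shapes): a Kim-style CNN over word
# embeddings with windows of 3/4/5 words and 100 feature maps each, giving a
# (batch, 300) sentence representation after concatenation.
word_embed_ph = tf.placeholder(tf.float32, [None, 60, 200])  # (batch, len, dim)
sent_repr = shallow_wide_cnn(word_embed_ph,
                             filter_widths=[3, 4, 5],
                             filter_channel=100)             # (batch, 300)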
def __init__(self, W_regularizer=None, u_regularizer=None, b_regularizer=None,
             W_constraint=None, u_constraint=None, b_constraint=None,
             bias=True, **kwargs):
    self.supports_masking = True
    self.init = initializers.get('glorot_uniform')

    self.W_regularizer = regularizers.get(W_regularizer)
    self.u_regularizer = regularizers.get(u_regularizer)
    self.b_regularizer = regularizers.get(b_regularizer)

    self.W_constraint = constraints.get(W_constraint)
    self.u_constraint = constraints.get(u_constraint)
    self.b_constraint = constraints.get(b_constraint)

    self.bias = bias
    super(AttentionWithContext, self).__init__(**kwargs)
def tcn_block(x, is_training, filter_width=3, dilates=[1, 2, 4], keep_prob=0.7):
    '''
    reference: https://arxiv.org/pdf/1803.01271.pdf
    '''
    x_dim = x.get_shape()[-1].value

    with tf.variable_scope('dilated_causal_conv_1') as scope:
        conved_1 = dilated_causal_conv(x, filter_width, dilates, scope=scope)
    with tf.variable_scope('relu_dropout_1'):
        output_1 = tf.nn.relu(conved_1)
        output_1 = layers.dropout(output_1, keep_prob=keep_prob,
                                  is_training=is_training)

    with tf.variable_scope('dilated_causal_conv_2') as scope:
        conved_2 = dilated_causal_conv(output_1, filter_width, dilates,
                                       scope=scope)
    with tf.variable_scope('relu_dropout_2'):
        output_2 = tf.nn.relu(conved_2)
        output_2 = layers.dropout(output_2, keep_prob=keep_prob,
                                  is_training=is_training)

    # 1x1 convolution on the residual branch before the elementwise add
    conv_11_W = tf.get_variable(name='conv_11_filter',
                                shape=[1, x_dim, x_dim],
                                initializer=initializers.get('glorot_uniform'))
    conved_11 = tf.nn.convolution(input=x, filter=conv_11_W,
                                  padding='SAME', strides=[1],
                                  name='conved_11')
    return tf.nn.relu(output_2 + conved_11)
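# A minimal usage sketch (hypothetical shapes): stack two TCN residual blocks
# over a (batch, time, channels) sequence; each block preserves the shape.
seq = tf.placeholder(tf.float32, [None, 100, 64])
with tf.variable_scope('tcn_block_0'):
    h = tcn_block(seq, is_training=True)  # (batch, 100, 64)
with tf.variable_scope('tcn_block_1'):
    h = tcn_block(h, is_training=True)    # (batch, 100, 64)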
def get_keras_initialization(name: Union[str, Callable]):
    if name is None:
        return None
    return _wrap_init(initializers.get(name))
syll_embedder = tf.get_variable('syll_embedder', (syll_size, syll_dim))
syll_embed = tf.nn.embedding_lookup(syll_embedder, sylls)

from core_layer import han1_syll_cnn_char_rnn, han1_syll_cnn_char_cnn
core_layer_output = han1_syll_cnn_char_cnn(config, word_embed, sent_len,
                                           char_embed, word_len, syll_embed,
                                           None, fc_dim, is_training)

with tf.variable_scope("output"):
    output = fully_connected(
        core_layer_output, fc_dim,
        use_bias=True,
        activation=activations.get("relu"),
        kernel_initializer=initializers.get("glorot_uniform"))
    output = layers.dropout(output, keep_prob=config.keep_prob,
                            is_training=is_training)
    output = fully_connected(
        output, 1,
        use_bias=True,
        activation=None,
        kernel_initializer=initializers.get("glorot_uniform"))
    # squash the raw score into the 1..10 rating range
    y_logits = tf.sigmoid(output) * 9 + 1

predictions = y_logits
acc = tf.reduce_mean(
    tf.to_float(tf.equal(tf.round(predictions), tf.round(y_))))
sx_ = tf.placeholder(tf.int32, (None, max_word_num, max_syll_num), name='sx_')
y_ = tf.placeholder(tf.int32, (None,), name='y_')

c_embed = tf.get_variable('c_embed', (character_size, char_dim))
s_embed = tf.get_variable('s_embed', (syllable_size, syll_dim))
cx = tf.nn.embedding_lookup(c_embed, cx_)
sx = tf.nn.embedding_lookup(s_embed, sx_)

core_output = cnn_char_syll(config, wx, cx, sx, is_training)
preds = fully_connected(
    core_output, 10,
    activation=activations.get('relot_uniform'.replace('lot_uniform', 'lu')),
    kernel_initializer=initializers.get('glorot_uniform'))
pred = tf.argmax(preds, axis=1, output_type=tf.int32) + 1

# labels y_ are ratings in 1..10 (pred = argmax + 1 is compared to y_ below),
# so shift them to 0..9 before one-hot encoding
y_arr = tf.one_hot(y_ - 1, 10)
acc = tf.reduce_mean(tf.to_float(tf.equal(pred, y_)))
loss = tf.losses.mean_squared_error(y_arr, preds)
mse = tf.losses.mean_squared_error(y_, pred)
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

##############################################################################

sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

# DONOTCHANGE: Reserved for nsml
def rr_han(config, word_embed, sent_len, char_embed, word_len, syll_embed,
           syll_len, n_unit, is_training):
    '''
    HAN 1 layer with char rnn

    @ Input spec
        word_embed [batch_size, max_sent_len, word_dim]
        sent_len   [batch_size]
        char_embed [batch_size, max_sent_len, max_word_len, char_dim]
        word_len   [batch_size, max_sent_len]
        syll_embed [batch_size, max_sent_len, max_syll_len, syll_dim]
        syll_len   [batch_size, max_sent_len]

    @ Output spec
        return [batch, n_unit]
    '''
    char_dim = config.char_dim
    syll_dim = config.syll_dim
    max_sent_len = config.max_sentence_length
    max_word_len = config.max_word_length
    max_syll_num = config.max_syll_num
    keep_prob = config.keep_prob
    rnn_dim = config.rnn_dim

    with tf.variable_scope('syll_rnn') as scope:
        cell_stack_count = 2
        # build a distinct GRUCell per layer; reusing one cell object across
        # layers makes MultiRNNCell share (or refuse to build) the weights
        syll_cell = MultiRNNCell(
            [GRUCell(syll_dim) for _ in range(cell_stack_count)])
        syll_embed = tf.cast(
            tf.reshape(syll_embed, [-1, max_syll_num, syll_dim]), tf.float32)
        syll_len = tf.reshape(syll_len, [-1])
        _, syll_rnn_embed = bidirectional_rnn(syll_cell, syll_cell, syll_embed,
                                              syll_len, scope=scope)
        syll_rnn_embed = tf.reshape(
            syll_rnn_embed, [-1, max_sent_len, syll_dim * 2 * cell_stack_count])

    with tf.variable_scope('char_rnn') as scope:
        cell_stack_count = 2
        char_cell = MultiRNNCell(
            [GRUCell(char_dim) for _ in range(cell_stack_count)])
        char_embed = tf.cast(
            tf.reshape(char_embed, [-1, max_word_len, char_dim]), tf.float32)
        word_len = tf.reshape(word_len, [-1])
        _, char_rnn_embed = bidirectional_rnn(char_cell, char_cell, char_embed,
                                              word_len, scope=scope)
        char_rnn_embed = tf.reshape(
            char_rnn_embed, [-1, max_sent_len, char_dim * 2 * cell_stack_count])

    word_char_concat = tf.concat([word_embed, char_rnn_embed, syll_rnn_embed],
                                 axis=2)

    with tf.variable_scope('embedding'):
        word_char_embed = fully_connected(
            word_char_concat, rnn_dim,
            use_bias=True,
            activation=activations.get("relu"),
            kernel_initializer=initializers.get("glorot_uniform"))
    with tf.variable_scope('dropout'):
        word_char_embed = layers.dropout(word_char_embed, keep_prob=keep_prob,
                                         is_training=is_training)

    with tf.variable_scope('encoder') as scope:
        cell = MultiRNNCell([GRUCell(rnn_dim) for _ in range(3)])
        encoder_output, _ = bidirectional_rnn(cell, cell, word_char_embed,
                                              sent_len, scope=scope)

    with tf.variable_scope('attention') as scope:
        attn_sum_output = task_specific_attention(encoder_output, n_unit,
                                                  scope=scope)
    with tf.variable_scope('dropout'):
        attn_sum_output = layers.dropout(attn_sum_output, keep_prob=keep_prob,
                                         is_training=is_training)

    return attn_sum_output