def call(self, x, mask=None):
    if 0. < self.rate < 1.:
        noise_shape = self._get_noise_shape(x)
        if self.permanent:
            # "permanent" dropout: noise is applied at inference time as well
            x = K.dropout(x, self.rate, noise_shape)
        else:
            # regular dropout: only active during the training phase
            x = K.in_train_phase(K.dropout(x, self.rate, noise_shape), x)
    return x

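# Hedged, standalone sketch (not part of the layer above) contrasting the two
# branches: "permanent" dropout calls K.dropout unconditionally, so noise is
# applied at inference time too (e.g. for Monte Carlo style dropout), while
# the regular branch wraps it in K.in_train_phase so inference sees the input
# unchanged. Assumes a Keras 2.x backend; tensors and rate are illustrative.
from keras import backend as K

x = K.ones((2, 4))
rate = 0.5

always_dropped = K.dropout(x, rate)  # active regardless of the phase
train_only = K.in_train_phase(lambda: K.dropout(x, rate), x, training=False)
# with training=False, train_only is simply x
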
def call(self, inputs, **kwargs):
    main_input, embedding_matrix = inputs
    input_shape_tensor = K.shape(main_input)
    last_input_dim = K.int_shape(main_input)[-1]
    emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)
    projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                      self.embedding_weights['projection'])
    if self.add_biases:
        projected = K.bias_add(projected, self.embedding_weights['biases'],
                               data_format='channels_last')
    if 0 < self.projection_dropout < 1:
        projected = K.in_train_phase(
            lambda: K.dropout(projected, self.projection_dropout),
            projected,
            training=kwargs.get('training'))
    attention = K.dot(projected, K.transpose(embedding_matrix))
    if self.scaled_attention:
        # scaled dot-product attention, described in
        # "Attention is all you need" (https://arxiv.org/abs/1706.03762)
        sqrt_d = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
        attention = attention / sqrt_d
    result = K.reshape(
        self.activation(attention),
        (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
    return result

def call(self, inputs):
    # inputs: [batch_size, seq_length, embedding_dim]
    embed = self.dropout_embedding(inputs)
    if self.training:
        x = K.dropout(embed, level=self.input_dropout)
        for i in range(self.layer_num):
            x, state_h, state_c = self.rnn_layer[i](x, training=self.training)
        dropped_hidden = K.dropout(x, level=self.dropout)
    else:
        x = embed
        for i in range(self.layer_num):
            x, state_h, state_c = self.rnn_layer[i](x, training=self.training)
        dropped_hidden = x
    hidden = x
    x = self.output_layer(dropped_hidden)
    output = K.softmax(x)
    return output, hidden, dropped_hidden

def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x

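# Standalone sketch of the "same dropout mask at every timestep" trick used
# above, assuming a Keras 2.x backend where K.dropout, K.repeat and
# K.in_train_phase are available; the shapes and rate are made up for
# illustration only.
import numpy as np
from keras import backend as K

x = K.constant(np.ones((2, 5, 4)))   # [batch, timesteps, input_dim]
ones = K.ones_like(x[:, 0, :])       # one mask per sample: [batch, input_dim]
mask = K.repeat(K.dropout(ones, 0.5), 5)   # broadcast the mask over timesteps
x_dropped = K.in_train_phase(x * mask, x, training=True)
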
def dot_product_attention(self, x, mask=None, dropout=0.1, training=None):
    q, k, v = x
    logits = tf.matmul(q, k, transpose_b=True)  # [bs, 8, len, len]
    if self.bias:
        logits += self.b
    if mask is not None:
        # mask: [bs, len]
        mask = tf.expand_dims(mask, axis=1)
        mask = tf.expand_dims(mask, axis=1)  # [bs, 1, 1, len]
        logits = self.mask_logits(logits, mask)
    weights = tf.nn.softmax(logits, name="attention_weights")
    weights = K.in_train_phase(K.dropout(weights, dropout), weights,
                               training=training)
    x = tf.matmul(weights, v)
    return x

def call(self, x):
    row = []
    col = []
    # build every pairwise combination of the input features
    for r, c in combinations(x, 2):  # field * (field - 1) / 2 pairs
        row.append(r)
        col.append(c)
    # [batch_size, field * (field - 1) / 2, embedding_size]
    p = K.concatenate(row, axis=1)
    q = K.concatenate(col, axis=1)
    inner_product = p * q  # element-wise product of each pair

    # add non-linearity via an activation:
    # [batch_size, pairs, embedding_size] x [embedding_size, attention_units]
    #   -> [batch_size, pairs, attention_units]
    attention_tmp = K.relu(
        K.bias_add(K.dot(inner_product, self.attention_W), self.attention_b))
    # context vector: [batch_size, pairs, 1]
    attention_tmp_dot = K.dot(attention_tmp, self.projection_h)
    # softmax over all pairs within one sample (axis=1), equivalent to
    # K.exp(attention_tmp_dot) / K.sum(K.exp(attention_tmp_dot), axis=1, keepdims=True)
    # [batch_size, pairs, 1]
    attention_weight = K.softmax(attention_tmp_dot, axis=1)
    # weight the inner products by the attention scores and sum over the pairs
    attention_output = K.sum(inner_product * attention_weight,
                             axis=1)  # [batch_size, embedding_size]
    # dropout (note: applied unconditionally here, not gated on the training phase)
    attention_output = K.dropout(attention_output,
                                 self.dropout_rate)  # [batch_size, embedding_size]
    # equivalent to a dense layer
    afm_out = K.dot(attention_output, self.projection_p)  # [batch_size, 1]
    return afm_out

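# Minimal, self-contained sketch of the pairwise step above:
# itertools.combinations(x, 2) enumerates every unordered pair of field
# embeddings, i.e. field * (field - 1) / 2 pairs. Field names here are
# purely illustrative.
from itertools import combinations

fields = ['user', 'item', 'context']       # 3 fields -> 3 pairs
print(list(combinations(fields, 2)))
# [('user', 'item'), ('user', 'context'), ('item', 'context')]
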
def dropped_inputs():
    return K.dropout(ones, rate)

def dropped_inputs():  # pylint: disable=function-redefined
    return K.dropout(ones, self.recurrent_dropout)

def dropped_inputs():
    return K.dropout(ones, self.dropout)

def dropped_weight_connections():
    return K.dropout(ones, self.kernel_dropout) * (1 - self.kernel_dropout)

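# Hedged sketch of why the closure above multiplies by (1 - kernel_dropout):
# K.dropout rescales surviving entries by 1 / (1 - rate) (inverted dropout),
# so multiplying the result by (1 - rate) turns it back into a plain 0/1
# connectivity mask of the kind used for DropConnect-style weight dropping.
# Assumes a Keras 2.x backend; the shape and rate are illustrative.
from keras import backend as K

rate = 0.25
mask = K.dropout(K.ones((4, 4)), rate) * (1 - rate)   # entries are 0. or 1.
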
def drop_inputs():
    return K.dropout(inputs, self.unit_dropout)

def dropped_softmax():
    return K.dropout(attention_softmax, self.dropout)

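# The small closures above are the usual pattern for deferring dropout to the
# training phase: K.in_train_phase accepts a callable for the training branch,
# so the dropped tensor is only used when training is active. A hedged,
# standalone sketch of that pattern, assuming a Keras 2.x backend; `ones` and
# `rate` are placeholders for illustration.
from keras import backend as K

ones = K.ones((3, 4))
rate = 0.5

def dropped_inputs():
    return K.dropout(ones, rate)

dp_mask = K.in_train_phase(dropped_inputs, ones, training=True)
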
def call(self, inputs, states, training=None):
    if self.in_dropout_mask is None and self.use_dropout_mask is True:
        self.in_dropout_mask = K.dropout(
            array_ops.ones_like(inputs), self.in_dropout)

    else:
        self.bias = None
    self.built = True

def call(self, inputs, states, training=None):
    if self.in_dropout_mask is None and self.use_dropout_mask is True:
        self.in_dropout_mask = K.dropout(
            array_ops.ones_like(inputs), self.in_dropout)
    if self.recur_dropout_mask is None and self.use_recur is True:
        self.recur_dropout_mask = K.dropout(
            array_ops.ones_like(self.kern_3), self.recur_dropout)
    self.connectivity_kern_1 = K.dropout(
        array_ops.ones_like(self.kern_1), self.connectivity_1)
    self.connectivity_kern_2 = K.dropout(
        array_ops.ones_like(self.kern_2), self.connectivity_2)
    self.connectivity_kern_3 = K.dropout(
        array_ops.ones_like(self.kern_3), self.connectivity_3)
    K.set_value(self.kern_1,
                spec_normalize(self.kern_1) * self.connectivity_kern_1)
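
# Hedged, standalone sketch of the mask-caching idea in the cell above:
# build the dropout mask once (on the first step), then reuse it at every
# step of the recurrent loop so all timesteps see the same dropped units
# (variational-style dropout). Purely illustrative; assumes a Keras 2.x
# backend running eagerly, and the loop below stands in for the RNN time loop.
from keras import backend as K

in_dropout = 0.3
inputs = K.ones((2, 8))            # one timestep: [batch, features]

in_dropout_mask = None
for step in range(4):
    if in_dropout_mask is None:    # created only once, on the first step
        in_dropout_mask = K.dropout(K.ones_like(inputs), in_dropout)
    step_input = inputs * in_dropout_mask   # same mask at every step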