def __init__(self, input_dim, output_dim, embedding_initializer='zeros', **kwargs):
    super(RelativePositionEmbedding, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.embedding_initializer = initializers.get(embedding_initializer)
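# A minimal sketch (not the layer's own call) of how a relative-position index
# matrix is usually built before looking rows up in the (input_dim, output_dim)
# embedding table. `max_position` is a hypothetical name, roughly (input_dim - 1) // 2.
import numpy as np

def relative_position_ids(q_len, k_len, max_position):
    # pairwise offsets j - i, clipped to [-max_position, max_position]
    ids = np.arange(k_len)[None, :] - np.arange(q_len)[:, None]
    ids = np.clip(ids, -max_position, max_position)
    # shift into [0, 2 * max_position] so the offsets index the embedding table
    return ids + max_position

# e.g. relative_position_ids(4, 4, 2) yields indices in [0, 4]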
def __init__(self, units, activation='relu', use_bias=True, kernel_initializer='glorot_uniform', **kwargs):
    super(FeedForward, self).__init__(**kwargs)
    self.units = units
    self.activation = activation
    self.use_bias = use_bias
    self.kernel_initializer = initializers.get(kernel_initializer)
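# A minimal numpy sketch of the position-wise transform this layer typically
# applies in call(): an inner projection to `units` with the activation,
# followed by a projection back to the input dimension. `W1/b1/W2/b2` are
# illustrative names, not the layer's weights.
import numpy as np

def feed_forward(x, W1, b1, W2, b2):
    # x: (batch, seq_len, in_dim); W1: (in_dim, units); W2: (units, in_dim)
    h = np.maximum(x @ W1 + b1, 0.0)   # 'relu' activation
    return h @ W2 + b2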
def __init__(self, input_dim, output_dim, merge_mode='add', embeddings_initializer='zeros', **kwargs):
    super(PositionEmbedding, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.merge_mode = merge_mode
    self.embeddings_initializer = initializers.get(embeddings_initializer)
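# A minimal sketch of how merge_mode is commonly applied in call(), assuming the
# learned table has shape (input_dim, output_dim) with input_dim >= seq_len.
# Function and argument names are illustrative only.
import numpy as np

def apply_position_embedding(x, table, merge_mode='add'):
    # x: (batch, seq_len, hidden); table: (input_dim, output_dim)
    seq_len = x.shape[1]
    pos = table[None, :seq_len]                    # (1, seq_len, output_dim)
    if merge_mode == 'add':
        return x + pos                             # requires output_dim == hidden
    pos = np.repeat(pos, x.shape[0], axis=0)
    return np.concatenate([x, pos], axis=-1)       # 'concat' widens the hidden size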
def __init__(self, head_nums, head_size, key_size=None, use_bias=True, attention_scale=True,
             kernel_initializer='glorot_uniform', with_residual_attention=False, **kwargs):
    super(MultiHeadAttention, self).__init__(**kwargs)
    self.head_nums = head_nums
    self.head_size = head_size
    self.key_size = key_size or head_size
    self.output_dim = head_nums * head_size
    self.use_bias = use_bias
    self.attention_scale = attention_scale
    self.kernel_initializer = initializers.get(kernel_initializer)
    self.with_residual_attention = with_residual_attention  # RealFormer-style residual attention scores
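# A minimal numpy sketch of the shapes this constructor sets up: queries/keys use
# key_size per head, values use head_size per head, logits are scaled by
# key_size ** -0.5, and with_residual_attention adds the previous layer's logits
# (RealFormer) before the softmax. Names are illustrative, not the layer's own.
import numpy as np

def attention(q, k, v, key_size, prev_scores=None, attention_scale=True):
    # q, k: (batch, heads, seq_len, key_size); v: (batch, heads, seq_len, head_size)
    scores = q @ k.transpose(0, 1, 3, 2)
    if attention_scale:
        scores = scores / key_size ** 0.5
    if prev_scores is not None:
        scores = scores + prev_scores            # residual attention (RealFormer)
    weights = np.exp(scores - scores.max(-1, keepdims=True))
    weights = weights / weights.sum(-1, keepdims=True)
    return weights @ v, scores                   # scores would feed the next layer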
def __init__(self, center=True, scale=True, epsilon=None, conditional=False,
             condition_hidden_units=None, condition_hidden_activation='linear',
             condition_hidden_initializer='glorot_uniform', **kwargs):
    super(LayerNormalization, self).__init__(**kwargs)
    self.center = center
    self.scale = scale
    self.epsilon = epsilon or 1e-12
    self.conditional = conditional
    self.condition_hidden_units = condition_hidden_units
    self.condition_hidden_activation = activations.get(condition_hidden_activation)
    self.condition_hidden_initializer = initializers.get(condition_hidden_initializer)
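# A minimal sketch of conditional layer normalization as configured above: the
# condition (optionally passed through a hidden Dense with
# condition_hidden_units / condition_hidden_activation) is projected and added
# to gamma and beta. W_gamma / W_beta are illustrative names; in practice these
# projections are zero-initialized so training starts as a plain layer norm.
import numpy as np

def conditional_layer_norm(x, cond, gamma, beta, W_gamma, W_beta, epsilon=1e-12):
    # x: (batch, seq_len, hidden); cond: (batch, cond_dim); gamma, beta: (hidden,)
    mean = x.mean(-1, keepdims=True)
    std = np.sqrt(((x - mean) ** 2).mean(-1, keepdims=True) + epsilon)
    x_hat = (x - mean) / std
    gamma_c = gamma + (cond @ W_gamma)[:, None, :]   # condition shifts the scale
    beta_c = beta + (cond @ W_beta)[:, None, :]      # condition shifts the offset
    return gamma_c * x_hat + beta_c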
def __init__(self, input_dim, output_dim, num_attention_heads, attn_scale_factor=2,
             relative_position_bias=True, embeddings_initializer='zeros', **kwargs):
    super(AbsolutePositionEmbeddingTUPE, self).__init__(**kwargs)
    self.input_dim = input_dim
    self.output_dim = output_dim
    self.num_attention_heads = num_attention_heads
    self.attn_scale_factor = attn_scale_factor
    self.relative_position_bias = relative_position_bias
    self.embedding_initializer = initializers.get(embeddings_initializer)
    # per-head scaling for the untied positional attention (TUPE)
    self.pos_scaling = float(self.output_dim / num_attention_heads * self.attn_scale_factor) ** -0.5
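# A minimal sketch of where pos_scaling typically enters TUPE-style untied
# positional attention: the position embeddings get their own query/key
# projections, the position query is scaled by pos_scaling, and the resulting
# (seq_len, seq_len) logits are added to the word-to-word attention as a bias.
# W_q / W_k and the single-head view are assumptions for illustration only.
import numpy as np

def position_attention_bias(pos_emb, W_q, W_k, pos_scaling):
    # pos_emb: (seq_len, output_dim); W_q, W_k: (output_dim, output_dim)
    pos_q = (pos_emb @ W_q) * pos_scaling
    pos_k = pos_emb @ W_k
    return pos_q @ pos_k.T   # bias added to the content attention logits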