def test_add(self):
    """Round-trip a MODE_ADD ``TrigPosEmbedding`` through save/load and
    verify the sinusoidal signal added to an all-ones input."""
    seq_len = np.random.randint(1, 10)
    embed_dim = np.random.randint(1, 20) * 2  # must be even for sin/cos pairs
    inputs = np.ones((1, seq_len, embed_dim))
    model = keras.models.Sequential()
    model.add(TrigPosEmbedding(
        input_shape=(seq_len, embed_dim),
        mode=TrigPosEmbedding.MODE_ADD,
        name='Pos-Embd',
    ))
    model.compile('adam', 'mse')
    # Save and reload to exercise (de)serialization of the custom layer.
    model_path = os.path.join(
        tempfile.gettempdir(),
        'test_trig_pos_embd_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(
        model_path,
        custom_objects={'TrigPosEmbedding': TrigPosEmbedding})
    model.summary()
    outputs = model.predict(inputs)[0].tolist()
    for pos in range(seq_len):
        for dim in range(embed_dim):
            actual = outputs[pos][dim]
            # Even dims carry sin, odd dims cos (both offset by the 1.0 input).
            if dim % 2 == 0:
                expect = 1.0 + np.sin(pos / 10000.0 ** (float(dim) / embed_dim))
            else:
                expect = 1.0 + np.cos(pos / 10000.0 ** ((dim - 1.0) / embed_dim))
            self.assertAlmostEqual(
                expect, actual, places=6,
                msg=(embed_dim, pos, dim, expect, actual))
def test_concat(self):
    """Round-trip a MODE_CONCAT ``TrigPosEmbedding`` through save/load and
    verify the sinusoidal columns appended after the original features.

    Fix: the original built a TF1 ``tf.Session()`` and evaluated
    ``tf.sin``/``tf.cos`` graph ops per element just to compute scalar
    trigonometry. That is incompatible with TF2 (no ``tf.Session``) and
    needlessly slow; ``np.sin``/``np.cos`` give the same values and match
    the style of ``test_add``.
    """
    seq_len = np.random.randint(1, 10)
    feature_dim = np.random.randint(1, 20)
    embed_dim = np.random.randint(1, 20) * 2  # must be even for sin/cos pairs
    inputs = np.ones((1, seq_len, feature_dim))
    model = keras.models.Sequential()
    model.add(TrigPosEmbedding(
        input_shape=(seq_len, feature_dim),
        output_dim=embed_dim,
        mode=TrigPosEmbedding.MODE_CONCAT,
        name='Pos-Embd',
    ))
    model.compile('adam', 'mse')
    # Save and reload to exercise (de)serialization of the custom layer.
    model_path = os.path.join(
        tempfile.gettempdir(),
        'test_trig_pos_embd_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(
        model_path,
        custom_objects={'TrigPosEmbedding': TrigPosEmbedding})
    model.summary()
    predicts = model.predict(inputs)[0].tolist()
    for i in range(seq_len):
        for j in range(embed_dim):
            # The embedding columns start after the original features.
            actual = predicts[i][feature_dim + j]
            if j % 2 == 0:
                expect = np.sin(i / 10000.0 ** (float(j) / embed_dim))
            else:
                expect = np.cos(i / 10000.0 ** ((j - 1.0) / embed_dim))
            self.assertAlmostEqual(
                expect, actual, places=6,
                msg=(embed_dim, i, j, expect, actual))
def cnn_multi_attention():
    """Build and compile a Conv1D + multi-head-attention classifier.

    Input is (300, 34); a 2-dim trigonometric position signal is
    concatenated before the convolutional stack, and a 3-way softmax
    head closes the model.
    """
    stack = [
        TrigPosEmbedding(
            input_shape=(300, 34),
            mode=TrigPosEmbedding.MODE_CONCAT,  # MODE_ADD MODE_EXPAND
            output_dim=2,
            name='Pos-Embd'),
        Conv1D(64, (2,), input_shape=((300, 35))),
        Dropout(0.5),
        MaxPool1D(),
        Conv1D(32, 2),
        MaxPool1D(),
        Dropout(0.5),
        MultiHeadAttention(head_num=16, name='Multi-Head'),
        Flatten(),
        Dense(3, activation='softmax'),
    ]
    model = Sequential()
    for layer in stack:
        model.add(layer)
    model.summary()
    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=[categorical_accuracy],
    )
    return model
def encoder(seq_len, m_features, d_model, n_heads, dff, rate=0.1, encoder=None):
    """Basic attention encoder block (norm -> dense -> pos-embed -> MHA ->
    residual/norm -> feed-forward -> residual/norm).

    It can be stacked on a previous encoder by passing that model as the
    ``encoder`` argument.

    Fixes over the original:
      * ``else::`` was a syntax error.
      * ``norm_0`` was referenced but never assigned (NameError).
      * the ``Input`` tensor was overwritten by the normalization output,
        so ``keras.Model`` would not have received a real Input layer.

    :param seq_len: sequence length of the input.
    :param m_features: number of input features per timestep.
    :param d_model: projection width fed to the attention block.
    :param n_heads: number of attention heads.
    :param dff: hidden width of the feed-forward sub-layer.
    :param rate: dropout rate.
    :param encoder: optional previous encoder model to extend.
    :return: a ``keras.Model`` covering the (possibly extended) stack.
    """
    if encoder is None:
        model_input = keras.layers.Input(shape=(seq_len, m_features))
        norm_0 = LayerNormalization()(model_input)
    else:
        # Extend a previous encoder: reuse its input and build on its output.
        model_input = encoder.input
        norm_0 = encoder.output
    linear = keras.layers.Dense(units=d_model)(norm_0)
    pos = TrigPosEmbedding(mode=TrigPosEmbedding.MODE_ADD)(linear)
    mha = MultiHeadAttention(head_num=n_heads)(pos)
    mha_drop = keras.layers.Dropout(rate=rate)(mha)
    add_1 = keras.layers.Add()([pos, mha_drop])
    norm_1 = LayerNormalization()(add_1)
    ff = FeedForward(dff)(norm_1)
    ff_drop = keras.layers.Dropout(rate=rate)(ff)
    add_2 = keras.layers.Add()([ff_drop, norm_1])
    out = LayerNormalization()(add_2)
    return keras.Model(model_input, out)
def build(self):
    """Assemble the model: trig position embedding, gated Conv1D residual
    blocks, a dense stack, and a single-unit linear output."""
    net_in = Input(shape=self.input_shape)
    # Concatenate a 2-dim trigonometric position signal onto the input.
    x = TrigPosEmbedding(mode=TrigPosEmbedding.MODE_CONCAT, output_dim=2)(net_in)
    x = Dense(1, activation=None)(x)
    # Gated (GLU-style) convolutional blocks with residual connections.
    for _ in range(self.n_block):
        linear_path = Conv1D(self.input_shape[1], self.kernel_size,
                             padding="same", activation=None,
                             input_shape=self.input_shape[1:])(x)
        gate_path = Conv1D(self.input_shape[1], self.kernel_size,
                           padding="same", activation="sigmoid",
                           input_shape=self.input_shape[1:])(x)
        x = Multiply()([linear_path, gate_path]) + x
    # Fully-connected stack.
    for _ in range(self.nb_layers):
        x = Dense(self.nb_hidden_units, activation="relu")(x)
    out = Dense(1, activation=None)(x)
    return Model(inputs=[net_in], outputs=[out])
def test_missing_output_dim(self):
    """MODE_EXPAND without ``output_dim`` must raise NotImplementedError."""
    with self.assertRaises(NotImplementedError):
        TrigPosEmbedding(mode=TrigPosEmbedding.MODE_EXPAND)
def get_model(token_num,
              embed_dim,
              encoder_num,
              decoder_num,
              head_num,
              hidden_dim,
              attention_activation=None,
              feed_forward_activation='relu',
              dropout_rate=0.0,
              use_same_embed=True,
              embed_weights=None,
              embed_trainable=None,
              trainable=True):
    """Get full model without compilation.

    :param token_num: Number of distinct tokens.
    :param embed_dim: Dimension of token embedding.
    :param encoder_num: Number of encoder components.
    :param decoder_num: Number of decoder components.
    :param head_num: Number of heads in multi-head self-attention.
    :param hidden_dim: Hidden dimension of feed forward layer.
    :param attention_activation: Activation for multi-head self-attention.
    :param feed_forward_activation: Activation for feed-forward layer.
    :param dropout_rate: Dropout rate.
    :param use_same_embed: Whether to use the same token embedding layer.
        `token_num`, `embed_weights` and `embed_trainable` should be lists
        of two elements if it is False.
    :param embed_weights: Initial weights of token embedding.
    :param embed_trainable: Whether the token embedding is trainable. It
        will automatically set to False if the given value is None when
        embedding weights has been provided.
    :param trainable: Whether the keras_layers are trainable.
    :return: Keras model.
    """
    # Each pair-style argument may be a scalar (shared by encoder and
    # decoder) or a two-element list [encoder_value, decoder_value].
    if not isinstance(token_num, list):
        token_num = [token_num, token_num]
    encoder_token_num, decoder_token_num = token_num

    if not isinstance(embed_weights, list):
        embed_weights = [embed_weights, embed_weights]
    encoder_embed_weights, decoder_embed_weights = embed_weights
    if encoder_embed_weights is not None:
        encoder_embed_weights = [encoder_embed_weights]
    if decoder_embed_weights is not None:
        decoder_embed_weights = [decoder_embed_weights]

    if not isinstance(embed_trainable, list):
        embed_trainable = [embed_trainable, embed_trainable]
    encoder_embed_trainable, decoder_embed_trainable = embed_trainable
    # Embeddings default to frozen when explicit weights were supplied.
    if encoder_embed_trainable is None:
        encoder_embed_trainable = encoder_embed_weights is None
    if decoder_embed_trainable is None:
        decoder_embed_trainable = decoder_embed_weights is None

    if use_same_embed:
        encoder_embed_layer = decoder_embed_layer = EmbeddingRet(
            input_dim=encoder_token_num,
            output_dim=embed_dim,
            mask_zero=True,
            weights=encoder_embed_weights,
            trainable=encoder_embed_trainable,
            name='Token-Embedding',
        )
    else:
        encoder_embed_layer = EmbeddingRet(
            input_dim=encoder_token_num,
            output_dim=embed_dim,
            mask_zero=True,
            weights=encoder_embed_weights,
            trainable=encoder_embed_trainable,
            name='Encoder-Token-Embedding',
        )
        decoder_embed_layer = EmbeddingRet(
            input_dim=decoder_token_num,
            output_dim=embed_dim,
            mask_zero=True,
            weights=decoder_embed_weights,
            trainable=decoder_embed_trainable,
            name='Decoder-Token-Embedding',
        )

    # Encoder side: token embedding + additive positional signal.
    encoder_input = keras.layers.Input(shape=(None,), name='Encoder-Input')
    encoder_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='Encoder-Embedding',
    )(encoder_embed_layer(encoder_input)[0])
    encoded_layer = get_encoders(
        encoder_num=encoder_num,
        input_layer=encoder_embed,
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
    )

    # Decoder side; keep the embedding weight tensor for the tied output.
    decoder_input = keras.layers.Input(shape=(None,), name='Decoder-Input')
    decoder_embed, decoder_embed_weights = decoder_embed_layer(decoder_input)
    decoder_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='Decoder-Embedding',
    )(decoder_embed)
    decoded_layer = get_decoders(
        decoder_num=decoder_num,
        input_layer=decoder_embed,
        encoded_layer=encoded_layer,
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
    )
    # Output distribution tied to the decoder token-embedding weights.
    dense_layer = EmbeddingSim(
        trainable=trainable,
        name='Output',
    )([decoded_layer, decoder_embed_weights])
    return keras.models.Model(
        inputs=[encoder_input, decoder_input], outputs=dense_layer)
def double_trans_modality_part(
        main_input,
        middle_input,
        main_dim,
        middle_dim,
        encoder_num,
        decoder_num,
        head_num,
        hidden_dim,
        prefix="",
        attention_activation=None,
        feed_forward_activation='relu',
        dropout_rate=0.0,
        trainable=True,
):
    """Run two chained encoder/decoder transformer passes.

    Pass one encodes ``main_input`` and decodes against ``middle_input``;
    pass two re-encodes the first pass's decoder output and decodes it
    against the main embedding again.

    :return: (first_encoded_layer, first_decoded_output,
              second_encoded_layer, second_decoded_output)
    """
    # Keyword arguments shared by every get_encoders/get_decoders call.
    shared = dict(
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
    )
    main_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='{}-first-Encoder-Embedding'.format(prefix),
    )(main_input)
    first_encoded_layer = get_encoders(
        encoder_num=encoder_num,
        input_layer=main_embed,
        prefix=prefix + "-first-",
        **shared)
    middle_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='{}-first-Decoder-Embedding'.format(prefix),
    )(middle_input)
    first_decoded_layer = get_decoders(
        decoder_num=decoder_num,
        input_layer=middle_embed,
        encoded_layer=first_encoded_layer,
        prefix=prefix + "-first-",
        **shared)
    first_decoded_output = TimeDistributed(Dense(
        middle_dim, activation="relu"))(first_decoded_layer)
    # NOTE(review): the second pass embeds `first_decoded_layer`, not the
    # projected `first_decoded_output`; preserved from the original —
    # confirm this is intentional.
    fake_middle_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='{}-second-Encoder-Embedding'.format(prefix),
    )(first_decoded_layer)
    second_encoded_layer = get_encoders(
        encoder_num=encoder_num,
        input_layer=fake_middle_embed,
        prefix=prefix + "-second-",
        **shared)
    second_decoded_layer = get_decoders(
        decoder_num=decoder_num,
        input_layer=main_embed,
        encoded_layer=second_encoded_layer,
        prefix=prefix + "-second-",
        **shared)
    second_decoded_output = TimeDistributed(Dense(
        main_dim, activation="relu"))(second_decoded_layer)
    return (first_encoded_layer, first_decoded_output,
            second_encoded_layer, second_decoded_output)
def get_model(token_num,
              embed_dim,
              encoder_num,
              decoder_num,
              head_num,
              hidden_dim,
              num_classes,
              add_new_node,
              attention_activation=None,
              feed_forward_activation='relu',
              dropout_rate=0.0,
              use_same_embed=True,
              embed_weights=None,
              embed_trainable=None,
              trainable=True,
              use_adapter=False,
              adapter_units=None,
              adapter_activation='relu'):
    """Build an (uncompiled) encoder-decoder classifier.

    Like the transformer ``get_model`` but with optional adapters and a
    softmax classification head over ``num_classes`` (or ``num_classes + 1``
    when ``add_new_node`` is set).

    Fix: the original used ``if add_new_node == False: ... elif
    add_new_node == True: ...``, which left ``dense`` undefined (NameError)
    for any value that is not exactly True/False; a plain truthiness check
    covers every input.

    :param token_num: Number of distinct tokens (or [encoder, decoder] pair).
    :param embed_dim: Dimension of token embedding.
    :param encoder_num: Number of encoder components.
    :param decoder_num: Number of decoder components.
    :param head_num: Number of heads in multi-head self-attention.
    :param hidden_dim: Hidden dimension of feed forward layer.
    :param num_classes: Number of output classes.
    :param add_new_node: If truthy, add one extra class to the softmax head.
    :param attention_activation: Activation for multi-head self-attention.
    :param feed_forward_activation: Activation for feed-forward layer.
    :param dropout_rate: Dropout rate.
    :param use_same_embed: Share one token embedding for encoder and decoder.
    :param embed_weights: Initial weights of token embedding.
    :param embed_trainable: Whether the token embedding is trainable;
        defaults to False when weights are provided.
    :param trainable: Whether the layers are trainable.
    :param use_adapter: Whether to use adapter layers.
    :param adapter_units: Hidden units of adapters.
    :param adapter_activation: Activation of adapters.
    :return: Keras model mapping [encoder_input, decoder_input] -> softmax.
    """
    # Each pair-style argument may be a scalar (shared by encoder and
    # decoder) or a two-element list [encoder_value, decoder_value].
    if not isinstance(token_num, list):
        token_num = [token_num, token_num]
    encoder_token_num, decoder_token_num = token_num

    if not isinstance(embed_weights, list):
        embed_weights = [embed_weights, embed_weights]
    encoder_embed_weights, decoder_embed_weights = embed_weights
    if encoder_embed_weights is not None:
        encoder_embed_weights = [encoder_embed_weights]
    if decoder_embed_weights is not None:
        decoder_embed_weights = [decoder_embed_weights]

    if not isinstance(embed_trainable, list):
        embed_trainable = [embed_trainable, embed_trainable]
    encoder_embed_trainable, decoder_embed_trainable = embed_trainable
    # Embeddings default to frozen when explicit weights were supplied.
    if encoder_embed_trainable is None:
        encoder_embed_trainable = encoder_embed_weights is None
    if decoder_embed_trainable is None:
        decoder_embed_trainable = decoder_embed_weights is None

    if use_same_embed:
        encoder_embed_layer = decoder_embed_layer = EmbeddingRet(
            input_dim=encoder_token_num,
            output_dim=embed_dim,
            mask_zero=True,
            weights=encoder_embed_weights,
            trainable=encoder_embed_trainable,
            name='Token-Embedding',
        )
    else:
        encoder_embed_layer = EmbeddingRet(
            input_dim=encoder_token_num,
            output_dim=embed_dim,
            mask_zero=True,
            weights=encoder_embed_weights,
            trainable=encoder_embed_trainable,
            name='Encoder-Token-Embedding',
        )
        decoder_embed_layer = EmbeddingRet(
            input_dim=decoder_token_num,
            output_dim=embed_dim,
            mask_zero=True,
            weights=decoder_embed_weights,
            trainable=decoder_embed_trainable,
            name='Decoder-Token-Embedding',
        )

    encoder_input = keras.layers.Input(shape=(None,), name='Encoder-Input')
    encoder_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='Encoder-Embedding',
    )(encoder_embed_layer(encoder_input)[0])
    encoded_layer = get_encoders(
        encoder_num=encoder_num,
        input_layer=encoder_embed,
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
        use_adapter=use_adapter,
        adapter_units=adapter_units,
        adapter_activation=adapter_activation,
    )

    decoder_input = keras.layers.Input(shape=(None,), name='Decoder-Input')
    decoder_embed, decoder_embed_weights = decoder_embed_layer(decoder_input)
    decoder_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='Decoder-Embedding',
    )(decoder_embed)
    decoded_layer = get_decoders(
        decoder_num=decoder_num,
        input_layer=decoder_embed,
        encoded_layer=encoded_layer,
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
        use_adapter=use_adapter,
        adapter_units=adapter_units,
        adapter_activation=adapter_activation,
    )
    # NOTE: this tied-embedding output is built but NOT connected to the
    # returned model (kept for parity with the original graph construction).
    dense_layer = EmbeddingSim(
        trainable=trainable,
        name='normal_end',
    )([decoded_layer, decoder_embed_weights])

    if add_new_node:
        print("add new node")
        dense = Dense(units=num_classes + 1, activation="softmax")(decoded_layer)
    else:
        dense = Dense(units=num_classes, activation="softmax")(decoded_layer)
    return keras.models.Model(
        inputs=[encoder_input, decoder_input], outputs=dense)
def add_position_encoding(input_layer, name):
    """Add a sinusoidal positional signal onto ``input_layer`` (MODE_ADD)."""
    pos_embedding = TrigPosEmbedding(mode=TrigPosEmbedding.MODE_ADD, name=name)
    return pos_embedding(input_layer)
def CNN(frame, channels, batch_size, droprate, lr, activation):
    """Build the multi-scale CNN acoustic model and its CTC training wrapper.

    :param frame: number of input frames (time steps).
    :param channels: number of input feature channels.
    :param batch_size: batch size forwarded to the residual blocks.
    :param droprate: unused here (dropout rates are hard-coded below).
    :param lr: unused here (no optimizer is configured in this function).
    :param activation: activation forwarded to the residual blocks.
    :return: (ctc training model, inference model, test function,
              inference model with gender head)
    """
    cnn_input = Input(shape=(frame, channels), name='the_inputs',
                      dtype='float32')
    cnn_input_2d = Reshape((-1, channels, 1))(cnn_input)
    # Three strided multi-scale 2-D stages; each halves the channel axis
    # (rounding up), so track the running count for the Reshape below.
    cnn2d = muti_scale_cnn2d(cnn_input_2d, filters_2d=16, strides=2, mode='add')
    channels = (channels + 1) // 2
    cnn2d = muti_scale_cnn2d(cnn2d, filters_2d=32, strides=2, mode='add')
    channels = (channels + 1) // 2
    cnn2d = muti_scale_cnn2d(cnn2d, filters_2d=64, strides=2, mode='add')
    channels = (channels + 1) // 2
    cnn2d = Reshape((-1, channels * 64))(cnn2d)
    cnn2d = TimeDistributed(Dropout(0.2))(cnn2d)
    cnn = muti_scale_cnn1d(cnn2d, filters_1d=64, mode='add')
    # Four residual 1-D blocks with shrinking kernels, each preceded by a
    # fresh additive positional embedding.
    for kernel in (7, 5, 5, 3):
        cnn_pos = TrigPosEmbedding(
            output_dim=64, mode=TrigPosEmbedding.MODE_ADD)(cnn)
        cnn = resblock(cnn_pos, 64, 3, activation, kernel, 4, batch_size)
    cnn = TimeDistributed(Dropout(0.2))(cnn)
    cnn = muti_scale_cnn1d(cnn, filters_1d=32, mode='concat')
    cnn = TimeDistributed(Dropout(0.5))(cnn)
    # Small auxiliary branch; its pooled features feed the gender head.
    cnn_g = muti_scale_cnn1d(cnn, filters_1d=4, mode='concat')
    cnn = concatenate([cnn, cnn_g], -1)
    cnn_gg = GlobalAveragePooling1D()(cnn_g)
    cnn_gender = Dense(2, activation='softmax',
                       kernel_initializer=TruncatedNormal(stddev=0.02),
                       name='gender')(cnn_gg)
    cnn_output = TimeDistributed(
        Dense(6, activation='softmax',
              kernel_initializer=TruncatedNormal(stddev=0.02)),
        name='softmax')(cnn)
    print('\n', "USE:", '\n')
    inference_model = Model(inputs=[cnn_input], outputs=[cnn_output])
    inference_model.summary()
    gender_model = Model(inputs=[cnn_input], outputs=[cnn_output, cnn_gender])
    # CTC loss is computed inside the graph via a Lambda layer, so the
    # training model takes labels and lengths as extra inputs.
    labels = Input(name='the_labels', shape=[None], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int32')
    label_length = Input(name='label_length', shape=[1], dtype='int32')
    ctc_loss = Lambda(ctc_loss_lambda, output_shape=(1, ), name='ctc')(
        [labels, cnn_output, input_length, label_length])
    train_model = Model(inputs=[cnn_input, labels, input_length, label_length],
                        outputs=[ctc_loss, cnn_gender])
    test_func = K.function([cnn_input], [cnn_output])
    # 0 = test, 1 = train
    return train_model, inference_model, test_func, gender_model