def load(feat_size):
    """Build a strided-CNN classifier over (200, 189, 1) mocap frames.

    Returns a compiled Keras model with a 4-way softmax head.

    NOTE(review): `feat_size` is unused here — the input shape is
    hard-coded to (200, 189, 1); confirm against the caller.
    """
    x = inputs = Input(shape=(200, 189, 1))
    # Five stride-2 conv blocks. Dropout follows every block EXCEPT the
    # fourth — reproduced exactly as in the original architecture
    # (NOTE(review): possibly an oversight; confirm before "fixing").
    for filters, use_dropout in ((32, True), (64, True), (64, True),
                                 (128, False), (128, True)):
        x = Conv2D(filters, 3, strides=(2, 2), padding='same',
                   activation='relu')(x)
        x = BatchNormalization()(x)
        if use_dropout:
            x = Dropout(0.2)(x)
    x = Flatten()(x)
    outputs = Dense(4, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=top_k_accuracy())
    return model
def load(nb_words, g_word_embedding_matrix, feat_size):
    """Build a stacked bidirectional-LSTM text classifier (4-way softmax).

    Token sequences (length 500) are embedded with the pretrained 300-d
    matrix `g_word_embedding_matrix`, which is fine-tuned (trainable=True).

    NOTE(review): `feat_size` is unused in this model; confirm with callers.
    """
    x = inputs = Input(shape=(500, ))
    x = Embedding(nb_words, 300, weights=[g_word_embedding_matrix],
                  input_length=500, trainable=True)(x)
    x = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(x)
    x = Dropout(0.2)(x)
    x = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(x)
    x = Dropout(0.2)(x)
    x = Dense(256, activation='relu')(x)
    outputs = Dense(4, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=top_k_accuracy())
    return model
def load(feat_size):
    """Build a bimodal classifier fusing a speech BLSTM branch with a
    mocap CNN branch (late concatenation, 4-way softmax)."""
    # --- speech branch: stacked bidirectional LSTMs over (100, feat_size) ---
    speech_in = Input(shape=(100, feat_size))
    s = Bidirectional(LSTM(256, return_sequences=True))(speech_in)
    s = Dropout(0.2)(s)
    s = Bidirectional(LSTM(256, return_sequences=False))(s)
    s = Dropout(0.2)(s)
    s = Dense(256, activation='relu')(s)

    # --- mocap branch: stride-2 conv stack over (200, 189, 1) frames ---
    mocap_in = Input(shape=(200, 189, 1))
    m = mocap_in
    for filters in (32, 64, 64, 128):
        m = Conv2D(filters, 3, strides=(2, 2), padding='same',
                   activation='relu')(m)
        m = BatchNormalization()(m)
        m = Dropout(0.2)(m)
    m = Flatten()(m)
    m = Dense(256, activation='relu')(m)

    # --- late fusion ---
    fused = concatenate([s, m])
    fused = Dense(256, activation='relu')(fused)
    outputs = Dense(4, activation='softmax')(fused)

    model = Model(inputs=[speech_in, mocap_in], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=top_k_accuracy())
    return model
def load(feat_size):
    """Build a stacked bidirectional-LSTM speech classifier (4-way softmax)
    over (100, feat_size) feature sequences."""
    x = inputs = Input(shape=(100, feat_size))
    x = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(x)
    x = Dropout(0.2)(x)
    x = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(x)
    x = Dropout(0.2)(x)
    # The final BLSTM state feeds the softmax head directly (no
    # intermediate Dense layer in this variant).
    outputs = Dense(4, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=top_k_accuracy())
    return model
def load(nb_words, g_word_embedding_matrix, feat_size):
    """Build a bimodal classifier fusing a text BLSTM branch with a speech
    BLSTM branch (late concatenation, 4-way softmax)."""
    # --- text branch: embedded token sequence -> stacked BLSTMs ---
    text_in = Input(shape=(500, ))
    t = Embedding(nb_words, 300, weights=[g_word_embedding_matrix],
                  input_length=500, trainable=True)(text_in)
    t = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(t)
    t = Dropout(0.2)(t)
    t = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(t)
    t = Dropout(0.2)(t)
    t = Dense(256, activation='relu')(t)

    # --- speech branch: (100, feat_size) features -> stacked BLSTMs ---
    speech_in = Input(shape=(100, feat_size))
    s = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(speech_in)
    s = Dropout(0.2)(s)
    s = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(s)
    s = Dropout(0.2)(s)
    s = Dense(256, activation='relu')(s)

    # --- late fusion ---
    fused = concatenate([t, s])
    outputs = Dense(4, activation='softmax')(fused)

    model = Model(inputs=[text_in, speech_in], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=top_k_accuracy())
    return model
def load(feat_size):
    """Build a fully-connected classifier over (100, feat_size) input.

    NOTE(review): the Dense layers are applied to a 3-D tensor, so they
    act per timestep and the softmax output has shape (100, 4) — one
    prediction per frame. If a single utterance-level prediction is
    intended, a Flatten/pooling layer is missing; confirm against the
    training targets before changing.
    """
    x = inputs = Input(shape=(100, feat_size))
    # Funnel of Dense -> BatchNorm -> Dropout blocks: 1024 -> 512 -> 256.
    for units in (1024, 512, 256):
        x = Dense(units, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)
    outputs = Dense(4, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=top_k_accuracy())
    return model
def load(feat_size):
    """Build a bimodal classifier: a CNN + BLSTM + attention branch over
    3-channel speech features fused with a mocap CNN branch (4-way softmax).
    """
    # --- speech branch ---
    speech_in = Input(shape=(100, feat_size, 3))
    s = Conv2D(128, kernel_size=(5, 3), strides=(1, 1), padding='same',
               activation='relu')(speech_in)
    s = MaxPooling2D(padding='same')(s)
    s = Dropout(0.2)(s)
    # Four identical 256-filter conv blocks (no pooling, dropout only).
    for _ in range(4):
        s = Conv2D(256, kernel_size=(5, 3), strides=(1, 1), padding='same',
                   activation='relu')(s)
        s = Dropout(0.2)(s)
    s = Dense(512, activation="relu")(s)
    s = BatchNormalization()(s)
    # Collapse conv feature maps back to a (100, features) sequence.
    # NOTE(review): assumes the post-pooling element count is divisible
    # into 100 timesteps — confirm for the actual feat_size.
    s = Reshape((100, -1))(s)
    # Two stacked BLSTMs, both sequence-to-sequence for the attention layer.
    for _ in range(2):
        s = Bidirectional(
            LSTM(256, return_sequences=True, recurrent_dropout=0.2))(s)
        s = Dropout(0.2)(s)
    s = AttentionDecoder(256, 256)(s)
    s = Flatten()(s)
    s = Dense(256, activation="relu")(s)

    # --- mocap branch: stride-2 conv stack over (200, 189, 1) frames ---
    mocap_in = Input(shape=(200, 189, 1))
    m = mocap_in
    for filters in (32, 64, 64, 128):
        m = Conv2D(filters, 3, strides=(2, 2), padding='same',
                   activation='relu')(m)
        m = BatchNormalization()(m)
        m = Dropout(0.2)(m)
    m = Flatten()(m)
    m = Dense(256, activation='relu')(m)

    # --- late fusion ---
    fused = concatenate([s, m])
    fused = Dense(256, activation='relu')(fused)
    outputs = Dense(4, activation='softmax')(fused)

    model = Model(inputs=[speech_in, mocap_in], outputs=outputs)
    # Gradient clipping (clipnorm) guards the recurrent stack.
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
                decay=1e-6, amsgrad=False, clipnorm=3.0)
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=top_k_accuracy())
    return model
def load(feat_size):
    """Build a CNN + LSTM + attention classifier over 3-channel speech
    features (4-way softmax)."""
    x = inputs = Input(shape=(100, feat_size, 3))
    x = Conv2D(128, kernel_size=(5, 3), strides=(1, 1), padding='same',
               activation='relu')(x)
    x = MaxPooling2D(padding='same')(x)
    x = Dropout(0.2)(x)
    # Four identical 256-filter conv blocks (no pooling, dropout only).
    for _ in range(4):
        x = Conv2D(256, kernel_size=(5, 3), strides=(1, 1), padding='same',
                   activation='relu')(x)
        x = Dropout(0.2)(x)
    x = Dense(512, activation="relu")(x)
    x = BatchNormalization()(x)
    # Collapse conv feature maps back to a (100, features) sequence.
    x = Reshape((100, -1))(x)
    # Two stacked (unidirectional) LSTMs feeding the attention decoder.
    for _ in range(2):
        x = LSTM(256, return_sequences=True, recurrent_dropout=0.2)(x)
        x = Dropout(0.2)(x)
    x = AttentionDecoder(128, 128)(x)
    x = Flatten()(x)
    x = Dense(512, activation="relu")(x)
    x = BatchNormalization()(x)
    outputs = Dense(4, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
                decay=1e-6, amsgrad=False)
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=top_k_accuracy())
    return model
def load(nb_words, g_word_embedding_matrix, feat_size):
    """Build a bimodal classifier fusing a text LSTM branch with a speech
    CNN + recurrent + attention branch (4-way softmax)."""
    # --- text branch: embedded tokens -> two (unidirectional) LSTMs ---
    text_in = Input(shape=(500, ))
    t = Embedding(nb_words, 300, weights=[g_word_embedding_matrix],
                  input_length=500, trainable=True)(text_in)
    t = LSTM(256, return_sequences=True, recurrent_dropout=0.2)(t)
    t = Dropout(0.2)(t)
    t = LSTM(256, return_sequences=False, recurrent_dropout=0.2)(t)
    t = Dropout(0.2)(t)
    t = Dense(256, activation='relu')(t)

    # --- speech branch: conv stack -> recurrent stack -> attention ---
    speech_in = Input(shape=(100, feat_size, 3))
    s = Conv2D(128, kernel_size=(5, 3), strides=(1, 1), padding='same',
               activation='relu')(speech_in)
    s = MaxPooling2D(padding='same')(s)
    s = Dropout(0.2)(s)
    # Four identical 256-filter conv blocks (no pooling, dropout only).
    for _ in range(4):
        s = Conv2D(256, kernel_size=(5, 3), strides=(1, 1), padding='same',
                   activation='relu')(s)
        s = Dropout(0.2)(s)
    s = Dense(512, activation="relu")(s)
    s = BatchNormalization()(s)
    # Collapse conv feature maps back to a (100, features) sequence.
    s = Reshape((100, -1))(s)
    # First recurrent layer is bidirectional, second is plain —
    # reproduced exactly as in the original architecture.
    s = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(s)
    s = Dropout(0.2)(s)
    s = LSTM(256, return_sequences=True, recurrent_dropout=0.2)(s)
    s = Dropout(0.2)(s)
    s = AttentionDecoder(256, 256, name='AttentionDecoder_sp')(s)
    s = Flatten()(s)
    s = Dense(256, activation="relu")(s)

    # --- late fusion ---
    fused = concatenate([t, s])
    outputs = Dense(4, activation='softmax')(fused)

    model = Model(inputs=[text_in, speech_in], outputs=outputs)
    # Gradient clipping (clipnorm) guards the recurrent stack.
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
                decay=1e-6, amsgrad=False, clipnorm=4.0)
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=top_k_accuracy())
    return model
def load(nb_words, g_word_embedding_matrix, feat_size):
    """Build a trimodal classifier fusing text (BLSTM), speech (BLSTM) and
    mocap (CNN) branches via late concatenation (4-way softmax)."""
    # --- text branch: embedded tokens -> stacked BLSTMs ---
    text_in = Input(shape=(500, ))
    t = Embedding(nb_words, 300, weights=[g_word_embedding_matrix],
                  input_length=500, trainable=True)(text_in)
    t = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(t)
    t = Dropout(0.2)(t)
    t = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(t)
    t = Dropout(0.2)(t)
    t = Dense(256, activation='relu')(t)

    # --- speech branch: (100, feat_size) features -> stacked BLSTMs ---
    speech_in = Input(shape=(100, feat_size))
    s = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.2))(speech_in)
    s = Dropout(0.2)(s)
    s = Bidirectional(
        LSTM(256, return_sequences=False, recurrent_dropout=0.2))(s)
    s = Dropout(0.2)(s)
    s = Dense(256, activation='relu')(s)

    # --- mocap branch: stride-2 conv stack over (200, 189, 1) frames ---
    mocap_in = Input(shape=(200, 189, 1))
    m = mocap_in
    for filters in (32, 64, 64, 128):
        m = Conv2D(filters, 3, strides=(2, 2), padding='same',
                   activation='relu')(m)
        m = BatchNormalization()(m)
        m = Dropout(0.2)(m)
    m = Flatten()(m)
    m = Dense(256, activation='relu')(m)

    # --- late fusion of all three modalities ---
    fused = concatenate([t, s, m])
    fused = Dense(256, activation='relu')(fused)
    outputs = Dense(4, activation='softmax')(fused)

    model = Model(inputs=[text_in, speech_in, mocap_in], outputs=outputs)
    # Gradient clipping (clipnorm) guards the recurrent stack; note this
    # variant uses decay=0.0 unlike the 1e-6 used elsewhere.
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
                decay=0.0, amsgrad=False, clipnorm=3.0)
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=top_k_accuracy())
    return model