def build_model(model_config, energy_dim, weather_dim):
    lstm_dim = model_config['LSTM_DIM']
    denseA_dim = model_config['DENSE_ATTENTION_DIM']
    denseP_dim = model_config['DENSE_PREDICTION_DIM']
    drop_rate = model_config['DROP_RATE']
    lr = model_config['LR']

    # Energy part
    energy = Input(shape=(energy_dim, 1,), dtype='float32', name='energy_input')
    energy_encoding = EnergyEncodingLayer(lstm_dim, drop_rate)(energy)
    attention_weight = AttentionWeight(n_factor=1,
                                       hidden_d=denseA_dim)(energy_encoding)
    energy_encoding = Attention()([attention_weight, energy_encoding])

    # Weather part (declared as a model input, but not wired into the prediction path)
    weather = Input(shape=(weather_dim,), dtype='float32', name='weather_input')

    # Prediction layer
    prediction = PredictLayer(denseP_dim,
                              input_dim=K.int_shape(energy_encoding)[-1],
                              dropout=drop_rate)(energy_encoding)

    # Model
    model = Model(inputs=[energy, weather], outputs=prediction)
    optimizer = keras.optimizers.Nadam(lr=lr, beta_1=0.9, beta_2=0.999,
                                       epsilon=1e-05, schedule_decay=0.0)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    # model.summary()
    return model
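
# Minimal usage sketch, not part of the original module: it shows how this
# build_model could be smoke-tested. All config values, shapes, and the random
# data below are hypothetical assumptions; it also assumes the custom layers
# (EnergyEncodingLayer, AttentionWeight, Attention, PredictLayer) are importable
# and that PredictLayer emits a single regression value.
import numpy as np

model_config = {
    'LSTM_DIM': 64,              # hypothetical
    'DENSE_ATTENTION_DIM': 32,   # hypothetical
    'DENSE_PREDICTION_DIM': 32,  # hypothetical
    'DROP_RATE': 0.2,            # hypothetical
    'LR': 1e-3,                  # hypothetical
}
energy_dim, weather_dim, n_samples = 24, 8, 128  # hypothetical shapes

model = build_model(model_config, energy_dim, weather_dim)

# Both named inputs must be fed, even though only the energy branch feeds the output.
X_energy = np.random.rand(n_samples, energy_dim, 1).astype('float32')
X_weather = np.random.rand(n_samples, weather_dim).astype('float32')
y = np.random.rand(n_samples, 1).astype('float32')  # assumed target shape
model.fit({'energy_input': X_energy, 'weather_input': X_weather}, y,
          epochs=2, batch_size=32)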
def build_model(embedding_matrix, word_index, char_index):
    print('--- Building model...')
    # Params
    nb_words = min(TrainConfig.MAX_NB_WORDS, len(word_index)) + 1
    sequence_length = TrainConfig.MAX_SEQUENCE_LENGTH
    word_embedding_dim = TrainConfig.WORD_EMBEDDING_DIM
    rnn_unit = AttentionConfig.RNN_UNIT
    dropout = AttentionConfig.DROP_RATE
    context_rnn_dim = AttentionConfig.CONTEXT_LSTM_DIM
    dense_dim = AttentionConfig.DENSE_DIM
    if TrainConfig.USE_CHAR:
        nb_chars = min(TrainConfig.MAX_NB_CHARS, len(char_index)) + 1
        char_embedding_dim = TrainConfig.CHAR_EMBEDDING_DIM
        char_rnn_dim = TrainConfig.CHAR_LSTM_DIM
        nb_per_word = TrainConfig.MAX_CHAR_PER_WORD

    # Build words input
    w1 = Input(shape=(sequence_length,), dtype='int32')
    w2 = Input(shape=(sequence_length,), dtype='int32')
    if TrainConfig.USE_CHAR:
        c1 = Input(shape=(sequence_length, nb_per_word), dtype='int32')
        c2 = Input(shape=(sequence_length, nb_per_word), dtype='int32')

    # Build word representation layer
    word_layer = WordRepresLayer(sequence_length, nb_words,
                                 word_embedding_dim, embedding_matrix)
    w_res1 = word_layer(w1)
    w_res2 = word_layer(w2)

    # Build chars input
    if TrainConfig.USE_CHAR:
        char_layer = CharRepresLayer(sequence_length, nb_chars, nb_per_word,
                                     char_embedding_dim, char_rnn_dim,
                                     rnn_unit=rnn_unit, dropout=dropout)
        c_res1 = char_layer(c1)
        c_res2 = char_layer(c2)
        sequence1 = concatenate([w_res1, c_res1])
        sequence2 = concatenate([w_res2, c_res2])
    else:
        sequence1 = w_res1
        sequence2 = w_res2

    # Build context representation layer, (batch_size, timesteps, context_rnn_dim * 2)
    context_layer = ContextLayer(context_rnn_dim, rnn_unit=rnn_unit,
                                 dropout=dropout,
                                 input_shape=(sequence_length,
                                              K.int_shape(sequence1)[-1],),
                                 return_sequences=True)
    context1 = context_layer(sequence1)
    context2 = context_layer(sequence2)

    # Build attention layer, (batch_size, timesteps, dense_dim)
    att_layer = AttentionLayer(dense_dim,
                               sequence_length=sequence_length,
                               input_dim=K.int_shape(context1)[-1],
                               dropout=dropout)
    # attention1, (batch_size, timesteps1, dim)
    # attention2, (batch_size, timesteps2, dim)
    attention1, attention2 = att_layer(context1, context2)

    # Build compare layer
    aggregation1 = concatenate([context1, attention1])
    aggregation2 = concatenate([context2, attention2])
    compare_layer = NNCompareLayer(dense_dim,
                                   sequence_length=sequence_length,
                                   input_dim=K.int_shape(aggregation1)[-1],
                                   dropout=dropout)
    compare1 = compare_layer(aggregation1)
    compare2 = compare_layer(aggregation2)
    final_repres = concatenate([compare1, compare2])

    # Build prediction layer
    pred = PredictLayer(dense_dim,
                        input_dim=K.int_shape(final_repres)[-1],
                        dropout=dropout)(final_repres)

    # Build model
    if TrainConfig.USE_CHAR:
        inputs = (w1, w2, c1, c2)
    else:
        inputs = (w1, w2)
    model = Model(inputs=inputs, outputs=pred)

    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())
    return model
def build_model(embedding_matrix, word_index, char_index):
    print('--- Building model...')
    # Params
    nb_words = min(TrainConfig.MAX_NB_WORDS, len(word_index)) + 1
    sequence_length = TrainConfig.MAX_SEQUENCE_LENGTH
    context_rnn_dim = BasicRnnConfig.RNN_DIM
    word_embedding_dim = TrainConfig.WORD_EMBEDDING_DIM
    rnn_unit = BasicRnnConfig.RNN_UNIT
    nb_per_word = TrainConfig.MAX_CHAR_PER_WORD
    dropout = BasicRnnConfig.DROP_RATE
    dense_dim = BasicRnnConfig.DENSE_DIM
    if TrainConfig.USE_CHAR:
        nb_chars = min(TrainConfig.MAX_NB_CHARS, len(char_index)) + 1
        char_embedding_dim = TrainConfig.CHAR_EMBEDDING_DIM
        char_rnn_dim = TrainConfig.CHAR_LSTM_DIM

    # Define inputs
    w1 = Input(shape=(sequence_length,), dtype='int32')
    w2 = Input(shape=(sequence_length,), dtype='int32')
    if TrainConfig.USE_CHAR:
        c1 = Input(shape=(sequence_length, nb_per_word), dtype='int32')
        c2 = Input(shape=(sequence_length, nb_per_word), dtype='int32')

    # Define word embedding representation
    word_layer = WordRepresLayer(
        sequence_length, nb_words, word_embedding_dim, embedding_matrix)
    w_res1 = word_layer(w1)
    w_res2 = word_layer(w2)

    # Define char embedding representation
    if TrainConfig.USE_CHAR:
        char_layer = CharRepresLayer(
            sequence_length, nb_chars, nb_per_word,
            char_embedding_dim, char_rnn_dim,
            rnn_unit=rnn_unit, dropout=dropout)
        c_res1 = char_layer(c1)
        c_res2 = char_layer(c2)
        sequence1 = concatenate([w_res1, c_res1])
        sequence2 = concatenate([w_res2, c_res2])
    else:
        sequence1 = w_res1
        sequence2 = w_res2

    # Define stacked RNN layers; only the last layer collapses the time dimension
    for i in range(BasicRnnConfig.RNN_DIM_LAYER):
        return_q = (i != BasicRnnConfig.RNN_DIM_LAYER - 1)
        context_layer = ContextLayer(
            context_rnn_dim, rnn_unit=rnn_unit, dropout=dropout,
            input_shape=(sequence_length, K.int_shape(sequence1)[-1],),
            return_sequences=return_q)
        context1 = context_layer(sequence1)
        context2 = context_layer(sequence2)
        sequence1 = context1
        sequence2 = context2
    final_repres = concatenate([sequence1, sequence2])

    # Build prediction layer
    preds = PredictLayer(dense_dim,
                         input_dim=K.int_shape(final_repres)[-1],
                         dropout=dropout)(final_repres)

    if TrainConfig.USE_CHAR:
        inputs = [w1, w2, c1, c2]
    else:
        inputs = [w1, w2]

    # Build model graph
    model = Model(inputs=inputs, outputs=preds)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())
    return model
def build_model(embedding_matrix, word_index, char_index=None):
    print('--- Building model...')
    # Parameters
    sequence_length = TrainConfig.MAX_SEQUENCE_LENGTH
    nb_per_word = TrainConfig.MAX_CHAR_PER_WORD
    rnn_unit = BiMPMConfig.RNN_UNIT
    nb_words = min(TrainConfig.MAX_NB_WORDS, len(word_index)) + 1
    word_embedding_dim = TrainConfig.WORD_EMBEDDING_DIM
    dropout = BiMPMConfig.DROP_RATE
    context_rnn_dim = BiMPMConfig.CONTEXT_LSTM_DIM
    mp_dim = BiMPMConfig.MP_DIM
    highway = BiMPMConfig.WITH_HIGHWAY
    aggregate_rnn_dim = BiMPMConfig.AGGREGATION_LSTM_DIM
    dense_dim = BiMPMConfig.DENSE_DIM
    if TrainConfig.USE_CHAR:
        nb_chars = min(TrainConfig.MAX_NB_CHARS, len(char_index)) + 1
        char_embedding_dim = TrainConfig.CHAR_EMBEDDING_DIM
        char_rnn_dim = TrainConfig.CHAR_LSTM_DIM

    # Model words input
    w1 = Input(shape=(sequence_length,), dtype='int32')
    w2 = Input(shape=(sequence_length,), dtype='int32')
    if TrainConfig.USE_CHAR:
        c1 = Input(shape=(sequence_length, nb_per_word), dtype='int32')
        c2 = Input(shape=(sequence_length, nb_per_word), dtype='int32')

    # Build word representation layer
    word_layer = WordRepresLayer(sequence_length, nb_words,
                                 word_embedding_dim, embedding_matrix)
    w_res1 = word_layer(w1)
    w_res2 = word_layer(w2)

    # Model chars input
    if TrainConfig.USE_CHAR:
        char_layer = CharRepresLayer(sequence_length, nb_chars, nb_per_word,
                                     char_embedding_dim, char_rnn_dim,
                                     rnn_unit=rnn_unit, dropout=dropout)
        c_res1 = char_layer(c1)
        c_res2 = char_layer(c2)
        sequence1 = concatenate([w_res1, c_res1])
        sequence2 = concatenate([w_res2, c_res2])
    else:
        sequence1 = w_res1
        sequence2 = w_res2

    # Build context representation layer
    context_layer = ContextLayer(context_rnn_dim, rnn_unit=rnn_unit,
                                 dropout=dropout, highway=highway,
                                 input_shape=(sequence_length,
                                              K.int_shape(sequence1)[-1],),
                                 return_sequences=True)
    context1 = context_layer(sequence1)
    context2 = context_layer(sequence2)

    # Build matching layer
    matching_layer = MultiPerspective(mp_dim)
    matching1 = matching_layer([context1, context2])
    matching2 = matching_layer([context2, context1])
    matching = concatenate([matching1, matching2])

    # Build aggregation layer
    aggregate_layer = ContextLayer(aggregate_rnn_dim, rnn_unit=rnn_unit,
                                   dropout=dropout, highway=highway,
                                   input_shape=(sequence_length,
                                                K.int_shape(matching)[-1],),
                                   return_sequences=False)
    aggregation = aggregate_layer(matching)

    # Build prediction layer
    pred = PredictLayer(dense_dim,
                        input_dim=K.int_shape(aggregation)[-1],
                        dropout=dropout)(aggregation)

    # Build model
    if TrainConfig.USE_CHAR:
        inputs = (w1, w2, c1, c2)
    else:
        inputs = (w1, w2)

    # Build model graph
    model = Model(inputs=inputs, outputs=pred)

    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())
    return model
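
# Minimal usage sketch for the BiMPM graph above, not part of the original
# module. The vocabulary, embedding matrix, and training pairs are hypothetical;
# it assumes TrainConfig.USE_CHAR is False (so only the two word inputs are fed)
# and that PredictLayer emits a single sigmoid unit for binary_crossentropy.
import numpy as np

word_index = {'hello': 1, 'world': 2}                       # hypothetical vocabulary
nb_words = min(TrainConfig.MAX_NB_WORDS, len(word_index)) + 1
embedding_matrix = np.random.rand(nb_words, TrainConfig.WORD_EMBEDDING_DIM)

model = build_model(embedding_matrix, word_index)

n_pairs = 64                                                # hypothetical sample count
seq_len = TrainConfig.MAX_SEQUENCE_LENGTH
X1 = np.random.randint(0, nb_words, size=(n_pairs, seq_len))
X2 = np.random.randint(0, nb_words, size=(n_pairs, seq_len))
y = np.random.randint(0, 2, size=(n_pairs, 1))              # binary match label
model.fit([X1, X2], y, epochs=2, batch_size=32)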
def build_model(embedding_matrix, word_index, train_config, model_config, dir_config):
    print('--- Building model...')
    # Parameters
    sent_length = train_config.MAX_SENT_LENGTH
    target_length = train_config.MAX_TARGET_LENGTH
    nb_words = min(train_config.MAX_NB_WORDS, len(word_index)) + 1
    word_embedding_dim = train_config.WORD_EMBEDDING_DIM
    dropout_rate = model_config.DROP_RATE
    rnn_dim = model_config.RNN_DIM
    n_aspect = model_config.NUM_ASPECT
    dense_dim = model_config.DENSE_DIM
    lr = train_config.LR
    num_class = len(dir_config.LABEL_MAPPING)

    # Input layer
    sent = Input(shape=(sent_length,), dtype='int32', name='s_input')
    target = Input(shape=(target_length,), dtype='int32', name='t_input')

    # Embedding Layer
    emb_sent = Embedding(output_dim=word_embedding_dim,
                         input_dim=nb_words,
                         input_length=sent_length,
                         weights=[embedding_matrix],
                         trainable=False,
                         mask_zero=True)(sent)
    emb_target = Embedding(output_dim=word_embedding_dim,
                           input_dim=nb_words,
                           input_length=target_length,
                           weights=[embedding_matrix],
                           trainable=False,
                           mask_zero=True)(target)
    emb_sent = Dropout(dropout_rate)(emb_sent)
    emb_target = Dropout(dropout_rate)(emb_target)

    # Context Encoding Layer
    target_encoding_layer = Bidirectional(
        LSTM(rnn_dim,
             dropout=dropout_rate,
             recurrent_dropout=dropout_rate,
             return_state=True,
             return_sequences=False),
        merge_mode='concat')
    (target_encoding,
     target_fw_state_h, target_fw_state_s,
     target_bw_state_h, target_bw_state_s) = target_encoding_layer(emb_target)
    sent_encoding_layer = Bidirectional(
        LSTM(rnn_dim,
             unroll=True,
             kernel_regularizer=regularizers.l2(1e-4),
             activity_regularizer=regularizers.l2(1e-4),
             dropout=dropout_rate,
             recurrent_dropout=dropout_rate,
             return_state=False,
             return_sequences=True),
        merge_mode='concat')
    sent_encoding = sent_encoding_layer(
        emb_sent,
        initial_state=[target_fw_state_h, target_fw_state_s,
                       target_bw_state_h, target_bw_state_s])

    # Aspect Attention Layer
    aspect_attention_layer = AspectAttentionLayer(n_aspect=n_aspect,
                                                  hidden_d=dense_dim)
    aspect_attention = aspect_attention_layer([sent_encoding])

    # Aspect Encoding Layer
    aspect_encoding_layer = AspectEncoding()
    aspect_encoding = aspect_encoding_layer([aspect_attention, sent_encoding])
    aspect_encoding = LayerNormalization()(aspect_encoding)

    # Prediction layer
    pred = PredictLayer(dense_dim,
                        input_dim=K.int_shape(aspect_encoding)[-1],
                        dropout=dropout_rate,
                        num_class=num_class)(aspect_encoding)

    # Build model graph
    model = Model(inputs=(sent, target), outputs=pred)

    # Compile model
    optimizer = optimizers.Nadam(lr=lr)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()
    return model
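
# Minimal usage sketch for the aspect-attention model above, not part of the
# original module. All config attribute values, the vocabulary, and the random
# data are hypothetical; the config objects only need the attributes this
# build_model reads, so plain namespaces suffice for a smoke test.
import numpy as np
from types import SimpleNamespace

train_config = SimpleNamespace(MAX_SENT_LENGTH=40, MAX_TARGET_LENGTH=5,
                               MAX_NB_WORDS=20000, WORD_EMBEDDING_DIM=100,
                               LR=1e-3)                          # hypothetical values
model_config = SimpleNamespace(DROP_RATE=0.3, RNN_DIM=64,
                               NUM_ASPECT=5, DENSE_DIM=128)      # hypothetical values
dir_config = SimpleNamespace(LABEL_MAPPING={'negative': 0, 'neutral': 1,
                                            'positive': 2})      # hypothetical labels

word_index = {'great': 1, 'battery': 2}                          # hypothetical vocabulary
nb_words = min(train_config.MAX_NB_WORDS, len(word_index)) + 1
embedding_matrix = np.random.rand(nb_words, train_config.WORD_EMBEDDING_DIM)

model = build_model(embedding_matrix, word_index,
                    train_config, model_config, dir_config)

# Inputs are padded id sequences; targets are one-hot over LABEL_MAPPING.
n_samples = 32                                                   # hypothetical sample count
X_sent = np.random.randint(0, nb_words, size=(n_samples, train_config.MAX_SENT_LENGTH))
X_target = np.random.randint(0, nb_words, size=(n_samples, train_config.MAX_TARGET_LENGTH))
labels = np.random.randint(0, len(dir_config.LABEL_MAPPING), size=n_samples)
y = np.eye(len(dir_config.LABEL_MAPPING))[labels]
model.fit({'s_input': X_sent, 't_input': X_target}, y, epochs=2, batch_size=8)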