Example No. 1
def build_model(model_config, energy_dim, weather_dim):
    lstm_dim = model_config['LSTM_DIM']
    denseA_dim = model_config['DENSE_ATTENTION_DIM']
    denseP_dim = model_config['DENSE_PREDICTION_DIM']
    drop_rate = model_config['DROP_RATE']
    lr = model_config['LR']

    # Energy part
    energy = Input(shape=(
        energy_dim,
        1,
    ),
                   dtype='float32',
                   name='energy_input')
    energy_encoding = EnergyEncodingLayer(lstm_dim, drop_rate)(energy)

    attention_weight = AttentionWeight(n_factor=1,
                                       hidden_d=denseA_dim)(energy_encoding)
    energy_encoding = Attention()([attention_weight, energy_encoding])

    # Weather part
    weather = Input(shape=(weather_dim, ),
                    dtype='float32',
                    name='weather_input')
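    # NOTE: the weather input is declared and passed to Model(inputs=...) below,
    # but it is never connected to the prediction path in this example.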

    # prediction layer
    prediction = PredictLayer(denseP_dim,
                              input_dim=K.int_shape(energy_encoding)[-1],
                              dropout=drop_rate)(energy_encoding)

    # model
    model = Model(inputs=[energy, weather], outputs=prediction)

    optimizer = keras.optimizers.Nadam(lr=lr,
                                       beta_1=0.9,
                                       beta_2=0.999,
                                       epsilon=1e-05,
                                       schedule_decay=0.0)
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    # model.summary()

    return model
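EnergyEncodingLayer, AttentionWeight, Attention and PredictLayer are project-specific layers, so the example above is not runnable on its own. Below is a minimal, self-contained sketch of the same overall pattern (an LSTM encoder, learned softmax attention pooling over time, a dense regression head, Nadam with MSE) built only from standard tf.keras layers; the function name, layer choices and default dimensions are placeholders, not the original custom layers.

import tensorflow as tf
from tensorflow.keras import layers, Model, optimizers

def build_toy_energy_model(energy_dim=24, lstm_dim=32, dense_dim=16,
                           drop_rate=0.2, lr=1e-3):
    energy = layers.Input(shape=(energy_dim, 1), name='energy_input')
    # Encode the sequence; return_sequences=True keeps per-timestep outputs
    encoded = layers.LSTM(lstm_dim, return_sequences=True,
                          dropout=drop_rate)(energy)
    # One attention score per timestep, normalised with softmax over time
    scores = layers.Dense(1, activation='tanh')(encoded)
    weights = layers.Softmax(axis=1)(scores)
    # Weighted sum over the time axis collapses the sequence to one vector
    pooled = layers.Lambda(lambda t: tf.reduce_sum(t[0] * t[1], axis=1))(
        [encoded, weights])
    out = layers.Dense(dense_dim, activation='relu')(pooled)
    out = layers.Dropout(drop_rate)(out)
    prediction = layers.Dense(1, name='prediction')(out)

    model = Model(inputs=energy, outputs=prediction)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizers.Nadam(learning_rate=lr))
    return model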
Example No. 2
def build_model(embedding_matrix, word_index, char_index):
    print('--- Building model...')
    # Params
    nb_words = min(TrainConfig.MAX_NB_WORDS, len(word_index)) + 1
    sequence_length = TrainConfig.MAX_SEQUENCE_LENGTH
    word_embedding_dim = TrainConfig.WORD_EMBEDDING_DIM
    rnn_unit = AttentionConfig.RNN_UNIT
    dropout = AttentionConfig.DROP_RATE
    context_rnn_dim = AttentionConfig.CONTEXT_LSTM_DIM
    dense_dim = AttentionConfig.DENSE_DIM
    if TrainConfig.USE_CHAR:
        nb_chars = min(TrainConfig.MAX_NB_CHARS, len(char_index)) + 1
        char_embedding_dim = TrainConfig.CHAR_EMBEDDING_DIM
        char_rnn_dim = TrainConfig.CHAR_LSTM_DIM
        nb_per_word = TrainConfig.MAX_CHAR_PER_WORD

    # Build words input
    w1 = Input(shape=(sequence_length, ), dtype='int32')
    w2 = Input(shape=(sequence_length, ), dtype='int32')
    if TrainConfig.USE_CHAR:
        c1 = Input(shape=(sequence_length, nb_per_word), dtype='int32')
        c2 = Input(shape=(sequence_length, nb_per_word), dtype='int32')

    # Build word representation layer
    word_layer = WordRepresLayer(sequence_length, nb_words, word_embedding_dim,
                                 embedding_matrix)
    w_res1 = word_layer(w1)
    w_res2 = word_layer(w2)

    # Build chars input
    if TrainConfig.USE_CHAR:
        char_layer = CharRepresLayer(sequence_length,
                                     nb_chars,
                                     nb_per_word,
                                     char_embedding_dim,
                                     char_rnn_dim,
                                     rnn_unit=rnn_unit,
                                     dropout=dropout)
        c_res1 = char_layer(c1)
        c_res2 = char_layer(c2)
        sequence1 = concatenate([w_res1, c_res1])
        sequence2 = concatenate([w_res2, c_res2])
    else:
        sequence1 = w_res1
        sequence2 = w_res2

    # Build context representation layer, (batch_size, timesteps, context_rnn_dim * 2)
    context_layer = ContextLayer(context_rnn_dim,
                                 rnn_unit=rnn_unit,
                                 dropout=dropout,
                                 input_shape=(
                                     sequence_length,
                                     K.int_shape(sequence1)[-1],
                                 ),
                                 return_sequences=True)
    context1 = context_layer(sequence1)
    context2 = context_layer(sequence2)

    # Build attention layer, (batch_size, timesteps, dense_dim)
    att_layer = AttentionLayer(dense_dim,
                               sequence_length=sequence_length,
                               input_dim=K.int_shape(context1)[-1],
                               dropout=dropout)
    # attention1, (batch_size, timesteps1, dim)
    # attention2, (batch_size, timesteps2, dim)
    attention1, attention2 = att_layer(context1, context2)

    # Build compare layer
    aggregation1 = concatenate([context1, attention1])
    aggregation2 = concatenate([context2, attention2])
    compare_layer = NNCompareLayer(dense_dim,
                                   sequence_length=sequence_length,
                                   input_dim=K.int_shape(aggregation1)[-1],
                                   dropout=dropout)
    compare1 = compare_layer(aggregation1)
    compare2 = compare_layer(aggregation2)

    final_repres = concatenate([compare1, compare2])

    # Build prediction layer
    pred = PredictLayer(dense_dim,
                        input_dim=K.int_shape(final_repres)[-1],
                        dropout=dropout)(final_repres)

    # Build model
    if TrainConfig.USE_CHAR:
        inputs = (w1, w2, c1, c2)
    else:
        inputs = (w1, w2)
    model = Model(inputs=inputs, outputs=pred)
    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model
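The core pattern here is a shared ("siamese") encoder: the same WordRepresLayer, CharRepresLayer, ContextLayer, AttentionLayer and NNCompareLayer instances are applied to both sentences, so the two sides are encoded with identical weights. A minimal sketch of that weight-sharing idea with standard tf.keras layers; names and sizes are illustrative only, not the project's custom layers:

from tensorflow.keras import layers, Model

def build_toy_siamese(nb_words=10000, seq_len=30, emb_dim=100, rnn_dim=64):
    q1 = layers.Input(shape=(seq_len,), dtype='int32')
    q2 = layers.Input(shape=(seq_len,), dtype='int32')

    # Instantiating each layer once and reusing it ties the weights
    embed = layers.Embedding(nb_words, emb_dim)
    encode = layers.Bidirectional(layers.LSTM(rnn_dim))

    v1 = encode(embed(q1))
    v2 = encode(embed(q2))

    merged = layers.concatenate([v1, v2])
    pred = layers.Dense(1, activation='sigmoid')(merged)

    model = Model(inputs=[q1, q2], outputs=pred)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model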
Example No. 3
def build_model(embedding_matrix, word_index, char_index):
    print('--- Building model...')
    # Params
    nb_words = min(TrainConfig.MAX_NB_WORDS, len(word_index)) + 1
    sequence_length = TrainConfig.MAX_SEQUENCE_LENGTH
    context_rnn_dim = BasicRnnConfig.RNN_DIM
    word_embedding_dim = TrainConfig.WORD_EMBEDDING_DIM
    rnn_unit = BasicRnnConfig.RNN_UNIT
    nb_per_word = TrainConfig.MAX_CHAR_PER_WORD
    dropout = BasicRnnConfig.DROP_RATE
    dense_dim = BasicRnnConfig.DENSE_DIM

    if TrainConfig.USE_CHAR:
        nb_chars = min(TrainConfig.MAX_NB_CHARS, len(char_index)) + 1
        char_embedding_dim = TrainConfig.CHAR_EMBEDDING_DIM
        char_rnn_dim = TrainConfig.CHAR_LSTM_DIM

    # define inputs
    w1 = Input(shape=(sequence_length,), dtype='int32')
    w2 = Input(shape=(sequence_length,), dtype='int32')
    if TrainConfig.USE_CHAR:
        c1 = Input(shape=(sequence_length, nb_per_word), dtype='int32')
        c2 = Input(shape=(sequence_length, nb_per_word), dtype='int32')

    # define word embedding representation
    word_layer = WordRepresLayer(
        sequence_length, nb_words, word_embedding_dim, embedding_matrix)
    w_res1 = word_layer(w1)
    w_res2 = word_layer(w2)

    # define char embedding representation
    if TrainConfig.USE_CHAR:
        char_layer = CharRepresLayer(
            sequence_length, nb_chars, nb_per_word, char_embedding_dim,
            char_rnn_dim, rnn_unit=rnn_unit, dropout=dropout)
        c_res1 = char_layer(c1)
        c_res2 = char_layer(c2)
        sequence1 = concatenate([w_res1, c_res1])
        sequence2 = concatenate([w_res2, c_res2])
    else:
        sequence1 = w_res1
        sequence2 = w_res2

    # define stacked LSTM layers; all but the last return full sequences
    for i in range(BasicRnnConfig.RNN_DIM_LAYER):
        if i == BasicRnnConfig.RNN_DIM_LAYER - 1:
            return_q = False
        else:
            return_q = True
        context_layer = ContextLayer(
            context_rnn_dim, rnn_unit=rnn_unit, dropout=dropout,
            input_shape=(sequence_length, K.int_shape(sequence1)[-1],),
            return_sequences=return_q)
        context1 = context_layer(sequence1)
        context2 = context_layer(sequence2)
        sequence1 = context1
        sequence2 = context2

    final_repres = concatenate([sequence1, sequence2])

    # Build prediction layer
    preds = PredictLayer(dense_dim,
                         input_dim=K.int_shape(final_repres)[-1],
                         dropout=dropout)(final_repres)

    if TrainConfig.USE_CHAR:
        inputs = [w1, w2, c1, c2]
    else:
        inputs = [w1, w2]

    # Build model graph
    model = Model(inputs=inputs, outputs=preds)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model
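The loop above builds a stack of recurrent layers in which every layer except the last returns full sequences, so each layer feeds 3-D output to the next and only the final layer collapses the sequence to a single vector. A minimal sketch of that stacking pattern with placeholder sizes:

from tensorflow.keras import layers, Model

def build_toy_stacked_rnn(seq_len=30, feat_dim=100, rnn_dim=64, n_layers=3):
    x = inp = layers.Input(shape=(seq_len, feat_dim))
    for i in range(n_layers):
        last = (i == n_layers - 1)
        # Intermediate layers keep the time axis; the last one drops it
        x = layers.Bidirectional(
            layers.LSTM(rnn_dim, return_sequences=not last))(x)
    out = layers.Dense(1, activation='sigmoid')(x)
    model = Model(inp, out)
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model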
Example No. 4
def build_model(embedding_matrix, word_index, char_index=None):
    print('--- Building model...')

    # Parameters
    sequence_length = TrainConfig.MAX_SEQUENCE_LENGTH
    nb_per_word = TrainConfig.MAX_CHAR_PER_WORD
    rnn_unit = BiMPMConfig.RNN_UNIT
    nb_words = min(TrainConfig.MAX_NB_WORDS, len(word_index)) + 1
    word_embedding_dim = TrainConfig.WORD_EMBEDDING_DIM
    dropout = BiMPMConfig.DROP_RATE
    context_rnn_dim = BiMPMConfig.CONTEXT_LSTM_DIM
    mp_dim = BiMPMConfig.MP_DIM
    highway = BiMPMConfig.WITH_HIGHWAY
    aggregate_rnn_dim = BiMPMConfig.AGGREGATION_LSTM_DIM
    dense_dim = BiMPMConfig.DENSE_DIM
    if TrainConfig.USE_CHAR:
        nb_chars = min(TrainConfig.MAX_NB_CHARS, len(char_index)) + 1
        char_embedding_dim = TrainConfig.CHAR_EMBEDDING_DIM
        char_rnn_dim = TrainConfig.CHAR_LSTM_DIM

    # Model words input
    w1 = Input(shape=(sequence_length, ), dtype='int32')
    w2 = Input(shape=(sequence_length, ), dtype='int32')
    if TrainConfig.USE_CHAR:
        c1 = Input(shape=(sequence_length, nb_per_word), dtype='int32')
        c2 = Input(shape=(sequence_length, nb_per_word), dtype='int32')

    # Build word representation layer
    word_layer = WordRepresLayer(sequence_length, nb_words, word_embedding_dim,
                                 embedding_matrix)
    w_res1 = word_layer(w1)
    w_res2 = word_layer(w2)

    # Model chars input
    if TrainConfig.USE_CHAR:
        char_layer = CharRepresLayer(sequence_length,
                                     nb_chars,
                                     nb_per_word,
                                     char_embedding_dim,
                                     char_rnn_dim,
                                     rnn_unit=rnn_unit,
                                     dropout=dropout)
        c_res1 = char_layer(c1)
        c_res2 = char_layer(c2)
        sequence1 = concatenate([w_res1, c_res1])
        sequence2 = concatenate([w_res2, c_res2])
    else:
        sequence1 = w_res1
        sequence2 = w_res2

    # Build context representation layer
    context_layer = ContextLayer(context_rnn_dim,
                                 rnn_unit=rnn_unit,
                                 dropout=dropout,
                                 highway=highway,
                                 input_shape=(
                                     sequence_length,
                                     K.int_shape(sequence1)[-1],
                                 ),
                                 return_sequences=True)
    context1 = context_layer(sequence1)
    context2 = context_layer(sequence2)

    # Build matching layer
    matching_layer = MultiPerspective(mp_dim)
    matching1 = matching_layer([context1, context2])
    matching2 = matching_layer([context2, context1])
    matching = concatenate([matching1, matching2])

    # Build aggregation layer
    aggregate_layer = ContextLayer(aggregate_rnn_dim,
                                   rnn_unit=rnn_unit,
                                   dropout=dropout,
                                   highway=highway,
                                   input_shape=(
                                       sequence_length,
                                       K.int_shape(matching)[-1],
                                   ),
                                   return_sequences=False)
    aggregation = aggregate_layer(matching)

    # Build prediction layer
    pred = PredictLayer(dense_dim,
                        input_dim=K.int_shape(aggregation)[-1],
                        dropout=dropout)(aggregation)
    # Build model
    if TrainConfig.USE_CHAR:
        inputs = (w1, w2, c1, c2)
    else:
        inputs = (w1, w2)

    # Build model graph
    model = Model(inputs=inputs, outputs=pred)

    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model
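This is a BiMPM-style pipeline: shared context encoding, a matching layer applied symmetrically in both directions, an aggregation RNN, and a prediction head. The sketch below reproduces only the symmetric-matching flow with standard tf.keras layers, using keras.layers.Attention as a stand-in for the project's MultiPerspective layer; all names and dimensions are illustrative:

from tensorflow.keras import layers, Model

def build_toy_matching_model(seq_len=30, nb_words=10000, emb_dim=100,
                             rnn_dim=64):
    q1 = layers.Input(shape=(seq_len,), dtype='int32')
    q2 = layers.Input(shape=(seq_len,), dtype='int32')

    embed = layers.Embedding(nb_words, emb_dim)
    context = layers.Bidirectional(layers.LSTM(rnn_dim, return_sequences=True))
    c1, c2 = context(embed(q1)), context(embed(q2))

    match = layers.Attention()   # shared, applied in both directions
    m1 = match([c1, c2])         # sentence 1 attends over sentence 2
    m2 = match([c2, c1])         # sentence 2 attends over sentence 1
    matching = layers.concatenate([m1, m2])

    # Aggregate the matched sequences into a single vector
    aggregation = layers.Bidirectional(layers.LSTM(rnn_dim))(matching)
    pred = layers.Dense(1, activation='sigmoid')(aggregation)

    model = Model(inputs=[q1, q2], outputs=pred)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model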
Example No. 5
def build_model(embedding_matrix, word_index, train_config, model_config, dir_config):
    print('--- Building model...')

    # Parameters
    sent_length = train_config.MAX_SENT_LENGTH
    target_length = train_config.MAX_TARGET_LENGTH
    nb_words = min(train_config.MAX_NB_WORDS, len(word_index)) + 1
    word_embedding_dim = train_config.WORD_EMBEDDING_DIM
    dropout_rate = model_config.DROP_RATE
    rnn_dim = model_config.RNN_DIM
    n_aspect = model_config.NUM_ASPECT
    dense_dim = model_config.DENSE_DIM
    lr = train_config.LR
    num_class = len(dir_config.LABEL_MAPPING)

    # Input layer
    sent = Input(shape=(sent_length,), dtype='int32', name='s_input')
    target = Input(shape=(target_length,), dtype='int32', name='t_input')

    # Embedding Layer
    emb_sent = Embedding(output_dim=word_embedding_dim,
                         input_dim=nb_words,
                         input_length=sent_length,
                         weights=[embedding_matrix],
                         trainable=False,
                         mask_zero=True)(sent)
    emb_target = Embedding(output_dim=word_embedding_dim,
                           input_dim=nb_words,
                           input_length=target_length,
                           weights=[embedding_matrix],
                           trainable=False,
                           mask_zero=True)(target)

    emb_sent = Dropout(dropout_rate)(emb_sent)
    emb_target = Dropout(dropout_rate)(emb_target)

    # Context Encoding Layer
    target_encoding_layer = Bidirectional(LSTM(rnn_dim,
                                               dropout=dropout_rate,
                                               recurrent_dropout=dropout_rate,
                                               return_state=True,
                                               return_sequences=False),
                                          merge_mode='concat')
    (target_encoding,
     target_fw_state_h, target_fw_state_s,
     target_bw_state_h, target_bw_state_s) = target_encoding_layer(emb_target)

    sent_encoding_layer = Bidirectional(LSTM(rnn_dim,
                                             unroll=True,
                                             kernel_regularizer=regularizers.l2(1e-4),
                                             activity_regularizer=regularizers.l2(1e-4),
                                             dropout=dropout_rate,
                                             recurrent_dropout=dropout_rate,
                                             return_state=False,
                                             return_sequences=True),
                                        merge_mode='concat')
    sent_encoding = sent_encoding_layer(emb_sent,
                                        initial_state=[target_fw_state_h, target_fw_state_s,
                                                       target_bw_state_h, target_bw_state_s])

    # Aspect Attention Layer
    aspect_attention_layer = AspectAttentionLayer(n_aspect=n_aspect, hidden_d=dense_dim)
    aspect_attention = aspect_attention_layer([sent_encoding])

    # Aspect Encoding Layer
    aspect_encoding_layer = AspectEncoding()
    aspect_encoding = aspect_encoding_layer([aspect_attention, sent_encoding])

    aspect_encoding = LayerNormalization()(aspect_encoding)

    # Prediction layer
    pred = PredictLayer(dense_dim,
                        input_dim=K.int_shape(aspect_encoding)[-1],
                        dropout=dropout_rate,
                        num_class=num_class)(aspect_encoding)

    # Build model graph
    model = Model(inputs=(sent, target),
                  outputs=pred)

    # Compile model
    optimizer = optimizers.Nadam(lr=lr)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()
    return model
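The distinctive step here is conditioning the sentence encoder on the target: the target BiLSTM returns its final states (return_state=True) and those four tensors seed the sentence BiLSTM via initial_state. A minimal sketch of that state-passing mechanism with standard tf.keras layers; AspectAttentionLayer, AspectEncoding, LayerNormalization and PredictLayer are omitted, and all names and sizes are placeholders:

from tensorflow.keras import layers, Model

def build_toy_conditional_encoder(sent_len=40, target_len=5, nb_words=10000,
                                  emb_dim=100, rnn_dim=64, num_class=3):
    sent = layers.Input(shape=(sent_len,), dtype='int32', name='s_input')
    target = layers.Input(shape=(target_len,), dtype='int32', name='t_input')

    embed = layers.Embedding(nb_words, emb_dim, mask_zero=True)
    emb_sent, emb_target = embed(sent), embed(target)

    # Bidirectional LSTM over the target; return_state=True yields
    # (output, fw_h, fw_c, bw_h, bw_c)
    _, fw_h, fw_c, bw_h, bw_c = layers.Bidirectional(
        layers.LSTM(rnn_dim, return_state=True))(emb_target)

    # The sentence encoder starts from the target's final states
    sent_encoding = layers.Bidirectional(
        layers.LSTM(rnn_dim, return_sequences=False))(
            emb_sent, initial_state=[fw_h, fw_c, bw_h, bw_c])

    pred = layers.Dense(num_class, activation='softmax')(sent_encoding)
    model = Model(inputs=[sent, target], outputs=pred)
    model.compile(loss='categorical_crossentropy', optimizer='nadam',
                  metrics=['accuracy'])
    return model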