Example #1
def lstmNet(input_shape1, input_shape2):

    # temporal_network
    input_t = Input(shape=input_shape1)
    lstm1_t = LSTM(100,
                   dropout=0.2,
                   recurrent_dropout=0.2,
                   return_sequences=True)(input_t)
    dp1_t = Dropout(0.5)(lstm1_t)
    lstm2_t = LSTM(100,
                   dropout=0.2,
                   recurrent_dropout=0.2,
                   return_sequences=True)(dp1_t)
    dp2_t = Dropout(0.5)(lstm2_t)
    lstm3_t = LSTM(100,
                   dropout=0.2,
                   recurrent_dropout=0.2,
                   return_sequences=True)(dp2_t)
    dp3_t = Dropout(0.5)(lstm3_t)
    att1 = Attention()(dp3_t)
    out1_t = Dense(nb_classes, activation='softmax')(att1)  # nb_classes is assumed to be a module-level global
    model1 = Model(input_t, out1_t)

    #model1.summary()

    # load weights
    model1.load_weights('shape_attention.hdf5')

    # spatial_network
    input_s = Input(shape=input_shape2)
    lstm1_s = LSTM(100,
                   dropout=0.2,
                   recurrent_dropout=0.2,
                   return_sequences=True)(input_s)
    dp1_s = Dropout(0.5)(lstm1_s)
    lstm2_s = LSTM(100,
                   dropout=0.2,
                   recurrent_dropout=0.2,
                   return_sequences=True)(dp1_s)
    dp2_s = Dropout(0.5)(lstm2_s)
    lstm3_s = LSTM(100,
                   dropout=0.2,
                   recurrent_dropout=0.2,
                   return_sequences=True)(dp2_s)
    dp3_s = Dropout(0.5)(lstm3_s)
    att2 = Attention()(dp3_s)
    out1_s = Dense(nb_classes, activation='softmax')(att2)
    model2 = Model(input_s, out1_s)

    #model2.summary()

    # load weights
    model2.load_weights('motion_attention.hdf5')

    return model1, model2
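
A minimal usage sketch for the two-stream setup above (not part of the original source): the input shapes, the dummy batches, and the score-averaging fusion are assumptions, and the two .hdf5 weight files must already exist on disk.

import numpy as np

nb_classes = 10                        # assumed global read inside lstmNet
x_shape = np.random.rand(8, 30, 75)    # dummy temporal (shape) batch
x_motion = np.random.rand(8, 30, 75)   # dummy spatial (motion) batch
model_shape, model_motion = lstmNet((30, 75), (30, 75))
# fuse the two streams by averaging their softmax outputs
avg_pred = (model_shape.predict(x_shape) + model_motion.predict(x_motion)) / 2.0
labels = avg_pred.argmax(axis=-1)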
Example #2
def model(x_train_3, y_train_3, x_val_3, y_val_3, embedding_layer):
    model2 = Sequential()
    model2.add(embedding_layer)
    model2.add(GaussianNoise(0.3))
    model2.add(Dropout(0.3))
    model2.add(
        Bidirectional(
            LSTM(150,
                 recurrent_dropout=0.3,
                 kernel_regularizer=l2(0),
                 return_sequences=True)))
    model2.add(Dropout(0.3))
    model2.add(
        Bidirectional(
            LSTM(150,
                 recurrent_dropout=0.3,
                 kernel_regularizer=l2(0),
                 return_sequences=True)))
    model2.add(Dropout(0.3))
    model2.add(Attention())
    model2.add(Dense(4, activity_regularizer=l2(0.0001)))
    model2.add(Activation('softmax'))
    model2.compile(optimizer=Adam(clipnorm=1, lr=0.001),
                   loss='categorical_crossentropy',
                   metrics=['acc'])
    model2.summary()
    model2.fit(x_train_3,
               y_train_3,
               validation_data=(x_val_3, y_val_3),
               epochs=12,
               batch_size=50)
    model2.save("./model_test.h5")
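
The embedding_layer passed into the function above is built elsewhere in the source; a minimal sketch of a plausible construction follows, where the vocabulary size, embedding dimension, sequence length, and the zero weight matrix are placeholders.

import numpy as np
from keras.layers import Embedding

vocab_size, emb_dim, max_len = 20000, 300, 50        # assumed values
embedding_matrix = np.zeros((vocab_size, emb_dim))    # placeholder pretrained weights
embedding_layer = Embedding(vocab_size, emb_dim,
                            weights=[embedding_matrix],
                            input_length=max_len,
                            trainable=False)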
Example #3
def prepareAttentionModel(embeddings,
                          classes,
                          max_length,
                          unit=LSTM,
                          cells=64,
                          layers=1,
                          **kwargs):
    # parameters
    bi = kwargs.get("bidirectional", False)
    noise = kwargs.get("noise", 0.)
    dropout_words = kwargs.get("dropout_words", 0)
    dropout_rnn = kwargs.get("dropout_rnn", 0)
    dropout_rnn_U = kwargs.get("dropout_rnn_U", 0)
    dropout_attention = kwargs.get("dropout_attention", 0)
    dropout_final = kwargs.get("dropout_final", 0)
    attention = kwargs.get("attention", None)
    final_layer = kwargs.get("final_layer", False)
    clipnorm = kwargs.get("clipnorm", 1)
    loss_l2 = kwargs.get("loss_l2", 0.)
    lr = kwargs.get("lr", 0.001)
    model = Sequential()
    model.add(embeddings)

    if noise > 0:
        model.add(GaussianNoise(noise))
    if dropout_words > 0:
        model.add(Dropout(dropout_words))

    for i in range(layers):
        rs = (layers > 1 and i < layers - 1) or attention
        model.add(
            get_RNN(unit,
                    cells,
                    bi,
                    return_sequences=rs,
                    dropout_U=dropout_rnn_U))
        if dropout_rnn > 0:
            model.add(Dropout(dropout_rnn))

    if attention == "memory":
        model.add(AttentionWithContext())
        if dropout_attention > 0:
            model.add(Dropout(dropout_attention))
    elif attention == "simple":
        model.add(Attention())
        if dropout_attention > 0:
            model.add(Dropout(dropout_attention))

    if final_layer:
        model.add(MaxoutDense(100, W_constraint=maxnorm(2)))
        if dropout_final > 0:
            model.add(Dropout(dropout_final))

    model.add(Dense(classes, activity_regularizer=l2(loss_l2)))
    model.add(Activation('softmax'))

    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
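
An illustrative call of prepareAttentionModel (the hyperparameter values are assumptions, not taken from the source); emb_layer must be a Keras Embedding layer prepared beforehand.

model = prepareAttentionModel(emb_layer,           # an Embedding layer built elsewhere
                              classes=3,
                              max_length=50,
                              cells=150,
                              layers=2,
                              bidirectional=True,
                              noise=0.3,
                              dropout_words=0.3,
                              dropout_rnn=0.3,
                              dropout_rnn_U=0.3,
                              attention="simple",
                              dropout_attention=0.5,
                              loss_l2=0.0001)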
Example #4
def train_lstm(vocab,
               n_class,
               num_features,
               embed_output_dim=300,
               lstm_output_dim=150,
               noise=0.3,
               layers=2,
               bi=True,
               attention="simple",
               dropout_attention=0.5,
               dropout_words=0.3,
               dropout_rnn=0.3,
               dropout_rnn_U=0.3,
               clipnorm=1,
               lr=0.001,
               loss_l2=0.0001):
    model = Sequential()
    # Embedding layer
    embed_input_dim = len(vocab) + 1
    # embedding_matrix is only needed by the pretrained-weights variant commented out below
    embedding_matrix = get_embedding(embed_output_dim, vocab)
    model.add(
        Embedding(embed_input_dim, embed_output_dim,
                  input_length=num_features))
    # model.add(Embedding(embed_input_dim, embed_output_dim, input_length = num_features,
    #                    weights = [embedding_matrix], trainable = False))
    model.add(SpatialDropout1D(0.2))
    # GaussianNoise layer
    if noise > 0:
        model.add(GaussianNoise(noise))

    # Dropout layer
    if dropout_words > 0:
        model.add(Dropout(dropout_words))

    for i in range(layers):
        rs = (layers > 1 and i < layers - 1) or attention
        model.add(
            get_lstm(lstm_output_dim,
                     bi,
                     return_sequences=rs,
                     dropout_U=dropout_rnn_U))
        if dropout_rnn > 0:
            model.add(Dropout(dropout_rnn))
    if attention == "memory":
        model.add(AttentionWithContext())
        if dropout_attention > 0:
            model.add(Dropout(dropout_attention))
    elif attention == "simple":
        model.add(Attention())
        if dropout_attention > 0:
            model.add(Dropout(dropout_attention))

    model.add(Dense(n_class, activity_regularizer=l2(loss_l2)))
    model.add(Activation('softmax'))

    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr),
                  loss='categorical_crossentropy')
    return model
Example #5
def hierarical_net(inputs, out_size, n_class, l2_a=0):
    #inputs = Lambda(lambda x: [x[i*100 :(i+1)*100,:] for i in range(20)])(inputs)
    inputs = Lambda(lambda x: tf.unstack(x, axis=1))(inputs)
    print('the inputs length is {}'.format(len(inputs)))
    outputs = []
    for i in range(6):
        x = Lambda(lambda x: tf.stack(x, axis=1))(
            inputs[i * 100:(i + 1) * 100])
        af_a = Attention()(x)
        outputs.append(af_a)
    outputs = Lambda(lambda x: tf.stack(x, axis=1))(outputs)
    return aug_cnn(outputs, out_size, n_class, l2_a)
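
A hedged wiring sketch for the hierarchical block above: the 600-step sequence length follows from the six groups of 100 frames sliced in the loop, while the feature size, output size, and class count are assumptions, and aug_cnn must be available from the source project.

from keras.layers import Input
from keras.models import Model

inp = Input(shape=(600, 128))   # 6 groups x 100 frames, 128 assumed features
out = hierarical_net(inp, out_size=256, n_class=10, l2_a=0.0001)
model = Model(inputs=inp, outputs=out)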
Example #6
def mix_cnn_rnn(inputs, n_class, channels, l2_a=0.0001):

    data_aug = []
    for i, k_size in enumerate(channels):
        data_aug.append(
            Conv1D(kernel_size=i + 1,
                   filters=k_size,
                   padding='same',
                   kernel_regularizer=regularizers.l2(l2_a),
                   activation='tanh',
                   name='aug_{}st'.format(i + 1))(inputs))

    concat_data = Concatenate()(data_aug)
    rnn_result = CuDNNGRU(256, return_sequences=True)(concat_data)
    #rnn_result = Dropout(0.5 )(rnn_result)
    #rnn_result = CuDNNGRU(256,return_sequences=True)(rnn_result)
    after_att = Attention()(rnn_result)
    logist = Dense(n_class,
                   kernel_regularizer=regularizers.l2(l2_a),
                   activation='softmax')(after_att)
    return logist
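
A hedged wiring sketch for mix_cnn_rnn: the sequence length, feature size, class count, and channels list are assumptions. Each entry of channels sets the filter count of one Conv1D branch (kernel sizes 1, 2, 3, ...), and CuDNNGRU requires a GPU-enabled TensorFlow backend.

from keras.layers import Input
from keras.models import Model

inputs = Input(shape=(100, 64))   # assumed (timesteps, features)
outputs = mix_cnn_rnn(inputs, n_class=5, channels=[32, 32, 64])
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['acc'])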
Example #7
def build_attention_RNN(vectorizers_dict, we_matrix, unit=LSTM, **kwargs):
    print("Model configuration:")
    for i, v in kwargs.items():
        print(i, ": ", v)

    # general settings
    use_ngram = kwargs.get("use_ngram", True)
    use_word_emb = kwargs.get("use_word_emb", True)
    bi = kwargs.get("bidirectional", False)
    final_layer = kwargs.get("final_layer", False)  # whether to add a final dense layer
    attention = kwargs.get("attention", False)  # whether to use attention
    decay_rate = kwargs.get("decay_rate", 0)  # decay rate for the optimizer
    use_masking = kwargs.get("use_masking", True)
    noise = kwargs.get("noise", 0.)  # word embeddings noise

    # general dropout
    dropout_rnn = kwargs.get("dropout_rnn", 0)  # dropout after each RNN layer
    dropout_rnn_recurrent = kwargs.get("dropout_rnn_recurrent",
                                       0)  # dropout for recurrent connections
    dropout_attention = kwargs.get("dropout_attention",
                                   0)  # dropout after attention layer
    dropout_final = kwargs.get("dropout_final", 0)  # dropout after final layer

    rnn_loss_l2 = kwargs.get("rnn_loss_l2", 0.)  # recurrent l2 regularization
    loss_l2 = kwargs.get("loss_l2", 0.)  # final layer l2 regularizer

    # variables for characters
    trainable_chars = kwargs.get("trainable_chars",
                                 True)  # whether the char embedding layer is trainable
    dropout_chars = kwargs.get("dropout_chars",
                               0)  # dropout after character embeddings
    ngram_rnn_layers = kwargs.get("ngram_rnn_layers",
                                  2)  # number of ngram RNN layers
    ngram_cells = kwargs.get("ngram_cells",
                             10)  # number of units in ngram RNN cells
    max_sentence_len = kwargs.get("max_sentence_len",
                                  200)  # max sentence len in chars
    char_emb_output_dim = kwargs.get(
        "char_emb_output_dim", 150)  # output dimension of character emb layer

    # variables for embedding
    trainable_we = kwargs.get("trainable_we",
                              False)  # whether the word embedding layer is trainable
    dropout_words = kwargs.get("dropout_words", 0)  # dropout after embeddings
    rnn_layers = kwargs.get("rnn_layers", 2)  # number of RNN layers
    cells = kwargs.get("cells", 64)  # number of units in RNN cells
    max_sequence_length = kwargs.get("max_sequence_length",
                                     64)  # max sequence length

    # variable for lang features
    use_lang_features = kwargs.get("use_lang_features", False)
    num_lang_dense_layers = kwargs.get("num_lang_dense_layers", 2)
    dense_lang_layers_cells = kwargs.get("dense_lang_layers_cells", 400)
    dense_lang_dropout = kwargs.get("dense_lang_dropout", 0)
    lang_features_len = kwargs.get("lang_features_len", 26)

    out_len = kwargs.get('out_len', 26)
    lr = kwargs.get("lr", 0.0005)
    optimizer_name = kwargs.get("optimizer",
                                'Adam')  # default optimizer is Adam

    # get object of optimizer
    optimizer = get_optimizer(optimizer_name, lr, decay_rate)

    # build ngram part of nn
    encoding_out = []
    inputs = []
    concat_char = None
    emb_word = None
    lang_nn = None

    if use_word_emb is True:
        emb_in = Input(shape=(max_sequence_length, ), name='emb-input')
        inputs.append(emb_in)

        emb_word = (get_embeddings_layer(we_matrix,
                                         max_sequence_length,
                                         trainable_we=trainable_we,
                                         use_masking=use_masking))(emb_in)

        if noise > 0:
            emb_word = GaussianNoise(noise)(emb_word)

        if dropout_words > 0:
            emb_word = Dropout(dropout_words)(emb_word)

        # RNN layers
        for i in range(rnn_layers):
            rs = (rnn_layers > 1 and i < rnn_layers - 1) or attention
            emb_word = (get_rnn_layer(unit,
                                      cells,
                                      bi,
                                      return_sequences=rs,
                                      recurent_dropout=dropout_rnn_recurrent,
                                      l2_reg=rnn_loss_l2))(emb_word)
            if dropout_rnn > 0:
                emb_word = (Dropout(dropout_rnn))(emb_word)

        # Attention after RNN
        if attention is True:
            emb_word = Attention()(emb_word)
            if dropout_attention > 0:
                emb_word = Dropout(dropout_attention)(emb_word)

    if use_ngram is True:
        for ngram_order, vectorizer in vectorizers_dict.items():
            vocab = vectorizer.vocabulary_
            vocab_size = len(vocab) + 3
            print('Vocab size:' + str(vocab_size))

            char_in = Input(shape=(max_sentence_len, ),
                            name='char-input-' + str(ngram_order))

            emb_char = Embedding(input_dim=vocab_size,
                                 output_dim=char_emb_output_dim,
                                 input_length=max_sentence_len,
                                 mask_zero=use_masking,
                                 trainable=trainable_chars)(char_in)
            if noise > 0:
                emb_char = GaussianNoise(noise)(emb_char)

            if dropout_chars > 0:
                emb_char = Dropout(dropout_chars)(emb_char)

            # RNN layers
            for i in range(ngram_rnn_layers):
                rs = (ngram_rnn_layers > 1
                      and i < ngram_rnn_layers - 1) or attention
                emb_char = (get_rnn_layer(
                    unit,
                    ngram_cells,
                    bi,
                    return_sequences=rs,
                    recurent_dropout=dropout_rnn_recurrent,
                    l2_reg=rnn_loss_l2))(emb_char)
                if dropout_rnn > 0:
                    emb_char = (Dropout(dropout_rnn))(emb_char)

            # Attention after RNN
            if attention is True:
                emb_char = Attention()(emb_char)
                if dropout_attention > 0:
                    emb_char = Dropout(dropout_attention)(emb_char)

            encoding_out.append(emb_char)
            inputs.append(char_in)

        if len(encoding_out) < 2:
            concat_char = encoding_out[0]
        else:
            concat_char = concatenate(encoding_out)

    if use_lang_features is True:
        lang_in = Input(shape=(lang_features_len, ), name='lang-input')
        inputs.append(lang_in)

        lang_nn = lang_in

        for i in range(num_lang_dense_layers):
            lang_nn = (Dense(dense_lang_layers_cells,
                             activation='relu'))(lang_nn)

            if dense_lang_dropout > 0:
                lang_nn = Dropout(dense_lang_dropout)(lang_nn)

    layers = None
    if (use_word_emb is True) and (use_ngram is True) \
            and (use_lang_features is True):
        layers = concatenate([emb_word, concat_char, lang_nn])
    elif (use_word_emb is True) and (use_ngram is True):
        layers = concatenate([emb_word, concat_char])
    elif (use_word_emb is True) and (use_lang_features is True):
        layers = concatenate([emb_word, lang_nn])
    elif (use_ngram is True) and (use_lang_features is True):
        layers = concatenate([concat_char, lang_nn])
    elif use_word_emb is True:
        layers = emb_word
    elif use_ngram:
        layers = concat_char
    elif use_lang_features:
        layers = lang_nn
    else:
        raise Exception('Unknown configuration')

    if final_layer is True:
        layers = Dense(400, activation='relu')(layers)
        if dropout_final > 0:
            layers = Dropout(dropout_final)(layers)

    out = Dense(out_len,
                activation='softmax',
                activity_regularizer=l2(loss_l2))(layers)

    model = Model(inputs=inputs, outputs=[out])
    model.summary()
    model.compile(optimizer=optimizer,
                  loss="categorical_crossentropy",
                  metrics=["acc"])

    return model
Example #8
def target_RNN(embeddings,
               classes,
               max_length,
               unit=LSTM,
               cells=64,
               layers=1,
               **kwargs):
    # parameters
    bi = kwargs.get("bidirectional", False)
    noise = kwargs.get("noise", 0.)
    dropout_words = kwargs.get("dropout_words", 0)
    dropout_rnn = kwargs.get("dropout_rnn", 0)
    dropout_rnn_U = kwargs.get("dropout_rnn_U", 0)
    dropout_attention = kwargs.get("dropout_attention", 0)
    dropout_final = kwargs.get("dropout_final", 0)
    attention = kwargs.get("attention", None)
    final_layer = kwargs.get("final_layer", False)
    clipnorm = kwargs.get("clipnorm", 1)
    loss_l2 = kwargs.get("loss_l2", 0.)
    lr = kwargs.get("lr", 0.001)

    input_text = Input(shape=(max_length, ), dtype='int32')

    emb_text = embeddings_layer(max_length=max_length,
                                embeddings=embeddings,
                                trainable=False,
                                masking=True,
                                scale=False,
                                normalize=False)(input_text)

    if noise > 0:
        emb_text = GaussianNoise(noise)(emb_text)
    if dropout_words > 0:
        emb_text = Dropout(dropout_words)(emb_text)

    merge_text = []
    # #one lstm

    # # cov_text = Conv1D(activation="relu", padding="same", filters=300, kernel_size=5)(emb_text)
    # # pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    # # pooling_text = GlobalMaxPooling1D()(cov_text)

    # layer_output = get_RNN(unit, cells, bi=True, return_sequences=False, dropout_U=dropout_rnn_U)(emb_text)
    # if dropout_rnn > 0:
    #     layer_output = Dropout(dropout_rnn)(layer_output)

    # #one conv

    # cov_text = Conv1D(activation="relu", padding="same", filters=300, kernel_size=5)(emb_text)
    # #pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    # pooling_text = GlobalMaxPooling1D()(cov_text)

    # #bilstm+bilstm+attention
    # # cov_text = Conv1D(activation="relu", padding="same", filters=300, kernel_size=5)(emb_text)
    # # pooling_text = MaxPooling1D(pool_size=4)(cov_text)

    # layer_output = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(emb_text)
    # if dropout_rnn > 0:
    #     layer_output = Dropout(dropout_rnn)(layer_output)
    # layer_output2 = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(layer_output)
    # if dropout_rnn > 0:
    #     layer_output2 = Dropout(dropout_rnn)(layer_output2)

    # attention_text = Attention()(layer_output2)

    # if dropout_attention > 0:
    #     attention_text = Dropout(dropout_attention)(attention_text)

    # #bilstm+bilstm+bilstm+attention
    # # cov_text = Conv1D(activation="relu", padding="same", filters=300, kernel_size=5)(emb_text)
    # # pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    # # pooling_text = GlobalMaxPooling1D()(cov_text)

    # layer_output = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(emb_text)
    # if dropout_rnn > 0:
    #     layer_output = Dropout(dropout_rnn)(layer_output)
    # layer_output2 = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(layer_output)
    # if dropout_rnn > 0:
    #     layer_output2 = Dropout(dropout_rnn)(layer_output2)
    # layer_output3 = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(layer_output2)
    # if dropout_rnn > 0:
    #     layer_output3 = Dropout(dropout_rnn)(layer_output3)

    # attention_text = Attention()(layer_output3)

    # if dropout_attention > 0:
    #     attention_text = Dropout(dropout_attention)(attention_text)

    # #origin one lstm
    # # cov_text = Conv1D(activation="relu", padding="same", filters=300, kernel_size=5)(emb_text)
    # # pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    # # pooling_text = GlobalMaxPooling1D()(cov_text)

    # layer_output = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(emb_text)
    # if dropout_rnn > 0:
    #     layer_output = Dropout(dropout_rnn)(layer_output)

    # attention_text = Attention()(layer_output)

    # if dropout_attention > 0:
    #     attention_text = Dropout(dropout_attention)(attention_text)

    # #origin conv one lstm
    # cov_text = Conv1D(activation="relu", padding="same", filters=300, kernel_size=5)(emb_text)
    # pooling_text = MaxPooling1D(pool_size=4)(cov_text)

    # layer_output = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(pooling_text)
    # if dropout_rnn > 0:
    #     layer_output = Dropout(dropout_rnn)(layer_output)

    # attention_text = Attention()(layer_output)

    # if dropout_attention > 0:
    #     attention_text = Dropout(dropout_attention)(attention_text)

    #bilstm+bilstm+attention merge bilstm+attention
    # cov_text = Conv1D(activation="relu", padding="same", filters=300, kernel_size=5)(emb_text)
    # pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    # pooling_text = GlobalMaxPooling1D()(cov_text)

    layer_output = get_RNN(unit,
                           cells,
                           bi,
                           return_sequences=True,
                           dropout_U=dropout_rnn_U)(emb_text)
    if dropout_rnn > 0:
        layer_output = Dropout(dropout_rnn)(layer_output)

    attention_text = Attention()(layer_output)
    if dropout_attention > 0:
        attention_text = Dropout(dropout_attention)(attention_text)

    layer_output2 = get_RNN(unit,
                            cells,
                            bi,
                            return_sequences=True,
                            dropout_U=dropout_rnn_U)(layer_output)
    if dropout_rnn > 0:
        layer_output2 = Dropout(dropout_rnn)(layer_output2)

    attention_text2 = Attention()(layer_output2)

    if dropout_attention > 0:
        attention_text2 = Dropout(dropout_attention)(attention_text2)

    merge_text.append(attention_text)
    merge_text.append(attention_text2)
    attention_mul = concatenate(merge_text)

    # # merge conv lstm lstm+lstm
    # cov_text = Conv1D(activation="relu", padding="same", filters=300, kernel_size=5)(emb_text)
    # pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    # globalpooling = GlobalMaxPooling1D()(cov_text)
    # merge_text.append(globalpooling)

    # layer_output = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(pooling_text)
    # if dropout_rnn > 0:
    #     layer_output = Dropout(dropout_rnn)(layer_output)

    # attention_text2 = Attention()(layer_output)
    # if dropout_attention > 0:
    #     attention_text2 = Dropout(dropout_attention)(attention_text2)
    # merge_text.append(attention_text2)
    # layer_output2 = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(layer_output)
    # if dropout_rnn > 0:
    #     layer_output2 = Dropout(dropout_rnn)(layer_output2)

    # # layer_input = {}
    # # layer_output = {-1:pooling_text}
    # # for i in range(layers):
    # #     j=i
    # #     layer_input[i] = layer_output[i-1]
    # #     rs = (layers > 1 and i < layers - 1) or attention

    # #     layer_output[i] = get_RNN(unit, cells, bi, return_sequences=rs,
    # #                       dropout_U=dropout_rnn_U)(layer_input[i])
    # #     if dropout_rnn > 0:
    # #         layer_output[i] = Dropout(dropout_rnn)(layer_output[i])
    # # if layers==0:
    # #     j = -1

    # attention_text = Attention()(layer_output2)

    # if dropout_attention > 0:
    #     attention_text = Dropout(dropout_attention)(attention_text)

    # merge_text.append(attention_text)
    # #attention_mul = concatenate(merge_text)
    # attention_mul = merge(merge_text)

    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(attention_mul)
    model = Model(inputs=input_text, outputs=probabilities)
    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', f1])  #,f1_score,f1_score2,f12
    return model
Example #9
def target_RNN2(embeddings,
                classes,
                max_length,
                unit=LSTM,
                cells=64,
                **kwargs):
    # parameters
    bi = kwargs.get("bidirectional", False)
    noise = kwargs.get("noise", 0.)
    dropout_words = kwargs.get("dropout_words", 0)
    dropout_rnn = kwargs.get("dropout_rnn", 0)
    dropout_rnn_U = kwargs.get("dropout_rnn_U", 0)
    dropout_attention = kwargs.get("dropout_attention", 0)
    dropout_final = kwargs.get("dropout_final", 0)
    attention = kwargs.get("attention", None)
    final_layer = kwargs.get("final_layer", False)
    clipnorm = kwargs.get("clipnorm", 1)
    loss_l2 = kwargs.get("loss_l2", 0.)
    lr = kwargs.get("lr", 0.001)
    bi = kwargs.get("bi", False)

    attention_times = kwargs.get("attention_times", 1)

    input_text = Input(shape=(max_length, ), dtype='int32')

    emb_text = embeddings_layer(max_length=max_length,
                                embeddings=embeddings,
                                trainable=False,
                                masking=True,
                                scale=False,
                                normalize=False)(input_text)

    if noise > 0:
        emb_text = GaussianNoise(noise)(emb_text)
    if dropout_words > 0:
        emb_text = Dropout(dropout_words)(emb_text)

    # cnn
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = GlobalMaxPooling1D()(cov_text)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(pooling_text)

    #cnn+lstm
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=False,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(lstm_text)

    #lstm
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=False,
                        dropout_U=dropout_rnn_U)(emb_text)
    lstm_text = Dropout(0.5)(lstm_text)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(lstm_text)

    #bilstm
    lstm_text = get_RNN(unit,
                        cells,
                        bi=True,
                        return_sequences=False,
                        dropout_U=dropout_rnn_U)(emb_text)
    lstm_text = Dropout(0.5)(lstm_text)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(lstm_text)

    #cnn+lstm+dense+lstm
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)
    attention_probs = Dense(32, activation='softmax',
                            name='attention_probs')(lstm_text)
    attention_mul = merge([lstm_text, attention_probs],
                          output_shape=32,
                          name='attention_mul',
                          mode='mul')
    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=False,
                         dropout_U=dropout_rnn_U)(attention_mul)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(lstm_text2)

    #cnn+lstm+dense+lstm+denselstm
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)
    attention_probs = Dense(32, activation='softmax',
                            name='attention_probs')(lstm_text)
    attention_mul = merge([lstm_text, attention_probs],
                          output_shape=32,
                          name='attention_mul',
                          mode='mul')
    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=True,
                         dropout_U=dropout_rnn_U)(attention_mul)
    lstm_text2 = Dropout(0.5)(lstm_text2)
    attention_probs2 = Dense(32, activation='softmax',
                             name='attention_probs2')(lstm_text2)
    attention_mul2 = merge([lstm_text2, attention_probs2],
                           output_shape=32,
                           name='attention_mul2',
                           mode='mul')
    lstm_text3 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=False,
                         dropout_U=dropout_rnn_U)(attention_mul2)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(lstm_text3)

    #cnn+lstm+dense+conv+lstm+dense+lstm
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)
    attention_probs = Dense(32, activation='softmax',
                            name='attention_probs')(lstm_text)
    attention_mul = merge([lstm_text, attention_probs],
                          output_shape=32,
                          name='attention_mul',
                          mode='mul')
    cov_text2 = Conv1D(activation="relu",
                       padding="same",
                       filters=64,
                       kernel_size=5)(attention_mul)
    pooling_text2 = MaxPooling1D(pool_size=4)(cov_text2)
    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=True,
                         dropout_U=dropout_rnn_U)(pooling_text2)
    lstm_text2 = Dropout(0.5)(lstm_text2)
    attention_probs2 = Dense(32, activation='softmax',
                             name='attention_probs2')(lstm_text2)
    attention_mul2 = merge([lstm_text2, attention_probs2],
                           output_shape=32,
                           name='attention_mul2',
                           mode='mul')
    lstm_text3 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=False,
                         dropout_U=dropout_rnn_U)(attention_mul2)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(lstm_text3)

    # cnn+lstm+dense+conv
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)
    attention_probs = Dense(32, activation='softmax',
                            name='attention_probs')(lstm_text)
    attention_mul = merge([lstm_text, attention_probs],
                          output_shape=32,
                          name='attention_mul',
                          mode='mul')
    conv_text2 = Convolution1D(nb_filter=80,
                               filter_length=4,
                               border_mode='valid',
                               activation='relu')(attention_mul)
    globalpooling = GlobalMaxPooling1D()(conv_text2)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(globalpooling)

    #lstm + conv + lstm
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(emb_text)
    lstm_text = Dropout(0.5)(lstm_text)
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(lstm_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=False,
                         dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text2 = Dropout(0.5)(lstm_text2)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(lstm_text2)

    #conv+lstm merge lstm
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=False,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)
    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=False,
                         dropout_U=dropout_rnn_U)(emb_text)
    lstm_text2 = Dropout(0.5)(lstm_text2)
    merge_text = merge([lstm_text, lstm_text2])
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(merge_text)

    #cov+lstm+cont+conv+lstm+cont+conv
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)
    attention_probs = Dense(32, activation='softmax',
                            name='attention_probs')(lstm_text)
    attention_mul = merge([lstm_text, attention_probs],
                          output_shape=32,
                          name='attention_mul',
                          mode='mul')
    cov_text2 = Conv1D(activation="relu",
                       padding="same",
                       filters=64,
                       kernel_size=5)(attention_mul)
    pooling_text2 = MaxPooling1D(pool_size=4)(cov_text2)
    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=True,
                         dropout_U=dropout_rnn_U)(pooling_text2)
    lstm_text2 = Dropout(0.5)(lstm_text2)
    attention_probs2 = Dense(32, activation='softmax',
                             name='attention_probs2')(lstm_text2)
    attention_mul2 = merge([lstm_text2, attention_probs2],
                           output_shape=32,
                           name='attention_mul2',
                           mode='mul')
    conv_text3 = Conv1D(activation="relu",
                        padding="same",
                        filters=64,
                        kernel_size=5)(attention_mul2)
    globalpooling = GlobalMaxPooling1D()(conv_text3)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(globalpooling)

    #cov+lstm+cont+conv+lstm+cont+conv merge cov+lstm+cont+conv merge cov
    conv_text = Conv1D(activation="relu",
                       padding="same",
                       filters=64,
                       kernel_size=5)(emb_text)
    globalpooling = GlobalMaxPooling1D()(conv_text)

    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)
    attention_probs = Dense(32, activation='softmax',
                            name='attention_probs')(lstm_text)
    attention_mul = merge([lstm_text, attention_probs],
                          output_shape=32,
                          name='attention_mul',
                          mode='mul')
    conv_text2 = Conv1D(activation="relu",
                        padding="same",
                        filters=64,
                        kernel_size=5)(attention_mul)
    globalpooling2 = GlobalMaxPooling1D()(conv_text2)

    cov_text2 = Conv1D(activation="relu",
                       padding="same",
                       filters=64,
                       kernel_size=5)(attention_mul)
    pooling_text2 = MaxPooling1D(pool_size=4)(cov_text2)
    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=True,
                         dropout_U=dropout_rnn_U)(pooling_text2)
    lstm_text2 = Dropout(0.5)(lstm_text2)
    attention_probs2 = Dense(32, activation='softmax',
                             name='attention_probs2')(lstm_text2)
    attention_mul2 = merge([lstm_text2, attention_probs2],
                           output_shape=32,
                           name='attention_mul2',
                           mode='mul')
    conv_text3 = Conv1D(activation="relu",
                        padding="same",
                        filters=64,
                        kernel_size=5)(attention_mul2)
    globalpooling3 = GlobalMaxPooling1D()(conv_text3)
    merge_text = merge([globalpooling, globalpooling2, globalpooling3])
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(merge_text)

    #lstm merge conv+lstm merge conv+lstm+conv+lstm
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=False,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)

    lstm_text2_1 = get_RNN(unit,
                           cells,
                           bi,
                           return_sequences=True,
                           dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text2_1 = Dropout(0.5)(lstm_text2_1)
    cov_text2 = Conv1D(activation="relu",
                       padding="same",
                       filters=64,
                       kernel_size=5)(lstm_text2_1)
    pooling_text2 = MaxPooling1D(pool_size=4)(cov_text2)
    lstm_text3 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=False,
                         dropout_U=dropout_rnn_U)(pooling_text2)
    lstm_text3 = Dropout(0.5)(lstm_text3)

    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=False,
                         dropout_U=dropout_rnn_U)(emb_text)
    lstm_text2 = Dropout(0.5)(lstm_text2)

    merge_text = merge([lstm_text, lstm_text2, lstm_text3])
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(merge_text)

    #cnn+lstm+dense+lstm+dense+conv
    cov_text = Conv1D(activation="relu",
                      padding="same",
                      filters=64,
                      kernel_size=5)(emb_text)
    pooling_text = MaxPooling1D(pool_size=4)(cov_text)
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(pooling_text)
    lstm_text = Dropout(0.5)(lstm_text)
    attention_probs = Dense(32, activation='softmax',
                            name='attention_probs')(lstm_text)
    attention_mul = merge([lstm_text, attention_probs],
                          output_shape=32,
                          name='attention_mul',
                          mode='mul')
    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=True,
                         dropout_U=dropout_rnn_U)(attention_mul)
    lstm_text2 = Dropout(0.5)(lstm_text2)
    attention_probs2 = Dense(32, activation='softmax',
                             name='attention_probs2')(lstm_text2)
    attention_mul2 = merge([lstm_text2, attention_probs2],
                           output_shape=32,
                           name='attention_mul2',
                           mode='mul')
    cov_text2 = Conv1D(activation="relu",
                       padding="same",
                       filters=64,
                       kernel_size=5)(attention_mul2)
    globalpooling2 = GlobalMaxPooling1D()(cov_text2)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(globalpooling2)

    #lstm+dense+lstm+dense+conv
    lstm_text = get_RNN(unit,
                        cells,
                        bi,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(emb_text)
    lstm_text = Dropout(0.5)(lstm_text)
    attention_probs = Dense(32, activation='softmax',
                            name='attention_probs')(lstm_text)
    attention_mul = merge([lstm_text, attention_probs],
                          output_shape=32,
                          name='attention_mul',
                          mode='mul')
    lstm_text2 = get_RNN(unit,
                         cells,
                         bi,
                         return_sequences=True,
                         dropout_U=dropout_rnn_U)(attention_mul)
    lstm_text2 = Dropout(0.5)(lstm_text2)
    attention_probs2 = Dense(32, activation='softmax',
                             name='attention_probs2')(lstm_text2)
    attention_mul2 = merge([lstm_text2, attention_probs2],
                           output_shape=32,
                           name='attention_mul2',
                           mode='mul')
    cov_text2 = Conv1D(activation="relu",
                       padding="same",
                       filters=64,
                       kernel_size=5)(attention_mul2)
    globalpooling2 = GlobalMaxPooling1D()(cov_text2)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(globalpooling2)

    # according to origin
    lstm_text = get_RNN(unit,
                        cells,
                        bi=True,
                        return_sequences=True,
                        dropout_U=dropout_rnn_U)(emb_text)
    lstm_text = Dropout(0.3)(lstm_text)
    print("a", lstm_text.shape)
    atten_text = Attention()(lstm_text)
    print("b", atten_text.shape)
    # cov_text2 = Conv1D(activation="relu", padding="same", filters=64, kernel_size=5)(atten_text)
    # print("c",cov_text2.shape)
    # globalpooling2 = GlobalMaxPooling1D()(cov_text2)
    # print("d",globalpooling2.shape)
    probabilities = Dense(classes,
                          activation='softmax',
                          activity_regularizer=l2(loss_l2))(atten_text)

    #lstm+dense+lstm+dense

    # # cov_text = Convolution1D(nb_filter=80, filter_length=4,
    # #                        border_mode='valid', activation='relu')(emb_text)
    # # pooling_text = GlobalMaxPooling1D()(cov_text)
    # # pooling_text = MaxPooling1D(pool_size=4)(cov_text)

    # cov_text = Conv1D(activation="relu", padding="same", filters=64, kernel_size=5)(emb_text)
    # pooling_text = MaxPooling1D(pool_size=4)(cov_text)

    # # lstm_text = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(emb_text)
    # # if dropout_rnn > 0:
    # #     lstm_text = Dropout(dropout_rnn)(lstm_text)
    # # attention_probs = Dense(32, activation='softmax', name='attention_probs')(lstm_text)
    # # attention_mul = merge([lstm_text, attention_probs], output_shape=32, name='attention_mul', mode='mul')

    # # all_lstm_text={}
    # # all_lstm_text[0]=pooling_text
    # # all_attention_probs={}
    # # all_attention_mul={}

    # # for i in range(attention_times):
    # #     j=i
    # #     all_lstm_text[i+1] = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(all_lstm_text[i])
    # #     if dropout_rnn > 0:
    # #         all_lstm_text[i+1] = Dropout(dropout_rnn)(all_lstm_text[i+1])
    # #     attention_probs[i] = Dense(32, activation='softmax', name='attention_probs')(all_lstm_text[i+1])
    # #     attention_mul[i] = merge([all_lstm_text[i+1], attention_probs[i]], output_shape=32, name='attention_mul', mode='mul')

    # lstm_text = get_RNN(unit, cells, bi, return_sequences=False, dropout_U=dropout_rnn_U)(pooling_text)
    # lstm_text = Dropout(0.5)(lstm_text)
    # # cov_text = Convolution1D(nb_filter=80, filter_length=4,
    # #                         border_mode='valid', activation='relu')(lstm_text)
    # # # we use max pooling:
    # # pooling_text = GlobalMaxPooling1D()(cov_text)
    # # lstm_text2 = get_RNN(unit, cells, bi, return_sequences=True, dropout_U=dropout_rnn_U)(lstm_text)
    # # lstm_text2 = Dropout(0.3)(lstm_text2)
    # # cov_text2 = Convolution1D(nb_filter=80, filter_length=4,
    # #                         border_mode='valid', activation='relu')(lstm_text2)
    # # # we use max pooling:
    # # pooling_text2 = GlobalMaxPooling1D()(cov_text2)
    # # merge_text = merge([pooling_text,pooling_text2])
    # probabilities = Dense(classes, activation='softmax', activity_regularizer=l2(loss_l2))(lstm_text)

    model = Model(input=input_text, output=probabilities)
    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
Example #10
            if class_values == 1:
                y_classes.append(i)
                break
    return y_classes


def loadFile(name):
    with open(name, 'rb') as input:
        wi = pickle.load(input)
        return wi


# model = load_model('Categorical_Attention_LSTN_3_Epoch_20K_No_Scope_50_Batch_50_WORD_Length.h5')
model = load_model(
    'Categorical_Simple_LSTN_3_Epoch_20K_No_Scope_50_Batch_50_WORD_Length.h5',
    custom_objects={"Attention": Attention()})

word_index = loadFile('word_index.pkl')
# print(word_index['positive_emoticon'])
# print(word_index['negative_emoticon'])
test_df = pd.read_csv('Test_Big.csv',
                      sep=';',
                      encoding='utf-8',
                      error_bad_lines=False)
test_values = test_df['tweet_text'].values
labels_to_delete = []
prep_test = tr.preprocess_texts(test_values, False, False, labels_to_delete)
y_test = test_df["sentiment"].values
# y_test = np.delete(y_test, labels_to_delete)
test_sequences = tr.transformSequenceToInt(prep_test, word_index)
test_data = pad_sequences(test_sequences,
Example #11
                               pos=opinions[0],
                               neu=opinions[1],
                               neg=opinions[2])
    except:
        print('fail')
        return render_template(
            'analyze.html',
            error=f'Remember: you can only load a .csv file and it has to '
            f'contain one of the following columns: {approved_col_names}')


if __name__ == '__main__':
    url = 'http://localhost:5002/api/'
    model_weights = os.path.abspath('data/model_weights/new_bi_model_1.h5')
    model = load_model(model_weights,
                       custom_objects={'Attention': Attention()})
    global graph
    graph = tf.get_default_graph()
    MAXLEN = 50
    CORPUS = 'datastories.twitter'
    DIM = 300
    _, word_map = get_embeddings(CORPUS, DIM)
    pipeline = Pipeline([('preprocessor', tweetsPreprocessor(load=False)),
                         ('extractor',
                          EmbExtractor(word_idxs=word_map, maxlen=MAXLEN))])
    app.run(debug=True, host='localhost', port=5002)

#TODO:
'''  
    - add docs for functions
'''
Example #12
def build_attention_rnn(embeddings,
                        classes,
                        maxlen,
                        layer_type=LSTM,
                        cells=64,
                        layers=1,
                        **kwargs):
    '''
    Creates an RNN-based model.
    @params:
    :embeddings: array -> embeddings matrix
    :classes: int -> number of label classes
    :maxlen: int -> max length of the input sequence
    :layer_type: keras.layers -> type of RNN layer
    :cells: int -> number of cells in a single layer
    :layers: int -> number of stacked RNN layers
    :**kwargs: params such as the various dropouts, attention type, etc.
    @returns:
    :keras.models.Sequential object
    '''
    trainable_emb = kwargs.get('trainable_emb', False)
    bi = kwargs.get('bidirectional', False)
    layer_dropout_rnn = kwargs.get('layer_dropout_rnn', 0)
    dropout_rnn = kwargs.get('dropout_rnn', 0)
    rec_dropout_rnn = kwargs.get('rec_dropout_rnn', 0)
    dropout_attention = kwargs.get('dropout_attention', 0)
    attention = kwargs.get('attention', None)
    dropout_final = kwargs.get('dropout_final', 0)
    fc1 = kwargs.get('fc1', False)
    clipnorm = kwargs.get('clipnorm', 0)
    loss_l2 = kwargs.get('loss_l2', 0.)
    lr = kwargs.get('lr', 0.001)

    print('Creating model...')
    # init the model
    model = Sequential()
    model.add(
        embedding_layer(embeddings=embeddings,
                        maxlen=maxlen,
                        trainable=trainable_emb,
                        masking=True,
                        scale=True))

    for i in range(layers):
        return_seq = (layers > 1 and i < layers - 1) or attention
        model.add(
            get_rnn_layer(layer_type,
                          cells,
                          bi,
                          return_sequences=return_seq,
                          dropout=dropout_rnn,
                          recurrent_dropout=rec_dropout_rnn))
        if layer_dropout_rnn > 0:
            model.add(Dropout(layer_dropout_rnn))

    if attention == 'memmory':
        model.add(AttentionWithContext())
        if dropout_attention > 0:
            model.add(Dropout(dropout_attention))
    elif attention == 'simple':
        model.add(Attention())
        if dropout_attention > 0:
            model.add(Dropout(dropout_attention))

    if fc1:
        model.add(Dense(100))
        if dropout_final > 0:
            model.add(Dropout(dropout_final))

    model.add(Dense(classes, activity_regularizer=l2(loss_l2)))
    model.add(Activation('softmax'))

    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr),
                  loss='categorical_crossentropy')
    return model
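
An illustrative call of build_attention_rnn (all values are placeholders, not taken from the source); embeddings_matrix is a pretrained word-embedding matrix (a NumPy array) loaded beforehand.

model = build_attention_rnn(embeddings_matrix,    # pretrained embedding matrix loaded elsewhere
                            classes=3,
                            maxlen=50,
                            cells=150,
                            layers=2,
                            bidirectional=True,
                            attention='simple',
                            dropout_rnn=0.3,
                            rec_dropout_rnn=0.3,
                            dropout_attention=0.5,
                            loss_l2=0.0001,
                            lr=0.001)
model.summary()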