Example No. 1
        def triple_classification():
            model = Sequential()
            model.add(Dense(100, input_dim=w2v_dim, activation='relu'))
            model.add(Dropout(0.4))
            model.add(Dense(50, activation='relu'))
            model.add(Dropout(0.4))
            model.add(Dense(10, activation='relu'))
            # model.add(Dropout(0.4))
            model.add(Dense(label_categories, activation='sigmoid'))

            model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', auc])


            model.fit(X_train, dummy_Y_train, shuffle=True, epochs=n_epochs, batch_size=64,
                      validation_split=0.2, verbose=2, callbacks=[early_stopping])

            scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)

            print(model.metrics_names[0] + ":" + str(scores[0]) + "  "
                  + model.metrics_names[1] + ":" + str(scores[1]) + "  "
                  + model.metrics_names[2] + ":" + str(scores[2]) + "  ")
            score_dic = {model.metrics_names[0]: scores[0],
                         model.metrics_names[1]: scores[1],
                         model.metrics_names[2]: scores[2]}

            predictions = model.predict(X_test)
            confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
            print(confusion_matrix)
            return score_dic, confusion_matrix
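These snippets all reference module-level helpers defined elsewhere in the project: a custom `auc` metric, a `conf_matrix` function, an `early_stopping` callback, and (in later examples) a log file handle `f`. A minimal sketch of plausible definitions, assuming tf.keras and scikit-learn; the real project may define them differently:

        import numpy as np
        import tensorflow as tf
        from sklearn.metrics import confusion_matrix as sk_confusion_matrix
        from tensorflow.keras.callbacks import EarlyStopping

        # 'auc' is passed straight into metrics=['accuracy', auc]; a Keras
        # Metric instance works for that (assumed definition).
        auc = tf.keras.metrics.AUC(name='auc')

        def conf_matrix(dummy_y_true, y_pred, label_index):
            # collapse one-hot targets and sigmoid scores back to class ids
            y_true_ids = np.argmax(dummy_y_true, axis=1)
            y_pred_ids = np.argmax(y_pred, axis=1)
            return sk_confusion_matrix(y_true_ids, y_pred_ids,
                                       labels=list(range(len(label_index))))

        # assumed early-stopping configuration; the source only shows the name
        early_stopping = EarlyStopping(monitor='val_loss', patience=10,
                                       restore_best_weights=True)

        # results log used by the later examples (hypothetical filename)
        f = open('results.log', 'a')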
Example No. 2
        def train_text_dense_3():
            input_text = Input(shape=(100, ))
            input_dense = Input(shape=(50, ))

            text_out = Dense(50, input_dim=100, activation='relu')(input_text)
            text_out = Dropout(0.4)(text_out)
            text_out = Dense(10, activation='relu')(text_out)
            # auxi_out = Dropout(0.4)(auxi_out)
            # auxi_out = Dense(label_categories, activation='sigmoid', name='auxi_out')(auxi_out)

            lstm_out = Dense(10, input_dim=50, activation='relu')(input_dense)
            # lstm_out = Dropout(0.4)(lstm_out)
            # lstm_out = Dense(10, activation='relu')(lstm_out)
            # lstm_out = Dropout(0.4)(lstm_out)
            # senti_out = Dense(label_categories, activation='sigmoid', name='senti_out')(senti_out)

            x = keras.layers.concatenate([text_out, lstm_out])
            # x = Dense(10, activation='relu')(x)
            y = Dense(label_categories, activation='sigmoid')(x)

            model = Model(inputs=[input_text, input_dense], outputs=y)
            model.compile(optimizer='adam',
                          loss='binary_crossentropy',
                          metrics=['accuracy', auc])

            model.fit([X_train_text, X_train_dense],
                      dummy_Y_train,
                      shuffle=True,
                      validation_split=0.2,
                      epochs=300,
                      batch_size=64,
                      verbose=2)

            scores_train = model.evaluate([X_train_text, X_train_dense],
                                          dummy_Y_train,
                                          batch_size=64)
            print('result => ' + model.metrics_names[0] + ": " +
                  str(scores_train[0]) + "  " + model.metrics_names[1] + ": " +
                  str(scores_train[1]) + "  " + model.metrics_names[2] + ": " +
                  str(scores_train[2]) + "  ")

            scores = model.evaluate([X_test_text, X_test_dense],
                                    dummy_Y_test,
                                    batch_size=64)
            print('result => ' + model.metrics_names[0] + ": " +
                  str(scores[0]) + "  " + model.metrics_names[1] + ": " +
                  str(scores[1]) + "  " + model.metrics_names[2] + ": " +
                  str(scores[2]) + "  ")
            score_dic = {
                model.metrics_names[0]: scores[0],
                model.metrics_names[1]: scores[1],
                model.metrics_names[2]: scores[2]
            }

            Y_predict = model.predict([X_test_text, X_test_dense])
            confusion_matrix = conf_matrix(dummy_Y_test, Y_predict,
                                           label_index)
            print(confusion_matrix)
            print('round ' + str(train_chunk_number) + ' finished')
            return score_dic, confusion_matrix
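The hand-written pairing of `model.metrics_names[i]` with `scores[i]` recurs in every example; a behaviour-equivalent, shorter form that also adapts to however many metrics were compiled:

        # same values as the manually indexed version above
        score_dic = dict(zip(model.metrics_names, scores))
        print('result => ' + '  '.join(f'{k}: {v}' for k, v in score_dic.items()))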
Example No. 3
        def train_text():
            input = Input(shape=(w2v_dim,))
            x = Dense(50, input_dim=w2v_dim, activation='relu')(input)
            x = Dropout(0.4)(x)
            x = Dense(10, activation='relu')(x)
            y = Dense(label_categories, activation='sigmoid', name='text_out')(x)
            model = Model(inputs=input, outputs=y)

            model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', auc])

            model.fit(X_train, dummy_Y_train, shuffle=True, epochs=n_epochs, batch_size=64,
                      validation_split=0.2, verbose=2, callbacks=[early_stopping])

            scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)

            print(model.metrics_names[0] + ":" + str(scores[0]) + "  "
                  + model.metrics_names[1] + ":" + str(scores[1]) + "  "
                  + model.metrics_names[2] + ":" + str(scores[2]) + "  ")
            score_dic = {model.metrics_names[0]: scores[0],
                         model.metrics_names[1]: scores[1],
                         model.metrics_names[2]: scores[2]}

            predictions = model.predict(X_test)
            confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
            print(confusion_matrix)
            print('round ' + str(train_chunk_number) + ' finished')
            return score_dic, confusion_matrix
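The targets named `dummy_Y_train`/`dummy_Y_test` are one-hot encoded, which is why `conf_matrix` receives them together with `label_index`. One plausible way they are built from raw labels, using `to_categorical` (an assumption; the preprocessing is not part of these examples and `raw_labels` is a hypothetical name):

        import numpy as np
        from tensorflow.keras.utils import to_categorical

        # hypothetical preprocessing sketch
        label_index = sorted(set(raw_labels))              # distinct label values
        label_to_id = {lab: i for i, lab in enumerate(label_index)}
        y_ids = np.array([label_to_id[lab] for lab in raw_labels])
        dummy_Y = to_categorical(y_ids, num_classes=len(label_index))
        label_categories = dummy_Y.shape[1]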
Example No. 4
        def train_text_senti_2():
            input_text = Input(shape=(100,))
            input_senti = Input(shape=(2,))

            text_out = Dense(50, input_dim=100, activation='relu')(input_text)
            text_out = Dropout(0.4)(text_out)
            text_out = Dense(10, activation='relu')(text_out)
            text_out = Dropout(0.4)(text_out)
            auxi_out = Dense(label_categories, activation='sigmoid', name='text_out')(text_out)

            senti_out = Dense(10, input_dim=2, activation='sigmoid')(input_senti)
            senti_out = Dropout(0.4)(senti_out)
            # senti_out = Dense(label_categories, activation='sigmoid', name='senti_out')(senti_out)

            x = keras.layers.concatenate([text_out, senti_out])
            y = Dense(label_categories, activation='sigmoid', name='final_out')(x)

            model = Model(inputs=[input_text, input_senti], outputs=[y, auxi_out])
            model.compile(optimizer='adam',
                          loss='binary_crossentropy',
                          metrics=['accuracy', auc])

            model.fit([X_train_text, X_train_senti], [dummy_Y_train, dummy_Y_train],
                      shuffle=True, validation_split=0.2, epochs=350, batch_size=64)

            scores_train = model.evaluate([X_train_text, X_train_senti], [dummy_Y_train, dummy_Y_train], batch_size=64)
            print('result => ' + model.metrics_names[0] + ": " + str(scores_train[0]) + "  "
                  + model.metrics_names[1] + ": " + str(scores_train[1]) + "  "
                  + model.metrics_names[2] + ": " + str(scores_train[2]) + "  "
                  + model.metrics_names[3] + ": " + str(scores_train[3]) + "  "
                  + model.metrics_names[4] + ": " + str(scores_train[4]) + "  "
                  + model.metrics_names[5] + ": " + str(scores_train[5]) + "  "
                  + model.metrics_names[6] + ": " + str(scores_train[6]) + "  "
                  )

            scores = model.evaluate([X_test_text, X_test_senti], [dummy_Y_test, dummy_Y_test], batch_size=64)
            print('result => ' + model.metrics_names[0] + ": " + str(scores[0]) + "  "
                  + model.metrics_names[1] + ": " + str(scores[1]) + "  "
                  + model.metrics_names[2] + ": " + str(scores[2]) + "  "
                  + model.metrics_names[3] + ": " + str(scores[3]) + "  "
                  + model.metrics_names[4] + ": " + str(scores[4]) + "  "
                  + model.metrics_names[5] + ": " + str(scores[5]) + "  "
                  + model.metrics_names[6] + ": " + str(scores[6]) + "  "
                  )
            score_dic = {model.metrics_names[0]: scores[0],
                         model.metrics_names[1]: scores[1],
                         model.metrics_names[2]: scores[2],
                         model.metrics_names[3]: scores[3],
                         model.metrics_names[4]: scores[4],
                         model.metrics_names[5]: scores[5],
                         model.metrics_names[6]: scores[6]
                         }

            Y_predict = model.predict([X_test_text, X_test_senti])
            confusion_matrix = conf_matrix(dummy_Y_test, Y_predict[0], label_index)
            print(confusion_matrix)
            print('round ' + str(train_chunk_number) + ' finished')
            return score_dic, confusion_matrix
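This model has two outputs ('final_out' and the auxiliary 'text_out') trained on the same targets, so the list-of-targets form can equivalently pass a dict keyed by output name, which makes the auxiliary supervision explicit (a sketch with the same behaviour):

        model.fit([X_train_text, X_train_senti],
                  {'final_out': dummy_Y_train, 'text_out': dummy_Y_train},
                  shuffle=True, validation_split=0.2, epochs=350, batch_size=64)

With two outputs, evaluate returns the combined loss, one loss per output, and accuracy/AUC per output, which is why seven entries are indexed above; the exact ordering and names depend on the Keras version, so pairing metrics_names with scores programmatically is safer than hard-coded indices.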
Example No. 5
        def train_text_senti_lstm():
            input_text = Input(shape=(100,))
            input_senti = Input(shape=(6,))
            input_lstm = Input(shape=(128,))

            input_text_senti = keras.layers.concatenate([input_text, input_senti, input_lstm])
            text_out = Dense(120, input_dim=234, activation='relu')(input_text_senti)
            text_out = Dropout(0.4)(text_out)
            text_out = Dense(10, activation='relu')(text_out)
            y = Dense(label_categories, activation='sigmoid', name='text_out')(text_out)


            model = Model(inputs=[input_text, input_senti, input_lstm], outputs=y)
            model.compile(optimizer='adam',
                          loss='binary_crossentropy',
                          metrics=['accuracy', auc])

            model.fit([X_train_text, X_train_senti, X_train_lstm], dummy_Y_train, shuffle=True,
                      validation_split=0.2, epochs=n_epochs, batch_size=64)


            scores_train = model.evaluate([X_train_text, X_train_senti, X_train_lstm], dummy_Y_train, batch_size=64)
            print(str(n_epochs) + ' epochs, ' + str(train_chunk_number) + ' chunk, train_result => '
                  + model.metrics_names[0] + ": " + str(scores_train[0]) + "  "
                  + model.metrics_names[1] + ": " + str(scores_train[1]) + "  "
                  )
            f.write(str(n_epochs) + ' epochs, ' + str(train_chunk_number) + ' chunk, train_result => '
                    + model.metrics_names[0] + ": " + str(scores_train[0]) + "  "
                    + model.metrics_names[1] + ": " + str(scores_train[1]) + "  \n")


            scores = model.evaluate([X_test_text, X_test_senti, X_test_lstm], dummy_Y_test, batch_size=64)
            print(str(n_epochs) + ' epochs, ' + str(train_chunk_number) + ' chunk, test_result => '
                  + model.metrics_names[0] + ": " + str(scores[0]) + "  "
                  + model.metrics_names[1] + ": " + str(scores[1]) + "  "
                  )
            f.write(str(n_epochs) + ' epochs, ' + str(train_chunk_number) + ' chunk, test_result => '
                    + model.metrics_names[0] + ": " + str(scores[0]) + "  "
                    + model.metrics_names[1] + ": " + str(scores[1]) + "  \n")

            score_dic = {model.metrics_names[0]: scores[0],
                         model.metrics_names[1]: scores[1],
                         model.metrics_names[2]: scores[2]}

            Y_predict = model.predict([X_test_text, X_test_senti, X_test_lstm])
            confusion_matrix = conf_matrix(dummy_Y_test, Y_predict, label_index)
            print(confusion_matrix)
            print('round ' + str(train_chunk_number) + ' finished')
            return score_dic, confusion_matrix
Example No. 6
        def train_text_cnn():
            inputs = Input(shape=(maxlen, w2v_dim, 1))
            x = Conv2D(filters=32,
                       kernel_size=(5, 100),
                       activation='relu',
                       name='conv_out')(inputs)
            x = AveragePooling2D((4, 1), name='pool_out')(x)
            x = Flatten()(x)
            x = Dense(50, activation='relu', name='dense_50')(x)
            y = Dense(label_categories, activation='sigmoid',
                      name='final_out')(x)

            model = Model(inputs=inputs, outputs=y)

            model.compile(loss='binary_crossentropy',
                          optimizer='adam',
                          metrics=['accuracy', auc])

            model.fit(X_train,
                      dummy_Y_train,
                      shuffle=True,
                      epochs=n_epochs,
                      batch_size=64,
                      validation_split=0.2,
                      verbose=2,
                      callbacks=[early_stopping])

            scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)

            print(model.metrics_names[0] + ":" + str(scores[0]) + "  " +
                  model.metrics_names[1] + ":" + str(scores[1]) + "  " +
                  model.metrics_names[2] + ":" + str(scores[2]) + "  ")
            score_dic = {
                model.metrics_names[0]: scores[0],
                model.metrics_names[1]: scores[1],
                model.metrics_names[2]: scores[2]
            }

            predictions = model.predict(X_test)
            confusion_matrix = conf_matrix(dummy_Y_test, predictions,
                                           label_index)
            print(confusion_matrix)
            print('round ' + str(train_chunk_number) + ' finished')
            return score_dic, confusion_matrix
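Conv2D needs a 4-D input here, so X_train/X_test must already have shape (samples, maxlen, w2v_dim, 1); with kernel_size=(5, 100) each filter covers five tokens and, assuming w2v_dim == 100, the full embedding width. A sketch of the reshape presumably applied beforehand (not shown in the source):

        # hypothetical: per-sample (maxlen, w2v_dim) embedding matrices plus a channel axis
        X_train = X_train.reshape(-1, maxlen, w2v_dim, 1)
        X_test = X_test.reshape(-1, maxlen, w2v_dim, 1)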
Example No. 7
        def train_insertdense():
            # load the pretrained sentiment LSTM model
            lstm_inputs = Input(shape=(maxlen, ))
            lstm_embedding = Embedding(output_dim=word2vec_dim,
                                       input_dim=n_symbols,
                                       mask_zero=True,
                                       weights=[embedding_weights],
                                       input_length=maxlen,
                                       name='embedding_1')(lstm_inputs)
            lstm_lstm = LSTM(128, name='lstm_out')(lstm_embedding)
            lstm_dropout_1 = Dropout(0.5)(lstm_lstm)
            lstm_dense_50 = Dense(50, activation='relu',
                                  name='dense_50')(lstm_dropout_1)

            lstm_model = Model(inputs=lstm_inputs, outputs=lstm_dense_50)
            lstm_model.load_weights(path.join(path.dirname(__file__),
                                              'sentiment_lstm.h5'),
                                    by_name=True)

            # fully connected layers used for the final classification
            fcnn_inputs = Input(shape=(100, ))
            fcnn_conca = keras.layers.concatenate([fcnn_inputs, lstm_dense_50])
            fcnn_dense_75 = Dense(75, activation='relu')(fcnn_conca)
            fcnn_dropout_1 = Dropout(0.4)(fcnn_dense_75)
            fcnn_dense_15 = Dense(15, activation='relu')(fcnn_dropout_1)
            fcnn_dropout_2 = Dropout(0.4)(fcnn_dense_15)
            fcnn_out = Dense(label_categories,
                             activation='sigmoid')(fcnn_dropout_2)

            model = Model(inputs=[fcnn_inputs, lstm_inputs], outputs=fcnn_out)
            model.compile(optimizer='adam',
                          loss='binary_crossentropy',
                          metrics=['accuracy', auc])
            model.summary()

            print('fit')
            model.fit([X_train_text, X_train_lstm],
                      dummy_Y_train,
                      shuffle=True,
                      validation_split=0.2,
                      epochs=n_epochs,
                      batch_size=64,
                      verbose=1)

            print('evaluate')
            scores_train = model.evaluate([X_train_text, X_train_lstm],
                                          dummy_Y_train,
                                          batch_size=64)
            print(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, train_result => ' + model.metrics_names[0] + ": " +
                str(scores_train[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores_train[1]) + "  ")
            f.write(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, train_result => ' + model.metrics_names[0] + ": " +
                str(scores_train[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores_train[1]) + "  \n")

            scores = model.evaluate([X_test_text, X_test_lstm],
                                    dummy_Y_test,
                                    batch_size=512)
            print(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, test_result => ' + model.metrics_names[0] + ": " +
                str(scores[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores[1]) + "  ")
            f.write(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, test_result => ' + model.metrics_names[0] + ": " +
                str(scores[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores[1]) + "  \n")

            score_dic = {
                model.metrics_names[0]: scores[0],
                model.metrics_names[1]: scores[1]
            }

            Y_predict = model.predict([X_test_text, X_test_lstm])
            confusion_matrix = conf_matrix(dummy_Y_test, Y_predict,
                                           label_index)
            print(confusion_matrix)
            print('round ' + str(train_chunk_number) + ' finished')
            return score_dic, confusion_matrix
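load_weights(..., by_name=True) copies weights only into layers whose names match entries in 'sentiment_lstm.h5' ('embedding_1', 'lstm_out', 'dense_50' here); since those layers are shared with the combined model, they keep training together with the classifier. If the pretrained sentiment features should instead stay fixed, they can be frozen before compiling (an optional variation, not what the example does):

        # optional: freeze the pretrained sentiment branch before model.compile(...)
        for layer in lstm_model.layers:
            layer.trainable = False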
Example No. 8
        def train_insertlstm():
            # load the pretrained LSTM model
            lstm_inputs = Input(shape=(maxlen, word2vec_dim))
            lstm_lstm = LSTM(128, name='lstm_out')(lstm_inputs)
            lstm_model = Model(inputs=lstm_inputs, outputs=lstm_lstm)

            lstm_model.load_weights(path.join(path.dirname(__file__),
                                              'smote_simJD_sentiment_lstm.h5'),
                                    by_name=True)
            print(lstm_model.get_weights())

            # fully connected layers used for the final classification
            fcnn_inputs = Input(shape=(100, ), name='fcnn_input')
            # fcnn_dense_a = Dense(50,activation='relu')(fcnn_inputs)
            # fcnn_dropout_a = Dropout(0.4)(fcnn_dense_a)
            # fcnn_dense_aa = Dense(10,activation='relu')(fcnn_dropout_a)
            # lstm_dense_b = Dense(50,activation='relu')(lstm_lstm)
            # lstm_dropout_b = Dropout(0.4)(lstm_dense_b)
            # lstm_dense_bb = Dense(10,activation='relu')(lstm_dropout_b)
            fcnn_conca = keras.layers.concatenate([fcnn_inputs, lstm_lstm])
            x = Dense(100, activation='relu')(fcnn_conca)
            x = Dropout(0.4)(x)
            x = Dense(20, activation='relu')(x)
            fcnn_out = Dense(label_categories, activation='sigmoid')(x)

            model = Model(inputs=[fcnn_inputs, lstm_inputs], outputs=fcnn_out)
            model.compile(optimizer='adam',
                          loss='binary_crossentropy',
                          metrics=['accuracy', auc])
            model.summary()

            print('fit')
            model.fit([X_train_text, X_train_lstm],
                      dummy_Y_train,
                      shuffle=True,
                      validation_split=0.2,
                      epochs=n_epochs,
                      batch_size=64,
                      verbose=1)

            print('evaluate')
            scores_train = model.evaluate([X_train_text, X_train_lstm],
                                          dummy_Y_train,
                                          batch_size=64)
            print(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, train_result => ' + model.metrics_names[0] + ": " +
                str(scores_train[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores_train[1]) + "  " + model.metrics_names[2] + ": " +
                str(scores_train[2]) + "  ")
            f.write(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, train_result => ' + model.metrics_names[0] + ": " +
                str(scores_train[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores_train[1]) + "  " + model.metrics_names[2] + ": " +
                str(scores_train[2]) + "  \n")

            scores = model.evaluate([X_test_text, X_test_lstm],
                                    dummy_Y_test,
                                    batch_size=512)
            print(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, test_result => ' + model.metrics_names[0] + ": " +
                str(scores[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores[1]) + "  " + model.metrics_names[2] + ": " +
                str(scores[2]) + "  ")
            f.write(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, test_result => ' + model.metrics_names[0] + ": " +
                str(scores[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores[1]) + "  " + model.metrics_names[2] + ": " +
                str(scores[2]) + "  \n")

            score_dic = {
                model.metrics_names[0]: scores[0],
                model.metrics_names[1]: scores[1],
                model.metrics_names[2]: scores[2]
            }

            Y_predict = model.predict([X_test_text, X_test_lstm])
            confusion_matrix = conf_matrix(dummy_Y_test, Y_predict,
                                           label_index)
            print(confusion_matrix)
            print('round ' + str(train_chunk_number) + ' finished')
            return score_dic, confusion_matrix
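print(lstm_model.get_weights()) above is a quick check that the by_name load actually populated the LSTM; a more targeted version of the same check (a sketch, with `weights_path` standing in for the smote_simJD_sentiment_lstm.h5 path used above) captures the 'lstm_out' weights before loading and compares them afterwards:

        import numpy as np

        before = [w.copy() for w in lstm_model.get_layer('lstm_out').get_weights()]
        lstm_model.load_weights(weights_path, by_name=True)   # weights_path: the .h5 path above
        after = lstm_model.get_layer('lstm_out').get_weights()
        print('lstm_out weights changed:',
              any(not np.array_equal(b, a) for b, a in zip(before, after)))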
        def train_onlyText_cnn():
            # text-only CNN over the padded word-embedding input
            inputs = Input(shape=(maxlen, word2vec_dim, 1))
            x = Conv2D(filters=32,
                       kernel_size=(5, 100),
                       activation='relu',
                       name='conv_out')(inputs)
            x = AveragePooling2D((4, 1), name='pool_out')(x)
            x = Flatten()(x)
            x = Dense(50, activation='relu', name='dense_50')(x)
            y = Dense(label_categories, activation='sigmoid')(x)

            model = Model(inputs=inputs, outputs=y)
            model.compile(optimizer='adam',
                          loss='binary_crossentropy',
                          metrics=['accuracy', auc])
            model.summary()

            print('fit')
            model.fit(X_train_cnn,
                      dummy_Y_train,
                      shuffle=True,
                      validation_split=0.2,
                      epochs=n_epochs,
                      batch_size=64,
                      verbose=1)

            print('evaluate')
            scores_train = model.evaluate(X_train_cnn,
                                          dummy_Y_train,
                                          batch_size=64)
            print(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, train_result => ' + model.metrics_names[0] + ": " +
                str(scores_train[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores_train[1]) + "  " + model.metrics_names[2] + ": " +
                str(scores_train[2]) + "  ")
            f.write(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, train_result => ' + model.metrics_names[0] + ": " +
                str(scores_train[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores_train[1]) + "  " + model.metrics_names[2] + ": " +
                str(scores_train[2]) + "  \n")

            scores = model.evaluate(X_test_cnn, dummy_Y_test, batch_size=512)
            print(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, test_result => ' + model.metrics_names[0] + ": " +
                str(scores[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores[1]) + "  " + model.metrics_names[2] + ": " +
                str(scores[2]) + "  ")
            f.write(
                str(n_epochs) + ' epochs, ' + str(train_chunk_number) +
                ' chunk, test_result => ' + model.metrics_names[0] + ": " +
                str(scores[0]) + "  " + model.metrics_names[1] + ": " +
                str(scores[1]) + "  " + model.metrics_names[2] + ": " +
                str(scores[2]) + "  \n")

            score_dic = {
                model.metrics_names[0]: scores[0],
                model.metrics_names[1]: scores[1],
                model.metrics_names[2]: scores[2]
            }

            Y_predict = model.predict(X_test_cnn)
            confusion_matrix = conf_matrix(dummy_Y_test, Y_predict,
                                           label_index)
            print(confusion_matrix)
            print('round ' + str(train_chunk_number) + ' finished')
            return score_dic, confusion_matrix