Пример #1
0
    def triple_classification():
        """Train a small feed-forward classifier and score it on the test split.

        The network is Dense(100) -> Dropout -> Dense(50) -> Dropout ->
        Dense(10) -> Dense(label_categories, sigmoid), compiled with binary
        cross-entropy and the Adam optimizer.  The data (``X_train``,
        ``dummy_Y_train``, ``X_test``, ``dummy_Y_test``) and the settings
        (``w2v_dim``, ``label_categories``, ``label_index``, ``auc``,
        ``early_stopping``) are read from names defined outside this function.

        Returns:
            A ``(score_dic, confusion_matrix)`` pair.  ``score_dic`` maps the
            first three entries of ``model.metrics_names`` to their test-set
            values; ``confusion_matrix`` is whatever ``conf_matrix`` returns
            for the test labels and predictions.
        """
        net = Sequential()
        stack = (
            Dense(100, input_dim=w2v_dim, activation='relu'),
            Dropout(0.4),
            Dense(50, activation='relu'),
            Dropout(0.4),
            Dense(10, activation='relu'),
            Dense(label_categories, activation='sigmoid'),
        )
        for layer in stack:
            net.add(layer)

        net.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy', auc])

        net.fit(X_train,
                dummy_Y_train,
                batch_size=64,
                epochs=800,
                validation_split=0.2,
                shuffle=True,
                verbose=2,
                callbacks=[early_stopping])

        test_scores = net.evaluate(X_test, dummy_Y_test, batch_size=64)

        # Index explicitly (rather than slice) so that a model reporting fewer
        # than three values still fails loudly.
        names = [net.metrics_names[i] for i in range(3)]
        values = [test_scores[i] for i in range(3)]

        print("".join(name + ":" + str(value) + "  "
                      for name, value in zip(names, values)))
        score_dic = dict(zip(names, values))

        test_predictions = net.predict(X_test)
        confusion_matrix = conf_matrix(dummy_Y_test, test_predictions,
                                       label_index)
        print(confusion_matrix)
        return score_dic, confusion_matrix
Пример #2
0
    def train_text_dense_3():
        """Train a two-branch dense model on text and dense feature inputs.

        One branch maps the 100-wide text input through Dense(50), Dropout and
        Dense(10); the other maps the 50-wide dense input through Dense(10).
        The branches are concatenated and fed to a sigmoid output layer of
        ``label_categories`` units.  Training and test arrays
        (``X_train_text``, ``X_train_dense``, ``X_test_text``,
        ``X_test_dense``, ``dummy_Y_train``, ``dummy_Y_test``) as well as
        ``label_index``, ``auc`` and ``train_chunk_number`` come from names
        defined outside this function.

        Returns:
            A ``(score_dic, confusion_matrix)`` pair built from the test-set
            evaluation and the result of ``conf_matrix`` on the test
            predictions.
        """
        text_in = Input(shape=(100, ))
        dense_in = Input(shape=(50, ))

        # Text branch.
        text_branch = Dense(50, input_dim=100, activation='relu')(text_in)
        text_branch = Dropout(0.4)(text_branch)
        text_branch = Dense(10, activation='relu')(text_branch)

        # Dense-feature branch.
        dense_branch = Dense(10, input_dim=50, activation='relu')(dense_in)

        merged = keras.layers.concatenate([text_branch, dense_branch])
        output = Dense(label_categories, activation='sigmoid')(merged)

        net = Model(inputs=[text_in, dense_in], outputs=output)
        net.compile(loss='binary_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy', auc])

        train_inputs = [X_train_text, X_train_dense]
        test_inputs = [X_test_text, X_test_dense]

        net.fit(train_inputs,
                dummy_Y_train,
                batch_size=256,
                epochs=300,
                validation_split=0.2,
                shuffle=True,
                verbose=2)

        def _first_three(values):
            # Explicit indices keep the "exactly three metrics" expectation.
            return [(net.metrics_names[i], values[i]) for i in range(3)]

        def _show(values):
            print('result => ' + "".join(
                name + ": " + str(value) + "  "
                for name, value in _first_three(values)))

        _show(net.evaluate(train_inputs, dummy_Y_train, batch_size=256))

        test_scores = net.evaluate(test_inputs, dummy_Y_test, batch_size=256)
        _show(test_scores)
        score_dic = dict(_first_three(test_scores))

        test_predictions = net.predict(test_inputs)
        confusion_matrix = conf_matrix(dummy_Y_test, test_predictions,
                                       label_index)
        print(confusion_matrix)
        print('round ' + str(train_chunk_number) + ' finished')
        return score_dic, confusion_matrix
Пример #3
0
    def train_text():
        """Train a sigmoid-activated dense network on the text features.

        Architecture: Dense(300) -> Dropout(0.4) -> Dense(200) -> Dense(50) ->
        Dense(10) -> softmax output named ``text_out``.  Inputs
        (``X_train``, ``dummy_Y_train``, ``X_test``, ``dummy_Y_test``) and
        settings (``w2v_dim``, ``label_categories``, ``label_index``, ``auc``,
        ``early_stopping``, ``train_chunk_number``) are read from names
        defined outside this function.

        Two confusion matrices are printed: the one from ``conf_matrix`` and
        the one returned by ``get_w_acc_conf`` together with its accuracy.

        Returns:
            A ``(score_dic, confusion_matrix)`` pair, where ``score_dic`` maps
            the first three metric names to their test-set values and
            ``confusion_matrix`` is the matrix returned by ``get_w_acc_conf``.
        """
        features = Input(shape=(w2v_dim, ))

        hidden = Dense(300, input_dim=w2v_dim, activation='sigmoid')(features)
        hidden = Dropout(0.4)(hidden)
        for width in (200, 50, 10):
            hidden = Dense(width, activation='sigmoid')(hidden)
        output = Dense(label_categories, activation='softmax',
                       name='text_out')(hidden)

        net = Model(inputs=features, outputs=output)
        net.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy', auc])

        net.fit(X_train,
                dummy_Y_train,
                batch_size=64,
                epochs=15,
                validation_split=0.2,
                shuffle=True,
                verbose=2,
                callbacks=[early_stopping])

        test_scores = net.evaluate(X_test, dummy_Y_test, batch_size=64)

        # Pair up the three reported values with their names; explicit
        # indexing fails loudly if fewer than three are available.
        named_scores = [(net.metrics_names[i], test_scores[i])
                        for i in range(3)]
        print("".join(name + ":" + str(value) + "  "
                      for name, value in named_scores))
        score_dic = dict(named_scores)

        test_predictions = net.predict(X_test)

        print('general_confusion_matrix =>')
        general_matrix = conf_matrix(dummy_Y_test, test_predictions,
                                     label_index)
        print(print_conf_matrix(general_matrix))

        confusion_matrix, acc = get_w_acc_conf(train_chunk_number, label_index,
                                               dummy_Y_test, test_predictions)
        print('w_confusion_matrix =>')
        print(print_conf_matrix(confusion_matrix))
        print('w_acc =>', acc)

        print('round ' + str(train_chunk_number) + ' finished')

        return score_dic, confusion_matrix
Пример #4
0
    def train_text_keywords_1():
        """Train a dense classifier on concatenated text and keyword features.

        The 100-wide text input and the 256-wide keyword input are
        concatenated and passed through Dense(150) -> Dropout -> Dense(50) ->
        Dropout -> Dense(10) -> a sigmoid output named ``text_out``.  The
        arrays (``X_train_text``, ``X_train_keywords``, ``X_test_text``,
        ``X_test_keywords``, ``dummy_Y_train``, ``dummy_Y_test``) and the
        settings (``label_categories``, ``label_index``, ``auc``,
        ``early_stopping``, ``train_chunk_number``) are read from names
        defined outside this function.

        Returns:
            A ``(score_dic, confusion_matrix)`` pair: ``score_dic`` maps each
            reported metric name to its test-set value, and
            ``confusion_matrix`` is the result of ``conf_matrix`` on the test
            predictions.
        """
        input_text = Input(shape=(100, ))
        input_keywords = Input(shape=(256, ))

        # No input_dim is given to the first Dense layer: a layer called on a
        # tensor takes its input width from that tensor (100 + 256 = 356 here),
        # so a hand-written width can only be redundant or wrong.
        merged = keras.layers.concatenate([input_text, input_keywords])
        text_out = Dense(150, activation='relu')(merged)
        text_out = Dropout(0.4)(text_out)
        text_out = Dense(50, activation='relu')(text_out)
        text_out = Dropout(0.4)(text_out)
        text_out = Dense(10, activation='relu')(text_out)
        y = Dense(label_categories, activation='sigmoid',
                  name='text_out')(text_out)

        model = Model(inputs=[input_text, input_keywords], outputs=y)
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy', auc])

        train_inputs = [X_train_text, X_train_keywords]
        test_inputs = [X_test_text, X_test_keywords]

        model.fit(train_inputs,
                  dummy_Y_train,
                  shuffle=True,
                  validation_split=0.2,
                  epochs=400,
                  batch_size=512,
                  callbacks=[early_stopping])

        def _report(prefix, values):
            # One "name: value  " chunk per reported metric.
            print(prefix + "".join(
                name + ": " + str(value) + "  "
                for name, value in zip(model.metrics_names, values)))

        scores_train = model.evaluate(train_inputs,
                                      dummy_Y_train,
                                      batch_size=512)
        _report('train_result => ', scores_train)

        scores = model.evaluate(test_inputs, dummy_Y_test, batch_size=512)
        _report('test_result => ', scores)

        score_dic = dict(zip(model.metrics_names, scores))

        Y_predict = model.predict(test_inputs)
        confusion_matrix = conf_matrix(dummy_Y_test, Y_predict, label_index)
        print(confusion_matrix)
        print('round ' + str(train_chunk_number) + ' finished')
        return score_dic, confusion_matrix
Пример #5
0
    def train_text_cnn():
        """Train a single-convolution CNN on the embedded text and evaluate it.

        Architecture: Conv2D(16 filters, 5x100 kernel, tanh) ->
        AveragePooling2D(4x1) -> Flatten -> Dense(100) -> Dense(10) -> a
        sigmoid output named ``text_out``.  The arrays (``X_train``,
        ``dummy_Y_train``, ``X_test``, ``dummy_Y_test``) and the settings
        (``max_count``, ``w2v_dim``, ``label_categories``, ``label_index``,
        ``auc``, ``early_stopping``, ``train_chunk_number``) are read from
        names defined outside this function.

        Returns:
            A ``(score_dic, confusion_matrix)`` pair: ``score_dic`` maps each
            reported metric name to its test-set value, and
            ``confusion_matrix`` is the result of ``conf_matrix`` on the test
            predictions.
        """
        # Named ``inputs`` so the builtin ``input`` is not shadowed.
        inputs = Input(shape=(max_count, w2v_dim, 1))

        # NOTE(review): the kernel width is hard-coded to 100; presumably it is
        # meant to span the whole embedding (w2v_dim) -- confirm with caller.
        x = Conv2D(filters=16, kernel_size=(5, 100), activation='tanh')(inputs)
        x = AveragePooling2D(pool_size=(4, 1))(x)
        x = Flatten()(x)
        x = Dense(100, activation='relu')(x)
        x = Dense(10, activation='relu')(x)
        # binary_crossentropy (and the AUC metric) need outputs in [0, 1];
        # tanh yields values in [-1, 1], so the output layer uses sigmoid.
        y = Dense(label_categories, activation='sigmoid', name='text_out')(x)

        model = Model(inputs=inputs, outputs=y)
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy', auc])
        model.summary()

        model.fit(X_train,
                  dummy_Y_train,
                  shuffle=True,
                  epochs=10,
                  batch_size=64,
                  validation_split=0.2,
                  verbose=2,
                  callbacks=[early_stopping])

        scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)

        print("".join(name + ":" + str(value) + "  "
                      for name, value in zip(model.metrics_names, scores)))
        score_dic = dict(zip(model.metrics_names, scores))

        predictions = model.predict(X_test)
        confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
        print(confusion_matrix)
        print('round ' + str(train_chunk_number) + ' finished')
        return score_dic, confusion_matrix
Пример #6
0
    def train_text_lstm():
        """Train an LSTM classifier on the embedded text and evaluate it.

        Architecture: LSTM(256) -> Dropout(0.4) -> Dense(100, tanh) ->
        Dense(20, tanh) -> a sigmoid output named ``text_out``, compiled with
        binary cross-entropy and the SGD optimizer.  The arrays (``X_train``,
        ``dummy_Y_train``, ``X_test``, ``dummy_Y_test``) and the settings
        (``max_count``, ``w2v_dim``, ``label_categories``, ``label_index``,
        ``auc``, ``train_chunk_number``) are read from names defined outside
        this function.

        Returns:
            A ``(score_dic, confusion_matrix)`` pair: ``score_dic`` maps each
            reported metric name to its test-set value, and
            ``confusion_matrix`` is the result of ``conf_matrix`` on the test
            predictions.
        """
        # Named ``inputs`` so the builtin ``input`` is not shadowed.
        inputs = Input(shape=(max_count, w2v_dim))

        x = LSTM(256)(inputs)
        x = Dropout(0.4)(x)
        x = Dense(100, activation='tanh')(x)
        x = Dense(20, activation='tanh')(x)
        # binary_crossentropy (and the AUC metric) need outputs in [0, 1];
        # tanh yields values in [-1, 1], so the output layer uses sigmoid.
        y = Dense(label_categories, activation='sigmoid', name='text_out')(x)

        model = Model(inputs=inputs, outputs=y)

        model.compile(loss='binary_crossentropy',
                      optimizer='sgd',
                      metrics=['accuracy', auc])

        model.fit(X_train,
                  dummy_Y_train,
                  shuffle=True,
                  epochs=20,
                  batch_size=64,
                  validation_split=0.2,
                  verbose=2)

        scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)

        print("".join(name + ":" + str(value) + "  "
                      for name, value in zip(model.metrics_names, scores)))
        score_dic = dict(zip(model.metrics_names, scores))

        predictions = model.predict(X_test)
        confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
        print(confusion_matrix)
        print('round ' + str(train_chunk_number) + ' finished')
        return score_dic, confusion_matrix
Пример #7
0
    def train_text_keywords_2():
        """Train a two-input, two-output model on text and keyword features.

        The 100-wide text input goes through Dense(50); from there an
        auxiliary head (Dropout -> Dense(10) -> Dropout -> sigmoid output
        ``auxi_out``) is attached.  The 256-wide keyword input goes through a
        sigmoid Dense(50).  The text and keyword representations are
        concatenated, passed through Dense(10) and a sigmoid output
        ``final_out``.  Both outputs are trained against ``dummy_Y_train``.

        The arrays (``X_train_text``, ``X_train_keywords``, ``X_test_text``,
        ``X_test_keywords``, ``dummy_Y_train``, ``dummy_Y_test``) and the
        settings (``label_categories``, ``label_index``, ``auc``,
        ``early_stopping``, ``train_chunk_number``) are read from names
        defined outside this function.

        Returns:
            A ``(score_dic, confusion_matrix)`` pair: ``score_dic`` maps every
            reported metric name to its test-set value, and
            ``confusion_matrix`` is the result of ``conf_matrix`` on the
            predictions of the first model output (``final_out``).
        """
        input_text = Input(shape=(100, ))
        input_keywords = Input(shape=(256, ))

        text_out = Dense(50, input_dim=100, activation='relu')(input_text)
        auxi_out = Dropout(0.4)(text_out)
        auxi_out = Dense(10, activation='relu')(auxi_out)
        auxi_out = Dropout(0.4)(auxi_out)
        auxi_out = Dense(label_categories,
                         activation='sigmoid',
                         name='auxi_out')(auxi_out)

        # No input_dim here: the layer takes its input width (256) from the
        # tensor it is called on, so a hand-written width can only mislead.
        keywords_out = Dense(50, activation='sigmoid')(input_keywords)

        x = keras.layers.concatenate([text_out, keywords_out])
        x = Dense(10, activation='relu')(x)
        y = Dense(label_categories, activation='sigmoid', name='final_out')(x)

        model = Model(inputs=[input_text, input_keywords],
                      outputs=[y, auxi_out])
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy', auc])

        train_inputs = [X_train_text, X_train_keywords]
        test_inputs = [X_test_text, X_test_keywords]

        model.fit(train_inputs,
                  [dummy_Y_train, dummy_Y_train],
                  shuffle=True,
                  validation_split=0.2,
                  epochs=350,
                  batch_size=64,
                  callbacks=[early_stopping])

        def _report(values):
            # Iterate over whatever the model reports instead of assuming a
            # fixed number of loss/metric entries.
            print('result => ' + "".join(
                name + ": " + str(value) + "  "
                for name, value in zip(model.metrics_names, values)))

        scores_train = model.evaluate(train_inputs,
                                      [dummy_Y_train, dummy_Y_train],
                                      batch_size=512)
        _report(scores_train)

        scores = model.evaluate(test_inputs,
                                [dummy_Y_test, dummy_Y_test],
                                batch_size=512)
        _report(scores)
        score_dic = dict(zip(model.metrics_names, scores))

        Y_predict = model.predict(test_inputs)
        # Index 0 is the first entry of ``outputs`` above, i.e. ``final_out``.
        confusion_matrix = conf_matrix(dummy_Y_test, Y_predict[0], label_index)
        print(confusion_matrix)
        print('round ' + str(train_chunk_number) + ' finished')
        return score_dic, confusion_matrix