Exemplo n.º 1
0
 def evaluation_mbrnn():
     """Evaluate a DynamicWeightHybridModel and save its weights.

     Loads the test split only (``load_train=False``, ``noise_percent=0``),
     converts it with ``trans_to_wordvec_by_word2vec``, prints precision,
     recall and f1-score, then writes the weights to ``DW_HYBRID_600_PATH``.
     """
     start = 0
     samples, labels = load_data_for_rnn_new_add_noise(
         data_set=3,
         train_num=1,
         test_num=400,
         load_train=False,
         noise_percent=0)
     hybrid = DynamicWeightHybridModel(
         time_step=TIME_STEP,
         feature_size=WORD2VEC_FEATURE_NUM,
         rnn_utils=RNN_UTILS,
         rnn_layers_num=RNN_LAYERS_NUM,
         brnn_model_path=BRNN_700_PATH,
         fnn_model_path=JOINT_100_PATH)

     # The label list is updated in place; the return value is not used.
     process_rnn_label_list(labels, time_step=hybrid.time_step, begin=start)
     samples = trans_to_wordvec_by_word2vec(
         samples,
         feature_size=WORD2VEC_FEATURE_NUM,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=hybrid.time_step,
         begin=start)

     inputs = tf.constant(samples, dtype=tf.float32)
     targets = tf.constant(labels, dtype=tf.float32)

     # The per-sample weights are returned as well but are not needed here.
     metrics, _weights = hybrid.evaluate(inputs, targets, return_weight=True)
     report = 'test data precision:{}, recall:{}, f1-score:{}'
     print(report.format(metrics['precision'],
                         metrics['recall'],
                         metrics['f1-score']))
     hybrid.save_weights(DW_HYBRID_600_PATH)
Exemplo n.º 2
0
 def evaluation_mbrnn_load_model():
     """Evaluate a pre-trained GRUModel on the 'shuffle'-noise test split.

     The weights are restored from ``BRNN_700_PATH``; no training happens
     here. Precision, recall and f1-score on the test split are printed.

     The word-vector width passed to ``trans_to_wordvec_by_word2vec`` uses
     ``WORD2VEC_FEATURE_NUM`` so that the embedded inputs always match the
     ``feature_size`` the model was constructed with.
     """
     # The loader is still asked for the train split so the call is unchanged,
     # but only the test split is used.
     _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
         data_set=3,
         train_num=100,
         test_num=400,
         noise_percent=10,
         noise_type='shuffle')
     brnn = GRUModel(time_step=TIME_STEP,
                     feature_size=WORD2VEC_FEATURE_NUM,
                     rnn_utils=RNN_UTILS,
                     rnn_layers_num=RNN_LAYERS_NUM)
     brnn.load_weights(BRNN_700_PATH)
     begin = 0
     # Modifies the label list in place to unify its dimensions.
     process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)
     test_x = trans_to_wordvec_by_word2vec(
         test_x,
         feature_size=WORD2VEC_FEATURE_NUM,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=brnn.time_step,
         begin=begin)
     test_x, test_y = tf.constant(test_x, dtype=tf.float32), tf.constant(
         test_y, dtype=tf.float32)
     ev = brnn.evaluate(test_x, test_y)
     template = 'test data precision:{}, recall:{}, f1-score:{}'
     print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
Exemplo n.º 3
0
 def evaluation_mbrnn_load_model():
     """Evaluate a pre-trained KnowledgeDistillModel on the noisy test split.

     Weights are restored from ``KNOWLEDGE_DISTILL_MODEL_700_PATH``. The
     model is evaluated twice on the same data: once with the default
     ``evaluate`` call (printed under ``b-brnn:``) and once with
     ``choose=1`` (printed under ``fnn:``).

     The word-vector width passed to ``trans_to_wordvec_by_word2vec`` uses
     ``WORD2VEC_FEATURE_NUM`` so that the embedded inputs always match the
     ``feature_size`` the model was constructed with.
     """
     # Only the test split is used; the train split is discarded.
     _, _, test_x, test_y = load_data_for_rnn_new_add_noise(
         data_set=3,
         train_num=1,
         test_num=400,
         noise_type='shuffle',
         noise_percent=10)
     brnn = KnowledgeDistillModel(time_step=TIME_STEP,
                                  feature_size=WORD2VEC_FEATURE_NUM,
                                  rnn_utils=RNN_UTILS,
                                  rnn_layers_num=RNN_LAYERS_NUM,
                                  fnn_model_path=FNN_700_PATH)
     brnn.load_weights(KNOWLEDGE_DISTILL_MODEL_700_PATH)
     begin = 0
     # The label list is updated in place; the return value is not used.
     process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)
     test_x = trans_to_wordvec_by_word2vec(
         test_x,
         feature_size=WORD2VEC_FEATURE_NUM,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=brnn.time_step,
         begin=begin)
     test_x, test_y = tf.constant(test_x, dtype=tf.float32), tf.constant(
         test_y, dtype=tf.float32)
     template = 'test data precision:{}, recall:{}, f1-score:{}'

     ev = brnn.evaluate(test_x, test_y)
     print('b-brnn:')
     print(template.format(ev['precision'], ev['recall'], ev['f1-score']))

     ev = brnn.evaluate(test_x, test_y, choose=1)
     print('fnn:')
     print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
Exemplo n.º 4
0
 def evaluation_joint():
     """Train an FNNModel, report its test metrics and save the weights.

     Both splits are loaded with ``noise_percent=10``, converted with
     ``trans_to_wordvec_by_word2vec`` and wrapped as float32 tensors. The
     model is fitted for 12 epochs with ``batchsz=5``, evaluated with
     ``choose=0`` and finally saved to ``JOINT_100_PATH``.
     """
     start = 0
     train_x, train_y, test_x, test_y = load_data_for_rnn_new_add_noise(
         data_set=3, train_num=700, test_num=400, noise_percent=10)
     fnn = FNNModel(time_step=12, feature_size=100)

     def to_vectors(samples):
         # Same conversion settings for the train and the test split.
         return trans_to_wordvec_by_word2vec(
             samples,
             feature_size=100,
             word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
             type='rnn',
             time_step=fnn.time_step,
             begin=start)

     def as_float32(values):
         return tf.constant(values, dtype=tf.float32)

     # Modifies each label list in place to unify its dimensions.
     for labels in (train_y, test_y):
         process_rnn_label_list(labels, time_step=fnn.time_step, begin=start)

     train_x = to_vectors(train_x)
     test_x = to_vectors(test_x)
     train_x, train_y = as_float32(train_x), as_float32(train_y)
     test_x, test_y = as_float32(test_x), as_float32(test_y)

     fnn.fit(train_x, train_y, batchsz=5, epochs=12)
     metrics = fnn.evaluate(test_x, test_y, choose=0)
     report = 'test data precision:{}, recall:{}, f1-score:{}'
     print(report.format(metrics['precision'],
                         metrics['recall'],
                         metrics['f1-score']))
     fnn.save_weights(JOINT_100_PATH)
Exemplo n.º 5
0
 def evaluation_load_model():
     """Evaluate a saved FNNModel (``JOINT_100_PATH``) on the test split.

     Only the test split is loaded (``load_train=False``). The model is
     evaluated with ``choose=1`` and precision, recall and f1-score are
     printed.
     """
     start = 0
     samples, labels = load_data_for_rnn_new_add_noise(
         load_train=False,
         data_set=3,
         train_num=700,
         test_num=400,
         noise_percent=10)
     fnn = FNNModel(time_step=12, feature_size=100)
     fnn.load_weights(JOINT_100_PATH)

     # The label list is updated in place; the return value is not used.
     process_rnn_label_list(labels, time_step=fnn.time_step, begin=start)
     samples = trans_to_wordvec_by_word2vec(
         samples,
         feature_size=100,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=fnn.time_step,
         begin=start)

     inputs = tf.constant(samples, dtype=tf.float32)
     targets = tf.constant(labels, dtype=tf.float32)
     metrics = fnn.evaluate(inputs, targets, choose=1)
     report = 'test data precision:{}, recall:{}, f1-score:{}'
     print(report.format(metrics['precision'],
                         metrics['recall'],
                         metrics['f1-score']))
Exemplo n.º 6
0
    def evaluation_mbrnn_best_params():
        """Report the mean f1-score of a KnowledgeDistillModel over 11 test sets.

        Eleven test splits are loaded with ``noise_percent`` running from 0 to
        10, preprocessed, and evaluated twice with weights restored from
        ``KNOWLEDGE_DISTILL_MODEL_700_PATH``: first with the default
        ``evaluate`` call (reported as ``brnn``), then with ``choose=1``
        (reported as ``fnn``). Each pass prints the per-set f1-score and the
        mean over all sets.
        """
        start = 0

        # Original intent (translated): anomaly ratio from 0 to 100%.
        # NOTE(review): the loader receives 0..10 while the printed label is
        # level * 10 -- confirm the unit that noise_percent expects.
        loaded = [
            load_data_for_rnn_new_add_noise(data_set=3,
                                            train_num=1,
                                            test_num=400,
                                            noise_percent=level,
                                            load_train=False)
            for level in range(11)
        ]

        prepared = []
        for samples, labels in loaded:
            # The label list is updated in place; the return value is not used.
            process_rnn_label_list(labels, time_step=TIME_STEP, begin=start)
            vectors = trans_to_wordvec_by_word2vec(
                samples,
                feature_size=WORD2VEC_FEATURE_NUM,
                word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
                type='rnn',
                time_step=TIME_STEP,
                begin=start)
            inputs = tf.constant(vectors, dtype=tf.float32)
            targets = tf.constant(labels, dtype=tf.float32)
            prepared.append((inputs, targets))

        print('-----------------------------------------------')
        model = KnowledgeDistillModel(time_step=TIME_STEP,
                                      feature_size=WORD2VEC_FEATURE_NUM,
                                      rnn_utils=RNN_UTILS,
                                      rnn_layers_num=RNN_LAYERS_NUM)
        model.load_weights(KNOWLEDGE_DISTILL_MODEL_700_PATH)

        def report_mean_f1(caption, **evaluate_kwargs):
            # One full pass over every prepared set, then the mean f1-score.
            total = 0
            for idx, (inputs, targets) in enumerate(prepared):
                f1 = model.evaluate(inputs, targets,
                                    **evaluate_kwargs)['f1-score']
                print(idx * 10, '%:', f1)
                total += f1
            total /= len(prepared)
            print(caption, total)

        report_mean_f1('brnn mean f1-score:')
        report_mean_f1('fnn mean f1-score:', choose=1)
Exemplo n.º 7
0
 def evaluation_hbrnn():
     """Train an HBRNN model and print its metrics on the test split.

     Data comes from ``load_data_for_rnn_new`` (50 train / 400 test samples
     requested from data set 3) and is converted with
     ``trans_to_wordvec_by_word2vec`` using ``type='attention'``. The tensor
     shapes are printed before fitting for 15 epochs with ``batchsz=10``.
     """
     start = 0
     train_x, train_y, test_x, test_y = load_data_for_rnn_new(
         data_set=3, train_num=50, test_num=400)
     model = HBRNN(time_step=12,
                   feature_size=100,
                   rnn_utils=64,
                   rnn_layers_num=1,
                   hidden_vector_size=64,
                   word_num=MAX_LEN)

     def to_vectors(samples):
         # Same conversion settings for the train and the test split.
         return trans_to_wordvec_by_word2vec(
             samples,
             feature_size=100,
             word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
             type='attention',
             time_step=model.time_step,
             begin=start)

     def as_float32(values):
         return tf.constant(values, dtype=tf.float32)

     # Modifies each label list in place to unify its dimensions.
     for labels in (train_y, test_y):
         process_rnn_label_list(labels, time_step=model.time_step, begin=start)

     train_x = to_vectors(train_x)
     test_x = to_vectors(test_x)
     train_x, train_y = as_float32(train_x), as_float32(train_y)
     test_x, test_y = as_float32(test_x), as_float32(test_y)

     print(train_x.shape, train_y.shape)
     print(test_x.shape, test_y.shape)

     model.fit(train_x, train_y, batchsz=10, epochs=15)
     metrics = model.evaluate(test_x, test_y)
     report = 'test data precision:{}, recall:{}, f1-score:{}'
     print(report.format(metrics['precision'],
                         metrics['recall'],
                         metrics['f1-score']))
Exemplo n.º 8
0
    def evaluation_mbrnn():
        """Train a GRUModel on 'swap'-noise data and save it to BRNN_600_PATH.

        Both splits are loaded with ``noise_percent=10`` and
        ``noise_type='swap'``, converted with ``trans_to_wordvec_by_word2vec``
        and wrapped as float32 tensors. The model is fitted for 15 epochs with
        ``batchsz=5``; test precision, recall and f1-score are printed before
        the weights are saved.
        """
        start = 0
        train_x, train_y, test_x, test_y = load_data_for_rnn_new_add_noise(
            data_set=3,
            train_num=600,
            test_num=400,
            noise_percent=10,
            noise_type='swap')
        gru = GRUModel(time_step=12,
                       feature_size=100,
                       rnn_utils=64,
                       rnn_layers_num=2)

        def to_vectors(samples):
            # Same conversion settings for the train and the test split.
            return trans_to_wordvec_by_word2vec(
                samples,
                feature_size=100,
                word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
                type='rnn',
                time_step=gru.time_step,
                begin=start)

        def as_float32(values):
            return tf.constant(values, dtype=tf.float32)

        # Modifies each label list in place to unify its dimensions.
        for labels in (train_y, test_y):
            process_rnn_label_list(labels, time_step=gru.time_step, begin=start)

        train_x = to_vectors(train_x)
        test_x = to_vectors(test_x)
        train_x, train_y = as_float32(train_x), as_float32(train_y)
        test_x, test_y = as_float32(test_x), as_float32(test_y)

        gru.fit(train_x, train_y, batchsz=5, epochs=15)
        metrics = gru.evaluate(test_x, test_y)
        report = 'test data precision:{}, recall:{}, f1-score:{}'
        print(report.format(metrics['precision'],
                            metrics['recall'],
                            metrics['f1-score']))
        gru.save_weights(BRNN_600_PATH)
 def evaluation_mbrnn1():
     """Fit a TeacherModel on the test split of data set 1.

     The model is constructed from ``BBRNN_MODEL_PATH`` and
     ``FNN_MODEL_PATH``. Only the test split is preprocessed and it is the
     data handed to ``fit`` (10 epochs, ``batchsz=10``); nothing is evaluated
     or saved here.
     """
     start = 0
     # The train split is returned by the loader but never used.
     _train_x, _train_y, samples, labels = load_data_for_rnn_new_add_noise(
         data_set=1, train_num=1, test_num=400)
     teacher = TeacherModel(time_step=TIME_STEP,
                            feature_size=WORD2VEC_FEATURE_NUM,
                            rnn_utils=RNN_UTILS,
                            rnn_layers_num=RNN_LAYERS_NUM,
                            brnn_model_path=BBRNN_MODEL_PATH,
                            fnn_model_path=FNN_MODEL_PATH)

     # The label list is updated in place; the return value is not used.
     process_rnn_label_list(labels, time_step=teacher.time_step, begin=start)
     samples = trans_to_wordvec_by_word2vec(
         samples,
         feature_size=WORD2VEC_FEATURE_NUM,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=teacher.time_step,
         begin=start)

     inputs = tf.constant(samples, dtype=tf.float32)
     targets = tf.constant(labels, dtype=tf.float32)

     # NOTE(review): this fits on the *test* split -- presumably intentional
     # given train_num=1, but confirm it is not a train/test mix-up.
     teacher.fit(inputs, targets, batchsz=10, epochs=10)
Exemplo n.º 10
0
 def evaluation_mbrnn():
     """Train a GRUModel1, print its test metrics and save the weights.

     Data comes from ``load_data_for_rnn_new`` (500 train / 400 test samples
     requested from data set 3). The model is fitted for 15 epochs with
     ``batchsz=10``. The weights are written below ``ROOT_PATH``; the path is
     built with ``os.path.join`` so it no longer relies on a hard-coded
     Windows ``\\`` separator.
     """
     import os

     train_x, train_y, test_x, test_y = load_data_for_rnn_new(data_set=3,
                                                              train_num=500,
                                                              test_num=400)
     brnn = GRUModel1(time_step=12,
                      feature_size=100,
                      rnn_utils=64,
                      rnn_layers_num=2)
     begin = 0
     # Modifies each label list in place to unify its dimensions.
     process_rnn_label_list(train_y, time_step=brnn.time_step, begin=begin)
     process_rnn_label_list(test_y, time_step=brnn.time_step, begin=begin)
     train_x = trans_to_wordvec_by_word2vec(
         train_x,
         feature_size=100,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=brnn.time_step,
         begin=begin)
     test_x = trans_to_wordvec_by_word2vec(
         test_x,
         feature_size=100,
         word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
         type='rnn',
         time_step=brnn.time_step,
         begin=begin)
     train_x, train_y = tf.constant(train_x, dtype=tf.float32), tf.constant(
         train_y, dtype=tf.float32)
     test_x, test_y = tf.constant(test_x, dtype=tf.float32), tf.constant(
         test_y, dtype=tf.float32)
     brnn.fit(train_x, train_y, batchsz=10, epochs=15)
     ev = brnn.evaluate(test_x, test_y)
     template = 'test data precision:{}, recall:{}, f1-score:{}'
     print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
     model_path = os.path.join(ROOT_PATH,
                               'wbrnn_feature_block_attention_11_24_19_30')
     brnn.save_weights(model_path)
Exemplo n.º 11
0
    def evaluation_mbrnn_load_model():
        """Compare a DynamicWeightHybridModel on clean vs. noisy test data.

        Two test splits are loaded (``noise_percent=0`` and
        ``noise_percent=10``, both ``noise_type='shuffle'``) and evaluated
        with weights restored from ``DW_HYBRID_600_PATH``. Precision, recall
        and f1-score are printed for both; the weights returned by
        ``evaluate(..., return_weight=True)`` are drawn as a scatter plot
        that is saved to ``lspd_weight_distribution.pdf`` under ``ROOT_PATH``
        and then shown.

        Changes relative to the earlier revision: the word-vector width uses
        ``WORD2VEC_FEATURE_NUM`` so it always matches the model's
        ``feature_size``; the x axis is derived from the same ``test_num``
        that is passed to the loader; and the output path is built with
        ``os.path.join`` instead of a hard-coded Windows separator.
        """
        import os

        test_num = 400
        begin = 0

        test_x, test_y = load_data_for_rnn_new_add_noise(data_set=3,
                                                         train_num=1,
                                                         test_num=test_num,
                                                         noise_type='shuffle',
                                                         noise_percent=0,
                                                         load_train=False)
        test_x_noise, test_y_noise = load_data_for_rnn_new_add_noise(
            data_set=3,
            train_num=1,
            test_num=test_num,
            noise_type='shuffle',
            noise_percent=10,
            load_train=False)
        brnn = DynamicWeightHybridModel(time_step=TIME_STEP,
                                        feature_size=WORD2VEC_FEATURE_NUM,
                                        rnn_utils=RNN_UTILS,
                                        rnn_layers_num=RNN_LAYERS_NUM)
        brnn.load_weights(DW_HYBRID_600_PATH)

        def prepare(samples, labels):
            # Shared preprocessing: the label list is updated in place, the
            # samples are converted, and both are wrapped as float32 tensors.
            process_rnn_label_list(labels,
                                   time_step=brnn.time_step,
                                   begin=begin)
            vectors = trans_to_wordvec_by_word2vec(
                samples,
                feature_size=WORD2VEC_FEATURE_NUM,
                word2vec_model=word2vec_model_path_zhwiki_rnn_update_20_923,
                type='rnn',
                time_step=brnn.time_step,
                begin=begin)
            return (tf.constant(vectors, dtype=tf.float32),
                    tf.constant(labels, dtype=tf.float32))

        test_x, test_y = prepare(test_x, test_y)
        test_x_noise, test_y_noise = prepare(test_x_noise, test_y_noise)

        ev_noise, weight_noise = brnn.evaluate(test_x_noise,
                                               test_y_noise,
                                               return_weight=True)
        ev, weight = brnn.evaluate(test_x, test_y, return_weight=True)
        template = 'test data precision:{}, recall:{}, f1-score:{}'
        print(template.format(ev['precision'], ev['recall'], ev['f1-score']))
        print(
            template.format(ev_noise['precision'], ev_noise['recall'],
                            ev_noise['f1-score']))

        # One x position per requested test sample.
        x = list(range(test_num))

        # First series: noisy test set, red triangles, 50% alpha.
        plt.scatter(x,
                    weight_noise,
                    c="r",
                    alpha=0.5,
                    label="abnormal test set",
                    s=15,
                    marker='^')
        # Second series: clean test set, green stars, 50% alpha.
        plt.scatter(x,
                    weight,
                    c="g",
                    alpha=0.5,
                    label="normal test set",
                    s=15,
                    marker='*')
        plt.xlabel('index of resume sample')
        plt.ylabel('total weight value of each time step ')
        plt.legend(loc='best')
        plt.title('weight value distribution')
        plt.savefig(os.path.join(ROOT_PATH, 'lspd_weight_distribution.pdf'))
        plt.show()