Exemplo n.º 1
0
def predict_main(list_posts, data_folder_path, saved_model_path,
                 temp_save_path):
    """Run the saved 'hier_fuse' model(s) over ``list_posts``.

    The posts are loaded, featurized with a fixed feature configuration,
    fed through the saved model(s) and the resulting predictions are
    translated through ``data_dict['FOR_LMAP']``.

    Args:
        list_posts: Posts to classify; passed straight to ``load_data``.
        data_folder_path: Folder prefix for data files. It is joined by plain
            string concatenation, so it must already end with a path
            separator.
        saved_model_path: Location of the saved model(s), forwarded to
            ``gen_raw_output``.
        temp_save_path: Save-folder argument forwarded to the featurizers.

    Returns:
        A list with one inner list per entry of the ``evaluate_model``
        result; each inner list holds the ``FOR_LMAP`` lookup of every
        predicted value in that entry.
    """
    # --- fixed configuration -------------------------------------------
    architecture = 'hier_fuse'
    word_feature_specs = [
        {'emb': 'elmo', 's_enc': 'rnn', 'm_id': '11'},
        {'emb': 'glove', 's_enc': 'rnn', 'm_id': '21'},
    ]
    sent_feature_specs = [{'emb': 'bert_pre', 'm_id': '1'}]
    # Embedding name -> vector size lookups handed to the featurizers.
    word_emb_dims = {'glove': 300, 'ling': 33, 'elmo': 3072}
    sent_emb_dims = {'bert_pre': 768}
    # Two label groups, each listing labels 0..13; the number of groups is
    # what gets passed to gen_raw_output below.
    label_groups = [list(range(14)) for _ in range(2)]

    # --- data loading and featurization --------------------------------
    # NOTE(review): 16 / 35 / 198 are presumably the sentence/word/post
    # length limits expected by load_data -- confirm against its signature.
    class_map_file = data_folder_path + 'esp_class_maps.txt'
    data_dict = load_data(list_posts, 16, 35, 198, class_map_file)

    # Judging by the other call sites in this file, the trailing positional
    # flags are (use_saved, save, data folder, save folder, test_mode).
    word_feats, _ = word_featurize(
        word_feature_specs, architecture, data_dict, word_emb_dims,
        False, False, data_folder_path, temp_save_path, True)
    sent_enc_feats, _ = sent_enc_featurize(
        sent_feature_specs, architecture, data_dict, sent_emb_dims,
        False, False, data_folder_path, temp_save_path, True)

    # --- inference -----------------------------------------------------
    test_indices = np.arange(data_dict['test_st_ind'],
                             data_dict['test_en_ind'])
    test_generator = TestGenerator(test_indices, word_feats, sent_enc_feats,
                                   data_dict, 64)
    raw_outputs = gen_raw_output(len(label_groups), saved_model_path,
                                 test_generator, data_dict['NUM_CLASSES'])
    predictions = evaluate_model(raw_outputs, data_dict, label_groups)

    # --- translate predictions through the label map -------------------
    label_map = data_dict['FOR_LMAP']
    return [[label_map[value] for value in prediction]
            for prediction in predictions]
Exemplo n.º 2
0
     sep_task_name, sep_task_dict = sep_task_tup
     sep_task_dict['word_feats'], sep_task_dict[
         'word_feat_str'] = word_featurize(
             word_feats_raw, model_type, sep_task_dict,
             conf_dict_com['poss_word_feats_emb_dict'],
             conf_dict_com['use_saved_word_feats'],
             conf_dict_com['save_word_feats'],
             conf_dict_com["data_folder_name"],
             conf_dict_com["save_folder_name"],
             conf_dict_com["test_mode"],
             get_filename(sep_task_dict['filename']))
 for sent_enc_feats_raw in conf_dict["sent_enc_feats_l"]:
     sent_enc_feats, sent_enc_feat_str = sent_enc_featurize(
         sent_enc_feats_raw, model_type, data_dict,
         conf_dict_com['poss_sent_enc_feats_emb_dict'],
         conf_dict_com['use_saved_sent_enc_feats'],
         conf_dict_com['save_sent_enc_feats'],
         conf_dict_com["data_folder_name"],
         conf_dict_com["save_folder_name"],
         conf_dict_com["test_mode"], prime_filename)
     for sep_task_tup in conf_dict_com[
             'sep_inp_tasks_list']:
         sep_task_name, sep_task_dict = sep_task_tup
         sep_task_dict['sent_enc_feats'], sep_task_dict[
             'sent_enc_feat_str'] = sent_enc_featurize(
                 sent_enc_feats_raw, model_type,
                 sep_task_dict, conf_dict_com[
                     'poss_sent_enc_feats_emb_dict'],
                 conf_dict_com['use_saved_sent_enc_feats'],
                 conf_dict_com['save_sent_enc_feats'],
                 conf_dict_com["data_folder_name"],
                 conf_dict_com["save_folder_name"],
        f_tsv.write("model\tword feats\tsent feats\ttrans\tclass imb\tcnn fils\tcnn kernerls\tthresh\trnn dim\tatt dim\tpool k\tstack RNN\tf\tp\tr\tacc\tstd_f\trnn type\tl rate\tb size\tdr1\tdr2\ttest mode\tclassi probs labels\n") 

# Report the size limits stored in data_dict.
print(
    "max # sentences: %d, max # words per sentence: %d, max # words per post: %d"
    % (
        data_dict['max_num_sent'],
        data_dict['max_words_sent'],
        data_dict['max_post_length'],
    )
)

metr_dict = init_metr_dict(data_dict['prob_type'])

# Compact text form of the label groups, reused below in the info string and
# the file-name part: strip the outer "[[" / "]]", join groups with '+' and
# labels with '_' (e.g. [[0, 1], [2]] becomes '0_1+2').
classi_probs_label_str = (
    str(conf_dict_com['classi_probs_label_info'])[2:-2]
    .replace('], [', '+')
    .replace(', ', '_')
)
for conf_dict in conf_dict_list:
    for prob_trans_type in conf_dict["prob_trans_types"]:
        trainY_list, trainY_noncat_list, num_classes_list, bac_map = transform_labels(data_dict['lab'][:data_dict['train_en_ind']], prob_trans_type, conf_dict_com['test_mode'], conf_dict_com["save_folder_name"], data_dict['NUM_CLASSES'], data_dict['prob_type'], conf_dict_com['classi_probs_label_info'], classi_probs_label_str, conf_dict_com['use_saved_data_stuff'], conf_dict_com['save_data_stuff'])
        for class_imb_flag in conf_dict["class_imb_flags"]:
            loss_func_list, nonlin, out_vec_size_list, cw_list = class_imb_loss_nonlin(trainY_noncat_list, class_imb_flag, num_classes_list, prob_trans_type, conf_dict_com['test_mode'], conf_dict_com["save_folder_name"], conf_dict_com['use_saved_data_stuff'], conf_dict_com['save_data_stuff'], classi_probs_label_str)
            for model_type in conf_dict["model_types"]:
                for word_feats_raw in conf_dict["word_feats_l"]:
                    word_feats, word_feat_str = word_featurize(word_feats_raw, model_type, data_dict, conf_dict_com['poss_word_feats_emb_dict'], conf_dict_com['use_saved_word_feats'], conf_dict_com['save_word_feats'], conf_dict_com["data_folder_name"], conf_dict_com["save_folder_name"], conf_dict_com["test_mode"])
                    for sent_enc_feats_raw in conf_dict["sent_enc_feats_l"]:
                        sent_enc_feats, sent_enc_feat_str = sent_enc_featurize(sent_enc_feats_raw, model_type, data_dict, conf_dict_com['poss_sent_enc_feats_emb_dict'], conf_dict_com['use_saved_sent_enc_feats'], conf_dict_com['save_sent_enc_feats'], conf_dict_com["data_folder_name"], conf_dict_com["save_folder_name"], conf_dict_com["test_mode"])
                        for num_cnn_filters in conf_dict["num_cnn_filters"]:
                            for max_pool_k_val in conf_dict["max_pool_k_vals"]:
                                for cnn_kernel_set in conf_dict["cnn_kernel_sets"]:
                                    cnn_kernel_set_str = str(cnn_kernel_set)[1:-1].replace(', ','_')
                                    for rnn_type in conf_dict["rnn_types"]:
                                        for rnn_dim in conf_dict["rnn_dims"]:
                                            for att_dim in conf_dict["att_dims"]:
                                                for stack_rnn_flag in conf_dict["stack_rnn_flags"]:
                                                    mod_op_list_save_list = []
                                                    for thresh in conf_dict["threshes"]:
                                                        startTime = time.time()
                                                        info_str = "model: %s, word_feats = %s, sent_enc_feats = %s, classi_probs_label_info = %s, prob_trans_type = %s, class_imb_flag = %s, num_cnn_filters = %s, cnn_kernel_set = %s, rnn_type = %s, rnn_dim = %s, att_dim = %s, max_pool_k_val = %s, stack_rnn_flag = %s, thresh = %s, test mode = %s" % (model_type,word_feat_str,sent_enc_feat_str,classi_probs_label_str,prob_trans_type,class_imb_flag,num_cnn_filters,cnn_kernel_set,rnn_type,rnn_dim,att_dim,max_pool_k_val,stack_rnn_flag, thresh, conf_dict_com["test_mode"])
                                                        fname_part = ("%s~%s~%s~%s~%s~%s~%s~%s~%s~%s~%s~%s~%s~%s" % (model_type,word_feat_str,sent_enc_feat_str,classi_probs_label_str,prob_trans_type,class_imb_flag,num_cnn_filters,cnn_kernel_set_str,rnn_type,rnn_dim,att_dim,max_pool_k_val,stack_rnn_flag, conf_dict_com["test_mode"]))
                                                        pred_vals_across_runs = []
                                                        for run_ind in range(conf_dict_com["num_runs"]):