Exemplo n.º 1
0
def run_entropy(dataset_name,
                save_dir,
                k_list,
                models,
                feature_types,
                second=False):
    """Plot the per-model entropy curves over the values in ``k_list``.

    The data set is loaded through ``utils.load_data``; only the test tokens
    and test labels of the returned splits are used. For each model the
    dictionaries built by ``als.create_model_d`` are passed to
    ``als.get_entropy``, and the collected results are handed to
    ``simi.show_simi_plot``.

    Args:
        dataset_name: Forwarded to ``utils.load_data``.
        save_dir: Forwarded to ``als.create_model_d``.
        k_list: Passed to ``als.get_entropy`` and used as the x data of the
            plot; the x axis spans ``min(k_list) - 0.5`` to
            ``max(k_list) + 0.5``.
        models: Iterable of model names. ``np.min``/``np.max`` are applied to
            the per-model extrema, so an empty iterable is not supported.
        feature_types: Not used by this function.
        second: Forwarded unchanged to ``simi.show_simi_plot``.
    """
    # Only the test split is consumed; the unpacking still expects 8 values.
    (_train_tokens, _dev_tokens, _train_dev_tokens, test_tokens,
     _train_labels, _dev_labels, _train_dev_labels,
     test_labels) = utils.load_data(dataset_name)

    # One (curves, lowest, highest) entry per model, in the order of `models`.
    per_model = []
    for name in models:
        model_dicts, model_keys = als.create_model_d(save_dir,
                                                     name,
                                                     test_labels=test_labels)
        entropies = als.get_entropy(test_tokens, model_dicts, model_keys,
                                    k_list)
        # One result is expected per dictionary key.
        assert len(entropies) == len(model_keys)
        lowest, highest = als.get_min_max(entropies)
        per_model.append((entropies, lowest, highest))

    curves = [entry[0] for entry in per_model]
    # Pad the y range by 0.25 on both sides, never letting it go below zero.
    y_lower = max(0, np.min([entry[1] for entry in per_model]) - 0.25)
    y_upper = np.max([entry[2] for entry in per_model]) + 0.25

    simi.show_simi_plot(
        k_list, curves, 'Number of important features (k)', 'Entropy', '',
        (13, 12), '',
        x_min=np.min(k_list) - 0.5,
        x_max=np.max(k_list) + 0.5,
        y_min=y_lower,
        y_max=y_upper,
        if_model=True,
        second=second,
        if_combi=False,
        if_builtin_posthoc=True)
Exemplo n.º 2
0
def run_simi_length(dataset_name,
                    save_dir,
                    k_list,
                    models,
                    feature_types,
                    folder_name,
                    var,
                    second=False):
    """Plot, per model, the ``als.get_rho`` results over the values in ``k_list``.

    A per-test-instance variable is derived from the test tokens (see ``var``)
    and passed, together with each model's dictionaries and the combinations
    from ``als.get_model_combinations``, to ``als.get_rho``. The results are
    drawn by ``simi.show_simi_plot`` with the y axis labelled
    'Spearman correlation'.

    Args:
        dataset_name: Forwarded to ``utils.load_data``.
        save_dir: Forwarded to ``als.create_model_d``.
        k_list: Passed to ``als.get_rho`` and used as the x data of the plot;
            the x axis spans ``min(k_list) - 0.5`` to ``max(k_list) + 0.5``.
        models: Iterable of model names. ``np.min``/``np.max`` are applied to
            the per-model extrema, so an empty iterable is not supported.
        feature_types: Not used by this function.
        folder_name: Not used by this function.
        var: ``'len'`` selects ``als.get_tokens_length``; any other value
            selects ``als.get_tokens_ratio``.
        second: Forwarded unchanged to ``simi.show_simi_plot``.
    """
    # Only the test split is consumed; the unpacking still expects 8 values.
    (_train_tokens, _dev_tokens, _train_dev_tokens, test_tokens,
     _train_labels, _dev_labels, _train_dev_labels,
     test_labels) = utils.load_data(dataset_name)

    measure = als.get_tokens_length if var == 'len' else als.get_tokens_ratio
    variable_l = measure(test_tokens)

    # generate models line plot
    # One (curves, lowest, highest) entry per model, in the order of `models`.
    per_model = []
    for name in models:
        combos = als.get_model_combinations()
        model_dicts, _model_keys = als.create_model_d(save_dir, name,
                                                      test_labels)
        rho_curves = als.get_rho(test_tokens, model_dicts, combos, k_list,
                                 variable_l)
        lowest, highest = als.get_min_max(rho_curves)
        per_model.append((rho_curves, lowest, highest))

    curves = [entry[0] for entry in per_model]
    # Pad by 0.05 and make sure the range always covers [-0.05, 0.05],
    # so the zero line stays visible.
    y_lower = min(np.min([entry[1] for entry in per_model]) - 0.05, 0 - 0.05)
    y_upper = max(np.max([entry[2] for entry in per_model]) + 0.05, 0 + 0.05)

    simi.show_simi_plot(
        k_list, curves, 'Number of important features (k)',
        'Spearman correlation', '',
        (13, 12), '',
        x_min=np.min(k_list) - 0.5,
        x_max=np.max(k_list) + 0.5,
        y_min=y_lower,
        y_max=y_upper,
        if_model=True,
        second=second,
        if_builtin_posthoc=True)
Exemplo n.º 3
0
def run_js_pos(dataset_name,
               data_dir,
               save_dir,
               models,
               feature_types,
               k_list,
               second=False):
    """Plot, per model, the ``get_jensen_shannon`` results in 'background' mode.

    The test tokens and the test part-of-speech data (from ``utils.get_pos``)
    are combined by ``als.get_token_pos_d``. For each model the dictionaries
    from ``als.create_model_d`` are passed to ``get_jensen_shannon`` with the
    mode string ``'background'``, and the collected results are drawn by
    ``simi.show_simi_plot`` with the y axis labelled 'Jensen-Shannon Score'.

    Args:
        dataset_name: Forwarded to ``utils.load_data`` and ``utils.get_pos``.
        data_dir: Forwarded to ``utils.get_pos``.
        save_dir: Forwarded to ``als.create_model_d``.
        models: Iterable of model names. ``np.min``/``np.max`` are applied to
            the per-model extrema, so an empty iterable is not supported.
        feature_types: Not used by this function.
        k_list: Passed to ``get_jensen_shannon`` and used as the x data of
            the plot.
        second: Forwarded unchanged to ``simi.show_simi_plot``.
    """
    # Only the test split is consumed; the unpacking still expects 8 values.
    (_train_tokens, _dev_tokens, _train_dev_tokens, test_tokens,
     _train_labels, _dev_labels, _train_dev_labels,
     test_labels) = utils.load_data(dataset_name)
    # Likewise only the test POS data is needed out of the four returned.
    _train_pos, _dev_pos, _train_dev_pos, test_pos = utils.get_pos(
        dataset_name, data_dir)
    token_pos_d = als.get_token_pos_d(test_tokens, test_pos)

    # compare with background
    # One (curves, lowest, highest) entry per model, in the order of `models`.
    per_model = []
    for name in models:
        model_dicts, model_keys = als.create_model_d(save_dir,
                                                     name,
                                                     test_labels=test_labels)
        js_curves = get_jensen_shannon(
            test_tokens, model_dicts, model_keys, k_list, 'background',
            combinations=model_keys, token_pos_d=token_pos_d)
        lowest, highest = als.get_min_max(js_curves)
        per_model.append((js_curves, lowest, highest))

    curves = [entry[0] for entry in per_model]
    # Pad the observed y range by 0.05 on both sides.
    y_lower = np.min([entry[1] for entry in per_model]) - 0.05
    y_upper = np.max([entry[2] for entry in per_model]) + 0.05

    simi.show_simi_plot(
        k_list, curves, 'Number of important features (k)',
        'Jensen-Shannon Score', '',
        (13, 12), '',
        y_min=y_lower,
        y_max=y_upper,
        if_model=True,
        second=second,
        if_combi=False,
        if_background=True,
        if_builtin_posthoc=True)