Exemplo n.º 1
0
def calculate_and_plot_feature_selection_score(input, selection, name):
    """Plot cross-validated 5-NN scores against the number of selected features.

    ``input`` is handed to ``helper.input_normalization`` together with the
    module-level ``a`` and ``count``. One curve is drawn for every dataset
    that call returns; the curves are labelled, in order, "no normalization"
    and "normalization" (label text is Polish). For each entry of the
    module-level ``features``, ``selection`` produces a reduced dataset, and
    each reduced dataset is then scored by ``helper.cross_validation`` with a
    ``KNeighborsClassifier(n_neighbors=5)``.

    Args:
        input: Dataset forwarded to ``helper.input_normalization``.
        selection: Callable invoked as
            ``selection(dataset, n_features, cv_scores)``. ``cv_scores`` is a
            list shared by all calls made for the same dataset.
        name: Text placed at the start of the plot title.

    Returns:
        None. Each curve's score list is appended to the module-level
        ``all_scores`` and the curves are drawn on a newly created figure;
        the figure is not shown here.
    """
    variants = helper.input_normalization(input, a, count)
    figure = plt.figure()
    curve_labels = ["brak normalizacji", "normalizacja"]

    # One pass per dataset variant (without and with normalization).
    for position, dataset in enumerate(variants):
        shared_cv_scores = []

        # Run every feature selection first, then score the results, so the
        # helper calls happen in the same order as a two-loop version.
        reduced_sets = [
            selection(dataset, n_features, shared_cv_scores)
            for n_features in features
        ]
        accuracies = [
            helper.cross_validation(reduced,
                                    [KNeighborsClassifier(n_neighbors=5)])
            for reduced in reduced_sets
        ]

        all_scores.append(accuracies)
        plt.plot(features, accuracies, label=curve_labels[position])

    plt.title(name + " dla algorytmu 5-najbliszych sasiadow")
    plt.legend()
    axes = figure.gca()
    plt.xlabel("Liczba Cech")
    plt.ylabel("Dokladnosc")
    # One x tick per evaluated feature count; y ticks every 0.1 below 1.
    axes.set_xticks(features)
    axes.set_yticks(np.arange(0, 1., 0.1))
    plt.grid()
Exemplo n.º 2
0
def k_choice(input):
    """Plot k-NN cross-validation scores for k = 1..9 and print the best k values.

    For each Minkowski power ``p`` in (1, 2), titled Manhattan and Euclidean
    respectively, ``helper.cross_validation`` is called with ``input`` and a
    ``KNeighborsClassifier(n_neighbors=k, p=p, metric='minkowski')`` for
    every ``k`` from 1 to 9. Each metric gets its own subplot of score
    against ``k``.

    After both metrics are evaluated, all (score, k) pairs are ranked by
    score in descending order and the ``k`` of each of the four best pairs is
    printed, one per line. The printed values do not say which metric they
    came from. Finally the figure is shown.

    Args:
        input: Dataset forwarded unchanged to ``helper.cross_validation``.

    Returns:
        None.
    """
    # Titles consist of the metric name only. Building them from an
    # always-empty suffix list raised IndexError before anything was drawn.
    metrics = (
        (1, "Manhattan distance measurement"),
        (2, "Euclidean distance measurement"),
    )
    neighbor_counts = list(range(1, 10))
    scored_ks = []  # (score, k) pairs collected over both metrics

    for subplot_index, (p, title) in enumerate(metrics, start=1):
        metric_scores = [
            helper.cross_validation(
                input,
                KNeighborsClassifier(n_neighbors=k, p=p, metric='minkowski'))
            for k in neighbor_counts
        ]
        scored_ks.extend(zip(metric_scores, neighbor_counts))

        # Only the first two cells of the 4x2 grid are filled here.
        plt.subplot(4, 2, subplot_index)
        plt.title(title)
        plt.plot(neighbor_counts, metric_scores)
        plt.xlabel('Number of Neighbors K')
        plt.ylabel('Learning level')

    # sorted() is stable, so equal scores keep their evaluation order.
    ranked = sorted(scored_ks, key=lambda pair: pair[0], reverse=True)
    for _, k in ranked[:4]:
        print(k)
    plt.show()
Exemplo n.º 3
0
import sys

sys.path.append("../")
from helper.utils import pd, Print, random_seed_cpu
from helper.cross_validation import *
from config import *
from tqdm import tqdm

# Seed before any splitting happens. random_seed_cpu comes from helper.utils;
# presumably it fixes the CPU-side RNG seeds -- confirm in that module.
random_seed_cpu(RANDOM_STATE)

# Load the pickled target table and keep only the rows whose label is present.
target = pd.read_pickle(join(proc_data_path, "target.p"))
target = target[~target.label.isna()]

# Splitter options, grouped so the configuration can be read at a glance.
# NOTE(review): kfold_type is "skfold" while stratify is False; how the helper
# reconciles the two is not visible here -- verify in helper.cross_validation.
_cv_settings = dict(
    _id_="fid",
    target_name="label",
    kfold_type="skfold",
    output_dir=proc_data_path,
    split_ratio=0.1,
    nfolds=5,
    random_state=RANDOM_STATE,
    shuffle=True,
    stratify=False,
    tag="_fold_5",
    group_name="fid",
)
CV = cross_validation(train_df=target, **_cv_settings)

# Rebind target to whatever the splitter returns.
target = CV.split()