def test_optimisation_RNAClassifier():

    # ----- Plot the distribution followed by the log of a variable drawn from the powerlognorm distribution that we will choose for the alpha parameter -----

    fig, ax = plt.subplots(1, 1)

    c, s = 1, 1
    y = stats.powerlognorm.rvs(c, s, scale=0.01,
                               size=10000)  # scale controls the centering.

    ax.hist(np.log10(y), bins=30)

    plt.show()

    # ----- The randomized search -----

    # Import the data
    excel_table = readMultipleCsv('names')

    # Base estimator, used in particular to select the solver to test
    base_estimator = MLPClassifier(solver="sgd", max_iter=1000,
                                   verbose=True)  # or solver="adam"

    hidden_layer_sizes = [
        tuple(np.random.randint(20, 35, np.random.randint(3, 5, 1)))
        for i in range(500)
    ]
    supprDoublon(hidden_layer_sizes)

    # Parameters to adjust depending on what we want to test
    param = {
        "alpha": stats.powerlognorm(1, 1, scale=0.01),
        "hidden_layer_sizes": hidden_layer_sizes,
        "activation": ["logistic", "tanh", "relu"],
        "learning_rate": ["constant", "invscaling", "adaptive"],
        "learning_rate_init": stats.powerlognorm(1, 1, scale=0.001),
        "batch_size": np.arange(
            200, 500, 10
        )  # batch_size: number of samples the estimator trains on per mini-batch
    }  # "learning_rate" only applies to the sgd solver

    RNAClassifier_random_search(excel_table, base_estimator, param, 1)
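
# RNAClassifier_random_search is a project helper that is not shown in this snippet. A minimal
# sketch of what it might do, assuming it wraps sklearn's RandomizedSearchCV and reuses the
# project helpers BDDminimal, featuresLabel and scaling defined elsewhere in the module (the
# trailing argument `1` above is taken to be the number of sampled candidates, n_iter):
def RNAClassifier_random_search_sketch(data, base_estimator, param, n_iter):
    from sklearn.model_selection import RandomizedSearchCV, train_test_split
    from sklearn.preprocessing import LabelEncoder

    # Same pre-processing as in the cross-validation functions below
    x, y = featuresLabel(BDDminimal(data))
    x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size=0.2, shuffle=True)
    scaling(x_train, x_test)
    y_train_encode = LabelEncoder().fit_transform(y_train).ravel()

    # Frozen scipy distributions (alpha, learning_rate_init) are sampled via their rvs() method
    search = RandomizedSearchCV(base_estimator, param, n_iter=n_iter, cv=5, n_jobs=-3, verbose=1)
    search.fit(x_train, y_train_encode)
    print(search.best_params_, search.best_score_)
    return search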
Example no. 2
# exponential activities: sample two exponential populations and flag low-density points as outliers
rv = expon(loc=loc1, scale=scale1)
rv2 = expon(loc=loc2, scale=scale2)
x = rv.rvs(size=1000)
x2 = rv2.rvs(size=1000)
isOutlier = [expon.pdf(xi, loc=loc1, scale=scale1) < 0.01 for xi in x]
data = [[xi, isOutlieri] for xi, isOutlieri in zip(x, isOutlier)]
isOutlier2 = [expon.pdf(xi, loc=loc2, scale=scale2) < 0.01 for xi in x2]
data = data + [[xi, isOutlieri] for xi, isOutlieri in zip(x2, isOutlier2)]
fig, ax = plt.subplots(1, 1)
ax.hist([i[0] for i in data], density=True, histtype='stepfilled', alpha=0.2)
plt.show()
activitiesTimes.append(data)

# powerlognorm activity: same outlier-labelling scheme with a power log-normal distribution
c, s = 2.14, 0.446
rv = powerlognorm(c, s, scale=3, loc=5)
x = rv.rvs(size=2000)
isOutlier = [powerlognorm.pdf(xi, c, s, loc=5, scale=3) < 0.01 for xi in x]
data = [[xi, isOutlieri] for xi, isOutlieri in zip(x, isOutlier)]
fig, ax = plt.subplots(1, 1)
ax.hist([i[0] for i in data], density=True, histtype='stepfilled', alpha=0.2)
plt.show()
activitiesTimes.append(data)
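
# The two blocks above repeat the same sample-and-flag pattern. A minimal refactoring
# sketch, assuming a frozen scipy.stats distribution is passed in (the helper name
# labelled_samples is hypothetical, not part of the original script):
def labelled_samples(frozen_dist, size, threshold=0.01):
    # Draw `size` samples and pair each one with an outlier flag (pdf below `threshold`)
    samples = frozen_dist.rvs(size=size)
    return [[xi, frozen_dist.pdf(xi) < threshold] for xi in samples]

# e.g. activitiesTimes.append(labelled_samples(powerlognorm(2.14, 0.446, loc=5, scale=3), 2000))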

# we have 4 types of events and now create traces until all events from the data are used
minTrace, maxTrace = 5, 25
numberOfEvents = sum([len(i) for i in activitiesTimes])
traces = []
dataVectors = [[] for _ in range(len(activitiesTimes))]
while numberOfEvents > 0:
    eventsInTrace = random.randint(minTrace, maxTrace)
Example no. 3
def all_dists():
    # Distribution parameters were taken from the official scipy.stats
    # documentation examples.
    # Total - 89
    return {
        "alpha":
        stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit":
        stats.anglit(loc=0.0, scale=1.0),
        "arcsine":
        stats.arcsine(loc=0.0, scale=1.0),
        "beta":
        stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime":
        stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford":
        stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr":
        stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy":
        stats.cauchy(loc=0.0, scale=1.0),
        "chi":
        stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2":
        stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine":
        stats.cosine(loc=0.0, scale=1.0),
        "dgamma":
        stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull":
        stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang":
        stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon":
        stats.expon(loc=0.0, scale=1.0),
        "exponnorm":
        stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib":
        stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow":
        stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f":
        stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife":
        stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk":
        stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy":
        stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm":
        stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic":
        stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto":
        stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm":
        stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon":
        stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0, scale=1.0),
        "genextreme":
        stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper":
        stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18, loc=0.0, scale=1.0),
        "gamma":
        stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma":
        stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic":
        stats.genhalflogistic(c=0.773, loc=0.0, scale=1.0),
        "gilbrat":
        stats.gilbrat(loc=0.0, scale=1.0),
        "gompertz":
        stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r":
        stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l":
        stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy":
        stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic":
        stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm":
        stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm":
        stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant":
        stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma":
        stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss":
        stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull":
        stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb":
        stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu":
        stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone":
        stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign":
        stats.kstwobign(loc=0.0, scale=1.0),
        "laplace":
        stats.laplace(loc=0.0, scale=1.0),
        "levy":
        stats.levy(loc=0.0, scale=1.0),
        "levy_l":
        stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable":
        stats.levy_stable(alpha=0.357, beta=-0.675, loc=0.0, scale=1.0),
        "logistic":
        stats.logistic(loc=0.0, scale=1.0),
        "loggamma":
        stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace":
        stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm":
        stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax":
        stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell":
        stats.maxwell(loc=0.0, scale=1.0),
        "mielke":
        stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami":
        stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2":
        stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf":
        stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct":
        stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm":
        stats.norm(loc=0.0, scale=1.0),
        "pareto":
        stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3":
        stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw":
        stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm":
        stats.powerlognorm(c=2.14, s=0.446, loc=0.0, scale=1.0),
        "powernorm":
        stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist":
        stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal":
        stats.reciprocal(a=0.00623, b=1.01, loc=0.0, scale=1.0),
        "rayleigh":
        stats.rayleigh(loc=0.0, scale=1.0),
        "rice":
        stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss":
        stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular":
        stats.semicircular(loc=0.0, scale=1.0),
        "t":
        stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang":
        stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon":
        stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm":
        stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda":
        stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform":
        stats.uniform(loc=0.0, scale=1.0),
        "vonmises":
        stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line":
        stats.vonmises_line(kappa=3.99, loc=0.0, scale=1.0),
        "wald":
        stats.wald(loc=0.0, scale=1.0),
        "weibull_min":
        stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max":
        stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy":
        stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
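
# A minimal usage sketch for all_dists(), illustrative only: loop over the frozen
# distributions and report each median together with the pdf evaluated at that median
# (assumes the `scipy.stats` import used above; some entries, e.g. levy_stable, are slow):
for name, dist in all_dists().items():
    med = dist.median()
    print(f"{name}: median = {med:.4f}, pdf(median) = {dist.pdf(med):.4f}")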
Example no. 4
x = np.linspace(powerlognorm.ppf(0.01, c, s), powerlognorm.ppf(0.99, c, s),
                100)
ax.plot(x,
        powerlognorm.pdf(x, c, s),
        'r-',
        lw=5,
        alpha=0.6,
        label='powerlognorm pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = powerlognorm(c, s)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = powerlognorm.ppf([0.001, 0.5, 0.999], c, s)
np.allclose([0.001, 0.5, 0.999], powerlognorm.cdf(vals, c, s))
# True

# Generate random numbers:

r = powerlognorm.rvs(c, s, size=1000)

# And compare the histogram:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)  # `normed` was removed from matplotlib; use `density`
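
# The scipy.stats docstring example presumably ends, as usual, by adding a legend and
# showing the figure:
ax.legend(loc='best', frameon=False)
plt.show()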

def RNA_cross_val(data, base_estimator, n_jobs=-3):
    print("***Data pre-processing***")

    # Collect all usable data
    data_usable = BDDminimal(data)

    # Extract the features and the labels
    x, y = featuresLabel(data_usable)

    print("[Splitting into training and test sets]")
    x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size=0.2, shuffle=True)

    # Scaling + encoding
    scaling(x_train, x_test)

    transformer = LabelEncoder()
    y_train_encode = transformer.fit_transform(y_train).ravel()

    print("[Preparation pour cross-validation]")

    # Train / validation split
    cv = StratifiedKFold(n_splits=5, shuffle=True)

    scores = cross_val_score(base_estimator, x_train, y_train_encode, cv=cv, n_jobs=n_jobs)

    print(scores)



def alpha_validation_curve(data, base_estimator, n_jobs=-3):
    print("***Pré-traitement des donnees***")

    # On récupere toutes les donnees utilisables
    data_usable = BDDminimal(data)

    # On récupére les features et les label
    x, y = featuresLabel(data_usable)

    print("[Separation en set d'entrainement et de test]")
    x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size=0.2, shuffle=True)

    # Scaling + encoding
    scaling(x_train, x_test)

    transformer = LabelEncoder()
    y_train_encode = transformer.fit_transform(y_train).ravel()

    print("[Preparation pour cross-validation]")

    # Train / validation split
    cv = StratifiedKFold(n_splits=3, shuffle=True)

    # ----- Validation curve settings -----

    # Choice of the parameter and its search range
    param_range = np.logspace(-11, 0, 55)

    # Compute the validation curve (param_name/param_range must be passed as keywords
    # in recent scikit-learn versions)
    train_scores, valid_scores = validation_curve(base_estimator, x_train, y_train_encode,
                                                  param_name="alpha", param_range=param_range,
                                                  cv=cv, n_jobs=n_jobs)

    # Plot the validation curve
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    validation_scores_mean = np.mean(valid_scores, axis=1)
    validation_scores_std = np.std(valid_scores, axis=1)

    fig, ax = plt.subplots(1, 1)
    ax.set_title("Validation curve on alpha for Adam")
    ax.set_xlabel("Alpha")
    ax.set_ylabel("Score")

    lw = 2
    ax.semilogx(param_range, train_scores_mean, label="Training score",
                color="darkorange", lw=lw)
    ax.fill_between(param_range, train_scores_mean - train_scores_std,
                    train_scores_mean + train_scores_std, alpha=0.2,
                    color="darkorange", lw=lw)
    ax.semilogx(param_range, validation_scores_mean, label="Cross-validation score",
                color="navy", lw=lw)
    ax.fill_between(param_range, validation_scores_mean - validation_scores_std,
                    validation_scores_mean + validation_scores_std, alpha=0.2,
                    color="navy", lw=lw)
    plt.legend(loc="best")
    plt.show()

    return (train_scores, valid_scores)


def layers_validation_curve(data, base_estimator, n_jobs=-3):
    print("***Pré-traitement des donnees***")

    # On récupere toutes les donnees utilisables
    data_usable = BDDminimal(data)

    # On récupére les features et les label
    x, y = featuresLabel(data_usable)

    print("[Separation en set d'entrainement et de test]")
    x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size=0.2, shuffle=True)

    # Scaling + encoding
    scaling(x_train, x_test)

    transformer = LabelEncoder()
    y_train_encode = transformer.fit_transform(y_train).ravel()

    print("[Preparation pour cross-validation]")

    # Train / validation split
    cv = StratifiedKFold(n_splits=3, shuffle=True)

    # ----- Validation curve settings -----

    # Choice of the parameter and its search range
    param_range = range(1, 10)
    hidden_layers_range = [tuple([60 for _ in range(i)]) for i in param_range]
    # param_range = range(10, 171, 10)
    # hidden_layers_range = [(i,i,i,i,i) for i in param_range]

    # Compute the validation curve (param_name/param_range must be passed as keywords
    # in recent scikit-learn versions)
    train_scores, valid_scores = validation_curve(base_estimator, x_train, y_train_encode,
                                                  param_name="hidden_layer_sizes",
                                                  param_range=hidden_layers_range,
                                                  cv=cv, n_jobs=n_jobs)

    # Plot the validation curve
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    validation_scores_mean = np.mean(valid_scores, axis=1)
    validation_scores_std = np.std(valid_scores, axis=1)

    fig, ax = plt.subplots(1, 1)
    ax.set_title("Validation curve on number of layers for SGD")
    ax.set_xlabel("Number of layers")
    ax.set_ylabel("Score")

    lw = 2
    ax.plot(param_range, train_scores_mean, label="Training score",
            color="darkorange", lw=lw)
    ax.fill_between(param_range, train_scores_mean - train_scores_std,
                    train_scores_mean + train_scores_std, alpha=0.2,
                    color="darkorange", lw=lw)
    ax.plot(param_range, validation_scores_mean, label="Cross-validation score",
            color="navy", lw=lw)
    ax.fill_between(param_range, validation_scores_mean - validation_scores_std,
                    validation_scores_mean + validation_scores_std, alpha=0.2,
                    color="navy", lw=lw)
    plt.legend(loc="best")
    plt.show()

    return (train_scores, valid_scores)


def trace_VC():
    excel_table = readMultipleCsv('names')

    # === Starting models ===

    # base_estimator = MLPClassifier(solver="sgd", activation='relu', batch_size=250, learning_rate='adaptive',
    #                                learning_rate_init=0.003803490162088419, max_iter=1000, verbose=True,
    #                                hidden_layer_sizes=(50, 30, 57, 51, 56), alpha=0.00579294106283857)

    # base_estimator = MLPClassifier(solver="adam", activation='relu', batch_size=440,
    #                                    learning_rate_init=0.0036049822428060574, max_iter=1000, verbose=True,
    #                                    hidden_layer_sizes= (31, 56, 58, 41), alpha=0.006600250942968936)

    # === Standardized models ===

    base_estimator = MLPClassifier(solver="sgd", activation='relu', batch_size=250, learning_rate='adaptive',
                                   learning_rate_init=0.003803490162088419, max_iter=1000, verbose=True,
                                   hidden_layer_sizes=(60, 60, 60, 60), alpha=1e-5)

    # base_estimator = MLPClassifier(solver="adam", activation='relu', batch_size=440,
    #                                    learning_rate_init=0.0036049822428060574, max_iter=1000, verbose=True,
    #                                    hidden_layer_sizes= (60, 60, 60, 60), alpha=1e-5)

    # === Plot the validation curves and find the maximum mean scores ===

    # train_scores, valid_scores = alpha_validation_curve(excel_table, base_estimator)
    train_scores, valid_scores = layers_validation_curve(excel_table, base_estimator)
    valid_mean = np.mean(valid_scores, axis=1)

    # arg_max = np.argmax(valid_mean)
    # print(np.logspace(-11, 0, 55)[arg_max])
    # print(valid_mean[arg_max])

    arg_max = np.argmax(valid_mean)
    print(range(1, 10)[arg_max])
    print(valid_mean[arg_max])
    
    
# Train an SVC and print its accuracy
def training_SVC(x_train, y_train, x_test, y_test):
    model_SVC = SVC(kernel='linear', gamma='scale', shrinking=False)
    # Training (use the function arguments, which are expected to be already scaled/encoded)
    model_SVC.fit(x_train, y_train)
    # Compute the accuracy
    print(f'SVC accuracy: {model_SVC.score(x_test, y_test) * 100} %')


# Train a KNeighborsClassifier and print its accuracy
def training_kneighbors(x_train, y_train, x_test, y_test, k):
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(x_train, y_train)
    print(f'KNeighborsClassifier accuracy with {k} neighbours: {model.score(x_test, y_test) * 100} %')


# Train an SGDClassifier and print its accuracy
def training_SGDClassifier(x_train, y_train, x_test, y_test):
    model = SGDClassifier(random_state=0)
    model.fit(x_train, y_train)
    print(f'SGDClassifier accuracy: {model.score(x_test, y_test) * 100} %')
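
# A minimal usage sketch for the three helpers above, assuming features are scaled and labels
# encoded as in the other functions of this module (compare_classifiers is a hypothetical name;
# readMultipleCsv, BDDminimal and featuresLabel are project helpers used elsewhere in the file):
def compare_classifiers():
    excel_table = readMultipleCsv('names')
    x, y = featuresLabel(BDDminimal(excel_table))
    x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size=0.2, shuffle=True)

    # Scale the features and encode the labels
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)
    encoder = LabelEncoder()
    y_train_enc = encoder.fit_transform(y_train).ravel()
    y_test_enc = encoder.transform(y_test).ravel()

    training_SVC(x_train, y_train_enc, x_test, y_test_enc)
    training_kneighbors(x_train, y_train_enc, x_test, y_test_enc, k=3)
    training_SGDClassifier(x_train, y_train_enc, x_test, y_test_enc)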
    

def score_par_type_de_sol():
    """
    Plot the learning curves per soil type (specify the model inside the function).
    """
    
    excel_table = readMultipleCsv('names')
    data_usable = BDDminimal(excel_table)

    x, y = featuresLabel(data_usable)

    x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size=0.2, shuffle=True, random_state=6)

    # Prepare the test set

    scalerX = StandardScaler()
    x_train1 = scalerX.fit_transform(x_train)
    x_test = scalerX.transform(x_test)
    data_test = pd.DataFrame(x_test, columns=["z", "VIA", "Po", "Pi", "Cr", "Pr"])
    y_test = pd.DataFrame.to_numpy(y_test)
    Dico_sol_test = {}
    data_test['sol'] = y_test
    for sol_type in groundType:
        Dico_sol_test[sol_type] = []
    i = 0
    for type_sol in data_test['sol']:
        Dico_sol_test[type_sol].append(data_test.iloc[i])
        i += 1
    # Prepare the training set

    y_train1 = y_train
    data = pd.DataFrame(x_train1, columns=["z", "VIA", "Po", "Pi", "Cr", "Pr"])
    y_train1 = pd.DataFrame.to_numpy(y_train1)
    data['sol'] = y_train1

    Dico_sol = {}

    for sol_type in groundType:
        Dico_sol[sol_type] = []
    i = 0
    for type_sol in data['sol']:
        Dico_sol[type_sol].append(data.iloc[i])
        i += 1

    clefs = list(Dico_sol.keys())

    # Cut the training set down to proportion p

    proportion = np.linspace(41, 100, 120)
    list_graph = []
    list_prop_graph = []
    for p in proportion:
        proportions_grap = []
        Dico_sol1 = {}

        print(p)

        for clef in clefs:
            p_sol = int(len(Dico_sol[clef]) * (p / 100))
            proportions_grap.append(p_sol)
            Dico_sol1[clef] = Dico_sol[clef][0:p_sol]

        x_train_def = []
        y_train1 = []

        for i in range(len(clefs)):
            for j in range(len(Dico_sol1[clefs[i]])):
                x_train_def.append(pd.DataFrame.to_numpy(Dico_sol1[clefs[i]][j]))
                y_train1.append(Dico_sol1[clefs[i]][j][-1])

        x_train1 = np.delete(x_train_def, 6, 1)

        # Choice of the model

        '''model = MLPClassifier(solver="sgd", activation='relu', batch_size=250, learning_rate='adaptive',
                              learning_rate_init=0.003803490162088419, max_iter=1000, verbose=True,
                              hidden_layer_sizes=(60, 60, 60, 60), alpha=1e-6)'''
        model = KNeighborsClassifier(n_neighbors=3)
        model.fit(x_train1, y_train1)

        # Score evaluation

        score_sol = []

        for i in range(len(clefs)):
            x_test1 = Dico_sol_test[clefs[i]]
            if len(x_test1) == 0:
                score_sol.append(0)
            else:
                x_test1 = np.delete(x_test1, 6, 1)
                y_pred = model.predict(x_test1)
                score = 0
                for j in range(0, len(y_pred)):
                    if y_pred[j] == clefs[i]:
                        score += 1
                score_sol.append(100 * score / len(y_pred))

        list_graph.append(score_sol)
        list_prop_graph.append(proportions_grap)

    # Plot the graphs
    list_graph = np.transpose(list_graph)
    list_prop_graph = np.transpose(list_prop_graph)

    for i in range(0, len(clefs)):
        plt.plot(list_prop_graph[i], list_graph[i])
        plt.title(clefs[i])
        plt.show()

# Example tests for the classifiers
if __name__ == '__main__':
    excel_table = readMultipleCsv('names')
    data_usable = BDDminimal(excel_table)
    x, y = featuresLabel(data_usable)
    model = MLPClassifier(solver="sgd", activation='relu', batch_size=250, learning_rate='adaptive',
                          learning_rate_init=0.003803490162088419, max_iter=1000, verbose=True,
                          hidden_layer_sizes=(60, 60, 60, 60), alpha=1e-6)
    x_train, x_test, y_train, y_test = train_test_split(x, y, stratify=y, test_size=0.2, shuffle=True)

    # transformer = LabelEncoder()
    # y_train_encode = transformer.fit_transform(y_train).ravel()
    # y_test_encode = transformer.transform(y_test).ravel()

    scalerX = StandardScaler()
    x_train = scalerX.fit_transform(x_train)
    x_test = scalerX.transform(x_test)

    model.fit(x_train, y_train)
    print(model.score(x_test, y_test))