Example #1
# numpy and pandas are used below; generadores, evaluar and save_logs are
# assumed to be defined elsewhere in the module.
import numpy as np
import pandas as pd


def evaluate_cotrain(modelo1, modelo2, modelo3, arquitectura1, arquitectura2,
                     arquitectura3, datos, etapa, kfold, iteracion, pipeline,
                     models_info, logs):

    train_generator_arch1, test1_generator_arch1, STEP_SIZE_TEST1_arch1 = generadores(
        etapa, arquitectura1, datos, pipeline, False, iteracion, models_info)
    train_generator_arch2, test1_generator_arch2, STEP_SIZE_TEST1_arch2 = generadores(
        etapa, arquitectura2, datos, pipeline, False, iteracion, models_info)
    train_generator_arch3, test1_generator_arch3, STEP_SIZE_TEST1_arch3 = generadores(
        etapa, arquitectura3, datos, pipeline, False, iteracion, models_info)

    df1 = evaluar(modelo1, train_generator_arch1, test1_generator_arch1,
                  STEP_SIZE_TEST1_arch1)
    df2 = evaluar(modelo2, train_generator_arch2, test1_generator_arch2,
                  STEP_SIZE_TEST1_arch2)
    df3 = evaluar(modelo3, train_generator_arch3, test1_generator_arch3,
                  STEP_SIZE_TEST1_arch3)

    predicciones = []
    for i in range(len(df1)):

        c1 = (df1['Predictions'][i] == df2['Predictions'][i])
        c2 = (df1['Predictions'][i] == df3['Predictions'][i])
        c3 = (df2['Predictions'][i] == df3['Predictions'][i])

        if c1 and c2 and c3:
            predicciones.append([df1['Filename'][i], df1['Predictions'][i]])
        else:
            probabilidades = np.array([
                df1['Max_Probability'][i], df2['Max_Probability'][i],
                df3['Max_Probability'][i]
            ])
            indice_prob_max = probabilidades.argmax()

            clases = np.array([
                df1['Predictions'][i], df2['Predictions'][i],
                df3['Predictions'][i]
            ])

            real = np.array(
                [df1['Filename'][i], df2['Filename'][i], df3['Filename'][i]])
            # Pick the prediction of the most confident model; using
            # clases.argmax() here would select the largest class label,
            # not the most confident one.
            predicciones.append(
                [real[indice_prob_max], clases[indice_prob_max]])

    results = pd.DataFrame(predicciones, columns=["filename", "predictions"])

    # The ground-truth label is the parent directory name in the file path.
    results['filename'] = results['filename'].apply(lambda x: x.split('/')[-2])
    y_true = results['filename'].values.tolist()
    y_pred = results['predictions'].values.tolist()

    from sklearn.metrics import accuracy_score
    co_train_accu = accuracy_score(y_true, y_pred)
    co_train_label = 'co-train'

    logs.append(
        [kfold, iteracion, co_train_label, None, None, None, co_train_accu])
    save_logs(logs, 'train', pipeline)
    return co_train_accu
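The tie-breaking rule above can be read in isolation: take the unanimous
prediction if all three models agree, otherwise defer to the most confident
model. A minimal, runnable sketch of that rule; the helper name
resolver_voto is hypothetical:

import numpy as np

def resolver_voto(preds, probs):
    """Return the unanimous prediction, else the most confident one."""
    if all(p == preds[0] for p in preds):
        return preds[0]
    return preds[int(np.argmax(probs))]

# The models disagree, so the 0.9-confidence vote wins.
print(resolver_voto(['cat', 'dog', 'cat'], [0.6, 0.9, 0.7]))  # -> 'dog'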
Example #2
def labeling(etapa, modelo1, modelo2, modelo3, arquitectura1, arquitectura2,
             arquitectura3, datos, pipeline, iteracion, models_info):
    """
    Labels samples using models
    Args:
        etapa (str): Word 1 to compare
        modelo1 (bytes): Top1 Model
        modelo2 (bytes): Top2 Model
        modelo3 (bytes): Top3 Model
        arquitectura1 (str): Top1 Arch
        arquitectura2 (str): Top2 Arch
        arquitectura3 (str): Top3 Arch
        EL (list): Enlarge labeled samples
        LC (list): Low confidence samples
        datos (df): Dataframe with data
        pipeline (dict): General config
        iteracion (int): Semi-supervised Stage
        models_info (list): Info about models

    Returns:
        EL (list): Enlarge labeled samples updated
        LC (list): Low confidence samples updated
        EL_iter ():
        LC_iter ():

    """
    etiquetados_EL = 0
    etiquetados_LC = 0
    EL_iter = []
    LC_iter = []

    train_generator_arch1, batchset_generator_arch1, STEP_SIZE_BATCH_arch1 = generadores(
        etapa, arquitectura1, datos, pipeline, True, iteracion, models_info)
    train_generator_arch2, batchset_generator_arch2, STEP_SIZE_BATCH_arch2 = generadores(
        etapa, arquitectura2, datos, pipeline, True, iteracion, models_info)
    train_generator_arch3, batchset_generator_arch3, STEP_SIZE_BATCH_arch3 = generadores(
        etapa, arquitectura3, datos, pipeline, True, iteracion, models_info)

    df1 = evaluar(modelo1, train_generator_arch1, batchset_generator_arch1,
                  STEP_SIZE_BATCH_arch1)
    df2 = evaluar(modelo2, train_generator_arch2, batchset_generator_arch2,
                  STEP_SIZE_BATCH_arch2)
    df3 = evaluar(modelo3, train_generator_arch3, batchset_generator_arch3,
                  STEP_SIZE_BATCH_arch3)

    for i in range(len(df1)):

        arch_scores = {}
        arch_scores[arquitectura1] = df1['Max_Probability'][i]
        arch_scores[arquitectura2] = df2['Max_Probability'][i]
        arch_scores[arquitectura3] = df3['Max_Probability'][i]

        c1 = (df1['Predictions'][i] == df2['Predictions'][i]) and (
            df1['Max_Probability'][i] > pipeline["ssl_threshold"]) and (
                df2['Max_Probability'][i] > pipeline["ssl_threshold"])
        c2 = (df1['Predictions'][i] == df3['Predictions'][i]) and (
            df1['Max_Probability'][i] > pipeline["ssl_threshold"]) and (
                df3['Max_Probability'][i] > pipeline["ssl_threshold"])
        c3 = (df2['Predictions'][i] == df3['Predictions'][i]) and (
            df2['Max_Probability'][i] > pipeline["ssl_threshold"]) and (
                df3['Max_Probability'][i] > pipeline["ssl_threshold"])

        if c1 and c2 and c3:
            datos["EL"].append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            EL_iter.append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            etiquetados_EL += 1
        else:
            datos["LC"].append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            LC_iter.append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            etiquetados_LC += 1

    print('labeled EL {} LC {}'.format(etiquetados_EL, etiquetados_LC))

    return datos, EL_iter, LC_iter
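The EL/LC split in labeling reduces to one boolean per sample: all three
predictions match and every confidence clears pipeline["ssl_threshold"].
A standalone sketch under that reading; es_EL is a hypothetical helper:

def es_EL(preds, probs, threshold):
    """True when all models agree and every confidence beats the threshold."""
    return len(set(preds)) == 1 and all(p > threshold for p in probs)

print(es_EL(['cat', 'cat', 'cat'], [0.95, 0.91, 0.88], 0.9))  # False: 0.88 fails
print(es_EL(['cat', 'cat', 'cat'], [0.95, 0.91, 0.92], 0.9))  # True -> EL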
Example #3
def evaluate_cotrain(modelo1, modelo2, modelo3,
                     arquitectura1, arquitectura2, arquitectura3,
                     datos, etapa, kfold, iteracion,
                     pipeline, models_info, logs):

    train_generator_arch1, test_generator_arch1, STEP_SIZE_TEST_arch1 = generadores(
        etapa, arquitectura1, datos, pipeline, False, iteracion, models_info)
    train_generator_arch2, test_generator_arch2, STEP_SIZE_TEST_arch2 = generadores(
        etapa, arquitectura2, datos, pipeline, False, iteracion, models_info)
    train_generator_arch3, test_generator_arch3, STEP_SIZE_TEST_arch3 = generadores(
        etapa, arquitectura3, datos, pipeline, False, iteracion, models_info)

    df1 = evaluar(modelo1, train_generator_arch1, test_generator_arch1,
                  STEP_SIZE_TEST_arch1)
    df2 = evaluar(modelo2, train_generator_arch2, test_generator_arch2,
                  STEP_SIZE_TEST_arch2)
    df3 = evaluar(modelo3, train_generator_arch3, test_generator_arch3,
                  STEP_SIZE_TEST_arch3)

    import numpy as np
    import pandas as pd
    from sklearn.metrics import precision_recall_fscore_support

    predicciones = []
    predicciones_logs = []

    for i in range(len(df1)):

        c1 = (df1['Predictions'][i] == df2['Predictions'][i])
        c2 = (df1['Predictions'][i] == df3['Predictions'][i])
        c3 = (df2['Predictions'][i] == df3['Predictions'][i])

        if c1 or c2:
            # df1 agrees with at least one other model: keep df1's prediction.
            predicciones.append([df1['Filename'][i], df1['Predictions'][i]])
            selected = df1['Predictions'][i]
            prob_selected = df1["Max_Probability"][i]
            predicciones_logs.append([df1['Filename'][i], selected, prob_selected, "democ",
                                      df1['Predictions'][i], df1['Max_Probability'][i],
                                      df2["Predictions"][i], df2['Max_Probability'][i],
                                      df3["Predictions"][i], df3['Max_Probability'][i]])
        elif c3:
            # df2 and df3 agree while df1 differs: keep their prediction.
            predicciones.append([df2['Filename'][i], df2['Predictions'][i]])
            selected = df2['Predictions'][i]
            prob_selected = df2["Max_Probability"][i]
            predicciones_logs.append([df1['Filename'][i], selected, prob_selected, "democ",
                                      df1['Predictions'][i], df1['Max_Probability'][i],
                                      df2["Predictions"][i], df2['Max_Probability'][i],
                                      df3["Predictions"][i], df3['Max_Probability'][i]])
        else:
            # No agreement at all: fall back to the most confident model.
            probabilidades = np.array([df1['Max_Probability'][i], df2['Max_Probability'][i],
                                       df3['Max_Probability'][i]])
            indice_prob_max = probabilidades.argmax()

            clases = np.array([df1['Predictions'][i], df2['Predictions'][i], df3['Predictions'][i]])
            real = np.array([df1['Filename'][i], df2['Filename'][i], df3['Filename'][i]])

            predicciones.append([real[indice_prob_max], clases[indice_prob_max]])

            selected = clases[indice_prob_max]
            prob_selected = probabilidades[indice_prob_max]
            predicciones_logs.append([df1['Filename'][i], selected, prob_selected, "max",
                                      df1['Predictions'][i], df1['Max_Probability'][i],
                                      df2["Predictions"][i], df2['Max_Probability'][i],
                                      df3["Predictions"][i], df3['Max_Probability'][i]])
            
    results = pd.DataFrame(predicciones, columns=["filename", "predictions"])

    results['filename'] = results['filename'].apply(lambda x: x.split('/')[-2])
    y_true = results['filename'].values.tolist()
    y_pred = results['predictions'].values.tolist()

    labels_arch1 = train_generator_arch1.class_indices

    print("LABELS CO-TRAIN")
    print([*labels_arch1])

    architecture = 'co-train'

    class_metrics = precision_recall_fscore_support(y_true, y_pred, average=pipeline["metrics"])
    
    # TODO Bugfix: the confusion-matrix pipeline below is disabled until
    # calculate_confusion_matrix is fixed.
    #cm = calculate_confusion_matrix(y_true, y_pred)
    #save_confusion_matrix(cm, [*labels_arch1], kfold, iteracion, architecture, pipeline)
    #if pipeline["cm_normalize"]:  # normalize confusion matrix
    #    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    #    cm = np.round(cm, 2)
    #plot_confusion_matrix(cm, [*labels_arch1], kfold, iteracion, architecture, pipeline)
    #acc_cls = accuracy_by_class(cm, [*labels_arch1])
    #print("ACCURACY BY CLASS")
    #print(acc_cls)
    #logs_accBycls = []
    #logs_accBycls.append([kfold, iteracion, architecture, acc_cls])
    #save_logs(logs_accBycls, 'accBycls', pipeline)
    
    from sklearn.metrics import accuracy_score
    co_train_accu = accuracy_score(y_true, y_pred)

    logs.append([kfold, iteracion, architecture, None, None, None, co_train_accu,
                 class_metrics[0], class_metrics[1], class_metrics[2], class_metrics[3]])

    print(f"Co-train Accuracy: {co_train_accu}")
    print(f"Co-train Precision: {class_metrics[0]}")
    print(f"Co-train Recall: {class_metrics[1]}")
    print(f"Co-train F1-Score: {class_metrics[2]}")
    print(f"Co-train Support: {class_metrics[3]}")

    save_logs(logs, 'train', pipeline)
    return co_train_accu, [df1, df2, df3]
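For reference, this is how accuracy_score and precision_recall_fscore_support
behave on toy labels; the snippet uses average=None purely for illustration,
whereas the function above passes pipeline["metrics"]:

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

y_true = ['cat', 'cat', 'dog', 'dog']
y_pred = ['cat', 'dog', 'dog', 'dog']

print(accuracy_score(y_true, y_pred))  # 0.75
# With average=None: per-class precision, recall, f1 and support arrays,
# in sorted label order ('cat', 'dog').
print(precision_recall_fscore_support(y_true, y_pred, average=None))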
Example #4
def labeling(etapa, modelo1, modelo2, modelo3, arquitectura1, arquitectura2,
             arquitectura3, datos, pipeline, iteracion, models_info):
    """
    Labels samples using models
    Args:
        etapa (str): Word 1 to compare
        modelo1 (bytes): Top1 Model
        modelo2 (bytes): Top2 Model
        modelo3 (bytes): Top3 Model
        arquitectura1 (str): Top1 Arch
        arquitectura2 (str): Top2 Arch
        arquitectura3 (str): Top3 Arch
        EL (list): Enlarge labeled samples
        LC (list): Low confidence samples
        datos (df): Dataframe with data
        pipeline (dict): General config
        iteracion (int): Semi-supervised Stage
        models_info (list): Info about models

    Returns:
        EL (list): Enlarge labeled samples updated
        LC (list): Low confidence samples updated
        EL_iter ():
        LC_iter ():

    """
    etiquetados_EL = 0
    etiquetados_LC = 0
    EL_iter = []
    LC_iter = []

    predicciones = []
    predicciones_logs = []

    train_generator_arch1, batchset_generator_arch1, STEP_SIZE_BATCH_arch1 = generadores(
        etapa, arquitectura1, datos, pipeline, True, iteracion, models_info)
    train_generator_arch2, batchset_generator_arch2, STEP_SIZE_BATCH_arch2 = generadores(
        etapa, arquitectura2, datos, pipeline, True, iteracion, models_info)
    train_generator_arch3, batchset_generator_arch3, STEP_SIZE_BATCH_arch3 = generadores(
        etapa, arquitectura3, datos, pipeline, True, iteracion, models_info)

    df1 = evaluar(modelo1, train_generator_arch1, batchset_generator_arch1,
                  STEP_SIZE_BATCH_arch1)
    df2 = evaluar(modelo2, train_generator_arch2, batchset_generator_arch2,
                  STEP_SIZE_BATCH_arch2)
    df3 = evaluar(modelo3, train_generator_arch3, batchset_generator_arch3,
                  STEP_SIZE_BATCH_arch3)

    for i in range(len(df1)):

        arch_scores = {}
        arch_scores[arquitectura1] = df1['Max_Probability'][i]
        arch_scores[arquitectura2] = df2['Max_Probability'][i]
        arch_scores[arquitectura3] = df3['Max_Probability'][i]

        c1 = (df1['Predictions'][i] == df2['Predictions'][i])
        c2 = (df1['Predictions'][i] == df3['Predictions'][i])
        # c3 is implied by c1 and c2 (equality is transitive), so it is not
        # checked below.
        c3 = (df2['Predictions'][i] == df3['Predictions'][i])

        p1 = (df1['Max_Probability'][i] > pipeline["ssl_threshold"])
        p2 = (df2['Max_Probability'][i] > pipeline["ssl_threshold"])
        p3 = (df3['Max_Probability'][i] > pipeline["ssl_threshold"])

        if c1 and c2 and p1 and p2 and p3:
            datos["EL"].append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            predicciones.append([df1['Filename'][i], df1['Predictions'][i]])
            selected = df1['Predictions'][i]
            prob_selected = df1["Max_Probability"][i]
            predicciones_logs.append([
                df1['Filename'][i], selected, prob_selected, "EL",
                df1['Predictions'][i], df1['Max_Probability'][i],
                df2["Predictions"][i], df2['Max_Probability'][i],
                df3["Predictions"][i], df3['Max_Probability'][i]
            ])
            EL_iter.append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            etiquetados_EL += 1
        else:
            datos["LC"].append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            predicciones.append([df1['Filename'][i], df1['Predictions'][i]])
            selected = df1['Predictions'][i]
            prob_selected = df1["Max_Probability"][i]
            predicciones_logs.append([
                df1['Filename'][i], selected, prob_selected, "LC",
                df1['Predictions'][i], df1['Max_Probability'][i],
                df2["Predictions"][i], df2['Max_Probability'][i],
                df3["Predictions"][i], df3['Max_Probability'][i]
            ])
            LC_iter.append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            etiquetados_LC += 1

    print('labeled EL {} LC {}'.format(etiquetados_EL, etiquetados_LC))

    import pandas as pd

    df_logs = pd.DataFrame(predicciones_logs,
                           columns=[
                               "y_true", "y_pred", "prob_pred", "type",
                               "pred1", "prob1", "pred2", "prob2", "pred3",
                               "prob3"
                           ])

    df_logs["y_true"] = df_logs['y_true'].apply(lambda x: x.split('/')[-2])

    print("LABELING LOGS")

    print(df_logs)

    df_EL_logs = df_logs[(df_logs["type"] == "EL")].copy()
    df_LC_logs = df_logs[(df_logs["type"] == "LC")].copy()

    y_true = df_EL_logs['y_true'].values.tolist()
    y_pred = df_EL_logs['y_pred'].values.tolist()

    from sklearn.metrics import accuracy_score
    EL_accu = accuracy_score(y_true, y_pred)
    print("EL_ACCU: ", EL_accu)

    y_true = df_LC_logs['y_true'].values.tolist()
    y_pred = df_LC_logs['y_pred'].values.tolist()

    LC_accu = accuracy_score(y_true, y_pred)
    print("LC_ACCU: ", LC_accu)

    return datos, EL_iter, LC_iter, [df1, df2, df3]
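The y_true column above is recovered from directory-structured file paths via
split('/')[-2], i.e. the parent folder name is the class label. A quick
illustration with made-up paths:

paths = ['data/train/cat/img_001.jpg', 'data/train/dog/img_042.jpg']
labels = [p.split('/')[-2] for p in paths]
print(labels)  # ['cat', 'dog']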
Example #5
def labeling_v2(etapa, modelo1, modelo2, modelo3, arquitectura1, arquitectura2,
                arquitectura3, datos, pipeline, iteracion, models_info):

    predicciones = []
    predicciones_logs = []

    etiquetados_EL = 0
    etiquetados_LC = 0
    EL_iter = []
    LC_iter = []

    train_generator_arch1, batchset_generator_arch1, STEP_SIZE_BATCH_arch1 = generadores(
        etapa, arquitectura1, datos, pipeline, True, iteracion, models_info)
    train_generator_arch2, batchset_generator_arch2, STEP_SIZE_BATCH_arch2 = generadores(
        etapa, arquitectura2, datos, pipeline, True, iteracion, models_info)
    train_generator_arch3, batchset_generator_arch3, STEP_SIZE_BATCH_arch3 = generadores(
        etapa, arquitectura3, datos, pipeline, True, iteracion, models_info)

    df1 = evaluar(modelo1, train_generator_arch1, batchset_generator_arch1,
                  STEP_SIZE_BATCH_arch1)
    df2 = evaluar(modelo2, train_generator_arch2, batchset_generator_arch2,
                  STEP_SIZE_BATCH_arch2)
    df3 = evaluar(modelo3, train_generator_arch3, batchset_generator_arch3,
                  STEP_SIZE_BATCH_arch3)

    for i in range(len(df1)):

        arch_scores = {}
        arch_scores[arquitectura1] = df1['Max_Probability'][i]
        arch_scores[arquitectura2] = df2['Max_Probability'][i]
        arch_scores[arquitectura3] = df3['Max_Probability'][i]

        c1 = (df1['Predictions'][i] == df2['Predictions'][i])
        c2 = (df1['Predictions'][i] == df3['Predictions'][i])
        c3 = (df2['Predictions'][i] == df3['Predictions'][i])

        if c1 or c2:
            # df1 agrees with at least one other model: keep df1's prediction.
            datos["EL"].append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            predicciones.append([df1['Filename'][i], df1['Predictions'][i]])
            selected = df1['Predictions'][i]
            prob_selected = df1["Max_Probability"][i]
            predicciones_logs.append([
                df1['Filename'][i], selected, prob_selected, "EL",
                df1['Predictions'][i], df1['Max_Probability'][i],
                df2["Predictions"][i], df2['Max_Probability'][i],
                df3["Predictions"][i], df3['Max_Probability'][i]
            ])
            EL_iter.append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            etiquetados_EL += 1
        elif c3:
            # df2 and df3 agree while df1 differs, so the agreed prediction
            # (df2's) is the one stored and logged.
            datos["EL"].append(
                [df1['Filename'][i], df2['Predictions'][i], arch_scores])
            predicciones.append([df2['Filename'][i], df2['Predictions'][i]])
            selected = df2['Predictions'][i]
            prob_selected = df2["Max_Probability"][i]
            predicciones_logs.append([
                df1['Filename'][i], selected, prob_selected, "EL",
                df1['Predictions'][i], df1['Max_Probability'][i],
                df2["Predictions"][i], df2['Max_Probability'][i],
                df3["Predictions"][i], df3['Max_Probability'][i]
            ])
            EL_iter.append(
                [df1['Filename'][i], df2['Predictions'][i], arch_scores])
            etiquetados_EL += 1

        else:
            datos["LC"].append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            LC_iter.append(
                [df1['Filename'][i], df1['Predictions'][i], arch_scores])
            etiquetados_LC += 1

            predicciones.append([df1['Filename'][i], df1['Predictions'][i]])
            selected = df1['Predictions'][i]
            prob_selected = df1["Max_Probability"][i]
            predicciones_logs.append([
                df1['Filename'][i], selected, prob_selected, "LC",
                df1['Predictions'][i], df1['Max_Probability'][i],
                df2["Predictions"][i], df2['Max_Probability'][i],
                df3["Predictions"][i], df3['Max_Probability'][i]
            ])

    print('labeled EL {} LC {}'.format(etiquetados_EL, etiquetados_LC))

    import pandas as pd

    df_logs = pd.DataFrame(predicciones_logs,
                           columns=[
                               "y_true", "y_pred", "prob_pred", "type",
                               "pred1", "prob1", "pred2", "prob2", "pred3",
                               "prob3"
                           ])

    df_logs["y_true"] = df_logs['y_true'].apply(lambda x: x.split('/')[-2])

    print("LABELING LOGS")

    print(df_logs)

    df_EL_logs = df_logs[(df_logs["type"] == "EL")].copy()
    df_LC_logs = df_logs[(df_logs["type"] == "LC")].copy()

    y_true = df_EL_logs['y_true'].values.tolist()
    y_pred = df_EL_logs['y_pred'].values.tolist()

    from sklearn.metrics import accuracy_score
    EL_accu = accuracy_score(y_true, y_pred)
    print("EL_ACCU: ", EL_accu)

    y_true = df_LC_logs['y_true'].values.tolist()
    y_pred = df_LC_logs['y_pred'].values.tolist()

    LC_accu = accuracy_score(y_true, y_pred)
    print("LC_ACCU: ", LC_accu)

    return datos, EL_iter, LC_iter, [df1, df2, df3]
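Taken together, the examples differ mainly in how strict the agreement gate
is before a sample enters EL. A compact sketch contrasting the two policies
on one sample; the gate_* helpers are hypothetical:

def gate_strict(c1, c2, p_ok):
    # labeling: df1 must agree with both others and all confidences pass.
    return c1 and c2 and p_ok

def gate_relaxed(c1, c2, c3):
    # labeling_v2: any pairwise agreement is enough, no threshold.
    return c1 or c2 or c3

# df2 and df3 agree while df1 differs:
c1, c2, c3 = False, False, True
print(gate_strict(c1, c2, p_ok=True))  # False -> LC
print(gate_relaxed(c1, c2, c3))        # True  -> EL (label taken from df2)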