Exemplo n.º 1
0
def _empty_results():
    """Return the blank results skeleton that ``main`` fills in.

    Both the 'reconstruction' and the 'embedding' entries share the same
    layout: train time/loss, a score threshold, and time/auc/f1/scores for
    the validation and test sets. Every leaf starts as ``None``.
    """
    def _blank_method():
        # A fresh dict per call so the two entries never share state.
        return {
            'train': {'time': None, 'loss': None},
            'scores_threshold': None,
            'valid': {'time': None, 'auc': None, 'f1': None, 'scores': None},
            'test': {'time': None, 'auc': None, 'f1': None, 'scores': None},
        }

    return {
        'reconstruction': _blank_method(),
        'embedding': _blank_method(),
    }


def _labels_and_scores(score_rows):
    """Split per-sample score rows into a label array and a score array.

    Args:
        score_rows: 2D array-like; column 1 is read as the label and
            column 2 as the score (column 0 is presumably a sample index
            -- TODO confirm against the code producing the rows).

    Returns:
        Tuple ``(labels, scores)`` of 1D numpy arrays.
    """
    rows = np.array(score_rows)
    return rows[:, 1], rows[:, 2]


def _threshold_and_f1(labels_v, scores_v, labels_t, scores_t):
    """Pick the score threshold on validation data and evaluate F1 with it.

    Args:
        labels_v, scores_v: validation labels and scores.
        labels_t, scores_t: test labels and scores.

    Returns:
        Tuple ``(thres, f1_valid, f1_test)`` where ``thres`` and ``f1_valid``
        come from ``get_best_threshold`` and ``f1_test`` is the F1-score of
        the test predictions ``scores_t > thres``.
    """
    thres, f1_valid = get_best_threshold(scores_v, labels_v, metric=f1_score)
    # Ground truth first, predictions second, matching the roc_auc_score
    # calls in main. NOTE(review): f1_score is presumably
    # sklearn.metrics.f1_score; the binary F1 value is the same in either
    # order, so this only fixes the convention.
    f1_test = f1_score(labels_t, np.where(scores_t > thres, 1, 0))
    return thres, f1_valid, f1_test


def main(replicat_i):
    """Rebuild the corrected results file of one replicate.

    The embedding (DeepSAD) metrics are recovered from the JSON result files
    already on disk, with the F1-scores and the threshold recomputed from the
    stored scores. The reconstruction (AE) validation and test metrics are
    recomputed by running the trained autoencoder on the validation and test
    sets. Everything is written to
    ``results/results_corrected_{replicat_i}.json`` under ``PATH_TO_OUTPUTS``.

    Args:
        replicat_i: replicate identifier interpolated into the names of the
            result files and of the model file.

    Returns:
        None. The function reads and writes files and prints its progress.
    """
    results = _empty_results()
    results_dir = PATH_TO_OUTPUTS + 'results/'

    # %% load the AE_results file and DeepSAD results file
    with open(results_dir + f'AE_results_{replicat_i}.json') as f:
        ae_results = json.load(f)

    with open(results_dir + f'DeepSAD_valid_results_{replicat_i}.json') as f:
        sad_results_valid = json.load(f)

    with open(results_dir + f'DeepSAD_test_results_{replicat_i}.json') as f:
        sad_results_test = json.load(f)

    # %% Recover AE and SAD train time and train loss
    embedding = results['embedding']
    reconstruction = results['reconstruction']

    embedding['train']['time'] = sad_results_test['train_time']
    embedding['train']['loss'] = sad_results_test['train_loss']

    reconstruction['train']['time'] = ae_results['train_time']
    reconstruction['train']['loss'] = ae_results['train_loss']

    # %% recover test and valid AUC, time and scores of the embedding
    # Both DeepSAD files store their metrics under 'test_*' keys; the
    # validation file simply holds the validation-set values.
    embedding['valid']['time'] = sad_results_valid['test_time']
    embedding['valid']['auc'] = sad_results_valid['test_auc']
    embedding['valid']['scores'] = sad_results_valid['test_scores']

    embedding['test']['time'] = sad_results_test['test_time']
    embedding['test']['auc'] = sad_results_test['test_auc']
    embedding['test']['scores'] = sad_results_test['test_scores']

    # The reconstruction valid/test entries are not copied from ae_results:
    # they are recomputed from the model below.

    # %% compute f1-scores and threshold of the embedding
    labels_v, scores_v = _labels_and_scores(embedding['valid']['scores'])
    labels_t, scores_t = _labels_and_scores(embedding['test']['scores'])
    thres, f1_valid, f1_test = _threshold_and_f1(labels_v, scores_v,
                                                 labels_t, scores_t)

    embedding['scores_threshold'] = thres
    embedding['valid']['f1'] = f1_valid
    embedding['test']['f1'] = f1_test

    ################################################################################
    # %% get the datasets
    df_info = pd.read_csv(DATA_INFO_PATH)
    df_info = df_info.drop(df_info.columns[0], axis=1)
    # remove low contrast images (all black)
    df_info = df_info[df_info.low_contrast == 0]

    # Train Validation Test Split
    spliter = MURA.MURA_TrainValidTestSplitter(df_info,
                                               train_frac=0.5,
                                               ratio_known_normal=0.05,
                                               ratio_known_abnormal=0.05,
                                               random_state=42)
    spliter.split_data(verbose=True)
    valid_df = spliter.get_subset('valid')
    test_df = spliter.get_subset('test')
    # make datasets
    valid_dataset = MURA.MURA_Dataset(valid_df,
                                      data_path=DATA_PATH,
                                      load_mask=True,
                                      load_semilabels=True,
                                      output_size=512)
    test_dataset = MURA.MURA_Dataset(test_df,
                                     data_path=DATA_PATH,
                                     load_mask=True,
                                     load_semilabels=True,
                                     output_size=512)

    ################################################################################
    # %% Load the AE model and compute valid and test scores of reconstruction
    print('>>> initializing AE')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # define model
    ae_net = AE_ResNet18(embed_dim=256, output_size=(1, 512, 512))
    ae_net = ae_net.to(device)
    # get the trained parameters: the AE weights are read from the
    # 'ae_net_dict' entry of the saved DeepSAD model file
    model_dict = torch.load(PATH_TO_OUTPUTS +
                            f'model/deepSAD_model_{replicat_i}.pt',
                            map_location=device)
    ae_net.load_state_dict(model_dict['ae_net_dict'])

    print('>>> AE initialized')
    print('>>> Validating AE')
    valid_time, valid_scores = test_AE(ae_net, valid_dataset, device=device)
    print('>>> Testing AE')
    test_time, test_scores = test_AE(ae_net, test_dataset, device=device)
    print('>>> Computing scores and thresholds')
    labels_v, scores_v = _labels_and_scores(valid_scores)
    labels_t, scores_t = _labels_and_scores(test_scores)
    thres, f1_valid, f1_test = _threshold_and_f1(labels_v, scores_v,
                                                 labels_t, scores_t)
    auc_valid = roc_auc_score(labels_v, scores_v)
    auc_test = roc_auc_score(labels_t, scores_t)
    print(f'>>> valid auc : {auc_valid}')
    print(f'>>> valid f1 : {f1_valid}')
    print(f'>>> test auc : {auc_test}')
    print(f'>>> test f1 : {f1_test}')

    reconstruction['scores_threshold'] = thres
    reconstruction['valid']['time'] = valid_time
    reconstruction['valid']['auc'] = auc_valid
    reconstruction['valid']['f1'] = f1_valid
    reconstruction['valid']['scores'] = valid_scores
    reconstruction['test']['time'] = test_time
    reconstruction['test']['auc'] = auc_test
    reconstruction['test']['f1'] = f1_test
    reconstruction['test']['scores'] = test_scores

    # save corrected results on disk
    out_path = results_dir + f'results_corrected_{replicat_i}.json'
    with open(out_path, 'w') as f:
        json.dump(results, f)

    # A space after "at" keeps the path from being glued to the message.
    print('>>> results saved at ' + out_path)
Exemplo n.º 2
0
        ax.xaxis.set_ticklabels([])
    ax.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(human_format))
    ax.tick_params(axis='both', labelsize=10)
    if return_counts:
        return counts


# %% Load the image metadata table
# The first CSV column is dropped by position (presumably a saved index --
# confirm), then only the rows flagged low_contrast == 0 are kept.
df = pd.read_csv(DATA_PATH + 'data_info.csv')
df = df.drop(columns=df.columns[0])
df = df[df['low_contrast'] == 0]

# %% Partition the table into train / valid / test subsets
spliter = MURA.MURA_TrainValidTestSplitter(
    df,
    train_frac=0.5,
    ratio_known_normal=0.05,
    ratio_known_abnormal=0.05,
    random_state=42,
)
spliter.split_data(verbose=True)

train_df, valid_df, test_df = (
    spliter.get_subset(part) for part in ('train', 'valid', 'test')
)

# %% Patient/body-part counts for the full table and for each subset
all_counts, train_counts, valid_counts, test_counts = (
    get_patient_bodypart_counts(frame, False)
    for frame in (df, train_df, valid_df, test_df)
)

# %%
Exemplo n.º 3
0
# Script configuration.
# The three paths are relative, so they resolve against the directory the
# script is launched from.
DATA_PATH = r'../../data/'  # holds data_info.csv and the PROCESSED/ image folder
OUTPUT_PATH = r'../../Outputs/'  # NOTE(review): not used in the visible part of the script
FIGURE_PATH = r'../../Figures/'  # presumably where the figures are written -- confirm
FIG_RES = 200  # figure resolution in dpi
# NOTE(review): the two flags below presumably control how figures are
# saved (transparent background, whether to write them at all) -- confirm
# against their use further down.
transparent = False
save_fig = True

# %% Load the image metadata table
# Drop the first CSV column by position (presumably a saved index --
# confirm) and keep only the rows flagged low_contrast == 0.
df = pd.read_csv(DATA_PATH + 'data_info.csv')
df = df.drop(columns=df.columns[0])
df = df[df['low_contrast'] == 0]

# %% Partition the table and fetch the train and test subsets
spliter = MURA.MURA_TrainValidTestSplitter(
    df,
    train_frac=0.5,
    ratio_known_normal=0.05,
    ratio_known_abnormal=0.05,
    random_state=42,
)
spliter.split_data(verbose=True)

train_df, test_df = (spliter.get_subset(part) for part in ('train', 'test'))

# %% Dataset over the training subset, reading from the PROCESSED/ folder
dataset = MURA.MURA_Dataset(
    train_df,
    DATA_PATH + 'PROCESSED/',
    output_size=256,
)

# %% figures
################################################################################
# NOTE(review): presumably the index of the sample shown in the figures
# below -- confirm against its use further down.
img_idx = 15