Example No. 1
def main(testing_flag, batch_size, epoch_number, latent_dim, projection_dim,
         disease):
    disease_user_datasets, test_train_split_dict, working_directory = get_datasets_from_path(
        testing_flag)
    user_datasets = disease_user_datasets[disease]
    # get trained autoencoder
    autoencoder, encoder, decoder = actigraphy_utilities.get_trained_autoencoder(
        user_datasets, test_train_split_dict, batch_size, latent_dim)
    # use encoder as input into BYOL
    model = deepcopy(encoder)

    train_dataset = actigraphy_utilities.ActigraphyDataset(
        user_datasets, test_train_split_dict, 'train')
    test_dataset = actigraphy_utilities.ActigraphyDataset(
        user_datasets, test_train_split_dict, 'test')

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    val_loader = DataLoader(test_dataset, batch_size=batch_size)

    byol_model = deepcopy(model)
    image_size = (train_dataset.average_length, 5)
    byol = actigraphy_utilities.BYOL(byol_model,
                                     image_size=image_size,
                                     projection_size=projection_dim)
    byol_trainer = pl.Trainer(max_epochs=epoch_number,
                              weights_summary=None,
                              logger=False)
    byol_trainer.fit(byol, train_loader, val_loader)
    byol_encoder = byol.encoder

    return byol_encoder, test_dataset, train_dataset, working_directory
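
A minimal usage sketch for the function above (the hyperparameter values are illustrative, and it is assumed that the returned encoder maps a float batch directly to embedding vectors):

import numpy as np
import torch
from torch.utils.data import DataLoader

byol_encoder, test_dataset, train_dataset, working_directory = main(
    testing_flag=True, batch_size=128, epoch_number=10,
    latent_dim=64, projection_dim=128, disease='metabolic_syndrome')

# embed the held-out set with the BYOL-trained encoder
byol_encoder.eval()
embeddings = []
with torch.no_grad():
    for data, label in DataLoader(test_dataset, batch_size=128):
        embeddings.append(byol_encoder(data.float()).numpy())
X_test = np.concatenate(embeddings, axis=0)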
Example No. 2
def byol_main(testing_flag, hchs_or_mesa, batch_size, epoch_number, latent_dim, projection_dim, disease):
    disease_user_datasets, test_train_split_dict, working_directory, path_to_embeddings = get_datasets_from_path(testing_flag, hchs_or_mesa)
    path_to_embeddings = os.path.join(path_to_embeddings, disease, "byol")
    if not os.path.exists(path_to_embeddings):
        os.makedirs(path_to_embeddings)

    user_datasets = disease_user_datasets[disease]
    # get trained autoencoder 
    autoencoder, encoder, decoder = actigraphy_utilities.get_trained_autoencoder(user_datasets, test_train_split_dict, batch_size, latent_dim)
    # use encoder as input into BYOL 
    model = deepcopy(encoder)

    train_dataset = actigraphy_utilities.ActigraphyDataset(user_datasets, test_train_split_dict, 'train')
    test_dataset = actigraphy_utilities.ActigraphyDataset(user_datasets, test_train_split_dict, 'test')

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=28
    )
    val_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        num_workers=28
    )

    byol_model = deepcopy(model)
    image_size = (train_dataset.average_length, 5)
    byol = actigraphy_utilities.BYOL(byol_model, image_size=image_size, projection_size=projection_dim, batch_size=batch_size)
    byol_trainer = pl.Trainer(
        max_epochs=epoch_number,
        weights_summary=None,
        logger=False,
        checkpoint_callback=False
    )
    byol_trainer.fit(byol, train_loader, val_loader) 
    # copy the weights learned inside BYOL back into a fresh copy of the
    # original encoder architecture
    state_dict = byol_model.state_dict()
    byol_encoder = deepcopy(encoder)
    byol_encoder.load_state_dict(state_dict)

    return byol_encoder, test_dataset, train_dataset, working_directory, path_to_embeddings
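
The state_dict round-trip at the end is the noteworthy design choice: rather than reaching into the BYOL wrapper for its encoder attribute, the learned weights are copied back into a clean copy of the original encoder. A quick consistency check (a sketch, assuming both modules share exactly the same architecture):

import torch

for (name_a, p_a), (name_b, p_b) in zip(byol_model.named_parameters(),
                                        byol_encoder.named_parameters()):
    assert name_a == name_b and torch.equal(p_a, p_b), name_a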
Example No. 3
def testing(testing_flag=True,
            dataset_name='mesa',
            disease='metabolic_syndrome',
            bs=128,
            with_wake=False):
    disease_user_datasets, test_train_split_dict, working_directory = get_datasets_from_path(
        testing_flag, dataset_name, with_wake)
    user_datasets = disease_user_datasets[disease]
    user_datasets = get_rid_of_nan_labels_from_user_datasets(user_datasets)
    train_dataset = actigraphy_utilities.ActigraphyDataset(
        user_datasets, test_train_split_dict, 'train')
    test_dataset = actigraphy_utilities.ActigraphyDataset(
        user_datasets, test_train_split_dict, 'test')

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=3)
    val_loader = DataLoader(test_dataset, batch_size=bs, num_workers=3)

    input_x = train_dataset.average_length
    if with_wake:
        input_y = 6
    else:
        input_y = 5
    my_nn = actigraphy_utilities.Net(input_x,
                                     input_y,
                                     testing_flag=testing_flag,
                                     dataset_name=dataset_name,
                                     disease=disease)

    model = actigraphy_utilities.fit_model(my_nn, train_loader, val_loader,
                                           0.01, 1)  # lr=0.01, max_epochs=1

    # run one validation batch through the trained model as a quick sanity check
    for data, label in val_loader:
        pred = model(data.float())
        break
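
The single-batch check above could be extended to report an actual score, e.g. (a sketch, assuming Net emits one score per class):

import numpy as np

for data, label in val_loader:
    pred = model(data.float()).detach().numpy()
    pred_class = np.argmax(pred, axis=1)
    print(f'single-batch accuracy: {(pred_class == label.numpy()).mean():.3f}')
    break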
Example No. 4
def multiclass_testing(testing_flag, dataset_name, disease, bs, with_wake,
                       number_of_classes):
    if dataset_name == 'chapman':
        train_dataset, test_dataset = get_chapman_datasets(testing_flag)
    else:
        disease_user_datasets, test_train_split_dict, working_directory = get_datasets_from_path(
            testing_flag, dataset_name, with_wake)
        user_datasets = disease_user_datasets[disease]
        user_datasets = get_rid_of_nan_labels_from_user_datasets(user_datasets)
        train_dataset = actigraphy_utilities.ActigraphyDataset(
            user_datasets, test_train_split_dict, 'train')
        print(f'length of dataset: {len(train_dataset)}')
        test_dataset = actigraphy_utilities.ActigraphyDataset(
            user_datasets, test_train_split_dict, 'test')

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=3)
    val_loader = DataLoader(test_dataset, batch_size=bs, num_workers=3)
    if dataset_name == 'chapman':
        input_x = 2500
        input_y = 4
    else:
        input_x = train_dataset.average_length
        if with_wake:
            input_y = 6
        else:
            input_y = 5

    if number_of_classes > 2:
        my_nn = actigraphy_utilities.Net(input_x,
                                         input_y,
                                         testing_flag=testing_flag,
                                         output_dim=number_of_classes,
                                         dataset_name=dataset_name,
                                         disease=disease)
        model = actigraphy_utilities.fit_model_multiclass(
            my_nn,
            train_loader,
            val_loader,
            0.1,
            max_epochs=2,
            number_of_classes=number_of_classes)

    else:
        my_nn = actigraphy_utilities.Net(input_x,
                                         input_y,
                                         testing_flag=testing_flag,
                                         dataset_name=dataset_name,
                                         disease=disease)
        model = actigraphy_utilities.fit_model(my_nn,
                                               train_loader,
                                               val_loader,
                                               0.2,
                                               max_epochs=2)

    # evaluate one validation batch as a quick sanity check
    for data, label in val_loader:
        pred = model(data.float()).detach().numpy()
        pred_single = np.argmax(pred, axis=1)
        print(label)
        print(pred)
        print(pred_single)
        auc = roc_auc_score(label.detach().numpy(), pred, multi_class='ovr')
        f1_micro = f1_score(label, pred_single, average='micro')
        f1_macro = f1_score(label, pred_single, average='macro')
        print(auc, f1_macro, f1_micro)
        break
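
Note that roc_auc_score with multi_class='ovr' expects per-class probabilities whose rows sum to one; if Net returns raw logits, a softmax is needed first. A hedged fix for the loop body (assuming logits):

import torch
from sklearn.metrics import roc_auc_score

logits = model(data.float()).detach()
probs = torch.softmax(logits, dim=1).numpy()  # rows now sum to 1
auc = roc_auc_score(label.numpy(), probs, multi_class='ovr')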
Example No. 5
def cnn_get_representations(testing_flag,
                            dataset_name,
                            disease,
                            bs,
                            lr,
                            max_epochs=20,
                            with_wake=True):
    if dataset_name != 'chapman':
        disease_user_datasets, test_train_split_dict, working_directory = get_datasets_from_path(
            testing_flag, dataset_name, with_wake)
        user_datasets = disease_user_datasets[disease]
        user_datasets = get_rid_of_nan_labels_from_user_datasets(user_datasets)
        train_dataset = actigraphy_utilities.ActigraphyDataset(
            user_datasets, test_train_split_dict, 'train')
        test_dataset = actigraphy_utilities.ActigraphyDataset(
            user_datasets, test_train_split_dict, 'test')

        input_x = train_dataset.average_length
        if with_wake:
            input_y = 6
        else:
            input_y = 5
        number_of_classes = get_number_of_classes(dataset_name, disease)

    else:
        train_dataset, test_dataset = get_chapman_datasets(testing_flag)
        input_x = 2500
        input_y = 4
        number_of_classes = 4

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=3)
    val_loader = DataLoader(test_dataset, batch_size=bs, num_workers=3)

    if number_of_classes > 2:
        my_nn = actigraphy_utilities.Net(input_x,
                                         input_y,
                                         testing_flag=testing_flag,
                                         output_dim=number_of_classes,
                                         dataset_name=dataset_name,
                                         disease=disease)
        model = actigraphy_utilities.fit_model_multiclass(
            my_nn,
            train_loader,
            val_loader,
            lr,
            max_epochs=max_epochs,
            number_of_classes=number_of_classes)

    else:
        my_nn = actigraphy_utilities.Net(input_x,
                                         input_y,
                                         testing_flag=testing_flag,
                                         dataset_name=dataset_name,
                                         disease=disease)
        model = actigraphy_utilities.fit_model(my_nn,
                                               train_loader,
                                               val_loader,
                                               lr,
                                               max_epochs=max_epochs)

    train_model = deepcopy(model)
    test_model = deepcopy(model)

    train_embeddings = []

    def get_train_embeddings_hook(module, input, output):
        output = output.detach().numpy()
        train_embeddings.append(output)

    test_embeddings = []

    def get_test_embeddings_hook(module, input, output):
        output = output.detach().numpy()
        test_embeddings.append(output)

    # register forward hook on the fc1 layer
    train_model.fc1.register_forward_hook(get_train_embeddings_hook)
    test_model.fc1.register_forward_hook(get_test_embeddings_hook)

    # fresh loaders for the embedding pass; shuffling is harmless here because
    # the labels are collected in the same loop as the embeddings
    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=3)
    val_loader = DataLoader(test_dataset, batch_size=bs, num_workers=3)
    training_labels = []
    for data, label in train_loader:
        train_model(data.float())  # forward pass only, to trigger the hook
        training_labels.append(label)

    testing_labels = []
    for data, label in val_loader:
        test_model(data.float())  # forward pass only, to trigger the hook
        testing_labels.append(label)

    X_train = np.concatenate(train_embeddings, axis=0)
    X_test = np.concatenate(test_embeddings, axis=0)

    y_train = np.concatenate(training_labels)
    y_test = np.concatenate(testing_labels)

    path_to_embeddings = os.path.join(os.path.dirname(os.getcwd()),
                                      "embeddings", dataset_name, disease,
                                      "cnn")
    if not os.path.exists(path_to_embeddings):
        os.makedirs(path_to_embeddings)

    start_time = datetime.datetime.now()
    start_time_str = start_time.strftime("%Y%m%d-%H%M%S")

    save_name = f'{start_time_str}_testing-{testing_flag}_bs-{bs}_lr-{lr}_eps-{max_epochs}_embeddings.pickle'
    save_path = os.path.join(path_to_embeddings, save_name)
    print(save_path)

    with open(save_path, 'wb') as f:
        data = [X_train, X_test, y_train, y_test]
        pickle.dump(data, f)
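
The saved pickle feeds straight into a shallow downstream classifier, e.g. (a sketch, assuming scikit-learn is available and save_path points at the file written above):

import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

with open(save_path, 'rb') as f:
    X_train, X_test, y_train, y_test = pickle.load(f)

clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
print(f1_score(y_test, clf.predict(X_test), average='macro'))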
Example No. 6
def cnn_different_percentage(testing_flag,
                             dataset_name,
                             disease,
                             bs,
                             lr,
                             max_epochs=20,
                             with_wake=True):
    if dataset_name != 'chapman':
        disease_user_datasets, test_train_split_dict, working_directory = get_datasets_from_path(
            testing_flag, dataset_name, with_wake)
        user_datasets = disease_user_datasets[disease]
        user_datasets = get_rid_of_nan_labels_from_user_datasets(user_datasets)
        train_dataset = actigraphy_utilities.ActigraphyDataset(
            user_datasets, test_train_split_dict, 'train')
        test_dataset = actigraphy_utilities.ActigraphyDataset(
            user_datasets, test_train_split_dict, 'test')

        input_x = train_dataset.average_length
        if with_wake:
            input_y = 6
        else:
            input_y = 5
        number_of_classes = get_number_of_classes(dataset_name, disease)

    else:
        train_dataset, test_dataset = get_chapman_datasets(testing_flag)
        input_x = 2500
        input_y = 4
        number_of_classes = 4

    if testing_flag:
        percentages = [0.2, 0.4, 0.6]
    else:
        percentages = [0.2, 0.4, 0.6, 0.8, 1.0]

    f1_macros_auc_ovrs = []
    f1_micros_auc_ovos = []

    for percentage in percentages:
        # use only the first `percentage` of the training data; Subset keeps
        # the reduced data a proper Dataset for the DataLoader
        reduced_length = int(percentage * len(train_dataset))
        reduced_train_dataset = torch.utils.data.Subset(
            train_dataset, range(reduced_length))

        train_loader = DataLoader(reduced_train_dataset,
                                  batch_size=bs,
                                  shuffle=True,
                                  num_workers=3)
        val_loader = DataLoader(test_dataset, batch_size=bs, num_workers=3)

        if number_of_classes > 2:
            my_nn = actigraphy_utilities.Net(input_x,
                                             input_y,
                                             testing_flag=testing_flag,
                                             output_dim=number_of_classes,
                                             dataset_name=dataset_name,
                                             disease=disease)
            model = actigraphy_utilities.fit_model_multiclass(
                my_nn,
                train_loader,
                val_loader,
                lr,
                max_epochs=max_epochs,
                number_of_classes=number_of_classes)

        else:
            my_nn = actigraphy_utilities.Net(input_x,
                                             input_y,
                                             testing_flag=testing_flag,
                                             dataset_name=dataset_name,
                                             disease=disease)
            model = actigraphy_utilities.fit_model(my_nn,
                                                   train_loader,
                                                   val_loader,
                                                   lr,
                                                   max_epochs=max_epochs)

        # downstream: evaluate on one full-size batch so y_true/y_pred cover
        # the whole test set
        val_loader = DataLoader(test_dataset,
                                batch_size=len(test_dataset),
                                num_workers=3)
        for data, label in val_loader:
            pred = model(data.float()).detach().numpy()
            pred_single = np.argmax(pred, axis=1)
            y_true = label
            y_pred = pred_single

        if dataset_name != 'chapman':
            f1_macro = f1_score(y_true, y_pred, average='macro')
            f1_macros_auc_ovrs.append(round(f1_macro, 2))
            f1_micro = f1_score(y_true, y_pred, average='micro')
            f1_micros_auc_ovos.append(round(f1_micro, 2))
        else:
            auc_ovr = roc_auc_score(y_true, pred, multi_class='ovr')
            f1_macros_auc_ovrs.append(round(auc_ovr, 2))
            auc_ovo = roc_auc_score(y_true, pred, multi_class='ovo')
            f1_micros_auc_ovos.append(round(auc_ovo, 2))

    print(f'percentage used for training: {percentages}')
    print(f'f1 macros/auc ovrs: {f1_macros_auc_ovrs}')
    print(f'f1 micros/auc ovos: {f1_micros_auc_ovos}')

    start_time = datetime.datetime.now()
    start_time_str = start_time.strftime("%Y%m%d-%H%M%S")

    path_to_training_percentage = os.path.join(os.path.dirname(os.getcwd()),
                                               "training_percentage",
                                               dataset_name, disease, "cnn")
    if not os.path.exists(path_to_training_percentage):
        os.makedirs(path_to_training_percentage)

    save_name = f'{start_time_str}_testing-{testing_flag}_bs-{bs}_lr-{lr}_eps-{max_epochs}_different_training_percentages.pickle'
    save_path = os.path.join(path_to_training_percentage, save_name)
    print(save_path)

    with open(save_path, 'wb') as f:
        data = [percentages, f1_macros_auc_ovrs, f1_micros_auc_ovos]
        pickle.dump(data, f)
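
A sketch for reading the results back and plotting the learning curve (assumes matplotlib; save_path is the path printed above):

import pickle
import matplotlib.pyplot as plt

with open(save_path, 'rb') as f:
    percentages, macro_scores, micro_scores = pickle.load(f)

plt.plot(percentages, macro_scores, marker='o', label='f1 macro / auc ovr')
plt.plot(percentages, micro_scores, marker='o', label='f1 micro / auc ovo')
plt.xlabel('fraction of training data used')
plt.ylabel('score')
plt.legend()
plt.show()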
Example No. 7
def cnn_disease_confidence_levels(testing_flag,
                                  dataset_name,
                                  disease,
                                  bs,
                                  lr,
                                  max_epochs=20,
                                  with_wake=True,
                                  N=500):
    if dataset_name != 'chapman':
        disease_user_datasets, test_train_split_dict, working_directory = get_datasets_from_path(
            testing_flag, dataset_name, with_wake)
        user_datasets = disease_user_datasets[disease]
        user_datasets = get_rid_of_nan_labels_from_user_datasets(user_datasets)
        train_dataset = actigraphy_utilities.ActigraphyDataset(
            user_datasets, test_train_split_dict, 'train')
        test_dataset = actigraphy_utilities.ActigraphyDataset(
            user_datasets, test_train_split_dict, 'test')

        input_x = train_dataset.average_length
        if with_wake:
            input_y = 6
        else:
            input_y = 5
        number_of_classes = get_number_of_classes(dataset_name, disease)

    else:
        train_dataset, test_dataset = get_chapman_datasets(testing_flag)
        input_x = 2500
        input_y = 4
        number_of_classes = 4

    train_loader = DataLoader(train_dataset,
                              batch_size=bs,
                              shuffle=True,
                              num_workers=3)
    val_loader = DataLoader(test_dataset, batch_size=bs, num_workers=3)

    if number_of_classes > 2:
        my_nn = actigraphy_utilities.Net(input_x,
                                         input_y,
                                         testing_flag=testing_flag,
                                         output_dim=number_of_classes,
                                         dataset_name=dataset_name,
                                         disease=disease)
        model = actigraphy_utilities.fit_model_multiclass(
            my_nn,
            train_loader,
            val_loader,
            lr,
            max_epochs=max_epochs,
            number_of_classes=number_of_classes)

    else:
        my_nn = actigraphy_utilities.Net(input_x,
                                         input_y,
                                         testing_flag=testing_flag,
                                         dataset_name=dataset_name,
                                         disease=disease)
        model = actigraphy_utilities.fit_model(my_nn,
                                               train_loader,
                                               val_loader,
                                               lr,
                                               max_epochs=max_epochs)

    save_model_directory = os.path.join("save_models", "cnn")
    try:
        if not os.path.exists(save_model_directory):
            os.makedirs(save_model_directory)
    except OSError as err:
        print(err)

    start_time = datetime.datetime.now()
    start_time_str = start_time.strftime("%Y%m%d-%H%M%S")
    save_path = os.path.join(
        save_model_directory,
        f'{start_time_str}-{testing_flag}-{bs}-{dataset_name}-{disease}.h5')
    torch.save(model, save_path)

    # downstream: evaluate on one full-size batch covering the whole test set
    val_loader = DataLoader(test_dataset,
                            batch_size=len(test_dataset),
                            num_workers=3)
    for data, label in val_loader:
        pred = model(data.float()).detach().numpy()
        pred_single = np.argmax(pred, axis=1)
        y_true = label
        y_pred = pred_single
    # bootstrap
    if dataset_name != 'chapman':
        middle_macro, half_macro, mean_macro, std_macro, middle_micro, half_micro, mean_micro, std_micro = get_confidence_interval_f1_micro_macro(
            y_true, y_pred, n_bootstraps=N)
        print_confidence_f1_scores(middle_macro, half_macro, mean_macro,
                                   std_macro, middle_micro, half_micro,
                                   mean_micro, std_micro)
    else:
        middle_ovr, half_ovr, mean_ovr, std_ovr, middle_ovo, half_ovo, mean_ovo, std_ovo = get_confidence_interval_auc(
            y_true, pred)
        print_confidence_auc_scores(middle_ovr, half_ovr, mean_ovr, std_ovr,
                                    middle_ovo, half_ovo, mean_ovo, std_ovo)
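
get_confidence_interval_f1_micro_macro is not shown here; a generic percentile bootstrap of the same shape might look like this (an illustrative sketch, not the repository's implementation):

import numpy as np
from sklearn.metrics import f1_score

def bootstrap_f1_interval(y_true, y_pred, n_bootstraps=500, alpha=0.05, seed=0):
    # percentile bootstrap confidence interval for the macro F1 score
    rng = np.random.default_rng(seed)
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    scores = []
    for _ in range(n_bootstraps):
        idx = rng.integers(0, len(y_true), len(y_true))  # resample with replacement
        if len(np.unique(y_true[idx])) < 2:
            continue  # skip degenerate resamples containing a single class
        scores.append(f1_score(y_true[idx], y_pred[idx], average='macro'))
    low, high = np.percentile(scores, [100 * alpha / 2, 100 * (1 - alpha / 2)])
    return (low + high) / 2, (high - low) / 2, float(np.mean(scores)), float(np.std(scores))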