Example #1
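# NOTE (assumed): imports required by this snippet; the helper functions
# (get_input_arguments, set_device, load_json, transform_data, load_datasets,
# select_model, train_model, test_model, save_checkpoint) are assumed to come
# from the project's own modules.
import os

import torch
import torch.utils.data
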
def main():
    
    parser = get_input_arguments()
    
    # check for data directory
    if not os.path.isdir(parser.data_directory):
        print(f'Cannot locate data directory: {parser.data_directory}, please enter another directory.')
        exit(1)
    
    # check for save directory
    if not os.path.isdir(parser.save_dir):
        print(f'Creating directory: {parser.save_dir}')
        os.makedirs(parser.save_dir)
    
    device = set_device(parser.use_gpu)
    
    # Map categories to their respective names
    cat_to_name = load_json(parser.category_json)
    
    output_size = len(cat_to_name)
    print(f'There are {output_size} categories in the dataset, meaning an output layer of {output_size} units')    
    
    train_transform, valid_transform, test_transform = transform_data()
    train_dataset, valid_dataset, test_dataset = load_datasets(parser.data_directory, train_transform, valid_transform, test_transform)
    # wrap the train, validation and test datasets in DataLoaders with their batch sizes
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=True)
    validationloader = torch.utils.data.DataLoader(valid_dataset, batch_size=64)
    testloader = torch.utils.data.DataLoader(test_dataset, batch_size=64)
                   
    # builds the chosen architecture with the requested hidden units; returns the model, NLLLoss criterion, optimizer and classifier
    model, criterion, optimizer, classifier = select_model(parser.hidden_units, output_size, parser.learnrate, device, parser.arch)
    
    train_model(device, parser.epochs, trainloader, validationloader, model, optimizer, criterion)
        
    test_model(device, testloader, model)
        
    save_checkpoint(parser.save_dir, train_dataset, model, classifier, optimizer, parser.epochs, parser.arch)  
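
# Assumed entry point (not shown in the original snippet).
if __name__ == '__main__':
    main()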
Example #2
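# NOTE (assumed): imports required by this snippet; `x`, `y` and `models` are
# assumed to be defined earlier, and select_model is a project helper.
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import MinMaxScaler
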
i = 0
epochs = 100
for epoch in range(epochs):
    i = i + 1
    print(i)
    ## Split dataset into 80% training / 20% test sets.
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        shuffle=True,
                                                        stratify=y)
    sc = MinMaxScaler(feature_range=(0, 1))
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)
    ## Define which model and parameters to tune, their ranges, and the inner cross-validation method (n_splits, n_repeats)
    model, param_grid, cv = select_model(models)
    ## Based on the chosen model, create a grid search for the optimal model
    grid = GridSearchCV(estimator=model,
                        param_grid=param_grid,
                        cv=cv,
                        scoring='roc_auc',
                        n_jobs=-1)
    ## Fit the grid to the training set
    grid_result = grid.fit(x_train, y_train)
    ## Print out the best model chosen in the grid
    print('Best model:', grid_result.best_estimator_)
    ## Print out the best hyper-parameters chosen in the grid
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))
    ## Collect the AUC mean and standard deviation for each hyper-parameter setting tried during tuning
    means = grid_result.cv_results_['mean_test_score']
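    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']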
Example #3
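# NOTE (assumed): imports matching the TF1 / standalone-Keras API used below;
# classes, Callbacks, data_selection, model_selection, util, plotting and
# info_plane are project modules.
from numpy.random import seed
from tensorflow import set_random_seed
from keras import optimizers
from keras.callbacks import LambdaCallback
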
def collect():

    name_list = [
        "mnist",
        #"mnistcov"
    ]

    nr_of_epochs = 8000
    record_all_flag = False
    rec_test_flag = True
    learning_r = [0.0004]
    save_data_flag = False
    show_flag = True
    separate_flag = False
    save_MI_and_plot_flag = False

    bin_size_or_nr = [True]
    bins = [0.01, 0.07, 0.15, 0.3]
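    # The two settings above control binning for the MI estimate: when
    # bin_size_or_nr is True the values in `bins` are treated as bin sizes,
    # when False as numbers of bins (see the checks in the binning-MI loop).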

    color_list = [
        "red", "blue", "green", "orange", "purple", "brown", "pink", "teal",
        "goldenrod"
    ]

    models = [3, 4, 2, 1, 5, 6]

    for set_name in name_list:
        nrs = [3, 8, 1]
        #samples = "full"
        samples = 1600
        seed(1337)
        set_random_seed(1337)
        X_train, X_test, y_train, y_test = data_selection.select_data(
            set_name, shuffle=True, samples_per_class=samples, list_of_nrs=nrs)
        batch_size = [256, X_train.shape[0], 128, 512]

        print("calculations starting for: ", set_name)
        for i in models:
            if (i <= 4) and ("cov" in set_name):
                continue
            if (i > 4) and ("cov" not in set_name):
                continue

            for batch in batch_size:

                seed(1337)
                set_random_seed(1337)

                # objects to record parameters
                outputs = classes.Outputs()

                # define and train model
                output_recording = LambdaCallback(
                    on_epoch_end=lambda
                    epoch, logs: Callbacks.record_activations(
                        outputs, model, epoch, X_train, X_test, y_test, batch,
                        record_all_flag, rec_test_flag))

                model, architecture = model_selection.select_model(
                    i, nr_of_epochs, set_name, X_train.shape, y_train)

                # learning_r is a list; Adam expects a scalar learning rate
                adam = optimizers.Adam(lr=learning_r[0])
                model.compile(loss="categorical_crossentropy",
                              optimizer=adam,
                              metrics=["accuracy"])

                history = model.fit(X_train,
                                    y_train,
                                    epochs=nr_of_epochs,
                                    batch_size=batch,
                                    validation_split=0.2,
                                    callbacks=[output_recording])
                # final model score
                score = model.evaluate(X_test, y_test, verbose=0)
                score = score[1]

                # save data
                common_name = architecture + "_lr_" + str(
                    learning_r) + "_batchsize_" + str(batch)
                if "mnist" in set_name:
                    common_name = str(samples) + str(nrs) + common_name

                aname = common_name + "_activations"
                outputs.model_score = score
                if save_data_flag:
                    util.save(outputs, aname)

                hname = common_name + "_history"
                h_obj = history.history
                h_obj["model_score"] = score
                if save_data_flag:
                    util.save(h_obj, hname)

                plotting.plot_history(h_obj, common_name, show_flag,
                                      save_MI_and_plot_flag)

                if rec_test_flag:
                    plotting.plot_test_development(outputs.int_model_score,
                                                   common_name, show_flag,
                                                   save_MI_and_plot_flag)

                # compute binning MI
                for flag in bin_size_or_nr:
                    for nr_of_bins in bins:
                        if flag and nr_of_bins > 1:
                            continue
                        if not flag and nr_of_bins < 1:
                            continue
                        seed(1337)
                        set_random_seed(1337)
                        est_type_flag = 1
                        info_plane.create_infoplane(common_name,
                                                    X_train,
                                                    y_train,
                                                    outputs,
                                                    est_type_flag,
                                                    color_list,
                                                    nr_of_bins,
                                                    flag,
                                                    show_flag,
                                                    separate_flag,
                                                    save_MI_and_plot_flag,
                                                    par_flag=False)

                seed(1337)
                set_random_seed(1337)
                # compute EDGE
                if batch == 256:
                    est_type_flag = 2
                    info_plane.create_infoplane(
                        common_name,
                        X_train,
                        y_train,
                        outputs,
                        est_type_flag,
                        color_list,
                        show_flag,
                        separate_flag,
                        save_flag=save_MI_and_plot_flag,
                        par_flag=True)

                seed(1337)
                set_random_seed(1337)
                # compute KDE upper
                est_type_flag = 3
                info_plane.create_infoplane(common_name,
                                            X_train,
                                            y_train,
                                            outputs,
                                            est_type_flag,
                                            color_list,
                                            show_flag,
                                            separate_flag,
                                            save_flag=save_MI_and_plot_flag,
                                            par_flag=False)
                seed(1337)
                set_random_seed(1337)
                # compute KDE lower
                if batch == 256:
                    est_type_flag = 4
                    info_plane.create_infoplane(
                        common_name,
                        X_train,
                        y_train,
                        outputs,
                        est_type_flag,
                        color_list,
                        show_flag,
                        separate_flag,
                        save_flag=save_MI_and_plot_flag,
                        par_flag=False)

                seed(1337)
                set_random_seed(1337)
                # compute KSG discrete
                est_type_flag = 5
                info_plane.create_infoplane(common_name,
                                            X_train,
                                            y_train,
                                            outputs,
                                            est_type_flag,
                                            color_list,
                                            show_flag,
                                            separate_flag,
                                            save_flag=save_MI_and_plot_flag,
                                            par_flag=False)
Example #4
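# NOTE (assumed): imports required by this snippet; `x` and `y` are assumed to
# be defined earlier, and select_model is a project helper.
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
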
i = 0
epochs = 25
for epoch in range(epochs):
    i = i + 1
    print(i)
    ## Split dataset into 80% training / 20% test sets.
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        shuffle=True)
    ## Scale the dataset by removing mean and scaling to unit variance
    sc = StandardScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)
    ## Define which model and parameters to tune, their ranges, and the cross-validation method (n_splits, n_repeats)
    model, param_grid, cv = select_model("L2_Logistic_Regression")
    ## Based on the chosen model, create a grid to search for the optimal model
    grid = GridSearchCV(estimator=model,
                        param_grid=param_grid,
                        cv=cv,
                        scoring="roc_auc",
                        n_jobs=1)
    ## Fit the grid to the training set
    grid_result = grid.fit(x_train, y_train)
    print('Best C:', grid_result.best_estimator_.get_params()['C'])
    print('Best model:', grid_result.best_estimator_)
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
Example #5
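# NOTE (assumed): imports used by this script; config, DataGenerator and the
# project helpers (create_labels, split_train_test, split_train_val,
# select_model, callbacks_list, augmentation, standardization_matrix,
# create_data_directory, load_best_model and the plot_*/save_* functions)
# come from the project's own modules.
import os
import sys
import time
from shutil import copyfile

import numpy as np
from sklearn.metrics import auc, classification_report, roc_curve
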
def main(argv):
    """
    This script implements the training and testing of a deep learning model for the classification of
    Alzheimer's disease based on structural MRI data. All settings and model parameters are defined in the
    config.py configuration file.

    In short, the data is split into train, validation and test sets. Cross-validation is performed: in each
    fold the data is augmented and normalized, and a CNN is trained and evaluated. The script outputs train,
    validation and test performance metrics, which are stored in a dictionary saved as 'results.npy'. Plots of
    the performance per epoch and of the ROC-AUC of all folds are produced, and the configuration file is saved
    together with the model information.

    Live performance can be monitored through TensorBoard (only when running locally):
    $ tensorboard --logdir=<results dir>/logs
    """

    # start timer
    start = time.time()
    start_localtime = time.localtime()

    # if a temp job dir is provided as input, use it as the data dir (server)
    if len(argv) > 1:
        config.data_dir = argv[1] + "/"
        config.aug_dir = argv[1] + "/augmented/"
    # if a job nr is provided, use it to define the output dir (server)
    if len(argv) > 2:
        config.output_dir = f"{config.all_results_dir}{argv[2]}_{config.roi}_{config.task}_{config.model}{config.comments}/"

    # save configuration file
    create_data_directory(config.output_dir)
    copyfile(config.config_file,
             f"{config.output_dir}configuration_{config.model}.py")

    # initialization of the results dictionary: one list per metric per split
    metrics = ["loss", "acc", "fpr", "tpr", "auc", "sensitivity", "specificity"]
    results = {split: {metric: [] for metric in metrics}
               for split in ["train", "validation", "test"]}

    # create labels
    partition_labels, labels = create_labels()

    # train test split
    partition_train_test = split_train_test(partition_labels, labels)
    np.save(config.output_dir + "train_test.npy", partition_train_test)

    # train val split
    partition_train_validation = split_train_val(partition_train_test, labels)
    np.save(config.output_dir + "train_val.npy", partition_train_validation)

    # START CROSS VALIDATION
    for i in range(config.k_cross_validation):

        # select model
        model = select_model(i)

        print("\n----------- CROSS VALIDATION " + str(i) +
              " ----------------\n")

        # augmentation of training data
        if config.augmentation:
            partition_train_validation["train"][i], labels = augmentation(
                partition_train_validation["train"][i], labels)
            count_sets(partition_train_validation, labels)

        # create results directory for fold
        results_dir = config.output_dir + "k" + str(i)
        create_data_directory(results_dir)
        file = open(results_dir + "/results.txt", 'w')

        # get mean + std of train data to standardize all data in generator
        if config.all_data:
            mean = np.load(config.mean_file)
            std = np.load(config.std_file)
        else:
            mean, std = standardization_matrix(
                partition_train_validation["train"][i])

        # save mean + std
        np.save(results_dir + "/mean.npy", mean)
        np.save(results_dir + "/std.npy", std)

        # create data generators
        train_generator = DataGenerator(partition_train_validation["train"][i],
                                        labels,
                                        mean,
                                        std,
                                        batch_size=config.batch_size,
                                        dim=config.input_shape,
                                        n_channels=1,
                                        n_classes=2,
                                        shuffle=True)
        validation_generator = DataGenerator(
            partition_train_validation["validation"][i],
            labels,
            mean,
            std,
            batch_size=config.batch_size,
            dim=config.input_shape,
            n_channels=1,
            n_classes=2,
            shuffle=True)
        test_generator = DataGenerator(partition_train_test["test"][i],
                                       labels,
                                       mean,
                                       std,
                                       batch_size=1,
                                       dim=config.input_shape,
                                       n_channels=1,
                                       n_classes=2,
                                       shuffle=False)
        CM_train_generator = DataGenerator(
            partition_train_validation["train"][i],
            labels,
            mean,
            std,
            batch_size=1,
            dim=config.input_shape,
            n_channels=1,
            n_classes=2,
            shuffle=False)
        CM_validation_generator = DataGenerator(
            partition_train_validation["validation"][i],
            labels,
            mean,
            std,
            batch_size=1,
            dim=config.input_shape,
            n_channels=1,
            n_classes=2,
            shuffle=False)

        # set callbacks
        callback_list = callbacks_list(CM_train_generator,
                                       CM_validation_generator, labels,
                                       results_dir)

        if not config.test_only:

            # TRAINING
            history = model.fit_generator(generator=train_generator,
                                          validation_data=validation_generator,
                                          class_weight=None,
                                          callbacks=callback_list,
                                          epochs=config.epochs,
                                          verbose=1,
                                          use_multiprocessing=False,
                                          workers=0)

            # plot acc + loss
            plot_acc_loss(history, results_dir, i)

            # plot performance per epoch
            if config.epoch_performance:
                plot_epoch_performance(callback_list[0])
                plot_epoch_performance(callback_list[1])

            # load model of epoch with best performance
            model = load_best_model(results_dir)

            # TRAIN EVALUATION

            # roc auc
            Y_pred = model.predict_generator(CM_train_generator, verbose=0)
            y_pred = np.argmax(Y_pred, axis=1)
            y_true = [labels[subject_id] for subject_id in CM_train_generator.list_IDs]
            fpr, tpr, thresholds = roc_curve(y_true, Y_pred[:, 1])
            roc_auc = auc(fpr, tpr)

            # save classification per subject (for statistical test)
            np.save(results_dir + "/train_IDs.npy",
                    CM_train_generator.list_IDs)
            np.save(results_dir + "/train_y_true.npy", y_true)
            np.save(results_dir + "/train_y_pred.npy", y_pred)

            # sen / spe
            report = classification_report(
                y_true,
                y_pred,
                target_names=[config.class0, config.class1],
                output_dict=True)

            # loss, acc
            score = model.evaluate_generator(generator=train_generator,
                                             verbose=1)

            results["train"]["loss"].append(score[0])
            results["train"]["acc"].append(score[1])
            results["train"]["fpr"].append(fpr)
            results["train"]["tpr"].append(tpr)
            results["train"]["auc"].append(roc_auc)
            results["train"]["sensitivity"].append(
                report[config.class1]["recall"])
            results["train"]["specificity"].append(
                report[config.class0]["recall"])

            # report train results
            train_results = f"\nTrain\n    loss: {score[0]:.4f}\n    acc: {score[1]:.4f}\n    AUC: {roc_auc:.4f}\n    " \
                            f"sens: {report[config.class1]['recall']:.4f}\n    spec: {report[config.class0]['recall']:.4f}\n\n"
            file.write(train_results)
            print(train_results)

            # VALIDATION EVALUATION

            # roc auc
            Y_pred = model.predict_generator(CM_validation_generator,
                                             verbose=0)
            y_pred = np.argmax(Y_pred, axis=1)
            y_true = [labels[subject_id] for subject_id in CM_validation_generator.list_IDs]
            fpr, tpr, thresholds = roc_curve(y_true, Y_pred[:, 1])
            roc_auc = auc(fpr, tpr)

            # save classification per subject (for statistical test)
            np.save(results_dir + "/val_IDs.npy",
                    CM_validation_generator.list_IDs)
            np.save(results_dir + "/val_y_true.npy", y_true)
            np.save(results_dir + "/val_y_pred.npy", y_pred)

            # sen / spe
            report = classification_report(
                y_true,
                y_pred,
                target_names=[config.class0, config.class1],
                output_dict=True)

            # loss, acc
            score = model.evaluate_generator(generator=validation_generator,
                                             verbose=1)

            results["validation"]["loss"].append(score[0])
            results["validation"]["acc"].append(score[1])
            results["validation"]["fpr"].append(fpr)
            results["validation"]["tpr"].append(tpr)
            results["validation"]["auc"].append(roc_auc)
            results["validation"]["sensitivity"].append(
                report[config.class1]["recall"])
            results["validation"]["specificity"].append(
                report[config.class0]["recall"])

            # report val results
            val_results = f"\nValidation\n    loss: {score[0]:.4f}\n    acc: {score[1]:.4f}\n    AUC: {roc_auc:.4f}\n    " \
                            f"sens: {report[config.class1]['recall']:.4f}\n    spec: {report[config.class0]['recall']:.4f}\n\n"
            file.write(val_results)
            print(val_results)

        # TEST EVALUATION

        # roc auc
        Y_pred = model.predict_generator(test_generator, verbose=0)
        y_pred = np.argmax(Y_pred, axis=1)
        y_true = [labels[subject_id] for subject_id in test_generator.list_IDs]
        fpr, tpr, thresholds = roc_curve(y_true, Y_pred[:, 1])
        roc_auc = auc(fpr, tpr)

        # save classification per subject (for statistical test)
        np.save(results_dir + "/test_IDs.npy", test_generator.list_IDs)
        np.save(results_dir + "/test_y_true.npy", y_true)
        np.save(results_dir + "/test_y_pred.npy", y_pred)

        # sen / spe
        report = classification_report(
            y_true,
            y_pred,
            target_names=[config.class0, config.class1],
            output_dict=True)

        # loss, acc
        score = model.evaluate_generator(generator=test_generator, verbose=1)

        results["test"]["loss"].append(score[0])
        results["test"]["acc"].append(score[1])
        results["test"]["fpr"].append(fpr)
        results["test"]["tpr"].append(tpr)
        results["test"]["auc"].append(roc_auc)
        results["test"]["sensitivity"].append(report[config.class1]["recall"])
        results["test"]["specificity"].append(report[config.class0]["recall"])

        # report test results
        test_results = f"\nTest\n    loss: {score[0]:.4f}\n    acc: {score[1]:.4f}\n    AUC: {roc_auc:.4f}\n    " \
                        f"sens: {report[config.class1]['recall']:.4f}\n    spec: {report[config.class0]['recall']:.4f}\n\n"
        file.write(test_results)
        print(test_results)
        file.close()

        # delete augmented images
        if config.augmentation:
            os.system('rm -rf %s/*' % config.aug_dir)

    print("\n---------------------- RESULTS ----------------------\n\n")

    # plot test ROC of all folds + average + std
    plot_ROC(results["test"]["tpr"], results["test"]["fpr"],
             results["test"]["auc"])

    # end timer
    end = time.time()
    end_localtime = time.localtime()

    # save results + model
    np.save(config.output_dir + "results.npy", results)
    save_DL_model(model)
    save_results(results, start, start_localtime, end, end_localtime)

    print('\nend')
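
# Assumed entry point (not shown in the original script).
if __name__ == '__main__':
    main(sys.argv)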
Example #6
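# NOTE (assumed): imports required by this snippet; `args` comes from an
# argparse parser defined earlier, and select_model, pad_to_same_shape,
# remap_using_flow_fields and local_optim_iter are defined elsewhere in the
# project.
import os

import imageio
import torch
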
if not os.path.exists(args.path_query_image):
    raise ValueError('The path to the source image you provided does not exist!')
if not os.path.exists(args.path_reference_image):
    raise ValueError('The path to the target image you provided does not exist!')

if not os.path.isdir(args.write_dir):
    os.makedirs(args.write_dir)
try:
    query_image = imageio.imread(args.path_query_image)
    reference_image = imageio.imread(args.path_reference_image)
    query_image, reference_image = pad_to_same_shape(query_image, reference_image)
except Exception:
    raise ValueError('It seems that the paths to the images you provided do not work!')

with torch.no_grad():
    network = select_model(args.model, args.pre_trained_model, args.optim_iter, local_optim_iter,
                           path_to_pre_trained_models=args.pre_trained_models_dir)

    # convert numpy to torch tensor and put it in right shape
    query_image_ = torch.from_numpy(query_image).permute(2, 0, 1).unsqueeze(0)
    reference_image_ = torch.from_numpy(reference_image).permute(2, 0, 1).unsqueeze(0)
    # ATTENTION: source and target images are torch tensors of size 1x3xHxW here, without further pre-processing;
    # the specific pre-processing (/255 and rescaling) is done within the function.

    # pass both images to the network; it pre-processes them and outputs the estimated flow with shape 1x2xHxW
    if args.flipping_condition and 'GLUNet' in args.model:
        estimated_flow = network.estimate_flow_with_flipping_condition(query_image_, reference_image_,
                                                                       mode='channel_first')
    else:
        estimated_flow = network.estimate_flow(query_image_, reference_image_, mode='channel_first')
    estimated_flow_numpy = estimated_flow.squeeze().permute(1, 2, 0).cpu().numpy()
    warped_query_image = remap_using_flow_fields(query_image, estimated_flow.squeeze()[0].cpu().numpy(),
                                                 estimated_flow.squeeze()[1].cpu().numpy())