Example #1
import glob
import os

from joblib import dump
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import shuffle

import utils  # project-local helper module (read_landmark, print_confusion_matrix)


def main(param):

    data_path = param.data_path
    numbers = range(1, 7)
    names = ["ds", "ms", "mw"]

    X = []
    y = []

    for number in numbers:
        for name in names:
            for pathname in glob.glob(data_path + '/filmy_' + name + '_' +
                                      str(number) + '_l.mov/*'):
                landmark = utils.read_landmark(
                    os.path.join(pathname, "landmark.txt"))
                if (landmark[0] != 0.0):
                    X.append(landmark)
                    y.append(number - 1)

    print(len(X), len(y))

    X, y = shuffle(X, y, random_state=2)
    trainX, testX, trainY, testY = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=4)

    model = KNeighborsClassifier()
    #model = GaussianNB()
    model.fit(trainX, trainY)
    dump(model, 'model.joblib')

    y_pred = model.predict(testX)
    class_report = classification_report(testY, y_pred)
    conf_matrix = confusion_matrix(testY, y_pred)
    conf_matrix_printed = utils.print_confusion_matrix(conf_matrix, testY)

    # with open("report_naive_bayes_gaussian.txt", "w") as f:
    with open("report_5_neighbours.txt", "w") as f:
        f.write(class_report)
        f.write('\n')
        f.write(conf_matrix_printed)

    print(class_report)
    print(conf_matrix)
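
Not part of the original example: a minimal sketch of reloading the dumped classifier and scoring one new landmark vector (the file path below is hypothetical).

from joblib import load

import utils  # same project-local helper as above

model = load('model.joblib')
landmark = utils.read_landmark('some_clip/landmark.txt')  # hypothetical path
if landmark[0] != 0.0:  # same validity check used when building X
    print(model.predict([landmark])[0])  # predicted class index (number - 1)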

Example #2

    for fnames, fundus_rescale_mean_subtract_lesions, grades in train_batch_fetcher():
        loss, acc = network.train_on_batch(
            fundus_rescale_mean_subtract_lesions, grades)
        losses += [loss] * fundus_rescale_mean_subtract_lesions.shape[0]
        accs += [acc] * fundus_rescale_mean_subtract_lesions.shape[0]
    utils.print_metrics(epoch + 1,
                        training_loss=np.mean(losses),
                        training_acc=np.mean(accs))

    # evaluate on the validation set
    if epoch in validation_epochs:
        pred_grades, true_grades = [], []
        for fnames, fundus_rescale_mean_subtract_lesions, grades in val_batch_fetcher():
            pred = network.predict(fundus_rescale_mean_subtract_lesions,
                                   batch_size=batch_size,
                                   verbose=0)
            pred_grades += pred.tolist()
            true_grades += grades.tolist()
        utils.print_confusion_matrix(true_grades, pred_grades, "DR")

        # save the weight
        if epoch in validation_epochs:
            network.save_weights(
                os.path.join(model_out_dir, "network_{}.h5".format(epoch + 1)))

    duration = time.time() - start_time
    print "duration for {}th epoch: {}s".format(epoch + 1, duration)
    sys.stdout.flush()
Example #3
# get performance for each filtered dataset
dataset_acc = dict()
for dataset_name in dataset_path:
    path = dataset_path[dataset_name]

    dataset_for_test = dataset.listDataset(root_path=path, transform=transform, encoder=encoder,
                                        img_size=(img_width, img_height))

    assert dataset_for_test

    print("num. " + str(dataset_name) + " data : " + str(len(dataset_for_test)))

    acc, mean_ed, conf_mat = \
        get_performace(dataset_name, dataset_for_test, correct_dir, incorrect_dir)
    print('confusion mat for ' + str(dataset_name))
    utils.print_confusion_matrix(conf_mat)
    conf_mat_path = os.path.join(result_dir, str(dataset_name) +'_conf_mat.txt')
    utils.write_confusion_matrix(conf_mat, conf_mat_path)
    print(str(dataset_name) + '. acc : ' + str(acc))
    print(str(dataset_name) + '. MED : ' + str(mean_ed))

    dataset_acc[dataset_name] = [acc, mean_ed]

with open(os.path.join(result_dir, 'results.txt'), 'w') as fout:
    for iter_acc in dataset_acc:
        print("Acc. of " + str(iter_acc) + ": %.5f" % (dataset_acc[iter_acc][0]))
        print("MED. of " + str(iter_acc) + ": %.5f" % (dataset_acc[iter_acc][1]))

        fout.write("Acc. of " + str(iter_acc) + ": %.5f\n" % (dataset_acc[iter_acc][0]))
        fout.write("MED. of " + str(iter_acc) + ": %.5f\n" % (dataset_acc[iter_acc][1]))
Example #4
        pred_grades, true_grades = [], []
        for fnames, imgs_mean_subt, imgs_z, vessels, grades_onehot in val_batch_fetcher():
            if check_validation_batch:
                utils.check_input(imgs_mean_subt, imgs_z, vessels,
                                  val_img_check_dir)
                check_validation_batch = False
            pred = network.predict([imgs_mean_subt, imgs_z, vessels],
                                   batch_size=batch_size,
                                   verbose=0)
            pred_grades += np.argmax(pred, axis=1).tolist()
            true_grades += np.argmax(grades_onehot, axis=1).tolist()
            loss, acc = network.evaluate([imgs_mean_subt, imgs_z, vessels],
                                         grades_onehot,
                                         batch_size=batch_size,
                                         verbose=0)
            losses += [loss] * imgs_mean_subt.shape[0]
            accs += [acc] * imgs_mean_subt.shape[0]
        utils.print_metrics(epoch + 1,
                            validation_loss=np.mean(losses),
                            validation_acc=np.mean(accs))
        utils.print_confusion_matrix(true_grades, pred_grades,
                                     FLAGS.grade_type)

        # save the weight
        if epoch in validation_epochs:
            network.save_weights(
                os.path.join(model_out_dir, "network_{}.h5".format(epoch + 1)))

    sys.stdout.flush()
Example #5

if __name__ == '__main__':

    #####Instantiate models, data and labels#####
    labels_w = ['Arriba', 'Abajo', 'Adelante', 'Atrás', 'Derecha', 'Izquierda']
    labels_v = ['/a/', '/e/', '/i/', '/o/', '/u/']
    labels_c = [
        '/a/', '/e/', '/i/', '/o/', '/u/', 'Arriba', 'Abajo', 'Adelante',
        'Atrás', 'Derecha', 'Izquierda'
    ]
    model_types = ['shallow', 'deep', 'eegnet']
    data_types = ['vowels', 'words', 'all_classes']

    for model_type in model_types:
        for data_type in data_types:
            y_true_all, y_pred_all = get_stats(
                model_type, data_type)  #run the main function
            cm = confusion_matrix(y_true_all, y_pred_all)
            cm_filename = f"results_folder/results/misc/{model_type}_{data_type}"  #folder to save to

            if data_type == 'words':
                labels = labels_w
            elif data_type == 'vowels':
                labels = labels_v
            elif data_type == 'all_classes':
                labels = labels_c
            print_confusion_matrix(cm,
                                   labels,
                                   filename=cm_filename,
                                   normalize=True)
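
The print_confusion_matrix helper used throughout these examples is project-specific; purely as an illustration, a minimal version consistent with the call above (optional row normalization, a console print, and a saved heatmap when a filename is given) could look like this:

import numpy as np
import matplotlib.pyplot as plt


def print_confusion_matrix(cm, labels, filename=None, normalize=False):
    """Hypothetical sketch; the real project helper may differ."""
    cm = np.asarray(cm, dtype=float)
    if normalize:
        # Normalize each row (true class) to proportions; guard empty rows.
        cm = cm / np.maximum(cm.sum(axis=1, keepdims=True), 1e-12)
    print(" " * 12 + "".join("{:>10}".format(l[:9]) for l in labels))
    for label, row in zip(labels, cm):
        print("{:>12}".format(label[:11]) +
              "".join("{:10.2f}".format(v) for v in row))
    if filename is not None:
        plt.imshow(cm, cmap="Blues")
        plt.xticks(range(len(labels)), labels, rotation=90)
        plt.yticks(range(len(labels)), labels)
        plt.colorbar()
        plt.savefig(filename + ".png", bbox_inches="tight")
        plt.close()
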
Example #6
def main(train_images_path, train_labels_path, model_save_path):
    """ Load data, train model, display metrics, save model."""
    # Load data.
    #dataset = data.MnistDataset(
    #        train_images_path, train_labels_path,
    #        image_transform=None,
    #        label_transform=None,
    #    )
    dataset = torchvision.datasets.CIFAR10('data/',
                                           train=True,
                                           transform=None,
                                           target_transform=None,
                                           download=True)
    #dataset = torchvision.datasets.MNIST(
    #    'data/', train=True,
    #    transform=None, target_transform=None,
    #    download=True)

    # Split data into train/validation sets.
    train_split_size = int(len(dataset) * 0.8)
    validation_split_size = len(dataset) - train_split_size
    training_dataset, validation_dataset = torch.utils.data.random_split(
        dataset,
        [train_split_size, len(dataset) - train_split_size])

    # These values come from a pytorch github issue, and are
    # computed on the whole dataset.
    #normalization = torchvision.transforms.Normalize(
    #    (0.4914, 0.4822, 0.4465),
    #    (0.247, 0.243, 0.261)
    #)
    # These values are close enough, and do not leak validation into training
    # set. (I could also compute them myself every split, but laziness)
    normalization = torchvision.transforms.Normalize((0.5, 0.5, 0.5),
                                                     (0.25, 0.25, 0.25))
    crop_size = 26
    augmentation = torchvision.transforms.Compose([
        #torchvision.transforms.RandomRotation(22.5, fill=(0,)),
        torchvision.transforms.RandomHorizontalFlip(p=0.5),
        torchvision.transforms.RandomCrop(crop_size, 0, fill=(0, )),
        torchvision.transforms.ColorJitter(0.1, 0.1, 0.1, 0.1),
        torchvision.transforms.ToTensor(),
        normalization,
    ])
    test_transform = torchvision.transforms.Compose([
        torchvision.transforms.CenterCrop(crop_size),
        torchvision.transforms.ToTensor(),
        normalization,
    ])

    training_dataset.dataset.transform = augmentation
    validation_dataset.dataset.transform = test_transform
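    # Note: random_split returns Subsets that share one underlying dataset
    # object, so this assignment also replaces the augmentation transform set
    # on the previous line; separate dataset instances would be needed to give
    # the two splits different transforms.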

    batch_size = 2**8
    steps_per_epoch = (train_split_size // batch_size)

    #print(training_dataset.image_transform, validation_dataset.image_transform)
    training_dataloader = torch.utils.data.DataLoader(training_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True)
    validation_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)

    # Get a single batch from the training data.
    # We use this batch to set the shapes of layers according to the shape
    # of the actual data.
    for inputs, labels in training_dataloader:
        sample = inputs
        break
    model = Model(nb_classes=10, input_image=sample[0])

    epochs = 15
    print(steps_per_epoch, "steps per epoch.")

    #strategy = None
    #strategy = 'ReduceOnPlateau'
    #strategy = 'CyclicLR'
    strategy = 'OneCycle'
    swa = False

    for group in model.optimizer.param_groups:
        base_lr = group['lr']
    min_lr = base_lr  #* 0.1
    max_lr = base_lr * 25

    if input('LR range test ? y/[n]: ') == 'y':
        import torch_lr_finder

        for group in model.optimizer.param_groups:
            group['lr'] = base_lr * 1e-2
        lr_finder = torch_lr_finder.LRFinder(model, model.optimizer,
                                             model.criterion)
        lr_finder.range_test(training_dataloader,
                             end_lr=base_lr * 1e2,
                             num_iter=steps_per_epoch * 1)
        lr_finder.plot()

        return

    print("LR (min, base, max) = ({}, {}, {}))".format(min_lr, base_lr,
                                                       max_lr))

    strategy_functions = {
        'before_epoch_fn': None,
        'after_epoch_fn': None,
        'before_batch_fn': None,
        'after_batch_fn': None,
        'before_validation_fn': None,
        'after_validation_fn': None,
    }

    if strategy == 'WarmUpReduceOnPlateau':
        pct_start = max(1 / epochs, 1 / 8)
        div_factor = max_lr / (base_lr * 0.1)
        warmup_steps = pct_start * epochs * steps_per_epoch
        print("OneCycle with {} warmup steps ({:.2f} epochs)".format(
            warmup_steps, warmup_steps / steps_per_epoch))
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            model.optimizer,
            max_lr=max_lr,
            div_factor=div_factor,
            final_div_factor=1,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            pct_start=pct_start,
            anneal_strategy='linear',
        )

        strategy_functions['after_batch_fn'] = lambda *a, **k: scheduler.step()

    if strategy == 'ReduceOnPlateau':
        for group in model.optimizer.param_groups:
            group['lr'] = max_lr

        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            model.optimizer,
            mode='min',
            factor=0.2,
            patience=0,
            verbose=True,
            threshold=1e-2,
            threshold_mode='rel',
            cooldown=0,
            min_lr=base_lr * 1e-2)
        strategy_functions['after_epoch_fn'] = lambda *a, **k: scheduler.step(
            k['metrics']['loss'])

    if strategy == 'CyclicLR':
        # Cycle variables
        cycle_length = 0.5  #max(1, epochs // 8)
        steps_per_cycle = int(steps_per_epoch * cycle_length)

        step_up_ratio = 1 / 8
        step_down_ratio = 1 - step_up_ratio
        step_size_up = int(steps_per_epoch * step_up_ratio * cycle_length)
        step_size_down = steps_per_cycle - step_size_up
        #int(steps_per_epoch * step_down_ratio * cycle_length)

        print("Cyclic LR with steps of sizes:", step_size_up, step_size_down)
        print("Cycling between {} and {}".format(min_lr, max_lr))

        scheduler = torch.optim.lr_scheduler.CyclicLR(
            model.optimizer,
            min_lr,
            max_lr,
            step_size_up=step_size_up,
            step_size_down=step_size_down,
            mode='triangular',
            gamma=1.0,
            scale_fn=None,
            scale_mode='iterations',
            cycle_momentum=False,
            base_momentum=0.8,
            max_momentum=0.9,
            last_epoch=-1)

        strategy_functions['after_batch_fn'] = lambda *a, **k: scheduler.step()

    if strategy == 'OneCycle':
        pct_start = 3 / epochs  #max(1/epochs, 3/10)
        max_lr = base_lr * 200
        div_factor = 200  #max_lr / (base_lr * 0.1)
        final_div_factor = 1 / 100
        warmup_steps = pct_start * epochs * steps_per_epoch
        print("OneCycle with {} warmup steps ({:.2f} epochs)".format(
            warmup_steps, warmup_steps / steps_per_epoch))
        print("start_lr, max_lr, final_lr = {:.4f}, {:.4f}, {:.4f}".format(
            max_lr / div_factor, max_lr,
            (max_lr / div_factor) / final_div_factor))
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            model.optimizer,
            max_lr=max_lr,
            div_factor=div_factor,
            final_div_factor=final_div_factor,
            epochs=epochs,
            steps_per_epoch=steps_per_epoch,
            pct_start=pct_start,
            anneal_strategy='linear',
        )
        strategy_functions['after_batch_fn'] = lambda *a, **k: scheduler.step()

    # Fit model.
    history = model.fit_generator(
        training_dataloader,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=validation_dataloader,
        validation_steps=min(300, validation_split_size // batch_size),
        **strategy_functions,
    )

    strategy_functions = {
        'before_epoch_fn': None,
        'after_epoch_fn': None,
        'before_batch_fn': None,
        'after_batch_fn': None,
        'before_validation_fn': None,
        'after_validation_fn': None,
    }

    # Stochastic Weight Averaging
    if swa:
        min_lr = base_lr
        max_lr = base_lr * 1e0
        model.optimizer = torchcontrib.optim.SWA(model.optimizer,
                                                 swa_start=0,
                                                 swa_freq=steps_per_epoch // 2,
                                                 swa_lr=max_lr)

        scheduler = torch.optim.lr_scheduler.CyclicLR(
            model.optimizer,
            min_lr,
            max_lr,
            step_size_up=int(steps_per_epoch * 0.3),
            step_size_down=int(steps_per_epoch * 0.7),
            mode='triangular',
            gamma=1.0,
            scale_fn=None,
            scale_mode='iterations',
            cycle_momentum=False,
            base_momentum=0.8,
            max_momentum=0.9,
            last_epoch=-1)
        #strategy_functions['after_batch_fn'] = lambda *a, **k: scheduler.step()
        #print("SWA with lr: {} {}".format(min_lr, max_lr))
        print("SWA with lr: {}".format(max_lr))

        #strategy_functions['before_validation_fn'] = lambda *a, **k: \
        #    model.optimizer.swap_swa_sgd()
        #strategy_functions['after_validation_fn'] = lambda *a, **k: \
        #    model.optimizer.swap_swa_sgd()

        swa_history = model.fit_generator(
            training_dataloader,
            steps_per_epoch=steps_per_epoch,
            epochs=max(3, int(epochs * 0.25)),
            validation_data=validation_dataloader,
            validation_steps=300,
            **strategy_functions,
        )

        for metric_name, values in swa_history.items():
            history[metric_name] = history[metric_name] + values

        model.optimizer.swap_swa_sgd()
        model.optimizer.bn_update(training_dataloader, model)

    # Disable augmentations.
    training_dataset.dataset.transform = test_transform
    validation_dataset.dataset.transform = test_transform

    training_dataloader = torch.utils.data.DataLoader(training_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True)
    validation_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)

    model.bn_update(training_dataloader)
    # Compute metrics on whole sets.
    training_metrics, training_confusion_matrix = model.evaluate(
        training_dataloader, steps=None, confusion=True)
    validation_metrics, validation_confusion_matrix = model.evaluate(
        validation_dataloader, steps=None, confusion=True)
    print()
    print("{:<12}".format("Training:"), utils.format_metrics(training_metrics))
    print("{:<12}".format("Validation:"),
          utils.format_metrics(validation_metrics))
    print()

    class_labels = training_dataset.dataset.classes
    utils.print_confusion_matrix(training_confusion_matrix,
                                 class_labels,
                                 normalize=True)
    print()
    utils.print_confusion_matrix(validation_confusion_matrix,
                                 class_labels,
                                 normalize=True)
    print()

    # Save the model and optimizer states.
    torch.save(
        {
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': model.optimizer.state_dict(),
            'learning_history': history,
        }, model_save_path)

    # Plot learning curves.
    for metric_name, metric_values in history.items():
        plt.plot(metric_values, label=metric_name)
    plt.plot(np.full(epochs, 1.0), 'k--')
    plt.gca().set_ylim(bottom=0.0)
    plt.xlabel('epoch')
    plt.ylabel('metric value')
    plt.legend()
    plt.show()
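
The hard-coded normalization constants in this example are stated to be approximations; as a rough sketch only (assuming a plain ToTensor transform and the same 80/20 split idea), the per-channel mean and std could be estimated from the training split alone, so the validation data does not leak into the statistics:

import torch
import torchvision

plain = torchvision.transforms.ToTensor()
dataset = torchvision.datasets.CIFAR10('data/', train=True, transform=plain,
                                       download=True)
train_size = int(len(dataset) * 0.8)
train_split, _ = torch.utils.data.random_split(
    dataset, [train_size, len(dataset) - train_size])
loader = torch.utils.data.DataLoader(train_split, batch_size=512)

mean = torch.zeros(3)
sq_mean = torch.zeros(3)
for images, _ in loader:
    # images: [batch, 3, H, W]; accumulate per-channel sums weighted by batch size.
    mean += images.mean(dim=(0, 2, 3)) * images.shape[0]
    sq_mean += images.pow(2).mean(dim=(0, 2, 3)) * images.shape[0]
mean /= train_size
std = (sq_mean / train_size - mean.pow(2)).sqrt()
print("mean:", mean, "std:", std)
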
Example #7

network_file = utils.all_files_under(load_model_dir, extension=".json")
weight_file = utils.all_files_under(load_model_dir, extension=".h5")
assert len(network_file) == 1 and len(weight_file) == 1
with open(network_file[0], 'r') as f:
    network = model_from_json(f.read())
network.load_weights(weight_file[0])

# run inference
filepaths, filenames, pred_grades, true_grades = [], [], [], []
for fnames, fundus_rescale, fundus_rescale_mean_subtract, grades in val_batch_fetcher():
    pred = network.predict(fundus_rescale_mean_subtract,
                           batch_size=batch_size,
                           verbose=0)
    pred_grades += pred[0].tolist()
    true_grades += grades.tolist()
    filenames += [
        os.path.basename(fname).replace(".tif", "")
        for fname in fnames.tolist()
    ]
    filepaths += fnames.tolist()

final_prediction = utils.adjust_threshold(true_grades, pred_grades)
df = pd.DataFrame({"Image No": filenames, "DR Grade": final_prediction})
df.to_csv("VRT_Disease_Grading_DR.csv", index=False)
# segmented_dir_tempalte = "../outputs//{}/"
# ori_img_dir = "../data/merged_training_set/"
# utils.save_wrong_files(true_grades, pred_grades, filepaths, segmented_dir_tempalte, ori_img_dir)

utils.print_confusion_matrix(true_grades, final_prediction, "DR")
Example #8
                                                  batch_size=BATCH_SIZE),
                               steps_per_epoch=train_idx.sum() // BATCH_SIZE,
                               epochs=EPOCHS,
                               validation_data=test_datagen.flow(
                                   Xv, yv, batch_size=BATCH_SIZE),
                               validation_steps=valid_idx.sum() // BATCH_SIZE,
                               verbose=2)
    print('Done!')
    # Evaluate the model
    test_gen = test_datagen.flow(Xv, yv, batch_size=BATCH_SIZE, shuffle=False)
    probabilities = model.predict_generator(test_gen,
                                            steps=len(yv) // BATCH_SIZE + 1)
    # Plot the multi-class confusion matrix
    cnf_matrix = confusion_matrix(np.argmax(yv, axis=1),
                                  np.argmax(probabilities, axis=1))
    _ = print_confusion_matrix(cnf_matrix, selected_breed_list)

    # classification_report expects y_true first, then y_pred
    report = classification_report(np.argmax(yv, axis=1),
                                   np.argmax(probabilities, axis=1),
                                   target_names=selected_breed_list)
    print(report)

    # Save the trained model weights
    # !mkdir models
    # model.save_weights('../tmp/models/tl_xception_weights.h5')
    save_path = '/home/colin/Github/Computer-Vision/models/'  # save directory
    if not os.path.exists(save_path):  # create the directory if it does not exist
        os.makedirs(save_path)
    # !mkdir models
    model.save_weights(
        '/home/colin/Github/Computer-Vision/models/tl_xception_weights.h5')