Example #1
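A training/inference pipeline for infarct segmentation: it fetches the dataset, trains a 1-channel UNet1024 on 16x16 images with TensorBoard and model-saving callbacks, then writes thresholded test predictions to a compressed CSV submission.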
# Standard-library and PyTorch imports used below; project-specific modules
# (helpers, DatasetFetcher, the callbacks and datasets, unet, nn.classifier,
# random_shift_scale_rotate, ...) are assumed importable from the surrounding package.
import os
from multiprocessing import cpu_count

import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler


def main():
    # Clear log dir first
    helpers.clear_logs_folder()

    # Hyperparameters
    input_img_resize = (16, 16)  # Size the input images are resized to for the net
    output_img_resize = (16, 16)  # Size the output images are resized to
    batch_size = 100
    epochs = 50
    threshold = 1.5  # Mask values: 1 = background, 2 = infarct
    validation_size = 0.1
    sample_size = None  # None = use the full dataset

    # -- Optional parameters
    threads = cpu_count()
    use_cuda = torch.cuda.is_available()
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Fetch the datasets
    ds_fetcher = DatasetFetcher()
    ds_fetcher.fetch_dataset()

    # Get the path to the files for the neural net
    # Note: for KFold cross-validation we would not pre-split train/valid like this
    X_train, y_train, X_valid, y_valid, z_train, z_valid = ds_fetcher.get_train_files(
        sample_size=sample_size, validation_size=validation_size)
    full_x_test = ds_fetcher.get_test_files(sample_size)

    # -- Computed parameters
    # Get the original images size (assuming they are all the same size)
    origin_img_size = ds_fetcher.get_image_size(X_train[0])
    # The images keep their aspect ratio, so the net input size would need
    # recalculating via a center crop; this seems unnecessary here:
    # img_resize_centercrop = transformer.get_center_crop_size(X_train[0], img_resize)

    # Training callbacks
    tb_viz_cb = TensorboardVisualizerCallback(
        os.path.join(script_dir, '../logs/tb_viz_' + helpers.get_model_timestamp()))
    tb_logs_cb = TensorboardLoggerCallback(
        os.path.join(script_dir, '../logs/tb_logs_' + helpers.get_model_timestamp()))
    model_saver_cb = ModelSaverCallback(
        os.path.join(script_dir, '../output/models/model_' + helpers.get_model_timestamp()),
        verbose=True)

    # Testing callbacks
    pred_thresh = 0.5
    pred_saver_cb = PredictionsSaverCallback(
        os.path.join(script_dir, '../output/submit_' + helpers.get_model_timestamp() + '.csv.gz'),
        origin_img_size, pred_thresh)

    # Define our neural net architecture
    net = unet.UNet1024((1, *input_img_resize))
    classifier = nn.classifier.InfarctClassifier(net, epochs)

    img_aug = random_shift_scale_rotate  # Image augmentation with shift, scaling and rotation
    train_ds = TrainImageDataset(X_train, y_train, z_train, input_img_resize, X_transform=img_aug)
    train_loader = DataLoader(train_ds, batch_size,
                              sampler=RandomSampler(train_ds),
                              num_workers=threads,
                              pin_memory=use_cuda)

    valid_ds = TrainImageDataset(X_valid, y_valid, z_valid, input_img_resize, threshold=threshold)
    valid_loader = DataLoader(valid_ds, batch_size,
                              sampler=SequentialSampler(valid_ds),
                              num_workers=threads,
                              pin_memory=use_cuda)

    print("Training on {} samples and validating on {} samples "
          .format(len(train_loader.dataset), len(valid_loader.dataset)))

    # Train the classifier
    classifier.train(train_loader, valid_loader, epochs, callbacks=[tb_viz_cb, tb_logs_cb, model_saver_cb])

    test_ds = TestImageDataset(full_x_test, input_img_resize)
    test_loader = DataLoader(test_ds, batch_size,
                             sampler=SequentialSampler(test_ds),
                             num_workers=threads,
                             pin_memory=use_cuda)

    # Predict & save
    classifier.predict(test_loader, callbacks=[pred_saver_cb])
    pred_saver_cb.close_saver()
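
# Presumed entry point, not shown in the original snippet:
if __name__ == '__main__':
    main()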
Example #2
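The Carvana variant of the same pipeline: it trains a 3-channel UNet1024 on 1024x1024 images, supports a TRAIN/TEST mode switch and restoring a saved model via opt.model, and writes predictions to ../output/submit.csv.gz.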
# As in Example #1, standard-library and PyTorch imports are shown explicitly;
# project modules (helpers, DatasetFetcher, transformer, aug, unet, nn.classifier,
# the callbacks and datasets, and the `opt` options object) are assumed available.
import os
from multiprocessing import cpu_count

import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler


def main():
    # Clear log dir first
    helpers.clear_logs_folder()

    # Hyperparameters
    img_resize = (1024, 1024)
    batch_size = 2
    epochs = 50

    if opt.mode == 'TEST':
        batch_size = 1
        epochs = 1

    threshold = 0.5
    validation_size = 0.2
    sample_size = None  # Put None to work on full dataset

    # A previous run trained on 4576 samples and validated on 512 samples
    # -- Optional parameters
    threads = cpu_count()
    use_cuda = torch.cuda.is_available()
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Download the datasets
    ds_fetcher = DatasetFetcher(opt)
    ds_fetcher.download_dataset(False)

    # Get the path to the files for the neural net
    # Note: for KFold cross-validation we would not pre-split train/valid like this
    X_train, y_train, X_valid, y_valid = ds_fetcher.get_train_files(
        sample_size=sample_size, validation_size=validation_size)
    full_x_test = ds_fetcher.get_test_files(sample_size)

    # -- Computed parameters
    # Get the original images size (assuming they are all the same size)
    origin_img_size = ds_fetcher.get_image_size(X_train[0])
    # The images keep their aspect ratio, so the net input size must be recalculated
    img_resize_centercrop = transformer.get_center_crop_size(
        X_train[0], img_resize)

    # Training callbacks
    tb_viz_cb = TensorboardVisualizerCallback(
        os.path.join(script_dir, '../logs/tb_viz'))
    tb_logs_cb = TensorboardLoggerCallback(
        os.path.join(script_dir, '../logs/tb_logs'))
    model_saver_cb = ModelSaverCallback(os.path.join(
        script_dir, '../output/models/model_' + helpers.get_model_timestamp()),
                                        verbose=True)

    # Testing callbacks
    pred_saver_cb = PredictionsSaverCallback(
        os.path.join(script_dir, '../output/submit.csv.gz'), origin_img_size,
        threshold)

    # Define our neural net architecture
    net = unet.UNet1024((3, *img_resize_centercrop))
    classifier = nn.classifier.CarvanaClassifier(net, epochs, opt)

    if opt.model != "":
        classifier.restore_model(opt.model)
        classifier.net.eval()

    train_ds = TrainImageDataset(X_train,
                                 y_train,
                                 img_resize,
                                 X_transform=aug.augment_img)
    train_loader = DataLoader(train_ds,
                              batch_size,
                              sampler=RandomSampler(train_ds),
                              num_workers=threads,
                              pin_memory=use_cuda)

    valid_ds = TrainImageDataset(X_valid,
                                 y_valid,
                                 img_resize,
                                 threshold=threshold)
    valid_loader = DataLoader(valid_ds,
                              batch_size,
                              sampler=SequentialSampler(valid_ds),
                              num_workers=threads,
                              pin_memory=use_cuda)

    print("Training on {} samples and validating on {} samples ".format(
        len(train_loader.dataset), len(valid_loader.dataset)))

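    # In TEST mode (batch_size=1, epochs=1) this runs a single pass without the
    # logging and model-saving callbacks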
    if opt.mode == 'TRAIN':
        classifier.train(train_loader,
                         valid_loader,
                         epochs,
                         callbacks=[tb_viz_cb, tb_logs_cb, model_saver_cb])
    else:
        classifier.train(train_loader, valid_loader, epochs, callbacks=[])

    test_ds = TestImageDataset(full_x_test, img_resize)
    test_loader = DataLoader(test_ds,
                             batch_size,
                             sampler=SequentialSampler(test_ds),
                             num_workers=threads,
                             pin_memory=use_cuda)

    # Predict & save
    classifier.predict(test_loader, callbacks=[pred_saver_cb])
    pred_saver_cb.close_saver()
Example #3
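A K-fold training/prediction loop: for each fold it writes the split to disk, builds loaders according to mode ('31class' or 'binary'), trains a classifier or restores saved weights, and accumulates out-of-fold and test predictions before dumping cross-fold metrics to results.json. It relies on module-level mode, include_double_words, net_tuple, and batch_size, and on imports such as numpy as np, pandas as pd, json, gc, and the project's nn, aug, and config modules.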
    def __call__(self):
        self.ds = Dataset(RS=self.RS,
                          proj_folder=self.folder_name,
                          pseudo_file=self.pseudo_file,
                          silence_binary=self.silence_binary_flag,
                          include_double_words=include_double_words)

        test_dataset = self.ds.test
        if self.predict_custom_flag:
            test_dataset = self.custom_dataset.test
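        # TTA (test-time augmentation) loaders are built once and reused for every fold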
        test_loader_list = self.generate_tta_loader_list(test_dataset)

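        # For each fold: persist the split, build loaders, train or load a model,
        # then collect out-of-fold and test predictions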
        for fold_num in [4, 3, 2, 1, 0]:  # np.arange(self.num_folds), in reverse
            # Persist this fold's train/validation split to disk
            train_ids = self.ds.train_ids_list[fold_num]
            with open(self.splits_folder + 'train_%d.txt' % fold_num, 'w') as outfile:
                for item in train_ids:
                    outfile.write("%s\n" % str(item))

            valid_ids = self.ds.val_ids_list[fold_num]
            with open(self.splits_folder + 'val_%d.txt' % fold_num, 'w') as outfile:
                for item in valid_ids:
                    outfile.write("%s\n" % str(item))

            # Rows of the full train set belonging to this fold's train/valid ids
            train_df = self.ds.train[self.ds.train[config.id_col].isin(train_ids)]
            valid_df = self.ds.train[self.ds.train[config.id_col].isin(valid_ids)]

            print('Train index', train_ids[:15])
            if mode == '31class':
                if not include_double_words:
                    train_ds = ImageDataset(train_df,
                                            include_target=True,
                                            X_transform=aug.data_transformer)
                    val_ds = ImageDataset(valid_df,
                                          include_target=True,
                                          X_transform=None)

                    train_loader = DataLoader(train_ds,
                                              batch_size,
                                              sampler=RandomSampler(train_ds),
                                              num_workers=config.THREADS,
                                              pin_memory=config.USE_CUDA)
                    valid_loader = DataLoader(val_ds,
                                              batch_size,
                                              num_workers=config.THREADS,
                                              pin_memory=config.USE_CUDA)
                else:
                    train_ds = ImageDataset(self.ds.train,
                                            include_target=True,
                                            X_transform=aug.data_transformer)
                    val_ds = ImageDataset(self.ds.train,
                                          include_target=True,
                                          X_transform=None)
                    train_loader = DataLoader(train_ds,
                                              batch_size,
                                              sampler=UnknownsRandomSampler(train_df),
                                              num_workers=config.THREADS,
                                              pin_memory=config.USE_CUDA)
                    valid_loader = DataLoader(val_ds,
                                              batch_size,
                                              sampler=UnknownsRandomSampler(valid_df),
                                              num_workers=config.THREADS,
                                              pin_memory=config.USE_CUDA)

            elif mode == 'binary':
                train_ds = ImageDataset(self.ds.train,
                                        include_target=True,
                                        X_transform=aug.data_transformer)
                val_ds = ImageDataset(self.ds.train,
                                      include_target=True,
                                      X_transform=None)

                train_loader = DataLoader(train_ds,
                                          batch_size,
                                          sampler=SilenceBinaryRandomSampler(train_df),
                                          num_workers=config.THREADS,
                                          pin_memory=config.USE_CUDA)
                valid_loader = DataLoader(val_ds,
                                          batch_size,
                                          sampler=SilenceBinaryRandomSampler(valid_df),
                                          num_workers=config.THREADS,
                                          pin_memory=config.USE_CUDA)
            else:
                # Guard: an unrecognised mode would otherwise leave the loaders undefined
                raise ValueError('Unsupported mode: %s' % mode)
            valid_loader_oof_list = self.generate_tta_loader_list(valid_df)

            # Train a classifier for this fold, or (below) load saved weights and only predict
            if self.train_flag:
                classifier = nn.classifier.Classifier(
                    net_tuple=net_tuple,
                    train_loader=train_loader,
                    valid_loader_oof_list=valid_loader_oof_list,
                    valid_loader=valid_loader,
                    test_loader_list=test_loader_list,
                    output_folder=self.folder_name,
                    fold_num=fold_num,
                    load_model_from_file=None,
                    mode=self.mode)
                classifier.train(self.epochs)
            else:  # load model and predict
                classifier = nn.classifier.Classifier(
                    net_tuple=net_tuple,
                    train_loader=train_loader,
                    valid_loader_oof_list=valid_loader_oof_list,
                    valid_loader=valid_loader,
                    test_loader_list=test_loader_list,
                    output_folder=self.folder_name,
                    fold_num=fold_num,
                    load_model_from_file=self.model_weights_dict[fold_num],
                    mode=self.mode)

            # Predict: out-of-fold train predictions and this fold's test predictions
            oof_train, test_pred_sub, self.aug_col_list = classifier.predict()

            if self.train_flag:
                training_log_info = classifier.training_log_info
                self.best_weights.append(
                    training_log_info.head(1)['weight'].item())
                self.training_logs.append(training_log_info)

            del classifier
            gc.collect()

            # Concatenate this fold's out-of-fold predictions for the train set
            if isinstance(self.train_prediction, pd.DataFrame):
                self.train_prediction = pd.concat(
                    [self.train_prediction, oof_train])
            else:
                self.train_prediction = oof_train

            # Merge this fold's test predictions on id
            if isinstance(self.test_prediction, pd.DataFrame):
                self.test_prediction = self.test_prediction.merge(
                    test_pred_sub, on='id')
            else:
                self.test_prediction = test_pred_sub

        # Calculate the score across folds (only available when training was run)
        self.errors_dict = {}
        if self.training_logs:
            loss_list = np.array(
                [x.head(1)['valid_loss'].item() for x in self.training_logs])
            self.errors_dict['val_std'] = np.std(loss_list)
            self.errors_dict['val_mean'] = np.mean(loss_list)
        self.errors_dict['best_dict'] = self.best_weights

        with open(self.folder_name + 'results.json', 'w') as fp:
            json.dump(self.errors_dict, fp)