def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('root', help='checkpoint root')
    arg('--batch-size', type=int, default=32)
    arg('--patch-size', type=int, default=256)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=2)
    arg('--fold', type=int, default=1)
    arg('--n-folds', type=int, default=5)
    arg('--stratified', action='store_true')
    arg('--mode', choices=[
        'train', 'validation', 'predict_valid', 'predict_test',
        'predict_all_valid'
    ], default='train')
    arg('--clean', action='store_true')
    arg('--epoch-size', type=int)
    arg('--limit', type=int, help='Use only N images for train/valid')
    arg('--min-scale', type=float, default=1)
    arg('--max-scale', type=float, default=1)
    arg('--test-scale', type=float, default=0.5)
    args = parser.parse_args()

    coords = utils.load_coords()
    train_paths, valid_paths = utils.train_valid_split(args, coords)
    root = Path(args.root)
    model = SSPD()
    model = utils.cuda(model)
    criterion = SSPDLoss()

    if args.mode == 'train':
        kwargs = dict(min_scale=args.min_scale, max_scale=args.max_scale)
        train_loader, valid_loader = (
            utils.make_loader(PointDataset, args, train_paths, coords, **kwargs),
            utils.make_loader(PointDataset, args, valid_paths, coords,
                              deterministic=True, **kwargs))
        if root.exists() and args.clean:
            shutil.rmtree(str(root))
        root.mkdir(exist_ok=True)
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(args, model, criterion,
                    train_loader=train_loader,
                    valid_loader=valid_loader,
                    save_predictions=save_predictions)
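# The scripts in this file rely on a project-specific ``utils.make_loader`` helper that
# is not shown here. The sketch below is a minimal assumption of what such a helper
# could look like (dataset constructor signature, the ``deterministic`` flag only
# disabling shuffling, and the worker count are guesses); it is illustrative, not the
# project's actual implementation.
from torch.utils.data import DataLoader


def make_loader_sketch(dataset_cls, args, paths, coords, deterministic=False, **kwargs):
    """Wrap a dataset class in a DataLoader using the common CLI arguments."""
    dataset = dataset_cls(args, paths, coords, **kwargs)
    return DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=not deterministic,   # keep validation order fixed
        num_workers=args.workers,
        pin_memory=True,
    )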
def self_training(args, **kwargs):
    torch.manual_seed(args.seed)
    device = kwargs['device']
    file = kwargs['file']
    current_time = kwargs['current_time']

    nclasses = datasets.__dict__[args.dataset].nclasses
    model = models.__dict__[args.arch](nclasses=nclasses)
    model = torch.nn.DataParallel(model).to(device)

    # Separate losses are needed because we also need the intermediate probabilities:
    # nn.CrossEntropyLoss combines nn.LogSoftmax() and nn.NLLLoss() in a single class.
    softmax = nn.Softmax(dim=1)
    logsoftmax = nn.LogSoftmax(dim=1)
    nll = nn.NLLLoss().to(device)

    optimizer = utils.select_optimizer(args, model)
    # The supervised dataset is kept separately because it is updated after each epoch.
    train_supervised_dataset, _, _ = utils.get_dataset(args)
    _, train_unsupervised_loader, val_loader = utils.make_loader(args)

    columns = ['Epoch #', 'Train loss', 'Train Accuracy', 'Train Correct',
               'Train Total', 'Val loss', 'Top-1 Accuracy', 'Top-5 Accuracy',
               'Top-1 Correct', 'Top-5 Correct', 'Val Total', 'Time(secs)']
    report = PrettyTable(columns)

    for epoch in range(1, args.epochs + 1):
        per_epoch = PrettyTable(columns)
        start_time = time.time()
        training_loss, train_correct, train_total = train(
            device, model, logsoftmax, nll, epoch,
            train_supervised_dataset, optimizer, args.batch_size)
        validation_loss, val1_correct, val5_correct, val_total = validation(
            device, model, logsoftmax, nll, val_loader)
        train_supervised_dataset = label_addition(
            device, model, softmax, train_supervised_dataset,
            train_unsupervised_loader, args.tau)
        end_time = time.time()

        row = [epoch,
               round(training_loss, 4),
               "{:.3f}%".format(round((train_correct * 100.0) / train_total, 3)),
               train_correct, train_total,
               round(validation_loss, 4),
               "{:.3f}%".format(round((val1_correct * 100.0) / val_total, 3)),
               "{:.3f}%".format(round((val5_correct * 100.0) / val_total, 3)),
               val1_correct, val5_correct, val_total,
               round(end_time - start_time, 2)]
        report.add_row(row)
        per_epoch.add_row(row)
        print(per_epoch)

        if args.save_model == 'y':
            val_folder = "saved_model/" + current_time
            if not os.path.isdir(val_folder):
                os.mkdir(val_folder)
            save_model_file = val_folder + '/model_' + str(epoch) + '.pth'
            torch.save(model.state_dict(), save_model_file)

    file.write(report.get_string())
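# ``label_addition`` is not defined in this snippet. The sketch below shows the usual
# self-training step such a function performs: run the model on the unlabeled loader,
# keep predictions whose softmax confidence exceeds ``tau`` and append them as pseudo-
# labels to the supervised dataset. The ``add_samples`` method is an assumed interface,
# not the project's actual API.
import torch


def label_addition_sketch(device, model, softmax, supervised_dataset,
                          unsupervised_loader, tau):
    model.eval()
    with torch.no_grad():
        for inputs, _ in unsupervised_loader:
            inputs = inputs.to(device)
            probs = softmax(model(inputs))
            confidence, pseudo_labels = probs.max(dim=1)
            mask = confidence >= tau
            if mask.any():
                # assumed dataset-side helper that stores new (image, label) pairs
                supervised_dataset.add_samples(inputs[mask].cpu(),
                                               pseudo_labels[mask].cpu())
    return supervised_dataset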
def train(): """ Training function Adapted from https://github.com/jesseniagonzalezv/App_segmentation_water_bodies/ """ parser = argparse.ArgumentParser() arg = parser.add_argument # image-related variables arg('--image-patches-dir', type=str, default='./data/dataset/split', help='satellite image patches directory') arg('--masks-dir', type=str, default='./data/dataset/labels', help='numPy masks directory') arg('--npy-dir', type=str, default='./data/dataset/split_npy', help='numPy preprocessed patches directory') # preprocessing-related variables arg('--val-percent', type=float, default=0.25, help='Validation percent') arg('--test-percent', type=float, default=0.10, help='Test percent') # training-related variable arg('--batch-size', type=int, default=16, help='HR:4,VHR:8') arg('--limit', type=int, default=0, help='number of images in epoch') arg('--n-epochs', type=int, default=500) arg('--lr', type=float, default=1e-3) arg('--step', type=float, default=60) arg('--model', type=str, help='roof: roof segmentation / income: income determination') arg('--out-path', type=str, default='./trained_models/', help='model output path') arg('--pretrained', type=int, default=1, help='0: False; 1: True') # CUDA devices arg('--device-ids', type=str, default='0,1', help='For example 0,1 to run on two GPUs') args = parser.parse_args() pretrained = True if args.pretrained else False if args.model == "roof": model = models.UNet11(pretrained=pretrained) elif args.model == "income": model = models.UNet11(pretrained=pretrained, num_classes=4, input_channels=5) else: raise ValueError if torch.cuda.is_available(): if args.device_ids: device_ids = list(map(int, args.device_ids.split(','))) else: device_ids = None model = torch.nn.DataParallel(model, device_ids=device_ids).cuda() cudnn.benchmark = True images_filenames = np.array(sorted(glob.glob(args.image_patches_dir + "/*.tif"))) train_set_indices, val_set_indices, test_set_indices = utils.train_val_test_split(len(images_filenames), args.val_percent, args.test_percent) images_np_filenames = utils.save_npy(images_filenames, args.npy_dir, args.model, args.masks_dir) channel_num = 4 if args.model == "roof" else 5 max_value, mean_train, std_train = utils.meanstd(np.array(images_np_filenames)[train_set_indices], channel_num=channel_num) train_transform = DualCompose([ HorizontalFlip(), VerticalFlip(), Rotate(), ImageOnly(Normalize(mean=mean_train, std=std_train)) ]) val_transform = DualCompose([ ImageOnly(Normalize(mean=mean_train, std=std_train)) ]) limit = args.limit if args.limit > 0 else None train_loader = utils.make_loader(filenames=np.array(images_np_filenames)[train_set_indices], mask_dir=args.masks_dir, dataset=args.model, shuffle=False, transform=train_transform, mode='train', batch_size=args.batch_size, limit=limit) valid_loader = utils.make_loader(filenames=np.array(images_np_filenames)[val_set_indices], mask_dir=args.masks_dir, dataset=args.model, shuffle=False, transform=val_transform, mode='train', batch_size=args.batch_size, limit=None) dataloaders = { 'train': train_loader, 'val': valid_loader } optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step, gamma=0.1) name_file = '_' + str(int(args.val_percent * 100)) + '_percent_' + args.model utils.train_model(name_file=name_file, model=model, dataset=args.model, optimizer=optimizer, scheduler=scheduler, dataloaders=dataloaders, name_model="Unet11", num_epochs=args.n_epochs) if not os.path.exists(args.out_path): 
os.mkdir(args.out_path) torch.save(model.module.state_dict(), (str(args.out_path) + '/model{}_{}_{}epochs').format(name_file, "Unet11", args.n_epochs)) find_metrics(train_file_names=np.array(images_np_filenames)[train_set_indices], val_file_names=np.array(images_np_filenames)[val_set_indices], test_file_names=np.array(images_np_filenames)[test_set_indices], mask_dir=args.masks_dir, dataset=args.model, mean_values=mean_train, std_values=std_train, model=model, name_model="Unet11", epochs=args.n_epochs, out_file=args.model, dataset_file=args.model, name_file=name_file)
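# ``utils.meanstd`` is referenced above but not shown. A minimal sketch, assuming each
# preprocessed ``.npy`` patch is stored as an H x W x C array and that the helper
# returns the global max value plus per-channel mean/std used by ``Normalize``. This is
# an illustrative assumption, not the project's actual implementation.
import numpy as np


def meanstd_sketch(np_filenames, channel_num):
    pixels = []
    for path in np_filenames:
        # flatten each patch to (num_pixels, channels)
        pixels.append(np.load(path).reshape(-1, channel_num).astype(np.float64))
    pixels = np.concatenate(pixels, axis=0)
    max_value = pixels.max()
    mean_train = pixels.mean(axis=0)
    std_train = pixels.std(axis=0)
    return max_value, mean_train, std_train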
def train(config, device='cuda:0', save_chkpt=True):
    '''Procedure launching all main functions of the training, validation and testing pipelines.'''
    # for pipeline-testing purposes, skip checkpoint saving
    save_chkpt = not config.test_steps

    # preprocessing data
    normalize = A.Normalize(**config.img_norm_cfg)
    train_transform_real = A.Compose([
        A.Resize(**config.resize, interpolation=cv.INTER_CUBIC),
        A.HorizontalFlip(p=0.5),
        A.augmentations.transforms.ISONoise(color_shift=(0.15, 0.35),
                                            intensity=(0.2, 0.5), p=0.2),
        A.augmentations.transforms.RandomBrightnessContrast(
            brightness_limit=0.2, contrast_limit=0.2,
            brightness_by_max=True, always_apply=False, p=0.3),
        A.augmentations.transforms.MotionBlur(blur_limit=5, p=0.2),
        normalize
    ])
    train_transform_spoof = A.Compose([
        A.Resize(**config.resize, interpolation=cv.INTER_CUBIC),
        A.HorizontalFlip(p=0.5),
        A.augmentations.transforms.ISONoise(color_shift=(0.15, 0.35),
                                            intensity=(0.2, 0.5), p=0.2),
        A.augmentations.transforms.RandomBrightnessContrast(
            brightness_limit=0.2, contrast_limit=0.2,
            brightness_by_max=True, always_apply=False, p=0.3),
        A.augmentations.transforms.MotionBlur(blur_limit=5, p=0.2),
        normalize
    ])
    val_transform = A.Compose([
        A.Resize(**config.resize, interpolation=cv.INTER_CUBIC),
        normalize
    ])

    # load data
    sampler = config.data.sampler
    if sampler:
        num_instances, weights = make_weights(config)
        sampler = torch.utils.data.WeightedRandomSampler(weights, num_instances,
                                                         replacement=True)
    train_transform = Transform(train_spoof=train_transform_spoof,
                                train_real=train_transform_real, val=None)
    val_transform = Transform(train_spoof=None, train_real=None, val=val_transform)
    train_dataset, val_dataset, test_dataset = make_dataset(config, train_transform,
                                                            val_transform)
    train_loader, val_loader, test_loader = make_loader(train_dataset, val_dataset,
                                                        test_dataset, config,
                                                        sampler=sampler)

    # build the model, move it to the device and, if needed, wrap it in DataParallel
    model = build_model(config, device=device, strict=False, mode='train')
    model.to(device)
    if config.data_parallel.use_parallel:
        model = torch.nn.DataParallel(model, **config.data_parallel.parallel_params)

    # build the criterion
    softmax = build_criterion(config, device, task='main').to(device)
    cross_entropy = build_criterion(config, device, task='rest').to(device)
    bce = nn.BCELoss().to(device)
    criterion = (softmax, cross_entropy, bce) if config.multi_task_learning else softmax

    # build the optimizer and its scheduler
    optimizer = torch.optim.SGD(model.parameters(), **config.optimizer)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, **config.scheduler)

    # create a Trainer object and print experiment information
    trainer = Trainer(model, criterion, optimizer, device, config,
                      train_loader, val_loader, test_loader)
    trainer.get_exp_info()

    # learning epochs
    for epoch in range(config.epochs.start_epoch, config.epochs.max_epoch):
        if epoch != config.epochs.start_epoch:
            scheduler.step()

        # train the model for one epoch
        train_loss, train_accuracy = trainer.train(epoch)
        print(f'epoch: {epoch} train loss: {train_loss} train accuracy: {train_accuracy}')

        # validate the model
        accuracy = trainer.validate()

        # evaluate metrics such as AUC, APCER, BPCER, ACER on the val and test datasets
        trainer.eval(epoch, accuracy, save_chkpt=save_chkpt)

        # for pipeline-testing purposes
        if config.test_steps:
            exit()

    # evaluate at the end of training
    if config.evaluation:
        file_name = 'tests.txt'
        trainer.test(file_name=file_name)
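# ``make_weights`` is not shown above. ``WeightedRandomSampler`` needs one weight per
# training sample, and a common choice is the inverse frequency of that sample's class.
# The sketch below assumes the training labels are available as a list of ints (how the
# real helper extracts them from ``config`` is unknown); it is illustrative only.
from collections import Counter


def make_weights_sketch(labels):
    """Return (num_samples, per-sample weights) for WeightedRandomSampler."""
    counts = Counter(labels)
    weights = [1.0 / counts[label] for label in labels]
    return len(labels), weights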
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('root', help='checkpoint root')
    arg('--batch-size', type=int, default=32)
    arg('--patch-size', type=int, default=256)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=2)
    arg('--fold', type=int, default=1)
    arg('--bg-weight', type=float, default=1.0, help='background weight')
    arg('--dice-weight', type=float, default=0.0)
    arg('--n-folds', type=int, default=5)
    arg('--stratified', action='store_true')
    arg('--mode', choices=[
        'train', 'valid', 'predict_valid', 'predict_test', 'predict_all_valid'
    ], default='train')
    arg('--model-path', help='path to model file to use for validation/prediction')
    arg('--clean', action='store_true')
    arg('--epoch-size', type=int)
    arg('--limit', type=int, help='Use only N images for train/valid')
    arg('--min-scale', type=float, default=1)
    arg('--max-scale', type=float, default=1)
    arg('--test-scale', type=float, default=0.5)
    arg('--oversample', type=float, default=0.0,
        help='sample near lion with given probability')
    arg('--with-head', action='store_true')
    arg('--pred-oddity', type=int, help='set to 0/1 to predict even/odd images')
    args = parser.parse_args()

    coords = utils.load_coords()
    train_paths, valid_paths = utils.train_valid_split(args)
    root = Path(args.root)
    model = UNetWithHead() if args.with_head else UNet()
    model = utils.cuda(model)
    criterion = Loss(dice_weight=args.dice_weight, bg_weight=args.bg_weight)
    loader_kwargs = dict(
        min_scale=args.min_scale,
        max_scale=args.max_scale,
        downscale=args.with_head,
    )

    if args.mode == 'train':
        train_loader, valid_loader = (
            utils.make_loader(SegmentationDataset, args, train_paths, coords,
                              oversample=args.oversample, **loader_kwargs),
            utils.make_loader(SegmentationDataset, args, valid_paths, coords,
                              deterministic=True, **loader_kwargs))
        if root.exists() and args.clean:
            shutil.rmtree(str(root))  # remove dir tree
        root.mkdir(exist_ok=True)
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(args, model, criterion,
                    train_loader=train_loader,
                    valid_loader=valid_loader,
                    save_predictions=save_predictions)
    elif args.mode == 'valid':
        utils.load_best_model(model, root, args.model_path)
        valid_loader = utils.make_loader(SegmentationDataset, args, valid_paths,
                                         coords, deterministic=True, **loader_kwargs)
        utils.validation(model, criterion,
                         tqdm.tqdm(valid_loader, desc='Validation'))
    else:
        utils.load_best_model(model, root, args.model_path)
        if args.mode in {'predict_valid', 'predict_all_valid'}:
            if args.mode == 'predict_all_valid':
                # include all paths we did not train on (makes sense only with --limit)
                valid_paths = list(
                    set(valid_paths) |
                    (set(utils.labeled_paths()) - set(train_paths)))
            predict(model, valid_paths, out_path=root,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    min_scale=args.min_scale,
                    max_scale=args.max_scale,
                    downsampled=args.with_head)
        elif args.mode == 'predict_test':
            out_path = root.joinpath('test')
            out_path.mkdir(exist_ok=True)
            predicted = {p.stem.split('-')[0] for p in out_path.glob('*.npy')}
            test_paths = [p for p in utils.DATA_ROOT.joinpath('Test').glob('*.png')
                          if p.stem not in predicted]
            if args.pred_oddity is not None:
                assert args.pred_oddity in {0, 1}
                test_paths = [p for p in test_paths
                              if int(p.stem) % 2 == args.pred_oddity]
            predict(model, test_paths, out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    test_scale=args.test_scale,
                    is_test=True,
                    downsampled=args.with_head)
        else:
            parser.error('Unexpected mode {}'.format(args.mode))
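# ``Loss(dice_weight=..., bg_weight=...)`` is defined elsewhere in the project. The
# sketch below shows one plausible criterion with the same constructor arguments: a
# cross-entropy term whose background class (assumed to be class 0) is reweighted by
# ``bg_weight``, plus a soft-Dice term on the foreground probabilities scaled by
# ``dice_weight``. Illustrative only; the project's actual loss may differ.
import torch
import torch.nn as nn
import torch.nn.functional as F


class WeightedDiceLossSketch(nn.Module):
    def __init__(self, n_classes, dice_weight=0.0, bg_weight=1.0, eps=1e-6):
        super().__init__()
        class_weights = torch.ones(n_classes)
        class_weights[0] = bg_weight  # down/up-weight the background class
        self.ce = nn.CrossEntropyLoss(weight=class_weights)
        self.dice_weight = dice_weight
        self.eps = eps

    def forward(self, logits, targets):
        loss = self.ce(logits, targets)
        if self.dice_weight:
            probs = F.softmax(logits, dim=1)[:, 1:]  # foreground channels only
            one_hot = F.one_hot(targets, logits.shape[1]).permute(0, 3, 1, 2).float()[:, 1:]
            intersection = (probs * one_hot).sum()
            union = probs.sum() + one_hot.sum()
            dice = (2 * intersection + self.eps) / (union + self.eps)
            loss += self.dice_weight * (1 - dice)
        return loss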
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('root', help='checkpoint root')
    arg('out_path', help='path to UNet features', type=Path)
    arg('--batch-size', type=int, default=32)
    arg('--patch-size', type=int, default=160)
    arg('--offset', type=int, default=6)
    arg('--n-epochs', type=int, default=100)
    arg('--lr', type=float, default=0.0001)
    arg('--workers', type=int, default=2)
    arg('--fold', type=int, default=1)
    arg('--n-folds', type=int, default=5)
    arg('--stratified', action='store_true')
    arg('--mode', choices=[
        'train', 'valid', 'predict_valid', 'predict_test', 'predict_all_valid'
    ], default='train')
    arg('--model-path', help='path to model file to use for validation/prediction')
    arg('--clean', action='store_true')
    arg('--epoch-size', type=int)
    arg('--limit', type=int, help='Use only N images for train/valid')
    arg('--min-scale', type=float, default=1)
    arg('--max-scale', type=float, default=1)
    arg('--test-scale', type=float, default=0.5)
    arg('--pred-oddity', type=int, help='set to 0/1 to predict even/odd images')
    args = parser.parse_args()

    coords = utils.load_coords()
    train_paths, valid_paths = utils.train_valid_split(args, coords)
    root = Path(args.root)
    model = VGGModel(args.patch_size)
    model = utils.cuda(model)
    criterion = nn.CrossEntropyLoss()
    loader_kwargs = dict(min_scale=args.min_scale,
                         max_scale=args.max_scale,
                         offset=args.offset)

    if args.mode == 'train':
        train_loader, valid_loader = (
            utils.make_loader(ClassificationDataset, args, train_paths, coords,
                              **loader_kwargs),
            utils.make_loader(ClassificationDataset, args, valid_paths, coords,
                              deterministic=True, **loader_kwargs))
        if root.exists() and args.clean:
            shutil.rmtree(str(root))
        root.mkdir(exist_ok=True)
        root.joinpath('params.json').write_text(
            json.dumps(vars(args), indent=True, sort_keys=True))
        utils.train(
            args, model, criterion,
            train_loader=train_loader,
            valid_loader=valid_loader,
            save_predictions=save_predictions,
            is_classification=True,
            make_optimizer=lambda lr: SGD([
                {'params': model.features.parameters(), 'lr': lr},
                {'params': model.classifier.parameters(), 'lr': lr},
            ], nesterov=True, momentum=0.9),
        )
    elif args.mode == 'valid':
        utils.load_best_model(model, root, args.model_path)
        valid_loader = utils.make_loader(ClassificationDataset, args, valid_paths,
                                         coords, deterministic=True, **loader_kwargs)
        utils.validation(model, criterion,
                         tqdm.tqdm(valid_loader, desc='Validation'),
                         is_classification=True)
    else:
        utils.load_best_model(model, root, args.model_path)
        if args.mode in {'predict_valid', 'predict_all_valid'}:
            if args.mode == 'predict_all_valid':
                # include all paths we did not train on (makes sense only with --limit)
                valid_paths = list(
                    set(valid_paths) |
                    (set(utils.labeled_paths()) - set(train_paths)))
            predict(model, valid_paths, out_path=args.out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    min_scale=args.min_scale,
                    max_scale=args.max_scale)
        elif args.mode == 'predict_test':
            assert False  # FIXME - use out_path too
            out_path = root.joinpath('test')
            out_path.mkdir(exist_ok=True)
            predicted = {p.stem.split('-')[0] for p in out_path.glob('*.npy')}
            test_paths = [p for p in utils.DATA_ROOT.joinpath('Test').glob('*.jpg')
                          if p.stem not in predicted]
            if args.pred_oddity is not None:
                assert args.pred_oddity in {0, 1}
                test_paths = [p for p in test_paths
                              if int(p.stem) % 2 == args.pred_oddity]
            predict(model, test_paths, out_path,
                    patch_size=args.patch_size,
                    batch_size=args.batch_size,
                    test_scale=args.test_scale,
                    is_test=True)
        else:
            parser.error('Unexpected mode {}'.format(args.mode))
def find_metrics(train_file_names, val_file_names, test_file_names,
                 mask_dir, dataset, mean_values, std_values, model,
                 fold_out='0', fold_in='0', name_model='UNet11', epochs='40',
                 out_file='VHR', dataset_file='VHR', name_file='_VHR_60_fake'):
    outfile_path = 'predictions/{}/'.format(out_file)
    if not os.path.exists("predictions/{}".format(out_file)):
        os.mkdir("predictions/{}".format(out_file))

    f = open(
        "predictions/{}/metric{}_{}_foldout{}_foldin{}_{}epochs.txt".format(
            out_file, name_file, name_model, fold_out, fold_in, epochs), "w+")
    f2 = open(
        "predictions/{}/pred_loss_test{}_{}_foldout{}_foldin{}_{}epochs.txt".format(
            out_file, name_file, name_model, fold_out, fold_in, epochs), "w+")
    f.write("Training mean_values:[{}], std_values:[{}] \n".format(
        mean_values, std_values))

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(len(test_file_names))

    all_transform = DualCompose([
        CenterCrop(int(512)),
        ImageOnly(Normalize(mean=mean_values, std=std_values))
    ])

    train_loader = utils.make_loader(filenames=train_file_names, mask_dir=mask_dir,
                                     dataset=dataset, shuffle=False,
                                     transform=all_transform, mode='train')
    val_loader = utils.make_loader(filenames=val_file_names, mask_dir=mask_dir,
                                   dataset=dataset, shuffle=False,
                                   transform=all_transform, mode='train')
    test_loader = utils.make_loader(filenames=test_file_names, mask_dir=mask_dir,
                                    dataset=dataset, shuffle=False,
                                    transform=all_transform, mode='train')
    dataloaders = {'train': train_loader, 'val': val_loader, 'test': test_loader}

    for phase in ['train', 'val', 'test']:
        model.eval()
        metrics = defaultdict(float)
        count_img = 0
        input_vec = []
        labels_vec = []
        pred_vec = []
        result_dice = []
        result_jaccard = []

        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)
            with torch.set_grad_enabled(False):
                input_vec.append(inputs.data.cpu().numpy())
                labels_vec.append(labels.data.cpu().numpy())
                pred = model(inputs)
                loss = calc_loss(pred, labels, metrics, 'test')
                if phase == 'test':
                    print_metrics(metrics, f2, 'test')
                pred = torch.sigmoid(pred)
                pred_vec.append(pred.data.cpu().numpy())
                result_dice += [metrics['dice']]
                result_jaccard += [metrics['jaccard']]
                count_img += 1

        print("{}_{}".format(phase, out_file))
        print('Dice = ', np.mean(result_dice), np.std(result_dice))
        print('Jaccard = ', np.mean(result_jaccard), np.std(result_jaccard), '\n')
        f.write("{}_{}\n".format(phase, out_file))
        f.write("dice_metric: {:4f}, std: {:4f} \n".format(
            np.mean(result_dice), np.std(result_dice)))
        f.write("jaccard_metric: {:4f}, std: {:4f} \n".format(
            np.mean(result_jaccard), np.std(result_jaccard)))

        if phase == 'test':
            np.save(str(os.path.join(
                outfile_path,
                "inputs_test{}_{}_foldout{}_foldin{}_{}epochs_{}.npy".format(
                    name_file, name_model, fold_out, fold_in, epochs, int(count_img)))),
                np.array(input_vec))
            np.save(str(os.path.join(
                outfile_path,
                "labels_test{}_{}_foldout{}_foldin{}_{}epochs_{}.npy".format(
                    name_file, name_model, fold_out, fold_in, epochs, int(count_img)))),
                np.array(labels_vec))
            np.save(str(os.path.join(
                outfile_path,
                "pred_test{}_{}_foldout{}_foldin{}_{}epochs_{}.npy".format(
                    name_file, name_model, fold_out, fold_in, epochs, int(count_img)))),
                np.array(pred_vec))
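# ``calc_loss`` fills the ``metrics`` dict with ``'dice'`` and ``'jaccard'`` entries.
# A minimal sketch of how these two overlap metrics are typically computed from sigmoid
# outputs and binary masks (a 0.5 threshold is assumed); the project's own
# implementation may average or weight them differently.
import torch


def dice_jaccard_sketch(pred_logits, target, eps=1e-7, threshold=0.5):
    pred = (torch.sigmoid(pred_logits) > threshold).float()
    target = target.float()
    intersection = (pred * target).sum()
    dice = (2 * intersection + eps) / (pred.sum() + target.sum() + eps)
    jaccard = (intersection + eps) / (pred.sum() + target.sum() - intersection + eps)
    return dice.item(), jaccard.item()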