def draw_circle_clips(preds, geetup_info, beg_ind, end_ind, out_dir):
    """Overlay ground-truth and predicted gaze points on frames and save them.

    For every sample index in ``[beg_ind, end_ind)`` the last frame of the
    sequence is read from disk, the ground-truth fixation is drawn as a green
    circle and the prediction as a red circle, and the annotated image is
    written to ``out_dir`` under the frame's original file name.

    Args:
        preds: indexable collection of predicted points, one per sample;
            each prediction is a 2-element (row, col) point.
        geetup_info: dataset object whose ``__getitem__`` returns
            ``(frame_paths, gts)`` for a sample.
        beg_ind: first sample index (inclusive).
        end_ind: last sample index (exclusive).
        out_dir: directory where annotated frames are written (created if
            missing).
    """
    path_utils.create_dir(out_dir)
    for j in range(beg_ind, end_ind):
        f_path, f_gt = geetup_info.__getitem__(j)
        # only the last frame of the sequence is visualised
        f_path = f_path[-1]
        f_gt = f_gt[0]
        f_pred = preds[j]
        # read the image
        img_in = cv2.imread(f_path)
        # draw the gt (green); points are stored (row, col) while OpenCV
        # expects (x, y), hence the index swap
        img_in = cv2.circle(
            img_in, (f_gt[1], f_gt[0]), 5, (0, 255, 0), thickness=9
        )
        # draw the prediction (red)
        # FIX: was ``np.int(f_pred)`` — ``np.int`` is removed in NumPy>=1.24
        # and f_pred is a 2-element point, not a scalar, so the call could
        # never have worked; convert to an array instead.
        pred = np.asarray(f_pred)
        # TODO: support other image sizes
        pred = map_point_to_image_size(pred, (360, 640), (180, 320))
        # cast to plain Python ints for OpenCV
        pred = (int(pred[0]), int(pred[1]))
        img_in = cv2.circle(
            img_in, (pred[1], pred[0]), 5, (0, 0, 255), thickness=9
        )
        img_name = f_path.split('/')[-1]
        out_file = '%s/%s' % (out_dir, img_name)
        cv2.imwrite(out_file, img_in)
def test_prominent_prepares(args):
    """Resolve the list of networks, preprocessings and the output file.

    ``args.network_name`` is interpreted in three ways:
      * a directory — every ``*.h5`` file inside it is tested;
      * a text file — each line is ``network[,preprocessing[,name]]``;
      * otherwise — a single architecture name.

    The resolved lists are attached to ``args`` (``networks``,
    ``network_names``, ``preprocessings``, ``output_file``) and ``args`` is
    returned.
    """
    output_file = None
    if os.path.isdir(args.network_name):
        dirname = args.network_name
        output_dir = os.path.join(dirname, args.experiment_name)
        create_dir(output_dir)
        output_file = os.path.join(output_dir, 'results_')
        # FIX: plain concatenation missed the path separator when dirname had
        # no trailing slash, matching sibling paths instead of files inside.
        networks = sorted(glob.glob(os.path.join(dirname, '*.h5')))
        network_names = []
        preprocessings = [args.preprocessing] * len(networks)
    elif os.path.isfile(args.network_name):
        networks = []
        preprocessings = []
        network_names = []
        with open(args.network_name) as f:
            lines = f.readlines()
        for line in lines:
            tokens = line.strip().split(',')
            networks.append(tokens[0])
            if len(tokens) > 1:
                preprocessings.append(tokens[1])
            else:
                preprocessings.append(args.preprocessing)
            # FIX (was FIXME): always append so network_names stays aligned
            # with networks; fall back to the network path itself.
            if len(tokens) > 2:
                network_names.append(tokens[2])
            else:
                network_names.append(tokens[0])
    else:
        networks = [args.network_name.lower()]
        network_names = [args.network_name.lower()]
        # choosing the preprocessing function
        if not args.preprocessing:
            args.preprocessing = args.network_name.lower()
        preprocessings = [args.preprocessing]
    if not output_file:
        # timestamped results file when no directory-based name was built
        current_time = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y-%m-%d_%H_%M_%S')
        output_dir = args.experiment_name
        create_dir(output_dir)
        output_file = os.path.join(output_dir, 'results_' + current_time)
    args.networks = networks
    args.network_names = network_names
    args.preprocessings = preprocessings
    args.output_file = output_file
    return args
def prepare_saving_dir(experiment_name, network, dataset, manipulation_type):
    """Create the nested ``experiment/dataset/manipulation/network`` folders.

    Each level of the hierarchy is created in turn (parents first) and the
    path of the deepest directory — the network folder — is returned.
    """
    current_dir = experiment_name
    for component in (dataset, manipulation_type, network):
        create_dir(current_dir)
        current_dir = os.path.join(current_dir, component)
    create_dir(current_dir)
    return current_dir
def report_cityscape(input_folder, out_dir=None, prefix_dir='pred_mask',
                     out_name='cityscape_stats',
                     in_exp='SELECTED_IMGS_*.txt'):
    """Compute cityscape segmentation statistics for every selected clip.

    Walks ``input_folder/<part>/segments/<video>/`` looking for selection
    files matching ``in_exp``; for each one, the corresponding frames under
    ``CutVid_<ind>/<prefix_dir>`` are analysed by ``_cityscape_folder`` and
    the per-frame results are written to a ``<out_name>_<ind>.txt`` file.

    Args:
        input_folder: root of the part/segments/video directory tree.
        out_dir: root for mirrored output directories; defaults to
            ``input_folder``.
        prefix_dir: sub-folder of each ``CutVid_*`` holding the mask images.
        out_name: prefix of the produced statistics files.
        in_exp: glob pattern of the selection text files.
    """
    if out_dir is None:
        out_dir = input_folder
    for part_dir in sorted(glob.glob(input_folder + '/*/')):
        # trailing slash in the glob means the dir name is the -2 element
        save_part = part_dir.split('/')[-2]
        create_dir(os.path.join(out_dir, save_part))
        save_part_segment = save_part + '/segments/'
        create_dir(os.path.join(out_dir, save_part_segment))
        for video_dir in sorted(glob.glob(part_dir + '/segments/*/')):
            print(video_dir)
            save_segment_dir = save_part_segment + video_dir.split('/')[-2]
            create_dir(os.path.join(out_dir, save_segment_dir))
            for selected_txt in sorted(glob.glob(video_dir + in_exp)):
                # video index: strip '.txt' and take the last '_' token
                vid_ind = selected_txt.split('/')[-1][:-4].split('_')[-1]
                imgs_dir = os.path.join(video_dir,
                                        'CutVid_%s/%s' % (vid_ind,
                                                          prefix_dir))
                im_list = np.loadtxt(selected_txt, dtype=str, delimiter=',')
                current_result = _cityscape_folder(im_list, imgs_dir)
                # NOTE(review): the result file is written into the input
                # video_dir, not into the out_dir mirror created above —
                # the created save_* directories appear unused here; confirm
                # whether out_file should live under out_dir.
                out_file = os.path.join(video_dir,
                                        '%s_%s.txt' % (out_name, vid_ind))
                header = 'im_name,gaze_label,pixels_per_labels'
                np.savetxt(out_file, np.array(current_result),
                           delimiter=';', fmt='%s', header=header)
def report_monodepth(img_folder, txt_folder, out_dir=None, prefix_dir='npys',
                     out_name='depth_stats', in_exp='SELECTED_IMGS_*.txt'):
    """Compute monocular-depth statistics for every selected clip.

    Walks ``txt_folder/<part>/segments/<video>/`` for selection files
    matching ``in_exp``. The depth data itself lives under ``img_folder``
    (same layout), so paths are mapped from ``txt_folder`` to ``img_folder``
    before processing; results of ``_monodepth_folder`` are written next to
    the depth data as ``<out_name>_<ind>.txt``.

    Args:
        img_folder: root of the depth (``.npy``) directory tree.
        txt_folder: root of the selection-text directory tree.
        out_dir: root for mirrored output directories; defaults to
            ``img_folder``.
        prefix_dir: sub-folder of each ``CutVid_*`` holding the depth arrays.
        out_name: prefix of the produced statistics files.
        in_exp: glob pattern of the selection text files.
    """
    if out_dir is None:
        out_dir = img_folder
    for part_dir in sorted(glob.glob(txt_folder + '/*/')):
        # trailing slash in the glob means the dir name is the -2 element
        save_part = part_dir.split('/')[-2]
        create_dir(os.path.join(out_dir, save_part))
        save_part_segment = save_part + '/segments/'
        create_dir(os.path.join(out_dir, save_part_segment))
        for video_dir in sorted(glob.glob(part_dir + '/segments/*/')):
            print(video_dir)
            save_segment_dir = save_part_segment + video_dir.split('/')[-2]
            create_dir(os.path.join(out_dir, save_segment_dir))
            for selected_txt in sorted(glob.glob(video_dir + in_exp)):
                # video index: strip '.txt' and take the last '_' token
                vid_ind = selected_txt.split('/')[-1][:-4].split('_')[-1]
                imgs_dir = os.path.join(video_dir,
                                        'CutVid_%s/%s' % (vid_ind,
                                                          prefix_dir))
                im_list = np.loadtxt(selected_txt, dtype=str, delimiter=',')
                # replacing the txt folder with img folder
                imgs_dir = imgs_dir.replace(txt_folder, img_folder)
                current_result = _monodepth_folder(im_list, imgs_dir)
                # results are saved alongside the depth data, not the txts
                video_dir_save = video_dir.replace(txt_folder, img_folder)
                out_file = os.path.join(video_dir_save,
                                        '%s_%s.txt' % (out_name, vid_ind))
                header = 'im_name,gaze_depth,pixels_per_depth'
                np.savetxt(out_file, np.array(current_result),
                           delimiter=';', fmt='%s', header=header)
def main_worker(ngpus_per_node, args):
    """Train a (possibly distributed) classification network on contrast data.

    Builds the model (transferred / custom / torchvision), wraps it for
    (distributed) data-parallel execution, optionally resumes from a
    checkpoint, then runs the train/validate loop, checkpointing and logging
    progress to ``model_progress.csv`` every epoch.

    Args:
        ngpus_per_node: number of GPUs on this node (used for rank and
            batch-size arithmetic in distributed mode).
        args: parsed command-line namespace; read and mutated throughout
            (e.g. ``args.rank``, ``args.batch_size``, ``args.out_dir``).
    """
    mean, std = model_utils.get_preprocessing_function(args.colour_space,
                                                       args.vision_type)
    if args.gpus is not None:
        print("Use GPU: {} for training".format(args.gpus))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + args.gpus
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    if args.transfer_weights is not None:
        print('Transferred model!')
        (model, _) = model_utils.which_network(args.transfer_weights[0],
                                               args.task_type,
                                               num_classes=args.old_classes)
        # optional second element selects the layer to cut the old net at
        which_layer = -1
        if len(args.transfer_weights) == 2:
            which_layer = args.transfer_weights[1]
        model = model_utils.NewClassificationModel(model, which_layer,
                                                   args.num_classes)
    elif args.custom_arch:
        print('Custom model!')
        supported_customs = ['resnet_basic_custom', 'resnet_bottleneck_custom']
        if os.path.isfile(args.network_name):
            # network_name is a checkpoint file: rebuild that architecture
            checkpoint = torch.load(args.network_name, map_location='cpu')
            customs = None
            if 'customs' in checkpoint:
                customs = checkpoint['customs']
                # TODO: num_classes is just for backward compatibility
                if 'num_classes' not in customs:
                    customs['num_classes'] = 1000
            model = which_architecture(checkpoint['arch'], customs,
                                       args.contrast_head)
            args.network_name = checkpoint['arch']
            model.load_state_dict(checkpoint['state_dict'], strict=False)
        elif args.network_name in supported_customs:
            model = custom_models.__dict__[args.network_name](
                args.blocks, contrast_head=args.contrast_head,
                pooling_type=args.pooling_type, in_chns=len(mean),
                num_classes=args.num_classes, inplanes=args.num_kernels,
                kernel_size=args.kernel_size)
        # NOTE(review): if custom_arch is set but network_name is neither a
        # file nor a supported custom, ``model`` is never assigned — confirm
        # callers cannot reach that combination.
    elif args.pretrained:
        print("=> using pre-trained model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name]()
    # TODO: why load weights is False?
    args.out_dir = prepare_training.prepare_output_directories(
        dataset_name='contrast', network_name=args.network_name,
        optimiser='sgd', load_weights=False,
        experiment_name=args.experiment_name, framework='pytorch')
    # preparing the output folder
    create_dir(args.out_dir)
    # persist the full argument set for reproducibility
    json_file_name = os.path.join(args.out_dir, 'args.json')
    with open(json_file_name, 'w') as fp:
        json.dump(dict(args._get_kwargs()), fp, sort_keys=True, indent=4)
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpus is not None:
            torch.cuda.set_device(args.gpus)
            model.cuda(args.gpus)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpus])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpus is not None:
        torch.cuda.set_device(args.gpus)
        model = model.cuda(args.gpus)
    else:
        # DataParallel will divide and allocate batch_size to all available
        # GPUs
        if (args.network_name.startswith('alexnet')
                or args.network_name.startswith('vgg')):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpus)
    # optimiser
    if args.transfer_weights is None:
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        # fine-tuning: feature extractor gets a tiny LR, the head the full LR
        # for p in model.features.parameters():
        #     p.requires_grad = False
        params_to_optimize = [
            {
                'params': [p for p in model.features.parameters()],
                'lr': 1e-6
            },
            {
                'params': [p for p in model.fc.parameters()]
            },
        ]
        optimizer = torch.optim.SGD(params_to_optimize, lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    model_progress = []
    model_progress_path = os.path.join(args.out_dir, 'model_progress.csv')
    # optionally resume from a checkpoint
    # TODO: it would be best if resume load the architecture from this file
    # TODO: merge with which_architecture
    best_acc1 = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.initial_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.gpus is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpus)
                model = model.cuda(args.gpus)
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            # also restore the progress log so the CSV stays contiguous
            if os.path.exists(model_progress_path):
                model_progress = np.loadtxt(model_progress_path,
                                            delimiter=',')
                model_progress = model_progress.tolist()
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    cudnn.benchmark = True
    train_trans = []
    valid_trans = []
    both_trans = []
    if args.mosaic_pattern is not None:
        mosaic_trans = preprocessing.mosaic_transformation(
            args.mosaic_pattern)
        both_trans.append(mosaic_trans)
    if args.num_augmentations != 0:
        augmentations = preprocessing.random_augmentation(
            args.augmentation_settings, args.num_augmentations)
        train_trans.append(augmentations)
    target_size = default_configs.get_default_target_size(
        args.dataset, args.target_size)
    # loading the training set
    train_trans = [*both_trans, *train_trans]
    db_params = {
        'colour_space': args.colour_space,
        'vision_type': args.vision_type,
        'mask_image': args.mask_image
    }
    if args.dataset in ['imagenet', 'celeba', 'natural']:
        path_or_sample = args.data_dir
    else:
        path_or_sample = args.train_samples
    train_dataset = dataloader.train_set(args.dataset, target_size, mean,
                                         std,
                                         extra_transformation=train_trans,
                                         data_dir=path_or_sample, **db_params)
    if args.dataset == 'natural':
        # 'natural' yields all crops of one image per item, so the dataset
        # produces the batch and the loader batch size must be 1
        train_dataset.num_crops = args.batch_size
        args.batch_size = 1
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None), num_workers=args.workers,
        pin_memory=True, sampler=train_sampler)
    # loading validation set
    valid_trans = [*both_trans, *valid_trans]
    validation_dataset = dataloader.validation_set(
        args.dataset, target_size, mean, std,
        extra_transformation=valid_trans, data_dir=path_or_sample,
        **db_params)
    if args.dataset == 'natural':
        validation_dataset.num_crops = train_dataset.num_crops
        args.batch_size = 1
    val_loader = torch.utils.data.DataLoader(
        validation_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    # training on epoch
    for epoch in range(args.initial_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        misc_utils.adjust_learning_rate(optimizer, epoch, args)
        # train for one epoch
        train_log = train_on_data(train_loader, model, criterion, optimizer,
                                  epoch, args)
        # evaluate on validation set
        validation_log = validate_on_data(val_loader, model, criterion, args)
        model_progress.append([*train_log, *validation_log])
        # remember best acc@1 and save checkpoint
        acc1 = validation_log[2]
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        # only one process per node writes checkpoints
        if misc_utils.is_saving_node(args.multiprocessing_distributed,
                                     args.rank, ngpus_per_node):
            misc_utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.network_name,
                    'customs': {
                        'pooling_type': args.pooling_type,
                        'in_chns': len(mean),
                        'num_classes': args.num_classes,
                        'blocks': args.blocks,
                        'num_kernels': args.num_kernels,
                        'kernel_size': args.kernel_size
                    },
                    'preprocessing': {'mean': mean, 'std': std},
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                    'target_size': target_size,
                },
                is_best, out_folder=args.out_dir)
        # TODO: get this header directly as a dictionary keys
        # progress CSV is rewritten every epoch so a crash loses nothing
        header = 'epoch,t_time,t_loss,t_top1,t_top5,v_time,v_loss,v_top1,v_top5'
        np.savetxt(model_progress_path, np.array(model_progress),
                   delimiter=',', header=header)
video_list = pickle_info['video_list'] sequence_length = pickle_info['sequence_length'] if frames_gap is None: frames_gap = pickle_info['frames_gap'] return video_list, sequence_length, frames_gap if __name__ == "__main__": parser = geetup_opts.argument_parser() args = geetup_opts.check_args(parser, sys.argv[1:]) os.environ['CUDA_VISIBLE_DEVICES'] = ', '.join(str(e) for e in args.gpus) gpus = [*range(len(args.gpus))] create_dir(args.log_dir) # for training organise the output file if args.evaluate is False: # add architecture to directory args.log_dir = os.path.join(args.log_dir, args.architecture) create_dir(args.log_dir) # add frame based or time integration to directory if args.frame_based: time_or_frame = 'frame_based' else: time_or_frame = 'time_integration' args.log_dir = os.path.join(args.log_dir, time_or_frame) create_dir(args.log_dir) # add scratch or fine tune to directory if args.weights is None: new_or_tune = 'scratch'
def main_worker(ngpus_per_node, args):
    """Train a 2AFC image-quality network on the BAPPS dataset.

    Mirrors the generic training worker: builds the model (AFC-wrapped
    transferred net, custom ResNet, or torchvision), sets up (distributed)
    data-parallel execution, optionally resumes, then runs the
    train/validate loop with soft cross-entropy, checkpointing and logging
    progress each epoch.

    Args:
        ngpus_per_node: number of GPUs on this node.
        args: parsed command-line namespace; read and mutated throughout.
    """
    mean, std = model_utils.get_preprocessing_function(args.colour_space,
                                                       args.vision_type)
    # preparing the output folder
    create_dir(args.out_dir)
    if args.gpus is not None:
        print("Use GPU: {} for training".format(args.gpus))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + args.gpus
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    if args.transfer_weights is not None:
        print('Transferred model!')
        model = contrast_utils.AFCModel(args.network_name,
                                        args.transfer_weights)
    elif args.custom_arch:
        print('Custom model!')
        supported_customs = ['resnet_basic_custom', 'resnet_bottleneck_custom']
        if args.network_name in supported_customs:
            model = custom_models.__dict__[args.network_name](
                args.blocks, pooling_type=args.pooling_type,
                in_chns=len(mean), num_classes=args.num_classes,
                inplanes=args.num_kernels, kernel_size=args.kernel_size)
        # NOTE(review): unsupported custom names leave ``model`` unbound —
        # confirm callers cannot reach that combination.
    elif args.pretrained:
        print("=> using pre-trained model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name]()
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpus is not None:
            torch.cuda.set_device(args.gpus)
            model.cuda(args.gpus)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpus])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpus is not None:
        torch.cuda.set_device(args.gpus)
        model = model.cuda(args.gpus)
    else:
        # DataParallel will divide and allocate batch_size to all available
        # GPUs
        if (args.network_name.startswith('alexnet')
                or args.network_name.startswith('vgg')):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    # define loss function (criterion) and optimizer
    # soft labels: the 2AFC ground truth is a probability, not a class index
    criterion = soft_cross_entropy
    # optimiser
    if args.transfer_weights is None:
        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    else:
        # fine-tuning: only optimise parameters that still require grad
        params_to_optimize = [
            {
                'params': [p for p in model.parameters() if p.requires_grad]
            },
        ]
        optimizer = torch.optim.SGD(params_to_optimize, lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    model_progress = []
    model_progress_path = os.path.join(args.out_dir, 'model_progress.csv')
    # optionally resume from a checkpoint
    # TODO: it would be best if resume load the architecture from this file
    # TODO: merge with which_architecture
    best_acc1 = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.initial_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.gpus is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpus)
                model = model.cuda(args.gpus)
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            # also restore the progress log so the CSV stays contiguous
            if os.path.exists(model_progress_path):
                model_progress = np.loadtxt(model_progress_path,
                                            delimiter=',')
                model_progress = model_progress.tolist()
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    cudnn.benchmark = True
    train_trans = []
    valid_trans = []
    both_trans = []
    if args.mosaic_pattern is not None:
        mosaic_trans = preprocessing.mosaic_transformation(
            args.mosaic_pattern)
        both_trans.append(mosaic_trans)
    if args.num_augmentations != 0:
        augmentations = preprocessing.random_augmentation(
            args.augmentation_settings, args.num_augmentations)
        train_trans.append(augmentations)
    target_size = default_configs.get_default_target_size(
        args.dataset, args.target_size)
    # transformations applied to both splits, after everything else
    final_trans = [
        cv2_transforms.ToTensor(),
        cv2_transforms.Normalize(mean, std),
    ]
    train_trans.append(
        cv2_transforms.RandomResizedCrop(target_size, scale=(0.08, 1.0)))
    # loading the training set
    train_trans = torch_transforms.Compose(
        [*both_trans, *train_trans, *final_trans])
    train_dataset = image_quality.BAPPS2afc(root=args.data_dir,
                                            split='train',
                                            transform=train_trans,
                                            concat=0.5)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None), num_workers=args.workers,
        pin_memory=True, sampler=train_sampler)
    # validation uses deterministic resize + centre crop instead
    valid_trans.extend([
        cv2_transforms.Resize(target_size),
        cv2_transforms.CenterCrop(target_size),
    ])
    # loading validation set
    valid_trans = torch_transforms.Compose(
        [*both_trans, *valid_trans, *final_trans])
    validation_dataset = image_quality.BAPPS2afc(root=args.data_dir,
                                                 split='val',
                                                 transform=valid_trans,
                                                 concat=0)
    val_loader = torch.utils.data.DataLoader(
        validation_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    # training on epoch
    for epoch in range(args.initial_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        misc_utils.adjust_learning_rate(optimizer, epoch, args)
        # train for one epoch
        train_log = train_on_data(train_loader, model, criterion, optimizer,
                                  epoch, args)
        # evaluate on validation set
        validation_log = validate_on_data(val_loader, model, criterion, args)
        model_progress.append([*train_log, *validation_log])
        # remember best acc@1 and save checkpoint
        acc1 = validation_log[2]
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        # only one process per node writes checkpoints
        if misc_utils.is_saving_node(args.multiprocessing_distributed,
                                     args.rank, ngpus_per_node):
            misc_utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.network_name,
                    'customs': {
                        'pooling_type': args.pooling_type,
                        'in_chns': len(mean),
                        'num_classes': args.num_classes,
                        'blocks': args.blocks,
                        'num_kernels': args.num_kernels,
                        'kernel_size': args.kernel_size
                    },
                    'transfer_weights': args.transfer_weights,
                    'preprocessing': {'mean': mean, 'std': std},
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                    'target_size': target_size,
                },
                is_best, out_folder=args.out_dir)
        # TODO: get this header directly as a dictionary keys
        # progress CSV is rewritten every epoch so a crash loses nothing
        header = 'epoch,t_time,t_loss,t_top5,v_time,v_loss,v_top1'
        np.savetxt(model_progress_path, np.array(model_progress),
                   delimiter=',', header=header)
def main_worker(ngpus_per_node, args):
    """Train or predict with a multi-head colour/object network (WCS data).

    In prediction mode (``args.prediction``) the model is rebuilt from a
    checkpoint and ``predict`` is run for every manipulation value, saving
    the predictions and returning early. Otherwise a model is built
    (custom multi-head ResNet, transferred, or torchvision), wrapped for
    (distributed) execution, optionally resumed, and trained with the usual
    epoch loop.

    Args:
        ngpus_per_node: number of GPUs on this node.
        args: parsed command-line namespace; read and mutated throughout.
    """
    # NOTE(review): relies on a module-level ``best_acc1``; unlike the
    # sibling workers there is no local ``best_acc1 = 0`` before the epoch
    # loop — confirm the global is initialised at module scope.
    global best_acc1
    # presumably flags the pill-image variant of the WCS dataset; the flag
    # is not used in this function — TODO confirm whether it is dead code
    is_pill_img = 'wcs_xyz_png_1600' in args.data_dir
    mean, std = model_utils.get_preprocessing_function(args.colour_space,
                                                       args.vision_type)
    # preparing the output folder
    create_dir(args.out_dir)
    if args.gpus is not None:
        print("Use GPU: {} for training".format(args.gpus))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + args.gpus
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    if args.prediction:
        # rebuild the trained architecture from its checkpoint
        checkpoint = torch.load(args.network_weights, map_location='cpu')
        blocks = checkpoint['customs']['blocks']
        pooling_type = checkpoint['customs']['pooling_type']
        num_kernels = checkpoint['customs']['num_kernels']
        outputs = checkpoint['customs']['outputs']
        # heads saved without an 'area' entry are disabled
        for key, val in outputs.items():
            if 'area' not in val:
                outputs[key] = None
        model = resnet.__dict__[args.network_name](
            blocks, pooling_type=pooling_type, in_chns=len(mean),
            inplanes=num_kernels, outputs=outputs)
        model.load_state_dict(checkpoint['state_dict'])
    elif args.transfer_weights is not None:
        print('Transferred model!')
        (model, _) = model_utils.which_network(args.transfer_weights,
                                               args.task_type,
                                               num_classes=args.old_classes)
        model = model_utils.NewClassificationModel(model, args.num_classes)
    elif args.custom_arch:
        print('Custom model!')
        if (args.network_name == 'resnet_basic_custom'
                or args.network_name == 'resnet_bottleneck_custom'):
            # one output head per enabled task; None disables a head
            outputs = {'objects': None, 'munsells': None,
                       'illuminants': None}
            imagenet_weights = args.imagenet_weights
            if args.object_area is not None:
                outputs['objects'] = {
                    'num_classes': 2100,
                    'area': args.object_area
                }
            if args.munsell_area is not None:
                outputs['munsells'] = {
                    'num_classes': 1600,
                    'area': args.munsell_area
                }
            if args.illuminant_area is not None:
                outputs['illuminants'] = {
                    'num_classes': 280,
                    'area': args.illuminant_area
                }
            model = resnet.__dict__[args.network_name](
                args.blocks, pooling_type=args.pooling_type,
                in_chns=len(mean), inplanes=args.num_kernels,
                outputs=outputs, imagenet_weights=imagenet_weights)
    elif args.pretrained:
        print("=> using pre-trained model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.network_name))
        model = models.__dict__[args.network_name]()
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpus is not None:
            torch.cuda.set_device(args.gpus)
            model.cuda(args.gpus)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpus])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpus is not None:
        torch.cuda.set_device(args.gpus)
        model = model.cuda(args.gpus)
    else:
        # DataParallel will divide and allocate batch_size to all available
        # GPUs
        if (args.network_name.startswith('alexnet')
                or args.network_name.startswith('vgg')):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpus)
    # optimiser
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    model_progress = []
    model_progress_path = os.path.join(args.out_dir, 'model_progress.csv')
    # optionally resume from a checkpoint
    # TODO: it would be best if resume load the architecture from this file
    # TODO: merge with which_architecture
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            args.initial_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            if args.gpus is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpus)
                model = model.cuda(args.gpus)
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            # also restore the progress log so the CSV stays contiguous
            if os.path.exists(model_progress_path):
                model_progress = np.loadtxt(model_progress_path,
                                            delimiter=',')
                model_progress = model_progress.tolist()
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    cudnn.benchmark = True
    normalize = transforms.Normalize(mean=mean, std=std)
    other_transformations = []
    if args.num_augmentations != 0:
        augmentations = preprocessing.random_augmentation(
            args.augmentation_settings, args.num_augmentations)
        other_transformations.append(augmentations)
    target_size = get_default_target_size(args.dataset, args.target_size)
    train_dataset, validation_dataset = get_train_val_dataset(
        args.data_dir, other_transformations, [], normalize,
        args.imagenet_weights)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    if args.prediction:
        # prediction mode: evaluate the model once per manipulation value,
        # rebuilding the validation set with that manipulation applied
        manipulation_values = args.parameters['kwargs'][args.manipulation]
        manipulation_name = args.parameters['f_name']
        for j, manipulation_value in enumerate(manipulation_values):
            args.parameters['kwargs'][args.manipulation] = manipulation_value
            prediction_transformation = \
                preprocessing.prediction_transformation(
                    args.parameters, args.colour_space,
                    tmp_c_space(manipulation_name))
            other_transformations = [prediction_transformation]
            _, validation_dataset = get_train_val_dataset(
                args.data_dir, other_transformations,
                other_transformations, normalize, args.imagenet_weights)
            val_loader = torch.utils.data.DataLoader(
                validation_dataset, batch_size=args.batch_size,
                shuffle=False, num_workers=args.workers, pin_memory=True)
            pred_log = predict(val_loader, model, criterion,
                               torch.device(args.gpus))
            # module name is spelled 'prepapre_testing' in the project (sic)
            from kernelphysiology.dl.utils import prepapre_testing
            prepapre_testing.save_predictions(
                pred_log, args.experiment_name, args.pred_name,
                args.dataset, manipulation_name, manipulation_value)
        # prediction mode never trains
        return
    val_loader = torch.utils.data.DataLoader(
        validation_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None), num_workers=args.workers,
        pin_memory=True, sampler=train_sampler)
    if args.ill_colour is not None:
        print('Performing with illuminant correction')
        args.ill_colour = np.loadtxt(args.ill_colour, delimiter=',')
    # training on epoch
    for epoch in range(args.initial_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        # with imagenet initialisation the LR schedule is skipped
        if args.imagenet_weights is None:
            adjust_learning_rate(optimizer, epoch, args)
        # train for one epoch
        train_log = train_on_data(train_loader, model, criterion, optimizer,
                                  epoch, args)
        # evaluate on validation set
        validation_log = validate_on_data(val_loader, model, criterion, args)
        model_progress.append([*train_log, *validation_log])
        # remember best acc@1 and save checkpoint
        acc1 = validation_log[2]
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            # NOTE(review): ``outputs`` is only bound in the prediction /
            # custom-arch branches; checkpointing a pretrained torchvision
            # model would raise NameError here — confirm reachable configs.
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.network_name,
                    'customs': {
                        'pooling_type': args.pooling_type,
                        'in_chns': len(mean),
                        'num_classes': args.num_classes,
                        'blocks': args.blocks,
                        'num_kernels': args.num_kernels,
                        'outputs': outputs
                    },
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                    'target_size': target_size,
                },
                is_best, out_folder=args.out_dir)
        # TODO: get this header directly as a dictionary keys
        # progress CSV is rewritten every epoch so a crash loses nothing
        header = 'epoch,t_time,t_loss,t_lo,t_lm,t_li,t_ao,t_am,t_ai,' \
                 'v_time,v_loss,v_lo,v_lm,v_li,v_ao,v_am,v_ai'
        np.savetxt(model_progress_path, np.array(model_progress),
                   delimiter=',', header=header)