def test_create_model(self):
    """Check that the encoder emits the embedding sizes reported in the
    Prototypical Networks paper for each dataset's input resolution.

    Omniglot (1-channel, 28x28) -> 64-dim embeddings;
    miniImageNet (3-channel, 84x84) -> 1600-dim embeddings.
    """
    # Omniglot: single-channel input, expect 64-dimensional output.
    encoder = get_few_shot_encoder(num_input_channels=1).float()
    omniglot = OmniglotDataset('background')
    self.assertEqual(
        encoder(omniglot[0][0].unsqueeze(0).float()).shape[1],
        64,
        'Encoder network should produce 64 dimensional embeddings on Omniglot dataset.'
    )

    # miniImageNet: three-channel input, expect 1600-dimensional output.
    # Fixed: this dataset was previously bound to a variable named
    # `omniglot`, which was misleading — it is MiniImageNet.
    encoder = get_few_shot_encoder(num_input_channels=3).float()
    mini_imagenet = MiniImageNet('background')
    self.assertEqual(
        encoder(mini_imagenet[0][0].unsqueeze(0).float()).shape[1],
        1600,
        'Encoder network should produce 1600 dimensional embeddings on miniImageNet dataset.'
    )
def __init__(self, dataset: str, num_tasks: int, n_shot: int, k_way: int,
             q_queries: int, distance_metric: str, open_world_testing: bool):
    """Set up dataset, task sampler, data loader and model for few-shot
    evaluation.

    Args:
        dataset: one of 'whoas', 'kaggle', 'miniImageNet'.
        num_tasks: number of tasks passed to the custom task sampler.
        n_shot: support examples per class in each episode.
        k_way: number of classes per episode.
        q_queries: query examples per class in each episode.
        distance_metric: distance used by the prototypical-network episode
            (e.g. 'l2' or 'cosine').
        open_world_testing: whether the sampler builds open-world episodes.

    Raises:
        ValueError: if `dataset` is not one of the supported names.
    """
    self.dataset_name = dataset
    self.num_tasks = num_tasks
    self.n_shot = n_shot
    self.k_way = k_way
    self.q_queries = q_queries
    self.episodes_per_epoch = 10
    # Fixed: the caller-supplied metric was previously discarded and
    # hard-coded to 'l2', silently ignoring the `distance_metric` argument.
    self.distance_metric = distance_metric
    self.open_world_testing = open_world_testing
    self.prepare_batch = prepare_nshot_task(self.n_shot, self.k_way, self.q_queries)
    self.num_different_models = 0

    if dataset == 'whoas':
        self.evaluation_dataset = Whoas('evaluation')
        # self.evaluation_dataset = Whoas('background')
    elif dataset == 'kaggle':
        self.evaluation_dataset = Kaggle('evaluation')
    elif dataset == 'miniImageNet':
        self.evaluation_dataset = MiniImageNet('evaluation')
    else:
        # Fixed: `raise (ValueError, '...')` raises a tuple, which is a
        # TypeError in Python 3 — raise a proper ValueError instead.
        raise ValueError('Unsupported dataset')

    self.batch_sampler = NShotCustomTaskSampler(self.evaluation_dataset,
                                                self.episodes_per_epoch,
                                                n_shot, k_way, q_queries,
                                                num_tasks, None,
                                                open_world_testing)
    self.evaluation_taskloader = DataLoader(
        self.evaluation_dataset, batch_sampler=self.batch_sampler)

    # Evaluation is CUDA-only: the model is moved to GPU in double precision.
    assert torch.cuda.is_available()
    self.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = True
    self.model = get_few_shot_encoder(
        self.evaluation_dataset.num_input_channels)
    self.model.to(self.device, dtype=torch.double)
    self.optimiser = Adam(self.model.parameters(), lr=1e-3)
    self.loss_fn = torch.nn.NLLLoss().cuda()
    batch_sampler=NShotTaskSampler(background, episodes_per_epoch,
                                   args.n_train, args.k_train, args.q_train),
    num_workers=4
)
# NOTE(review): fragment — the DataLoader(...) call closed above, and names
# like `background`, `dataset_class`, `args`, `device`, `PATH` and the
# original `param_str` are defined outside this chunk.

# evaluation = dataset_class('images_evaluation')
# Evaluation episodes are drawn from a separate test split.
evaluation = dataset_class('datasets/test')
evaluation_taskloader = DataLoader(
    evaluation,
    batch_sampler=NShotTaskSampler(evaluation, episodes_per_epoch,
                                   args.n_test, args.k_test, args.q_test),
    num_workers=4
)

#########
# Model #
#########
model = get_few_shot_encoder(num_input_channels)
if args.retrain:
    # Warm-start from a previously saved checkpoint.
    model.load_state_dict(torch.load(PATH + f'/models/proto_nets/{param_str}.pth'))
# Model runs in double precision on `device`.
model.to(device, dtype=torch.double)

############
# Training #
############
print(f'Training Prototypical network on {args.dataset}...')
if args.retrain:
    # NOTE(review): the checkpoint above was loaded with the pre-retrain
    # `param_str`; it is redefined here so new artifacts get a
    # `retrain=True` suffix — confirm this ordering is intended.
    param_str = f'{args.dataset}_nt={args.n_train}_kt={args.k_train}_qt={args.q_train}_' \
                f'nv={args.n_test}_kv={args.k_test}_qv={args.q_test}_experiment={args.experiment_name}_retrain=True'
optimiser = Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.NLLLoss().cuda()
def main():
    """Evaluate a saved Prototypical Network checkpoint on the miniImageNet
    test split and append per-epoch categorical accuracy to a results CSV.

    NOTE(review): relies on module-level globals not visible in this chunk —
    `device`, `logger`, `MiniImageNet`, `ResultWriter`, `proto_net_episode`,
    etc.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_path",
        type=str,
        default=
        "./models/proto_nets/miniImageNet_nt=5_kt=5_qt=10_nv=5_kv=5_qv=10_dist=l2_sampling_method=True_is_diverisity=True.pth",
        help="model path")
    parser.add_argument(
        "--result_path",
        type=str,
        default="./results/proto_nets/5shot_training_5shot_diverisity.csv",
        help="Directory for evaluation report result (for experiments)")
    parser.add_argument('--dataset', type=str, required=True)
    parser.add_argument('--distance', default='cosine')
    parser.add_argument('--n_train', default=1, type=int)
    parser.add_argument('--n_test', default=1, type=int)
    parser.add_argument('--k_train', default=5, type=int)
    parser.add_argument('--k_test', default=5, type=int)
    parser.add_argument('--q_train', default=15, type=int)
    parser.add_argument('--q_test', default=15, type=int)
    parser.add_argument(
        "--debug",
        action="store_true",
        help="set logging level DEBUG",
    )
    args = parser.parse_args()

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.DEBUG if args.debug else logging.INFO,
    )

    ###################
    # Create datasets #
    ###################
    episodes_per_epoch = 600
    if args.dataset == 'miniImageNet':
        # Each "epoch" here is one pass of 600 evaluation episodes;
        # 5 epochs give repeated estimates of test accuracy.
        n_epochs = 5
        dataset_class = MiniImageNet
        num_input_channels = 3
    else:
        raise (ValueError('need to make other datasets module'))
    test_dataset = dataset_class('test')
    test_dataset_taskloader = DataLoader(
        test_dataset,
        batch_sampler=NShotTaskSampler(test_dataset, episodes_per_epoch,
                                       args.n_test, args.k_test, args.q_test),
        num_workers=4)

    #########
    # Model #
    #########
    # Model runs in double precision; checkpoint is loaded non-strictly.
    # NOTE(review): strict=False silently ignores missing/unexpected keys —
    # confirm the checkpoint really matches this encoder.
    model = get_few_shot_encoder(num_input_channels).to(device,
                                                        dtype=torch.double)
    model.load_state_dict(torch.load(args.model_path), strict=False)
    model.eval()

    #############
    # Inference #
    #############
    logger.info("***** Epochs = %d *****", n_epochs)
    logger.info("***** Num episodes per epoch = %d *****", episodes_per_epoch)
    result_writer = ResultWriter(args.result_path)
    # just argument (function: proto_net_episode)
    prepare_batch = prepare_nshot_task(args.n_test, args.k_test, args.q_test)
    # Optimiser/loss are required by proto_net_episode's signature even
    # though train=False below means no parameter updates happen.
    optimiser = Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.NLLLoss().cuda()
    train_iterator = trange(
        0,
        int(n_epochs),
        desc="Epoch",
    )
    for i_epoch in train_iterator:
        epoch_iterator = tqdm(
            test_dataset_taskloader,
            desc="Iteration",
        )
        seen = 0
        metric_name = f'test_{args.n_test}-shot_{args.k_test}-way_acc'
        metric = {metric_name: 0.0}
        for _, batch in enumerate(epoch_iterator):
            x, y = prepare_batch(batch)
            loss, y_pred = proto_net_episode(model, optimiser, loss_fn, x, y,
                                             n_shot=args.n_test,
                                             k_way=args.k_test,
                                             q_queries=args.q_test,
                                             train=False,
                                             distance=args.distance)
            # Accumulate accuracy weighted by number of query predictions,
            # then normalize by the total seen to get a mean over the epoch.
            seen += y_pred.shape[0]
            metric[metric_name] += categorical_accuracy(
                y, y_pred) * y_pred.shape[0]
        metric[metric_name] = metric[metric_name] / seen
        logger.info("epoch: {}, categorical_accuracy: {}".format(
            i_epoch, metric[metric_name]))
        result_writer.update(**metric)
def few_shot_training(datadir=DATA_PATH, dataset='fashion',
                      num_input_channels=3, drop_lr_every=20,
                      validation_episodes=200, evaluation_episodes=1000,
                      episodes_per_epoch=100, n_epochs=80,
                      small_dataset=False, n_train=1, n_test=1, k_train=30,
                      k_test=5, q_train=5, q_test=1, distance='l2',
                      pretrained=False, monitor_validation=False,
                      n_val_classes=10, architecture='resnet18', gpu=None):
    """Train a Prototypical Network on the fashion-product dataset.

    Builds background (training), optional validation and evaluation (test)
    episode loaders, constructs either the paper's few-shot encoder or a
    pretrained torchvision backbone, then runs `fit` with few-shot callbacks.

    Args:
        datadir: root directory of the dataset.
        dataset: only 'fashion' is supported.
        num_input_channels: channels expected by the few-shot encoder.
        drop_lr_every: halve the learning rate every this many epochs.
        validation_episodes / evaluation_episodes: episodes drawn by the
            validation / test samplers.
        episodes_per_epoch: training episodes per epoch.
        n_epochs: number of training epochs.
        small_dataset: use the small image variant (and smaller resize).
        n_train, k_train, q_train / n_test, k_test, q_test: episode shape
            for training / evaluation.
        distance: distance metric for prototype matching.
        pretrained: use a pretrained torchvision backbone instead of the
            few-shot encoder (CUDA required).
        monitor_validation: carve a validation split out of the background
            classes and checkpoint on validation accuracy.
        n_val_classes: number of validation classes (raised to `k_test` if
            too small).
        architecture: torchvision model name when `pretrained` is True.
        gpu: explicit CUDA device index, or None for the default device.

    Raises:
        ValueError: if `dataset` is not 'fashion'.
    """
    setup_dirs()
    if dataset == 'fashion':
        dataset_class = FashionProductImagesSmall if small_dataset \
            else FashionProductImages
    else:
        # Fixed: `raise (ValueError, '...')` raised a tuple, which is a
        # TypeError in Python 3 — raise a proper ValueError instead.
        raise ValueError('Unsupported dataset')

    param_str = f'{dataset}_nt={n_train}_kt={k_train}_qt={q_train}_' \
                f'nv={n_test}_kv={k_test}_qv={q_test}_small={small_dataset}_' \
                f'pretrained={pretrained}_validate={monitor_validation}'
    print(param_str)

    ###################
    # Create datasets #
    ###################
    # ADAPTED: data transforms including augmentation
    resize = (80, 60) if small_dataset else (400, 300)

    background_transform = transforms.Compose([
        transforms.RandomResizedCrop(resize, scale=(0.8, 1.0)),
        # transforms.RandomGrayscale(),
        transforms.RandomPerspective(),
        transforms.RandomHorizontalFlip(),
        # transforms.Resize(resize),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406],
        #                      std=[0.229, 0.224, 0.225])
    ])

    evaluation_transform = transforms.Compose([
        transforms.Resize(resize),
        # transforms.CenterCrop(224),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406],
        #                      std=[0.229, 0.224, 0.225])
    ])

    if monitor_validation:
        if not n_val_classes >= k_test:
            # Validation needs at least k_test classes to form an episode.
            n_val_classes = k_test
            print("Warning: `n_val_classes` < `k_test`. Take a larger number"
                  " of validation classes next time. Increased to `k_test`"
                  " classes")

        # class structure for background (training), validation (validation),
        # evaluation (test): take a random subset of background classes
        validation_classes = list(
            np.random.choice(dataset_class.background_classes, n_val_classes))
        background_classes = list(
            set(dataset_class.background_classes).difference(
                set(validation_classes)))

        # use keyword for evaluation classes
        evaluation_classes = 'evaluation'

        # Meta-validation set
        validation = dataset_class(datadir, split='all',
                                   classes=validation_classes,
                                   transform=evaluation_transform)
        # ADAPTED: in the original code, `episodes_per_epoch` was provided to
        # `NShotTaskSampler` instead of `validation_episodes`.
        validation_sampler = NShotTaskSampler(validation, validation_episodes,
                                              n_test, k_test, q_test)
        validation_taskloader = DataLoader(validation,
                                           batch_sampler=validation_sampler,
                                           num_workers=4)
    else:
        # use keyword for both background and evaluation classes
        background_classes = 'background'
        evaluation_classes = 'evaluation'

    # Meta-training set
    background = dataset_class(datadir, split='all',
                               classes=background_classes,
                               transform=background_transform)
    background_sampler = NShotTaskSampler(background, episodes_per_epoch,
                                          n_train, k_train, q_train)
    background_taskloader = DataLoader(background,
                                       batch_sampler=background_sampler,
                                       num_workers=4)

    # Meta-test set
    evaluation = dataset_class(datadir, split='all',
                               classes=evaluation_classes,
                               transform=evaluation_transform)
    # ADAPTED: in the original code, `episodes_per_epoch` was provided to
    # `NShotTaskSampler` instead of `evaluation_episodes`.
    evaluation_sampler = NShotTaskSampler(evaluation, evaluation_episodes,
                                          n_test, k_test, q_test)
    evaluation_taskloader = DataLoader(evaluation,
                                       batch_sampler=evaluation_sampler,
                                       num_workers=4)

    #########
    # Model #
    #########
    if torch.cuda.is_available():
        if gpu is not None:
            device = torch.device('cuda', gpu)
        else:
            device = torch.device('cuda')
        torch.backends.cudnn.benchmark = True
    else:
        device = torch.device('cpu')

    if not pretrained:
        model = get_few_shot_encoder(num_input_channels)
        # ADAPTED
        model.to(device)
        # BEFORE
        # model.to(device, dtype=torch.double)
    else:
        assert torch.cuda.is_available()
        model = models.__dict__[architecture](pretrained=True)
        # Strip the classification head so the backbone yields embeddings.
        model.fc = Identity()
        if gpu is not None:
            model = model.cuda(gpu)
        else:
            model = model.cuda()
        # TODO this is too risky: I'm not sure that this can work, since in
        # the few-shot github repo the batch axis is actually split into
        # support and query samples
        # model = torch.nn.DataParallel(model).cuda()

    def lr_schedule(epoch, lr):
        # Drop lr every 2000 episodes
        if epoch % drop_lr_every == 0:
            return lr / 2
        else:
            return lr

    ############
    # Training #
    ############
    print(f'Training Prototypical network on {dataset}...')

    optimiser = Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.NLLLoss().to(device)

    callbacks = [
        # ADAPTED: this is the test monitoring now - and is only done at the
        # end of training.
        EvaluateFewShot(
            eval_fn=proto_net_episode,
            num_tasks=evaluation_episodes,  # THIS IS NOT USED
            n_shot=n_test,
            k_way=k_test,
            q_queries=q_test,
            taskloader=evaluation_taskloader,
            prepare_batch=prepare_nshot_task(n_test, k_test, q_test,
                                             device=device),
            distance=distance,
            on_epoch_end=False,
            on_train_end=True,
            prefix='test_')
    ]
    if monitor_validation:
        callbacks.append(
            # ADAPTED: this is the validation monitoring now - computed
            # after every epoch.
            EvaluateFewShot(
                eval_fn=proto_net_episode,
                num_tasks=evaluation_episodes,  # THIS IS NOT USED
                n_shot=n_test,
                k_way=k_test,
                q_queries=q_test,
                # BEFORE taskloader=evaluation_taskloader,
                taskloader=validation_taskloader,  # ADAPTED
                prepare_batch=prepare_nshot_task(n_test, k_test, q_test,
                                                 device=device),
                distance=distance,
                on_epoch_end=True,  # ADAPTED
                on_train_end=False,  # ADAPTED
                prefix='val_'))

    callbacks.extend([
        ModelCheckpoint(
            filepath=PATH + f'/models/proto_nets/{param_str}.pth',
            monitor=f'val_{n_test}-shot_{k_test}-way_acc',
            verbose=1,  # ADAPTED
            save_best_only=monitor_validation  # ADAPTED
        ),
        LearningRateScheduler(schedule=lr_schedule),
        CSVLogger(PATH + f'/logs/proto_nets/{param_str}.csv'),
    ])

    fit(
        model,
        optimiser,
        loss_fn,
        epochs=n_epochs,
        dataloader=background_taskloader,
        prepare_batch=prepare_nshot_task(n_train, k_train, q_train,
                                         device=device),
        callbacks=callbacks,
        metrics=['categorical_accuracy'],
        fit_function=proto_net_episode,
        fit_function_kwargs={
            'n_shot': n_train,
            'k_way': k_train,
            'q_queries': q_train,
            'train': True,
            'distance': distance
        },
    )
def run():
    """Train a Prototypical Network on miniImageNet, with either the
    original N-shot episode sampler or an importance sampler driven by the
    model's latent space (selected via `args.sampling_method`).

    NOTE(review): relies on module-level globals not visible in this chunk —
    `args`, `device`, `PATH`, the sampler/callback classes and `fit`.
    """
    episodes_per_epoch = 600

    '''
    ###### LearningRateScheduler ######
    drop_lr_every = 20
    def lr_schedule(epoch, lr):
        # Drop lr every 2000 episodes
        if epoch % drop_lr_every == 0:
            return lr / 2
        else:
            return lr
    # callbacks add: LearningRateScheduler(schedule=lr_schedule)
    '''

    if args.dataset == 'miniImageNet':
        n_epochs = 500
        dataset_class = MiniImageNet
        num_input_channels = 3
    else:
        raise (ValueError('need to make other datasets module'))

    # Identifier used for checkpoint/log file names.
    param_str = f'{args.dataset}_nt={args.n_train}_kt={args.k_train}_qt={args.q_train}_' \
                f'nv={args.n_test}_kv={args.k_test}_qv={args.q_test}_' \
                f'dist={args.distance}_sampling_method={args.sampling_method}_is_diverisity={args.is_diversity}'
    print(param_str)

    #########
    # Model #
    #########
    # Model is created before the dataloaders because ImportanceSampler
    # below needs it to embed candidates; runs in double precision.
    model = get_few_shot_encoder(num_input_channels)
    model.to(device, dtype=torch.double)

    ###################
    # Create datasets #
    ###################
    train_dataset = dataset_class('train')
    eval_dataset = dataset_class('eval')

    # Original sampling
    if not args.sampling_method:
        train_dataset_taskloader = DataLoader(
            train_dataset,
            batch_sampler=NShotTaskSampler(train_dataset, episodes_per_epoch,
                                           args.n_train, args.k_train,
                                           args.q_train),
            num_workers=4)
        eval_dataset_taskloader = DataLoader(
            eval_dataset,
            batch_sampler=NShotTaskSampler(eval_dataset, episodes_per_epoch,
                                           args.n_test, args.k_test,
                                           args.q_test),
            num_workers=4)
    # Importance sampling
    else:
        # ImportanceSampler: Latent space of model
        train_dataset_taskloader = DataLoader(
            train_dataset,
            batch_sampler=ImportanceSampler(
                train_dataset, model, episodes_per_epoch, n_epochs,
                args.n_train, args.k_train, args.q_train,
                args.num_s_candidates, args.init_temperature,
                args.is_diversity),
            num_workers=4)
        # Evaluation always uses the original sampler regardless of the
        # training sampling method.
        eval_dataset_taskloader = DataLoader(
            eval_dataset,
            batch_sampler=NShotTaskSampler(eval_dataset, episodes_per_epoch,
                                           args.n_test, args.k_test,
                                           args.q_test),
            num_workers=4)

    ############
    # Training #
    ############
    print(f'Training Prototypical network on {args.dataset}...')

    optimiser = Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.NLLLoss().cuda()

    callbacks = [
        # Per-epoch validation accuracy; drives checkpointing and LR drops.
        EvaluateFewShot(eval_fn=proto_net_episode,
                        n_shot=args.n_test,
                        k_way=args.k_test,
                        q_queries=args.q_test,
                        taskloader=eval_dataset_taskloader,
                        prepare_batch=prepare_nshot_task(
                            args.n_test, args.k_test, args.q_test),
                        distance=args.distance),
        ModelCheckpoint(
            filepath=PATH + f'/models/proto_nets/{param_str}.pth',
            monitor=f'val_{args.n_test}-shot_{args.k_test}-way_acc',
            save_best_only=True,
        ),
        ReduceLROnPlateau(
            patience=40,
            factor=0.5,
            monitor=f'val_{args.n_test}-shot_{args.k_test}-way_acc'),
        CSVLogger(PATH + f'/logs/proto_nets/{param_str}.csv'),
    ]

    fit(
        model,
        optimiser,
        loss_fn,
        epochs=n_epochs,
        dataloader=train_dataset_taskloader,
        prepare_batch=prepare_nshot_task(args.n_train, args.k_train,
                                         args.q_train),
        callbacks=callbacks,
        metrics=['categorical_accuracy'],
        fit_function=proto_net_episode,
        fit_function_kwargs={
            'n_shot': args.n_train,
            'k_way': args.k_train,
            'q_queries': args.q_train,
            'train': True,
            'distance': args.distance
        },
    )
def evaluate_few_shot(state_dict, n_shot, k_way, q_queries, device,
                      architecture='resnet18', pretrained=False,
                      small_dataset=False, metric_name=None,
                      evaluation_episodes=1000, num_input_channels=3,
                      distance='l2'):
    """Evaluate a trained model's few-shot accuracy on the fashion
    evaluation classes.

    Args:
        state_dict: weights to load into the model.
        n_shot, k_way, q_queries: episode shape.
        device: device used for the loss function.
        architecture: torchvision model name when `pretrained` is True.
        pretrained: load a torchvision backbone instead of the few-shot
            encoder.
        small_dataset: use the small image variant (and smaller resize).
        metric_name: key for the accuracy entry in the returned dict;
            defaults to 'test_{n_shot}-shot_{k_way}-way_acc'.
        evaluation_episodes: number of episodes to run.
        num_input_channels: channels expected by the few-shot encoder.
        distance: distance metric for prototype matching.

    Returns:
        dict with mean 'loss' and mean accuracy under `metric_name`.
    """
    if not pretrained:
        model = get_few_shot_encoder(num_input_channels)
        model.load_state_dict(state_dict)
    else:
        # assert torch.cuda.is_available()
        model = models.__dict__[architecture](pretrained=True)
        # Strip the classification head so the backbone yields embeddings.
        model.fc = Identity()
        model.load_state_dict(state_dict)
    # NOTE(review): the model is never moved to `device` and `model.eval()`
    # is never called here — confirm callers handle both before evaluating.

    dataset_class = FashionProductImagesSmall if small_dataset \
        else FashionProductImages

    # Meta-test set
    resize = (80, 60) if small_dataset else (400, 300)
    evaluation_transform = transforms.Compose([
        transforms.Resize(resize),
        transforms.ToTensor(),
    ])
    # Fixed: this previously instantiated FashionProductImagesSmall
    # unconditionally, ignoring the `dataset_class` selected from
    # `small_dataset` above.
    evaluation = dataset_class(DATA_PATH, split='all',
                               classes='evaluation',
                               transform=evaluation_transform)
    sampler = NShotTaskSampler(evaluation, evaluation_episodes, n_shot,
                               k_way, q_queries)
    taskloader = DataLoader(evaluation, batch_sampler=sampler, num_workers=4)
    prepare_batch = prepare_nshot_task(n_shot, k_way, q_queries)

    if metric_name is None:
        metric_name = f'test_{n_shot}-shot_{k_way}-way_acc'

    seen = 0
    totals = {'loss': 0, metric_name: 0}
    # Optimiser/loss are required by proto_net_episode's signature even
    # though train=False below means no parameter updates happen.
    optimiser = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.NLLLoss().to(device)
    for batch_index, batch in enumerate(taskloader):
        x, y = prepare_batch(batch)
        loss, y_pred = proto_net_episode(model, optimiser, loss_fn, x, y,
                                         n_shot=n_shot, k_way=k_way,
                                         q_queries=q_queries, train=False,
                                         distance=distance)
        # Weight each episode's loss/accuracy by its number of query
        # predictions, then normalize by the total seen.
        seen += y_pred.shape[0]
        totals['loss'] += loss.item() * y_pred.shape[0]
        totals[metric_name] += categorical_accuracy(y, y_pred) * \
            y_pred.shape[0]
    totals['loss'] = totals['loss'] / seen
    totals[metric_name] = totals[metric_name] / seen
    return totals