import json
import logging
import math
import os
import random
import time

import numpy as np
import torch
import torch.nn as nn
import wandb

from torchmeta.transforms import ClassSplitter
from torchmeta.utils.data import BatchMetaDataLoader
from torchmeta.modules import MetaLinear

# The remaining names are assumed to be project-local modules (paths are
# assumptions, adapted from the pytorch-maml examples this script follows).
from maml.model import MetaMLPModel
from maml.metalearners import ModelAgnosticMetaLearning
from analogy import Analogy


def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    wandb.init(project='meta-analogy')

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
    device = torch.device('cuda' if args.use_cuda
                          and torch.cuda.is_available() else 'cpu')

    if args.output_folder is not None:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)
            logging.debug('Creating folder `{0}`'.format(args.output_folder))

        folder = os.path.join(args.output_folder,
                              time.strftime('%Y-%m-%d_%H%M%S') + args.model_type)
        os.makedirs(folder)
        logging.debug('Creating folder `{0}`'.format(folder))

        args.model_path = os.path.abspath(os.path.join(folder, 'model.th'))

        # Save the configuration in a config.json file
        with open(os.path.join(folder, 'config.json'), 'w') as f:
            json.dump(vars(args), f, indent=2)
        logging.info('Saving configuration file in `{0}`'.format(
            os.path.abspath(os.path.join(folder, 'config.json'))))

    dataset_transform = ClassSplitter(shuffle=True,
                                      num_train_per_class=args.num_shots,
                                      num_test_per_class=args.num_shots_test)
    meta_train_dataset = Analogy(dataset_transform=dataset_transform)
    meta_val_dataset = Analogy(dataset_transform=dataset_transform)

    meta_train_dataloader = BatchMetaDataLoader(meta_train_dataset,
                                                batch_size=args.batch_size,
                                                shuffle=True,
                                                num_workers=args.num_workers,
                                                pin_memory=True)
    meta_val_dataloader = BatchMetaDataLoader(meta_val_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.num_workers,
                                              pin_memory=True)

    if args.model_type == 'linear':
        model = MetaLinear(in_features=300, out_features=300)
    elif args.model_type == 'mlp1':
        model = MetaMLPModel(in_features=300, out_features=300,
                             hidden_sizes=[500])
    elif args.model_type == 'mlp2':
        model = MetaMLPModel(in_features=300, out_features=300,
                             hidden_sizes=[500, 500])
    else:
        raise ValueError('Unrecognized model type `{0}`'.format(args.model_type))

    loss_function = nn.MSELoss()
    wandb.watch(model)
    meta_optimizer = torch.optim.Adam(model.parameters(), lr=args.meta_lr)
    metalearner = ModelAgnosticMetaLearning(model,
                                            meta_optimizer,
                                            first_order=args.first_order,
                                            num_adaptation_steps=args.num_steps,
                                            step_size=args.step_size,
                                            loss_function=loss_function,
                                            device=device)

    best_value = None

    # Training loop
    epoch_desc = 'Epoch {{0: <{0}d}}'.format(1 + int(math.log10(args.num_epochs)))
    for epoch in range(args.num_epochs):
        metalearner.train(meta_train_dataloader,
                          max_batches=args.num_batches,
                          verbose=args.verbose,
                          desc='Training',
                          leave=False)
        results = metalearner.evaluate(meta_val_dataloader,
                                       max_batches=args.num_batches,
                                       verbose=args.verbose,
                                       desc=epoch_desc.format(epoch + 1))
        wandb.log({'results': results})

        # Save the best model, judged by validation accuracy when it is
        # reported, otherwise by the mean outer loss. (The inner `else`
        # branch is required so `save_model` is always defined.)
        if 'accuracies_after' in results:
            if (best_value is None) or (best_value < results['accuracies_after']):
                best_value = results['accuracies_after']
                save_model = True
            else:
                save_model = False
        elif (best_value is None) or (best_value > results['mean_outer_loss']):
            best_value = results['mean_outer_loss']
            save_model = True
        else:
            save_model = False

        if save_model and (args.output_folder is not None):
            with open(args.model_path, 'wb') as f:
                torch.save(model.state_dict(), f)
            torch.save(model.state_dict(),
                       os.path.join(wandb.run.dir, 'model.pt'))

    if hasattr(meta_train_dataset, 'close'):
        meta_train_dataset.close()
        meta_val_dataset.close()
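# A minimal command-line entry point, sketched from the `args` attributes
# that `main` reads above. The flag names follow argparse's usual
# dash-to-underscore mapping; the defaults are illustrative assumptions,
# not taken from the original script.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser('MAML on the analogy task')
    parser.add_argument('--output-folder', type=str, default=None)
    parser.add_argument('--model-type', type=str, default='linear',
                        choices=['linear', 'mlp1', 'mlp2'])
    parser.add_argument('--num-shots', type=int, default=5)
    parser.add_argument('--num-shots-test', type=int, default=5)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--num-workers', type=int, default=4)
    parser.add_argument('--num-epochs', type=int, default=50)
    parser.add_argument('--num-batches', type=int, default=100)
    parser.add_argument('--num-steps', type=int, default=1)
    parser.add_argument('--step-size', type=float, default=0.1)
    parser.add_argument('--meta-lr', type=float, default=1e-3)
    parser.add_argument('--first-order', action='store_true')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--use-cuda', action='store_true')
    parser.add_argument('--verbose', action='store_true')

    main(parser.parse_args())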
# Constructor of MetaLenetDECOLLE (the surrounding class definition is
# omitted here). Assumes `np` (numpy), `nn` (torch.nn), `MetaConv2d` and
# `MetaLinear` (torchmeta.modules), and `LIFLayer`/`get_output_shape` from
# the DECOLLE codebase are in scope.
def __init__(self, input_shape,
             Nhid=[1],
             Mhid=[128],
             out_channels=1,
             kernel_size=[7],
             stride=[1],
             pool_size=[2],
             alpha=[.9],
             beta=[.85],
             alpharp=[.65],
             dropout=[0.5],
             num_conv_layers=2,
             num_mlp_layers=1,
             deltat=1000,
             lc_ampl=.5,
             lif_layer_type=LIFLayer,
             method='rtrl',
             with_output_layer=True):
    self.with_output_layer = with_output_layer
    if with_output_layer:
        Mhid += [out_channels]
        num_mlp_layers += 1
    self.num_layers = num_layers = num_conv_layers + num_mlp_layers

    # If only one value is provided, duplicate it for each layer
    if len(kernel_size) == 1:
        kernel_size = kernel_size * num_conv_layers
    if stride is None:
        stride = [1]
    if len(stride) == 1:
        stride = stride * num_conv_layers
    if pool_size is None:
        pool_size = [1]
    if len(pool_size) == 1:
        pool_size = pool_size * num_conv_layers
    if len(alpha) == 1:
        alpha = alpha * num_layers
    if len(alpharp) == 1:
        alpharp = alpharp * num_layers
    if len(beta) == 1:
        beta = beta * num_layers
    if len(dropout) == 1:
        dropout = dropout * num_layers
    self.dropout = dropout  # keep the expanded list so it is always defined
    if Nhid is None:
        self.Nhid = Nhid = []
    if Mhid is None:
        self.Mhid = Mhid = []

    super(MetaLenetDECOLLE, self).__init__()

    # Computing padding to preserve feature size
    # TODO: try to remove padding
    padding = (np.array(kernel_size) - 1) // 2

    # The following lists need to be nn.ModuleList in order for PyTorch to
    # properly load and save the state_dict
    self.pool_layers = nn.ModuleList()
    self.dropout_layers = nn.ModuleList()
    self.input_shape = input_shape
    Nhid = [input_shape[0]] + Nhid
    self.num_conv_layers = num_conv_layers
    self.num_mlp_layers = num_mlp_layers

    feature_height = self.input_shape[1]
    feature_width = self.input_shape[2]

    for i in range(self.num_conv_layers):
        feature_height, feature_width = get_output_shape(
            [feature_height, feature_width],
            kernel_size=kernel_size[i],
            stride=stride[i],
            padding=padding[i],
            dilation=1)
        feature_height //= pool_size[i]
        feature_width //= pool_size[i]
        base_layer = MetaConv2d(Nhid[i], Nhid[i + 1], kernel_size[i],
                                stride[i], padding[i])
        layer = lif_layer_type(base_layer,
                               alpha=alpha[i],
                               beta=beta[i],
                               alpharp=alpharp[i],
                               deltat=deltat,
                               do_detach=(method == 'rtrl'))
        pool = nn.MaxPool2d(kernel_size=pool_size[i])
        readout = MetaLinear(int(feature_height * feature_width * Nhid[i + 1]),
                             out_channels)

        # Readout layer has random fixed weights
        for param in readout.parameters():
            param.requires_grad = False
        self.reset_lc_parameters(readout, lc_ampl)

        dropout_layer = nn.Dropout(dropout[i])

        self.LIF_layers.append(layer)
        self.pool_layers.append(pool)
        self.readout_layers.append(readout)
        self.dropout_layers.append(dropout_layer)

    mlp_in = int(feature_height * feature_width * Nhid[-1])
    Mhid = [mlp_in] + Mhid
    for i in range(num_mlp_layers):
        base_layer = MetaLinear(Mhid[i], Mhid[i + 1])
        # Index the per-layer constants past the conv layers (the original
        # reused `alpha[i]` etc. here, which only matched the conv layers'
        # values; `dropout` below already uses the offset index)
        j = self.num_conv_layers + i
        layer = lif_layer_type(base_layer,
                               alpha=alpha[j],
                               beta=beta[j],
                               alpharp=alpharp[j],
                               deltat=deltat,
                               do_detach=(method == 'rtrl'))

        if self.with_output_layer and i + 1 == num_mlp_layers:
            readout = nn.Identity()
            dropout_layer = nn.Identity()
        else:
            readout = MetaLinear(Mhid[i + 1], out_channels)

            # Readout layer has random fixed weights
            for param in readout.parameters():
                param.requires_grad = False
            self.reset_lc_parameters(readout, lc_ampl)
            dropout_layer = nn.Dropout(dropout[j])

        self.LIF_layers.append(layer)
        self.pool_layers.append(nn.Sequential())
        self.readout_layers.append(readout)
        self.dropout_layers.append(dropout_layer)
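# A minimal usage sketch (module-level, outside the class body). The input
# shape and hyperparameter values below are illustrative assumptions, not
# taken from the original code: single-element lists such as `Nhid=[32]` or
# `alpha=[.9]` are duplicated across the layers by the constructor above.
net = MetaLenetDECOLLE(input_shape=(2, 32, 32),  # (channels, height, width)
                       Nhid=[32],                # conv channels per layer
                       Mhid=[128],               # hidden units per MLP layer
                       out_channels=10,
                       kernel_size=[7],
                       stride=[1],
                       pool_size=[2],
                       alpha=[.9], beta=[.85], alpharp=[.65],
                       dropout=[0.5],
                       num_conv_layers=2,
                       num_mlp_layers=1,
                       lif_layer_type=LIFLayer,
                       method='rtrl',
                       with_output_layer=True)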