def train(net, ctx, batch_size=64, epochs=1, learning_rate=0.01, wd=0.001):
    """Fine-tune ``net`` on the module-level image datasets.

    Builds train/test loaders from the module-level ``train_imgs`` /
    ``test_imgs`` datasets, moves the parameters onto ``ctx``, hybridizes
    the network, and delegates the actual loop to ``utils.train``.

    Args:
        net: gluon network to train.
        ctx: mxnet context (cpu/gpu) the parameters are reset onto.
        batch_size: mini-batch size for both loaders.
        epochs: number of passes over the training data.
        learning_rate: SGD learning rate.
        wd: weight-decay coefficient passed to the trainer.
    """
    loader_train = gluon.data.DataLoader(train_imgs, batch_size, shuffle=True)
    loader_test = gluon.data.DataLoader(test_imgs, batch_size)

    # Ensure the network's parameters are initialized on ctx.
    net.collect_params().reset_ctx(ctx)
    net.hybridize()

    objective = gluon.loss.SoftmaxCrossEntropyLoss()

    # Training: plain SGD with weight decay.
    trainer = gluon.Trainer(
        net.collect_params(),
        'sgd',
        {'learning_rate': learning_rate, 'wd': wd},
    )
    utils.train(loader_train, loader_test, net, objective, trainer, ctx, epochs)
def run_experiment(dataset, optimizer, optimizer_name, batch_size=100,
                   num_epochs=10, data_dir='/home/app/datasets', seed=12345,
                   use_gpu=True, create_graph=False, **kwargs):
    """Runs a single training experiment, i.e one dataset and one optimizer.

    Args:
        dataset: dataset name, one of ['MNIST', 'Fashion_MNIST', 'CIFAR']
        optimizer: torch.optim.optimizer
        optimizer_name: name of the optimizer
        batch_size: batch size for training
        num_epochs: num epochs to train
        data_dir: directory containing the dataset, or where to store a
            downloaded dataset
        seed: seed to set for reproducibility
        use_gpu: whether to try using the gpu or not
        create_graph: used by the training function, when computing the gradient
        **kwargs: kwargs to pass to the utils.train function

    Returns:
        Dictionary of results (losses, accuracies, and the trained model).
    """
    torch.manual_seed(seed)
    # Lazy %-style args: the message is only formatted if the record is
    # actually emitted (fixes eager str.format in the logging call).
    logging.info("Dataset: %s, Optimizer: %s", dataset, optimizer_name)

    train_loader, test_loader = utils.load_data(
        data_dir, batch_size=batch_size, dataset=dataset)

    training_losses, test_losses, training_accuracies, test_accuracies, model = \
        utils.train(train_loader, test_loader, use_gpu=use_gpu,
                    create_graph=create_graph, num_epochs=num_epochs,
                    optimizer=optimizer, **kwargs)

    return {'training_losses': training_losses,
            'test_losses': test_losses,
            'training_accuracies': training_accuracies,
            'test_accuracies': test_accuracies,
            'model': model}
def get_correlations(net, model_name, n_epochs, evaluation_epochs,
                     train_loader, test_loader, device):
    """Train ``net``, then measure how activations at intermediate epochs
    correlate with the fully-trained activations.

    Args:
        net: the network to train and probe.
        model_name: name used for the saved per-epoch checkpoints.
        n_epochs: total number of training epochs.
        evaluation_epochs: iterable of epoch indices whose checkpoints are
            compared against the final checkpoint.
        train_loader: loader used both for training and for activations.
        test_loader: loader used for the final test pass.
        device: torch device to run on.

    Returns:
        List of mean diagonal correlations, one per evaluation epoch.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimiser = optim.SGD(net.parameters(), lr=0.001)
    trained_net = train(net, train_loader, criterion, optimiser, n_epochs,
                        device, save=True, name=model_name, log_interval=1)
    test(trained_net, test_loader, criterion, device)

    # Activations of the final (fully trained) checkpoint.
    trained_activations = get_activations(
        net, 'models/{}_{}.pth'.format(model_name, n_epochs - 1),
        train_loader, device)

    correlations = []
    for epoch in evaluation_epochs:
        print(model_name, 'at epoch:', epoch)
        partial_activations = get_activations(
            net, 'models/{}_{}.pth'.format(model_name, epoch),
            train_loader, device)
        correlation = get_diag_correlation(trained_activations,
                                           partial_activations)
        # Summarize each epoch by the mean of the diagonal entries.
        correlations.append(np.mean(np.diag(correlation)))
    # BUG FIX: removed leftover debug statement `print("HERE")`.
    return correlations
def train_and_save_model():
    """Train a binary-classification ResNet on the project dataset.

    Builds the train/test loaders, a 3-channel single-output ResNet,
    an Adam optimizer and BCE loss, then hands everything off to
    ``utils.train`` for 20 epochs. Relies on the module-level constants
    ``BATCH_SIZE``, ``LR`` and ``DEBUG``.
    """
    train_set, test_set = data.get_train_test_set()
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=BATCH_SIZE, shuffle=False)

    model = resnet.resnet(in_channel=3, num_classes=1)
    adam = torch.optim.Adam(model.parameters(), lr=LR)
    bce_loss = nn.BCELoss()

    utils.train(model, train_loader, test_loader, 20, adam, bce_loss,
                debug=DEBUG)
def main(ix):
    """Driver: train + periodically validate, or run test-only mode.

    ``ix`` is accepted for interface compatibility with the caller but is
    not read here; all configuration comes from ``utils.initialize()``.
    Only the master process (per ``utils.is_master``) owns the TensorBoard
    writer and the best-metric tracker.
    """
    args = utils.initialize()
    tr_dl, vd_dl, ts_dl, e_ix_ln, r_ix_ln, t_ix_ln, tp_ix, tp_rix = utils.data(
        args)
    mdl, opt, lr_sc, ls_f, st_e, bst_ls = utils.prepare(
        args, e_ix_ln, r_ix_ln, t_ix_ln)

    # Only the master process writes TensorBoard events.
    tb_sw = SummaryWriter() if utils.is_master(args) else None

    if args.test:
        utils.test(args, mdl, ts_dl, tp_ix, tp_rix, tb_sw)
    else:
        ls_mtr = utils.BestMetric() if utils.is_master(args) else None
        for e in range(st_e, args.epochs + 1):
            utils.train(args, e, mdl, opt, ls_f, tr_dl, tb_sw)
            # Validate on the configured cadence and always on the last epoch.
            due = (e % args.validation_frequency == 0) or (e == args.epochs)
            if due:
                utils.validate(args, e, mdl, opt, ls_f, vd_dl, tp_ix, tp_rix,
                               ls_mtr, tb_sw)
            lr_sc.step()

    if utils.is_master(args):
        tb_sw.flush()
        tb_sw.close()
def pre_train_cnn(cnn, device, loaders, n_ep=200, val=False, wd=1e-4, lr=1e-3,
                  save_model_path=None):
    """Pre-train a CNN classifier over 230 classes.

    Args:
        cnn: CNN class/factory; instantiated with ``num_classes=230``.
        device: device the training runs on.
        loaders: dict of data loaders; uses 'pre_train'/'pre_val' when
            ``val`` is True, otherwise 'all_train' with no validation.
        n_ep: number of training epochs.
        val: whether to hold out a validation set.
        wd: weight decay.
        lr: learning rate.
        save_model_path: if given, the trained state dict is saved here.

    Returns:
        The trained model.
    """
    print('Training CNN...')
    model = cnn(num_classes=230)

    # Pick loaders: validation split vs. training on everything.
    if val:
        tr_loader, va_loader = loaders['pre_train'], loaders['pre_val']
    else:
        tr_loader, va_loader = loaders['all_train'], None

    model = train(model, device, tr_loader, n_ep, wd, lr, va_loader=va_loader)

    if save_model_path is not None:
        torch.save(model.state_dict(), save_model_path)
    return model
def train_DEM(cnn, device, loaders, word_embeddings, n_ep=50, wd=1e-8, lr=1e-4,
              val=False, save_model_path=None):
    """Train a DEM (Deep Embedding Model) on top of a pre-trained CNN.

    Args:
        cnn: the (pre-trained) CNN backbone wrapped by ``DEM``.
        device: device the training runs on.
        loaders: dict of data loaders; uses 'train'/'val' when ``val`` is
            True, otherwise 'all_train' with no validation.
        word_embeddings: numpy array of class word embeddings; moved to
            ``device`` as a tensor before training.
        n_ep: number of training epochs.
        wd: weight decay.
        lr: learning rate.
        val: whether to hold out a validation set.
        save_model_path: if given, the trained state dict is saved here.

    Returns:
        The trained DEM model.
    """
    print('Training DEM...')
    model = DEM(cnn)
    word_embeddings = torch.from_numpy(word_embeddings).to(device)

    # Pick loaders: validation split vs. training on everything.
    if val:
        tr_loader, va_loader = loaders['train'], loaders['val']
    else:
        tr_loader, va_loader = loaders['all_train'], None

    # Gradient norms are clipped to 1 for training stability.
    model = train(model, device, tr_loader, n_ep, wd, lr, word_embeddings,
                  va_loader=va_loader, grad_norm_clip=1)

    if save_model_path is not None:
        torch.save(model.state_dict(), save_model_path)
    return model
# Build the optimizer and (optionally) a multi-step LR scheduler.
optim_kwargs = {'lr': lr, 'momentum': momentum}
optimizer = optim.SGD(model.parameters(), **optim_kwargs)

# BUG FIX: was `scheduler_gamma is not 0` — identity comparison against an
# int literal (SyntaxWarning on CPython >= 3.8, unreliable semantics).
if scheduler_steps is not None and scheduler_gamma != 0:
    scheduler = MultiStepLR(optimizer, milestones=scheduler_steps,
                            gamma=scheduler_gamma)
else:
    scheduler = None

loss_fn = nn.CrossEntropyLoss()

# Baseline accuracy before any training (reported as epoch 0).
acc = test(model, device, test_loader, 0, loss_fn)

for epoch in range(1, epochs + 1):
    # BUG FIX: message previously printed "{epoch}/{epochs + 1}" although the
    # loop runs exactly `epochs` epochs; also use lazy %-args for logging.
    logging.info("Training epoch %d/%d", epoch, epochs)
    if robust is False:
        train(model, device, train_loader, optimizer, epoch, loss_fn)
    else:
        # Regularized variant adds a lambda-weighted robustness penalty.
        train_regularized(model, device, train_loader, optimizer, epoch,
                          loss_fn, lambda_const)
    if scheduler:
        # NOTE(review): passing `epoch` to step() is deprecated in recent
        # PyTorch releases; kept as-is to preserve existing behavior.
        scheduler.step(epoch)
    acc = test(model, device, test_loader, epoch, loss_fn)

model_name = "pretrained_models/{}.pt".format(output_file)
torch.save(model.state_dict(), model_name)

# Store the run's arguments (plus artifacts) as a mongodb document.
args_dict = args.__dict__
args_dict['model_path'] = os.path.abspath(model_name)
args_dict['final_acc'] = acc
noise_level, model_dir=model_dir) else: print("starting from epoch {}".format(start_epoch)) net = recreate_model(model_to_load, dataset=dataset, act=act) # optimiser parameters optimiser = get_optimiser(net.parameters(), op, learning_rate, momentum) # training criterion criterion = torch.nn.CrossEntropyLoss() # train network train(net, train_loader, test_loader, criterion, optimiser, epochs, noise_type, noise_level, save=True, name=experiment_name, model_dir=model_dir, results_dir=experiment_results_directory, start_epoch=start_epoch) else: print("results are already present, skipping test.")