def train(args, model, device, train_loader, test_loader, optimizer):
    # Phase 1: pre-train on the regularized task loss before ADMM starts.
    for epoch in range(args.num_pre_epochs):
        print('Pre epoch: {}'.format(epoch + 1))
        model.train()
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = regularized_nll_loss(args, model, output, target)
            loss.backward()
            optimizer.step()
        test(args, model, device, test_loader)

    # Phase 2: ADMM loop, alternating SGD on the augmented loss with the
    # closed-form X/Z/U updates after each epoch.
    Z, U = initialize_Z_and_U(model)
    for epoch in range(args.num_epochs):
        model.train()
        print('Epoch: {}'.format(epoch + 1))
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = admm_loss(args, device, model, Z, U, output, target)
            loss.backward()
            optimizer.step()
        X = update_X(model)
        Z = update_Z_l1(X, U, args) if args.l1 else update_Z(X, U, args)
        U = update_U(U, X, Z)
        print_convergence(model, X, Z)
        test(args, model, device, test_loader)
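# The helpers referenced above (initialize_Z_and_U, update_X, update_Z,
# update_U, admm_loss) are defined elsewhere in the repo. What follows is a
# minimal, illustrative sketch of the standard ADMM weight-pruning updates
# they implement (Zhang et al., "A Systematic DNN Weight Pruning Framework
# using ADMM"), not the repo's actual code: the dict-of-tensors layout and
# the scalar hyperparameters args.rho and args.percent are assumptions.
import torch
import torch.nn.functional as F

def initialize_Z_and_U(model):
    # Z starts as a copy of each prunable weight; the scaled dual U at zero.
    Z, U = {}, {}
    for name, param in model.named_parameters():
        if name.endswith('.weight'):
            Z[name] = param.detach().clone()
            U[name] = torch.zeros_like(param)
    return Z, U

def update_X(model):
    # X is simply the current weights after the SGD step on the ADMM loss.
    return {name: param.detach().clone()
            for name, param in model.named_parameters()
            if name.endswith('.weight')}

def update_Z(X, U, args):
    # Euclidean projection of X + U onto the sparsity constraint:
    # zero out the smallest args.percent fraction of entries by magnitude.
    Z = {}
    for name in X:
        v = X[name] + U[name]
        k = int(v.numel() * args.percent)
        if k > 0:
            thresh = v.abs().flatten().kthvalue(k).values
            v = torch.where(v.abs() > thresh, v, torch.zeros_like(v))
        Z[name] = v
    return Z

def update_U(U, X, Z):
    # Dual ascent on the scaled dual variable: U <- U + X - Z.
    return {name: U[name] + X[name] - Z[name] for name in U}

def admm_loss(args, device, model, Z, U, output, target):
    # Task loss plus the augmented-Lagrangian penalty (rho/2)||W - Z + U||^2.
    loss = F.nll_loss(output, target)
    for name, param in model.named_parameters():
        if name in Z:
            loss = loss + (args.rho / 2) * (param - Z[name] + U[name]).pow(2).sum()
    return loss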
def train(args, model, device, train_loader, test_loader, optimizer):
    # Variant instrumented with wall-clock timing for the SGD epochs and
    # for the ADMM X/Z/U updates.
    train_start = time.time()
    Z, U = initialize_Z_and_U(model, device)
    for epoch in range(args.num_epochs):
        print('Epoch: {}'.format(epoch + 1))
        model.train()
        epoch_start = time.time()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = admm_loss(args, device, model, Z, U, output, target)
            loss.backward()
            optimizer.step()
        epoch_end = time.time()
        print("train epoch time cost: {}".format(epoch_end - epoch_start))
        admm_step_start = time.time()
        X = update_X(model, device)
        Z = update_Z_l1(X, U, args) if args.l1 else update_Z(X, U, args, device)
        U = update_U(U, X, Z)
        admm_step_end = time.time()
        print("admm step time cost: {}".format(admm_step_end - admm_step_start))
        print_convergence(model, X, Z)
        test(args, model, device, test_loader)
    train_end = time.time()
    print("train total time cost: {}".format(train_end - train_start))
def train_one_epoch(model, criterion, admm_optimizer, data_loader, device, epoch,
                    print_freq, layer_names, percent, pattern, Z, U, arg_rho,
                    apex=False):
    # Plot([float(x) for x in list(Z[layer_names[-1]].flatten())], plot_type=2)
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value}'))
    metric_logger.add_meter('img/s', utils.SmoothedValue(window_size=10, fmt='{value}'))
    header = 'Epoch: [{}]'.format(epoch)
    rho = arg_rho
    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        start_time = time.time()
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = utils.admm_loss(device, model, layer_names, criterion, Z, U,
                               output, target, rho)
        admm_optimizer.zero_grad()
        if apex:
            with amp.scale_loss(loss, admm_optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        admm_optimizer.step()
        acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
        batch_size = image.shape[0]
        metric_logger.update(loss=loss.item(), lr=admm_optimizer.param_groups[0]["lr"])
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
        metric_logger.meters['img/s'].update(batch_size / (time.time() - start_time))
    print("=" * 10, "Entering ADMM Optimization")
    X = utils.update_X(model, layer_names)
    Z, layer_pattern = utils.update_Z_Pattern(X, U, layer_names, pattern)
    U = utils.update_U(U, X, Z, layer_names)
    return Z, U
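# utils.update_Z_Pattern above projects onto a per-kernel pattern constraint
# rather than a global magnitude threshold. The sketch below is a hedged,
# PatDNN-style reconstruction, not the repo's implementation: it assumes
# 3x3 conv layers, that X and U map layer names to weight tensors, and that
# `pattern` is a (P, 3, 3) float tensor of binary candidate masks.
import torch

def update_Z_Pattern(X, U, layer_names, pattern):
    Z, layer_pattern = {}, {}
    for name in layer_names:
        v = X[name] + U[name]                    # (out_c, in_c, 3, 3)
        out_c, in_c = v.shape[:2]
        kernels = v.reshape(-1, 3, 3)            # one row per 3x3 kernel
        # Score each candidate pattern by the weight magnitude it retains,
        # then keep the best-scoring pattern for every kernel.
        scores = torch.einsum('kij,pij->kp', kernels.abs(), pattern)
        best = scores.argmax(dim=1)              # best pattern index per kernel
        Z[name] = (kernels * pattern[best]).reshape(out_c, in_c, 3, 3)
        layer_pattern[name] = best.reshape(out_c, in_c)
    return Z, layer_pattern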
def train(args, model, device, train_loader, test_loader, optimizer):
    loss_iter = []
    for epoch in range(args.num_pre_epochs):
        print('Pre epoch: {}'.format(epoch + 1))
        model.train()
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = regularized_nll_loss(args, model, output, target)
            loss_iter.append(loss.item())  # .item() avoids retaining the autograd graph
            loss.backward()
            optimizer.step()
        test(args, model, device, test_loader)
    Z, U = initialize_Z_and_U(model)  # initialize Z and U
    A = np.zeros((args.idx, args.num_epochs))
    for epoch in range(args.num_epochs):
        model.train()
        print('Epoch: {}'.format(epoch + 1))
        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = admm_loss(args, device, model, Z, U, output, target)
            loss.backward()
            optimizer.step()
        X = update_X(model)  # update X
        # The Z-update is chosen according to the sparsity regularizer.
        if args.l1:
            Z = update_Z_l1(X, U, args)
        elif args.l0:
            Z = update_Z_l0(X, U, args)
        elif args.SCAD:
            Z = update_Z_SCAD(X, U, args)
        elif args.rscad:
            print('use rscad update z')
            Z = updata_Z_Prox_glarho(X, U, args)
        else:
            Z = update_Z(X, U, args)
        U = update_U(U, X, Z)
        if not args.test_lamda:
            a = print_convergence(model, X, Z)
            for i in range(args.idx):
                A[i, epoch] = a[i]
        test(args, model, device, test_loader)
    return A
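# For reference, the L1 branch's update_Z_l1 is, in the standard ADMM
# formulation, just the soft-thresholding proximal operator of the L1 norm
# applied to X + U. The sketch below is illustrative only: the hyperparameter
# names args.alpha (L1 weight) and args.rho are assumptions, and X and U are
# assumed to be dicts of weight tensors.
import torch

def update_Z_l1(X, U, args):
    delta = args.alpha / args.rho  # prox threshold lambda / rho
    Z = {}
    for name in X:
        v = X[name] + U[name]
        # Soft threshold: shrink every entry toward zero by delta,
        # clipping entries whose magnitude is below delta to exactly zero.
        Z[name] = torch.sign(v) * torch.clamp(v.abs() - delta, min=0)
    return Z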