def test_times_two_function(self):
    # Hyperparameters
    num_levels = 2
    num_nodes_at_level = {0: 2, 1: 2}
    num_ops_at_level = {0: LEN_SIMPLE_OPS, 1: 1}
    num_epochs = 100

    # Initialize tensorboard writer
    dt_string = datetime.now().strftime("%d-%m-%Y--%H-%M-%S")
    writer = SummaryWriter('test/test_double_func/' + str(dt_string) + "/")

    # Define model
    model = ModelController(
        num_levels=num_levels,
        num_nodes_at_level=num_nodes_at_level,
        num_ops_at_level=num_ops_at_level,
        primitives=SIMPLE_OPS,
        channels_in=1,
        channels_start=1,
        stem_multiplier=1,
        num_classes=1,
        loss_criterion=nn.L1Loss(),
        writer=writer,
        test_mode=True)

    # Input
    x = tensor([[
        # feature 1
        [[1.]]
    ]])

    # Expected output
    y = tensor([[
        # feature 1
        [[2.]]
    ]])

    # Alpha Optimizer - one for each level
    alpha_optim = []
    for level in range(0, num_levels):
        alpha_optim.append(
            torch.optim.Adam(params=model.get_alpha_level(level), lr=0.1))

    for _ in range(0, num_epochs):
        # Alpha Gradient Steps for each level
        for level in range(0, num_levels):
            alpha_optim[level].zero_grad()
            loss = model.loss_criterion(model(x), y)
            print(loss)
            loss.backward()
            alpha_optim[level].step()
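
# The test above depends on project fixtures (SIMPLE_OPS, LEN_SIMPLE_OPS,
# ModelController) that are defined elsewhere. As a minimal, self-contained
# sketch of the same idea, fitting a "times two" target with Adam and L1 loss,
# the following uses plain PyTorch only; every name in it is illustrative
# and not part of the project API.
import torch
import torch.nn as nn

x = torch.tensor([[1.]])
y = torch.tensor([[2.]])

scale = nn.Parameter(torch.ones(1))  # stands in for the architecture alphas
optim = torch.optim.Adam(params=[scale], lr=0.1)
loss_criterion = nn.L1Loss()

for _ in range(100):
    optim.zero_grad()
    loss = loss_criterion(x * scale, y)  # model(x) = scale * x
    loss.backward()
    optim.step()

# With L1 loss, Adam oscillates around the optimum with amplitude roughly lr,
# so check against a loose tolerance rather than exact equality.
assert abs(scale.item() - 2.0) < 0.2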
def train_alpha(self, valid_loader, model: ModelController, alpha_optim,
                epoch, lr, temp=None):
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()

    cur_step = epoch * len(valid_loader)

    # Log learning rate
    self.writer.add_scalar('train/lr', lr, epoch)

    # Put the model in training mode
    model.train()

    for step, (val_X, val_y) in enumerate(valid_loader):
        N = val_X.size(0)
        if torch.cuda.is_available():
            val_X = val_X.cuda(non_blocking=True)
            val_y = val_y.cuda(non_blocking=True)

        # Alpha gradient step for each level: zero all alpha gradients,
        # backpropagate the validation loss once, then step every optimizer
        for level in range(len(alpha_optim)):
            alpha_optim[level].zero_grad()
        logits = model(val_X, temp=temp)
        loss = model.loss_criterion(logits, val_y)
        loss.backward()
        for level in range(len(alpha_optim)):
            alpha_optim[level].step()

        prec1, prec5 = accuracy(logits, val_y, topk=(1, 5))
        losses.update(loss.item(), N)
        top1.update(prec1.item(), N)
        top5.update(prec5.item(), N)

        if step % config.PRINT_STEP_FREQUENCY == 0 or step == len(valid_loader) - 1:
            print(
                datetime.now(),
                "Alpha Train: [{:2d}/{}] Step {:03d}/{:03d} Loss {losses.avg:.3f} "
                "Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})".format(
                    epoch + 1,
                    config.EPOCHS,
                    step,
                    len(valid_loader) - 1,
                    losses=losses,
                    top1=top1,
                    top5=top5))

        self.writer.add_scalar('val/loss', losses.avg, cur_step)
        self.writer.add_scalar('val/top1', top1.avg, cur_step)
        self.writer.add_scalar('val/top5', top5.avg, cur_step)
        cur_step += 1

    print(
        "Alpha Train (Uses Validation Loss): [{:2d}/{}] Final Prec@1 {:.4%}"
        .format(epoch + 1, config.EPOCHS, top1.avg))

    return top1.avg
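
# train_alpha and train_weights rely on two helpers whose definitions are not
# shown here. The sketches below are assumptions consistent with how they are
# used above (accuracy returns fractions, matching the {:.1%} format), in the
# style of the common DARTS utility implementations.
class AverageMeter:
    """Tracks a running average weighted by batch size."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes precision@k for each k, as a fraction in [0, 1]."""
    maxk = max(topk)
    batch_size = target.size(0)

    # Indices of the top-k largest logits per example, shape (maxk, N)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(1.0 / batch_size))
    return res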
def train_weights(self, train_loader, model: ModelController, w_optim,
                  epoch, lr, temp=None):
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()

    cur_step = epoch * len(train_loader)

    # Log learning rate
    self.writer.add_scalar('train/lr', lr, epoch)

    # Put the model in training mode
    model.train()

    for step, (trn_X, trn_y) in enumerate(train_loader):
        N = trn_X.size(0)
        if torch.cuda.is_available():
            trn_X = trn_X.cuda(non_blocking=True)
            trn_y = trn_y.cuda(non_blocking=True)

        # Weights step
        w_optim.zero_grad()
        logits = model(trn_X, temp=temp)
        loss = model.loss_criterion(logits, trn_y)
        loss.backward()

        # Gradient clipping
        nn.utils.clip_grad_norm_(model.get_weights(),
                                 config.WEIGHTS_GRADIENT_CLIP)
        w_optim.step()

        prec1, prec5 = accuracy(logits, trn_y, topk=(1, 5))
        losses.update(loss.item(), N)
        top1.update(prec1.item(), N)
        top5.update(prec5.item(), N)

        if step % config.PRINT_STEP_FREQUENCY == 0 or step == len(train_loader) - 1:
            print(
                datetime.now(),
                "Weight Train: [{:2d}/{}] Step {:03d}/{:03d} Loss {losses.avg:.3f} "
                "Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})".format(
                    epoch + 1,
                    config.EPOCHS,
                    step,
                    len(train_loader) - 1,
                    losses=losses,
                    top1=top1,
                    top5=top5))

        self.writer.add_scalar('train/loss', loss.item(), cur_step)
        self.writer.add_scalar('train/top1', prec1.item(), cur_step)
        self.writer.add_scalar('train/top5', prec5.item(), cur_step)
        cur_step += 1

    print("Weight Train: [{:2d}/{}] Final Prec@1 {:.4%}".format(
        epoch + 1, config.EPOCHS, top1.avg))
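
# A hypothetical outer search loop showing how the two routines above are
# typically alternated: network weights are fitted on the training split,
# while the architecture parameters (alphas) are updated on the validation
# split. `trainer`, `w_lr_scheduler`, and the loaders/optimizers are
# assumptions for illustration, not names defined in this file.
for epoch in range(config.EPOCHS):
    lr = w_lr_scheduler.get_last_lr()[0]
    trainer.train_weights(train_loader, model, w_optim, epoch, lr)
    trainer.train_alpha(valid_loader, model, alpha_optim, epoch, lr)
    w_lr_scheduler.step()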