# Train a fresh Child network using the architecture stored in best_reward_arc.
criterion = nn.CrossEntropyLoss()

# Index 0 of best_reward_arc is passed to the child as the normal arc,
# index 1 as the reduction arc.
normal_arc, reduction_arc = best_reward_arc[0], best_reward_arc[1]

child = Child().to(device)
child_optimizer = optim.SGD(
    child.parameters(),
    lr=0.05,
    momentum=0.9,
    weight_decay=1e-4,
    nesterov=True,
)
# The scheduler is built with the same base learning rate as the optimizer.
cosine_lr_scheduler = cosine_annealing_scheduler(child_optimizer, lr=0.05)

best_acc = 0
for epoch in range(300):
    child.train()
    # NOTE(review): the scheduler is stepped at the start of every epoch,
    # i.e. before any optimizer step of that epoch. If
    # cosine_annealing_scheduler returns a torch.optim.lr_scheduler object,
    # PyTorch >= 1.1 expects scheduler.step() after optimizer.step() --
    # confirm this ordering is intended.
    cosine_lr_scheduler.step()

    # Per-epoch running statistics, reset before iterating over the batches.
    train_loss = correct = total = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        # The child returns its main output together with an auxiliary output.
        outputs, aux_outs = child(inputs, normal_arc, reduction_arc)

        loss = criterion(outputs, targets)
        if args.use_auxiliary:
            # The auxiliary output's loss is added with a fixed weight of 0.4.
            loss += 0.4 * criterion(aux_outs, targets)

        # Clear gradients left over from the previous batch before backprop.
        child_optimizer.zero_grad()
        loss.backward()