# Example #1
0
# Set-up for training the child network: architecture selection, loss,
# model, optimizer, learning-rate scheduler and the accuracy baseline.

# Element 0 of `best_reward_arc` is used as the normal arc and element 1 as
# the reduction arc; both are passed to the child on every forward call.
normal_arc, reduction_arc = best_reward_arc[0], best_reward_arc[1]

# Cross-entropy loss applied to the child's outputs.
criterion = nn.CrossEntropyLoss()

# Instantiate the child network and move it to `device`.
child = Child().to(device)

# SGD with Nesterov momentum and L2 weight decay over all child parameters.
child_optimizer = optim.SGD(
    child.parameters(),
    lr=0.05,
    momentum=0.9,
    weight_decay=1e-4,
    nesterov=True,
)

# The scheduler helper is handed the same learning rate (0.05) that the
# optimizer was built with.
# NOTE(review): `cosine_annealing_scheduler` is defined elsewhere; its exact
# annealing schedule is not visible from here -- confirm against its source.
cosine_lr_scheduler = cosine_annealing_scheduler(child_optimizer, lr=0.05)

# Starts at 0 before the epoch loop; presumably tracks the best accuracy seen
# so far (the code that updates it is outside this section -- verify).
best_acc = 0
for epoch in range(300):
    child.train()
    cosine_lr_scheduler.step()

    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs, aux_outs = child(inputs, normal_arc, reduction_arc)
        loss = criterion(outputs, targets)
        if args.use_auxiliary:
            loss += 0.4 * criterion(aux_outs, targets)

        child_optimizer.zero_grad()
        loss.backward()