Example #1
def f(inputs, params, stats, mode):
    # f_s/f_t are the functional student/teacher networks; each returns
    # final logits plus the per-group activations used for attention transfer.
    if opt.gamma:
        y_s, y_t_auto, g_s = f_s(inputs, params, stats, mode,
                                 'student.')
        y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
        return y_s, y_t_auto, y_t, [
            at_loss(x, y) for x, y in zip(g_s, g_t)
        ]
    else:
        y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
        y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
        return y_s, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]
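
All five examples lean on an at_loss helper that is not shown. A minimal sketch of the usual attention-transfer formulation (Zagoruyko & Komodakis): the attention map is the channel-wise mean of squared activations, L2-normalized per sample. The names at/at_loss mirror the calls above, but the exact implementation in each repository may differ:

import torch.nn.functional as F

def at(x):
    # Spatial attention map: mean of squared activations over channels,
    # flattened and L2-normalized per sample.
    return F.normalize(x.pow(2).mean(1).view(x.size(0), -1))

def at_loss(x, y):
    # Mean squared distance between student and teacher attention maps.
    return (at(x) - at(y)).pow(2).mean()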
Example #2
def train_student(net, teacher, optimizer, criterion, scheduler):
    net.train()
    pbar = tqdm(train_loader)
    for images, labels in pbar:
        images = images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.long)

        outputs_student, ints_student = net(images)
        # The teacher only supplies targets; skip building its graph.
        with torch.no_grad():
            outputs_teacher, ints_teacher = teacher(images)
        
        if opts.loss_type in ('both', 'kd'):
            # Soft-target distillation against the teacher logits.
            loss = utils.distillation(outputs_student, outputs_teacher, labels,
                                      opts.temperature, opts.alpha)
        else:
            loss = criterion(outputs_student, labels)

        if opts.loss_type in ('both', 'at') and opts.at_type != 'none':
            # Spread the total attention-transfer weight evenly over the
            # intermediate feature maps.
            adjusted_beta = (opts.beta * 3) / len(ints_student)
            for i in range(len(ints_student)):
                # Resize the teacher's map to the student's resolution
                # before comparing attention.
                if ints_teacher[i].shape[2] != ints_student[i].shape[2]:
                    ints_teacher[i] = F.interpolate(ints_teacher[i],
                                                    size=ints_student[i].shape[2:],
                                                    mode='bilinear',
                                                    align_corners=False)
                loss += adjusted_beta * utils.at_loss(ints_student[i], ints_teacher[i])
        
        preds = outputs_student.detach().max(dim=1)[1].cpu().numpy()
        targets = labels.cpu().numpy()
        metrics.update(targets, preds)
        score = metrics.get_results()
        pbar.set_postfix({"IoU": score["Mean IoU"]})
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
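
Example #2 also assumes a utils.distillation helper. A sketch of the standard Hinton-style KD loss it most likely wraps (the exact reduction and weighting in utils may differ):

import torch.nn.functional as F

def distillation(y, teacher_scores, labels, T, alpha):
    # Soft targets: KL divergence between temperature-softened student
    # and teacher distributions, rescaled by T^2 to keep gradients comparable.
    soft = F.kl_div(F.log_softmax(y / T, dim=1),
                    F.softmax(teacher_scores / T, dim=1),
                    reduction='batchmean') * (T ** 2)
    # Hard targets: ordinary cross-entropy against the ground-truth labels.
    hard = F.cross_entropy(y, labels)
    return alpha * soft + (1.0 - alpha) * hard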
Example #3
def f(inputs, params, mode):
    y_s, g_s = f_s(inputs, params, mode, 'student.')
    with torch.no_grad():
        y_t, g_t = f_t(inputs, params, False, 'teacher.')
    return y_s, y_t, [
        utils.at_loss(x, y) for x, y in zip(g_s, g_t)
    ]
Example #4
def f(inputs, params, mode):
    # y_s: final output of the student network
    # g_s: tuple with the output of each group
    y_s, g_s = f_s(inputs, params, mode, 'student.')
    with torch.no_grad():
        # y_t: final output of the teacher network
        # g_t: tuple with the output of each group of the teacher network
        y_t, g_t = f_t(inputs, params, False, 'teacher.')
    # Returns the final outputs of the student and teacher networks;
    # the third part is the attention-map loss between the two networks.
    return y_s, y_t, [utils.at_loss(x, y) for x, y in zip(g_s, g_t)]
Example #5
def f(inputs, params, stats, mode):
    y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
    y_t, g_t = f_t(inputs, params, 'teacher.')
    return y_s, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]
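
All of these f variants are meant to be driven by a training step that adds the task loss on the student logits to a weighted sum of the attention terms. A hypothetical sketch against the Example #5 signature (beta, targets, and training_step are illustrative names, not part of the examples above):

import torch.nn.functional as F

beta = 1e3  # attention-transfer weight; illustrative value

def training_step(inputs, targets, params, stats):
    # f returns student logits, teacher logits, and one AT term per group.
    y_s, y_t, loss_groups = f(inputs, params, stats, mode=True)
    # Task loss on the student plus the weighted sum of the AT terms.
    return F.cross_entropy(y_s, targets) + beta * sum(loss_groups)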