def f(inputs, params, stats, mode):
    """Run student and teacher forward passes and compute per-group AT losses.

    Args:
        inputs: batch of input tensors fed to both networks.
        params: flat parameter dict; entries are namespaced by the
            'student.' / 'teacher.' prefixes passed to f_s / f_t.
        stats: running statistics (e.g. batch-norm buffers) shared the same way.
        mode: training-mode flag for the student; the teacher always runs
            with mode False.

    Returns:
        When opt.gamma is truthy: (y_s, y_t_auto, y_t, at_losses);
        otherwise: (y_s, y_t, at_losses), where at_losses is one
        at_loss term per (student group, teacher group) pair.
    """
    if opt.gamma:
        y_s, y_t_auto, g_s = f_s(inputs, params, stats, mode, 'student.')
        # Teacher is a frozen target provider: run it without gradient
        # tracking (matches the sibling definitions of f in this file and
        # avoids building an unused autograd graph).
        with torch.no_grad():
            y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
        return y_s, y_t_auto, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]

    y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
    with torch.no_grad():
        y_t, g_t = f_t(inputs, params, stats, False, 'teacher.')
    return y_s, y_t, [at_loss(x, y) for x, y in zip(g_s, g_t)]
def train_student(net, teacher, optimizer, criterion, scheduler):
    """Train the student network for one epoch over `train_loader`.

    Depending on opts.loss_type, the loss is knowledge distillation
    ('kd' / 'both'), plain criterion ('at' or other), optionally plus an
    attention-transfer term over the intermediate activations.

    Args:
        net: student model (trained in place).
        teacher: teacher model used only to produce targets.
        optimizer: optimizer over the student's parameters.
        criterion: supervised loss (used when not distilling).
        scheduler: LR scheduler, stepped once per batch.
    """
    net.train()
    pbar = tqdm(train_loader)
    for images, labels in pbar:
        # torch.autograd.Variable has been a deprecated no-op since
        # PyTorch 0.4 -- plain tensors carry autograd state themselves.
        images = images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.long)

        outputs_student, ints_student = net(images)
        # Teacher outputs are targets only; no gradients are needed
        # through the teacher (the optimizer steps the student).
        with torch.no_grad():
            outputs_teacher, ints_teacher = teacher(images)

        if opts.loss_type == 'both' or opts.loss_type == 'kd':
            loss = utils.distillation(outputs_student, outputs_teacher,
                                      labels, opts.temperature, opts.alpha)
        else:
            loss = criterion(outputs_student, labels)

        # Parentheses make Python's `and`-binds-tighter-than-`or`
        # precedence explicit: 'both' always adds the AT term; 'at'
        # adds it only when an at_type is configured.
        if opts.loss_type == 'both' or (opts.loss_type == 'at'
                                        and opts.at_type != 'none'):
            # Original recipe used beta over 3 groups; rescale so the
            # total AT weight is independent of the number of groups.
            adjusted_beta = (opts.beta * 3) / len(ints_student)
            for i in range(len(ints_student)):
                # Resize teacher maps to the student's spatial size when
                # the architectures disagree.
                if ints_teacher[i].shape[2] != ints_student[i].shape[2]:
                    ints_teacher[i] = F.interpolate(
                        ints_teacher[i], size=ints_student[i].shape[2:],
                        mode='bilinear', align_corners=False)
                loss += adjusted_beta * utils.at_loss(ints_student[i],
                                                      ints_teacher[i])

        preds = outputs_student.detach().max(dim=1)[1].cpu().numpy()
        targets = labels.cpu().numpy()
        metrics.update(targets, preds)
        score = metrics.get_results()
        pbar.set_postfix({"IoU": score["Mean IoU"]})

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # NOTE(review): scheduler is stepped per batch, not per epoch --
        # presumably a poly/iteration schedule; confirm against its config.
        scheduler.step()
def f(inputs, params, mode):
    """Forward the student and the frozen teacher on one batch.

    Returns the student's final output, the teacher's final output, and a
    list with one utils.at_loss term per paired group of intermediate
    activations.
    """
    student_out, student_groups = f_s(inputs, params, mode, 'student.')
    # The teacher is inference-only: mode is fixed to False and no
    # autograd graph is built for it.
    with torch.no_grad():
        teacher_out, teacher_groups = f_t(inputs, params, False, 'teacher.')
    attention_losses = [
        utils.at_loss(s_act, t_act)
        for s_act, t_act in zip(student_groups, teacher_groups)
    ]
    return student_out, teacher_out, attention_losses
def f(inputs, params, mode):
    """Compute student/teacher outputs plus attention-transfer losses.

    Returns a 3-tuple: the student's final output, the teacher's final
    output, and the per-group attention-map loss terms.
    """
    # y_s: final student output; g_s: tuple of per-group student activations.
    y_s, g_s = f_s(inputs, params, mode, 'student.')
    with torch.no_grad():
        # y_t: final teacher output; g_t: tuple of per-group teacher
        # activations (teacher mode is pinned to False).
        y_t, g_t = f_t(inputs, params, False, 'teacher.')
    # Pair corresponding groups and accumulate one AT loss per pair.
    losses = []
    for s_act, t_act in zip(g_s, g_t):
        losses.append(utils.at_loss(s_act, t_act))
    return y_s, y_t, losses
def f(inputs, params, stats, mode):
    """Forward both networks and return outputs plus per-group AT losses."""
    y_s, g_s = f_s(inputs, params, stats, mode, 'student.')
    # NOTE(review): unlike the student call, f_t receives neither `stats`
    # nor a mode flag here -- presumably its signature is
    # (inputs, params, prefix); confirm against f_t's definition.
    y_t, g_t = f_t(inputs, params, 'teacher.')
    at_terms = [at_loss(s_act, t_act) for s_act, t_act in zip(g_s, g_t)]
    return y_s, y_t, at_terms