def train(epoch):
    """Run one training epoch of the two-scale super-resolution model.

    Relies on module-level globals: ``training_data_loader``, ``device``,
    ``model``, ``optimizer``, ``Loss`` and ``results``.

    Parameters
    ----------
    epoch : int
        Current epoch number (used only for logging).
    """
    epoch_loss = 0
    for iteration, batch in enumerate(training_data_loader, 1):
        # batch = (LR input, 2x-upscaled target, 4x-upscaled target)
        LR, HR_2_target, HR_4_target = batch[0].to(device), batch[1].to(device), batch[2].to(device)
        optimizer.zero_grad()
        HR_2, HR_4 = model(LR)
        loss1 = Loss(HR_2, HR_2_target)
        loss2 = Loss(HR_4, HR_4_target)
        loss = loss1 + loss2
        epoch_loss += loss.item()
        # BUG FIX: the original called `loss1.backward(retain_graph=True)` and
        # then `loss.backward()`. Since `loss` already contains `loss1`, that
        # accumulated the gradient of the 2x branch twice. A single backward
        # pass on the summed loss is the correct (and cheaper) behaviour.
        loss.backward()
        optimizer.step()
    print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epoch_loss / len(training_data_loader)))
    results['Avg. Loss'].append(float('%.4f'%(epoch_loss / len(training_data_loader))))
def backprop(y, cache):
    """Backward pass over the global ``layers`` list.

    Parameters
    ----------
    y : ndarray
        Target values for the final activations.
    cache : list of ndarray
        Forward-pass activations; ``cache[0]`` is the network input and
        ``cache[-1]`` the final output.

    Returns
    -------
    tuple
        ``(weight_grads, bias_grads, loss_value)`` with gradients ordered
        first layer to last.
    """
    grads_w, grads_b = [], []
    loss = Loss(cache[-1], y)
    loss_value = loss.forward()
    # print(loss_value)
    grad = loss.backward()
    # Walk layers from last down to the second; layer 0 is handled after
    # the loop because its input is the raw network input, cache[0].
    tail = list(enumerate(layers))[1:]
    for idx, layer in reversed(tail):
        grads_w.append(np.dot(cache[idx].T, grad))
        grads_b.append(np.sum(grad, axis=0, keepdims=True))
        upstream = np.dot(grad, layer.w.T)
        grad = upstream * layer.backward(cache[idx])
    # First layer. NOTE: bias gradient here is summed WITHOUT keepdims,
    # unlike inside the loop — preserved from the original implementation.
    grads_w.append(np.dot(cache[0].T, grad))
    grads_b.append(np.sum(grad, axis=0))
    # Gradients were collected back-to-front; reverse into layer order.
    return grads_w[::-1], grads_b[::-1], loss_value
#print(lossFunc.forward(activation2.output, y)) loss = lossFunc.forward(activation2.output, y) # Calculate accuracy from output of activation2 and targets predictions = np.argmax(activation2.output, axis=1) # calculate values along first axis accuracy = np.mean(predictions==y) #------------------------------------------------------------------------------------------------------- #back propagation lossFunc.backward(activation2.output, y) activation2.backward(lossFunc.dvalues) layer2.backward(activation2.dvalues) activation1.backward(layer2.dvalues) layer1.backward(activation1.dvalues) if not epoch % 100: print(f'epoch: {epoch}, acc: {accuracy:.3f}, loss: {loss:.3f}, lr: {optimizer.currentLearningRate}') optimizer.preUpdateParameters() optimizer.updateParameters(layer1) optimizer.updateParameters(layer2) optimizer.postUpdateParameters() #Create test data X_test, y_test = create_data(100, 3)
def train_advent(model, trainloader, targetloader, cfg):
    """UDA training with ADVENT: adversarial entropy minimization.

    Alternates between (a) training the segmentation network on source
    labels plus an adversarial term that fools the discriminators on
    target entropy maps, and (b) training the two discriminators to
    separate source from target entropy maps.

    Parameters
    ----------
    model : segmentation network returning (aux, main) prediction pairs.
    trainloader : DataLoader over labelled source-domain batches.
    targetloader : DataLoader over unlabelled target-domain batches.
    cfg : config object providing the TRAIN.* hyper-parameters.
    """
    input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE
    input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET
    device = cfg.GPU_ID
    num_classes = cfg.NUM_CLASSES

    viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR)
    if viz_tensorboard:
        writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR)

    # SEGMENTATION NETWORK
    model.train()
    model.to(device)
    # NOTE(review): weights_init re-initializes the segmentation net's
    # weights; if `model` arrives pretrained this will discard that —
    # confirm this is intended (kept from the original code).
    model.apply(weights_init)
    cudnn.benchmark = True
    cudnn.enabled = True

    # DISCRIMINATOR NETWORKS
    # feature-level
    d_aux = get_fc_discriminator(num_classes=num_classes)
    d_aux.train()
    d_aux.to(device)
    # seg maps, i.e. output, level
    d_main = get_fc_discriminator(num_classes=num_classes)
    d_main.train()
    d_main.to(device)

    # OPTIMIZERS
    # BUG FIX: the original built `Adam(model.parameters(), config.lr)` where
    # `config` is undefined (NameError at runtime). Restored the SGD optimizer
    # over `model.optim_parameters(...)` that the commented-out original used
    # and that `adjust_learning_rate` below is paired with.
    optimizer = optim.SGD(model.optim_parameters(cfg.TRAIN.LEARNING_RATE),
                          lr=cfg.TRAIN.LEARNING_RATE,
                          momentum=cfg.TRAIN.MOMENTUM,
                          weight_decay=cfg.TRAIN.WEIGHT_DECAY)
    # discriminators' optimizers
    optimizer_d_aux = optim.Adam(d_aux.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D,
                                 betas=(0.9, 0.99))
    optimizer_d_main = optim.Adam(d_main.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D,
                                  betas=(0.9, 0.99))

    # interpolate output segmaps back to input resolution (W, H order in cfg)
    interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]),
                         mode='bilinear', align_corners=True)
    interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]),
                                mode='bilinear', align_corners=True)

    # labels for adversarial training
    source_label = 0
    target_label = 1

    trainloader_iter = enumerate(trainloader)
    targetloader_iter = enumerate(targetloader)
    for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP)):

        # reset optimizers
        optimizer.zero_grad()
        optimizer_d_aux.zero_grad()
        optimizer_d_main.zero_grad()
        # adapt LR if needed
        adjust_learning_rate(optimizer, i_iter, cfg)
        adjust_learning_rate_discriminator(optimizer_d_aux, i_iter, cfg)
        adjust_learning_rate_discriminator(optimizer_d_main, i_iter, cfg)

        # UDA Training
        # only train segnet. Don't accumulate grads in discriminators
        for param in d_aux.parameters():
            param.requires_grad = False
        for param in d_main.parameters():
            param.requires_grad = False

        # train on source
        # BUG FIX: the previous active code (`x, y = batch` + a generic
        # `Loss()` on cuda:0) never defined pred_src_aux / pred_src_main /
        # loss_seg_src_aux / loss_seg_src_main, all of which are referenced
        # below in discriminator training and in `current_losses`
        # (NameError). Restored the original ADVENT source-training block.
        _, batch = trainloader_iter.__next__()
        images_source, labels, _, _ = batch
        pred_src_aux, pred_src_main = model(images_source.cuda(device))
        if cfg.TRAIN.MULTI_LEVEL:
            pred_src_aux = interp(pred_src_aux)
            loss_seg_src_aux = loss_calc(pred_src_aux, labels, device)
        else:
            loss_seg_src_aux = 0
        pred_src_main = interp(pred_src_main)
        loss_seg_src_main = loss_calc(pred_src_main, labels, device)
        loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main
                + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux)
        loss.backward()

        # adversarial training to fool the discriminator: push target
        # entropy maps to be classified as source.
        _, batch = targetloader_iter.__next__()
        images, _, _, _ = batch
        pred_trg_aux, pred_trg_main = model(images.cuda(device))
        if cfg.TRAIN.MULTI_LEVEL:
            pred_trg_aux = interp_target(pred_trg_aux)
            d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux)))
            loss_adv_trg_aux = bce_loss(d_out_aux, source_label)
        else:
            loss_adv_trg_aux = 0
        pred_trg_main = interp_target(pred_trg_main)
        d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main)))
        loss_adv_trg_main = bce_loss(d_out_main, source_label)
        loss = (cfg.TRAIN.LAMBDA_ADV_MAIN * loss_adv_trg_main
                + cfg.TRAIN.LAMBDA_ADV_AUX * loss_adv_trg_aux)
        loss.backward()

        # Train discriminator networks
        # enable training mode on discriminator networks
        for param in d_aux.parameters():
            param.requires_grad = True
        for param in d_main.parameters():
            param.requires_grad = True
        # train with source: detach so no gradients flow into the segnet
        if cfg.TRAIN.MULTI_LEVEL:
            pred_src_aux = pred_src_aux.detach()
            d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_src_aux)))
            loss_d_aux = bce_loss(d_out_aux, source_label)
            loss_d_aux = loss_d_aux / 2
            loss_d_aux.backward()
        pred_src_main = pred_src_main.detach()
        d_out_main = d_main(prob_2_entropy(F.softmax(pred_src_main)))
        loss_d_main = bce_loss(d_out_main, source_label)
        loss_d_main = loss_d_main / 2
        loss_d_main.backward()

        # train with target
        if cfg.TRAIN.MULTI_LEVEL:
            pred_trg_aux = pred_trg_aux.detach()
            d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux)))
            loss_d_aux = bce_loss(d_out_aux, target_label)
            loss_d_aux = loss_d_aux / 2
            loss_d_aux.backward()
        else:
            loss_d_aux = 0
        pred_trg_main = pred_trg_main.detach()
        d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main)))
        loss_d_main = bce_loss(d_out_main, target_label)
        loss_d_main = loss_d_main / 2
        loss_d_main.backward()

        optimizer.step()
        if cfg.TRAIN.MULTI_LEVEL:
            optimizer_d_aux.step()
        optimizer_d_main.step()

        current_losses = {'loss_seg_src_aux': loss_seg_src_aux,
                          'loss_seg_src_main': loss_seg_src_main,
                          'loss_adv_trg_aux': loss_adv_trg_aux,
                          'loss_adv_trg_main': loss_adv_trg_main,
                          'loss_d_aux': loss_d_aux,
                          'loss_d_main': loss_d_main}
        print_losses(current_losses, i_iter)

        if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0:
            print('taking snapshot ...')
            print('exp =', cfg.TRAIN.SNAPSHOT_DIR)
            snapshot_dir = Path(cfg.TRAIN.SNAPSHOT_DIR)
            torch.save(model.state_dict(), snapshot_dir / f'model_{i_iter}.pth')
            torch.save(d_aux.state_dict(), snapshot_dir / f'model_{i_iter}_D_aux.pth')
            torch.save(d_main.state_dict(), snapshot_dir / f'model_{i_iter}_D_main.pth')
            if i_iter >= cfg.TRAIN.EARLY_STOP - 1:
                break
        sys.stdout.flush()

        # Visualize with tensorboard
        if viz_tensorboard:
            log_losses_tensorboard(writer, current_losses, i_iter)
            if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1:
                draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T')
                draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S')