import time

import numpy as np
import torch

# `AverageMeter` and `cutmix_data` are project helpers (a reference `cutmix_data`
# sketch follows this function); `opt` carries the command-line/config options.


def train(opt, loader, model, criterion, optimizer, epoch, logger):
    model.train()
    losses, data_time, batch_time = AverageMeter(), AverageMeter(), AverageMeter()
    start = time.time()

    for inputs, labels in loader:
        # Tweak inputs: half precision, move to GPU
        inputs = inputs.half().cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        do_cutmix = opt.regularization == 'cutmix' and np.random.rand() < opt.cutmix_prob
        if do_cutmix:
            inputs, labels_a, labels_b, lam = cutmix_data(x=inputs, y=labels, alpha=opt.cutmix_alpha)
        data_time.update(time.time() - start)

        # Forward, backward passes, then step
        outputs = model(inputs)
        if do_cutmix:
            loss = lam * criterion(outputs, labels_a) + (1 - lam) * criterion(outputs, labels_b)
        else:
            loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opt.clip)  # better safe than sorry
        optimizer.step()

        # Log losses
        losses.update(loss.item(), labels.size(0))
        batch_time.update(time.time() - start)
        start = time.time()

    logger.info('==> Train:[{0}]\tTime:{batch_time.sum:.4f}\tData:{data_time.sum:.4f}\tLoss:{loss.avg:.4f}\t'
                .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses))
    return model, optimizer
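# The training loops in this file all rely on a `cutmix_data` helper whose definition is
# not included in this excerpt (and whose exact signature varies slightly between the
# snippets, e.g. an extra `use_cuda` flag further below). The following is a minimal
# reference sketch of standard CutMix with the `(x, y, alpha)` keyword interface used
# above; it is an assumption, not the original helper.
import numpy as np
import torch


def rand_bbox(size, lam):
    """Sample a box covering roughly a (1 - lam) fraction of the image area."""
    h, w = size[2], size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_h, cut_w = int(h * cut_rat), int(w * cut_rat)
    cy, cx = np.random.randint(h), np.random.randint(w)
    bby1, bby2 = np.clip(cy - cut_h // 2, 0, h), np.clip(cy + cut_h // 2, 0, h)
    bbx1, bbx2 = np.clip(cx - cut_w // 2, 0, w), np.clip(cx + cut_w // 2, 0, w)
    return bbx1, bby1, bbx2, bby2


def cutmix_data(x, y, alpha=1.0):
    """Return (mixed inputs, labels of the kept region, labels of the pasted region, lam)."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)
    x[:, :, bby1:bby2, bbx1:bbx2] = x[index, :, bby1:bby2, bbx1:bbx2]
    # Recompute lam from the exact pasted area so the label mixing weight matches the pixels.
    lam = 1. - ((bbx2 - bbx1) * (bby2 - bby1) / (x.size(-1) * x.size(-2)))
    return x, y, y[index], lam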
import numpy as np
import torch
from apex import amp
from tqdm import tqdm

# `AverageMeter`, `cutmix_data`, `mixup_criterion` and the scheduler callable are
# project helpers assumed to be importable from the surrounding code base.


def train(cfg, train_loader, model, criterion, kd_criterion, optimizer, scheduler, epoch):
    """Train for one epoch on clip batches of shape (bsize, seq_len, c, h, w)."""
    losses = AverageMeter()
    model.train()
    tbar = tqdm(train_loader)
    for i, (image, target) in enumerate(tbar):
        image = image.cuda()
        target = target.cuda()
        bsize, seq_len, c, h, w = image.size()

        data_aug = cfg["CUTMIX"] or cfg["MIXUP"]
        if np.random.uniform() < cfg["P_AUGMENT"] and data_aug:
            # if cfg["CUTMIX"]:
            #     mixed_x, y_a, y_b, lam = cutmix_data(image, target)
            # elif cfg["MIXUP"]:
            #     mixed_x, y_a, y_b, lam = mixup_data(image, target)

            # Apply CutMix independently to each clip: frames inside a clip share one
            # mixing box, so a single lam per clip is repeated for all seq_len frames.
            mixed_x, y_a, y_b, lam = [], [], [], []
            for st_image, st_target in zip(image, target):
                mixed_st_image, st_y_a, st_y_b, st_lam = cutmix_data(st_image, st_target)
                mixed_x.append(mixed_st_image)
                y_a.append(st_y_a)
                y_b.append(st_y_b)
                lam.append(torch.FloatTensor([st_lam] * seq_len))
            mixed_x = torch.stack(mixed_x)
            y_a = torch.stack(y_a)
            y_b = torch.stack(y_b)
            lam = torch.cat(lam, 0).unsqueeze(1).cuda()

            # Flatten clips into a frame batch before the forward pass.
            mixed_x = mixed_x.view(bsize * seq_len, c, h, w)
            y_a = y_a.view(-1, target.size(-1))
            y_b = y_b.view(-1, target.size(-1))

            output, aux_output0, aux_output1 = model(mixed_x, seq_len)
            main_loss = mixup_criterion(criterion, output, y_a, y_b, lam)
            if cfg["USE_KD"]:
                # Auxiliary heads: supervised loss blended with self-distillation
                # against the main head's output.
                aux_loss = cfg["ALPHA"] * (
                    mixup_criterion(criterion, aux_output0, y_a, y_b, lam)
                    + mixup_criterion(criterion, aux_output1, y_a, y_b, lam)
                ) + (1. - cfg["ALPHA"]) * (
                    kd_criterion(aux_output0, output) + kd_criterion(aux_output1, output))
            else:
                aux_loss = (mixup_criterion(criterion, aux_output0, y_a, y_b, lam)
                            + mixup_criterion(criterion, aux_output1, y_a, y_b, lam))
        else:
            image = image.view(bsize * seq_len, c, h, w)
            target = target.view(-1, target.size(-1))
            output, aux_output0, aux_output1 = model(image, seq_len)
            main_loss = criterion(output, target)
            if cfg["USE_KD"]:
                aux_loss = cfg["ALPHA"] * (
                    criterion(aux_output0, target) + criterion(aux_output1, target)
                ) + (1. - cfg["ALPHA"]) * (
                    kd_criterion(aux_output0, output) + kd_criterion(aux_output1, output))
            else:
                aux_loss = criterion(aux_output0, target) + criterion(aux_output1, target)

        loss = main_loss + cfg["AUX_W"] * aux_loss
        loss = loss.mean()

        # Gradient accumulation over GD_STEPS mini-batches.
        loss = loss / cfg['GD_STEPS']
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        if (i + 1) % cfg['GD_STEPS'] == 0:
            scheduler(optimizer, i, epoch)
            optimizer.step()
            optimizer.zero_grad()

        # Record loss (undo the accumulation scaling for logging).
        losses.update(loss.item() * cfg['GD_STEPS'], image.size(0))
        tbar.set_description("Train loss: %.5f, learning rate: %.6f"
                             % (losses.avg, optimizer.param_groups[-1]['lr']))
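# Both loops above track running losses with an `AverageMeter`, and the sequence trainer
# weights the two CutMix losses with `mixup_criterion` using a per-frame `lam` tensor of
# shape (bsize * seq_len, 1). Minimal sketches of both are given below; they assume that
# `criterion` is built with `reduction='none'` so the elementwise weighting broadcasts
# before the later `.mean()` call. These are assumptions, not the original utilities.
class AverageMeter:
    """Track the current value, running sum, count and average of a metric."""

    def __init__(self):
        self.val = self.sum = self.count = self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / max(self.count, 1)


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Interpolate the losses of the two label sets; lam may be a scalar or an (N, 1) tensor."""
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)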
import os
import time

import numpy as np
import torch
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from apex import amp

# `opt`, `model_save_dir`, `rubbishDataset`, `cutmix_data` and `val_model` are
# project-level names assumed to be defined elsewhere in this code base.


def train_model(model, criterion, optimizer):
    train_dataset = rubbishDataset(opt.train_val_data, opt.train_list, phase='train',
                                   input_size=opt.input_size)
    trainloader = DataLoader(train_dataset,
                             batch_size=opt.train_batch_size,
                             shuffle=True,
                             num_workers=opt.num_workers)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1)
    total_iters = len(trainloader)
    model_name = opt.backbone
    train_loss = []
    since = time.time()
    best_score = 0.0
    best_epoch = 0

    for epoch in range(1, opt.max_epoch + 1):
        model.train(True)
        begin_time = time.time()
        running_corrects_linear = 0
        count = 0
        for i, data in enumerate(trainloader):
            count += 1
            inputs, labels = data
            labels = labels.type(torch.LongTensor)
            inputs, labels = inputs.cuda(), labels.cuda()

            if np.random.rand(1) < opt.cut_prob:
                # CutMix branch: interpolate the losses of the two label sets.
                inputs, targets_a, targets_b, lam = cutmix_data(inputs, labels, 1.0, use_cuda=True)
                out_linear = model(inputs)
                _, linear_preds = torch.max(out_linear.data, 1)
                loss = criterion(out_linear, targets_a) * lam + criterion(out_linear, targets_b) * (1. - lam)
            else:
                out_linear = model(inputs)
                _, linear_preds = torch.max(out_linear.data, 1)
                loss = criterion(out_linear, labels)

            optimizer.zero_grad()
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            optimizer.step()

            if i % opt.print_interval == 0 or out_linear.size()[0] < opt.train_batch_size:
                spend_time = time.time() - begin_time
                print(' Epoch:{}({}/{}) loss:{:.3f} lr:{:.7f} epoch_Time:{}min:'.format(
                    epoch, count, total_iters, loss.item(),
                    optimizer.param_groups[-1]['lr'],
                    spend_time / count * total_iters // 60 - spend_time // 60))
                train_loss.append(loss.item())
            running_corrects_linear += torch.sum(linear_preds == labels.data)

        weight_score, val_loss = val_model(model, criterion)
        scheduler.step()

        epoch_acc_linear = running_corrects_linear.double() / total_iters / opt.train_batch_size
        print('Epoch:[{}/{}] train_acc={:.4f} '.format(epoch, opt.max_epoch, epoch_acc_linear))

        with open(os.path.join(model_save_dir, 'log.txt'), 'a+') as f:
            f.write('epoch:{}, loss:{:.4f}, acc:{:.4f}\n'.format(epoch, val_loss, weight_score))

        model_out_path = model_save_dir + "/" + '{}_'.format(model_name) + str(epoch) + '_' + str(weight_score)[:6] + '.pth'
        best_model_out_path = model_save_dir + "/" + '{}_'.format(model_name) + 'best' + '{:.4f}'.format(weight_score) + '.pth'
        # save the best model
        if weight_score > best_score:
            best_score = weight_score
            best_epoch = epoch
            torch.save(model.state_dict(), best_model_out_path)
            print("best epoch: {} best acc: {}".format(best_epoch, weight_score))
        # save based on epoch interval
        if epoch % opt.save_interval == 0 and epoch > opt.min_save_epoch:
            torch.save(model.state_dict(), model_out_path)

    print('Best acc: {:.3f} Best epoch:{}'.format(best_score, best_epoch))
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
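# `train_model` (like the sequence trainer above) relies on `amp.scale_loss` from NVIDIA
# Apex, which only works after the model and optimizer have been wrapped by
# `amp.initialize`. A typical setup is sketched below; the ResNet-50 backbone, the SGD
# hyper-parameters and `opt.num_classes` are illustrative assumptions, not values taken
# from this code base.
import torch
import torchvision
from apex import amp

model = torchvision.models.resnet50(num_classes=opt.num_classes).cuda()  # opt.num_classes assumed
criterion = torch.nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)   # illustrative hyper-parameters
model, optimizer = amp.initialize(model, optimizer, opt_level='O1')      # enable mixed precision
train_model(model, criterion, optimizer)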
# ResNet-style forward with optional "hidden" CutMix: when cutmix_hidden is True the
# CutMix patch is applied to the feature maps at a randomly chosen depth (layer_mix in
# 0..3) and the mixed targets are returned alongside the logits. Assumes `random` and
# `cutmix_data` are imported at module level.
def forward(self, x, target=None, cutmix_hidden=False, cutmix_beta=0.1, layer_mix=None):
    if self.dataset == 'cifar10' or self.dataset == 'cifar100':
        if cutmix_hidden:
            if layer_mix is None:
                layer_mix = random.randint(0, 3)
            out = x
            if layer_mix == 0:
                out, y_a, y_b, lam = cutmix_data(out, target, cutmix_beta)
            out = self.conv1(out)
            out = self.bn1(out)
            out = self.relu(out)
            out = self.layer1(out)
            if layer_mix == 1:
                out, y_a, y_b, lam = cutmix_data(out, target, cutmix_beta)
            out = self.layer2(out)
            if layer_mix == 2:
                out, y_a, y_b, lam = cutmix_data(out, target, cutmix_beta)
            out = self.layer3(out)
            if layer_mix == 3:
                out, y_a, y_b, lam = cutmix_data(out, target, cutmix_beta)
            out = self.avgpool(out)
            out = out.view(out.size(0), -1)
            out = self.fc(out)
            return out, y_a, y_b, lam
        else:
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.avgpool(x)
            x = x.view(x.size(0), -1)
            x = self.fc(x)
            return x
    elif self.dataset == 'imagenet':
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
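# A sketch of how a training step might consume the four-tuple returned by the CIFAR
# branch with cutmix_hidden=True: the two losses are interpolated with lam, mirroring
# the first training loop in this file. `loader`, `optimizer` and the beta value of 1.0
# are illustrative assumptions, not part of the original code.
import torch

criterion = torch.nn.CrossEntropyLoss()
for inputs, targets in loader:
    inputs, targets = inputs.cuda(), targets.cuda()
    outputs, y_a, y_b, lam = model(inputs, target=targets, cutmix_hidden=True, cutmix_beta=1.0)
    loss = lam * criterion(outputs, y_a) + (1 - lam) * criterion(outputs, y_b)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()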