예제 #1
0
def train(opt, loader, model, criterion, optimizer, epoch, logger):
    """Run one training epoch (optionally with CutMix) and return (model, optimizer).

    Inputs are cast to fp16 before the forward pass; gradients are clipped
    to ``opt.clip`` before every optimizer step.  Timing and loss stats are
    accumulated in AverageMeter objects and logged once at epoch end.
    """
    model.train()
    losses = AverageMeter()
    data_time = AverageMeter()
    batch_time = AverageMeter()
    tick = time.time()

    for inputs, labels in loader:
        # Move the batch to GPU; inputs are cast to half precision.
        inputs = inputs.half().cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        do_cutmix = opt.regularization == 'cutmix' and np.random.rand(1) < opt.cutmix_prob
        if do_cutmix:
            inputs, labels_a, labels_b, lam = cutmix_data(x=inputs, y=labels, alpha=opt.cutmix_alpha)
        data_time.update(time.time() - tick)

        # Forward pass, then the CutMix-interpolated or plain loss.
        outputs = model(inputs)
        if do_cutmix:
            loss = lam * criterion(outputs, labels_a) + (1 - lam) * criterion(outputs, labels_b)
        else:
            loss = criterion(outputs, labels)

        # Backward pass and parameter update, with gradient clipping.
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opt.clip)  # guard against exploding gradients
        optimizer.step()

        # Book-keeping: per-sample-weighted loss and wall-clock timings.
        losses.update(loss.data.item(), labels.size(0))
        batch_time.update(time.time() - tick)
        tick = time.time()

    logger.info('==> Train:[{0}]\tTime:{batch_time.sum:.4f}\tData:{data_time.sum:.4f}\tLoss:{loss.avg:.4f}\t'
        .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses))
    return model, optimizer
예제 #2
0
def train(cfg, train_loader, model, criterion, kd_criterion, optimizer,
          scheduler, epoch):
    """
    Run one training epoch with optional per-sequence CutMix, two auxiliary
    heads, optional knowledge distillation from the main head, and gradient
    accumulation over ``cfg['GD_STEPS']`` mini-batches.

    Contracts visible from this code:
      - ``train_loader`` yields ``(image, target)`` with image shaped
        ``(bsize, seq_len, c, h, w)``.
      - ``model(x, seq_len)`` returns ``(output, aux_output0, aux_output1)``.
      - ``scheduler(optimizer, i, epoch)`` is a callable that adjusts the LR
        in place; it is invoked only on accumulation boundaries.
      - ``amp.scale_loss`` (NVIDIA Apex) scales the loss for mixed precision.
    """
    losses = AverageMeter()
    model.train()
    tbar = tqdm(train_loader)

    for i, (image, target) in enumerate(tbar):
        image = image.cuda()
        target = target.cuda()
        # (batch, sequence length, channels, height, width)
        bsize, seq_len, c, h, w = image.size()
        # image = image.view(bsize * seq_len, c, h, w)
        # target = target.view(-1, target.size(-1))

        # Apply augmentation with probability P_AUGMENT when either CutMix
        # or MixUp is enabled in the config.
        data_aug = cfg["CUTMIX"] or cfg["MIXUP"]
        if np.random.uniform() < cfg["P_AUGMENT"] and data_aug:
            #     if cfg["CUTMIX"]:
            #         mixed_x, y_a, y_b, lam = cutmix_data(image, target)
            #     elif cfg["MIXUP"]:
            #         mixed_x, y_a, y_b, lam = mixup_data(image, target)
            # NOTE(review): despite the MIXUP flag above, only cutmix_data is
            # ever called here — MIXUP currently also routes through CutMix.
            mixed_x = []
            y_a = []
            y_b = []
            lam = []
            # CutMix each sequence independently; the per-sequence lambda is
            # repeated seq_len times so every frame shares its sequence's mix.
            for st_image, st_target in zip(image, target):
                mixed_st_image, st_y_a, st_y_b, st_lam = cutmix_data(
                    st_image, st_target)
                mixed_x.append(mixed_st_image)
                y_a.append(st_y_a)
                y_b.append(st_y_b)
                lam.append(torch.FloatTensor([st_lam] * seq_len))
            mixed_x = torch.stack(mixed_x)
            y_a = torch.stack(y_a)
            y_b = torch.stack(y_b)
            lam = torch.cat(lam, 0).unsqueeze(1).cuda()
            # Flatten (bsize, seq_len, ...) -> (bsize*seq_len, ...) for the model.
            mixed_x = mixed_x.view(bsize * seq_len, c, h, w)
            y_a = y_a.view(-1, target.size(-1))
            y_b = y_b.view(-1, target.size(-1))

            output, aux_output0, aux_output1 = model(mixed_x, seq_len)
            main_loss = mixup_criterion(criterion, output, y_a, y_b, lam)
            if cfg["USE_KD"]:
                # Aux heads: blend supervised mixup loss with distillation
                # from the main head, weighted by ALPHA.
                aux_loss = cfg["ALPHA"] * (
                    mixup_criterion(criterion, aux_output0, y_a, y_b, lam) +
                    mixup_criterion(criterion, aux_output1, y_a, y_b, lam)
                ) + (1. - cfg["ALPHA"]) * (kd_criterion(aux_output0, output) +
                                           kd_criterion(aux_output1, output))
            else:
                aux_loss = mixup_criterion(
                    criterion, aux_output0, y_a, y_b, lam) + mixup_criterion(
                        criterion, aux_output1, y_a, y_b, lam)
        else:
            # No augmentation: flatten and compute plain supervised losses.
            image = image.view(bsize * seq_len, c, h, w)
            target = target.view(-1, target.size(-1))
            output, aux_output0, aux_output1 = model(image, seq_len)
            main_loss = criterion(output, target)
            if cfg["USE_KD"]:
                aux_loss = cfg["ALPHA"] * (
                    criterion(aux_output0, target) +
                    criterion(aux_output1, target)) + (1. - cfg["ALPHA"]) * (
                        kd_criterion(aux_output0, output) +
                        kd_criterion(aux_output1, output))
            else:
                aux_loss = criterion(aux_output0, target) + criterion(
                    aux_output1, target)
        loss = main_loss + cfg["AUX_W"] * aux_loss
        loss = loss.mean()

        # gradient accumulation: scale down so GD_STEPS backward passes sum
        # to one effective batch gradient; step/zero only on boundaries.
        loss = loss / cfg['GD_STEPS']
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        if (i + 1) % cfg['GD_STEPS'] == 0:
            scheduler(optimizer, i, epoch)
            optimizer.step()
            optimizer.zero_grad()

        # record loss (un-scaled back by GD_STEPS for reporting).
        # NOTE(review): image.size(0) is bsize in the augmented branch but
        # bsize*seq_len in the else branch (image was reshaped there), so the
        # meter weighting differs between branches — confirm intended.
        losses.update(loss.item() * cfg['GD_STEPS'], image.size(0))
        tbar.set_description("Train loss: %.5f, learning rate: %.6f" %
                             (losses.avg, optimizer.param_groups[-1]['lr']))
def train_model(model,criterion, optimizer):
    """Full training loop with optional CutMix, Apex AMP, StepLR scheduling,
    per-epoch validation, and best/interval checkpointing.

    Relies on module-level names not visible here: ``opt`` (config),
    ``model_save_dir``, ``rubbishDataset``, ``cutmix_data``, ``val_model``,
    and Apex ``amp`` (the model/optimizer are assumed already amp-initialized
    by the caller — TODO confirm).
    """
    train_dataset = rubbishDataset(opt.train_val_data, opt.train_list, phase='train', input_size=opt.input_size)
    # train_dataset = w_rubbishDataset(opt.train_val_data, opt.train_list, phase='train', input_size=opt.input_size)
    trainloader = DataLoader(train_dataset,
                             batch_size=opt.train_batch_size,
                             shuffle=True,
                             num_workers=opt.num_workers)

    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, verbose=False)
    # scheduler=torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer,T_mult=2,T_0=3)
    # Decay LR by 10x every 8 epochs.
    scheduler = lr_scheduler.StepLR(optimizer, step_size=8, gamma=0.1)

    total_iters=len(trainloader)
    model_name=opt.backbone
    train_loss = []
    since = time.time()
    best_score = 0.0
    best_epoch = 0
    #
    for epoch in range(1,opt.max_epoch+1):
        model.train(True)
        begin_time=time.time()
        running_corrects_linear = 0
        count=0
        for i, data in enumerate(trainloader):
            count+=1
            inputs, labels = data
            labels = labels.type(torch.LongTensor)
            inputs, labels = inputs.cuda(), labels.cuda()

            # Apply CutMix with probability opt.cut_prob; the loss is then a
            # lam-weighted blend of the two mixed targets.
            if np.random.rand(1)<opt.cut_prob:

                inputs, targets_a, targets_b, lam = cutmix_data(inputs, labels, 1.0, use_cuda=True)
            # print(epoch)

                #
                out_linear= model(inputs)
                _, linear_preds = torch.max(out_linear.data, 1)

                loss = criterion(out_linear, targets_a) * lam + criterion(out_linear, targets_b) * (1. - lam)
            else:
                out_linear = model(inputs)
                _, linear_preds = torch.max(out_linear.data, 1)
                loss = criterion(out_linear, labels)

            # loss = criterion(out_linear, labels)
            #
            # Apex AMP loss scaling for mixed-precision backward.
            optimizer.zero_grad()
            with amp.scale_loss(loss,optimizer) as scaled_loss:
                scaled_loss.backward()
            # loss.backward()

            optimizer.step()

            # Log periodically, and always on a short (final) batch.
            if i % opt.print_interval == 0 or out_linear.size()[0] < opt.train_batch_size:
                spend_time = time.time() - begin_time
                print(
                    ' Epoch:{}({}/{}) loss:{:.3f} lr:{:.7f} epoch_Time:{}min:'.format(
                        epoch, count, total_iters,
                        loss.item(), optimizer.param_groups[-1]['lr'],
                        spend_time / count * total_iters // 60 - spend_time // 60))
                train_loss.append(loss.item())
            # NOTE(review): on CutMix batches preds are compared against the
            # original labels, so this accuracy undercounts mixed samples.
            running_corrects_linear += torch.sum(linear_preds == labels.data)
            #
        # Validate, then advance the LR schedule once per epoch.
        weight_score,val_loss = val_model(model, criterion)
        scheduler.step()
        # scheduler.step(val_loss)

        # NOTE(review): denominator assumes every batch is full-size; the
        # last partial batch makes this a slight underestimate.
        epoch_acc_linear = running_corrects_linear.double() / total_iters / opt.train_batch_size
        print('Epoch:[{}/{}] train_acc={:.4f} '.format(epoch, opt.max_epoch,
                                                       epoch_acc_linear))
        # with open()
        # Append validation metrics to a persistent text log.
        with open(os.path.join(model_save_dir, 'log.txt'), 'a+')as f:
            f.write('epoch:{}, loss:{:.4f}, acc:{:.4f}\n'.format(epoch, val_loss, weight_score))
        #
        model_out_path = model_save_dir + "/" + '{}_'.format(model_name) + str(epoch) +'_'+str(weight_score)[:6]+ '.pth'
        best_model_out_path = model_save_dir + "/" + '{}_'.format(model_name) + 'best' + '{:.4f}'.format(weight_score)+ '.pth'
        #save the best model
        if weight_score > best_score:
            best_score = weight_score
            best_epoch=epoch
            torch.save(model.state_dict(), best_model_out_path)
            print("best epoch: {} best acc: {}".format(best_epoch,weight_score))
        #save based on epoch interval
        if epoch % opt.save_interval == 0 and epoch>opt.min_save_epoch:
            torch.save(model.state_dict(), model_out_path)

    #
    print('Best acc: {:.3f} Best epoch:{}'.format(best_score,best_epoch))
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
예제 #4
0
    def forward(self, x,target=None, cutmix_hidden = False,  cutmix_beta = 0.1, layer_mix=None ):
        """Forward pass, optionally applying CutMix to a hidden layer.

        For CIFAR datasets with ``cutmix_hidden=True``, CutMix is applied to
        the activations after layer ``layer_mix`` (randomly chosen in 0..3
        when None) and the method returns ``(out, y_a, y_b, lam)``; otherwise
        it returns only the logits.

        NOTE(review): if a caller passes ``layer_mix`` outside 0..3 while
        ``cutmix_hidden=True``, ``y_a``/``y_b``/``lam`` are never bound and
        the return raises UnboundLocalError — confirm callers never do this.
        """
        if self.dataset == 'cifar10' or self.dataset == 'cifar100':
            if cutmix_hidden == True:
                # Pick a random mixing depth when none was requested.
                # (``== None`` kept as-is; ``is None`` would be idiomatic.)
                if layer_mix == None:
                    layer_mix = random.randint(0,3)

                out=x
                # Depth 0: mix the raw input.
                if layer_mix == 0:
                    #out = lam * out + (1 - lam) * out[index,:]
                    out, y_a, y_b, lam = cutmix_data(out, target, cutmix_beta)

                out= self.conv1(out)
                out = self.bn1(out)
                out = self.relu(out)
                out = self.layer1(out)

                # Depth 1: mix after the first residual stage.
                if layer_mix == 1:
                    #out = lam * out + (1 - lam) * out[index,:]
                    out, y_a, y_b, lam = cutmix_data(out, target, cutmix_beta)

                out = self.layer2(out)

                # Depth 2: mix after the second residual stage.
                if layer_mix == 2:
                    #out = lam * out + (1 - lam) * out[index,:]
                    out, y_a, y_b, lam = cutmix_data(out, target, cutmix_beta)

                out = self.layer3(out)

                # Depth 3: mix after the third residual stage.
                if layer_mix == 3:
                    #out = lam * out + (1 - lam) * out[index,:]
                    out, y_a, y_b, lam = cutmix_data(out, target, cutmix_beta)

                out = self.avgpool(out)
                out = out.view(out.size(0), -1)
                out = self.fc(out)

                # if layer_mix == 4:
                #     #out = lam * out + (1 - lam) * out[index,:]
                #     out, y_a, y_b, lam = cutmix_data(out, target, cutmix_beta)
                #
                # print(lam)
                #
                # lam = torch.tensor(lam).cuda()
                # lam = lam.repeat(y_a.size())

                # print(lam)

                # print(layer_mix)
                return out, y_a, y_b, lam

            else:
                # Plain CIFAR forward pass (no layer4 in this variant).
                x = self.conv1(x)
                x = self.bn1(x)
                x = self.relu(x)
                x = self.layer1(x)
                x = self.layer2(x)
                x = self.layer3(x)
                x = self.avgpool(x)
                x = x.view(x.size(0), -1)
                x = self.fc(x)

                return x

        elif self.dataset == 'imagenet':
            # ImageNet variant: adds maxpool and a fourth residual stage;
            # CutMix-hidden is not supported on this path.
            x = self.conv1(x)
            x = self.bn1(x)
            x = self.relu(x)
            x = self.maxpool(x)

            x = self.layer1(x)
            x = self.layer2(x)
            x = self.layer3(x)
            x = self.layer4(x)

            x = self.avgpool(x)
            x = x.view(x.size(0), -1)
            x = self.fc(x)

        # NOTE(review): for any other dataset value the input is returned
        # unchanged — likely unintended fall-through.
        return x