Example #1
def train(train_loader, net, criterion, optimizer, epoch, device):
    global writer

    start = time.time()
    net.train()

    train_loss = 0
    correct = 0
    total = 0
    logger.info(" === Epoch: [{}/{}] === ".format(epoch + 1, config.epochs))

    for batch_index, (inputs, targets) in enumerate(train_loader):
        # move tensor to GPU
        inputs, targets = inputs.to(device), targets.to(device)
        if config.mixup:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, config.mixup_alpha, device)

            outputs = net(inputs)
            loss = mixup_criterion(
                criterion, outputs, targets_a, targets_b, lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)

        # zero the gradient buffers
        optimizer.zero_grad()
        # backward
        loss.backward()
        # update weight
        optimizer.step()

        # count the loss and acc
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        if config.mixup:
            correct += (lam * predicted.eq(targets_a).sum().item()
                        + (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()

        if (batch_index + 1) % 100 == 0:
            logger.info("   == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
                batch_index + 1, len(train_loader),
                train_loss / (batch_index + 1), 100.0 * correct / total, get_current_lr(optimizer)))

    logger.info("   == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
        batch_index + 1, len(train_loader),
        train_loss / (batch_index + 1), 100.0 * correct / total, get_current_lr(optimizer)))

    end = time.time()
    logger.info("   == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total

    writer.add_scalar('train_loss', train_loss, global_step=epoch)
    writer.add_scalar('train_acc', train_acc, global_step=epoch)

    return train_loss, train_acc
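For reference, none of the examples on this page define mixup_data or mixup_criterion themselves. Below is a minimal sketch of the helpers the functional-style examples (such as Example #1) appear to assume, modelled on the widely used mixup-cifar10 reference implementation; the exact argument order (alpha vs. device vs. use_cuda) varies from project to project:

import numpy as np
import torch

def mixup_data(x, y, alpha=1.0, device='cuda'):
    # Draw the mixing coefficient from Beta(alpha, alpha); alpha <= 0 disables mixing.
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=device)
    # Convexly combine each image with a randomly paired image from the same batch.
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # The mixup loss is the lam-weighted sum of the losses against both label sets.
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)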
Example #2
def train(
    model,
    device,
    train_loader,
    optimizer,
    criterion,
    epoch,
    mixup=False,
    avg_meter=None,
):
    model.train()
    batch_loss = list()
    alpha = 0.2 if mixup else 0
    lam = None  # Required if doing mixup training

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        data, target_a, target_b, lam = mixup_data(
            data, target, device, alpha
        )  # Targets here correspond to the pair of examples used to create the mix
        optimizer.zero_grad()
        output = model(data)
        loss = mixup_criterion(criterion, output, target_a, target_b, lam)
        loss.backward()
        optimizer.step()
        batch_loss.append(loss.item())
        if avg_meter is not None:
            avg_meter.update(batch_loss[-1], n=len(data))

    return batch_loss
Example #3
def train():
    params = split_weights(model) if opt.no_wd else model.parameters()
    optimizer = optim.SGD(params, lr=base_lr, momentum=0.9, nesterov=True, weight_decay=0.0001)

    Loss = nn.CrossEntropyLoss()
    metric_loss = mloss()
    alpha = 1. if mixup else 0.
    iterations = 0
    for epoch in range(epochs):
        model.train()
        metric_loss.reset()
        st_time = time.time()
        if mixup and epoch > epochs - 20:
            alpha = 0.
        for i, (trans, labels) in enumerate(train_data):
            trans, targets_a, targets_b, lam = mixup_data(trans.cuda(), labels.cuda(), alpha=alpha)
            trans, targets_a, targets_b = map(Variable, (trans, targets_a, targets_b))

            optimizer.zero_grad()
            outputs = model(trans)
            loss = mixup_criterion(Loss, outputs, targets_a, targets_b, lam)
            loss.backward()
            optimizer.step()

            metric_loss.update(loss)
            iterations += 1
            lr_scheduler.update(optimizer, iterations)
        learning_rate = lr_scheduler.get()
        met_name, metric = metric_loss.get()
        epoch_time = time.time() - st_time
        epoch_str = 'Epoch {}. Train {}: {:.5f}. {} samples/s. lr {:.5}'. \
            format(epoch, met_name, metric, int(num_train_samples // epoch_time), learning_rate)
        logger.info(epoch_str)
        test(epoch, True)
Example #4
    def _training_step_mixup(self, imgs, targets, data_provider):
        imgs, targets_a, targets_b, lam = mixup_data_same_provider(
            imgs, targets, data_provider
        )
        logits = self.forward(imgs)
        loss = mixup_criterion(self.loss, logits, targets_a, targets_b, lam)
        return loss
Example #5
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # generate mixed inputs, two one-hot label vectors and mixing coefficient
        inputs, targets_a, targets_b, lam = mixup_data(inputs, targets,
                                                       args.alpha, use_cuda)
        optimizer.zero_grad()
        inputs, targets_a, targets_b = Variable(inputs), Variable(
            targets_a), Variable(targets_b)
        outputs = net(inputs)

        loss_func = mixup_criterion(targets_a, targets_b, lam)
        loss = loss_func(criterion, outputs)
        loss.backward()
        optimizer.step()

        train_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += lam * predicted.eq(targets_a.data).cpu().sum() + (
            1 - lam) * predicted.eq(targets_b.data).cpu().sum()

        progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))
    return (train_loss / batch_idx, 100. * correct / total)
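Example #5 (and several of the later examples) uses an older, two-step form of mixup_criterion: it is called with only the two label sets and lam, and returns a closure that is then applied to the criterion and the model outputs. A minimal sketch of that variant, assuming the same lam-weighted mixup loss:

def mixup_criterion(y_a, y_b, lam):
    # Returns a loss function so the call site reads:
    #   loss_func = mixup_criterion(targets_a, targets_b, lam)
    #   loss = loss_func(criterion, outputs)
    return lambda criterion, pred: lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)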
Example #6
    def training_step(self, batch, batch_idx):
        x, y, idx = batch
        x, y_a, y_b, lam = mixup_data(x, y)
        y_hat = self.forward(x)
        loss = mixup_criterion(self.crit, y_hat, y_a.float(), y_b.float(), lam)
        self.log('trn/_loss', loss)

        return loss
Example #7
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    total_gnorm = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # generate mixed inputs, two one-hot label vectors and mixing coefficient
        optimizer.zero_grad()

        if args.train_loss == 'mixup':
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, args.alpha, use_cuda)
            outputs = net(inputs)

            loss_func = mixup_criterion(targets_a, targets_b, lam)
            loss = loss_func(criterion, outputs)
        else:
            outputs = net(inputs)
            loss = cel(outputs, targets)

        loss.backward()

        if args.train_clip > 0:
            gnorm = torch.nn.utils.clip_grad_norm_(net.parameters(),
                                                   args.train_clip)
        else:
            gnorm = -1
        total_gnorm += gnorm

        optimizer.step()
        sgdr.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        acc = 100. * float(correct) / float(total)

        if batch_idx % 50 == 0 or batch_idx == len(trainloader) - 1:
            wnorms = [
                w.norm().item() for n, w in net.named_parameters()
                if 'weight' in n
            ]
            print(
                batch_idx, len(trainloader),
                'Loss: %.3f | Acc: %.3f%% (%d/%d) | WNorm: %.3e (min: %.3e, max: %.3e) | GNorm: %.3e (%.3e)'
                % (train_loss / (batch_idx + 1), acc, correct, total,
                   sum(wnorms), min(wnorms), max(wnorms), gnorm, total_gnorm /
                   (batch_idx + 1)))

    return train_loss / batch_idx, acc
Example #8
def mixup_train(loader, model, criterion, optimizer, epoch, use_cuda):
    global BEST_ACC, LR_STATE
    # switch to train mode
    if not cfg.CLS.fix_bn:
        model.train()
    else:
        model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    for batch_idx, (inputs, targets) in enumerate(loader):
        # adjust learning rate
        adjust_learning_rate(optimizer, epoch, batch=batch_idx, batch_per_epoch=len(loader))

        # mixup
        inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, ALPHA)
        if use_cuda:
            inputs, targets_a, targets_b = inputs.cuda(), targets_a.cuda(), targets_b.cuda()
        inputs, targets_a, targets_b = torch.autograd.Variable(inputs), torch.autograd.Variable(targets_a), \
                                       torch.autograd.Variable(targets_b)

        # measure data loading time
        data_time.update(time.time() - end)

        # forward pass: compute output
        outputs = model(inputs)
        # forward pass: compute gradient and do SGD step
        optimizer.zero_grad()
        loss_func = mixup_criterion(targets_a, targets_b, lam)
        loss = loss_func(criterion, outputs)
        # backward
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        # measure accuracy and record loss
        prec1, prec5 = [0.0], [0.0]
        losses.update(loss.data[0], inputs.size(0))
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))

        if (batch_idx + 1) % cfg.CLS.disp_iter == 0:
            print('Training: [{}/{}][{}/{}] | Best_Acc: {:4.2f}% | Time: {:.2f} | Data: {:.2f} | '
                  'LR: {:.8f} | Top1: {:.4f}% | Top5: {:.4f}% | Loss: {:.4f} | Total: {:.2f}'
                  .format(epoch + 1, cfg.CLS.epochs, batch_idx + 1, len(loader), BEST_ACC, batch_time.average(),
                          data_time.average(), LR_STATE, top1.avg, top5.avg, losses.avg,
                          batch_time.sum + data_time.sum))

    return (losses.avg, top1.avg)
Example #9
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    correct = 0
    total = 0

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate ** frac
        current_lr = opt.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = opt.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        
        if opt.mixup:
            inputs, targets_a, targets_b, lam = utils.mixup_data(inputs, targets, 0.6, True)
            inputs, targets_a, targets_b = map(Variable, (inputs, targets_a, targets_b))
        else:
            inputs, targets = Variable(inputs), Variable(targets)
        
        outputs = net(inputs)
        
        if opt.mixup:
            loss = utils.mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        else:
            loss = criterion(outputs, targets)
        
        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()
        
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        
        if opt.mixup:
            correct += (lam * predicted.eq(targets_a.data).cpu().sum().float()
                    + (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float())
        else:
            correct += predicted.eq(targets.data).cpu().sum()
       
        utils.progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
            % (train_loss/(batch_idx+1), 100.*float(correct)/float(total), correct, total))

    Train_acc = 100.*float(correct)/float(total)
    
    return train_loss/(batch_idx+1), Train_acc
Example #10
    def _training_step_cutmix(self, imgs, targets):
        # No cutmix end of epochs
        if self.current_epoch > self.cutmix_epoch or np.random.rand() < 0.5:
            return self._training_step_normal(imgs, targets)

        imgs, targets_a, targets_b, lam = cutmix_tile(
            imgs, targets, self.img_size, self.tile_size, beta=1.0
        )
        logits = self.forward(imgs)
        loss = mixup_criterion(self.loss, logits, targets_a, targets_b, lam)
        return loss
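Example #10 mixes rectangular tiles rather than whole images. Its cutmix_tile helper is project-specific, but the general CutMix idea is to paste a random box taken from a shuffled copy of the batch and set lam to the fraction of the image left untouched, so the same mixup_criterion can be reused. A generic, illustrative sketch of that idea (not the project's actual cutmix_tile):

import numpy as np
import torch

def cutmix_data(x, y, beta=1.0):
    # Sample the target area ratio and a random box centre.
    lam = np.random.beta(beta, beta)
    index = torch.randperm(x.size(0), device=x.device)
    h, w = x.size(2), x.size(3)
    cut_h, cut_w = int(h * np.sqrt(1.0 - lam)), int(w * np.sqrt(1.0 - lam))
    cy, cx = np.random.randint(h), np.random.randint(w)
    y1, y2 = np.clip(cy - cut_h // 2, 0, h), np.clip(cy + cut_h // 2, 0, h)
    x1, x2 = np.clip(cx - cut_w // 2, 0, w), np.clip(cx + cut_w // 2, 0, w)
    # Paste the box from the shuffled batch (in place), then recompute lam from the actual area.
    x[:, :, y1:y2, x1:x2] = x[index, :, y1:y2, x1:x2]
    lam = 1.0 - (y2 - y1) * (x2 - x1) / (h * w)
    return x, y, y[index], lam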
Example #11
File: easy_mwh.py Project: yuhao318/mwh
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0.0
    correct = 0.0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        mask = random.random()

        if epoch >= 90:
            # threshold = math.cos( math.pi * (epoch - 150) / ((200 - 150) * 2))
            threshold = (100 - epoch) / (100 - 90)
            # threshold = 1.0 - math.cos( math.pi * (200 - epoch) / ((200 - 150) * 2))
            if mask < threshold:
                inputs, targets_a, targets_b, lam = mixup_data(
                    inputs, targets, args.alpha, use_cuda)
            else:
                targets_a, targets_b = targets, targets
                lam = 1.0
        elif epoch >= 60:
            if epoch % 2 == 0:
                inputs, targets_a, targets_b, lam = mixup_data(
                    inputs, targets, args.alpha, use_cuda)
            else:
                targets_a, targets_b = targets, targets
                lam = 1.0
        else:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, args.alpha, use_cuda)

        optimizer.zero_grad()
        inputs, targets_a, targets_b = Variable(inputs), Variable(
            targets_a), Variable(targets_b)
        outputs = net(inputs)
        loss_func = mixup_criterion(targets_a, targets_b, lam)
        loss = loss_func(criterion, outputs)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += lam * predicted.eq(targets_a.data).cpu().sum().item() + (
            1.0 - lam) * predicted.eq(targets_b.data).cpu().sum().item()

        progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (batch_idx + 1),
             (100. * correct) / total, correct, total))
    return (train_loss / batch_idx, 100. * correct / total)
Example #12
def train(train_loader, net, criterion, optimizer, epoch, device):
    global writer

    start = time.time()
    # switch to train mode; only matters for layers like dropout and batch norm
    net.train()

    train_loss = 0
    correct = 0
    total = 0
    logger.info("====Epoch:[{}/{}]====".format(epoch + 1, config.epochs))
    for batch_index, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        if config.mixup:
            inputs, targets_a, targets_b, lam = utils.mixup_data(
                inputs, targets, config.mixup_alpha, device)
            outputs = net(inputs)
            loss = utils.mixup_criterion(criterion, outputs, targets_a,
                                         targets_b, lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += inputs.size()[0]
        if config.mixup:
            correct += (lam * predicted.eq(targets_a).sum().item() +
                        (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()
        if batch_index % 100 == 99:
            logger.info(
                "   == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}"
                .format(batch_index + 1, len(train_loader),
                        train_loss / (batch_index + 1),
                        100.0 * correct / total,
                        utils.get_current_lr(optimizer)))

    end = time.time()
    logger.info("   == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total
    writer.add_scalar('train_loss', train_loss, global_step=epoch)
    writer.add_scalar('train_acc', train_acc, global_step=epoch)
    return train_loss, train_acc
Example #13
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        if args.mixup:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, 1.0, use_cuda)
            inputs, targets_a, targets_b = map(Variable,
                                               (inputs, targets_a, targets_b))
            outputs = net(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b,
                                   lam)
            _, predicted = torch.max(outputs.data, 1)
            correct += lam * predicted.eq(targets_a.data).cpu().sum().float()
            correct += (1 - lam) * predicted.eq(
                targets_b.data).cpu().sum().float()
        else:
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            _, predicted = torch.max(outputs.data, 1)
            correct += predicted.eq(targets.data).cpu().sum()

        total += targets.size(0)
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss /
             (batch_idx + 1), 100. * correct / total, correct, total))

        if args.transfer_learning:
            if batch_idx >= len(trainloader) - 2:
                break
Example #14
    def train(self, loader):
        self.model.train()  # Set model to training mode

        running_loss = 0.0
        running_corrects = 0
        num_inst = 0

        if self.config.model.use_center_loss:
            running_centloss = 0.

        for batch_idx, batch_samples in enumerate(loader):
            batch_data = batch_samples['image'].cuda()
            if self.config.model.use_relabel:
                batch_label = batch_samples['class'].cuda().float().unsqueeze(
                    1)
            else:
                batch_label = batch_samples['class'].cuda()

            if self.config.model.use_mixup:
                # generate mixed inputs, two one-hot label vectors
                # and mixing coefficient
                batch_data, batch_label_A, batch_label_B, lam = \
                    mixup_data(batch_data, batch_label,
                               self.config.model.mixup_alpha, True)

            # zero the parameter gradients
            self.optimizer.zero_grad()
            if self.config.model.use_center_loss:
                self.optimizer_centloss.zero_grad()

            # forward
            # track history if only in train
            with torch.set_grad_enabled(True):
                features, outputs = self.model(batch_data)

                if self.config.model.use_relabel:
                    pred_labels = torch.where(outputs > 0.5,
                                              torch.ones(1).cuda(),
                                              torch.zeros(1).cuda())
                else:
                    _, pred_labels = torch.max(outputs, 1)

                if self.config.model.use_mixup:
                    loss_func = mixup_criterion(batch_label_A, batch_label_B,
                                                lam)
                    loss = loss_func(self.config.train.criterion.ce, outputs)
                else:
                    if self.config.model.use_relabel:
                        loss = self.config.train.criterion.sl1(
                            outputs, batch_label)
                    else:
                        loss = self.config.train.criterion.ce(
                            outputs, batch_label)

                if self.config.model.use_center_loss:
                    loss_cent = self.config.train.center_loss_weight * \
                        self.config.train.criterion.cent(features, batch_label)
                    loss += loss_cent

                loss.backward()
                self.optimizer.step()

                if self.config.model.use_center_loss:
                    for param in self.config.train.criterion.cent.parameters():
                        scale = 1. / self.config.train.center_loss_weight
                        param.grad.data *= scale
                    self.optimizer_centloss.step()

            # statistics
            running_loss += loss.item() * batch_label.size(0)
            if self.config.model.use_mixup:
                tmp_a = lam * \
                    pred_labels.eq(batch_label_A.data).cpu().sum()
                tmp_b = (1 - lam) * \
                    pred_labels.eq(batch_label_B.data).cpu().sum()
                running_corrects += (tmp_a + tmp_b)
            else:
                running_corrects += torch.sum(pred_labels == batch_label.data)
            num_inst += batch_label.size(0)

            if self.config.model.use_center_loss:
                running_centloss += loss_cent.item() * batch_label.size(0)

            batch_loss = running_loss / num_inst
            batch_acc = running_corrects.double() / num_inst
            if ((batch_idx + 1) % 20) == 0:
                if self.config.model.use_center_loss:
                    cent_loss_ = running_centloss / num_inst
                    self.disp_batch(batch_idx, batch_loss, batch_acc,
                                    cent_loss_)
                else:
                    self.disp_batch(batch_idx, batch_loss, batch_acc)

        return running_loss, running_corrects, num_inst
Example #15
def train(args,
          model: nn.Module,
          criterion,
          *,
          params,
          train_loader,
          valid_loader,
          init_optimizer,
          use_cuda,
          n_epochs=None,
          patience=2,
          max_lr_changes=3) -> bool:
    lr = args.lr
    n_epochs = n_epochs or args.n_epochs
    params = list(params)
    optimizer = init_optimizer(params, lr)

    run_root = Path(args.run_root)

    model_path = Path(str(run_root) + '/' + 'model.pt')

    if model_path.exists():
        state = load_model(model, model_path)
        epoch = state['epoch']
        step = state['step']
        best_valid_loss = state['best_valid_loss']
        best_f2 = state['best_f2']
    else:
        epoch = 1
        step = 0
        best_valid_loss = float('inf')
        best_f2 = 0

    lr_changes = 0

    save = lambda ep: torch.save(
        {
            'model': model.state_dict(),
            'epoch': ep,
            'step': step,
            'best_valid_loss': best_valid_loss,
            'best_f2': best_f2
        }, str(model_path))

    report_each = 100
    log = run_root.joinpath('train.log').open('at', encoding='utf8')
    valid_losses = []
    valid_f2s = []
    lr_reset_epoch = epoch
    for epoch in range(epoch, n_epochs + 1):
        model.train()
        tq = tqdm.tqdm(
            total=(args.epoch_size or len(train_loader) * args.batch_size))
        tq.set_description(f'Epoch {epoch}, lr {lr}')
        losses = []
        tl = train_loader
        if args.epoch_size:
            tl = islice(tl, args.epoch_size // args.batch_size)
        try:
            mean_loss = 0
            for i, (inputs, targets) in enumerate(tl):
                if use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                inputs, targets_a, targets_b, lam = mixup_data(
                    inputs, targets, 1, use_cuda)
                inputs, targets_a, targets_b = Variable(inputs), Variable(
                    targets_a), Variable(targets_b)
                outputs = model(inputs)
                loss_func = mixup_criterion(targets_a, targets_b, lam)
                loss = loss_func(criterion, outputs)
                loss = _reduce_loss(loss)

                batch_size = inputs.size(0)
                (batch_size * loss).backward()
                if (i + 1) % args.step == 0:
                    optimizer.step()
                    optimizer.zero_grad()
                    step += 1
                tq.update(batch_size)
                losses.append(loss.item())
                mean_loss = np.mean(losses[-report_each:])
                tq.set_postfix(loss=f'{mean_loss:.3f}')
                # if i and i % report_each == 0:
                #     write_event(log, step, loss=mean_loss)
            write_event(log, step, loss=mean_loss)
            tq.close()
            save(epoch + 1)
            valid_metrics = validation(model, criterion, valid_loader,
                                       use_cuda)
            write_event(log, step, **valid_metrics)
            valid_loss = valid_metrics['valid_loss']
            valid_f2 = valid_metrics['valid_f2_th_0.10']
            valid_f2s.append(valid_f2)
            valid_losses.append(valid_loss)

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                #shutil.copy(str(model_path), str(run_root) + '/model_loss_' + f'{valid_loss:.4f}' + '.pt')

            if valid_f2 > best_f2:
                best_f2 = valid_f2
                shutil.copy(
                    str(model_path),
                    str(run_root) + '/model_f2_' + f'{valid_f2:.4f}' + '.pt')


#             if epoch == 7:
#                 lr = 1e-4
#                 print(f'lr updated to {lr}')
#                 optimizer = init_optimizer(params, lr)
#             if epoch == 8:
#                 lr = 1e-5
#                 optimizer = init_optimizer(params, lr)
#                 print(f'lr updated to {lr}')
        except KeyboardInterrupt:
            tq.close()
            #             print('Ctrl+C, saving snapshot')
            #             save(epoch)
            #             print('done.')
            return False
    return True
Example #16
def train(train_loader, net, criterion, optimizer, epoch, device,\
          layer_inputs, layer_outputs, grad_inputs, grad_outputs, layers, crit, groups):
    global writer

    start = time.time()
    net.train()

    train_loss = 0
    correct = 0
    total = 0
    eps = 0.001
    logger.info(" === Epoch: [{}/{}] === ".format(epoch + 1, config.epochs))

    for batch_index, (inputs, targets) in enumerate(train_loader):
        # move tensor to GPU
        inputs, targets = inputs.to(device), targets.to(device)
        inputs.requires_grad = True
        layer_inputs.clear()
        layer_outputs.clear()
        grad_inputs.clear()
        grad_outputs.clear()
        if config.mixup:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, targets, config.mixup_alpha, device)

            outputs = net(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b,
                                   lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)

        # zero the gradient buffers
        optimizer.zero_grad()
        # backward
        loss.backward()

        #fgsm
        # for p in net.parameters():
        #     p.grad *= args.alpha
        # adv_input = inputs + eps * inputs.grad.sign()
        #
        # outputs = net(adv_input)
        #
        # loss_2 = (1-args.alpha) * criterion(outputs, targets)
        # loss_2.backward()

        # layer_loss = update_grad(net, layer_inputs, layer_outputs, grad_inputs, grad_outputs, layers, crit, args.alpha)
        layer_loss = group_noise(net, groups, crit, args.alpha)
        optimizer.step()

        # count the loss and acc
        train_loss += args.alpha * loss.item() + (1 - args.alpha) * layer_loss
        _, predicted = outputs.max(1)
        total += targets.size(0)
        if config.mixup:
            correct += (lam * predicted.eq(targets_a).sum().item() +
                        (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()

        if (batch_index + 1) % 100 == 0:
            logger.info(
                "   == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}"
                .format(batch_index + 1, len(train_loader),
                        train_loss / (batch_index + 1),
                        100.0 * correct / total, get_current_lr(optimizer)))

    logger.info(
        "   == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}"
        .format(batch_index + 1, len(train_loader),
                train_loss / (batch_index + 1), 100.0 * correct / total,
                get_current_lr(optimizer)))

    end = time.time()
    logger.info("   == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total

    writer.add_scalar('train_loss', train_loss, global_step=epoch)
    writer.add_scalar('train_acc', train_acc, global_step=epoch)

    return train_loss, train_acc
Example #17
    def train(self, epoch):
        batch_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()
        self.scheduler.step()
        self.model.train()

        end = time.time()
        lr = self.scheduler.get_lr()[0]

        # for batch, (softmax_data, triplet_data) in enumerate(itertools.zip_longest(self.softmax_train_loader, self.triplet_train_loader)):
        for batch, (softmax_data, triplet_data) in enumerate(
                zip(self.softmax_train_loader, self.triplet_train_loader)):
            loss = 0
            softmax_inputs, softmax_labels = softmax_data
            # move to CUDA
            softmax_inputs = softmax_inputs.to(
                self.device
            ) if torch.cuda.device_count() >= 1 else softmax_inputs
            softmax_labels = softmax_labels.to(
                self.device
            ) if torch.cuda.device_count() >= 1 else softmax_labels

            # softmax_score, softmax_outputs = self.model(softmax_inputs)
            # traditional_loss = self.softmax_loss(softmax_score, softmax_outputs, softmax_labels)
            # loss += traditional_loss

            inputs, targets_a, targets_b, lam = mixup_data(softmax_inputs,
                                                           softmax_labels,
                                                           alpha=opt.alpha)
            # inputs, targets_a, targets_b = Variable(inputs), Variable(targets_a), Variable(targets_b)
            softmax_score, softmax_outputs = self.model(inputs)  # feed the mixed inputs, not the originals
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            mixup_loss = loss_func(criterion, softmax_score)
            loss += mixup_loss

            losses.update(loss.item(), softmax_inputs.size(0))
            prec = (softmax_score.max(1)[1] == softmax_labels).float().mean()
            acc.update(prec, softmax_inputs.size(0))

            triplet_inputs, triplet_labels = triplet_data
            # move to CUDA
            triplet_inputs = triplet_inputs.to(
                self.device
            ) if torch.cuda.device_count() >= 1 else triplet_inputs
            triplet_labels = triplet_labels.to(
                self.device
            ) if torch.cuda.device_count() >= 1 else triplet_labels
            triplet_score, triplet_outputs = self.model(triplet_inputs)
            triplet_loss = self.triplet_loss(triplet_score, triplet_outputs,
                                             triplet_labels)
            loss += triplet_loss

            self.optimizer.zero_grad()
            if opt.fp16:  # use the optimizer to backward the scaled loss
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            self.optimizer.step()

            # measure elapsed time per batch
            batch_time.update(time.time() - end)
            end = time.time()

            # print timing and results
            if (batch + 1) % 10 == 0:
                logger.debug(
                    'Epoch: [{}][{}/{}]\t'
                    'Base_lr: [{:.2e}]\t'
                    'Time: ({batch_time.avg:.3f})\t'
                    'Loss_val: {loss.val:.4f}  (Loss_avg: {loss.avg:.4f})\t'
                    'Accuracy_val: {acc.val:.4f}  (Accuracy_avg: {acc.avg:.4f})'.
                    format(epoch,
                           batch + 1,
                           len(self.softmax_train_loader),
                           lr,
                           batch_time=batch_time,
                           loss=losses,
                           acc=acc))

        # per-epoch results
        log_text = 'Epoch[{}]\tBase_lr {:.2e}\tAccuracy {acc.avg:.4f}\tLoss {loss.avg:.4f}'.format(
            epoch, lr, acc=acc, loss=losses)
        logger.info(log_text)
        with open(log_file, 'a') as f:
            f.write(log_text + '\n')
            f.flush()
Example #18
def valid(loader, model, criterion_cls, criterion_ranking, optimizer, epoch, history, logger, args):
    batch_time = utils.AverageMeter()
    data_time = utils.AverageMeter()
    total_losses = utils.AverageMeter()
    top1 = utils.AverageMeter()
    cls_losses = utils.AverageMeter()          ## cross entropy loss
    ranking_losses = utils.AverageMeter()      ## margin ranking loss
    end = time.time()

    print("*** Valid ***")
    model.eval()

    all_idx = []
    all_iscorrect = []
    all_confidence = []
    all_target = []

    ## store the original images and labels

    for i, (input, target, idx) in enumerate(loader):   ## batchsize = 128
    # for i, (input, target) in enumerate(loader):   ## batchsize = 128
        with torch.no_grad():
            data_time.update(time.time() - end)
            input, target = input.cuda(), target.cuda()
            confidence = []
            all_idx.extend(idx.tolist())
            all_target.extend(target.tolist())

            ##mixup
            if args.mixup is not None:
                input, target_a, target_b, lam = utils.mixup_data(input, target, args.mixup, True)
                input, target_a, target_b = map(Variable, (input, target_a, target_b))

            output = model(input)

            # NaN alert
            assert torch.all(output == output)

            # compute ranking target value normalize (0 ~ 1) range
            # max(softmax)
            if args.rank_target == 'softmax':
                conf = F.softmax(output, dim=1)
                confidence, prediction = conf.max(dim=1)        ## prediction: predicted class, confidence: its softmax confidence

            # entropy
            elif args.rank_target == 'entropy':
                if args.data == 'cifar100':
                    value_for_normalizing = 4.605170
                else:
                    value_for_normalizing = 2.302585
                confidence = crl_utils.negative_entropy(output,
                                                        normalize=True,
                                                        max_value=value_for_normalizing)
            # margin
            elif args.rank_target == 'margin':
                conf, _ = torch.topk(F.softmax(output), 2, dim=1)
                conf[:,0] = conf[:,0] - conf[:,1]
                confidence = conf[:,0]

            # make input pair
            rank_input1 = confidence
            rank_input2 = torch.roll(confidence, -1)
            idx2 = torch.roll(idx, -1)

            # calc target, margin
            rank_target, rank_margin, norm_cor = history.get_target_margin(idx, idx2)  ## rank_target: which confidence is larger (1, 0, -1) / rank_margin: difference in correct-prediction counts

            rank_target_nonzero = rank_target.clone()
            rank_target_nonzero[rank_target_nonzero == 0] = 1  ## replace every 0 in rank_target with 1
            rank_input2 = rank_input2 + rank_margin / rank_target_nonzero
            ranking_loss = criterion_ranking(rank_input1,
                                             rank_input2,
                                             rank_target)

            # total loss
            if args.mixup is not None:
                cls_loss = utils.mixup_criterion(criterion_cls, output, target_a, target_b, lam)
            else:
                cls_loss = criterion_cls(output, target)

            ranking_loss = args.rank_weight * ranking_loss
            loss = cls_loss + ranking_loss

        # record loss and accuracy
        prec, correct = utils.accuracy(output, target)

        all_iscorrect.extend(map(int, correct))
        all_confidence.extend(confidence.tolist())
        total_losses.update(loss.item(), input.size(0))
        cls_losses.update(cls_loss.item(), input.size(0))
        ranking_losses.update(ranking_loss.item(), input.size(0))
        top1.update(prec.item(), input.size(0))


        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            print('[{0}][{1}/{2}] '
                  'Time {batch_time.val:.3f}({batch_time.avg:.3f}) '
                  'Data {data_time.val:.3f}({data_time.avg:.3f}) '
                  'Loss {loss.val:.4f}({loss.avg:.4f}) '
                  'CLS Loss {cls_loss.val:.4f}({cls_loss.avg:.4f}) '
                  'Rank Loss {rank_loss.val:.4f}({rank_loss.avg:.4f}) '
                  'Prec {top1.val:.2f}%({top1.avg:.2f}%)'.format(
                   epoch, i, len(loader), batch_time=batch_time,
                   data_time=data_time, loss=total_losses, cls_loss=cls_losses,
                   rank_loss=ranking_losses,top1=top1))

        # history.confidence_update(idx, correct, output)


    # max correctness update
    # history.max_correctness_update(epoch)
    logger.write([epoch, total_losses.avg, cls_losses.avg, ranking_losses.avg, top1.avg])

    return all_idx, all_iscorrect, all_confidence, all_target, total_losses, prec.item()
Example #19
def train(matrix_idx_confidence, matrix_idx_iscorrect, loader, model, wr, criterion_cls, criterion_ranking, optimizer, epoch, history, logger, args):
    batch_time = utils.AverageMeter()
    data_time = utils.AverageMeter()
    total_losses = utils.AverageMeter()
    top1 = utils.AverageMeter()
    cls_losses = utils.AverageMeter()          ## cross entropy loss
    ranking_losses = utils.AverageMeter()      ## margin ranking loss
    ji_wj_losses = utils.AverageMeter()        ## JI_WJ
    end = time.time()

    print("*** Training ***")
    model.train()

    all_idx = []
    all_iscorrect = []
    all_confidence = []
    all_target = []

    ## store the original images and labels

    for i, (input, target, idx) in enumerate(loader):   ## batchsize = 128
    # for i, (input, target) in enumerate(loader):   ## batchsize = 128
        data_time.update(time.time() - end)
        input, target = input.cuda(), target.cuda()
        confidence = []
        all_idx.extend(idx.tolist())
        all_target.extend(target.tolist())

        ##mixup
        if args.mixup is not None:
            input, target_a, target_b, lam = utils.mixup_data(input, target, args.mixup, True)
            input, target_a, target_b = map(Variable, (input, target_a, target_b))

        output = model(input)

        if args.ts is not None:
            temp = torch.nn.Parameter(torch.ones(1) * args.ts)
            ts = temp.unsqueeze(1).expand(output.size(0), output.size(1)).cuda()

            output = output / ts
        # NaN alert
        assert torch.all(output == output)

        for a in range(len(input)):
            wr.writerow([str(idx[a].item()), str(target[a].item())])

        # record loss and accuracy
        prec, correct = utils.accuracy(output, target)


        # # compute ranking target value normalize (0 ~ 1) range
        # max(softmax)
        if args.rank_target == 'softmax':
            conf = F.softmax(output, dim=1)
            confidence, prediction = conf.max(dim=1)        ## prediction: predicted class, confidence: its softmax confidence

        # entropy
        elif args.rank_target == 'entropy':
            if args.data == 'cifar100':
                value_for_normalizing = 4.605170
            else:
                value_for_normalizing = 2.302585
            confidence = crl_utils.negative_entropy(output,
                                                    normalize=True,
                                                    max_value=value_for_normalizing)

        # margin
        elif args.rank_target == 'margin':
            conf, _ = torch.topk(F.softmax(output), 2, dim=1)
            conf[:,0] = conf[:,0] - conf[:,1]
            confidence = conf[:, 0]

        # correctness count update
        if args.loss == "CRL" or args.cal == "Cor":
            history.correctness_update(idx, correct, output)

        # Avg confidence update
        if args.cal == "Conf":
            history.confidence_update(idx, confidence, output)

        for a in range(len(input)):
            matrix_idx_confidence[idx[a]].append(confidence[a].item())

        all_iscorrect.extend(map(int, correct))
        all_confidence.extend(confidence.tolist())


        # make input pair
        rank_input1 = confidence
        rank_input2 = torch.roll(confidence, -1)
        idx2 = torch.roll(idx, -1)

        # calc target, margin
        rank_target, rank_margin, acc, correctness = history.get_target_margin(idx, idx2)  ## rank_target: which confidence is larger (1, 0, -1) / rank_margin: difference in correct-prediction counts
        # print(rank_target, rank_margin)

        rank_target_nonzero = rank_target.clone()
        # print("rank_target_nonzero", rank_target_nonzero)
        rank_target_nonzero[rank_target_nonzero == 0] = 1  ## replace every 0 in rank_target with 1
        # print("rank_target_nonzero", rank_target_nonzero)
        rank_input2 = rank_input2 + rank_margin / rank_target_nonzero
        # print(rank_input2)
        # ranking loss // margin rankingloss
        ranking_loss = criterion_ranking(rank_input1,
                                         rank_input2,
                                         rank_target)
        # total loss
        ji_loss = 0
        if args.mixup is not None:
            cls_loss = utils.mixup_criterion(criterion_cls, output, target_a, target_b, lam)

        else:
            cls_loss = criterion_cls(output, target) # (128, 1)
            if args.b != None:
                # print("******************************")
                # print("Conf = ", confidence.sum().item()/len(confidence))
                if args.mode == 0: ## batch-wised
                    if args.ji_conf == True and cls_loss <= args.b:
                        print("*** Adjusting b(1.5-conf) ***")
                        print("[Before]", cls_loss.item())
                        cls_loss = abs(cls_loss - args.b * (1.5 - confidence.mean())) + args.b * (1.5 - confidence.mean())
                        print("[After]", cls_loss.item())
                    elif args.minus_1_conf == True and cls_loss <= args.b:
                        print("*** Adjusting b(1/conf) ***")
                        print("[Before]", cls_loss.item())
                        cls_loss = abs(cls_loss - args.b * (1 / confidence.mean())) + args.b * (1 / confidence.mean())
                        print("[After]", cls_loss.item())
                    elif args.ji_acc_conf == True and cls_loss <= args.b:
                        print("*** Adjusting b(acc/conf) ***")
                        print("[Before]", cls_loss.item())
                        acc_conf = (torch.from_numpy(correctness).to(torch.device("cuda")) / confidence).mean()
                        cls_loss = abs(cls_loss - args.b * acc_conf) + args.b * acc_conf
                        print("[After]", cls_loss.item())
                        print("--------------------------------------------")
                    elif args.ji_wj != 0 and cls_loss <= args.b:
                        print("*** Adjusting wj")
                        l1loss = nn.L1Loss(reduction="mean").cuda()
                        ji_wj_loss = l1loss(confidence, torch.from_numpy(correctness / epoch).to(torch.device("cuda")))
                    else:
                        if cls_loss.item() <= args.b:
                            print("*** Adjusting b(Flood) ***")
                        cls_loss = abs(cls_loss - args.b) + args.b

                else: ## sample-wised
                    if args.ji_conf == True and cls_loss.mean().item() <= args.b:
                        print("*** Adjusting b(1.5-conf) ***")
                        print("[Before]", cls_loss.mean().item())
                        cls_loss = abs(cls_loss - args.b * (1.5 - confidence)) + args.b * (1.5 - confidence)
                        print("[After]", cls_loss.mean().item())
                        cls_loss = cls_loss.mean()
                    elif args.minus_1_conf == True and cls_loss.mean().item() <= args.b:
                        print("*** Adjusting b(1/conf) ***")
                        print("[Before]", cls_loss.mean().item())
                        cls_loss = abs(cls_loss - args.b * (1 / confidence)) + args.b * (1 / confidence)
                        print("[After]", cls_loss.mean().item())
                        cls_loss = cls_loss.mean()
                    elif args.ji_acc_conf == True and cls_loss.mean().item() <= args.b:
                        print("*** Adjusting b(acc/conf) ***")
                        print("[Before]", cls_loss.mean().item())
                        acc_conf = torch.from_numpy(correctness).to(torch.device("cuda")) / confidence
                        cls_loss = abs(cls_loss - args.b * acc_conf) + args.b * acc_conf
                        cls_loss = cls_loss.mean()
                        print("[After]", cls_loss.mean().item())
                        print("--------------------------------------------")
                    elif args.ji_wj != 0 and cls_loss <= args.b:
                        print("*** Adjusting wj")
                        l1loss = nn.L1Loss(reduction="mean").cuda()
                        ji_wj_loss = l1loss(confidence, torch.from_numpy(correctness / epoch).to(torch.device("cuda")))
                    else:
                        if cls_loss.mean().item() <= args.b:
                            print("*** Adjusting b(Flood) ***")
                        cls_loss = abs(cls_loss - args.b) + args.b
                        cls_loss = cls_loss.mean()



        ranking_loss = args.rank_weight * ranking_loss

        if args.loss == "Margin":
            loss = ranking_loss
        elif args.ji_wj != 0:
            if cls_loss <= args.b:
                loss = cls_loss + args.ji_wj * ji_wj_loss
            else:
                loss = cls_loss
        else:
            loss = cls_loss + ranking_loss
        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print("prec", prec)
        # print("correct", correct)
        for a in range(len(idx)):
            if correct[a].item() == False:
                matrix_idx_iscorrect[idx[a]].append(0)
            else:
                matrix_idx_iscorrect[idx[a]].append(1)
        total_losses.update(loss.item(), input.size(0))
        cls_losses.update(cls_loss.mean().item(), input.size(0))
        # cls_losses.update(cls_loss.item(), input.size(0))
        if args.ji_wj != 0 and cls_loss <= args.b:
            ji_wj_losses.update(ji_wj_loss.item(), input.size(0))
        ranking_losses.update(ranking_loss.item(), input.size(0))
        top1.update(prec.item(), input.size(0))


        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            print('[{0}][{1}/{2}] '
                  'Time {batch_time.val:.3f}({batch_time.avg:.3f}) '
                  'Data {data_time.val:.3f}({data_time.avg:.3f}) '
                  'Loss {loss.val:.4f}({loss.avg:.4f}) '
                  'CLS Loss {cls_loss.val:.4f}({cls_loss.avg:.4f}) '
                  'Rank Loss {rank_loss.val:.4f}({rank_loss.avg:.4f}) '
                  'JI_WJ Loss {ji_wj_loss.val:.4f}({ji_wj_loss.avg:.4f}) '
                  'Prec {top1.val:.2f}%({top1.avg:.2f}%)'.format(
                   epoch, i, len(loader), batch_time=batch_time,
                   data_time=data_time, loss=total_losses, cls_loss=cls_losses,
                   rank_loss=ranking_losses, ji_wj_loss = ji_wj_losses, top1=top1))



    # max correctness update
    history.max_correctness_update(epoch)
    logger.write([epoch, total_losses.avg, cls_losses.avg, ranking_losses.avg, top1.avg])

    cur_confidence = history.get_confidence()
    cur_correctness = history.get_correctness()

    if args.rank_weight != 0.0:
        return matrix_idx_confidence, matrix_idx_iscorrect, all_idx, all_iscorrect, all_confidence, all_target, cls_losses.avg, ranking_losses.avg, correctness, cur_confidence, cur_correctness

    else:
        return matrix_idx_confidence, matrix_idx_iscorrect, all_idx, all_iscorrect, all_confidence, all_target, total_losses.avg, 0, correctness, cur_confidence, cur_correctness
Example #20
def train(args):

    train_dataset = FurnitureDataset('train',
                                     transform=preprocess_with_augmentation)
    val_dataset = FurnitureDataset('val', transform=preprocess)
    training_data_loader = DataLoader(dataset=train_dataset,
                                      num_workers=8,
                                      batch_size=BATCH_SIZE,
                                      shuffle=True)
    validation_data_loader = DataLoader(dataset=val_dataset,
                                        num_workers=1,
                                        batch_size=BATCH_SIZE,
                                        shuffle=False)

    model = get_model(args.name)

    class_weight = np.load('./class_weight.npy')

    #criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor(class_weight)).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    #criterion = FocalLoss(alpha=alpha, gamma=0).cuda()

    nb_learnable_params = sum(p.numel() for p in model.fresh_params())
    print(f'[+] nb learnable params {nb_learnable_params}')

    min_loss = float("inf")
    lr = 0
    patience = 0
    for epoch in range(30):
        print(f'epoch {epoch}')
        if epoch == 1:
            lr = 0.00003
            print(f'[+] set lr={lr}')
        if patience == 2:
            patience = 0
            model.load_state_dict(
                torch.load(
                    'models_trained/{}_{}_{}/best_val_weight_{}.pth'.format(
                        args.name, args.aug, args.alpha, args.name)))
            lr = lr / 10
            if lr < 3e-6:
                lr = 3e-6
            print(f'[+] set lr={lr}')
        if epoch == 0:
            lr = 0.001
            print(f'[+] set lr={lr}')
            optimizer = torch.optim.Adam(model.fresh_params(), lr=lr)
        else:
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=lr,
                                         weight_decay=0.0001)

        running_loss = RunningMean()
        running_score = RunningMean()

        model.train()
        pbar = tqdm(training_data_loader, total=len(training_data_loader))
        for inputs, labels in pbar:
            batch_size = inputs.size(0)

            inputs = Variable(inputs)
            labels = Variable(labels)
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            optimizer.zero_grad()

            if args.aug:
                inputs, targets_a, targets_b, lam = mixup_data(
                    inputs, labels, args.alpha, use_gpu)

            outputs = model(inputs)

            if args.aug:
                loss_func = mixup_criterion(targets_a, targets_b, lam)
                loss = loss_func(criterion, outputs)
            else:
                loss = criterion(outputs, labels)

            _, preds = torch.max(outputs.data, dim=1)
            running_loss.update(loss.data[0], 1)

            if args.aug:
                running_score.update(
                    batch_size - lam * preds.eq(targets_a.data).cpu().sum() -
                    (1 - lam) * preds.eq(targets_b.data).cpu().sum(),
                    batch_size)
            else:
                running_score.update(torch.sum(preds != labels.data),
                                     batch_size)

            loss.backward()
            optimizer.step()

            pbar.set_description(
                f'{running_loss.value:.5f} {running_score.value:.3f}')
        print(
            f'[+] epoch {epoch} {running_loss.value:.5f} {running_score.value:.3f}'
        )

        lx, px = utils.predict(model, validation_data_loader)
        log_loss = criterion(Variable(px), Variable(lx))
        log_loss = log_loss.data[0]
        _, preds = torch.max(px, dim=1)
        accuracy = torch.mean((preds != lx).float())
        print(f'[+] val {log_loss:.5f} {accuracy:.3f}')

        if log_loss < min_loss:
            torch.save(
                model.state_dict(),
                'models_trained/{}_{}_{}/best_val_weight_{}.pth'.format(
                    args.name, args.aug, args.alpha, args.name))
            print(
                f'[+] val score improved from {min_loss:.5f} to {log_loss:.5f}. Saved!'
            )
            min_loss = log_loss
            patience = 0
        else:
            patience += 1
Example #21
def train(cfg, train_loader, model, criterion, kd_criterion, optimizer,
          scheduler, epoch):
    """
    Helper function to train.
    """
    losses = AverageMeter()
    model.train()
    tbar = tqdm(train_loader)

    for i, (image, target) in enumerate(tbar):
        image = image.cuda()
        target = target.cuda()
        bsize, seq_len, c, h, w = image.size()
        # image = image.view(bsize * seq_len, c, h, w)
        # target = target.view(-1, target.size(-1))

        data_aug = cfg["CUTMIX"] or cfg["MIXUP"]
        if np.random.uniform() < cfg["P_AUGMENT"] and data_aug:
            #     if cfg["CUTMIX"]:
            #         mixed_x, y_a, y_b, lam = cutmix_data(image, target)
            #     elif cfg["MIXUP"]:
            #         mixed_x, y_a, y_b, lam = mixup_data(image, target)
            mixed_x = []
            y_a = []
            y_b = []
            lam = []
            for st_image, st_target in zip(image, target):
                mixed_st_image, st_y_a, st_y_b, st_lam = cutmix_data(
                    st_image, st_target)
                mixed_x.append(mixed_st_image)
                y_a.append(st_y_a)
                y_b.append(st_y_b)
                lam.append(torch.FloatTensor([st_lam] * seq_len))
            mixed_x = torch.stack(mixed_x)
            y_a = torch.stack(y_a)
            y_b = torch.stack(y_b)
            lam = torch.cat(lam, 0).unsqueeze(1).cuda()
            mixed_x = mixed_x.view(bsize * seq_len, c, h, w)
            y_a = y_a.view(-1, target.size(-1))
            y_b = y_b.view(-1, target.size(-1))

            output, aux_output0, aux_output1 = model(mixed_x, seq_len)
            main_loss = mixup_criterion(criterion, output, y_a, y_b, lam)
            if cfg["USE_KD"]:
                aux_loss = cfg["ALPHA"] * (
                    mixup_criterion(criterion, aux_output0, y_a, y_b, lam) +
                    mixup_criterion(criterion, aux_output1, y_a, y_b, lam)
                ) + (1. - cfg["ALPHA"]) * (kd_criterion(aux_output0, output) +
                                           kd_criterion(aux_output1, output))
            else:
                aux_loss = mixup_criterion(
                    criterion, aux_output0, y_a, y_b, lam) + mixup_criterion(
                        criterion, aux_output1, y_a, y_b, lam)
        else:
            image = image.view(bsize * seq_len, c, h, w)
            target = target.view(-1, target.size(-1))
            output, aux_output0, aux_output1 = model(image, seq_len)
            main_loss = criterion(output, target)
            if cfg["USE_KD"]:
                aux_loss = cfg["ALPHA"] * (
                    criterion(aux_output0, target) +
                    criterion(aux_output1, target)) + (1. - cfg["ALPHA"]) * (
                        kd_criterion(aux_output0, output) +
                        kd_criterion(aux_output1, output))
            else:
                aux_loss = criterion(aux_output0, target) + criterion(
                    aux_output1, target)
        loss = main_loss + cfg["AUX_W"] * aux_loss
        loss = loss.mean()

        # gradient accumulation
        loss = loss / cfg['GD_STEPS']
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        if (i + 1) % cfg['GD_STEPS'] == 0:
            scheduler(optimizer, i, epoch)
            optimizer.step()
            optimizer.zero_grad()

        # record loss
        losses.update(loss.item() * cfg['GD_STEPS'], image.size(0))
        tbar.set_description("Train loss: %.5f, learning rate: %.6f" %
                             (losses.avg, optimizer.param_groups[-1]['lr']))
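Example #21 builds the mix per clip with a cutmix_data helper that is not shown. A plausible sketch in the spirit of the CutMix paper follows; the bounding-box sampling and the helper signature are assumptions, and the repo's actual implementation may differ.

import numpy as np
import torch


def rand_bbox(size, lam):
    # size is (N, C, H, W); cut a box whose area ratio is roughly (1 - lam).
    H, W = size[2], size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_h, cut_w = int(H * cut_rat), int(W * cut_rat)
    cy, cx = np.random.randint(H), np.random.randint(W)
    bby1, bby2 = np.clip(cy - cut_h // 2, 0, H), np.clip(cy + cut_h // 2, 0, H)
    bbx1, bbx2 = np.clip(cx - cut_w // 2, 0, W), np.clip(cx + cut_w // 2, 0, W)
    return bbx1, bby1, bbx2, bby2


def cutmix_data(x, y, alpha=1.0):
    """Paste a random patch from a shuffled batch and return both label sets."""
    lam = np.random.beta(alpha, alpha)
    index = torch.randperm(x.size(0), device=x.device)
    bbx1, bby1, bbx2, bby2 = rand_bbox(x.size(), lam)
    x[:, :, bby1:bby2, bbx1:bbx2] = x[index, :, bby1:bby2, bbx1:bbx2]
    # Recompute lam from the exact pasted area so the label weights match the pixels.
    lam = 1. - (bbx2 - bbx1) * (bby2 - bby1) / float(x.size(-1) * x.size(-2))
    return x, y, y[index], lam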
예제 #22
0
    def train(
        self,
        data_loader: torch.utils.data.DataLoader,
        epoch: int,
        scheduler: Union[Callable, None] = None,
        print_every: int = 100,
    ) -> float:
        batch_time = utils.AverageMeter()
        data_time = utils.AverageMeter()
        losses = utils.AverageMeter()

        # put the model to train mode
        self.model.train()

        start = end = time.time()

        for batch_idx, (images, labels) in enumerate(data_loader):
            # measure the data loading time
            data_time.update(time.time() - end)

            # zero out the accumulated gradients
            self.optimizer.zero_grad()

            # send the data to device
            images = images.to(self.device)
            labels = labels.to(self.device)
            batch_size = images.size(0)

            if self.use_mixup:
                mixed_x, y_a, y_b, lam = utils.mixup_data(
                    images,
                    labels,
                    alpha=config.MIXUP_ALPHA,
                    use_cuda=True,
                )

                # forward pass
                y_preds = self.model(mixed_x)
                y_preds = y_preds.squeeze(1)
                y_a = y_a.type_as(y_preds)
                y_b = y_b.type_as(y_preds)
                loss = utils.mixup_criterion(
                    self.criterion, y_preds, y_a, y_b, lam
                )
            else:
                y_preds = self.model(images)
                y_preds = y_preds.squeeze(1)
                loss = self.criterion(y_preds, labels.type_as(y_preds))

            # record loss
            losses.update(loss.item(), batch_size)

            # backpropagate
            loss.backward()

            # step the optimizer
            self.optimizer.step()

            # measure the elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # step the scheduler if provided
            if scheduler is not None:
                scheduler.step()

            # display results
            if (batch_idx + 1) % print_every == 0:
                print(
                    f"Epoch: [{epoch+1}][{batch_idx+1}/{len(data_loader)}] "
                    # f"Data loading time: {data_time.val:.3f} ({data_time.avg:.3f}) "
                    f"Batch time: {batch_time.val:.3f} ({batch_time.avg:.3f}) "
                    f"Elapsed {utils.time_since(start, float(batch_idx+1)/len(data_loader))} "
                    f"Loss: {losses.val:.4f} ({losses.avg:.4f}) "
                )
        return losses.avg
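The example above squeezes the output to a single logit per sample and casts the mixup targets to the prediction dtype, which is the pattern used when the criterion expects float targets (for instance BCEWithLogitsLoss). A minimal, hypothetical usage sketch under that assumption:

import torch
import torch.nn as nn

criterion = nn.BCEWithLogitsLoss()
logits = torch.randn(8)                    # stands in for model(images).squeeze(1)
y_a = torch.randint(0, 2, (8,)).float()    # targets of the original samples
y_b = torch.randint(0, 2, (8,)).float()    # targets of the shuffled partners
lam = 0.3
# Same convex combination of the two losses as in the classification examples.
loss = lam * criterion(logits, y_a) + (1 - lam) * criterion(logits, y_b)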
예제 #23
0
            train_loss = []
            model.train()

            loop = tqdm(train_loader)
            for inputs, labels in loop:
                inputs = inputs.cuda()
                labels = labels.cuda()

                inputs, labels_a, labels_b, lam = mixup_data(
                    inputs, labels, 0.5, True)
                inputs, labels_a, labels_b = map(Variable,
                                                 (inputs, labels_a, labels_b))

                with torch.set_grad_enabled(True):
                    outputs = model(inputs)
                    loss = mixup_criterion(train_criterion, outputs, labels_a,
                                           labels_b, lam)

                    optimizer.zero_grad()
                    if args.fp16:
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    optimizer.step()
                    scheduler.batch_step()

                train_loss.append(loss.item())

                loop.set_description('Epoch {:2d}/{:2d}'.format(
                    epoch, args.epochs - 1))
                loop.set_postfix(loss=np.mean(train_loss))
예제 #24
0
def train(loader, model, criterion, optimizer, args, scheduler, epoch, lr):
    batch_time = utils.AverageMeter('Time', ':6.3f')
    data_time = utils.AverageMeter('Data', ':6.3f')
    losses = utils.AverageMeter()

    if isinstance(loader, torch.utils.data.dataloader.DataLoader):
        length = len(loader)
    else:
        length = getattr(loader, '_size', 0) / getattr(loader, 'batch_size', 1)
    model.train()
    if 'less_bn' in args.keyword:
        utils.custom_state(model)

    end = time.time()
    for i, data in enumerate(loader):
        if isinstance(data, list) and isinstance(data[0], dict):
            input = data[0]['data']
            target = data[0]['label'].squeeze()
        else:
            input, target = data
        data_time.update(time.time() - end)

        if args.device_ids is not None:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True).long()

        if args.mixup_enable:
            input, target_a, target_b, lam = utils.mixup_data(
                input,
                target,
                args.mixup_alpha,
                use_cuda=(args.device_ids is not None))

        if 'sgdr' in args.lr_policy and scheduler is not None and torch.__version__ < "1.0.4" and epoch < args.epochs:
            scheduler.step()
            for group in optimizer.param_groups:
                if 'lr_constant' in group:
                    group['lr'] = group['lr_constant']
            lr_list = scheduler.get_lr()
            if isinstance(lr_list, list):
                lr = lr_list[0]

        outputs = model(input)
        if isinstance(outputs, dict) and hasattr(model, '_out_features'):
            outputs = outputs[model._out_features[0]]

        if args.mixup_enable:
            def soft_cross_entropy(pred, target, lam):
                # Cross entropy against a lam-weighted one-hot target; replaces the
                # original lambda assignment, which also shadowed utils.mixup_criterion.
                one_hot = torch.zeros(pred.size()).cuda().scatter_(
                    1, target.data.view(-1, 1), lam.view(-1, 1))
                return (-F.log_softmax(pred, dim=1) * one_hot).sum(dim=1).mean()
            loss = utils.mixup_criterion(target_a, target_b,
                                         lam)(soft_cross_entropy, outputs)
        else:
            loss = criterion(outputs, target)

        if 'quant_loss' in args.global_buffer:
            loss += args.global_buffer['quant_loss']
            args.global_buffer.pop('quant_loss')

        if i % args.iter_size == 0:
            optimizer.zero_grad()

        if args.fp16:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()

        if i % args.iter_size == (args.iter_size - 1):
            if args.grad_clip is not None:
                nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)

            iterations = epoch * length + i
            if args.wakeup > iterations:
                for param_group in optimizer.param_groups:
                    if param_group.get('lr_constant', None) is not None:
                        continue
                    param_group['lr'] = param_group['lr'] * (
                        1.0 / args.wakeup) * iterations
                logging.info(
                    'train {}/{}, change learning rate to lr * {}'.format(
                        i, length, iterations / args.wakeup))
            if iterations >= args.warmup:
                optimizer.step()

        if 'sgdr' in args.lr_policy and scheduler is not None and torch.__version__ > "1.0.4" and epoch < args.epochs:
            scheduler.step()
            for group in optimizer.param_groups:
                if 'lr_constant' in group:
                    group['lr'] = group['lr_constant']
            lr_list = scheduler.get_lr()
            if isinstance(lr_list, list):
                lr = lr_list[0]

        losses.update(loss.item(), input.size(0))
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.report_freq == 0:
            logging.info(
                'train %d/%d, loss:%.3f(%.3f), batch time:%.2f(%.2f), data load time: %.2f(%.2f)'
                % (i, length, losses.val, losses.avg, batch_time.val,
                   batch_time.avg, data_time.val, data_time.avg))

        if epoch == 0 and i == 10:
            logging.info(utils.gpu_info())
        if args.delay > 0:
            time.sleep(args.delay)

        input = None
        target = None
        data = None

    if 'dali' in args.dataset:
        loader.reset()

    return losses.avg
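Example #24 (like examples #26 and #29 further down) calls utils.mixup_criterion(target_a, target_b, lam) and then applies the returned object to (criterion, outputs), i.e. a closure-style variant rather than the direct five-argument form sketched earlier. A minimal sketch of that closure style, assuming a scalar lam, could look like this:

def mixup_criterion(y_a, y_b, lam):
    # Closure-style variant: bind the targets and the mixing coefficient first,
    # then call the result with (criterion, pred).
    def loss_fn(criterion, pred):
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)
    return loss_fn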
예제 #25
0
def train(epoch):
    print('\nEpoch: %d' % epoch)
    global Train_acc
    net.train()
    train_loss = 0
    conf_mat = np.zeros((NUM_CLASSES, NUM_CLASSES))
    conf_mat_a = np.zeros((NUM_CLASSES, NUM_CLASSES))
    conf_mat_b = np.zeros((NUM_CLASSES, NUM_CLASSES))

    if epoch > learning_rate_decay_start and learning_rate_decay_start >= 0:
        frac = (epoch - learning_rate_decay_start) // learning_rate_decay_every
        decay_factor = learning_rate_decay_rate**frac
        current_lr = args.lr * decay_factor
        utils.set_lr(optimizer, current_lr)  # set the decayed rate
    else:
        current_lr = args.lr
    print('learning_rate: %s' % str(current_lr))

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()

        if args.augmentation:
            inputs, targets_a, targets_b, lam = utils.mixup_data(
                inputs, targets, 0.6, True)
            inputs, targets_a, targets_b = map(Variable,
                                               (inputs, targets_a, targets_b))
        else:
            inputs, targets = Variable(inputs), Variable(targets)

        _, _, _, _, outputs = net(inputs)

        if args.augmentation:
            loss = utils.mixup_criterion(criterion, outputs, targets_a,
                                         targets_b, lam)
        else:
            loss = criterion(outputs, targets)

        loss.backward()
        utils.clip_gradient(optimizer, 0.1)
        optimizer.step()
        train_loss += loss.item()

        if args.augmentation:

            conf_mat_a += losses.confusion_matrix(outputs, targets_a,
                                                  NUM_CLASSES)
            acc_a = sum([conf_mat_a[i, i] for i in range(conf_mat_a.shape[0])
                         ]) / conf_mat_a.sum()
            precision_a = np.array([
                conf_mat_a[i, i] / (conf_mat_a[i].sum() + 1e-10)
                for i in range(conf_mat_a.shape[0])
            ])
            recall_a = np.array([
                conf_mat_a[i, i] / (conf_mat_a[:, i].sum() + 1e-10)
                for i in range(conf_mat_a.shape[0])
            ])
            mAP_a = sum(precision_a) / len(precision_a)
            F1_score_a = (2 * precision_a * recall_a /
                          (precision_a + recall_a + 1e-10)).mean()

            conf_mat_b += losses.confusion_matrix(outputs, targets_b,
                                                  NUM_CLASSES)
            acc_b = sum([conf_mat_b[i, i] for i in range(conf_mat_b.shape[0])
                         ]) / conf_mat_b.sum()
            precision_b = np.array([
                conf_mat_b[i, i] / (conf_mat_b[i].sum() + 1e-10)
                for i in range(conf_mat_b.shape[0])
            ])
            recall_b = np.array([
                conf_mat_b[i, i] / (conf_mat_b[:, i].sum() + 1e-10)
                for i in range(conf_mat_b.shape[0])
            ])
            mAP_b = sum(precision_b) / len(precision_b)
            F1_score_b = (2 * precision_b * recall_b /
                          (precision_b + recall_b + 1e-10)).mean()

            acc = lam * acc_a + (1 - lam) * acc_b
            mAP = lam * mAP_a + (1 - lam) * mAP_b
            F1_score = lam * F1_score_a + (1 - lam) * F1_score_b
        else:
            conf_mat += losses.confusion_matrix(outputs, targets, NUM_CLASSES)
            acc = sum([conf_mat[i, i]
                       for i in range(conf_mat.shape[0])]) / conf_mat.sum()
            precision = [
                conf_mat[i, i] / (conf_mat[i].sum() + 1e-10)
                for i in range(conf_mat.shape[0])
            ]
            mAP = sum(precision) / len(precision)

            recall = [
                conf_mat[i, i] / (conf_mat[:, i].sum() + 1e-10)
                for i in range(conf_mat.shape[0])
            ]
            precision = np.array(precision)
            recall = np.array(recall)
            f1 = 2 * precision * recall / (precision + recall + 1e-10)
            F1_score = f1.mean()

        # utils.progress_bar(batch_idx, len(trainloader),
        #                    'Loss: %.3f | Acc: %.3f%% | mAP: %.3f%% | F1: %.3f%%'
        #                    % (train_loss / (batch_idx + 1), 100. * acc, 100. * mAP, 100. * F1_score))

    return train_loss / (batch_idx + 1), 100. * acc, 100. * mAP, 100 * F1_score
예제 #26
0
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    l2_losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        input = data[0]
        target = data[-1]
        if args.l2_loss:
            dual_input = data[1]
            dual_input_var = torch.autograd.Variable(dual_input)  
        if CUDA:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
        if args.mixup:
            input, y_a, y_b, lam = utils.mixup_data(input, target, alpha=1.0)
            y_a = torch.autograd.Variable(y_a)
            y_b = torch.autograd.Variable(y_b)


        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        if args.l2_loss:
            f1, f2, y1, y2 = model(input_var, dual_input_var)
            l2_loss = l2_loss_w * mse_loss(f1, f2)
            output = torch.cat([y1, y2])
            target = torch.cat([target, target])
            target_var = torch.cat([target_var, target_var])
            loss = criterion(output, target_var)
            loss = loss + l2_loss

            l2_losses.update(l2_loss.data[0], input.size(0))
        else:
            output = model(input_var)
            if args.mixup:
                loss_fun = utils.mixup_criterion(y_a, y_b, lam)
                loss = loss_fun(criterion, output)
            else:
                loss = criterion(output, target_var)

        # measure accuracy and record loss
        if args.mixup:
            _, predicted = torch.max(output.data, 1)
            prec1 = lam*predicted.eq(y_a.data).cpu().sum() + (1-lam)*predicted.eq(y_b.data).cpu().sum()
            top1.update(prec1, input.size(0))
        else:
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            top1.update(prec1[0], input.size(0))
        #top5.update(prec5[0], input.size(0))

        losses.update(loss.data[0], input.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % 5 == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'L2Loss {l2_loss.val:.4f} ({l2_loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'.format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, l2_loss=l2_losses, top1=top1))
        
        step = epoch * len(train_loader) + i
        #print(type(step))
        writer.add_scalar('train/acc', prec1[0], step)
        writer.add_scalar('train/loss', loss.data[0], step)
        if args.l2_loss:
            writer.add_scalar('train/l2_loss', l2_loss.data[0], step)
        for name, param in model.named_parameters():
            #print(name, param.data.cpu().numpy().dtype)
            if name.find('batchnorm')==-1:
                writer.add_histogram(name, param.data.cpu().numpy(), step)
예제 #27
0
def train(fold, train_dataset,path, test_dataset):
    batch_size = 64
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)
    start_step = 0
    end_step = 150
    lr = 0.0001
    disp_interval = 10
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # DataParallel below spreads work across all visible GPUs
    net = GLENet(n_classes)
    for param in net.parameters():
        param.requires_grad = True
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)
    net.to(device)
    net = net.cuda()
    #net.apply(init_params)

    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=lr)
    loss_fun = nn.CrossEntropyLoss()

    best_acc = 0
    best_epoch = 0
    train_loss_graph = []
    test_loss_graph = []
    for epoch in range(start_step, end_step+1):
        net.train()
        step = 0
        train_loss = 0
        test_loss = 0
        running_acc = 0
        total = 0
        
        if epoch == 80:
            lr = lr*0.1
            print('learning rate, ',lr)
            for g in optimizer.param_groups:
                g['lr'] = lr
        
        for index, data in enumerate(train_loader):
            #print(index)
            step = step + 1
            label, x1, x2, x3, e1, e2, e3, n1, n2, n3, m1, m2, m3 = data

            label = label.type(torch.LongTensor)
            label = network.tensor_to_variable(label, is_cuda=True, is_training=True)

            x1 = x1.type(torch.FloatTensor)
            x1 = network.tensor_to_variable(x1, is_cuda=True, is_training=True)
            x2 = x2.type(torch.FloatTensor)
            x2 = network.tensor_to_variable(x2, is_cuda=True, is_training=True)
            x3 = x3.type(torch.FloatTensor)
            x3 = network.tensor_to_variable(x3, is_cuda=True, is_training=True)

            e1 = e1.type(torch.FloatTensor)
            e1 = network.tensor_to_variable(e1, is_cuda=True, is_training=True)
            e2 = e2.type(torch.FloatTensor)
            e2 = network.tensor_to_variable(e2, is_cuda=True, is_training=True)
            e3 = e3.type(torch.FloatTensor)
            e3 = network.tensor_to_variable(e3, is_cuda=True, is_training=True)


            n1 = n1.type(torch.FloatTensor)
            n1 = network.tensor_to_variable(n1, is_cuda=True, is_training=True)
            n2 = n2.type(torch.FloatTensor)
            n2 = network.tensor_to_variable(n2, is_cuda=True, is_training=True)
            n3 = n3.type(torch.FloatTensor)
            n3 = network.tensor_to_variable(n3, is_cuda=True, is_training=True)

            m1 = m1.type(torch.FloatTensor)
            m1 = network.tensor_to_variable(m1, is_cuda=True, is_training=True)
            m2 = m2.type(torch.FloatTensor)
            m2 = network.tensor_to_variable(m2, is_cuda=True, is_training=True)
            m3 = m3.type(torch.FloatTensor)
            m3 = network.tensor_to_variable(m3, is_cuda=True, is_training=True)

            mixup, targets_a, targets_b, lam = mixup_data(x3,label,1)

            out1, out2 = net(x1,x2,mixup,e1,e2,e3,n1,n2,n3,m1,m2,m3)
            #print(out.shape, label.shape)

            loss_f = mixup_criterion(targets_a.reshape(-1),targets_b.reshape(-1),lam)
            loss1 = loss_f(loss_fun, out1)
            loss2 = loss_fun(out2, label.reshape(-1))
            loss = loss1 + loss2
            #loss = loss_fun(out, label.reshape(-1))
            train_loss += loss.item()  # detach from the graph before accumulating

            out = out1 + out2
            
            _,pred = torch.max(out,1)
            
            num_correct = 0

            for j in range(label.shape[0]):
                if pred[j] == label[j]:
                    num_correct += 1
            
            #num_correct = torch.sum(pred == label.data)
            running_acc += num_correct
            total += label.size(0)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


            """
            if step % disp_interval == 0:
                ave_loss = train_loss / (batch_size*(index+1))
                acc = running_acc / (batch_size*(index+1))
                log_text = 'epoch: %4d, step: %4d, loss: %4.6f, Acc: %4.6f' % (epoch+1, step, ave_loss, acc)
                print(log_text)
            """
        acc = float(running_acc) / total
        print('epoch: {}, Loss: {}, Acc:{} , acc:{}'.format(epoch+1, train_loss/(index+1), running_acc, acc))
        train_loss_graph.append(train_loss/(index+1))
        net.eval()
        eval_acc = 0
        for tindex,test_data in enumerate(test_loader):
            tlabel, tx1, tx2, tx3, te1, te2, te3, tn1, tn2, tn3, tm1, tm2, tm3 = test_data
            tlabel = tlabel.type(torch.LongTensor)
            tlabel = network.tensor_to_variable(tlabel, is_cuda=True, is_training=False)

            tx1 = tx1.type(torch.FloatTensor)
            tx1 = network.tensor_to_variable(tx1, is_cuda=True, is_training=False)
            tx2 = tx2.type(torch.FloatTensor)
            tx2 = network.tensor_to_variable(tx2, is_cuda=True, is_training=False)
            tx3 = tx3.type(torch.FloatTensor)
            tx3 = network.tensor_to_variable(tx3, is_cuda=True, is_training=False)

            te1 = te1.type(torch.FloatTensor)
            te1 = network.tensor_to_variable(te1, is_cuda=True, is_training=False)
            te2 = te2.type(torch.FloatTensor)
            te2 = network.tensor_to_variable(te2, is_cuda=True, is_training=False)
            te3 = te3.type(torch.FloatTensor)
            te3 = network.tensor_to_variable(te3, is_cuda=True, is_training=False)


            tn1 = tn1.type(torch.FloatTensor)
            tn1 = network.tensor_to_variable(tn1, is_cuda=True, is_training=False)
            tn2 = tn2.type(torch.FloatTensor)
            tn2 = network.tensor_to_variable(tn2, is_cuda=True, is_training=False)
            tn3 = tn3.type(torch.FloatTensor)
            tn3 = network.tensor_to_variable(tn3, is_cuda=True, is_training=False)

            tm1 = tm1.type(torch.FloatTensor)
            tm1 = network.tensor_to_variable(tm1, is_cuda=True, is_training=False)
            tm2 = tm2.type(torch.FloatTensor)
            tm2 = network.tensor_to_variable(tm2, is_cuda=True, is_training=False)
            tm3 = tm3.type(torch.FloatTensor)
            tm3 = network.tensor_to_variable(tm3, is_cuda=True, is_training=False)

            tout1, tout2 = net(tx3,tx3,tx3,te3,te3,te3,tn3,tn3,tn3,tm3,tm3,tm3)

            tout = tout1 + tout2

            tloss = loss_fun(tout, tlabel.reshape(-1))
            test_loss += float(tloss)
            
            _,tpred = torch.max(tout,1)
            correct = (tpred == tlabel).sum()
            eval_acc += correct
        print('test_acc : {} test_loss: {}'.format(eval_acc, test_loss / (tindex + 1)))
        test_loss_graph.append(test_loss / (tindex + 1))
        if eval_acc > best_acc:
            best_acc = eval_acc
            best_epoch = epoch
            torch.save(net.state_dict(), path+'{}_best.pth'.format(best_epoch))

    return best_epoch,best_acc
예제 #28
0
def main_worker(args, logger):
    try:
        writer = SummaryWriter(logdir=args.sub_tensorboard_dir)

        train_set = RSDataset(rootpth=args.data_dir, mode='train')
        train_loader = DataLoader(train_set,
                                  batch_size=args.batch_size,
                                  drop_last=True,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=args.num_workers)

        # Per-sample selection weights for a WeightedRandomSampler. Resampling did not
        # work well, so it is disabled, but the snippet is kept as a reference.
        # sampler_weight = train_set.get_sampler_weight()
        #
        # train_sampler = WeightedRandomSampler(sampler_weight,
        #                                 num_samples=100000,     # number of samples drawn per epoch
        #                                 replacement=True)
        #
        # train_loader = DataLoader(train_set,
        #                           batch_size=args.batch_size,
        #                           pin_memory=True,
        #                           num_workers=args.num_workers,
        #                           sampler=train_sampler)

        val_set = RSDataset(rootpth=args.data_dir, mode='val')
        val_loader = DataLoader(val_set,
                                batch_size=args.test_batch_size,
                                drop_last=False,
                                shuffle=False,
                                pin_memory=True,
                                num_workers=args.num_workers)

        net = Dense201()
        logger.info('net name: {}'.format(net.__class__.__name__))
        net.train()
        input_ = torch.randn((1, 3, 224, 224))
        writer.add_graph(net, input_)
        net = net.cuda()
        criterion = nn.CrossEntropyLoss().cuda()

        if args.pre_epoch:
            # Pre-training: freeze the earlier layers and train only the newly added fully connected layer
            for name, param in net.named_parameters():
                if 'classifier' not in name:
                    param.requires_grad = False

            optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                         net.parameters()),
                                  lr=args.base_lr,
                                  momentum=0.9,
                                  nesterov=args.sgdn,
                                  weight_decay=args.weight_decay)
            scheduler = optim.lr_scheduler.CosineAnnealingLR(
                optimizer,
                T_max=args.pre_epoch * len(train_loader),
                eta_min=args.min_lr)

        loss_record = []
        iter_step = 0
        running_loss = []
        st = glob_st = time.time()
        total_epoch = args.pre_epoch + args.warmup_epoch + args.normal_epoch
        total_iter_step = len(train_loader) * total_epoch

        logger.info('len(train_set): {}'.format(len(train_set)))
        logger.info('len(train_loader): {}'.format(len(train_loader)))
        logger.info('len(val_set): {}'.format(len(val_set)))
        logger.info('len(val_loader): {}'.format(len(val_loader)))
        logger.info('total_epoch: {}'.format(total_epoch))
        logger.info('total_iter_step: {}'.format(total_iter_step))

        if args.pre_epoch:
            logger.info('----- start pre train ------')
        for epoch in range(total_epoch):

            # Evaluate
            # if epoch % args.eval_fre == 0 and epoch != 0:
            if epoch % args.eval_fre == 0:
                evalute(net, val_loader, writer, epoch, logger)

            # Save a checkpoint
            if epoch % args.save_fre == 0 and epoch > args.save_after:
                model_out_name = osp.join(args.sub_model_out_dir,
                                          'out_{}.pth'.format(epoch))
                # Use net.module when the model is wrapped (e.g. by DataParallel) so saving does not fail
                state_dict = net.module.state_dict() if hasattr(
                    net, 'module') else net.state_dict()
                torch.save(state_dict, model_out_name)

            # Pre-training done: unfreeze all parameters and rebuild the optimizer, applying weight decay only to the multiplicative weights of linear and conv layers
            if epoch == args.pre_epoch:
                for param in net.parameters():
                    param.requires_grad = True

                wd_params, nowd_params = [], []
                for name, module in net.named_modules():
                    if isinstance(module, (nn.Linear, nn.Conv2d)):
                        wd_params.append(module.weight)
                        if module.bias is not None:
                            nowd_params.append(module.bias)
                    # TODO: does building the param list this way miss any parameters?
                    elif isinstance(module, nn.BatchNorm2d):
                        nowd_params += list(module.parameters())
                    # else:
                    #     nowd_params += list(module.parameters())
                param_list = [{
                    'params': wd_params
                }, {
                    'params': nowd_params,
                    'weight_decay': 0
                }]

                optimizer = optim.SGD(param_list,
                                      lr=args.base_lr,
                                      momentum=0.9,
                                      nesterov=args.sgdn,
                                      weight_decay=args.weight_decay)
                # Rebuild the learning-rate scheduler
                if args.warmup_epoch:
                    scheduler = LinearScheduler(optimizer,
                                                start_lr=args.min_lr,
                                                end_lr=args.base_lr,
                                                all_steps=args.warmup_epoch *
                                                len(train_loader))
                    logger.info(
                        '-------- start warmup for {} epochs -------'.format(
                            args.warmup_epoch))

            # When the normal training phase starts, build a new scheduler
            if epoch == args.pre_epoch + args.warmup_epoch:
                scheduler = optim.lr_scheduler.CosineAnnealingLR(
                    optimizer,
                    T_max=args.normal_epoch * len(train_loader),
                    eta_min=args.min_lr)
                logger.info('---- start normal train for {} epoch ----'.format(
                    args.normal_epoch))

            for img, lb in train_loader:
                iter_step += 1
                img = img.cuda()
                lb = lb.cuda()

                optimizer.zero_grad()

                inputs, targets_a, targets_b, lam = mixup_data(
                    img, lb, args.mixup_alpha)
                outputs = net(inputs)
                loss = mixup_criterion(criterion, outputs, targets_a,
                                       targets_b, lam)
                # outputs = net(img)
                # loss = criterion(outputs, lb)

                loss.backward()
                optimizer.step()
                scheduler.step()

                running_loss.append(loss.item())

                if iter_step % args.msg_fre == 0:
                    ed = time.time()
                    spend = ed - st
                    global_spend = ed - glob_st
                    st = ed

                    eta = int((total_iter_step - iter_step) *
                              (global_spend / iter_step))
                    eta = str(datetime.timedelta(seconds=eta))
                    global_spend = str(
                        datetime.timedelta(seconds=(int(global_spend))))

                    avg_loss = np.mean(running_loss)
                    loss_record.append(avg_loss)
                    running_loss = []

                    lr = optimizer.param_groups[0]['lr']

                    msg = '. '.join([
                        'epoch:{epoch}', 'iter/total_iter:{iter}/{total_iter}',
                        'lr:{lr:.7f}', 'loss:{loss:.4f}',
                        'spend/global_spend:{spend:.4f}/{global_spend}',
                        'eta:{eta}'
                    ]).format(epoch=epoch,
                              iter=iter_step,
                              total_iter=total_iter_step,
                              lr=lr,
                              loss=avg_loss,
                              spend=spend,
                              global_spend=global_spend,
                              eta=eta)
                    logger.info(msg)
                    writer.add_scalar('loss', avg_loss, iter_step)
                    writer.add_scalar('lr', lr, iter_step)

        # Final evaluation after training
        evalute(net, val_loader, writer, args.pre_epoch + args.normal_epoch,
                logger)

        out_name = osp.join(args.sub_model_out_dir, args.model_out_name)
        torch.save(net.cpu().state_dict(), out_name)

        logger.info('-----------Done!!!----------')

    except:
        logger.exception('Exception logged')
    finally:
        writer.close()
예제 #29
0
def train_with_exhaustive_testing(epoch):
    """
    to find best pair for a single picture
    :param epoch: epoch to train
    :return: accuracy
    """
    print('\n Epoch: %d' % epoch)
    train_loss = 0
    correct = 0
    total = 0
    # et_criterion = CustomLoss()
    et_criterion = nn.CrossEntropyLoss(reduction='none')
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        net.eval()
        best_pair_list = None
        lam_list = None
        with torch.no_grad():
            # starttime = datetime.datetime.now()

            for index in range(inputs.size()[0]):

                inputs_next, targets_a, targets_b, lam = exhaustive_mix_data_pre(inputs, targets, index, use_cuda=use_cuda)
                inputs_next, targets_a, targets_b = Variable(inputs_next), Variable(targets_a), Variable(targets_b)
                outputs = net(inputs_next)
                loss_func = mixup_criterion(targets_a, targets_b, lam)
                best_pair = loss_func(et_criterion, outputs)
                best_pair = best_pair.argmax()
                if best_pair_list is None:
                    best_pair_list = best_pair.unsqueeze(0)
                    lam_list = torch.tensor([lam], dtype=torch.float)
                else:
                    best_pair_list = torch.cat((best_pair_list, best_pair.unsqueeze(0)), 0)
                    lam = torch.tensor([lam], dtype=torch.float)
                    lam_list = torch.cat((lam_list, lam), 0)

            # find_pair_time = datetime.datetime.now()
            # print(str((find_pair_time - starttime).microseconds) + "======")

        net.train()
        with torch.enable_grad():
            optimizer.zero_grad()
            lam_list = lam_list.cuda()
            inputs, targets_a, targets_b, lam = exhausitive_mix_data(inputs, best_pair_list, targets, lam_list)
            loss_func = mixup_criterion(targets_a, targets_b, lam)
            outputs = net(inputs)
            loss = loss_func(criterion, outputs)
            loss.backward()
            optimizer.step()

        # train_time = datetime.datetime.now()

        # print(str((train_time - find_pair_time).microseconds) + "--------")

        train_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        # correct += lam * predicted.eq(targets_a.data).cpu().sum() + (1 - lam) * predicted.eq(targets_b.data).cpu().sum()
        # correct = correct.item()
        correct = 0
        print(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))
    return (train_loss / (batch_idx + 1), 100. * correct / total)
예제 #30
0
def train(args, i):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      holdout_fold: '1' | 'none', set 1 for development and none for training 
          on all data without validation
      model_type: string, e.g. 'Cnn_9layers_AvgPooling'
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    audio_num = config.audio_num
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = None  # Number of mini-batches to evaluate on training data
    reduce_lr = True
    in_domain_classes_num = len(config.labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_csv = os.path.join(sys.path[0], 'fold' + str(i) + '_train.csv')

    validate_csv = os.path.join(sys.path[0], 'fold' + str(i) + '_test.csv')

    feature_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second,
                                                mel_bins))

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second,
                                                mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')

    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model
    Model = eval(model_type)

    model = Model(in_domain_classes_num, activation='logsoftmax')
    loss_func = nll_loss

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)
    #     optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)
    # Data generator
    data_generator = DataGenerator(feature_hdf5_path=feature_hdf5_path,
                                   train_csv=train_csv,
                                   validate_csv=validate_csv,
                                   holdout_fold=holdout_fold,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 100 == 0 and iteration >= 1500:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            train_statistics = evaluator.evaluate(data_type='train',
                                                  iteration=iteration,
                                                  max_iteration=None,
                                                  verbose=False)

            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(data_type='validate',
                                                         iteration=iteration,
                                                         max_iteration=None,
                                                         verbose=False)
                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()


        # Save model
        if iteration % 100 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 100 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        for clip_idx in range(audio_num):  # renamed from `i` to avoid shadowing the fold index argument
            model.train()
            data, target_a, target_b, lam = mixup_data(
                x=batch_data_dict['feature'][:, clip_idx, :, :],
                y=batch_data_dict['target'],
                alpha=0.2)
            batch_output = model(data)
            #         batch_output = model(batch_data_dict['feature'])
            # loss: only the mixup criterion is used (a plain loss_func call here would be overwritten)
            loss = mixup_criterion(loss_func, batch_output, target_a, target_b,
                                   lam)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Stop learning
        if iteration == 4000:
            break

        iteration += 1
예제 #31
0
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    p1 = 0
    p2 = 0
    p3 = 0
    p4 = 0
    correct = 0
    total = 0
    for batch_idx, ((m_in, m_tar), (f_in, f_tar), (c_in, c_tar),
                    (C_in, C_tar)) in enumerate(
                        zip(m_train, f_train, c_train, C_train)):
        # generate mixed inputs, two one-hot label vectors and mixing coefficient
        lam = 0
        inputs = torch.cat((m_in, f_in, c_in, C_in), 1)
        if use_cuda:
            inputs, targets_a, targets_b, targets_c, targets_d = inputs.cuda(
            ), m_tar.type(torch.LongTensor).cuda(), f_tar.type(
                torch.LongTensor).cuda(), c_tar.type(
                    torch.LongTensor).cuda(), C_tar.type(
                        torch.LongTensor).cuda()
        optimizer.zero_grad()
        inputs, targets_a, targets_b, targets_c, targets_d = Variable(
            inputs), Variable(targets_a), Variable(targets_b), Variable(
                targets_c), Variable(targets_d)
        outputs, outputs2, outputs3, outputs4 = net(inputs, targets_a,
                                                    targets_b, targets_c,
                                                    targets_d)

        #rand = np.random.uniform(1, 1)
        loss_func = mixup_criterion(targets_a, targets_b, targets_c, targets_d,
                                    lam)
        loss = loss_func(criterion, outputs, outputs2, outputs3, outputs4)
        loss.backward()
        optimizer.step()

        train_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        _, predicted2 = torch.max(outputs2.data, 1)
        _, predicted3 = torch.max(outputs3.data, 1)
        _, predicted4 = torch.max(outputs4.data, 1)
        total += targets_a.size(0)
        # --------------
        prec1 = predicted.eq(targets_a.data).cpu().sum()
        prec2 = predicted2.eq(targets_b.data).cpu().sum()
        prec3 = predicted3.eq(targets_c.data).cpu().sum()
        prec4 = predicted4.eq(targets_d.data).cpu().sum()
        p1 += prec1
        p2 += prec2
        p3 += prec3
        p4 += prec4

        correct += (prec1 + prec2 + prec3 + prec4) / 4  # average over the four output heads
        progress_bar(
            batch_idx, len(m_train),
            'Loss: %.3f |P1: %.3f |P2: %.3f |P3: %.3f| P4: %.3f| Acc: %.3f%% (%d/%d)'
            %
            (train_loss /
             (batch_idx + 1), 100. * p1 / total, 100. * p2 / total, 100. * p3 /
             total, 100. * p4 / total, 100. * correct / total, correct, total))
    return (train_loss / batch_idx, 100. * correct / total, 100. * p1 / total,
            100. * p2 / total, 100. * p3 / total, 100. * p4 / total, lam)