# Example #1
# 0
    def train_fn(self, optimizer, criterion, loader, device, train=True):
        """
        Run one optimization pass over ``loader``, updating the model weights.

        :param optimizer: optimization algorithm
        :param criterion: loss function
        :param loader: data loader for either training or testing set
        :param device: torch device
        :param train: boolean to indicate if training or test set is used
            (NOTE(review): currently unused -- the model is always put in
            train mode and weights are always updated; confirm intent)
        :return: (accuracy, loss) on the data
        """
        acc_meter = AvgrageMeter()
        loss_meter = AvgrageMeter()
        self.train()

        progress = tqdm(loader)
        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            predictions = self(batch_images)
            batch_loss = criterion(predictions, batch_labels)
            batch_loss.backward()
            optimizer.step()

            top1, _ = accuracy(predictions, batch_labels, topk=(1, 5))
            batch_size = batch_images.size(0)
            loss_meter.update(batch_loss.item(), batch_size)
            acc_meter.update(top1.item(), batch_size)

            progress.set_description(
                '(=> Training) Loss: {:.4f}'.format(loss_meter.avg))

        return acc_meter.avg, loss_meter.avg
def infer(val_dataprovider, model, criterion, fair_arc_list, val_iters,
          archloader):
    """
    Evaluate ``model`` on ``val_iters`` batches using the first architecture
    in ``fair_arc_list``.

    :param val_dataprovider: data provider exposing a ``next()`` method
    :param model: supernet called as ``model(image, arch_string)``
    :param criterion: loss function
    :param fair_arc_list: candidate architectures; only element [0] is used
    :param val_iters: number of validation batches to evaluate
    :param archloader: helper converting an architecture list to string form
    :return: (average top-1 accuracy, average loss)
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    model.eval()
    now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print('{} |=> Test rng = {}'.format(now, fair_arc_list[0]))

    # Defaults guard the summary print below: the original raised NameError
    # on ``step``/``datatime`` when val_iters == 0.
    step, datatime = -1, 0.0
    with torch.no_grad():
        for step in range(val_iters):
            t0 = time.time()
            image, target = val_dataprovider.next()
            datatime = time.time() - t0
            # torch.autograd.Variable is deprecated; tensors work directly.
            image = image.cuda()
            target = target.cuda()
            logits = model(image,
                           archloader.convert_list_arc_str(fair_arc_list[0]))
            loss = criterion(logits, target)

            prec1, _ = accuracy(logits, target, topk=(1, 5))
            n = image.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)

        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        print(
            '{} |=> valid: step={}, loss={:.2f}, acc={:.2f}, datatime={:.2f}'.
            format(now, step, objs.avg, top1.avg, datatime))

    return top1.avg, objs.avg
def train(train_dataloader, val_dataloader, optimizer, scheduler, model, archloader, criterion, args, seed, epoch, writer=None):
    """
    Train the supernet for one epoch with SPOS-style single-path sampling.

    :param train_dataloader: training data loader
    :param val_dataloader: validation data loader (unused here)
    :param optimizer: optimization algorithm
    :param scheduler: learning-rate scheduler (read for the progress bar)
    :param model: supernet called as ``model(image, arch_list)``
    :param archloader: sampler providing ``generate_spos_like_batch()``
    :param criterion: loss function
    :param args: namespace with epochs, gpu, nprocs, local_rank, batch_size
    :param seed: random seed (unused here; kept for interface compatibility)
    :param epoch: current epoch index (0-based)
    :param writer: optional TensorBoard SummaryWriter
    """
    losses_, top1_, top5_ = AvgrageMeter(), AvgrageMeter(), AvgrageMeter()

    model.train()

    train_loader = tqdm(train_dataloader)
    train_loader.set_description(
        '[%s%04d/%04d %s%f]' % ('Epoch:', epoch + 1, args.epochs, 'lr:', scheduler.get_last_lr()[0]))
    for step, (image, target) in enumerate(train_loader):
        n = image.size(0)
        # torch.autograd.Variable is deprecated; tensors work directly.
        image = image.cuda(args.gpu, non_blocking=True)
        target = target.cuda(args.gpu, non_blocking=True)

        # SPOS-like sampling: one random single path per step,
        # e.g. [16, 16, ..., 32, ..., 64, ...]
        spos_arc_list = archloader.generate_spos_like_batch().tolist()

        optimizer.zero_grad()
        logits = model(image, spos_arc_list[:-1])
        loss = criterion(logits, target)
        prec1, prec5 = accuracy(logits, target, topk=(1, 5))

        if torch.cuda.device_count() > 1:
            # Synchronize ranks, then average metrics across processes.
            torch.distributed.barrier()

            loss = reduce_mean(loss, args.nprocs)
            prec1 = reduce_mean(prec1, args.nprocs)
            prec5 = reduce_mean(prec5, args.nprocs)

        loss.backward()
        optimizer.step()

        losses_.update(loss.item(), n)
        top1_.update(prec1.item(), n)
        # BUG FIX: the top-5 meter previously accumulated prec1.
        top5_.update(prec5.item(), n)

        postfix = {'train_loss': '%.6f' % (
            losses_.avg), 'train_acc1': '%.6f' % top1_.avg, 'train_acc5': '%.6f' % top5_.avg}
        train_loader.set_postfix(log=postfix)

        if args.local_rank == 0 and step % 10 == 0 and writer is not None:
            # One consistent global-step for all three scalars (the original
            # used two differently-written expressions for the same value).
            global_step = step + len(train_dataloader) * epoch * args.batch_size
            writer.add_scalar("Train/loss", losses_.avg, global_step)
            writer.add_scalar("Train/acc1", top1_.avg, global_step)
            writer.add_scalar("Train/acc5", top5_.avg, global_step)
def train(train_dataprovider, val_dataprovider, optimizer, scheduler, model,
          archloader, criterion, args, val_iters, seed):
    """
    Train the supernet for ``args.total_iters`` steps with fair sampling,
    then evaluate once on rank 0.

    Gradients from every architecture in a fair batch are accumulated before
    a single optimizer step.

    :param train_dataprovider: training provider exposing ``next()``
    :param val_dataprovider: validation provider passed to ``infer``
    :param optimizer: optimization algorithm
    :param scheduler: learning-rate scheduler (stepped every iteration)
    :param model: supernet; ``model.module`` is used for inference, so a
        DataParallel/DDP wrapper is assumed -- TODO confirm
    :param archloader: fair-batch architecture sampler/converter
    :param criterion: loss function
    :param args: namespace with total_iters, gpu, grad_clip, report_freq,
        local_rank
    :param val_iters: number of validation batches for the final ``infer``
    :param seed: seed value (only echoed in the log line)
    """
    objs, top1 = AvgrageMeter(), AvgrageMeter()

    # Pre-materialize .grad buffers so accumulation starts from zeros.
    for p in model.parameters():
        p.grad = torch.zeros_like(p)

    fair_arc_list = None  # guards the final report when total_iters == 0
    for step in range(args.total_iters):
        model.train()
        t0 = time.time()
        image, target = train_dataprovider.next()
        datatime = time.time() - t0
        n = image.size(0)
        optimizer.zero_grad()
        # torch.autograd.Variable is deprecated; tensors work directly.
        image = image.cuda(args.gpu)
        target = target.cuda(args.gpu)

        # Fair sampling: a batch of architectures covering each candidate op.
        fair_arc_list = archloader.generate_niu_fair_batch()

        # Accumulate gradients over all sampled architectures, then step once.
        for arc in fair_arc_list:
            logits = model(image, archloader.convert_list_arc_str(arc))
            loss = criterion(logits, target)
            loss.backward()

        nn.utils.clip_grad_value_(model.parameters(), args.grad_clip)
        optimizer.step()
        scheduler.step()

        # NOTE: metrics reflect only the last architecture of the fair batch.
        prec1, _ = accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)

        if step % args.report_freq == 0 and args.local_rank == 0:
            now = time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))
            print(
                '{} |=> train: {} / {}, lr={}, loss={:.2f}, acc={:.2f}, datatime={:.2f}, seed={}'
                .format(now, step, args.total_iters,
                        scheduler.get_last_lr()[0], objs.avg, top1.avg,
                        float(datatime), seed))

    # Guard avoids a NameError on fair_arc_list when total_iters == 0.
    if args.local_rank == 0 and fair_arc_list is not None:
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        print('{} |=> Test rng = {}'.format(now, fair_arc_list[0]))
        infer(val_dataprovider, model.module, criterion, fair_arc_list,
              val_iters, archloader)
# Example #5
# 0
def train(train_dataprovider, val_dataprovider, optimizer, scheduler, model, archloader, criterion, args, val_iters, seed, writer=None):
    """
    Train the supernet for ``args.total_iters`` steps with fair sampling,
    periodically validating and checkpointing on rank 0.

    :param train_dataprovider: training provider exposing ``next()``
    :param val_dataprovider: validation provider passed to ``infer``
    :param optimizer: optimization algorithm
    :param scheduler: learning-rate scheduler (stepped every iteration)
    :param model: supernet; ``model.module`` is used for inference, so a
        DataParallel/DDP wrapper is assumed -- TODO confirm
    :param archloader: fair-batch architecture sampler/converter
    :param criterion: loss function
    :param args: namespace with total_iters, gpu, grad_clip, report_freq,
        local_rank, world_size, exp
    :param val_iters: number of validation batches per ``infer`` call
    :param seed: seed value (only echoed in the log line)
    :param writer: optional TensorBoard SummaryWriter
    """
    objs, top1 = AvgrageMeter(), AvgrageMeter()

    # Pre-materialize .grad buffers so accumulation starts from zeros.
    for p in model.parameters():
        p.grad = torch.zeros_like(p)

    for step in range(args.total_iters):
        model.train()
        t0 = time.time()
        image, target = train_dataprovider.next()
        datatime = time.time() - t0
        n = image.size(0)
        optimizer.zero_grad()
        # torch.autograd.Variable is deprecated; tensors work directly.
        image = image.cuda(args.gpu)
        target = target.cuda(args.gpu)

        # Fair Sampling
        fair_arc_list = archloader.generate_niu_fair_batch()

        # Accumulate gradients over all sampled architectures, then step once.
        for arc in fair_arc_list:
            logits = model(image, archloader.convert_list_arc_str(arc))
            loss = criterion(logits, target)
            loss_reduce = reduce_tensor(loss, 0, args.world_size)
            loss.backward()

        nn.utils.clip_grad_value_(model.parameters(), args.grad_clip)
        optimizer.step()
        scheduler.step()

        # NOTE: metrics reflect only the last architecture of the fair batch.
        prec1, _ = accuracy(logits, target, topk=(1, 5))
        objs.update(loss_reduce.item(), n)
        top1.update(prec1.item(), n)

        if step % args.report_freq == 0 and args.local_rank == 0:
            now = time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))
            print('{} |=> train: {} / {}, lr={}, loss={:.2f}, acc={:.2f}, datatime={:.2f}, seed={}'
                  .format(now, step, args.total_iters, scheduler.get_last_lr()[0], objs.avg, top1.avg, float(datatime), seed))

        if args.local_rank == 0 and step % 5 == 0 and writer is not None:
            writer.add_scalar("Train/loss", objs.avg, step)
            writer.add_scalar("Train/acc1", top1.avg, step)

        if args.local_rank == 0 and step % args.report_freq == 0:
            # BUG FIX: ``infer`` takes (val_dataprovider, model, criterion,
            # fair_arc_list, val_iters, archloader); the extra leading
            # ``train_dataprovider`` argument made this call raise TypeError.
            top1_val, objs_val = infer(val_dataprovider, model.module,
                                       criterion, fair_arc_list, val_iters,
                                       archloader)

            if writer is not None:
                writer.add_scalar("Val/loss", objs_val, step)
                writer.add_scalar("Val/acc1", top1_val, step)

            save_checkpoint(
                {'state_dict': model.state_dict(), }, step, args.exp)
# Example #6
# 0
def validate(model, device, args, *, all_iters=None, arch_loader=None):
    """
    Evaluate a sampled set of architectures from ``arch_loader`` on the
    validation set and log per-architecture results.

    :param model: supernet called as ``model(data, arch)``
    :param device: torch device
    :param args: namespace providing loss_function, val_loader, batch_size
    :param all_iters: global iteration counter used for TensorBoard steps
    :param arch_loader: provider of the architecture dict (required)

    NOTE(review): ``writer`` is referenced as a global and must exist at
    module level. The meters are shared across architectures, so
    ``result_dict[key]`` is the running average over all architectures
    evaluated so far, not a per-architecture accuracy -- confirm intent.
    """
    assert arch_loader is not None

    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_loader = args.val_loader

    model.eval()
    t1 = time.time()

    result_dict = {}

    arch_dict = arch_loader.get_part_dict()

    with torch.no_grad():
        for ii, (key, value) in enumerate(arch_dict.items()):
            for data, target in val_loader:
                target = target.type(torch.LongTensor)
                data, target = data.to(device), target.to(device)

                output = model(data, value["arch"])
                loss = loss_function(output, target)

                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                n = data.size(0)
                objs.update(loss.item(), n)

                top1.update(acc1.item(), n)
                top5.update(acc5.item(), n)

            # BUG FIX: ``if ii % 5:`` fired on every iteration NOT divisible
            # by 5; periodic logging (as used elsewhere in this file) needs
            # the opposite condition.
            if ii % 5 == 0:
                logging.info("validate acc:{:.6f} iter:{}".format(
                    top1.avg / 100, ii))
                # One shared global-step for the three scalars below.
                tb_step = all_iters * len(val_loader) * args.batch_size + ii
                writer.add_scalar("Val/Loss", loss.item(), tb_step)
                writer.add_scalar("Val/acc1", acc1.item(), tb_step)
                writer.add_scalar("Val/acc5", acc5.item(), tb_step)

            result_dict[key] = top1.avg

    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 acc = {:.6f},\t'.format(top1.avg) + \
              'Top-5 acc = {:.6f},\t'.format(top5.avg) + \
              'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)

    logging.info("RESULTS")
    for ii, (key, value) in enumerate(result_dict.items()):
        logging.info("{: ^10}  \t  {:.6f}".format(key, value))
        # Only show the first few architectures in the log.
        if ii > 10:
            break
    logging.info("E N D")
# Example #7
# 0
    def eval_fn(self, loader, device, train=False):
        """
        Compute the model's average top-1 accuracy over ``loader``.

        :param loader: data loader for either training or testing set
        :param device: torch device
        :param train: boolean to indicate if training or test set is used
        :return: accuracy on the data
        """
        acc_meter = AvgrageMeter()
        self.eval()

        progress = tqdm(loader)
        with torch.no_grad():  # inference only; gradients are not needed
            for batch_images, batch_labels in progress:
                batch_images = batch_images.to(device)
                batch_labels = batch_labels.to(device)

                predictions = self(batch_images)
                top1, _ = accuracy(predictions, batch_labels, topk=(1, 5))
                acc_meter.update(top1.item(), batch_images.size(0))

                progress.set_description(
                    '(=> Test) Score: {:.4f}'.format(acc_meter.avg))

        return acc_meter.avg