Example #1
def test(validate_loader, device, model, criterion):
    val_acc = 0.0
    model = model.to(device)
    model.eval()
    confuse_meter = ConfuseMeter()
    with torch.no_grad():  # no gradient updates during evaluation
        val_top1 = AvgrageMeter()
        validate_loader = tqdm(validate_loader)
        validate_loss = 0.0
        for i, data in enumerate(validate_loader, 0):  # start index 0 (enumerate's default)
            inputs, labels, batch_seq_len = data[0].to(device), data[1].to(
                device), data[2]
            #         inputs,labels = data[0],data[1]
            outputs, _ = model(inputs, batch_seq_len)
            #             loss = criterion(outputs, labels)

            prec1, prec2 = accuracy(outputs, labels, topk=(1, 2))
            n = inputs.size(0)
            val_top1.update(prec1.item(), n)
            confuse_meter.update(outputs, labels)
            #             validate_loss += loss.item()
            postfix = {
                'test_acc': '%.6f' % val_top1.avg,
                'confuse_acc': '%.6f' % confuse_meter.acc
            }
            validate_loader.set_postfix(log=postfix)
        val_acc = val_top1.avg
    return confuse_meter
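
All of the examples in this section call an AvgrageMeter running-average helper and an accuracy(output, target, topk=...) top-k helper that are not defined here (Example #1 additionally uses a ConfuseMeter that is assumed to expose update(outputs, labels) and .acc). The following is a minimal sketch of the interface the examples appear to assume; the exact implementations are assumptions, not the original code.

class AvgrageMeter(object):
    """Running average of a scalar; assumed interface: update(val, n), avg, reset()."""

    def __init__(self, name=''):
        self.name = name
        self.reset()

    def reset(self):
        self.avg, self.sum, self.cnt = 0.0, 0.0, 0

    def update(self, val, n=1):
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt

    def __str__(self):
        # Examples #12/#13 format the meter object directly inside log strings.
        return '{}={:.4f}'.format(self.name, self.avg)


def accuracy(output, target, topk=(1,)):
    """Top-k accuracy in percent, one tensor per k (assumed behaviour)."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
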
Example #2
def train(epoch, epochs, train_loader, device, model, criterion, optimizer,
          scheduler, tensorboard_path):
    model.train()
    top1 = AvgrageMeter()
    model = model.to(device)
    train_loss = 0.0
    # create the TensorBoard writer once, before the batch loop
    if not os.path.exists(tensorboard_path):
        os.mkdir(tensorboard_path)
    writer = SummaryWriter(tensorboard_path)
    for i, data in enumerate(train_loader, 0):  # start index 0 (enumerate's default)
        inputs, labels, batch_seq_len = data[0].to(device), data[1].to(
            device), data[2]
        # clear the gradients left over from the previous batch
        optimizer.zero_grad()
        outputs, hidden = model(inputs, batch_seq_len)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        _, pred = outputs.topk(1)
        prec1, prec2 = accuracy(outputs, labels, topk=(1, 2))
        n = inputs.size(0)
        top1.update(prec1.item(), n)
        train_loss += loss.item()
        postfix = {
            'train_loss': '%.6f' % (train_loss / (i + 1)),
            'train_acc': '%.6f' % top1.avg
        }
        train_loader.set_postfix(log=postfix)

        # TensorBoard curve plotting
        writer.add_scalar('Train/Loss', loss.item(), epoch)
        writer.add_scalar('Train/Accuracy', top1.avg, epoch)
        writer.flush()
    scheduler.step()
Example #3
    def update_MLP(self):
        all_archs = torch.zeros(self.max, 6).cuda()
        all_target = torch.zeros(self.max).cuda()
        self.MLP.train()
        for i, structure_father in enumerate(self.group):
            all_archs[i][:] = torch.tensor([
                item for sublist in structure_father.structure
                for item in sublist
            ])[:]
            all_target[i] = structure_father.loss

        indx = all_target < 15
        all_archs = all_archs[indx, :]
        all_target = all_target[indx]
        epoch = 20
        objs = AvgrageMeter()
        batch_size = 32

        for i in range(epoch):
            start = (batch_size * i) % all_archs.size(0)
            end = start + batch_size
            archs = all_archs[start:end]
            target = all_target[start:end]
            output = self.MLP(archs)
            loss = self.criterion(output, target)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            n = archs.size(0)
            objs.update(loss.item(), n)

        logInfo = 'MLP: loss = {:.6f},\t'.format(objs.avg)
        logging.info(logInfo)
Example #4
def validate(epoch, validate_loader, device, model, criterion,
             tensorboard_path):
    val_acc = 0.0
    model = model.to(device)
    model.eval()
    # create the TensorBoard writer once, before the batch loop
    if not os.path.exists(tensorboard_path):
        os.mkdir(tensorboard_path)
    writer = SummaryWriter(tensorboard_path)
    with torch.no_grad():  # no gradient updates during evaluation
        val_top1 = AvgrageMeter()
        validate_loader = tqdm(validate_loader)
        validate_loss = 0.0
        for i, data in enumerate(validate_loader, 0):  # start index 0 (enumerate's default)
            inputs, labels, batch_seq_len = data[0].to(device), data[1].to(
                device), data[2]
            #         inputs,labels = data[0],data[1]
            outputs, _ = model(inputs, batch_seq_len)
            loss = criterion(outputs, labels)

            prec1, prec2 = accuracy(outputs, labels, topk=(1, 2))
            n = inputs.size(0)
            val_top1.update(prec1.item(), n)
            validate_loss += loss.item()
            postfix = {
                'validate_loss': '%.6f' % (validate_loss / (i + 1)),
                'validate_acc': '%.6f' % val_top1.avg
            }
            validate_loader.set_postfix(log=postfix)

            # TensorBoard curve plotting
            writer.add_scalar('Validate/Loss', loss.item(), epoch)
            writer.add_scalar('Validate/Accuracy', val_top1.avg, epoch)
            writer.flush()
        val_acc = val_top1.avg
    return val_acc
Example #5
    def eval_fn(self, loader, device, train=False, confusion_m=False, criterion=None):
        """
        Evaluation method
        :param loader: data loader for either training or testing set
        :param device: torch device
        :param train: boolean to indicate if training or test set is used
        :return: (accuracy, loss) on the data
        """
        objs = AvgrageMeter()
        score = AvgrageMeter()
        self.eval()
        with torch.no_grad():
            for step, (images, labels) in enumerate(loader):
                images = images.to(device)
                labels = labels.to(device)
                outputs = self(images)

                acc, _ = accuracy(outputs, labels, topk=(1, 5))
                score.update(acc.item(), images.size(0))

                if criterion is not None:
                    loss = criterion(outputs, labels)
                    objs.update(loss.item(), images.size(0))

                if step % self.report_freq == 0:
                    logging.info('Evaluation | step: %d | accuracy: %f' % (step, score.avg))   

        return score.avg, objs.avg
Example #6
def aggregation(data_train, data_test, args, clientIDs, model, optimizer):
    mean_train_acc, mean_train_loss = AvgrageMeter(), AvgrageMeter()
    mean_test_acc, mean_test_loss = AvgrageMeter(), AvgrageMeter()

    initial_weights = copy.deepcopy(model.state_dict())

    num_examples = []
    weight_dict_list = []

    for clientID in clientIDs:
        model.load_state_dict(initial_weights)
        model, train_acc, train_loss, test_acc_list, test_loss_list = lstm_train(
            data_train[clientID], data_test[clientID], args, model, optimizer,
            1)
        num_examples.append(len(data_train[clientID]))
        # keep a copy of this client's updated state_dict
        weight_dict_list.append(copy.deepcopy(model.state_dict()))
        mean_train_acc.update(train_acc, 1)
        mean_train_loss.update(train_loss, 1)
        mean_test_acc.update(test_acc_list[-1], 1)
        mean_test_loss.update(test_loss_list[-1], 1)

    # meta-learning
    for key in weight_dict_list[0].keys():
        for model_id in range(1, len(weight_dict_list)):
            weight_dict_list[0][key].add_(weight_dict_list[model_id][key])
        weight_dict_list[0][key].mul_(args.global_lr / len(clientIDs)).add_(
            (1 - args.global_lr) * initial_weights[key])

    return (weight_dict_list[0], mean_train_acc.avg, mean_train_loss.avg,
            mean_test_acc.avg, mean_test_loss.avg)
Example #7
def aggregation(data_train, data_test, args, clientIDs, model, optimizer):
    mean_train_acc, mean_train_loss = AvgrageMeter(), AvgrageMeter()
    mean_test_acc, mean_test_loss = AvgrageMeter(), AvgrageMeter()

    initial_weights = copy.deepcopy(model.state_dict())

    num_examples = []
    weight_dict_list = []

    for clientID in clientIDs:
        model.load_state_dict(initial_weights)
        model, train_acc, train_loss, test_acc_list, test_loss_list = client_update(
            data_train[clientID], data_test[clientID], args, model, optimizer,
            args.train_epochs)
        num_examples.append(len(data_train[clientID]))
        # keep a copy of this client's updated state_dict
        weight_dict_list.append(copy.deepcopy(model.state_dict()))
        mean_train_acc.update(train_acc, 1)
        mean_train_loss.update(train_loss, 1)
        mean_test_acc.update(test_acc_list[-1], 1)
        mean_test_loss.update(test_loss_list[-1], 1)

    # fedAveraging
    for key in weight_dict_list[0].keys():
        weight_dict_list[0][key] *= num_examples[0]
        for model_id in range(1, len(weight_dict_list)):
            weight_dict_list[0][key].add_(weight_dict_list[model_id][key] *
                                          num_examples[model_id])
        weight_dict_list[0][key].div_(np.sum(num_examples))

    return (weight_dict_list[0], mean_train_acc.avg, mean_train_loss.avg,
            mean_test_acc.avg, mean_test_loss.avg)
Example #8
    def train_fn(self, optimizer, criterion, loader, device, train=True):
        """
        Training method
        :param optimizer: optimization algorithm
        :param criterion: loss function
        :param loader: data loader for either training or testing set
        :param device: torch device
        :param train: boolean to indicate if training or test set is used
        :return: (accuracy, loss) on the data
        """
        score = AvgrageMeter()
        objs = AvgrageMeter()
        self.train()

        t = tqdm(loader)
        for images, labels in t:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            logits = self(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            acc, _ = accuracy(logits, labels, topk=(1, 5))
            n = images.size(0)
            objs.update(loss.item(), n)
            score.update(acc.item(), n)

            t.set_description('(=> Training) Loss: {:.4f}'.format(objs.avg))

        return score.avg, objs.avg
Example #9
    def eval_fn(self, loader, device, train=False, confusion_m=False, criterion=None):
        """
        Evaluation method
        :param loader: data loader for either training or testing set
        :param device: torch device
        :param train: boolean to indicate if training or test set is used
        :return: (accuracy, loss) on the data
        """
        objs = AvgrageMeter()
        score = AvgrageMeter()
        self.eval()

        t = tqdm(loader)
        with torch.no_grad():
            for images, labels in t:
                images = images.to(device)
                labels = labels.to(device)

                outputs = self(images)
                acc, _ = accuracy(outputs, labels, topk=(1, 5))
                score.update(acc.item(), images.size(0))

                if criterion is not None:
                    loss = criterion(outputs, labels)
                    objs.update(loss.item(), images.size(0))

                if confusion_m:
                    # Plot confusion matrix
                    plot_confusion_matrix(labels.cpu(), outputs.topk(1, 1, True, True)[1].cpu(), normalize=True, title='Confusion matrix')

                t.set_description('(=> Test) Score: {:.4f}'.format(score.avg))

        return score.avg, objs.avg
Example #10
def validate(model, device, args, *, all_iters=None, arch_loader=None):
    assert arch_loader is not None

    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataloader = args.val_dataloader

    model.eval()
    # model.apply(bn_calibration_init)

    max_val_iters = 0
    t1 = time.time()

    result_dict = {}

    arch_dict = arch_loader.get_arch_dict()

    base_model = mutableResNet20(10).cuda()

    with torch.no_grad():
        for key, value in arch_dict.items():  # evaluate each candidate architecture
            max_val_iters += 1
            # reset the meters so each architecture gets its own accuracy
            top1.reset()
            top5.reset()
            # print('\r ', key, ' iter:', max_val_iters, end='')

            for data, target in val_dataloader:  # one pass over the validation set
                target = target.type(torch.LongTensor)
                data, target = data.to(device), target.to(device)

                output = model(data, value["arch"])

                prec1, prec5 = accuracy(output, target, topk=(1, 5))

                print("acc1: ", prec1.item())
                n = data.size(0)

                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

            tmp_dict = {}
            tmp_dict['arch'] = value['arch']
            tmp_dict['acc'] = top1.avg

            result_dict[key] = tmp_dict

    with open("acc_result.json", "w") as f:
        json.dump(result_dict, f)
Example #11
def eval_one_epoch():
    model.eval()
    acc = 0.0
    map_score_list = []
    label_list = []

    loss_absolute = AvgrageMeter()
    loss_contra = AvgrageMeter()

    for i, batch in enumerate(tqdm(val_dataloader)):
        # get the inputs
        with torch.no_grad():
            data, binary_mask, label = batch
            data, binary_mask, label = data.cuda(), binary_mask.cuda(
            ), label.cuda()

            optimizer.zero_grad()
            map_score = 0.0

            map_x, embedding, x_Block1, x_Block2, x_Block3, x_input = model(
                data)

            absolute_loss = criterion_absolute_loss(map_x, binary_mask)
            contrastive_loss = criterion_contrastive_loss(map_x, binary_mask)

            loss = absolute_loss + contrastive_loss

            n = data.size(0)
            loss_absolute.update(absolute_loss.data, n)
            loss_contra.update(contrastive_loss.data, n)

            map_score = torch.mean(map_x)

        map_score = 1.0 if map_score > 1 else map_score.item()
        map_score_list.append(map_score)

        # need another way to evaluate
        pred = 1 if map_score > 0.5 else 0
        acc += (pred == label.item())
        label_list.append(label.item())

    loss_avg = loss_absolute.avg + loss_contra.avg
    auc = metrics.roc_auc_score(label_list, map_score_list)

    print(
        'epoch:%d, Eval:  Absolute_Depth_loss= %.4f, Contrastive_Depth_loss= %.4f, Total Loss: %.4f, AUC: %.4f\n'
        % (epoch + 1, loss_absolute.avg, loss_contra.avg, loss_avg, auc))

    return acc / len(val_dataset), loss_avg, auc, map_score_list
Example #12
    def train(self, epochs, train_dl, log_freq = 1):

        loss_avg = AvgrageMeter('loss')
        acc_avg = AvgrageMeter('acc')
        epoch_loss_avg = AvgrageMeter('epoch_loss')
        epoch_acc_avg = AvgrageMeter('epoch_acc')

        last_epoch = self.epoch + epochs
        self.logger.info(f"begin training for {epochs} epochs")
        while self.epoch < last_epoch:
            self.epoch += 1
            batch_tic = time.time()
            epoch_tic = time.time()
            self.logger.info(
                f"Start train for epoch {self.epoch}/{last_epoch}")
            for step, (X, y) in enumerate(train_dl, 1):

                # perform a training step
                X = Variable(X, requires_grad = True).cuda()
                y = Variable(y,
                             requires_grad = False).cuda(non_blocking = True)
                pred, loss = self._step(X, y)

                # update status
                batch_size = y.size()[0]
                acc = torch.sum(pred == y).float() / batch_size
                loss_avg.update(loss)
                acc_avg.update(acc)
                epoch_loss_avg.update(loss)
                epoch_acc_avg.update(acc)

                # report status
                if step % log_freq == 0:
                    speed = 1.0 * (batch_size * log_freq) / (time.time() -
                                                             batch_tic)
                    self.logger.info(
                        "Epoch[%d]/[%d] Batch[%d] Speed: %.6f samples/sec %s %s"
                        % (self.epoch, last_epoch, step, speed, loss_avg,
                           acc_avg))
                    for avg in (loss_avg, acc_avg):
                        avg.reset()
                    batch_tic = time.time()

            self.logger.info("Epoch[%d]/[%d] Time: %.3f sec %s %s" %
                             (self.epoch, last_epoch, time.time() - epoch_tic,
                              epoch_loss_avg, epoch_acc_avg))
            self.train_acc.append(epoch_acc_avg.avg)
            self.train_loss.append(epoch_loss_avg.avg)
            for avg in (epoch_loss_avg, epoch_acc_avg):
                avg.reset()
            self._post_epoch()
        checkpoint_path = os.path.join(self.model_dir,
                                       f'checkpoint_{self.epoch}.tar')
        self.save_checkpoint(checkpoint_path)
        return self.train_acc, self.train_loss
Example #13
def infer(model):

    test_data = dset.CIFAR10(
        root=TestConfig['data_path'],
        train=False,
        download=True,
        transform=data_transforms_cifar10(0, False),
    )

    if DEBUG:
        sampler = torch.utils.data.sampler.SubsetRandomSampler(list(
            range(256)))
        test_queue = torch.utils.data.DataLoader(
            test_data,
            sampler=sampler,
            batch_size=TestConfig['batch_size'],
            shuffle=False,
            pin_memory=True,
            num_workers=16,
        )

    else:
        test_queue = torch.utils.data.DataLoader(
            test_data,
            batch_size=TestConfig['batch_size'],
            shuffle=False,
            pin_memory=True,
            num_workers=16,
        )

    model.eval().cuda()
    acc_avg = AvgrageMeter('acc')
    for step, (X, y) in enumerate(test_queue):
        X = Variable(X, requires_grad=False).cuda()
        y = Variable(y, requires_grad=False).cuda(non_blocking=True)
        logits, _ = model(X, TestConfig['drop_path_prob'])
        pred = torch.argmax(logits, dim=1)
        acc = torch.sum(pred == y).float() / TestConfig['batch_size']
        acc_avg.update(acc)

        if step % TestConfig['log_freq'] == 0:
            print(f"test batch {step}: {acc_avg}")
    print(f"Final test: {acc_avg}")
Example #14
def test(model, data_test_loader):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    criterion = torch.nn.CrossEntropyLoss().cuda()

    model.eval()
    with torch.no_grad():
        for i, (images_test, labels_test) in enumerate(data_test_loader):
            images_test, labels_test = images_test.cuda(), labels_test.cuda()
            output_test = model(images_test)
            loss_test = criterion(output_test, labels_test)
            prec_test, = accuracy(output_test, labels_test)

            n_test = images_test.size(0)
            objs.update(loss_test.item(), n_test)
            top1.update(prec_test.item(), n_test)
            if i % 50 == 0:
                print(f'Finished {i+1}/{len(data_test_loader)}')

    print(f'Avg Loss = {objs.avg}, Test Acc = {top1.avg}')
Example #15
def infer(valid_queue, model, criterion, local_rank, epoch, lr):
    loss_avg = AvgrageMeter()
    Acc_avg = AvgrageMeter()
    infer_time = AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda(local_rank, non_blocking=True)
            target = target.cuda(local_rank, non_blocking=True)
            end = time.time()
            logits = model(input)
            infer_time.update(time.time() - end)
            loss = criterion(logits, target)

            accuracy = calculate_accuracy(logits, target)
            n = input.size(0)

            torch.distributed.barrier()
            reduced_loss = reduce_mean(loss, args.nprocs)
            reduced_acc = reduce_mean(accuracy, args.nprocs)
            loss_avg.update(reduced_loss.item(), n)
            Acc_avg.update(reduced_acc.item(), n)

            if step % args.report_freq == 0 and local_rank == 0:
                logging.info(
                    'epoch: %d, mini-batch: %3d, inference time: %.4f, lr = %.5f, loss_CE= %.5f, Accuracy= %.4f'
                    % (epoch + 1, step + 1, infer_time.avg, lr, loss_avg.avg,
                       Acc_avg.avg))
    return Acc_avg.avg, loss_avg.avg
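
Examples #15 and #20 (the distributed ones) additionally call calculate_accuracy and reduce_mean helpers that are not shown. The sketch below assumes they follow the usual DDP pattern of computing a scalar tensor locally and averaging it across processes with an all-reduce; the names match the examples, but the bodies are assumptions rather than the original implementations.

import torch.distributed as dist


def calculate_accuracy(logits, target):
    # fraction of correct top-1 predictions, kept as a tensor so it can be
    # all-reduced across processes and .item()-ed afterwards
    pred = logits.argmax(dim=1)
    return pred.eq(target).float().mean()


def reduce_mean(tensor, nprocs):
    # average a scalar tensor over all nprocs processes
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= nprocs
    return rt
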
Example #16
def validate(model, args, *, all_iters=None, arch_loader=None):
    assert arch_loader is not None

    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_loader = args.val_loader

    model.eval()
    t1 = time.time()

    result_dict = {}

    arch_dict = arch_loader.get_part_dict()

    with torch.no_grad():
        for ii, (key, value) in enumerate(arch_dict.items()):
            for data, target in val_loader:
                target = target.type(torch.LongTensor)
                data, target = data.cuda(args.gpu), target.cuda(args.gpu)

                output = model(data, value["arch"])
                loss = loss_function(output, target)

                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                n = data.size(0)
                objs.update(loss.item(), n)

                top1.update(acc1.item(), n)
                top5.update(acc5.item(), n)

            if ii % 5 == 0:
                logging.info("validate acc:{:.6f} iter:{}".format(
                    top1.avg / 100, ii))
                writer.add_scalar(
                    "Val/Loss", loss.item(),
                    all_iters * len(val_loader) * args.batch_size + ii)
                writer.add_scalar(
                    "Val/acc1", acc1.item(),
                    all_iters * len(val_loader) * args.batch_size + ii)
                writer.add_scalar(
                    "Val/acc5", acc5.item(),
                    all_iters * len(val_loader) * args.batch_size + ii)

            result_dict[key] = top1.avg

    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 acc = {:.6f},\t'.format(top1.avg) + \
              'Top-5 acc = {:.6f},\t'.format(top5.avg) + \
              'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)

    logging.info("RESULTS")
    for ii, (key, value) in enumerate(result_dict.items()):
        logging.info("{: ^10}  \t  {:.6f}".format(key, value))
        if ii > 10:
            break
    logging.info("E N D")
Example #17
def validate(model, device, args, *, all_iters=None):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider

    model.eval()
    max_val_iters = 250
    t1 = time.time()
    with torch.no_grad():
        for _ in range(1, max_val_iters + 1):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
              'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
              'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
Example #18
def validate(model, device, args):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataloader = args.val_dataloader
    L = len(val_dataloader)

    model.eval()
    with torch.no_grad():
        data_iterator = enumerate(val_dataloader)
        for _ in tqdm(range(250)):
            _, data = next(data_iterator)
            target = data[1].type(torch.LongTensor)
            data, target = data[0].to(device), target.to(device)
            output = model(data)
            loss = loss_function(output, target)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item())
            top1.update(prec1.item())
            top5.update(prec5.item())

    if args.local_rank == 0:
        logInfo = 'TEST: loss = {:.6f},\t'.format(objs.avg) + \
                  'Top-1 err = {:.6f},\t'.format(100 - top1.avg) + \
                  'Top-5 err = {:.6f},\t'.format(100 - top5.avg)
        logging.info(logInfo)
Example #19
def train_one_epoch():
    model.train()
    loss_absolute = AvgrageMeter()
    loss_contra = AvgrageMeter()

    trange = tqdm(train_dataloader)

    for i, batch in enumerate(trange):
        # get the inputs
        data, binary_mask, label = batch
        data, binary_mask, label = data.cuda(), binary_mask.cuda(), label.cuda(
        )

        optimizer.zero_grad()

        # forward + backward + optimize
        map_x, embedding, x_Block1, x_Block2, x_Block3, x_input = model(data)

        absolute_loss = criterion_absolute_loss(map_x, binary_mask)
        contrastive_loss = criterion_contrastive_loss(map_x, binary_mask)

        loss = absolute_loss + contrastive_loss
        loss.backward()
        optimizer.step()

        n = data.size(0)
        loss_absolute.update(absolute_loss.data, n)
        loss_contra.update(contrastive_loss.data, n)

        postfix_dict = {
            "loss_absolute": absolute_loss.item(),
            "loss_contra": contrastive_loss.item(),
            "loss": loss.item()
        }
        trange.set_postfix(**postfix_dict)

    print(
        'epoch:%d, Train:  Absolute_Depth_loss= %.4f, Contrastive_Depth_loss= %.4f\n'
        % (epoch + 1, loss_absolute.avg, loss_contra.avg))
Example #20
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
          epoch, local_rank):
    model.train()
    loss_avg = AvgrageMeter()
    arc_loss_avg = AvgrageMeter()
    Acc_avg = AvgrageMeter()
    data_time = AvgrageMeter()
    prefetcher = data_prefetcher(valid_queue)
    input_search, target_search = prefetcher.next()
    end = time.time()
    for step, (input, target) in enumerate(train_queue):
        data_time.update(time.time() - end)
        n = input.size(0)
        input, target = map(lambda x: x.cuda(local_rank, non_blocking=True),
                            [input, target])
        if epoch >= args.warmUp:
            while input_search is not None:
                arc_loss = architect.step(
                    input_search.cuda(local_rank, non_blocking=True),
                    target_search.cuda(local_rank, non_blocking=True))
                arc_loss_avg.update(arc_loss.item(), input_search.size(0))
                input_search, target_search = prefetcher.next()
                break
        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)

        loss.backward()
        nn.utils.clip_grad_norm_(model.module.parameters(), args.grad_clip)
        optimizer.step()

        accuracy = calculate_accuracy(logits, target)

        torch.distributed.barrier()
        reduced_loss = reduce_mean(loss, args.nprocs)
        reduced_acc = reduce_mean(accuracy, args.nprocs)

        loss_avg.update(reduced_loss.item(), n)
        Acc_avg.update(reduced_acc.item(), n)

        if step % args.report_freq == 0 and local_rank == 0:
            logging.info(
                'epoch:%d, mini-batch:%3d, data time: %.5f, lr = %.5f, loss_CE = %.5f, loss_ARC = %.5f, Accuracy = %.4f'
                % (epoch + 1, step + 1, data_time.avg, lr, loss_avg.avg,
                   arc_loss_avg.avg, Acc_avg.avg))
        end = time.time()
    return Acc_avg.avg, loss_avg.avg
Example #21
    def eval_fn(self, loader, device, train=False):
        """
        Evaluation method
        :param loader: data loader for either training or testing set
        :param device: torch device
        :param train: boolean to indicate if training or test set is used
        :return: accuracy on the data
        """
        score = AvgrageMeter()
        self.eval()

        t = tqdm(loader)
        with torch.no_grad():  # no gradient needed
            for images, labels in t:
                images = images.to(device)
                labels = labels.to(device)

                outputs = self(images)
                acc, _ = accuracy(outputs, labels, topk=(1, 5))
                score.update(acc.item(), images.size(0))

                t.set_description('(=> Test) Score: {:.4f}'.format(score.avg))

        return score.avg
Example #22
    def train_fn(self, optimizer, criterion, loader, device, train=True):
        score = AvgrageMeter()
        objs = AvgrageMeter()
        self.train()

        for step, (images, labels) in enumerate(loader):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            logits = self(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            acc, _ = accuracy(logits, labels, topk=(1, 5))
            n = images.size(0)
            objs.update(loss.item(), n)
            score.update(acc.item(), n)

            if step % self.report_freq == 0:
                logging.info('Training | step: %d | loss: %e | accuracy: %f' % (step, objs.avg, score.avg))

        return score.avg, objs.avg
Example #23
def train_teacher(teacher, data_train_loader, data_test_loader, optimizer,
                  num_epochs):
    """ train a teacher model on a specified dataset
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    criterion = torch.nn.CrossEntropyLoss().cuda()

    for epoch in range(num_epochs):
        # train
        teacher.train()
        # reset the meters so training stats do not mix with the previous epoch's test stats
        objs.reset()
        top1.reset()
        for i, (images, labels) in enumerate(data_train_loader):
            images, labels = images.cuda(), labels.cuda()
            optimizer.zero_grad()
            output = teacher(images)
            loss = criterion(output, labels)

            loss.backward()
            prec, = accuracy(output, labels)
            optimizer.step()
            n = images.size(0)
            objs.update(loss.item(), n)
            top1.update(prec.item(), n)

            if i % 50 == 0:
                print(f'Epoch {epoch}/{num_epochs}, Batch {i}; '\
                      f'loss = {objs.avg}, acc = {top1.avg}')
        # test
        objs.reset()
        top1.reset()
        teacher.eval()

        with torch.no_grad():
            for images_test, labels_test in data_test_loader:
                images_test, labels_test = images_test.cuda(
                ), labels_test.cuda()
                output_test = teacher(images_test)
                loss_test = criterion(output_test, labels_test)
                prec_test, = accuracy(output_test, labels_test)

                n_test = images_test.size(0)
                objs.update(loss_test.item(), n_test)
                top1.update(prec_test.item(), n_test)

        print(f'Epoch {epoch}/{num_epochs}; Test Acc = {top1.avg}')
Example #24
def infer(args, epoch, valid_queue, net, criterion, mode='val', record_file=None):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    net.eval()

    test_loss = 0.0  # cost function error
    correct = 0.0

    for step, (images, labels, index) in enumerate(valid_queue):
        images = Variable(images)
        labels = Variable(labels)

        images = images.cuda()
        labels = labels.cuda()

        outputs = net(images)
        # loss = loss_function(outputs, labels)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        prec1, prec5 = accuracy(outputs, labels, topk=(1, 5))
        n = images.shape[0]
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
    if mode == 'val':
        print('Valid: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}'.format(epoch, objs.avg,
                                                                              top1.avg,
                                                                              top5.avg))
        if record_file is not None:
            with open(record_file, 'a') as f:
                f.write('Valid: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}\n'.format(epoch, objs.avg,
                                                                                          top1.avg,
                                                                                          top5.avg))
    else:
        print('Test: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}'.format(epoch, objs.avg,
                                                                             top1.avg,
                                                                             top5.avg))
        if record_file is not None:
            with open(record_file, 'a') as f:
                f.write('Test: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}\n'.format(epoch, objs.avg,
                                                                                         top1.avg,
                                                                                         top5.avg))

    # print()

    # add informations to tensorboard
    # writer.add_scalar('Test/Average loss', test_loss / len(cifar100_test_loader.dataset), epoch)
    # writer.add_scalar('Test/Accuracy', correct.float() / len(cifar100_test_loader.dataset), epoch)

    return top1.avg, top5.avg, objs.avg
Example #25
def infer(args,
          epoch,
          valid_queue,
          net,
          criterion,
          mode='val',
          record_file=None):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    net.eval()

    test_loss = 0.0  # cost function error
    correct = 0.0

    for step, (images, labels, index) in enumerate(valid_queue):
        images = Variable(images)
        labels = Variable(labels)

        images = images.cuda()
        labels = labels.cuda()
        with torch.no_grad():
            outputs = net(images)
            # loss = loss_function(outputs, labels)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

        prec1, prec5 = accuracy(outputs, labels, topk=(1, 5))
        n = images.shape[0]
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
    if mode == 'val':
        print('Valid: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}'.format(
            epoch, objs.avg, top1.avg, top5.avg))
        if record_file is not None:
            with open(record_file, 'a') as f:
                f.write(
                    'Valid: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}\n'.
                    format(epoch, objs.avg, top1.avg, top5.avg))
    else:
        print('Test: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}'.format(
            epoch, objs.avg, top1.avg, top5.avg))
        if record_file is not None:
            with open(record_file, 'a') as f:
                f.write(
                    'Test: epoch {}, loss {:.3f}, top1 {:.3f}, top5 {:.3f}\n'.
                    format(epoch, objs.avg, top1.avg, top5.avg))

    return top1.avg, top5.avg, objs.avg
Example #26
def validate(model, device, args, *, all_iters=None, arch_loader=None):
    assert arch_loader is not None

    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    # val_dataprovider = args.val_dataprovider
    val_loader = args.val_loader

    model.eval()
    max_val_iters = 250
    t1 = time.time()

    result_dict = {}

    # for speed, only evaluate the first 100 architectures
    arch_dict = dict(list(arch_loader.get_arch_dict().items())[:100])

    with torch.no_grad():
        for key, value in arch_dict.items():
            # reset the meters so each architecture gets its own accuracy
            objs.reset()
            top1.reset()
            top5.reset()
            # evaluate at most max_val_iters batches per architecture
            for step, (data, target) in enumerate(val_loader):
                if step >= max_val_iters:
                    break
                target = target.type(torch.LongTensor)
                data, target = data.to(device), target.to(device)

                output = model(data, value["arch"])
                loss = loss_function(output, target)

                prec1, prec5 = accuracy(output, target, topk=(1, 5))
                n = data.size(0)
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

            result_dict[key] = top1.avg / 100

    # logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
    #           'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
    #           'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
    #           'val_time = {:.6f}'.format(time.time() - t1)
    # logging.info(logInfo)

    print("=" * 50, "RESULTS", "=" * 50)
    for key, value in result_dict.items():
        print(key, "\t", value)
    print("=" * 50, "E N D", "=" * 50)
Example #27
def validate(model, device, args, *, all_iters=None, arch_loader=None):
    assert arch_loader is not None

    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataloader = args.val_dataloader

    model.eval()
    model.apply(bn_calibration_init)

    max_val_iters = 25
    t1 = time.time()

    result_dict = {}

    arch_dict = arch_loader.get_arch_dict()

    with torch.no_grad():
        for key, value in arch_dict.items():  # evaluate each candidate architecture
            max_val_iters -= 1
            print('\r ', key, ' iter:', max_val_iters, end='')
            if max_val_iters == 0:
                break
            # reset the meters so each architecture gets its own accuracy
            objs.reset()
            top1.reset()
            top5.reset()
            for data, target in val_dataloader:  # one pass over the validation set
                target = target.type(torch.LongTensor)
                data, target = data.to(device), target.to(device)

                output = model(data, value["arch"])
                loss = loss_function(output, target)

                prec1, prec5 = accuracy(output, target, topk=(1, 5))
                n = data.size(0)

                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

            result_dict[key] = top1.avg / 100

    print('\n', "=" * 10, "RESULTS", "=" * 10)
    for key, value in result_dict.items():
        print(key, "\t", value)
    print("=" * 10, "E N D", "=" * 10)
Example #28
def validate(model, device, args, *, all_iters=None):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    get_random_cand = lambda: tuple(np.random.randint(4) for i in range(20))
    flops_l, flops_r, flops_step = 290, 360, 10
    bins = [[i, i + flops_step] for i in range(flops_l, flops_r, flops_step)]

    def get_uniform_sample_cand(*, timeout=500):
        idx = np.random.randint(len(bins))
        l, r = bins[idx]
        for i in range(timeout):
            cand = get_random_cand()
            if l * 1e6 <= get_cand_flops(cand) <= r * 1e6:
                return cand
        return get_random_cand()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider
    cand = get_uniform_sample_cand()

    model.eval()
    max_val_iters = 250
    t1 = time.time()
    with torch.no_grad():
        for _ in range(1, max_val_iters + 1):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            output = model(data, cand)
            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
              'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
              'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
Example #29
def validate(model, device, args, *, all_iters=None):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider

    trained_group = args.evo_controller.trained_group

    t1 = time.time()
    with torch.no_grad():
        for i in range(len(trained_group)):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            structure_father = trained_group[i]
            structure = structure_father.structure

            output = model(data, structure)
            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            if structure_father.count == 0:
                structure_father.loss = float(loss.item()) + 1e-4  #initial
            else:
                structure_father.loss = (float(loss.item())) * (
                    1 - args.evo_controller.momentum
                ) + structure_father.loss * args.evo_controller.momentum

            structure_father.count += 1

    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
              'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
              'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
Example #30
def validate(model, device, args, *, all_iters=None, architecture=None):
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()

    loss_function = args.loss_function
    val_dataprovider = args.val_dataprovider

    model.eval()
    max_val_iters = int(args.test_interval / args.batch_size)
    t1 = time.time()
    with torch.no_grad():
        for _ in range(1, max_val_iters + 1):
            data, target = val_dataprovider.next()
            target = target.type(torch.LongTensor)
            data, target = data.to(device), target.to(device)

            if args.block == 4:
                # cifar_architecture = [0, 0, 0, 0, 0]
                batch = {'input': data, 'target': target}
                states = model(batch)
                output = states['logits']
                # output = model(data, cifar_architecture)
            elif args.block == 3:
                output = model(data, architecture)

            loss = loss_function(output, target)

            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 err = {:.6f},\t'.format(1 - top1.avg / 100) + \
              'Top-5 err = {:.6f},\t'.format(1 - top5.avg / 100) + \
              'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
    top1_acc = top1.avg / 100
    return top1_acc