def train(self, epoch):
        """Run one training epoch over self.train_loader.

        :param epoch: current epoch number (used only for logging)
        :return: (average loss, accuracy) accumulated over the epoch
        """
        self.model.train()

        train_loss = AverageMeter()
        train_acc = AccuracyMeter()

        for i, (x, y) in enumerate(self.train_loader):
            # NOTE(review): Variable is deprecated in modern PyTorch; tensors
            # can be used directly — kept as-is to match the file's style.
            x = Variable(x)
            y = Variable(y)
            if self.use_cuda:
                x = x.cuda()
                y = y.cuda()
            output = self.model(x)
            loss = F.cross_entropy(output, y)

            self.optimizer.zero_grad()
            loss.backward()
            # Prevent exploding gradients.
            # NOTE(review): torch's own clip_grad_norm expects parameters,
            # not an optimizer — presumably this is a project-local helper
            # that unpacks param groups; verify.
            clip_grad_norm(self.optimizer, max_norm=1)
            self.optimizer.step()

            train_loss.update(float(loss.data), x.size(0))

            # Predicted class = argmax over class scores.
            y_pred = output.data.max(dim=1)[1]

            #correct = int(y_pred.eq(y.data).cpu().sum())
            _, correct, _ = get_accuracy(y.data, y_pred)
            train_acc.update(correct, x.size(0))
            if i % 100 == 0:
                print(
                    '\nTrain Epoch/batch| [{}/{}]: Average batch loss:{:.6f},acc: {:.6f}\n'
                    .format(epoch, i, train_loss.average, train_acc.accuracy))

        #save_model_checkpoint(self.model,epoch,self.save)
        return train_loss.average, train_acc.accuracy
示例#2
0
def train(model, data_loader, optimizer, criterion, device):
    """Train *model* for one epoch and return the average loss.

    :param model: network to train (switched to train mode)
    :param data_loader: iterable of (image, target) batches
    :param optimizer: optimizer stepped once per batch
    :param criterion: loss function applied to (output, target)
    :param device: device batches are moved onto
    :return: per-sample average loss over the epoch
    """
    model.train()

    losses = AverageMeter()

    pbar = tqdm(data_loader)
    for image, target in pbar:
        image, target = image.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(image)

        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # Weight by batch size so the running average is per-sample.
        losses.update(loss.item(), image.size(0))

        pbar.set_description('\ttrain => loss {:.4f}'.format(losses.avg),
                             refresh=True)

    return losses.avg
示例#3
0
def train(epoch):
    """Train the module-level `model` for one epoch over `train_loader`.

    Relies on module globals: model, optimizer, criterion, args,
    train_loader, train_sampler. Logs the running loss every
    `args.print_freq` batches and on the last batch.

    :param epoch: current epoch index (for the distributed sampler and logs)
    """
    losses = AverageMeter()
    # switch to train mode
    model.train()
    if args.distribute:
        # Reshuffle data deterministically per epoch across workers.
        train_sampler.set_epoch(epoch)
    # Removed dead locals (`correct`, `preds`, `train_labels`) that were
    # initialised but never used anywhere in the function.
    for i, (image, label) in enumerate(train_loader):
        rate = get_learning_rate(optimizer)
        image, label = image.cuda(), label.cuda()

        output = model(image)
        loss = criterion(output, label)

        optimizer.zero_grad()

        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        losses.update(loss.item(), image.size(0))
        if i % args.print_freq == 0 or i == len(train_loader) - 1:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Rate:{rate}\t'
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t'.format(
                      epoch, i, len(train_loader), rate=rate, loss=losses))

    return
示例#4
0
    def validation(self, dataloader):
        """Evaluate the model on *dataloader* and return average accuracy.

        This function doesn't distinguish tasks.
        :param dataloader: yields (inputs, target, task) batches
        :return: average validation accuracy (val_acc.avg)
        """
        batch_timer = Timer()
        val_acc = AverageMeter()
        losses = AverageMeter()
        batch_timer.tic()

        # self.hypermodel.eval()
        self.model.eval()

        for i, (inputs, target, task) in enumerate(dataloader):
            # Fixed: previously `output`/`loss` were only assigned inside an
            # `if self.config['gpu']` guard, so a CPU run raised NameError on
            # the first batch. The forward pass now always runs (under
            # no_grad); only the .cuda() moves remain conditional.
            with torch.no_grad():
                if self.config['gpu']:
                    inputs = inputs.cuda()
                    target = target.cuda()

                output = self.model.forward(inputs)
                loss = self.criterion(output,
                                      target,
                                      task,
                                      regularization=False)
            losses.update(loss, inputs.size(0))
            for t in output.keys():
                output[t] = output[t].detach()
            # Summarize the performance of all tasks, or 1 task, depends on dataloader.
            # Calculated by total number of data.
            val_acc = accumulate_acc(output, target, task, val_acc)

        self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
            acc=val_acc, time=batch_timer.toc()))
        self.log(' * Val loss {loss.avg:.3f}, Total time {time:.2f}'.format(
            loss=losses, time=batch_timer.toc()))
        return val_acc.avg
示例#5
0
def validate():
    """Evaluate the module-level `model` on `val_loader`.

    :return: (average top-1 accuracy, average loss); when running
        distributed, both are first averaged across Horovod workers.
    """
    losses = AverageMeter()
    val_acc1 = AverageMeter()
    # switch to evaluate mode
    model.eval()

    for i, (image, label) in enumerate(val_loader):
        with torch.no_grad():
            image, label = image.cuda(), label.cuda()
            # compute output
            output = model(image)
            loss = criterion(output, label)

            # statistics
            val_acc = accuracy(output, label)
            val_acc1.update(val_acc.item(), image.size(0))
            losses.update(loss.item(), image.size(0))
            if i % args.print_freq == 0 or i == len(val_loader) - 1:
                print('[TEST]: {0}/{1}\tLoss {loss.val:.5f} ({loss.avg:.5f})'.
                      format(i, len(val_loader), loss=losses))

    if args.distribute:
        # Horovod: average metric values across workers.
        val_acc1.avg = metric_average(val_acc1.avg, 'val_acc')
        # Fixed typo: was `losses.vag = ...`, which created a dead attribute
        # and returned the un-averaged local loss below.
        losses.avg = metric_average(losses.avg, 'losses.avg')

    return val_acc1.avg, losses.avg
def test(data_loader, network, args):
    """Evaluate cross-modal retrieval over the whole loader.

    Embeds every image and caption into fixed-size banks, then computes
    top-1/top-10 retrieval accuracy in both directions.

    :param data_loader: yields (images, captions, labels, captions_length)
    :param network: model returning (image_embeddings, text_embeddings)
    :param args: must provide args.feature_size
    :return: (top1 i2t, top10 i2t, top1 t2i, top10 t2i, avg batch time)
    """
    batch_time = AverageMeter()

    # switch to evaluate mode
    network.eval()
    # Pre-allocated banks sized for up to 64 samples per batch —
    # assumes the loader's batch size never exceeds 64; TODO confirm.
    max_size = 64 * len(data_loader)
    images_bank = torch.zeros((max_size, args.feature_size)).cuda()
    text_bank = torch.zeros((max_size,args.feature_size)).cuda()
    labels_bank = torch.zeros(max_size).cuda()
    index = 0
    with torch.no_grad():
        end = time.time()
        for images, captions, labels, captions_length in data_loader:
            images = images.cuda()
            captions = captions.cuda()

            # Write this batch's embeddings into the banks at `index`.
            interval = images.shape[0]
            image_embeddings, text_embeddings = network(images, captions, captions_length)
            images_bank[index: index + interval] = image_embeddings
            text_bank[index: index + interval] = text_embeddings
            labels_bank[index:index + interval] = labels
            batch_time.update(time.time() - end)
            end = time.time()
            
            index = index + interval
        
        # Trim the banks to the number of samples actually seen.
        images_bank = images_bank[:index]
        text_bank = text_bank[:index]
        labels_bank = labels_bank[:index]
        #[ac_top1_t2i, ac_top10_t2i] = compute_topk(text_bank, images_bank, labels_bank, labels_bank, [1,10])
        #[ac_top1_i2t, ac_top10_i2t] = compute_topk(images_bank, text_bank, labels_bank, labels_bank, [1,10])
        ac_top1_i2t, ac_top10_i2t, ac_top1_t2i, ac_top10_t2i = compute_topk(images_bank, text_bank, labels_bank, labels_bank, [1,10], True)
        return ac_top1_i2t, ac_top10_i2t, ac_top1_t2i, ac_top10_t2i, batch_time.avg
示例#7
0
    def test(self, epoch):
        """Evaluate on self.test_dataloader, log top-1 accuracy, and
        (when args.is_save is set) dump per-image predictions to result.txt.

        :param epoch: epoch number, used only in the final log line
        """
        self.model.eval()
        top1 = AverageMeter()
        collected_lines = []
        for batch in self.test_dataloader:
            images = batch['images']
            labels = batch['labels']
            images_path = batch['images_path']
            if self.use_cuda:
                images = images.cuda()
                labels = labels.cuda()
            outputs = self.model(images)
            # Top-1 precision for this batch, accumulated sample-weighted.
            prec1 = accuracy(outputs.data, labels.data, topk=(1, ))
            top1.update(prec1[0].detach().item(), images.size(0))
            self.writer.add_scalar('test/acc', top1.val, self.iters)

            if self.args.is_save:
                # Per-sample predicted class and its softmax probability.
                probs, preds = outputs.softmax(dim=1).max(dim=1)
                probs = probs.view(-1)
                preds = preds.view(-1)
                for idx in range(images.size(0)):
                    collected_lines.append('{}\t{}\t{}\t{}\n'.format(
                        images_path[idx], labels[idx].item(),
                        preds[idx].item(), probs[idx].item()))
        if self.args.is_save:
            with open('result.txt', 'w') as f:
                f.writelines(collected_lines)
        self.acc = top1.avg
        print('Test epoch:{}, acc:{}'.format(epoch, top1.avg))
示例#8
0
def eval(model, data_loader, criterion, device):
    """Run a validation pass and collect loss / IoU statistics.

    :param model: network to evaluate (switched to eval mode)
    :param data_loader: iterable of (image, target) batches
    :param criterion: loss function applied to (output, target)
    :param device: device batches are moved onto
    :return: (average loss, list of per-batch IoU values)
    """
    model.eval()

    losses = AverageMeter()
    val_iou = []
    pbar = tqdm(data_loader)
    with torch.no_grad():
        for image, target in pbar:
            image = image.to(device)
            target = target.to(device)
            output = model(image)

            # Loss for this batch.
            loss = criterion(output, target)
            # IoU from the per-pixel argmax prediction.
            val_iou.append(get_iou(output.argmax(1), target))

            losses.update(loss.item(), image.size(0))

            pbar.set_description('eval loss {0}'.format(loss.item()),
                                 refresh=True)

    pbar.write('\teval => loss {:.4f}'.format(losses.avg))

    return losses.avg, val_iou
示例#9
0
    def validation(self, dataloader):
        """Evaluate accuracy over *dataloader* and restore train/eval mode.

        This function doesn't distinguish tasks.
        :param dataloader: yields (input, target, task) batches
        :return: average accuracy (acc.avg)
        """
        batch_timer = Timer()
        acc = AverageMeter()
        batch_timer.tic()

        # Remember the current train/eval state so it can be restored below.
        orig_mode = self.training
        self.eval()
        for i, (input, target, task) in enumerate(dataloader):

            if self.gpu:
                with torch.no_grad():
                    input = input.cuda()
                    target = target.cuda()
            # NOTE(review): this forward pass runs outside torch.no_grad(),
            # so autograd state is tracked during validation — presumably
            # self.predict detaches internally; verify.
            output = self.predict(input)

            # Summarize the performance of all tasks, or 1 task, depends on dataloader.
            # Calculated by total number of data.
            acc = accumulate_acc(output, target, task, acc)

        self.train(orig_mode)

        self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'
              .format(acc=acc,time=batch_timer.toc()))
        return acc.avg
示例#10
0
    def validation(self, dataloader, task_n=''):
        """Evaluate accuracy and loss over *dataloader*.

        this might possibly change for other incremental scenario
        This function doesn't distinguish tasks.
        :param dataloader: yields (input, target, task) batches
        :param task_n: task identifier forwarded to self.predict
        :return: (acc meter, losses meter)
        """
        batch_timer = Timer()
        # Fixed: `acc` was initialised twice; a single meter suffices.
        acc = AverageMeter()
        losses = AverageMeter()

        batch_timer.tic()

        # Remember the current train/eval state so it can be restored below.
        orig_mode = self.training
        self.eval()
        for i, (input, target, task) in enumerate(dataloader):

            if self.gpu:
                with torch.no_grad():
                    input = input.cuda()
                    target = target.cuda()

            output = self.predict(input, task_n)
            loss = self.criterion(output, target, task)
            losses.update(loss, input.size(0))
            # Summarize the performance of all tasks, or 1 task, depends on dataloader.
            # Calculated by total number of data.
            acc = accumulate_acc(output, target, task, acc)

        self.train(orig_mode)

        self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
            acc=acc, time=batch_timer.toc()))
        return acc, losses
示例#11
0
    def validate(self):
        """Evaluate the model on self.valid_loader.

        :return: (average loss, accuracy)
        """
        self.model.eval()

        valid_loss = AverageMeter()
        valid_acc = AccuracyMeter()

        for i, (x, y) in enumerate(self.valid_loader):
            # NOTE(review): Variable/volatile is deprecated; torch.no_grad()
            # is the modern equivalent — kept here to match the file's style.
            x = Variable(x, volatile=True)
            y = Variable(y).long()
            if self.use_cuda:
                x = x.cuda()
                y = y.cuda()
            output = self.model(x)
            loss = F.cross_entropy(output, y)

            valid_loss.update(float(loss.data))

            y_pred = output.data.max(dim=1)[1]
            correct = int(y_pred.eq(y.data).cpu().sum())
            valid_acc.update(correct, x.size(0))
        # Fixed: the old message referenced an undefined `epoch` variable
        # (NameError on every call) and printed the accuracy under a
        # "loss" label; now both metrics are reported correctly.
        print('\nValidation: Average batch loss: {:.6f}, acc: {:.6f}\n'.format(
            valid_loss.average, valid_acc.accuracy))
        return valid_loss.average, valid_acc.accuracy
示例#12
0
def eval_accuracies(hypothesis_list, reference_list, mode='valid'):
    """An unofficial evalutation helper.

    Computes corpus BLEU, ROUGE-L, METEOR (only when mode == 'test'),
    and averaged precision / recall / F1 over all instances.

     Arguments:
        hypothesis_list: A mapping from instance id to predicted sequences.
        reference_list: A mapping from instance id to ground truth sequences.
        mode: 'test' additionally computes METEOR; otherwise it is 0.
    """
    assert (sorted(reference_list.keys()) == sorted(hypothesis_list.keys()))
    # Compute BLEU
    _, bleu, ind_bleu = google_bleu.corpus_bleu(reference_list,
                                                hypothesis_list)

    # Compute ROGUE
    rouge_l, ind_rogue = Rouge().compute_score(reference_list, hypothesis_list)

    # Compute METEOR
    if mode == 'test':
        meteor, _ = Meteor().compute_score(reference_list, hypothesis_list)
    else:
        meteor = 0

    # Compute F1, Precision, Recall
    f1, precision, recall = AverageMeter(), AverageMeter(), AverageMeter()
    """
    hypothesis_list example
    {
        0: ['the the the given fo...e the </s>'], 
        1: ['return the first for...e the </s>'], 
        2: ['setup the given for ...e the </s>'], 
        3: ['expand the given for...e the </s>'], 
        4: ['test that the given ...e the </s>'], 
        5: ['attach the given for...e the </s>'], 
        6: ['add the given for th...e the </s>'], 
        7: ['guess the given for ...e the </s>'], 
        8: ['return the given for...e the </s>'], 
        9: ['open the filepath fo...e the </s>'], 
        10: ['open the pathname fo...e the </s>'], 
        11: ['delete the given for...e the </s>'], 
        12: ['get the given for th...e the </s>'],
        13: ['write the given for ...e the </s>'], ...
    }
    """
    for key in reference_list.keys():
        # Score the first (top) hypothesis against its reference.
        _precision, _recall, _f1 = F1().compute_eval_score(
            hypothesis_list[key][0], reference_list[key])
        # Accumulate per-instance scores into the running averages.
        precision.update(_precision)
        recall.update(_recall)
        f1.update(_f1)

    return bleu, rouge_l, meteor, precision.avg, recall.avg, f1.avg
示例#13
0
    def learn_batch(self,
                    train_loader,
                    val_loader=None,
                    curr_global_decoder=None,
                    local_vae=None,
                    class_table=None,
                    global_classes_list=None,
                    task_id=None,
                    n_codes=None,
                    global_n_codes=None,
                    new_task_data_processing='original'):
        """Train the classifier on a new task, optionally mixing in or
        replacing its data with samples from the generative-replay decoder.

        :param train_loader: loader of (input, target, task) for the new task
        :param val_loader: optional loader evaluated after each epoch
        :param curr_global_decoder: decoder used to generate replay data
        :param local_vae: current task's VAE ('original_through_vae' mode;
            also provides the target device)
        :param class_table: unused here; kept for interface compatibility
        :param global_classes_list: class bookkeeping shared across tasks
        :param task_id: index of the task being learned
        :param n_codes: latent code counts for the current task
        :param global_n_codes: latent code counts per task
        :param new_task_data_processing: one of 'original',
            'original_through_vae', 'generated'
        :raises ValueError: on an unknown new_task_data_processing value
        """
        if self.reset_optimizer:  # Reset optimizer before learning each task
            self.log('Optimizer is reset!')
            self.init_optimizer()

        print("Classifier: learning new task in '{}' new data processing mode".
              format(new_task_data_processing))

        if new_task_data_processing == 'original':
            process_through_local_vae = False
            train_only_on_generated_data = False
        elif new_task_data_processing == 'original_through_vae':
            process_through_local_vae = True
            train_only_on_generated_data = False
        elif new_task_data_processing == 'generated':
            process_through_local_vae = False
            train_only_on_generated_data = True
        else:
            # Fixed: the '{}' placeholder was never filled in (missing
            # .format call) and the quote on 'generated' was unterminated.
            raise ValueError(
                "'new_task_data_processing' argument is invalid: '{}'. "
                "Valid values are: 'original', 'original_through_vae', "
                "'generated'.".format(new_task_data_processing))

        if self.score_generated_images_by_freezed_classifier:
            # Frozen snapshot of the classifier used to produce soft labels
            # for replayed samples.
            frozen_model = copy.deepcopy(self.model)
            frozen_model.eval()

        train_accs = []
        val_accs = []

        for epoch in range(self.config['base_schedule'][-1]):
            data_timer = Timer()
            batch_timer = Timer()
            batch_time = AverageMeter()
            data_time = AverageMeter()
            losses = AverageMeter()
            acc = AverageMeter()

            # Config the model and optimizer
            self.log('Epoch:{0}'.format(epoch))
            self.model.train()
            for param_group in self.optimizer.param_groups:
                self.log('LR:', param_group['lr'])

            # Learning with mini-batch
            data_timer.tic()
            batch_timer.tic()
            self.log('Itr\t\t  Time\t\t\t  Data\t\t\t  Loss\t\t\t  Acc')

            current_start = 0

            if train_only_on_generated_data:
                n_tasks_to_generate = task_id + 1
            else:
                n_tasks_to_generate = task_id

            if not train_only_on_generated_data and (task_id == 0):
                starting_points_fixed = np.array([[0]])
            else:
                # Random permutation of batch offsets per previous task,
                # reflect-padded so every task has the same number of offsets.
                starting_points = []
                for prev_task_id in range(n_tasks_to_generate):
                    starting_points.append(
                        np.random.permutation(
                            np.array(
                                range(
                                    math.ceil(global_n_codes[prev_task_id] /
                                              train_loader.batch_size)))))
                max_len = max([len(repeats) for repeats in starting_points])
                starting_points_fixed = []
                for points in starting_points:
                    starting_points_fixed.append(
                        np.pad(points, [0, max_len - len(points)],
                               mode="reflect"))
                starting_points_fixed = np.array(starting_points_fixed)

            for i, (orig_input, orig_target,
                    orig_task) in enumerate(train_loader):

                data_time.update(data_timer.toc())  # measure data loading time

                batch_size = len(orig_task)

                # generate data so every task is equally represented
                with torch.no_grad():
                    if process_through_local_vae:
                        orig_input, orig_target, _ = vae_utils.generate_current_data(
                            local_vae.decoder, task_id, batch_size,
                            current_start, global_classes_list, n_codes,
                            global_n_codes)

                    generate_impl = vae_utils.generate_previous_data

                    if train_only_on_generated_data:
                        # generate data from previous tasks and the current one
                        generate_impl = vae_utils.generate_previous_and_current_data
                        # clear original data
                        orig_input, orig_target = torch.Tensor(), torch.Tensor(
                        )

                    if train_only_on_generated_data or (task_id > 0):
                        gen_input, gen_target_orig, _ = generate_impl(
                            curr_global_decoder, task_id, batch_size,
                            starting_points_fixed[:, current_start] *
                            batch_size, global_classes_list, n_codes,
                            global_n_codes)
                        current_start += 1
                    else:
                        gen_input = torch.Tensor()
                        gen_target_orig = torch.Tensor()

                    if self.score_generated_images_by_freezed_classifier:
                        if task_id > 0:
                            # Soft labels for replayed samples come from the
                            # frozen classifier snapshot.
                            gen_target = frozen_model.forward(
                                gen_input[:-batch_size])
                            gen_target = gen_target['All']
                            gen_target = F.softmax(gen_target, 1)

                            if train_only_on_generated_data:
                                targets_orig = self.one_hot_targets(
                                    gen_target_orig[-batch_size:]).to(
                                        local_vae.device)
                                gen_target = torch.cat(
                                    [gen_target, targets_orig])
                            else:
                                targets_orig = self.one_hot_targets(
                                    orig_target).to(local_vae.device)
                                gen_target = torch.cat(
                                    [gen_target, targets_orig])
                        else:
                            gen_target = gen_target_orig
                            gen_target = self.one_hot_targets(
                                gen_target, self.model.n_classes)
                    else:
                        # Fixed: this branch referenced `gen_target` before
                        # assignment (NameError); the generated targets live
                        # in `gen_target_orig` here.
                        gen_target = self.one_hot_targets(
                            gen_target_orig, self.model.n_classes)

                orig_target = self.one_hot_targets(orig_target,
                                                   self.model.n_classes)
                if self.gpu:
                    orig_input = orig_input.cuda()
                    orig_target = orig_target.cuda()
                    gen_input = gen_input.cuda()
                    gen_target = gen_target.cuda()

                # merge original and generated data
                multi_input = torch.cat((orig_input, gen_input), 0)
                multi_target = torch.cat((orig_target, gen_target), 0)

                # zip and shuffle
                multibatch = list(zip(multi_input, multi_target))
                random.shuffle(multibatch)

                # iterate over batches in multibatch
                multibatch_parted = zip(*(iter(multibatch), ) * batch_size)
                for part in multibatch_parted:
                    input, target = zip(*part)

                    # convert tuples of tensors into one tensor
                    input = torch.stack(input)
                    target = torch.stack(target)

                    loss, output = self.update_model(input, target, None)
                    input = input.detach()
                    target = target.detach()

                    # measure accuracy and record loss
                    acc = accumulate_acc(output, target, None, acc)
                    losses.update(loss, input.size(0))

                    batch_time.update(
                        batch_timer.toc())  # measure elapsed time
                    data_timer.toc()

                    if ((self.config['base_print_freq'] > 0) and
                        (i % self.config['base_print_freq']
                         == 0)) or (i + 1) == len(train_loader):
                        self.log(
                            '[{0}/{1}]\t'
                            '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                            '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                            '{loss.val:.3f} ({loss.avg:.3f})\t'
                            '{acc.val:.2f} ({acc.avg:.2f})'.format(
                                i,
                                len(train_loader),
                                batch_time=batch_time,
                                data_time=data_time,
                                loss=losses,
                                acc=acc))

            train_accs.append(acc.avg)

            self.log(
                ' * Train on {} original batches, Acc {acc.avg:.3f}'.format(
                    len(train_loader), acc=acc))

            # Evaluate the performance of current task
            if val_loader != None:
                val_accs.append(self.validation(val_loader))

        print("All epochs ended")
    def train_(self, epochs, finetune=False):
        """Run pretraining (or finetuning) for *epochs* epochs, validating
        on self.test_loader and writing TensorBoard scalars after each one.

        :param epochs: number of epochs to run
        :param finetune: if True, switch the model to finetune mode and tag
            logs/checkpoints with 'finetune' instead of 'pretrain'
        """
        str_ = 'pretrain'
        self.str_ = str_

        if finetune:
            self.switch_finetune()
            str_ = 'finetune'
            self.str_ = str_

        for epoch in range(epochs):

            data_timer = Timer()
            batch_timer = Timer()
            batch_time = AverageMeter()
            data_time = AverageMeter()
            losses = AverageMeter()
            acc = AverageMeter()
            self.model.train()
            self.scheduler.step(epoch)
            if self.config['train_between']:
                # Unfreeze all parameters (and change weight decay) at the
                # configured schedule points, rebuilding the optimizer.
                if epoch == self.config['schedule'][0]:
                    for param in self.model.parameters():
                        param.requires_grad = True
                    #self.config['lr'] = 0.01
                    self.config['weight_decay'] = 5e-4
                    self.init_optimizer()
                if self.config['switch_all']:
                    if epoch == self.config['switch_all']:
                        self.config['weight_decay'] = 5e-3
                        for param in self.model.parameters():
                            param.requires_grad = True
                        self.init_optimizer()
                    #self.config['lr'] = 0.01

            for param_group in self.optimizer.param_groups:
                self.log('LR:', param_group['lr'])

            self.log('Itr\t\tTime\t\t  Data\t\t  Loss\t\tAcc')
            self.log('{0} Epoch:{1}'.format(str_, epoch))

            data_timer.tic()
            batch_timer.tic()

            for i, (input, target) in enumerate(self.train_loader):
                self.model.train()
                data_time.update(data_timer.toc())  # measure data loading time

                if self.gpu:
                    input = input.cuda()
                    target = target.cuda()

                loss, output = self.update_model(input, target)
                input = input.detach()
                target = target.detach()

                # measure accuracy and record loss
                acc = self.accumulate_acc(output, target, acc)
                losses.update(loss, input.size(0))
                batch_time.update(batch_timer.toc())  # measure elapsed time
                data_timer.toc()
                # Global step for TensorBoard: epoch offset + batch index.
                self.n_iter = (epoch) * len(self.train_loader) + i
                self.writer.add_scalar(str_ + '/Loss_train', losses.avg,
                                       self.n_iter)
                self.writer.add_scalar(str_ + '/Acc_train', acc.avg,
                                       self.n_iter)
                # if ((self.config['print_freq']>0) and (i % self.config['print_freq'] == 0)) or (i+1)==len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                             i,
                             len(self.train_loader),
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             acc=acc))

            # Validate after every epoch and log test metrics.
            acc_v, loss_v = self.validation(self.test_loader)
            self.writer.add_scalar(str_ + '/Loss_test', loss_v.avg,
                                   self.n_iter)
            self.writer.add_scalar(str_ + '/Acc_test', acc_v.avg, self.n_iter)

            # Periodic checkpoint (skip epoch 0).
            if epoch % self.save_after == 0 and epoch != 0:
                self.save_model(str_ + str(epoch))
    def validation(self, test_loader, from_train=1):
        """Evaluate top-1/top-5 accuracy, overall and per class group.

        this might possibly change for other incremental scenario
        This function doesn't distinguish tasks.
        :param test_loader: yields (input, target) batches
        :param from_train: if truthy return (acc, losses); otherwise also
            return the top-5 meter and per-class accuracy dictionaries
        """
        batch_timer = Timer()
        acc = AverageMeter()
        losses = AverageMeter()
        acc_5 = AverageMeter()
        # One meter per class for top-1 and top-5 accuracy.
        acc_class = [
            AverageMeter()
            for i in range(len(self.train_loader.dataset.class_list))
        ]
        acc_class_5 = [
            AverageMeter()
            for i in range(len(self.train_loader.dataset.class_list))
        ]
        batch_timer.tic()
        orig_mode = self.training
        self.eval()
        for i, (input, target) in enumerate(test_loader):

            if self.gpu:
                with torch.no_grad():
                    input = input.cuda()
                    target = target.cuda()
                    output = self.forward(input)
                    loss = self.criterion(output, target)

            losses.update(loss, input.size(0))
            # Summarize the performance of all tasks, or 1 task, depends on dataloader.
            # Calculated by total number of data.

            t_acc, acc_class = accuracy(output,
                                        target,
                                        topk=(1, ),
                                        avg_meters=acc_class)
            t_acc_5, acc_class_5 = accuracy(output,
                                            target,
                                            topk=(5, ),
                                            avg_meters=acc_class_5)
            acc.update(t_acc, len(target))
            acc_5.update(t_acc_5, len(target))

        class_list = self.train_loader.dataset.class_list.inverse
        acc_cl_1 = {}
        acc_cl_5 = {}

        # from accuracies obtained create inst size based accuracies
        inst_clss_lst = self.train_loader.dataset.class_inst_list
        for ins_clss_, insts in inst_clss_lst.items():
            cls_sum = sum([acc_class[inst].sum for inst in insts])
            cls_cnt = sum([acc_class[inst].count for inst in insts])
            if cls_cnt == 0:
                # No samples were seen for this class group: skip it
                # instead of dropping into the debugger (the old code
                # called pdb.set_trace() here) and then dividing by zero.
                continue
            inst_avg = cls_sum / cls_cnt

            self.writer.add_scalar(self.str_ + '/Acc_1_{}'.format(ins_clss_),
                                   inst_avg, self.n_iter)

            cls_sum_5 = sum([acc_class_5[inst].sum for inst in insts])
            cls_cnt_5 = sum([acc_class_5[inst].count for inst in insts])
            inst_avg_5 = cls_sum_5 / cls_cnt_5
            self.writer.add_scalar(self.str_ + '/Acc_5_{}'.format(ins_clss_),
                                   inst_avg_5, self.n_iter)

        for idx, cl_ in class_list.items():
            acc_cl_1[cl_] = [
                acc_class[idx].avg, acc_class[idx].sum, acc_class[idx].count
            ]
            acc_cl_5[cl_] = [
                acc_class_5[idx].avg, acc_class_5[idx].sum,
                acc_class_5[idx].count
            ]

        # Restore the previous train/eval mode.
        self.train(orig_mode)

        self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
            acc=acc, time=batch_timer.toc()))
        if from_train:
            return acc, losses
        else:
            return acc, acc_5, acc_cl_1, acc_cl_5, losses
示例#16
0
def train(epoch, train_loader, network, optimizer, compute_loss, args):
    """Train *network* for one epoch on image/caption pairs.

    :param epoch: current epoch (for logging)
    :param train_loader: yields (images, captions, labels, captions_length)
    :param network: model returning (image_embeddings, text_embeddings)
    :param optimizer: optimizer stepped once per batch
    :param compute_loss: returns (cmpm_loss, cmpc_loss, loss, image_precision,
        text_precision, pos_avg_sim, neg_arg_sim)
    :param args: needs constraints_images / constraints_text flags
    :return: (avg loss, avg batch time, avg image precision, avg text precision)
    """
    batch_time = AverageMeter()
    train_loss = AverageMeter()
    image_pre = AverageMeter()
    text_pre = AverageMeter()

    # switch to train mode
    network.train()

    end = time.time()
    for step, (images, captions, labels,
               captions_length) in enumerate(train_loader):
        images = images.cuda()
        labels = labels.cuda()
        captions = captions.cuda()

        # compute loss
        image_embeddings, text_embeddings = network(images, captions,
                                                    captions_length)
        cmpm_loss, cmpc_loss, loss, image_precision, text_precision, pos_avg_sim, neg_arg_sim = compute_loss(
            image_embeddings, text_embeddings, labels)

        if step % 10 == 0:
            print(
                'epoch:{}, step:{}, cmpm_loss:{:.3f}, cmpc_loss:{:.3f}'.format(
                    epoch, step, cmpm_loss, cmpc_loss))

        # constrain embedding with the same id at the end of one epoch
        if (args.constraints_images
                or args.constraints_text) and step == len(train_loader) - 1:
            con_images, con_text = constraints_loss(train_loader, network,
                                                    args)
            loss += (con_images + con_text)
            print(
                'epoch:{}, step:{}, con_images:{:.3f}, con_text:{:.3f}'.format(
                    epoch, step, con_images, con_text))

        # compute gradient and do ADAM step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Fixed: record the scalar loss value; previously the live tensor
        # was stored in the meter, retaining every batch's autograd graph
        # for the whole epoch.
        train_loss.update(loss.item(), images.shape[0])
        image_pre.update(image_precision, images.shape[0])
        text_pre.update(text_precision, images.shape[0])

    return train_loss.avg, batch_time.avg, image_pre.avg, text_pre.avg
示例#17
0
def train(train_loader, encoder, criterion, encoder_optimizer, epoch):
    r"""Performs one epoch's training.

    Arguments
        train_loader: DataLoader for training data
        encoder: encoder model
        criterion: loss layer
        encoder_optimizer: optimizer to update encoder's weights
        epoch: epoch number
    """
    # Training mode: enables dropout / batch-norm statistic updates.
    encoder.train()

    batch_time = AverageMeter()  # forward prop. + back prop. time
    data_time = AverageMeter()  # data loading time
    losses = AverageMeter()  # loss (per word decoded)
    accs = AverageMeter()  # acc accuracy

    tick = time.time()

    # Batches
    for batch_idx, (batch_imgs, batch_tags) in enumerate(train_loader):
        data_time.update(time.time() - tick)

        # Move to GPU, if available
        batch_imgs = batch_imgs.to(device)
        batch_targets = batch_tags.to(device)

        # Forward pass and loss.
        batch_scores = encoder(batch_imgs)
        batch_loss = criterion(batch_scores, batch_targets)

        # Backward pass, clip gradients, then update weights.
        encoder_optimizer.zero_grad()
        batch_loss.backward()
        clip_gradient(encoder_optimizer, grad_clip)
        encoder_optimizer.step()

        # Keep track of metrics
        batch_acc = binary_accuracy(batch_scores, batch_targets)
        losses.update(batch_loss.item())
        accs.update(batch_acc)
        batch_time.update(time.time() - tick)
        tick = time.time()

        # Periodic status line.
        if batch_idx % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data Load Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top-5 Accuracy {accs.val:.3f} ({accs.avg:.3f})'.format(epoch, batch_idx, len(train_loader),
                                                                          batch_time=batch_time,
                                                                          data_time=data_time, loss=losses,
                                                                          accs=accs))
示例#18
0
    def train_epoch(self, train_loader, epoch, count_cls_step):
        """Train ``self.model`` for one epoch.

        Arguments
            train_loader: iterable of (inputs, target, task) batches
            epoch: current epoch index (forwarded to the LR scheduler)
            count_cls_step: running global step counter.
                NOTE(review): incremented locally but never returned, so the
                caller's counter is unchanged — confirm this is intended.

        Returns
            (losses.avg, acc.avg): mean loss and accuracy over the epoch
        """
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        end = time.time()
        for i, (inputs, target, task) in enumerate(train_loader):
            count_cls_step += 1
            data_time.update(time.time() - end)  # measure data loading time

            if self.config['gpu']:
                inputs = inputs.cuda()
                target = target.cuda()
            output = self.model.forward(inputs)
            loss = self.criterion(output, target, task)

            acc = accumulate_acc(output, target, task, acc)

            self.model_optimizer.zero_grad()
            # NOTE(review): the scheduler is stepped once per batch with the
            # same epoch index; most schedulers expect one step per epoch —
            # confirm this matches the scheduler in use.
            self.model_scheduler.step(epoch)

            loss.backward()
            self.model_optimizer.step()

            batch_time.update(time.time() - end)
            end = time.time()

            # BUG FIX: store the scalar, not the loss tensor — keeping the
            # tensor retains the autograd graph and leaks GPU memory.
            losses.update(loss.item(), inputs.size(0))

            if ((self.config['print_freq'] > 0) and
                (i % self.config['print_freq']
                 == 0)) or (i + 1) == len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                             i,
                             len(train_loader),
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             acc=acc))
        self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc))

        return losses.avg, acc.avg
示例#19
0
    def training(self, epoch):
        """Train ``self.model`` (PaddlePaddle) for one epoch, then validate.

        Arguments
            epoch: current epoch index, used for logging and VisualDL steps.

        Returns
            (prec1, prec5, losses): validation top-1 / top-5 precision and
            the training-loss AverageMeter for this epoch.
        """
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        self.model.train()

        for i, (input, target) in tqdm(enumerate(self.train_loader), total=len(self.train_loader)):
            output = self.model(input)
            loss = self.criterion(output, target)

            # print(output) # Tensor(shape=[256, 1000]
            # Record batch metrics; ``.numpy()[0]`` extracts the Python
            # scalar from a Paddle tensor.
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.numpy()[0], input.shape[0])
            top1.update(prec1.numpy()[0], input.shape[0])
            top5.update(prec5.numpy()[0], input.shape[0])

            # Paddle idiom: clear gradients, backprop, update weights.
            self.optimizer.clear_grad()
            loss.backward()
            self.optimizer.step()

            if i % self.cfg.Log_print_freq == 0:
                self.logger.info('Epoch: [{0}][{1}/{2}]\t'
                                 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                                 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                                 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                    epoch, i, len(self.train_loader), loss=losses, top1=top1, top5=top5))

        prec1, prec5 = self.validate()

        if self.cfg.visualDL:
            with LogWriter(logdir=self.logDir) as writer:
                # Use the scalar component to record per-epoch metrics.
                writer.add_scalar(tag="loss", step=epoch, value=losses.avg)
                writer.add_scalar(tag="prec1", step=epoch, value=prec1)
                writer.add_scalar(tag="prec5", step=epoch, value=prec5)

        self.logger.info("Epoch {}: prec1: {} prec5: {}".format(epoch, prec1, prec5))

        return prec1, prec5, losses
示例#20
0
def validate(val_loader, encoder, criterion):
    r"""Performs one epoch's validation.

    Arguments
        val_loader (Generator): DataLoader for validation data.
        encoder (nn.Module): encoder model
        criterion: loss layer
    Returns
        AverageMeter: Accuracy
    """
    # Evaluation mode: freezes dropout / batch-norm statistics.
    encoder.eval()

    batch_time = AverageMeter()
    losses = AverageMeter()
    accs = AverageMeter()

    tick = time.time()

    # explicitly disable gradient calculation to avoid CUDA memory error
    # solves the issue #57
    with torch.no_grad():
        # Batches
        for step, (batch_imgs, batch_tags) in enumerate(val_loader):
            # Move to device, if available
            batch_imgs = batch_imgs.to(device)
            batch_targets = batch_tags.to(device)

            # Forward prop.
            batch_scores = encoder(batch_imgs)

            # Calculate loss
            batch_loss = criterion(batch_scores, batch_targets)

            # Keep track of metrics
            losses.update(batch_loss.item())
            accs.update(binary_accuracy(batch_scores, batch_targets))
            batch_time.update(time.time() - tick)
            tick = time.time()

            if step % print_freq == 0:
                print('Validation: [{0}/{1}]\t'
                      'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuracy {accs.val:.3f} ({accs.avg:.3f})\t'.format(step, len(val_loader), batch_time=batch_time,
                                                                          loss=losses, accs=accs))

        print(
            '\n * LOSS - {loss.avg:.3f}, ACCURACY - {acc.avg:.3f}\n'.format(
                loss=losses,
                acc=accs))

    return accs
示例#21
0
    def learn_batch(self, train_loader, val_loader=None):
        """Train the model on one task for a stack-scheduled number of epochs.

        Arguments
            train_loader: iterable of (inputs, target, task) batches
            val_loader: optional loader; when given, validation runs after
                every epoch via ``self.validation``.
        """
        if self.reset_optimizer:  # Reset optimizer before learning each task
            self.log('Optimizer is reset!')
            self.init_optimizer()

        # Epoch budget for this task comes from a per-task schedule stack.
        schedule = self.schedule_stack.pop()
        for epoch in range(schedule):
            data_timer = Timer()
            batch_timer = Timer()
            batch_time = AverageMeter()
            data_time = AverageMeter()
            losses = AverageMeter()
            acc = AverageMeter()
            # Sentinels logged if the loader is empty and update_model never runs.
            robust_err, robust_loss = -1, -1

            # Config the model and optimizer
            self.log('Epoch:{0}'.format(epoch))
            self.model.train()
            for param_group in self.optimizer.param_groups:
                self.log('LR:', param_group['lr'])

            # Learning with mini-batch
            data_timer.tic()
            batch_timer.tic()

            for i, (inputs, target, task) in enumerate(train_loader):
                data_time.update(data_timer.toc())  # measure data loading time
                if self.gpu:
                    inputs = inputs.cuda()
                    target = target.cuda()

                # update_model performs forward/backward/step and returns the
                # metrics plus the raw network output.
                loss, robust_err, robust_loss, output = self.update_model(
                    inputs, target, task)
                inputs = inputs.detach()
                target = target.detach()
                # NOTE(review): scalars are logged with step=epoch, so every
                # batch within an epoch writes to the same x-coordinate —
                # confirm this is intended.
                self.tb.add_scalar(f"Loss/train - task {self.current_task}",
                                   loss, epoch)
                self.tb.add_scalar(
                    f"Robust error/train - task {self.current_task}",
                    robust_err, epoch)

                # measure accuracy and record loss
                acc = accumulate_acc(output, target, task, acc)
                losses.update(loss, inputs.size(0))

                batch_time.update(batch_timer.toc())  # measure elapsed time
                data_timer.toc()

            self.log(' * Train Acc {acc.avg:.3f}, Loss {loss.avg:.3f}'.format(
                loss=losses, acc=acc))
            self.log(
                f" * robust loss: {robust_loss:.10f} robust error: {robust_err:.10f}"
            )
            # self.log(f"  * model: {self.model.features_loss_term}")

            # Evaluate the performance of current task
            if val_loader is not None:
                self.validation(val_loader)

            # One LR-scheduler step per epoch.
            self.scheduler.step()
示例#22
0
def test(data_loader, network, args, unique_image):
    """Extract global/local image and text features for the whole test set
    and compute top-k cross-modal retrieval accuracy in both directions.

    Arguments
        data_loader: iterable of (images, captions, labels) batches
        network: DataParallel-wrapped model (submodules reached via
            ``network.module``)
        args: namespace providing at least feature_size, part2, part3
        unique_image: 0/1 list marking the unique gallery images

    Returns
        (ac_top1_i2t, ac_top5_i2t, ac_top10_i2t,
         ac_top1_t2i, ac_top5_t2i, ac_top10_t2i, batch_time.avg)
    """
    batch_time = AverageMeter()

    # switch to evaluate mode
    network.eval()
    # Pre-allocate feature banks; 64 appears to be the loader's (max) batch
    # size — TODO confirm against the DataLoader configuration.
    max_size = 64 * len(data_loader)
    global_img_feat_bank = torch.zeros((max_size, args.feature_size)).cuda()
    global_text_feat_bank = torch.zeros((max_size, args.feature_size)).cuda()

    local_img_query_bank = torch.zeros((max_size, args.part2 + args.part3 + 1, args.feature_size)).cuda()
    local_img_value_bank = torch.zeros((max_size, args.part2 + args.part3 + 1, args.feature_size)).cuda()

    # 98 + 2 + 1: presumably max caption tokens + special tokens — verify
    # against the language model's sequence length.
    local_text_key_bank = torch.zeros((max_size, 98 + 2 + 1, args.feature_size)).cuda()
    local_text_value_bank = torch.zeros((max_size, 98 + 2 + 1, args.feature_size)).cuda()

    labels_bank = torch.zeros(max_size).cuda()
    length_bank = torch.zeros(max_size, dtype=torch.long).cuda()
    index = 0

    with torch.no_grad():
        end = time.time()
        for images, captions, labels in data_loader:
            # Split each caption at punctuation into n_sep fragments, padding
            # with "[PAD]" when a caption has fewer fragments.
            sep_captions = []
            n_sep = 2

            for i, c in enumerate(captions):
                c = re.split(r'[;,!?.]', c)
                if len(c) > n_sep or len(c) == n_sep:
                    sep_captions = sep_captions + c[0:n_sep]
                else:
                    pad_length = n_sep - len(c)
                    padding = ["[PAD]" for j in range(pad_length)]
                    sep_captions = sep_captions + c + padding

            # Tokenize full captions and the per-sentence fragments.
            tokens, segments, input_masks, caption_length = network.module.language_model.pre_process(captions)
            sep_tokens, sep_segments, sep_input_masks, sep_caption_length = network.module.language_model.pre_process(sep_captions)

            tokens = tokens.cuda()
            segments = segments.cuda()
            input_masks = input_masks.cuda()
            caption_length = caption_length.cuda()

            sep_tokens = sep_tokens.cuda()
            sep_segments = sep_segments.cuda()
            sep_input_masks = sep_input_masks.cuda()

            images = images.cuda()
            labels = labels.cuda()
            interval = images.shape[0]

            # Part indices in natural order (no shuffling at test time).
            p2 = [i for i in range(args.part2)]
            p3 = [i for i in range(args.part3)]

            global_img_feat, global_text_feat, local_img_query, local_img_value, local_text_key, local_text_value = network(images, tokens, segments, input_masks, sep_tokens, sep_segments, sep_input_masks, n_sep, p2, p3,  stage='train')

            # Append this batch's features into the pre-allocated banks.
            global_img_feat_bank[index: index + interval] = global_img_feat
            global_text_feat_bank[index: index + interval] = global_text_feat
            local_img_query_bank[index: index + interval, :, :] = local_img_query
            local_img_value_bank[index: index + interval, :, :] = local_img_value
            local_text_key_bank[index: index + interval, :, :] = local_text_key
            local_text_value_bank[index: index + interval, :, :] = local_text_value
            labels_bank[index:index + interval] = labels
            length_bank[index:index + interval] = caption_length
            batch_time.update(time.time() - end)
            end = time.time()
            index = index + interval

        # Trim the banks to the number of samples actually seen.
        global_img_feat_bank = global_img_feat_bank[:index]
        global_text_feat_bank = global_text_feat_bank[:index]
        local_img_query_bank = local_img_query_bank[:index]
        local_img_value_bank = local_img_value_bank[:index]
        local_text_key_bank = local_text_key_bank[:index]
        local_text_value_bank = local_text_value_bank[:index]
        labels_bank = labels_bank[:index]
        length_bank = length_bank[:index]
        # Boolean mask selecting the unique gallery images.
        unique_image = torch.tensor(unique_image) == 1

        global_result, local_result, result = compute_topk(global_img_feat_bank[unique_image], local_img_query_bank[unique_image], local_img_value_bank[unique_image], global_text_feat_bank, local_text_key_bank,
                                                        local_text_value_bank, length_bank, labels_bank[unique_image], labels_bank, args, [1, 5, 10], True)

        ac_top1_i2t, ac_top5_i2t, ac_top10_i2t, ac_top1_t2i, ac_top5_t2i, ac_top10_t2i = result

        return ac_top1_i2t, ac_top5_i2t, ac_top10_i2t, ac_top1_t2i, ac_top5_t2i , ac_top10_t2i, batch_time.avg
示例#23
0
def train(epoch, train_loader, network, optimizer, compute_loss, args, co_location_loss=None):
    batch_time = AverageMeter()
    train_loss = AverageMeter()
    image_pre = AverageMeter()
    text_pre = AverageMeter()

    # switch to train mode
    network.train()

    end = time.time()

    for step, (images, captions, labels) in enumerate(train_loader):
        sep_captions = []
        
        n_sep = 2

        for i, c in enumerate(captions):
            c = c.split()
            s = math.floor(len(c) / n_sep)
            start = 0
            for j in range(0, n_sep):
                if j == n_sep: sep_c = c[start:]
                else: sep_c = c[start:min(start + s, len(c))]
                sep_captions.append(' '.join(sep_c))
                start += s

        tokens, segments, input_masks, caption_length = network.module.language_model.pre_process(captions)
        sep_tokens, sep_segments, sep_input_masks, sep_caption_length = network.module.language_model.pre_process(sep_captions)

        tokens = tokens.cuda()
        segments = segments.cuda()
        input_masks = input_masks.cuda()
        caption_length = caption_length.cuda()

        sep_tokens = sep_tokens.cuda()
        sep_segments = sep_segments.cuda()
        sep_input_masks = sep_input_masks.cuda()
        
        images = images.cuda()
        labels = labels.cuda()

        p2 = [i for i in range(args.part2)]
        p3 = [i for i in range(args.part3)]
        random.shuffle(p2)
        random.shuffle(p3)

        global_img_feat, global_text_feat, local_img_query, local_img_value, local_text_key, local_text_value = network(images, tokens, segments, input_masks, sep_tokens, sep_segments, sep_input_masks, n_sep, p2, p3,  stage='train')

        cmpm_loss, cmpc_loss, cont_loss, loss, image_precision, text_precision, pos_avg_sim, neg_arg_sim, local_pos_avg_sim, local_neg_avg_sim = compute_loss(
            global_img_feat, global_text_feat, local_img_query, local_img_value, local_text_key, local_text_value, caption_length, labels)

        if step % 10 == 0:
            print('epoch:{}, step:{}, cmpm_loss:{:.3f}, cmpc_loss:{:.3f}, cont_loss:{:.3f}, pos_sim_avg:{:.3f}, neg_sim_avg:{:.3f}, lpos_sim_avg:{:.3f}, lneg_sim_avg:{:.3f}'.
                  format(epoch, step, cmpm_loss, cmpc_loss, cont_loss, pos_avg_sim, neg_arg_sim, local_pos_avg_sim, local_neg_avg_sim))
        # constrain embedding with the same id at the end of one epoch
        if (args.constraints_images or args.constraints_text) and step == len(train_loader) - 1:
            con_images, con_text = constraints_loss(train_loader, network, args)
            loss += (con_images + con_text)

            print('epoch:{}, step:{}, con_images:{:.3f}, con_text:{:.3f}'.format(epoch, step, con_images.item(), con_text.item()))

        # compute gradient and do ADAM step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        
        train_loss.update(loss.item(), images.shape[0])
        image_pre.update(image_precision, images.shape[0])
        text_pre.update(text_precision, images.shape[0])
    return train_loss.avg, batch_time.avg, image_pre.avg, text_pre.avg
示例#24
0
    def learn_batch(self, site_name, train_loader, val_loader=None):
        """Train the model for the configured number of epochs, logging
        per-batch losses and checkpointing after every epoch.

        Arguments
            site_name: subdirectory name under ``self.exp_dir`` for checkpoints
            train_loader: DataLoader yielding dicts with 'img' and 'gt'
            val_loader: unused here; kept for interface compatibility
        """
        if self.config['reset_optimizer']:
            self.log('Optimizer is reset!')
            self.init_optimizer()
        else:
            self.optimizer.param_groups[0]['lr'] = self.config['lr']

        for self.epoch in range(self.config['epoches']):
            losses_seg = AverageMeter()
            losses_regression = AverageMeter()
            losses_embedding = AverageMeter()
            losses = AverageMeter()

            self.model.train()
            self.scheduler.step()

            with tqdm(total=len(train_loader), desc='Epoch %d/%d' % (self.epoch+1,self.config['epoches']), unit='batch') \
                    as pbar:
                for batch in train_loader:
                    imgs, gts = batch['img'], batch['gt']
                    if self.gpu:
                        imgs, gts = imgs.cuda(), gts.cuda()

                    # update_model performs forward/backward/step and returns
                    # the individual loss components plus the total loss.
                    loss_seg, loss_regression, loss_embedding, loss = self.update_model(imgs, gts)

                    losses_seg.update(loss_seg, imgs.shape[0])
                    losses_regression.update(loss_regression, imgs.shape[0])
                    losses_embedding.update(loss_embedding, imgs.shape[0])
                    losses.update(loss, imgs.shape[0])

                    pbar.set_postfix({'loss_seg': '{0:.4f}'.format(losses_seg.val),
                                      'loss_regression': '{0:.4f}'.format(losses_regression.val),
                                      'loss_embedding': '{0:.4f}'.format(losses_embedding.val),
                                      'loss':'{0:.4f}'.format(losses.val),
                                      'lr':'{0:.5f}'.format(self.optimizer.param_groups[0]['lr'])
                                      })
                    pbar.update(1)

            self.log(' * Train Epoch: {epoch:n}, '
                     'LearningRate {lr:.5f}, SegLoss {losses_seg.avg:.4f}, RegressionLoss {losses_regression.avg:.4f}, '
                     'EmbeddingLoss {losses_embedding.avg:.4f}, '
                     'Loss {losses.avg:.4f}'.format(epoch=self.epoch + 1,
                                                    lr=self.optimizer.param_groups[0]['lr'],
                                                    losses_seg=losses_seg,
                                                    losses_regression=losses_regression,
                                                    losses_embedding=losses_embedding,
                                                    losses=losses))

            # Save a checkpoint after every epoch.
            if (self.epoch + 1) % 1 == 0:
                model_dir = os.path.join(self.exp_dir, site_name)
                os.makedirs(model_dir, exist_ok=True)
                # BUG FIX: the original filename referenced an undefined
                # ``val_dice`` (NameError on every save). No validation metric
                # is computed in this loop, so tag checkpoints with the mean
                # training loss instead.
                self.save_model(filename=os.path.join(
                    model_dir, 'Epoch_%d_Loss_%.4f' % (self.epoch + 1, losses.avg)))
示例#25
0
    def learn_batch(self, train_loader, val_loader=None):
        """Train on the current task for the scheduled number of epochs, then
        compute and store the weight-importance regularization terms used
        when learning subsequent tasks.

        Arguments
            train_loader: iterable of (inputs_1, inputs_2, inputs_3, target)
            val_loader: optional loader for per-epoch validation
        """
        if self.reset_optimizer:  # Reset optimizer before learning each task
            self.log('Optimizer is reset!')
            self.init_optimizer()
        self.model.zero_grad()
        # epoch_iterator = tqdm(train_loader, desc="Iteration", disable=False)
        # global_step = 0
        # epochs_trained = 0
        # steps_trained_in_current_epoch = 0
        # train_iterator = trange(
        #     epochs_trained, int(self.config['schedule'][-1]), desc="Epoch", disable=False,
        # )

        # for _ in train_iterator:
        for epoch in range(self.config['schedule'][-1]):
            data_timer = Timer()
            batch_timer = Timer()
            batch_time = AverageMeter()
            data_time = AverageMeter()
            losses = AverageMeter()
            acc = AverageMeter()

            # Config the model and optimizer
            self.log('Epoch:{0}'.format(epoch))
            self.model.train()
            self.scheduler.step(epoch)
            for param_group in self.optimizer.param_groups:
                self.log('LR:', param_group['lr'])

            # Learning with mini-batch
            data_timer.tic()
            batch_timer.tic()
            self.log('Itr\t\tTime\t\t  Data\t\t  Loss\t\tAcc')
            for i, (inputs_1, inputs_2, inputs_3,
                    target) in enumerate(train_loader):
                #changed here for creating 2d tensor
                # NOTE(review): batch size 8 is hard-coded in this reshape —
                # confirm the loader always yields exactly 8 samples.
                input = torch.stack([inputs_1, inputs_2, inputs_3]).reshape(
                    (8, -1))
                # Task name is fixed; presumably this agent only handles the
                # GLUE MRPC task — verify.
                task = 'mrpc'

                data_time.update(data_timer.toc())  # measure data loading time

                if self.gpu:
                    input = input.cuda()
                    target = target.cuda()

                # update_model performs forward/backward/step for one batch.
                loss, output = self.update_model(input, target, task)
                input = input.detach()
                target = target.detach()

                # measure accuracy and record loss
                acc = accumulate_acc(output, target, task, acc)
                losses.update(loss, input.size(0))

                batch_time.update(batch_timer.toc())  # measure elapsed time
                data_timer.toc()

                if ((self.config['print_freq'] > 0) and
                    (i % self.config['print_freq']
                     == 0)) or (i + 1) == len(train_loader):
                    self.log('[{0}/{1}]\t'
                             '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                             '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                             '{loss.val:.3f} ({loss.avg:.3f})\t'
                             '{acc.val:.2f} ({acc.avg:.2f})'.format(
                                 i,
                                 len(train_loader),
                                 batch_time=batch_time,
                                 data_time=data_time,
                                 loss=losses,
                                 acc=acc))

            self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc))

            # Evaluate the performance of current task
            if val_loader != None:
                self.validation(val_loader)

        #from regularization

        # 2.Backup the weight of current task
        task_param = {}
        for n, p in self.params.items():
            task_param[n] = p.clone().detach()

        # 3.Calculate the importance of weights for current task
        importance = self.calculate_importance(train_loader)

        # Save the weight and importance of weights of current task
        self.task_count += 1
        if self.online_reg and len(self.regularization_terms) > 0:
            # Always use only one slot in self.regularization_terms
            self.regularization_terms[1] = {
                'importance': importance,
                'task_param': task_param
            }
        else:
            # Use a new slot to store the task-specific information
            self.regularization_terms[self.task_count] = {
                'importance': importance,
                'task_param': task_param
            }
示例#26
0
    def learn_batch(self, site_name, train_loader, val_loader=None):
        """Train the segmentation model for the configured number of epochs,
        logging BCE/Dice losses per batch and checkpointing after each epoch.

        Arguments
            site_name: subdirectory name under ``self.exp_dir`` for checkpoints
            train_loader: DataLoader yielding dicts with 'img' and 'gt'
            val_loader: unused here; kept for interface compatibility
        """
        if self.config['reset_optimizer']:
            self.log('Optimizer is reset!')
            self.init_optimizer()
        else:
            self.optimizer.param_groups[0]['lr'] = self.config['lr']

        for self.epoch in range(self.config['epoches']):
            losses_bce = AverageMeter()
            losses_dice = AverageMeter()
            losses = AverageMeter()

            self.model.train()
            self.scheduler.step()

            with tqdm(total=len(train_loader), desc='Epoch %d/%d' % (self.epoch+1,self.config['epoches']), unit='batch') \
                    as pbar:

                for batch in train_loader:

                    imgs, gts = batch['img'], batch['gt']
                    if self.gpu:
                        imgs, gts = imgs.cuda(), gts.cuda()

                    # update_model performs forward/backward/step and returns
                    # the loss components, total loss and raw outputs.
                    loss_bce, loss_dice, loss, outs = self.update_model(
                        imgs, gts)
                    imgs = imgs.detach()
                    gts = gts.detach()

                    losses_bce.update(loss_bce, imgs.size(0))
                    losses_dice.update(loss_dice, imgs.size(0))
                    losses.update(loss, imgs.size(0))

                    pbar.set_postfix({
                        'bce_loss':
                        '{0:.4f}'.format(losses_bce.val),
                        'dice_loss':
                        '{0:.4f}'.format(losses_dice.val),
                        'loss':
                        '{0:.4f}'.format(losses.val),
                        'lr':
                        '{0:.5f}'.format(self.optimizer.param_groups[0]['lr'])
                    })
                    pbar.update(1)

            # print result in each epoch
            self.log(
                ' * Train Epoch: {epoch:n}, '
                'LearningRate {lr:.5f}, BCELoss {losses_bce.avg:.4f}, DiceLoss {losses_dice.avg:.4f}, '
                'Loss {losses.avg:.4f}'.format(
                    epoch=self.epoch + 1,
                    lr=self.optimizer.param_groups[0]['lr'],
                    losses_bce=losses_bce,
                    losses_dice=losses_dice,
                    losses=losses))

            # save model
            if (self.epoch + 1) % 1 == 0:
                model_dir = os.path.join(self.exp_dir, site_name)
                os.makedirs(model_dir, exist_ok=True)
                # BUG FIX: the original filename referenced an undefined
                # ``val_dice`` (NameError on every save). No validation metric
                # is computed in this loop, so tag checkpoints with the mean
                # training loss instead.
                self.save_model(filename=os.path.join(
                    model_dir, 'Epoch_%d_Loss_%.4f' %
                    (self.epoch + 1, losses.avg)))
示例#27
0
    def learn_batch(self, train_loader, val_loader=None):
        """Train the model on one task for ``self.config.nepoch`` epochs.

        Arguments
            train_loader: iterable of (input, target, task) batches
            val_loader: optional loader; when given, validation runs after
                every epoch via ``self.validation``.
        """
        if self.reset_optimizer:  # Reset optimizer before learning each task
            self.log('Optimizer is reset !')
            self.init_optimizer()

        # for epoch in range(self.config['schedule'][-1]):
        for epoch in range(self.config.nepoch):
            data_timer = Timer()
            batch_timer = Timer()
            batch_time = AverageMeter()
            data_time = AverageMeter()
            losses = AverageMeter()
            acc = AverageMeter()

            # Config the model and optimizer
            self.log('Epoch:{0}'.format(epoch))
            self.model.train()
            if self.config.scheduler :
                self.scheduler.step(epoch)
            for param_group in self.optimizer.param_groups:
                self.log('LR:',param_group['lr'])

            # Learning with mini-batch
            data_timer.tic()
            batch_timer.tic()
            self.log('Itr\t\tTime\t\t  Data\t\t  Loss\t\tAcc')
            for i, (input, target, task) in enumerate(train_loader):

                data_time.update(data_timer.toc())  # measure data loading time

                if self.gpu:
                    input = input.cuda()
                    target = target.cuda()

                # update_model performs forward/backward/step for one batch.
                loss, output = self.update_model(input, target, task)
                input = input.detach()
                target = target.detach()

                # measure accuracy and record loss
                # NOTE(review): ``loss`` may still be a tensor here — confirm
                # update_model returns a detached scalar, otherwise the meter
                # retains autograd graphs.
                acc = accumulate_acc(output, target, task, acc)
                losses.update(loss, input.size(0))

                batch_time.update(batch_timer.toc())  # measure elapsed time
                data_timer.toc()

                # Add wandb logging
                # log_dict = dict()
                # wandb.log(log_dict)

                if ((self.config['print_freq']>0) and (i % self.config['print_freq'] == 0)) or (i+1)==len(train_loader):
                    self.log('[{0}/{1}]\t'
                          '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                          '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                          '{loss.val:.3f} ({loss.avg:.3f})\t'
                          '{acc.val:.2f} ({acc.avg:.2f})'.format(
                        i, len(train_loader), batch_time=batch_time,
                        data_time=data_time, loss=losses, acc=acc))

            self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc))

            # Evaluate the performance of current task
            if val_loader != None:
                self.validation(val_loader)
示例#28
0
    def validate(self):
        """Evaluate ``self.model`` on ``self.val_loader``.

        Returns
            (top1.avg, top5.avg): mean top-1 / top-5 precision over the
            validation set.
        """
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        # switch to evaluate mode
        self.model.eval()

        for batch_in, batch_target in self.val_loader:
            preds = self.model(batch_in)
            batch_loss = self.criterion(preds, batch_target)

            # measure accuracy and record loss; ``.numpy()[0]`` extracts the
            # scalar from a Paddle tensor.
            p1, p5 = accuracy(preds, batch_target, topk=(1, 5))
            n = batch_in.shape[0]
            losses.update(batch_loss.numpy()[0], n)
            top1.update(p1.numpy()[0], n)
            top5.update(p5.numpy()[0], n)

        self.logger.info(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
                         .format(top1=top1, top5=top5))

        return top1.avg, top5.avg
示例#29
0
def train(epoch, train_loader, learner, args):
    # This function optimize the objective
    """Optimize the learner for one epoch and report loss-specific metrics.

    Arguments
        epoch: current epoch index (forwarded to the learner's LR schedule)
        train_loader: iterable of (input, target) batches
        learner: agent exposing train(), step_schedule() and learn()
        args: namespace with use_gpu, print_freq, loss, out_dim, ...

    Side effects
        For 'KCL'/'MCL' losses, stores the optimal cluster-to-class
        assignment in ``args.cluster2Class`` for later evaluation.
    """

    # Initialize all meters
    data_timer = Timer()
    batch_timer = Timer()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    confusion = Confusion(args.out_dim)

    # Setup learner's configuration
    print('\n\n==== Epoch:{0} ===='.format(epoch))
    learner.train()
    learner.step_schedule(epoch)

    # The optimization loop
    data_timer.tic()
    batch_timer.tic()
    if args.print_freq > 0:  # Enable to print mini-log
        print('Itr            |Batch time     |Data Time      |Loss')
    for i, (input, target) in enumerate(train_loader):

        data_time.update(data_timer.toc())  # measure data loading time

        # Prepare the inputs
        if args.use_gpu:
            input = input.cuda()
            target = target.cuda()
        # Derive the (possibly loss-specific) training and evaluation targets.
        train_target, eval_target = prepare_task_target(input, target, args)

        # Optimization
        loss, output = learner.learn(input, train_target)

        # Update the performance meter
        confusion.add(output, eval_target)

        # Measure elapsed time
        batch_time.update(batch_timer.toc())
        data_timer.toc()

        # Mini-Logs
        losses.update(loss, input.size(0))
        if args.print_freq > 0 and ((i % args.print_freq == 0) or
                                    (i == len(train_loader) - 1)):
            print('[{0:6d}/{1:6d}]\t'
                  '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                  '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                  '{loss.val:.3f} ({loss.avg:.3f})'.format(
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses))

    # Loss-specific information
    if args.loss == 'CE':
        print('[Train] ACC: ', confusion.acc())
    elif args.loss in ['KCL', 'MCL']:
        args.cluster2Class = confusion.optimal_assignment(
            train_loader.num_classes
        )  # Save the mapping in args to use in eval
        if args.out_dim <= 20:  # Avoid to print a large confusion matrix
            confusion.show()
        print('Clustering scores:', confusion.clusterscores())
        print('[Train] ACC: ', confusion.acc())
    elif args.loss == 'DPS':
        # Pairwise (dis)similarity loss: report a 2x2 confusion and f1-scores.
        confusion.show(width=15,
                       row_labels=['GT_dis-simi', 'GT_simi'],
                       column_labels=['Pred_dis-simi', 'Pred_simi'])
        print('[Train] similar pair f1-score:',
              confusion.f1score(1))  # f1-score for similar pair (label:1)
        print('[Train] dissimilar pair f1-score:', confusion.f1score(0))
示例#30
0
def reset_bn(model: nn.Module,
             data_loader,
             sync=False,
             backend="ddp",
             progress_bar=False) -> None:
    """Recompute the BatchNorm running statistics of ``model`` from data.

    A deep copy of ``model`` is patched so that every ``_BatchNorm`` module's
    ``forward`` computes the batch mean/variance on the fly (optionally
    reduced across distributed workers when ``sync`` is True) and accumulates
    them into per-layer ``AverageMeter`` objects.  After one full pass over
    ``data_loader`` the accumulated averages are copied back into the
    *original* model's ``running_mean`` / ``running_var`` buffers in place.

    :param model: network whose BN statistics are reset (mutated in place).
    :param data_loader: iterable yielding ``(images, target)`` batches.
        Images must be 4-D (mean is taken over dims 0, 2, 3 — i.e. NCHW)
        and are moved to CUDA unconditionally, so a GPU is required.
    :param sync: if True, average batch statistics across workers.
    :param backend: distributed backend; only ``"ddp"`` is implemented,
        anything else raises ``NotImplementedError`` at forward time.
    :param progress_bar: show a tqdm progress bar when True.
    """
    # Per-BN-layer accumulators, keyed by the module's qualified name.
    bn_mean = {}
    bn_var = {}

    # Work on a throwaway copy so the patched forwards never touch `model`.
    tmp_model = copy.deepcopy(model)
    for name, m in tmp_model.named_modules():
        if isinstance(m, _BatchNorm):
            bn_mean[name] = AverageMeter()
            bn_var[name] = AverageMeter()

            # Factory closure: binds this layer's module and meters so the
            # replacement forward below sees the right per-layer state
            # (avoids the classic late-binding-in-a-loop pitfall).
            def new_forward(bn, mean_est, var_est):
                def lambda_forward(x):
                    x = x.contiguous()
                    if sync:
                        # Local per-channel mean over batch/height/width.
                        batch_mean = (x.mean(0, keepdim=True).mean(
                            2, keepdim=True).mean(3,
                                                  keepdim=True))  # 1, C, 1, 1
                        if backend == "ddp":
                            # Gather every worker's mean along dim 0, then
                            # average them below.
                            batch_mean = ddp_reduce_tensor(batch_mean,
                                                           reduce="cat")
                        else:
                            raise NotImplementedError
                        batch_mean = torch.mean(batch_mean,
                                                dim=0,
                                                keepdim=True)

                        # Variance w.r.t. the *globally* averaged mean
                        # (biased estimator: divides by N, not N-1).
                        batch_var = (x - batch_mean) * (x - batch_mean)
                        batch_var = (batch_var.mean(0, keepdim=True).mean(
                            2, keepdim=True).mean(3, keepdim=True))
                        if backend == "ddp":
                            batch_var = ddp_reduce_tensor(batch_var,
                                                          reduce="cat")
                        else:
                            raise NotImplementedError
                        batch_var = torch.mean(batch_var, dim=0, keepdim=True)
                    else:
                        # Single-process path: same statistics, no reduction.
                        batch_mean = (x.mean(0, keepdim=True).mean(
                            2, keepdim=True).mean(3,
                                                  keepdim=True))  # 1, C, 1, 1
                        batch_var = (x - batch_mean) * (x - batch_mean)
                        batch_var = (batch_var.mean(0, keepdim=True).mean(
                            2, keepdim=True).mean(3, keepdim=True))

                    # Drop the singleton dims -> shape (C,).
                    batch_mean = torch.squeeze(batch_mean)
                    batch_var = torch.squeeze(batch_var)

                    # Accumulate, weighted by batch size.  NOTE(review):
                    # assumes AverageMeter.update(value, n) keeps a
                    # sample-weighted running average — confirm against its
                    # definition elsewhere in this file.
                    mean_est.update(batch_mean.data, x.size(0))
                    var_est.update(batch_var.data, x.size(0))

                    # bn forward using calculated mean & var
                    # Slicing weight/bias to the active channel count
                    # supports elastic/slimmable nets where the BN buffers
                    # are wider than the current feature map.
                    _feature_dim = batch_mean.shape[0]
                    return F.batch_norm(
                        x,
                        batch_mean,
                        batch_var,
                        bn.weight[:_feature_dim],
                        bn.bias[:_feature_dim],
                        False,  # training=False: use the stats passed in
                        0.0,    # momentum unused on this path
                        bn.eps,
                    )

                return lambda_forward

            m.forward = new_forward(m, bn_mean[name], bn_var[name])

    # skip if there is no batch normalization layers in the network
    if len(bn_mean) == 0:
        return

    # eval() keeps other layers (dropout etc.) deterministic; the patched
    # BN forwards compute batch statistics regardless of train/eval mode.
    tmp_model.eval()
    with torch.no_grad():
        with tqdm(total=len(data_loader),
                  desc="reset bn",
                  disable=(not progress_bar)) as t:
            for images, _ in data_loader:
                images = images.cuda()
                tmp_model(images)  # side effect: meters accumulate stats
                t.set_postfix({
                    "batch_size": images.size(0),
                    "image_size": images.size(2),
                })
                t.update()

    # Write the accumulated averages back into the ORIGINAL model's buffers;
    # only layers that actually saw data (count > 0) are updated.
    for name, m in model.named_modules():
        if name in bn_mean and bn_mean[name].count > 0:
            feature_dim = bn_mean[name].avg.size(0)
            assert isinstance(m, _BatchNorm)
            m.running_mean.data[:feature_dim].copy_(bn_mean[name].avg)
            m.running_var.data[:feature_dim].copy_(bn_var[name].avg)