Example #1
    def forward(self, data_loader, num_steps=None, training=False):
        meters = {
            name: AverageMeter()
            for name in ['step', 'data', 'loss', 'prec1', 'prec5']
        }
        if training and self.grad_clip > 0:
            meters['grad'] = AverageMeter()

        def meter_results(meters):
            results = {name: meter.avg for name, meter in meters.items()}
            results['error1'] = 100. - results['prec1']
            results['error5'] = 100. - results['prec5']
            return results

        end = time.time()

        for i, (inputs, target) in enumerate(data_loader):
            # measure data loading time
            meters['data'].update(time.time() - end)
            target = target.to(self.device)
            inputs = inputs.to(self.device, dtype=self.dtype)

            output, loss, grad = self._step(inputs, target, training=training)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
            meters['loss'].update(float(loss), inputs.size(0))
            meters['prec1'].update(float(prec1), inputs.size(0))
            meters['prec5'].update(float(prec5), inputs.size(0))
            if grad is not None:
                meters['grad'].update(float(grad), inputs.size(0))

            # measure elapsed time
            meters['step'].update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0:
                report = str(
                    '{phase} - Epoch: [{0}][{1}/{2}]\t'
                    'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                    'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                    'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                    'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                    'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                    .format(self.epoch,
                            i,
                            len(data_loader),
                            phase='TRAINING' if training else 'EVALUATING',
                            meters=meters))
                if 'grad' in meters.keys():
                    report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                        .format(meters=meters)
                logging.info(report)

            if num_steps is not None and i >= num_steps:
                break

        return meter_results(meters)
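
Supplementary note: every example in this listing relies on AverageMeter and accuracy helpers that are never shown. A minimal sketch, assuming the conventional torchvision ImageNet-example semantics (a running val/avg/sum/count meter, and top-k precision in percent); the actual projects may differ in detail:

import torch

class AverageMeter:
    """Tracks the most recent value and a running average."""

    def __init__(self):
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Top-k precision (in percent) for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()  # shape: (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res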
Example #2
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    regularizer = getattr(model, 'regularization', None)
    if args.device_ids and len(args.device_ids) > 1:
        model = torch.nn.DataParallel(model, args.device_ids)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        target = target.to(args.device)
        inputs = inputs.to(args.device, dtype=dtype)

        # compute output
        output = model(inputs)
        loss = criterion(output, target)
        if regularizer is not None:
            loss += regularizer(model)

        if type(output) is list:
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        losses.update(float(loss), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

        if training:
            optimizer.update(epoch, epoch * len(data_loader) + i)
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
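
A hypothetical driver for the forward above (train_loader, val_loader, and args.epochs are assumptions, not part of the snippet): the same function serves both phases, so the caller is responsible for switching the model mode and disabling gradients during evaluation.

for epoch in range(args.epochs):
    model.train()
    train_loss, train_prec1, train_prec5 = forward(
        train_loader, model, criterion, epoch, training=True, optimizer=optimizer)

    model.eval()
    with torch.no_grad():
        val_loss, val_prec1, val_prec5 = forward(
            val_loader, model, criterion, epoch, training=False)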
Example #3
File: run.py, Project: Zhaojp-Frank/CAT
def runTrain(model, args, trainLoader, epoch, optimizer, criterion, logging,
             layer):
    model.train()
    batch_time = AverageMeter()
    totalLosses = AverageMeter()
    ceLosses = AverageMeter()
    paramsLosses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(trainLoader):

        inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        out, params = model(inputs)

        totalLoss, crossEntropyLoss, paramsLoss = criterion(
            out, targets,
            getParamsLoss(params[:layer + 1], len(model.device_ids)))

        totalLoss.backward()
        optimizer.step()

        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        totalLosses.update(totalLoss.item(), inputs.size(0))
        ceLosses.update(crossEntropyLoss.item(), inputs.size(0))
        paramsLosses.update(paramsLoss.item(), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % args.print_freq == 0:
            logging.info(
                'Epoch Train: [{}]\t'
                'Train: [{}/{}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Total Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Cross Entropy Loss {CEloss.val:.4f} ({CEloss.avg:.4f})\t'
                'paramsLoss Loss {paramsLoss.val:.4f} ({paramsLoss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                    epoch,
                    batch_idx + 1,
                    len(trainLoader),
                    batch_time=batch_time,
                    loss=totalLosses,
                    CEloss=ceLosses,
                    paramsLoss=paramsLosses,
                    top1=top1,
                    top5=top5))

    return totalLosses.avg, ceLosses.avg, paramsLosses.avg, top1.avg, top5.avg
Example #4
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            # `async` became a reserved word in Python 3.7; use non_blocking
            target = target.cuda(non_blocking=True)
        # Variable/volatile are deprecated since PyTorch 0.4; plain tensors
        # suffice (wrap evaluation in torch.no_grad() at the call site)
        input_var = inputs.type(args.type)
        target_var = target

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
        if type(output) is list:
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

        if training:
            optimizer.update(epoch, epoch * len(data_loader) + i)
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time,
                             data_time=data_time, loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
Example #5
def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    if args.dump_dir is not None:
        QM().disable()
        DM(args.dump_dir)

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.to(args.device)
            target = target.to(args.device)
            if args.dump_dir is not None and i == 5:
                with DM(args.dump_dir):
                    DM().set_tag('batch%d' % i)
                    # compute output
                    output = model(input)
                    break
            else:
                output = model(input)

            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(float(prec1), input.size(0))
            top5.update(float(prec5), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
Example #6
def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.to(args.device)
            target = target.to(args.device)
            output = model(input)

            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(float(prec1), input.size(0))
            top5.update(float(prec5), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
Example #7
File: run.py, Project: Zhaojp-Frank/CAT
def runTest(model, args, testLoader, epoch, criterion, logging):
    model.eval()
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    entropy = [AverageMeter() for i in range(model.module.depth)]

    end = time.time()

    for batch_idx, (inputs, targets) in enumerate(testLoader):
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            out, params = model(inputs)

            if len(model.device_ids) > 1:  # parallel
                assert len(params[0]) % 2 == 0
                for p in params:
                    p[0:2] = sum(list(torch.split(p, 2))) / len(args.gpu)
            # For parallel implementation - transform dict to tensor
            # 0 - maxStdRatio. 1- MaxMeanRatio . 2- kurtosis. 3 -entropy. 4-act. 5-quantError
        for i in range(model.module.depth):
            entropy[i].update(params[i][1], params[i][0])

        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

    # measure elapsed time
    batch_time.update(time.time() - end)
    logging.info(
        'Epoch Test: [{}]\t'
        'Time ({batch_time.avg:.3f})\t'
        'Entropy {ent} \t'
        # 'Kurtosis {kurt} \t'
        # 'maxStdRatio {mxstd} \t'
        # 'maxMeanRatio {mxmean} \t'
        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
            epoch,
            batch_time=batch_time,
            ent=sum(d.sum for d in entropy) / sum(d.count for d in entropy),
            top1=top1,
            top5=top5))

    return top1.avg, top5.avg, entropy
Example #8
def validate(val_loader, model, criterion, args, device):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader),
                             batch_time,
                             losses,
                             top1,
                             top5,
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1.item(), images.size(0))
            top5.update(acc5.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.print(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                    top5=top5))

    return top1.avg
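
Examples #8 and #10 use the named AverageMeter('Time', ':6.3f') variant together with a ProgressMeter, in the style of the official PyTorch ImageNet example. A minimal sketch under that assumption:

class AverageMeter:
    """Named meter with a format spec, printable via str()."""

    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)


class ProgressMeter:
    def __init__(self, num_batches, *meters, prefix=""):
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        self.batch_fmtstr = '[' + fmt + '/' + fmt.format(num_batches) + ']'
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))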
Example #9
File: run.py, Project: CAT-teams/CAT
def runTest(model, args, testLoader, epoch, criterion, logging):
    model.eval()
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    entropy = [AverageMeter() for i in range(model.module.depth)]
    entropyW = [AverageMeter() for i in range(20)] #change to parameter - this is only for resnet18
    end = time.time()

    for batch_idx, (inputs, targets) in enumerate(testLoader):
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            out, params = model(inputs)

            if len(model.device_ids) > 1:  # parallel
                assert len(params[0]) % 2 == 0
                for p in params:
                    p[0:2] = sum(list(torch.split(p, 2))) / len(args.gpu)
            # For parallel implementation - transform dict to tensor
            # 0 - maxStdRatio. 1- MaxMeanRatio . 2- kurtosis. 3 -entropy. 4-act. 5-quantError
        for i in range(model.module.depth):
            entropy[i].update(params[i][1], params[i][0])

        if args.regul2 > 0:
            for i, m in enumerate(model.modules()):
                if isinstance(m, torch.nn.Conv2d):
                    elems = torch.numel(m.weight)
                    scale = (torch.max(m.weight) - torch.min(m.weight)) / ((2. ** 8) - 1.)
                    qweight = (m.weight.view(-1) - torch.min(m.weight)) / scale
                    numIdxs = int(elems)
                    idx = torch.randperm(numIdxs, device=m.weight.device)[:int(numIdxs / 20)]
                    qweight = qweight[idx]
                    entropyW[i].update(shannon_entropy2(qweight, bits=8), elems)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))
Example #10
def train(train_loader,
          model,
          criterion,
          optimizer,
          epoch,
          args,
          device,
          ml_logger,
          val_loader,
          mq=None,
          weight_to_hook=None,
          w_k_scale=0):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    w_k_losses = AverageMeter('W_K_Loss', ':.4e')
    w_k_vals = AverageMeter('W_K_Val', ':6.2f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             batch_time,
                             data_time,
                             losses,
                             w_k_losses,
                             w_k_vals,
                             top1,
                             top5,
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    best_acc1 = -1
    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        hookF_weights = {}
        for name, w_tensor in weight_to_hook.items():
            # pdb.set_trace()
            hookF_weights[name] = KurtosisWeight(
                w_tensor,
                name,
                kurtosis_target=args.w_kurtosis_target,
                k_mode=args.kurtosis_mode)

        # compute output
        output = model(images)

        w_kurtosis_regularization = 0
        # pdb.set_trace()
        if args.w_kurtosis:
            w_temp_values = []
            w_kurtosis_loss = 0
            for w_kurt_inst in hookF_weights.values():
                # pdb.set_trace()
                w_kurt_inst.fn_regularization()
                w_temp_values.append(w_kurt_inst.kurtosis_loss)
            # pdb.set_trace()
            if args.kurtosis_mode == 'sum':
                w_kurtosis_loss = reduce((lambda a, b: a + b), w_temp_values)
            elif args.kurtosis_mode == 'avg':
                # pdb.set_trace()
                w_kurtosis_loss = reduce((lambda a, b: a + b), w_temp_values)
                if args.arch == 'resnet18':
                    w_kurtosis_loss = w_kurtosis_loss / 19
                elif args.arch == 'mobilenet_v2':
                    w_kurtosis_loss = w_kurtosis_loss / 51
                elif args.arch == 'resnet50':
                    w_kurtosis_loss = w_kurtosis_loss / 52
            elif args.kurtosis_mode == 'max':
                # pdb.set_trace()
                w_kurtosis_loss = reduce((lambda a, b: max(a, b)),
                                         w_temp_values)
            w_kurtosis_regularization = (
                10**w_k_scale) * args.w_lambda_kurtosis * w_kurtosis_loss

        orig_loss = criterion(output, target)
        loss = orig_loss + w_kurtosis_regularization

        if args.w_kurtosis:
            w_temp_values = []
            for w_kurt_inst in hookF_weights.values():
                w_kurt_inst.fn_regularization()
                w_temp_values.append(w_kurt_inst.kurtosis)
            w_kurtosis_val = reduce((lambda a, b: a + b), w_temp_values)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        if args.w_kurtosis:
            # w_kurtosis_val is undefined (and the regularization term is a
            # plain int) when kurtosis regularization is disabled
            w_k_losses.update(w_kurtosis_regularization.item(), images.size(0))
            w_k_vals.update(w_kurtosis_val.item(), images.size(0))
        top1.update(acc1.item(), images.size(0))
        top5.update(acc5.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.print(i)
            ml_logger.log_metric('Train Acc1',
                                 top1.avg,
                                 step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train Loss',
                                 losses.avg,
                                 step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train weight kurtosis Loss',
                                 w_k_losses.avg,
                                 step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train weight kurtosis Val',
                                 w_k_vals.avg,
                                 step='auto',
                                 log_to_tfboard=False)

        # `del` on the loop variable alone is a no-op; drop the dict entries
        # so the KurtosisWeight instances can actually be freed
        hookF_weights.clear()
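
The KurtosisWeight helper above is not shown. In standard terms, the loop computes the sample kurtosis of each weight tensor and penalizes its distance from args.w_kurtosis_target. A hedged sketch of that idea (the class and attribute names mirror the usage above; the exact penalty shape is an assumption):

import torch

class KurtosisWeight:
    """Kurtosis of a weight tensor plus a squared-error penalty to a target.

    Attribute names (kurtosis, kurtosis_loss, fn_regularization) follow the
    training loop above; the concrete loss form is assumed, not confirmed.
    """

    def __init__(self, weight, name, kurtosis_target=1.8, k_mode='avg'):
        self.weight = weight
        self.name = name
        self.kurtosis_target = kurtosis_target
        self.kurtosis = None
        self.kurtosis_loss = None

    def fn_regularization(self):
        w = self.weight.view(-1)
        mean = w.mean()
        std = w.std(unbiased=False)
        # sample kurtosis: E[((w - mu) / sigma)^4]
        self.kurtosis = torch.mean(((w - mean) / (std + 1e-8)) ** 4)
        # penalize the squared distance from the target kurtosis
        self.kurtosis_loss = (self.kurtosis - self.kurtosis_target) ** 2
        return self.kurtosis_loss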
Example #11
    def forward(self,
                data_loader,
                num_steps=None,
                training=False,
                duplicates=1,
                average_output=False,
                chunk_batch=1,
                rec=False):
        if rec: output_embed = {}
        meters = {
            name: AverageMeter()
            for name in ['step', 'data', 'loss', 'prec1', 'prec5']
        }
        if training and self.grad_clip > 0:
            meters['grad'] = AverageMeter()

        batch_first = True
        if training and isinstance(self.model,
                                   nn.DataParallel) or chunk_batch > 1:
            batch_first = False
        if average_output:
            assert duplicates > 1 and batch_first, "duplicates must be > 1 for output averaging"

        def meter_results(meters):
            results = {name: meter.avg for name, meter in meters.items()}
            results['error1'] = 100. - results['prec1']
            results['error5'] = 100. - results['prec5']
            return results

        end = time.time()
        for i, (inputs, target) in enumerate(data_loader):
            if training and duplicates > 1 and self.adapt_grad_norm is not None \
                    and i % self.adapt_grad_norm == 0:
                grad_mean = 0
                num = inputs.size(1)
                for j in range(num):
                    grad_mean += float(
                        self._grad_norm(inputs.select(1, j), target))
                grad_mean /= num
                grad_all = float(
                    self._grad_norm(
                        *_flatten_duplicates(inputs, target, batch_first)))
                self.grad_scale = grad_mean / grad_all
                logging.info('New loss scale: %s', self.grad_scale)

            # measure data loading time
            meters['data'].update(time.time() - end)
            if duplicates > 1:  # multiple versions for each sample (dim 1)
                inputs, target = _flatten_duplicates(
                    inputs,
                    target,
                    batch_first,
                    expand_target=not average_output)

            output, loss, grad = self._step(inputs,
                                            target,
                                            training=training,
                                            average_output=average_output,
                                            chunk_batch=chunk_batch)
            if rec:
                with torch.no_grad():
                    # index with a separate name so the outer batch counter
                    # `i` (used for print_freq and num_steps) is not clobbered
                    for j in range(target.shape[0]):
                        tt = target[j]
                        emb = output[j]
                        output_embed[tt.tolist()] = emb
            if self.pruner is not None:
                with torch.no_grad():
                    if training:
                        compression_rate = self.pruner.calc_param_masks(
                            self.model, i % self.print_freq == 0,
                            i + self.epoch * len(data_loader))
                        if i % self.print_freq == 0:
                            logging.info('Total compression ratio is: ' +
                                         str(compression_rate))
                    self.model = self.pruner.prune_layers(self.model)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            meters['loss'].update(float(loss), inputs.size(0))
            meters['prec1'].update(float(prec1), inputs.size(0))
            meters['prec5'].update(float(prec5), inputs.size(0))
            if grad is not None:
                meters['grad'].update(float(grad), inputs.size(0))

            # measure elapsed time
            meters['step'].update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0:
                report = str(
                    '{phase} - Epoch: [{0}][{1}/{2}]\t'
                    'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                    'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                    'Loss {meters[loss].val:.7f} ({meters[loss].avg:.7f})\t'
                    'Prec@1 {meters[prec1].val:.6f} ({meters[prec1].avg:.6f})\t'
                    'Prec@5 {meters[prec5].val:.6f} ({meters[prec5].avg:.6f})\t'
                    .format(self.epoch,
                            i,
                            len(data_loader),
                            phase='TRAINING' if training else 'EVALUATING',
                            meters=meters))
                if 'grad' in meters.keys():
                    report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                        .format(meters=meters)
                logging.info(report)
            if num_steps is not None and i >= num_steps or (self.update_only_th
                                                            and training
                                                            and i > 2):
                break
        if self.pruner is not None:
            self.pruner.save_eps(epoch=self.epoch + 1)
            self.pruner.save_masks(epoch=self.epoch + 1)

        if rec: torch.save(output_embed, 'output_embed_calib')
        return meter_results(meters)
Example #12
    def forward(self, data_loader, num_steps=None, training=False, duplicates=1, average_output=False, chunk_batch=1):

        meters = {name: AverageMeter()
                  for name in ['step', 'data', 'loss', 'prec1', 'prec5']}
        if training and self.grad_clip > 0:
            meters['grad'] = AverageMeter()

        batch_first = True
        if training and isinstance(self.model, nn.DataParallel) or chunk_batch > 1:
            batch_first = False
        if average_output:
            assert duplicates > 1 and batch_first, "duplicates must be > 1 for output averaging"

        def meter_results(meters):
            results = {name: meter.avg for name, meter in meters.items()}
            results['error1'] = 100. - results['prec1']
            results['error5'] = 100. - results['prec5']
            return results

        end = time.time()

        for i, (inputs, target) in enumerate(data_loader):
            if training and duplicates > 1 and self.adapt_grad_norm is not None \
                    and i % self.adapt_grad_norm == 0:
                grad_mean = 0
                num = inputs.size(1)
                for j in range(num):
                    grad_mean += float(self._grad_norm(inputs.select(1, j), target))
                grad_mean /= num
                grad_all = float(self._grad_norm(
                    *_flatten_duplicates(inputs, target, batch_first)))
                self.grad_scale = grad_mean / grad_all
                logging.info('New loss scale: %s', self.grad_scale)

            # measure data loading time
            meters['data'].update(time.time() - end)
            if duplicates > 1:  # multiple versions for each sample (dim 1)
                inputs, target = _flatten_duplicates(inputs, target, batch_first,
                                                     expand_target=not average_output)

            output, loss, grad = self._step(inputs, target,
                                            training=training,
                                            average_output=average_output,
                                            chunk_batch=chunk_batch)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            meters['loss'].update(float(loss), inputs.size(0))
            meters['prec1'].update(float(prec1), inputs.size(0))
            meters['prec5'].update(float(prec5), inputs.size(0))
            if grad is not None:
                meters['grad'].update(float(grad), inputs.size(0))

            # measure elapsed time
            meters['step'].update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0 or i == len(data_loader) - 1:
                report = str('{phase} - Epoch: [{0}][{1}/{2}]\t'
                             'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                             'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                             'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                             'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                             'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                             .format(
                                 self.epoch, i, len(data_loader),
                                 phase='TRAINING' if training else 'EVALUATING',
                                 meters=meters))
                if 'grad' in meters.keys():
                    report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                        .format(meters=meters)
                logging.info(report)
                self.observe(model=self._model, data=(inputs, target))
                self.stream_meters(meters,
                                   prefix='train' if training else 'eval')

            if num_steps is not None and i >= num_steps:
                break

        return meter_results(meters)
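
_flatten_duplicates is called in Examples #11 and #12 but never defined here; Example #15 below inlines the same transformation. A sketch consistent with those usages (only the batch-first case is covered, and the expand_target branch is an assumption):

def _flatten_duplicates(inputs, target, batch_first=True, expand_target=True):
    """Fold a duplicates dimension (dim 1 when batch_first) into the batch."""
    assert batch_first, "non-batch-first layout not sketched here"
    if expand_target:
        # repeat each label once per duplicate of its sample
        target = target.view(-1, 1).expand(-1, inputs.size(1))
        target = target.flatten(0, 1)
    inputs = inputs.flatten(0, 1)
    return inputs, target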
Example #13
def validate(val_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    if args.dump_dir is not None:
        QM().disable()
        DM(args.dump_dir)

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if (args.stats_mode == 'collect' and i*args.batch_size >= args.cal_set_size and (args.kld_threshold or args.aciq_cal)) or \
                (args.subset is not None and i*args.batch_size >= args.subset):
                break
            if args.measure_entropy and i*args.batch_size >= args.subset:
                break
            # Uncomment to enable dump
            # QM().disable()
            # if i > 0:
            #     break
            if i == 0:
                QM().verbose = True
            input = input.to(args.device)
            target = target.to(args.device)
            if args.dump_dir is not None and i == 5:
                with DM(args.dump_dir):
                    DM().set_tag('batch%d' % i)
                    # compute output
                    output = model(input)
                    break
            else:
                output = model(input)

            QM().reset_counters()
            QM().verbose = False

            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(float(prec1), input.size(0))
            top5.update(float(prec5), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5))

        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
Example #14
    def _evaluate_pvalues_dict(pvalues_dict,
                               logits,
                               labels=None):  #, prefix=''):
        predicted = logits.argmax(1).cpu()
        num_samples = logits.shape[0]
        num_classes = logits.shape[1]
        if labels is not None:
            correct_predictions = labels == predicted
            incorrect_preds = th.logical_not(correct_predictions)

        for reduction_name, pvalues in pvalues_dict.items():
            #reduction_name = f'{prefix}-{reduction_name}'
            # if save_pvalues and bool(re.match(save_pvalues, reduction_name)):
            #     if reduction_name in save_pvalues_dict:
            #         save_pvalues_dict[reduction_name] = th.cat([save_pvalues_dict[reduction_name], pvalues], 0)
            #     else:
            #         save_pvalues_dict[reduction_name] = pvalues

            # measure rejection rates for a range of pvalues under each measure and each reduction
            if reduction_name not in rejected:
                rejected[reduction_name] = MeterDict(
                    meter_factory=SimpleOnlineMeterFactory(batched=True))
            if pvalues.shape[1] != num_classes:
                rejected[reduction_name].update({
                    'joint_pval_roc':
                    gen_curve_fn(pvalues.squeeze(1)),
                    # 'max_pval_roc': gen_curve_fn(best_class_pval),
                })
            else:
                # aggregate p-values or return a per-reduction score
                # best_class_pval, best_class_pval_id = pvalues.max(1)
                class_conditional_pval = pvalues[th.arange(num_samples),
                                                 predicted]
                # joint distribution: a single p-value for all classes
                rejected[reduction_name].update({
                    'class_conditional_pval_roc':
                    gen_curve_fn(class_conditional_pval),
                    # 'max_pval_roc': gen_curve_fn(best_class_pval),
                })
                if in_dist:
                    t1_likely, t5_likely = accuracy(pvalues, labels, (1, 5))

                    # rescaled_outputs = out*pvalues
                    # t1_rescaled, t5_rescaled = accuracy(rescaled_outputs, l, (1, 5))

                    #rescaled_outputs_post_smx = th.nn.functional.softmax(logits, -1) * pvalues
                    #t1_rescaled_smx, t5_rescaled_smx = accuracy(rescaled_outputs_post_smx, labels, (1, 5))

                    accuracy_dict.update({
                        f'{reduction_name}-pval_acc':
                        (th.stack([t1_likely, t5_likely]), num_samples),
                        # f'{reduction_name}-rescaled_t1': (t1_rescaled, out.shape[0]),
                        # f'{reduction_name}-rescaled_t5': (t5_rescaled, out.shape[0]),
                        # f'{reduction_name}-rescaled-smx_acc': (
                        #     th.stack([t1_rescaled_smx, t5_rescaled_smx]), num_samples),
                    })

                    # pvalue of the annotated class
                    true_class_pval = pvalues[th.arange(num_samples), labels]
                    # the pvalue of correct class prediction
                    correct_pred_pvalues = true_class_pval[correct_predictions]
                    # the p-value of the true class when the prediction was wrong
                    true_class_pvalues_on_error = true_class_pval[
                        incorrect_preds]
                    predicted_class_pvalues_on_error = class_conditional_pval[
                        incorrect_preds]
                    rejected[reduction_name].update({
                        'true_pval_mean':
                        true_class_pval,
                        'correct_pval_mean':
                        correct_pred_pvalues,
                        'incorrect_pval_mean':
                        predicted_class_pvalues_on_error,
                        'true_pval_on_error_mean':
                        true_class_pvalues_on_error
                    })

                    rejected[reduction_name].update({
                        'true_pval_roc':
                        gen_curve_fn(true_class_pval),
                        'correct_pval_roc':
                        gen_curve_fn(correct_pred_pvalues),
                        'incorrect_pval_roc':
                        gen_curve_fn(predicted_class_pvalues_on_error),
                    })
Example #15
    def forward(self,
                data_loader,
                num_steps=None,
                training=False,
                duplicates=1):
        meters = {
            name: AverageMeter()
            for name in ['step', 'data', 'loss', 'prec1', 'prec5']
        }
        if training and self.grad_clip > 0:
            meters['grad'] = AverageMeter()

        def meter_results(meters):
            results = {name: meter.avg for name, meter in meters.items()}
            results['error1'] = 100. - results['prec1']
            results['error5'] = 100. - results['prec5']
            return results

        end = time.time()
        if training:
            self.delay_hist = defaultdict(int)
        for i, (inputs, target) in enumerate(data_loader):
            if training:
                self._schedule_worker(self.epoch * len(data_loader) + i)
            if training and tb.tboard.res_iterations:
                tb.tboard.update_step(self.epoch * len(data_loader) + i)
            # measure data loading time
            meters['data'].update(time.time() - end)
            target = target.to(self.device)
            inputs = inputs.to(self.device, dtype=self.dtype)

            if duplicates > 1:  # multiple versions for each sample (dim 1)
                target = target.view(-1, 1).expand(-1, inputs.size(1))
                inputs = inputs.flatten(0, 1)
                target = target.flatten(0, 1)

            output, loss, grad = self._step(inputs, target, training=training)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
            meters['loss'].update(float(loss), inputs.size(0))
            meters['prec1'].update(float(prec1), inputs.size(0))
            meters['prec5'].update(float(prec5), inputs.size(0))
            if grad is not None:
                meters['grad'].update(float(grad), inputs.size(0))

            # measure elapsed time
            meters['step'].update(time.time() - end)
            if training and tb.tboard.res_iterations:
                tb.tboard.log_results(
                    training_loss_iter=float(loss),
                    training_error1_iter=100 - float(prec1),
                    iterations=self.epoch * len(data_loader) + i)
            end = time.time()

            if i % self.print_freq == 0:
                errors = {
                    'error1_val': 100 - meters['prec1'].val,
                    'error5_val': 100 - meters['prec5'].val,
                    'error1_avg': 100 - meters['prec1'].avg,
                    'error5_avg': 100 - meters['prec5'].avg
                }
                report = str(
                    '{phase} - Epoch: [{0}][{1}/{2}]\t'
                    'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                    'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                    'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                    'Error@1 {errors[error1_val]:.3f} ({errors[error1_avg]:.3f})\t'
                    'Error@5 {errors[error5_val]:.3f} ({errors[error5_avg]:.3f})\t'
                    .format(self.epoch,
                            i,
                            len(data_loader),
                            phase='TRAINING' if training else 'EVALUATING',
                            meters=meters,
                            errors=errors))
                if 'grad' in meters.keys():
                    report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})' \
                        .format(meters=meters)
                logging.info(report)

            if num_steps is not None and i >= num_steps:
                break

        return meter_results(meters)
Example #16
    def _predict():
        intermidiate_pvalues = []
        _labels = []
        _logits = []
        model.eval()
        model.to(model_device)
        batch_count = 0
        with th.no_grad():
            for d, l in tqdm.tqdm(loader, total=len(loader)):
                if limit and batch_count * d.shape[0] >= limit:
                    break
                batch_count += 1
                out = model(d.to(model_device))
                _logits.append(out)
                if in_dist:
                    # model accuracy
                    t1, t5 = accuracy(out, l, (1, 5))
                    accuracy_dict.update(
                        {'model_acc': (th.stack([t1, t5]), out.shape[0])})
                    _labels.append(l)

                if keep_intermidiate_pvalues:
                    # TODO: concatenate instead
                    intermidiate_pvalues.append(
                        detector.stats_recorder.record.copy())

                ## extract pvalues and evaluate them
                if isinstance(detector.filter_layer, ls.GroupWhiteListInclude):
                    pvalues_dict_fisher_groups = detector.get_fisher_groups()
                    _extend_master_pvalues_dict(pvalues_dict_fisher_groups,
                                                'fisher_group')
                    #call _evaluate_pvalues_dict to average over each batch (reduce memory)
                    #_evaluate_pvalues_dict(out,l,pvalues_dict_fisher_groups, 'fisher_group')

                pvalues_dict_fisher = detector.get_fisher()
                _extend_master_pvalues_dict(pvalues_dict_fisher, 'fisher')
                #_evaluate_pvalues_dict(out,l,pvalues_dict_fisher, 'fisher')
                if simes_l:
                    pvalues_dict_simes = detector.get_simes()
                    _extend_master_pvalues_dict(pvalues_dict_simes, 'simes')
                    #_evaluate_pvalues_dict(out,l,pvalues_dict_simes, 'simes')

                if fusions:
                    if simes_l:
                        joint_dict = {}
                        for pval_layer_reduction_method, pval_dict in zip(
                            ['simes', 'fisher'],
                            [pvalues_dict_simes, pvalues_dict_fisher]):
                            joint_dict.update({
                                f'{pval_layer_reduction_method}-{rm}': p
                                for rm, p in pval_dict.items()
                            })
                        pvalues_fusion = _fusion_pvalues(joint_dict, 2)
                    else:
                        pvalues_fusion = _fusion_pvalues(
                            pvalues_dict_fisher, 2)
                    _extend_master_pvalues_dict(pvalues_fusion, 'fusion')
                    #_evaluate_pvalues_dict(out,l,pvalues_fusion, 'fusion')

                detector.stats_recorder.record.clear()
                _report(logging.DEBUG)

            return th.cat(_logits), th.cat(
                _labels) if in_dist else None, intermidiate_pvalues
Example #17
def forward(data_loader,
            model,
            criterion,
            epoch=0,
            training=True,
            optimizer=None):
    regularizer = getattr(model, 'regularization', None)
    layers = model.layers
    num_layers = sum(layers)
    num_convs = 1 + 2 * num_layers

    cp_record = AverageMeter()

    bits = [32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

    computation_parameters = []

    for k in range(len(bits)):
        L = [3 * 9 * bits[k] * 0.1]
        computation_parameters.append(L)

    # use a distinct loop name so the `layers` list above is not shadowed
    for group in range(3):
        for block in range(model.layers[0]):
            channels = getattr(model,
                               'layer{}'.format(group + 1))[block].planes
            for j in range(len(bits)):
                cost = channels * 9 * bits[j] * 0.1
                computation_parameters[j].append(cost)
                computation_parameters[j].append(cost)

    ARRAY = np.array(computation_parameters)
    denominator = np.amax(ARRAY)

    for m in range(len(bits)):
        computation_parameters[m] = np.array(computation_parameters[m])
        computation_parameters[m] /= denominator

    if args.device_ids and len(args.device_ids) > 1:
        model = torch.nn.DataParallel(model, args.device_ids)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    bit_assignment_statistics = []

    for k in range(num_convs):
        bit_assignment_statistics.append([])
        for j in range(1, len(bits)):
            cp_ratio = AverageMeter()
            bit_assignment_statistics[k].append(cp_ratio)

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        target = target.to(args.device)
        inputs = inputs.to(args.device, dtype=dtype)

        # compute output
        output, Masks = model(inputs)
        loss = criterion(output, target)

        computation_costs = [0] * len(bits)

        for layer in range(1 + 2 * num_layers):

            computation_costs[0] += reduce(
                (lambda x, y: x * y),
                Masks[layer][0].shape) * computation_parameters[0][layer]

            full_layer = reduce((lambda x, y: x * y), Masks[layer][0].shape)

            for k in range(1, len(bits)):
                computation_costs[k] += Masks[layer][
                    k - 1].sum() * computation_parameters[k][layer]

                dynamic_layer = Masks[layer][k - 1].sum()

                ratio = dynamic_layer / full_layer

                bit_assignment_statistics[layer][k - 1].update(ratio)

        total_cost = sum(computation_costs[1:])

        original_cost = computation_costs[0]

        compression_rate = original_cost.item() / total_cost.item()

        total_cost *= args.beta

        # args.computation_cost = False

        if args.computation_cost:
            loss += total_cost

        if regularizer is not None:
            loss += regularizer(model)

        if type(output) is list:
            output = output[0]

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        losses.update(float(loss), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))
        cp_record.update(compression_rate, 1)

        if training:
            optimizer.update(epoch, epoch * len(data_loader) + i)
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            logging.info(
                '{phase} - Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                'Compression_rate: {cp_record.val:.3f}({cp_record.avg:.3f})\t'.
                format(epoch,
                       i,
                       len(data_loader),
                       phase='TRAINING' if training else 'EVALUATING',
                       batch_time=batch_time,
                       data_time=data_time,
                       loss=losses,
                       top1=top1,
                       top5=top5,
                       cp_record=cp_record))

    for layer in range(num_convs):
        print('layer{}'.format(layer + 1))
        for g in range(1, len(bits)):
            bit = bits[g]
            print('{}-bit ratio: {}'.format(
                bit, bit_assignment_statistics[layer][g - 1].avg))

    return losses.avg, top1.avg, top5.avg
Example #18
    def forward(self,
                data_loader,
                num_steps=None,
                training=False,
                average_output=False,
                chunk_batch=1,
                ml_logger=None,
                collectStats=False,
                lbl=False):

        meters = {
            name: AverageMeter()
            for name in ['step', 'data', 'loss', 'prec1', 'prec5']
        }
        if training and self.grad_clip > 0:
            meters['grad'] = AverageMeter()

        batch_first = True
        if training and isinstance(self.model,
                                   nn.DataParallel) or chunk_batch > 1:
            batch_first = False

        def meter_results(meters):
            results = {name: meter.avg for name, meter in meters.items()}
            results['error1'] = 100. - results['prec1']
            results['error5'] = 100. - results['prec5']
            return results

        end = time.time()
        stepsCollectStats = np.random.permutation(len(data_loader))[:9]
        # np.append returns a new array; reassign so step 0 is always sampled
        stepsCollectStats = np.append(stepsCollectStats, 0)

        for i, (inputs, target) in enumerate(data_loader):

            # measure data loading time
            meters['data'].update(time.time() - end)

            if collectStats:
                handle = []
                for m in self._model.modules():
                    if isinstance(m, ZeroBN):
                        handle.append(
                            m.register_backward_hook(
                                self.collectGradLayerByLayer))

            if not collectStats and training and ml_logger is not None and i in stepsCollectStats:
                handle2 = []
                for m in self._model.modules():
                    if isinstance(m, Conv2dStats):
                        handle2.append(m.register_backward_hook(
                            self.saveStats))

            output, loss, grad = self._step(inputs,
                                            target,
                                            training=training,
                                            average_output=average_output,
                                            chunk_batch=chunk_batch,
                                            ml_logger=ml_logger,
                                            collectStats=i
                                            in stepsCollectStats,
                                            first_batch=i == 0)

            if collectStats:
                for h in handle:
                    h.remove()

            if not collectStats and training and ml_logger is not None and i in stepsCollectStats:
                for h in handle2:
                    h.remove()

            if training and ml_logger is not None and i in stepsCollectStats:

                totalZeros = 0
                totalMinusTau = 0
                totalTau = 0
                totalElems = 0
                for m in self.model.modules():
                    if isinstance(m, Conv2dStats):
                        ml_logger.log_metric(m.fullName +
                                             'Grad output sparsifty',
                                             m.gradOutputSparsity / m.elems,
                                             step='auto',
                                             log_to_tfboard=False)
                        ml_logger.log_metric(m.fullName + 'Grad output Tau',
                                             m.gradOutputTau / m.elems,
                                             step='auto',
                                             log_to_tfboard=False)
                        ml_logger.log_metric(m.fullName +
                                             'Grad output Minus Tau',
                                             m.gradOutputMinusTau / m.elems,
                                             step='auto',
                                             log_to_tfboard=False)

                        totalElems += m.elems
                        totalZeros += m.gradOutputSparsity
                        totalMinusTau += m.gradOutputMinusTau
                        totalTau += m.gradOutputTau

                if totalElems > 0:
                    ml_logger.log_metric('Total Zeros',
                                         totalZeros / totalElems,
                                         step='auto',
                                         log_to_tfboard=False)
                    ml_logger.log_metric('Total Tau',
                                         totalTau / totalElems,
                                         step='auto',
                                         log_to_tfboard=False)
                    ml_logger.log_metric('Total Minus Tau',
                                         totalMinusTau / totalElems,
                                         step='auto',
                                         log_to_tfboard=False)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            meters['loss'].update(float(loss), inputs.size(0))
            meters['prec1'].update(float(prec1), inputs.size(0))
            meters['prec5'].update(float(prec5), inputs.size(0))
            if grad is not None:
                meters['grad'].update(float(grad), inputs.size(0))

            # measure elapsed time
            meters['step'].update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0 or i == len(data_loader) - 1:
                if training and ml_logger is not None:
                    ml_logger.log_metric('Train Acc1',
                                         meters['prec1'].avg,
                                         step='auto',
                                         log_to_tfboard=False)
                    ml_logger.log_metric('Train Acc5',
                                         meters['prec5'].avg,
                                         step='auto',
                                         log_to_tfboard=False)
                report = str(
                    '{phase} - Epoch: [{0}][{1}/{2}]\t'
                    'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                    'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                    'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                    'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                    'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                    .format(self.epoch,
                            i,
                            len(data_loader),
                            phase='TRAINING' if training else 'EVALUATING',
                            meters=meters))
                if 'grad' in meters.keys():
                    report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})' \
                        .format(meters=meters)
                logging.info(report)
                self.observe(trainer=self,
                             model=self._model,
                             optimizer=self.optimizer,
                             data=(inputs, target))
                self.stream_meters(meters,
                                   prefix='train' if training else 'eval')
                if training:
                    self.write_stream(
                        'lr',
                        (self.training_steps, self.optimizer.get_lr()[0]))

            if num_steps is not None and i >= num_steps:
                break

        return meter_results(meters)
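
Conv2dStats is not defined in this snippet; the loop above only reads its counters. A minimal sketch of how such a module might accumulate gradient-output statistics with a backward hook — the attribute names (fullName, elems, gradOutputSparsity, gradOutputTau, gradOutputMinusTau) come from the logging code, while the tau threshold and the hook itself are assumptions:

import torch.nn as nn

class Conv2dStats(nn.Conv2d):
    # Sketch only: counts grad-output elements that are exactly zero,
    # +tau or -tau. The tau value and comparison are assumptions; only
    # the counter names are taken from the logging code above.
    def __init__(self, *args, tau=2 ** -10, **kwargs):
        super().__init__(*args, **kwargs)
        self.tau = tau
        self.fullName = ''
        self.elems = 0
        self.gradOutputSparsity = 0
        self.gradOutputTau = 0
        self.gradOutputMinusTau = 0
        self.register_full_backward_hook(self._collect_grad_stats)

    @staticmethod
    def _collect_grad_stats(module, grad_input, grad_output):
        g = grad_output[0]
        module.elems += g.numel()
        module.gradOutputSparsity += int((g == 0).sum())
        module.gradOutputTau += int((g == module.tau).sum())
        module.gradOutputMinusTau += int((g == -module.tau).sum())
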
Example #19
def runTrain(model,
             args,
             trainLoader,
             epoch,
             optimizer,
             criterion,
             logging,
             use_corr=False):
    model.train()
    batch_time = AverageMeter()
    totalLosses = AverageMeter()
    ceLosses = AverageMeter()
    corrLosses = AverageMeter()
    eLosses = AverageMeter()
    eiLosses = AverageMeter()
    leLosses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(trainLoader):
        inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        out = model(inputs)
        # sum the correlation penalties exposed by modules with a 'corr' attribute
        corr = torch.sum(
            torch.stack(
                [m.corr for m in model.modules() if hasattr(m, "corr")]))

        totalLoss, crossEntropyLoss, corrLoss = criterion(out, targets, corr)
        if use_corr:
            ls = totalLoss
        else:
            ls = crossEntropyLoss
        if args.ea:
            eloss = None
            eiloss = None
            leloss = None
            for layer in model.modules():
                if hasattr(layer, 'entropy_loss_value'):
                    if eloss is None:
                        eloss = layer.entropy_loss_value
                    else:
                        eloss += layer.entropy_loss_value
                    # keep the most recent layer's entropy loss; logged as the
                    # last-layer entropy below
                    leloss = layer.entropy_loss_value.item()
                    if args.ei:
                        if eiloss is None:
                            eiloss = layer.entropy_value.mean()
                        else:
                            eiloss += layer.entropy_value.mean()

            if eloss is not None:
                ls += args.ea_lr * eloss
                eLosses.update(eloss.item(), inputs.size(0))
                leLosses.update(leloss, inputs.size(0))
            if eiloss is not None:
                ls += args.ei_lr * eiloss
                eiLosses.update(eiloss.item(), inputs.size(0))
        ls.backward()
        optimizer.step()

        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        totalLosses.update(ls.item(), inputs.size(0))
        ceLosses.update(crossEntropyLoss.item(), inputs.size(0))
        corrLosses.update(corrLoss.item(), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % args.print_freq == 0:
            logging.info(
                'Epoch Train: [{}]\t'
                'Train: [{}/{}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Total Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Cross Entropy Loss {CEloss.val:.4f} ({CEloss.avg:.4f})\t'
                'Entropy Loss {Eloss.val:.4f} ({Eloss.avg:.4f})\t'
                'Last layer entropy MSE {lEloss.val:.4f} ({lEloss.avg:.4f})\t'
                'Entropy I Loss {EIloss.val:.4f} ({EIloss.avg:.4f})\t'
                'Correlation Loss {Corrloss.val:.4f} ({Corrloss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                    epoch,
                    batch_idx + 1,
                    len(trainLoader),
                    batch_time=batch_time,
                    loss=totalLosses,
                    CEloss=ceLosses,
                    lEloss=leLosses,
                    EIloss=eiLosses,
                    Eloss=eLosses,
                    Corrloss=corrLosses,
                    top1=top1,
                    top5=top5))

    return totalLosses.avg, ceLosses.avg, corrLosses.avg, top1.avg, top5.avg
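
Both runTrain and runTest unpack criterion(out, targets, corr) into a (totalLoss, crossEntropyLoss, corrLoss) triple. A minimal sketch of a criterion with that contract, assuming a plain weighted sum; the corr_weight coefficient is hypothetical:

import torch.nn as nn

class CrossEntropyWithCorr(nn.Module):
    # Hypothetical criterion matching the (out, targets, corr) call
    # signature used above; corr_weight is an assumed hyperparameter.
    def __init__(self, corr_weight=1e-4):
        super().__init__()
        self.ce = nn.CrossEntropyLoss()
        self.corr_weight = corr_weight

    def forward(self, out, targets, corr):
        ce_loss = self.ce(out, targets)
        corr_loss = self.corr_weight * corr
        return ce_loss + corr_loss, ce_loss, corr_loss
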
Example #20
def runTest(model, args, testLoader, epoch, criterion, logging):
    model.eval()
    batch_time = AverageMeter()
    totalLosses = AverageMeter()
    ceLosses = AverageMeter()
    corrLosses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    entropy = 0
    act_count = 0
    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(testLoader):
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            out = model(inputs)
            corr = torch.sum(
                torch.stack(
                    [m.corr for m in model.modules() if hasattr(m, "corr")]))
            totalLoss, crossEntropyLoss, corrLoss = criterion(
                out, targets, corr)

            entropy += np.sum(
                np.array([
                    x.bit_count for x in model.modules()
                    if hasattr(x, "bit_count")
                ]))
            act_count += np.sum(
                np.array([
                    x.act_size for x in model.modules()
                    if hasattr(x, "act_size")
                ]))
        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        totalLosses.update(totalLoss.item(), inputs.size(0))
        ceLosses.update(crossEntropyLoss.item(), inputs.size(0))
        corrLosses.update(corrLoss.item(), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

    # measure elapsed time
    batch_time.update(time.time() - end)
    # average bit_count per activation element (guard against act_count == 0)
    avgEntropy = float(entropy) / act_count if act_count else 0.0
    logging.info('Epoch Test: [{}]\t'
                 'Time ({batch_time.avg:.3f})\t'
                 'Total Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                 'Cross Entropy Loss {CEloss.val:.4f} ({CEloss.avg:.4f})\t'
                 'Correlation Loss {Corrloss.val:.4f} ({Corrloss.avg:.4f})\t'
                 'Entropy {ent} \t'
                 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                     epoch,
                     batch_time=batch_time,
                     loss=totalLosses,
                     CEloss=ceLosses,
                     Corrloss=corrLosses,
                     ent=avgEntropy,
                     top1=top1,
                     top5=top5))

    return totalLosses.avg, ceLosses.avg, corrLosses.avg, top1.avg, top5.avg, avgEntropy
Example #21
def train(train_loader,
          model,
          criterion,
          optimizer,
          epoch,
          args,
          device,
          ml_logger,
          val_loader,
          mq=None):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             batch_time,
                             data_time,
                             losses,
                             top1,
                             top5,
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    best_acc1 = -1
    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1.item(), images.size(0))
        top5.update(acc5.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.print(i)
            ml_logger.log_metric('Train Acc1',
                                 top1.avg,
                                 step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train Loss',
                                 losses.avg,
                                 step='auto',
                                 log_to_tfboard=False)
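
ProgressMeter is not defined here; this loop matches the torchvision ImageNet reference script, so below is a minimal sketch along those lines, compatible with the positional-meter construction and progress.print(i) calls above. It assumes the AverageMeter('Time', ':6.3f') variant that defines __str__:

class ProgressMeter(object):
    # Sketch modeled on the torchvision ImageNet example; accepts the
    # meters positionally, as in the train() call above.
    def __init__(self, num_batches, *meters, prefix=""):
        self.batch_fmtstr = self._batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    @staticmethod
    def _batch_fmtstr(num_batches):
        num_digits = len(str(num_batches))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'
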
Example #22
    def forward(self,
                data_loader,
                num_steps=None,
                training=False,
                average_output=False,
                chunk_batch=1):
        self.train_batches = len(data_loader)
        meters = {
            name: AverageMeter()
            for name in [
                'step', 'data', 'loss', 'prec1', 'prec5', 'samples',
                'confidence'
            ]
        }
        if training and self.grad_clip > 0:
            meters['grad'] = AverageMeter()
        if self.calc_grad_var is not None:
            var_meter = OnlineMeter()
            meters['grad_var'] = AverageMeter()

        batch_first = True
        if (training and isinstance(self.model, nn.DataParallel)) \
                or chunk_batch > 1:
            batch_first = False

        def meter_results(meters):
            results = {name: meter.avg for name, meter in meters.items()}
            results['error1'] = 100. - results['prec1']
            results['error5'] = 100. - results['prec5']
            return results

        end = time.time()

        for i, (inputs, target) in enumerate(data_loader):

            # measure data loading time
            meters['data'].update(time.time() - end)
            inputs = inputs.to(self.device, dtype=self.dtype)
            target = target.to(self.device)

            if training:
                inputs, target = self.select_hard_samples(
                    inputs, target, meters)

            output, loss, grad = self._step(inputs,
                                            target,
                                            training=training,
                                            average_output=average_output,
                                            chunk_batch=chunk_batch)

            if self.calc_grad_var is not None:
                var_meter.update(
                    self.collect_flatten_grads_(self.model.parameters()))
                if (self.training_steps + 1) % self.calc_grad_var == 0:
                    meters['grad_var'].update(float(var_meter.var.mean()),
                                              inputs.size(0))
                    var_meter.needs_init = True

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            meters['loss'].update(float(loss), inputs.size(0))
            meters['prec1'].update(float(prec1), inputs.size(0))
            meters['prec5'].update(float(prec5), inputs.size(0))

            if grad is not None:
                meters['grad'].update(float(grad), inputs.size(0))

            # measure elapsed time
            meters['step'].update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0 or i == len(data_loader) - 1:
                report = str(
                    '{phase} - Epoch: [{0}][{1}/{2}]\t'
                    'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                    'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                    'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                    'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                    'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                    # 'Samples {meters[samples].val}\t'
                    'Confidence {meters[confidence].val}\t'.format(
                        self.epoch,
                        i,
                        len(data_loader),
                        phase='TRAINING' if training else 'EVALUATING',
                        meters=meters))
                if 'grad' in meters.keys():
                    report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                        .format(meters=meters)
                logging.info(report)
                self.observe(trainer=self,
                             model=self._model,
                             optimizer=self.optimizer,
                             data=(inputs, target))
                self.stream_meters(meters,
                                   prefix='train' if training else 'eval')
                if training:
                    self.write_stream(
                        'lr',
                        (self.training_steps, self.optimizer.get_lr()[0]))

            if num_steps is not None and i >= num_steps:
                break

        return meter_results(meters)
Example #23
    def forward(self,
                data_loader,
                num_steps=None,
                training=False,
                average_output=False,
                chunk_batch=1,
                scheduled_instructions=None):

        meters = {name: AverageMeter()
                  for name in ['step', 'data', 'loss', 'prec1', 'prec5']}
        if training and self.grad_clip > 0:
            meters['grad'] = AverageMeter()

        meters_grad = {name: {'mean': AverageMeter(), 'std': AverageMeter()} for name in self.module_to_hook.keys()}

        batch_first = True
        if (training and isinstance(self.model, nn.DataParallel)) or chunk_batch > 1:
            batch_first = False

        def meter_results(meters):
            results = {name: meter.avg for name, meter in meters.items()}
            results['error1'] = 100. - results['prec1']
            results['error5'] = 100. - results['prec5']
            return results

        end = time.time()

        for i, (inputs, target) in enumerate(data_loader):
            duplicates = inputs.dim() > 4  # B x D x C x H x W
            if training and duplicates and self.adapt_grad_norm is not None \
                    and i % self.adapt_grad_norm == 0:
                grad_mean = 0
                num = inputs.size(1)
                for j in range(num):
                    grad_mean += float(self._grad_norm(inputs.select(1, j), target))
                grad_mean /= num
                grad_all = float(self._grad_norm(
                    *_flatten_duplicates(inputs, target, batch_first)))
                self.grad_scale = grad_mean / grad_all
                logging.info('New loss scale: %s', self.grad_scale)

            # measure data loading time
            meters['data'].update(time.time() - end)
            if duplicates:  # multiple versions for each sample (dim 1)
                inputs, target = _flatten_duplicates(inputs, target, batch_first,
                                                     expand_target=not average_output)

            output, loss, grad, grad_log_stats = self._step(
                inputs, target,
                training=training,
                average_output=average_output,
                chunk_batch=chunk_batch,
                scheduled_instructions=scheduled_instructions)

            # print("grad_log_stats!!!")
            # print(grad_log_stats)
            # pdb.set_trace()
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            meters['loss'].update(float(loss), inputs.size(0))
            meters['prec1'].update(float(prec1), inputs.size(0))
            meters['prec5'].update(float(prec5), inputs.size(0))
            if grad is not None:
                meters['grad'].update(float(grad), inputs.size(0))

            for name, met in meters_grad.items():
                met['mean'].update(float(grad_log_stats[name]['mean']), inputs.size(0))
                met['std'].update(float(grad_log_stats[name]['std']), inputs.size(0))


            # measure elapsed time
            meters['step'].update(time.time() - end)
            end = time.time()

            if i % self.print_freq == 0 or i == len(data_loader) - 1:
                report = str('{phase} - Epoch: [{0}][{1}/{2}]\t'
                             'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                             'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                             'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                             'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                             'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                             .format(
                                 self.epoch, i, len(data_loader),
                                 phase='TRAINING' if training else 'EVALUATING',
                                 meters=meters))
                if 'grad' in meters.keys():
                    report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                        .format(meters=meters)
                logging.info(report)
                self.observe(trainer=self,
                             model=self._model,
                             optimizer=self.optimizer,
                             data=(inputs, target))
                self.stream_meters(meters,
                                   prefix='train' if training else 'eval')
                if training:
                    self.write_stream('lr',
                                      (self.training_steps, self.optimizer.get_lr()[0]))

            if num_steps is not None and i >= num_steps:
                break

        # print("grad_log_stats! loop 2")
        if training:
            for name, met in meters_grad.items():
                print("module name: " + str(name) + " mean_grad: " + str(met['mean'].avg) + " std_grad: " + str(met['std'].avg))

        return meter_results(meters), meters_grad
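
Neither self.module_to_hook nor the grad_log_stats returned by _step is shown. A minimal sketch of collecting per-module gradient mean/std with backward hooks — the helper name and hook placement are assumptions; only the {name: {'mean': ..., 'std': ...}} layout is taken from the meters_grad updates above:

def register_grad_stat_hooks(module_to_hook):
    # Hypothetical helper: records mean/std of each hooked module's
    # grad output into a dict shaped like grad_log_stats above.
    grad_log_stats = {}
    handles = []

    def make_hook(name):
        def hook(module, grad_input, grad_output):
            g = grad_output[0].detach()
            grad_log_stats[name] = {'mean': g.mean(), 'std': g.std()}
        return hook

    for name, module in module_to_hook.items():
        handles.append(module.register_full_backward_hook(make_hook(name)))
    return grad_log_stats, handles
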
Example #24
File: run.py Project: CAT-teams/CAT
                    qweight = qweight[idx]
                    EntrTotal.append(soft_entropy(qweight, bits=8, temp=-10) * elems)
                    totalElems += elems
            EntrTotal = sum(EntrTotal) / totalElems
        else:
            EntrTotal = 0

        totalLoss, crossEntropyLoss, paramsLoss, paramLoss2 = criterion(
            out, targets,
            getParamsLoss(params[:layer + 1], len(model.device_ids)),
            EntrTotal)

        totalLoss.backward()
        optimizer.step()

        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        totalLosses.update(totalLoss.item(), inputs.size(0))
        ceLosses.update(crossEntropyLoss.item(), inputs.size(0))
        paramsLosses.update(paramsLoss.item(), inputs.size(0))
        paramsLosses2.update(paramLoss2.item(), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % args.print_freq == 0:
            logging.info('Epoch Train: [{}]\t'
                         'Train: [{}/{}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'