def forward(self, data_loader, num_steps=None, training=False):
    """Run one pass over ``data_loader`` (train or eval, per ``training``).

    Returns a dict of meter averages for 'step', 'data', 'loss', 'prec1',
    'prec5' (plus 'grad' when gradient clipping is active), with derived
    'error1'/'error5' entries (100 - precision).
    """
    # Running averages for per-batch timing, loss and top-1/top-5 precision.
    meters = {
        name: AverageMeter()
        for name in ['step', 'data', 'loss', 'prec1', 'prec5']
    }
    # The gradient-norm meter only exists when clipping is enabled in training.
    if training and self.grad_clip > 0:
        meters['grad'] = AverageMeter()

    def meter_results(meters):
        # Collapse each meter to its average and add top-1/top-5 error rates.
        results = {name: meter.avg for name, meter in meters.items()}
        results['error1'] = 100. - results['prec1']
        results['error5'] = 100. - results['prec5']
        return results

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        meters['data'].update(time.time() - end)
        target = target.to(self.device)
        inputs = inputs.to(self.device, dtype=self.dtype)
        # _step runs the forward pass (and backward/optimizer step when
        # training); `grad` is a gradient norm or None.
        output, loss, grad = self._step(inputs, target, training=training)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        meters['loss'].update(float(loss), inputs.size(0))
        meters['prec1'].update(float(prec1), inputs.size(0))
        meters['prec5'].update(float(prec5), inputs.size(0))
        if grad is not None:
            # NOTE(review): assumes grad is non-None only when the 'grad'
            # meter was created (training with grad_clip > 0) — confirm
            # against _step; otherwise this raises KeyError.
            meters['grad'].update(float(grad), inputs.size(0))
        # measure elapsed time
        meters['step'].update(time.time() - end)
        end = time.time()
        if i % self.print_freq == 0:
            report = str(
                '{phase} - Epoch: [{0}][{1}/{2}]\t'
                'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                .format(self.epoch, i, len(data_loader),
                        phase='TRAINING' if training else 'EVALUATING',
                        meters=meters))
            if 'grad' in meters.keys():
                report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                    .format(meters=meters)
            logging.info(report)
        if num_steps is not None and i >= num_steps:
            break
    return meter_results(meters)
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one epoch of training or evaluation over ``data_loader``.

    When ``training`` is True, ``optimizer`` must be provided; its
    ``update``/``zero_grad``/``step`` methods drive the SGD step.
    Returns (avg loss, avg top-1 precision, avg top-5 precision).
    """
    # Optional model-attached regularization term (None when absent).
    regularizer = getattr(model, 'regularization', None)
    # Wrap in DataParallel only for multi-device runs.
    if args.device_ids and len(args.device_ids) > 1:
        model = torch.nn.DataParallel(model, args.device_ids)
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        target = target.to(args.device)
        # NOTE(review): `dtype` is read from enclosing module scope — confirm
        # it is defined at file level.
        inputs = inputs.to(args.device, dtype=dtype)
        # compute output
        output = model(inputs)
        loss = criterion(output, target)
        if regularizer is not None:
            loss += regularizer(model)
        # Some models return auxiliary outputs; accuracy uses the first.
        if type(output) is list:
            output = output[0]
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        losses.update(float(loss), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))
        if training:
            # Custom optimizer wrapper: `update` adjusts hyper-params per step.
            optimizer.update(epoch, epoch * len(data_loader) + i)
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time, data_time=data_time,
                             loss=losses, top1=top1, top5=top5))
    return losses.avg, top1.avg, top5.avg
def runTrain(model, args, trainLoader, epoch, optimizer, criterion, logging, layer):
    """Train ``model`` for one epoch.

    The criterion combines cross-entropy with a parameter-cost penalty
    computed over the first ``layer + 1`` blocks.
    Returns (total-loss avg, cross-entropy avg, params-loss avg,
    top-1 avg, top-5 avg).
    """
    model.train()
    # One running-average meter per tracked quantity.
    time_meter, loss_meter, ce_meter, params_meter, acc1_meter, acc5_meter = (
        AverageMeter() for _ in range(6))

    tick = time.time()
    for step, (batch_in, batch_tgt) in enumerate(trainLoader):
        batch_in, batch_tgt = batch_in.cuda(), batch_tgt.cuda()

        optimizer.zero_grad()
        logits, params = model(batch_in)
        combined, ce_part, params_part = criterion(
            logits, batch_tgt,
            getParamsLoss(params[:layer + 1], len(model.device_ids)))
        combined.backward()
        optimizer.step()

        # measure accuracy and record loss
        n = batch_in.size(0)
        acc1, acc5 = accuracy(logits, batch_tgt, topk=(1, 5))
        loss_meter.update(combined.item(), n)
        ce_meter.update(ce_part.item(), n)
        params_meter.update(params_part.item(), n)
        acc1_meter.update(float(acc1), n)
        acc5_meter.update(float(acc5), n)

        # measure elapsed time
        time_meter.update(time.time() - tick)
        tick = time.time()

        if step % args.print_freq == 0:
            logging.info(
                'Epoch Train: [{}]\t'
                'Train: [{}/{}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Total Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Cross Entropy Loss {CEloss.val:.4f} ({CEloss.avg:.4f})\t'
                'paramsLoss Loss {paramsLoss.val:.4f} ({paramsLoss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                    epoch, step + 1, len(trainLoader),
                    batch_time=time_meter, loss=loss_meter, CEloss=ce_meter,
                    paramsLoss=params_meter, top1=acc1_meter, top5=acc5_meter))

    return (loss_meter.avg, ce_meter.avg, params_meter.avg,
            acc1_meter.avg, acc5_meter.avg)
def forward(data_loader, model, criterion, epoch=0, training=True, optimizer=None):
    """Run one epoch of training or evaluation (legacy pre-0.4 PyTorch API).

    Returns (avg loss, avg top-1 precision, avg top-5 precision).
    """
    if args.gpus and len(args.gpus) > 1:
        model = torch.nn.DataParallel(model, args.gpus)
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if args.gpus is not None:
            # FIX: `target.cuda(async=True)` is a SyntaxError on Python 3.7+
            # because `async` became a reserved keyword; the argument was
            # renamed to `non_blocking` in PyTorch.
            target = target.cuda(non_blocking=True)
        # NOTE(review): Variable/volatile and loss.data[0] are the pre-0.4
        # autograd API — confirm the project pins an old torch version; on
        # torch >= 0.4 this path should use torch.no_grad() and loss.item().
        input_var = Variable(inputs.type(args.type), volatile=not training)
        target_var = Variable(target)
        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
        if type(output) is list:
            output = output[0]
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data[0], inputs.size(0))
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))
        if training:
            optimizer.update(epoch, epoch * len(data_loader) + i)
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                         'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                         'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                         'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                         'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             batch_time=batch_time, data_time=data_time,
                             loss=losses, top1=top1, top5=top5))
    return losses.avg, top1.avg, top5.avg
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader``; optionally dump batch #5 stats.

    When ``args.dump_dir`` is set, quantization measurement (QM) is disabled,
    the dump manager (DM) is primed, and evaluation stops right after dumping
    batch index 5. Returns (avg loss, avg top-1, avg top-5).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    if args.dump_dir is not None:
        QM().disable()
        # Instantiating DM with the target directory configures the dumper.
        DM(args.dump_dir)
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.to(args.device)
            target = target.to(args.device)
            if args.dump_dir is not None and i == 5:
                # Dump internal stats for this one batch, then stop the run.
                with DM(args.dump_dir):
                    DM().set_tag('batch%d'%i)
                    # compute output
                    output = model(input)
                    break
            else:
                output = model(input)
            loss = criterion(output, target)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(float(prec1), input.size(0))
            top5.update(float(prec5), input.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))
    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))
    return losses.avg, top1.avg, top5.avg
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader``.

    Returns (avg loss, avg top-1 precision, avg top-5 precision).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    # FIX: removed a dead triple-quoted debug block that referenced
    # `self.model` — this is a free function, not a method, so that code
    # could never have run; also dropped a commented-out `#return`.
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            input = input.to(args.device)
            target = target.to(args.device)
            output = model(input)
            loss = criterion(output, target)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(float(prec1), input.size(0))
            top5.update(float(prec5), input.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))
        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))
    return losses.avg, top1.avg, top5.avg
def runTest(model, args, testLoader, epoch, criterion, logging):
    """Evaluate ``model`` on ``testLoader``, collecting per-layer entropy.

    Returns (top-1 avg, top-5 avg, list of per-layer entropy meters).
    """
    model.eval()
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # One entropy meter per network layer.
    entropy = [AverageMeter() for i in range(model.module.depth)]
    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(testLoader):
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            out, params = model(inputs)
        if len(model.device_ids) > 1:  # parallel
            # Each device contributes a stats slice; fold them back together.
            assert len(params[0]) % 2 == 0
            for p in params:
                p[0:2] = sum(list(torch.split(p, 2))) / len(args.gpu)
        # For parallel implementation - transform dict to tensor
        # 0 - maxStdRatio. 1- MaxMeanRatio . 2- kurtosis. 3 -entropy. 4-act. 5-quantError
        for i in range(model.module.depth):
            entropy[i].update(params[i][1], params[i][0])
        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))
        # measure elapsed time
        # NOTE(review): `end` is never refreshed inside the loop, so this
        # records time since the function started, not per batch — confirm
        # whether cumulative timing is intended.
        batch_time.update(time.time() - end)
    logging.info(
        'Epoch Test: [{}]\t'
        'Time ({batch_time.avg:.3f})\t'
        'Entropy {ent} \t'
        # 'Kurtosis {kurt} \t'
        # 'maxStdRatio {mxstd} \t'
        # 'maxMeanRatio {mxmean} \t'
        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
            epoch, batch_time=batch_time,
            ent=sum(d.sum for d in entropy) / sum(d.count for d in entropy),
            top1=top1, top5=top5))
    return top1.avg, top5.avg, entropy
def validate(val_loader, model, criterion, args, device):
    """Evaluate ``model`` on ``val_loader`` and return the average top-1
    accuracy, printing progress every ``args.print_freq`` batches."""
    time_meter = AverageMeter('Time', ':6.3f')
    loss_meter = AverageMeter('Loss', ':.4e')
    acc1_meter = AverageMeter('Acc@1', ':6.2f')
    acc5_meter = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), time_meter, loss_meter,
                             acc1_meter, acc5_meter, prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (images, target) in enumerate(val_loader):
            images = images.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            # forward pass and loss
            output = model(images)
            loss = criterion(output, target)

            # record loss / accuracy, weighted by batch size
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            n = images.size(0)
            loss_meter.update(loss.item(), n)
            acc1_meter.update(acc1.item(), n)
            acc5_meter.update(acc5.item(), n)

            # per-batch wall time
            time_meter.update(time.time() - tick)
            tick = time.time()

            if step % args.print_freq == 0:
                progress.print(step)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(
            top1=acc1_meter, top5=acc5_meter))

    return acc1_meter.avg
def runTest(model, args, testLoader, epoch, criterion, logging):
    """Evaluate ``model``, collecting activation entropy and (optionally)
    quantized-weight entropy for Conv2d layers when ``args.regul2 > 0``.

    NOTE(review): as visible here the function ends after the accuracy
    updates with no return statement and no final logging — confirm whether
    this chunk is truncated in the original file.
    """
    model.eval()
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    entropy = [AverageMeter() for i in range(model.module.depth)]
    entropyW = [AverageMeter() for i in range(20)]  # change to parameter - this is only for resnet18
    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(testLoader):
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            out, params = model(inputs)
        if len(model.device_ids) > 1:  # parallel
            assert len(params[0]) % 2 == 0
            for p in params:
                p[0:2] = sum(list(torch.split(p, 2))) / len(args.gpu)
        # For parallel implementation - transform dict to tensor
        # 0 - maxStdRatio. 1- MaxMeanRatio . 2- kurtosis. 3 -entropy. 4-act. 5-quantError
        for i in range(model.module.depth):
            entropy[i].update(params[i][1], params[i][0])
        if args.regul2 > 0:
            # Estimate the entropy of each Conv2d's weights after uniform
            # 8-bit quantization, on a random 1/20 subsample of the weights.
            for i, m in enumerate(model.modules()):
                if isinstance(m, torch.nn.Conv2d):
                    elems = torch.numel(m.weight)
                    scale = (torch.max(m.weight) - torch.min(m.weight)) / ((2. ** 8) - 1.)
                    qweight = (m.weight.view(-1) - torch.min(m.weight)) / scale
                    numIdxs = int(elems)
                    idx = torch.randperm(numIdxs, device=m.weight.device)[:int(numIdxs / 20)]
                    qweight = qweight[idx]
                    # NOTE(review): `i` indexes ALL modules, but entropyW has
                    # only 20 slots — this looks like it can raise IndexError
                    # for models whose Conv2d layers sit past index 19; verify.
                    entropyW[i].update(shannon_entropy2(qweight, bits=8), elems)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))
def train(train_loader, model, criterion, optimizer, epoch, args, device,
          ml_logger, val_loader, mq=None, weight_to_hook=None, w_k_scale=0):
    """Train for one epoch with an optional weight-kurtosis regularizer.

    For every hooked weight tensor a KurtosisWeight instance computes a
    kurtosis loss; the combined penalty (scaled by ``10**w_k_scale`` and
    ``args.w_lambda_kurtosis``) is added to the criterion loss.
    ``val_loader`` and ``mq`` are accepted for interface compatibility.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    w_k_losses = AverageMeter('W_K_Loss', ':.4e')
    w_k_vals = AverageMeter('W_K_Val', ':6.2f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             w_k_losses, w_k_vals, top1, top5,
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    # FIX: removed unused local `best_acc1 = -1` (never read).
    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # Fresh kurtosis trackers for each hooked weight tensor.
        hookF_weights = {}
        for name, w_tensor in weight_to_hook.items():
            hookF_weights[name] = KurtosisWeight(
                w_tensor, name, kurtosis_target=args.w_kurtosis_target,
                k_mode=args.kurtosis_mode)

        # compute output
        output = model(images)

        w_kurtosis_regularization = 0
        if args.w_kurtosis:
            w_temp_values = []
            w_kurtosis_loss = 0
            for w_kurt_inst in hookF_weights.values():
                w_kurt_inst.fn_regularization()
                w_temp_values.append(w_kurt_inst.kurtosis_loss)
            if args.kurtosis_mode == 'sum':
                w_kurtosis_loss = reduce((lambda a, b: a + b), w_temp_values)
            elif args.kurtosis_mode == 'avg':
                w_kurtosis_loss = reduce((lambda a, b: a + b), w_temp_values)
                # Per-arch layer counts used to turn the sum into a mean.
                if args.arch == 'resnet18':
                    w_kurtosis_loss = w_kurtosis_loss / 19
                elif args.arch == 'mobilenet_v2':
                    w_kurtosis_loss = w_kurtosis_loss / 51
                elif args.arch == 'resnet50':
                    w_kurtosis_loss = w_kurtosis_loss / 52
            elif args.kurtosis_mode == 'max':
                w_kurtosis_loss = reduce((lambda a, b: max(a, b)), w_temp_values)
            w_kurtosis_regularization = (
                10**w_k_scale) * args.w_lambda_kurtosis * w_kurtosis_loss

        orig_loss = criterion(output, target)
        loss = orig_loss + w_kurtosis_regularization

        if args.w_kurtosis:
            # Also track the raw (unpenalized) kurtosis value for logging.
            w_temp_values = []
            for w_kurt_inst in hookF_weights.values():
                w_kurt_inst.fn_regularization()
                w_temp_values.append(w_kurt_inst.kurtosis)
            w_kurtosis_val = reduce((lambda a, b: a + b), w_temp_values)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        # FIX: only update kurtosis meters when the regularizer is active —
        # previously, with args.w_kurtosis falsy, w_kurtosis_regularization
        # was the int 0 (no .item(), AttributeError) and w_kurtosis_val was
        # unbound (NameError).
        if args.w_kurtosis:
            w_k_losses.update(w_kurtosis_regularization.item(), images.size(0))
            w_k_vals.update(w_kurtosis_val.item(), images.size(0))
        top1.update(acc1.item(), images.size(0))
        top5.update(acc5.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.print(i)
            ml_logger.log_metric('Train Acc1', top1.avg, step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train Loss', losses.avg, step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train weight kurtosis Loss', w_k_losses.avg,
                                 step='auto', log_to_tfboard=False)
            ml_logger.log_metric('Train weight kurtosis Val', w_k_vals.avg,
                                 step='auto', log_to_tfboard=False)

        # Drop the per-batch kurtosis trackers.
        for w_kurt_inst in hookF_weights.values():
            del w_kurt_inst
def forward(self, data_loader, num_steps=None, training=False, duplicates=1,
            average_output=False, chunk_batch=1, rec=False):
    """Run one pass over ``data_loader`` with optional input duplication,
    pruning, and embedding recording (``rec``).

    Returns a dict of meter averages plus 'error1'/'error5'.
    """
    if rec:
        # Maps target label (as python scalar/list) -> last output embedding.
        output_embed = {}
    meters = {
        name: AverageMeter()
        for name in ['step', 'data', 'loss', 'prec1', 'prec5']
    }
    if training and self.grad_clip > 0:
        meters['grad'] = AverageMeter()
    batch_first = True
    if training and isinstance(self.model, nn.DataParallel) or chunk_batch > 1:
        batch_first = False
    if average_output:
        assert duplicates > 1 and batch_first, "duplicates must be > 1 for output averaging"

    def meter_results(meters):
        results = {name: meter.avg for name, meter in meters.items()}
        results['error1'] = 100. - results['prec1']
        results['error5'] = 100. - results['prec5']
        return results

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # Periodically rescale the loss by the ratio of per-duplicate to
        # flattened gradient norms.
        if training and duplicates > 1 and self.adapt_grad_norm is not None \
                and i % self.adapt_grad_norm == 0:
            grad_mean = 0
            num = inputs.size(1)
            for j in range(num):
                grad_mean += float(
                    self._grad_norm(inputs.select(1, j), target))
            grad_mean /= num
            grad_all = float(
                self._grad_norm(
                    *_flatten_duplicates(inputs, target, batch_first)))
            self.grad_scale = grad_mean / grad_all
            logging.info('New loss scale: %s', self.grad_scale)

        # measure data loading time
        meters['data'].update(time.time() - end)
        if duplicates > 1:  # multiple versions for each sample (dim 1)
            inputs, target = _flatten_duplicates(
                inputs, target, batch_first,
                expand_target=not average_output)

        output, loss, grad = self._step(inputs, target, training=training,
                                        average_output=average_output,
                                        chunk_batch=chunk_batch)

        if rec:
            with torch.no_grad():
                # FIX: this loop previously used `i` as its index, clobbering
                # the enumerate() batch counter and corrupting the print_freq
                # gating, the pruner's global step and the num_steps exit.
                for sample_idx in range(target.shape[0]):
                    tt = target[sample_idx]
                    emb = output[sample_idx]
                    output_embed[tt.tolist()] = emb

        if self.pruner is not None:
            with torch.no_grad():
                if training:
                    compression_rate = self.pruner.calc_param_masks(
                        self.model, i % self.print_freq == 0,
                        i + self.epoch * len(data_loader))
                    if i % self.print_freq == 0:
                        logging.info('Total compression ratio is: ' +
                                     str(compression_rate))
                self.model = self.pruner.prune_layers(self.model)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        meters['loss'].update(float(loss), inputs.size(0))
        meters['prec1'].update(float(prec1), inputs.size(0))
        meters['prec5'].update(float(prec5), inputs.size(0))
        if grad is not None:
            meters['grad'].update(float(grad), inputs.size(0))

        # measure elapsed time
        meters['step'].update(time.time() - end)
        end = time.time()

        if i % self.print_freq == 0:
            report = str(
                '{phase} - Epoch: [{0}][{1}/{2}]\t'
                'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                'Loss {meters[loss].val:.7f} ({meters[loss].avg:.7f})\t'
                'Prec@1 {meters[prec1].val:.6f} ({meters[prec1].avg:.6f})\t'
                'Prec@5 {meters[prec5].val:.6f} ({meters[prec5].avg:.6f})\t'
                .format(self.epoch, i, len(data_loader),
                        phase='TRAINING' if training else 'EVALUATING',
                        meters=meters))
            if 'grad' in meters.keys():
                report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                    .format(meters=meters)
            logging.info(report)
        if num_steps is not None and i >= num_steps or (self.update_only_th and training and i > 2):
            break
    if self.pruner is not None:
        self.pruner.save_eps(epoch=self.epoch + 1)
        self.pruner.save_masks(epoch=self.epoch + 1)
    if rec:
        torch.save(output_embed, 'output_embed_calib')
    return meter_results(meters)
def forward(self, data_loader, num_steps=None, training=False, duplicates=1,
            average_output=False, chunk_batch=1):
    """Run one pass over ``data_loader`` with optional per-sample duplicates.

    Streams meter values and observer hooks after each log, and returns a
    dict of meter averages plus 'error1'/'error5'.
    """
    meters = {name: AverageMeter()
              for name in ['step', 'data', 'loss', 'prec1', 'prec5']}
    if training and self.grad_clip > 0:
        meters['grad'] = AverageMeter()
    # Duplicates live on dim 1 unless DataParallel/chunking forces otherwise.
    batch_first = True
    if training and isinstance(self.model, nn.DataParallel) or chunk_batch > 1:
        batch_first = False
    if average_output:
        assert duplicates > 1 and batch_first, "duplicates must be > 1 for output averaging"

    def meter_results(meters):
        # Collapse meters to averages and add error rates.
        results = {name: meter.avg for name, meter in meters.items()}
        results['error1'] = 100. - results['prec1']
        results['error5'] = 100. - results['prec5']
        return results

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # Periodically recompute the loss scale from the ratio between the
        # mean per-duplicate gradient norm and the flattened-batch norm.
        if training and duplicates > 1 and self.adapt_grad_norm is not None \
                and i % self.adapt_grad_norm == 0:
            grad_mean = 0
            num = inputs.size(1)
            for j in range(num):
                grad_mean += float(self._grad_norm(inputs.select(1, j), target))
            grad_mean /= num
            grad_all = float(self._grad_norm(
                *_flatten_duplicates(inputs, target, batch_first)))
            self.grad_scale = grad_mean / grad_all
            logging.info('New loss scale: %s', self.grad_scale)
        # measure data loading time
        meters['data'].update(time.time() - end)
        if duplicates > 1:  # multiple versions for each sample (dim 1)
            inputs, target = _flatten_duplicates(inputs, target, batch_first,
                                                 expand_target=not average_output)

        output, loss, grad = self._step(inputs, target,
                                        training=training,
                                        average_output=average_output,
                                        chunk_batch=chunk_batch)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        meters['loss'].update(float(loss), inputs.size(0))
        meters['prec1'].update(float(prec1), inputs.size(0))
        meters['prec5'].update(float(prec5), inputs.size(0))
        if grad is not None:
            meters['grad'].update(float(grad), inputs.size(0))

        # measure elapsed time
        meters['step'].update(time.time() - end)
        end = time.time()

        # Also log on the final batch, not just at print_freq multiples.
        if i % self.print_freq == 0 or i == len(data_loader) - 1:
            report = str('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                         'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                         'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                         'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                         'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                         .format(
                             self.epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             meters=meters))
            if 'grad' in meters.keys():
                report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                    .format(meters=meters)
            logging.info(report)
            # Feed observers and stream current meter values to listeners.
            self.observe(model=self._model, data=(inputs, target))
            self.stream_meters(meters, prefix='train' if training else 'eval')
        if num_steps is not None and i >= num_steps:
            break
    return meter_results(meters)
def validate(val_loader, model, criterion):
    """Evaluate ``model`` with quantization-measurement hooks.

    Early-exits when calibration/subset limits are reached; when
    ``args.dump_dir`` is set, dumps internals for batch 5 and stops.
    Returns (avg loss, avg top-1, avg top-5).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    if args.dump_dir is not None:
        QM().disable()
        DM(args.dump_dir)
    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            # Stop after the calibration set (stats collection) or the
            # requested evaluation subset has been consumed.
            if (args.stats_mode == 'collect' and i*args.batch_size >= args.cal_set_size
                    and (args.kld_threshold or args.aciq_cal)) or \
                    (args.subset is not None and i*args.batch_size >= args.subset):
                break
            if args.measure_entropy and i*args.batch_size >= args.subset:
                break
            # Uncomment to enable dump
            # QM().disable()
            # if i > 0:
            #     break
            # Verbose quantization logging only for the first batch.
            if i == 0:
                QM().verbose = True
            input = input.to(args.device)
            target = target.to(args.device)
            if args.dump_dir is not None and i == 5:
                with DM(args.dump_dir):
                    DM().set_tag('batch%d'%i)
                    # compute output
                    output = model(input)
                    break
            else:
                output = model(input)
            QM().reset_counters()
            QM().verbose = False
            loss = criterion(output, target)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(float(prec1), input.size(0))
            top5.update(float(prec5), input.size(0))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))
    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))
    return losses.avg, top1.avg, top5.avg
def _evaluate_pvalues_dict(pvalues_dict, logits, labels=None):  #, prefix=''):
    """Fold per-reduction p-values into the shared rejection/accuracy meters.

    ``pvalues_dict`` maps a reduction name to a (num_samples x K) p-value
    tensor; K == num_classes gives class-conditional curves, otherwise the
    tensor is treated as a joint (single) p-value per sample.

    NOTE(review): `rejected`, `accuracy_dict`, `in_dist` and `gen_curve_fn`
    are free variables resolved in an enclosing scope — confirm where they
    are defined before moving this helper.
    """
    predicted = logits.argmax(1).cpu()
    num_samples = logits.shape[0]
    num_classes = logits.shape[1]
    if labels is not None:
        correct_predictions = labels == predicted
        incorrect_preds = th.logical_not(correct_predictions)
    for reduction_name, pvalues in pvalues_dict.items():
        #reduction_name = f'{prefix}-{reduction_name}'
        # if save_pvalues and bool(re.match(save_pvalues, reduction_name)):
        #     if reduction_name in save_pvalues_dict:
        #         save_pvalues_dict[reduction_name] = th.cat([save_pvalues_dict[reduction_name], pvalues], 0)
        #     else:
        #         save_pvalues_dict[reduction_name] = pvalues
        # measure rejection rates for a range of pvalues under each measure and each reduction
        if reduction_name not in rejected:
            rejected[reduction_name] = MeterDict(
                meter_factory=SimpleOnlineMeterFactory(batched=True))
        if pvalues.shape[1] != num_classes:
            # Joint distribution: a single p-value per sample.
            rejected[reduction_name].update({
                'joint_pval_roc': gen_curve_fn(pvalues.squeeze(1)),
                # 'max_pval_roc': gen_curve_fn(best_class_pval),
            })
        else:
            # aggragate pvalues or return per reduction score
            # best_class_pval, best_class_pval_id = pvalues.max(1)
            class_conditional_pval = pvalues[th.arange(num_samples), predicted]
            # joint dstribution: single pvalue for all classes
            rejected[reduction_name].update({
                'class_conditional_pval_roc': gen_curve_fn(class_conditional_pval),
                # 'max_pval_roc': gen_curve_fn(best_class_pval),
            })
            if in_dist:
                # How well do raw p-values rank the true label (top-1/top-5)?
                t1_likely, t5_likely = accuracy(pvalues, labels, (1, 5))
                # rescaled_outputs = out*pvalues
                # t1_rescaled, t5_rescaled = accuracy(rescaled_outputs, l, (1, 5))
                #rescaled_outputs_post_smx = th.nn.functional.softmax(logits, -1) * pvalues
                #t1_rescaled_smx, t5_rescaled_smx = accuracy(rescaled_outputs_post_smx, labels, (1, 5))
                accuracy_dict.update({
                    f'{reduction_name}-pval_acc':
                        (th.stack([t1_likely, t5_likely]), num_samples),
                    # f'{reduction_name}-rescaled_t1': (t1_rescaled, out.shape[0]),
                    # f'{reduction_name}-rescaled_t5': (t5_rescaled, out.shape[0]),
                    # f'{reduction_name}-rescaled-smx_acc': (
                    #     th.stack([t1_rescaled_smx, t5_rescaled_smx]), num_samples),
                })
                # pvalue of the annotated class
                true_class_pval = pvalues[th.arange(num_samples), labels]
                # the pvalue of correct class prediction
                correct_pred_pvalues = true_class_pval[correct_predictions]
                # what was the pvalue of the correct class pval when prediction was wrong
                true_class_pvalues_on_error = true_class_pval[incorrect_preds]
                predicted_class_pvalues_on_error = class_conditional_pval[incorrect_preds]
                rejected[reduction_name].update({
                    'true_pval_mean': true_class_pval,
                    'correct_pval_mean': correct_pred_pvalues,
                    'incorrect_pval_mean': predicted_class_pvalues_on_error,
                    'true_pval_on_error_mean': true_class_pvalues_on_error
                })
                rejected[reduction_name].update({
                    'true_pval_roc': gen_curve_fn(true_class_pval),
                    'correct_pval_roc': gen_curve_fn(correct_pred_pvalues),
                    'incorrect_pval_roc': gen_curve_fn(predicted_class_pvalues_on_error),
                })
def forward(self, data_loader, num_steps=None, training=False, duplicates=1):
    """Run one pass over ``data_loader`` with worker scheduling and optional
    tensorboard per-iteration logging.

    Returns a dict of meter averages plus 'error1'/'error5'.
    """
    meters = {
        name: AverageMeter()
        for name in ['step', 'data', 'loss', 'prec1', 'prec5']
    }
    if training and self.grad_clip > 0:
        meters['grad'] = AverageMeter()

    def meter_results(meters):
        # Collapse meters to averages and add error rates.
        results = {name: meter.avg for name, meter in meters.items()}
        results['error1'] = 100. - results['prec1']
        results['error5'] = 100. - results['prec5']
        return results

    end = time.time()
    if training:
        # Histogram of gradient-staleness delays, refilled each epoch.
        self.delay_hist = defaultdict(int)
    for i, (inputs, target) in enumerate(data_loader):
        if training:
            # Advance the asynchronous-worker schedule to the global step.
            self._schedule_worker(self.epoch * len(data_loader) + i)
        if training and tb.tboard.res_iterations:
            tb.tboard.update_step(self.epoch * len(data_loader) + i)
        # measure data loading time
        meters['data'].update(time.time() - end)
        target = target.to(self.device)
        inputs = inputs.to(self.device, dtype=self.dtype)
        if duplicates > 1:  # multiple versions for each sample (dim 1)
            target = target.view(-1, 1).expand(-1, inputs.size(1))
            inputs = inputs.flatten(0, 1)
            target = target.flatten(0, 1)

        output, loss, grad = self._step(inputs, target, training=training)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        meters['loss'].update(float(loss), inputs.size(0))
        meters['prec1'].update(float(prec1), inputs.size(0))
        meters['prec5'].update(float(prec5), inputs.size(0))
        if grad is not None:
            meters['grad'].update(float(grad), inputs.size(0))

        # measure elapsed time
        meters['step'].update(time.time() - end)
        if training and tb.tboard.res_iterations:
            tb.tboard.log_results(
                training_loss_iter=float(loss),
                training_error1_iter=100 - float(prec1),
                iterations=self.epoch * len(data_loader) + i)
        end = time.time()

        if i % self.print_freq == 0:
            # Report errors (100 - precision) rather than raw precision.
            errors = {
                'error1_val': 100 - meters['prec1'].val,
                'error5_val': 100 - meters['prec5'].val,
                'error1_avg': 100 - meters['prec1'].avg,
                'error5_avg': 100 - meters['prec5'].avg
            }
            report = str(
                '{phase} - Epoch: [{0}][{1}/{2}]\t'
                'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                'Error@1 {errors[error1_val]:.3f} ({errors[error1_avg]:.3f})\t'
                'Error@5 {errors[error5_val]:.3f} ({errors[error5_avg]:.3f})\t'
                .format(self.epoch, i, len(data_loader),
                        phase='TRAINING' if training else 'EVALUATING',
                        meters=meters, errors=errors))
            if 'grad' in meters.keys():
                report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})' \
                    .format(meters=meters)
            logging.info(report)
        if num_steps is not None and i >= num_steps:
            break
    return meter_results(meters)
def _predict():
    """Run the detector over ``loader`` and accumulate p-value statistics.

    Closure over ``model``, ``loader``, ``detector``, ``limit``, ``in_dist``,
    ``simes_l``, ``fusions``, ``model_device`` and the accumulator helpers
    (``_extend_master_pvalues_dict``, ``_fusion_pvalues``, ``_report``,
    ``accuracy_dict``) from the enclosing scope.

    Returns (all logits, all labels or None, intermediate p-values list).
    """
    intermidiate_pvalues = []
    _labels = []
    _logits = []
    model.eval()
    model.to(model_device)
    batch_count = 0
    with th.no_grad():
        for d, l in tqdm.tqdm(loader, total=len(loader)):
            # Optional cap on the number of evaluated samples.
            if limit and batch_count * d.shape[0] >= limit:
                break
            batch_count += 1
            out = model(d.to(model_device))
            _logits.append(out)
            if in_dist:
                # model accuracy
                t1, t5 = accuracy(out, l, (1, 5))
                accuracy_dict.update(
                    {'model_acc': (th.stack([t1, t5]), out.shape[0])})
                _labels.append(l)
            if keep_intermidiate_pvalues:
                #todo concat instead
                intermidiate_pvalues.append(
                    detector.stats_recorder.record.copy())
            ## extract pvalues and evaluate them
            if isinstance(detector.filter_layer, ls.GroupWhiteListInclude):
                pvalues_dict_fisher_groups = detector.get_fisher_groups()
                _extend_master_pvalues_dict(pvalues_dict_fisher_groups,
                                            'fisher_group')
                #call _evaluate_pvalues_dict to average over each batch (reduce memory)
                #_evaluate_pvalues_dict(out,l,pvalues_dict_fisher_groups, 'fisher_group')
            pvalues_dict_fisher = detector.get_fisher()
            _extend_master_pvalues_dict(pvalues_dict_fisher, 'fisher')
            #_evaluate_pvalues_dict(out,l,pvalues_dict_fisher, 'fisher')
            if simes_l:
                pvalues_dict_simes = detector.get_simes()
                _extend_master_pvalues_dict(pvalues_dict_simes, 'simes')
                #_evaluate_pvalues_dict(out,l,pvalues_dict_simes, 'simes')
            if fusions:
                if simes_l:
                    # Fuse Simes and Fisher reductions into one joint dict.
                    joint_dict = {}
                    for pval_layer_reduction_method, pval_dict in zip(
                            ['simes', 'fisher'],
                            [pvalues_dict_simes, pvalues_dict_fisher]):
                        joint_dict.update({
                            f'{pval_layer_reduction_method}-{rm}': p
                            for rm, p in pval_dict.items()
                        })
                    pvalues_fusion = _fusion_pvalues(joint_dict, 2)
                else:
                    pvalues_fusion = _fusion_pvalues(
                        pvalues_dict_fisher, 2)
                _extend_master_pvalues_dict(pvalues_fusion, 'fusion')
                #_evaluate_pvalues_dict(out,l,pvalues_fusion, 'fusion')
            # Clear per-batch recorder state before the next batch.
            detector.stats_recorder.record.clear()
            _report(logging.DEBUG)
    return th.cat(_logits), th.cat(
        _labels) if in_dist else None, intermidiate_pvalues
def forward(data_loader, model, criterion, epoch=0, training=True,
            optimizer=None):
    """One epoch of training/evaluation for a dynamically-quantized model.

    The model's forward returns `(output, Masks)`, where `Masks[layer][k]`
    selects which activations run at `bits[k+1]` precision.  The loop tracks
    a computation-cost penalty (optionally added to the loss when
    `args.computation_cost`) and per-layer bit-assignment ratios.

    Returns:
        (losses.avg, top1.avg, top5.avg)
    """
    regularizer = getattr(model, 'regularization', None)
    layers = model.layers
    num_layers = sum(layers)
    # one stem conv + two convs per block
    num_convs = 1 + 2 * sum(layers)
    cp_record = AverageMeter()
    # First entry (32) is the full-precision baseline; the rest are candidate
    # bit-widths.
    bits = [32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
    computation_parameters = []
    for k in range(len(bits)):
        # Stem conv cost: 3 input channels * 3x3 kernel * bit-width * 0.1
        L = [3 * 9 * bits[k] * 0.1]
        computation_parameters.append(L)
    # NOTE(review): this loop variable shadows the `layers` list bound above;
    # `num_layers`/`num_convs` were computed first so behavior is unaffected,
    # but the shadowing is fragile.
    for layers in range(3):
        # NOTE(review): uses model.layers[0] for every stage — assumes all
        # three stages have the same block count (true for standard CIFAR
        # ResNets); confirm before using with unequal stages.
        for block in range(model.layers[0]):
            channels = getattr(model, 'layer{}'.format(layers + 1))[block].planes
            for j in range(len(bits)):
                cost = channels * 9 * bits[j] * 0.1
                # Appended twice: each block contributes two conv layers.
                computation_parameters[j].append(cost)
                computation_parameters[j].append(cost)
    # Normalize all per-layer costs by the global maximum.
    ARRAY = np.array(computation_parameters)
    denominator = np.amax(ARRAY)
    for m in range(len(bits)):
        computation_parameters[m] = np.array(computation_parameters[m])
        computation_parameters[m] /= denominator
    if args.device_ids and len(args.device_ids) > 1:
        model = torch.nn.DataParallel(model, args.device_ids)
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # bit_assignment_statistics[conv][k] tracks the fraction of activations
    # assigned bit-width bits[k+1] in that conv.
    bit_assignment_statistics = []
    for k in range(num_convs):
        bit_assignment_statistics.append([])
        for j in range(1, len(bits)):
            cp_ratio = AverageMeter()
            bit_assignment_statistics[k].append(cp_ratio)
    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        target = target.to(args.device)
        inputs = inputs.to(args.device, dtype=dtype)
        # compute output
        output, Masks = model(inputs)
        loss = criterion(output, target)
        # computation_costs[0]: full-precision baseline cost;
        # computation_costs[k>0]: cost actually incurred at bits[k].
        computation_costs = [0] * 11
        for layer in range(1 + 2 * num_layers):
            computation_costs[0] += reduce(
                (lambda x, y: x * y),
                Masks[layer][0].shape) * computation_parameters[0][layer]
            full_layer = reduce((lambda x, y: x * y), Masks[layer][0].shape)
            for k in range(1, len(bits)):
                computation_costs[k] += Masks[layer][
                    k - 1].sum() * computation_parameters[k][layer]
                dynamic_layer = Masks[layer][k - 1].sum()
                ratio = dynamic_layer / full_layer
                bit_assignment_statistics[layer][k - 1].update(ratio)
        total_cost = sum(computation_costs[1:])
        original_cost = computation_costs[0]
        compression_rate = original_cost.item() / total_cost.item()
        total_cost *= args.beta
        # args.computation_cost = False
        if args.computation_cost:
            loss += total_cost
        if regularizer is not None:
            loss += regularizer(model)
        if type(output) is list:
            output = output[0]
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        losses.update(float(loss), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))
        cp_record.update(compression_rate, 1)
        if training:
            optimizer.update(epoch, epoch * len(data_loader) + i)
            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            logging.info(
                '{phase} - Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                'Compression_rate: {cp_record.val:.3f}({cp_record.avg:.3f})\t'.
                format(epoch,
                       i,
                       len(data_loader),
                       phase='TRAINING' if training else 'EVALUATING',
                       batch_time=batch_time,
                       data_time=data_time,
                       loss=losses,
                       top1=top1,
                       top5=top5,
                       cp_record=cp_record))
    # Dump the average per-conv bit-assignment ratios for this epoch.
    for layer in range(num_convs):
        print('layer{}'.format(layer + 1))
        for g in range(1, len(bits)):
            bit = bits[g]
            print('{}bit_ratio{}'.format(
                bit, bit_assignment_statistics[layer][g - 1].avg))
    return losses.avg, top1.avg, top5.avg
def forward(self, data_loader, num_steps=None, training=False,
            average_output=False, chunk_batch=1, ml_logger=None,
            collectStats=False, lbl=False):
    """Run one epoch over `data_loader`, optionally collecting gradient
    statistics via backward hooks on selected batches.

    Args:
        data_loader: iterable of (inputs, target) batches.
        num_steps: stop after this many batches when not None.
        training: run optimizer steps (delegated to ``self._step``).
        average_output / chunk_batch: forwarded to ``self._step``.
        ml_logger: optional metric logger; when given during training,
            gradient-sparsity stats are logged on the sampled batches.
        collectStats: register ``ZeroBN`` backward hooks on every batch.
        lbl: unused here; kept for interface compatibility with callers.

    Returns:
        dict of meter averages, including 'error1'/'error5'.
    """
    meters = {
        name: AverageMeter()
        for name in ['step', 'data', 'loss', 'prec1', 'prec5']
    }
    if training and self.grad_clip > 0:
        meters['grad'] = AverageMeter()
    batch_first = True
    if training and isinstance(self.model,
                               nn.DataParallel) or chunk_batch > 1:
        batch_first = False

    def meter_results(meters):
        results = {name: meter.avg for name, meter in meters.items()}
        results['error1'] = 100. - results['prec1']
        results['error5'] = 100. - results['prec5']
        return results

    end = time.time()
    # Sample up to 9 random batch indices for gradient-stat collection.
    stepsCollectStats = np.random.permutation(len(data_loader))[:9]
    # BUG FIX: np.append returns a new array instead of mutating in place;
    # the original discarded the result, so batch 0 was never included.
    stepsCollectStats = np.append(stepsCollectStats, 0)
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        meters['data'].update(time.time() - end)
        if collectStats:
            handle = []
            for m in self._model.modules():
                if isinstance(m, ZeroBN):
                    handle.append(
                        m.register_backward_hook(
                            self.collectGradLayerByLayer))
        if not collectStats and training and ml_logger is not None \
                and i in stepsCollectStats:
            handle2 = []
            for m in self._model.modules():
                if isinstance(m, Conv2dStats):
                    handle2.append(
                        m.register_backward_hook(self.saveStats))
        output, loss, grad = self._step(inputs,
                                        target,
                                        training=training,
                                        average_output=average_output,
                                        chunk_batch=chunk_batch,
                                        ml_logger=ml_logger,
                                        collectStats=i in stepsCollectStats,
                                        first_batch=i == 0)
        # Remove hooks immediately so they only fire for this batch.
        if collectStats:
            for h in handle:
                h.remove()
        if not collectStats and training and ml_logger is not None \
                and i in stepsCollectStats:
            for h in handle2:
                h.remove()
        if training and ml_logger is not None and i in stepsCollectStats:
            # Aggregate per-module gradient sparsity counters and log both
            # per-module and global ratios.
            totalZeros = 0
            totalMinusTau = 0
            totalTau = 0
            totalElems = 0
            for m in self.model.modules():
                if isinstance(m, Conv2dStats):
                    ml_logger.log_metric(m.fullName +
                                         'Grad output sparsifty',
                                         m.gradOutputSparsity / m.elems,
                                         step='auto',
                                         log_to_tfboard=False)
                    ml_logger.log_metric(m.fullName + 'Grad output Tau',
                                         m.gradOutputTau / m.elems,
                                         step='auto',
                                         log_to_tfboard=False)
                    ml_logger.log_metric(m.fullName +
                                         'Grad output Minus Tau',
                                         m.gradOutputMinusTau / m.elems,
                                         step='auto',
                                         log_to_tfboard=False)
                    totalElems += m.elems
                    totalZeros += m.gradOutputSparsity
                    totalMinusTau += m.gradOutputMinusTau
                    totalTau += m.gradOutputTau
            if totalElems > 0:
                ml_logger.log_metric('Total Zeros',
                                     totalZeros / totalElems,
                                     step='auto',
                                     log_to_tfboard=False)
                ml_logger.log_metric('Total Tau',
                                     totalTau / totalElems,
                                     step='auto',
                                     log_to_tfboard=False)
                ml_logger.log_metric('Total Minus Tau',
                                     totalMinusTau / totalElems,
                                     step='auto',
                                     log_to_tfboard=False)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        meters['loss'].update(float(loss), inputs.size(0))
        meters['prec1'].update(float(prec1), inputs.size(0))
        meters['prec5'].update(float(prec5), inputs.size(0))
        if grad is not None:
            meters['grad'].update(float(grad), inputs.size(0))
        # measure elapsed time
        meters['step'].update(time.time() - end)
        end = time.time()
        if i % self.print_freq == 0 or i == len(data_loader) - 1:
            if training and ml_logger is not None:
                ml_logger.log_metric('Train Acc1',
                                     meters['prec1'].avg,
                                     step='auto',
                                     log_to_tfboard=False)
                ml_logger.log_metric('Train Acc5',
                                     meters['prec5'].avg,
                                     step='auto',
                                     log_to_tfboard=False)
            report = str(
                '{phase} - Epoch: [{0}][{1}/{2}]\t'
                'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                .format(self.epoch, i, len(data_loader),
                        phase='TRAINING' if training else 'EVALUATING',
                        meters=meters))
            if 'grad' in meters.keys():
                report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})' \
                    .format(meters=meters)
            logging.info(report)
            self.observe(trainer=self,
                         model=self._model,
                         optimizer=self.optimizer,
                         data=(inputs, target))
            self.stream_meters(meters,
                               prefix='train' if training else 'eval')
            if training:
                self.write_stream(
                    'lr', (self.training_steps, self.optimizer.get_lr()[0]))
        if num_steps is not None and i >= num_steps:
            break
    return meter_results(meters)
def runTrain(model, args, trainLoader, epoch, optimizer, criterion, logging,
             use_corr=False):
    """Train `model` for one epoch with correlation and (optional) entropy
    regularization terms.

    `criterion(out, targets, corr)` must return
    `(totalLoss, crossEntropyLoss, corrLoss)`.  When `use_corr` the combined
    loss is optimized, otherwise plain cross-entropy; `args.ea` additionally
    adds per-layer entropy losses (and `args.ei` entropy-I losses).

    Returns:
        (totalLosses.avg, ceLosses.avg, corrLosses.avg, top1.avg, top5.avg)
    """
    model.train()
    batch_time = AverageMeter()
    totalLosses = AverageMeter()
    ceLosses = AverageMeter()
    corrLosses = AverageMeter()
    eLosses = AverageMeter()
    eiLosses = AverageMeter()
    leLosses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(trainLoader):
        inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        out = model(inputs)
        # for m in model.modules():
        #     if hasattr(m,"corr"):
        #         if 'corr' in locals():
        #             corr = torch.cat((corr, m.corr))
        #         else:
        #             corr = m.corr
        # Sum correlation terms gathered by modules exposing `.corr`.
        corr = torch.sum(
            torch.stack(
                [m.corr for m in model.modules() if hasattr(m, "corr")]))
        totalLoss, crossEntropyLoss, corrLoss = criterion(out, targets, corr)
        if use_corr:
            ls = totalLoss
        else:
            ls = crossEntropyLoss
        if args.ea:
            eloss = None
            eiloss = None
            leloss = None
            cnt = 0
            for layer in model.modules():
                if hasattr(layer, 'entropy_loss_value'):
                    cnt += 1
                    # Accumulate entropy losses across all layers that expose
                    # them.
                    if eloss is None:
                        eloss = layer.entropy_loss_value
                    else:
                        eloss += layer.entropy_loss_value
                    if batch_idx % args.print_freq == 0:
                        pass
                        # print(cnt, layer.entropy_loss_value.item())
                    # Overwritten each iteration -> ends up holding the LAST
                    # layer's entropy loss ("Last layer entropy MSE" below).
                    leloss = layer.entropy_loss_value.item()
                    if args.ei:
                        if eiloss is None:
                            eiloss = layer.entropy_value.mean()
                        else:
                            eiloss += layer.entropy_value.mean()
            if eloss is not None:
                ls += args.ea_lr * eloss
                eLosses.update(eloss.item(), inputs.size(0))
                leLosses.update(leloss, inputs.size(0))
            if eiloss is not None:
                ls += args.ei_lr * eiloss
                eiLosses.update(eiloss.item(), inputs.size(0))
        ls.backward()
        optimizer.step()
        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        totalLosses.update(ls.item(), inputs.size(0))
        ceLosses.update(crossEntropyLoss.item(), inputs.size(0))
        corrLosses.update(corrLoss.item(), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % args.print_freq == 0:
            logging.info(
                'Epoch Train: [{}]\t'
                'Train: [{}/{}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Total Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Cross Entropy Loss {CEloss.val:.4f} ({CEloss.avg:.4f})\t'
                'Entropy Loss {Eloss.val:.4f} ({Eloss.avg:.4f})\t'
                'Last layer entropy MSE {lEloss.val:.4f} ({lEloss.avg:.4f})\t'
                'Entropy I Loss {EIloss.val:.4f} ({EIloss.avg:.4f})\t'
                'Correlation Loss {Corrloss.val:.4f} ({Corrloss.avg:.4f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                    epoch,
                    batch_idx + 1,
                    len(trainLoader),
                    batch_time=batch_time,
                    loss=totalLosses,
                    CEloss=ceLosses,
                    lEloss=leLosses,
                    EIloss=eiLosses,
                    Eloss=eLosses,
                    Corrloss=corrLosses,
                    top1=top1,
                    top5=top5))
    return totalLosses.avg, ceLosses.avg, corrLosses.avg, top1.avg, top5.avg
def runTest(model, args, testLoader, epoch, criterion, logging):
    """Evaluate `model` on `testLoader`, also accumulating activation-entropy
    statistics from modules exposing `bit_count`/`act_size`.

    Returns:
        (totalLosses.avg, ceLosses.avg, corrLosses.avg, top1.avg, top5.avg,
         avgEntropy) where avgEntropy = total bit count / total activations.
    """
    model.eval()
    batch_time = AverageMeter()
    totalLosses = AverageMeter()
    ceLosses = AverageMeter()
    corrLosses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    entropy = 0
    act_count = 0
    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(testLoader):
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            out = model(inputs)
            corr = torch.sum(
                torch.tensor(
                    [m.corr for m in model.modules() if hasattr(m, "corr")]))
            totalLoss, crossEntropyLoss, corrLoss = criterion(
                out, targets, corr)
        # Accumulate per-batch bit counts / activation counts left behind by
        # the forward pass.
        entropy += np.sum(
            np.array([
                x.bit_count for x in model.modules()
                if hasattr(x, "bit_count")
            ]))
        act_count += np.sum(
            np.array([
                x.act_size for x in model.modules() if hasattr(x, "act_size")
            ]))
        # measure accuracy and record loss
        prec1, prec5 = accuracy(out, targets, topk=(1, 5))
        totalLosses.update(totalLoss.item(), inputs.size(0))
        ceLosses.update(crossEntropyLoss.item(), inputs.size(0))
        corrLosses.update(corrLoss.item(), inputs.size(0))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))
        # measure elapsed time
        # NOTE(review): `end` is never reset inside the loop, so batch_time
        # measures time since evaluation START, not per-batch — confirm
        # whether this is intentional (only .avg is logged below).
        batch_time.update(time.time() - end)
    # act_count = np.sum(np.array([x.act_size for x in model.modules() if hasattr(x, "act_size")]))
    # NOTE(review): raises ZeroDivisionError when no module exposes
    # `act_size`; assumes quantized models are always used here.
    avgEntropy = float(entropy) / act_count
    logging.info('Epoch Test: [{}]\t'
                 'Time ({batch_time.avg:.3f})\t'
                 'Total Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                 'Cross Entropy Loss {CEloss.val:.4f} ({CEloss.avg:.4f})\t'
                 'Correlation Loss {Corrloss.val:.4f} ({Corrloss.avg:.4f})\t'
                 'Entropy {ent} \t'
                 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                     epoch,
                     batch_time=batch_time,
                     loss=totalLosses,
                     CEloss=ceLosses,
                     Corrloss=corrLosses,
                     ent=avgEntropy,
                     top1=top1,
                     top5=top5))
    return totalLosses.avg, ceLosses.avg, corrLosses.avg, top1.avg, top5.avg, avgEntropy
def train(train_loader, model, criterion, optimizer, epoch, args, device,
          ml_logger, val_loader, mq=None):
    """Train `model` for one epoch on `train_loader`.

    Args:
        train_loader: iterable of (images, target) batches.
        model: network to train (switched to train mode here).
        criterion: loss function.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used only for progress display.
        args: namespace providing `print_freq`.
        device: target device for inputs/labels.
        ml_logger: metric logger; Train Acc1/Loss are logged every
            `print_freq` batches.
        val_loader, mq: unused here; kept for caller compatibility.

    Returns:
        None — progress is reported via `progress` and `ml_logger`.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             batch_time,
                             data_time,
                             losses,
                             top1,
                             top5,
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    # (removed dead local `best_acc1 = -1`: it was never read or returned)

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.to(device, non_blocking=True)
        target = target.to(device, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1.item(), images.size(0))
        top5.update(acc5.item(), images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.print(i)
            ml_logger.log_metric('Train Acc1',
                                 top1.avg,
                                 step='auto',
                                 log_to_tfboard=False)
            ml_logger.log_metric('Train Loss',
                                 losses.avg,
                                 step='auto',
                                 log_to_tfboard=False)
def forward(self, data_loader, num_steps=None, training=False,
            average_output=False, chunk_batch=1):
    """Run one epoch, optionally filtering batches through hard-sample
    selection (training only) and tracking gradient variance.

    Returns:
        dict of meter averages, including 'error1'/'error5'.
    """
    self.train_batches = len(data_loader)
    meters = {
        name: AverageMeter()
        for name in [
            'step', 'data', 'loss', 'prec1', 'prec5', 'samples', 'confidence'
        ]
    }
    if training and self.grad_clip > 0:
        meters['grad'] = AverageMeter()
    if self.calc_grad_var is not None:
        # Online variance estimate over flattened gradients, sampled every
        # `calc_grad_var` steps.
        var_meter = OnlineMeter()
        meters['grad_var'] = AverageMeter()
    batch_first = True
    if training and isinstance(self.model,
                               nn.DataParallel) or chunk_batch > 1:
        batch_first = False

    def meter_results(meters):
        # Convert meter averages to a flat dict and add top-1/5 error rates.
        results = {name: meter.avg for name, meter in meters.items()}
        results['error1'] = 100. - results['prec1']
        results['error5'] = 100. - results['prec5']
        return results

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        meters['data'].update(time.time() - end)
        inputs = inputs.to(self.device, dtype=self.dtype)
        target = target.to(self.device)
        if training:
            # Keep only the "hard" subset of this batch for the update.
            inputs, target = self.select_hard_samples(
                inputs, target, meters)
            # NOTE(review): this second device transfer is redundant (the
            # tensors were already moved above); harmless since .to() is a
            # no-op on same-device tensors.
            target = target.to(self.device)
            inputs = inputs.to(self.device, dtype=self.dtype)
        output, loss, grad = self._step(inputs,
                                        target,
                                        training=training,
                                        average_output=average_output,
                                        chunk_batch=chunk_batch)
        if self.calc_grad_var is not None:
            var_meter.update(
                self.collect_flatten_grads_(self.model.parameters()))
            if (self.training_steps + 1) % self.calc_grad_var == 0:
                meters['grad_var'].update(float(var_meter.var.mean()),
                                          inputs.size(0))
                # Restart the online estimate for the next window.
                var_meter.needs_init = True
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        meters['loss'].update(float(loss), inputs.size(0))
        meters['prec1'].update(float(prec1), inputs.size(0))
        meters['prec5'].update(float(prec5), inputs.size(0))
        if grad is not None:
            meters['grad'].update(float(grad), inputs.size(0))
        # measure elapsed time
        meters['step'].update(time.time() - end)
        end = time.time()
        if i % self.print_freq == 0 or i == len(data_loader) - 1:
            report = str(
                '{phase} - Epoch: [{0}][{1}/{2}]\t'
                'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                # 'Samples {meters[samples].val}\t'
                'Confidence {meters[confidence].val}\t'.format(
                    self.epoch, i, len(data_loader),
                    phase='TRAINING' if training else 'EVALUATING',
                    meters=meters))
            if 'grad' in meters.keys():
                report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                    .format(meters=meters)
            logging.info(report)
            self.observe(trainer=self,
                         model=self._model,
                         optimizer=self.optimizer,
                         data=(inputs, target))
            self.stream_meters(meters,
                               prefix='train' if training else 'eval')
            if training:
                self.write_stream(
                    'lr', (self.training_steps, self.optimizer.get_lr()[0]))
        if num_steps is not None and i >= num_steps:
            break
    return meter_results(meters)
def forward(self, data_loader, num_steps=None, training=False,
            average_output=False, chunk_batch=1,
            scheduled_instructions=None):
    """Run one epoch, handling duplicated samples (5-D input batches) and
    tracking per-module gradient mean/std statistics via `meters_grad`.

    Returns:
        (meter_results(meters), meters_grad)
    """
    meters = {name: AverageMeter()
              for name in ['step', 'data', 'loss', 'prec1', 'prec5']}
    if training and self.grad_clip > 0:
        meters['grad'] = AverageMeter()
    # One mean/std meter pair per hooked module.
    meters_grad = {name: {'mean': AverageMeter(), 'std': AverageMeter()}
                   for name in self.module_to_hook.keys()}
    batch_first = True
    if training and isinstance(self.model, nn.DataParallel) or chunk_batch > 1:
        batch_first = False

    def meter_results(meters):
        # Flatten meter averages and derive top-1/5 error rates.
        results = {name: meter.avg for name, meter in meters.items()}
        results['error1'] = 100. - results['prec1']
        results['error5'] = 100. - results['prec5']
        return results

    end = time.time()
    for i, (inputs, target) in enumerate(data_loader):
        duplicates = inputs.dim() > 4  # B x D x C x H x W
        if training and duplicates and self.adapt_grad_norm is not None \
                and i % self.adapt_grad_norm == 0:
            # Periodically rescale the loss: ratio between the mean gradient
            # norm of individual duplicates and the flattened batch's norm.
            grad_mean = 0
            num = inputs.size(1)
            for j in range(num):
                grad_mean += float(self._grad_norm(inputs.select(1, j), target))
            grad_mean /= num
            grad_all = float(self._grad_norm(
                *_flatten_duplicates(inputs, target, batch_first)))
            self.grad_scale = grad_mean / grad_all
            logging.info('New loss scale: %s', self.grad_scale)

        # measure data loading time
        meters['data'].update(time.time() - end)
        if duplicates:  # multiple versions for each sample (dim 1)
            inputs, target = _flatten_duplicates(inputs, target, batch_first,
                                                 expand_target=not average_output)

        output, loss, grad, grad_log_stats = self._step(inputs, target,
                                                        training=training,
                                                        average_output=average_output,
                                                        chunk_batch=chunk_batch,
                                                        scheduled_instructions=scheduled_instructions)
        # print("grad_log_stats!!!")
        # print(grad_log_stats)
        # pdb.set_trace()

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        meters['loss'].update(float(loss), inputs.size(0))
        meters['prec1'].update(float(prec1), inputs.size(0))
        meters['prec5'].update(float(prec5), inputs.size(0))
        if grad is not None:
            meters['grad'].update(float(grad), inputs.size(0))
        # Fold this batch's per-module gradient statistics into the meters.
        # NOTE(review): assumes grad_log_stats has an entry for every key of
        # self.module_to_hook — confirm against _step's contract.
        for name, met in meters_grad.items():
            met['mean'].update(float(grad_log_stats[name]['mean']),
                               inputs.size(0))
            met['std'].update(float(grad_log_stats[name]['std']),
                              inputs.size(0))

        # measure elapsed time
        meters['step'].update(time.time() - end)
        end = time.time()

        if i % self.print_freq == 0 or i == len(data_loader) - 1:
            report = str('{phase} - Epoch: [{0}][{1}/{2}]\t'
                         'Time {meters[step].val:.3f} ({meters[step].avg:.3f})\t'
                         'Data {meters[data].val:.3f} ({meters[data].avg:.3f})\t'
                         'Loss {meters[loss].val:.4f} ({meters[loss].avg:.4f})\t'
                         'Prec@1 {meters[prec1].val:.3f} ({meters[prec1].avg:.3f})\t'
                         'Prec@5 {meters[prec5].val:.3f} ({meters[prec5].avg:.3f})\t'
                         .format(
                             self.epoch, i, len(data_loader),
                             phase='TRAINING' if training else 'EVALUATING',
                             meters=meters))
            if 'grad' in meters.keys():
                report += 'Grad {meters[grad].val:.3f} ({meters[grad].avg:.3f})'\
                    .format(meters=meters)
            logging.info(report)
            self.observe(trainer=self,
                         model=self._model,
                         optimizer=self.optimizer,
                         data=(inputs, target))
            self.stream_meters(meters,
                               prefix='train' if training else 'eval')
            if training:
                self.write_stream('lr',
                                  (self.training_steps,
                                   self.optimizer.get_lr()[0]))

        if num_steps is not None and i >= num_steps:
            break
    # print("grad_log_stats! loop 2")
    if training:
        # Epoch-end summary of per-module gradient statistics.
        for name, met in meters_grad.items():
            print("module name: " + str(name) + " mean_grad: " +
                  str(met['mean'].avg) + " std_grad: " + str(met['std'].avg))
    return meter_results(meters), meters_grad
qweight = qweight[idx] EntrTotal.append(soft_entropy(qweight,bits=8,temp=-10) * elems) totalElems += elems EntrTotal = sum(EntrTotal) / totalElems else: EntrTotal = 0 totalLoss, crossEntropyLoss, paramsLoss, paramLoss2 = criterion(out, targets, getParamsLoss(params[:layer + 1], len(model.device_ids)) ,EntrTotal) # totalLoss, crossEntropyLoss, paramsLoss = criterion(out, targets, getParamsLoss(params[:layer + 1], len(model.device_ids)) ) totalLoss.backward() optimizer.step() # measure accuracy and record loss prec1, prec5 = accuracy(out, targets, topk=(1, 5)) totalLosses.update(totalLoss.item(), inputs.size(0)) ceLosses.update(crossEntropyLoss.item(), inputs.size(0)) paramsLosses.update(paramsLoss.item(), inputs.size(0)) paramsLosses2.update(paramLoss2.item(), inputs.size(0)) top1.update(float(prec1), inputs.size(0)) top5.update(float(prec5), inputs.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if batch_idx % args.print_freq == 0: logging.info('Epoch Train: [{}]\t' 'Train: [{}/{}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'