import os

import torch
from torch import nn
from tqdm import tqdm

# Repo-local helpers assumed to be importable in this module (not shown here):
# C (config singleton), Accumulator, accuracy, mixup, ListAverageMeter, logger.


def run_epoch(model, loader, loss_fn, optimizer, desc_default='', epoch=0, writer=None, verbose=1, scheduler=None):
    tqdm_disable = bool(os.environ.get('TASK_NAME', ''))    # KakaoBrain environment
    if verbose:
        loader = tqdm(loader, disable=tqdm_disable)
        loader.set_description('[%s %04d/%04d]' % (desc_default, epoch, C.get()['epoch']))

    metrics = Accumulator()
    cnt = 0
    total_steps = len(loader)
    steps = 0
    for data, label in loader:
        steps += 1
        data, label = data.cuda(), label.cuda()

        if optimizer:
            optimizer.zero_grad()

        preds = model(data)
        loss = loss_fn(preds, label)

        if optimizer:
            loss.backward()
            if getattr(optimizer, "synchronize", None):
                optimizer.synchronize()    # for horovod
            if C.get()['optimizer'].get('clip', 5) > 0:
                nn.utils.clip_grad_norm_(model.parameters(), C.get()['optimizer'].get('clip', 5))
            optimizer.step()

        top1, top5 = accuracy(preds, label, (1, 5))
        # weight by batch size so the running values below average per example
        metrics.add_dict({
            'loss': loss.item() * len(data),
            'top1': top1.item() * len(data),
            'top5': top5.item() * len(data),
        })
        cnt += len(data)
        if verbose:
            postfix = metrics / cnt
            if optimizer:
                postfix['lr'] = optimizer.param_groups[0]['lr']
            loader.set_postfix(postfix)

        if scheduler is not None:
            scheduler.step(epoch - 1 + float(steps) / total_steps)

        del preds, loss, top1, top5, data, label

    if tqdm_disable:
        if optimizer:
            logger.info('[%s %03d/%03d] %s lr=%.6f', desc_default, epoch, C.get()['epoch'], metrics / cnt, optimizer.param_groups[0]['lr'])
        else:
            logger.info('[%s %03d/%03d] %s', desc_default, epoch, C.get()['epoch'], metrics / cnt)

    metrics /= cnt
    if optimizer:
        metrics.metrics['lr'] = optimizer.param_groups[0]['lr']
    if verbose and writer is not None:    # guard: writer defaults to None
        for key, value in metrics.items():
            writer.add_scalar(key, value, epoch)
    return metrics
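# `Accumulator` is a repo-local helper; a minimal sketch consistent with the calls
# made above (add_dict, `/`, `/=`, .metrics, .items(), use as a tqdm postfix) is
# given below. This is an assumption about its interface, not the repo's actual
# implementation.
from collections import defaultdict


class Accumulator:
    """Running weighted sums of named metrics; `acc / n` yields the averages."""

    def __init__(self):
        self.metrics = defaultdict(float)

    def add_dict(self, d):
        for k, v in d.items():
            self.metrics[k] += v

    def __getitem__(self, k):
        return self.metrics[k]

    def __setitem__(self, k, v):
        self.metrics[k] = v

    def keys(self):
        return self.metrics.keys()    # mapping protocol, so tqdm/dict() accept it

    def items(self):
        return self.metrics.items()

    def __truediv__(self, other):    # `metrics /= cnt` also resolves to this
        out = Accumulator()
        for k, v in self.metrics.items():
            out[k] = v / other
        return out

    def __str__(self):
        return str(dict(self.metrics))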
def run_epoch(model, loader, loss_fn, optimizer, desc_default='', epoch=0, writer=None, verbose=1):
    if verbose:
        loader = tqdm(loader)
        if optimizer:
            curr_lr = optimizer.param_groups[0]['lr']
            loader.set_description('[%s %04d/%04d] lr=%.4f' % (desc_default, epoch, C.get()['epoch'], curr_lr))
        else:
            loader.set_description('[%s %04d/%04d]' % (desc_default, epoch, C.get()['epoch']))

    metrics = Accumulator()
    cnt = 0
    for data, label in loader:
        data, label = data.cuda(), label.cuda()

        if optimizer:
            optimizer.zero_grad()

        preds = model(data)
        loss = loss_fn(preds, label)

        if optimizer:
            loss.backward()
            # clip after backward(): gradients do not exist before the backward pass
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

        top1, top5 = accuracy(preds, label, (1, 5))
        metrics.add_dict({
            'loss': loss.item() * len(data),
            'top1': top1.item() * len(data),
            'top5': top5.item() * len(data),
        })
        cnt += len(data)
        if verbose:
            loader.set_postfix(metrics / cnt)

        del preds, loss, top1, top5, data, label

    metrics /= cnt
    if optimizer:
        metrics.metrics['lr'] = optimizer.param_groups[0]['lr']
    if verbose and writer is not None:
        for key, value in metrics.items():
            writer.add_scalar(key, value, epoch)
    return metrics
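# `accuracy` is also repo-local. The standard top-k helper below matches how it
# is used above: one 0-dim tensor per k, as a fraction in [0, 1] (callers
# multiply by the batch size and later divide by the total count). A common
# sketch, assumed rather than taken from the repo:
def accuracy(output, target, topk=(1,)):
    """Fraction of examples whose true label is within the top-k predictions."""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()                                           # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))     # bool (maxk, batch)

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(1.0 / batch_size))
    return res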
def run_epoch(model, loader, loss_fn, optimizer, desc_default='', epoch=0, writer=None, verbose=1,
              scheduler=None, is_master=True, ema=None, wd=0.0, tqdm_disabled=False):
    if verbose:
        loader = tqdm(loader, disable=tqdm_disabled)
        loader.set_description('[%s %04d/%04d]' % (desc_default, epoch, C.get()['epoch']))

    # parameters that receive manual weight decay (BN parameters are excluded)
    params_without_bn = [params for name, params in model.named_parameters()
                         if not ('_bn' in name or '.bn' in name)]

    loss_ema = None
    metrics = Accumulator()
    cnt = 0
    total_steps = len(loader)
    steps = 0
    for data, label in loader:
        steps += 1
        data, label = data.cuda(), label.cuda()

        if C.get().conf.get('mixup', 0.0) <= 0.0 or optimizer is None:
            preds = model(data)
            loss = loss_fn(preds, label)
        else:    # mixup
            data, targets, shuffled_targets, lam = mixup(data, label, C.get()['mixup'])
            preds = model(data)
            loss = loss_fn(preds, targets, shuffled_targets, lam)
            del shuffled_targets, lam

        if optimizer:
            # manual L2 regularization on the non-BN parameters
            loss += wd * (1. / 2.) * sum([torch.sum(p ** 2) for p in params_without_bn])
            loss.backward()
            grad_clip = C.get()['optimizer'].get('clip', 5.0)
            if grad_clip > 0:
                nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad()

            if ema is not None:
                ema(model, (epoch - 1) * total_steps + steps)

        top1, top5 = accuracy(preds, label, (1, 5))
        metrics.add_dict({
            'loss': loss.item() * len(data),
            'top1': top1.item() * len(data),
            'top5': top5.item() * len(data),
        })
        cnt += len(data)
        if loss_ema is not None:    # `is not None`: a zero loss must not reset the average
            loss_ema = loss_ema * 0.9 + loss.item() * 0.1
        else:
            loss_ema = loss.item()
        if verbose:
            postfix = metrics / cnt
            if optimizer:
                postfix['lr'] = optimizer.param_groups[0]['lr']
            postfix['loss_ema'] = loss_ema
            loader.set_postfix(postfix)

        if scheduler is not None:
            scheduler.step(epoch - 1 + float(steps) / total_steps)

        del preds, loss, top1, top5, data, label

    if tqdm_disabled and verbose:
        if optimizer:
            logger.info('[%s %03d/%03d] %s lr=%.6f', desc_default, epoch, C.get()['epoch'], metrics / cnt, optimizer.param_groups[0]['lr'])
        else:
            logger.info('[%s %03d/%03d] %s', desc_default, epoch, C.get()['epoch'], metrics / cnt)

    metrics /= cnt
    if optimizer:
        metrics.metrics['lr'] = optimizer.param_groups[0]['lr']
    if verbose and writer is not None:
        for key, value in metrics.items():
            writer.add_scalar(key, value, epoch)
    return metrics
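# The mixup branch above assumes a `mixup` helper returning
# (mixed_data, targets, shuffled_targets, lam) and a loss_fn that accepts either
# (preds, targets) or (preds, targets, shuffled_targets, lam); `ema` is assumed
# to be a callable taking (model, global_step). Minimal sketches under those
# assumptions (hypothetical, not the repo's exact code):
import numpy as np


def mixup(data, targets, alpha):
    """Convex combination of each example with a shuffled partner (Zhang et al., 2018)."""
    indices = torch.randperm(data.size(0), device=data.device)
    shuffled_data, shuffled_targets = data[indices], targets[indices]
    lam = float(np.random.beta(alpha, alpha))
    mixed = data * lam + shuffled_data * (1.0 - lam)
    return mixed, targets, shuffled_targets, lam


def make_mixup_criterion(base):
    """Wraps a plain criterion so it also handles the 4-argument mixup call."""
    def criterion(preds, targets, shuffled_targets=None, lam=1.0):
        if shuffled_targets is None:
            return base(preds, targets)
        return lam * base(preds, targets) + (1.0 - lam) * base(preds, shuffled_targets)
    return criterion


class EMA:
    """Callable keeping an exponential moving average of the model's state."""

    def __init__(self, model, decay=0.999):
        self.decay = decay
        self.shadow = {k: v.detach().clone() for k, v in model.state_dict().items()}

    def __call__(self, model, step):
        decay = min(self.decay, (1 + step) / (10 + step))    # warm-up, a common choice
        for k, v in model.state_dict().items():
            if v.dtype.is_floating_point:
                self.shadow[k].mul_(decay).add_(v.detach(), alpha=1 - decay)
            else:
                self.shadow[k].copy_(v)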
def run_epoch(model, loader, loss_fn, optimizer, desc_default='', epoch=0, writer=None, verbose=1, scheduler=None):
    model_name = C.get()['model']['type']
    alpha = C.get()['alpha']    # weight of the sparsity (skip) loss
    skip_ratios = ListAverageMeter()
    tqdm_disable = bool(os.environ.get('TASK_NAME', ''))
    if verbose:
        loader = tqdm(loader, disable=tqdm_disable)
        loader.set_description('[%s %04d/%04d]' % (desc_default, epoch, C.get()['epoch']))

    metrics = Accumulator()
    cnt = 0
    total_steps = len(loader)
    steps = 0
    for data, label in loader:
        steps += 1
        data, label = data.cuda(), label.cuda()

        if optimizer:
            optimizer.zero_grad()

        if model_name == 'pyramid_skip':
            if desc_default == '*test':
                with torch.no_grad():
                    preds, masks, gprobs = model(data)
                # fraction of blocks skipped (gate <= 0.5) per layer, averaged over the epoch
                skips = [mask.data.le(0.5).float().mean() for mask in masks]
                if skip_ratios.len != len(skips):
                    skip_ratios.set_len(len(skips))
                skip_ratios.update(skips, data.size(0))
            else:
                preds, masks, gprobs = model(data)
            sparsity_loss = 0
            for mask in masks:
                sparsity_loss += mask.mean()
            loss1 = loss_fn(preds, label)
            loss2 = alpha * sparsity_loss
            loss = loss1 + loss2
        else:
            preds = model(data)
            loss = loss_fn(preds, label)

        if optimizer:
            loss.backward()
            if getattr(optimizer, "synchronize", None):
                # horovod: skip_synchronize() is a context manager, not a sync call
                optimizer.synchronize()
            if C.get()['optimizer'].get('clip', 5) > 0:
                nn.utils.clip_grad_norm_(model.parameters(), C.get()['optimizer'].get('clip', 5))
            optimizer.step()

        top1, top5 = accuracy(preds, label, (1, 5))
        if model_name == 'pyramid_skip':
            metrics.add_dict({
                'loss1': loss1.item() * len(data),
                'loss2': loss2.item() * len(data),
                'top1': top1.item() * len(data),
                'top5': top5.item() * len(data),
            })
        else:
            metrics.add_dict({
                'loss': loss.item() * len(data),
                'top1': top1.item() * len(data),
                'top5': top5.item() * len(data),
            })
        cnt += len(data)
        if verbose:
            postfix = metrics / cnt
            if optimizer:
                postfix['lr'] = optimizer.param_groups[0]['lr']
            loader.set_postfix(postfix)

        # if scheduler is not None:
        #     scheduler.step(epoch - 1 + float(steps) / total_steps)

        if model_name == 'pyramid_skip':
            del masks[:], gprobs[:]
        del preds, loss, top1, top5, data, label

    if model_name == 'pyramid_skip' and desc_default == '*test':
        skip_summaries = []
        for idx in range(skip_ratios.len):
            skip_summaries.append(1 - skip_ratios.avg[idx])
        # percentage of blocks actually executed ("computation percentage")
        cp = ((sum(skip_summaries) + 1) / (len(skip_summaries) + 1)) * 100

    if tqdm_disable:
        logger.info('[%s %03d/%03d] %s', desc_default, epoch, C.get()['epoch'], metrics / cnt)

    metrics /= cnt
    if optimizer:
        metrics.metrics['lr'] = optimizer.param_groups[0]['lr']
    if verbose and writer is not None:
        for key, value in metrics.items():
            writer.add_scalar(key, value, epoch)
        if model_name == 'pyramid_skip' and desc_default == '*test':
            writer.add_scalar('Computation Percentage', cp, epoch)
    return metrics
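# A hypothetical minimal driver for these run_epoch variants, with a synthetic
# dataset so the sketch is self-contained. It assumes the repo's config C has
# already been loaded (with at least 'epoch' and 'optimizer' keys); the model,
# loader, and hyperparameters here are illustrative, not the repo's actual setup.
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter

if __name__ == '__main__':
    model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10)).cuda()
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    writer = SummaryWriter('./logs')

    dataset = TensorDataset(torch.randn(256, 3, 32, 32), torch.randint(0, 10, (256,)))
    loader = DataLoader(dataset, batch_size=64, shuffle=True)

    for epoch in range(1, C.get()['epoch'] + 1):
        model.train()
        run_epoch(model, loader, loss_fn, optimizer, desc_default='train', epoch=epoch, writer=writer)

        model.eval()
        with torch.no_grad():    # optimizer=None selects the evaluation path
            run_epoch(model, loader, loss_fn, None, desc_default='*test', epoch=epoch, writer=writer)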