def create_compressed_model(model, config):
    input_args = (next(model.parameters()).new_empty(config['input_sample_size']),)
    if is_main_process():
        print(*get_all_modules(model).keys(), sep="\n")
        # Trace and dump the graph of the original, uncompressed model
        ctx = build_graph(model, input_args, {}, 'create_model', reset_context=True)
        dump_graph(ctx, osp.join(config.log_dir, "original_graph.dot"))

    compression_algo = create_compression_algorithm(model, config)

    if is_main_process():
        # Trace and dump the graph of the model with compression modules inserted
        if hasattr(compression_algo.model, "build_graph"):
            ctx = compression_algo.model.build_graph()
        else:
            ctx = build_graph(compression_algo.model, input_args, {}, "create_model_compressed",
                              reset_context=True)
        dump_graph(ctx, osp.join(config.log_dir, "compressed_graph.dot"))

    model = compression_algo.model
    return compression_algo, model
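
# Illustrative usage sketch for create_compressed_model() above (not part of the sample).
# The function reads both config['input_sample_size'] and config.log_dir, so it expects the
# sample's Config object, which supports key and attribute access alike; AttrDict below is a
# stand-in for it, and the torchvision model and 'quantization' algorithm name are likewise
# assumptions for illustration.
def _example_create_compressed_model():
    from torchvision.models import resnet18

    class AttrDict(dict):
        __getattr__ = dict.__getitem__

    config = AttrDict({
        'input_sample_size': [1, 3, 224, 224],  # shape of the dummy input used for graph tracing
        'compression': {'algorithm': 'quantization'},
        'log_dir': './runs',  # destination for original_graph.dot / compressed_graph.dot
    })
    model = resnet18(pretrained=False)
    return create_compressed_model(model, config)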
def safe_thread_call(main_call_fn, after_barrier_call_fn=None):
    # Run main_call_fn on the main process first; the other processes wait on the barrier and
    # then run after_barrier_call_fn (or main_call_fn again) once the main process has finished.
    result = None
    if is_dist_avail_and_initialized():
        if is_main_process():
            result = main_call_fn()
        distributed.barrier()
        if not is_main_process():
            result = after_barrier_call_fn() if after_barrier_call_fn else main_call_fn()
    else:
        result = main_call_fn()
    return result
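
# Typical use of safe_thread_call() above: let only the main process download a dataset, then
# have every other rank construct it from the already-populated cache after the barrier.
# CIFAR10 and the data root are illustrative choices, not taken from this sample.
def _example_safe_dataset_creation():
    from functools import partial
    from torchvision.datasets import CIFAR10

    create_fn = partial(CIFAR10, root='./data', download=True)
    # The main process runs create_fn first (performing the download); the remaining ranks run
    # it after distributed.barrier(), when the files are already on disk.
    return safe_thread_call(create_fn)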
def validate(val_loader, model, criterion, config):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input_, target) in enumerate(val_loader):
            input_ = input_.to(config.device)
            target = target.to(config.device)

            # compute output
            output = model(input_)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input_.size(0))
            top1.update(acc1, input_.size(0))
            top5.update(acc5, input_.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % config.print_freq == 0:
                print('{rank}'
                      'Test: [{0}/{1}] '
                      'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                      'Loss: {loss.val:.4f} ({loss.avg:.4f}) '
                      'Acc@1: {top1.val:.3f} ({top1.avg:.3f}) '
                      'Acc@5: {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time, loss=losses, top1=top1,
                          top5=top5,
                          rank='{}:'.format(config.rank)
                          if config.multiprocessing_distributed else ''))

    if is_main_process():
        config.tb.add_scalar("val/loss", losses.avg, len(val_loader) * config.get('cur_epoch', 0))
        config.tb.add_scalar("val/top1", top1.avg, len(val_loader) * config.get('cur_epoch', 0))
        config.tb.add_scalar("val/top5", top5.avg, len(val_loader) * config.get('cur_epoch', 0))

    print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))
    return top1.avg, top5.avg
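
# validate() and train_epoch() rely on two helpers not shown in this section. A minimal sketch
# of both, assuming they follow the stock PyTorch ImageNet example (the sample's own
# implementations live elsewhere and may differ in detail):
class AverageMeter:
    """Tracks the current value, running sum, count, and average of a metric."""

    def __init__(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes top-k accuracy (in percent) for each requested k."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res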
def train(config, compression_algo, model, criterion, is_inception, lr_scheduler, model_name,
          optimizer, train_loader, train_sampler, val_loader, best_acc1=0):
    for epoch in range(config.start_epoch, config.epochs):
        config.cur_epoch = epoch
        if config.distributed:
            train_sampler.set_epoch(epoch)
        lr_scheduler.step(epoch if not isinstance(lr_scheduler, ReduceLROnPlateau) else best_acc1)

        # train for one epoch
        train_epoch(train_loader, model, criterion, optimizer, compression_algo, epoch, config,
                    is_inception)

        # compute compression algo statistics
        stats = compression_algo.statistics()

        acc1 = best_acc1
        if epoch % config.test_every_n_epochs == 0:
            # evaluate on validation set
            acc1, _ = validate(val_loader, model, criterion, config)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # update compression scheduler state at the end of the epoch
        compression_algo.scheduler.epoch_step()

        if is_main_process():
            print_statistics(stats)

            checkpoint_path = osp.join(config.checkpoint_save_dir, get_name(config) + '_last.pth')
            checkpoint = {
                'epoch': epoch + 1,
                'arch': model_name,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
                'scheduler': compression_algo.scheduler.state_dict()
            }
            torch.save(checkpoint, checkpoint_path)
            make_additional_checkpoints(checkpoint_path, is_best, epoch + 1, config)

            for key, value in stats.items():
                if isinstance(value, (int, float)):
                    config.tb.add_scalar("compression/statistics/{0}".format(key), value,
                                         len(train_loader) * epoch)
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)
        print(config)

    config.device = get_device(config)
    dataset = get_dataset(config.dataset)
    color_encoding = dataset.color_encoding
    num_classes = len(color_encoding)

    weights = config.get('weights')
    model = load_model(config.model,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=num_classes,
                       model_params=config.get('model_params', {}))
    compression_algo, model = create_compressed_model(model, config)
    if weights:
        sd = torch.load(weights, map_location='cpu')
        load_state(model, sd)

    model, model_without_dp = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_algo.distributed()

    resuming_checkpoint = config.resuming_checkpoint

    if resuming_checkpoint is not None:
        if not config.pretrained:
            # Load the previously saved model state
            model, _, _, _, _ = load_checkpoint(model, resuming_checkpoint, config.device,
                                                compression_scheduler=compression_algo.scheduler)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.mode.lower() == 'test':
        print(model)
        model_parameters = filter(lambda p: p.requires_grad, model.parameters())
        params = sum(np.prod(p.size()) for p in model_parameters)
        print("Trainable parameter count: {params}".format(params=params))

        model = model.to(config.device)
        loaders, w_class = load_dataset(dataset, config)
        _, val_loader = loaders
        test(model, val_loader, w_class, color_encoding, config)
        print_statistics(compression_algo.statistics())
    elif config.mode.lower() == 'train':
        loaders, w_class = load_dataset(dataset, config)
        train_loader, val_loader = loaders
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)
        model = train(model, model_without_dp, compression_algo, train_loader, val_loader,
                      w_class, color_encoding, config)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError("\"{0}\" is not a valid choice for execution mode.".format(config.mode))
def train(model, model_without_dp, compression_algo, train_loader, val_loader, class_weights,
          class_encoding, config):
    print("\nTraining...\n")

    # Check if the network architecture is correct
    print(model)

    optim_config = config.get('optimizer', {})
    optim_params = optim_config.get('optimizer_params', {})
    lr = optim_params.get("lr", 1e-4)

    params_to_optimize, criterion = get_aux_loss_dependent_params(model_without_dp, class_weights,
                                                                  lr * 10, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    # Evaluation metric
    ignore_index = None
    ignore_unlabeled = config.get("ignore_unlabeled", True)
    if ignore_unlabeled and ('unlabeled' in class_encoding):
        ignore_index = list(class_encoding).index('unlabeled')

    metric = IoU(len(class_encoding), ignore_index=ignore_index)

    best_miou = -1
    resuming_checkpoint = config.resuming_checkpoint
    # Optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, optimizer, start_epoch, best_miou, _ = \
            load_checkpoint(model, resuming_checkpoint, config.device, optimizer,
                            compression_algo.scheduler)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
        config.start_epoch = start_epoch

    # Start Training
    train_obj = Train(model, train_loader, optimizer, criterion, compression_algo, metric,
                      config.device, config.model)
    val_obj = Test(model, val_loader, criterion, metric, config.device, config.model)

    for epoch in range(config.start_epoch, config.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))
        if config.distributed:
            train_loader.sampler.set_epoch(epoch)
        if not isinstance(lr_scheduler, ReduceLROnPlateau):
            lr_scheduler.step(epoch)

        epoch_loss, (iou, miou) = train_obj.run_epoch(config.print_step)
        compression_algo.scheduler.epoch_step()

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".format(
            epoch, epoch_loss, miou))

        if is_main_process():
            config.tb.add_scalar("train/loss", epoch_loss, epoch)
            config.tb.add_scalar("train/mIoU", miou, epoch)
            config.tb.add_scalar("train/learning_rate", optimizer.param_groups[0]['lr'], epoch)
            config.tb.add_scalar("train/compression_loss", compression_algo.loss(), epoch)

            for key, value in compression_algo.statistics().items():
                if isinstance(value, (int, float)):
                    config.tb.add_scalar("compression/statistics/{0}".format(key), value, epoch)

        if (epoch + 1) % config.save_freq == 0 or epoch + 1 == config.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val_obj.run_epoch(config.print_step)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".format(
                epoch, loss, miou))

            if is_main_process():
                config.tb.add_scalar("val/mIoU", miou, epoch)
                config.tb.add_scalar("val/loss", loss, epoch)
                for i, (key, class_iou) in enumerate(zip(class_encoding.keys(), iou)):
                    config.tb.add_scalar("{}/mIoU_Cls{}_{}".format(config.dataset, i, key),
                                         class_iou, epoch)

            is_best = miou > best_miou
            best_miou = max(miou, best_miou)

            if isinstance(lr_scheduler, ReduceLROnPlateau):
                lr_scheduler.step(best_miou)

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == config.epochs or is_best:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if is_main_process():
                checkpoint_path = save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                                  compression_algo.scheduler, config)
                make_additional_checkpoints(checkpoint_path, is_best, epoch + 1, config)
                print_statistics(compression_algo.statistics())

    return model
def train_epoch(train_loader, model, criterion, optimizer, compression_algo, epoch, config,
                is_inception=False):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    compression_losses = AverageMeter()
    criterion_losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    compression_scheduler = compression_algo.scheduler

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input_, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        input_ = input_.to(config.device)
        target = target.to(config.device)

        # compute output
        if is_inception:
            # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
            output, aux_outputs = model(input_)
            loss1 = criterion(output, target)
            loss2 = criterion(aux_outputs, target)
            criterion_loss = loss1 + 0.4 * loss2
        else:
            output = model(input_)
            criterion_loss = criterion(output, target)

        # compute compression loss
        compression_loss = compression_algo.loss()
        loss = criterion_loss + compression_loss

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input_.size(0))
        comp_loss_val = compression_loss.item() if isinstance(compression_loss, torch.Tensor) \
            else compression_loss
        compression_losses.update(comp_loss_val, input_.size(0))
        criterion_losses.update(criterion_loss.item(), input_.size(0))
        top1.update(acc1, input_.size(0))
        top5.update(acc5, input_.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        compression_scheduler.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % config.print_freq == 0:
            print('{rank}: '
                  'Epoch: [{0}][{1}/{2}] '
                  'Lr: {3:.3} '
                  'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data: {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'CE_loss: {ce_loss.val:.4f} ({ce_loss.avg:.4f}) '
                  'CR_loss: {cr_loss.val:.4f} ({cr_loss.avg:.4f}) '
                  'Loss: {loss.val:.4f} ({loss.avg:.4f}) '
                  'Acc@1: {top1.val:.3f} ({top1.avg:.3f}) '
                  'Acc@5: {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader), get_lr(optimizer), batch_time=batch_time,
                      data_time=data_time, ce_loss=criterion_losses, cr_loss=compression_losses,
                      loss=losses, top1=top1, top5=top5,
                      rank='{}:'.format(config.rank)
                      if config.multiprocessing_distributed else ''))

        if is_main_process():
            global_step = len(train_loader) * epoch
            config.tb.add_scalar("train/learning_rate", get_lr(optimizer), i + global_step)
            config.tb.add_scalar("train/criterion_loss", criterion_losses.avg, i + global_step)
            config.tb.add_scalar("train/compression_loss", compression_losses.avg, i + global_step)
            config.tb.add_scalar("train/loss", losses.avg, i + global_step)
            config.tb.add_scalar("train/top1", top1.avg, i + global_step)
            config.tb.add_scalar("train/top5", top5.avg, i + global_step)

            for stat_name, stat_value in compression_algo.statistics().items():
                if isinstance(stat_value, (int, float)):
                    config.tb.add_scalar('train/statistics/{}'.format(stat_name), stat_value,
                                         i + global_step)
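
# get_lr() used in train_epoch() above is not defined in this section. A one-line sketch, under
# the assumption that it simply reports the learning rate of the optimizer's first parameter
# group:
def get_lr(optimizer):
    return optimizer.param_groups[0]['lr']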
def main_worker(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    compression_algo, model = create_compressed_model(model, config)
    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)

    if config.distributed:
        compression_algo.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = get_parameter_groups(model, config)
    optimizer, lr_scheduler = make_optimizer(params_to_optimize, config)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, compression_algo, best_acc1 = \
            resume_from_checkpoint(resuming_checkpoint, model, config, optimizer,
                                   compression_algo)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_loader, train_sampler, val_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        print_statistics(compression_algo.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)
        train(config, compression_algo, model, criterion, is_inception, lr_scheduler, model_name,
              optimizer, train_loader, train_sampler, val_loader, best_acc1)
def main_worker_binarization(current_gpu, config):
    config.current_gpu = current_gpu
    config.distributed = config.execution_mode in (ExecutionMode.DISTRIBUTED,
                                                   ExecutionMode.MULTIPROCESSING_DISTRIBUTED)
    if config.distributed:
        configure_distributed(config)

    config.device = get_device(config)

    if is_main_process():
        configure_logging(config)
        print_args(config)

    if config.seed is not None:
        manual_seed(config.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False

    # create model
    model_name = config['model']
    weights = config.get('weights')
    model = load_model(model_name,
                       pretrained=config.get('pretrained', True) if weights is None else False,
                       num_classes=config.get('num_classes', 1000),
                       model_params=config.get('model_params'))
    original_model = copy.deepcopy(model)

    compression_algo, model = create_compressed_model(model, config)
    if not isinstance(compression_algo, Binarization):
        raise RuntimeError(
            "The binarization sample worker may only be run with the binarization algorithm!")

    if weights:
        load_state(model, torch.load(weights, map_location='cpu'))

    model, _ = prepare_model_for_execution(model, config)
    original_model.to(config.device)

    if config.distributed:
        compression_algo.distributed()

    is_inception = 'inception' in model_name

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(config.device)

    params_to_optimize = model.parameters()

    compression_config = config['compression']
    binarization_config = compression_config if isinstance(compression_config, dict) \
        else compression_config[0]
    optimizer = get_binarization_optimizer(params_to_optimize, binarization_config)
    optimizer_scheduler = BinarizationOptimizerScheduler(optimizer, binarization_config)
    kd_loss_calculator = KDLossCalculator(original_model)

    resuming_checkpoint = config.resuming_checkpoint
    best_acc1 = 0
    # optionally resume from a checkpoint
    if resuming_checkpoint is not None:
        model, config, optimizer, optimizer_scheduler, kd_loss_calculator, compression_algo, \
        best_acc1 = resume_from_checkpoint(resuming_checkpoint, model, config, optimizer,
                                           optimizer_scheduler, kd_loss_calculator,
                                           compression_algo)

    if config.to_onnx is not None:
        compression_algo.export_model(config.to_onnx)
        print("Saved to", config.to_onnx)
        return

    if config.execution_mode != ExecutionMode.CPU_ONLY:
        cudnn.benchmark = True

    # Data loading code
    train_loader, train_sampler, val_loader = create_dataloaders(config)

    if config.mode.lower() == 'test':
        print_statistics(compression_algo.statistics())
        validate(val_loader, model, criterion, config)

    if config.mode.lower() == 'train':
        if not resuming_checkpoint:
            compression_algo.initialize(train_loader)
        batch_multiplier = (binarization_config.get("params", {})).get("batch_multiplier", 1)
        train_bin(config, compression_algo, model, criterion, is_inception, optimizer_scheduler,
                  model_name, optimizer, train_loader, train_sampler, val_loader,
                  kd_loss_calculator, batch_multiplier, best_acc1)
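
# KDLossCalculator used above is not shown in this section. A minimal sketch, assuming it keeps
# the original full-precision model frozen and penalizes divergence of the binarized model's
# logits from the original's via a temperature-scaled soft cross-entropy (the actual sample may
# use a different formulation):
class KDLossCalculator:
    def __init__(self, original_model, temperature=1.0):
        self.original_model = original_model
        self.original_model.eval()
        self.temperature = temperature

    def loss(self, inputs, quantized_network_outputs):
        T = self.temperature
        with torch.no_grad():
            # Reference logits from the frozen full-precision model
            ref_output = self.original_model(inputs)
        # Soft cross-entropy between temperature-softened distributions
        kd_loss = -(nn.functional.log_softmax(quantized_network_outputs / T, dim=1) *
                    nn.functional.softmax(ref_output / T, dim=1)).sum(dim=1).mean() * (T * T)
        return kd_loss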