def main(): global msglogger check_pytorch_version() args = parser.parse_args() if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) msglogger = apputils.config_pylogger( os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir) # Log various details about the execution environment. It is sometimes useful # to refer to past experiment executions and this information may be useful. apputils.log_execution_env_state(sys.argv, gitroot=module_path) msglogger.debug("Distiller: %s", distiller.__version__) start_epoch = 0 best_top1 = 0 if args.deterministic: # Experiment reproducibility is sometimes important. Pete Warden expounded about this # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/ # In Pytorch, support for deterministic execution is still a bit clunky. if args.workers > 1: msglogger.error( 'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1' ) exit(1) # Use a well-known seed, for repeatability of experiments torch.manual_seed(0) random.seed(0) np.random.seed(0) cudnn.deterministic = True else: # This issue: https://github.com/pytorch/pytorch/issues/3659 # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled. cudnn.benchmark = True if args.gpus is not None: try: args.gpus = [int(s) for s in args.gpus.split(',')] except ValueError: msglogger.error( 'ERROR: Argument --gpus must be a comma-separated list of integers only' ) exit(1) available_gpus = torch.cuda.device_count() for dev_id in args.gpus: if dev_id >= available_gpus: msglogger.error( 'ERROR: GPU device ID {0} requested, but only {1} devices available' .format(dev_id, available_gpus)) exit(1) # Set default device in case the first one on the list != 0 torch.cuda.set_device(args.gpus[0]) # Infer the dataset from the model name args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet' # Create the model png_summary = args.summary is not None and args.summary.startswith('png') is_parallel = not png_summary and args.summary != 'compute' # For PNG summary, parallel graphs are illegible model = create_model(args.pretrained, args.dataset, args.arch, parallel=is_parallel, device_ids=args.gpus) compression_scheduler = None # Create a couple of logging backends. TensorBoardLogger writes log files in a format # that can be read by Google's Tensor Board. PythonLogger writes to the Python logger. tflogger = TensorBoardLogger(msglogger.logdir) pylogger = PythonLogger(msglogger) # We can optionally resume from a checkpoint if args.resume: model, compression_scheduler, start_epoch = apputils.load_checkpoint( model, chkpt_file=args.resume) if 'resnet' in args.arch and 'preact' not in args.arch and 'cifar' in args.arch: distiller.resnet_cifar_remove_layers(model) #model = distiller.resnet_cifar_remove_channels(model, compression_scheduler.zeros_mask_dict) # Define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss().cuda() optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) msglogger.info('Optimizer Type: %s', type(optimizer)) msglogger.info('Optimizer Args: %s', optimizer.defaults) # This sample application can be invoked to produce various summary reports. if args.summary: which_summary = args.summary if which_summary.startswith('png'): apputils.draw_img_classifier_to_file( model, 'model.png', args.dataset, which_summary == 'png_w_params') else: distiller.model_summary(model, which_summary, args.dataset) exit() # Load the datasets: the dataset to load is inferred from the model name passed # in args.arch. The default dataset is ImageNet, but if args.arch contains the # substring "_cifar", then cifar10 is used. train_loader, val_loader, test_loader, _ = apputils.load_data( args.dataset, os.path.expanduser(args.data), args.batch_size, args.workers, args.validation_size, args.deterministic) msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d', len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler)) activations_sparsity = None if args.activation_stats: # If your model has ReLU layers, then those layers have sparse activations. # ActivationSparsityCollector will collect information about this sparsity. # WARNING! Enabling activation sparsity collection will significantly slow down training! activations_sparsity = ActivationSparsityCollector(model) if args.sensitivity is not None: # This sample application can be invoked to execute Sensitivity Analysis on your # model. The ouptut is saved to CSV and PNG. msglogger.info("Running sensitivity tests") test_fnc = partial(test, test_loader=test_loader, criterion=criterion, loggers=[pylogger], print_freq=args.print_freq) which_params = [ param_name for param_name, _ in model.named_parameters() ] sensitivity = distiller.perform_sensitivity_analysis( model, net_params=which_params, sparsities=np.arange(0.0, 0.50, 0.05) if args.sensitivity == 'filter' else np.arange(0.0, 0.95, 0.05), test_func=test_fnc, group=args.sensitivity) distiller.sensitivities_to_png(sensitivity, 'sensitivity.png') distiller.sensitivities_to_csv(sensitivity, 'sensitivity.csv') exit() if args.evaluate: # This sample application can be invoked to evaluate the accuracy of your model on # the test dataset. # You can optionally quantize the model to 8-bit integer before evaluation. # For example: # python3 compress_classifier.py --arch resnet20_cifar ../data.cifar10 -p=50 --resume=checkpoint.pth.tar --evaluate if args.quantize: model.cpu() quantizer = quantization.SymmetricLinearQuantizer(model, 8, 8) quantizer.prepare_model() model.cuda() top1, _, _ = test(test_loader, model, criterion, [pylogger], args.print_freq) if args.quantize: checkpoint_name = 'quantized' apputils.save_checkpoint(0, args.arch, model, optimizer=None, best_top1=top1, name='_'.split(args.name, checkpoint_name) if args.name else checkpoint_name, dir=msglogger.logdir) exit() if args.compress: # The main use-case for this sample application is CNN compression. Compression # requires a compression schedule configuration file in YAML. compression_scheduler = distiller.file_config(model, optimizer, args.compress) for epoch in range(start_epoch, start_epoch + args.epochs): # This is the main training loop. msglogger.info('\n') if compression_scheduler: compression_scheduler.on_epoch_begin(epoch) # Train for one epoch train(train_loader, model, criterion, optimizer, epoch, compression_scheduler, loggers=[tflogger, pylogger], print_freq=args.print_freq, log_params_hist=args.log_params_histograms) distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger]) if args.activation_stats: distiller.log_activation_sparsity(epoch, loggers=[tflogger, pylogger], collector=activations_sparsity) # evaluate on validation set top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args.print_freq, epoch) stats = ('Peformance/Validation/', OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)])) distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1, loggers=[tflogger]) if compression_scheduler: compression_scheduler.on_epoch_end(epoch) # remember best top1 and save checkpoint is_best = top1 > best_top1 best_top1 = max(top1, best_top1) apputils.save_checkpoint(epoch, args.arch, model, optimizer, compression_scheduler, best_top1, is_best, args.name, msglogger.logdir) # Finally run results on the test set test(test_loader, model, criterion, [pylogger], args.print_freq)
def main(): global msglogger check_pytorch_version() args = parser.parse_args() if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir) # Log various details about the execution environment. It is sometimes useful # to refer to past experiment executions and this information may be useful. apputils.log_execution_env_state(sys.argv, gitroot=module_path) msglogger.debug("Distiller: %s", distiller.__version__) start_epoch = 0 best_top1 = 0 best_epoch = 0 if args.deterministic: # Experiment reproducibility is sometimes important. Pete Warden expounded about this # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/ # In Pytorch, support for deterministic execution is still a bit clunky. if args.workers > 1: msglogger.error('ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1') exit(1) # Use a well-known seed, for repeatability of experiments torch.manual_seed(0) random.seed(0) np.random.seed(0) cudnn.deterministic = True else: # This issue: https://github.com/pytorch/pytorch/issues/3659 # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled. cudnn.benchmark = True if args.gpus is not None: try: args.gpus = [int(s) for s in args.gpus.split(',')] except ValueError: msglogger.error('ERROR: Argument --gpus must be a comma-separated list of integers only') exit(1) available_gpus = torch.cuda.device_count() for dev_id in args.gpus: if dev_id >= available_gpus: msglogger.error('ERROR: GPU device ID {0} requested, but only {1} devices available' .format(dev_id, available_gpus)) exit(1) # Set default device in case the first one on the list != 0 torch.cuda.set_device(args.gpus[0]) # Infer the dataset from the model name args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet' args.num_classes = 10 if args.dataset == 'cifar10' else 1000 if args.earlyexit_thresholds: args.num_exits = len(args.earlyexit_thresholds) + 1 args.loss_exits = [0] * args.num_exits args.losses_exits = [] args.exiterrors = [] # Create the model model = create_model(args.pretrained, args.dataset, args.arch, device_ids=args.gpus) compression_scheduler = None # Create a couple of logging backends. TensorBoardLogger writes log files in a format # that can be read by Google's Tensor Board. PythonLogger writes to the Python logger. tflogger = TensorBoardLogger(msglogger.logdir) pylogger = PythonLogger(msglogger) # capture thresholds for early-exit training if args.earlyexit_thresholds: msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds) # We can optionally resume from a checkpoint if args.resume: model, compression_scheduler, start_epoch = apputils.load_checkpoint( model, chkpt_file=args.resume) # Define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss().cuda() optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) msglogger.info('Optimizer Type: %s', type(optimizer)) msglogger.info('Optimizer Args: %s', optimizer.defaults) if args.ADC: return automated_deep_compression(model, criterion, pylogger, args) # This sample application can be invoked to produce various summary reports. if args.summary: return summarize_model(model, args.dataset, which_summary=args.summary) # Load the datasets: the dataset to load is inferred from the model name passed # in args.arch. The default dataset is ImageNet, but if args.arch contains the # substring "_cifar", then cifar10 is used. train_loader, val_loader, test_loader, _ = apputils.load_data( args.dataset, os.path.expanduser(args.data), args.batch_size, args.workers, args.validation_size, args.deterministic) msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d', len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler)) activations_sparsity = None if args.activation_stats: # If your model has ReLU layers, then those layers have sparse activations. # ActivationSparsityCollector will collect information about this sparsity. # WARNING! Enabling activation sparsity collection will significantly slow down training! activations_sparsity = ActivationSparsityCollector(model) if args.sensitivity is not None: return sensitivity_analysis(model, criterion, test_loader, pylogger, args) if args.evaluate: return evaluate_model(model, criterion, test_loader, pylogger, args) if args.compress: # The main use-case for this sample application is CNN compression. Compression # requires a compression schedule configuration file in YAML. compression_scheduler = distiller.file_config(model, optimizer, args.compress) # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer) model.cuda() else: compression_scheduler = distiller.CompressionScheduler(model) args.kd_policy = None if args.kd_teacher: teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus) if args.kd_resume: teacher, _, _ = apputils.load_checkpoint(teacher, chkpt_file=args.kd_resume) dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt) args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw) compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch, ending_epoch=args.epochs, frequency=1) msglogger.info('\nStudent-Teacher knowledge distillation enabled:') msglogger.info('\tTeacher Model: %s', args.kd_teacher) msglogger.info('\tTemperature: %s', args.kd_temp) msglogger.info('\tLoss Weights (distillation | student | teacher): %s', ' | '.join(['{:.2f}'.format(val) for val in dlw])) msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch) for epoch in range(start_epoch, start_epoch + args.epochs): # This is the main training loop. msglogger.info('\n') if compression_scheduler: compression_scheduler.on_epoch_begin(epoch) # Train for one epoch train(train_loader, model, criterion, optimizer, epoch, compression_scheduler, loggers=[tflogger, pylogger], args=args) distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger]) if args.activation_stats: distiller.log_activation_sparsity(epoch, loggers=[tflogger, pylogger], collector=activations_sparsity) # evaluate on validation set top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch) stats = ('Peformance/Validation/', OrderedDict([('Loss', vloss), ('Top1', top1), ('Top5', top5)])) distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1, loggers=[tflogger]) if compression_scheduler: compression_scheduler.on_epoch_end(epoch, optimizer) # remember best top1 and save checkpoint is_best = top1 > best_top1 if is_best: best_epoch = epoch best_top1 = top1 msglogger.info('==> Best Top1: %.3f On Epoch: %d\n', best_top1, best_epoch) apputils.save_checkpoint(epoch, args.arch, model, optimizer, compression_scheduler, best_top1, is_best, args.name, msglogger.logdir) # Finally run results on the test set test(test_loader, model, criterion, [pylogger], args=args)