示例#1
0
def _init_logger(args, script_dir):
    """Configure the module-level ``msglogger`` and log the execution environment.

    Creates ``args.output_dir`` when it is missing, builds the logger from the
    ``logging.conf`` file found in ``script_dir``, stores it in the global
    ``msglogger`` and records the execution-environment state in the log
    directory.

    Args:
        args: parsed command-line arguments; this function reads ``output_dir``,
            ``name``, ``verbose``, ``compress`` and ``qe_stats_file``.
        script_dir: directory containing ``logging.conf``.  The git root passed
            to the environment logger is two directory levels above it.

    Returns:
        The ``logdir`` attribute of the configured logger.
    """
    global msglogger
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))

    # exist_ok=True replaces the racy "check, then create" sequence: another
    # process may create the directory between the two calls.
    os.makedirs(args.output_dir, exist_ok=True)
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'),
                                         args.name, args.output_dir, args.verbose)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(
        filter(None, [args.compress, args.qe_stats_file]),  # remove both None and empty strings
        msglogger.logdir, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)
    return msglogger.logdir
示例#2
0
def main():
    """Run the image-classification compression sample end to end.

    The function parses the command line, configures logging, selects the
    device, creates the model (optionally restoring a checkpoint) and then
    dispatches to exactly one mode:

    * ``args.AMC`` / ``args.greedy`` / ``args.summary`` / ``args.sensitivity`` /
      ``args.evaluate`` / ``args.thinnify`` -- run the matching helper and return;
    * otherwise -- train from ``start_epoch`` to ``args.epochs``, validating and
      checkpointing after every epoch, then run the test set.

    Returns:
        Whatever the dispatched helper returns for the early-exit modes,
        ``None`` otherwise.

    Raises:
        ValueError: if ``--gpus`` is not a comma-separated list of integers, if
            a requested GPU id is not available, or if the starting epoch is not
            smaller than the total number of epochs.
    """
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 90

    # exist_ok=True replaces the racy "check, then create" sequence: another
    # process may create the directory between the two calls.
    os.makedirs(args.output_dir, exist_ok=True)
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress, msglogger.logdir, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    # Evaluation always runs in deterministic mode.
    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        distiller.set_deterministic()  # Use a well-known seed, for repeatability of experiments
    else:
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            # --gpus arrives as a string such as "0,2"; turn it into a list of ints.
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError('ERROR: Argument --gpus must be a comma-separated list of integers only')
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                     .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    # Early-exit bookkeeping: one more exit than there are thresholds.
    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained, args.dataset, args.arch,
                         parallel=not args.load_serialized, device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds)

    # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1
    if args.deprecated_resume:
        msglogger.warning('The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.')
        if not args.reset_optimizer:
            msglogger.warning('If you wish to also reset the optimizer, call with: --reset-optimizer')
            args.reset_optimizer = True
        args.resumed_checkpoint_path = args.deprecated_resume

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model, args.load_model_path,
                                              model_device=args.device)
    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info('\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0')

    # Define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(args.device)

    # No optimizer was restored (or it was reset): create a fresh SGD optimizer.
    if optimizer is None:
        optimizer = torch.optim.SGD(model.parameters(),
            lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer, pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info('\tStats will be collected for {:.1%} of test dataset'.format(args.qe_calibration))
        msglogger.info('\tSetting constant seeds and converting model to serialized execution')
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(create_quantization_stats_collector(model))
        # Calibration is an evaluation run over a fraction of the test set.
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size, args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0], args.sensitivity_range[1], args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger, args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger, activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer, args.compress, compression_scheduler,
            (start_epoch-1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model, compression_scheduler.zeros_mask_dict, args.arch, args.dataset, optimizer=None)
        apputils.save_checkpoint(0, args.arch, model, optimizer=None, scheduler=compression_scheduler,
                                 name="{}_thinned".format(args.resumed_checkpoint_path.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print("Note: your model may have collapsed to random inference, so you may want to fine-tune")
        return

    # Optional knowledge distillation: register a teacher model as a scheduler policy.
    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch, ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    if start_epoch >= ending_epoch:
        msglogger.error(
            'epoch count is too low, starting epoch is {} but total epochs set to {}'.format(
            start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')
    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            # vloss only exists after the first validation pass, so the first
            # epoch reports a large placeholder metric instead.
            compression_scheduler.on_epoch_begin(epoch,
                metrics=(vloss if (epoch != start_epoch) else 10**6))

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader, model, criterion, optimizer, epoch, compression_scheduler,
                  loggers=[tflogger, pylogger], args=args)
            distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(epoch, "train", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch)
            distiller.log_activation_statsitics(epoch, "valid", loggers=[tflogger],
                                                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss),
                              ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, top1, top5, epoch, args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {'current_top1': top1,
                             'best_top1': perf_scores_history[0].top1,
                             'best_epoch': perf_scores_history[0].epoch}
        apputils.save_checkpoint(epoch, args.arch, model, optimizer=optimizer, scheduler=compression_scheduler,
                                 extras=checkpoint_extras, is_best=is_best, name=args.name, dir=msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], activations_collectors, args=args)
示例#3
0
# Seed PyTorch's RNG and keep cuDNN auto-tuning switched off.
torch.manual_seed(args.seed)
cudnn.benchmark = False

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

if not args.resume:
    # Fresh model built from the command-line hyper-parameters.
    model = LII_LSTM(args.model, args.input_size, args.nhid, args.nlayers, args.dropout).to(device)
else:
    # NOTE(review): torch.load unpickles the file, so args.resume must point
    # to a trusted checkpoint.
    with open(args.resume, 'rb') as f:
        model = torch.load(f).to(device)
        # After loading, the RNN parameters are not a contiguous chunk of
        # memory; flattening them restores that and speeds up the forward pass.
        model.rnn.flatten_parameters()

# Distiller loggers
msglogger = apputils.config_pylogger('config/logging.conf',
                                     experiment_name=None,
                                     output_dir='logs')
tflogger = TensorBoardLogger(msglogger.logdir)
tflogger.log_gradients = True
pylogger = PythonLogger(msglogger)

def export_onnx(path, batch_size, seq_len):
    """Export the module-level ``model`` to ONNX format.

    The model is switched to eval mode and traced with an all-zero integer
    input of shape ``(seq_len, batch_size)`` together with a freshly
    initialised hidden state.

    Args:
        path: destination file of the ONNX export.
        batch_size: batch dimension of the dummy input.
        seq_len: sequence-length dimension of the dummy input.
    """
    # Report the path that is actually written, not args.onnx_export, so the
    # message stays correct when the two differ.
    msglogger.info('The model is also exported in ONNX format at {}'.
                   format(os.path.realpath(path)))
    model.eval()
    dummy_input = torch.zeros(seq_len, batch_size, dtype=torch.long, device=device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)


def draw_lang_model_to_file(model, png_fname):
    """Draw a language model graph to a PNG file.
示例#4
0
def train(c, net, compression_scheduler=None):
    """Train ``net`` step by step, with optional Distiller compression callbacks.

    Steps are grouped into epochs of ``steps_per_epoch`` steps, and the
    compression scheduler, when given, is notified at epoch and minibatch
    boundaries.  Every ``c.step_eval`` steps the weights sparsity is logged and
    the model is evaluated on the validation split.

    Any exception raised inside the training loop is logged (through ``c.log``
    when ``c.main`` is set, printed otherwise) and NOT re-raised;
    ``c.on_train_end`` always runs.

    Args:
        c: project configuration object.  It supplies the hyper-parameters
            (``train_batch``, ``eval_batch``, ``train_chunk``, ``step_eval``,
            ...) and the ``on_train_start`` / ``on_step_end`` /
            ``on_train_end`` hooks.
        net: the model; called as ``net(inputs, labels)`` and expected to
            return a mapping with a ``'loss'`` entry.
        compression_scheduler: optional Distiller compression scheduler.
    """
    import distiller.apputils as apputils
    from distiller.data_loggers import TensorBoardLogger, PythonLogger
    msglogger = apputils.config_pylogger('logging.conf', None)
    tflogger = TensorBoardLogger(msglogger.logdir)
    tflogger.log_gradients = True
    pylogger = PythonLogger(msglogger)
    c.setdefault(hebbian=False)

    emb_params = count_params(net.embed) + count_params(net.loss.projections) + count_params(net.loss.clusters)
    opt = get_opt(c, net)
    net, opt, step = c.init_model(net, opt=opt, step='max', train=True)
    step_lr = scheduler(c, opt, step)
    # In debug mode the (presumably smaller) validation split is used for training.
    data_tr = SampleIterator(c, c.train_batch, split='valid' if c.debug else 'train')
    iter_tr = iter(data_tr)
    data_val = SequentialIterator(c, c.eval_batch, split='valid')

    s = Namespace(net=net, opt=opt, step=step)
    # NOTE(review): s.results and s.step_max are read below but not set here;
    # presumably on_train_start populates them -- confirm.
    c.on_train_start(s)

    c.log('Embedding has %s parameters' % emb_params)

    if c.get("steps_per_epoch"):
        steps_per_epoch = c.steps_per_epoch
    else:
        steps_per_epoch = len(data_tr.tokens) // data_tr.bs // c.train_chunk
    print("#### steps per epoch %d ####" % steps_per_epoch)

    if c.hebbian:
        # One counter tensor per vocabulary cluster delimited by c.cutoffs.
        counters = [torch.ones(end - start, dtype=torch.long, device=c.device) for start, end in zip([0] + c.cutoffs, c.cutoffs + [c.n_vocab])]
        temp_counters = [torch.zeros_like(x) for x in counters]

    best_val_loss = np.inf
    if s.results is not None and 'val_loss' in s.results.columns:
        # NOTE(review): .max() picks the *largest* recorded validation loss as
        # the "best" one; .min() may have been intended -- confirm before changing.
        best_val_loss = s.results['val_loss'].dropna().max()
    try:
        while step < s.step_max:
            batch = step % steps_per_epoch
            epoch = step // steps_per_epoch
            if step % steps_per_epoch == 0:
                c.log("====> batch=%d, epoch=%d, step=%d" % (batch, epoch, step))
                if compression_scheduler:
                    compression_scheduler.on_epoch_begin(epoch)

            if compression_scheduler:
                compression_scheduler.on_minibatch_begin(epoch, minibatch_id=batch, minibatches_per_epoch=steps_per_epoch)

            step_lr(step)

            x = to_torch(next(iter_tr), c.device).t()

            t_s = time()
            # Next-token prediction: the targets are the inputs shifted by one.
            inputs, labels = x[:-1], x[1:]
            preds = net(inputs, labels)
            loss = preds['loss']

            if compression_scheduler:
                # The return value is not used by this loop.
                compression_scheduler.before_backward_pass(epoch, minibatch_id=batch,
                                                           minibatches_per_epoch=steps_per_epoch,
                                                           loss=loss, return_loss_components=False)

            opt.zero_grad()
            if torch.isnan(loss):
                raise RuntimeError('Encountered nan loss during training')
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), c.get('clip_grad', 0.5))
            opt.step()

            if c.hebbian:
                hebbian_weight_update(c, net, preds['hiddens'], counters, temp_counters)

            time_model = np.round(time() - t_s, 5)

            loss = from_torch(loss)
            # Perplexity is only reported for losses of at most 5.
            perplexity = np.nan if loss > 5 else np.e ** loss
            step_result = pd.Series(dict(
                loss=loss,
                perplexity=perplexity,
                time=time_model
            )).add_prefix('train_')
            step_result['lr'] = next(iter(opt.param_groups))['lr']
            if c.use_cache:
                step_result['theta'] = preds['theta']
                step_result['lambda'] = preds['lambda'].item()

            if compression_scheduler:
                compression_scheduler.on_minibatch_end(epoch, minibatch_id=batch, minibatches_per_epoch=steps_per_epoch)

            # NOTE(review): this fires after the *first* minibatch of an epoch
            # (step is incremented only below), not after the last -- confirm
            # this is the intended on_epoch_end timing.
            if step % steps_per_epoch == 0:
                if compression_scheduler:
                    compression_scheduler.on_epoch_end(epoch)

            s.step = step = step + 1
            if step % c.step_eval == 0:
                distiller.log_weights_sparsity(net, epoch, loggers=[tflogger, pylogger])
                _, total = distiller.weights_sparsity_tbl_summary(net, return_total_sparsity=True)
                c.log("total sparsity: %.3lf" % total)

                # pd.concat replaces Series.append, which was removed in pandas 2.0.
                step_result = pd.concat([
                    step_result,
                    pd.Series(evaluate(c, data_val, net)).add_prefix('val_'),
                ])
                s.record_step = step_result['val_loss'] < best_val_loss
                clear_gpu_memory()
            s.step_result = step_result
            c.on_step_end(s)
    except Exception:
        # Best effort: report the failure, then fall through to the end hook.
        import traceback
        err = traceback.format_exc()
        if c.main:
            c.log(err)
        else:
            print(err)
    finally:
        c.on_train_end(s)
示例#5
0
def main(args):
    """Train or evaluate a detection model with optional Distiller compression.

    The function initialises distributed mode, sets up logging on the main
    process, builds datasets, loaders and the model, and then runs one of:

    * ``args.summary`` -- print the requested model summaries and return;
    * ``args.qe_calibration`` -- collect quantization statistics and return;
    * ``args.test_only`` -- evaluate once and return;
    * otherwise -- train from ``args.start_epoch`` to ``args.epochs``, saving a
      checkpoint into ``args.output_dir`` and evaluating after every epoch.

    Args:
        args: parsed command-line arguments.
    """
    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))

    # Every distributed process runs this line.  exist_ok=True keeps a rank from
    # crashing with FileExistsError when another rank creates the directory
    # between an existence check and the makedirs call.
    os.makedirs(args.output_dir, exist_ok=True)
    if utils.is_main_process():
        msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir,
                                             args.verbose)

        # Log various details about the execution environment.  It is sometimes useful
        # to refer to past experiment executions and this information may be useful.
        apputils.log_execution_env_state(
            filter(None, [args.compress, args.qe_stats_file]),  # remove both None and empty strings
            msglogger.logdir)
        msglogger.debug("Distiller: %s", distiller.__version__)
    else:
        # Non-main processes get a disabled logger so that they stay silent.
        msglogger = logging.getLogger()
        msglogger.disabled = True

    # Data loading code
    print("Loading data")
    dataset, num_classes = get_dataset(args.dataset, "train", get_transform(train=True), args.data_path)
    dataset_test, _ = get_dataset(args.dataset, "val", get_transform(train=False), args.data_path)

    print("Creating data loaders")
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    # A non-negative factor enables batching by aspect-ratio group.
    if args.aspect_ratio_group_factor >= 0:
        group_ids = create_aspect_ratio_groups(dataset, k=args.aspect_ratio_group_factor)
        train_batch_sampler = GroupedBatchSampler(train_sampler, group_ids, args.batch_size)
    else:
        train_batch_sampler = torch.utils.data.BatchSampler(
            train_sampler, args.batch_size, drop_last=True)

    data_loader = torch.utils.data.DataLoader(
        dataset, batch_sampler=train_batch_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1,
        sampler=test_sampler, num_workers=args.workers,
        collate_fn=utils.collate_fn)

    print("Creating model")
    # The model constructor is looked up by name in the ``detection`` module.
    model = detection.__dict__[args.model](num_classes=num_classes,
                                                              pretrained=args.pretrained)
    patch_fastrcnn(model)
    model.to(device)

    if args.summary:
        if utils.is_main_process():
            for summary in args.summary:
                distiller.model_summary(model, summary, args.dataset)
        return

    # Keep a handle on the unwrapped model for state_dict load/save.
    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(
        params, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_step_size, gamma=args.lr_gamma)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_gamma)

    compression_scheduler = None
    if utils.is_main_process():
        # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
        # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
        tflogger = TensorBoardLogger(msglogger.logdir)
        pylogger = PythonLogger(msglogger)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer, args.compress, compression_scheduler, None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.qe_calibration:
        def test_fn(model):
            return evaluate(model, data_loader_test, device=device)
        collect_quant_stats(model_without_ddp, test_fn, save_dir=args.output_dir,
                            modules_to_collect=['backbone', 'rpn', 'roi_heads'])
        # We skip `.transform` because it is a pre-processing unit.
        return

    if args.resume:
        # Restore model, optimizer, LR-scheduler and (when present) compression state.
        checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        if compression_scheduler and 'compression_scheduler' in checkpoint:
            compression_scheduler.load_state_dict(checkpoint['compression_scheduler'])

    if args.test_only:
        evaluate(model, data_loader_test, device=device)
        return
    activations_collectors = create_activation_stats_collectors(model, *args.activation_stats)
    print("Start training")
    start_time = time.time()

    # if not isinstance(model, nn.DataParallel) and torch.cuda.is_available() \
    #    and torch.cuda.device_count() > 1:
    #     msglogger.info("Using %d GPUs on DataParallel." % torch.cuda.device_count())
    #     model = nn.DataParallel(model)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
            dist.barrier()

        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        with collectors_context(activations_collectors["train"]) as collectors:
            train_one_epoch(model, optimizer, data_loader, device, epoch, args.print_freq, compression_scheduler)
            # tflogger / pylogger exist only on the main process.
            if utils.is_main_process():
                distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
                distiller.log_activation_statsitics(epoch, "train", loggers=[tflogger],
                                                    collector=collectors["sparsity"])
            if args.masks_sparsity and utils.is_main_process():
                msglogger.info(distiller.masks_sparsity_tbl_summary(model, compression_scheduler))

        lr_scheduler.step()
        if args.output_dir:
            save_dict = {
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args}
            if compression_scheduler:
                save_dict['compression_scheduler'] = compression_scheduler.state_dict()
            utils.save_on_master(save_dict,
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))

        # evaluate after every epoch
        evaluate(model, data_loader_test, device=device)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
示例#6
0
def train(args):
    """Train a German-to-English translation model, optionally with pruning.

    Builds the source/target fields and vocabularies, loads IWSLT or Multi30k,
    creates the model, and trains for ``args.epoch`` epochs.  After each epoch
    the model is validated, the BLEU score is computed, and the weights are
    saved: always as ``latest.bin``, and as ``validation.bin`` whenever the
    BLEU score improves.

    When ``args.summary`` is set, the weights-sparsity table is printed and the
    process exits.  When ``args.compress`` is set, a Distiller compression
    schedule is loaded from that file and driven at epoch boundaries.

    Args:
        args: parsed command-line arguments.
    """

    SRC = data.Field(tokenize=tokenize_de,
                     pad_token=BLANK_WORD,
                     lower=args.lower)
    TGT = data.Field(tokenize=tokenize_en,
                     init_token=BOS_WORD,
                     eos_token=EOS_WORD,
                     pad_token=BLANK_WORD,
                     lower=args.lower)

    # Load IWSLT Data ---> German to English Translation
    # Sentence pairs longer than args.max_length on either side are dropped.
    if args.dataset == 'IWSLT':
        train, val, test = datasets.IWSLT.splits(
            exts=('.de', '.en'),
            fields=(SRC, TGT),
            filter_pred=lambda x: len(vars(x)['src']) <= args.max_length and
            len(vars(x)['trg']) <= args.max_length)
    else:
        train, val, test = datasets.Multi30k.splits(
            exts=('.de', '.en'),
            fields=(SRC, TGT),
            filter_pred=lambda x: len(vars(x)['src']) <= args.max_length and
            len(vars(x)['trg']) <= args.max_length)

    # Frequency of words in the vocabulary
    SRC.build_vocab(train.src, min_freq=args.min_freq)
    TGT.build_vocab(train.trg, min_freq=args.min_freq)

    print("Size of source vocabulary:", len(SRC.vocab))
    print("Size of target vocabulary:", len(TGT.vocab))

    pad_idx = TGT.vocab.stoi[BLANK_WORD]
    model = make_model(len(SRC.vocab),
                       len(TGT.vocab),
                       n=args.num_blocks,
                       d_model=args.hidden_dim,
                       d_ff=args.ff_dim,
                       h=args.num_heads,
                       dropout=args.dropout)
    print("Model made with n:", args.num_blocks, "hidden_dim:",
          args.hidden_dim, "feed forward dim:", args.ff_dim, "heads:",
          args.num_heads, "dropout:", args.dropout)

    # Count the trainable parameters only.
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print("Number of parameters: ", params)

    if args.load_model:
        print("Loading model from [%s]" % args.load_model)
        model.load_state_dict(torch.load(args.load_model))

    # UNCOMMENT WHEN RUNNING ON RESEARCH MACHINES - run on GPU
    # model.cuda()

    # Used by original authors, hurts perplexity but improves BLEU score
    criterion = LabelSmoothing(size=len(TGT.vocab),
                               padding_idx=pad_idx,
                               smoothing=0.1)

    # UNCOMMENT WHEN RUNNING ON RESEARCH MACHINES - run on GPU
    # criterion.cuda()

    train_iter = MyIterator(train,
                            batch_size=args.batch_size,
                            device=0,
                            repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn,
                            train=True)
    valid_iter = MyIterator(val,
                            batch_size=args.batch_size,
                            device=0,
                            repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn,
                            train=False,
                            sort=False)
    model_par = nn.DataParallel(model, device_ids=devices)

    # model_opt = NoamOpt(model.src_embed[0].d_model, 1, 2000,
    #                     torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    # Use standard optimizer -- As used in the paper
    model_opt = get_std_opt(model)

    # PRUNING CODE
    if args.summary:
        df = distiller.weights_sparsity_tbl_summary(model, False)
        print(df)
        exit(0)

    msglogger = apputils.config_pylogger('logging.conf', None)
    tflogger = TensorBoardLogger(msglogger.logdir)
    tflogger.log_gradients = True
    pylogger = PythonLogger(msglogger)

    # Bind the scheduler unconditionally: the training loop tests it on every
    # epoch, and leaving it unbound raised UnboundLocalError whenever
    # args.compress was not given.
    compression_scheduler = None
    if args.compress:
        compression_scheduler = distiller.config.file_config(
            model_par.module, None, args.compress)

    print(model_par.module)

    best_bleu = 0
    best_epoch = 0

    # NOTE(review): 60 is a hard-coded divisor whose meaning is not visible
    # here (presumably sentences per step) -- confirm.
    steps_per_epoch = math.ceil(len(train_iter.data()) / 60)

    for epoch in range(args.epoch):
        print("=" * 80)
        print("Epoch ", epoch + 1)
        print("=" * 80)
        print("Training...")
        model_par.train()

        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # IF PRUNING
        run_epoch((rebatch(pad_idx, b) for b in train_iter),
                  model_par,
                  MultiGPULossCompute(model.generator,
                                      criterion,
                                      devices=devices,
                                      opt=model_opt),
                  args,
                  epoch,
                  steps_per_epoch,
                  compression_scheduler,
                  SRC,
                  TGT,
                  valid_iter,
                  is_valid=False)

        # run_epoch((rebatch(pad_idx, b) for b in train_iter), model_par,
        #           MultiGPULossCompute(model.generator, criterion, devices=devices, opt=model_opt), args,
        #           SRC, TGT, valid_iter, is_valid=False)

        print("Validation...")
        model_par.eval()

        # IF PRUNING
        # opt=None: the validation pass passes no optimizer to the loss computation.
        loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter),
                         model_par,
                         MultiGPULossCompute(model.generator,
                                             criterion,
                                             devices=devices,
                                             opt=None),
                         args,
                         epoch,
                         steps_per_epoch,
                         compression_scheduler,
                         SRC,
                         TGT,
                         valid_iter,
                         is_valid=True)

        # loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter), model_par,
        #                  MultiGPULossCompute(model.generator, criterion, devices=devices, opt=None), args,
        #                  SRC, TGT, valid_iter, is_valid=True)

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch)

        print('Validation loss:', loss)
        print('Validation perplexity: ', np.exp(loss))
        bleu_score = run_validation_bleu_score(model, SRC, TGT, valid_iter)

        # Keep a separate copy of the weights with the best BLEU score so far.
        if best_bleu < bleu_score:
            best_bleu = bleu_score
            model_file = args.save_to + args.exp_name + 'validation.bin'
            print('Saving model without optimizer [%s]' % model_file)
            torch.save(model_par.module.state_dict(), model_file)
            best_epoch = epoch

        model_file = args.save_to + args.exp_name + 'latest.bin'
        print('Saving latest model without optimizer [%s]' % model_file)
        torch.save(model_par.module.state_dict(), model_file)

    print('The best epoch was:', best_epoch)
示例#7
0
                           args.nlayers, args.dropout, args.tied).to(device)

# Loss, SGD optimizer, and a scheduler that multiplies the learning rate by
# 0.5 when the monitored quantity (mode='min') stops decreasing (patience=0).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                            momentum=args.momentum, weight_decay=args.weight_decay)
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.5, patience=0, verbose=True)
# </editor-fold>

# <editor-fold desc=">>> Loggers & Summary">
# Distiller logging backends: a Python logger plus a TensorBoard logger that
# also records gradients.
msglogger = apputils.config_pylogger('logging.conf', None)
tflogger = TensorBoardLogger(msglogger.logdir)
tflogger.log_gradients = True
pylogger = PythonLogger(msglogger)

if args.summary:
    which_summary = args.summary
    if which_summary == 'png':
        draw_lang_model_to_file(model, 'rnn.png', 'wikitext2')
    elif which_summary == 'percentile':
        percentile = 0.9
        for name, param in model.state_dict().items():
            if param.dim() < 2:
                # Skip biases
                continue
            bottomk, _ = torch.topk(param.abs().view(-1),
def main():
    """Run the image-classifier compression sample application end to end.

    Parses the command line, configures logging, selects the device(s) and
    builds the model.  It then dispatches on the parsed flags: AMC, greedy
    search, model summary, sensitivity analysis, evaluation and thinnify each
    return early.  Otherwise the function trains for ``args.epochs`` epochs
    (validating and saving a checkpoint after each one) and finishes with a
    run on the test set.

    Side effects: rebinds the module-level ``msglogger``, creates
    ``args.output_dir`` when it is missing, adds several attributes to
    ``args`` (``device``, ``dataset``, ``num_classes``, ``kd_policy``, ...),
    and calls ``exit(1)`` on an invalid ``--workers``/``--gpus`` setting.
    """
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress,
                                     msglogger.logdir,
                                     gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    # One record per tracked "best" score; every slot starts at zero and the
    # list is kept sorted by ascending top1 inside the training loop below.
    best_epochs = [
        distiller.MutableNamedTuple({
            'epoch': 0,
            'top1': 0,
            'sparsity': 0
        }) for i in range(args.num_best_scores)
    ]

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1'
            )
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            # --gpus arrives as a comma-separated string; turn it into a list of ints.
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
                exit(1)
            # Reject any requested ID that is not below the visible device count.
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
                    exit(1)
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        # One exit per threshold plus the final exit of the network.
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained,
                         args.dataset,
                         args.arch,
                         parallel=not args.load_serialized,
                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:
        # The checkpoint also supplies the scheduler and the epoch to continue from.
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
        model.to(args.device)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    # AMC and greedy search are self-contained flows: run them and return.
    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info(
            '\tStats will be collected for {:.1%} of test dataset'.format(
                args.qe_calibration))
        msglogger.info(
            '\tSetting constant seeds and converting model to serialized execution'
        )
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(
            create_quantization_stats_collector(model))
        # Calibration is carried out by the evaluation path below, restricted
        # to the requested fraction of the test set.
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size,
        args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.sensitivity is not None:
        # sensitivity_range holds (start, stop, step) for np.arange.
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress,
                                                      compression_scheduler)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        # No schedule file and nothing restored from a checkpoint: use a fresh scheduler.
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"
        # Apply distiller.remove_filters using the scheduler's mask dictionary,
        # save the result as "<resume name>_thinned", and return without training.
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resume.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    # Optional student-teacher knowledge distillation, registered as a scheduler policy.
    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher,
                                                     chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    # args.epochs is the number of epochs to run from start_epoch, not an
    # absolute end epoch.
    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        # Send the per-epoch validation metrics to TensorBoard only.
        stats = ('Peformance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        # best_epochs is sorted ascending, so [-1] is the best score so far
        # and [0] is the slot that a better score replaces.
        is_best = top1 > best_epochs[-1].top1
        if top1 > best_epochs[0].top1:
            best_epochs[0].epoch = epoch
            best_epochs[0].top1 = top1
            # Keep best_epochs sorted such that best_epochs[0] is the lowest top1 in the best_epochs list
            best_epochs = sorted(best_epochs, key=lambda score: score.top1)
        # Report from best to worst, skipping slots that are still at their initial zero.
        for score in reversed(best_epochs):
            if score.top1 > 0:
                msglogger.info('==> Best Top1: %.3f on Epoch: %d', score.top1,
                               score.epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler, best_epochs[-1].top1,
                                 is_best, args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)
示例#9
0
def main():
    """Set up logging, devices and the model, then summarize or evaluate it.

    The function parses the command line (defaulting ``args.epochs`` to 90),
    configures the module-level ``msglogger``, picks CPU or CUDA devices,
    builds the model and optionally restores it from a checkpoint.  It
    returns the result of ``summarize_model`` when ``--summary`` is given and
    the result of ``evaluate_model`` when evaluation (or quantization
    calibration, which forces evaluation) is requested; otherwise it falls
    off the end and returns ``None``.

    Raises:
        ValueError: if ``--gpus`` is not a comma-separated list of integers
            or names a device ID that is not available.
    """
    global msglogger

    here = os.path.dirname(__file__)
    repo_root = os.path.abspath(os.path.join(here, '..', '..'))

    # Command line, with a default epoch count when none was supplied.
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 90

    # The logger writes under output_dir, so make sure it exists first.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    logging_conf = os.path.join(here, 'logging.conf')
    msglogger = apputils.config_pylogger(logging_conf, args.name,
                                         args.output_dir)

    # Record the execution environment so past runs can be looked up later.
    apputils.log_execution_env_state(args.compress,
                                     msglogger.logdir,
                                     gitroot=repo_root)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    # Evaluation always runs deterministically.
    if args.evaluate:
        args.deterministic = True
    if not args.deterministic:
        # CUDNN benchmark mode gives the best performance and is usually
        # "safe" for image classification models, whose input sizes do not
        # change during the run.
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True
    else:
        # Well-known seed, for repeatability of experiments.  See
        # https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        distiller.set_deterministic()

    use_gpu = not args.cpu and torch.cuda.is_available()
    if use_gpu:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(token) for token in args.gpus.split(',')]
            except ValueError:
                raise ValueError(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
            n_devices = torch.cuda.device_count()
            # First requested ID that is out of range, if any.
            bad_id = next(
                (gpu_id for gpu_id in args.gpus if gpu_id >= n_devices), None)
            if bad_id is not None:
                raise ValueError(
                    'ERROR: GPU device ID {0} requested, but only {1} devices available'
                    .format(bad_id, n_devices))
            # Make the first listed GPU the default one in case it is not 0.
            torch.cuda.set_device(args.gpus[0])
    else:
        # CPU execution is signalled with a GPU index of -1.
        args.device = 'cpu'
        args.gpus = -1

    # The dataset (and with it the class count) follows from the model name.
    if 'cifar' in args.arch:
        args.dataset, args.num_classes = 'cifar10', 10
    else:
        args.dataset, args.num_classes = 'imagenet', 1000

    model = create_model(args.pretrained,
                         args.dataset,
                         args.arch,
                         parallel=not args.load_serialized,
                         device_ids=args.gpus)

    compression_scheduler = None
    optimizer = None

    # A full checkpoint restores scheduler/optimizer/epoch as well; a "lean"
    # one only restores the model.
    if args.resumed_checkpoint_path:
        (model, compression_scheduler, optimizer,
         start_epoch) = apputils.load_checkpoint(
             model,
             args.resumed_checkpoint_path,
             use_swa_model=args.use_swa_model,
             model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model,
                                              args.load_model_path,
                                              model_device=args.device)

    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info(
                '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0'
            )

    # Logging backends: TensorBoardLogger writes files readable by Tensor
    # Board, PythonLogger forwards to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # Loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(args.device)

    # Only build a fresh optimizer when none was restored (or it was reset).
    if optimizer is None:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    # Summary reports are a terminal action.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info(
            '\tStats will be collected for {:.1%} of test dataset'.format(
                args.qe_calibration))
        msglogger.info(
            '\tSetting constant seeds and converting model to serialized execution'
        )
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(
            create_quantization_stats_collector(model))
        # Calibration is performed by the evaluation path below on the
        # requested fraction of the test set.
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the data for the dataset inferred above from args.arch.
    data_dir = os.path.expanduser(args.data)
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, data_dir, args.batch_size, args.workers,
        args.validation_split, args.deterministic, args.effective_train_size,
        args.effective_valid_size, args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)
示例#10
0
def main():
    """Train, evaluate or calibrate the NCF (NeuMF) recommendation model.

    After parsing the command line and configuring logging, the function
    follows one of three paths:

    * ``--qe-calibration`` / ``--activation-histograms``: collect the
      requested statistics over a portion of the test set and return ``0``.
    * ``--eval``: optionally apply post-training quantization, measure the
      hit ratio and NDCG once, optionally save a quantized checkpoint, and
      return ``0``.
    * otherwise: train from ``start_epoch`` up to ``args.epochs``, validating
      and saving a checkpoint after every epoch, stopping early once the hit
      ratio reaches ``args.threshold`` (returns ``None``).

    Side effects: rebinds the module-level ``msglogger``, rewrites several
    attributes on ``args``, writes the run configuration, ``model.txt`` and
    checkpoints to the log directory, and calls ``sys.exit(1)`` when
    ``--gpus`` is invalid.
    """
    global msglogger

    args = parse_args()

    # Distiller loggers
    msglogger = apputils.config_pylogger('logging.conf',
                                         args.name,
                                         output_dir=args.output_dir)
    tflogger = TensorBoardLogger(msglogger.logdir)

    if args.seed is not None:
        msglogger.info("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    # Keep only the second dot-separated component of each setting's string
    # form (presumably the member name of an enum -- TODO confirm).
    args.qe_mode = str(args.qe_mode).split('.')[1]
    args.qe_clip_acts = str(args.qe_clip_acts).split('.')[1]

    apputils.log_execution_env_state(sys.argv)

    if args.gpus is not None:
        # --gpus arrives as a comma-separated string; turn it into a list of ints.
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error(
                'ERROR: Argument --gpus must be a comma-separated list of integers only'
            )
            sys.exit(1)
        if len(args.gpus) > 1:
            msglogger.error('ERROR: Only single GPU supported for NCF')
            sys.exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error(
                    'ERROR: GPU device ID {0} requested, but only {1} devices available'
                    .format(dev_id, available_gpus))
                sys.exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Save configuration to file
    config = dict(vars(args))
    # NOTE(review): utcnow() returns a naive datetime, so .timestamp()
    # interprets it as local time -- confirm whether a true UTC epoch is wanted.
    config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp())
    config['local_timestamp'] = str(datetime.now())
    run_dir = msglogger.logdir
    msglogger.info("Saving config and results to {}".format(run_dir))
    if not os.path.exists(run_dir) and run_dir != '':
        os.makedirs(run_dir)
    utils.save_config(config, run_dir)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    t1 = time.time()
    # Load Data
    # Any of the evaluation-style modes disables training (and the training set).
    training = not (args.eval or args.qe_calibration
                    or args.activation_histograms)
    msglogger.info('Loading data')
    if training:
        train_dataset = CFTrainDataset(
            os.path.join(args.data, TRAIN_RATINGS_FILENAME),
            args.negative_samples)
        train_dataloader = torch.utils.data.DataLoader(
            dataset=train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True)
        nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items
    else:
        # Without the training set the user/item counts are hard-coded.
        train_dataset = None
        train_dataloader = None
        nb_users, nb_items = (138493, 26744)

    test_ratings = load_test_ratings(
        os.path.join(args.data, TEST_RATINGS_FILENAME))  # noqa: E501
    test_negs = load_test_negs(os.path.join(args.data, TEST_NEG_FILENAME))

    msglogger.info(
        'Load data done [%.1f s]. #user=%d, #item=%d, #train=%s, #test=%d' %
        (time.time() - t1, nb_users, nb_items,
         str(train_dataset.mat.nnz) if training else 'N/A', len(test_ratings)))

    # Create model
    model = NeuMF(nb_users,
                  nb_items,
                  mf_dim=args.factors,
                  mf_reg=0.,
                  mlp_layer_sizes=args.layers,
                  mlp_layer_regs=[0. for i in args.layers],
                  split_final=args.split_final)
    if use_cuda:
        model = model.cuda()
    msglogger.info(model)
    msglogger.info("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    compression_scheduler = None
    start_epoch = 0
    optimizer = None
    if args.load:
        if training:
            # A full checkpoint also restores scheduler, optimizer and epoch.
            model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
                model, args.load)
            if args.reset_optimizer:
                start_epoch = 0
                optimizer = None
        else:
            model = apputils.load_lean_checkpoint(model, args.load)

    # Add loss to graph
    criterion = nn.BCEWithLogitsLoss()

    if use_cuda:
        criterion = criterion.cuda()

    if training and optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.compress:
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress)
        # Only move the model to the GPU when CUDA is actually in use, so a
        # --no-cuda run keeps the model on the same device as its inputs.
        if use_cuda:
            model.cuda()

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')

    if args.qe_calibration or args.activation_histograms:
        # Two alternative collection jobs; calibration wins if both are set.
        calib = {
            'portion':
            args.qe_calibration,
            'desc_str':
            'quantization calibration stats',
            'collect_func':
            partial(distiller.data_loggers.collect_quant_stats,
                    inplace_runtime_check=True,
                    disable_inplace_attrs=True)
        }
        hists = {
            'portion':
            args.activation_histograms,
            'desc_str':
            'activation histograms',
            'collect_func':
            partial(distiller.data_loggers.collect_histograms,
                    activation_stats=None,
                    nbins=2048,
                    save_hist_imgs=True)
        }
        d = calib if args.qe_calibration else hists

        distiller.utils.assign_layer_fq_names(model)
        num_users = int(np.floor(len(test_ratings) * d['portion']))
        msglogger.info(
            "Generating {} based on {:.1%} of the test-set ({} users)".format(
                d['desc_str'], d['portion'], num_users))

        test_fn = partial(val_epoch,
                          ratings=test_ratings,
                          negs=test_negs,
                          K=args.topk,
                          use_cuda=use_cuda,
                          processes=args.processes,
                          num_users=num_users)
        d['collect_func'](model=model,
                          test_fn=test_fn,
                          save_dir=run_dir,
                          classes=None)

        return 0

    if args.eval:
        if args.quantize_eval and args.qe_calibration is None:
            # The quantizer is prepared on the CPU with a dummy
            # (user, item, flag) input.
            model.cpu()
            quantizer = quantization.PostTrainLinearQuantizer.from_args(
                model, args)
            dummy_input = (torch.tensor([1]), torch.tensor([1]),
                           torch.tensor([True], dtype=torch.bool))
            quantizer.prepare_model(dummy_input)
            # Return to the GPU only if the run uses CUDA at all.
            if use_cuda:
                model.cuda()

        distiller.utils.assign_layer_fq_names(model)

        if args.eval_fp16:
            model = model.half()

        # Calculate initial Hit Ratio and NDCG
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                use_cuda=use_cuda,
                                processes=args.processes)
        val_time = time.time() - begin
        hit_rate = np.mean(hits)
        msglogger.info(
            'Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, val_time = {val_time:.2f}'
            .format(K=args.topk,
                    hit_rate=hit_rate,
                    ndcg=np.mean(ndcgs),
                    val_time=val_time))

        if args.quantize_eval:
            # Store the hit rate that was just measured; it used to be reset
            # to 0 before being saved, so the checkpoint never recorded it.
            checkpoint_name = 'quantized'
            apputils.save_checkpoint(0,
                                     'NCF',
                                     model,
                                     optimizer=None,
                                     extras={'quantized_hr@10': hit_rate},
                                     name='_'.join([args.name, 'quantized'])
                                     if args.name else checkpoint_name,
                                     dir=msglogger.logdir)
        return 0

    # Training path: both non-training modes returned above.
    total_samples = len(train_dataloader.sampler)
    steps_per_epoch = math.ceil(total_samples / args.batch_size)
    best_hit_rate = 0
    best_epoch = 0
    for epoch in range(start_epoch, args.epochs):
        msglogger.info('')
        model.train()
        losses = utils.AverageMeter()

        begin = time.time()

        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch, optimizer)

        loader = tqdm.tqdm(train_dataloader)
        for batch_index, (user, item, label) in enumerate(loader):
            user = torch.autograd.Variable(user, requires_grad=False)
            item = torch.autograd.Variable(item, requires_grad=False)
            label = torch.autograd.Variable(label, requires_grad=False)
            if use_cuda:
                # `non_blocking` replaces the former `async` keyword argument;
                # `async` is a reserved word since Python 3.7 and no longer parses.
                user = user.cuda(non_blocking=True)
                item = item.cuda(non_blocking=True)
                label = label.cuda(non_blocking=True)

            if compression_scheduler:
                compression_scheduler.on_minibatch_begin(
                    epoch, batch_index, steps_per_epoch, optimizer)

            outputs = model(user, item, torch.tensor([False],
                                                     dtype=torch.bool))
            loss = criterion(outputs, label)

            if compression_scheduler:
                compression_scheduler.before_backward_pass(
                    epoch,
                    batch_index,
                    steps_per_epoch,
                    loss,
                    optimizer,
                    return_loss_components=False)

            losses.update(loss.data.item(), user.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if compression_scheduler:
                compression_scheduler.on_minibatch_end(epoch, batch_index,
                                                       steps_per_epoch,
                                                       optimizer)

            # Show the running loss on the progress bar
            description = (
                'Epoch {} Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                    epoch, loss=losses))
            loader.set_description(description)

            # Every log_freq steps, send training stats to TensorBoard.
            steps_completed = batch_index + 1
            if steps_completed % args.log_freq == 0:
                stats_dict = OrderedDict()
                stats_dict['Loss'] = losses.avg
                stats = ('Performance/Training/', stats_dict)
                params = model.named_parameters(
                ) if args.log_params_histograms else None
                distiller.log_training_progress(stats, params, epoch,
                                                steps_completed,
                                                steps_per_epoch, args.log_freq,
                                                [tflogger])

                tflogger.log_model_buffers(model,
                                           ['tracked_min', 'tracked_max'],
                                           'Quant/Train/Acts/TrackedMinMax',
                                           epoch, steps_completed,
                                           steps_per_epoch, args.log_freq)

        train_time = time.time() - begin
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                use_cuda=use_cuda,
                                output=valid_results_file,
                                epoch=epoch,
                                processes=args.processes)
        val_time = time.time() - begin

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        hit_rate = np.mean(hits)
        mean_ndcgs = np.mean(ndcgs)

        stats_dict = OrderedDict()
        stats_dict['HR@{0}'.format(args.topk)] = hit_rate
        stats_dict['NDCG@{0}'.format(args.topk)] = mean_ndcgs
        stats = ('Performance/Validation/', stats_dict)
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        msglogger.info(
            'Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, AvgTrainLoss = {loss.avg:.4f}, '
            'train_time = {train_time:.2f}, val_time = {val_time:.2f}'.format(
                epoch=epoch,
                K=args.topk,
                hit_rate=hit_rate,
                ndcg=mean_ndcgs,
                loss=losses,
                train_time=train_time,
                val_time=val_time))

        # Track the best hit rate seen so far and checkpoint every epoch.
        is_best = False
        if hit_rate > best_hit_rate:
            best_hit_rate = hit_rate
            is_best = True
            best_epoch = epoch
        extras = {
            'current_hr@10': hit_rate,
            'best_hr@10': best_hit_rate,
            'best_epoch': best_epoch
        }
        apputils.save_checkpoint(epoch,
                                 'NCF',
                                 model,
                                 optimizer,
                                 compression_scheduler,
                                 extras,
                                 is_best,
                                 dir=run_dir)

        # Optional early stop once the target hit rate is reached.
        if args.threshold is not None:
            if hit_rate >= args.threshold:
                msglogger.info("Hit threshold of {}".format(args.threshold))
                break
def main():
    """Run the sample compression application from command-line arguments.

    Parses the arguments, configures logging and the compute device, creates
    the model and optionally restores it from a checkpoint.  Depending on the
    flags it then either dispatches to a one-shot mode (AMC, greedy search,
    model summaries, ONNX export, quantization calibration, activation
    histograms, sensitivity analysis, evaluation, thinnify) or runs the
    train/validate loop.  The loop saves a checkpoint and logs the run
    directory to MLflow after every epoch; once it finishes, the MLflow
    "eval" entry point is launched on the path returned by the most recent
    successful ``apputils.save_checkpoint`` call.

    Returns:
        The value returned by the selected one-shot mode, otherwise ``None``.

    Raises:
        ValueError: If ``--gpus`` is not a comma-separated list of integers,
            names a device index that is not available, or if the starting
            epoch is not lower than the requested number of epochs.
        RuntimeError: If no checkpoint could be saved during training, which
            leaves nothing for the final evaluation to load.
    """
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 90

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir,
        args.verbose)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(
        filter(None, [args.compress, args.qe_stats_file
                      ]),  # remove both None and empty strings
        msglogger.logdir,
        gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    # Evaluation runs are always made deterministic.
    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        distiller.set_deterministic(
            args.seed)  # For experiment reproducibility
    else:
        if args.seed is not None:
            distiller.set_seed(args.seed)
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = distiller.apputils.classification_dataset_str_from_arch(
        args.arch)
    args.num_classes = distiller.apputils.classification_num_classes(
        args.dataset)

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model, config = create_model(args.pretrained,
                                 args.dataset,
                                 args.arch,
                                 parallel=not args.load_serialized,
                                 device_ids=args.gpus)

    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # TODO(barrh): args.deprecated_resume is deprecated since v0.3.1
    if args.deprecated_resume:
        msglogger.warning(
            'The "--resume" flag is deprecated. Please use "--resume-from=YOUR_PATH" instead.'
        )
        if not args.reset_optimizer:
            msglogger.warning(
                'If you wish to also reset the optimizer, call with: --reset-optimizer'
            )
            args.reset_optimizer = True
        args.resumed_checkpoint_path = args.deprecated_resume

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model,
                                              args.load_model_path,
                                              model_device=args.device)
    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info(
                '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0'
            )

    # Define loss function (criterion)
    if "ssd" in args.arch:
        neg_pos_ratio = 3
        criterion = MultiboxLoss(config.priors,
                                 iou_threshold=0.5,
                                 neg_pos_ratio=neg_pos_ratio,
                                 center_variance=0.1,
                                 size_variance=0.2,
                                 device=args.device,
                                 reduction="sum",
                                 class_reduction=True,
                                 verbose=0)
    else:
        criterion = nn.CrossEntropyLoss().to(args.device)

    # Build a fresh optimizer unless one was restored from the checkpoint.
    if optimizer is None:
        if "ssd" in args.arch:
            # Separate parameter groups for the base network, the extra
            # layers and the headers; the headers group has no explicit 'lr'
            # and therefore uses the optimizer-level value.
            base_net_lr = args.lr
            extra_layers_lr = args.lr
            params = [{
                'params': model.base_net.parameters(),
                'lr': base_net_lr
            }, {
                'params':
                itertools.chain(model.source_layer_add_ons.parameters(),
                                model.extras.parameters()),
                'lr':
                extra_layers_lr
            }, {
                'params':
                itertools.chain(model.regression_headers.parameters(),
                                model.classification_headers.parameters())
            }]
        else:
            params = model.parameters()
        optimizer = torch.optim.SGD(params,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        for summary in args.summary:
            distiller.model_summary(model, summary, args.dataset)
        return

    if args.export_onnx is not None:
        return distiller.export_img_classifier_to_onnx(model,
                                                       os.path.join(
                                                           msglogger.logdir,
                                                           args.export_onnx),
                                                       args.dataset,
                                                       add_softmax=True,
                                                       verbose=False)

    if args.qe_calibration:
        return acts_quant_stats_collection(model, criterion, pylogger, args)

    if args.activation_histograms:
        return acts_histogram_collection(model, criterion, pylogger, args)

    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = load_data(args, config=config)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resumed_checkpoint_path.replace(
                                         ".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher, _ = create_model(args.kd_pretrained,
                                  args.dataset,
                                  args.kd_teacher,
                                  parallel=not args.load_serialized,
                                  device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_lean_checkpoint(teacher, args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        # Keep a copy of the teacher in the run directory; an existing file
        # is left untouched.
        raw_teacher_model_path = os.path.join(msglogger.logdir,
                                              "raw_teacher.pth.tar")
        if not os.path.exists(raw_teacher_model_path):
            teacher.save(raw_teacher_model_path)
            msglogger.info(Fore.CYAN + '\tRaw Teacher Model saved: {0}'.format(
                raw_teacher_model_path) + Style.RESET_ALL)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model,
            teacher,
            args.kd_temp,
            dlw,
            loss_type=args.kd_loss_type,
            focal_alpha=args.kd_focal_alpha,
            use_adaptive=args.kd_focal_adaptive,
            verbose=0)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    if start_epoch >= ending_epoch:
        msglogger.error(
            'epoch count is too low, starting epoch is {} but total epochs set to {}'
            .format(start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')

    # Path handed to the final evaluation.  It is only meaningful once at
    # least one checkpoint save has succeeded, which the flag records.
    raw_fullpath_best = None
    checkpoint_saved = False

    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            # No validation loss exists before the first epoch of this run,
            # so a large placeholder is passed instead.
            compression_scheduler.on_epoch_begin(
                epoch, metrics=(vloss if (epoch != start_epoch) else 10**6))

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, top1, top5,
                                       epoch, args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {
            'current_top1': top1,
            'best_top1': perf_scores_history[0].top1,
            'best_epoch': perf_scores_history[0].epoch
        }
        try:
            raw_fullpath_best = apputils.save_checkpoint(
                epoch,
                args.arch,
                model,
                optimizer=optimizer,
                scheduler=compression_scheduler,
                extras=checkpoint_extras,
                is_best=is_best,
                name=args.name,
                dir=msglogger.logdir)
            checkpoint_saved = True
        except Exception:
            # Best effort: a failed save must not abort training, so the
            # previous path is kept.  The failure is logged with its
            # traceback instead of being silently discarded.
            msglogger.warning(
                'Failed to save checkpoint for epoch %d; keeping the previous checkpoint path',
                epoch,
                exc_info=True)
        mlflow.log_artifacts(msglogger.logdir)

    # Without a single successful save there is no model file to evaluate;
    # fail with a clear message rather than an unrelated NameError.
    if not checkpoint_saved:
        raise RuntimeError(
            'No checkpoint was saved during training; cannot run the final evaluation.'
        )

    # Finally run results on the test set
    eval_params = {
        "model_type": args.arch,
        "model_path": raw_fullpath_best,
        "dataset_path": args.data,
        "label_path": "models/voc-model-labels.txt"
    }
    mlflow.projects.run(uri=".",
                        entry_point="eval",
                        use_conda=False,
                        parameters=eval_params)
示例#12
0
def main():
    """Run the sample image-classifier compression application.

    Parses the command-line arguments, configures logging, determinism and
    the compute device, creates the model and optionally restores it (and its
    optimizer) from a checkpoint.  Depending on the flags it then either
    dispatches to a one-shot mode (AMC, greedy search, model summary,
    sensitivity analysis, evaluation / quantization calibration, thinnify) or
    runs the train/validate loop for ``args.epochs`` epochs starting at the
    resumed epoch, saving a checkpoint after every epoch and finally running
    the test set.

    Returns:
        The value returned by the selected one-shot mode, otherwise ``None``.

    Raises:
        SystemExit: With status 1 when ``--deterministic`` is combined with
            more than one loader, or when ``--gpus`` is malformed or names a
            device index that is not available.
        RuntimeError: Re-raised from training; a CUDA out-of-memory failure
            is additionally logged with a batch-size hint.
    """
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress,
                                     msglogger.logdir,
                                     gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_epochs = list()

    if args.deterministic:
        if args.loaders is None:
            args.loaders = 1
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.loaders > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --loaders to 0 or 1'
            )
            sys.exit(1)
        # Use a well-known seed, for repeatability of experiments
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if (args.use_cpu
            or (args.gpus is None and not torch.cuda.is_available())
            or args.gpus == ''):
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
                sys.exit(1)
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
                    sys.exit(1)
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    if args.loaders is None:
        # At this point args.gpus is a list of device ids (parsed above), -1
        # when running on the CPU, or None when no --gpus value was given.
        # The list must be counted, not multiplied: int * list yields a list
        # and max(int, list) raises TypeError on Python 3.
        if args.gpus is None:
            active_gpus = torch.cuda.device_count()
        elif isinstance(args.gpus, list):
            active_gpus = len(args.gpus)
        else:
            active_gpus = 0
        args.loaders = max(parser.DEFAULT_LOADERS_COUNT,
                           parser.DEFAULT_LOADERS_COUNT * active_gpus)
    msglogger.debug('Number of data loaders set to: {}'.format(args.loaders))

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained,
                         args.dataset,
                         args.arch,
                         parallel=not args.load_serialized,
                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    optimizer = None
    resumed_training_steps = None
    if args.resume or args.load_state_dict:
        if args.resume and not args.reset_optimizer:
            # initiate SGD with dummy lr
            optimizer = torch.optim.SGD(model.parameters(), lr=0.36787944117)
        model, compression_scheduler, optimizer, start_epoch, resumed_training_steps = apputils.load_checkpoint(
            model, args.resume or args.load_state_dict, optimizer=optimizer)
        model.to(args.device)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)

    if optimizer is not None:
        # optimizer was resumed from checkpoint
        # check if user has tried to set optimizer arguments
        # if so, ignore arguments with a warning.
        optimizer_group_args = [
            'lr', 'learning-rate', 'momentum', 'weight-decay', 'wd'
        ]
        user_optim_args = [
            x for x in optimizer_group_args for arg in sys.argv
            if arg.startswith('--' + x)
        ]
        if user_optim_args:
            msglogger.warning(
                '{} optimizer arguments are ignored.'.format(user_optim_args))
            msglogger.info(
                'setting optimizer arguments when optimizer is resumed '
                'from checkpoint is forbidden.')
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info(
            '\tStats will be collected for {:.1%} of test dataset'.format(
                args.qe_calibration))
        msglogger.info(
            '\tSetting constant seeds and converting model to serialized execution'
        )
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(
            create_quantization_stats_collector(model))
        # Calibration is carried out by the evaluation path below, on the
        # requested fraction of the test set.
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.loaders, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size,
        args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))
    args.trainset_print_period = parser.getPrintPeriod(
        args, len(train_loader.sampler), args.batch_size)
    args.validset_print_period = parser.getPrintPeriod(args,
                                                       len(val_loader.sampler),
                                                       args.batch_size)
    args.testset_print_period = parser.getPrintPeriod(args,
                                                      len(test_loader.sampler),
                                                      args.batch_size)

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if
            (args.resume and not args.reset_optimizer) else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        #zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resume.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher = apputils.load_checkpoint(teacher,
                                               chkpt_file=args.kd_resume)[0]
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(
            args.kd_policy, range(args.kd_start_epoch, args.epochs, 1))

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    # Warn when the schedule's last policy ends at or after the final epoch
    # this run will execute.
    if getattr(compression_scheduler, 'global_policy_end_epoch',
               None) is not None:
        if compression_scheduler.global_policy_end_epoch >= (start_epoch +
                                                             args.epochs):
            msglogger.warning(
                'scheduler requires at least {} epochs, but only {} are sanctioned'
                .format(compression_scheduler.global_policy_end_epoch,
                        args.epochs))

    accumulated_training_steps = resumed_training_steps if resumed_training_steps is not None else 0
    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            try:
                train(train_loader,
                      model,
                      criterion,
                      optimizer,
                      epoch,
                      accumulated_training_steps,
                      compression_scheduler,
                      loggers=[tflogger, pylogger],
                      args=args)
            except RuntimeError as e:
                # Add a hint for out-of-memory failures; the error itself is
                # always re-raised.
                if ('cuda out of memory' in str(e).lower()):
                    msglogger.error(
                        'CUDA memory failure has been detected.\n'
                        'Sometimes it helps to decrease batch size.\n'
                        'e.g. Add the following flag to your call: --batch-size={}'
                        .format(args.batch_size // 10))
                raise
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))
        # One step per mini-batch, counting a final partial batch.
        accumulated_training_steps += math.ceil(
            len(train_loader.sampler) / train_loader.batch_size)

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        tflogger.log_training_progress(stats, epoch, None)

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        if getattr(compression_scheduler, 'global_policy_end_epoch',
                   None) is None or (
                       compression_scheduler.global_policy_end_epoch <= epoch):
            # Update the list of top scores achieved since all policies have concluded
            if top1 > 0:
                best_epochs.append(
                    distiller.MutableNamedTuple({
                        'top1': top1,
                        'top5': top5,
                        'epoch': epoch
                    }))
            # Keep best_epochs sorted from best to worst
            # Sort by top1 first, secondary sort by top5, and so forth
            best_epochs.sort(key=operator.attrgetter('top1', 'top5', 'epoch'),
                             reverse=True)
            for score in best_epochs[:args.num_best_scores]:
                msglogger.info('==> Best Top1: %.3f Top5: %.3f on epoch: %d',
                               score.top1, score.top5, score.epoch)

        # Always a real bool: a bare `best_epochs and ...` would evaluate to
        # the empty list itself while no score has been recorded yet.
        is_best = bool(best_epochs) and (epoch == best_epochs[0].epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler,
                                 best_epochs[0].top1 if best_epochs else None,
                                 is_best, args.name, msglogger.logdir,
                                 accumulated_training_steps)

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)
示例#13
0
def main():
    """Entry point of the pose-estimation compression sample.

    Parses the command line, configures logging, creates the model, optimizer
    and LR scheduler, and then either dispatches to one of the one-shot modes
    (AMC, greedy search, model summaries, ONNX export, quantization
    calibration, activation histograms, sensitivity analysis, evaluation,
    thinnify) or runs the train / validate / checkpoint loop followed by a
    final pass over the test set.

    Raises:
        ValueError: if ``--gpus`` is not a comma-separated list of integers or
            names a device that is not available, if ``load_vgg19`` is set for
            an architecture other than ``vgg19``, or if the starting epoch is
            not smaller than the requested number of epochs.
    """
    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 200

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(
        filter(None, [args.compress, args.qe_stats_file
                      ]),  # remove both None and empty strings
        msglogger.logdir,
        gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    # Evaluation runs are always made deterministic.
    if args.evaluate:
        args.deterministic = True
    if args.deterministic:
        distiller.set_deterministic(
            args.seed)  # For experiment reproducibility
    else:
        if args.seed is not None:
            distiller.set_seed(args.seed)
        # Turn on CUDNN benchmark mode for best performance. This is usually "safe" for image
        # classification models, as the input sizes don't change during the run
        # See here: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936/3
        cudnn.benchmark = True

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                raise ValueError(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    raise ValueError(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # The dataset is currently hard-coded rather than inferred from the model name.
    # TODO
    args.dataset = 'coco'
    # args.num_classes = 21  # wc -l ~/data/VOC2012/voc-model-labels.txt

    if args.load_vgg19 and args.arch != 'vgg19':
        raise ValueError(
            '``load_vgg19`` should be set only when vgg19 is used')

    model = create_pose_estimation_model(args.pretrained,
                                         args.dataset,
                                         args.arch,
                                         load_vgg19=args.load_vgg19,
                                         parallel=not args.load_serialized,
                                         device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # <editor-fold desc=">>> Load Model">

    # We can optionally resume from a checkpoint
    optimizer = None
    if args.resumed_checkpoint_path:
        model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
            model, args.resumed_checkpoint_path, model_device=args.device)
    elif args.load_model_path:
        model = apputils.load_lean_checkpoint(model,
                                              args.load_model_path,
                                              model_device=args.device)

    if args.reset_optimizer:
        start_epoch = 0
        if optimizer is not None:
            optimizer = None
            msglogger.info(
                '\nreset_optimizer flag set: Overriding resumed optimizer and resetting epoch count to 0'
            )
    # </editor-fold>

    # Define loss function (criterion): one loss function per supported architecture.
    # get_loss(saved_for_loss, heat_temp, heat_weight,vec_temp, vec_weight)
    criterion = {
        'shufflenetv2': shufflenetv2_get_loss,
        'vgg19': vgg19_get_loss,
        'hourglass': hourglass_get_loss,
    }[args.arch]

    # Build a fresh SGD optimizer unless one was restored from the checkpoint.
    if optimizer is None:
        trainable_vars = [
            param for param in model.parameters() if param.requires_grad
        ]
        optimizer = torch.optim.SGD(trainable_vars,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    # TODO: load lr_scheduler
    lr_scheduler = ReduceLROnPlateau(optimizer,
                                     mode='min',
                                     factor=0.8,
                                     patience=5,
                                     verbose=True,
                                     threshold=0.0001,
                                     threshold_mode='rel',
                                     cooldown=3,
                                     min_lr=0,
                                     eps=1e-08)

    if args.AMC:
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        for summary in args.summary:
            distiller.model_summary(model, summary, args.dataset)
        return

    if args.export_onnx is not None:
        return distiller.export_img_classifier_to_onnx(model,
                                                       os.path.join(
                                                           msglogger.logdir,
                                                           args.export_onnx),
                                                       args.dataset,
                                                       add_softmax=True,
                                                       verbose=False)

    if args.qe_calibration:
        return acts_quant_stats_collection(model, criterion, pylogger, args)

    if args.activation_histograms:
        return acts_histogram_collection(model, criterion, pylogger, args)

    print('Building activations_collectors...')
    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    # Load the train / validation / test data loaders from the parsed arguments.
    print('Loading data...')
    train_loader, val_loader, test_loader, _ = load_data(args)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(
            model, optimizer, args.compress, compression_scheduler,
            (start_epoch - 1) if args.resumed_checkpoint_path else None)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        # zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resumed_checkpoint_path is not None, \
            "You must use --resume-from to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resumed_checkpoint_path.replace(
                                         ".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    if start_epoch >= ending_epoch:
        msglogger.error(
            'epoch count is too low, starting epoch is {} but total epochs set to {}'
            .format(start_epoch, ending_epoch))
        raise ValueError('Epochs parameter is too low. Nothing to do.')

    for epoch in range(start_epoch, ending_epoch):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            # Report the previous epoch's validation loss to the scheduler.  No
            # validation has run yet on the first iteration, so a large
            # placeholder is passed instead (and `loss` is not evaluated).
            compression_scheduler.on_epoch_begin(
                epoch,
                metrics=(loss if (epoch != start_epoch) else 10**6))

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            loss = validate(val_loader, model, criterion, [pylogger], args,
                            epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        # Let the plateau scheduler react to this epoch's validation loss.
        lr_scheduler.step(loss)

        stats = ('Performance/Validation/', OrderedDict([('Loss', loss)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        update_training_scores_history(perf_scores_history, model, loss, epoch,
                                       args.num_best_scores)
        is_best = epoch == perf_scores_history[0].epoch
        checkpoint_extras = {
            'current_loss': loss,
            'best_loss': perf_scores_history[0].loss,
            'best_epoch': perf_scores_history[0].epoch
        }
        apputils.save_checkpoint(epoch,
                                 args.arch,
                                 model,
                                 optimizer=optimizer,
                                 scheduler=compression_scheduler,
                                 extras=checkpoint_extras,
                                 is_best=is_best,
                                 name=args.name,
                                 dir=msglogger.logdir)

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)
示例#14
0
def main():
    """Set up logging, determinism, devices and dataset fields for a run.

    Parses the command line, configures the module-level ``msglogger``, applies
    the deterministic / cuDNN-benchmark setting, resolves the compute device and
    GPU ids, derives the dataset and class count from ``args.arch``, and
    initializes the early-exit bookkeeping fields when thresholds are given.

    Raises:
        ValueError: if ``--deterministic`` is combined with more than one
            worker, or if ``--gpus`` is malformed or names an unavailable device.
    """
    global msglogger

    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))

    # Parse arguments; the epoch count defaults to 90 when not given.
    args = parser.get_parser().parse_args()
    if args.epochs is None:
        args.epochs = 90

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    logging_conf = os.path.join(script_dir, 'logging.conf')
    msglogger = apputils.config_pylogger(logging_conf, args.name, args.output_dir)

    # Record details of the execution environment so that past experiment
    # runs can be looked up later.
    apputils.log_execution_env_state(args.compress, msglogger.logdir, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    ending_epoch = args.epochs
    perf_scores_history = []

    if not args.deterministic:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # implies that cudnn.benchmark should respect cudnn.deterministic, but
        # empirically results are not reproduced when benchmark is set, so it
        # is enabled only when deterministic mode is off.
        cudnn.benchmark = True
    else:
        # Experiment reproducibility is sometimes important.  Pete Warden wrote about it:
        # https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In PyTorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            raise ValueError('ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1')
        # Use a well-known seed, for repeatability of experiments
        distiller.set_deterministic()

    run_on_cpu = args.cpu or not torch.cuda.is_available()
    if run_on_cpu:
        # A GPU index of -1 denotes CPU execution.
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(token) for token in args.gpus.split(',')]
            except ValueError:
                raise ValueError('ERROR: Argument --gpus must be a comma-separated list of integers only')
            gpu_count = torch.cuda.device_count()
            # Report the first requested id that exceeds the available devices.
            out_of_range = next((gpu for gpu in args.gpus if gpu >= gpu_count), None)
            if out_of_range is not None:
                raise ValueError('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                 .format(out_of_range, gpu_count))
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # Infer the dataset (and its class count) from the model name.
    if 'cifar' in args.arch:
        args.dataset = 'cifar10'
        args.num_classes = 10
    else:
        args.dataset = 'imagenet'
        args.num_classes = 1000

    if args.earlyexit_thresholds:
        # One more exit than there are thresholds.
        args.num_exits = 1 + len(args.earlyexit_thresholds)
        args.loss_exits = [0 for _ in range(args.num_exits)]
        args.losses_exits = []
        args.exiterrors = []
示例#15
0
def main():
    """Entry point of the ResNet152 compression sample.

    Parses the command line, configures logging and the compute device, builds
    a ``ResNet152`` model with an Adam optimizer over ``model.model.fc``, and
    then either dispatches to one of the one-shot modes (AMC, greedy search,
    model summary, sensitivity analysis, evaluation / quantization calibration,
    thinnify) or runs the train / validate / checkpoint loop followed by a
    final pass over the test set.

    Invalid ``--deterministic`` / ``--gpus`` settings are logged and terminate
    the process with exit status 1 (``SystemExit``).
    """
    # Local import: only needed for the error exits below.  ``sys.exit`` is used
    # instead of the ``exit`` builtin, which is injected by the ``site`` module
    # for interactive use and is not guaranteed to exist in every interpreter.
    import sys

    script_dir = os.path.dirname(__file__)
    module_path = os.path.abspath(os.path.join(script_dir, '..', '..'))
    global msglogger

    # Parse arguments
    args = parser.get_parser().parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(args.compress,
                                     msglogger.logdir,
                                     gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    perf_scores_history = []
    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1'
            )
            sys.exit(1)  # abort with a non-zero (failure) exit status
        # Use a well-known seed, for repeatability of experiments
        distiller.set_deterministic()
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.cpu or not torch.cuda.is_available():
        # Set GPU index to -1 if using CPU
        args.device = 'cpu'
        args.gpus = -1
    else:
        args.device = 'cuda'
        if args.gpus is not None:
            try:
                args.gpus = [int(s) for s in args.gpus.split(',')]
            except ValueError:
                msglogger.error(
                    'ERROR: Argument --gpus must be a comma-separated list of integers only'
                )
                sys.exit(1)
            available_gpus = torch.cuda.device_count()
            for dev_id in args.gpus:
                if dev_id >= available_gpus:
                    msglogger.error(
                        'ERROR: GPU device ID {0} requested, but only {1} devices available'
                        .format(dev_id, available_gpus))
                    sys.exit(1)
            # Set default device in case the first one on the list != 0
            torch.cuda.set_device(args.gpus[0])

    # The dataset name is hard-coded here rather than inferred from the model name.
    # NOTE(review): 'cousm' looks like a typo for 'custom' -- confirm against the
    # consumers of args.dataset before changing it.
    args.dataset = 'cousm'

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = ResNet152()
    # model = torch.nn.DataParallel(model, device_ids=args.gpus)  # multi-GPU data parallelism
    model.to(args.device)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s',
                       args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:
        # checkpoint = torch.load(args.resume)
        # model.load_state_dict(checkpoint['state_dict'])
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)
        model.to(args.device)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().to(args.device)

    # optimizer = torch.optim.SGD(model.fc.parameters(), lr=args.lr,
    #                             momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    # Only the parameters of model.model.fc are handed to the optimizer.
    optimizer = torch.optim.Adam(model.model.fc.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.AMC:  # automated deep compression
        return automated_deep_compression(model, criterion, optimizer,
                                          pylogger, args)
    if args.greedy:  # greedy search
        return greedy(model, criterion, optimizer, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    # Activation statistics collectors
    activations_collectors = create_activation_stats_collectors(
        model, *args.activation_stats)

    if args.qe_calibration:
        msglogger.info('Quantization calibration stats collection enabled:')
        msglogger.info(
            '\tStats will be collected for {:.1%} of test dataset'.format(
                args.qe_calibration))
        msglogger.info(
            '\tSetting constant seeds and converting model to serialized execution'
        )
        distiller.set_deterministic()
        model = distiller.make_non_parallel_copy(model)
        activations_collectors.update(
            create_quantization_stats_collector(model))  # quantization stats collector
        # Calibration is carried out as an evaluation run over part of the test set.
        args.evaluate = True
        args.effective_test_size = args.qe_calibration

    # Load the datasets.
    # NOTE(review): the dataset root below is a hard-coded, developer-local path.
    train_loader, val_loader, test_loader, _ = get_data_loaders(
        datasets_fn, r'/home/tian/Desktop/image_yasuo', args.batch_size,
        args.workers, args.validation_split, args.deterministic,
        args.effective_train_size, args.effective_valid_size,
        args.effective_test_size)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    # This sample application can be invoked to run a sensitivity analysis on the model.
    if args.sensitivity is not None:
        sensitivities = np.arange(args.sensitivity_range[0],
                                  args.sensitivity_range[1],
                                  args.sensitivity_range[2])
        return sensitivity_analysis(model, criterion, test_loader, pylogger,
                                    args, sensitivities)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger,
                              activations_collectors, args,
                              compression_scheduler)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress,
                                                      compression_scheduler)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.to(args.device)
    elif compression_scheduler is None:
        compression_scheduler = distiller.CompressionScheduler(model)

    if args.thinnify:
        # zeros_mask_dict = distiller.create_model_masks_dict(model)
        assert args.resume is not None, "You must use --resume to provide a checkpoint file to thinnify"
        distiller.remove_filters(model,
                                 compression_scheduler.zeros_mask_dict,
                                 args.arch,
                                 args.dataset,
                                 optimizer=None)
        apputils.save_checkpoint(0,
                                 args.arch,
                                 model,
                                 optimizer=None,
                                 scheduler=compression_scheduler,
                                 name="{}_thinned".format(
                                     args.resume.replace(".pth.tar", "")),
                                 dir=msglogger.logdir)
        print(
            "Note: your model may have collapsed to random inference, so you may want to fine-tune"
        )
        return

    # Optional student-teacher knowledge distillation.
    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained,
                               args.dataset,
                               args.kd_teacher,
                               device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher,
                                                     chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt,
                                                args.kd_student_wt,
                                                args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(
            model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy,
                                         starting_epoch=args.kd_start_epoch,
                                         ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    # Manual learning-rate schedule state: lr is halved after every epoch that
    # is not ranked best (see the end of the loop body).
    lr = args.lr
    lr_decay = 0.5
    for epoch in range(start_epoch, args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        with collectors_context(activations_collectors["train"]) as collectors:
            train(train_loader,
                  model,
                  criterion,
                  optimizer,
                  epoch,
                  compression_scheduler,
                  loggers=[tflogger, pylogger],
                  args=args)
            distiller.log_weights_sparsity(model,
                                           epoch,
                                           loggers=[tflogger, pylogger])
            distiller.log_activation_statsitics(
                epoch,
                "train",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            if args.masks_sparsity:  # log the masks sparsity table at the end of each epoch
                msglogger.info(
                    distiller.masks_sparsity_tbl_summary(
                        model, compression_scheduler))

        # evaluate on validation set
        with collectors_context(activations_collectors["valid"]) as collectors:
            top1, top5, vloss = validate(val_loader, model, criterion,
                                         [pylogger], args, epoch)
            distiller.log_activation_statsitics(
                epoch,
                "valid",
                loggers=[tflogger],
                collector=collectors["sparsity"])
            save_collectors_data(collectors, msglogger.logdir)

        stats = ('Performance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # Update the list of top scores achieved so far, and save the checkpoint
        sparsity = distiller.model_sparsity(model)
        perf_scores_history.append(
            distiller.MutableNamedTuple({
                'sparsity': sparsity,
                'top1': top1,
                'top5': top5,
                'epoch': epoch
            }))
        # Keep perf_scores_history sorted from best to worst
        # Sort by sparsity as main sort key, then sort by top1, top5 and epoch
        perf_scores_history.sort(key=operator.attrgetter(
            'sparsity', 'top1', 'top5', 'epoch'),
                                 reverse=True)
        for score in perf_scores_history[:args.num_best_scores]:
            msglogger.info(
                '==> Best [Top1: %.3f   Top5: %.3f   Sparsity: %.2f on epoch: %d]',
                score.top1, score.top5, score.sparsity, score.epoch)

        is_best = epoch == perf_scores_history[0].epoch
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler,
                                 perf_scores_history[0].top1, is_best,
                                 args.name, msglogger.logdir)
        if not is_best:
            # This epoch did not take first place in perf_scores_history
            # (ranked by sparsity, then top1, top5, epoch): decay the learning
            # rate and push the new value into every optimizer parameter group.
            lr = lr * lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

    # Finally run results on the test set
    test(test_loader,
         model,
         criterion, [pylogger],
         activations_collectors,
         args=args)