示例#1
0
def main():
    global msglogger
    check_pytorch_version()
    args = parser.parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(
        os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(sys.argv, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_top1 = 0

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error(
                'ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1'
            )
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        torch.manual_seed(0)
        random.seed(0)
        np.random.seed(0)
        cudnn.deterministic = True
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error(
                'ERROR: Argument --gpus must be a comma-separated list of integers only'
            )
            exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error(
                    'ERROR: GPU device ID {0} requested, but only {1} devices available'
                    .format(dev_id, available_gpus))
                exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'

    # Create the model
    png_summary = args.summary is not None and args.summary.startswith('png')
    is_parallel = not png_summary and args.summary != 'compute'  # For PNG summary, parallel graphs are illegible
    model = create_model(args.pretrained,
                         args.dataset,
                         args.arch,
                         parallel=is_parallel,
                         device_ids=args.gpus)

    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # We can optionally resume from a checkpoint
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)

        if 'resnet' in args.arch and 'preact' not in args.arch and 'cifar' in args.arch:
            distiller.resnet_cifar_remove_layers(model)
            #model = distiller.resnet_cifar_remove_channels(model, compression_scheduler.zeros_mask_dict)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        which_summary = args.summary
        if which_summary.startswith('png'):
            apputils.draw_img_classifier_to_file(
                model, 'model.png', args.dataset,
                which_summary == 'png_w_params')
        else:
            distiller.model_summary(model, which_summary, args.dataset)
        exit()

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_size, args.deterministic)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler),
                   len(test_loader.sampler))

    activations_sparsity = None
    if args.activation_stats:
        # If your model has ReLU layers, then those layers have sparse activations.
        # ActivationSparsityCollector will collect information about this sparsity.
        # WARNING! Enabling activation sparsity collection will significantly slow down training!
        activations_sparsity = ActivationSparsityCollector(model)

    if args.sensitivity is not None:
        # This sample application can be invoked to execute Sensitivity Analysis on your
        # model.  The ouptut is saved to CSV and PNG.
        msglogger.info("Running sensitivity tests")
        test_fnc = partial(test,
                           test_loader=test_loader,
                           criterion=criterion,
                           loggers=[pylogger],
                           print_freq=args.print_freq)
        which_params = [
            param_name for param_name, _ in model.named_parameters()
        ]
        sensitivity = distiller.perform_sensitivity_analysis(
            model,
            net_params=which_params,
            sparsities=np.arange(0.0, 0.50, 0.05)
            if args.sensitivity == 'filter' else np.arange(0.0, 0.95, 0.05),
            test_func=test_fnc,
            group=args.sensitivity)
        distiller.sensitivities_to_png(sensitivity, 'sensitivity.png')
        distiller.sensitivities_to_csv(sensitivity, 'sensitivity.csv')
        exit()

    if args.evaluate:
        # This sample application can be invoked to evaluate the accuracy of your model on
        # the test dataset.
        # You can optionally quantize the model to 8-bit integer before evaluation.
        # For example:
        # python3 compress_classifier.py --arch resnet20_cifar  ../data.cifar10 -p=50 --resume=checkpoint.pth.tar --evaluate
        if args.quantize:
            model.cpu()
            quantizer = quantization.SymmetricLinearQuantizer(model, 8, 8)
            quantizer.prepare_model()
            model.cuda()
        top1, _, _ = test(test_loader, model, criterion, [pylogger],
                          args.print_freq)
        if args.quantize:
            checkpoint_name = 'quantized'
            apputils.save_checkpoint(0,
                                     args.arch,
                                     model,
                                     optimizer=None,
                                     best_top1=top1,
                                     name='_'.split(args.name, checkpoint_name)
                                     if args.name else checkpoint_name,
                                     dir=msglogger.logdir)
        exit()

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        train(train_loader,
              model,
              criterion,
              optimizer,
              epoch,
              compression_scheduler,
              loggers=[tflogger, pylogger],
              print_freq=args.print_freq,
              log_params_hist=args.log_params_histograms)
        distiller.log_weights_sparsity(model,
                                       epoch,
                                       loggers=[tflogger, pylogger])
        if args.activation_stats:
            distiller.log_activation_sparsity(epoch,
                                              loggers=[tflogger, pylogger],
                                              collector=activations_sparsity)

        # evaluate on validation set
        top1, top5, vloss = validate(val_loader, model, criterion, [pylogger],
                                     args.print_freq, epoch)
        stats = ('Peformance/Validation/',
                 OrderedDict([('Loss', vloss), ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch)

        # remember best top1 and save checkpoint
        is_best = top1 > best_top1
        best_top1 = max(top1, best_top1)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer,
                                 compression_scheduler, best_top1, is_best,
                                 args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], args.print_freq)
示例#2
0
                                            [tflogger])


def export_onnx(path, batch_size, seq_len):
    msglogger.info('The model is also exported in ONNX format at {}'.format(
        os.path.realpath(args.onnx_export)))
    model.eval()
    dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(
        -1, batch_size).to(device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)


# Distiller loggers
msglogger = apputils.config_pylogger('logging.conf', None)
tflogger = TensorBoardLogger(msglogger.logdir)
tflogger.log_gradients = True
pylogger = PythonLogger(msglogger)

if args.summary:
    which_summary = args.summary
    if which_summary == 'png':
        draw_lang_model_to_file(model, 'rnn.png', 'wikitext2')
    elif which_summary == 'percentile':
        percentile = 0.9
        for name, param in model.state_dict().items():
            if param.dim() < 2:
                # Skip biases
                continue
            bottomk, _ = torch.topk(param.abs().view(-1),
                                    int(percentile * param.numel()),
def main():
    global msglogger
    check_pytorch_version()
    args = parser.parse_args()
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    msglogger = apputils.config_pylogger(os.path.join(script_dir, 'logging.conf'), args.name, args.output_dir)

    # Log various details about the execution environment.  It is sometimes useful
    # to refer to past experiment executions and this information may be useful.
    apputils.log_execution_env_state(sys.argv, gitroot=module_path)
    msglogger.debug("Distiller: %s", distiller.__version__)

    start_epoch = 0
    best_top1 = 0
    best_epoch = 0

    if args.deterministic:
        # Experiment reproducibility is sometimes important.  Pete Warden expounded about this
        # in his blog: https://petewarden.com/2018/03/19/the-machine-learning-reproducibility-crisis/
        # In Pytorch, support for deterministic execution is still a bit clunky.
        if args.workers > 1:
            msglogger.error('ERROR: Setting --deterministic requires setting --workers/-j to 0 or 1')
            exit(1)
        # Use a well-known seed, for repeatability of experiments
        torch.manual_seed(0)
        random.seed(0)
        np.random.seed(0)
        cudnn.deterministic = True
    else:
        # This issue: https://github.com/pytorch/pytorch/issues/3659
        # Implies that cudnn.benchmark should respect cudnn.deterministic, but empirically we see that
        # results are not re-produced when benchmark is set. So enabling only if deterministic mode disabled.
        cudnn.benchmark = True

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error('ERROR: Argument --gpus must be a comma-separated list of integers only')
            exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error('ERROR: GPU device ID {0} requested, but only {1} devices available'
                                .format(dev_id, available_gpus))
                exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Infer the dataset from the model name
    args.dataset = 'cifar10' if 'cifar' in args.arch else 'imagenet'
    args.num_classes = 10 if args.dataset == 'cifar10' else 1000

    if args.earlyexit_thresholds:
        args.num_exits = len(args.earlyexit_thresholds) + 1
        args.loss_exits = [0] * args.num_exits
        args.losses_exits = []
        args.exiterrors = []

    # Create the model
    model = create_model(args.pretrained, args.dataset, args.arch, device_ids=args.gpus)
    compression_scheduler = None
    # Create a couple of logging backends.  TensorBoardLogger writes log files in a format
    # that can be read by Google's Tensor Board.  PythonLogger writes to the Python logger.
    tflogger = TensorBoardLogger(msglogger.logdir)
    pylogger = PythonLogger(msglogger)

    # capture thresholds for early-exit training
    if args.earlyexit_thresholds:
        msglogger.info('=> using early-exit threshold values of %s', args.earlyexit_thresholds)

    # We can optionally resume from a checkpoint
    if args.resume:
        model, compression_scheduler, start_epoch = apputils.load_checkpoint(
            model, chkpt_file=args.resume)

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    msglogger.info('Optimizer Type: %s', type(optimizer))
    msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.ADC:
        return automated_deep_compression(model, criterion, pylogger, args)

    # This sample application can be invoked to produce various summary reports.
    if args.summary:
        return summarize_model(model, args.dataset, which_summary=args.summary)

    # Load the datasets: the dataset to load is inferred from the model name passed
    # in args.arch.  The default dataset is ImageNet, but if args.arch contains the
    # substring "_cifar", then cifar10 is used.
    train_loader, val_loader, test_loader, _ = apputils.load_data(
        args.dataset, os.path.expanduser(args.data), args.batch_size,
        args.workers, args.validation_size, args.deterministic)
    msglogger.info('Dataset sizes:\n\ttraining=%d\n\tvalidation=%d\n\ttest=%d',
                   len(train_loader.sampler), len(val_loader.sampler), len(test_loader.sampler))

    activations_sparsity = None
    if args.activation_stats:
        # If your model has ReLU layers, then those layers have sparse activations.
        # ActivationSparsityCollector will collect information about this sparsity.
        # WARNING! Enabling activation sparsity collection will significantly slow down training!
        activations_sparsity = ActivationSparsityCollector(model)

    if args.sensitivity is not None:
        return sensitivity_analysis(model, criterion, test_loader, pylogger, args)

    if args.evaluate:
        return evaluate_model(model, criterion, test_loader, pylogger, args)

    if args.compress:
        # The main use-case for this sample application is CNN compression. Compression
        # requires a compression schedule configuration file in YAML.
        compression_scheduler = distiller.file_config(model, optimizer, args.compress)
        # Model is re-transferred to GPU in case parameters were added (e.g. PACTQuantizer)
        model.cuda()
    else:
        compression_scheduler = distiller.CompressionScheduler(model)

    args.kd_policy = None
    if args.kd_teacher:
        teacher = create_model(args.kd_pretrained, args.dataset, args.kd_teacher, device_ids=args.gpus)
        if args.kd_resume:
            teacher, _, _ = apputils.load_checkpoint(teacher, chkpt_file=args.kd_resume)
        dlw = distiller.DistillationLossWeights(args.kd_distill_wt, args.kd_student_wt, args.kd_teacher_wt)
        args.kd_policy = distiller.KnowledgeDistillationPolicy(model, teacher, args.kd_temp, dlw)
        compression_scheduler.add_policy(args.kd_policy, starting_epoch=args.kd_start_epoch, ending_epoch=args.epochs,
                                         frequency=1)

        msglogger.info('\nStudent-Teacher knowledge distillation enabled:')
        msglogger.info('\tTeacher Model: %s', args.kd_teacher)
        msglogger.info('\tTemperature: %s', args.kd_temp)
        msglogger.info('\tLoss Weights (distillation | student | teacher): %s',
                       ' | '.join(['{:.2f}'.format(val) for val in dlw]))
        msglogger.info('\tStarting from Epoch: %s', args.kd_start_epoch)

    for epoch in range(start_epoch, start_epoch + args.epochs):
        # This is the main training loop.
        msglogger.info('\n')
        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # Train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, compression_scheduler,
              loggers=[tflogger, pylogger], args=args)
        distiller.log_weights_sparsity(model, epoch, loggers=[tflogger, pylogger])
        if args.activation_stats:
            distiller.log_activation_sparsity(epoch, loggers=[tflogger, pylogger],
                                              collector=activations_sparsity)

        # evaluate on validation set
        top1, top5, vloss = validate(val_loader, model, criterion, [pylogger], args, epoch)
        stats = ('Peformance/Validation/',
                 OrderedDict([('Loss', vloss),
                              ('Top1', top1),
                              ('Top5', top5)]))
        distiller.log_training_progress(stats, None, epoch, steps_completed=0, total_steps=1, log_freq=1,
                                        loggers=[tflogger])

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        # remember best top1 and save checkpoint
        is_best = top1 > best_top1
        if is_best:
            best_epoch = epoch
            best_top1 = top1
        msglogger.info('==> Best Top1: %.3f   On Epoch: %d\n', best_top1, best_epoch)
        apputils.save_checkpoint(epoch, args.arch, model, optimizer, compression_scheduler, best_top1, is_best,
                                 args.name, msglogger.logdir)

    # Finally run results on the test set
    test(test_loader, model, criterion, [pylogger], args=args)
示例#4
0
def train(c, net, compression_scheduler=None):
    import distiller.apputils as apputils
    from distiller.data_loggers import TensorBoardLogger, PythonLogger
    msglogger = apputils.config_pylogger('logging.conf', None)
    tflogger = TensorBoardLogger(msglogger.logdir)
    tflogger.log_gradients = True
    pylogger = PythonLogger(msglogger)
    c.setdefault(hebbian=False)

    emb_params = count_params(net.embed) + count_params(net.loss.projections) + count_params(net.loss.clusters)
    opt = get_opt(c, net)
    net, opt, step = c.init_model(net, opt=opt, step='max', train=True)
    step_lr = scheduler(c, opt, step)
    data_tr = SampleIterator(c, c.train_batch, split='valid' if c.debug else 'train')
    iter_tr = iter(data_tr)
    data_val = SequentialIterator(c, c.eval_batch, split='valid')

    s = Namespace(net=net, opt=opt, step=step)
    c.on_train_start(s)

    c.log('Embedding has %s parameters' % emb_params)

    if c.get("steps_per_epoch"):
        steps_per_epoch = c.steps_per_epoch
    else:
        steps_per_epoch = len(data_tr.tokens) // data_tr.bs // c.train_chunk
    print("#### steps per epoch %d ####" % steps_per_epoch)

    if c.hebbian:
        counters = [torch.ones(end - start, dtype=torch.long, device=c.device) for start, end in zip([0] + c.cutoffs, c.cutoffs + [c.n_vocab])]
        temp_counters = [torch.zeros_like(x) for x in counters]

    best_val_loss = np.inf
    if s.results is not None and 'val_loss' in s.results.columns:
        best_val_loss = s.results['val_loss'].dropna().max()
    try:
        while step < s.step_max:
            batch = step % steps_per_epoch
            epoch = step // steps_per_epoch
            if step % steps_per_epoch == 0:
                c.log("====> batch=%d, epoch=%d, step=%d" % (batch, epoch, step))
                if compression_scheduler:
                    compression_scheduler.on_epoch_begin(epoch)

            if compression_scheduler:
                compression_scheduler.on_minibatch_begin(epoch, minibatch_id=batch, minibatches_per_epoch=steps_per_epoch)

            step_lr(step)

            x = to_torch(next(iter_tr), c.device).t()

            t_s = time()
            inputs, labels = x[:-1], x[1:]
            preds = net(inputs, labels)
            loss = preds['loss']

            if compression_scheduler:
                _  = compression_scheduler.before_backward_pass(epoch, minibatch_id=batch,
                                                           minibatches_per_epoch=steps_per_epoch,
                                                           loss=loss, return_loss_components=False)

            opt.zero_grad()
            if torch.isnan(loss):
                raise RuntimeError('Encountered nan loss during training')
            loss.backward()
            torch.nn.utils.clip_grad_norm_(net.parameters(), c.get('clip_grad', 0.5))
            opt.step()

            if c.hebbian:
                hebbian_weight_update(c, net, preds['hiddens'], counters, temp_counters)

            time_model = np.round(time() - t_s, 5)

            loss = from_torch(loss)
            perplexity = np.nan if loss > 5 else np.e ** loss
            step_result = pd.Series(dict(
                loss=loss,
                perplexity=perplexity,
                time=time_model
            )).add_prefix('train_')
            step_result['lr'] = next(iter(opt.param_groups))['lr']
            if c.use_cache:
                step_result['theta'] = preds['theta']
                step_result['lambda'] = preds['lambda'].item()

            if compression_scheduler:
                compression_scheduler.on_minibatch_end(epoch, minibatch_id=batch, minibatches_per_epoch=steps_per_epoch)

            if step % steps_per_epoch == 0:
                if compression_scheduler:
                    compression_scheduler.on_epoch_end(epoch)

            s.step = step = step + 1
            if step % c.step_eval == 0:
                distiller.log_weights_sparsity(net, epoch, loggers=[tflogger, pylogger])
                t, total = distiller.weights_sparsity_tbl_summary(net, return_total_sparsity=True)
                c.log("total sparsity: %.3lf" % total)

                step_result = step_result.append(
                    pd.Series(evaluate(c, data_val, net)).add_prefix('val_')
                )
                s.record_step = step_result['val_loss'] < best_val_loss
                clear_gpu_memory()
            s.step_result = step_result
            c.on_step_end(s)
    except Exception as e:
        import traceback
        err = traceback.format_exc()
        if c.main:
            c.log(err)
        else:
            print(err)
    finally:
        c.on_train_end(s)
示例#5
0
def train(args):

    SRC = data.Field(tokenize=tokenize_de,
                     pad_token=BLANK_WORD,
                     lower=args.lower)
    TGT = data.Field(tokenize=tokenize_en,
                     init_token=BOS_WORD,
                     eos_token=EOS_WORD,
                     pad_token=BLANK_WORD,
                     lower=args.lower)

    # Load IWSLT Data ---> German to English Translation
    if args.dataset == 'IWSLT':
        train, val, test = datasets.IWSLT.splits(
            exts=('.de', '.en'),
            fields=(SRC, TGT),
            filter_pred=lambda x: len(vars(x)['src']) <= args.max_length and
            len(vars(x)['trg']) <= args.max_length)
    else:
        train, val, test = datasets.Multi30k.splits(
            exts=('.de', '.en'),
            fields=(SRC, TGT),
            filter_pred=lambda x: len(vars(x)['src']) <= args.max_length and
            len(vars(x)['trg']) <= args.max_length)

    # Frequency of words in the vocabulary
    SRC.build_vocab(train.src, min_freq=args.min_freq)
    TGT.build_vocab(train.trg, min_freq=args.min_freq)

    print("Size of source vocabulary:", len(SRC.vocab))
    print("Size of target vocabulary:", len(TGT.vocab))

    pad_idx = TGT.vocab.stoi[BLANK_WORD]
    model = make_model(len(SRC.vocab),
                       len(TGT.vocab),
                       n=args.num_blocks,
                       d_model=args.hidden_dim,
                       d_ff=args.ff_dim,
                       h=args.num_heads,
                       dropout=args.dropout)
    print("Model made with n:", args.num_blocks, "hidden_dim:",
          args.hidden_dim, "feed forward dim:", args.ff_dim, "heads:",
          args.num_heads, "dropout:", args.dropout)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print("Number of parameters: ", params)

    if args.load_model:
        print("Loading model from [%s]" % args.load_model)
        model.load_state_dict(torch.load(args.load_model))

    # UNCOMMENT WHEN RUNNING ON RESEARCH MACHINES - run on GPU
    # model.cuda()

    # Used by original authors, hurts perplexity but improves BLEU score
    criterion = LabelSmoothing(size=len(TGT.vocab),
                               padding_idx=pad_idx,
                               smoothing=0.1)

    # UNCOMMENT WHEN RUNNING ON RESEARCH MACHINES - run on GPU
    # criterion.cuda()

    train_iter = MyIterator(train,
                            batch_size=args.batch_size,
                            device=0,
                            repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn,
                            train=True)
    valid_iter = MyIterator(val,
                            batch_size=args.batch_size,
                            device=0,
                            repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn,
                            train=False,
                            sort=False)
    model_par = nn.DataParallel(model, device_ids=devices)

    # model_opt = NoamOpt(model.src_embed[0].d_model, 1, 2000,
    #                     torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    # Use standard optimizer -- As used in the paper
    model_opt = get_std_opt(model)

    # PRUNING CODE
    if args.summary:
        df = distiller.weights_sparsity_tbl_summary(model, False)
        print(df)
        exit(0)

    msglogger = apputils.config_pylogger('logging.conf', None)
    tflogger = TensorBoardLogger(msglogger.logdir)
    tflogger.log_gradients = True
    pylogger = PythonLogger(msglogger)

    source = args.compress

    if args.compress:
        compression_scheduler = distiller.config.file_config(
            model_par.module, None, args.compress)

    print(model_par.module)

    best_bleu = 0
    best_epoch = 0

    steps_per_epoch = math.ceil(len(train_iter.data()) / 60)

    for epoch in range(args.epoch):
        print("=" * 80)
        print("Epoch ", epoch + 1)
        print("=" * 80)
        print("Training...")
        model_par.train()

        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch)

        # IF PRUNING
        run_epoch((rebatch(pad_idx, b) for b in train_iter),
                  model_par,
                  MultiGPULossCompute(model.generator,
                                      criterion,
                                      devices=devices,
                                      opt=model_opt),
                  args,
                  epoch,
                  steps_per_epoch,
                  compression_scheduler,
                  SRC,
                  TGT,
                  valid_iter,
                  is_valid=False)

        # run_epoch((rebatch(pad_idx, b) for b in train_iter), model_par,
        #           MultiGPULossCompute(model.generator, criterion, devices=devices, opt=model_opt), args,
        #           SRC, TGT, valid_iter, is_valid=False)

        print("Validation...")
        model_par.eval()

        # IF PRUNING
        loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter),
                         model_par,
                         MultiGPULossCompute(model.generator,
                                             criterion,
                                             devices=devices,
                                             opt=None),
                         args,
                         epoch,
                         steps_per_epoch,
                         compression_scheduler,
                         SRC,
                         TGT,
                         valid_iter,
                         is_valid=True)

        # loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter), model_par,
        #                  MultiGPULossCompute(model.generator, criterion, devices=devices, opt=None), args,
        #                  SRC, TGT, valid_iter, is_valid=True)

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch)

        print('Validation loss:', loss)
        print('Validation perplexity: ', np.exp(loss))
        bleu_score = run_validation_bleu_score(model, SRC, TGT, valid_iter)

        if best_bleu < bleu_score:
            best_bleu = bleu_score
            model_file = args.save_to + args.exp_name + 'validation.bin'
            print('Saving model without optimizer [%s]' % model_file)
            torch.save(model_par.module.state_dict(), model_file)
            best_epoch = epoch

        model_file = args.save_to + args.exp_name + 'latest.bin'
        print('Saving latest model without optimizer [%s]' % model_file)
        torch.save(model_par.module.state_dict(), model_file)

    print('The best epoch was:', best_epoch)
示例#6
0
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc

    return test_loss


# for epoch in range(start_epoch, start_epoch+200):
#     train(epoch)
#     test(epoch)

# Distiller loggers
msglogger = apputils.config_pylogger('logging.conf', None)
tflogger = TensorBoardLogger(msglogger.logdir)
tflogger.log_gradients = True
pylogger = PythonLogger(msglogger)

if args.summary:
    # The last string is the dataset
    distiller.model_summary(net, args.summary, 'coco')
    exit(0)

compression_scheduler = None

if args.compress:
    source = args.compress
    compression_scheduler = distiller.CompressionScheduler(net)
    distiller.config.file_config(net, optimizer, source, compression_scheduler)
示例#7
0
def main():
    global msglogger

    script_dir = os.path.dirname(__file__)

    args = parse_args()

    # Distiller loggers
    msglogger = apputils.config_pylogger('logging.conf',
                                         args.name,
                                         output_dir=args.output_dir)
    tflogger = TensorBoardLogger(msglogger.logdir)
    # tflogger.log_gradients = True
    # pylogger = PythonLogger(msglogger)

    if args.seed is not None:
        msglogger.info("Using seed = {}".format(args.seed))
        torch.manual_seed(args.seed)
        np.random.seed(seed=args.seed)

    args.qe_mode = str(args.qe_mode).split('.')[1]
    args.qe_clip_acts = str(args.qe_clip_acts).split('.')[1]

    apputils.log_execution_env_state(sys.argv)

    if args.gpus is not None:
        try:
            args.gpus = [int(s) for s in args.gpus.split(',')]
        except ValueError:
            msglogger.error(
                'ERROR: Argument --gpus must be a comma-separated list of integers only'
            )
            exit(1)
        if len(args.gpus) > 1:
            msglogger.error('ERROR: Only single GPU supported for NCF')
            exit(1)
        available_gpus = torch.cuda.device_count()
        for dev_id in args.gpus:
            if dev_id >= available_gpus:
                msglogger.error(
                    'ERROR: GPU device ID {0} requested, but only {1} devices available'
                    .format(dev_id, available_gpus))
                exit(1)
        # Set default device in case the first one on the list != 0
        torch.cuda.set_device(args.gpus[0])

    # Save configuration to file
    config = {k: v for k, v in args.__dict__.items()}
    config['timestamp'] = "{:.0f}".format(datetime.utcnow().timestamp())
    config['local_timestamp'] = str(datetime.now())
    run_dir = msglogger.logdir
    msglogger.info("Saving config and results to {}".format(run_dir))
    if not os.path.exists(run_dir) and run_dir != '':
        os.makedirs(run_dir)
    utils.save_config(config, run_dir)

    # Check that GPUs are actually available
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    t1 = time.time()
    # Load Data
    training = not (args.eval or args.qe_calibration
                    or args.activation_histograms)
    msglogger.info('Loading data')
    if training:
        train_dataset = CFTrainDataset(
            os.path.join(args.data, TRAIN_RATINGS_FILENAME),
            args.negative_samples)
        train_dataloader = torch.utils.data.DataLoader(
            dataset=train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            pin_memory=True)
        nb_users, nb_items = train_dataset.nb_users, train_dataset.nb_items
    else:
        train_dataset = None
        train_dataloader = None
        nb_users, nb_items = (138493, 26744)

    test_ratings = load_test_ratings(
        os.path.join(args.data, TEST_RATINGS_FILENAME))  # noqa: E501
    test_negs = load_test_negs(os.path.join(args.data, TEST_NEG_FILENAME))

    msglogger.info(
        'Load data done [%.1f s]. #user=%d, #item=%d, #train=%s, #test=%d' %
        (time.time() - t1, nb_users, nb_items,
         str(train_dataset.mat.nnz) if training else 'N/A', len(test_ratings)))

    # Create model
    model = NeuMF(nb_users,
                  nb_items,
                  mf_dim=args.factors,
                  mf_reg=0.,
                  mlp_layer_sizes=args.layers,
                  mlp_layer_regs=[0. for i in args.layers],
                  split_final=args.split_final)
    if use_cuda:
        model = model.cuda()
    msglogger.info(model)
    msglogger.info("{} parameters".format(utils.count_parameters(model)))

    # Save model text description
    with open(os.path.join(run_dir, 'model.txt'), 'w') as file:
        file.write(str(model))

    compression_scheduler = None
    start_epoch = 0
    optimizer = None
    if args.load:
        if training:
            model, compression_scheduler, optimizer, start_epoch = apputils.load_checkpoint(
                model, args.load)
            if args.reset_optimizer:
                start_epoch = 0
                optimizer = None
        else:
            model = apputils.load_lean_checkpoint(model, args.load)

    # Add loss to graph
    criterion = nn.BCEWithLogitsLoss()

    if use_cuda:
        criterion = criterion.cuda()

    if training and optimizer is None:
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
        msglogger.info('Optimizer Type: %s', type(optimizer))
        msglogger.info('Optimizer Args: %s', optimizer.defaults)

    if args.compress:
        compression_scheduler = distiller.file_config(model, optimizer,
                                                      args.compress)
        model.cuda()

    # Create files for tracking training
    valid_results_file = os.path.join(run_dir, 'valid_results.csv')

    if args.qe_calibration or args.activation_histograms:
        calib = {
            'portion':
            args.qe_calibration,
            'desc_str':
            'quantization calibration stats',
            'collect_func':
            partial(distiller.data_loggers.collect_quant_stats,
                    inplace_runtime_check=True,
                    disable_inplace_attrs=True)
        }
        hists = {
            'portion':
            args.activation_histograms,
            'desc_str':
            'activation histograms',
            'collect_func':
            partial(distiller.data_loggers.collect_histograms,
                    activation_stats=None,
                    nbins=2048,
                    save_hist_imgs=True)
        }
        d = calib if args.qe_calibration else hists

        distiller.utils.assign_layer_fq_names(model)
        num_users = int(np.floor(len(test_ratings) * d['portion']))
        msglogger.info(
            "Generating {} based on {:.1%} of the test-set ({} users)".format(
                d['desc_str'], d['portion'], num_users))

        test_fn = partial(val_epoch,
                          ratings=test_ratings,
                          negs=test_negs,
                          K=args.topk,
                          use_cuda=use_cuda,
                          processes=args.processes,
                          num_users=num_users)
        d['collect_func'](model=model,
                          test_fn=test_fn,
                          save_dir=run_dir,
                          classes=None)

        return 0

    if args.eval:
        if args.quantize_eval and args.qe_calibration is None:
            model.cpu()
            quantizer = quantization.PostTrainLinearQuantizer.from_args(
                model, args)
            dummy_input = (torch.tensor([1]), torch.tensor([1]),
                           torch.tensor([True], dtype=torch.bool))
            quantizer.prepare_model(dummy_input)
            model.cuda()

        distiller.utils.assign_layer_fq_names(model)

        if args.eval_fp16:
            model = model.half()

        # Calculate initial Hit Ratio and NDCG
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                use_cuda=use_cuda,
                                processes=args.processes)
        val_time = time.time() - begin
        hit_rate = np.mean(hits)
        msglogger.info(
            'Initial HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, val_time = {val_time:.2f}'
            .format(K=args.topk,
                    hit_rate=hit_rate,
                    ndcg=np.mean(ndcgs),
                    val_time=val_time))
        hit_rate = 0

        if args.quantize_eval:
            checkpoint_name = 'quantized'
            apputils.save_checkpoint(0,
                                     'NCF',
                                     model,
                                     optimizer=None,
                                     extras={'quantized_hr@10': hit_rate},
                                     name='_'.join([args.name, 'quantized'])
                                     if args.name else checkpoint_name,
                                     dir=msglogger.logdir)
        return 0

    total_samples = len(train_dataloader.sampler)
    steps_per_epoch = math.ceil(total_samples / args.batch_size)
    best_hit_rate = 0
    best_epoch = 0
    for epoch in range(start_epoch, args.epochs):
        msglogger.info('')
        model.train()
        losses = utils.AverageMeter()

        begin = time.time()

        if compression_scheduler:
            compression_scheduler.on_epoch_begin(epoch, optimizer)

        loader = tqdm.tqdm(train_dataloader)
        for batch_index, (user, item, label) in enumerate(loader):
            user = torch.autograd.Variable(user, requires_grad=False)
            item = torch.autograd.Variable(item, requires_grad=False)
            label = torch.autograd.Variable(label, requires_grad=False)
            if use_cuda:
                user = user.cuda(async=True)
                item = item.cuda(async=True)
                label = label.cuda(async=True)

            if compression_scheduler:
                compression_scheduler.on_minibatch_begin(
                    epoch, batch_index, steps_per_epoch, optimizer)

            outputs = model(user, item, torch.tensor([False],
                                                     dtype=torch.bool))
            loss = criterion(outputs, label)

            if compression_scheduler:
                compression_scheduler.before_backward_pass(
                    epoch,
                    batch_index,
                    steps_per_epoch,
                    loss,
                    optimizer,
                    return_loss_components=False)

            losses.update(loss.data.item(), user.size(0))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if compression_scheduler:
                compression_scheduler.on_minibatch_end(epoch, batch_index,
                                                       steps_per_epoch,
                                                       optimizer)

            # Save stats to file
            description = (
                'Epoch {} Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                    epoch, loss=losses))
            loader.set_description(description)

            steps_completed = batch_index + 1
            if steps_completed % args.log_freq == 0:
                stats_dict = OrderedDict()
                stats_dict['Loss'] = losses.avg
                stats = ('Performance/Training/', stats_dict)
                params = model.named_parameters(
                ) if args.log_params_histograms else None
                distiller.log_training_progress(stats, params, epoch,
                                                steps_completed,
                                                steps_per_epoch, args.log_freq,
                                                [tflogger])

                tflogger.log_model_buffers(model,
                                           ['tracked_min', 'tracked_max'],
                                           'Quant/Train/Acts/TrackedMinMax',
                                           epoch, steps_completed,
                                           steps_per_epoch, args.log_freq)

        train_time = time.time() - begin
        begin = time.time()
        hits, ndcgs = val_epoch(model,
                                test_ratings,
                                test_negs,
                                args.topk,
                                use_cuda=use_cuda,
                                output=valid_results_file,
                                epoch=epoch,
                                processes=args.processes)
        val_time = time.time() - begin

        if compression_scheduler:
            compression_scheduler.on_epoch_end(epoch, optimizer)

        hit_rate = np.mean(hits)
        mean_ndcgs = np.mean(ndcgs)

        stats_dict = OrderedDict()
        stats_dict['HR@{0}'.format(args.topk)] = hit_rate
        stats_dict['NDCG@{0}'.format(args.topk)] = mean_ndcgs
        stats = ('Performance/Validation/', stats_dict)
        distiller.log_training_progress(stats,
                                        None,
                                        epoch,
                                        steps_completed=0,
                                        total_steps=1,
                                        log_freq=1,
                                        loggers=[tflogger])

        msglogger.info(
            'Epoch {epoch}: HR@{K} = {hit_rate:.4f}, NDCG@{K} = {ndcg:.4f}, AvgTrainLoss = {loss.avg:.4f}, '
            'train_time = {train_time:.2f}, val_time = {val_time:.2f}'.format(
                epoch=epoch,
                K=args.topk,
                hit_rate=hit_rate,
                ndcg=mean_ndcgs,
                loss=losses,
                train_time=train_time,
                val_time=val_time))

        is_best = False
        if hit_rate > best_hit_rate:
            best_hit_rate = hit_rate
            is_best = True
            best_epoch = epoch
        extras = {
            'current_hr@10': hit_rate,
            'best_hr@10': best_hit_rate,
            'best_epoch': best_epoch
        }
        apputils.save_checkpoint(epoch,
                                 'NCF',
                                 model,
                                 optimizer,
                                 compression_scheduler,
                                 extras,
                                 is_best,
                                 dir=run_dir)

        if args.threshold is not None:
            if np.mean(hits) >= args.threshold:
                msglogger.info("Hit threshold of {}".format(args.threshold))
                break
示例#8
0
文件: main.py 项目: barrh/distiller
                log_period=args.log_interval)


def export_onnx(path, batch_size, seq_len):
    msglogger.info('The model is also exported in ONNX format at {}'.format(
        os.path.realpath(args.onnx_export)))
    model.eval()
    dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(
        -1, batch_size).to(device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)


# Distiller loggers
msglogger = apputils.config_pylogger('logging.conf', None)
tflogger = TensorBoardLogger(msglogger.logdir)
tflogger.log_gradients = True
pylogger = PythonLogger(msglogger)

if args.summary:
    which_summary = args.summary
    if which_summary == 'png':
        draw_lang_model_to_file(model, 'rnn.png', 'wikitext2')
    elif which_summary == 'percentile':
        percentile = 0.9
        for name, param in model.state_dict().items():
            if param.dim() < 2:
                # Skip biases
                continue
            bottomk, _ = torch.topk(param.abs().view(-1),
                                    int(percentile * param.numel()),