def run(self):
     tq = tqdm(range(self.epochs))
     for epoch in tq:
         # for recordio data
         if hasattr(self.train_data, 'reset'): self.train_data.reset()
         tbar = tqdm(self.train_data)
         idx = 0
         for batch in tbar:
             # sample network configuration
             config = self.controller.pre_sample()[0]
             self.supernet.sample(**config)
             # self.train_fn(self.supernet, batch, **self.train_args)
             self.train_fn(epoch, self.epochs, self.supernet, batch,
                           **self.train_args)
             mx.nd.waitall()
             if epoch >= self.warmup_epochs and (
                     idx % self.update_arch_frequency) == 0:
                 self.train_controller()
             if (self.plot_frequency > 0 and idx % self.plot_frequency == 0
                     and in_ipynb()):
                 graph = self.supernet.graph
                 graph.attr(rankdir='LR', size='8,3')
                 tbar.set_svg(graph._repr_svg_())
             if self.baseline:
                 tbar.set_description('avg reward: {:.2f}'.format(
                     self.baseline))
             idx += 1
         self.validation()
         self.save()
         msg = 'epoch {}, val_acc: {:.2f}'.format(epoch, self.val_acc)
         if self.baseline:
             msg += ', avg reward: {:.2f}'.format(self.baseline)
         tq.set_description(msg)
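The loop above drives two nested tqdm bars, an outer one over epochs (tq) and an inner one over batches (tbar), refreshing their descriptions in place. A minimal, self-contained sketch of the same pattern, with the supernet/controller calls replaced by a placeholder running average:

# Nested tqdm bars with live descriptions; the "loss" here is a dummy
# placeholder, not real training logic.
from tqdm import tqdm

def run_epochs(epochs=3, batches=100):
    tq = tqdm(range(epochs))
    for epoch in tq:
        tbar = tqdm(range(batches), leave=False)
        total_loss = 0.0
        for idx in tbar:
            total_loss += 1.0 / (idx + 1)  # placeholder "batch loss"
            tbar.set_description('avg loss: {:.2f}'.format(total_loss / (idx + 1)))
        tq.set_description('epoch {}, avg loss: {:.2f}'.format(epoch, total_loss / batches))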
Example #2
def predict_imgs(X):
     if isinstance(X, list):
         different_dataset = []
         for i, x in enumerate(X):
             proba_all_one_dataset = []
             tbar = tqdm(range(len(x.items)))
             for j, x_item in enumerate(x):
                 tbar.update(1)
                 proba_all = predict_img(x_item[0], ensemble=True)
                 tbar.set_description(
                     'dataset [%d], input image [%d]' % (i, j))
                 proba_all_one_dataset.append(proba_all)
             different_dataset.append(proba_all_one_dataset)
         inds, probas, probals_all = avg_prediction(
             different_dataset, threshold=set_prob_thresh)
     else:
         inds, probas, probals_all = [], [], []
         tbar = tqdm(range(len(X.items)))
         for i, x in enumerate(X):
             tbar.update(1)
             ind, proba, proba_all = predict_img(x[0])
             tbar.set_description(
                 'The input picture [%d] is classified as [%d], with probability %.2f '
                 % (i, ind.asscalar(), proba.asscalar()))
             inds.append(ind.asscalar())
             probas.append(proba.asnumpy())
             probals_all.append(proba_all.asnumpy().flatten())
     return inds, probas, probals_all
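predict_imgs does not iterate the bar itself; it builds one from range(len(...)) and advances it manually with tbar.update(1) while walking the dataset, writing a per-item status via set_description. A stand-alone sketch of that manual-update pattern, where predict is a dummy stand-in for predict_img:

# Driving a tqdm bar manually with update(1) while iterating another sequence;
# `predict` is a dummy stand-in for the real predict_img call.
from tqdm import tqdm

def predict(item):
    return hash(item) % 10  # dummy "class index"

def predict_all(items):
    preds = []
    tbar = tqdm(range(len(items)))
    for i, item in enumerate(items):
        tbar.update(1)
        pred = predict(item)
        tbar.set_description('The input picture [%d] is classified as [%d]' % (i, pred))
        preds.append(pred)
    tbar.close()
    return preds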
Example #3
    def evaluate(loader_dev, metric, segment):
        """Evaluate the model on validation dataset."""
        metric.reset()
        step_loss = 0
        tbar = tqdm(loader_dev)
        for batch_id, seqs in enumerate(tbar):
            input_ids, valid_length, segment_ids, label = seqs
            input_ids = input_ids.as_in_context(ctx)
            valid_length = valid_length.as_in_context(ctx).astype('float32')
            label = label.as_in_context(ctx)
            if use_roberta:
                out = model(input_ids, valid_length)
            else:
                out = model(input_ids, segment_ids.as_in_context(ctx),
                            valid_length)
            ls = loss_function(out, label).mean()

            step_loss += ls.asscalar()
            metric.update([label], [out])

            if (batch_id + 1) % (args.log_interval) == 0:
                log_eval(batch_id, len(loader_dev), metric, step_loss,
                         args.log_interval, tbar)
                step_loss = 0

        metric_nm, metric_val = metric.get()
        if not isinstance(metric_nm, list):
            metric_nm, metric_val = [metric_nm], [metric_val]
        metric_str = 'validation metrics:' + ','.join(
            [i + ':%.4f' for i in metric_nm])
        logger.info(metric_str, *metric_val)

        mx.nd.waitall()
        return metric_nm, metric_val
Example #4
    def validation(self):
        if hasattr(self.val_data, 'reset'): self.val_data.reset()
        # data iter, avoid memory leak
        it = iter(self.val_data)
        if hasattr(it, 'reset_sample_times'): it.reset_sample_times()
        tbar = tqdm(it)
        # update network arc
        config = self.controller.inference()
        self.supernet.sample(**config)
        metric = mx.metric.Accuracy()
        for batch in tbar:
            self.eval_fn(self.supernet, batch, metric=metric, **self.val_args)
            reward = metric.get()[1]
            tbar.set_description('Val Acc: {}'.format(reward))

        self.val_acc = reward
        self.training_history.append(reward)
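validation() pushes every batch through eval_fn and reads the running score back from an mx.metric.Accuracy, keeping the last value as the epoch's reward. A sketch of that metric-plus-progress-bar pattern with random tensors standing in for the supernet outputs (assumes mxnet and tqdm are installed):

# Running accuracy shown in the progress-bar description; random labels and
# predictions stand in for real model outputs.
import mxnet as mx
from tqdm import tqdm

def fake_validation(num_batches=50, batch_size=8, num_classes=10):
    metric = mx.metric.Accuracy()
    reward = 0.0
    tbar = tqdm(range(num_batches))
    for _ in tbar:
        label = mx.nd.random.randint(0, num_classes, shape=(batch_size,))
        pred = mx.nd.random.uniform(shape=(batch_size, num_classes))
        metric.update([label], [pred])
        reward = metric.get()[1]
        tbar.set_description('Val Acc: {:.3f}'.format(reward))
    return reward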
Example #5
    def evaluate(self, dataset, input_size=224, ctx=[mx.cpu()]):
        """Evaluate predictive performance of trained image classifier using given test data.
        
        Parameters
        ----------
        dataset : :class:`autogluon.task.ImagePredictor.Dataset`
            The dataset containing test images (must be in same format as the training dataset).
        input_size : int
            Size of the images (pixels).
        ctx : List of mxnet.context elements.
            Determines whether to use CPU or GPU(s), options include: `[mx.cpu()]` or `[mx.gpu()]`.
        
        Examples
        --------
        >>> import autogluon.core as ag
        >>> from autogluon.vision import ImagePredictor as task
        >>> train_data = task.Dataset(train_path='~/data/train')
        >>> classifier = task.fit(train_data,
        >>>                       nets=ag.space.Categorical('resnet18_v1', 'resnet34_v1'),
        >>>                       time_limits=600, ngpus_per_trial=1, num_trials=4)
        >>> test_data = task.Dataset('~/data/test', train=False)
        >>> test_acc = classifier.evaluate(test_data)
        """
        args = self.args
        net = self.model
        batch_size = args.batch_size * max(len(ctx), 1)
        metric = get_metric_instance(args.metric)
        input_size = net.input_size if hasattr(net,
                                               'input_size') else input_size

        test_data, _, batch_fn, _ = get_data_loader(dataset, input_size,
                                                    batch_size,
                                                    args.num_workers, True,
                                                    None)
        tbar = tqdm(test_data)
        for batch in tbar:
            self.eval_func(net, batch, batch_fn, metric, ctx)
            _, test_reward = metric.get()
            tbar.set_description('{}: {}'.format(args.metric, test_reward))
        _, test_reward = metric.get()
        return test_reward
Example #6
def train_text_classification(args, reporter=None):
    # Step 1: every function and Python object from the original training script,
    # except the training function itself, is added at the beginning of the
    # decorated function.
    nlp = try_import_gluonnlp()
    logger = logging.getLogger(__name__)
    if args.verbose:
        logger.setLevel(logging.INFO)
        logger.info(args)
    batch_size = args.batch_size
    dev_batch_size = args.dev_batch_size
    lr = args.lr
    epsilon = args.epsilon
    accumulate = args.accumulate
    log_interval = args.log_interval * accumulate if accumulate else args.log_interval
    if accumulate:
        logger.info('Using gradient accumulation. Effective batch size = ' \
                     'batch_size * accumulate = %d', accumulate * batch_size)

    # random seed
    np.random.seed(args.seed)
    random.seed(args.seed)
    mx.random.seed(args.seed)

    # TODO support for multi-GPU
    ctx = mx.gpu(0) if args.num_gpus > 0 else mx.cpu()

    task = args.dataset
    # data type with mixed precision training
    if args.dtype == 'float16':
        try:
            from mxnet.contrib import amp  # pylint: disable=ungrouped-imports
            # monkey patch amp list since topk does not support fp16
            amp.lists.symbol.FP32_FUNCS.append('topk')
            amp.lists.symbol.FP16_FP32_FUNCS.remove('topk')
            amp.init()
        except ValueError:
            # topk is already in the FP32_FUNCS list
            amp.init()
        except ImportError:
            # amp is not available
            logger.info(
                'Mixed precision training with float16 requires MXNet >= '
                '1.5.0b20190627. Please consider upgrading your MXNet version.'
            )
            exit()

    # model and loss
    model_name = args.net
    dataset = args.pretrained_dataset

    use_roberta = 'roberta' in model_name
    get_model_params = {
        'name': model_name,
        'dataset_name': dataset,
        'pretrained': True,
        'ctx': ctx,
        'use_decoder': False,
        'use_classifier': False,
    }
    # RoBERTa does not contain parameters for sentence pair classification
    if not use_roberta:
        get_model_params['use_pooler'] = True

    bert, vocabulary = nlp.model.get_model(**get_model_params)
    model = get_network(bert, task.class_labels, use_roberta)
    #do_regression = not task.class_labels
    #if do_regression:
    #    num_classes = 1
    #    loss_function = gluon.loss.L2Loss()
    #else:
    #    num_classes = len(task.class_labels)
    #    loss_function = gluon.loss.SoftmaxCELoss()
    ## reuse the BERTClassifier class with num_classes=1 for regression
    #if use_roberta:
    #    model = RoBERTaClassifier(bert, dropout=0.0, num_classes=num_classes)
    #else:
    #    model = BERTClassifier(bert, dropout=0.1, num_classes=num_classes)
    # initialize classifier
    loss_function = (gluon.loss.SoftmaxCELoss()
                     if task.class_labels else gluon.loss.L2Loss())
    initializer = mx.init.Normal(0.02)
    model.classifier.initialize(init=initializer, ctx=ctx)

    model.hybridize(static_alloc=True)
    loss_function.hybridize(static_alloc=True)

    # data processing
    do_lower_case = 'uncased' in dataset
    if use_roberta:
        bert_tokenizer = nlp.data.GPT2BPETokenizer()
    else:
        bert_tokenizer = nlp.data.BERTTokenizer(vocabulary,
                                                lower=do_lower_case)

    # Get the loader.
    train_data, dev_data_list, num_train_examples, trans, test_trans = preprocess_data(
        bert_tokenizer, task, batch_size, dev_batch_size, args.max_len,
        vocabulary, True, args.num_workers)

    def log_train(batch_id, batch_num, metric, step_loss, log_interval,
                  epoch_id, learning_rate, tbar):
        """Generate and print out the log message for training. """
        metric_nm, metric_val = metric.get()
        if not isinstance(metric_nm, list):
            metric_nm, metric_val = [metric_nm], [metric_val]

        train_str = '[Epoch %d] loss=%.4f, lr=%.7f, metrics:' + \
                    ','.join([i + ':%.4f' for i in metric_nm])
        tbar.set_description(
            train_str %
            (epoch_id, step_loss / log_interval, learning_rate, *metric_val))

    def log_eval(batch_id, batch_num, metric, step_loss, log_interval, tbar):
        """Generate and print out the log message for inference. """
        metric_nm, metric_val = metric.get()
        if not isinstance(metric_nm, list):
            metric_nm, metric_val = [metric_nm], [metric_val]

        eval_str = 'loss=%.4f, metrics:' + \
                   ','.join([i + ':%.4f' for i in metric_nm])
        tbar.set_description(eval_str %
                             (step_loss / log_interval, *metric_val))

    def evaluate(loader_dev, metric, segment):
        """Evaluate the model on validation dataset."""
        metric.reset()
        step_loss = 0
        tbar = tqdm(loader_dev)
        for batch_id, seqs in enumerate(tbar):
            input_ids, valid_length, segment_ids, label = seqs
            input_ids = input_ids.as_in_context(ctx)
            valid_length = valid_length.as_in_context(ctx).astype('float32')
            label = label.as_in_context(ctx)
            if use_roberta:
                out = model(input_ids, valid_length)
            else:
                out = model(input_ids, segment_ids.as_in_context(ctx),
                            valid_length)
            ls = loss_function(out, label).mean()

            step_loss += ls.asscalar()
            metric.update([label], [out])

            if (batch_id + 1) % (args.log_interval) == 0:
                log_eval(batch_id, len(loader_dev), metric, step_loss,
                         args.log_interval, tbar)
                step_loss = 0

        metric_nm, metric_val = metric.get()
        if not isinstance(metric_nm, list):
            metric_nm, metric_val = [metric_nm], [metric_val]
        metric_str = 'validation metrics:' + ','.join(
            [i + ':%.4f' for i in metric_nm])
        logger.info(metric_str, *metric_val)

        mx.nd.waitall()
        return metric_nm, metric_val

    # Step 2: the training logic from the original training script is placed here,
    # inside the decorated function that autogluon runs for training.

    all_model_params = model.collect_params()
    optimizer_params = {'learning_rate': lr, 'epsilon': epsilon, 'wd': 0.01}
    trainer = gluon.Trainer(all_model_params,
                            'bertadam',
                            optimizer_params,
                            update_on_kvstore=False)
    if args.dtype == 'float16':
        amp.init_trainer(trainer)

    step_size = batch_size * accumulate if accumulate else batch_size
    num_train_steps = int(num_train_examples / step_size * args.epochs)
    warmup_ratio = args.warmup_ratio
    num_warmup_steps = int(num_train_steps * warmup_ratio)
    step_num = 0

    # Do not apply weight decay on LayerNorm and bias terms
    for _, v in model.collect_params('.*beta|.*gamma|.*bias').items():
        v.wd_mult = 0.0
    # Collect differentiable parameters
    params = [p for p in all_model_params.values() if p.grad_req != 'null']

    # Set grad_req if gradient accumulation is required
    if accumulate and accumulate > 1:
        for p in params:
            p.grad_req = 'add'
    # track best eval score
    metric_history = []
    best_metric = None
    patience = args.early_stop

    tic = time.time()
    for epoch_id in range(args.epochs):
        if args.early_stop and patience == 0:
            logger.info('Early stopping at epoch %d', epoch_id)
            break
        task.metric.reset()
        step_loss = 0
        tic = time.time()
        all_model_params.zero_grad()

        tbar = tqdm(train_data)
        for batch_id, seqs in enumerate(tbar):
            # learning rate schedule
            if step_num < num_warmup_steps:
                new_lr = lr * step_num / num_warmup_steps
            else:
                non_warmup_steps = step_num - num_warmup_steps
                offset = non_warmup_steps / (num_train_steps -
                                             num_warmup_steps)
                new_lr = lr - offset * lr
            trainer.set_learning_rate(new_lr)

            # forward and backward
            with mx.autograd.record():
                input_ids, valid_length, segment_ids, label = seqs
                input_ids = input_ids.as_in_context(ctx)
                valid_length = valid_length.as_in_context(ctx).astype(
                    'float32')
                label = label.as_in_context(ctx)
                if use_roberta:
                    out = model(input_ids, valid_length)
                else:
                    out = model(input_ids, segment_ids.as_in_context(ctx),
                                valid_length)
                ls = loss_function(out, label).mean()
                if args.dtype == 'float16':
                    with amp.scale_loss(ls, trainer) as scaled_loss:
                        mx.autograd.backward(scaled_loss)
                else:
                    ls.backward()

            # update
            if not accumulate or (batch_id + 1) % accumulate == 0:
                trainer.allreduce_grads()
                nlp.utils.clip_grad_global_norm(params, 1)
                trainer.update(accumulate if accumulate else 1)
                step_num += 1
                if accumulate and accumulate > 1:
                    # set grad to zero for gradient accumulation
                    all_model_params.zero_grad()

            step_loss += ls.asscalar()
            task.metric.update([label], [out])
            if (batch_id + 1) % (args.log_interval) == 0:
                log_train(batch_id, len(train_data), task.metric, step_loss,
                          args.log_interval, epoch_id, trainer.learning_rate,
                          tbar)
                step_loss = 0
        mx.nd.waitall()

        # inference on dev data
        for segment, dev_data in dev_data_list:
            metric_nm, metric_val = evaluate(dev_data, task.metric, segment)
            if best_metric is None or metric_val >= best_metric:
                best_metric = metric_val
                patience = args.early_stop
            else:
                if args.early_stop is not None:
                    patience -= 1
            metric_history.append((epoch_id, metric_nm, metric_val))

        if reporter is not None:
            # Note: epoch reported back must start with 1, not with 0
            reporter(epoch=epoch_id + 1, accuracy=metric_val[0])

    if args.final_fit:
        get_model_params.pop('ctx')
        return {
            'model_params': collect_params(model),
            'get_model_args': get_model_params,
            'class_labels': task.class_labels,
            'transform': trans,
            'test_transform': test_trans
        }
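The learning-rate arithmetic inside the batch loop above (linear warmup to lr over num_warmup_steps, then linear decay to zero at num_train_steps) is easier to check when factored out as a pure function; the numbers in the comments are illustrative only:

# Same schedule as the in-loop code above: linear warmup, then linear decay.
def warmup_linear_decay(step_num, lr, num_warmup_steps, num_train_steps):
    if step_num < num_warmup_steps:
        return lr * step_num / num_warmup_steps
    offset = (step_num - num_warmup_steps) / (num_train_steps - num_warmup_steps)
    return lr - offset * lr

# warmup_linear_decay(50, 3e-5, 100, 1000)  -> 1.5e-05  (halfway through warmup)
# warmup_linear_decay(550, 3e-5, 100, 1000) -> 1.5e-05  (halfway through decay)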
Example #7
def train_image_classification(args, reporter):
    logging.basicConfig()
    logger = logging.getLogger(__name__)
    if args.verbose:
        logger.setLevel(logging.INFO)
        logger.info(args)

    target_params = Sample_params(args.batch_size, args.num_gpus,
                                  args.num_workers)
    batch_size = target_params.get_batchsize
    ctx = target_params.get_context
    classes = args.dataset.num_classes if hasattr(args.dataset,
                                                  'num_classes') else None
    target_kwargs = Getmodel_kwargs(ctx, classes, args.net,
                                    args.tricks.teacher_name,
                                    args.tricks.hard_weight, args.hybridize,
                                    args.optimizer.multi_precision,
                                    args.tricks.use_pretrained,
                                    args.tricks.use_gn, args.tricks.last_gamma,
                                    args.tricks.batch_norm, args.tricks.use_se)
    distillation = target_kwargs.distillation
    net = target_kwargs.get_net
    input_size = net.input_size if hasattr(net,
                                           'input_size') else args.input_size

    if args.tricks.no_wd:
        for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
            v.wd_mult = 0.0

    if args.tricks.label_smoothing or args.tricks.mixup:
        sparse_label_loss = False
    else:
        sparse_label_loss = True

    if distillation:
        teacher = target_kwargs.get_teacher

        def teacher_prob(data):
            return [
                nd.softmax(
                    teacher(X.astype(target_kwargs.dtype, copy=False)) /
                    args.tricks.temperature) for X in data
            ]

        L = DistillationSoftmaxCrossEntropyLoss(
            temperature=args.tricks.temperature,
            hard_weight=args.tricks.hard_weight,
            sparse_label=sparse_label_loss)
    else:
        L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)
        teacher_prob = None
    if args.tricks.mixup:
        metric = get_metric_instance('rmse')
    else:
        metric = get_metric_instance(args.metric)

    train_data, val_data, batch_fn, num_batches = get_data_loader(
        args.dataset, input_size, batch_size, args.num_workers, args.final_fit,
        args.split_ratio)

    if isinstance(args.lr_config.lr_mode, str):  # fix
        target_lr = LR_params(
            args.optimizer.lr, args.lr_config.lr_mode, args.epochs,
            num_batches, args.lr_config.lr_decay_epoch,
            args.lr_config.lr_decay, args.lr_config.lr_decay_period,
            args.lr_config.warmup_epochs, args.lr_config.warmup_lr)
        lr_scheduler = target_lr.get_lr_scheduler
    else:
        lr_scheduler = args.lr_config.lr_mode
    args.optimizer.lr_scheduler = lr_scheduler

    trainer = gluon.Trainer(net.collect_params(), args.optimizer)

    def train(epoch, num_epochs, metric):
        for i, batch in enumerate(train_data):
            metric = default_train_fn(
                epoch, num_epochs, net, batch, batch_size, L, trainer,
                batch_fn, ctx, args.tricks.mixup, args.tricks.label_smoothing,
                distillation, args.tricks.mixup_alpha,
                args.tricks.mixup_off_epoch, classes, target_kwargs.dtype,
                metric, teacher_prob)
            mx.nd.waitall()
        return metric

    def test(epoch):
        metric.reset()
        for i, batch in enumerate(val_data):
            default_val_fn(net, batch, batch_fn, metric, ctx,
                           target_kwargs.dtype)
        _, reward = metric.get()
        reporter(epoch=epoch, classification_reward=reward)
        return reward

    # Note: epoch must start with 1, not 0
    tbar = tqdm(range(1, args.epochs + 1))
    for epoch in tbar:
        metric = train(epoch, args.epochs, metric)
        train_metric_name, train_metric_score = metric.get()
        tbar.set_description(
            f'[Epoch {epoch}] training: {train_metric_name}={train_metric_score:.3f}')
        if not args.final_fit:
            reward = test(epoch)
            tbar.set_description(f'[Epoch {epoch}] Validation: {reward:.3f}')

    if args.final_fit:
        return {'model_params': collect_params(net), 'num_classes': classes}
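In the distillation branch above, teacher_prob divides the teacher's outputs by a temperature before the softmax, so the resulting distribution is softer than a plain prediction. A small illustration with an arbitrary logit vector and temperature (values chosen for the example only):

# Temperature-softened softmax, as used by teacher_prob above; the logits and
# temperature are arbitrary illustrative values.
from mxnet import nd

logits = nd.array([[4.0, 1.0, 0.5]])
hard = nd.softmax(logits)          # peaked distribution
soft = nd.softmax(logits / 2.0)    # temperature T=2 spreads the probability mass
print(hard.asnumpy())
print(soft.asnumpy())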