Example #1
def forward_backward_v2(net, criterions, ctx, packet, is_train=True):
    data, ht8, ht8_mask, paf8, paf8_mask = packet
    criterion, criterion_ohkm = criterions
    # split to gpus
    data = gl.utils.split_and_load(data, ctx)
    ht8 = gl.utils.split_and_load(ht8, ctx)
    ht8_mask = gl.utils.split_and_load(ht8_mask, ctx)
    paf8 = gl.utils.split_and_load(paf8, ctx)
    paf8_mask = gl.utils.split_and_load(paf8_mask, ctx)
    # run
    ag.set_recording(is_train)
    ag.set_training(is_train)
    losses = []
    for data_, ht8_, paf8_, ht8_mask_, paf8_mask_ in zip(
            data, ht8, paf8, ht8_mask, paf8_mask):
        # forward
        out_ = net(data_)
        losses_ = []
        num_stage = len(out_)
        for i in range(num_stage):
            losses_.append(criterion(out_[i][0], ht8_, ht8_mask_))
            losses_.append(criterion(out_[i][1], paf8_, paf8_mask_))
        losses.append(losses_)
        # backward
        if is_train:
            ag.backward(losses_)
    ag.set_recording(False)
    ag.set_training(False)
    return losses
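
Note: the manual set_recording / set_training toggling used throughout these examples can also be written with MXNet's scoped context managers, which restore the previous state automatically. A minimal sketch of the same forward/backward pattern, assuming a single-output net and a (data, target, mask) packet:

from mxnet import autograd as ag
from mxnet import gluon as gl

def forward_backward_scoped(net, criterion, ctx, packet, is_train=True):
    data, target, mask = packet
    data = gl.utils.split_and_load(data, ctx)
    target = gl.utils.split_and_load(target, ctx)
    mask = gl.utils.split_and_load(mask, ctx)
    losses = []
    # record() enables both recording and training mode for the block;
    # predict_mode() runs the forward pass in inference mode instead.
    scope = ag.record(train_mode=True) if is_train else ag.predict_mode()
    with scope:
        for data_, target_, mask_ in zip(data, target, mask):
            losses.append(criterion(net(data_), target_, mask_))
    if is_train:
        ag.backward(losses)
    return losses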
Example #2
def evaluate(model, dataset_type='train', ema=None):
    r"""Evaluate the model on train/dev/test dataset.

    This function is just an encapsulation of official evaluate function.

    The official evaluate code can be find in https://rajpurkar.github.io/SQuAD-explorer/

    Parameters
    ----------
    dataset_type : string, default 'train'
        which dataset to evaluate.
    ema : object or None, default None
        Whether use the shadow variable to evaluate.
    """
    # Stash the current weights so they can be restored after evaluation.
    model.save_parameters('tmp')
    if ema is not None:
        # Swap in the EMA shadow variables for evaluation.
        for name, params in model.collect_params().items():
            params.set_data(ema.get(name))
    if dataset_type == 'train':
        data_loader = DataLoader(batch_size=EVAL_BATCH_SIZE, dev_set=False)
    else:
        data_loader = DataLoader(batch_size=EVAL_BATCH_SIZE, dev_set=True)
    autograd.set_training(False)
    total_answers = {}

    for batch_data in tqdm(data_loader.next_batch()):
        ids = [x[0] for x in batch_data]
        context = nd.array([x[1] for x in batch_data], ctx=ctx)
        context_mask = context > 0
        query = nd.array([x[2] for x in batch_data], ctx=ctx)
        query_mask = query > 0
        context_char = nd.array([x[3] for x in batch_data], ctx=ctx)
        query_char = nd.array([x[4] for x in batch_data], ctx=ctx)
        raw_context = [x[7] for x in batch_data]
        spans = [x[8] for x in batch_data]

        begin_hat, end_hat, _, _ = model(context, query, context_char,
                                         query_char, context_mask, query_mask,
                                         None, None)
        begin_hat = begin_hat.softmax(axis=1)
        end_hat = end_hat.softmax(axis=1)

        answer_span_pair = matrix_answer_select(begin_hat, end_hat)
        for i, a, r, s in zip(ids, answer_span_pair, raw_context, spans):
            total_answers[i] = format_answer(a, r, s)
    model.load_parameters('tmp', ctx=CTX)
    autograd.set_training(True)
    if dataset_type == 'train':
        with open(DATA_PATH + RAW_TRAIN_FILE) as dataset_file:
            dataset_json = json.load(dataset_file)
            dataset = dataset_json['data']
    else:
        with open(DATA_PATH + RAW_DEV_FILE) as dataset_file:
            dataset_json = json.load(dataset_file)
            dataset = dataset_json['data']
    result = offical_eval(dataset, total_answers)
    f1_score = result['f1']
    em_score = result['exact_match']
    return f1_score, em_score
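
A hypothetical call site for the function above (model and ema are assumed to come from the surrounding training script):

# Evaluate on the dev set with the EMA shadow weights swapped in.
f1, em = evaluate(model, dataset_type='dev', ema=ema)
print('dev F1 = %.2f, EM = %.2f' % (f1, em))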
Example #3
    def export_model(self):
        if not isinstance(self.model, nn.HybridBlock):
            raise ValueError(
                "Expected a HybridBlock, but the model is not one.")

        autograd.set_training(False)
        autograd.set_recording(False)
        loader = self.create_dataloader("train")
        raw_data = next(iter(loader))
        splitted_data = utils.split_and_load(raw_data, self.ctx)
        # One inference-mode forward pass per device caches the hybridized
        # graph so that export() has a symbol to serialize.
        for data in splitted_data:
            inputs, labels = self.parse_data(data, "train")
            self.model(*inputs)
        self.model.export(os.path.join(self.config.PARAM_DIR, "model"), 9999)
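
A standalone sketch of the same export idiom with scoped inference mode instead of the global toggles (net, loader, ctx and param_dir are placeholder names):

import os
from mxnet import autograd, gluon

def export_hybrid(net, loader, ctx, param_dir, epoch=0):
    # A hybridized block must run one forward pass so its graph is cached
    # before export() can serialize the symbol and parameters.
    batch = next(iter(loader))
    with autograd.predict_mode():
        for data in gluon.utils.split_and_load(batch, ctx):
            net(data)
    net.export(os.path.join(param_dir, 'model'), epoch)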
Example #4
def forward_backward(net, criterions, ctx, data, rois, is_train=True):
    criterion_cls1, criterion_cls2, criterion_reg = criterions
    data = gl.utils.split_and_load(data, ctx)
    rois = gl.utils.split_and_load(rois, ctx)
    ag.set_recording(is_train)
    ag.set_training(is_train)
    # forward rpn
    rpn_cls1, rpn_reg1, rpn_cls2, rpn_reg2 = [], [], [], []
    for data_ in data:
        rpn_cls1_, rpn_reg1_, rpn_cls2_, rpn_reg2_ = net(data_)
        rpn_cls1.append(rpn_cls1_)
        rpn_reg1.append(rpn_reg1_)
        rpn_cls2.append(rpn_cls2_)
        rpn_reg2.append(rpn_reg2_)
    losses = []
    anchor_proposals = net.anchor_proposals
    for data_, rois_, rpn_cls1_, rpn_reg1_, rpn_cls2_, rpn_reg2_ in zip(
            data, rois, rpn_cls1, rpn_reg1, rpn_cls2, rpn_reg2):
        im_info = data_.shape[-2:]
        # anchor target
        # feat 1/8
        # parallel stops here
        (batch_label1, batch_label_weight1, batch_bbox_targets1,
         batch_bbox_weights1) = anchor_proposals[0].target(rpn_cls1_, rois_, im_info)
        # loss cls
        loss_cls1 = criterion_cls1(rpn_cls1_, batch_label1,
                                   batch_label_weight1) / data_.shape[0]
        # loss reg
        loss_reg1 = criterion_reg(rpn_reg1_, batch_bbox_targets1,
                                  batch_bbox_weights1) / data_.shape[0]
        # feat 1/16
        # parallel stops here
        (batch_label2, batch_label_weight2, batch_bbox_targets2,
         batch_bbox_weights2) = anchor_proposals[1].target(rpn_cls2_, rois_, im_info)
        # loss cls
        loss_cls2 = criterion_cls2(rpn_cls2_, batch_label2,
                                   batch_label_weight2) / data_.shape[0]
        # loss reg
        loss_reg2 = criterion_reg(rpn_reg2_, batch_bbox_targets2,
                                  batch_bbox_weights2) / data_.shape[0]

        loss = [loss_cls1, loss_reg1, loss_cls2, loss_reg2]
        # backward
        if is_train:
            ag.backward(loss)
        losses.append(loss)
    ag.set_recording(False)
    ag.set_training(False)
    return losses
Example #5
def validate_faster_rcnn(net, val_data, cfg):
    """Test on validation dataset."""
    # When hybridize is true, set the network to test mode and the proposal
    # NMS params to their test values, then clear and cache a new compute graph.
    # FIXME Will raise a deferred-init error if the hybridized net is called in test mode first
    net.proposal.set_nms(cfg.rpn_test_pre_nms_top_n,
                         cfg.rpn_test_post_nms_top_n)
    if cfg.hybridize:
        autograd.set_training(train_mode=False)
        net.hybridize()

    metric = VOC07MApMetric(iou_thresh=0.5, class_names=cfg.classes)

    for batch in val_data:
        pred_bboxes = []
        pred_cls = []
        pred_scores = []
        gt_bboxes = []
        gt_cls = []
        gt_difficults = []
        # Split and load data for multi-gpu
        data_list = gluon.utils.split_and_load(batch[0],
                                               ctx_list=ctx,
                                               batch_axis=0)
        gt_box_list = gluon.utils.split_and_load(batch[1],
                                                 ctx_list=ctx,
                                                 batch_axis=0)
        im_info_list = gluon.utils.split_and_load(batch[2],
                                                  ctx_list=ctx,
                                                  batch_axis=0)
        for data, gt_box, im_info in zip(data_list, gt_box_list, im_info_list):
            # get prediction results
            cls, scores, bboxes = net(data, im_info)
            pred_cls.append(cls)
            pred_scores.append(scores)
            pred_bboxes.append(bboxes)
            # split ground truths
            gt_cls.append(gt_box.slice_axis(axis=-1, begin=4, end=5))
            gt_bboxes.append(gt_box.slice_axis(axis=-1, begin=0, end=4))
            gt_difficults.append(
                gt_box.slice_axis(axis=-1, begin=5, end=6
                                  ) if gt_box.shape[-1] > 5 else None)

        # update metric
        metric.update(pred_bboxes, pred_cls, pred_scores, gt_bboxes, gt_cls,
                      gt_difficults)

    return metric.get()
Example #6
def forward_backward_v3(net, criterions, ctx, packet, is_train=True):
    (data, ht4, ht4_mask, paf4, paf4_mask, ht8, ht8_mask, paf8, paf8_mask,
     ht16, ht16_mask, paf16, paf16_mask) = packet
    criterion, criterion_ohkm = criterions
    ht = [ht4, ht8, ht16]
    paf = [paf4, paf8, paf16]
    ht_mask = [ht4_mask, ht8_mask, ht16_mask]
    paf_mask = [paf4_mask, paf8_mask, paf16_mask]
    # split to gpus
    data = gl.utils.split_and_load(data, ctx)
    ht = [gl.utils.split_and_load(x, ctx) for x in ht]
    paf = [gl.utils.split_and_load(x, ctx) for x in paf]
    ht_mask = [gl.utils.split_and_load(x, ctx) for x in ht_mask]
    paf_mask = [gl.utils.split_and_load(x, ctx) for x in paf_mask]
    # run
    ag.set_recording(is_train)
    ag.set_training(is_train)
    losses = []
    for idx, data_ in enumerate(data):
        # forward
        (g_ht4, g_paf4, r_ht4, r_paf4, g_ht8, g_paf8, r_ht8, r_paf8,
         g_ht16, g_paf16, r_ht16, r_paf16) = net(data_)
        ht4_, ht8_, ht16_ = [h[idx] for h in ht]
        paf4_, paf8_, paf16_ = [p[idx] for p in paf]
        ht4_mask_, ht8_mask_, ht16_mask_ = [hm[idx] for hm in ht_mask]
        paf4_mask_, paf8_mask_, paf16_mask_ = [pm[idx] for pm in paf_mask]
        # loss
        losses_ = [
            criterion(g_ht4, ht4_, ht4_mask_),
            criterion_ohkm(r_ht4, ht4_, ht4_mask_),
            criterion(g_ht8, ht8_, ht8_mask_),
            criterion_ohkm(r_ht8, ht8_, ht8_mask_),
            criterion(g_ht16, ht16_, ht16_mask_),
            criterion_ohkm(r_ht16, ht16_, ht16_mask_),
            criterion(g_paf4, paf4_, paf4_mask_),
            criterion(r_paf4, paf4_, paf4_mask_),
            criterion(g_paf8, paf8_, paf8_mask_),
            criterion(r_paf8, paf8_, paf8_mask_),
            criterion(g_paf16, paf16_, paf16_mask_),
            criterion(r_paf16, paf16_, paf16_mask_)
        ]
        losses.append(losses_)
        # backward
        if is_train:
            ag.backward(losses_)
    ag.set_recording(False)
    ag.set_training(False)
    return losses
Example #7
def forward_backward(net, criterion, ctx, packet, is_train=True):
    data, ht, mask = packet
    data = gl.utils.split_and_load(data, ctx)
    ht = gl.utils.split_and_load(ht, ctx)
    mask = gl.utils.split_and_load(mask, ctx)
    # run
    ag.set_recording(is_train)
    ag.set_training(is_train)
    losses = []
    for data_, ht_, mask_ in zip(data, ht, mask):
        pred_ = net(data_)
        losses_ = [criterion(ht_, pred_, mask_)]
        losses.append(losses_)
        if is_train:
            ag.backward(losses_)
    ag.set_recording(False)
    ag.set_training(False)
    return losses
Example #8
    def _process_epoch(self, mode):
        color_code = esc_seq.GREEN if sys.platform != "win32" else ""
        end_color_code = esc_seq.END if sys.platform != "win32" else ""
        print(color_code +
              "{}: epoch {:3d}/{:3d}".format(mode, self.latest_state + 1,
                                             self.config.MAX_EPOCHS) +
              end_color_code)

        loader = self.create_dataloader(mode)
        handler = self.create_handler(mode=mode, num_batch=len(loader))

        for i, raw_data in enumerate(loader):
            gathered_outputs = []
            gathered_losses = []
            losses = []
            tick = time.time()
            splitted_data = utils.split_and_load(raw_data, self.ctx)

            if mode == "train":
                autograd.set_training(True)
                autograd.set_recording(True)
            elif mode == "test":
                autograd.set_training(False)
                autograd.set_recording(False)

            for data in splitted_data:
                inputs, labels = self.parse_data(data, mode)
                outputs = self.parse_output(self.model(*inputs), mode)
                gathered_outputs.append(outputs)
                loss = self.compute_loss(outputs, labels)
                gathered_losses.append(loss)
                if mode == "train":
                    losses.extend(loss)

            autograd.set_training(False)
            autograd.set_recording(False)

            if mode == "train":
                autograd.backward(losses)
                self.trainer.step(raw_data[0].shape[0])

            handler.cleanup_batch(raw_data, gathered_outputs, gathered_losses,
                                  i, tick)

        handler.cleanup_epoch()
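
The per-batch core of the loop above, condensed into a sketch built on autograd.record (model, loss_fn, trainer and ctx are assumed to exist):

from mxnet import autograd, gluon

def train_step(model, loss_fn, trainer, data, label, ctx):
    # Scatter the batch across devices, record the forward passes, then run
    # one backward over all per-device losses before the optimizer step.
    data_list = gluon.utils.split_and_load(data, ctx)
    label_list = gluon.utils.split_and_load(label, ctx)
    with autograd.record():
        losses = [loss_fn(model(x), y) for x, y in zip(data_list, label_list)]
    autograd.backward(losses)
    trainer.step(data.shape[0])
    return losses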
Example #9
def train_rpn(net, train_data, cfg):
    """Training pipeline"""
    rpn_loss = RPNLoss(cfg.rpn_batch_size)
    rpn_loss.initialize(ctx=ctx)

    if cfg.hybridize:
        autograd.set_training(train_mode=True)
        net.hybridize()

    trainer = gluon.Trainer(
        net.collect_params(), 'sgd',
        {'learning_rate': cfg.lr, 'wd': cfg.wd, 'momentum': cfg.momentum, 'clip_gradient': 5})

    # lr decay policy
    lr_decay = float(cfg.lr_decay)
    lr_steps = sorted(cfg.lr_decay_epochs)

    # Create Metrics
    log_metric = LogLossMetric(name='LogLoss', batch_size=cfg.rpn_batch_size)
    smoothl1_metric = SmoothL1LossMetric(name='SmoothL1Loss', batch_size=cfg.rpn_batch_size)

    logger.info('Config for training RPN:\n%s' % cfg)
    logger.info('Start training from [Epoch %d]' % cfg.start_epoch)

    for epoch in range(cfg.start_epoch, cfg.end_epoch):
        while lr_steps and epoch >= lr_steps[0]:
            new_lr = trainer.learning_rate * lr_decay
            lr_steps.pop(0)
            trainer.set_learning_rate(new_lr)
            logger.info("[Epoch {}] Set learning rate to {}".format(epoch, new_lr))
        tic = time.time()
        btic = time.time()
        log_metric.reset()
        smoothl1_metric.reset()

        for i, batch in enumerate(train_data):
            batch_size = batch[0].shape[0]
            data_list = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            gt_box_list = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            im_info_list = gluon.utils.split_and_load(batch[2], ctx_list=ctx, batch_axis=0)
            with autograd.record():
                cls_loss_list = []
                bbox_loss_list = []
                label_list = []
                for data, gt_box, im_info in zip(data_list, gt_box_list, im_info_list):
                    rpn_cls_prob, rpn_bbox_pred, labels, bbox_targets = net(data, im_info, gt_box)
                    cls_loss, bbox_loss = rpn_loss(rpn_cls_prob, rpn_bbox_pred, labels, bbox_targets)
                    cls_loss_list.append(cls_loss)
                    bbox_loss_list.append(bbox_loss)
                    label_list.append(labels)
            autograd.backward(cls_loss_list + bbox_loss_list)
            trainer.step(batch_size)
            log_metric.update(label_list, cls_loss_list)
            smoothl1_metric.update(label_list, bbox_loss_list)
            if cfg.log_interval and not (i + 1) % cfg.log_interval:
                name1, loss1 = log_metric.get()
                name2, loss2 = smoothl1_metric.get()
                logger.info('[Epoch %d][Batch %d], Speed: %f samples/sec, %s=%f, %s=%f' % (
                    epoch, i, batch_size / (time.time() - btic), name1, loss1, name2, loss2))
            btic = time.time()

        name1, loss1 = log_metric.get()
        name2, loss2 = smoothl1_metric.get()
        logger.info('[Epoch %d] Training cost: %f, %s=%f, %s=%f' % (
            epoch, (time.time() - tic), name1, loss1, name2, loss2))
        save_params(net, epoch, cfg.save_interval, cfg.save_prefix)
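
The while-loop learning-rate policy above pops each decay epoch at most once, so resuming from a later start_epoch still applies every decay that is already due. A tiny worked example with assumed values:

# Assumed schedule: base lr 0.01, decay factor 0.1 at epochs 10 and 20.
lr, lr_decay, lr_steps = 0.01, 0.1, [10, 20]
for epoch in range(25):
    while lr_steps and epoch >= lr_steps[0]:
        lr *= lr_decay
        lr_steps.pop(0)
# lr: 0.01 for epochs 0-9, 0.001 for 10-19, 0.0001 from epoch 20 on.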
Example #10
def train_faster_rcnn(net, train_data, val_data, cfg):
    """Training pipeline"""
    rpn_loss = RPNLoss(cfg.rpn_batch_size)
    rcnn_loss = RCNNLoss(cfg.roi_batch_size)
    rpn_loss.initialize(ctx=ctx)
    rcnn_loss.initialize(ctx=ctx)

    trainer = gluon.Trainer(
        net.collect_params(), 'sgd', {
            'learning_rate': cfg.lr,
            'wd': cfg.wd,
            'momentum': cfg.momentum,
            'clip_gradient': 5
        })

    # lr decay policy
    lr_decay = float(cfg.lr_decay)
    lr_steps = sorted(cfg.lr_decay_epochs)

    # Create Metrics
    rpn_log_metric = LogLossMetric(name='RPNLogLoss',
                                   batch_size=cfg.rpn_batch_size)
    rpn_smoothl1_metric = SmoothL1LossMetric(name='RPNSmoothL1Loss',
                                             batch_size=cfg.rpn_batch_size)
    rcnn_log_metric = LogLossMetric(name='RCNNLogLoss',
                                    batch_size=cfg.roi_batch_size)
    rcnn_smoothl1_metric = SmoothL1LossMetric(name='RCNNSmoothL1Loss',
                                              batch_size=cfg.roi_batch_size)
    # New list to store loss and label for backward and update metric
    rpn_cls_loss_list = []
    rpn_bbox_loss_list = []
    rcnn_cls_loss_list = []
    rcnn_bbox_loss_list = []

    logger.info('Config for end to end training FasterRCNN:\n%s' % cfg)
    logger.info('Start training from [Epoch %d]' % cfg.start_epoch)
    best_map = [0]
    for epoch in range(cfg.start_epoch, cfg.end_epoch):
        # When hybridize is true, set network to train mode, reset proposal nms params
        # then clear and cache new compute graph
        net.proposal.set_nms(cfg.rpn_pre_nms_top_n, cfg.rpn_post_nms_top_n)
        if cfg.hybridize:
            autograd.set_training(train_mode=True)
            net.hybridize()

        # Check and update learning rate
        while lr_steps and epoch >= lr_steps[0]:
            new_lr = trainer.learning_rate * lr_decay
            lr_steps.pop(0)
            trainer.set_learning_rate(new_lr)
            logger.info("[Epoch {}] Set learning rate to {}".format(
                epoch, new_lr))

        # Refresh time and metrics
        tic = time.time()
        btic = time.time()
        rpn_log_metric.reset()
        rpn_smoothl1_metric.reset()
        rcnn_log_metric.reset()
        rcnn_smoothl1_metric.reset()

        for i, batch in enumerate(train_data):
            # Empty lists
            rpn_cls_loss_list[:] = []
            rpn_bbox_loss_list[:] = []
            rcnn_cls_loss_list[:] = []
            rcnn_bbox_loss_list[:] = []
            # Split and load data for multi-gpu
            batch_size = batch[0].shape[0]
            data_list = gluon.utils.split_and_load(batch[0],
                                                   ctx_list=ctx,
                                                   batch_axis=0)
            gt_box_list = gluon.utils.split_and_load(batch[1],
                                                     ctx_list=ctx,
                                                     batch_axis=0)
            im_info_list = gluon.utils.split_and_load(batch[2],
                                                      ctx_list=ctx,
                                                      batch_axis=0)

            # Network Forward
            with autograd.record():
                for data, gt_box, im_info in zip(data_list, gt_box_list,
                                                 im_info_list):
                    rpn_cls_prob, rpn_bbox_pred, rpn_label, rpn_bbox_target, \
                    rcnn_cls_prob, rcnn_bbox_pred, rcnn_label, rcnn_bbox_target = net(data, im_info, gt_box)
                    rpn_cls_loss, rpn_bbox_loss = \
                        rpn_loss(rpn_cls_prob, rpn_bbox_pred, rpn_label, rpn_bbox_target)
                    rcnn_cls_loss, rcnn_bbox_loss = \
                        rcnn_loss(rcnn_cls_prob, rcnn_bbox_pred, rcnn_label, rcnn_bbox_target)
                    rpn_cls_loss_list.append(rpn_cls_loss)
                    rpn_bbox_loss_list.append(rpn_bbox_loss)
                    rcnn_cls_loss_list.append(rcnn_cls_loss)
                    rcnn_bbox_loss_list.append(rcnn_bbox_loss)
            # Backward and update parameters and metrics
            autograd.backward(rpn_cls_loss_list + rpn_bbox_loss_list +
                              rcnn_cls_loss_list + rcnn_bbox_loss_list)
            trainer.step(1)
            rpn_log_metric.update(preds=rpn_cls_loss_list)
            rpn_smoothl1_metric.update(preds=rpn_bbox_loss_list)
            rcnn_log_metric.update(preds=rcnn_cls_loss_list)
            rcnn_smoothl1_metric.update(preds=rcnn_bbox_loss_list)

            # Log training states
            if cfg.log_interval and not (i + 1) % cfg.log_interval:
                name1, loss1 = rpn_log_metric.get()
                name2, loss2 = rpn_smoothl1_metric.get()
                name3, loss3 = rcnn_log_metric.get()
                name4, loss4 = rcnn_smoothl1_metric.get()
                logger.info(
                    '[Epoch %d][Batch %d], Speed: %f samples/sec, %s=%f, %s=%f, %s=%f, %s=%f'
                    % (epoch, i, batch_size / (time.time() - btic), name1,
                       loss1, name2, loss2, name3, loss3, name4, loss4))
            btic = time.time()

        name1, loss1 = rpn_log_metric.get()
        name2, loss2 = rpn_smoothl1_metric.get()
        name3, loss3 = rcnn_log_metric.get()
        name4, loss4 = rcnn_smoothl1_metric.get()
        logger.info(
            '[Epoch %d] Training cost: %f, %s=%f, %s=%f, %s=%f, %s=%f' %
            (epoch, (time.time() - tic), name1, loss1, name2, loss2, name3,
             loss3, name4, loss4))

        map_name, mean_ap = validate_faster_rcnn(net, val_data, cfg)
        val_msg = '\n'.join(
            ['%s=%f' % (k, v) for k, v in zip(map_name, mean_ap)])
        logger.info('[Epoch %d] Validation: \n%s' % (epoch, val_msg))
        save_params(net, best_map, mean_ap[-1], epoch, cfg.save_interval,
                    cfg.save_prefix)
Example #11
    print("Using network architecture: ", opt.arch)

    if opt.mode == "symbolic":
        print("Mode: symbolic")
        if opt.flag_finetune:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                opt.cur_epoch = int(opt.pretrained_path.split('.')[0][-4:])
                params_path = opt.pretrained_path
                json_path = opt.pretrained_path[:-11] + "symbol.json"
                model = gluon.nn.SymbolBlock.imports(json_path, ['data'],
                                                     params_path,
                                                     ctx=ctx)
        else:
            opt.cur_epoch = 0
            # Hybridize with training mode off, then switch training mode back on.
            autograd.set_training(0)
            model = create_model(opt.arch, opt.heads, opt.head_conv, ctx=ctx)
            model.hybridize()
            autograd.set_training(1)
    else:
        print("Mode: imperative")
        opt.cur_epoch = 0
        model = create_model(opt.arch, opt.heads, opt.head_conv, ctx=ctx)
        if opt.flag_finetune:
            model = load_model(model, opt.pretrained_path, ctx=ctx)
            #model = model.load_parameters(opt.pretrained_path, ctx=ctx, ignore_extra=True, allow_missing = True)
            opt.cur_epoch = int(opt.pretrained_path.split('.')[0][-4:])
        elif opt.arch != "res_18":
            model.collect_params().initialize(init=init.Xavier(), ctx=ctx)
    """ 2. Dataset """
    train_dataset, val_dataset = get_coco(opt, "./data/coco")
Example #12
def dropout2(X, drop_rate):
    # Enable training mode globally so that nd.Dropout actually drops units
    # (in inference mode Dropout is the identity). Note: the previous mode is
    # never restored, so training mode leaks to subsequent code.
    autograd.set_training(True)
    Z = nd.zeros_like(X)
    nd.Dropout(X, p=drop_rate, out=Z)
    return Z
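
A scoped variant that does not leak training mode, using the autograd.train_mode() context manager:

from mxnet import autograd, nd

def dropout2_scoped(X, drop_rate):
    # train_mode() enables training behaviour (so Dropout actually drops
    # units) inside the block only, and restores the previous mode on exit.
    with autograd.train_mode():
        return nd.Dropout(X, p=drop_rate)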