Example #1
    def cascade_rcnn(self, F, feature, roi, sampler, gt_box):
        """Forward Faster-RCNN network.

        The behavior during traing and inference is different.

        Parameters
        ----------
        feature: feature map
        roi: ROI region to be pooled (decoded bbox)


        Returns
        -------
        box_pred:  bbox prediction(encoded bbox) 
        cls_pred:  cls prediction

        """

        if autograd.is_training():
            roi, samples, matches = sampler(roi, gt_box)
            sample_data = edict()
            sample_data.roi = roi
            sample_data.samples = samples
            sample_data.matches = matches

        pooled_feat = self.extract_ROI(F=F, feature=feature, bbox=roi)
        top_feat = self.top_features(pooled_feat)
        cls_pred = self.class_predictor(top_feat)
        box_pred = self.box_predictor(top_feat).reshape((-1, 1, 4)).transpose(
            (1, 0, 2))

        if autograd.is_training():
            return cls_pred, box_pred, sample_data
        else:
            return cls_pred, box_pred, None
Example #2
def auto_grad():
    """
    对函数  y=2x**2  求关于列向量  x  的梯度 4x
    :return:
    """
    x = nd.arange(4).reshape((4, 1))
    logger.info("autograd 数组:")
    logger.info(x)

    # Call attach_grad to allocate the memory needed to store the gradient
    x.attach_grad()

    logger.info("autograd.is_training():")
    logger.info(autograd.is_training())
    # Call record to ask MXNet to record the computations needed for the gradient.
    with autograd.record():
        y = 2 * nd.dot(x.T, x)
        logger.info(autograd.is_training())
        logger.info(y)

    # Call backward to compute the gradient automatically
    y.backward()

    logger.info("autograd 梯度:")
    logger.info(x.grad)
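For context on the log output above: autograd.is_training() is False outside of a recording scope and becomes True inside autograd.record(), whose train_mode argument defaults to True. A minimal sketch of the mode switches (standard mxnet.autograd calls):

from mxnet import autograd

print(autograd.is_training())          # False: no recording scope yet
with autograd.record():                # train_mode=True by default
    print(autograd.is_training())      # True
    with autograd.predict_mode():      # temporarily switch to inference behaviour
        print(autograd.is_training())  # False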
Example #3
    def hybrid_forward(self, F, x):
        all_box_centers = []
        all_box_scales = []
        all_objectness = []
        all_class_pred = []
        all_anchors = []
        all_offsets = []
        all_detections = []
        routes = []
        for stage, block, output in zip(self.stages, self.yolo_blocks,
                                        self.yolo_outputs):
            x = stage(x)
            routes.append(x)

        for i, block, output in zip(range(len(routes)), self.yolo_blocks,
                                    self.yolo_outputs):
            x, tip = block(x)
            if autograd.is_training():
                box_centers, box_scales, objness, class_pred, anchors, offsets = output(
                    tip)
                all_box_centers.append(box_centers)
                all_box_scales.append(box_scales)
                all_objectness.append(objness)
                all_class_pred.append(class_pred)
                all_anchors.append(anchors)
                all_offsets.append(offsets)
            else:
                detections = output(tip)
                all_detections.append(detections)
            if i >= len(routes) - 1:
                break
            x = self.transitions[i](x)
            upsample = _upsample(x, stride=2)
            x = F.concat(upsample, routes[::-1][i + 1], dim=1)

        if autograd.is_training():
            # return raw predictions
            return (F.concat(*all_box_centers,
                             dim=-2), F.concat(*all_box_scales, dim=-2),
                    F.concat(*all_objectness,
                             dim=-2), F.concat(*all_class_pred, dim=-2))

        result = F.concat(*all_detections, dim=1)
        # apply nms per class
        if self.nms_thresh > 0 and self.nms_thresh < 1:
            result = F.contrib.box_nms(result,
                                       overlap_thresh=self.nms_thresh,
                                       topk=self.nms_topk,
                                       id_index=0,
                                       score_index=1,
                                       coord_start=2,
                                       force_suppress=False)
            if self.post_nms > 0:
                result = result.slice_axis(axis=1, begin=0, end=self.post_nms)
        ids = result.slice_axis(axis=-1, begin=0, end=1)
        scores = result.slice_axis(axis=-1, begin=1, end=2)
        bboxes = result.slice_axis(axis=-1, begin=2, end=None)
        return ids, scores, bboxes
Example #4
def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():  # drop out only during training
        H1 = dropout(H1, drop_prob1)  # drop out after the first fully connected layer
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():  # drop out only during training
        H2 = dropout(H2, drop_prob2)  # drop out after the second fully connected layer
    return nd.dot(H2, W3) + b3
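The net examples on this page all call a dropout(X, drop_prob) helper that is never shown. A minimal sketch of the inverted-dropout helper they appear to assume (this definition is an assumption, not taken from the source):

from mxnet import nd

def dropout(X, drop_prob):
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        # drop everything
        return X.zeros_like()
    # keep each element with probability keep_prob, then rescale so the expectation is unchanged
    mask = nd.random.uniform(0, 1, X.shape) < keep_prob
    return mask * X / keep_prob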
Example #5
def net(X):
    X = X.reshape(-1, num_inputs)
    H1 = npx.relu(np.dot(X, W1) + b1)
    if autograd.is_training():
        H1 = dropout(H1, drop_prob1)
    H2 = npx.relu(np.dot(H1, W2) + b2)
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)
    return np.dot(H2, W3) + b3
Example #6
def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():
        H1 = dropout(H1, drop_prob1)
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)
    return nd.dot(H2, W3) + b3
Example #7
def net(x):
    x = x.reshape((-1, num_inputs))
    h1 = (nd.dot(x, w1) + b1).relu()
    if autograd.is_training():  # only drop out in training mode
        h1 = dropout(h1, drop_prob1)
    h2 = (nd.dot(h1, w2) + b2).relu()
    if autograd.is_training():  # only drop out in training mode
        h2 = dropout(h2, drop_prob2)
    return nd.dot(h2, w3) + b3
Example #8
def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():  # use dropout only when training the model
        H1 = dropout(H1, drop_prob1)  # add a dropout layer after the first fully connected layer
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)  # add a dropout layer after the second fully connected layer
    return nd.dot(H2, W3) + b3
Example #9
def net(x):
    x = x.reshape((-1, 784))
    H1 = (nd.dot(x, W1) + b1).relu()
    if autograd.is_training():
        H1 = dropout(H1, drop_prob1)
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)
    return nd.dot(H2, W3) + b3
Example #10
def net(X):
    drop_prob1, drop_prob2 = 0.2, 0.5
    X = X.reshape(-1, num_inputs)
    H1 = (nd.dot(X, W1) + b1).relu()
    if autograd.is_training():
        H1 = dropout(H1, drop_prob1)  # keep the returned array; dropout is not in-place
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        H2 = dropout(H2, drop_prob2)
    return nd.dot(H2, W3) + b3
Example #11
    def hybrid_forward(self, F, *x):
        if autograd.is_training():
            pre_nms = self.train_pre_nms
            post_nms = self.train_post_nms
        else:
            pre_nms = self.test_pre_nms
            post_nms = self.test_post_nms

        anchors = []
        rpn_pre_nms_proposals = []
        raw_rpn_scores = []
        raw_rpn_boxes = []
        for i, feat in enumerate(x):
            # raw_rpn_score (B, HWN, 1)
            # raw_rpn_box (B, HWN, 4)
            rpn_score, rpn_box, raw_rpn_score, raw_rpn_box = self.head(feat)
            with autograd.pause():
                anchor = self.anchor_generator[i](feat)
                anchor = anchor.reshape((-1, 4))  # (1, N, 4)
                anchors.append(anchor)
                # (B, N, 4)
                rpn_box = self.box_decoder(rpn_box, anchor)
                rpn_box = self.box_clip(rpn_box)
                rpn_pre = F.concat(rpn_score, rpn_box, dim=-1)
                if self.per_level_nms:
                    rpn_pre = F.contrib.box_nms(rpn_pre, overlap_thresh=self.nms_thresh, topk=pre_nms // len(x),
                                                coord_start=1, score_index=0, id_index=-1)

                rpn_pre_nms_proposals.append(rpn_pre)
                raw_rpn_scores.append(raw_rpn_score)
                raw_rpn_boxes.append(raw_rpn_box)

        rpn_pre_nms_proposals = F.concat(*rpn_pre_nms_proposals, dim=1)
        raw_rpn_scores = F.concat(*raw_rpn_scores, dim=1)
        raw_rpn_boxes = F.concat(*raw_rpn_boxes, dim=1)

        with autograd.pause():
            if self.per_level_nms:
                # overlap_thresh=2 (> 1) means box_nms suppresses nothing and only sorts
                # the proposals by score, since per-level NMS was already applied above
                tmp = F.contrib.box_nms(rpn_pre_nms_proposals, overlap_thresh=2, topk=pre_nms + 1, coord_start=1,
                                        score_index=0, id_index=-1)
            else:
                tmp = F.contrib.box_nms(rpn_pre_nms_proposals, overlap_thresh=self.nms_thresh, topk=pre_nms,
                                        coord_start=1, score_index=0, id_index=-1)

        result = F.slice_axis(tmp, axis=1, begin=0, end=post_nms)
        rpn_scores = F.slice_axis(result, axis=-1, begin=0, end=1)
        rpn_boxes = F.slice_axis(result, axis=-1, begin=1, end=None)

        if autograd.is_training():
            return rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors
        else:
            return rpn_scores, rpn_boxes
Example #12
def net(X):
    X = X.reshape((-1, num_inputs))
    H1 = (nd.dot(X, W1) + b1).relu()
    # Use dropout only when training the model
    if autograd.is_training():
        # Add a dropout layer after the first fully connected layer
        H1 = dropout(H1, drop_prob1)
    H2 = (nd.dot(H1, W2) + b2).relu()
    if autograd.is_training():
        # Add a dropout layer after the second fully connected layer
        H2 = dropout(H2, drop_prob2)
    return nd.dot(H2, W3) + b3
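autograd.is_training() only returns True inside a recording scope, so a training step for these nets must wrap the forward pass in autograd.record(). A hedged sketch of such a step (net, loss, trainer, X, y and batch_size are assumed to be defined elsewhere):

with autograd.record():      # dropout is active here because autograd.is_training() is True
    y_hat = net(X)
    l = loss(y_hat, y)
l.backward()
trainer.step(batch_size)     # outside record(), net(X) runs without dropout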
Example #13
    def net(self, X):
        X = X.reshape(-1, self.num_inputs)

        H1 = (nd.dot(X, self.W1) + self.b1).relu()
        if autograd.is_training():
            H1 = dropout(H1, self.drop_prob1)

        H2 = (nd.dot(H1, self.W2) + self.b2).relu()
        if autograd.is_training():
            H2 = dropout(H2, self.drop_prob2)

        return nd.dot(H2, self.W3) + self.b3
Example #14
 def test(self):
     """
     Returns true if this set contains the specified element
     """
     x = nd.arange(4).reshape((4, 1))
     x.attach_grad()
     print(autograd.is_training())
     with autograd.record():
         y = 2 * nd.dot(x.T, x)
         print(autograd.is_training())
     y.backward()
     assert (x.grad - 4 * x).norm().asscalar() == 0
     print(x.grad)
Example #15
    def hybrid_forward(self, F, anchor, score, bbox_pred, img):
        """
        Generate proposals. Limit to batch-size=1 in current implementation.
        """
        if autograd.is_training():
            pre_nms = self._train_pre_nms
            post_nms = self._train_post_nms
        else:
            pre_nms = self._test_pre_nms
            post_nms = self._test_post_nms

        with autograd.pause():
            # restore bounding boxes
            roi = self._box_decoder(bbox_pred, self._box_to_center(anchor))

            # clip rois to image's boundary
            # roi = F.Custom(roi, img, op_type='bbox_clip_to_image')
            roi = self._clipper(roi, img)

            # remove bounding boxes that don't meet the min_size constraint
            # by setting them to (-1, -1, -1, -1)
            # width = roi.slice_axis(axis=-1, begin=2, end=3)
            # height = roi.slice_axis(axis=-1, begin=3, end=None)
            xmin, ymin, xmax, ymax = roi.split(axis=-1, num_outputs=4)
            width = xmax - xmin
            height = ymax - ymin
            # TODO:(zhreshold), there's im_ratio to handle here, but it requires
            # add' info, and we don't expect big difference
            invalid = (width < self._min_size) + (height < self._min_size)

            # # remove out of bound anchors
            # axmin, aymin, axmax, aymax = F.split(anchor, axis=-1, num_outputs=4)
            # # it's a bit tricky to get right/bottom boundary in hybridblock
            # wrange = F.arange(0, 2560).reshape((1, 1, 1, 2560)).slice_like(
            #    img, axes=(3)).max().reshape((1, 1, 1))
            # hrange = F.arange(0, 2560).reshape((1, 1, 2560, 1)).slice_like(
            #    img, axes=(2)).max().reshape((1, 1, 1))
            # invalid = (axmin < 0) + (aymin < 0) + F.broadcast_greater(axmax, wrange) + \
            #    F.broadcast_greater(aymax, hrange)
            # prevent invalid anchors from suppressing anchors with 0 confidence
            score = F.where(invalid, F.ones_like(invalid) * -1, score)
            invalid = F.repeat(invalid, axis=-1, repeats=4)
            roi = F.where(invalid, F.ones_like(invalid) * -1, roi)

            # Non-maximum suppression
            pre = F.concat(score, roi, dim=-1)
            tmp = F.contrib.box_nms(pre,
                                    overlap_thresh=self._nms_thresh,
                                    topk=pre_nms,
                                    coord_start=1,
                                    score_index=0,
                                    id_index=-1,
                                    force_suppress=True)

            # slice post_nms number of boxes
            result = F.slice_axis(tmp, axis=1, begin=0, end=post_nms)
            rpn_scores = F.slice_axis(result, axis=-1, begin=0, end=1)
            rpn_bbox = F.slice_axis(result, axis=-1, begin=1, end=None)

        return rpn_scores, rpn_bbox
Example #16
 def sample_z(self, F, mu, logvar):
     if autograd.is_training():
         std = F.exp(0.5 * logvar)
         eps = F.normal_like(std)
         return (eps * std) + mu
     else:
         return mu
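sample_z implements the VAE reparameterization trick: sample z = mu + eps * std during training, return the mean at inference. For reference, a sketch of the KL term such a model typically adds to its loss (not part of the source snippet; F is the same nd/sym namespace as above):

# KL( N(mu, exp(logvar)) || N(0, 1) ), summed over the latent dimension
kl = -0.5 * F.sum(1 + logvar - mu * mu - F.exp(logvar), axis=-1)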
Example #17
 def hybrid_forward(self, F, x: Union[mx.nd.NDArray, mx.sym.Symbol], *args, **kwargs):
     x = self.backbone(x)
     x = self.neck(x)
     x = self.head(x)
     if autograd.is_training():
         return x
     return self.generate_result(F, x[0])
Example #18
 def feature_detect(self, tag_inputs, word_inputs, bert):
     is_train = autograd.is_training()
     batch_size = word_inputs.shape[1]
     seq_len = word_inputs.shape[0]
     # unked_words = np.where(word_inputs < self._vocab.words_in_train, word_inputs, self._vocab.UNK)
     if self.pret_word_embs is not None:
         word_embs = self.pret_word_embs(nd.array(word_inputs))
         if bert is not None:
             word_embs = nd.concat(word_embs, nd.array(bert), dim=2)
     else:
         word_embs = nd.array(bert)
     tag_embs = self.tag_embs(nd.array(tag_inputs)) if self.tag_embs is not None else None
     # Dropout
     if is_train:
         wm, tm = self.generate_emb_mask(seq_len, batch_size)
         if self.tag_embs is not None:
             emb_inputs = nd.concat(nd.multiply(wm, word_embs), nd.multiply(tm, tag_embs), dim=2)
         else:
             emb_inputs = nd.multiply(wm, word_embs)
     else:
         if self.tag_embs is not None:
             emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size
         else:
             emb_inputs = word_embs
     top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                        dropout_x=self.dropout_lstm_input if is_train else 0)
     return top_recur
Example #19
    def biaffine(self, dep_arc, head_arc, mask, arc_targets):
        is_train = autograd.is_training()
        batch_size = mask.shape[1]
        seq_len = mask.shape[0]
        W_arc = self.arc_W.data()
        arc_logits = bilinear(dep_arc,
                              W_arc,
                              head_arc,
                              self.mlp_arc_size,
                              seq_len,
                              batch_size,
                              num_outputs=1,
                              bias_x=True,
                              bias_y=False)  # type: nd.NDArray
        # #head x #dep x batch_size
        if not is_train:
            return arc_logits
        # (#head x #dep) x batch_size
        flat_arc_logits = reshape_fortran(arc_logits,
                                          (seq_len, seq_len * batch_size))
        # (#head ) x (#dep x batch_size)
        flat_arc_targets = reshape_fortran(arc_targets,
                                           (seq_len, seq_len * batch_size))
        losses = self.binary_ce_loss(flat_arc_logits,
                                     nd.array(flat_arc_targets))
        mask_1D_tensor = nd.array(flatten_numpy(mask))
        arc_loss = nd.sum(losses * mask_1D_tensor) / mask_1D_tensor.sum()

        return arc_logits, arc_loss
Example #20
 def hybrid_forward(self, F, x):
     """Hybrid forward"""
     features = self.features(x)
     cls_preds = [F.flatten(F.transpose(cp(feat), (0, 2, 3, 1)))
                  for feat, cp in zip(features, self.class_predictors)]
     box_preds = [F.flatten(F.transpose(bp(feat), (0, 2, 3, 1)))
                  for feat, bp in zip(features, self.box_predictors)]
     anchors = [F.reshape(ag(feat), shape=(1, -1))
                for feat, ag in zip(features, self.anchor_generators)]
     cls_preds = F.concat(*cls_preds, dim=1).reshape((0, -1, self.num_classes + 1))
     box_preds = F.concat(*box_preds, dim=1).reshape((0, -1, 4))
     anchors = F.concat(*anchors, dim=1).reshape((1, -1, 4))
     if autograd.is_training():
         return [cls_preds, box_preds, anchors]
     bboxes = self.bbox_decoder(box_preds, anchors)
     cls_ids, scores = self.cls_decoder(F.softmax(cls_preds, axis=-1))
     results = []
     for i in range(self.num_classes):
         cls_id = cls_ids.slice_axis(axis=-1, begin=i, end=i+1)
         score = scores.slice_axis(axis=-1, begin=i, end=i+1)
         # per class results
         per_result = F.concat(*[cls_id, score, bboxes], dim=-1)
         results.append(per_result)
     result = F.concat(*results, dim=1)
     if self.nms_thresh > 0 and self.nms_thresh < 1:
         result = F.contrib.box_nms(
             result, overlap_thresh=self.nms_thresh, topk=self.nms_topk, valid_thresh=0.01,
             id_index=0, score_index=1, coord_start=2, force_suppress=False)
         if self.post_nms > 0:
             result = result.slice_axis(axis=1, begin=0, end=self.post_nms)
     ids = F.slice_axis(result, axis=2, begin=0, end=1)
     scores = F.slice_axis(result, axis=2, begin=1, end=2)
     bboxes = F.slice_axis(result, axis=2, begin=2, end=6)
     return ids, scores, bboxes
Example #21
    def hybrid_forward(self, F, x, anchors, offsets):
        # x ==> (B, pred per pixel, height*width)
        pred = self.prediction(x).reshape((0, self._num_anchors*self._num_pred, -1))
        pred = F.transpose(pred, (0, 2, 1)).reshape((0, -1, self._num_anchors, self._num_pred))
        # components
        raw_box_centers = pred.slice_axis(axis=-1, begin=0, end=2)
        raw_box_scales = pred.slice_axis(axis=-1, begin=2, end=4)
        objness = pred.slice_axis(axis=-1, begin=4, end=5)
        class_pred = pred.slice_axis(axis=-1, begin=5, end=None)

        # get offsets
        # (1, 1, n, n, 2) ==> (1, 1, height, width, 2)
        offsets = F.slice_like(offsets, x*0, axes=(2, 3))
        # (1, 1, height, width, 2) ==> (1, height*width, 1, 2)
        offsets = F.reshape(offsets, (1, -1, 1, 2))

        box_centers = F.broadcast_add(F.sigmoid(raw_box_centers), offsets)*self._strides
        box_scales = F.broadcast_mul(F.exp(raw_box_scales), anchors)
        confidence = F.sigmoid(objness)
        class_score = F.broadcast_mul(confidence, F.sigmoid(class_pred))
        wh = box_scales/2
        bbox = F.concat(box_centers - wh, box_centers + wh, dim=-1)
        bbox = F.reshape(bbox, (0, -1, 4))

        if autograd.is_training():
            return bbox, raw_box_centers, raw_box_scales, objness, class_pred, anchors, offsets
Example #22
    def hybrid_forward(self, F, x, img):
        """Forward RPN.

        The behavior during training and inference is different.

        Parameters
        ----------
        x : mxnet.nd.NDArray or mxnet.symbol
            Feature tensor.
        img : mxnet.nd.NDArray or mxnet.symbol
            The original input image.

        Returns
        -------
        (rpn_score, rpn_box)
            Returns predicted scores and regions which are candidates of objects.

        """
        anchors = self.anchor_generator(x)
        x = self.conv1(x)
        raw_rpn_scores = self.score(x).transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 1))
        rpn_scores = F.sigmoid(F.stop_gradient(raw_rpn_scores))
        rpn_box_pred = self.loc(x).transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 4))
        rpn_score, rpn_box = self.region_proposaler(
            anchors, rpn_scores, F.stop_gradient(rpn_box_pred), img)
        if autograd.is_training():
            # return raw predictions as well in training for bp
            return rpn_score, rpn_box, raw_rpn_scores, rpn_box_pred, anchors
        return rpn_score, rpn_box
Example #23
 def hybrid_forward(self, F, X):
     mu, logvar = self.encode(X)
     if autograd.is_training():
         std = F.exp(0.5 * logvar)
         eps = F.random.normal_like(std)
         mu = (eps * std) + mu
     return self.decode(mu), mu, logvar
Example #24
 def forward(self, input):
     hidden = self.dropout(self.encoder(input))
     pred = self.decoder(hidden)
     if autograd.is_training():
         return pred * np.sign(input)
     else:
         return pred
Example #25
 def forward(self, x):
     x = self.layer(x)
     # Note that the loss function has the sigmoid operation for better numerical stability. When
     # doing inference, we need to add the sigmoid function to the model.
     if not autograd.is_training():
         x = nd.sigmoid(x)
     return x
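The comment above refers to a loss that fuses the sigmoid for numerical stability; in Gluon this would typically be the loss below (a sketch of the likely pairing, not shown in the source):

from mxnet import gluon

# from_sigmoid=False tells the loss that the model output is a raw logit,
# so the sigmoid is applied inside the loss in a numerically stable way
loss_fn = gluon.loss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=False)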
Example #26
    def hybrid_forward(self, F, x):
        routes = []
        for stage in self.stages:
            x = stage(x)
            routes.append(x)
            
        all_dets = []
        all_box_centers = []
        all_box_scales = []
        all_objectness = []
        all_class_pred = []
        all_anchors = []
        all_offsets = []
        for i, block, output in zip(range(len(routes)), self.blocks, self.outputs):
            x, tip = block(x)
            print('tip shape: {}'.format(tip.shape))
            if autograd.is_training():
                dets, box_centers, box_scales, objness, class_pred, anchors, offsets = output(tip)
                all_dets.append(dets)
                all_box_centers.append(box_centers)
                all_box_scales.append(box_scales)
                all_objectness.append(objness)
                all_class_pred.append(class_pred)
                all_anchors.append(anchors)
                all_offsets.append(offsets)
            if i >= len(routes) - 1:
                break
            x = self.transitions[i](x)
            upsample = _upsample(x, stride=2)
            route_now = routes[::-1][i + 1]
            x = F.concat(F.slice_like(upsample, route_now * 0, axes=(2, 3)), route_now, dim=1)

        return all_dets, all_box_centers, all_box_scales, all_objectness, all_class_pred, all_anchors, all_offsets
Example #27
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    """
    moving_mean / moving_var are not used for normalization during training; they are the
    running statistics that serve as the mean and variance at inference time.
    During training, BN normalizes X with the mean and variance computed from the batch itself.
    For convolutional inputs, the mean and variance are computed independently per channel.
    """
    if not autograd.is_training():
        # At inference time, normalize the samples with the moving mean and variance
        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
    else:
        # At training time, normalize the samples with the mini-batch mean and variance
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected layer: compute the mean and variance over the feature dimension
            mean = X.mean(axis=0)
            var = ((X - mean) ** 2).mean(axis=0)
        else:
            # 2-D convolutional layer: compute the mean and variance per channel (axis=1).
            # Keep the shape of X so that broadcasting works later.
            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)

        # In training mode, standardize with the current mean and variance
        X_hat = (X - mean) / nd.sqrt(var + eps)
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean, moving_var
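A minimal usage sketch for the batch_norm helper above (shapes and hyper-parameters are illustrative only); note that the returned running statistics have to be fed back into the next call:

from mxnet import autograd, nd

X = nd.random.normal(shape=(8, 16))                    # a batch of 8 vectors with 16 features
gamma, beta = nd.ones((16,)), nd.zeros((16,))
moving_mean, moving_var = nd.zeros((16,)), nd.ones((16,))

with autograd.record():                                # makes autograd.is_training() return True
    Y, moving_mean, moving_var = batch_norm(X, gamma, beta, moving_mean, moving_var,
                                            eps=1e-5, momentum=0.9)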
Example #28
 def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
     """Hybrid forward"""
     if not autograd.is_training():
         return F.BatchNorm(x,
                            gamma,
                            beta,
                            running_mean,
                            running_var,
                            name='fwd',
                            **self._kwargs)
     isum, isqu = F.SumSquare(x)
     #isum = x.sum(axis=1, exclude=True)
     #isqu = (x**2).sum(axis=1, exclude=True)
     N = self.ndevices * x.shape[0] * x.shape[2] * x.shape[3]
     allreduce = AllReduce(self._prefix)
     osum, osqu = allreduce(isum, isqu)
     # calc mean and std
     mean = osum / N
     sumvar = osqu - osum * osum / N
     bias_var = sumvar / N
     std = F.sqrt(F.maximum(bias_var, self.eps))
     # update running mean and var
     with autograd.pause():
         unbias_var = sumvar / (N - 1)
         self.updater(self.running_mean, self.running_var, mean, unbias_var,
                      self.momentum, x.context)
     # update running mean and var
     output = F.DecoupleBatchNorm(x, gamma, beta, mean, std)
     return output
Example #29
def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
    # Use autograd to determine whether we are in training mode or prediction mode
    if not autograd.is_training():
        # In prediction mode, directly use the moving-average mean and variance passed in

        # standardize
        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:  # 2-D input: the preceding layer is fully connected
            # With a fully connected layer, compute the mean and variance over the feature dimension
            mean = X.mean(axis=0)
            var = ((X - mean)**2).mean(axis=0)  # axis=0 averages along the batch (vertical) direction
        else:  # otherwise the preceding layer is a convolution

            # With a 2-D convolutional layer, compute the mean and variance per channel (axis=1).
            # Keep the shape of X so that broadcasting works later.

            # A convolutional input has 4 dimensions: sample index, channel, height, width.
            # The channel dimension is kept out of the averaging here.

            mean = X.mean(axis=(0, 2, 3), keepdims=True)
            var = ((X - mean)**2).mean(axis=(0, 2, 3), keepdims=True)
        # In training mode, standardize with the current mean and variance
        X_hat = (X - mean) / nd.sqrt(var + eps)
        # Update the moving-average mean and variance
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta
    return Y, moving_mean, moving_var
Example #30
    def hybrid_forward(self, F, x, img):
        """Forward RPN.

        The behavior during training and inference is different.

        Parameters
        ----------
        x : mxnet.nd.NDArray or mxnet.symbol
            Feature tensor.
        img : mxnet.nd.NDArray or mxnet.symbol
            The original input image.

        Returns
        -------
        (rpn_score, rpn_box)
            Returns predicted scores and regions which are candidates of objects.

        """
        anchors = self.anchor_generator(x)
        x = self.conv1(x)
        raw_rpn_scores = self.score(x).transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 1))
        rpn_scores = F.sigmoid(raw_rpn_scores)
        rpn_box_pred = self.loc(x).transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 4))
        rpn_score, rpn_box = self.region_proposaler(
            anchors, rpn_scores, rpn_box_pred, img)
        if autograd.is_training():
            # return raw predictions as well in training for bp
            return rpn_score, rpn_box, raw_rpn_scores, rpn_box_pred, anchors
        return rpn_score, rpn_box
Example #31
    def hybrid_forward(self, F, x, **kwargs):
        features = self.features(x)
        cls_preds = [F.flatten(F.transpose(cp(feat), (0, 2, 3, 1)))
                     for feat, cp in zip(features, self.class_predictors)]
        box_preds = [F.flatten(F.transpose(bp(feat), (0, 2, 3, 1)))
                     for feat, bp in zip(features, self.box_predictors)]
        anchors = [F.reshape(ag(feat), shape=(1, -1))
                   for feat, ag in zip(features, self.anchor_generators)]
        cls_preds = F.concat(*cls_preds, dim=1).reshape((0, -1, self.num_classes + 1))
        box_preds = F.concat(*box_preds, dim=1).reshape((0, -1, 4))
        anchors = F.concat(*anchors, dim=1).reshape((1, -1, 4))

        if autograd.is_training():
            return [cls_preds, box_preds, anchors]
        bboxes = self.bbox_decoder(box_preds, anchors)
        cls_ids, scores = self.cls_decoder(F.softmax(cls_preds, axis=-1))
        results = []
        for i in range(self.num_classes):
            cls_id = cls_ids.slice_axis(axis=-1, begin=i, end=i+1)
            score = scores.slice_axis(axis=-1, begin=i, end=i+1)
            # per class results
            per_result = F.concat(*[cls_id, score, bboxes], dim=-1)
            results.append(per_result)
        result = F.concat(*results, dim=1)
        if self.nms_thresh > 0 and self.nms_thresh < 1:
            result = F.contrib.box_nms(
                result, overlap_thresh=self.nms_thresh, topk=self.nms_topk, valid_thresh=0.01,
                id_index=0, score_index=1, coord_start=2, force_suppress=False)
            if self.post_nms > 0:
                result = result.slice_axis(axis=1, begin=0, end=self.post_nms)
        ids = F.slice_axis(result, axis=2, begin=0, end=1)
        scores = F.slice_axis(result, axis=2, begin=1, end=2)
        bboxes = F.slice_axis(result, axis=2, begin=2, end=6)
        return ids, scores, bboxes
Example #32
    def hybrid_forward(self, F, x):
        print('F: ', F)
        print('x: ', x)
        x = self.features(x)
        x = self.deconv_layers(x)
        ret = []
        # 2dpose task -> 0: hm, 1: wh, 2: hps, 3: reg, 4: hm_hp, 5:hp_offset
        for head in self.heads:
            ret.append(self.__getattribute__(head)(x))

        if autograd.is_training():
            # during training, just need to return the tensor for computing loss
            print("training mode")
            #return [ret]
            return ret
        else:
            # during inference, need to decode the output tensor into actual detections
            # detections is composed of several things: detections = nd.concat(bboxes, scores, kps, clses, dim=2)
            # detections = decode_centernet_pose(heat, wh, kps, reg, hm_hp, hp_offset, K=100)
            print("inference mode")
            #detections = symbolic_decode_centernet_pose(F, ret[0].sigmoid(), ret[1], ret[2], ret[3], ret[4].sigmoid(), ret[5], K=10)
            detections = symbolic_decode_centernet_pose(F,
                                                        ret[0].sigmoid(),
                                                        ret[1],
                                                        ret[2],
                                                        K=10)
            print("decode finished!")
            detections.save("symbol-detections.json")
            return detections
Example #33
    def hybrid_forward(self, F, x, anchors, offsets):
        """Hybrid Foward of YOLOV3Output layer.

        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        x : mxnet.nd.NDArray
            Input feature map.
        anchors : mxnet.nd.NDArray
            Anchors loaded from self, no need to supply.
        offsets : mxnet.nd.NDArray
            Offsets loaded from self, no need to supply.

        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            During training, return (bbox, raw_box_centers, raw_box_scales, objness,
            class_pred, anchors, offsets).
            During inference, return detections.

        """
        # prediction flat to (batch, pred per pixel, height * width)
        pred = self.prediction(x).reshape((0, self._num_anchors * self._num_pred, -1))
        # transpose to (batch, height * width, num_anchor, num_pred)
        pred = pred.transpose(axes=(0, 2, 1)).reshape((0, -1, self._num_anchors, self._num_pred))
        # components
        raw_box_centers = pred.slice_axis(axis=-1, begin=0, end=2)
        raw_box_scales = pred.slice_axis(axis=-1, begin=2, end=4)
        objness = pred.slice_axis(axis=-1, begin=4, end=5)
        class_pred = pred.slice_axis(axis=-1, begin=5, end=None)

        # valid offsets, (1, 1, height, width, 2)
        offsets = F.slice_like(offsets, x * 0, axes=(2, 3))
        # reshape to (1, height*width, 1, 2)
        offsets = offsets.reshape((1, -1, 1, 2))

        box_centers = F.broadcast_add(F.sigmoid(raw_box_centers), offsets) * self._stride
        box_scales = F.broadcast_mul(F.exp(raw_box_scales), anchors)
        confidence = F.sigmoid(objness)
        class_score = F.broadcast_mul(F.sigmoid(class_pred), confidence)
        wh = box_scales / 2.0
        bbox = F.concat(box_centers - wh, box_centers + wh, dim=-1)

        if autograd.is_training():
            # during training, we don't need to convert this whole bunch of info into detection results
            return (bbox.reshape((0, -1, 4)), raw_box_centers, raw_box_scales,
                    objness, class_pred, anchors, offsets)

        # prediction per class
        bboxes = F.tile(bbox, reps=(self._classes, 1, 1, 1, 1))
        scores = F.transpose(class_score, axes=(3, 0, 1, 2)).expand_dims(axis=-1)
        ids = F.broadcast_add(scores * 0, F.arange(0, self._classes).reshape((0, 1, 1, 1, 1)))
        detections = F.concat(ids, scores, bboxes, dim=-1)
        # reshape to (B, xx, 6)
        detections = F.reshape(detections.transpose(axes=(1, 0, 2, 3, 4)), (0, -1, 6))
        return detections
Example #34
    def hybrid_forward(self, F, anchor, score, bbox_pred, img):
        """
        Generate proposals. Limit to batch-size=1 in current implementation.
        """
        if autograd.is_training():
            pre_nms = self._train_pre_nms
            post_nms = self._train_post_nms
        else:
            pre_nms = self._test_pre_nms
            post_nms = self._test_post_nms

        with autograd.pause():
            # restore bounding boxes
            roi = self._box_decoder(bbox_pred, self._box_to_center(anchor))

            # clip rois to image's boundary
            # roi = F.Custom(roi, img, op_type='bbox_clip_to_image')
            roi = self._clipper(roi, img)

            # remove bounding boxes that don't meet the min_size constraint
            # by setting them to (-1, -1, -1, -1)
            # width = roi.slice_axis(axis=-1, begin=2, end=3)
            # height = roi.slice_axis(axis=-1, begin=3, end=None)
            xmin, ymin, xmax, ymax = roi.split(axis=-1, num_outputs=4)
            width = xmax - xmin
            height = ymax - ymin
            # TODO:(zhreshold), there's im_ratio to handle here, but it requires
            # add' info, and we don't expect big difference
            invalid = (width < self._min_size) + (height < self._min_size)

            # # remove out of bound anchors
            # axmin, aymin, axmax, aymax = F.split(anchor, axis=-1, num_outputs=4)
            # # it's a bit tricky to get right/bottom boundary in hybridblock
            # wrange = F.arange(0, 2560).reshape((1, 1, 1, 2560)).slice_like(
            #    img, axes=(3)).max().reshape((1, 1, 1))
            # hrange = F.arange(0, 2560).reshape((1, 1, 2560, 1)).slice_like(
            #    img, axes=(2)).max().reshape((1, 1, 1))
            # invalid = (axmin < 0) + (aymin < 0) + F.broadcast_greater(axmax, wrange) + \
            #    F.broadcast_greater(aymax, hrange)
            # prevent invalid anchors from suppressing anchors with 0 confidence
            score = F.where(invalid, F.ones_like(invalid) * -1, score)
            invalid = F.repeat(invalid, axis=-1, repeats=4)
            roi = F.where(invalid, F.ones_like(invalid) * -1, roi)

            # Non-maximum suppression
            pre = F.concat(score, roi, dim=-1)
            tmp = F.contrib.box_nms(pre, overlap_thresh=self._nms_thresh, topk=pre_nms,
                                    coord_start=1, score_index=0, id_index=-1, force_suppress=True)

            # slice post_nms number of boxes
            result = F.slice_axis(tmp, axis=1, begin=0, end=post_nms)
            rpn_scores = F.slice_axis(result, axis=-1, begin=0, end=1)
            rpn_bbox = F.slice_axis(result, axis=-1, begin=1, end=None)

        return rpn_scores, rpn_bbox
Example #35
 def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var):
     """Hybrid forward"""
     if not autograd.is_training():
         return F.BatchNorm(x, gamma, beta, running_mean, running_var, name='fwd',
                            **self._kwargs)
     isum, isqu = F.SumSquare(x)
     #isum = x.sum(axis=1, exclude=True)
     #isqu = (x**2).sum(axis=1, exclude=True)
     N = self.ndevices * x.shape[0] * x.shape[2] * x.shape[3]
     allreduce = AllReduce(self._prefix)
     osum, osqu = allreduce(isum, isqu)
     # calc mean and std
     mean = osum / N
     sumvar = osqu - osum * osum / N
     bias_var = sumvar / N
     std = F.sqrt(F.maximum(bias_var, self.eps))
     # update running mean and var
     with autograd.pause():
         unbias_var = sumvar / (N - 1)
         self.updater(self.running_mean, self.running_var, mean, unbias_var,
                      self.momentum, x.context)
     # update running mean and var
     output = F.DecoupleBatchNorm(x, gamma, beta, mean, std)
     return output
Example #36
    def hybrid_forward(self, F, x, gt_box=None):
        """Forward Mask RCNN network.

        The behavior during training and inference is different.

        Parameters
        ----------
        x : mxnet.nd.NDArray or mxnet.symbol
            The network input tensor.
        gt_box : type, only required during training
            The ground-truth bbox tensor with shape (1, N, 4).

        Returns
        -------
        (ids, scores, bboxes, masks)
            During inference, returns final class id, confidence scores, bounding
            boxes, segmentation masks.

        """
        if autograd.is_training():
            cls_pred, box_pred, rpn_box, samples, matches, \
            raw_rpn_score, raw_rpn_box, anchors, top_feat = \
                super(MaskRCNN, self).hybrid_forward(F, x, gt_box)
            mask_pred = self.mask(top_feat)
            return cls_pred, box_pred, mask_pred, rpn_box, samples, matches, \
                   raw_rpn_score, raw_rpn_box, anchors
        else:
            ids, scores, boxes, feat = \
                super(MaskRCNN, self).hybrid_forward(F, x)

            # (B, N * (C - 1), 1) -> (B, N * (C - 1)) -> (B, topk)
            num_rois = self._rcnn_max_dets
            order = F.argsort(scores.squeeze(axis=-1), axis=1, is_ascend=False)
            topk = F.slice_axis(order, axis=1, begin=0, end=num_rois)

            # pick from (B, N * (C - 1), X) to (B * topk, X) -> (B, topk, X)
            # roi_batch_id = F.arange(0, self._max_batch, repeat=num_rois)
            roi_batch_id = F.arange(0, self._max_batch)
            roi_batch_id = F.repeat(roi_batch_id, num_rois)
            indices = F.stack(roi_batch_id, topk.reshape((-1,)), axis=0)
            ids = F.gather_nd(ids, indices).reshape((-4, self._max_batch, num_rois, 1))
            scores = F.gather_nd(scores, indices).reshape((-4, self._max_batch, num_rois, 1))
            boxes = F.gather_nd(boxes, indices).reshape((-4, self._max_batch, num_rois, 4))

            # create batch id and reshape for roi pooling
            padded_rois = F.concat(roi_batch_id.reshape((-1, 1)), boxes.reshape((-3, 0)), dim=-1)
            padded_rois = F.stop_gradient(padded_rois)

            # pool to roi features
            if self.num_stages > 1:
                # using FPN
                pooled_feat = self._pyramid_roi_feats(F, feat, padded_rois, self._roi_size,
                                                      self._strides, roi_mode=self._roi_mode)
            else:
                if self._roi_mode == 'pool':
                    pooled_feat = F.ROIPooling(
                        feat[0], padded_rois, self._roi_size, 1. / self._strides)
                elif self._roi_mode == 'align':
                    pooled_feat = F.contrib.ROIAlign(
                        feat[0], padded_rois, self._roi_size, 1. / self._strides, sample_ratio=2)
                else:
                    raise ValueError("Invalid roi mode: {}".format(self._roi_mode))

            # run top_features again
            if self.top_features is not None:
                top_feat = self.top_features(pooled_feat)
            else:
                top_feat = pooled_feat
            # (B, N, C, pooled_size * 2, pooled_size * 2)
            rcnn_mask = self.mask(top_feat)
            # index the B dimension (B * N,)
            # batch_ids = F.arange(0, self._max_batch, repeat=num_rois)
            batch_ids = F.arange(0, self._max_batch)
            batch_ids = F.repeat(batch_ids, num_rois)
            # index the N dimension (B * N,)
            roi_ids = F.tile(F.arange(0, num_rois), reps=self._max_batch)
            # index the C dimension (B * N,)
            class_ids = ids.reshape((-1,))
            # clip to 0 to max class
            class_ids = F.clip(class_ids, 0, self.num_class)
            # pick from (B, N, C, PS*2, PS*2) -> (B * N, PS*2, PS*2)
            indices = F.stack(batch_ids, roi_ids, class_ids, axis=0)
            masks = F.gather_nd(rcnn_mask, indices)
            # (B * N, PS*2, PS*2) -> (B, N, PS*2, PS*2)
            masks = masks.reshape((-4, self._max_batch, num_rois, 0, 0))
            # output prob
            masks = F.sigmoid(masks)

            # ids (B, N, 1), scores (B, N, 1), boxes (B, N, 4), masks (B, N, PS*2, PS*2)
            return ids, scores, boxes, masks
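The stack + gather_nd pattern used above to pick per-roi, per-class entries is dense; a toy illustration with small, made-up shapes:

from mxnet import nd

data = nd.arange(24).reshape((2, 3, 4))          # (B=2, N=3, X=4)
batch_ids = nd.array([0, 0, 1, 1])               # which batch element to take
row_ids = nd.array([2, 0, 1, 2])                 # which row inside that element
indices = nd.stack(batch_ids, row_ids, axis=0)   # (2, 4): one column per picked entry
picked = nd.gather_nd(data, indices)             # (4, 4): data[b, n] for each column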
Example #37
    def hybrid_forward(self, F, x, gt_box=None):
        """Forward Faster-RCNN network.

        The behavior during training and inference is different.

        Parameters
        ----------
        x : mxnet.nd.NDArray or mxnet.symbol
            The network input tensor.
        gt_box : type, only required during training
            The ground-truth bbox tensor with shape (1, N, 4).

        Returns
        -------
        (ids, scores, bboxes)
            During inference, returns final class id, confidence scores, bounding
            boxes.

        """
        def _split(x, axis, num_outputs, squeeze_axis):
            x = F.split(x, axis=axis, num_outputs=num_outputs, squeeze_axis=squeeze_axis)
            if isinstance(x, list):
                return x
            else:
                return [x]

        feat = self.features(x)
        # RPN proposals
        if autograd.is_training():
            rpn_score, rpn_box, raw_rpn_score, raw_rpn_box, anchors = \
                self.rpn(feat, F.zeros_like(x))
            rpn_box, samples, matches = self.sampler(rpn_box, rpn_score, gt_box)
        else:
            _, rpn_box = self.rpn(feat, F.zeros_like(x))

        # create batchid for roi
        num_roi = self._num_sample if autograd.is_training() else self._rpn_test_post_nms
        with autograd.pause():
            roi_batchid = F.arange(0, self._max_batch, repeat=num_roi)
            # remove batch dim because ROIPooling require 2d input
            rpn_roi = F.concat(*[roi_batchid.reshape((-1, 1)), rpn_box.reshape((-1, 4))], dim=-1)
            rpn_roi = F.stop_gradient(rpn_roi)

        # ROI features
        if self._roi_mode == 'pool':
            pooled_feat = F.ROIPooling(feat, rpn_roi, self._roi_size, 1. / self._stride)
        elif self._roi_mode == 'align':
            pooled_feat = F.contrib.ROIAlign(feat, rpn_roi, self._roi_size, 1. / self._stride,
                                             sample_ratio=2)
        else:
            raise ValueError("Invalid roi mode: {}".format(self._roi_mode))

        # RCNN prediction
        top_feat = self.top_features(pooled_feat)
        avg_feat = self.global_avg_pool(top_feat)
        cls_pred = self.class_predictor(avg_feat)
        box_pred = self.box_predictor(avg_feat)
        # cls_pred (B * N, C) -> (B, N, C)
        cls_pred = cls_pred.reshape((self._max_batch, num_roi, self.num_class + 1))
        # box_pred (B * N, C * 4) -> (B, N, C, 4)
        box_pred = box_pred.reshape((self._max_batch, num_roi, self.num_class, 4))

        # no need to convert bounding boxes in training, just return
        if autograd.is_training():
            if self._additional_output:
                return (cls_pred, box_pred, rpn_box, samples, matches,
                        raw_rpn_score, raw_rpn_box, anchors, top_feat)
            return (cls_pred, box_pred, rpn_box, samples, matches,
                    raw_rpn_score, raw_rpn_box, anchors)

        # cls_ids (B, N, C), scores (B, N, C)
        cls_ids, scores = self.cls_decoder(F.softmax(cls_pred, axis=-1))
        # cls_ids, scores (B, N, C) -> (B, C, N) -> (B, C, N, 1)
        cls_ids = cls_ids.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        scores = scores.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        # box_pred (B, N, C, 4) -> (B, C, N, 4)
        box_pred = box_pred.transpose((0, 2, 1, 3))

        # rpn_boxes (B, N, 4) -> B * (1, N, 4)
        rpn_boxes = _split(rpn_box, axis=0, num_outputs=self._max_batch, squeeze_axis=False)
        # cls_ids, scores (B, C, N, 1) -> B * (C, N, 1)
        cls_ids = _split(cls_ids, axis=0, num_outputs=self._max_batch, squeeze_axis=True)
        scores = _split(scores, axis=0, num_outputs=self._max_batch, squeeze_axis=True)
        # box_preds (B, C, N, 4) -> B * (C, N, 4)
        box_preds = _split(box_pred, axis=0, num_outputs=self._max_batch, squeeze_axis=True)

        # per batch predict, nms, each class has topk outputs
        results = []
        for rpn_box, cls_id, score, box_pred in zip(rpn_boxes, cls_ids, scores, box_preds):
            # box_pred (C, N, 4) rpn_box (1, N, 4) -> bbox (C, N, 4)
            bbox = self.box_decoder(box_pred, self.box_to_center(rpn_box))
            # res (C, N, 6)
            res = F.concat(*[cls_id, score, bbox], dim=-1)
            # res (C, self.nms_topk, 6)
            res = F.contrib.box_nms(
                res, overlap_thresh=self.nms_thresh, topk=self.nms_topk, valid_thresh=0.0001,
                id_index=0, score_index=1, coord_start=2, force_suppress=True)
            # res (C * self.nms_topk, 6)
            res = res.reshape((-3, 0))
            results.append(res)

        # result B * (C * topk, 6) -> (B, C * topk, 6)
        result = F.stack(*results, axis=0)
        ids = F.slice_axis(result, axis=-1, begin=0, end=1)
        scores = F.slice_axis(result, axis=-1, begin=1, end=2)
        bboxes = F.slice_axis(result, axis=-1, begin=2, end=6)
        if self._additional_output:
            return ids, scores, bboxes, feat
        return ids, scores, bboxes
Example #38
    def hybrid_forward(self, F, img, *x):
        """Forward RPN.

        The behavior during training and inference is different.

        Parameters
        ----------
        img : mxnet.nd.NDArray or mxnet.symbol
            The original input image.
        x : mxnet.nd.NDArray or mxnet.symbol(s)
            Feature tensor(s).

        Returns
        -------
        (rpn_score, rpn_box)
            Returns predicted scores and regions which are candidates of objects.

        """
        if autograd.is_training():
            pre_nms = self._train_pre_nms
            post_nms = self._train_post_nms
        else:
            pre_nms = self._test_pre_nms
            post_nms = self._test_post_nms
        anchors = []
        rpn_pre_nms_proposals = []
        raw_rpn_scores = []
        raw_rpn_boxes = []
        if self._multi_level:
            # Generate anchors in [P2, P3, P4, P5, P6] order
            for i, feat in enumerate(x):
                ag = self.anchor_generator[i]
                anchor = ag(feat)
                rpn_score, rpn_box, raw_rpn_score, raw_rpn_box = \
                    self.rpn_head(feat)
                rpn_pre = self.region_proposer(anchor, rpn_score,
                                               rpn_box, img)
                anchors.append(anchor)
                rpn_pre_nms_proposals.append(rpn_pre)
                raw_rpn_scores.append(raw_rpn_score)
                raw_rpn_boxes.append(raw_rpn_box)
            rpn_pre_nms_proposals = F.concat(*rpn_pre_nms_proposals, dim=1)
            raw_rpn_scores = F.concat(*raw_rpn_scores, dim=1)
            raw_rpn_boxes = F.concat(*raw_rpn_boxes, dim=1)
        else:
            x = x[0]
            anchors = self.anchor_generator(x)
            x = self.conv1(x)
            raw_rpn_scores = self.score(x).transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 1))
            rpn_scores = F.sigmoid(F.stop_gradient(raw_rpn_scores))
            raw_rpn_boxes = self.loc(x).transpose(axes=(0, 2, 3, 1)).reshape((0, -1, 4))
            rpn_boxes = F.stop_gradient(raw_rpn_boxes)
            rpn_pre_nms_proposals = self.region_proposer(
                anchors, rpn_scores, rpn_boxes, img)

        # Non-maximum suppression
        with autograd.pause():
            tmp = F.contrib.box_nms(rpn_pre_nms_proposals, overlap_thresh=self._nms_thresh,
                                    topk=pre_nms, coord_start=1, score_index=0, id_index=-1,
                                    force_suppress=True)

            # slice post_nms number of boxes
            result = F.slice_axis(tmp, axis=1, begin=0, end=post_nms)
            rpn_scores = F.slice_axis(result, axis=-1, begin=0, end=1)
            rpn_boxes = F.slice_axis(result, axis=-1, begin=1, end=None)

        if autograd.is_training():
            # return raw predictions as well in training for bp
            return rpn_scores, rpn_boxes, raw_rpn_scores, raw_rpn_boxes, anchors
        return rpn_scores, rpn_boxes
Example #39
    def forward(self, word_inputs, tag_inputs, arc_targets=None, rel_targets=None):
        """Run decoding

        Parameters
        ----------
        word_inputs : mxnet.ndarray.NDArray
            word indices of seq_len x batch_size
        tag_inputs : mxnet.ndarray.NDArray
            tag indices of seq_len x batch_size
        arc_targets : mxnet.ndarray.NDArray
            gold arc indices of seq_len x batch_size
        rel_targets : mxnet.ndarray.NDArray
            gold rel indices of seq_len x batch_size
        Returns
        -------
        tuple
            During training, returns (arc_accuracy, rel_accuracy, overall_accuracy, loss).
            Otherwise, if gold targets are given, returns (arc_accuracy, rel_accuracy,
            overall_accuracy, outputs); else returns outputs, where outputs is a list of
            (arcs, rels).
        """
        is_train = autograd.is_training()

        def flatten_numpy(ndarray):
            """Flatten nd-array to 1-d column vector

            Parameters
            ----------
            ndarray : numpy.ndarray
                input tensor

            Returns
            -------
            numpy.ndarray
                A column vector

            """
            return np.reshape(ndarray, (-1,), 'F')

        batch_size = word_inputs.shape[1]
        seq_len = word_inputs.shape[0]
        mask = np.greater(word_inputs, self._vocab.ROOT).astype(np.float32)
        num_tokens = int(np.sum(mask))  # non padding, non root token number

        if is_train or arc_targets is not None:
            mask_1D = flatten_numpy(mask)
            mask_1D_tensor = nd.array(mask_1D)

        unked_words = np.where(word_inputs < self._vocab.words_in_train, word_inputs, self._vocab.UNK)
        word_embs = self.word_embs(nd.array(unked_words, dtype='int'))
        if self.pret_word_embs:
            word_embs = word_embs + self.pret_word_embs(nd.array(word_inputs))
        tag_embs = self.tag_embs(nd.array(tag_inputs))

        # Dropout
        emb_inputs = nd.concat(word_embs, tag_embs, dim=2)  # seq_len x batch_size

        top_recur = biLSTM(self.f_lstm, self.b_lstm, emb_inputs, batch_size,
                           dropout_x=self.dropout_lstm_input if is_train else 0)
        top_recur = nd.Dropout(data=top_recur, axes=[0], p=self.dropout_mlp)

        W_dep, b_dep = self.mlp_dep_W.data(), self.mlp_dep_b.data()
        W_head, b_head = self.mlp_head_W.data(), self.mlp_head_b.data()
        dep, head = leaky_relu(nd.dot(top_recur, W_dep.T) + b_dep), leaky_relu(nd.dot(top_recur, W_head.T) + b_head)
        dep, head = nd.Dropout(data=dep, axes=[0], p=self.dropout_mlp), nd.Dropout(data=head, axes=[0],
                                                                                       p=self.dropout_mlp)
        dep, head = nd.transpose(dep, axes=[2, 0, 1]), nd.transpose(head, axes=[2, 0, 1])
        dep_arc, dep_rel = dep[:self.mlp_arc_size], dep[self.mlp_arc_size:]
        head_arc, head_rel = head[:self.mlp_arc_size], head[self.mlp_arc_size:]

        W_arc = self.arc_W.data()
        arc_logits = bilinear(dep_arc, W_arc, head_arc, self.mlp_arc_size, seq_len, batch_size, num_outputs=1,
                              bias_x=True, bias_y=False)
        # (#head x #dep) x batch_size

        flat_arc_logits = reshape_fortran(arc_logits, (seq_len, seq_len * batch_size))
        # (#head ) x (#dep x batch_size)

        arc_preds = arc_logits.argmax(0)
        # seq_len x batch_size

        if is_train or arc_targets is not None:
            correct = np.equal(arc_preds.asnumpy(), arc_targets)
            arc_correct = correct.astype(np.float32) * mask
            arc_accuracy = np.sum(arc_correct) / num_tokens
            targets_1D = flatten_numpy(arc_targets)
            losses = self.softmax_loss(flat_arc_logits, nd.array(targets_1D))
            arc_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            arc_probs = np.transpose(
                np.reshape(nd.softmax(flat_arc_logits, axis=0).asnumpy(), (seq_len, seq_len, batch_size), 'F'))
        # #batch_size x #dep x #head

        W_rel = self.rel_W.data()
        rel_logits = bilinear(dep_rel, W_rel, head_rel, self.mlp_rel_size, seq_len, batch_size,
                              num_outputs=self._vocab.rel_size, bias_x=True, bias_y=True)
        # (#head x rel_size x #dep) x batch_size

        flat_rel_logits = reshape_fortran(rel_logits, (seq_len, self._vocab.rel_size, seq_len * batch_size))
        # (#head x rel_size) x (#dep x batch_size)

        _target_vec = nd.array(targets_1D if is_train else flatten_numpy(arc_preds.asnumpy())).reshape(
            seq_len * batch_size, 1)
        _target_mat = _target_vec * nd.ones((1, self._vocab.rel_size))

        partial_rel_logits = nd.pick(flat_rel_logits, _target_mat.T, axis=0)
        # (rel_size) x (#dep x batch_size)

        if is_train or arc_targets is not None:
            rel_preds = partial_rel_logits.argmax(0)
            targets_1D = flatten_numpy(rel_targets)
            rel_correct = np.equal(rel_preds.asnumpy(), targets_1D).astype(np.float32) * mask_1D
            rel_accuracy = np.sum(rel_correct) / num_tokens
            losses = self.softmax_loss(partial_rel_logits, nd.array(targets_1D))
            rel_loss = nd.sum(losses * mask_1D_tensor) / num_tokens

        if not is_train:
            rel_probs = np.transpose(np.reshape(nd.softmax(flat_rel_logits.transpose([1, 0, 2]), axis=0).asnumpy(),
                                                (self._vocab.rel_size, seq_len, seq_len, batch_size), 'F'))
        # batch_size x #dep x #head x #nclasses

        if is_train or arc_targets is not None:
            loss = arc_loss + rel_loss
            correct = rel_correct * flatten_numpy(arc_correct)
            overall_accuracy = np.sum(correct) / num_tokens

        if is_train:
            return arc_accuracy, rel_accuracy, overall_accuracy, loss

        outputs = []

        for msk, arc_prob, rel_prob in zip(np.transpose(mask), arc_probs, rel_probs):
            # parse sentences one by one
            msk[0] = 1.
            sent_len = int(np.sum(msk))
            arc_pred = arc_argmax(arc_prob, sent_len, msk)
            rel_prob = rel_prob[np.arange(len(arc_pred)), arc_pred]
            rel_pred = rel_argmax(rel_prob, sent_len)
            outputs.append((arc_pred[1:sent_len], rel_pred[1:sent_len]))

        if arc_targets is not None:
            return arc_accuracy, rel_accuracy, overall_accuracy, outputs
        return outputs
Example #40
    def hybrid_forward(self, F, x, *args):
        """YOLOV3 network hybrid forward.
        Parameters
        ----------
        F : mxnet.nd or mxnet.sym
            `F` is mxnet.sym if hybridized or mxnet.nd if not.
        x : mxnet.nd.NDArray
            Input data.
        *args : optional, mxnet.nd.NDArray
            During training, extra inputs are required:
            (gt_boxes, obj_t, centers_t, scales_t, weights_t, clas_t)
            These are generated by YOLOV3PrefetchTargetGenerator in dataloader transform function.
        Returns
        -------
        (tuple of) mxnet.nd.NDArray
            During inference, return detections in shape (B, N, 6)
            with format (cid, score, xmin, ymin, xmax, ymax)
            During training, return losses only: (obj_loss, center_loss, scale_loss, cls_loss).
        """
        all_box_centers = []
        all_box_scales = []
        all_objectness = []
        all_class_pred = []
        all_anchors = []
        all_offsets = []
        all_feat_maps = []
        all_detections = []
        routes = []
        for stage, block, output in zip(self.stages, self.yolo_blocks, self.yolo_outputs):
            x = stage(x)
            routes.append(x)

        # the YOLO output layers are used in reverse order, i.e., from very deep layers to shallow
        for i, block, output in zip(range(len(routes)), self.yolo_blocks, self.yolo_outputs):
            x, tip = block(x)
            if autograd.is_training():
                dets, box_centers, box_scales, objness, class_pred, anchors, offsets = output(tip)
                all_box_centers.append(box_centers.reshape((0, -3, -1)))
                all_box_scales.append(box_scales.reshape((0, -3, -1)))
                all_objectness.append(objness.reshape((0, -3, -1)))
                all_class_pred.append(class_pred.reshape((0, -3, -1)))
                all_anchors.append(anchors)
                all_offsets.append(offsets)
                # here we use a fake featmap to reduce memory consumption; only shape[2, 3] is used
                fake_featmap = F.zeros_like(tip.slice_axis(
                    axis=0, begin=0, end=1).slice_axis(axis=1, begin=0, end=1))
                all_feat_maps.append(fake_featmap)
            else:
                dets = output(tip)
            all_detections.append(dets)
            if i >= len(routes) - 1:
                break
            # add transition layers
            x = self.transitions[i](x)
            # upsample feature map reverse to shallow layers
            upsample = _upsample(x, stride=2)
            route_now = routes[::-1][i + 1]
            x = F.concat(F.slice_like(upsample, route_now * 0, axes=(2, 3)), route_now, dim=1)

        if autograd.is_training():
            # during training, the network behaves differently since we don't need detection results
            if autograd.is_recording():
                # generate losses and return them directly
                box_preds = F.concat(*all_detections, dim=1)
                all_preds = [F.concat(*p, dim=1) for p in [
                    all_objectness, all_box_centers, all_box_scales, all_class_pred]]
                all_targets = self._target_generator(box_preds, *args)
                return self._loss(*(all_preds + all_targets))

            # return raw predictions, this is only used in DataLoader transform function.
            return (F.concat(*all_detections, dim=1), all_anchors, all_offsets, all_feat_maps,
                    F.concat(*all_box_centers, dim=1), F.concat(*all_box_scales, dim=1),
                    F.concat(*all_objectness, dim=1), F.concat(*all_class_pred, dim=1))

        # concat all detection results from different stages
        result = F.concat(*all_detections, dim=1)
        # apply nms per class
        if self.nms_thresh > 0 and self.nms_thresh < 1:
            result = F.contrib.box_nms(
                result, overlap_thresh=self.nms_thresh, valid_thresh=0.01,
                topk=self.nms_topk, id_index=0, score_index=1, coord_start=2, force_suppress=False)
            if self.post_nms > 0:
                result = result.slice_axis(axis=1, begin=0, end=self.post_nms)
        ids = result.slice_axis(axis=-1, begin=0, end=1)
        scores = result.slice_axis(axis=-1, begin=1, end=2)
        bboxes = result.slice_axis(axis=-1, begin=2, end=None)
        return ids, scores, bboxes
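A hedged sketch of how the final (ids, scores, bboxes) triple is typically consumed (the score threshold and printing are illustrative only); box_nms marks suppressed or padded entries with a class id of -1, so they are skipped:

for cid, score, box in zip(ids[0].asnumpy(), scores[0].asnumpy(), bboxes[0].asnumpy()):
    if cid[0] < 0 or score[0] < 0.5:
        continue                     # padded / suppressed entry or low-confidence detection
    print('class %d  score %.3f  bbox %s' % (int(cid[0]), score[0], box.tolist()))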