def grow_top_k(step_idx, alive_seq, alive_log_prob, parent_idx):
                pre_ids = alive_seq

                dec_step_emb = layers.embedding(
                    input=pre_ids,
                    size=[self.tar_vocab_size, self.hidden_size],
                    dtype='float32',
                    is_sparse=False,
                    param_attr=fluid.ParamAttr(
                        name='target_embedding',
                        initializer=fluid.initializer.UniformInitializer(
                            low=-self.init_scale, high=self.init_scale)))

                dec_att_out, new_hidden_array, new_cell_array = decoder_step(
                    dec_step_emb, pre_feed, pre_hidden_array, pre_cell_array,
                    enc_memory)

                projection = layers.matmul(dec_att_out, softmax_weight)

                logits = layers.softmax(projection)
                current_log = layers.elementwise_add(x=layers.log(logits),
                                                     y=alive_log_prob,
                                                     axis=0)
                base_1 = layers.cast(step_idx, 'float32') + 6.0
                base_1 /= 6.0
                length_penalty = layers.pow(base_1, alpha)

                len_pen = layers.pow(
                    ((5. + layers.cast(step_idx + 1, 'float32')) / 6.), alpha)

                current_log = layers.reshape(current_log, shape=[1, -1])

                current_log = current_log / length_penalty
                topk_scores, topk_indices = layers.topk(input=current_log,
                                                        k=beam_size)

                topk_scores = layers.reshape(topk_scores, shape=[-1])

                topk_log_probs = topk_scores * length_penalty

                generate_id = layers.reshape(topk_indices,
                                             shape=[-1]) % self.tar_vocab_size

                selected_beam = layers.reshape(
                    topk_indices, shape=[-1]) // self.tar_vocab_size

                topk_finished = layers.equal(generate_id, eos_ids)

                topk_finished = layers.cast(topk_finished, 'float32')

                generate_id = layers.reshape(generate_id, shape=[-1, 1])

                pre_tokens_list = layers.gather(tokens, selected_beam)

                full_tokens_list = layers.concat(
                    [pre_tokens_list, generate_id], axis=1)


                return full_tokens_list, topk_log_probs, topk_scores, topk_finished, selected_beam, generate_id, \
                        dec_att_out, new_hidden_array, new_cell_array
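The rescoring above is the GNMT-style length penalty ((5 + length) / 6) ** alpha: cumulative log-probabilities are divided by the penalty before the top-k selection and multiplied back afterwards to recover topk_log_probs. A minimal NumPy sketch of that round trip (the step, alpha and beam log-probabilities below are made-up values, not taken from the model):

import numpy as np

def length_penalty(length, alpha):
    # GNMT-style penalty, the same formula as base_1 ** alpha above
    return ((5.0 + length) / 6.0) ** alpha

alpha = 0.6
step = 4                                       # current decoding step (hypothetical)
log_probs = np.array([-1.2, -2.7, -3.4])       # cumulative log-probs of three beams (toy values)
lp = length_penalty(step + 1, alpha)
scores = log_probs / lp                        # what topk ranks on
topk_log_probs = scores * lp                   # recovered log-probs, as in the code above
assert np.allclose(topk_log_probs, log_probs)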
Example #2
    def bbox_ciou(self, boxes1_x0y0x1y1, boxes2_x0y0x1y1):
        '''
        Compute CIoU = IoU - p2/c2 - a*v
        :param boxes1_x0y0x1y1: (batch_size, num_priors, 4)   pred_x0y0x1y1
        :param boxes2_x0y0x1y1: (batch_size, num_priors, 4)   label_x0y0x1y1
        :return:
        '''

        # Convert to center coordinates and width/height
        boxes1 = P.concat(
            [(boxes1_x0y0x1y1[:, :, :2] + boxes1_x0y0x1y1[:, :, 2:]) * 0.5,
             boxes1_x0y0x1y1[:, :, 2:] - boxes1_x0y0x1y1[:, :, :2]],
            axis=-1)
        boxes2 = P.concat(
            [(boxes2_x0y0x1y1[:, :, :2] + boxes2_x0y0x1y1[:, :, 2:]) * 0.5,
             boxes2_x0y0x1y1[:, :, 2:] - boxes2_x0y0x1y1[:, :, :2]],
            axis=-1)

        # Areas of the two boxes
        boxes1_area = (boxes1_x0y0x1y1[:, :, 2] - boxes1_x0y0x1y1[:, :, 0]) * (
            boxes1_x0y0x1y1[:, :, 3] - boxes1_x0y0x1y1[:, :, 1])
        boxes2_area = (boxes2_x0y0x1y1[:, :, 2] - boxes2_x0y0x1y1[:, :, 0]) * (
            boxes2_x0y0x1y1[:, :, 3] - boxes2_x0y0x1y1[:, :, 1])

        # Top-left and bottom-right corners of the intersection
        left_up = P.elementwise_max(boxes1_x0y0x1y1[:, :, :2],
                                    boxes2_x0y0x1y1[:, :, :2])
        right_down = P.elementwise_min(boxes1_x0y0x1y1[:, :, 2:],
                                       boxes2_x0y0x1y1[:, :, 2:])

        # Intersection area (inter_area) and IoU
        inter_section = P.relu(right_down - left_up)
        inter_area = inter_section[:, :, 0] * inter_section[:, :, 1]
        union_area = boxes1_area + boxes2_area - inter_area
        iou = inter_area / union_area

        # Top-left and bottom-right corners of the enclosing box
        enclose_left_up = P.elementwise_min(boxes1_x0y0x1y1[:, :, :2],
                                            boxes2_x0y0x1y1[:, :, :2])
        enclose_right_down = P.elementwise_max(boxes1_x0y0x1y1[:, :, 2:],
                                               boxes2_x0y0x1y1[:, :, 2:])

        # Squared diagonal of the enclosing box
        enclose_wh = enclose_right_down - enclose_left_up
        enclose_c2 = P.pow(enclose_wh[:, :, 0], 2) + P.pow(
            enclose_wh[:, :, 1], 2)

        # Squared distance between the two box centers
        p2 = P.pow(boxes1[:, :, 0] - boxes2[:, :, 0], 2) + P.pow(
            boxes1[:, :, 1] - boxes2[:, :, 1], 2)

        # Add the a*v term. The denominator boxes2[:, :, 3] can be 0, so a tiny constant is added to avoid NaN
        atan1 = P.atan(boxes1[:, :, 2] / (boxes1[:, :, 3] + 1e-9))
        atan2 = P.atan(boxes2[:, :, 2] / (boxes2[:, :, 3] + 1e-9))
        v = 4.0 * P.pow(atan1 - atan2, 2) / (math.pi**2)
        a = v / (1 - iou + v)

        ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
        return ciou
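As a sanity check of the same formula outside the Paddle graph, here is a plain-Python CIoU for two corner-format boxes (the coordinates are invented; this is a sketch, not part of the original model):

import math

def ciou_xyxy(b1, b2, eps=1e-9):
    # b1, b2: [x0, y0, x1, y1]; returns ciou = iou - p2/c2 - a*v, as in bbox_ciou above
    w1, h1 = b1[2] - b1[0], b1[3] - b1[1]
    w2, h2 = b2[2] - b2[0], b2[3] - b2[1]
    inter_w = max(0.0, min(b1[2], b2[2]) - max(b1[0], b2[0]))
    inter_h = max(0.0, min(b1[3], b2[3]) - max(b1[1], b2[1]))
    inter = inter_w * inter_h
    iou = inter / (w1 * h1 + w2 * h2 - inter + eps)
    # squared diagonal of the enclosing box
    c2 = (max(b1[2], b2[2]) - min(b1[0], b2[0])) ** 2 + (max(b1[3], b2[3]) - min(b1[1], b2[1])) ** 2
    # squared distance between the two centers
    p2 = ((b1[0] + b1[2] - b2[0] - b2[2]) / 2) ** 2 + ((b1[1] + b1[3] - b2[1] - b2[3]) / 2) ** 2
    v = 4.0 / math.pi ** 2 * (math.atan(w1 / (h1 + eps)) - math.atan(w2 / (h2 + eps))) ** 2
    a = v / (1.0 - iou + v + eps)
    return iou - p2 / (c2 + eps) - a * v

print(ciou_xyxy([0., 0., 2., 2.], [1., 1., 3., 3.]))   # identical boxes would give 1.0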
Example #3
def norm(param, dim, power):
    powered = F.pow(param, power)
    in_dtype = powered.dtype
    if in_dtype == fluid.core.VarDesc.VarType.FP16:
        powered = F.cast(powered, "float32")
    powered_norm = F.reduce_sum(powered, dim=dim, keep_dim=False)
    norm_ = F.pow(powered_norm, 1. / power)
    if in_dtype == fluid.core.VarDesc.VarType.FP16:
        norm_ = F.cast(norm_, "float16")
    return norm_
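For reference, the same reduction in NumPy, which makes explicit that this is a p-norm along dim for non-negative inputs (the dim and power below are arbitrary):

import numpy as np

def np_norm(param, dim, power):
    # sum(x ** power over dim) ** (1 / power), mirroring norm() above
    return np.sum(np.power(param, power), axis=dim) ** (1.0 / power)

x = np.abs(np.random.randn(3, 4)).astype('float32')
print(np_norm(x, dim=1, power=2))   # per-row L2 norm
print(np.linalg.norm(x, axis=1))    # same values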
Example #4
def _iou_hw(box_a, box_b, eps=1e-9):
    """计算两组矩形两两之间的iou以及长宽比信息
    Args:
        box_a: (tensor) bounding boxes, Shape: [A, 4].
        box_b: (tensor) bounding boxes, Shape: [B, 4].
    Return:
      (tensor) iou, Shape: [A, B].
    """
    A = box_a.shape[0]
    B = box_b.shape[0]

    box_a_rb = L.reshape(box_a[:, 2:], (A, 1, 2))
    box_a_rb = L.expand(box_a_rb, [1, B, 1])
    box_b_rb = L.reshape(box_b[:, 2:], (1, B, 2))
    box_b_rb = L.expand(box_b_rb, [A, 1, 1])
    max_xy = L.elementwise_min(box_a_rb, box_b_rb)

    box_a_lu = L.reshape(box_a[:, :2], (A, 1, 2))
    box_a_lu = L.expand(box_a_lu, [1, B, 1])
    box_b_lu = L.reshape(box_b[:, :2], (1, B, 2))
    box_b_lu = L.expand(box_b_lu, [A, 1, 1])
    min_xy = L.elementwise_max(box_a_lu, box_b_lu)

    inter = L.relu(max_xy - min_xy)
    inter = inter[:, :, 0] * inter[:, :, 1]

    box_a_w = box_a[:, 2]-box_a[:, 0]
    box_a_h = box_a[:, 3]-box_a[:, 1]
    area_a = box_a_h * box_a_w
    area_a = L.reshape(area_a, (A, 1))
    area_a = L.expand(area_a, [1, B])  # [A, B]

    box_b_w = box_b[:, 2]-box_b[:, 0]
    box_b_h = box_b[:, 3]-box_b[:, 1]
    area_b = box_b_h * box_b_w
    area_b = L.reshape(area_b, (1, B))
    area_b = L.expand(area_b, [A, 1])  # [A, B]

    union = area_a + area_b - inter
    iou = inter / union  # [A, B]  IoU in [0, 1]; the larger the IoU, the more the box should be suppressed

    # Aspect-ratio term
    atan1 = L.atan(box_a_h / (box_a_w + eps))
    atan2 = L.atan(box_b_h / (box_b_w + eps))
    atan1 = L.reshape(atan1, (A, 1))
    atan1 = L.expand(atan1, [1, B])  # [A, B]
    atan2 = L.reshape(atan2, (1, B))
    atan2 = L.expand(atan2, [A, 1])  # [A, B]
    v = 4.0 * L.pow(atan1 - atan2, 2) / (math.pi ** 2)  # [A, B]  v in [0, 1]; the smaller v is, the more the box should be suppressed

    factor = 0.4
    overlap = L.pow(iou, (1 - factor)) * L.pow(1.0 - v, factor)

    return overlap
Example #5
def focal_loss(pred, label, alpha=0.25, gamma=2, epsilon=1e-6):
    '''
        Larger alpha puts more weight on (penalizes errors on) the foreground class.
        Larger gamma down-weights confident, easy examples so training focuses on hard ones.
    '''
    pred = layers.clip(pred, epsilon, 1 - epsilon)
    label = layers.clip(label, epsilon, 1 - epsilon)
    loss = -1 * (alpha * layers.pow(
        (1 - pred), gamma) * label * layers.log(pred) +
                 (1 - alpha) * layers.pow(pred, gamma) *
                 (1 - label) * layers.log(1 - pred))
    return loss
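A scalar NumPy version of the same loss makes the role of alpha and gamma described in the docstring concrete (the probabilities below are illustrative):

import numpy as np

def focal_np(pred, label, alpha=0.25, gamma=2, epsilon=1e-6):
    pred = np.clip(pred, epsilon, 1 - epsilon)
    label = np.clip(label, epsilon, 1 - epsilon)
    return -(alpha * (1 - pred) ** gamma * label * np.log(pred)
             + (1 - alpha) * pred ** gamma * (1 - label) * np.log(1 - pred))

print(focal_np(0.95, 1.0))   # confident, correct positive: tiny loss
print(focal_np(0.95, 0.0))   # confident mistake: much larger loss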
Example #6
    def sigmoid_focal_loss(self, x, label, fg_num, gamma=2.0, alpha=0.25):
        C = x.shape[1]
        eye = paddle.eye(C + 1, dtype='float32')
        one_hot = L.gather(eye, label)
        pos_mask = one_hot[:, 1:]  # positive-sample mask

        p = L.sigmoid(x)  # [batch_size * num_cells, 80], predicted class probabilities
        pos_loss = pos_mask * (0 - L.log(p + 1e-9)) * L.pow(1 - p,
                                                            gamma) * alpha
        neg_loss = (1.0 - pos_mask) * (0 - L.log(1 - p + 1e-9)) * L.pow(
            p, gamma) * (1 - alpha)
        focal_loss = pos_loss + neg_loss
        if fg_num > 0.5:  # When there is no gt (fg_num == 0), leave focal_loss unnormalized.
            focal_loss = focal_loss / fg_num
        return focal_loss
Example #7
    def is_finished(self, step_idx, source_length, alive_log_probs, finished_scores, finished_in_finished):
        """
            is_finished
        """
        base_1 = layers.cast(source_length, 'float32') + 55.0
        base_1 /= 6.0
        max_length_penalty = layers.pow(base_1, self.alpha)

        flat_alive_log_probs = layers.reshape(alive_log_probs, [-1])
        lower_bound_alive_scores_1 = layers.gather(flat_alive_log_probs, [self.get_alive_index])
        
        lower_bound_alive_scores = lower_bound_alive_scores_1 / max_length_penalty
        
        lowest_score_of_finished_in_finish = layers.reduce_min(finished_scores * finished_in_finished, dim=1)

        finished_in_finished = layers.cast(finished_in_finished, 'bool')
        lowest_score_of_finished_in_finish += \
                        ((1.0 - layers.cast(layers.reduce_any(finished_in_finished, 1), 'float32')) * -INF)
        
        #print lowest_score_of_finished_in_finish
        bound_is_met = layers.reduce_all(layers.greater_than(lowest_score_of_finished_in_finish, 
                                                             lower_bound_alive_scores))

        decode_length = source_length + 50
        length_cond = layers.less_than(x=step_idx, y=decode_length)

        return layers.logical_and(x=layers.logical_not(bound_is_met), y=length_cond)
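The early-exit test compares the worst score already in the finished set against the best score any alive beam could still reach under the maximum length penalty. A toy NumPy illustration of that bound (all numbers are invented):

import numpy as np

alpha = 0.6
max_decode_extra = 50                                    # mirrors decode_length = source_length + 50
source_length = 5
max_length_penalty = ((5.0 + source_length + max_decode_extra) / 6.0) ** alpha
best_alive_log_prob = -9.5                               # best cumulative log-prob among alive beams
lower_bound_alive_score = best_alive_log_prob / max_length_penalty
lowest_finished_score = -1.8                             # worst score already in the finished set
bound_is_met = lowest_finished_score > lower_bound_alive_score
print(bound_is_met)                                      # True: decoding can stop early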
Example #8
                def is_finished(alive_log_prob, finished_scores,
                                finished_in_finished):

                    max_out_len = 200
                    max_length_penalty = layers.pow(
                        layers.fill_constant([1],
                                             dtype='float32',
                                             value=((5.0 + max_out_len) /
                                                    6.0)), alpha)

                    lower_bound_alive_score = layers.slice(
                        alive_log_prob, starts=[0], ends=[1],
                        axes=[0]) / max_length_penalty

                    lowest_score_of_finished_in_finished = finished_scores * finished_in_finished
                    lowest_score_of_finished_in_finished += (
                        1.0 - finished_in_finished) * -INF
                    lowest_score_of_finished_in_finished = layers.reduce_min(
                        lowest_score_of_finished_in_finished)

                    met = layers.less_than(
                        lower_bound_alive_score,
                        lowest_score_of_finished_in_finished)
                    met = layers.cast(met, 'float32')
                    bound_is_met = layers.reduce_sum(met)

                    finished_eos_num = layers.reduce_sum(finished_in_finished)

                    finish_cond = layers.less_than(
                        finished_eos_num,
                        layers.fill_constant([1],
                                             dtype='float32',
                                             value=beam_size))

                    return finish_cond
Example #9
def _matrix_nms(bboxes, cate_labels, cate_scores, kernel='gaussian', sigma=2.0):
    """Matrix NMS for multi-class bboxes.
    Args:
        bboxes (Tensor): shape (n, 4)
        cate_labels (Tensor): shape (n), mask labels in descending order
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str):  'linear' or 'gaussian'
        sigma (float): std in gaussian method
    Returns:
        Tensor: cate_scores_update, tensors of shape (n)
    """
    n_samples = len(cate_labels)
    if n_samples == 0:
        return []

    # Build an n×n IoU matrix: pairwise IoU between all boxes
    iou_matrix = jaccard(bboxes, bboxes)   # shape: [n_samples, n_samples]
    iou_matrix = paddle.triu(iou_matrix, diagonal=1)   # keep the upper triangle only

    # label_specific matrix.
    cate_labels_x = L.expand(L.reshape(cate_labels, (1, -1)), [n_samples, 1])   # shape: [n_samples, n_samples]
    # Entry (i, j) tells whether boxes i and j have the same class id; only same-class boxes suppress each other.
    d = cate_labels_x - L.transpose(cate_labels_x, [1, 0])
    d = L.pow(d, 2)   # 0 where the classes match, > 0 otherwise; `== 0` is unreliable here, so `< 1` is used instead
    label_matrix = paddle.triu(L.cast(d < 1, 'float32'), diagonal=1)   # shape: [n_samples, n_samples]

    # IoU compensation
    # Zero out IoU between different classes, keep same-class IoU, then take the column-wise maximum
    compensate_iou = L.reduce_max(iou_matrix * label_matrix, [0, ])   # shape: [n_samples, ]
    # After the expand/transpose below, row 0 of compensate_iou is filled with a0 (repeated n_samples times),
    # the highest IoU between object 0 and any higher-scored object of the same class; row 1 is filled with a1, etc.
    # Reading down any column therefore gives those maxima for object 0, object 1, ..., object n_samples-1 in order.
    compensate_iou = L.transpose(L.expand(L.reshape(compensate_iou, (1, -1)), [n_samples, 1]), [1, 0])   # shape: [n_samples, n_samples]

    # IoU decay
    # Zero out IoU between different classes, keep same-class IoU.
    # decay_iou[i][j] is the IoU of boxes i and j (set to 0 if their classes differ); only the upper triangle is kept.
    decay_iou = iou_matrix * label_matrix   # shape: [n_samples, n_samples]

    # matrix nms
    if kernel == 'gaussian':
        decay_matrix = L.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = L.exp(-1 * sigma * (compensate_iou ** 2))
        decay_coefficient = L.reduce_sum(decay_matrix / compensate_matrix, [0, ])
    elif kernel == 'linear':
        # Look at column j (column 2 in the example from 1_test_matrixnms.py):
        # decay_iou      column 2 is [0.9389, 0.9979, 0,      0]: box 2 has IoU 0.9389 and 0.9979 with the two higher-scored same-class boxes.
        # compensate_iou column 2 is [0,      0.9409, 0.9979, 0]: those higher-scored boxes in turn have max IoU 0 and 0.9409 with the boxes scored above them.
        # decay_matrix   column 2 is [0.0610, 0.0348, 485.28, 1]: its minimum, 0.0348, means box 1 is the one that suppresses box 2 (the last two entries never matter, they are always >= 1).
        # In short: if decay_matrix[i][j] is the minimum of column j, then box i suppresses box j;
        # the larger decay_iou is, the smaller decay_matrix becomes.
        decay_matrix = (1-decay_iou)/(1-compensate_iou)
        decay_coefficient = L.reduce_min(decay_matrix, [0, ])
    else:
        raise NotImplementedError

    # Update the scores
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update
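A tiny NumPy walk-through of the 'linear' decay above for three same-class boxes already sorted by score (the IoU values are made up):

import numpy as np

# Upper-triangular pairwise IoU between three same-class boxes, highest score first (toy values).
iou = np.array([[0.0, 0.9, 0.8],
                [0.0, 0.0, 0.7],
                [0.0, 0.0, 0.0]])
compensate = iou.max(axis=0)                        # best IoU each box has with a higher-scored box
compensate = np.tile(compensate[:, None], (1, 3))   # broadcast per row, like the expand/transpose above
decay = (1.0 - iou) / (1.0 - compensate)
coeff = decay.min(axis=0)                           # linear kernel: keep the strongest suppressor
print(coeff)                                        # approximately [1.  0.1  0.2]
print(np.array([0.9, 0.8, 0.7]) * coeff)            # decayed scores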
Example #10
    def forward(self, mu, logvar=None):
        """
        Compute loss

        Args:
            mu (tensor): mean
            logvar (tensor): logarithm of variance
        """
        if logvar is None:
            logvar = L.zeros_like(mu)
        return -0.5 * L.reduce_sum(1 + logvar - L.pow(mu, 2) - L.exp(logvar))
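This is the closed-form KL divergence between N(mu, exp(logvar)) and a standard normal, summed over all elements; the same expression in NumPy on random toy inputs:

import numpy as np

mu = np.random.randn(4).astype('float32')
logvar = np.random.randn(4).astype('float32')
kl = -0.5 * np.sum(1 + logvar - mu ** 2 - np.exp(logvar))
print(kl)   # always >= 0; exactly 0 when mu == 0 and logvar == 0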
Example #11
    def forward(self, tenFirst, tenSecond, tenFeaturesFirst, tenFeaturesSecond, tenFlow):
        b, _, h, w = tenFlow.shape
        tenDifference = tenFirst - backwarp(tenInput=tenSecond, tenFlow=tenFlow * self.fltBackward)
        tenDifference = L.pow(tenDifference, 2)
        tenDifference = L.reduce_sum(tenDifference, 1, True) # [b, 1, h, w]
        tenDifference = L.sqrt(tenDifference).detach()

        tenFeaturesFirst = self.moduleFeat(tenFeaturesFirst)

        tenMean = L.reshape(tenFlow, (b, 2, -1))    # [b, 2, h * w]
        tenMean = L.reduce_mean(tenMean, 2, True)   # [b, 2, 1]
        tenMean = L.reshape(tenMean, (b, 2, 1, 1))  # [b, 2, 1, 1]
        tenMean = L.expand(tenMean, (1, 1, h, w))   # [b, 2, h, w]
        delta = tenFlow - tenMean

        diff = L.concat([tenDifference, delta, tenFeaturesFirst], 1)
        tenDist = self.moduleDist(self.moduleMain(diff))
        tenDist = L.pow(tenDist, 2.0) * -1.0
        tenDist = tenDist - L.reduce_max(tenDist, 1, True)
        tenDist = L.exp(tenDist)

        tenDivisor = L.reduce_sum(tenDist, 1, True)
        tenDivisor = L.reciprocal(tenDivisor)

        tenScaleX = L.unfold(x=tenFlow[:, 0:1, :, :], 
                             kernel_sizes=self.intUnfold, 
                             strides=1, 
                             paddings=int((self.intUnfold - 1) / 2)) # [b, c, h * w]
        tenScaleX = L.reshape(tenScaleX, (b, -1, h, w))          # [b, c, h, w]
        tenScaleX = self.moduleScaleX(tenDist * tenScaleX) * tenDivisor

        tenScaleY = L.unfold(x=tenFlow[:, 1:2, :, :], 
                             kernel_sizes=self.intUnfold, 
                             strides=1, 
                             paddings=int((self.intUnfold - 1) / 2)) # [b, c, h * w]
        tenScaleY = L.reshape(tenScaleY, (b, -1, h, w))          # [b, c, h, w]
        tenScaleY = self.moduleScaleY(tenDist * tenScaleY) * tenDivisor

        return L.concat([tenScaleX, tenScaleY], 1)
Example #12
def _postprocess_output(ioup, output, an_num, num_classes, iou_aware_factor):
    """
    post process output objectness score
    """
    tensors = []
    stride = output.shape[1] // an_num
    for m in range(an_num):
        tensors.append(output[:, stride * m:stride * m + 4, :, :])
        obj = output[:, stride * m + 4:stride * m + 5, :, :]
        obj = L.sigmoid(obj)

        ip = ioup[:, m:m + 1, :, :]

        new_obj = L.pow(obj, (1 - iou_aware_factor)) * L.pow(ip, iou_aware_factor)
        new_obj = _de_sigmoid(new_obj)   # the confidence slot stays un-activated (no sigmoid())

        tensors.append(new_obj)

        tensors.append(output[:, stride * m + 5:stride * m + 5 + num_classes, :, :])

    output = L.concat(tensors, axis=1)

    return output
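The IoU-aware blend above is a weighted geometric mean of the objectness score and the predicted IoU. In NumPy, with an illustrative iou_aware_factor:

import numpy as np

def blend(obj, iou_pred, iou_aware_factor=0.4):
    # obj ** (1 - factor) * iou ** factor, as in _postprocess_output above
    return np.power(obj, 1.0 - iou_aware_factor) * np.power(iou_pred, iou_aware_factor)

print(blend(0.9, 0.5))   # a mediocre IoU prediction pulls the confidence down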
Example #13
    def grow_topk(self, i, logits, alive_seq, alive_log_probs, cache, enc_output, enc_bias):
        """
            grow_topk
        """
        logits = layers.reshape(logits, [self.batch_size, self.beam_size, -1])
        
        candidate_log_probs = layers.log(layers.softmax(logits, axis=2))
        log_probs = candidate_log_probs + layers.unsqueeze(alive_log_probs, axes=[2]) 
        
        base_1 = layers.cast(i, 'float32') + 6.0
        base_1 /= 6.0
        length_penalty = layers.pow(base_1, self.alpha)
        #length_penalty = layers.pow(((5.0 + layers.cast(i+1, 'float32')) / 6.0), self.alpha)
        
        curr_scores = log_probs / length_penalty
        flat_curr_scores = layers.reshape(curr_scores, [self.batch_size, self.beam_size * self.vocab_size])

        topk_scores, topk_ids = layers.topk(flat_curr_scores, k=self.beam_size * 2)
        
        topk_log_probs = topk_scores * length_penalty

        select_beam_index = topk_ids // self.vocab_size
        select_id = topk_ids % self.vocab_size

        #layers.Print(select_id, message="select_id", summarize=1024)
        #layers.Print(topk_scores, message="topk_scores", summarize=10000000)
        
        flat_select_beam_index = layers.reshape(select_beam_index, [-1]) + self.gather_top2k_append_index
        
        topk_seq = layers.gather(alive_seq, [flat_select_beam_index])
        topk_seq = layers.reshape(topk_seq, [self.batch_size, 2 * self.beam_size, -1])
        
        
        #concat with current ids
        topk_seq = layers.concat([topk_seq, layers.unsqueeze(select_id, axes=[2])], axis=2)
        topk_finished = layers.cast(layers.equal(select_id, self.eos_id), 'float32') 
        
        #gather cache
        self.gather_cache(cache, flat_select_beam_index)

        #topk_seq: [batch_size, 2*beam_size, i+1]
        #topk_log_probs, topk_scores, topk_finished: [batch_size, 2*beam_size]
        return topk_seq, topk_log_probs, topk_scores, topk_finished, cache
Example #14
def bbox_ciou(boxes1, boxes2):
    '''
    Compute CIoU = IoU - p2/c2 - a*v
    :param boxes1: (8, 13, 13, 3, 4)   pred_xywh
    :param boxes2: (8, 13, 13, 3, 4)   label_xywh
    :return:
    '''

    # Convert to top-left / bottom-right corner coordinates
    boxes1_x0y0x1y1 = P.concat([
        boxes1[:, :, :, :, :2] - boxes1[:, :, :, :, 2:] * 0.5,
        boxes1[:, :, :, :, :2] + boxes1[:, :, :, :, 2:] * 0.5
    ],
                               axis=-1)
    boxes2_x0y0x1y1 = P.concat([
        boxes2[:, :, :, :, :2] - boxes2[:, :, :, :, 2:] * 0.5,
        boxes2[:, :, :, :, :2] + boxes2[:, :, :, :, 2:] * 0.5
    ],
                               axis=-1)
    '''
    Compare boxes1_x0y0x1y1[..., :2] and boxes1_x0y0x1y1[..., 2:] element-wise, i.e. compare [x0, y0] with [x1, y1],
    and keep the smaller one (say [x0, y0]).
    This guards against negative w/h in the input, which would make x0y0 the bottom-right corner and x1y1 the top-left one.
    '''
    boxes1_x0y0x1y1 = P.concat([
        P.elementwise_min(boxes1_x0y0x1y1[:, :, :, :, :2],
                          boxes1_x0y0x1y1[:, :, :, :, 2:]),
        P.elementwise_max(boxes1_x0y0x1y1[:, :, :, :, :2],
                          boxes1_x0y0x1y1[:, :, :, :, 2:])
    ],
                               axis=-1)
    boxes2_x0y0x1y1 = P.concat([
        P.elementwise_min(boxes2_x0y0x1y1[:, :, :, :, :2],
                          boxes2_x0y0x1y1[:, :, :, :, 2:]),
        P.elementwise_max(boxes2_x0y0x1y1[:, :, :, :, :2],
                          boxes2_x0y0x1y1[:, :, :, :, 2:])
    ],
                               axis=-1)

    # Areas of the two boxes
    boxes1_area = (
        boxes1_x0y0x1y1[:, :, :, :, 2] - boxes1_x0y0x1y1[:, :, :, :, 0]) * (
            boxes1_x0y0x1y1[:, :, :, :, 3] - boxes1_x0y0x1y1[:, :, :, :, 1])
    boxes2_area = (
        boxes2_x0y0x1y1[:, :, :, :, 2] - boxes2_x0y0x1y1[:, :, :, :, 0]) * (
            boxes2_x0y0x1y1[:, :, :, :, 3] - boxes2_x0y0x1y1[:, :, :, :, 1])

    # Top-left and bottom-right corners of the intersection; both have shape (8, 13, 13, 3, 2)
    left_up = P.elementwise_max(boxes1_x0y0x1y1[:, :, :, :, :2],
                                boxes2_x0y0x1y1[:, :, :, :, :2])
    right_down = P.elementwise_min(boxes1_x0y0x1y1[:, :, :, :, 2:],
                                   boxes2_x0y0x1y1[:, :, :, :, 2:])

    # Intersection area (inter_area) and IoU
    inter_section = P.relu(right_down - left_up)
    inter_area = inter_section[:, :, :, :, 0] * inter_section[:, :, :, :, 1]
    union_area = boxes1_area + boxes2_area - inter_area
    iou = inter_area / (union_area + 1e-9)

    # Top-left and bottom-right corners of the enclosing box; both have shape (8, 13, 13, 3, 2)
    enclose_left_up = P.elementwise_min(boxes1_x0y0x1y1[:, :, :, :, :2],
                                        boxes2_x0y0x1y1[:, :, :, :, :2])
    enclose_right_down = P.elementwise_max(boxes1_x0y0x1y1[:, :, :, :, 2:],
                                           boxes2_x0y0x1y1[:, :, :, :, 2:])

    # Squared diagonal of the enclosing box
    enclose_wh = enclose_right_down - enclose_left_up
    enclose_c2 = P.pow(enclose_wh[:, :, :, :, 0], 2) + P.pow(
        enclose_wh[:, :, :, :, 1], 2)

    # Squared distance between the two box centers
    p2 = P.pow(boxes1[:, :, :, :, 0] - boxes2[:, :, :, :, 0], 2) + P.pow(
        boxes1[:, :, :, :, 1] - boxes2[:, :, :, :, 1], 2)

    # Add the a*v term.
    atan1 = P.atan(boxes1[:, :, :, :, 2] / (boxes1[:, :, :, :, 3] + 1e-9))
    atan2 = P.atan(boxes2[:, :, :, :, 2] / (boxes2[:, :, :, :, 3] + 1e-9))
    v = 4.0 * P.pow(atan1 - atan2, 2) / (math.pi**2)
    a = v / (1 - iou + v)

    ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
    return ciou
Example #15
        print('------------------ step %d ------------------' % step)
        # ==================== train ====================
        batch_data = np.random.normal(loc=0, scale=1,
                                      size=(2, 3, 28, 28)).astype(np.float32)
        y_true_arr = np.random.normal(loc=0, scale=1,
                                      size=(2, 8, 28, 28)).astype(np.float32)

        batch_data2 = paddle.to_tensor(batch_data, place=place)
        y_true_arr2 = paddle.to_tensor(y_true_arr, place=place)

        paddle_conv01_out, paddle_bn01_out, paddle_act01_out, paddle_conv02_out = model(
            batch_data2)

        # Build the loss
        # Square the element-wise differences first; either the P.pow() op or Python's ** operator works.
        mseloss = P.pow(y_true_arr2 - paddle_conv02_out, 2)
        mseloss = P.reduce_mean(mseloss)  # then take the mean, giving the MSE loss

        paddle_mseloss_out = mseloss.numpy()
        paddle_bn01_out = paddle_bn01_out.numpy()
        paddle_conv02_out = paddle_conv02_out.numpy()

        # Update the weights
        mseloss.backward()
        if step % 1 == 0:
            optimizer.step()
            optimizer.clear_grad()

        print('train_forward:')
        # Pure-Python code replays the training step to cross-check against Paddle; the outputs should match.
        my_conv01_out = conv01.train_forward(batch_data)
Example #16
    def inference(self, model, inputs, outputs):
        """
        Run inference.

        Args:
            model(object): A generation model. It needs to implement `_generation_network` and `_calc_logits`.
            inputs(dict): Its keys are input names(str) and its values are Variables.

        Returns:
            dict(str:Variable): Its key is output name(str) and its value is a Variable.
        """
        # prepare while loop
        max_len = layers.fill_constant(
            shape=[1], dtype="int64", value=self.max_dec_len, force_cpu=True)
        min_len = layers.fill_constant(
            shape=[1], dtype="int64", value=self.min_dec_len, force_cpu=True)
        step_idx = layers.fill_constant(
            shape=[1], dtype="int64", value=0, force_cpu=True)

        ids = layers.array_write(layers.reshape(inputs["tgt_ids"], (-1, 1)), step_idx)
        pos_biases = layers.array_write(layers.reshape(inputs["tgt_pos"], (-1, 1)), step_idx)
        scores = layers.array_write(inputs["init_score"], step_idx)
        tgt_generation_mask = layers.array_write(inputs["tgt_generation_mask"], step_idx)
        parent_idx = inputs["parent_idx"]

        if self.decoding_strategy == "beam_search":
            beam_size = self.beam_size
        else:
            beam_size = 1

        eos_penalty = np.zeros(self.vocab_size, dtype="float32")
        eos_penalty[self.eos_id] = -1e9
        eos_penalty = layers.assign(eos_penalty)

        token_penalty = np.zeros(self.vocab_size, dtype="float32")
        token_penalty[self.unk_id] = -1e9
        if self.mask_id >= 0:
            token_penalty[self.mask_id] = -1e9
        token_penalty = layers.assign(token_penalty)

        # start while loop
        cond = layers.less_than(x=step_idx, y=max_len)
        while_op = layers.While(cond)
        with while_op.block():
            pre_ids = layers.array_read(array=ids, i=step_idx)
            pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
            pre_scores = layers.array_read(array=scores, i=step_idx)
            pos_bias = layers.array_read(array=pos_biases, i=step_idx)
            pos_bias = layers.gather(input=pos_bias, index=parent_idx)

            tmp_tgt_generation_mask = layers.array_read(tgt_generation_mask, i=step_idx)
            dtype = tmp_tgt_generation_mask.dtype

            append_mask = layers.fill_constant_batch_size_like(
                    input=pre_ids,
                    value=1.0,
                    shape=[-1, 1, 1],
                    dtype=dtype)
            tmp_tgt_generation_mask = layers.concat([tmp_tgt_generation_mask, append_mask], axis=2)
            pre_mask = tmp_tgt_generation_mask = layers.gather(input=tmp_tgt_generation_mask, index=parent_idx)

            pre_sent = layers.fill_constant_batch_size_like(
                    input=pre_mask,
                    value=1,
                    shape=[-1, 1, 1],
                    dtype=pre_ids.dtype)

            if self.continuous_position:
                pre_pos = layers.elementwise_mul(
                    x=layers.fill_constant_batch_size_like(
                        input=pre_mask,
                        value=1,
                        shape=[-1, 1, 1],
                        dtype=pre_ids.dtype), y=step_idx, axis=0) + pos_bias
            else:
                pre_pos = layers.elementwise_mul(
                    x=layers.fill_constant_batch_size_like(
                        input=pre_mask,
                        value=1,
                        shape=[-1, 1, 1],
                        dtype=pre_ids.dtype), y=step_idx, axis=0)

            if self.use_role:
                pre_role = layers.fill_constant_batch_size_like(
                        input=pre_mask,
                        value=0,
                        shape=[-1, 1, 1],
                        dtype=pre_ids.dtype)
            else:
                pre_role = None

            dec_out, _ = model._generation_network(
                token_ids=pre_ids,
                type_ids=pre_sent,
                pos_ids=pre_pos,
                role_ids=pre_role,
                generation_mask=tmp_tgt_generation_mask,
                gather_idx=parent_idx)
            logits = model._calc_logits(dec_out)

            # ignore unk and mask token
            if self.ignore_unk:
                logits = layers.elementwise_add(logits, token_penalty, axis=1)

            # min dec length
            min_len_cond = layers.less_than(x=step_idx, y=min_len)
            def min_len_penalty():
                """Plus minimum length penalty."""
                return layers.elementwise_add(logits, eos_penalty, axis=1)
            def no_penalty():
                """No penalty."""
                return logits
            logits = layers.case([(min_len_cond, min_len_penalty)], default=no_penalty)

            # get probs
            probs = layers.softmax(logits / self.temperature)

            if self.decoding_strategy == "beam_search":
                topk_scores, topk_indices = layers.topk(
                    input=probs, k=beam_size)
            else:
                if self.decoding_strategy.startswith("sampling"):
                    sampling_ids = layers.sampling_id(probs, dtype="int")
                elif self.decoding_strategy.startswith("topk_sampling"):
                    topk_probs, _ = layers.topk(input=probs, k=self.topk)
                    ge_cond = layers.cast(
                        layers.greater_equal(
                            probs,
                            layers.unsqueeze(topk_probs[:, -1], [1])),
                        "float32")
                    old_probs = probs
                    probs = probs * ge_cond / layers.reduce_sum(topk_probs, dim=-1, keep_dim=True)
                    sampling_ids = layers.sampling_id(probs, dtype="int")
                    probs = old_probs
                else:
                    raise ValueError(self.decoding_strategy)

                sampling_scores = layers.one_hot(
                    layers.unsqueeze(sampling_ids, [1]), probs.shape[1]
                )
                sampling_scores = sampling_scores * probs - (1 - sampling_scores) * 1e3
                topk_scores, topk_indices = layers.topk(
                    input=sampling_scores, k=1)

            pre_len = layers.cast(step_idx, "float32")
            layers.increment(x=step_idx, value=1.0, in_place=True)
            cur_len = layers.cast(step_idx, "float32")

            # update scores
            if self.length_average:
                accu_scores = layers.elementwise_add(
                    x=layers.log(topk_scores), y=pre_scores * pre_len, axis=0) / cur_len
            elif self.length_penalty > 0:
                pre_lp = layers.pow((5 + pre_len) / 6, self.length_penalty)
                cur_lp = layers.pow((5 + cur_len) / 6, self.length_penalty)
                accu_scores = layers.elementwise_add(
                    x=layers.log(topk_scores), y=pre_scores * pre_lp, axis=0) / cur_lp
            else:
                accu_scores = layers.elementwise_add(
                    x=layers.log(topk_scores), y=pre_scores, axis=0)
            topk_indices = layers.lod_reset(topk_indices, pre_ids)
            accu_scores = layers.lod_reset(accu_scores, pre_ids)
            selected_ids, selected_scores, gather_idx = layers.beam_search(
                pre_ids=pre_ids,
                pre_scores=pre_scores,
                ids=topk_indices,
                scores=accu_scores,
                beam_size=beam_size,
                end_id=self.eos_id,
                return_parent_idx=True)

            layers.array_write(selected_ids, i=step_idx, array=ids)
            layers.array_write(selected_scores, i=step_idx, array=scores)
            layers.array_write(pre_mask, i=step_idx, array=tgt_generation_mask)
            layers.array_write(pos_bias, i=step_idx, array=pos_biases)

            layers.assign(gather_idx, parent_idx)

            length_cond = layers.less_than(x=step_idx, y=max_len)
            finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        finished_ids, finished_scores = layers.beam_search_decode(
            ids, scores, beam_size=beam_size, end_id=self.eos_id)

        predictions = {
            "finished_ids": finished_ids,
            "finished_scores": finished_scores,
            "token_ids": inputs["token_ids"],
            "data_id": inputs["data_id"]
        }
        return predictions
Example #17
    def __call__(self, kernel_preds, cls_preds, mask_protos,
                 batch_gt_objs_tensors, batch_gt_clss_tensors,
                 batch_gt_masks_tensors, batch_gt_pos_idx_tensors):
        '''
        :param kernel_preds:  each element has shape [N, 256, seg_num_grid, seg_num_grid]: the predicted convolution kernel for every grid cell. From small to large receptive field.
        :param cls_preds:     each element has shape [N,  80, seg_num_grid, seg_num_grid]: per-cell class predictions, before sigmoid(). From small to large receptive field.
        :param mask_protos:   [bs, 256, s4, s4]   mask prototypes
        :param batch_gt_objs_tensors:   each element has shape [N, seg_num_grid, seg_num_grid, 1]: per-cell objectness. From small to large receptive field.
        :param batch_gt_clss_tensors:   each element has shape [N, seg_num_grid, seg_num_grid, 80]: per-cell ground-truth class one-hot. From small to large receptive field.
        :param batch_gt_masks_tensors:     each element has shape [N, -1, s4, s4]: ground-truth masks. From small to large receptive field.
        :param batch_gt_pos_idx_tensors:   each element has shape [N, -1, 3]: indices of the positive samples. From small to large receptive field.
        :return:
        '''

        batch_size = self.batch_size
        num_layers = len(kernel_preds)

        # ================= Compute the losses =================
        num_ins = 0.  # number of positive samples in this batch
        loss_clss, loss_masks = [], []
        for bid in range(batch_size):
            for lid in range(num_layers):
                # ================ Mask loss ======================
                mask_proto = mask_protos[bid]  # [256, s4, s4]   mask prototypes produced for this image
                kernel_pred = kernel_preds[lid][
                    bid]  # [256, seg_num_grid, seg_num_grid]   per-cell predicted kernels (the "mask coefficients" in YOLACT)
                kernel_pred = L.transpose(
                    kernel_pred, perm=[1, 2, 0]
                )  # [seg_num_grid, seg_num_grid, 256]   per-cell predicted kernels (the "mask coefficients" in YOLACT)

                gt_objs = batch_gt_objs_tensors[lid][
                    bid]  # [seg_num_grid, seg_num_grid, 1]
                gt_masks = batch_gt_masks_tensors[lid][bid]  # [-1, s4, s4]
                pmidx = batch_gt_pos_idx_tensors[lid][bid]  # [-1, 3]
                gt_objs.stop_gradient = True
                gt_masks.stop_gradient = True
                pmidx.stop_gradient = True

                idx_sum = L.reduce_sum(pmidx, dim=1)
                keep = L.where(idx_sum > -1)
                keep = L.reshape(keep, (-1, ))
                keep.stop_gradient = True
                pmidx = L.gather(pmidx, keep)  # [M, 3]

                yx_idx = pmidx[:, :2]  # [M, 2]
                m_idx = pmidx[:, 2]  # [M, ]
                yx_idx.stop_gradient = True
                m_idx.stop_gradient = True

                # Gather the positives
                gt_obj = L.gather_nd(gt_objs,
                                     yx_idx)  # [M, 1]        whether this really is a positive sample
                pos_krn = L.gather_nd(kernel_pred,
                                      yx_idx)  # [M, 256]      kernels (mask coefficients) of the positive samples
                gt_mask = L.gather(gt_masks, m_idx)  # [M, s4, s4]   ground-truth masks

                # Number of positive samples
                num_ins += L.reduce_sum(gt_obj)

                # Build the predicted masks
                mask_proto = L.transpose(mask_proto, perm=[1, 2,
                                                           0])  # [s4, s4, 256]
                masks = L.matmul(mask_proto, pos_krn,
                                 transpose_y=True)  # [s4, s4, M]
                masks = L.sigmoid(masks)  # [s4, s4, M]
                masks = L.transpose(masks, perm=[2, 0, 1])  # [M, s4, s4]
                loss_mask = self.dice_loss(masks, gt_mask, gt_obj)
                loss_masks.append(loss_mask)

                # ================ Classification loss: sigmoid_focal_loss() ======================
                gamma = self.loss_gamma
                alpha = self.loss_alpha
                pred_conf = cls_preds[lid][
                    bid]  # [80, seg_num_grid, seg_num_grid]    before sigmoid()
                pred_conf = L.transpose(pred_conf, perm=[
                    1, 2, 0
                ])  # [seg_num_grid, seg_num_grid, 80]    before sigmoid()
                pred_conf = L.sigmoid(
                    pred_conf
                )  # [seg_num_grid, seg_num_grid, 80]    after sigmoid()
                gt_clss = batch_gt_clss_tensors[lid][
                    bid]  # [seg_num_grid, seg_num_grid, 80]    ground-truth class one-hot
                gt_clss.stop_gradient = True
                pos_loss = gt_clss * (0 - L.log(pred_conf + 1e-9)) * L.pow(
                    1 - pred_conf, gamma) * alpha
                neg_loss = (
                    1.0 - gt_clss) * (0 - L.log(1 - pred_conf + 1e-9)) * L.pow(
                        pred_conf, gamma) * (1 - alpha)
                focal_loss = pos_loss + neg_loss
                focal_loss = L.reduce_sum(focal_loss, dim=[0, 1])
                loss_clss.append(focal_loss)
        loss_masks = L.concat(loss_masks, axis=0)
        loss_masks = L.reduce_sum(loss_masks) * self.ins_loss_weight
        loss_masks = loss_masks / L.elementwise_max(
            L.ones((1, ), dtype='float32'), num_ins)

        loss_clss = L.concat(loss_clss, axis=0)
        loss_clss = L.reduce_sum(loss_clss) * self.clss_loss_weight
        loss_clss = loss_clss / L.elementwise_max(
            L.ones((1, ), dtype='float32'), num_ins)

        loss_all = {"loss_masks": loss_masks, "loss_clss": loss_clss}
        return loss_all
Example #18
def hyp_score(log_probs, length, length_penalty):
    lp = L.pow((5. + L.cast(length, 'float32')) / 6., length_penalty)
    return log_probs / lp
Example #19
    def infilling_decode(self):
        if self.task_type == "dialog":
            emb_num = 4
        else:
            emb_num = 3
        input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                       [[-1, self.max_seq_len, self.max_seq_len]]
        input_dtypes = ['int64'] * emb_num + ['float32']
        input_lod_levels = [0] * emb_num + [0]

        shapes = input_shapes + [[-1, self.max_seq_len, 1],
                                 [-1, self.max_seq_len, 1], [-1, 1], [-1],
                                 [-1, 1, self.max_seq_len], [-1, 1]]
        dtypes = input_dtypes + [
            'int64', 'int64', 'float32', 'int32', 'float32', 'int64'
        ]
        lod_levels = input_lod_levels + [2, 2, 2, 0, 0, 0]

        inputs = self.to_ternsor(shapes, dtypes, lod_levels)
        pyreader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                      capacity=50,
                                                      iterable=False)

        emb_ids = {}
        for key, value in zip(self.emb_keys, inputs[:emb_num]):
            emb_ids[key] = value

        input_mask = inputs[emb_num]
        tgt_ids, tgt_pos, init_scores, parent_idx, tgt_input_mask, data_ids = inputs[
            -6:]

        ernie = ErnieModel(emb_ids=emb_ids,
                           input_mask=input_mask,
                           config=self.ernie_config,
                           use_fp16=self.use_fp16,
                           task_type=self.task_type,
                           decoding=True,
                           gather_idx=parent_idx)

        max_len = layers.fill_constant(shape=[1],
                                       dtype=tgt_ids.dtype,
                                       value=self.max_dec_len,
                                       force_cpu=True)
        step_idx = layers.fill_constant(shape=[1],
                                        dtype=tgt_ids.dtype,
                                        value=0,
                                        force_cpu=True)
        pos_idx = layers.fill_constant(shape=[1],
                                       dtype=tgt_ids.dtype,
                                       value=1,
                                       force_cpu=True)
        cond = layers.less_than(x=step_idx, y=max_len)
        while_op = layers.While(cond)

        ids = layers.array_write(layers.reshape(tgt_ids, (-1, 1)), step_idx)
        pos_biases = layers.array_write(layers.reshape(tgt_pos, (-1, 1)),
                                        step_idx)
        scores = layers.array_write(init_scores, step_idx)
        tgt_masks = layers.array_write(tgt_input_mask, step_idx)

        with while_op.block():
            pre_ids = layers.array_read(array=ids, i=step_idx)
            pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
            pre_scores = layers.array_read(array=scores, i=step_idx)
            pos_bias = layers.array_read(array=pos_biases, i=step_idx)
            pos_bias = layers.gather(input=pos_bias, index=parent_idx)
            tmp_mask = layers.array_read(tgt_masks, i=step_idx)

            def gen_batch_like(value,
                               dtype="int64",
                               shape=[-1, 1, 1],
                               is_scalar=True):
                if is_scalar:
                    return layers.fill_constant_batch_size_like(
                        input=parent_idx,
                        value=value,
                        shape=shape,
                        dtype=dtype)
                else:
                    return layers.elementwise_mul(
                        x=layers.fill_constant_batch_size_like(
                            input=parent_idx,
                            value=1,
                            shape=shape,
                            dtype=dtype),
                        y=value,
                        axis=0)

            tmp_mask = layers.gather(input=tmp_mask, index=parent_idx)
            append_0_mask = gen_batch_like(0.0, dtype=tmp_mask.dtype)
            append_1_mask = gen_batch_like(1.0, dtype=tmp_mask.dtype)
            tmp_mask = layers.concat([tmp_mask, append_1_mask], axis=2)
            pre_mask = layers.concat([tmp_mask, append_0_mask], axis=2)
            cur_mask = layers.concat([tmp_mask, append_1_mask], axis=2)

            cur_ids = gen_batch_like(self.attn_id)
            pre_pos = gen_batch_like(step_idx, is_scalar=False)
            cur_pos = gen_batch_like(pos_idx, is_scalar=False)
            if self.continuous_position:
                pre_pos = pre_pos + pos_bias
                cur_pos = cur_pos + pos_bias

            dec_emb_ids = {
                "word_embedding": layers.concat([pre_ids, cur_ids], axis=1),
                "pos_embedding": layers.concat([pre_pos, cur_pos], axis=1)
            }
            if self.task_type == "dialog":
                role_ids = gen_batch_like(0)
                turn_ids = gen_batch_like(0)
                dec_emb_ids["role_embedding"] = layers.concat(
                    [role_ids, role_ids], axis=1)
                dec_emb_ids["turn_embedding"] = layers.concat(
                    [turn_ids, turn_ids], axis=1)
            else:
                sent_ids = gen_batch_like(self.tgt_type_id)
                dec_emb_ids["sent_embedding"] = layers.concat(
                    [sent_ids, sent_ids], axis=1)
            dec_mask = layers.concat([pre_mask, cur_mask], axis=1)

            dec_out = ernie.encode(dec_emb_ids,
                                   dec_mask,
                                   parent_idx,
                                   remove_query=True)
            fc_out = self.cal_logit(dec_out[:, 1:, :], None)
            topk_scores, topk_indices = layers.topk(
                input=layers.softmax(fc_out), k=self.beam_size)
            pre_lenpen = layers.pow(
                (5.0 + layers.cast(step_idx, pre_scores.dtype)) / 6.0,
                self.length_penalty)
            cur_lenpen = layers.pow(
                (5.0 + layers.cast(pos_idx, pre_scores.dtype)) / 6.0,
                self.length_penalty)
            accu_scores = layers.elementwise_add(x=layers.log(topk_scores),
                                                 y=pre_scores * pre_lenpen,
                                                 axis=0) / cur_lenpen
            topk_indices = layers.lod_reset(topk_indices, pre_ids)
            accu_scores = layers.lod_reset(accu_scores, pre_ids)
            selected_ids, selected_scores, gather_idx = layers.beam_search(
                pre_ids=pre_ids,
                pre_scores=pre_scores,
                ids=topk_indices,
                scores=accu_scores,
                beam_size=self.beam_size,
                end_id=self.eos_idx,
                return_parent_idx=True)

            layers.increment(x=step_idx, value=1.0, in_place=True)
            layers.increment(x=pos_idx, value=1.0, in_place=True)
            layers.array_write(selected_ids, i=step_idx, array=ids)
            layers.array_write(selected_scores, i=step_idx, array=scores)
            layers.array_write(tmp_mask, i=step_idx, array=tgt_masks)
            layers.array_write(pos_bias, i=step_idx, array=pos_biases)

            layers.assign(gather_idx, parent_idx)
            length_cond = layers.less_than(x=step_idx, y=max_len)
            finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        finished_ids, finished_scores = layers.beam_search_decode(
            ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)

        graph_vars = {
            "finished_ids": finished_ids,
            "finished_scores": finished_scores,
            "data_ids": data_ids
        }

        for k, v in graph_vars.items():
            v.persistable = True

        return pyreader, graph_vars
Example #20
def hyp_score(log_probs, length):
    factor = 1.
    lp = L.pow((5. + L.cast(length, 'float32')) / 6., factor)
    return log_probs / lp
Example #21
                size=8,
                param_attr=ParamAttr(name="fc01_weights"),
                bias_attr=ParamAttr(name="fc01_bias"))
            fc02_out_tensor = fluid.layers.fc(
                input=fc01_out_tensor,
                size=8,
                param_attr=ParamAttr(name="fc02_weights"),
                bias_attr=ParamAttr(name="fc02_bias"))

            # Build the loss
            y_true = P.data(name='y_true',
                            shape=[-1, 8],
                            append_batch_size=False,
                            dtype='float32')
            # Square the element-wise differences first; either the P.pow() op or Python's ** operator works.
            mseloss = P.pow(y_true - fc02_out_tensor, 2)
            # mseloss = (y_true - fc02_out_tensor) ** 2   # Python's ** operator also works.
            mseloss = P.reduce_mean(mseloss)  # then take the mean, giving the MSE loss

            # Optimizer: SGD
            optimizer = fluid.optimizer.SGD(learning_rate=lr)
            optimizer.minimize(mseloss)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            # Rebuild the network with the same tensor names; no loss layers are needed here
            inputs = P.data(name='input_1',
                            shape=[-1, 3],
                            append_batch_size=False,
                            dtype='float32')
Example #22
                stride=1,
                padding=1,
                param_attr=ParamAttr(name="conv02_weights"),
                bias_attr=ParamAttr(name="conv02_bias"))
            in_name = "in02"
            in02_out_tensor = innorm(conv02_out_tensor, name=in_name)
            act02_out_tensor = fluid.layers.leaky_relu(in02_out_tensor,
                                                       alpha=0.1)

            # Build the loss
            y_true = P.data(name='y_true',
                            shape=[-1, 8, 28, 28],
                            append_batch_size=False,
                            dtype='float32')
            # Square the element-wise differences first; either the P.pow() op or Python's ** operator works.
            mseloss = P.pow(y_true - act02_out_tensor, 2)
            mseloss = P.reduce_mean(mseloss)  # then take the mean, giving the MSE loss

            # Optimizer: SGD
            optimizer = fluid.optimizer.SGD(learning_rate=lr)
            optimizer.minimize(mseloss)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            # Rebuild the network with the same tensor names; no loss layers are needed here
            inputs = P.data(name='input_1',
                            shape=[-1, 3, 28, 28],
                            append_batch_size=False,
                            dtype='float32')
            conv01_out_tensor = fluid.layers.conv2d(
Example #23
def get_norm(indegree):
    float_degree = L.cast(indegree, dtype="float32")
    float_degree = L.clamp(float_degree, min=1.0)
    norm = L.pow(float_degree, factor=-0.5)
    return norm
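This is the degree^(-1/2) factor used for symmetric graph normalization; clamping the degree at 1 keeps isolated nodes from dividing by zero. A NumPy sketch with a small, made-up degree vector:

import numpy as np

indegree = np.array([0, 1, 4, 9], dtype='float32')
norm = np.power(np.maximum(indegree, 1.0), -0.5)
print(norm)   # [1.  1.  0.5  0.33333334]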
Example #24
    def __iou_loss(self, pred, targets, positive_mask, weights=None):
        """
        Calculate the loss for location prediction
        Args:
            pred          (Variables): bounding boxes prediction
            targets       (Variables): targets for positive samples
            positive_mask (Variables): mask of positive samples
            weights       (Variables): weights for each positive samples
        Return:
            loss (Varialbes): location loss
        """
        positive_mask = fluid.layers.reshape(positive_mask,
                                             (-1, ))  # [batch_size * num_cells, ]
        plw = pred[:, 0] * positive_mask  # [batch_size * num_cells, ], predicted l
        pth = pred[:, 1] * positive_mask  # [batch_size * num_cells, ], predicted t
        prw = pred[:, 2] * positive_mask  # [batch_size * num_cells, ], predicted r
        pbh = pred[:, 3] * positive_mask  # [batch_size * num_cells, ], predicted b
        tlw = targets[:, 0] * positive_mask  # [batch_size * num_cells, ], ground-truth l
        tth = targets[:, 1] * positive_mask  # [batch_size * num_cells, ], ground-truth t
        trw = targets[:, 2] * positive_mask  # [batch_size * num_cells, ], ground-truth r
        tbh = targets[:, 3] * positive_mask  # [batch_size * num_cells, ], ground-truth b
        tlw.stop_gradient = True
        trw.stop_gradient = True
        tth.stop_gradient = True
        tbh.stop_gradient = True
        area_target = (tlw + trw) * (tth + tbh)  # [batch_size * num_cells, ], ground-truth area
        area_predict = (plw + prw) * (pth + pbh)  # [batch_size * num_cells, ], predicted area
        ilw = fluid.layers.elementwise_min(plw, tlw)  # [batch_size * num_cells, ], l of the intersection
        irw = fluid.layers.elementwise_min(prw, trw)  # [batch_size * num_cells, ], r of the intersection
        ith = fluid.layers.elementwise_min(pth, tth)  # [batch_size * num_cells, ], t of the intersection
        ibh = fluid.layers.elementwise_min(pbh, tbh)  # [batch_size * num_cells, ], b of the intersection
        clw = fluid.layers.elementwise_max(plw, tlw)  # [batch_size * num_cells, ], l of the enclosing box
        crw = fluid.layers.elementwise_max(prw, trw)  # [batch_size * num_cells, ], r of the enclosing box
        cth = fluid.layers.elementwise_max(pth, tth)  # [batch_size * num_cells, ], t of the enclosing box
        cbh = fluid.layers.elementwise_max(pbh, tbh)  # [batch_size * num_cells, ], b of the enclosing box
        area_inter = (ilw + irw) * (ith + ibh)  # [batch_size * num_cells, ], intersection area
        ious = (area_inter + 1.0) / (area_predict + area_target - area_inter +
                                     1.0)
        ious = ious * positive_mask
        if self.iou_loss_type.lower() == "linear_iou":
            loss = 1.0 - ious
        elif self.iou_loss_type.lower() == "giou":
            area_uniou = area_predict + area_target - area_inter
            area_circum = (clw + crw) * (cth + cbh) + 1e-7
            giou = ious - (area_circum - area_uniou) / area_circum
            loss = 1.0 - giou
        elif self.iou_loss_type.lower() == "iou":
            loss = 0.0 - fluid.layers.log(ious)
        elif self.iou_loss_type.lower() == "ciou":
            # Predicted box in cx_cy_w_h format, with the cell center as the origin.
            pred_cx = (prw - plw) * 0.5
            pred_cy = (pbh - pth) * 0.5
            pred_w = (plw + prw)
            pred_h = (pth + pbh)
            pred_cx = L.reshape(pred_cx, (-1, 1))
            pred_cy = L.reshape(pred_cy, (-1, 1))
            pred_w = L.reshape(pred_w, (-1, 1))
            pred_h = L.reshape(pred_h, (-1, 1))
            pred_cx_cy_w_h = L.concat([pred_cx, pred_cy, pred_w, pred_h],
                                      -1)  # [batch_size * num_cells, 4]

            # Ground-truth box in cx_cy_w_h format, with the cell center as the origin.
            true_cx = (trw - tlw) * 0.5
            true_cy = (tbh - tth) * 0.5
            true_w = (tlw + trw)
            true_h = (tth + tbh)
            true_cx = L.reshape(true_cx, (-1, 1))
            true_cy = L.reshape(true_cy, (-1, 1))
            true_w = L.reshape(true_w, (-1, 1))
            true_h = L.reshape(true_h, (-1, 1))
            true_cx_cy_w_h = L.concat([true_cx, true_cy, true_w, true_h],
                                      -1)  # [batch_size * num_cells, 4]

            # Predicted box in x0y0x1y1 format, with the cell center as the origin.
            boxes1_x0y0x1y1 = L.concat([
                pred_cx_cy_w_h[:, :2] - pred_cx_cy_w_h[:, 2:] * 0.5,
                pred_cx_cy_w_h[:, :2] + pred_cx_cy_w_h[:, 2:] * 0.5
            ],
                                       axis=-1)

            # Ground-truth box in x0y0x1y1 format, with the cell center as the origin.
            boxes2_x0y0x1y1 = L.concat([
                true_cx_cy_w_h[:, :2] - true_cx_cy_w_h[:, 2:] * 0.5,
                true_cx_cy_w_h[:, :2] + true_cx_cy_w_h[:, 2:] * 0.5
            ],
                                       axis=-1)

            # Top-left and bottom-right corners of the enclosing box, both of shape (batch_size * num_cells, 2)
            enclose_left_up = L.elementwise_min(boxes1_x0y0x1y1[:, :2],
                                                boxes2_x0y0x1y1[:, :2])
            enclose_right_down = L.elementwise_max(boxes1_x0y0x1y1[:, 2:],
                                                   boxes2_x0y0x1y1[:, 2:])

            # Squared diagonal of the enclosing box
            enclose_wh = enclose_right_down - enclose_left_up
            enclose_c2 = L.pow(enclose_wh[:, 0], 2) + L.pow(
                enclose_wh[:, 1], 2)

            # Squared distance between the two box centers
            p2 = L.pow(pred_cx_cy_w_h[:, 0] - true_cx_cy_w_h[:, 0], 2) \
                 + L.pow(pred_cx_cy_w_h[:, 1] - true_cx_cy_w_h[:, 1], 2)

            # Add the a*v term, with a tiny constant to guard against division by zero / NaN.
            atan1 = L.atan(pred_cx_cy_w_h[:, 2] /
                           (pred_cx_cy_w_h[:, 3] + 1e-9))
            atan2 = L.atan(true_cx_cy_w_h[:, 2] /
                           (true_cx_cy_w_h[:, 3] + 1e-9))
            v = 4.0 * L.pow(atan1 - atan2, 2) / (math.pi**2)
            a = v / (1 - ious + v)
            ciou = ious - 1.0 * p2 / (enclose_c2 + 1e-9) - 1.0 * a * v
            loss = 1.0 - ciou
        else:
            raise KeyError
        loss = fluid.layers.reshape(loss, (-1, 1))  # [batch_size * num_cells, 1]
        if weights is not None:
            loss = loss * weights
        return loss
Example #25
        def beam_search():
            """Beam search function"""

            max_len = layers.fill_constant(shape=[1],
                                           dtype=start_tokens.dtype,
                                           value=self.max_out_len,
                                           force_cpu=True)
            min_len = layers.fill_constant(shape=[1],
                                           dtype=start_tokens.dtype,
                                           value=self.min_out_len)
            neg_inf = layers.fill_constant(shape=[1],
                                           dtype='float32',
                                           value=-INF)
            step_idx = layers.fill_constant(shape=[1],
                                            dtype=start_tokens.dtype,
                                            value=0,
                                            force_cpu=True)
            step_next_idx = layers.fill_constant(shape=[1],
                                                 dtype=start_tokens.dtype,
                                                 value=1,
                                                 force_cpu=True)
            cond = layers.less_than(x=step_idx,
                                    y=max_len)  # default force_cpu=True
            while_op = layers.While(cond)
            # array states will be stored for each step.
            ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)),
                                     step_idx)
            scores = layers.array_write(init_scores, step_idx)
            # cell states will be overwrited at each step.
            # caches contains states of history steps in decoder self-attention
            # and static encoder output projections in encoder-decoder attention
            # to reduce redundant computation.
            caches = [
                {
                    "k":  # for self attention
                        layers.fill_constant_batch_size_like(
                            input=start_tokens,
                            shape=[-1, self._n_head, 0, self._emb_size // self._n_head],
                            dtype=enc_words_output.dtype,
                            value=0),
                    "v":  # for self attention
                        layers.fill_constant_batch_size_like(
                            input=start_tokens,
                            shape=[-1, self._n_head, 0, self._emb_size // self._n_head],
                            dtype=enc_words_output.dtype,
                            value=0),
                    "static_k_word":  # for encoder-decoder attention
                        layers.create_tensor(dtype=enc_words_output.dtype),
                    "static_v_word":  # for encoder-decoder attention
                        layers.create_tensor(dtype=enc_words_output.dtype),
                    "static_k_sent":  # for encoder-decoder attention
                        layers.create_tensor(dtype=enc_sents_output.dtype),
                    "static_v_sent":  # for encoder-decoder attention
                        layers.create_tensor(dtype=enc_sents_output.dtype)
                } for i in range(self._dec_n_layer)
            ]

            trigram_blocking = TrigramBlocking(start_tokens,
                                               self.tokenizer,
                                               use_fp16=self._use_fp16,
                                               beam_size=self.beam_size)

            with while_op.block():
                pre_ids = layers.array_read(array=ids, i=step_idx)
                pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
                # Since beam_search_op doesn't enforce pre_ids' shape, we can do an
                # inplace reshape here, which actually changes the shape of pre_ids.
                pre_scores = layers.array_read(array=scores, i=step_idx)
                # gather cell states corresponding to selected parent
                pre_src_words_attn_bias = layers.gather(
                    tgt_src_words_attn_bias, index=parent_idx)
                pre_src_sents_attn_bias = layers.gather(
                    tgt_src_sents_attn_bias, index=parent_idx)
                pre_graph_attn_bias = layers.gather(graph_attn_bias,
                                                    index=parent_idx)
                pre_pos = layers.elementwise_mul(
                    x=layers.fill_constant_batch_size_like(
                        input=
                        pre_src_sents_attn_bias,  # can't use a lod tensor here
                        value=1,
                        shape=[-1, 1, 1],
                        dtype=pre_ids.dtype),
                    y=step_idx,
                    axis=0)

                logits = self.decode(
                    dec_input=(pre_ids, pre_pos, None, pre_src_words_attn_bias,
                               pre_src_sents_attn_bias, pre_graph_attn_bias),
                    enc_words_output=enc_words_output,
                    enc_sents_output=enc_sents_output,
                    caches=caches,
                    gather_idx=parent_idx)

                # prevent generating the end token while length is less than min_out_len
                eos_index = layers.fill_constant(
                    shape=[layers.shape(logits)[0]],
                    dtype='int64',
                    value=self.eos_idx)
                eos_index = fluid.one_hot(eos_index, depth=self.voc_size)
                less_cond = layers.cast(layers.less_than(x=step_idx,
                                                         y=min_len),
                                        dtype='float32')
                less_val = layers.elementwise_mul(less_cond, neg_inf)
                eos_val = layers.elementwise_mul(eos_index, less_val, axis=0)
                revised_logits = layers.elementwise_add(logits,
                                                        eos_val,
                                                        axis=0)

                # topK reduction across beams; also contains special handling of
                # ended beams and finished sentences (batch reduction)
                topk_scores, topk_indices = layers.topk(
                    input=layers.softmax(revised_logits), k=self.beam_size)

                # Roll back the length penalty on the previous scores.
                # The previous scores were already length-penalized, so before applying
                # this timestep's length penalty we need to roll that back: we store the
                # length-penalized score in `scores` but compute with the un-penalized score.
                # -> safe for step_idx == 0 (initialization state), because the previous score is 0
                pre_timestep_length_penalty = fluid.layers.pow(
                    ((5.0 + fluid.layers.cast(step_idx, pre_scores.dtype)) /
                     6.0), self.len_penalty)
                pre_scores_wo_len_penalty = fluid.layers.elementwise_mul(
                    pre_scores, pre_timestep_length_penalty)
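                # (Worked relation, added for clarity: with GNMT-style normalization
                #  lp(t) = ((5 + t) / 6) ** len_penalty, `scores` stores
                #  sum(log p) / lp(t); multiplying by lp(t_prev) above recovers the
                #  raw accumulated log-probability before re-applying lp(t_cur) below.)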

                # calc trigram-blocking delta scores for current alive sequence
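                # (Note, added for clarity: trigram blocking adds a large negative
                #  delta to any candidate token whose extension would repeat a
                #  trigram already present in that beam's decoded sequence.)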
                if self.block_trigram:
                    trigram_blocking.update_seq(pre_ids, parent_idx)
                    trigram_blocking.expand_cand_seq(topk_indices)
                    fluid.layers.py_func(
                        func=trigram_blocking.blocking_forward,
                        x=[
                            trigram_blocking.cand_seq,
                            trigram_blocking.id2is_full_token
                        ],
                        out=trigram_blocking.delta_score_out,
                        backward_func=None)
                    layers.Print(trigram_blocking.delta_score_out,
                                 summarize=100,
                                 message="trigram_blocking.delta_score_out")
                    pre_scores_wo_len_penalty = fluid.layers.elementwise_add(
                        x=trigram_blocking.delta_score_out,
                        y=pre_scores_wo_len_penalty,
                        axis=0)
                # => [N, topk]

                accu_scores = layers.elementwise_add(
                    x=layers.log(topk_scores),
                    y=pre_scores_wo_len_penalty,
                    axis=0)

                cur_timestep_length_penalty = layers.pow(
                    ((5.0 + layers.cast(step_next_idx, accu_scores.dtype)) /
                     6.0), self.len_penalty)
                curr_scores = layers.elementwise_div(
                    accu_scores, cur_timestep_length_penalty)

                # beam_search op uses lod to differentiate branches.
                curr_scores = layers.lod_reset(curr_scores, pre_ids)
                topk_indices = layers.lod_reset(topk_indices, pre_ids)
                selected_ids, selected_scores, gather_idx = layers.beam_search(
                    pre_ids=pre_ids,
                    pre_scores=pre_scores,
                    ids=topk_indices,
                    scores=curr_scores,
                    beam_size=self.beam_size,
                    end_id=self.eos_idx,
                    return_parent_idx=True)

                layers.increment(x=step_idx, value=1.0, in_place=True)
                layers.increment(x=step_next_idx, value=1.0, in_place=True)
                # cell states (caches) have been updated in wrap_decoder,
                # only need to update beam search states here.
                layers.array_write(selected_ids, i=step_idx, array=ids)
                layers.array_write(selected_scores, i=step_idx, array=scores)
                layers.assign(gather_idx, parent_idx)
                layers.assign(pre_src_words_attn_bias, tgt_src_words_attn_bias)
                layers.assign(pre_src_sents_attn_bias, tgt_src_sents_attn_bias)
                layers.assign(pre_graph_attn_bias, graph_attn_bias)

                length_cond = layers.less_than(x=step_idx, y=max_len)
                finish_cond = layers.logical_not(
                    layers.is_empty(x=selected_ids))
                layers.logical_and(x=length_cond, y=finish_cond, out=cond)

            finished_ids, finished_scores = layers.beam_search_decode(
                ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)

            return finished_ids, finished_scores
Example #26
def mse_loss(pred, label):
    loss = layers.pow((pred - label), 2)
    loss = layers.mean(loss)
    return loss
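# A hedged sketch of the same computation in plain NumPy (illustrative only;
# the names below are not part of the original example):
import numpy as np

pred_np = np.array([1.0, 2.0, 3.0], dtype="float32")
label_np = np.array([1.0, 1.0, 1.0], dtype="float32")
mse_np = np.mean((pred_np - label_np) ** 2)  # -> 5/3, what mse_loss would return for the same values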
Example #27
    def _calc_obj_loss(self, output, obj, tobj, gt_box, batch_size, anchors,
                       num_classes, downsample, ignore_thresh, scale_x_y):
        # If a predicted bbox overlaps any gt bbox with IoU above ignore_thresh,
        # its objectness loss is ignored. The process is as follows:

        # 1. get pred bbox, the same as in YOLOv3 infer mode; use yolo_box here
        # NOTE: img_size is set to 1.0 to get normalized pred bboxes
        bbox, prob = fluid.layers.yolo_box(x=output,
                                           img_size=fluid.layers.ones(
                                               shape=[batch_size, 2],
                                               dtype="int32"),
                                           anchors=anchors,
                                           class_num=num_classes,
                                           conf_thresh=0.,
                                           downsample_ratio=downsample,
                                           clip_bbox=False,
                                           scale_x_y=scale_x_y)

        # 2. split pred bbox and gt bbox by sample, calculate IoU between pred bbox
        #    and gt bbox in each sample
        if batch_size > 1:
            # bbox:    [N, 3*n_grid*n_grid, 4]
            # gt_box:  [N, 50, 4]
            preds = fluid.layers.split(
                bbox, batch_size, dim=0)  # each element has shape [1, 3*n_grid*n_grid, 4]
            gts = fluid.layers.split(gt_box, batch_size,
                                     dim=0)  # each element has shape [1, 50, 4]
        else:
            preds = [bbox]
            gts = [gt_box]
            probs = [prob]
        ious = []
        for pred, gt in zip(preds, gts):
            # pred:   [1, 3*n_grid*n_grid, 4]
            # gt:     [1, 50, 4]

            def box_xywh2xyxy(box):
                x = box[:, 0]
                y = box[:, 1]
                w = box[:, 2]
                h = box[:, 3]
                return fluid.layers.stack([
                    x - w / 2.,
                    y - h / 2.,
                    x + w / 2.,
                    y + h / 2.,
                ],
                                          axis=1)

            pred = fluid.layers.squeeze(pred, axes=[0])  # [3*n_grid*n_grid, 4]
            gt = box_xywh2xyxy(fluid.layers.squeeze(
                gt, axes=[0]))  # [50, 4], also converted to x0y0x1y1 format
            ious.append(fluid.layers.iou_similarity(
                pred, gt))  # [3*n_grid*n_grid, 50]   pairwise IoU between the two sets of boxes

        iou = fluid.layers.stack(
            ious, axis=0)  # [N, 3*n_grid*n_grid, 50]   pairwise IoU between the two sets of boxes
        # 3. Get iou_mask from the IoU between gt bboxes and prediction bboxes,
        #    get obj_mask from tobj (which holds gt_score), then calculate the objectness loss

        max_iou = fluid.layers.reduce_max(
            iou, dim=-1)  # [N, 3*n_grid*n_grid]   each pred box's highest IoU with all gts in this image
        iou_mask = fluid.layers.cast(
            max_iou <= ignore_thresh,
            dtype="float32")  # [N, 3*n_grid*n_grid]   1 at candidate negative samples
        if self.match_score:
            max_prob = fluid.layers.reduce_max(prob, dim=-1)
            iou_mask = iou_mask * fluid.layers.cast(max_prob <= 0.25,
                                                    dtype="float32")
        output_shape = fluid.layers.shape(output)
        an_num = len(anchors) // 2
        iou_mask = fluid.layers.reshape(
            iou_mask, (-1, an_num, output_shape[2],
                       output_shape[3]))  # [N, 3, n_grid, n_grid]   1 at candidate negative samples
        iou_mask.stop_gradient = True

        # NOTE: tobj holds gt_score, obj_mask holds object existence mask
        obj_mask = fluid.layers.cast(
            tobj > 0., dtype="float32")  # [N, 3, n_grid, n_grid]  1 at positive samples
        obj_mask.stop_gradient = True

        noobj_mask = (1.0 -
                      obj_mask) * iou_mask  # [N, 3, n_grid, n_grid]  1 at negative samples
        noobj_mask.stop_gradient = True
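        # (Clarifying note: a grid cell contributes to the no-object loss only if it
        #  has no assigned gt (obj_mask == 0) AND its best IoU with every gt stays
        #  below ignore_thresh (iou_mask == 1).)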

        # For positive objectness grids, the objectness loss is always calculated.
        # For negative objectness grids, the objectness loss is calculated only where iou_mask == 1.0.
        pred_conf = L.sigmoid(obj)
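        # (Added note: the focal-loss branch below down-weights easy examples:
        #  positives use -alpha * (1 - p)^gamma * log(p), scaled by the gt_score in tobj;
        #  negatives use -(1 - alpha) * p^gamma * log(1 - p), only where noobj_mask == 1.)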
        if self.focalloss_on_obj:
            alpha = self.focalloss_alpha
            gamma = self.focalloss_gamma
            pos_loss = tobj * (0 - L.log(pred_conf + 1e-9)) * L.pow(
                1 - pred_conf, gamma) * alpha
            neg_loss = noobj_mask * (0 - L.log(1 - pred_conf + 1e-9)) * L.pow(
                pred_conf, gamma) * (1 - alpha)
        else:
            pos_loss = tobj * (0 - L.log(pred_conf + 1e-9))
            neg_loss = noobj_mask * (0 - L.log(1 - pred_conf + 1e-9))
        pos_loss = fluid.layers.reduce_sum(pos_loss, dim=[1, 2, 3])
        neg_loss = fluid.layers.reduce_sum(neg_loss, dim=[1, 2, 3])

        return pos_loss, neg_loss
Example #28
def get_norm(indegree):
    """Get Laplacian Normalization"""
    float_degree = L.cast(indegree, dtype="float32")
    float_degree = L.clamp(float_degree, min=1.0)
    norm = L.pow(float_degree, factor=-0.5)
    return norm
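# A hedged NumPy equivalent of get_norm (illustrative sketch, not part of the
# original example): symmetric Laplacian normalization scales each node by
# degree ** -0.5, with the degree clamped to at least 1 so isolated nodes do
# not divide by zero.
import numpy as np

indegree_np = np.array([0, 1, 4], dtype="float32")
norm_np = np.clip(indegree_np, 1.0, None) ** -0.5  # -> [1.0, 1.0, 0.5]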
            conv02_out_tensor = fluid.layers.conv2d(
                input=conv01_out_tensor,
                num_filters=8,
                filter_size=3,
                stride=1,
                padding=1,
                param_attr=ParamAttr(name="conv02_weights"),
                bias_attr=ParamAttr(name="conv02_bias"))

            # Build the loss function
            y_true = P.data(name='y_true',
                            shape=[-1, 8, 28, 28],
                            append_batch_size=False,
                            dtype='float32')
            # First square the elementwise differences; you can use the P.pow() op or Python's ** operator.
            mseloss = P.pow(y_true - conv02_out_tensor, 2)
            mseloss = P.reduce_mean(mseloss)  # then take the mean, i.e. the MSE loss
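            # Equivalently (as the comment above notes, Variables support the ** operator):
            # mseloss = P.reduce_mean((y_true - conv02_out_tensor) ** 2)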

            # Optimizer: use SGD
            optimizer = fluid.optimizer.SGD(learning_rate=lr)
            optimizer.minimize(mseloss)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            # Rebuild the network once more with the same tensor names; the loss layers are not needed
            inputs = P.data(name='input_1',
                            shape=[-1, 3, 28, 28],
                            append_batch_size=False,
                            dtype='float32')
            conv01_out_tensor = fluid.layers.conv2d(
        print('------------------ step %d ------------------' % step)
        # ==================== train ====================
        batch_data = np.random.normal(loc=0, scale=1,
                                      size=(2, 3, 28, 28)).astype(np.float32)
        y_true_arr = np.random.normal(loc=0, scale=1,
                                      size=(2, 8, 28, 28)).astype(np.float32)

        batch_data2 = paddle.to_tensor(batch_data, place=place)
        y_true_arr2 = paddle.to_tensor(y_true_arr, place=place)

        paddle_conv01_out, paddle_bn01_out, paddle_act01_out, paddle_conv02_out, paddle_bn02_out, paddle_act02_out = model(
            batch_data2)

        # Build the loss function
        # First square the elementwise differences; you can use the P.pow() op or Python's ** operator.
        mseloss = P.pow(y_true_arr2 - paddle_act02_out, 2)
        mseloss = P.reduce_mean(mseloss)  # then take the mean, i.e. the MSE loss

        paddle_mseloss_out = mseloss.numpy()
        paddle_bn01_out = paddle_bn01_out.numpy()
        paddle_bn02_out = paddle_bn02_out.numpy()

        # Update the weights
        mseloss.backward()
        optimizer.step()
        optimizer.clear_grad()

        print('train_forward:')
        # Simulate the training forward pass in plain Python and check it against Paddle's outputs. We expect the same outputs as PaddlePaddle.
        my_conv01_out = conv01.train_forward(batch_data)
        my_bn01_out = bn01.train_forward(my_conv01_out)