Example #1
 def call(self, inputs, **kwargs):
     if self.mode == self.MODE_EXPAND:
         if K.dtype(inputs) != 'int32':
             inputs = K.cast(inputs, 'int32')
         return K.gather(
             self.embeddings,
             K.minimum(K.maximum(inputs, -self.input_dim), self.input_dim) +
             self.input_dim,
         )
     input_shape = K.shape(inputs)
     if self.mode == self.MODE_ADD:
         batch_size, seq_len, output_dim = input_shape[0], input_shape[1], input_shape[2]
     else:
         batch_size, seq_len, output_dim = input_shape[0], input_shape[1], self.output_dim
     pos_embeddings = K.tile(
         K.expand_dims(self.embeddings[:seq_len, :self.output_dim], axis=0),
         [batch_size, 1, 1],
     )
     if self.mode == self.MODE_ADD:
         return inputs + pos_embeddings
     return K.concatenate([inputs, pos_embeddings], axis=-1)
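In MODE_EXPAND above, position ids may be negative, so they are clipped to [-input_dim, input_dim] and shifted by input_dim into the valid table range [0, 2*input_dim]. A minimal NumPy sketch of that index mapping (names are illustrative, not part of the layer):

import numpy as np

input_dim = 5                                  # positions in [-5, 5] are representable
ids = np.array([-9, -5, -1, 0, 3, 7])          # raw relative positions
# clip to [-input_dim, input_dim], then shift into [0, 2 * input_dim]
rows = np.clip(ids, -input_dim, input_dim) + input_dim
print(rows)  # [ 0  0  4  5  8 10] -> row indices into a (2*input_dim + 1, dim) table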
Example #2
    def _preprocess_conv2d_input(input_tensor, data_format):
        """Transpose and cast the input before the conv2d.

        Parameters
        ----------
        input_tensor: tensor
            The input that requires transposing and casting
        data_format: str
            `"channels_last"` or `"channels_first"`

        Returns
        -------
        tensor
            The transposed and cast input tensor
        """
        if K.dtype(input_tensor) == "float64":
            input_tensor = tf.cast(input_tensor, "float32")
        if data_format == "channels_first":
            # Tensorflow uses the last dimension as channel dimension, instead of the 2nd one.
            # Theano input shape: (samples, input_depth, rows, cols)
            # Tensorflow input shape: (samples, rows, cols, input_depth)
            input_tensor = tf.transpose(input_tensor, (0, 2, 3, 1))
        return input_tensor
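A quick sanity check of the transpose above (runnable in TensorFlow 2.x eager mode; the tensor is synthetic):

import tensorflow as tf

x = tf.zeros((8, 3, 32, 32))        # channels_first: (samples, depth, rows, cols)
y = tf.transpose(x, (0, 2, 3, 1))   # channels_last: (samples, rows, cols, depth)
print(y.shape)                      # (8, 32, 32, 3)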
Example #3
 def call(self, inputs, mask=None):
     
     inputs, pos_input = inputs
     batch_size, seq_len, output_dim = self._get_shape(inputs)
     
     if self.mode == self.MODE_EXPAND:
         pos_input = inputs
     
     if K.dtype(pos_input) != K.floatx():
         pos_input = K.cast(pos_input, K.floatx())
     
     evens = K.arange(0, output_dim // 2) * 2
     odds = K.arange(0, output_dim // 2) * 2 + 1
     even_embd = K.sin(
         K.dot(
             K.expand_dims(pos_input, -1),
             K.expand_dims(1.0 / K.pow(
                 10000.0,
                 K.cast(evens, K.floatx()) / K.cast(output_dim, K.floatx())
             ), 0)
         )
     )
     odd_embd = K.cos(
         K.dot(
             K.expand_dims(pos_input, -1),
             K.expand_dims(1.0 / K.pow(
                 10000.0, K.cast((odds - 1), K.floatx()) / K.cast(output_dim, K.floatx())
             ), 0)
         )
     )
     embd = K.stack([even_embd, odd_embd], axis=-1)
     output = K.reshape(embd, [-1, seq_len, output_dim])
     if self.mode == self.MODE_CONCAT:
         output = K.concatenate([inputs, output], axis=-1)
     if self.mode == self.MODE_ADD:
         output += inputs
     return output
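The even/odd construction above is the standard sinusoidal encoding, PE(pos, 2i) = sin(pos / 10000^(2i/d)) and PE(pos, 2i+1) = cos(pos / 10000^(2i/d)); the K.stack plus K.reshape pair interleaves the sin and cos channels. A self-contained NumPy sketch of the same table (illustrative, not the layer's code):

import numpy as np

def sinusoidal_table(seq_len, dim):
    pos = np.arange(seq_len, dtype=float)[:, None]      # (seq_len, 1)
    i = np.arange(dim // 2, dtype=float)[None, :]       # (1, dim // 2)
    angles = pos / np.power(10000.0, 2.0 * i / dim)     # (seq_len, dim // 2)
    table = np.empty((seq_len, dim))
    table[:, 0::2] = np.sin(angles)                     # even channels
    table[:, 1::2] = np.cos(angles)                     # odd channels
    return table

print(sinusoidal_table(4, 6).shape)  # (4, 6)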
Example #4
    def new_get_updates(self, loss, params):
        self.slow_params = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        update_iter = [K.update_add(self.lookahead_iterations, 1)]

        def just_copy_func():
            copy_slow_params = [
                K.update(p, q) for p, q in zip(self.slow_params, params)
            ]
            return tf.group(*copy_slow_params)

        def update_func():
            update_params = [
                K.update(q, p * self.slow_ratio + q * self.fast_ratio)
                for p, q in zip(self.slow_params, params)
            ]
            with tf.control_dependencies(update_params):
                reset_slow_params = [
                    K.update(p, q) for p, q in zip(self.slow_params, params)
                ]
            return tf.group(*(reset_slow_params + update_iter))

        def just_iter_func():
            return tf.group(*update_iter)

        # copy params to self.slow_params at iteration 0
        copy_switch = K.equal(self.lookahead_iterations, 0)
        copy_params = [K.switch(copy_switch, just_copy_func, tf.no_op())]
        with tf.control_dependencies(copy_params):
            # do the 'slow weights update' every 'k' iterations
            update_switch = K.equal(self.lookahead_iterations % self.k, 0)
            with tf.control_dependencies(self.orig_get_updates(loss, params)):
                self.updates = [
                    K.switch(update_switch, update_func, just_iter_func)
                ]
                return self.updates
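The switches above implement the Lookahead rule: every k fast-optimizer steps, the fast weights are pulled toward the slow copy and the slow copy is reset to the result. A plain-NumPy sketch of that schedule, assuming slow_ratio + fast_ratio = 1 (hyper-parameter values are illustrative):

import numpy as np

slow_ratio, fast_ratio, k = 0.5, 0.5, 5

fast = np.array([1.0, 2.0])   # weights trained by the inner (fast) optimizer
slow = fast.copy()            # shadow copy, synchronized at iteration 0

for step in range(1, 16):
    fast = fast - 0.1                                  # stand-in for one fast step
    if step % k == 0:
        fast = slow * slow_ratio + fast * fast_ratio   # interpolate toward slow
        slow = fast.copy()                             # reset the slow weights
print(fast, slow)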
Example #5
    def _compute_target_mask(self, inputs, mask=None):
        input_shape = K.shape(inputs)
        input_type = K.dtype(inputs)

        mask_threshold = K.constant(1e8, dtype=input_type)
        
        channel_num = int(inputs.shape[-1])
        channel_dim = K.prod(input_shape[:-1])
        masked_inputs = inputs
        if mask is not None:
            masked_inputs = K.switch(
                K.cast(mask, K.floatx()) > 0.5,
                masked_inputs,
                K.ones_like(masked_inputs, dtype=input_type) * mask_threshold
            )
        norm = K.abs(masked_inputs)
        channeled_norm = K.transpose(K.reshape(norm, (channel_dim, channel_num)))
        weight_num = K.sum(
            K.reshape(K.cast(masked_inputs < mask_threshold, K.floatx()), (channel_dim, channel_num)),
            axis=0,
        )
        indices = K.stack(
            [
                K.arange(channel_num, dtype='int32'),
                K.cast(self.target_rate * weight_num, dtype='int32') - 1,
            ],
            axis=-1,
        )
        threshold = -tf.gather_nd(tf.nn.top_k(-channeled_norm, k=K.max(indices[:, 1]) + 1).values, indices)
        
        threshold = K.reshape(tf.tile(threshold, [channel_dim]), input_shape)
        target_mask = K.switch(
            norm <= threshold,
            K.ones_like(inputs, dtype=K.floatx()),
            K.zeros_like(inputs, dtype=K.floatx()),
        )
        return target_mask
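The heart of _compute_target_mask is a per-channel magnitude threshold: in every channel, the target_rate fraction of weights with the smallest |w| gets mask value 1 (the targeted candidates). A NumPy sketch of the same selection, assuming at least one weight is targeted per channel:

import numpy as np

target_rate = 0.5
w = np.random.randn(6, 3)                        # (weights_per_channel, channels)

norm = np.abs(w)
k = max(int(target_rate * w.shape[0]), 1)        # weights to target per channel
threshold = np.sort(norm, axis=0)[k - 1]         # k-th smallest |w| per channel
target_mask = (norm <= threshold).astype(float)  # 1 -> candidate for dropping
print(target_mask.mean(axis=0))                  # roughly target_rate per channel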
Example #6
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, iou threshold used to decide whether a box is ignored in the object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] \
        if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
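The ignore-mask loop is the subtle part of this loss: a prediction only contributes to the no-object confidence term when its best IoU with every ground-truth box stays below ignore_thresh; better-matching predictions are excluded from the negatives. A NumPy sketch of that masking for one image (the IoU values are made up):

import numpy as np

ignore_thresh = 0.5
# IoU of 4 predicted boxes against 2 ground-truth boxes (illustrative values)
iou = np.array([[0.1, 0.2],
                [0.6, 0.3],
                [0.0, 0.9],
                [0.4, 0.4]])
best_iou = iou.max(axis=-1)                            # best match per prediction
ignore_mask = (best_iou < ignore_thresh).astype(float)
print(ignore_mask)  # [1. 0. 0. 1.] -> only boxes 0 and 3 count as negatives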
Example #7
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self._iterations, 1)]

        lr = self._lr

        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self._iterations, K.dtype(self.decay))))

        t = K.cast(self._iterations, K.floatx()) + 1

        if self.initial_total_steps > 0:
            warmup_steps = self.total_steps * self.warmup_proportion
            decay_steps = self.total_steps - warmup_steps
            lr = K.switch(
                t <= warmup_steps,
                lr * (t / warmup_steps),
                lr * (1.0 - K.minimum(t, decay_steps) / decay_steps),
            )

        ms = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i))
            for (i, p) in enumerate(params)
        ]
        vs = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i))
            for (i, p) in enumerate(params)
        ]

        if self.amsgrad:
            vhats = [
                K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='vhat_' + str(i)) for (i, p) in enumerate(params)
            ]
        else:
            vhats = [
                K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))
            ]

        self._weights = [self._iterations] + ms + vs + vhats

        beta_1_t = K.pow(self.beta_1, t)
        beta_2_t = K.pow(self.beta_2, t)

        sma_inf = 2.0 / (1.0 - self.beta_2) - 1.0
        sma_t = sma_inf - 2.0 * t * beta_2_t / (1.0 - beta_2_t)

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            m_corr_t = m_t / (1.0 - beta_1_t)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                v_corr_t = K.sqrt(vhat_t / (1.0 - beta_2_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                v_corr_t = K.sqrt(v_t / (1.0 - beta_2_t) + self.epsilon)

            r_t = K.sqrt((sma_t - 4.0) / (sma_inf - 4.0) * (sma_t - 2.0) /
                         (sma_inf - 2.0) * sma_inf / sma_t)

            p_t = K.switch(sma_t > 5, r_t * m_corr_t / v_corr_t, m_corr_t)

            if self.initial_weight_decay > 0:
                p_t += self.weight_decay * p

            p_t = p - lr * p_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
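The sma_t and r_t terms above are the rectification from the RAdam paper: the adaptive step is only taken once the variance estimate is reliable (sma_t > 5 here), otherwise the update falls back to the bias-corrected momentum. A NumPy sketch of the rectification factor over time (beta_2 = 0.999):

import numpy as np

beta_2 = 0.999
sma_inf = 2.0 / (1.0 - beta_2) - 1.0      # maximum length of the approximated SMA

for t in [1.0, 10.0, 100.0, 1000.0]:
    beta_2_t = beta_2 ** t
    sma_t = sma_inf - 2.0 * t * beta_2_t / (1.0 - beta_2_t)
    if sma_t > 5:
        r_t = np.sqrt((sma_t - 4.0) / (sma_inf - 4.0)
                      * (sma_t - 2.0) / (sma_inf - 2.0)
                      * sma_inf / sma_t)
        print(t, round(r_t, 4))           # rectified adaptive step is used
    else:
        print(t, 'warmup: plain bias-corrected momentum step')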
Example #8
 def call(self, inputs):
     if K.dtype(inputs) != 'float32':
         inputs = K.cast(inputs, 'float32')
     inner_out = K.relu(K.dot(inputs, self.weights_inner) + self.bais_inner)
     outputs = K.dot(inner_out, self.weights_out) + self.bais_out
     return outputs
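This call is a plain two-layer feed-forward block, relu(x·W1 + b1)·W2 + b2 (the misspelled bais_* attributes are the layer's bias weights). An equivalent NumPy computation with made-up shapes:

import numpy as np

x = np.random.randn(4, 8)                      # (batch, features)
w1, b1 = np.random.randn(8, 16), np.zeros(16)
w2, b2 = np.random.randn(16, 8), np.zeros(8)

inner = np.maximum(x @ w1 + b1, 0.0)           # relu(x W1 + b1)
out = inner @ w2 + b2
print(out.shape)                               # (4, 8)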
Example #9
def yolo_loss(args, input_shape, anchors, anchors_mask, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False):
    num_layers = len(anchors_mask)
    #---------------------------------------------------------------------------------------------------#
    #   Separate the predictions from the ground truth; args is [*model_body.output, *y_true].
    #   y_true is a list of three feature levels, with shapes:
    #   (m,13,13,3,85)
    #   (m,26,26,3,85)
    #   (m,52,52,3,85)
    #   yolo_outputs is a list of three feature levels, with shapes:
    #   (m,13,13,3,85)
    #   (m,26,26,3,85)
    #   (m,52,52,3,85)
    #---------------------------------------------------------------------------------------------------#
    y_true          = args[num_layers:]
    yolo_outputs    = args[:num_layers]

    #-----------------------------------------------------------#
    #   input_shape is 416,416
    #-----------------------------------------------------------#
    input_shape = K.cast(input_shape, K.dtype(y_true[0]))
    #-----------------------------------------------------------#
    #   the grid shapes are [13,13], [26,26] and [52,52]
    #-----------------------------------------------------------#
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]

    #-----------------------------------------------------------#
    #   take each image in the batch
    #   m is the batch_size
    #-----------------------------------------------------------#
    m = K.shape(yolo_outputs[0])[0]

    loss    = 0
    num_pos = 0
    #---------------------------------------------------------------------------------------------------#
    #   y_true is a list of three feature levels with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    #   yolo_outputs is a list of three feature levels with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    #---------------------------------------------------------------------------------------------------#
    for l in range(num_layers):
        #-----------------------------------------------------------#
        #   take the first feature level (m,13,13,3,85) as an example
        #   extract the positions of this level that contain a target: (m,13,13,3,1)
        #-----------------------------------------------------------#
        object_mask = y_true[l][..., 4:5]
        #-----------------------------------------------------------#
        #   extract the corresponding class probabilities (m,13,13,3,80)
        #-----------------------------------------------------------#
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)

        #-----------------------------------------------------------#
        #   process the yolo_outputs feature level and get four return values:
        #   grid        (13,13,1,2) grid coordinates
        #   raw_pred    (m,13,13,3,85) raw, unprocessed predictions
        #   pred_xy     (m,13,13,3,2) decoded center coordinates
        #   pred_wh     (m,13,13,3,2) decoded width/height
        #-----------------------------------------------------------#
        grid, raw_pred, pred_xy, pred_wh = get_anchors_and_decode(yolo_outputs[l],
             anchors[anchors_mask[l]], num_classes, input_shape, calc_loss=True)

        #-----------------------------------------------------------#
        #   pred_box is the decoded position of the predicted boxes
        #   (m,13,13,3,4)
        #-----------------------------------------------------------#
        pred_box = K.concatenate([pred_xy, pred_wh])

        #-----------------------------------------------------------#
        #   to find the negative samples, first create an array, []
        #-----------------------------------------------------------#
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        
        #-----------------------------------------------------------#
        #   compute ignore_mask for each image
        #-----------------------------------------------------------#
        def loop_body(b, ignore_mask):
            #-----------------------------------------------------------#
            #   take out the n ground-truth boxes: n,4
            #-----------------------------------------------------------#
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            #-----------------------------------------------------------#
            #   compute the iou between predicted and ground-truth boxes
            #   pred_box    13,13,3,4 coordinates of the predicted boxes
            #   true_box    n,4 coordinates of the ground-truth boxes
            #   iou         13,13,3,n iou of predicted vs ground-truth boxes
            #-----------------------------------------------------------#
            iou = box_iou(pred_box[b], true_box)

            #-----------------------------------------------------------#
            #   best_iou    13,13,3 best overlap of each feature point with any ground-truth box
            #-----------------------------------------------------------#
            best_iou = K.max(iou, axis=-1)

            #-----------------------------------------------------------#
            #   a predicted box whose best iou with the ground-truth boxes
            #   is below ignore_thresh is considered to have no matching
            #   ground truth. The purpose of this step:
            #   ignore feature points whose predictions already match a
            #   ground-truth box well; such boxes are already fairly accurate
            #   and unsuitable as negative samples, so they are ignored.
            #-----------------------------------------------------------#
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask

        #-----------------------------------------------------------#
        #   loop here over every image in the batch
        #-----------------------------------------------------------#
        _, ignore_mask = tf.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])

        #-----------------------------------------------------------#
        #   ignore_mask selects the feature points used as negative samples
        #   (m,13,13,3)
        #-----------------------------------------------------------#
        ignore_mask = ignore_mask.stack()
        #   (m,13,13,3,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        #-----------------------------------------------------------#
        #   the larger the ground-truth box, the smaller its weight;
        #   small boxes get larger weights.
        #-----------------------------------------------------------#
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        #-----------------------------------------------------------#
        #   compute the CIoU loss
        #-----------------------------------------------------------#
        raw_true_box    = y_true[l][...,0:4]
        ciou            = box_ciou(pred_box, raw_true_box)
        ciou_loss       = object_mask * box_loss_scale * (1 - ciou)
        
        #------------------------------------------------------------------------------#
        #   if a box exists at this position, compute the cross-entropy between 1 and the confidence;
        #   if no box exists here, compute the cross-entropy between 0 and the confidence.
        #   Some positions are excluded here: those whose best_iou is at least
        #   ignore_thresh are not counted as negatives, because their predictions
        #   already match a ground-truth box fairly well and are therefore
        #   unsuitable as negative samples.
        #------------------------------------------------------------------------------#
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)

        #-----------------------------------------------------------#
        #   sum up all the losses
        #-----------------------------------------------------------#
        location_loss   = K.sum(ciou_loss)
        confidence_loss = K.sum(confidence_loss)
        class_loss      = K.sum(class_loss)
        #-----------------------------------------------------------#
        #   count the number of positive samples
        #-----------------------------------------------------------#
        num_pos += tf.maximum(K.sum(K.cast(object_mask, tf.float32)), 1)
        loss    += location_loss + confidence_loss + class_loss

    loss = loss / num_pos
    return loss
Example #10
def batched_yolo3_postprocess(args,
              anchors,
              num_classes,
              max_boxes=100,
              confidence=0.1,
              iou_threshold=0.4):
    """Postprocess for YOLOv3 model on given input and return filtered boxes."""

    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    image_shape = args[num_layers]

    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]] # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    batch_size = K.shape(image_shape)[0] # batch size, tensor
    # print("yolo_outputs",yolo_outputs)
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = batched_yolo3_boxes_and_scores(yolo_outputs[l],
            anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=1)
    box_scores = K.concatenate(box_scores, axis=1)

    mask = box_scores >= confidence
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    def single_image_nms(b, batch_boxes, batch_scores, batch_classes):
        boxes_ = []
        scores_ = []
        classes_ = []
        for c in range(num_classes):
            # TODO: use keras backend instead of tf.
            class_boxes = tf.boolean_mask(boxes[b], mask[b, :, c])
            class_box_scores = tf.boolean_mask(box_scores[b, :, c], mask[b, :, c])
            nms_index = tf.image.non_max_suppression(
                class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
            class_boxes = K.gather(class_boxes, nms_index)
            class_box_scores = K.gather(class_box_scores, nms_index)
            classes = K.ones_like(class_box_scores, 'int32') * c
            boxes_.append(class_boxes)
            scores_.append(class_box_scores)
            classes_.append(classes)

        boxes_ = K.concatenate(boxes_, axis=0)
        scores_ = K.concatenate(scores_, axis=0)
        classes_ = K.concatenate(classes_, axis=0)

        batch_boxes = batch_boxes.write(b, boxes_)
        batch_scores = batch_scores.write(b, scores_)
        batch_classes = batch_classes.write(b, classes_)

        return b+1, batch_boxes, batch_scores, batch_classes

    batch_boxes = tf.TensorArray(K.dtype(boxes), size=1, dynamic_size=True)
    batch_scores = tf.TensorArray(K.dtype(box_scores), size=1, dynamic_size=True)
    batch_classes = tf.TensorArray(dtype=tf.int32, size=1, dynamic_size=True)
    _, batch_boxes, batch_scores, batch_classes = tf.while_loop(lambda b,*args: b<batch_size, single_image_nms, [0, batch_boxes, batch_scores, batch_classes])

    batch_boxes = batch_boxes.stack()
    batch_scores = batch_scores.stack()
    batch_classes = batch_classes.stack()

    return batch_boxes, batch_scores, batch_classes
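tf.image.non_max_suppression returns the indices of the boxes that survive suppression; the loop above runs it once per class for every image. A minimal eager-mode demo (boxes are [y1, x1, y2, x2]; the values are made up):

import tensorflow as tf

boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                     [0.0, 0.0, 0.9, 0.9],    # heavy overlap with box 0
                     [0.5, 0.5, 1.0, 1.0]])
scores = tf.constant([0.9, 0.8, 0.7])
keep = tf.image.non_max_suppression(boxes, scores, max_output_size=10,
                                    iou_threshold=0.4)
print(keep.numpy())  # [0 2]: box 1 is suppressed by the higher-scoring box 0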
Example #11
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)

        # first update the number of iterations
        self.updates = [K.update_add(self.iterations, 1)]
        
        if self.decay_epochs:
            ite_casted = K.cast(self.iterations, K.dtype(self.decay_epochs))
            hit_decay_epoch = K.any(K.equal(ite_casted, self.decay_epochs))
            
            #print(hit_decay_epoch)
            lr = K.switch(hit_decay_epoch, self.lr['all']*self.decay['all'],
                          self.lr['all'])

            #K.print_tensor(self.lr['all'])
            #a = K.switch(hit_decay_epoch, 
            #             K.print_tensor(self.lr['all'],message='Decays:'), 
            #             K.print_tensor(self.lr['all'],message=' '))


            self.updates.append(K.update(self.lr['all'],lr))

        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(s) for s in shapes]
        self.weights = [self.iterations] + moments
        #print(self.weights)
        

        for p, g, m in zip(params, grads, moments):
            #print("HEREEEE:", p.name, g, m)
            lrptrkey = set_pattern_find(p.name, self.lr.keys())
            if lrptrkey:
                if self.verbose > 0:
                    print("Setting different learning rate for ", p.name, " : ", K.eval(self.lr[lrptrkey]))
                lr = self.lr[lrptrkey]
                dcptrkey = set_pattern_find(p.name, self.decay.keys())
                if self.decay_epochs and dcptrkey:
                    lr = K.switch(hit_decay_epoch, self.lr[lrptrkey] * self.decay[dcptrkey],
                                  self.lr[lrptrkey])
                    self.updates.append(K.update(self.lr[lrptrkey], lr))
                    if self.verbose > 0:
                        print("Added decay to ", p.name, ": ", K.eval(lr), ",", self.decay[dcptrkey])
                elif self.decay_epochs:
                    lr = K.switch(hit_decay_epoch, self.lr[lrptrkey] * self.decay['all'], self.lr[lrptrkey])
                    self.updates.append(K.update(self.lr[lrptrkey], lr))
                    if self.verbose > 0:
                        print("Added decay to ", p.name, ": ", K.eval(lr), ",", self.decay['all'])
                else:
                    lr = self.lr[lrptrkey]
            else:
                lr = self.lr['all']

            momptrkey = set_pattern_find(p.name, self.momentum.keys())
            if momptrkey:
                if self.verbose > 0:
                    print("Setting different momentum for ", p.name, " , ",
                          K.eval(self.momentum[momptrkey]))
                momentum = self.momentum[momptrkey]
            else:
                momentum = self.momentum['all']

            v = momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + momentum * (momentum * m - lr * g) - lr * g
            else:
                new_p = p + momentum * m - lr * g
            
            # CHANGE CLIP
            _to_tensor = K.tensorflow_backend._to_tensor
            _clip_by_val = K.tf.clip_by_value
            margin = K.mean(K.abs(p*K.constant(self.UPCLIP)))
            min_value = _to_tensor(p-margin, p.dtype.base_dtype)
            max_value = _to_tensor(p+margin, p.dtype.base_dtype)
            
            max_v = K.maximum(min_value, max_value)
            min_v = K.minimum(min_value, max_value)

            new_p = _clip_by_val(new_p, min_v, max_v)
             
            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            clptrkey = set_pattern_find(p.name, self.clips.keys())
            if self.clips_val and clptrkey:
                if self.verbose > 0:
                    print("Clipping variable", p.name, " to ", self.clips[clptrkey])
                c = K.eval(self.clips[clptrkey])
                new_p = K.clip(new_p, c[0], c[1])
            #print("updates for ", p.name, " lr: ", K.eval(lr), " mom:", K.eval(momentum))
            self.updates.append(K.update(p, new_p))
        return self.updates
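set_pattern_find is this optimizer's helper for matching a parameter name against the pattern keys of the lr/decay/momentum/clips dicts. A hypothetical stand-in based on fnmatch shows the idea (an assumption for illustration, not the project's actual implementation):

from fnmatch import fnmatch

def set_pattern_find(name, keys):
    """Return the first pattern key (other than 'all') that matches `name`."""
    for key in keys:
        if key != 'all' and fnmatch(name, '*' + key + '*'):
            return key
    return None

lr = {'all': 1e-3, 'dense': 1e-4}
print(set_pattern_find('dense_1/kernel:0', lr.keys()))  # 'dense'
print(set_pattern_find('conv2d_3/bias:0', lr.keys()))   # None -> falls back to 'all'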
Example #12
    def __call__(self,
                 labels,
                 outputs,
                 anchors,
                 num_classes,
                 ignore_thresh=.5,
                 label_smoothing=0,
                 elim_grid_sense=True,
                 use_focal_loss=False,
                 use_focal_obj_loss=False,
                 use_softmax_loss=False,
                 use_giou_loss=False,
                 use_diou_loss=True):  # pylint: disable=R0915
        """
        YOLOv3 loss function.

        :param yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
        :param y_true: list of array, the output of preprocess_true_boxes
        :param anchors: array, shape=(N, 2), wh
        :param num_classes: integer
        :param ignore_thresh: float, the iou threshold whether to ignore object confidence loss
        :return loss: tensor, shape=(1,)
        """
        anchors = np.array(anchors).astype(float).reshape(-1, 2)
        num_layers = len(anchors) // 3  # default setting
        yolo_outputs = list(outputs.values())  # args[:num_layers]
        y_true = list(labels.values())  # args[num_layers:]

        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        scale_x_y = [1.05, 1.1, 1.2] if elim_grid_sense else [None, None, None]

        input_shape = K.cast(
            K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
        loss = 0
        total_location_loss = 0
        total_confidence_loss = 0
        total_class_loss = 0
        batch_size = K.shape(yolo_outputs[0])[0]  # batch size, tensor
        batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

        for i in range(num_layers):
            object_mask = y_true[i][..., 4:5]
            true_class_probs = y_true[i][..., 5:]
            if label_smoothing:
                true_class_probs = self._smooth_labels(true_class_probs,
                                                       label_smoothing)
                true_objectness_probs = self._smooth_labels(
                    object_mask, label_smoothing)
            else:
                true_objectness_probs = object_mask

            raw_pred, pred_xy, pred_wh = self.yolo3_decode(
                yolo_outputs[i],
                anchors[anchor_mask[i]],
                num_classes,
                input_shape,
                scale_x_y=scale_x_y[i])
            pred_box = K.concatenate([pred_xy, pred_wh])

            box_loss_scale = 2 - y_true[i][..., 2:3] * y_true[i][..., 3:4]

            # Find ignore mask, iterate over each of batch.
            ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                         size=1,
                                         dynamic_size=True)
            object_mask_bool = K.cast(object_mask, 'bool')

            def loop_body(b, ignore_mask):
                true_box = tf.boolean_mask(y_true[i][b, ..., 0:4],
                                           object_mask_bool[b, ..., 0])
                iou = self.box_iou(pred_box[b], true_box)
                best_iou = K.max(iou, axis=-1)
                ignore_mask = ignore_mask.write(
                    b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
                return b + 1, ignore_mask

            _, ignore_mask = tf.while_loop(lambda b, *args: b < batch_size,
                                           loop_body, [0, ignore_mask])
            ignore_mask = ignore_mask.stack()
            ignore_mask = K.expand_dims(ignore_mask, -1)

            raw_pred = raw_pred + K.epsilon()
            if use_focal_obj_loss:
                # Focal loss for objectness confidence
                confidence_loss = self.sigmoid_focal_loss(
                    true_objectness_probs, raw_pred[..., 4:5])
            else:
                confidence_loss = (object_mask * K.binary_crossentropy(true_objectness_probs,
                                                                       raw_pred[...,4:5],
                                                                       from_logits=True)) \
                                  + ((1-object_mask) * ignore_mask * K.binary_crossentropy(object_mask,
                                                                                           raw_pred[...,4:5],
                                                                                           from_logits=True))

            if use_focal_loss:
                # Focal loss for classification score
                if use_softmax_loss:
                    class_loss = self.softmax_focal_loss(
                        true_class_probs, raw_pred[..., 5:])
                else:
                    class_loss = self.sigmoid_focal_loss(
                        true_class_probs, raw_pred[..., 5:])
            else:
                if use_softmax_loss:
                    # use softmax style classification output
                    class_loss = object_mask \
                                 * K.expand_dims(K.categorical_crossentropy(true_class_probs,
                                                                            raw_pred[...,5:],
                                                                            from_logits=True), axis=-1)
                else:
                    # use sigmoid style classification output
                    class_loss = object_mask \
                                 * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)

            raw_true_box = y_true[i][..., 0:4]
            diou = self.box_diou(raw_true_box, pred_box)
            diou_loss = object_mask * box_loss_scale * (1 - diou)
            diou_loss = K.sum(diou_loss) / batch_size_f
            location_loss = diou_loss

            confidence_loss = K.sum(confidence_loss) / batch_size_f
            class_loss = K.sum(class_loss) / batch_size_f
            loss += location_loss + confidence_loss + class_loss
            total_location_loss += location_loss
            total_confidence_loss += confidence_loss
            total_class_loss += class_loss

        loss = K.expand_dims(loss, axis=-1)

        return loss, total_location_loss, total_confidence_loss, total_class_loss
Example #13
def yolo_loss(args,
              anchors,
              num_classes,
              ignore_thresh=.5,
              label_smoothing=0.1,
              print_loss=False):

    # there are two feature layers in total
    num_layers = len(anchors) // 3

    # Separate the predictions from the ground truth; args is [*model_body.output, *y_true].
    # y_true is a list of two feature levels with shapes (m,13,13,3,85), (m,26,26,3,85).
    # yolo_outputs is a list of two feature levels with shapes (m,13,13,255), (m,26,26,255).
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    # anchor (prior) boxes
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] \
        if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # input_shape is 608,608
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    loss = 0

    # take each image in the batch
    # m is the batch_size
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    # y_true is a list of two feature levels with shapes (m,13,13,3,85), (m,26,26,3,85).
    # yolo_outputs is a list of two feature levels with shapes (m,13,13,255), (m,26,26,255).
    for l in range(num_layers):
        # take the first feature level (m,13,13,3,85) as an example
        # extract the positions of this level that contain a target: (m,13,13,3,1)
        object_mask = y_true[l][..., 4:5]
        # extract the corresponding class probabilities (m,13,13,3,80)
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs,
                                              label_smoothing)

        # process the yolo_outputs feature level output:
        # grid is the grid structure (13,13,1,2), raw_pred is the raw prediction (m,13,13,3,85)
        # plus the decoded xy and wh, each (m,13,13,3,2)
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)

        # the decoded position of the predicted boxes
        # (m,13,13,3,4)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # to find the negative samples, first create an array, []
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        # compute ignore_mask for each image
        def loop_body(b, ignore_mask):
            # take out the parameters of all ground-truth boxes in image b
            # n,4
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # compute the iou between the predictions and the ground truth
            # pred_box is 13,13,3,4
            # the result is the iou of each pred_box with every ground-truth box
            # 13,13,3,n
            iou = box_iou(pred_box[b], true_box)

            # 13,13,3
            best_iou = K.max(iou, axis=-1)

            # if a predicted box overlaps some ground-truth box by more than 0.5, it is ignored (not used as a negative).
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # iterate over all the images
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])

        # stack the per-image results for further processing
        ignore_mask = ignore_mask.stack()
        # (m,13,13,3,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Calculate ciou loss as location loss
        raw_true_box = y_true[l][..., 0:4]
        ciou = box_ciou(pred_box, raw_true_box)
        ciou_loss = object_mask * box_loss_scale * (1 - ciou)
        ciou_loss = K.sum(ciou_loss) / mf
        location_loss = ciou_loss

        # if a box exists at this position, compute the cross-entropy between 1 and the confidence
        # if no box exists here and best_iou<ignore_thresh holds, the position counts as a negative sample
        # best_iou<ignore_thresh limits the number of negative samples
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += location_loss + confidence_loss + class_loss
        # if print_loss:
        # loss = tf.Print(loss, [loss, confidence_loss, class_loss, location_loss], message='loss: ')
    loss = K.expand_dims(loss, axis=-1)
    return loss
Example #14
def yolo_loss(args, anchors, num_classes, ignore_thresh=0.5, print_loss=False):
    """Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body_full or yolo_body_tiny
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, iou threshold used to decide whether a box is ignored in the object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    """
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] \
        if num_layers == 3 else [[3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32,
                         K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[layer_idx])[1:3], K.dtype(y_true[0]))
                   for layer_idx in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for layer_idx in range(num_layers):
        object_mask = y_true[layer_idx][..., 4:5]
        true_class_probs = y_true[layer_idx][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[layer_idx],
                                                     anchors[anchor_mask[layer_idx]],
                                                     num_classes, input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[layer_idx][..., :2] * grid_shapes[layer_idx][::-1] - grid
        raw_true_wh = K.log(y_true[layer_idx][..., 2:4] / anchors[anchor_mask[layer_idx]] * input_shape[::-1])
        # Keras switch allows a scalar condition, but an element-wise one is expected here;
        #  also `object_mask` has last dimension 1 while the inputs/outputs have 2 (so some replication is needed)
        # raw_true_wh = tf.where(tf.greater(K.concatenate([object_mask] * 2), 0),
        #                        raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[layer_idx][..., 2:3] * y_true[layer_idx][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def _loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[layer_idx][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou_xywh(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh,
                                                      K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = while_loop(
            lambda b, *args: b < m, _loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        ce = K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                   from_logits=True)
        xy_loss = object_mask * box_loss_scale * ce
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        ce_loss = K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
                                        from_logits=True)
        confidence_loss = object_mask * ce_loss + (1 - object_mask) * ce_loss * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs,
                                                         raw_pred[..., 5:],
                                                         from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss,
                                   class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    # see: https://github.com/qqwweee/keras-yolo3/issues/129#issuecomment-408855511
    return K.expand_dims(loss, axis=0)
Example #15
def yolo5_loss(args,
               anchors,
               num_classes,
               ignore_thresh=.5,
               label_smoothing=0,
               elim_grid_sense=True,
               use_focal_loss=False,
               use_focal_obj_loss=False,
               use_softmax_loss=False,
               use_giou_loss=True,
               use_diou_loss=False):
    '''
    YOLOv5 loss function.

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, iou threshold used to decide whether a box is ignored in the object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    # gains for box, class and confidence loss
    # from https://github.com/ultralytics/yolov5/blob/master/data/hyp.scratch.yaml
    box_loss_gain = 0.05
    class_loss_gain = 0.5
    confidence_loss_gain = 1.0

    # balance weights for confidence (objectness) loss
    # on different predict heads (x/8, x/16, x/32)
    # from https://github.com/ultralytics/yolov5/blob/master/utils/loss.py#L109
    confidence_balance_weights = [4.0, 1.0, 0.4]

    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        # YOLOv5 enable "elim_grid_sense" by default
        scale_x_y = [2.0, 2.0, 2.0]  #if elim_grid_sense else [None, None, None]
    else:
        anchor_mask = [[3, 4, 5], [0, 1, 2]]
        scale_x_y = [1.05, 1.05]  #if elim_grid_sense else [None, None]

    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[i])[1:3], K.dtype(y_true[0]))
        for i in range(num_layers)
    ]
    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for i in range(num_layers):
        object_mask = y_true[i][..., 4:5]
        true_class_probs = y_true[i][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs,
                                              label_smoothing)
            #true_objectness_probs = _smooth_labels(object_mask, label_smoothing)
        #else:
        #true_objectness_probs = object_mask

        grid, raw_pred, pred_xy, pred_wh = yolo5_decode(
            yolo_outputs[i],
            anchors[anchor_mask[i]],
            num_classes,
            input_shape,
            scale_x_y=scale_x_y[i],
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[i][..., :2] * grid_shapes[i][::-1] - grid
        raw_true_wh = K.log(y_true[i][..., 2:4] / anchors[anchor_mask[i]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        #box_loss_scale = 2 - y_true[i][...,2:3]*y_true[i][...,3:4]

        # Find ignore mask, iterate over each of batch.
        #ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        #object_mask_bool = K.cast(object_mask, 'bool')
        #def loop_body(b, ignore_mask):
        #true_box = tf.boolean_mask(y_true[i][b,...,0:4], object_mask_bool[b,...,0])
        #iou = box_iou(pred_box[b], true_box)
        #best_iou = K.max(iou, axis=-1)
        #ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
        #return b+1, ignore_mask
        #_, ignore_mask = tf.while_loop(lambda b,*args: b<batch_size, loop_body, [0, ignore_mask])
        #ignore_mask = ignore_mask.stack()
        #ignore_mask = K.expand_dims(ignore_mask, -1)

        if use_giou_loss:
            # Calculate GIoU loss as location loss
            raw_true_box = y_true[i][..., 0:4]
            giou = box_giou(raw_true_box, pred_box)
            giou_loss = object_mask * (1 - giou)
            location_loss = giou_loss
            iou = giou
        elif use_diou_loss:
            # Calculate DIoU loss as location loss
            raw_true_box = y_true[i][..., 0:4]
            diou = box_diou(raw_true_box, pred_box)
            diou_loss = object_mask * (1 - diou)
            location_loss = diou_loss
            iou = diou
        else:
            raise ValueError('Unsupported IOU loss type')
            # Standard YOLOv3 location loss
            # K.binary_crossentropy is helpful to avoid exp overflow.
            #xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
            #wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
            #xy_loss = K.sum(xy_loss) / batch_size_f
            #wh_loss = K.sum(wh_loss) / batch_size_f
            #location_loss = xy_loss + wh_loss

        # use box iou for positive sample as objectness ground truth,
        # to calculate confidence loss
        # from https://github.com/ultralytics/yolov5/blob/master/utils/loss.py#L127
        true_objectness_probs = object_mask * iou

        if use_focal_obj_loss:
            # Focal loss for objectness confidence
            confidence_loss = sigmoid_focal_loss(
                true_objectness_probs,
                raw_pred[..., 4:5]) * confidence_balance_weights[i]
        else:
            confidence_loss = K.binary_crossentropy(
                true_objectness_probs, raw_pred[..., 4:5],
                from_logits=True) * confidence_balance_weights[i]
            #confidence_loss = object_mask * K.binary_crossentropy(true_objectness_probs, raw_pred[...,4:5], from_logits=True)+ \
            #(1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        if use_focal_loss:
            # Focal loss for classification score
            if use_softmax_loss:
                class_loss = softmax_focal_loss(true_class_probs, raw_pred[..., 5:])
            else:
                class_loss = sigmoid_focal_loss(true_class_probs, raw_pred[..., 5:])
        else:
            if use_softmax_loss:
                # use softmax style classification output
                class_loss = object_mask * K.expand_dims(
                    K.categorical_crossentropy(
                        true_class_probs, raw_pred[..., 5:], from_logits=True),
                    axis=-1)
            else:
                # use sigmoid style classification output
                class_loss = object_mask * K.binary_crossentropy(
                    true_class_probs, raw_pred[..., 5:], from_logits=True)

        confidence_loss = confidence_loss_gain * K.sum(
            confidence_loss) / batch_size_f
        class_loss = class_loss_gain * K.sum(class_loss) / batch_size_f
        location_loss = box_loss_gain * K.sum(location_loss) / batch_size_f

        loss += location_loss + confidence_loss + class_loss
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss

    # Fit for tf 2.0.0 loss shape
    loss = K.expand_dims(loss, axis=-1)

    return loss, total_location_loss, total_confidence_loss, total_class_loss
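box_giou computes the generalized IoU, which augments IoU with a penalty for the empty area of the smallest enclosing box: GIoU = IoU - (area(C) - union) / area(C). A NumPy sketch for a single pair of boxes, assuming the (cx, cy, w, h) layout used by y_true and pred_box:

import numpy as np

def giou_xywh(a, b):
    # convert (cx, cy, w, h) to corner coordinates
    a_min, a_max = a[:2] - a[2:] / 2, a[:2] + a[2:] / 2
    b_min, b_max = b[:2] - b[2:] / 2, b[:2] + b[2:] / 2

    inter = np.prod(np.clip(np.minimum(a_max, b_max) - np.maximum(a_min, b_min), 0, None))
    union = np.prod(a[2:]) + np.prod(b[2:]) - inter
    iou = inter / union

    enclose = np.prod(np.maximum(a_max, b_max) - np.minimum(a_min, b_min))  # area(C)
    return iou - (enclose - union) / enclose

print(giou_xywh(np.array([0.5, 0.5, 1.0, 1.0]),
                np.array([1.0, 1.0, 1.0, 1.0])))  # ~-0.079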
Example #16
File: loss.py  Project: yihjian/RMYolo
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):

    # there are three feature layers in total
    num_layers = len(anchors) // 3

    # Separate the predictions from the ground truth; args is [*model_body.output, *y_true].
    # y_true is a list of three feature levels with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    # yolo_outputs is a list of three feature levels with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    # anchor (prior) boxes
    # mask 6,7,8 -> 116,90,  156,198,  373,326
    # mask 3,4,5 -> 30,61,  62,45,  59,119
    # mask 0,1,2 -> 10,13,  16,30,  33,23
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] \
        if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # input_shape is 416,416
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    # the grid shapes are 13,13; 26,26; 52,52
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0

    # take each image in the batch
    # m is the batch_size
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    # y_true is a list of three feature levels with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    # yolo_outputs is a list of three feature levels with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    for l in range(num_layers):
        # take the first feature level (m,13,13,3,85) as an example
        # extract the positions of this level that contain a target: (m,13,13,3,1)
        object_mask = y_true[l][..., 4:5]
        # extract the corresponding class probabilities (m,13,13,3,80)
        true_class_probs = y_true[l][..., 5:]

        # process the yolo_outputs feature level output:
        # grid is the grid structure (13,13,1,2), raw_pred is the raw prediction (m,13,13,3,85)
        # plus the decoded xy and wh, each (m,13,13,3,2)
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)

        # the decoded position of the predicted boxes
        # (m,13,13,3,4)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # to find the negative samples, first create an array, []
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        # compute ignore_mask for each image
        def loop_body(b, ignore_mask):
            # take out the parameters of all ground-truth boxes in image b
            # n,4
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # compute the iou between the predictions and the ground truth
            # pred_box is 13,13,3,4
            # the result is the iou of each pred_box with every ground-truth box
            # 13,13,3,n
            iou = box_iou(pred_box[b], true_box)

            # 13,13,3
            best_iou = K.max(iou, axis=-1)

            # a predicted box whose best iou is below ignore_thresh is considered to have
            # no matching ground truth and is treated as a negative sample of this image
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # iterate over all the images
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])

        # stack the per-image results for further processing
        ignore_mask = ignore_mask.stack()
        # (m,13,13,3,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # encode the ground-truth boxes into the same format as the predictions, for the loss below
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid  # (w, h) order to match the normalized xy
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])

        # object_mask keeps the wh value only where a target actually exists
        # switch acts as an if/else conditional
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])

        # if a box exists at this position, compute the cross-entropy between 1 and the confidence
        # if no box exists here and best_iou<ignore_thresh holds, the position counts as a negative sample
        # best_iou<ignore_thresh limits the number of negative samples
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    loss = K.expand_dims(loss, axis=-1)
    return loss
Example #17
    def call(self, inputs, training=None):
        if self.quant_mode not in [None, 'extrinsic', 'hybrid', 'intrinsic']:
            raise ValueError(
                'Invalid quantization mode. The \'quant_mode\' argument must be one of \'extrinsic\' , \'intrinsic\' , \'hybrid\' or None.'
            )

        if isinstance(self.quantizer, list) and len(self.quantizer) == 3:
            quantizer_input = self.quantizer[0]
            quantizer_weight = self.quantizer[1]
            quantizer_output = self.quantizer[2]
        else:
            quantizer_input = self.quantizer
            quantizer_weight = self.quantizer
            quantizer_output = self.quantizer

        input_shape = K.int_shape(inputs)
        # Prepare broadcasting shape.
        ndim = len(input_shape)
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        # Determines whether broadcasting is needed.
        needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

        def normalize_inference():
            if needs_broadcasting:
                # In this case we must explicitly broadcast all parameters.
                broadcast_moving_mean = K.reshape(self.moving_mean,
                                                  broadcast_shape)
                broadcast_moving_variance = K.reshape(self.moving_variance,
                                                      broadcast_shape)
                if self.center:
                    broadcast_beta = K.reshape(self.beta, broadcast_shape)
                else:
                    broadcast_beta = None
                if self.scale:
                    broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                else:
                    broadcast_gamma = None

                if self.quant_mode in ['hybrid', 'intrinsic']:
                    broadcast_moving_mean = quantizer_weight.quantize(
                        broadcast_moving_mean)
                    broadcast_moving_variance = quantizer_weight.quantize(
                        broadcast_moving_variance)
                    if self.center:
                        broadcast_beta = quantizer_weight.quantize(
                            broadcast_beta)
                    if self.scale:
                        broadcast_gamma = quantizer_weight.quantize(
                            broadcast_gamma)

                if self.quant_mode in ['hybrid', 'intrinsic']:
                    quantized_inputs = quantizer_input.quantize(inputs)

                if self.quant_mode == 'intrinsic':
                    return QuantizedBatchNormalizationCore(
                        quantized_inputs, broadcast_moving_mean,
                        broadcast_moving_variance, broadcast_beta,
                        broadcast_gamma, self.epsilon, quantizer_output)
                elif self.quant_mode == 'hybrid':
                    output = K.batch_normalization(quantized_inputs,
                                                   broadcast_moving_mean,
                                                   broadcast_moving_variance,
                                                   broadcast_beta,
                                                   broadcast_gamma,
                                                   axis=self.axis,
                                                   epsilon=self.epsilon)
                    return quantizer_output.quantize(output)
                elif self.quant_mode == 'extrinsic':
                    output = K.batch_normalization(inputs,
                                                   broadcast_moving_mean,
                                                   broadcast_moving_variance,
                                                   broadcast_beta,
                                                   broadcast_gamma,
                                                   axis=self.axis,
                                                   epsilon=self.epsilon)
                    return quantizer_output.quantize(output)
                elif self.quant_mode is None:
                    return K.batch_normalization(inputs,
                                                 broadcast_moving_mean,
                                                 broadcast_moving_variance,
                                                 broadcast_beta,
                                                 broadcast_gamma,
                                                 axis=self.axis,
                                                 epsilon=self.epsilon)

            else:
                if self.quant_mode in ['hybrid', 'intrinsic']:
                    moving_mean = quantizer_weight.quantize(self.moving_mean)
                    moving_variance = quantizer_weight.quantize(
                        self.moving_variance)
                    if self.center:
                        beta = quantizer_weight.quantize(self.beta)
                    else:
                        beta = self.beta
                    if self.scale:
                        gamma = quantizer_weight.quantize(self.gamma)
                    else:
                        gamma = self.gamma

                if self.quant_mode in ['hybrid', 'intrinsic']:
                    quantized_inputs = quantizer_input.quantize(inputs)

                if self.quant_mode == 'intrinsic':
                    return QuantizedBatchNormalizationCore(
                        quantized_inputs, moving_mean, moving_variance, beta,
                        gamma, self.epsilon, quantizer_output)
                elif self.quant_mode == 'hybrid':
                    output = K.batch_normalization(quantized_inputs,
                                                   moving_mean,
                                                   moving_variance,
                                                   beta,
                                                   gamma,
                                                   axis=self.axis,
                                                   epsilon=self.epsilon)
                    return quantizer_output.quantize(output)
                elif self.quant_mode == 'extrinsic':
                    output = K.batch_normalization(inputs,
                                                   self.moving_mean,
                                                   self.moving_variance,
                                                   self.beta,
                                                   self.gamma,
                                                   axis=self.axis,
                                                   epsilon=self.epsilon)
                    return quantizer_output.quantize(output)
                elif self.quant_mode is None:
                    return K.batch_normalization(inputs,
                                                 self.moving_mean,
                                                 self.moving_variance,
                                                 self.beta,
                                                 self.gamma,
                                                 axis=self.axis,
                                                 epsilon=self.epsilon)

        # If the learning phase is *static* and set to inference:
        if training in {0, False}:
            return normalize_inference()

        # If the learning phase is either dynamic, or set to training:
        normed_training, mean, variance = K.normalize_batch_in_training(
            inputs,
            self.gamma,
            self.beta,
            reduction_axes,
            epsilon=self.epsilon)

        if K.backend() != 'cntk':
            sample_size = K.prod(
                [K.shape(inputs)[axis] for axis in reduction_axes])
            sample_size = K.cast(sample_size, dtype=K.dtype(inputs))

            # sample variance - unbiased estimator of population variance
            variance *= sample_size / (sample_size - (1.0 + self.epsilon))

        self.add_update([
            K.moving_average_update(self.moving_mean, mean, self.momentum),
            K.moving_average_update(self.moving_variance, variance,
                                    self.momentum)
        ], inputs)

        # Pick the normalized form corresponding to the training phase.
        return K.in_train_phase(normed_training,
                                normalize_inference,
                                training=training)
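# NOTE: the call() above only assumes that each quantizer exposes a
# quantize(tensor) method. A minimal, hypothetical fixed-point quantizer
# satisfying that interface (an illustration, not the library's own class):
import tensorflow as tf

class SimpleFixedPointQuantizer:
    """Round to steps of 2**-fractional_bits and saturate the integer range."""

    def __init__(self, integer_bits=3, fractional_bits=4):
        self.scale = float(2 ** fractional_bits)
        self.max_val = (2 ** (integer_bits + fractional_bits) - 1) / self.scale

    def quantize(self, tensor):
        quantized = tf.round(tensor * self.scale) / self.scale
        return tf.clip_by_value(quantized, -self.max_val, self.max_val)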
示例#18
0
def yolo_loss(y_true, y_pred):
    label_class = y_true[..., :1]  # ? * 7 * 7 * 1
    # class
    label_box = y_true[..., 1:5]  # ? * 7 * 7 * 4
    # coordinates of BB1
    response_mask = y_true[..., 5]  # ? * 7 * 7
    # confidence of BB1
    response_mask = K.expand_dims(response_mask)  # ? * 7 * 7 * 1

    predict_class = y_pred[..., :1]  # ? * 7 * 7 * 1
    # class
    predict_trust = y_pred[..., 1:3]  # ? * 7 * 7 * 2
    # confidences of BB1 and BB2
    predict_box = y_pred[..., 3:]  # ? * 7 * 7 * 8
    # coordinates of BB1 and BB2

    _label_box = K.reshape(label_box, [-1, 7, 7, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, 7, 7, 2, 4])

    label_xy, label_wh = yolo_head(
        _label_box, img_size=224)  # ? * 7 * 7 * 1 * 2, ? * 7 * 7 * 1 * 2
    label_xy = K.expand_dims(label_xy, 3)  # ? * 7 * 7 * 1 * 1 * 2
    label_wh = K.expand_dims(label_wh, 3)  # ? * 7 * 7 * 1 * 1 * 2
    label_xy_min, label_xy_max = X_Y_W_H_To_Min_Max(
        label_xy, label_wh)  # ? * 7 * 7 * 1 * 1 * 2, ? * 7 * 7 * 1 * 1 * 2

    predict_xy, predict_wh = yolo_head(
        _predict_box, img_size=224)  # ? * 7 * 7 * 2 * 2, ? * 7 * 7 * 2 * 2
    predict_xy = K.expand_dims(predict_xy, 4)  # ? * 7 * 7 * 2 * 1 * 2
    predict_wh = K.expand_dims(predict_wh, 4)  # ? * 7 * 7 * 2 * 1 * 2
    predict_xy_min, predict_xy_max = X_Y_W_H_To_Min_Max(
        predict_xy, predict_wh)  # ? * 7 * 7 * 2 * 1 * 2, ? * 7 * 7 * 2 * 1 * 2

    iou_scores = iou(predict_xy_min, predict_xy_max, label_xy_min,
                     label_xy_max)  # ? * 7 * 7 * 2 * 1
    best_ious = K.max(iou_scores, axis=4)  # ? * 7 * 7 * 2
    best_box = K.max(best_ious, axis=3, keepdims=True)  # ? * 7 * 7 * 1

    box_mask = K.cast(best_ious >= best_box,
                      K.dtype(best_ious))  # ? * 7 * 7 * 2

    no_object_loss = 0.5 * (
        1 - box_mask * response_mask) * K.square(0 - predict_trust)
    object_loss = box_mask * response_mask * K.square(1 - predict_trust)
    confidence_loss = no_object_loss + object_loss
    confidence_loss = K.sum(confidence_loss)

    class_loss = response_mask * K.square(label_class - predict_class)
    class_loss = K.sum(class_loss)

    _label_box = K.reshape(label_box, [-1, 7, 7, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, 7, 7, 2, 4])

    label_xy, label_wh = yolo_head(
        _label_box, img_size=224)  # ? * 7 * 7 * 1 * 2, ? * 7 * 7 * 1 * 2
    predict_xy, predict_wh = yolo_head(
        _predict_box, img_size=224)  # ? * 7 * 7 * 2 * 2, ? * 7 * 7 * 2 * 2

    box_mask = K.expand_dims(box_mask)
    response_mask = K.expand_dims(response_mask)

    box_loss = 5 * box_mask * response_mask * K.square(
        (label_xy - predict_xy) / 224)
    box_loss += 5 * box_mask * response_mask * K.square(
        (K.sqrt(label_wh) - K.sqrt(predict_wh)) / 224)
    box_loss = K.sum(box_loss)

    loss = confidence_loss + class_loss + box_loss

    return loss
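# NOTE: iou() above takes boxes already split into min/max corners. A
# broadcast-friendly sketch of such a helper, consistent with the shape
# comments above (an assumption, not the original source):
from tensorflow.keras import backend as K

def iou(pred_mins, pred_maxes, true_mins, true_maxes):
    """IoU of corner-format boxes; broadcasts over all leading dimensions."""
    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_wh = pred_maxes - pred_mins
    true_wh = true_maxes - true_mins
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    return intersect_areas / K.maximum(union_areas, K.epsilon())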
示例#19
0
def yolo2_loss(args,
               anchors,
               num_classes,
               label_smoothing=0,
               use_crossentropy_loss=False,
               use_crossentropy_obj_loss=False,
               rescore_confidence=False,
               use_giou_loss=False,
               use_diou_loss=False):
    """YOLOv2 loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    y_true : array
        output of preprocess_true_boxes, with shape [conv_height, conv_width, num_anchors, 6]

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.


    Returns
    -------
    total_loss : float
        total mean YOLOv2 loss across minibatch
    """
    (yolo_output, y_true) = args
    num_anchors = len(anchors)
    yolo_output_shape = K.shape(yolo_output)
    input_shape = K.cast(yolo_output_shape[1:3] * 32, K.dtype(y_true))
    grid_shape = K.cast(yolo_output_shape[1:3],
                        K.dtype(y_true))  # height, width
    batch_size_f = K.cast(yolo_output_shape[0],
                          K.dtype(yolo_output))  # batch size, float tensor
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    location_scale = 1

    grid, raw_pred, pred_xy, pred_wh = yolo2_head(yolo_output,
                                                  anchors,
                                                  num_classes,
                                                  input_shape,
                                                  calc_loss=True)
    pred_confidence = K.sigmoid(raw_pred[..., 4:5])
    pred_class_prob = K.softmax(raw_pred[..., 5:])

    object_mask = y_true[..., 4:5]

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_boxes = K.concatenate([pred_xy, pred_wh])
    pred_boxes = K.expand_dims(pred_boxes, 4)

    raw_true_boxes = y_true[..., 0:4]
    raw_true_boxes = K.expand_dims(raw_true_boxes, 4)

    iou_scores = box_iou(pred_boxes, raw_true_boxes)
    iou_scores = K.squeeze(iou_scores, axis=0)

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLOv2 does not use binary cross-entropy. Here we try it.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - object_mask))
    if use_crossentropy_obj_loss:
        no_objects_loss = no_object_weights * K.binary_crossentropy(
            K.zeros(K.shape(pred_confidence)),
            pred_confidence,
            from_logits=False)

        if rescore_confidence:
            objects_loss = (object_scale * object_mask * K.binary_crossentropy(
                best_ious, pred_confidence, from_logits=False))
        else:
            objects_loss = (
                object_scale * object_mask *
                K.binary_crossentropy(K.ones(K.shape(pred_confidence)),
                                      pred_confidence,
                                      from_logits=False))
    else:
        no_objects_loss = no_object_weights * K.square(-pred_confidence)

        if rescore_confidence:
            objects_loss = (object_scale * object_mask *
                            K.square(best_ious - pred_confidence))
        else:
            objects_loss = (object_scale * object_mask *
                            K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLOv2 does not use categorical cross-entropy loss.
    #       Here we try it.
    matching_classes = K.cast(y_true[..., 5], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)

    if label_smoothing:
        matching_classes = _smooth_labels(matching_classes, label_smoothing)

    if use_crossentropy_loss:
        classification_loss = (
            class_scale * object_mask *
            K.expand_dims(K.categorical_crossentropy(
                matching_classes, pred_class_prob, from_logits=False),
                          axis=-1))
    else:
        classification_loss = (class_scale * object_mask *
                               K.square(matching_classes - pred_class_prob))

    if use_giou_loss:
        # Calculate GIoU loss as location loss
        giou = box_giou(pred_boxes, raw_true_boxes)
        giou = K.squeeze(giou, axis=-1)
        giou_loss = location_scale * object_mask * (1 - giou)
        location_loss = giou_loss
    elif use_diou_loss:
        # Calculate DIoU loss as location loss
        diou = box_diou(pred_boxes, raw_true_boxes)
        diou = K.squeeze(diou, axis=-1)
        diou_loss = location_scale * object_mask * (1 - diou)
        location_loss = diou_loss
    else:
        # YOLOv2 location loss for matching detection boxes.
        # Darknet trans box to calculate loss.
        trans_true_xy = y_true[..., :2] * grid_shape[..., ::-1] - grid
        trans_true_wh = K.log(y_true[..., 2:4] / anchors *
                              input_shape[..., ::-1])
        trans_true_wh = K.switch(
            object_mask, trans_true_wh,
            K.zeros_like(trans_true_wh))  # avoid log(0)=-inf
        trans_true_boxes = K.concatenate([trans_true_xy, trans_true_wh])

        # Unadjusted box predictions for loss.
        trans_pred_boxes = K.concatenate(
            (K.sigmoid(raw_pred[..., 0:2]), raw_pred[..., 2:4]), axis=-1)

        location_loss = (location_scale * object_mask *
                         K.square(trans_true_boxes - trans_pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss) / batch_size_f
    classification_loss_sum = K.sum(classification_loss) / batch_size_f
    location_loss_sum = K.sum(location_loss) / batch_size_f
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        location_loss_sum)

    # Fit for tf 2.0.0 loss shape
    total_loss = K.expand_dims(total_loss, axis=-1)

    return total_loss, location_loss_sum, confidence_loss_sum, classification_loss_sum
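# NOTE: _smooth_labels is used above but not defined in this example. A common
# definition pushes hard 0/1 targets toward 0.5 by the smoothing factor; this
# is a sketch of what the call sites appear to expect:
from tensorflow.keras import backend as K

def _smooth_labels(y_true, label_smoothing):
    label_smoothing = K.constant(label_smoothing, dtype=K.floatx())
    return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing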
示例#20
0
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)

        # first update the number of iterations
        self.updates = [K.update_add(self.iterations, 1)]
        
        # Cycling Gaussian LR: lr_f = lambda x, b, c, s: b + s * np.exp(-(x - c)**2 / (c * 0.5)**2)
        def gauss_lr(min_lr, max_lr, center, lrsigma, i):
            return min_lr + max_lr * K.exp(-(i - center)**2 / (center * lrsigma)**2)

        ite_casted = K.cast(self.iterations, K.dtype(self.peaklriter))
        all_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'],
                          self.peaklriter, self.lrsigma, ite_casted)
        self.updates.append(K.update(self.lr['all'], all_lr))

        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(s) for s in shapes]
        self.weights = [self.iterations] + moments
        

        for p, g, m in zip(params, grads, moments):
            lrptrkey = set_pattern_find(p.name, self.lr.keys())
            if lrptrkey:
                if self.verbose > 0:
                    print("Setting different learning rate for ", p.name,
                          " : ", K.eval(self.lr[lrptrkey]))
                if set_pattern_find(p.name, self.min_lr.keys()) and \
                        set_pattern_find(p.name, self.peak_lr.keys()):
                    p_lr = gauss_lr(self.min_lr[lrptrkey], self.peak_lr[lrptrkey],
                                    self.peaklriter, self.lrsigma, ite_casted)
                else:
                    p_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'],
                                    self.peaklriter, self.lrsigma, ite_casted)
            else:
                p_lr = self.lr['all']

            momptrkey = set_pattern_find(p.name, self.momentum.keys())
            if momptrkey:
                if self.verbose > 0:
                    print("Setting different momentum for ", p.name, " , ",
                          K.eval(self.momentum[momptrkey]))
                momentum = self.momentum[momptrkey]
            else:
                momentum = self.momentum['all']

            if self.nesterov:
                updt = momentum * (momentum * m - p_lr * g) - p_lr * g
            else:
                updt = momentum * m - p_lr * g

            # Clip the update magnitude to a fraction (UPCLIP) of the mean
            # absolute value of the parameter.
            _clip_by_val = K.tf.clip_by_value
            margin = K.mean(K.abs(p)) * K.constant(self.UPCLIP)
            min_v = K.zeros_like(margin)
            updt_sign = K.sign(updt)
            updt_val = _clip_by_val(K.abs(updt), min_v, margin)

            v = updt_sign * updt_val  # velocity
            new_p = p + v

            self.updates.append(K.update(m, v))
            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            clptrkey = set_pattern_find(p.name, self.clips.keys())
            if self.clips_val and clptrkey:
                c = K.eval(self.clips[clptrkey])
                if self.verbose > 0:
                    print("Clipping variable", p.name, " to ", c)
                new_p = K.clip(new_p, c[0], c[1])
            self.updates.append(K.update(p, new_p))
        return self.updates
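# NOTE: a numpy restatement of the cyclic Gaussian schedule above, to show its
# shape: the rate sits near min_lr far from `center` and peaks near
# min_lr + max_lr at iteration `center` (the values below are illustrative,
# not taken from the source):
import numpy as np

def gauss_lr_np(min_lr, max_lr, center, lrsigma, i):
    return min_lr + max_lr * np.exp(-(i - center) ** 2 / (center * lrsigma) ** 2)

for it in (0, 500, 1000, 1500, 2000):
    print(it, gauss_lr_np(1e-4, 1e-2, 1000.0, 0.5, float(it)))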
示例#21
0
    def call(self, x):
        """Postprocess part for YOLOv3 model except NMS."""
        assert isinstance(x, list)

        #num_layers = len(anchors)//3 # default setting
        yolo_outputs, image_shape = x

        #anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]] # default setting
        #input_shape = K.shape(yolo_outputs[0])[1:3] * 32

        batch_size = K.shape(image_shape)[0] # batch size, tensor

        boxes = []
        box_scores = []
        for l in range(self.num_layers):
            # get anchor set for each feature layer
            if self.num_layers == 3: #YOLOv3 arch
                if l == 0:
                    anchorset = self.anchors[6:]
                    grid_shape = [self.input_dim[0]//32, self.input_dim[1]//32]
                elif l == 1:
                    anchorset = self.anchors[3:6]
                    grid_shape = [self.input_dim[0]//16, self.input_dim[1]//16]
                elif l == 2:
                    anchorset = self.anchors[:3]
                    grid_shape = [self.input_dim[0]//8, self.input_dim[1]//8]
            elif self.num_layers == 2: # Tiny YOLOv3 arch
                if l == 0:
                    anchorset = self.anchors[3:]
                    grid_shape = [self.input_dim[0]//32, self.input_dim[1]//32]
                elif l == 1:
                    anchorset = self.anchors[:3]
                    grid_shape = [self.input_dim[0]//16, self.input_dim[1]//16]
            else:
                raise ValueError('Invalid layer number')

            feats = yolo_outputs[l]
            # Convert final layer features to bounding box parameters
            num_anchors = len(anchorset)
            # Reshape to batch, height, width, num_anchors, box_params.
            anchors_tensor = K.reshape(K.constant(anchorset), [1, 1, 1, num_anchors, 2])

            #grid_shape = K.shape(feats)[1:3] # height, width
            # get total anchor number for each feature layer
            total_anchor_num = grid_shape[0] * grid_shape[1] * num_anchors
            grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                [1, grid_shape[1], 1, 1])
            grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                [grid_shape[0], 1, 1, 1])
            grid = K.concatenate([grid_x, grid_y])
            grid = K.cast(grid, K.dtype(feats))

            reshape_feats = K.reshape(
                feats, [-1, grid_shape[0], grid_shape[1], num_anchors, self.num_classes + 5])

            # Adjust predictions to each spatial grid point and anchor size.
            box_xy = (K.sigmoid(reshape_feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(reshape_feats))
            box_wh = K.exp(reshape_feats[..., 2:4]) * anchors_tensor / K.cast(self.input_dim[::-1], K.dtype(reshape_feats))
            box_confidence = K.sigmoid(reshape_feats[..., 4:5])
            box_class_probs = K.sigmoid(reshape_feats[..., 5:])

            # correct boxes to the original image shape
            input_shape = K.cast(self.input_dim, K.dtype(box_xy))
            image_shape = K.cast(image_shape, K.dtype(box_xy))
            #new_shape = K.round(image_shape * K.min(input_shape/image_shape))
            new_shape = K.cast(image_shape * K.min(input_shape/image_shape), dtype='int32')
            new_shape = K.cast(new_shape, dtype='float32')
            offset = (input_shape-new_shape)/2./input_shape
            scale = input_shape/new_shape
            box_xy = (box_xy - offset) * scale
            box_wh *= scale

            box_mins = box_xy - (box_wh / 2.)
            box_maxes = box_xy + (box_wh / 2.)
            _boxes = K.concatenate([
                box_mins[..., 0:1],  # x_min
                box_mins[..., 1:2],  # y_min
                box_maxes[..., 0:1],  # x_max
                box_maxes[..., 1:2]  # y_max
            ])

            # Scale boxes back to original image shape.
            _boxes *= K.concatenate([image_shape, image_shape])

            # Reshape boxes to flatten the boxes
            _boxes = K.reshape(_boxes, [-1, total_anchor_num, 4])
            _box_scores = box_confidence * box_class_probs
            _box_scores = K.reshape(_box_scores, [-1, total_anchor_num, self.num_classes])

            boxes.append(_boxes)
            box_scores.append(_box_scores)

        # Merge boxes for all feature layers, for further NMS option
        boxes = K.concatenate(boxes, axis=1)
        box_scores = K.concatenate(box_scores, axis=1)

        return boxes, box_scores
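# NOTE: the offset/scale block above undoes letterbox padding. A numpy
# restatement with concrete numbers (everything in (height, width) order;
# the shapes are illustrative assumptions):
import numpy as np

input_shape = np.array([416., 416.])   # model input
image_shape = np.array([480., 640.])   # original image
new_shape = image_shape * np.min(input_shape / image_shape)  # (312, 416): fitted image
offset = (input_shape - new_shape) / 2. / input_shape        # (0.125, 0): padding per side
scale = input_shape / new_shape                              # (4/3, 1)

box_yx = np.array([0.5, 0.5])          # a box center in padded-input coordinates
print((box_yx - offset) * scale)       # [0.5 0.5]: maps back to the image center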
示例#22
0
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.
    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.
    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split don't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index,
                        feats.dtype)  # originally K.dtype(feats), which raised an error here for unknown reasons

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
示例#23
0
 def call(self, inputs):
     if K.dtype(inputs) != 'int32':
         inputs = K.cast(inputs, 'int32')
     embeddings = K.gather(self.embeddings, inputs)
     embeddings *= self._model_dim**0.5  # Scale
     return embeddings
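# NOTE: the sqrt(d_model) factor follows the Transformer convention: token
# embeddings initialized around O(1/sqrt(d)) are scaled up to roughly unit
# magnitude so they are comparable with the sinusoidal positional encodings
# added afterwards. A quick numeric illustration (d_model chosen arbitrarily):
import numpy as np

d_model = 512
emb = np.random.normal(0.0, 1.0 / np.sqrt(d_model), size=(d_model,))
print(np.linalg.norm(emb))                   # ~1 before scaling
print(np.linalg.norm(emb * d_model ** 0.5))  # ~sqrt(d_model) after scaling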
示例#24
0
def yolo3_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0,
               elim_grid_sense=False, use_focal_loss=False, use_focal_obj_loss=False,
               use_softmax_loss=False, use_giou_loss=False, use_diou_loss=True):
    '''
    YOLOv3 loss function.

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, IoU threshold below which a predicted box is ignored in the object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    if num_layers == 3:
        anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
        scale_x_y = [1.05, 1.1, 1.2] if elim_grid_sense else [None, None, None]
    else:
        anchor_mask = [[3,4,5], [0,1,2]]
        scale_x_y = [1.05, 1.05] if elim_grid_sense else [None, None]

    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[i])[1:3], K.dtype(y_true[0])) for i in range(num_layers)]
    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0
    batch_size = K.shape(yolo_outputs[0])[0] # batch size, tensor
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for i in range(num_layers):
        object_mask = y_true[i][..., 4:5]
        true_class_probs = y_true[i][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)
            true_objectness_probs = _smooth_labels(object_mask, label_smoothing)
        else:
            true_objectness_probs = object_mask

        grid, raw_pred, pred_xy, pred_wh = yolo3_decode(yolo_outputs[i],
             anchors[anchor_mask[i]], num_classes, input_shape, scale_x_y=scale_x_y[i], calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[i][..., :2]*grid_shapes[i][::-1] - grid
        raw_true_wh = K.log(y_true[i][..., 2:4] / anchors[anchor_mask[i]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[i][...,2:3]*y_true[i][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[i][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = tf.while_loop(lambda b,*args: b<batch_size, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        if use_focal_obj_loss:
            # Focal loss for objectness confidence
            confidence_loss = sigmoid_focal_loss(true_objectness_probs, raw_pred[...,4:5])
        else:
            confidence_loss = object_mask * K.binary_crossentropy(true_objectness_probs, raw_pred[...,4:5], from_logits=True)+ \
                (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        if use_focal_loss:
            # Focal loss for classification score
            if use_softmax_loss:
                class_loss = softmax_focal_loss(true_class_probs, raw_pred[...,5:])
            else:
                class_loss = sigmoid_focal_loss(true_class_probs, raw_pred[...,5:])
        else:
            if use_softmax_loss:
                # use softmax style classification output
                class_loss = object_mask * K.expand_dims(K.categorical_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True), axis=-1)
            else:
                # use sigmoid style classification output
                class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)


        if use_giou_loss:
            # Calculate GIoU loss as location loss
            raw_true_box = y_true[i][...,0:4]
            giou = box_giou(raw_true_box, pred_box)
            giou_loss = object_mask * box_loss_scale * (1 - giou)
            giou_loss = K.sum(giou_loss) / batch_size_f
            location_loss = giou_loss
        elif use_diou_loss:
            # Calculate DIoU loss as location loss
            raw_true_box = y_true[i][...,0:4]
            diou = box_diou(raw_true_box, pred_box)
            diou_loss = object_mask * box_loss_scale * (1 - diou)
            diou_loss = K.sum(diou_loss) / batch_size_f
            location_loss = diou_loss
        else:
            # Standard YOLOv3 location loss
            # K.binary_crossentropy is helpful to avoid exp overflow.
            xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
            wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
            xy_loss = K.sum(xy_loss) / batch_size_f
            wh_loss = K.sum(wh_loss) / batch_size_f
            location_loss = xy_loss + wh_loss

        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += location_loss + confidence_loss + class_loss
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss

    # Fit for tf 2.0.0 loss shape
    loss = K.expand_dims(loss, axis=-1)

    return loss, total_location_loss, total_confidence_loss, total_class_loss
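# NOTE: box_giou is used above but not shown. A sketch of the standard GIoU
# formulation, GIoU = IoU - (|C| - |union|) / |C| with C the smallest enclosing
# box (xywh format assumed, matching the call sites above):
from tensorflow.keras import backend as K

def box_giou(b_true, b_pred):
    """GIoU for xywh boxes; returns (..., 1)."""
    b1_xy, b1_wh = b_true[..., :2], b_true[..., 2:4]
    b2_xy, b2_wh = b_pred[..., :2], b_pred[..., 2:4]
    b1_mins, b1_maxes = b1_xy - b1_wh / 2., b1_xy + b1_wh / 2.
    b2_mins, b2_maxes = b2_xy - b2_wh / 2., b2_xy + b2_wh / 2.

    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    union_area = (b1_wh[..., 0] * b1_wh[..., 1] +
                  b2_wh[..., 0] * b2_wh[..., 1] - intersect_area)
    iou = intersect_area / K.maximum(union_area, K.epsilon())

    enclose_mins = K.minimum(b1_mins, b2_mins)
    enclose_maxes = K.maximum(b1_maxes, b2_maxes)
    enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.)
    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
    giou = iou - (enclose_area - union_area) / K.maximum(enclose_area, K.epsilon())
    return K.expand_dims(giou, -1)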
示例#25
0
def Yolov1Loss(y_true, y_pred):
    label_class = y_true[..., :20]  # ? * 7 * 7 * 20
    label_box = y_true[..., 20:24]  # ? * 7 * 7 * 4
    responsible_mask = y_true[..., 24]  # ? * 7 * 7
    responsible_mask = K.expand_dims(responsible_mask)  # ? * 7 * 7 * 1

    predict_class = y_pred[..., :20]  # ? * 7 * 7 * 20
    predict_bbox_confidences = y_pred[..., 20:22]  # ? * 7 * 7 * 2
    predict_box = y_pred[..., 22:]  # ? * 7 * 7 * 8

    _label_box = K.reshape(label_box,
                           [-1, 7, 7, 1, 4])  # ? * 7 * 7 * 1 * 4 (4 -> 1 * 4)
    _predict_box = K.reshape(
        predict_box, [-1, 7, 7, 2, 4])  # ? * 7 * 7 * 2 * 4 (8 -> 2 * 4)

    label_xy, label_wh = yolo_head(
        _label_box)  # ? * 7 * 7 * 1 * 2, ? * 7 * 7 * 1 * 2
    label_xy = K.expand_dims(label_xy, 3)  # ? * 7 * 7 * 1 * 1 * 2
    label_wh = K.expand_dims(label_wh, 3)  # ? * 7 * 7 * 1 * 1 * 2
    label_xy_min, label_xy_max = xywh2minmax(
        label_xy, label_wh)  # ? * 7 * 7 * 1 * 1 * 2, ? * 7 * 7 * 1 * 1 * 2

    predict_xy, predict_wh = yolo_head(
        _predict_box)  # ? * 7 * 7 * 2 * 2, ? * 7 * 7 * 2 * 2
    predict_xy = K.expand_dims(predict_xy, 4)  # ? * 7 * 7 * 2 * 1 * 2
    predict_wh = K.expand_dims(predict_wh, 4)  # ? * 7 * 7 * 2 * 1 * 2
    predict_xy_min, predict_xy_max = xywh2minmax(
        predict_xy, predict_wh)  # ? * 7 * 7 * 2 * 1 * 2, ? * 7 * 7 * 2 * 1 * 2

    iou_scores = iou(predict_xy_min, predict_xy_max, label_xy_min,
                     label_xy_max)  # ? * 7 * 7 * 2 * 1
    best_ious = K.max(iou_scores, axis=4)  # ? * 7 * 7 * 2
    best_box = K.max(best_ious, axis=3, keepdims=True)  # ? * 7 * 7 * 1

    box_mask = K.cast(best_ious >= best_box,
                      K.dtype(best_ious))  # ? * 7 * 7 * 2

    # Loss term 4 (with lambda_noobj 0.5)
    no_object_loss = 0.5 * (1 - box_mask * responsible_mask
                            ) * K.square(0 - predict_bbox_confidences)
    # Loss term 3 (without lambda_noobj)
    object_loss = box_mask * responsible_mask * K.square(
        1 - predict_bbox_confidences)

    # tf.print("\n- no_object_loss:", K.sum(no_object_loss), output_stream=sys.stdout)
    # tf.print("- object_loss:", K.sum(object_loss), output_stream=sys.stdout)

    confidence_loss = no_object_loss + object_loss
    confidence_loss = K.sum(confidence_loss)

    # Loss term 5
    class_loss = responsible_mask * K.square(label_class - predict_class)

    # Total of loss term 5
    class_loss = K.sum(class_loss)

    _label_box = K.reshape(label_box, [-1, 7, 7, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, 7, 7, 2, 4])

    label_xy, label_wh = yolo_head(
        _label_box)  # ? * 7 * 7 * 1 * 2, ? * 7 * 7 * 1 * 2
    predict_xy, predict_wh = yolo_head(
        _predict_box)  # ? * 7 * 7 * 2 * 2, ? * 7 * 7 * 2 * 2

    box_mask = K.expand_dims(box_mask)
    responsible_mask = K.expand_dims(responsible_mask)

    # Loss term 1
    box_loss = 5 * box_mask * responsible_mask * K.square(
        (label_xy - predict_xy) / 448)

    # tf.print("- xy_loss:", K.sum(5 * box_mask * responsible_mask * K.square((label_xy - predict_xy) / 448)), output_stream=sys.stdout)

    # Loss term 2
    #box_loss += 5 * box_mask * responsible_mask * K.square(K.sqrt(label_wh/ 448) - K.sqrt(predict_wh/ 448))
    wh_loss = 5 * box_mask * responsible_mask * K.square((label_wh / 448) -
                                                         (predict_wh / 448))
    box_loss += wh_loss

    # box_loss = 5 * box_mask * responsible_mask * K.square((label_xy - predict_xy) / 448)
    # box_loss += 5 * box_mask * responsible_mask * K.square((label_wh / 448) - (predict_wh / 448))
    # box_loss = K.sum(box_loss)

    # tf.print("- wh_loss:", K.sum(wh_loss),
    #          output_stream=sys.stdout)

    # Sum of terms 1 and 2
    box_loss = K.sum(box_loss)

    loss = confidence_loss + class_loss + box_loss
    tf.print("\n- confidence_loss:", confidence_loss, output_stream=sys.stdout)
    tf.print("- class_loss:", class_loss, output_stream=sys.stdout)
    tf.print("- box_loss:", box_loss, output_stream=sys.stdout)

    return loss
示例#26
0
def yolo_loss(args,
              anchors,
              ignore_thresh=.5,
              att_loss_weight=0.1,
              print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    ignore_thresh: float, IoU threshold below which a predicted box is ignored in the object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:1]
    pred_att = args[1:2]
    # mask_prob=args[1]
    # co_enegy=args[2]
    # y_true = args[3:4]
    y_true = args[2:3]
    att_map = args[3::]
    # mask_gt=args[4]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [
        [0, 1, 2]
    ]  # after removing two scales, [[6,7,8], [3,4,5], [0,1,2]] becomes [[0,1,2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32,
        K.dtype(y_true[0]))  # x32 is original size
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]  # output grid shape for each scale
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        # true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])
        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        def smooth_L1(y_true, y_pred, sigma=3.0):
            """ Create a smooth L1 loss functor.

            Args
                sigma: This argument defines the point where the loss changes from L2 to L1.

            Returns
                A functor for computing the smooth L1 loss given target data and predicted data.
            """
            sigma_squared = sigma**2

            # compute smooth L1 loss
            # f(x) = 0.5 * (sigma * x)^2          if |x| < 1 / sigma / sigma
            #        |x| - 0.5 / sigma / sigma    otherwise
            regression_diff = y_true - y_pred
            regression_diff = K.abs(regression_diff)
            regression_loss = tf.where(
                K.less(regression_diff, 1.0 / sigma_squared),
                0.5 * sigma_squared * K.pow(regression_diff, 2),
                regression_diff - 0.5 / sigma_squared)
            return regression_loss

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * smooth_L1(
            raw_true_wh, raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        # seg_loss=K.binary_crossentropy(mask_gt, mask_prob, from_logits=True)
        att_loss = K.binary_crossentropy(att_map[l],
                                         pred_att[l],
                                         from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        att_loss = K.sum(att_loss) / mf
        # seg_loss = K.sum(seg_loss) / mf
        # co_enegy_loss=cem_loss(co_enegy) / mf

        # remove RES and CEM loss
        seg_loss_weight = 0.
        co_weight = 0.
        # loss += xy_loss+ wh_loss+ confidence_loss+seg_loss*seg_loss_weight+co_enegy_loss*co_weight
        loss += xy_loss + wh_loss + confidence_loss + att_loss_weight * att_loss
        if print_loss:
            # loss = tf.Print(loss, ['\n''co_peak_loss: ',co_enegy_loss,'co_peak_energe: ', K.sum(co_enegy)/mf], message='loss: ')
            loss = tf.Print(loss, [], message='loss: ')
    return K.expand_dims(loss, axis=0)
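# NOTE: the smooth_L1 above switches from a quadratic to a linear penalty at
# |x| = 1/sigma^2. A quick numpy check that the two pieces meet there, so the
# loss is continuous (sigma = 3 as in the default above):
import numpy as np

sigma_squared = 9.0
knot = 1.0 / sigma_squared
quadratic = 0.5 * sigma_squared * knot ** 2   # 0.5 * 9 * (1/9)^2 = 1/18
linear = knot - 0.5 / sigma_squared           # 1/9 - 1/18        = 1/18
print(np.isclose(quadratic, linear))          # True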
示例#27
0
 def loop_body(b, ignore_mask):
     true_box = tf.boolean_mask(y_true[i][b,...,0:4], object_mask_bool[b,...,0])
     iou = box_iou(pred_box[b], true_box)
     best_iou = K.max(iou, axis=-1)
     ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
     return b+1, ignore_mask
示例#28
0
    def call(self, inputs, training=None):
        input_shape = K.int_shape(inputs)
        # Prepare broadcasting shape.
        ndim = len(input_shape)
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        # Determines whether broadcasting is needed.
        needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

        def normalize_inference():
            if needs_broadcasting:
                # In this case we must explicitly broadcast all parameters.
                broadcast_moving_mean = K.reshape(self.moving_mean,
                                                  broadcast_shape)
                broadcast_moving_variance = K.reshape(self.moving_variance,
                                                      broadcast_shape)
                if self.center:
                    broadcast_beta = K.reshape(self.beta, broadcast_shape)
                else:
                    broadcast_beta = None
                if self.scale:
                    broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                else:
                    broadcast_gamma = None
                # tf.nn.batch_normalization used in place of K.batch_normalization
                return tf.nn.batch_normalization(
                    inputs,
                    broadcast_moving_mean,
                    broadcast_moving_variance,
                    broadcast_beta,
                    broadcast_gamma,
                    self.epsilon)
            else:
                return tf.nn.batch_normalization(
                    inputs,
                    self.moving_mean,
                    self.moving_variance,
                    self.beta,
                    self.gamma,
                    self.epsilon)

        # If the learning phase is *static* and set to inference:
        if training in {0, False}:
            return normalize_inference()

        # If the learning phase is either dynamic, or set to training:
        normed_training, mean, variance = _regular_normalize_batch_in_training(
            inputs,
            self.gamma,
            self.beta,
            reduction_axes,
            epsilon=self.epsilon)

        if K.backend() != 'cntk':
            sample_size = K.prod(
                [K.shape(inputs)[axis] for axis in reduction_axes])
            sample_size = K.cast(sample_size, dtype=K.dtype(inputs))

            # sample variance - unbiased estimator of population variance
            variance *= sample_size / (sample_size - (1.0 + self.epsilon))

        self.add_update([
            K.moving_average_update(self.moving_mean, mean, self.momentum),
            K.moving_average_update(self.moving_variance, variance,
                                    self.momentum)
        ], inputs)

        # Pick the normalized form corresponding to the training phase.
        return K.in_train_phase(normed_training,
                                normalize_inference,
                                training=training)
示例#29
0
def yolo_loss(args,
              anchors,
              num_classes,
              ignore_threshold=0.5,
              normalize=True):

    num_layers = len(anchors) // 3

    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    loss = 0

    m = K.shape(yolo_outputs[0])[0]  # Batch size
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):

        # feature maps location
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        # predict grid, class, xy, wh
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l],
            anchors=anchors[anchors_mask[l]],
            num_classes=num_classes,
            input_shape=input_shape,
            calc_loss=True)

        pred_box = K.concatenate([pred_xy, pred_wh])
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_threshold, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])

        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # The bigger the ground-truth box, the smaller its loss weight.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # compute ciou loss
        raw_true_box = y_true[l][..., 0:4]
        ciou = box_ciou(pred_box, raw_true_box)
        ciou_loss = object_mask * box_loss_scale * ciou
        location_loss = K.sum(ciou_loss) / mf

        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
                        (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        # Accumulate the per-scale losses
        loss += location_loss + confidence_loss + class_loss

    return loss
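# NOTE: box_ciou is not reproduced in this example. Judging from its use above
# (ciou_loss = object_mask * box_loss_scale * ciou), it is expected to return a
# per-box loss directly. A sketch of the closely related DIoU loss,
# 1 - IoU + rho^2 / c^2 (CIoU adds one extra aspect-ratio term), xywh format assumed:
from tensorflow.keras import backend as K

def box_diou_loss(b1, b2):
    """DIoU loss for xywh boxes; returns (..., 1)."""
    b1_xy, b1_wh = b1[..., :2], b1[..., 2:4]
    b2_xy, b2_wh = b2[..., :2], b2[..., 2:4]
    b1_mins, b1_maxes = b1_xy - b1_wh / 2., b1_xy + b1_wh / 2.
    b2_mins, b2_maxes = b2_xy - b2_wh / 2., b2_xy + b2_wh / 2.

    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    union_area = (b1_wh[..., 0] * b1_wh[..., 1] +
                  b2_wh[..., 0] * b2_wh[..., 1] - intersect_area)
    iou = intersect_area / K.maximum(union_area, K.epsilon())

    # Squared center distance over squared diagonal of the enclosing box.
    center_dist = K.sum(K.square(b1_xy - b2_xy), axis=-1)
    enclose_mins = K.minimum(b1_mins, b2_mins)
    enclose_maxes = K.maximum(b1_maxes, b2_maxes)
    enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.)
    enclose_diag = K.sum(K.square(enclose_wh), axis=-1)

    diou_loss = 1.0 - iou + center_dist / K.maximum(enclose_diag, K.epsilon())
    return K.expand_dims(diou_loss, -1)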
示例#30
0
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate((K.sigmoid(feats[..., 0:2]), feats[..., 2:4]),
                               axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss