def correct_boxes(box_xy, box_wh, input_shape, image_shape):
    '''Map boxes from network-input coordinates back onto the original image.

    The (x, y) centres and (w, h) sizes are flipped to (y, x) / (h, w),
    shifted and rescaled to undo the aspect-preserving resize that is
    centred inside ``input_shape``, and finally multiplied by
    ``image_shape``.

    Returns a tensor whose last axis is (y_min, x_min, y_max, x_max).
    '''
    centers_yx = box_xy[..., ::-1]
    sizes_hw = box_wh[..., ::-1]

    float_dtype = K.dtype(centers_yx)
    input_shape = K.cast(input_shape, float_dtype)
    image_shape = K.cast(image_shape, float_dtype)

    # Size of the image after scaling by the smallest input/image ratio.
    resized_shape = K.round(image_shape * K.min(input_shape / image_shape))
    pad_offset = (input_shape - resized_shape) / 2. / input_shape
    rescale = input_shape / resized_shape

    centers_yx = (centers_yx - pad_offset) * rescale
    sizes_hw = sizes_hw * rescale

    half_hw = sizes_hw / 2.
    mins_yx = centers_yx - half_hw
    maxes_yx = centers_yx + half_hw
    corners = K.concatenate([
        mins_yx[..., 0:1],   # y_min
        mins_yx[..., 1:2],   # x_min
        maxes_yx[..., 0:1],  # y_max
        maxes_yx[..., 1:2],  # x_max
    ])

    # Scale the corners by the original image shape.
    return corners * K.concatenate([image_shape, image_shape])
Exemplo n.º 2
0
def yolo_head(feats, anchors, num_classes, input_shape):
    """Decode final-layer features into box centres, sizes and scores.

    Returns ``(box_xy, box_wh, box_confidence, box_class_probs)``; the
    centres are divided by the grid size and the sizes by ``input_shape``.
    """
    num_anchors = len(anchors)
    # Anchors broadcast as (batch, height, width, num_anchors, 2).
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_h = grid_shape[0]
    grid_w = grid_shape[1]

    # Per-cell (x, y) index grid.
    row_index = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    col_index = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid_y = K.tile(row_index, [1, grid_w, 1, 1])
    grid_x = K.tile(col_index, [grid_h, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])
    feat_dtype = K.dtype(feats)

    xy_in_cell = K.sigmoid(feats[..., :2])
    wh_factor = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Offset by the cell index and scale by anchors / input size.
    box_xy = (xy_in_cell + grid) / K.cast(grid_shape[::-1], feat_dtype)
    box_wh = wh_factor * anchors_tensor / K.cast(input_shape[::-1], feat_dtype)

    return box_xy, box_wh, box_confidence, box_class_probs
Exemplo n.º 3
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: sequence; the first 3 entries are used as yolo_outputs (list of
        tensor, the output of yolo_body) and the remaining entries as y_true
        (list of array, the output of preprocess_true_boxes)
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, the box + confidence + class losses summed over the three
        output scales and divided by the batch size

    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    # Anchor indices used by each of the three output scales.
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    # Input size is taken to be 32x the spatial size of the first output.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        # Channel 4 of y_true is used as the object mask, channels 5+ as class targets.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2]-pred_xy)*grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        # Weight is 2 - w*h of the true box, so smaller boxes weigh more.
        box_delta_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            # True boxes of sample b, selected where the object mask is set.
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the best IoU with any true box is below the threshold.
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta*box_delta_scale)
        # Non-object cells contribute to the confidence loss only where ignore_mask is 1.
        confidence_loss = object_mask * K.square(1-pred_confidence) + \
            (1-object_mask) * K.square(0-pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs-pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    # Average over the batch dimension.
    return loss / K.cast(m, K.dtype(loss))
Exemplo n.º 4
0
 def _get_anchor_positive_triplet_mask(self, y_true: Tensor, pairwise_dist: Tensor) -> Tensor:
     """Return a 0/1 mask of pairs that share a label and have non-zero distance."""
     dist_dtype = K.dtype(pairwise_dist)
     # 1 where the two labels are equal.
     labels_row = K.expand_dims(y_true, 0)
     labels_col = K.expand_dims(y_true, 1)
     same_label = K.cast(K.equal(labels_row, labels_col), dist_dtype)
     # 1 where the pairwise distance is not exactly zero.
     nonzero_dist = K.cast(K.not_equal(pairwise_dist, 0.0), dist_dtype)
     return same_label * nonzero_dist
Exemplo n.º 5
0
 def _get_semihard_anchor_negative_triplet_mask(self, negative_dist: Tensor,
                                                hardest_positive_dist: Tensor,
                                                mask_negative: Tensor) -> Tensor:
     """Select negatives farther than the hardest positive, per row.

     Rows that have at least one such negative use that selection; all
     other rows fall back to ``mask_negative``.
     """
     dist_dtype = K.dtype(negative_dist)
     # 1 where dist(a, n) > max dist(a, p).
     farther_than_positive = K.cast(
         K.greater(negative_dist, hardest_positive_dist), dist_dtype)
     # Per-row flag: does the row contain any such negative?
     row_has_semihard = K.greater(K.sum(farther_than_positive, 1), 0.0)
     use_semihard = K.cast(K.expand_dims(row_has_semihard, 1), dist_dtype)
     return mask_negative * (1 - use_semihard) + farther_than_positive * use_semihard
Exemplo n.º 6
0
    def call(self, inputs, mask=None):
        """Add normalized speaker vectors to the life-long memory.

        ``inputs`` must be a list with at least two entries: ``inputs[0]``
        holds the speaker indices and ``inputs[1]`` the speaker vectors.
        Each vector is divided by its norm (computed after replacing exact
        zeros with ``np.spacing(1)``), added to the memory rows selected by
        the speaker indices, and the resulting memory is normalized the same
        way and returned. ``mask`` is not used.

        Raises ``TypeError`` when ``inputs`` is not a list of at least two
        elements.

        NOTE(review): ``dimshuffle`` and ``T.*`` look like Theano tensor API,
        so this presumably only works on the Theano backend -- confirm.
        """
        if not isinstance(inputs, list) or len(inputs) <= 1:
            raise TypeError('SpkLifeLongMemory must be called on a list of tensors '
                            '(at least 2). Got: ' + str(inputs))
        # (None(batch), 1), index of speaker
        target_spk_l = inputs[0]
        # Flatten to a vector of length batch so it can index memory rows.
        target_spk_l = K.reshape(target_spk_l, (target_spk_l.shape[0], ))
        if K.dtype(target_spk_l) != 'int32':
            target_spk_l = K.cast(target_spk_l, 'int32')
        # (None(batch), embed_dim)
        spk_vector_l = inputs[1]
        # Start to update life-long memory based on the learned speech vector
        # First do normalization
        spk_vector_eps = K.switch(K.equal(spk_vector_l, 0.), np.spacing(1), spk_vector_l)  # avoid zero
        # Per-row norm, broadcast back to self.vec_dim columns for the division.
        spk_vector_eps = K.sqrt(K.sum(spk_vector_eps**2, axis=1))
        spk_vector_eps = spk_vector_eps.dimshuffle((0, 'x'))
        spk_vector = T.true_div(spk_vector_l, K.repeat_elements(spk_vector_eps, self.vec_dim, axis=1))
        # Store speech vector into life-long memory according to the speaker identity.
        life_long_mem = T.inc_subtensor(self.life_long_mem[target_spk_l, :], spk_vector)
        # Normalization for memory
        life_long_mem_eps = K.switch(K.equal(life_long_mem, 0.), np.spacing(1), life_long_mem)  # avoid 0
        life_long_mem_eps = K.sqrt(K.sum(life_long_mem_eps**2, axis=1))
        life_long_mem_eps = life_long_mem_eps.dimshuffle((0, 'x'))
        life_long_mem = T.true_div(life_long_mem, K.repeat_elements(life_long_mem_eps, self.vec_dim, axis=1))

        # (None(batch), spk_size, embed_dim)
        # NOTE(review): the value returned is the normalized memory itself,
        # which is indexed as [speaker, :] above -- confirm the shape claimed
        # in the comment above.
        return life_long_mem
    def get_updates(self, loss, params):
        """Build the momentum-SGD update ops with per-parameter LR multipliers.

        Parameters
        ----------
        loss: tensor to differentiate.
        params: list of variables to update.

        Returns
        -------
        ``self.updates``: the iteration counter increment followed by, for
        each parameter, the update of its moment and of the parameter itself.
        Also sets ``self.weights`` to the iteration counter plus the moments.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            # Build a new tensor instead of using ``lr *= ...``: ``lr`` is the
            # backend variable ``self.lr`` and augmented assignment on a
            # variable is deprecated / rejected by TensorFlow variables.
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            # Parameters listed in self.lr_mult (keyed by name) get a scaled
            # learning rate; all others use the base rate.
            if p.name in self.lr_mult:
                multiplied_lr = lr * self.lr_mult[p.name]
            else:
                multiplied_lr = lr

            v = self.momentum * m - multiplied_lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - multiplied_lr * g
            else:
                new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Exemplo n.º 8
0
 def _batch_all_triplet_loss(self, y_true: Tensor, pairwise_dist: Tensor) -> Tensor:
     """Average the positive triplet losses over all masked triplets."""
     dist_ap = K.expand_dims(pairwise_dist, 2)
     dist_an = K.expand_dims(pairwise_dist, 1)
     raw_loss = dist_ap - dist_an + self.margin
     # Keep only the triplets allowed by the mask, then drop negative losses.
     triplet_mask = self._get_triplet_mask(y_true, pairwise_dist)
     hinge_loss = K.clip(triplet_mask * raw_loss, 0.0, None)
     # Count the triplets that still contribute a loss.
     is_active = K.cast(K.greater(hinge_loss, 1e-16), K.dtype(hinge_loss))
     active_count = K.sum(is_active)
     return K.sum(hinge_loss) / (active_count + 1e-16)
Exemplo n.º 9
0
    def call(self, x, mask=None):
        """Average ``x`` over axis 1, counting only the masked-in steps.

        Without a mask the parent implementation is used. With a mask, the
        divisor is the mask sum clamped to at least 1.
        """
        if mask is not None:
            feature_dim = K.shape(x)[2]
            # Broadcast the mask across the feature axis in x's dtype.
            weights = K.tile(K.expand_dims(mask), [1, 1, feature_dim])
            weights = K.cast(weights, K.dtype(x))

            step_count = K.sum(weights, axis=1)
            step_count = K.maximum(step_count, K.ones_like(step_count))

            return K.sum(weights * x, axis=1) / step_count

        return super(GlobalAveragePooling1D, self).call(x)
Exemplo n.º 10
0
 def call(self, x, mask=None):
     """Gather rows of the column-normalised weight matrix at indices ``x``.

     When ``self.dropout`` is strictly between 0 and 1, whole rows of
     ``self.W`` are randomly dropped (and the rest rescaled) in the
     training phase. ``mask`` is not used.
     """
     if K.dtype(x) != 'int32':
         x = K.cast(x, 'int32')

     W = self.W
     if 0. < self.dropout < 1.:
         retain_p = 1. - self.dropout
         row_scale = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p)
         row_scale = K.expand_dims(row_scale)
         W = K.in_train_phase(self.W * row_scale, self.W)

     # Divide every column by its sum over axis 0.
     column_sum = K.sum(W, axis=0)
     normalised = W / column_sum
     return K.gather(normalised, x)
Exemplo n.º 11
0
 def _pairwise_distances(self, inputs: List[Tensor]) -> Tensor:
     """Return distances between the two embedding batches in ``inputs``.

     Squared distances are built from the Gram matrix of the concatenated
     embeddings, the cross block is sliced out, and the square root is
     taken with exact zeros kept at zero.
     """
     emb_c, emb_r = inputs
     bs = K.shape(emb_c)[0]
     stacked = K.concatenate([emb_c, emb_r], 0)
     gram = K.dot(stacked, K.transpose(stacked))
     sq_norm = K.batch_dot(stacked, stacked, axes=1)
     sq_dist = K.transpose(sq_norm) - 2.0 * gram + sq_norm
     # Keep rows [0, bs) and columns [bs, 2*bs).
     sq_dist = K.slice(sq_dist, (0, bs), (bs, bs))
     sq_dist = K.clip(sq_dist, 0.0, None)
     # Add a tiny value where the distance is zero before the sqrt, then
     # reset those entries to zero afterwards.
     zero_mask = K.cast(K.equal(sq_dist, 0.0), K.dtype(sq_dist))
     dist = K.sqrt(sq_dist + zero_mask * 1e-16)
     return dist * (1.0 - zero_mask)
Exemplo n.º 12
0
 def call(self, inputs, mask=None):
     """Gather memory entries for the given speaker indices.

     ``inputs[0]`` holds the speaker indices and ``inputs[1]`` the memory
     to gather from. Raises ``TypeError`` unless ``inputs`` is a list of at
     least two elements. ``mask`` is not used.
     """
     has_two_inputs = isinstance(inputs, list) and len(inputs) > 1
     if not has_two_inputs:
         raise TypeError('SelectSpkMemory must be called on a list of tensors '
                         '(at least 2). Got: ' + str(inputs))

     # Speaker identity, flattened to one index per batch entry.
     speaker_ids = inputs[0]
     speaker_ids = K.reshape(speaker_ids, (speaker_ids.shape[0], ))
     if K.dtype(speaker_ids) != 'int32':
         speaker_ids = K.cast(speaker_ids, 'int32')

     # Look up the memory entry of every requested speaker.
     memory = inputs[1]
     return K.gather(memory, speaker_ids)
Exemplo n.º 13
0
 def _preprocess_conv2d_input(x, data_format):
     """Cast and, if needed, transpose the input ahead of a conv2d.

     # Arguments
         x: input tensor.
         data_format: string, `"channels_last"` or `"channels_first"`.
     # Returns
         A tensor; float64 input is cast to float32 and `"channels_first"`
         input is transposed so that channels become the last axis.
     """
     is_double = K.dtype(x) == "float64"
     if is_double:
         x = tf.cast(x, "float32")
     if data_format != "channels_first":
         return x
     # (samples, input_depth, rows, cols) -> (samples, rows, cols, input_depth)
     return tf.transpose(x, (0, 2, 3, 1))
Exemplo n.º 14
0
    def call(self, inputs, mask=None):
        """Emit a ramp from ``self.start`` to ``self.stop`` along axis 1.

        The ramp advances with the cumulative sum of the mask and is zeroed
        where the mask is zero. Without a mask, every step counts as valid.
        """
        if mask is not None:
            mask = K.cast(mask, K.dtype(inputs))
        else:
            # All-ones mask with the last axis of ``inputs`` reduced away.
            mask = 1 + K.sum(K.zeros_like(inputs), axis=-1)

        # Number of valid steps minus one, clamped to at least 1.
        last_index = K.sum(mask, axis=1) - 1
        last_index = K.expand_dims(K.maximum(last_index, K.ones_like(last_index)))

        position = tf.cumsum(mask, axis=1) - 1
        ramp = self.start + (self.stop - self.start) * position / last_index
        return K.expand_dims(mask * ramp)
Exemplo n.º 15
0
Arquivo: loss.py Projeto: Navdevl/kur
def keras_wrap(model, target, output, loss):
	""" Wrap a Keras loss function around a freshly created target placeholder.

		Returns `(ins, out)`: `ins` is a one-element list holding the
		`(target, placeholder)` pair and `out` is the loss applied to that
		placeholder and `output`. A string `loss` is resolved by name.
	"""
	# pylint: disable=import-error
	import keras.objectives as O
	import keras.backend as K
	# pylint: enable=import-error
	loss_fn = O.get(loss) if isinstance(loss, str) else loss
	rank = len(model.outputs[target].value._keras_shape) # pylint: disable=protected-access
	# The placeholder mirrors the rank and dtype of the model output.
	placeholder = K.placeholder(
		ndim=rank,
		dtype=K.dtype(model.outputs[target].value),
		name=target
	)
	ins = [(target, placeholder)]
	return ins, loss_fn(placeholder, output)
Exemplo n.º 16
0
 def loop_body(b, ignore_mask):
     """Write the ignore mask of batch entry ``b`` and advance the counter."""
     # True boxes of this sample, selected where the object mask is set.
     sample_boxes = y_true[l][b,...,0:4]
     has_object = object_mask_bool[b,...,0]
     true_box = tf.boolean_mask(sample_boxes, has_object)
     # Best overlap of every predicted box with any true box.
     overlaps = box_iou(pred_box[b], true_box)
     below_thresh = K.max(overlaps, axis=-1) < ignore_thresh
     ignore_mask = ignore_mask.write(b, K.cast(below_thresh, K.dtype(true_box)))
     return b+1, ignore_mask
Exemplo n.º 17
0
    def get_updates(self, loss, params):
        """Build the update ops for one optimization step.

        Computes the gradients of ``loss`` w.r.t. ``params`` and keeps a
        first and a second moment accumulator per parameter (plus a running
        maximum of the second moment when ``self.amsbound`` is set). Each
        parameter is moved by its first moment times a per-element step size
        clipped to ``[lower_bound, upper_bound]``.

        Returns ``self.updates``; also sets ``self.weights``.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        # 1-based step counter as a float.
        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        # Both bounds are functions of t and approach final_lr as t grows.
        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            # Placeholders so that zip() below still yields one item per parameter.
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                # Use the running maximum of the second moment.
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            # TODO: Replace with K.clip after release of Keras > 2.2.4
            bounded_lr_t = m_t * tf.clip_by_value(step_size_p_bound,
                                                  lower_bound,
                                                  upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Exemplo n.º 18
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: sequence; the first ``len(anchors)//3`` entries are used as
        yolo_outputs (list of tensor, the output of yolo_body or
        tiny_yolo_body) and the remaining entries as y_true (list of array,
        the output of preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: bool, when true the loss components are routed through tf.Print

    Returns
    -------
    loss: tensor, the xy + wh + confidence + class losses summed over all
        output layers, each divided by the batch size

    '''
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    # NOTE(review): the two-layer mask reuses anchor 3 and never selects
    # anchor 0 -- confirm it matches the masks used when y_true was built.
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
    # Input size is taken to be 32x the spatial size of the first output.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0] # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 of y_true is used as the object mask, channels 5+ as class targets.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        # Relies on a yolo_head that accepts calc_loss and returns
        # (grid, raw_pred, pred_xy, pred_wh).
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        # Weight is 2 - w*h of the true box, so smaller boxes weigh more.
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            # True boxes of sample b, selected where the object mask is set.
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the best IoU with any true box is below the threshold.
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
        # Non-object cells contribute to the confidence loss only where ignore_mask is 1.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)

        # Each component is averaged over the batch before accumulation.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')
    return loss
Exemplo n.º 19
0
# Accumulators for the decoded boxes and their scores.
# NOTE(review): nothing in the visible part of the loop appends to them.
boxes = list()
box_scores = list()
# classes = list()
for i in range(3):  # 52 26 13
    # Three consecutive anchors are used per output.
    anchor = anchors[..., 3 * i:3 * (i + 1), :]
    # feats = model.output[i]
    feats = net_out[i]

    # Build the per-cell (x, y) index grid in the dtype of feats.
    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # Split the channel axis into 3 anchors x (num_classes + 5) values.
    feats = K.reshape(feats,
                      [-1, grid_shape[0], grid_shape[1], 3, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchor / K.cast(input_shape[::-1],
                                                      K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # box_xy = (box_xy - offset) * scale
    # box_wh *= scale
    # box_mins = box_xy - (box_wh / 2.)
Exemplo n.º 20
0
def Constant(c, reference=None):
    """Create a backend constant, optionally matching another tensor's dtype.

    Parameters
    ----------
    c : value handed to ``K.constant``.
    reference : optional tensor or variable. When given, ``c`` is converted
        to the reference's dtype and the constant is created with that dtype.

    Returns
    -------
    The tensor returned by ``K.constant``.
    """
    if reference is None:
        return K.constant(c)

    dtype = K.dtype(reference)
    # A ``np.dtype`` instance is not callable; its ``.type`` attribute is the
    # scalar constructor that performs the conversion.
    return K.constant(np.dtype(dtype).type(c), dtype=dtype)
Exemplo n.º 21
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: sequence; the first ``len(anchors) // 3`` entries are used as
        yolo_outputs (list of tensor, the output of yolo_body or
        tiny_yolo_body) and the remaining entries as y_true (list of array,
        the output of preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: bool, when true the loss components are routed through tf.Print

    Returns
    -------
    loss: tensor, the xy + wh + confidence + class losses summed over all
        output layers, each divided by the batch size

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    # NOTE(review): the two-layer mask reuses anchor 3 and never selects
    # anchor 0 -- confirm it matches the masks used when y_true was built.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # Input size is taken to be 32x the spatial size of the first output,
    # cast to the dtype of y_true.
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]

    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 of y_true is used as the object mask, channels 5+ as class targets.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        # Debug output; runs whenever this Python function body executes.
        print(f'now it is in layer {l}', object_mask)

        # Relies on a yolo_head that accepts calc_loss and returns
        # (grid, raw_pred, pred_xy, pred_wh).
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Weight is 2 - w*h of the true box, so smaller boxes weigh more.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # True boxes of sample b, selected where the object mask is set.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the best IoU with any true box is below the threshold.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        #_, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        #wh_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_wh, raw_pred[...,2:4], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # Non-object cells contribute to the confidence loss only where ignore_mask is 1.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        # Each component is averaged over the batch before accumulation.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
    def get_updates(self, loss, params):
        """Build momentum update ops with a Gaussian LR schedule and clipping.

        The learning rate follows ``min_lr + peak_lr * exp(-(i - c)^2 /
        (c * sigma)^2)`` with ``i`` the iteration counter, ``c`` =
        ``self.peaklriter`` and ``sigma`` = ``self.lrsigma``. Learning rate,
        momentum and value clips can be overridden per parameter through the
        keys of ``self.lr`` / ``self.min_lr`` / ``self.peak_lr``,
        ``self.momentum`` and ``self.clips``, looked up by parameter name
        with ``set_pattern_find``. The magnitude of every update is limited
        to ``mean(|p|) * self.UPCLIP``.

        Returns ``self.updates``; also sets ``self.weights``.
        """
        grads = self.get_gradients(loss, params)

        # first update the number of iterations
        self.updates = [K.update_add(self.iterations, 1)]
        
        # Cycling Gaussian LR
        # I implement this lr_f = lambda x,b,c,s: b+ s*np.exp(-(x-c)**2/(c*0.5)**2)
        def gauss_lr(min_lr, max_lr, center, lrsigma,i):
            # Gaussian bump of height max_lr on top of min_lr, centered at
            # `center` with width center*lrsigma.
            return (min_lr+ max_lr*K.exp(-(i-center)**2/(center*lrsigma)**2))
        
        ite_casted = K.cast(self.iterations, K.dtype(self.peaklriter))
        all_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'],
                              self.peaklriter,self.lrsigma,ite_casted)
        #current_lr = self.min_lr['all'] + 
        #self.peak_lr['all']*K.exp(((ite_casted-self.peaklriter)**2)/(self.dropsigma*self.peaklriter)**2)
        ############################################################################
        # Store the scheduled default rate in the self.lr['all'] variable.
        self.updates.append(K.update(self.lr['all'],all_lr))

        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(s) for s in shapes]
        self.weights = [self.iterations] + moments
        #print(self.weights)
        

        for p, g, m in zip(params, grads, moments):
            #print("HEREEEE:", p.name, g, m)
            # presumably returns the matching key, or a falsy value when no
            # key matches the parameter name -- verify against set_pattern_find
            lrptrkey= set_pattern_find(p.name,self.lr.keys())
            if lrptrkey:
                if self.verbose>0:
                    print("Setting different learning rate for ", p.name, " : ", K.eval(self.lr[lrptrkey]))
                # Use the parameter-specific min/peak rates only when both exist;
                # otherwise fall back to the 'all' schedule.
                if set_pattern_find(p.name,self.min_lr.keys()) and set_pattern_find(p.name,self.peak_lr.keys()):
                    p_lr = gauss_lr(self.min_lr[lrptrkey], self.peak_lr[lrptrkey],
                                          self.peaklriter,self.lrsigma,ite_casted)
                else:
                    p_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'],
                                          self.peaklriter,self.lrsigma,ite_casted)
            else:
                p_lr = self.lr['all']
                
            momptrkey = set_pattern_find(p.name,self.momentum.keys())
            if momptrkey:
                if self.verbose>0:
                    print("Setting different momentum for ", p.name, " , ", 
                          K.eval(self.momentum[momptrkey]))
                momentum = self.momentum[momptrkey]
            else:
                momentum = self.momentum['all'] 

            
            

            if self.nesterov:
                updt = momentum * (momentum * m - p_lr * g) - p_lr * g
            else:
                updt = momentum * m - p_lr * g
            
            # CHANGE CLIP
            # NOTE(review): _to_tensor is only referenced by the commented-out
            # lines below.
            _to_tensor = K.tensorflow_backend._to_tensor
            _clip_by_val = K.tf.clip_by_value
            # Largest allowed update magnitude: mean |p| scaled by UPCLIP.
            margin = K.mean(K.abs(p))*K.constant(self.UPCLIP)
            #margin = K.mean(K.abs(p*K.constant(self.UPCLIP)))
            #min_value = _to_tensor(-margin, p.dtype.base_dtype)
            #max_value = _to_tensor(margin, p.dtype.base_dtype)
            
            #max_v = K.maximum(min_value, max_value)
            min_v = K.zeros_like(margin)
            # Clip the magnitude to [0, margin] and restore the sign afterwards.
            updt_sign = K.sign(updt)
            updt_val = _clip_by_val(K.abs(updt), min_v, margin)
            
            v = updt_sign * updt_val  # velocity
            new_p = p + v
            
            
            # The clipped update is what gets stored as the new moment.
            self.updates.append(K.update(m, v))
            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            clptrkey = set_pattern_find(p.name,self.clips.keys())
            if self.clips_val and clptrkey:
                # The clip range is evaluated to concrete values while the
                # update ops are being built.
                c = K.eval(self.clips[clptrkey])
                if self.verbose>0:
                    print("Clipping variable",p.name," to ", c)
                    #input()
                new_p = K.clip(new_p, c[0], c[1])
            #print("updates for ", p.name, " lr: ", K.eval(lr), " mom:", K.eval(momentum))
            self.updates.append(K.update(p, new_p))
        return self.updates
Exemplo n.º 23
0
def yolo_head(feats, anchors, num_classes, tree_):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.
    tree_ : object
        Provides ``group_num``, ``group_offset`` and ``group_size``; the
        class logits are soft-maxed separately within each group.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Concatenation of the per-group softmax outputs over the class logits.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors, name='anchor'),
                               [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])
    # conv_height_index is the height coordinate of the top-left grid cell
    # of a given feats.

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)

    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    # conv_width_index = K.tile(conv_width_index, [conv_dims[0]])

    # Pair the two indices per cell and reshape so they broadcast against feats.
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))
    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])

    # NOTE(review): conv_dims is ordered (height, width) while box_xy is
    # documented as x, y -- this looks like it assumes square feature maps;
    # confirm.
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    # One softmax per class group; each group's logits start at
    # 5 + group_offset[i] and span group_size[i] channels.
    box_class_probs = K.concatenate([
        K.softmax(feats[..., 5 + tree_.group_offset[i]:5 +
                        tree_.group_offset[i] + tree_.group_size[i]])
        for i in range(tree_.group_num)
    ],
                                    axis=-1)

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    # Relative position within the whole image.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
Exemplo n.º 24
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    """Return the YOLO loss tensor accumulated over every prediction layer.

    Parameters
    ----------
    args : list of tensors
        The ``num_layers`` model outputs followed by the ``num_layers``
        ground-truth tensors, i.e. ``[*yolo_outputs, *y_true]``.
    anchors : array, shape=(N, 2)
        Anchor sizes as (width, height). ``N // 3`` is taken as the number
        of prediction layers.
    num_classes : integer
        Number of object classes, forwarded to ``yolo_head``.
    ignore_thresh : float, default 0.5
        A prediction whose best IoU with the ground-truth boxes of its image
        is not below this value is left out of the no-object confidence term.
    print_loss : bool, default False
        When true, the running loss is wrapped in ``tf.Print`` so the
        individual terms are logged.

    Returns
    -------
    loss : tensor
        Sum over all layers of the xy, wh, confidence and class losses, each
        divided by the batch size.
    """
    num_layers = len(anchors) // 3  # default setting

    # args is laid out as [*yolo_outputs, *y_true].
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    # Anchor indices assigned to each output layer. Note that this variant
    # maps the second layer of a two-layer model to anchors [0, 1, 2].
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [0, 1, 2]]
    # Input size derived from the first output grid times 32
    # (presumably that layer's stride -- confirm against the model).
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 of y_true is the objectness flag, channels 5+ the classes.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss: encode the ground truth the same
        # way the network emits it (cell offset for xy, log ratio for wh).
        # grid_shapes is (h, w), hence the reversal to match (x, y).
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf

        # 2 - true_w * true_h
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of image b, compared against every
            # prediction of that image.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # tf.while_loop is the public entry point; K.control_flow_ops is not
        # available on every Keras backend version.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m,
                                       loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # Object cells always contribute; no-object cells contribute only
        # where ignore_mask is 1.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
         (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
Exemplo n.º 25
0
 def step(best_indices, previous_indices):
     """Select, per batch row, the entry of best_indices at the previous state.

     previous_indices is a batch_size vector of state indices and
     best_indices a (batch_size, num_states) matrix; the result is
     [best_indices[b, previous_indices[b]] for b in range(batch_size)].
     """
     batch_positions = arange(batch_size, dtype=K.dtype(previous_indices))
     gather_index = [batch_positions, previous_indices]
     return multi_index(best_indices, gather_index)
def gaussian_yolo_loss(args,
                       anchors,
                       num_classes,
                       ignore_thresh=.5,
                       print_loss=False,
                       use_focal_confidence_loss=False,
                       use_focal_class_loss=False):
    """Return the Gaussian YOLO loss tensor accumulated over every layer.

    The raw prediction channels are read as: 0:2 xy mean, 2:4 wh mean,
    4:8 per-coordinate sigma logits, 8 confidence logit, 9+ class logits.

    Parameters
    ----------
    args : list of tensors
        The ``num_layers`` model outputs followed by the ``num_layers``
        ground-truth tensors, i.e. ``[*yolo_outputs, *y_true]``.
    anchors : array, shape=(N, 2)
        Anchor sizes as (width, height). ``N // 3`` is taken as the number
        of prediction layers.
    num_classes : integer
        Number of object classes, forwarded to ``yolo_head``.
    ignore_thresh : float, default 0.5
        A prediction whose best IoU with the ground-truth boxes of its image
        is not below this value is left out of the no-object confidence term.
    print_loss : bool, default False
        When true, the running loss is wrapped in ``tf.Print`` so the
        individual terms are logged.
    use_focal_confidence_loss : bool, default False
        Use ``sigmoid_focal_loss`` for the confidence term. The ignore mask
        is not applied in that branch.
    use_focal_class_loss : bool, default False
        Use ``sigmoid_focal_loss`` for the class term. The object mask is
        not applied in that branch.

    Returns
    -------
    loss : tensor
        Sum over all layers of the x, y, w, h, confidence and class losses,
        each divided by the batch size.
    """
    # Three anchors per prediction layer.
    num_layers = len(anchors)//3

    # args = [*model_body.output, *y_true]
    # e.g. y_true: [(m, 13, 13, 3, 85), (m, 26, 26, 3, 85), (m, 52, 52, 3, 85)]
    # e.g. yolo_outputs: [(m, 13, 13, 3, 89), (m, 26, 26, 3, 89), (m, 52, 52, 3, 89)]
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    # With the default anchors:
    # [6, 7, 8]: [(116, 90),  (156, 198),  (373, 326)]
    # [3, 4, 5]: [(30 , 61),  (62,   45),  (59,  119)]
    # [0, 1, 2]: [(10,  13),  (16,   30),  (33,   23)]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # Input size derived from the first output grid times 32, e.g. [416, 416].
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    # (height, width) of every output grid, e.g. [[13, 13], [26, 26], [52, 52]].
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0

    # Batch size, as an integer tensor and cast to float for the division.
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # confidence
        object_mask = y_true[l][..., 4:5]
        # class probability
        true_class_probs = y_true[l][..., 5:]

        # NOTE(review): pred_xy and pred_wh are presumably normalized to the
        # input size -- confirm against yolo_head.
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)

        # xy and wh joined on the last axis, e.g. (m, 13, 13, 3, 4)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Dynamic tensor array holding one ignore mask per batch element.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of image b, (n, 4).
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])

            # IoU of every predicted box against every ground-truth box.
            iou = box_iou(pred_box[b], true_box)

            # Best IoU per predicted box; the ground-truth axis is reduced.
            best_iou = K.max(iou, axis=-1)

            # if iou < ignore threshold: negative.
            # if iou >= ignore threshold and the anchor is not positive, it
            # is ignored; such anchors lie close to a positive anchor.
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask

        # Run loop_body once per batch element. tf.while_loop is the public
        # entry point; K.control_flow_ops is not available on every Keras
        # backend version.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])

        ignore_mask = ignore_mask.stack()
        # Trailing axis so the mask broadcasts against the confidence loss.
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # Encode the ground-truth boxes the way the network emits them.
        # grid_shapes is (h, w) while xy is (x, y), so the shape is reversed;
        # without the reversal non-square grids are scaled incorrectly.
        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])

        # Cells without an object have wh == 0, so the log above is -inf;
        # replace those entries by 0 because 0 * inf would give NaN.
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))

        # 2 - w * h: larger weight for small ground-truth boxes.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Negative log-likelihood of each ground-truth coordinate under the
        # predicted Gaussian; K.epsilon() keeps the log finite.
        x_loss = (-1) * object_mask * box_loss_scale * \
            K.log(gaussian_distribution(mu=K.sigmoid(raw_pred[..., 0:1]),
                                        sigma=K.sigmoid(raw_pred[..., 4:5]),
                                        x=raw_true_xy[..., 0:1]) + K.epsilon())
        y_loss = (-1) * object_mask * box_loss_scale * \
            K.log(gaussian_distribution(mu=K.sigmoid(raw_pred[..., 1:2]),
                                        sigma=K.sigmoid(raw_pred[..., 5:6]),
                                        x=raw_true_xy[..., 1:2]) + K.epsilon())
        w_loss = (-1) * object_mask * box_loss_scale * \
            K.log(gaussian_distribution(mu=raw_pred[..., 2:3],
                                        sigma=K.sigmoid(raw_pred[..., 6:7]),
                                        x=raw_true_wh[..., 0:1]) + K.epsilon())
        h_loss = (-1) * object_mask * box_loss_scale * \
            K.log(gaussian_distribution(mu=raw_pred[..., 3:4],
                                        sigma=K.sigmoid(raw_pred[..., 7:8]),
                                        x=raw_true_wh[..., 1:2]) + K.epsilon())

        # use focal confidence loss
        if use_focal_confidence_loss:
            confidence_loss = sigmoid_focal_loss(object_mask, raw_pred[..., 8:9])
        else:
            # Object cells always contribute; no-object cells contribute only
            # where ignore_mask is 1.
            confidence_loss = object_mask * K.binary_crossentropy(object_mask,
                                                                  raw_pred[..., 8:9], from_logits=True) + \
                (1-object_mask) * K.binary_crossentropy(object_mask,
                                                        raw_pred[..., 8:9], from_logits=True) * ignore_mask
        # use focal class loss
        if use_focal_class_loss:
            class_loss = sigmoid_focal_loss(true_class_probs, raw_pred[..., 9:])
        else:
            class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 9:], from_logits=True)

        x_loss = K.sum(x_loss) / mf
        y_loss = K.sum(y_loss) / mf
        w_loss = K.sum(w_loss) / mf
        h_loss = K.sum(h_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += x_loss + y_loss + w_loss + h_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, x_loss, y_loss, w_loss, h_loss,
                                   confidence_loss, class_loss, K.sum(ignore_mask)], message='loss: ')
    return loss
Exemplo n.º 27
0
def layer_test(layer_cls,
               kwargs=None,
               input_shape=None,
               input_dtype=None,
               input_data=None,
               expected_output=None,
               expected_output_dtype=None,
               expected_output_shape=None,
               validate_training=True,
               adapt_data=None,
               custom_objects=None,
               test_harness=None,
               supports_masking=None):
    """Test routine for a layer with a single input and single output.

    The layer is instantiated, run through a functional model and a
    Sequential model, round-tripped through its config, and optionally
    trained for one batch; shapes and dtypes are checked along the way.

    Args:
      layer_cls: Layer class object.
      kwargs: Optional dictionary of keyword arguments for instantiating the
        layer.
      input_shape: Input shape tuple. Entries that are None are replaced by a
        random size in [1, 3] when input data has to be generated.
      input_dtype: Data type of the input data. Defaults to 'float32' when
        data is generated, otherwise to the dtype of `input_data`.
      input_data: Numpy array of input data. Generated randomly when None.
      expected_output: Numpy array of the expected output.
      expected_output_dtype: Data type expected for the output. Defaults to
        `input_dtype`.
      expected_output_shape: Shape tuple for the expected shape of the output.
      validate_training: Whether to attempt to validate training on this
        layer. This might be set to False for non-differentiable layers that
        output string or integer values.
      adapt_data: Optional data for an 'adapt' call. If None, adapt() will not
        be tested for this layer. This is only relevant for
        PreprocessingLayers.
      custom_objects: Optional dictionary mapping name strings to custom
        objects in the layer class. This is helpful for testing custom layers.
      test_harness: The Tensorflow test, if any, that this function is being
        called in. Its assertAllEqual / assertAllClose are used for output
        comparison when given.
      supports_masking: Optional boolean to check the `supports_masking`
        property of the layer. If None, the check will not be performed.

    Returns:
      The output data (Numpy array) returned by the layer, for additional
      checks to be done by the calling code.

    Raises:
      ValueError: if both `input_data` and `input_shape` are None.
      AssertionError: if a dtype, shape or `supports_masking` check fails.
    """
    if input_data is None:
        if input_shape is None:
            raise ValueError('input_shape is None')
        if not input_dtype:
            input_dtype = 'float32'
        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        input_data = 10 * np.random.random(input_data_shape)
        if input_dtype[:5] == 'float':
            input_data -= 0.5
        input_data = input_data.astype(input_dtype)
    elif input_shape is None:
        input_shape = input_data.shape
    if input_dtype is None:
        input_dtype = input_data.dtype
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # Pick the comparison function: exact equality for strings, closeness
    # for numeric outputs; the test harness' assertions win when available.
    if tf.as_dtype(expected_output_dtype) == tf.string:
        if test_harness:
            assert_equal = test_harness.assertAllEqual
        else:
            assert_equal = string_test
    else:
        if test_harness:
            assert_equal = test_harness.assertAllClose
        else:
            assert_equal = numeric_test

    # instantiation
    kwargs = kwargs or {}
    layer = layer_cls(**kwargs)

    if (supports_masking is not None
            and layer.supports_masking != supports_masking):
        raise AssertionError(
            'When testing layer %s, the `supports_masking` property is %r '
            'but expected to be %r.\nFull kwargs: %s' %
            (layer_cls.__name__, layer.supports_masking, supports_masking,
             kwargs))

    # Test adapt, if data was passed.
    if adapt_data is not None:
        layer.adapt(adapt_data)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    # test and instantiation from weights
    # NOTE(review): this tests membership in the object returned by
    # getargspec, which presumably is the (args, varargs, keywords, defaults)
    # tuple rather than the list of argument names -- confirm whether
    # `.args` was intended.
    if 'weights' in tf_inspect.getargspec(layer_cls.__init__):
        kwargs['weights'] = weights
        layer = layer_cls(**kwargs)

    # test in functional API
    x = layers.Input(shape=input_shape[1:], dtype=input_dtype)
    y = layer(x)
    if backend.dtype(y) != expected_output_dtype:
        raise AssertionError(
            'When testing layer %s, for input %s, found output '
            'dtype=%s but expected to find %s.\nFull kwargs: %s' %
            (layer_cls.__name__, x, backend.dtype(y), expected_output_dtype,
             kwargs))

    def assert_shapes_equal(expected, actual):
        """Asserts that the output shape from the layer matches the actual shape."""
        if len(expected) != len(actual):
            raise AssertionError(
                'When testing layer %s, for input %s, found output_shape='
                '%s but expected to find %s.\nFull kwargs: %s' %
                (layer_cls.__name__, x, actual, expected, kwargs))

        for expected_dim, actual_dim in zip(expected, actual):
            # Unwrap v1 Dimension objects so plain ints / None are compared.
            if isinstance(expected_dim, tf.compat.v1.Dimension):
                expected_dim = expected_dim.value
            if isinstance(actual_dim, tf.compat.v1.Dimension):
                actual_dim = actual_dim.value
            # An expected dimension of None matches any actual size.
            if expected_dim is not None and expected_dim != actual_dim:
                raise AssertionError(
                    'When testing layer %s, for input %s, found output_shape='
                    '%s but expected to find %s.\nFull kwargs: %s' %
                    (layer_cls.__name__, x, actual, expected, kwargs))

    if expected_output_shape is not None:
        assert_shapes_equal(tf.TensorShape(expected_output_shape), y.shape)

    # check shape inference
    model = models.Model(x, y)
    computed_output_shape = tuple(
        layer.compute_output_shape(tf.TensorShape(input_shape)).as_list())
    computed_output_signature = layer.compute_output_signature(
        tf.TensorSpec(shape=input_shape, dtype=input_dtype))
    actual_output = model.predict(input_data)
    actual_output_shape = actual_output.shape
    assert_shapes_equal(computed_output_shape, actual_output_shape)
    assert_shapes_equal(computed_output_signature.shape, actual_output_shape)
    if computed_output_signature.dtype != actual_output.dtype:
        raise AssertionError(
            'When testing layer %s, for input %s, found output_dtype='
            '%s but expected to find %s.\nFull kwargs: %s' %
            (layer_cls.__name__, x, actual_output.dtype,
             computed_output_signature.dtype, kwargs))
    if expected_output is not None:
        assert_equal(actual_output, expected_output)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = models.Model.from_config(model_config, custom_objects)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        output = recovered_model.predict(input_data)
        assert_equal(output, actual_output)

    # test training mode (e.g. useful for dropout tests)
    # Rebuild the model to avoid the graph being reused between predict() and
    # the training call below.
    # See b/120160788 for more details. This should be mitigated after 2.0.
    layer_weights = layer.get_weights(
    )  # Get the layer weights BEFORE training.
    if validate_training:
        model = models.Model(x, layer(x))
        if _thread_local_data.run_eagerly is not None:
            model.compile('rmsprop',
                          'mse',
                          weighted_metrics=['acc'],
                          run_eagerly=should_run_eagerly())
        else:
            model.compile('rmsprop', 'mse', weighted_metrics=['acc'])
        model.train_on_batch(input_data, actual_output)

    # test as first layer in Sequential API
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # Test adapt, if data was passed.
    if adapt_data is not None:
        layer.adapt(adapt_data)

    model = models.Sequential()
    model.add(layers.Input(shape=input_shape[1:], dtype=input_dtype))
    model.add(layer)

    # Restore the pre-training weights so the output is comparable with
    # expected_output.
    layer.set_weights(layer_weights)
    actual_output = model.predict(input_data)
    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(computed_output_shape,
                                        actual_output_shape):
        if expected_dim is not None:
            if expected_dim != actual_dim:
                raise AssertionError(
                    'When testing layer %s **after deserialization**, '
                    'for input %s, found output_shape='
                    '%s but expected to find inferred shape %s.\nFull kwargs: %s'
                    % (layer_cls.__name__, x, actual_output_shape,
                       computed_output_shape, kwargs))
    if expected_output is not None:
        assert_equal(actual_output, expected_output)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = models.Sequential.from_config(model_config,
                                                    custom_objects)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        output = recovered_model.predict(input_data)
        assert_equal(output, actual_output)

    # for further checks in the caller function
    return actual_output
Exemplo n.º 28
0
def custom_loss(args,
                anchors,
                num_classes,
                global_step=0.,
                rescore_confidence=False):
    """
    Modified YOLO localization loss function with an added EDL term.

    Parameters
    ----------
    args : sequence of four tensors
        ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``:

        yolo_output
            Final convolutional layer features.
        true_boxes
            Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
            containing box x_center, y_center, width, height, and class.
        detectors_mask
            0/1 mask for detector positions where there is a matching ground
            truth.
        matching_true_boxes
            Corresponding ground truth boxes for positive detector positions.
            Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    global_step : float, default=0.
        Not used by the current body; it is only referenced by a
        commented-out annealing schedule.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    Returns
    -------
    tensor
        ``tf.stack`` of twelve values, in order: total loss, confidence loss,
        classification loss, EDL loss, coordinates loss, accuracy, mean
        evidence on correct predictions, mean evidence on wrong predictions,
        annealing coefficient, expected cross-entropy sum, KL sum and
        annealed KL sum.

    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    # Relative weights of the individual loss terms.
    object_scale = 5
    no_object_scale = 1
    class_scale = 2.5
    coordinates_scale = 1
    edl_scale = 2.5

    yad2kOutput, edlOutput = yolo_head(yolo_output,
                                       anchors,
                                       num_classes,
                                       clip=5.)
    pred_xy, pred_wh, pred_confidence, pred_softmax_class_probs = yad2kOutput
    pred_class_logits, pred_box_class_evidence, pred_alpha, \
                pred_S, pred_uncertainty, pred_class_probs = edlOutput

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate((K.sigmoid(feats[..., 0:2]), feats[..., 2:4]),
                               axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_probs))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    ########################################################
    ######## EDL Loss and metric calculations here #########
    ########################################################

    ########             EDL Loss                  #########

    ### EDL Loss - expected value of cross entropy loss over
    # the predicted Dirichlet distribution + KL regularization term

    # Expected value of cross entropy loss
    A = tf.reduce_sum(matching_classes *
                      (tf.digamma(pred_S) - tf.digamma(pred_alpha)),
                      4,
                      keepdims=True)

    # KL term between the Dirichlet with the true-class evidence removed
    # (alp) and the all-ones Dirichlet (beta). `keepdims` is used throughout
    # for consistency with the reductions above and below.
    alp = pred_box_class_evidence * (1 - matching_classes) + 1
    beta = K.ones_like(alp)
    S_alpha = tf.reduce_sum(alp, axis=4, keepdims=True)
    S_beta = tf.reduce_sum(beta, axis=4, keepdims=True)
    lnB = tf.lgamma(S_alpha) - tf.reduce_sum(
        tf.lgamma(alp), axis=4, keepdims=True)
    lnB_uni = tf.reduce_sum(tf.lgamma(beta), axis=4,
                            keepdims=True) - tf.lgamma(S_beta)
    dg0 = tf.digamma(S_alpha)
    dg1 = tf.digamma(alp)
    kl = tf.reduce_sum(
        (alp - beta) * (dg1 - dg0), axis=4, keepdims=True) + lnB + lnB_uni

    #annealing_coeff = 2.0 * tf.minimum(1.0,  tf.cast(global_step / annealing_step, tf.float32))
    # The KL weight is currently a constant rather than annealed.
    annealing_coeff = 5.0
    B = annealing_coeff * kl

    # Apply detector mask to the combined EDL term.
    edl_loss = edl_scale * detectors_mask * (A + B)

    # EDL loss components
    exp_ce_loss_sum = tf.reduce_sum(detectors_mask * A)
    kl_loss_sum = tf.reduce_sum(detectors_mask * kl)
    akl_loss_sum = annealing_coeff * kl_loss_sum

    ########             EDL Metrics               #########

    # Accuracy of the arg-max evidence class over the masked detectors.
    preds = tf.cast(tf.argmax(pred_box_class_evidence, 4), 'int32')
    truth = tf.cast(matching_true_boxes[..., 4], 'int32')
    matchs = tf.cast(tf.equal(preds, truth), tf.float32)
    match = tf.boolean_mask(tf.expand_dims(matchs, 4), detectors_mask)
    acc = tf.reduce_mean(match)

    total_evidence = tf.reduce_sum(pred_box_class_evidence, 4, keepdims=True)
    total_evidence = tf.boolean_mask(total_evidence, detectors_mask)

    # Mean total evidence on correct / incorrect predictions; the 1e-20
    # terms guard against division by zero.
    mean_ev_succ = tf.reduce_sum(
        total_evidence * match) / tf.reduce_sum(match + 1e-20)
    mean_ev_fail = tf.reduce_sum(
        total_evidence *
        (1 - match)) / (tf.reduce_sum(tf.abs(1 - match)) + 1e-20)

    ########################################################

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    edl_loss_sum = K.sum(edl_loss)

    total_loss = 0.5 * (confidence_loss_sum + edl_loss_sum +
                        coordinates_loss_sum + classification_loss_sum)

    return tf.stack([
        total_loss, confidence_loss_sum, classification_loss_sum, edl_loss_sum,
        coordinates_loss_sum, acc, mean_ev_succ, mean_ev_fail, annealing_coeff,
        exp_ce_loss_sum, kl_loss_sum, akl_loss_sum
    ])
Exemplo n.º 29
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    """Return the YOLO loss tensor summed over the prediction layers.

    ``args`` holds the model outputs followed by the matching ground-truth
    tensors. ``anchors`` is indexed with a fixed three-layer mask, and
    ``num_classes`` is forwarded to ``yolo_head``. Predictions whose best
    IoU reaches ``ignore_thresh`` are dropped from the no-object confidence
    term. With ``print_loss`` the running loss is wrapped in ``tf.Print``.
    """
    # Three anchors per prediction layer (9 anchors -> 3 layers).
    num_layers = len(anchors) // 3
    yolo_outputs, y_true = args[:num_layers], args[num_layers:]
    # Anchor indices used by each prediction layer.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    true_dtype = K.dtype(y_true[0])
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, true_dtype)
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], true_dtype)
        for idx in range(num_layers)
    ]

    # Batch size, as an integer tensor and as a float for the averaging.
    batch_size = K.shape(yolo_outputs[0])[0]
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    total_loss = 0
    for layer in range(num_layers):
        truth = y_true[layer]
        layer_anchors = anchors[anchor_mask[layer]]

        # Ground truth: 1 where an object exists, 0 elsewhere.
        object_mask = truth[..., 4:5]
        # Ground truth: 1 for the class the object belongs to, 0 otherwise.
        true_class_probs = truth[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[layer],
            layer_anchors,
            num_classes,
            input_shape,
            calc_loss=True)
        # Join the xy and wh predictions into a single box tensor.
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Raw box targets used to compute the cost.
        raw_true_xy = truth[..., :2] * grid_shapes[layer][::-1] - grid
        log_wh = K.log(truth[..., 2:4] / layer_anchors * input_shape[::-1])
        # avoid log(0)=-inf
        raw_true_wh = K.switch(object_mask, log_wh, K.zeros_like(log_wh))
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Build ignore_mask by iterating over every element of the batch.
        object_mask_bool = K.cast(object_mask, 'bool')
        mask_buffer = tf.TensorArray(true_dtype, size=1, dynamic_size=True)

        def loop_body(b, buffer):
            true_box = tf.boolean_mask(truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            below_thresh = K.cast(best_iou < ignore_thresh,
                                  K.dtype(true_box))
            return b + 1, buffer.write(b, below_thresh)

        _, mask_buffer = tf.while_loop(lambda b, *_: b < batch_size,
                                       loop_body, [0, mask_buffer])
        ignore_mask = K.expand_dims(mask_buffer.stack(), -1)

        # K.binary_crossentropy helps to avoid overflow in exp().
        xy_term = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_term = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        objectness_bce = K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True)
        confidence_term = (object_mask * objectness_bce +
                           (1 - object_mask) * objectness_bce * ignore_mask)
        class_term = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_term) / batch_size_f
        wh_loss = K.sum(wh_term) / batch_size_f
        confidence_loss = K.sum(confidence_term) / batch_size_f
        class_loss = K.sum(class_term) / batch_size_f
        total_loss += xy_loss + wh_loss + confidence_loss + class_loss

        if print_loss:
            logged = [
                total_loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ]
            total_loss = tf.Print(total_loss, logged, message='loss: ')
    return total_loss
Exemplo n.º 30
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=0.5, print_loss=False):
    """Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensor, the first ``len(anchors) // 3`` entries are the
        yolo_outputs (the output of yolo_body_full or yolo_body_tiny), the
        remaining entries are y_true (the output of preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: bool, if True the accumulated loss is wrapped in tf.Print so
        the individual loss terms are logged for every layer

    Returns
    -------
    loss: tensor, shape=(1,)

    """
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] \
        if num_layers == 3 else [[3, 4, 5], [0, 1, 2]]
    # The first output grid is assumed to have stride 32 w.r.t. the input.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32,
                         K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 holds the objectness target, channels 5+ the class targets.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes, input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        # The mask comes from a 4:5 slice (last dimension 1) whereas both
        # branches come from a 2:4 slice (last dimension 2), so this relies on
        # K.switch accepting an element-wise condition of that shape.
        # Cells without an object get 0 instead of log(0) = -inf.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        # Weight shrinks as the true box area (w * h) grows.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def _loop_body(b, ignore_mask):
            # Ground-truth boxes of sample ``b`` that actually contain an object.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the best overlap stays below the threshold, else 0.
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh,
                                                      K.dtype(true_box)))
            return b + 1, ignore_mask

        # Use the public tf.while_loop (as the other loss functions in this
        # file do) rather than reaching through the Keras backend module.
        _, ignore_mask = tf.while_loop(
            lambda b, *_: b < m, _loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        ce = K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                   from_logits=True)
        xy_loss = object_mask * box_loss_scale * ce
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        ce_loss = K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
                                        from_logits=True)
        # Background cells only contribute where ignore_mask is 1.
        confidence_loss = object_mask * ce_loss + (1 - object_mask) * ce_loss * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs,
                                                         raw_pred[..., 5:],
                                                         from_logits=True)

        # Normalise every term by the batch size.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss,
                                   class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    # see: https://github.com/qqwweee/keras-yolo3/issues/129#issuecomment-408855511
    return K.expand_dims(loss, axis=0)
    def get_updates(self, loss, params):
        """Build the update ops for a momentum-SGD step with per-name settings.

        Learning rate, decay, momentum and clip range are looked up per
        parameter by passing ``p.name`` and the keys of the corresponding dict
        to ``set_pattern_find`` (helper defined elsewhere); the ``'all'`` entry
        is the fallback for learning rate, decay and momentum.

        Args:
            loss: loss tensor handed to ``self.get_gradients``.
            params: list of parameters to update.

        Returns:
            ``self.updates``, the list of update ops assembled here.
        """
        grads = self.get_gradients(loss, params)

        # first update the number of iterations
        self.updates = [K.update_add(self.iterations, 1)]
        
        if self.decay_epochs:
            # True when the iteration counter equals any entry of decay_epochs.
            ite_casted = K.cast(self.iterations, K.dtype(self.decay_epochs))
            hit_decay_epoch = K.any(K.equal(ite_casted, self.decay_epochs))
            
            # Scale the global learning rate by its decay factor on a hit,
            # otherwise keep it unchanged.
            lr = K.switch(hit_decay_epoch, self.lr['all']*self.decay['all'],
                          self.lr['all'])

            self.updates.append(K.update(self.lr['all'],lr))

        # One zero-initialised moment accumulator per parameter.
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(s) for s in shapes]
        self.weights = [self.iterations] + moments
        

        for p, g, m in zip(params, grads, moments):
            # Parameter-specific learning rate, if a key matches this name.
            lrptrkey= set_pattern_find(p.name,self.lr.keys())
            if lrptrkey:
                if self.verbose>0:
                    print("Setting different learning rate for ", p.name, " : ", K.eval(self.lr[lrptrkey]))
                lr = self.lr[lrptrkey]
                dcptrkey=set_pattern_find(p.name,self.decay.keys())
                # NOTE(review): the K.update on self.lr[lrptrkey] below is
                # appended once per matching parameter, so a key shared by
                # several parameters gets several update ops -- confirm intent.
                if self.decay_epochs and dcptrkey:
                    # Parameter-specific decay factor.
                    lr = K.switch(hit_decay_epoch, self.lr[lrptrkey]*self.decay[dcptrkey],
                                  self.lr[lrptrkey])
                    self.updates.append(K.update(self.lr[lrptrkey],lr))
                    if self.verbose>0:
                        print("Added decay to ", p.name, ": ", K.eval(lr),",",self.decay[dcptrkey])
                elif self.decay_epochs:
                    # No specific decay factor: fall back to the global one.
                    lr = K.switch(hit_decay_epoch, self.lr[lrptrkey]*self.decay['all'],self.lr[lrptrkey])
                    self.updates.append(K.update(self.lr[lrptrkey],lr))
                    if self.verbose>0:
                        print("Added decay to ", p.name, ": ", K.eval(lr),",",self.decay['all'])
                else:
                    lr = self.lr[lrptrkey]

            else:
                lr = self.lr['all']
            
            # Parameter-specific momentum, falling back to the global value.
            momptrkey = set_pattern_find(p.name,self.momentum.keys())
            if momptrkey:
                if self.verbose>0:
                    print("Setting different momentum for ", p.name, " , ", 
                          K.eval(self.momentum[momptrkey]))
                momentum = self.momentum[momptrkey]
            else:
                momentum = self.momentum['all'] 

            v = momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))

            # Both branches restate the velocity expression instead of using v.
            if self.nesterov:
                new_p = p + momentum * (momentum * m - lr * g) - lr * g
            else:
                new_p = p + momentum * m - lr * g
            
            # Optional clipping of the new value to a band around the current
            # value p; the half-width is mean(|p * UPCLIP|).
            if self.UPCLIP:
                _to_tensor = K.tensorflow_backend._to_tensor
                _clip_by_val = K.tf.clip_by_value
                margin = K.mean(K.abs(p*K.constant(self.UPCLIP)))
                min_value = _to_tensor(p-margin, p.dtype.base_dtype)
                max_value = _to_tensor(p+margin, p.dtype.base_dtype)
                
                # Order the two bounds element-wise before clipping.
                max_v = K.maximum(min_value, max_value)
                min_v = K.minimum(min_value, max_value)

                new_p = _clip_by_val(new_p, min_v, max_v)
             
            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            # Optional absolute clip range selected by parameter name; the
            # range is evaluated eagerly with K.eval and indexed as (low, high).
            clptrkey = set_pattern_find(p.name,self.clips.keys())
            if self.clips_val and clptrkey:
                if self.verbose>0:
                    print("Clipping variable",p.name," to ", self.clips[clptrkey])
                c = K.eval(self.clips[clptrkey])
                new_p = K.clip(new_p, c[0], c[1])
            self.updates.append(K.update(p, new_p))
        return self.updates
Exemplo n.º 32
0
def yolo4_loss(args,
               anchors,
               num_classes,
               ignore_thresh=.5,
               label_smoothing=0,
               use_focal_loss=False,
               use_focal_obj_loss=False,
               use_softmax_loss=False,
               use_giou_loss=False,
               use_diou_loss=False):
    '''Build the yolo4 training loss tensor.

    Parameters
    ----------
    args: list of tensor, the first ``len(anchors) // 3`` entries are the
        yolo_outputs (the output of yolo_body or tiny_yolo_body), the rest
        are y_true (the output of preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    label_smoothing: if truthy, class targets go through _smooth_labels with this value
    use_focal_loss: bool, use a focal loss for the classification term
    use_focal_obj_loss: bool, use sigmoid focal loss for the objectness term
    use_softmax_loss: bool, softmax instead of sigmoid classification
    use_giou_loss: bool, use GIoU as location loss
    use_diou_loss: bool, use DIoU as location loss (only when use_giou_loss is off)

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs, y_true = args[:num_layers], args[num_layers:]
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [0, 1, 2]]

    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], K.dtype(y_true[0]))
        for idx in range(num_layers)
    ]
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0

    for l in range(num_layers):
        layer_true = y_true[l]
        layer_anchors = anchors[anchor_mask[l]]

        object_mask = layer_true[..., 4:5]
        true_class_probs = layer_true[..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs,
                                              label_smoothing)

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], layer_anchors, num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])
        class_logits = raw_pred[..., 5:]

        # Ground truth expressed in the raw (Darknet) parametrisation.
        raw_true_xy = layer_true[..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(
            layer_true[..., 2:4] / layer_anchors * input_shape[::-1])
        # Zero out cells without an object to avoid log(0)=-inf.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - layer_true[..., 2:3] * layer_true[..., 3:4]

        # Per-sample ignore mask: 1 where the best IoU with any true box of
        # that sample is below ignore_thresh.
        object_mask_bool = K.cast(object_mask, 'bool')
        ignore_mask = tf.TensorArray(
            K.dtype(y_true[0]), size=1, dynamic_size=True)

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(layer_true[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            below_thresh = K.cast(best_iou < ignore_thresh,
                                  K.dtype(true_box))
            return b + 1, ignore_mask.write(b, below_thresh)

        _, ignore_mask = tf.while_loop(lambda b, *_: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        # Objectness term.
        if use_focal_obj_loss:
            confidence_loss = sigmoid_focal_loss(object_mask,
                                                 raw_pred[..., 4:5])
        else:
            obj_ce = K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5], from_logits=True)
            confidence_loss = (object_mask * obj_ce +
                               (1 - object_mask) * obj_ce * ignore_mask)

        # Classification term.
        if use_focal_loss and use_softmax_loss:
            class_loss = softmax_focal_loss(true_class_probs, class_logits)
        elif use_focal_loss:
            class_loss = sigmoid_focal_loss(true_class_probs, class_logits)
        elif use_softmax_loss:
            # softmax style classification output
            softmax_ce = K.categorical_crossentropy(
                true_class_probs, class_logits, from_logits=True)
            class_loss = object_mask * K.expand_dims(softmax_ce, axis=-1)
        else:
            # sigmoid style classification output
            class_loss = object_mask * K.binary_crossentropy(
                true_class_probs, class_logits, from_logits=True)

        # Location term.
        if use_giou_loss:
            giou = box_giou(pred_box, layer_true[..., 0:4])
            location_loss = K.sum(
                object_mask * box_loss_scale * (1 - giou)) / mf
        elif use_diou_loss:
            diou = box_diou(pred_box, layer_true[..., 0:4])
            location_loss = K.sum(
                object_mask * box_loss_scale * (1 - diou)) / mf
        else:
            # Standard YOLO location loss; binary cross-entropy on xy helps
            # to avoid exp overflow.
            xy_ce = K.binary_crossentropy(
                raw_true_xy, raw_pred[..., 0:2], from_logits=True)
            xy_loss = K.sum(object_mask * box_loss_scale * xy_ce) / mf
            wh_sq = K.square(raw_true_wh - raw_pred[..., 2:4])
            wh_loss = K.sum(object_mask * box_loss_scale * 0.5 * wh_sq) / mf
            location_loss = xy_loss + wh_loss

        # Batch-normalised terms for this layer.
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf

        loss += location_loss + confidence_loss + class_loss
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss

    # Fit for tf 2.0.0 loss shape
    return K.expand_dims(loss, axis=-1)
Exemplo n.º 33
0
    def _model_head(
            self, feats, anchors, num_classes,
            input_shape, batch_size, calc_loss=False, verbose=False):
        """Decode raw final-layer features into box parameters.

        No threshold or nms is applied here.

        Args:
            feats : `Tensor`
                One element of the model output list; the original note
                gives shape = (N, 13, 13, 255) as an example.
            anchors : list
                anchors (width/height pairs) for this output.
            num_classes : int
                num of classes.
            input_shape : tuple
                input shape obtained from model output grid information.
            batch_size : int
                used as the second dimension when reshaping ``feats``.
            calc_loss : bool
                when exactly ``True`` return ``(grid, feats, box_xy, box_wh)``.
            verbose : bool
                when exactly ``True`` also return the raw logits.

        Returns:
            ``(box_xy, box_wh, box_confidence, box_class_probs)`` by default;
            in verbose mode the coordinate, confidence and class logits are
            appended; in loss mode see ``calc_loss`` above.
        """
        num_anchors = len(anchors)
        feats_dtype = K.dtype(feats)

        # Anchors laid out as batch, height, width, num_anchors, box_params.
        anchors_tensor = K.reshape(
            K.constant(anchors), [1, 1, 1, num_anchors, 2])

        # Grid of cell indices with (x, y) in the last axis.
        grid_shape = K.shape(feats)[1:3]  # height, width
        grid_h, grid_w = grid_shape[0], grid_shape[1]
        row_index = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
        col_index = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
        grid_y = K.tile(row_index, [1, grid_w, 1, 1])
        grid_x = K.tile(col_index, [grid_h, 1, 1, 1])
        grid = K.cast(K.concatenate([grid_x, grid_y]), feats_dtype)

        feats = K.reshape(
            feats,
            [-1, batch_size, grid_h, grid_w, num_anchors, num_classes + 5])

        # Centre: sigmoid offset plus cell index, normalised by grid size.
        xy_norm = K.cast(grid_shape[::-1], feats_dtype)
        box_xy = (K.sigmoid(feats[..., :2]) + grid) / xy_norm
        # Size: exponential scale of the anchor, normalised by input size.
        wh_norm = K.cast(input_shape[::-1], feats_dtype)
        box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / wh_norm

        if calc_loss is True:
            return grid, feats, box_xy, box_wh

        confidence_logits = feats[..., 4:5]
        class_logits = feats[..., 5:]
        box_confidence = K.sigmoid(confidence_logits)
        box_class_probs = K.sigmoid(class_logits)

        if verbose is True:
            # Verbose mode additionally exposes the logits BEFORE sigmoid.
            return (box_xy, box_wh, box_confidence, box_class_probs,
                    feats[..., :4], confidence_logits, class_logits)

        return box_xy, box_wh, box_confidence, box_class_probs
Exemplo n.º 34
0
 def call(self, inputs):
     """Apply the inner ReLU projection followed by the output projection.

     Inputs that are not already float32 are cast to float32 first.
     """
     x = inputs if K.dtype(inputs) == 'float32' else K.cast(inputs, 'float32')
     hidden = K.dot(x, self.weights_inner) + self.bais_inner
     hidden = K.relu(hidden)
     return K.dot(hidden, self.weights_out) + self.bais_out
Exemplo n.º 35
0
    def get_updates(self, loss, params):
        """Build the update ops for one Adam-style step.

        On top of the Adam moment updates this applies optional time-based
        learning-rate decay, optional AMSGrad, optional per-parameter
        learning-rate multipliers, optional cosine annealing (``eta_t``) and
        optional weight decay via helpers defined elsewhere in the module.

        Args:
            loss: loss tensor handed to ``self.get_gradients``.
            params: list of parameters to update.

        Returns:
            ``self.updates``, the list of update ops assembled here.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.updates.append(K.update_add(self.t_cur, 1))

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        # Bias-corrected step size shared by all parameters.
        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        # First and second moment accumulators, one pair per parameter.
        ms = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i))
            for (i, p) in enumerate(params)
        ]
        vs = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i))
            for (i, p) in enumerate(params)
        ]

        if self.amsgrad:
            vhats = [
                K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='vhat_' + str(i)) for (i, p) in enumerate(params)
            ]
        else:
            # Size-1 placeholders keep the weight list layout uniform.
            vhats = [
                K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))
            ]
        self.weights = [self.iterations] + ms + vs + vhats

        total_iterations = self.total_iterations
        # Cosine annealing
        if self.use_cosine_annealing and total_iterations != 0:
            self.eta_t = _compute_eta_t(self)
        self.lr_t = lr_t * self.eta_t  # for external tracking

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # Learning rate multipliers: always derive the per-parameter rate
            # from the shared base rate. Reassigning ``lr_t`` itself would
            # carry one parameter's multiplier over to every later parameter
            # and compound the factors.
            lr_p = lr_t
            if self.lr_multipliers is not None:
                lr_p = _apply_lr_multiplier(self, lr_t, p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_p * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_p * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # Weight decays
            if p.name in self.weight_decays.keys() and total_iterations != 0:
                p_t = _apply_weight_decays(self, p, p_t)
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))

        self._init_notified = True
        return self.updates
Exemplo n.º 36
0
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """Sum-of-squares YOLO loss over confidence, class and box terms.

    Parameters
    ----------
    args : tuple of 4 tensors/arrays
        ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``:

        yolo_output : final convolutional layer features.
        true_boxes : ground truth boxes with shape
            [batch, num_true_boxes, 5] containing box x_center, y_center,
            width, height, and class.
        detectors_mask : 0/1 mask for detector positions where there is a
            matching ground truth.
        matching_true_boxes : corresponding ground truth boxes for positive
            detector positions, already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    total_loss : tensor
        0.5 * (confidence + classification + coordinates) summed losses.
    """
    yolo_output, true_boxes, detectors_mask, matching_true_boxes = args
    num_anchors = len(anchors)
    object_scale, no_object_scale = 5, 1
    class_scale, coordinates_scale = 1, 1

    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for the coordinate loss.
    # TODO: Remove extra computation shared with yolo_head.
    output_shape = K.shape(yolo_output)
    feats = K.reshape(
        yolo_output,
        [-1, output_shape[1], output_shape[2], num_anchors, num_classes + 5])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Insert a num_true_boxes axis so predictions broadcast against truth:
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)
    pred_half = pred_wh / 2.
    pred_mins, pred_maxes = pred_xy - pred_half, pred_xy + pred_half

    # Same layout for the ground truth, with singleton spatial/anchor axes.
    gt_shape = K.shape(true_boxes)
    true_boxes = K.reshape(
        true_boxes, [gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2]])
    true_xy, true_wh = true_boxes[..., 0:2], true_boxes[..., 2:4]
    true_half = true_wh / 2.
    true_mins, true_maxes = true_xy - true_half, true_xy + true_half

    # IOU of each predicted box with each ground truth box.
    overlap_wh = K.maximum(
        K.minimum(pred_maxes, true_maxes) - K.maximum(pred_mins, true_mins),
        0.)
    intersect_areas = overlap_wh[..., 0] * overlap_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]
    iou_scores = intersect_areas / (pred_areas + true_areas - intersect_areas)

    # Best IOU per detector location, with a trailing singleton axis.
    best_ious = K.expand_dims(K.max(iou_scores, axis=4))

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Confidence loss; NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    confidence_target = best_ious if rescore_confidence else 1
    objects_loss = (object_scale * detectors_mask *
                    K.square(confidence_target - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    class_ids = K.cast(matching_true_boxes[..., 4], 'int32')
    class_targets = K.one_hot(class_ids, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(class_targets - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_true_boxes[..., 0:4] - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)

    if print_loss:
        components = [
            total_loss, confidence_loss_sum, classification_loss_sum,
            coordinates_loss_sum
        ]
        total_loss = tf.Print(
            total_loss, components,
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
Exemplo n.º 37
0
 def call(self, inputs):
     """Map the mean of ``inputs`` over axes (1, 2) through a rescaled sigmoid.

     The sigmoid with slope ``a`` and centre ``b`` is shifted and scaled so
     that a mean of 0 maps to 0 and a mean of 1 maps to 1.
     """
     a = K.cast(self.a, dtype=K.dtype(inputs))
     mean_value = K.mean(inputs, axis=(1, 2))
     raw = K.sigmoid(a * (mean_value - self.b))
     at_zero = K.sigmoid(-a * self.b)
     at_one = K.sigmoid(a * (1. - self.b))
     return (raw - at_zero) / (at_one - at_zero)
Exemplo n.º 38
0
    def get_updates(self, loss, params):
        """Build the update ops for a Langevin-inner-loop / Adam-outer-loop step.

        The model parameters (``params``, called ``x_prime`` below) take noisy
        gradient steps that are pulled towards a stored copy ``x``; ``mu``
        tracks an exponential moving average of ``x_prime``. When
        ``state_counter`` equals ``L + 1`` an Adam-style step is applied to
        ``x`` using ``x - mu`` in place of the gradient.

        Args:
            loss: loss tensor handed to ``self.get_gradients``.
            params: list of parameters to update.

        Returns:
            ``self.updates``, the list of update ops assembled here.
        """

        self.updates = []
        # Three counters are advanced on every call.
        self.updates.append(K.update_add(self.state_counter, 1))
        self.updates.append(K.update_add(self.iterator, 1))
        self.updates.append(K.update_add(self.iterations, 1))
        t = K.cast(self.iterations, K.floatx()) + 1

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))
        # Bias-corrected Adam step size.
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        shapes = [K.int_shape(p) for p in params]
        # x and mu are the results of assigning each parameter into a fresh
        # zero variable of the same shape.
        # NOTE(review): these K.update results are later used as K.update
        # targets; confirm the backend returns something assignable here.
        x = [K.update(K.zeros(shape), p) for shape, p in zip(shapes, params)]
        mu = [K.update(K.zeros(shape), p) for shape, p in zip(shapes, params)]
        grads = self.get_gradients(loss, params)

        # Adam first and second moment accumulators.
        ms = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i))
            for (i, p) in enumerate(params)
        ]
        vs = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i))
            for (i, p) in enumerate(params)
        ]

        if self.amsgrad:
            vhats = [
                K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='vhat_' + str(i)) for (i, p) in enumerate(params)
            ]
        else:
            # Size-1 placeholders so the zip below has one entry per parameter.
            vhats = [
                K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))
            ]

        # Note that x_prime_i iterates over the real model parameters.
        for x_i, x_prime_i, mu_i, g, m, v, vhat in zip(x, params, mu, grads,
                                                       ms, vs, vhats):

            # Update x_prime: reset it to x_i when state_counter == 0 or when
            # num_steps == iterator; otherwise take a noisy (Langevin) step
            # along the gradient with a pull of strength gamma.
            dx_prime_i = g - self.gamma * (x_i - x_prime_i)
            x_prime_update_i = K.switch(
                K.any(K.stack([
                    K.equal(self.state_counter, 0),
                    K.equal(self.num_steps, self.iterator)
                ],
                              axis=0),
                      axis=0), x_i, x_prime_i - self.sgld_step * dx_prime_i +
                K.sqrt(self.sgld_step) * self.sgld_noise *
                K.random_normal(K.int_shape(x_prime_i)))
            # Apply constraints.
            if getattr(x_prime_i, 'constraint', None) is not None:
                x_prime_update_i = x_prime_i.constraint(x_prime_update_i)
            self.updates.append(K.update(x_prime_i, x_prime_update_i))

            # Update mu: reset it to x_i when state_counter == 0, otherwise
            # blend in x_prime_i with weight alpha (exponential average).
            mu_update_i = K.switch(K.equal(self.state_counter,
                                           0), x_i, (1 - self.alpha) * mu_i +
                                   self.alpha * x_prime_i)
            self.updates.append(K.update(mu_i, mu_update_i))

            # x, m, v (and vhat) only change when state_counter == L + 1; on
            # every other step the switches below keep the old value.
            # Per the original author: gamma is left out of this update, as
            # described in the paper, because it interferes with the learning
            # rate annealing.

            # Adam update, with (x - mu) playing the role of the gradient.
            gradient = (x_i - mu_i)
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * gradient
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(gradient)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                x_i_t = x_i - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(
                    K.update(
                        vhat,
                        K.switch(K.equal(self.state_counter, self.L + 1),
                                 vhat_t, vhat)))
            else:
                x_i_t = x_i - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(
                K.update(
                    m, K.switch(K.equal(self.state_counter, self.L + 1), m_t,
                                m)))
            self.updates.append(
                K.update(
                    v, K.switch(K.equal(self.state_counter, self.L + 1), v_t,
                                v)))
            new_x_i = x_i_t

            x_i_update = K.switch(K.equal(self.state_counter, self.L + 1),
                                  new_x_i, x_i)
            self.updates.append(K.update(x_i, x_i_update))

            # Gamma scoping: gamma is kept when state_counter == L + 1 and
            # multiplied by (1 + scoping) otherwise.
            # NOTE(review): the original comment said gamma is rescaled "after
            # each update" by a factor of 1.001, yet the switch rescales on all
            # steps except the update step, and this op is appended once per
            # parameter -- confirm both are intended.
            gamma_update = K.switch(K.equal(self.state_counter,
                                            self.L + 1), self.gamma,
                                    self.gamma * (1. + self.scoping))
            self.updates.append(K.update(self.gamma, gamma_update))

        # Wrap the state counter back to 0 once it reaches L + 2.
        counter = K.switch(K.equal(self.state_counter, self.L + 2),
                           K.constant(0, dtype='int64'), self.state_counter)
        self.updates.append(K.update(self.state_counter, counter))
        return self.updates
Exemplo n.º 39
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return the YOLO loss tensor, averaged over the batch.

    Box, confidence and class terms are all squared errors; they are summed
    over the three output scales and divided by the batch size.

    Parameters
    ----------
    args: list, the three yolo_body output tensors followed by the y_true
        tensors (the output of preprocess_true_boxes)
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, summed loss divided by the batch size

    '''
    outputs, targets = args[:3], args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    target_dtype = K.dtype(targets[0])
    # The first output's spatial size times 32 is taken as the input size.
    input_shape = K.cast(K.shape(outputs[0])[1:3] * 32, target_dtype)
    grid_shapes = [
        K.cast(K.shape(outputs[idx])[1:3], target_dtype) for idx in range(3)
    ]
    batch = K.shape(outputs[0])[0]
    total = 0

    for layer in range(3):
        truth = targets[layer]
        object_mask = truth[..., 4:5]
        true_class_probs = truth[..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            outputs[layer], anchors[anchor_mask[layer]], num_classes,
            input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss: xy error in grid-cell units, wh error in log space.
        xy_delta = (truth[..., :2] - pred_xy) * grid_shapes[layer][::-1]
        log_wh_gap = K.log(truth[..., 2:4]) - K.log(pred_wh)
        # Cells without an object have wh == 0, so log() is -inf there;
        # replace those entries with zeros.
        wh_delta = K.switch(object_mask, log_wh_gap, K.zeros_like(log_wh_gap))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Build the ignore mask one batch element at a time.
        mask_array = tf.TensorArray(target_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            below_thresh = K.cast(best_iou < ignore_thresh, K.dtype(true_box))
            return b + 1, ignore_mask.write(b, below_thresh)

        _, mask_array = K.control_flow_ops.while_loop(
            lambda b, *args: b < batch, loop_body, [0, mask_array])
        ignore_mask = K.expand_dims(mask_array.stack(), -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        positive_conf = object_mask * K.square(1 - pred_confidence)
        negative_conf = (1 - object_mask) * K.square(0 - pred_confidence) * ignore_mask
        confidence_loss = positive_conf + negative_conf
        class_loss = object_mask * K.square(true_class_probs -
                                            pred_class_probs)
        total += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return total / K.cast(batch, K.dtype(total))
Exemplo n.º 40
0
def yolo_loss(args, anchors, num_seen, ignore_thresh=.5, plus=False):
    """Return the YOLO loss tensor for the embedding-based detector.

    Parameters
    ----------
    args: [*yolo_outputs, *y_true, y_embedding]
    # yolo_outputs: list of tensor, the output of yolo_body
    # y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_seen: integer, how many leading classes/embeddings enter the
        embedding loss
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    plus: forwarded to yolo_head; if true, calculate yolo plus model loss

    Returns
    -------
    loss: tensor, sum over layers of the per-layer losses, each divided by
        the batch size

    """
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    # shape=(num_layers, b, h, w, anchors, 5 + num_classes)
    y_true = args[num_layers:-1]
    embeddings = args[-1]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    label_dtype = K.dtype(y_true[0])
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, label_dtype)
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], label_dtype)
        for idx in range(num_layers)
    ]
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    # Insert three singleton axes after the first axis of the embeddings.
    embeddings = K.expand_dims(
        K.expand_dims(K.expand_dims(embeddings, 1), 1), 1)
    loss = 0.

    for layer in range(num_layers):
        truth = y_true[layer]
        layer_anchors = anchors[anchor_mask[layer]]
        object_mask = truth[..., 4:5]
        true_class_probs = truth[..., 5:]

        grid, raw_pred, pred_xy, pred_wh, pred_embedding = yolo_head(
            yolo_outputs[layer], layer_anchors, input_shape,
            calc_loss=True, plus=plus)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = truth[..., :2] * grid_shapes[layer][::-1] - grid
        raw_true_wh = K.log(truth[..., 2:4] / layer_anchors * input_shape[::-1])
        # avoid log(0)=-inf
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Find ignore mask, iterate over each of batch.
        mask_array = tf.TensorArray(label_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, mask):
            true_box = tf.boolean_mask(truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            below_thresh = K.cast(best_iou < ignore_thresh, K.dtype(true_box))
            return b + 1, mask.write(b, below_thresh)

        _, mask_array = K.control_flow_ops.while_loop(
            lambda b, *arg: b < m, loop_body, [0, mask_array])
        ignore_mask = K.expand_dims(mask_array.stack(), -1)

        # rescale relation to [0, 1]
        true_relation = 0.5 * (class_relation(true_class_probs, embeddings) + 1)

        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # The same cross-entropy feeds both the positive and the negative term.
        objectness_bce = K.binary_crossentropy(
            object_mask * true_relation, raw_pred[..., 4:], True)
        object_loss = object_mask * objectness_bce + \
            (1 - object_mask) * objectness_bce * ignore_mask
        embedding_loss = object_mask * category_loss(
            embeddings[..., :num_seen, :], pred_embedding,
            true_class_probs[..., :num_seen])

        loss += (K.sum(xy_loss) / mf + K.sum(wh_loss) / mf +
                 K.sum(object_loss) / mf + K.sum(embedding_loss) / mf)

    return loss
Exemplo n.º 41
0
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Probability estimate for whether each box contains any object.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # (height, width), assuming channels last
    grid_height, grid_width = conv_dims[0], conv_dims[1]

    # Build an (x, y) = (column, row) offset for every grid cell, laid out in
    # the same row-major order as the (height, width) axes of feats: the
    # column index varies fastest and the row index is repeated once per
    # column.  For square grids this equals the previous construction; for
    # non-square grids the previous one mixed up rows and columns.
    col_index = K.tile(K.arange(0, stop=grid_width), [grid_height])
    row_index = K.tile(K.expand_dims(K.arange(0, stop=grid_height), 0),
                       [grid_width, 1])
    row_index = K.flatten(K.transpose(row_index))
    conv_index = K.transpose(K.stack([col_index, row_index]))
    conv_index = K.reshape(conv_index, [1, grid_height, grid_width, 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    # feats axes: [batch, height, width, num_anchors,
    #              (xy (2), wh (2), objectness (1), class scores (num_classes))]
    feats = K.reshape(
        feats, [-1, grid_height, grid_width, num_anchors, num_classes + 5])
    # Normalise x by the grid width and y by the grid height, hence the
    # reversal of the (height, width) shape.
    conv_dims = K.cast(
        K.reshape(conv_dims[::-1], [1, 1, 1, 1, 2]), K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
Exemplo n.º 42
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list, the yolo_body / tiny_yolo_body output tensors followed by the
        y_true arrays (the output of preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: bool, if true wrap the running loss in tf.Print after each layer

    Returns
    -------
    loss: tensor, sum over layers of the per-layer losses, each divided by
        the batch size

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs, y_true = args[:num_layers], args[num_layers:]
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]
    label_dtype = K.dtype(y_true[0])
    # First output's spatial size times 32, e.g. 13 * 32 = 416 -> [416, 416].
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, label_dtype)
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], label_dtype)
        for idx in range(num_layers)
    ]
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    loss = 0

    for layer in range(num_layers):
        truth = y_true[layer]
        layer_anchors = anchors[anchor_mask[layer]]
        object_mask = truth[..., 4:5]  # objectness of each target cell
        true_class_probs = truth[..., 5:]  # class information

        # yolo_head decodes the predicted offsets into box coordinates; the
        # decoded boxes are only used for the IoU below, while the loss itself
        # is computed on the raw offsets.
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[layer], layer_anchors, num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        # Convert the true box centres into per-cell offsets.
        raw_true_xy = truth[..., :2] * grid_shapes[layer][::-1] - grid
        # Convert the true width/height into log-space offsets w.r.t. anchors.
        raw_true_wh = K.log(truth[..., 2:4] / layer_anchors * input_shape[::-1])
        # avoid log(0)=-inf
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        # (2 - box area): keeps large boxes from dominating the loss over
        # small ones.
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Find ignore mask, iterate over each of batch.
        # Dynamically sized array, one entry written per batch element.
        mask_array = tf.TensorArray(label_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')  # 1 -> True, 0 -> False

        def loop_body(b, ignore_mask):
            # Keep only the rows of image b that hold an object.
            true_box = tf.boolean_mask(truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # IoU between the predicted boxes of image b and its true boxes.
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)  # largest IoU per predicted box
            # Record predictions whose best IoU is below the threshold, i.e.
            # those treated as not containing an object.
            below_thresh = K.cast(best_iou < ignore_thresh, K.dtype(true_box))
            return b + 1, ignore_mask.write(b, below_thresh)

        # The loop starts from b = 0 with the empty mask array.
        _, mask_array = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, mask_array])
        # Extra trailing axis so the mask broadcasts in the loss below.
        ignore_mask = K.expand_dims(mask_array.stack(), -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        # x, y, w, h losses only count cells that contain an object.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # Confidence loss covers both object cells and (non-ignored)
        # no-object cells.
        objectness_bce = K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True)
        confidence_loss = object_mask * objectness_bce + \
            (1 - object_mask) * objectness_bce * ignore_mask
        # Class loss only counts cells that contain an object.
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(
                loss,
                [loss, xy_loss, wh_loss, confidence_loss, class_loss,
                 K.sum(ignore_mask)],
                message='loss: ')
    return loss
Exemplo n.º 43
0
 def _get_anchor_negative_triplet_mask(self, y_true: Tensor, pairwise_dist: Tensor) -> Tensor:
     """Return a pairwise mask that is 1 where two labels differ and 0 otherwise.

     `pairwise_dist` is only used to pick the dtype of the returned mask.
     """
     # Compare every label against every other label via broadcasting.
     labels_differ = K.not_equal(K.expand_dims(y_true, 0),
                                 K.expand_dims(y_true, 1))
     return K.cast(labels_differ, K.dtype(pairwise_dist))
Exemplo n.º 44
0
    def get_updates(self, loss, params):
        """Build the update ops for one optimisation step.

        The update follows the rectified Adam scheme: Adam moment estimates
        with bias correction, where the adaptive (second-moment) scaling is
        only applied once the approximated moving-average length `sma_t`
        reaches 5; before that a plain bias-corrected momentum step is taken.
        Optional learning-rate decay, linear warmup followed by linear decay
        towards `min_lr`, weight decay and per-parameter constraints are
        applied when configured.

        Parameters
        ----------
        loss : tensor
            Loss to differentiate.
        params : list
            Variables to optimise.

        Returns
        -------
        list
            The update ops, also stored in `self.updates`.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr

        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        if self.initial_total_steps > 0:
            # Linear warmup up to `lr`, then linear decay towards `min_lr`
            # over the remaining steps.
            warmup_steps = self.total_steps * self.warmup_proportion
            decay_steps = K.maximum(self.total_steps - warmup_steps, 1)
            decay_rate = (self.min_lr - lr) / decay_steps
            lr = K.switch(
                t <= warmup_steps,
                lr * (t / warmup_steps),
                lr + decay_rate * K.minimum(t - warmup_steps, decay_steps),
            )

        ms = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i))
            for (i, p) in enumerate(params)
        ]
        vs = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i))
            for (i, p) in enumerate(params)
        ]

        if self.amsgrad:
            vhats = [
                K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='vhat_' + str(i)) for (i, p) in enumerate(params)
            ]
        else:
            # Placeholders so the weight list has the same layout either way.
            vhats = [
                K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))
            ]

        self.weights = [self.iterations] + ms + vs + vhats

        beta_1_t = K.pow(self.beta_1, t)
        beta_2_t = K.pow(self.beta_2, t)

        sma_inf = 2.0 / (1.0 - self.beta_2) - 1.0
        sma_t = sma_inf - 2.0 * t * beta_2_t / (1.0 - beta_2_t)

        # The rectification term depends only on the step count, not on the
        # parameter, so it is built once instead of once per parameter.
        r_t = K.sqrt((sma_t - 4.0) / (sma_inf - 4.0) * (sma_t - 2.0) /
                     (sma_inf - 2.0) * sma_inf / sma_t)

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            m_corr_t = m_t / (1.0 - beta_1_t)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                v_corr_t = K.sqrt(vhat_t / (1.0 - beta_2_t))
                self.updates.append(K.update(vhat, vhat_t))
            else:
                v_corr_t = K.sqrt(v_t / (1.0 - beta_2_t))

            # Use the rectified adaptive step only once sma_t is large
            # enough; otherwise fall back to the momentum-only step.
            p_t = K.switch(sma_t >= 5,
                           r_t * m_corr_t / (v_corr_t + self.epsilon),
                           m_corr_t)

            if self.initial_weight_decay > 0:
                p_t += self.weight_decay * p

            p_t = p - lr * p_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Exemplo n.º 45
0
 def on_epoch_end(self, epoch, logs=None):
     """Print the optimizer's learning rate with time-based decay applied.

     The value printed is lr / (1 + decay * iterations), evaluated from the
     optimizer attached to `self.model`. `epoch` and `logs` are unused.
     """
     lr = self.model.optimizer.lr
     decay = self.model.optimizer.decay
     iterations = self.model.optimizer.iterations
     lr_with_decay = lr / (1. + decay * K.cast(iterations, K.dtype(decay)))
     # Call form of print: valid on Python 3 and prints the same single
     # value on Python 2.
     print(K.eval(lr_with_decay))
Exemplo n.º 46
0
def obj_detection_loss_by_parts(y_true, y_pred):
  """Return the three components of the object-detection loss separately.

  Both inputs are reshaped to [N, conv_height, conv_width, 1, -1], where
  conv_height / conv_width (and conv_index / conv_dims used below) are
  module-level names defined outside this function.

  Channel layout read by this function:
    y_true: [0] detector mask, [1:4] matching box coordinates,
            [4:] class targets.
    y_pred: [0] confidence logit, [1:4] box coordinates, [4:] class logits.
  The box is handled as a centre plus a single radius-like size (its area is
  computed as a square of side 2 * r).

  Returns
  -------
  tuple of three tensors
      (0.5 * confidence_loss, 0.5 * classification_loss,
       0.5 * coordinates_loss), each summed over the grid axes so that one
      value per sample remains.
  """
  max_boxes = 20
  object_scale = 5.
  no_object_scale = 1.
  class_scale = 1.
  coordinates_scale = 1.
  grid_axes = (-4, -3, -2, -1)  # every axis except the batch axis

  N = K.shape(y_true)[0]       # number of samples in batch

  # retrieve the detectors_mask and matching_true_boxes from y_true
  masks_and_true_boxes = K.reshape(y_true, [N, conv_height, conv_width, 1, -1])
  detectors_mask = masks_and_true_boxes[..., 0:1]
  matching_true_boxes = masks_and_true_boxes[..., 1:]

  # reshape y_pred as well, we call these t parameters as they are before final activation values
  t_pred = K.reshape(y_pred, [N, conv_height, conv_width, 1, -1])

  # loss related to classification
  matching_classes = matching_true_boxes[..., 3:]
  y_pred_class = K.softmax(t_pred[..., 4:])
  classification_loss = K.sum(
      class_scale * detectors_mask * K.square(matching_classes - y_pred_class),
      axis=grid_axes)

  # loss related to coordinates
  matching_box_coord = matching_true_boxes[..., :3]
  y_pred_coord = t_pred[..., 1:4]
  coordinates_loss = K.sum(
      coordinates_scale * detectors_mask * K.square(matching_box_coord - y_pred_coord),
      axis=grid_axes)

  # get a box tensor whose 2nd dimension list the individual boxes
  boxes = inv_preprocess_true_boxes(detectors_mask, matching_true_boxes, conv_index, conv_dims, max_boxes=max_boxes)
  boxes_shape = K.shape(boxes)
  boxes = K.reshape(boxes, [boxes_shape[0], 1, 1, 1, boxes_shape[1], boxes_shape[2]])

  pred_xy, pred_r = transform_predicted_from_t_to_actual(t_pred, conv_index, conv_dims)
  # Extra axis so every prediction broadcasts against every true box.
  pred_xy = K.expand_dims(pred_xy, 4)
  pred_r = K.expand_dims(pred_r, 4)

  true_xy, true_r = boxes[..., 0:2], boxes[..., 2:3]

  # Find IOU of each predicted box with each ground truth box.
  pred_mins = pred_xy - pred_r
  pred_maxes = pred_xy + pred_r

  true_mins = true_xy - true_r
  true_maxes = true_xy + true_r

  intersect_mins = K.maximum(pred_mins, true_mins)
  intersect_maxes = K.minimum(pred_maxes, true_maxes)
  intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
  intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

  pred_areas = 4. * pred_r[..., 0] * pred_r[..., 0]   # a square
  true_areas = 4. * true_r[..., 0] * true_r[..., 0]

  union_areas = pred_areas + true_areas - intersect_areas
  iou_scores = intersect_areas / union_areas

  # Best IOUs for each location.
  best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
  best_ious = K.expand_dims(best_ious)

  # A detector has found an object if IOU > thresh for some true box.
  object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

  # The confidence is needed by both terms below, so activate it once.
  pred_confidence = K.sigmoid(t_pred[..., 0:1])

  no_object_weights = no_object_scale * (1 - object_detections) * (1 - detectors_mask)
  no_objects_loss = no_object_weights * K.square(-pred_confidence)

  objects_loss = object_scale * detectors_mask * K.square(1 - pred_confidence)

  confidence_loss = K.sum(objects_loss + no_objects_loss, axis=grid_axes)

  return 0.5 * confidence_loss, 0.5 * classification_loss, 0.5 * coordinates_loss
Exemplo n.º 47
0
def yolo_loss(args,
              anchors,
              num_classes,
              ignore_thresh=.5,
              label_smoothing=0.1,
              print_loss=False):
    """Return the YOLO loss tensor using a CIoU location term.

    Parameters
    ----------
    args: list, the yolo output tensors followed by the matching y_true
        tensors, one of each per detection layer.  Expected shapes, largest
        stride first:
          outputs: (None, h//32, w//32, num_anchors*(5+num_classes)), ...
          y_true:  (None, h//32, w//32, num_anchors, 5+num_classes), ...
    anchors: array, shape=(N, 2); N // 3 gives the number of layers
    num_classes: integer
    ignore_thresh: float, predictions whose best iou with a true box reaches
        this value are excluded from the no-object confidence loss
    label_smoothing: float, passed to _smooth_labels when truthy
    print_loss: accepted for interface compatibility; not used here

    Returns
    -------
    loss: tensor, sum over layers of location, confidence and class losses,
        each divided by the batch size
    """
    num_layers = len(anchors) // 3
    yolo_outputs, y_true = args[:num_layers], args[num_layers:]

    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    label_dtype = K.dtype(y_true[0])
    # input_shape = (h, w): the first output's spatial size times 32.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, label_dtype)

    bs = K.shape(yolo_outputs[0])[0]
    batch_size = K.cast(bs, K.dtype(yolo_outputs[0]))
    loss = 0

    # Example shapes for a 416x416 input with 80 classes:
    #   y_true:       (bs,13,13,3,85), (bs,26,26,3,85), (bs,52,52,3,85)
    #   yolo_outputs: (bs,13,13,255),  (bs,26,26,255),  (bs,52,52,255)
    for i in range(num_layers):
        truth = y_true[i]
        # Cells that hold an object, e.g. (bs,13,13,3,1).
        object_mask = truth[..., 4:5]
        # Their class targets, e.g. (bs,13,13,3,80).
        true_class_probabilities = truth[..., 5:]
        if label_smoothing:
            true_class_probabilities = _smooth_labels(
                true_class_probabilities, label_smoothing)

        # Decode this layer's output: grid is the cell-offset grid, raw_pred
        # the undecoded prediction, pred_xy / pred_wh the decoded boxes.
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[i], anchors[anchor_mask[i]], num_classes,
            input_shape, calc_loss=True)

        # Decoded predicted boxes, e.g. (bs,13,13,3,4).
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Collect the negative-sample mask, one entry per image.
        mask_array = tf.TensorArray(label_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # All true boxes that exist in image b -> (n, 4).
            true_box = tf.boolean_mask(truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # IoU of the predicted boxes of image b with its true boxes,
            # then the best IoU per predicted box.
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            # Predictions overlapping a true box by at least ignore_thresh
            # get 0 here and are left out of the negative samples.
            below_thresh = K.cast(best_iou < ignore_thresh, K.dtype(true_box))
            return b + 1, ignore_mask.write(b, below_thresh)

        # Walk over every image in the batch.
        _, mask_array = K.control_flow_ops.while_loop(
            lambda b, *args: b < bs, loop_body, [0, mask_array])

        # Stack the per-image masks and add a trailing axis, e.g.
        # (bs,13,13,3,1).
        ignore_mask = K.expand_dims(mask_array.stack(), -1)

        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Calculate ciou loss as location loss
        ciou = box_ciou(pred_box, truth[..., 0:4])
        location_loss = K.sum(
            object_mask * box_loss_scale * (1 - ciou)) / batch_size

        # Cells with a true box: cross-entropy of the confidence against 1.
        # Cells without one: counted as negatives only where
        # best_iou < ignore_thresh, which limits the number of negatives.
        objectness_bce = K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True)
        confidence_loss = object_mask * objectness_bce + \
            (1 - object_mask) * objectness_bce * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probabilities, raw_pred[..., 5:], from_logits=True)

        confidence_loss = K.sum(confidence_loss) / batch_size
        class_loss = K.sum(class_loss) / batch_size
        loss += location_loss + confidence_loss + class_loss
    return loss
Exemplo n.º 48
0
 def _get_anchor_negative_triplet_mask(self, y_true: Tensor,
                                       pairwise_dist: Tensor) -> Tensor:
     """Return a pairwise mask that is 1 where two labels differ and 0 otherwise.

     `pairwise_dist` is only used to pick the dtype of the returned mask.
     """
     # Compare every label against every other label via broadcasting.
     labels_differ = K.not_equal(K.expand_dims(y_true, 0),
                                 K.expand_dims(y_true, 1))
     return K.cast(labels_differ, K.dtype(pairwise_dist))
Exemplo n.º 49
0
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_confidence : tensor
        Probability estimate for whether each box contains any object.
    box_class_probs : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # (height, width), assuming channels last
    grid_height, grid_width = conv_dims[0], conv_dims[1]

    # Build an (x, y) = (column, row) offset for every grid cell, laid out in
    # the same row-major order as the (height, width) axes of feats: the
    # column index varies fastest and the row index is repeated once per
    # column.  For square grids this equals the previous construction; for
    # non-square grids the previous one mixed up rows and columns.
    col_index = K.tile(K.arange(0, stop=grid_width), [grid_height])
    row_index = K.tile(K.expand_dims(K.arange(0, stop=grid_height), 0),
                       [grid_width, 1])
    row_index = K.flatten(K.transpose(row_index))
    conv_index = K.transpose(K.stack([col_index, row_index]))
    conv_index = K.reshape(conv_index, [1, grid_height, grid_width, 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_height, grid_width, num_anchors, num_classes + 5])
    # Normalise x by the grid width and y by the grid height, hence the
    # reversal of the (height, width) shape.
    conv_dims = K.cast(
        K.reshape(conv_dims[::-1], [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
Exemplo n.º 50
0
 def loop_body(b, ignore_mask):
     """Write the ignore-mask entry for batch element `b` and advance the index.

     Relies on y_true, l, object_mask_bool, pred_box and ignore_thresh from
     the enclosing scope.
     """
     # True boxes of element b, restricted to positions flagged as objects.
     gt_boxes = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
     # Best overlap of each predicted box (presumably against every true box).
     best_iou = K.max(box_iou(pred_box[b], gt_boxes), axis=-1)
     below_thresh = K.cast(best_iou<ignore_thresh, K.dtype(gt_boxes))
     return b+1, ignore_mask.write(b, below_thresh)
Exemplo n.º 51
0
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """Compute the summed YOLO confidence, classification and box loss.

    Parameters
    ----------
    args : sequence of four tensors
        ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``.

        yolo_output
            Final convolutional layer features; reshaped here to
            ``[-1, height, width, num_anchors, num_classes + 5]``.
        true_boxes
            Rank-3 ground truth tensor ``[batch, num_true_boxes, params]``
            whose first four params are read as x_center, y_center, width
            and height.
        detectors_mask
            Mask multiplied into every "object present" loss term.
            NOTE(review): presumably a 0/1 mask over detector positions
            with a matching ground truth box -- confirm against the caller.
        matching_true_boxes
            Per-detector targets: channels 0-3 are compared with the
            predicted box, channel 4 is cast to an int class index.

    anchors : sequence
        Anchor boxes for the model; only its length is used directly here,
        the values are forwarded to ``yolo_head``.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true, the confidence target of a matched detector is its best
        IOU with any ground truth box instead of 1.

    print_loss : bool, default=False
        If True then wrap the result in a tf.Print() that prints the total
        and the three loss components.

    Returns
    -------
    total_loss : tensor
        Half of the sum (over every element, not a mean) of the confidence,
        classification and coordinate squared-error terms.
    """
    yolo_output, true_boxes, detectors_mask, matching_true_boxes = args
    anchor_count = len(anchors)

    # Fixed weights of the individual loss terms.
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1

    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Box predictions in the parameterization the coordinate loss uses:
    # sigmoid-activated x/y, raw (un-exponentiated) w/h.
    # TODO: Remove extra computation shared with yolo_head.
    output_shape = K.shape(yolo_output)
    raw_feats = K.reshape(
        yolo_output,
        [-1, output_shape[1], output_shape[2], anchor_count,
         num_classes + 5])
    pred_boxes = K.concatenate(
        (K.sigmoid(raw_feats[..., 0:2]), raw_feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Give predictions and ground truth a common broadcastable layout:
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)
    gt_shape = K.shape(true_boxes)
    true_boxes = K.reshape(
        true_boxes, [gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2]])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Corner coordinates of both box sets.
    pred_half_extent = pred_wh / 2.
    pred_lower = pred_xy - pred_half_extent
    pred_upper = pred_xy + pred_half_extent
    true_half_extent = true_wh / 2.
    true_lower = true_xy - true_half_extent
    true_upper = true_xy + true_half_extent

    # IOU of each predicted box with each ground truth box.
    overlap_lower = K.maximum(pred_lower, true_lower)
    overlap_upper = K.minimum(pred_upper, true_upper)
    overlap_wh = K.maximum(overlap_upper - overlap_lower, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    pred_area = pred_wh[..., 0] * pred_wh[..., 1]
    true_area = true_wh[..., 0] * true_wh[..., 1]
    iou_scores = overlap_area / (pred_area + true_area - overlap_area)

    # Best IOU per detector across all ground truth boxes, with a trailing
    # axis restored so it lines up with pred_confidence.
    best_ious = K.expand_dims(K.max(iou_scores, axis=4))

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Confidence loss: squared error rather than binary cross-entropy.
    # Detectors that neither matched nor overlapped a true box are pushed
    # toward zero confidence.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    confidence_target = best_ious if rescore_confidence else 1
    objects_loss = (object_scale * detectors_mask *
                    K.square(confidence_target - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matched detectors: squared error against a
    # one-hot class target rather than categorical cross-entropy.
    class_targets = K.one_hot(
        K.cast(matching_true_boxes[..., 4], 'int32'), num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(class_targets - pred_class_prob))

    # Coordinate loss for matched detectors.
    box_targets = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(box_targets - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)

    if not print_loss:
        return total_loss

    return tf.Print(
        total_loss,
        [total_loss, confidence_loss_sum, classification_loss_sum,
         coordinates_loss_sum],
        message='yolo_loss, conf_loss, class_loss, box_coord_loss:')
Exemplo n.º 52
0
Arquivo: ctc.py Projeto: Navdevl/kur
	def get_loss(self, model, target, output):
		""" Returns the loss function that can be used by the implementation-
			specific model.

			# Arguments

			model: the model the loss is attached to. It must provide
				`get_backend()` and `add_data_source()`; the PyTorch path also
				uses `model.data.placeholder()` and `model.data.move()`.
			target: name of the model output the loss is attached to. It is
				used in log messages and in the names of the derived data
				sources.
			output: backend tensor of that model output. Only the Keras path
				reads it here; on the PyTorch path the output is instead
				passed later to the returned callable.

			# Return value

			Keras: a tuple `(inputs, loss)` where `inputs` is a tuple of three
			`(data source name, placeholder)` pairs -- transcript length,
			transcript, utterance length -- and `loss` is the loss tensor.

			PyTorch: a list `[inputs, loss_fn]` where `inputs` is a list of the
			three `(data source name, placeholder)` pairs in the order
			transcript, utterance length, transcript length (the order
			`loss_fn` indexes them in), and `loss_fn(inputs, output)` computes
			the loss.

			# Exceptions

			ValueError: the backend is neither Keras nor PyTorch; Warp-CTC was
				requested on Keras with a non-Theano toolchain; or PyTorch was
				requested without the "warp" variant.
			ImportError: the Warp-CTC binding for the selected backend cannot
				be imported.
		"""
		backend = model.get_backend()

		if backend.get_name() == 'keras':

			import keras.backend as K

			use_warp = 'warp' in self.variant

			if use_warp:

				# Warp-CTC replaces the built-in Keras CTC loss; it is only
				# accepted together with the Theano toolchain.
				logger.info('Attaching Warp-CTC loss function to model '
					'output "%s".', target)

				if backend.get_toolchain() != 'theano':
					logger.error('If you want to use warp-ctc, you need to '
						'use the Theano backend to Keras.')
					raise ValueError('Warp-CTC is currently only supported '
						'with the Theano backend to Keras.')

			else:
				# Just use the built-in Keras CTC loss function.
				logger.debug('Attaching built-in Keras CTC loss function to '
					'model output "%s".', target)

			ctc_scaled = 'ctc_scaled_{}'.format(self.input_length)
			flattened_labels = 'ctc_flattened_labels_{}'.format(target)

			# Names of the data sources that feed the transcript and the
			# utterance-length placeholders. Warp-CTC consumes the flattened
			# labels; a `relative_to` setting swaps in the rescaled lengths.
			transcript_name = flattened_labels if use_warp else self.output
			utterance_length_name = self.input_length \
				if self.relative_to is None else ctc_scaled

			transcript_length = K.placeholder(
				ndim=2,
				dtype='int32',
				name=self.output_length
			)
			transcript = K.placeholder(
				ndim=2,
				dtype='int32',
				name=transcript_name
			)
			utterance_length = K.placeholder(
				ndim=2,
				dtype='int32',
				name=utterance_length_name
			)

			if self.relative_to is not None:
				model.add_data_source(
					ctc_scaled,
					ScaledSource(
						model,
						relative_to=self.relative_to,
						to_this=target,
						scale_this=self.input_length
					)
				)

			if use_warp:
				model.add_data_source(
					flattened_labels,
					FlattenSource(
						self.output,
						self.output_length
					)
				)

				try:
					import ctc					# pylint: disable=import-error
				except ImportError:
					logger.error('The warp-CTC loss function was requested,  '
						'but we cannot find the "ctc" library. See our '
						'troubleshooting page for helpful tips.')
					raise ImportError('Cannot find the "ctc" library, which '
						'is needed when using the "warp" variant of the CTC '
						'loss function.')

				# The labels are shifted by one before being handed to
				# warp-ctc.
				# NOTE(review): presumably because warp-ctc reserves label 0
				# for the blank symbol -- confirm.
				out = ctc.cpu_ctc_th(
					output.dimshuffle((1, 0, 2)),
					K.squeeze(utterance_length, -1),
					transcript[0]+1,
					K.squeeze(transcript_length, -1)
				)
			else:
				out = K.ctc_batch_cost(
					transcript,
					output,
					utterance_length,
					transcript_length
				)

			if 'loss_scale' in self.variant:
				logger.debug('Loss scaling is active.')
				# Scale the loss by (mean utterance length) / 100.
				out = out * K.mean(
					K.cast(utterance_length, K.dtype(out))
				) / 100

			return (
				(
					(self.output_length, transcript_length),
					(transcript_name, transcript),
					(utterance_length_name, utterance_length)
				),
				out
			)

		elif backend.get_name() == 'pytorch':

			if 'warp' not in self.variant:
				logger.error('PyTorch does not include a native CTC loss '
					'function yet. However, PyTorch bindings to Warp CTC are '
					'available (SeanNaren/warp-ctc). Try installing that, and '
					'then settings variant=warp.')
				raise ValueError('Only Warp CTC is supported for PyTorch '
					'right now.')

			# From here on the "warp" variant is guaranteed, so the flattened
			# labels are always the transcript source.
			ctc_scaled = 'ctc_scaled_{}'.format(self.input_length)
			flattened_labels = 'ctc_flattened_labels_{}'.format(target)
			utterance_length_name = self.input_length \
				if self.relative_to is None else ctc_scaled

			transcript_length = model.data.placeholder(
				self.output_length,
				location='cpu',
				data_type='int'
			)
			transcript = model.data.placeholder(
				flattened_labels,
				location='cpu',
				data_type='int'
			)
			utterance_length = model.data.placeholder(
				utterance_length_name,
				location='cpu',
				data_type='int'
			)

			if self.relative_to is not None:
				model.add_data_source(
					ctc_scaled,
					ScaledSource(
						model,
						relative_to=self.relative_to,
						to_this=target,
						scale_this=self.input_length
					)
				)

			model.add_data_source(
				flattened_labels,
				FlattenSource(
					self.output,
					self.output_length
				)
			)

			try:
				from warpctc_pytorch import CTCLoss	# pylint: disable=import-error
			except ImportError:
				logger.error('The warp-CTC loss function was requested,  '
					'but we cannot find the "warpctc_pytorch" library. See '
					'our troubleshooting page for helpful tips.')
				raise ImportError('Cannot find the "warpctc_pytorch" library, '
					'which is needed when using the "warp" variant of the CTC '
					'loss function.')

			loss = model.data.move(CTCLoss())

			def basic_ctc_loss(inputs, output):
				""" Computes CTC loss, divided by `output.size(0)`.

					`inputs` follows the order of the list returned below:
					transcript, utterance length, transcript length.
				"""
				return loss(
					output.transpose(1, 0).contiguous(),
					inputs[0][0]+1,		# transcript[0]+1
					inputs[1].squeeze(1),	# K.squeeze(utterance_length, -1),
					inputs[2].squeeze(1)	# K.squeeze(transcript_length, -1)
				) / output.size(0)

			if 'loss_scale' in self.variant:
				logger.debug('Loss scaling is active.')

				def loss_scale(inputs, output):
					""" Computes CTC loss scaled by the mean utterance length
						divided by 100.
					"""
					factor = inputs[1].float().mean().data[0] / 100.
					return basic_ctc_loss(inputs, output) * factor

				get_ctc_loss = loss_scale
			else:
				get_ctc_loss = basic_ctc_loss

			return [
				[
					(flattened_labels, transcript),
					(utterance_length_name, utterance_length),
					(self.output_length, transcript_length)
				],
				get_ctc_loss
			]

		else:
			raise ValueError('Unsupported backend "{}" for loss function "{}"'
				.format(backend.get_name(), self.get_name()))