示例#1
0
def box_diou(b1, b2):
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh / 2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh / 2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    union_area = b1_area + b2_area - intersect_area
    iou = intersect_area / (union_area + K.epsilon())

    center_distance = K.sum(K.square(b1_xy - b2_xy), axis=-1)
    enclose_mins = K.minimum(b1_mins, b2_mins)
    enclose_maxes = K.maximum(b1_maxes, b2_maxes)
    enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.0)
    enclose_diagonal = K.sum(K.square(enclose_wh), axis=-1)
    diou = iou - 1.0 * (center_distance) / (enclose_diagonal + K.epsilon())

    diou = K.expand_dims(diou, -1)
    return diou
示例#2
0
def path_energy0(y, x, U, mask=None):
    """Path energy without boundary potential handling."""
    n_classes = K.shape(x)[2]
    y_one_hot = K.one_hot(y, n_classes)

    # Tag path energy
    energy = K.sum(x * y_one_hot, 2)
    energy = K.sum(energy, 1)

    # Transition energy
    y_t = y[:, :-1]
    y_tp1 = y[:, 1:]
    U_flat = K.reshape(U, [-1])
    # Convert 2-dim indices (y_t, y_tp1) of U to 1-dim indices of U_flat:
    flat_indices = y_t * n_classes + y_tp1
    U_y_t_tp1 = K.gather(U_flat, flat_indices)

    if mask is not None:
        mask = K.cast(mask, K.floatx())
        y_t_mask = mask[:, :-1]
        y_tp1_mask = mask[:, 1:]
        U_y_t_tp1 *= y_t_mask * y_tp1_mask

    energy += K.sum(U_y_t_tp1, axis=1)

    return energy
示例#3
0
    def build(self, input_shape=None):
        self.input_spec = InputSpec(shape=input_shape)
        if not self.layer.built:
            self.layer.build(input_shape)
            self.layer.built = True
        super(ConcreteDropout, self).build(
        )  # this is very weird.. we must call super before we add new losses

        # initialise p
        self.p_logit = self.layer.add_weight(name='p_logit',
                                             shape=(1, ),
                                             initializer=RandomUniform(
                                                 self.init_min, self.init_max),
                                             trainable=True)
        self.p = K.sigmoid(self.p_logit[0])

        # initialise regulariser / prior KL term
        input_dim = np.prod(input_shape[1:])  # we drop only last dim
        weight = self.layer.kernel
        kernel_regularizer = self.weight_regularizer * K.sum(
            K.square(weight)) / (1. - self.p)
        dropout_regularizer = self.p * K.log(self.p)
        dropout_regularizer += (1. - self.p) * K.log(1. - self.p)
        dropout_regularizer *= self.dropout_regularizer * input_dim
        regularizer = K.sum(kernel_regularizer + dropout_regularizer)
        self.layer.add_loss(regularizer)
示例#4
0
def pearson_correlation_metric_fn(
    y_true: tf.Tensor,
    y_pred: tf.Tensor,
) -> tf.Tensor:
    """
    Pearson correlation metric function.
    https://github.com/WenYanger/Keras_Metrics

    Args:
        y_true (tf.Tensor): y_true
        y_pred (tf.Tensor): y_pred

    Returns:
        tf.contrib.metrics: pearson correlation
    """

    x = y_true
    y = y_pred
    mx = K.mean(x, axis=0)
    my = K.mean(y, axis=0)
    xm, ym = x - mx, y - my
    r_num = K.sum(xm * ym)
    x_square_sum = K.sum(xm * xm)
    y_square_sum = K.sum(ym * ym)
    r_den = K.sqrt(x_square_sum * y_square_sum) + 1e-12
    r = r_num / r_den
    return K.mean(r)
示例#5
0
    def call(self, inputs, mask=None, **kwargs):
        """Core implemention of soft attention

        Args:
            inputs (object): input tensor.

        Returns:
            object: weighted sum of input tensors.
        """

        attention = K.tanh(K.dot(inputs, self.W) + self.b)
        attention = K.dot(attention, self.q)

        attention = K.squeeze(attention, axis=2)

        if mask is None:
            attention = K.exp(attention)
        else:
            attention = K.exp(attention) * K.cast(mask, dtype="float32")

        attention_weight = attention / (
            K.sum(attention, axis=-1, keepdims=True) + K.epsilon())

        attention_weight = K.expand_dims(attention_weight)
        weighted_input = inputs * attention_weight
        return K.sum(weighted_input, axis=1)
 def loss(y_true, y_pred):
     # scale predictions so that the class probas of each sample sum to 1
     y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
     # clip to prevent NaN's and Inf's
     y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
     # calc
     loss = y_true * K.log(y_pred) * weights
     loss = -K.sum(loss, -1)
     return loss
示例#7
0
def __center_loss(y_true, y_pred, centers):
    y_true_value = K.argmax(y_true)

    loss = K.variable(0.0)
    for label in range(CONFIG["num_classes"]):
        center = centers[label]

        indices = tf.where(tf.equal(y_true_value, label))
        pred_per_class = tf.gather_nd(y_pred, indices=indices)
        diff = tf.subtract(pred_per_class, center)
        square_diff = K.pow(diff, 2)
        sum = K.sum(K.sum(square_diff, axis=-1), axis=-1)
        loss = tf.add(loss, sum)

    return loss
示例#8
0
def euclidean_distance(vects):
    '''
    Computes the euclidean distances between vects[0] and vects[1]
    '''
    x, y = vects
    return K.sqrt(
        K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
示例#9
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = anchors_per_level
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1],
                   name='yolo_head/tile/reshape/grid_y'),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1],
                   name='yolo_head/tile/reshape/grid_x'),
        [grid_shape[0], 1, 1, 1])
    grid = tf.concat([grid_x, grid_y],
                     axis=-1,
                     name='yolo_head/concatenate/grid')
    grid = K.cast(grid, K.dtype(feats))
    feats = tf.reshape(feats, [
        -1, grid_shape[0], grid_shape[1], num_anchors,
        num_classes + 5 + NUM_ANGLES3
    ],
                       name='yolo_head/reshape/feats')

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:5 + num_classes])
    polygons_confidence = K.sigmoid(feats[..., 5 + num_classes + 2:5 +
                                          num_classes + NUM_ANGLES3:3])
    polygons_x = K.exp(feats[...,
                             5 + num_classes:num_classes + 5 + NUM_ANGLES3:3])

    dx = K.square(anchors_tensor[..., 0:1] / 2)
    dy = K.square(anchors_tensor[..., 1:2] / 2)
    d = K.cast(K.sqrt(dx + dy), K.dtype(polygons_x))
    a = K.pow(input_shape[..., ::-1], 2)
    a = K.cast(a, K.dtype(feats))
    b = K.sum(a)
    diagonal = K.cast(K.sqrt(b), K.dtype(feats))
    polygons_x = polygons_x * d / diagonal

    polygons_y = feats[...,
                       5 + num_classes + 1:num_classes + 5 + NUM_ANGLES3:3]
    polygons_y = K.sigmoid(polygons_y)

    if calc_loss == True:
        return grid, feats, box_xy, box_wh, polygons_confidence
    return box_xy, box_wh, box_confidence, box_class_probs, polygons_x, polygons_y, polygons_confidence
def loss_function_jaccard(y_true, y_pred, smooth=100):
    
    #
    """
    Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|)
            = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|))
    
    The jaccard distance loss is usefull for unbalanced datasets. This has been
    shifted so it converges on 0 and is smoothed to avoid exploding or disapearing
    gradient.
    
    Ref: https://en.wikipedia.org/wiki/Jaccard_index
    
    @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96
    @author: wassname
    """
    intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
    sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
    jac = (intersection + smooth) / (sum_ - intersection + smooth)
    return (1 - jac) * smooth
示例#11
0
def get_GRU_components(inputs, states, weight):
    units = weight[0].shape[0]

    kernel = K.variable(weight[0])  # shape = (input_dim, self.units * 3)
    recurrent_kernel = K.variable(
        weight[1])  # shape = (self.units, self.units * 3)
    bias = K.variable(weight[2])  # bias_shape = (3 * self.units,)
    inputs = K.variable(inputs)  # Not sure.
    h_tm1 = K.variable(states)  # Previous memory state.

    # Update gate.
    kernel_z = kernel[:, :units]
    recurrent_kernel_z = recurrent_kernel[:, :units]
    input_bias_z = bias[:units]
    # Reset gate.
    kernel_r = kernel[:, units:units * 2]
    recurrent_kernel_r = recurrent_kernel[:, units:units * 2]
    input_bias_r = bias[units:units * 2]
    # New gate.
    kernel_h = kernel[:, units * 2:]
    recurrent_kernel_h = recurrent_kernel[:, units * 2:]
    input_bias_h = bias[units * 2:]

    x_z = K.bias_add(K.dot(inputs, kernel_z), input_bias_z)
    x_r = K.bias_add(K.dot(inputs, kernel_r), input_bias_r)
    x_h = K.bias_add(K.dot(inputs, kernel_h), input_bias_h)
    recurrent_z = K.dot(h_tm1, recurrent_kernel_z)
    recurrent_r = K.dot(h_tm1, recurrent_kernel_r)

    z = hard_sigmoid(x_z +
                     recurrent_z)  # Recurrent activation = 'hard_sigmoid'.
    r = hard_sigmoid(x_r + recurrent_r)

    recurrent_h = K.dot(r * h_tm1, recurrent_kernel_h)
    # Get split part of recurrent_h.
    split_recurrent_h = K.expand_dims(h_tm1, axis=-1) * recurrent_kernel_h
    r_unsqueeze = K.expand_dims(r, axis=-1)
    recompute_recurrent_h = K.sum(r_unsqueeze * split_recurrent_h, axis=1)
    #print(recurrent_h.shape, h_tm1.shape, recurrent_kernel_h.shape, split_recurrent_h.shape)
    #print(K.get_value(recompute_recurrent_h)[0, :3], np.mean(K.get_value(recompute_recurrent_h)))
    #print(K.get_value(recurrent_h)[0, :3], np.mean(K.get_value(recurrent_h)))
    delta = np.mean(
        np.abs(K.get_value(recompute_recurrent_h) - K.get_value(recurrent_h)))
    print("delta =", delta, np.mean(K.get_value(recompute_recurrent_h)),
          np.mean(K.get_value(recurrent_h)))
    assert delta < 1e-6, "r gate is wrong."

    hh = tanh(x_h + recurrent_h)  # Activation = 'tanh'.
    # Previous and candidate state mixed by update gate.
    h = z * h_tm1 + (1 - z) * hh

    return K.get_value(h_tm1), K.get_value(h), K.get_value(z), K.get_value(
        r), K.get_value(hh), K.get_value(x_h), K.get_value(split_recurrent_h)
示例#12
0
def __softmax_cosine_loss(y_true, y_pred, centers):
    y_true_value = K.argmax(y_true)

    base_loss = K.variable(0.0)
    for label in range(CONFIG["num_classes"]):
        center = centers[label]
        indices = tf.where(tf.equal(y_true_value, label))
        pred_per_class = tf.gather_nd(y_pred, indices=indices)
        distances_per_class = __cosine(pred_per_class, center)
        sum = K.sum(distances_per_class, axis=-1)
        base_loss = tf.add(base_loss, sum)

    base_distances = K.zeros_like(y_true_value, dtype='float32')
    for label in range(CONFIG["num_classes"]):
        center = centers[label]
        distances_with_all_classes = __cosine(y_pred, center)
        exp_distances = K.exp(-distances_with_all_classes)
        base_distances = tf.add(base_distances, exp_distances)

    log_distances = K.log(base_distances)
    sum_on_batch = K.sum(log_distances)

    return base_loss + sum_on_batch
示例#13
0
def squared_distance(input_x, input_y=None, weight=None):
  """Calculates the pairwise distance between points in X and Y.

  Args:
    input_x: n x d matrix
    input_y: m x d matrix
    weight: affinity n x m -- if provided, we normalize the distance

  Returns:
    n x m matrix of all pairwise squared Euclidean distances
  """
  if input_y is None:
    input_y = input_x
  sum_dimensions = list(range(2, K.ndim(input_x) + 1))
  input_x = K.expand_dims(input_x, axis=1)
  if weight is not None:
    # if weight provided, we normalize input_x and input_y by weight
    d_diag = K.expand_dims(K.sqrt(K.sum(weight, axis=1)), axis=1)
    input_x /= d_diag
    input_y /= d_diag
  squared_difference = K.square(input_x - input_y)
  distance = K.sum(squared_difference, axis=sum_dimensions)
  return distance
示例#14
0
def squared_distance(X, Y=None, W=None):
    '''
    Calculates the pairwise distance between points in X and Y

    X:          n x d matrix
    Y:          m x d matrix
    W:          affinity -- if provided, we normalize the distance

    returns:    n x m matrix of all pairwise squared Euclidean distances
    '''
    if Y is None:
        Y = X
    # distance = squaredDistance(X, Y)
    sum_dimensions = list(range(2, K.ndim(X) + 1))
    X = K.expand_dims(X, axis=1)
    if W is not None:
        # if W provided, we normalize X and Y by W
        D_diag = K.expand_dims(K.sqrt(K.sum(W, axis=1)), axis=1)
        X /= D_diag
        Y /= D_diag
    squared_difference = K.square(X - Y)
    distance = K.sum(squared_difference, axis=sum_dimensions)
    return distance
示例#15
0
    def _build(self):
        self.encoder = self._build_encoder()
        self.stacked_lstm = self._build_stack_lstm()
        self.decoder = self._build_decoder()

        x = Input(shape=(self.historical_len, self.num_nodes, self.num_nodes))
        h = TimeDistributed(self.encoder)(x)
        h = Reshape((self.historical_len, -1))(h)
        h = Lambda(lambda x: K.sum(x, axis=1))(h)
        h = Reshape((self.num_nodes, -1))(h)
        h = self.stacked_lstm(h)
        y = self.decoder(h)

        self.model = Model(inputs=x, outputs=y)
示例#16
0
    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim

        e = K.reshape(
            K.dot(K.reshape(x, (-1, features_dim)),
                  K.reshape(self.W, (features_dim, 1))),
            (-1, step_dim))  # e = K.dot(x, self.W)
        if self.bias:
            e += self.b
        e = K.tanh(e)

        a = K.exp(e)
        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())
        # in some cases especially in the early stages of training the sum may be almost zero
        # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)

        c = K.sum(a * x, axis=1)
        return c
示例#17
0
    def gru_with_r_gate(x, weight):
        h_tm1, inputs, z, x_h, split_recurrent_h = x[0], x[1], x[2], x[3], x[4]
        weight = K.variable(weight)
        units = h_tm1.shape[-1]

        kernel_r = weight[:units, units:units * 2]
        recurrent_kernel_r = weight[units:units * 2, units:units * 2]
        input_bias_r = weight[units * 2, units:units * 2]  # Change to 1 dim.
        x_r = K.bias_add(K.dot(inputs, kernel_r), input_bias_r)
        recurrent_r = K.dot(h_tm1, recurrent_kernel_r)
        r_without_activate = x_r + recurrent_r
        r = hard_sigmoid(r_without_activate)
        #r = hard_sigmoid(x_r + recurrent_r)

        # Recompute recurrent_h by two parts.
        r_unsqueeze = K.expand_dims(r, axis=-1)
        recompute_recurrent_h = K.sum(r_unsqueeze * split_recurrent_h, axis=1)
        hh = tanh(x_h + recompute_recurrent_h)

        h = z * h_tm1 + (1 - z) * hh
        #return h
        return r
示例#18
0
    def call(self, input):
        f = list()
        fn = list()
        c_dict = keypoint_connections()
        for i in range(21):
            f.append(self.conv2(input))
        for i in range(21):
            c = list()
            c.append(f[i])

            for j in c_dict[i]:
                c.append(f[j])

            c = K.concatenate(tuple(c), axis=-1)
            h = self.conv2a[i](c)
            g = self.conv2b[i](h)
            l = K.dot(g, self.W[i])
            print(l.shape)
            fn.append(
                K.sum(K.concatenate((f[i], l), axis=-1),
                      axis=-1,
                      keepdims=True))

        return K.concatenate(tuple(fn), axis=-1)
示例#19
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
示例#20
0
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall
示例#21
0
def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
示例#22
0
def dice_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
示例#23
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(3)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        confidence_loss = object_mask * K.square(1-pred_confidence) + \
            (1-object_mask) * K.square(0-pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs -
                                            pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
示例#24
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    """Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    """
    num_layers = 1
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    g_y_true = y_true
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * grid_size_multiplier,
        K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0

    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    for layer in range(num_layers):
        object_mask = y_true[layer][..., 4:5]
        vertices_mask = y_true[layer][..., 5 + num_classes + 2:5 +
                                      num_classes + NUM_ANGLES3:3]
        true_class_probs = y_true[layer][..., 5:5 + num_classes]

        grid, raw_pred, pred_xy, pred_wh, pol_cnf = yolo_head(
            yolo_outputs[layer],
            anchors[anchor_mask[layer]],
            num_classes,
            input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])
        raw_true_xy = y_true[layer][..., :2] * grid_shapes[layer][
            ..., ::-1] - grid
        raw_true_polygon0 = y_true[layer][..., 5 + num_classes:5 +
                                          num_classes + NUM_ANGLES3]

        raw_true_wh = K.log(y_true[layer][..., 2:4] /
                            anchors[anchor_mask[layer]] *
                            input_shape[..., ::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf

        raw_true_polygon_x = raw_true_polygon0[..., ::3]
        raw_true_polygon_y = raw_true_polygon0[..., 1::3]

        dx = K.square(anchors[anchor_mask[layer]][..., 0:1] / 2)
        dy = K.square(anchors[anchor_mask[layer]][..., 1:2] / 2)
        d = K.cast(K.sqrt(dx + dy), K.dtype(raw_true_polygon_x))

        diagonal = K.sqrt(
            K.pow(input_shape[..., ::-1][0], 2) +
            K.pow(input_shape[..., ::-1][1], 2))
        raw_true_polygon_x = K.log(raw_true_polygon_x / d * diagonal)
        raw_true_polygon_x = K.switch(vertices_mask, raw_true_polygon_x,
                                      K.zeros_like(raw_true_polygon_x))
        box_loss_scale = 2 - y_true[layer][..., 2:3] * y_true[layer][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[layer][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True
        ) + (1 - object_mask) * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs,
            raw_pred[..., 5:5 + num_classes],
            from_logits=True)
        polygon_loss_x = object_mask * vertices_mask * box_loss_scale * 0.5 * K.square(
            raw_true_polygon_x -
            raw_pred[..., 5 + num_classes:5 + num_classes + NUM_ANGLES3:3])
        polygon_loss_y = object_mask * vertices_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_polygon_y,
            raw_pred[..., 5 + num_classes + 1:5 + num_classes + NUM_ANGLES3:3],
            from_logits=True)
        vertices_confidence_loss = object_mask * K.binary_crossentropy(
            vertices_mask,
            raw_pred[..., 5 + num_classes + 2:5 + num_classes + NUM_ANGLES3:3],
            from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        class_loss = K.sum(class_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        vertices_confidence_loss = K.sum(vertices_confidence_loss) / mf
        polygon_loss = K.sum(polygon_loss_x) / mf + K.sum(polygon_loss_y) / mf

        diou_loss = K.sum(
            object_mask * box_loss_scale *
            (1 - box_diou(pred_box, y_true[layer][..., 0:4]))) / mf

        loss += (xy_loss + wh_loss + confidence_loss + class_loss +
                 0.2 * polygon_loss + 0.2 * vertices_confidence_loss) / (
                     K.sum(object_mask) + 1) * mf
    return loss
示例#25
0
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
示例#26
0
def __euclidean(x, y):
    diff = tf.subtract(x, y)
    square_diff = K.pow(diff, 2)
    d = K.sqrt(K.sum(square_diff, axis=-1))
    return d
示例#27
0
    def __init__(self,
                 inputs,
                 arch,
                 spec_reg,
                 y_true,
                 y_train_labeled_onehot,
                 n_clusters,
                 affinity,
                 scale_nbr,
                 n_nbrs,
                 batch_sizes,
                 siamese_net=None,
                 x_train=None,
                 have_labeled=False):
        self.y_true = y_true
        self.y_train_labeled_onehot = y_train_labeled_onehot
        self.inputs = inputs
        self.batch_sizes = batch_sizes
        # generate layers
        self.layers = make_layer_list(arch[:-1], 'spectral', spec_reg)
        self.layers += [{
            'type': 'tanh',
            'size': n_clusters,
            'l2_reg': spec_reg,
            'name': 'spectral_{}'.format(len(arch) - 1)
        }, {
            'type': 'Orthonorm',
            'name': 'orthonorm'
        }]

        # create spectralnet
        self.outputs = stack_layers(self.inputs, self.layers)
        self.net = Model(inputs=self.inputs['Unlabeled'],
                         outputs=self.outputs['Unlabeled'])

        # DEFINE LOSS

        # generate affinity matrix W according to params
        if affinity == 'siamese':
            input_affinity = tf.concat(
                [siamese_net.outputs['A'], siamese_net.outputs['Labeled']],
                axis=0)
            x_affinity = siamese_net.predict(x_train, batch_sizes)
        elif affinity in ['knn', 'full']:
            input_affinity = tf.concat(
                [self.inputs['Unlabeled'], self.inputs['Labeled']], axis=0)
            x_affinity = x_train

        # calculate scale for affinity matrix
        scale = get_scale(x_affinity, self.batch_sizes['Unlabeled'], scale_nbr)

        # create affinity matrix
        if affinity == 'full':
            W = costs.full_affinity(input_affinity, scale=scale)
        elif affinity in ['knn', 'siamese']:
            W = costs.knn_affinity(input_affinity,
                                   n_nbrs,
                                   scale=scale,
                                   scale_nbr=scale_nbr)

        # if we have labels, use them
        if have_labeled:
            # get true affinities (from labeled data)
            W_true = tf.cast(tf.equal(costs.squared_distance(y_true), 0),
                             dtype='float32')

            # replace lower right corner of W with W_true
            unlabeled_end = tf.shape(self.inputs['Unlabeled'])[0]
            W_u = W[:unlabeled_end, :]  # upper half
            W_ll = W[unlabeled_end:, :unlabeled_end]  # lower left
            W_l = tf.concat((W_ll, W_true), axis=1)  # lower half
            W = tf.concat((W_u, W_l), axis=0)

            # create pairwise batch distance matrix self.Dy
            self.Dy = costs.squared_distance(
                tf.concat([self.outputs['Unlabeled'], self.outputs['Labeled']],
                          axis=0))
        else:
            self.Dy = costs.squared_distance(self.outputs['Unlabeled'])

        # define loss
        self.loss = K.sum(W * self.Dy) / (2 * batch_sizes['Unlabeled'])

        # create the train step update
        self.learning_rate = tf.Variable(0., name='spectral_net_learning_rate')
        self.train_step = tf.train.RMSPropOptimizer(
            learning_rate=self.learning_rate).minimize(
                self.loss, var_list=self.net.trainable_weights)

        # initialize spectralnet variables
        K.get_session().run(tf.global_variables_initializer())
        K.get_session().run(
            tf.variables_initializer(self.net.trainable_weights))