import tensorflow as tf
from keras import backend as K
from keras import regularizers
from keras.layers import (Activation, Conv2D, Lambda, Permute, Reshape,
                          Softmax, add)


def loop_body(b, ignore_mask):
    # Note: relies on y_true, layer, object_mask_bool, pred_box, box_iou
    # and ignore_thresh from its enclosing (loss-function) scope.
    # Ground-truth boxes actually present in image b of the batch
    # (object_mask_bool filters out the empty slots).
    true_box = tf.boolean_mask(y_true[layer][b, ..., 0:4],
                               object_mask_bool[b, ..., 0])
    iou = box_iou(pred_box[b], true_box)
    # Best IOU of each predicted box against any ground-truth box.
    best_iou = K.max(iou, axis=-1)
    # Predictions below ignore_thresh are excluded from the no-object loss.
    ignore_mask = ignore_mask.write(
        b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
    return b + 1, ignore_mask
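# What loop_body computes, illustrated on toy data in plain NumPy: each
# predicted box takes its best IOU against all ground-truth boxes and is
# marked "ignored" (excluded from the no-object penalty) when that best
# IOU falls below ignore_thresh. box_iou_np is a hypothetical stand-in
# for the graph-mode box_iou used above.
import numpy as np

def box_iou_np(a, b):
    # a: (N, 4), b: (M, 4); boxes given as (x_center, y_center, w, h).
    a_min = a[:, None, :2] - a[:, None, 2:] / 2.
    a_max = a[:, None, :2] + a[:, None, 2:] / 2.
    b_min = b[None, :, :2] - b[None, :, 2:] / 2.
    b_max = b[None, :, :2] + b[None, :, 2:] / 2.
    inter_wh = np.maximum(np.minimum(a_max, b_max) - np.maximum(a_min, b_min), 0.)
    inter = inter_wh[..., 0] * inter_wh[..., 1]
    union = a[:, None, 2] * a[:, None, 3] + b[None, :, 2] * b[None, :, 3] - inter
    return inter / union   # pairwise IOU, shape (N, M)

pred = np.array([[0.5, 0.5, 0.2, 0.2],    # overlaps the ground truth
                 [0.1, 0.9, 0.3, 0.3]])   # overlaps nothing
true = np.array([[0.5, 0.5, 0.25, 0.25]])
best_iou = box_iou_np(pred, true).max(axis=-1)
print((best_iou < 0.5).astype(np.float32))  # ignore_thresh=0.5 -> [0. 1.]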
def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=.6):
    """Filter YOLO boxes based on object and class confidence."""
    box_scores = box_confidence * box_class_probs
    box_classes = K.argmax(box_scores, axis=-1)
    box_class_scores = K.max(box_scores, axis=-1)
    prediction_mask = box_class_scores >= threshold

    # TODO: Expose tf.boolean_mask to Keras backend?
    boxes = tf.boolean_mask(boxes, prediction_mask)
    scores = tf.boolean_mask(box_class_scores, prediction_mask)
    classes = tf.boolean_mask(box_classes, prediction_mask)
    return boxes, scores, classes
def viterbi_decode(x, U, b_start=None, b_end=None, mask=None):
    """Computes the best tag sequence y for a given input x, i.e. the one
    that maximizes the value of path_energy."""
    x = add_boundary_energy(x, b_start, b_end, mask)

    alpha_0 = x[:, 0, :]
    gamma_0 = K.zeros_like(alpha_0)
    initial_states = [gamma_0, alpha_0]
    _, gamma = _forward(x,
                        lambda B: [K.cast(K.argmax(B, axis=1), K.floatx()),
                                   K.max(B, axis=1)],
                        initial_states, U, mask)
    y = _backward(gamma, mask)
    return y
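# A self-contained NumPy sketch of the same max-product recurrence, for
# readers who want to see the algorithm without the batched Keras
# machinery (add_boundary_energy, _forward and _backward are defined
# elsewhere in the CRF module). Convention assumed here: energies are
# maximized, x holds unary scores and U pairwise transition scores.
import numpy as np

def viterbi_decode_np(x, U):
    # x: (timesteps, num_tags) unary energies.
    # U: (num_tags, num_tags) transition energies, U[i, j] = score(i -> j).
    T, n = x.shape
    alpha = x[0].copy()                      # best score ending in each tag
    gamma = np.zeros((T - 1, n), dtype=int)  # backpointers
    for t in range(1, T):
        scores = alpha[:, None] + U          # (from_tag, to_tag)
        gamma[t - 1] = scores.argmax(axis=0)
        alpha = scores.max(axis=0) + x[t]
    # Trace the best path backwards through the stored argmaxes.
    y = [int(alpha.argmax())]
    for t in range(T - 2, -1, -1):
        y.append(int(gamma[t][y[-1]]))
    return y[::-1]

# Transitions that reward switching tags produce an alternating sequence:
print(viterbi_decode_np(np.zeros((4, 2)), np.array([[-1., 1.], [1., -1.]])))
# -> [1, 0, 1, 0]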
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=.6):
    # score = p(object) * p(class | object), the probability that the box
    # contains an object of each class
    box_scores = box_confidence * box_class_probs  # 19x19x5x80
    # index of the highest-probability class for each anchor box
    box_classes = K.argmax(box_scores, axis=-1)  # 19x19x5 (1 class index)
    # score of that highest-probability class
    box_class_scores = K.max(box_scores, axis=-1)  # 19x19x5 (1 class score)
    # mask selecting boxes whose best class score exceeds the threshold
    filtering_mask = box_class_scores >= threshold
    # keep only the entries selected by the mask
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)
    return scores, boxes, classes
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.3):
    """Filter object and class predictions by confidence threshold.

    Arguments:
        box_confidence - tensor of shape (19, 19, 5, 1): the objectness
            probability pc for each of the 5 anchor boxes predicted in each
            of the 19x19 grid cells.
        boxes - tensor of shape (19, 19, 5, 4): (p_x, p_y, p_h, p_w) for
            every anchor box.
        box_class_probs - tensor of shape (19, 19, 5, 80): detection
            probabilities for every class (c1, c2, ..., c80) of every anchor
            box in every cell.
        threshold - float: a class prediction is kept only if its score is
            above this value.

    Returns:
        scores - tensor of shape (None,): class scores of the kept boxes.
        boxes - tensor of shape (None, 4): (b_x, b_y, b_h, b_w) of the kept
            boxes.
        classes - tensor of shape (None,): class indices of the kept boxes.

    Note: "None" appears because the exact number of selected boxes is not
    known in advance; it depends on the threshold. For example, if 10 boxes
    are kept, the actual shape of scores is (10,).
    """
    # Step 1: compute the box scores.
    box_scores = box_confidence * box_class_probs
    # Step 2: index and score of the best class for each anchor box.
    box_classes = K.argmax(box_scores, axis=-1)    # (19, 19, 5)
    box_class_scores = K.max(box_scores, axis=-1)  # reduce the class axis of
                                                   # (19, 19, 5, 80)
    # Step 3: build the mask from the threshold.
    filtering_mask = (box_class_scores >= threshold)
    # Apply the mask to scores, boxes, and classes.
    scores = tf.boolean_mask(box_class_scores, filtering_mask)
    boxes = tf.boolean_mask(boxes, filtering_mask)
    classes = tf.boolean_mask(box_classes, filtering_mask)
    return scores, boxes, classes
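# A quick smoke test for the filter above, assuming the TF 1.x-era
# standalone-Keras setup the rest of this code targets. The random tensors
# are stand-ins for a real YOLO head's outputs.
import numpy as np

np.random.seed(0)
box_confidence = K.constant(np.random.rand(19, 19, 5, 1))
test_boxes = K.constant(np.random.rand(19, 19, 5, 4))
box_class_probs = K.constant(np.random.rand(19, 19, 5, 80))

scores, out_boxes, classes = yolo_filter_boxes(box_confidence, test_boxes,
                                               box_class_probs, threshold=0.3)
# K.eval runs the small graph; the leading dimension is the number of
# boxes that survived the threshold.
print(K.eval(scores).shape, K.eval(out_boxes).shape, K.eval(classes).shape)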
def dual_attn_block(inp, nc, squeeze_factor=8):
    '''
    https://github.com/junfu1115/DANet
    '''
    assert nc // squeeze_factor > 0, f"Input channels must be >= {squeeze_factor}, received nc={nc}"
    x = inp
    shape_x = x.get_shape().as_list()

    # position attention module
    x_pam = Conv2D(nc, kernel_size=3,
                   kernel_regularizer=regularizers.l2(w_l2),
                   kernel_initializer=conv_init,
                   use_bias=False, padding="same")(x)
    x_pam = Activation("relu")(x_pam)
    x_pam = normalization(x_pam, norm, nc)
    f_pam = Conv2D(nc // squeeze_factor, 1,
                   kernel_regularizer=regularizers.l2(w_l2))(x_pam)
    g_pam = Conv2D(nc // squeeze_factor, 1,
                   kernel_regularizer=regularizers.l2(w_l2))(x_pam)
    h_pam = Conv2D(nc, 1, kernel_regularizer=regularizers.l2(w_l2))(x_pam)
    shape_f_pam = f_pam.get_shape().as_list()
    shape_g_pam = g_pam.get_shape().as_list()
    shape_h_pam = h_pam.get_shape().as_list()
    flat_f_pam = Reshape((-1, shape_f_pam[-1]))(f_pam)
    flat_g_pam = Reshape((-1, shape_g_pam[-1]))(g_pam)
    flat_h_pam = Reshape((-1, shape_h_pam[-1]))(h_pam)
    s_pam = Lambda(lambda x: K.batch_dot(x[0],
                                         Permute((2, 1))(x[1])))([flat_g_pam,
                                                                  flat_f_pam])
    beta_pam = Softmax(axis=-1)(s_pam)
    o_pam = Lambda(lambda x: K.batch_dot(x[0], x[1]))([beta_pam, flat_h_pam])
    o_pam = Reshape(shape_x[1:])(o_pam)
    o_pam = Scale()(o_pam)
    out_pam = add([o_pam, x_pam])
    out_pam = Conv2D(nc, kernel_size=3,
                     kernel_regularizer=regularizers.l2(w_l2),
                     kernel_initializer=conv_init,
                     use_bias=False, padding="same")(out_pam)
    out_pam = Activation("relu")(out_pam)
    out_pam = normalization(out_pam, norm, nc)

    # channel attention module
    x_chn = Conv2D(nc, kernel_size=3,
                   kernel_regularizer=regularizers.l2(w_l2),
                   kernel_initializer=conv_init,
                   use_bias=False, padding="same")(x)
    x_chn = Activation("relu")(x_chn)
    x_chn = normalization(x_chn, norm, nc)
    shape_x_chn = x_chn.get_shape().as_list()
    flat_f_chn = Reshape((-1, shape_x_chn[-1]))(x_chn)
    flat_g_chn = Reshape((-1, shape_x_chn[-1]))(x_chn)
    flat_h_chn = Reshape((-1, shape_x_chn[-1]))(x_chn)
    s_chn = Lambda(lambda x: K.batch_dot(Permute((2, 1))(x[0]),
                                         x[1]))([flat_g_chn, flat_f_chn])
    s_new_chn = Lambda(lambda x: K.repeat_elements(K.max(x, -1, keepdims=True),
                                                   nc, -1))(s_chn)
    s_new_chn = Lambda(lambda x: x[0] - x[1])([s_new_chn, s_chn])
    beta_chn = Softmax(axis=-1)(s_new_chn)
    o_chn = Lambda(lambda x: K.batch_dot(x[0],
                                         Permute((2, 1))(x[1])))([flat_h_chn,
                                                                  beta_chn])
    o_chn = Reshape(shape_x[1:])(o_chn)
    o_chn = Scale()(o_chn)
    out_chn = add([o_chn, x_chn])
    out_chn = Conv2D(nc, kernel_size=3,
                     kernel_regularizer=regularizers.l2(w_l2),
                     kernel_initializer=conv_init,
                     use_bias=False, padding="same")(out_chn)
    out_chn = Activation("relu")(out_chn)
    out_chn = normalization(out_chn, norm, nc)

    out = add([out_pam, out_chn])
    return out
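# dual_attn_block reads several module-level names (w_l2, conv_init, norm,
# normalization, Scale) defined elsewhere in this codebase. The stand-ins
# below are assumptions, sketched only so the block can be smoke-tested;
# the project's real definitions may differ.
from keras.layers import BatchNormalization, Input, Layer
from keras.models import Model

w_l2 = 1e-4               # assumed: L2 regularization weight
conv_init = "he_normal"   # assumed: conv kernel initializer
norm = "batchnorm"        # assumed: normalization selector

def normalization(x, norm, nc):
    # assumed behavior: batch norm when requested, identity otherwise
    return BatchNormalization(axis=-1)(x) if norm == "batchnorm" else x

class Scale(Layer):
    # assumed: a single learnable scalar gain, as in DANet's attention scale
    def build(self, input_shape):
        self.gamma = self.add_weight(name="gamma", shape=(1,),
                                     initializer="zeros", trainable=True)
        super(Scale, self).build(input_shape)

    def call(self, x):
        return self.gamma * x

# The block preserves the spatial shape and channel count of its input:
inp = Input(shape=(16, 16, 32))
model = Model(inp, dual_attn_block(inp, nc=32))
model.summary()  # final output shape: (None, 16, 16, 32)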
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground
        truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1],
        true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)
    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
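# How a loss with this signature is typically wired up for training
# (YAD2K-style): wrap it in a Lambda layer whose output *is* the loss and
# compile with an identity objective. model_body, anchors, boxes_input,
# detectors_mask_input and matching_boxes_input are assumed to come from
# the surrounding training script, so this is left as a sketch:
#
#   model_loss = Lambda(
#       yolo_loss,
#       output_shape=(1,),
#       name='yolo_loss',
#       arguments={'anchors': anchors,
#                  'num_classes': num_classes})([
#                      model_body.output, boxes_input,
#                      detectors_mask_input, matching_boxes_input])
#   model = Model([model_body.input, boxes_input, detectors_mask_input,
#                  matching_boxes_input], model_loss)
#   model.compile(optimizer='adam',
#                 loss={'yolo_loss': lambda y_true, y_pred: y_pred})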