def _kernel_constraint(self, kernel):
    """Radially constrain a single kernel slice.

    The result is grown outwards from the kernel centre: a small central
    patch is taken as the seed and every loop iteration pads it by one cell
    on each side, filling the new ring with the diagonal entry
    ``kernel[start + i, start + i]``.

    Args:
        kernel: Kernel tensor. The original docstring describes it as
            ``(height, width, channels)``; only its first two axes are
            indexed here and its first dimension is used as the kernel
            size.  # NOTE(review): confirm the rank the caller passes in.

    Returns:
        The constrained kernel produced by the padding loop.
    """
    ring_padding = K.constant([[1, 1], [1, 1]], dtype='int32')

    side = K.shape(kernel)[0]
    start = K.cast(side / 2, 'int32')
    side_is_odd = K.cast(math_ops.floormod(side, 2), 'bool')

    def odd_seed():
        # Odd size: start from the single slice just before `start`.
        return kernel[start - 1:start, start - 1:start]

    def even_seed():
        # Even size: adding a (2, 2) zero tensor broadcasts the 1x1 slice
        # into a 2x2 seed.
        return kernel[start - 1:start, start - 1:start] + K.zeros(
            (2, 2), dtype=kernel.dtype)

    seed = K.switch(side_is_odd, odd_seed, even_seed)
    first_ring = K.switch(side_is_odd,
                          lambda: K.constant(0, dtype='int32'),
                          lambda: K.constant(1, dtype='int32'))

    def keep_growing(ring, *_):
        return K.less(ring, start)

    def add_ring(ring, grown):
        fill_value = kernel[start + ring, start + ring]
        return ring + 1, array_ops.pad(grown, ring_padding,
                                       constant_values=fill_value)

    # The array grows on every iteration, so its shape invariant is left
    # fully unknown in both dimensions.
    _, constrained = control_flow_ops.while_loop(
        keep_growing,
        add_ring,
        [first_ring, seed],
        shape_invariants=[
            first_ring.get_shape(),
            tensor_shape.TensorShape([None, None]),
        ])
    return constrained
def call(self, inputs, reverse=False, ddi=False, **kwargs):
    """Apply (or invert) the per-channel affine transform of this layer.

    Forward: ``(inputs + bias) * exp(log_scale)``.
    Reverse: ``inputs / exp(log_scale) - bias``.

    Args:
        inputs: Input tensor; statistics are reduced over every axis except
            the last one.
        reverse: When true, apply the inverse transform.
        ddi: When true (forward only), derive initial values for
            ``log_scale`` and ``bias`` from the statistics of ``inputs`` and
            use/store them wherever the corresponding variable is still
            all zeros.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The transformed tensor.
    """
    # Computed up-front (as before) even though only the forward/ddi path
    # consumes it.
    reduce_axis = list(range(K.ndim(inputs) - 1))

    if reverse:
        return inputs / K.exp(self.log_scale) - self.bias

    logscale_factor = 3.
    log_scale, bias = self.log_scale, self.bias

    def still_zero(variable):
        # A variable that is entirely zero is treated as "not initialised".
        return K.all(K.equal(variable, 0.))

    if ddi:
        second_moment = tf.reduce_mean(inputs**2, reduce_axis, keepdims=True)
        init_scale = (tf.log(1. / (tf.sqrt(second_moment) + 1e-6))
                      / logscale_factor)
        init_bias = tf.reduce_mean(inputs, reduce_axis, keepdims=True)

        # Values used for this very call.
        log_scale = K.switch(still_zero(self.log_scale), init_scale,
                             self.log_scale)
        bias = K.switch(still_zero(self.bias), -init_bias, self.bias)

        # Persist the data-dependent values: add them to the variables when
        # those are still zero, otherwise add zero.
        scale_increment = K.switch(still_zero(self.log_scale), init_scale,
                                   K.zeros_like(init_scale))
        self.add_update(K.update_add(self.log_scale, scale_increment),
                        inputs=inputs)
        bias_increment = K.switch(still_zero(self.bias), -init_bias,
                                  K.zeros_like(init_bias))
        self.add_update(K.update_add(self.bias, bias_increment),
                        inputs=inputs)

    return (inputs + bias) * K.exp(log_scale)
def loss(y_true, y_pred):
    """Task-aware softmax cross-entropy.

    All columns but the last of ``y_pred`` / ``y_true`` are treated as class
    logits / targets; the last column of ``y_true`` is compared against the
    enclosing ``task`` value.

    * If ``task`` equals 1005, every sample's loss is scaled by
      ``loss_weights[task]``.
    * Otherwise a sample whose last ``y_true`` column equals ``task`` keeps
      its unscaled loss, and every other sample is scaled by
      ``loss_weights[task]``.

    Returns:
        The mean of the resulting per-sample losses.
    """
    log_probs = K.log(K.softmax(y_pred[:, :-1]))
    loss_val = -1 * K.sum(log_probs * y_true[:, :-1], axis=-1)

    weighted_loss = loss_weights[task] * loss_val
    sample_is_task = K.equal(y_true[:, -1], task)
    task_dependent = K.switch(sample_is_task, loss_val, weighted_loss)

    per_sample = K.switch(K.equal(task, 1005), weighted_loss, task_dependent)
    return K.mean(per_sample)
def call(self, inputs, **kwargs):
    """Run one adaptive-computation step and accumulate its weighted output.

    Each call updates the layer's running state (``active_steps``,
    ``ponder_cost``, ``remainder``, ``halt_budget``, ``weighted_output``),
    so the statement order below matters.

    Args:
        inputs: Tensor whose last two static dimensions are read as
            ``(sequence_length, d_model)``.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        ``[inputs, self.weighted_output]``.
    """
    sequence_length, d_model = K.int_shape(inputs)[-2:]

    # Output of the sigmoid "halting unit" (not the probability yet).
    flat_inputs = K.reshape(inputs, [-1, d_model])
    halting_logits = K.bias_add(
        K.dot(flat_inputs, self.act_weights['halting_kernel']),
        self.act_weights['halting_biases'],
        data_format='channels_last')
    halting = K.sigmoid(K.reshape(halting_logits, [-1, sequence_length]))

    if self.zeros_like_halting is None:
        self.initialize_control_tensors(halting)

    # Useful flags.
    step_is_active = K.greater(self.halt_budget, 0)
    no_further_steps = K.less_equal(self.halt_budget - halting, 0)

    # The halting probability is
    #   a. the halting output while budget remains after this step,
    #   b. the remainder when this step exhausts the budget,
    #   c. zero for positions that are already out of budget.
    active_probability = K.switch(no_further_steps, self.remainder, halting)
    halting_prob = K.switch(step_is_active, active_probability,
                            self.zeros_like_halting)

    self.active_steps += K.switch(step_is_active, self.ones_like_halting,
                                  self.zeros_like_halting)

    # The last step is not known in advance, so the loss expression is
    # refreshed on every call of the layer.
    self.ponder_cost = (self.act_weights['time_penalty_t']
                        * K.mean(self.remainder + self.active_steps))

    # Update "the remaining probability" and the halt budget.
    self.remainder = K.switch(no_further_steps, self.remainder,
                              self.remainder - halting)
    self.halt_budget -= halting  # OK to become negative

    # When no position is active at this step, a constant zero tensor is
    # used instead of multiplying the inputs by an all-zero halting_prob.
    if self.zeros_like_input is None:
        self.zeros_like_input = K.zeros_like(inputs, name='zeros_like_input')

    # Expressed via a sum because K.any(step_is_active) doesn't work in
    # PlaidML.
    active_count = K.sum(K.cast(step_is_active, 'int32'))
    any_step_is_active = K.greater(active_count, 0)
    step_weighted_output = K.switch(any_step_is_active,
                                    K.expand_dims(halting_prob, -1) * inputs,
                                    self.zeros_like_input)

    if self.weighted_output is None:
        self.weighted_output = step_weighted_output
    else:
        self.weighted_output += step_weighted_output
    return [inputs, self.weighted_output]
def call(self, y):
    """Pull ``y`` towards ``self.c_graph`` until the constraints hold.

    The output is ``alpha * y + (1 - alpha) * c`` with ``c = self.c_graph``.
    For every row of ``self.A_graph`` the ratio
    ``(b - A.c) / (A.(y - c) + epsilon)`` is computed; ratios outside
    ``[0, 1]`` are discarded and ``alpha`` is the smallest remaining ratio,
    or 1 when none remains (``y`` is returned unchanged in that case).

    Args:
        y: The single input tensor of the layer.

    Returns:
        The tensor ``alpha * y + (1 - alpha) * c``.

    Raises:
        ValueError: If ``y`` is a list (the layer takes exactly one input).
    """
    if isinstance(y, list):
        raise ValueError('TSG layer has only 1 input')

    y = check_numerics(y, 'Problem with input y')

    center = self.c_graph
    constraint_at_center = tensordot(self.A_graph, center, 1)   # A.c
    slack = self.b_graph - constraint_at_center                 # b - A.c
    offset = y - center                                         # y - c
    # A.(y - c)
    projected_offset = K.sum((self.A_graph * expand_dims(offset, -2)), axis=2)

    # Element-wise division; epsilon is added to the denominator.
    intersection_points = slack / (projected_offset + K.epsilon())

    # Only ratios in [0, 1] lie between c and y. Anything outside that range
    # is replaced by the sentinel value 2 so that it cannot win the minimum.
    above_one = K.greater(intersection_points,
                          K.ones_like(intersection_points))
    candidates = K.switch(above_one,
                          K.ones_like(intersection_points) + 1,
                          intersection_points)
    below_zero = K.less(candidates, K.zeros_like(intersection_points))
    candidates = K.switch(below_zero,
                          K.ones_like(intersection_points) + 1,
                          candidates)

    # Smallest admissible ratio per sample.
    alpha = K.min(candidates, axis=-1, keepdims=True)

    # If only sentinels remain, y is an interior point: keep it as is.
    is_interior = K.greater(alpha, K.ones_like(alpha))
    alpha = K.switch(is_interior, K.ones_like(alpha), alpha)

    # alpha.y + (1 - alpha).c
    return alpha * y + ((1 - alpha) * center)
def create_model(numNodes, embedding_size, lamb_V):
    """Build the pairwise ranking model over vertex and context embeddings.

    The model takes four inputs of shape ``(1,)``: a node ``u``, a positive
    node, a negative node and a ``train_type`` flag. Two scores are formed:

    * ``DS_SCORE``: ``u * pos - u * neg`` using the vertex embedding for all
      three nodes;
    * ``NS_SCORE``: the same expression with ``pos`` / ``neg`` looked up in
      the context embedding.

    The output is the sum over the embedding axis of the DS score where
    ``train_type == 1`` and of the NS score otherwise.

    Args:
        numNodes: Vocabulary size of both embedding layers.
        embedding_size: Dimension of both embeddings.
        lamb_V: Unused in this function (the embeddings are created without
            regularisation); kept so existing callers keep working.

    Returns:
        Tuple ``(model, vertex_emb)`` of the Keras model and the vertex
        embedding layer.
    """
    u = Input(shape=(1, ))
    pos = Input(shape=(1, ))
    neg = Input(shape=(1, ))
    train_type = Input(shape=(1, ))

    # No reg
    vertex_emb = Embedding(numNodes, embedding_size, name='vertex_emb')
    context_emb = Embedding(numNodes, embedding_size, name='context_emb')

    u_emb = vertex_emb(u)
    pos_emb = vertex_emb(pos)
    neg_emb = vertex_emb(neg)
    pos_ctx = context_emb(pos)
    neg_ctx = context_emb(neg)

    # DS pair score
    DS_score = Lambda(lambda x: x[0] * x[1] - x[0] * x[2],
                      name='DS_SCORE')([u_emb, pos_emb, neg_emb])
    # NS pair score
    NS_score = Lambda(lambda x: x[0] * x[1] - x[0] * x[2],
                      name='NS_SCORE')([u_emb, pos_ctx, neg_ctx])

    # `keep_dims=False` used to be passed explicitly here. It only restated
    # the default, and the `keep_dims` spelling is rejected by TensorFlow
    # 2.x, so it is omitted: same result, no version-specific keyword.
    score = Lambda(lambda x: K.switch(K.equal(x[2], 1),
                                      tf.reduce_sum(x[0], axis=-1),
                                      tf.reduce_sum(x[1], axis=-1)),
                   name='switch')([DS_score, NS_score, train_type])

    model = Model(inputs=[u, pos, neg, train_type], outputs=score)
    return model, vertex_emb
def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox):
    """Build the RPN bounding-box regression loss.

    config: the model config object (``IMAGES_PER_GPU`` is read from it).
    target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))].
        Uses 0 padding to fill in unused bbox deltas.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
        -1=negative, 0=neutral anchor.
    rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]

    Returns the mean smooth-L1 loss over positive anchors, or 0.0 when there
    are none.
    """
    # Only positive anchors contribute; negative and neutral anchors
    # (match value -1 or 0) are ignored.
    rpn_match = K.squeeze(rpn_match, -1)
    is_positive = K.equal(rpn_match, 1)
    positive_indices = tf.where(is_positive)

    # Predicted deltas of the contributing anchors.
    rpn_bbox = tf.gather_nd(rpn_bbox, positive_indices)

    # Trim the zero-padded targets to the number of positives per image so
    # they line up with rpn_bbox.
    batch_counts = K.sum(K.cast(is_positive, tf.int32), axis=1)
    target_bbox = batch_pack_graph(target_bbox, batch_counts,
                                   config.IMAGES_PER_GPU)

    elementwise_loss = smooth_l1_loss(target_bbox, rpn_bbox)

    # Fall back to a constant zero when nothing contributed.
    has_entries = tf.size(elementwise_loss) > 0
    return K.switch(has_entries, K.mean(elementwise_loss), tf.constant(0.0))
def rpn_regress_loss(predict_deltas, deltas, indices):
    """RPN box-regression loss over positive anchors.

    :param predict_deltas: predicted regression targets,
        (batch_num, anchors_num, 4)
    :param deltas: ground-truth regression targets,
        (batch_num, rpn_train_anchors, 4+1); the last entry is a tag,
        tag=0 means padding
    :param indices: positive/negative sample indices,
        (batch_num, rpn_train_anchors, (idx, tag)); idx is the anchor index,
        the last entry is a tag: 0 = padding, 1 = positive, -1 = negative
    :return: mean smooth-L1 loss over the positive anchors (0.0 if none)
    """
    # Drop padding and negative samples.
    is_positive = tf.equal(indices[:, :, -1], 1)
    positive_indices = tf.where(is_positive)
    deltas = tf.gather_nd(deltas[..., :-1], positive_indices)  # (n, 4)

    # 1-D tensor with the anchor index of every positive sample.
    anchor_ids = tf.gather_nd(indices[..., 0], positive_indices)
    # Batch index of every positive sample.
    batch_ids = positive_indices[:, 0]

    # (batch, anchor) index pairs of the positive anchors.
    train_indices_2d = tf.stack(
        [batch_ids, tf.cast(anchor_ids, dtype=tf.int64)], axis=1)

    # Predicted deltas of the positive anchors.
    predict_deltas = tf.gather_nd(predict_deltas, train_indices_2d,
                                  name='rpn_regress_loss_predict_deltas')

    # Smooth-L1. The empty-input guard is essential: without it the loss
    # becomes NaN when there are no positive anchors.
    has_positives = tf.size(deltas) > 0
    loss = K.switch(has_positives,
                    smooth_l1_loss(deltas, predict_deltas),
                    tf.constant(0.0))
    return K.mean(loss)
def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
    """Bounding-box refinement loss of the Mask R-CNN head.

    target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
    target_class_ids: [batch, num_rois]. Integer class IDs.
    pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))]

    Returns the mean smooth-L1 loss over positive ROIs, or 0.0 if there are
    none.
    """
    # Merge the batch and ROI dimensions for simplicity.
    num_classes = K.int_shape(pred_bbox)[2]
    target_class_ids = K.reshape(target_class_ids, (-1, ))
    target_bbox = K.reshape(target_bbox, (-1, 4))
    pred_bbox = K.reshape(pred_bbox, (-1, num_classes, 4))

    # Only positive ROIs contribute, and for each of them only the deltas
    # predicted for its own class.
    positive_roi_ix = tf.where(target_class_ids > 0)[:, 0]
    positive_roi_class_ids = tf.cast(
        tf.gather(target_class_ids, positive_roi_ix), tf.int64)
    roi_class_pairs = tf.stack([positive_roi_ix, positive_roi_class_ids],
                               axis=1)

    # Gather the true and predicted deltas that contribute to the loss.
    target_bbox = tf.gather(target_bbox, positive_roi_ix)
    pred_bbox = tf.gather_nd(pred_bbox, roi_class_pairs)

    # Smooth-L1 loss, replaced by a constant zero when nothing was gathered.
    has_positives = tf.size(target_bbox) > 0
    loss = K.switch(has_positives,
                    smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox),
                    tf.constant(0.0))
    return K.mean(loss)
def __init__(self, optimizer, steps_per_update=1, **kwargs):
    """Wrap ``optimizer`` so that it is driven by accumulated gradients.

    The wrapped optimizer is reconfigured in place:

    * its learning rate becomes the original rate when
      ``iterations % steps_per_update == 0`` and zero otherwise;
    * its ``momentum`` / ``rho`` / ``beta_1`` / ``beta_2`` attributes (when
      present) keep their original value on those same steps and are set to
      ``1 - 1e-7`` on all other steps;
    * its ``get_gradients`` is replaced by one that returns the accumulated
      gradients divided by ``steps_per_update``.

    Args:
        optimizer: The Keras optimizer instance to wrap.
        steps_per_update: Number of steps over which gradients are
            accumulated.
        **kwargs: Forwarded to the base optimizer constructor.
    """
    super(AccumOptimizer, self).__init__(**kwargs)
    self.optimizer = optimizer
    with K.name_scope(self.__class__.__name__):
        self.steps_per_update = steps_per_update
        self.iterations = K.variable(0, "int64", "iteration")
        self.cond = K.equal(self.iterations % steps_per_update, 0)
        self.lr = self.optimizer.lr
        self.accum_grads = None

        # Both branches of the switch must share a dtype, hence the float
        # literal `0.` (an integer 0 does not match the float learning
        # rate).
        self.optimizer.lr = K.switch(self.cond, self.lr, 0.)

        for attr in ("momentum", "rho", "beta_1", "beta_2"):
            if hasattr(self.optimizer, attr):
                value = getattr(self.optimizer, attr)
                setattr(self, attr, value)
                # Use the real coefficient on update steps and a value of
                # almost 1 on accumulation steps. Assigning the near-1
                # constant unconditionally would also override the real
                # coefficient on update steps.
                setattr(self.optimizer, attr,
                        K.switch(self.cond, value, 1. - 1e-7))

        # Mirror the remaining configuration entries of the wrapped
        # optimizer onto this object.
        for cfg in self.optimizer.get_config():
            if not hasattr(self, cfg):
                setattr(self, cfg, getattr(self.optimizer, cfg))

        # Cover the original get_gradients method with accumulative
        # gradients.
        def get_gradients(loss, params):
            return [ag / self.steps_per_update for ag in self.accum_grads]

        self.optimizer.get_gradients = get_gradients
def step(dec_input, states):
    """Run one decoder time step.

    Args:
        dec_input: Input for this step; it is used when the enclosing
            ``training`` flag is set, otherwise the previous output is fed
            back instead.
        states: Six-element sequence ``(previous output, previous attention,
            previous alignment, attention-RNN state, decoder-RNN-1 state,
            decoder-RNN-2 state)``.

    Returns:
        ``([next_output, next_alignment], new_states)`` where ``new_states``
        has the same six-element layout as ``states``.
    """
    (last_output, last_attention, _last_alignment, attn_state,
     dec1_state, dec2_state) = states

    # Teacher forcing while training, feedback of the last output otherwise.
    dec_input = K.switch(training, dec_input, last_output)
    prenet_out = self.prenet(dec_input)

    # Attention RNN over the pre-net output and the previous context.
    attn_rnn_in = K.concatenate([prenet_out, last_attention], axis=-1)
    attn_rnn_out, next_attn_state = self.attn_rnn_cell(attn_rnn_in,
                                                       [attn_state])
    next_attention, next_alignment = self.attention_mechanism(
        [attn_rnn_out, values, keys])

    # Project the context and RNN output, then apply two decoder RNN cells
    # with residual connections.
    projected = self.projection(
        K.concatenate([next_attention, attn_rnn_out], axis=-1))
    dec1_out, next_dec1_state = self.decoderRNNCell1(projected, [dec1_state])
    residual1 = projected + dec1_out
    dec2_out, next_dec2_state = self.decoderRNNCell2(residual1, [dec2_state])
    residual2 = residual1 + dec2_out

    next_output = self.output_projection(residual2)

    outputs = [next_output, next_alignment]
    new_states = [
        next_output, next_attention, next_alignment, next_attn_state,
        next_dec1_state, next_dec2_state
    ]
    return outputs, new_states
def loss_fn(y_true: tf.Tensor, y_pred: tf.Tensor):
    """Detection loss for one output layer.

    The last axis of both tensors is split into box centre ``[0:2]``, box
    size ``[2:4]``, confidence ``[4:5]`` and class scores ``[5:]``. The
    result is the sum of the object, no-object, class, xy and wh terms,
    each divided by ``h.batch_size``.
    """
    # Split the label and the prediction.
    grid_pred_xy, grid_pred_wh = y_pred[..., 0:2], y_pred[..., 2:4]
    pred_confidence, pred_cls = y_pred[..., 4:5], y_pred[..., 5:]

    all_true_xy, all_true_wh = y_true[..., 0:2], y_true[..., 2:4]
    true_confidence, true_cls = y_true[..., 4:5], y_true[..., 5:]

    obj_mask = true_confidence
    obj_mask_bool = y_true[..., 4] > obj_thresh

    # Calculate the ignore mask.
    ignore_mask = calc_ignore_mask(all_true_xy, all_true_wh, grid_pred_xy,
                                   grid_pred_wh, obj_mask_bool, iou_thresh,
                                   layer, h)

    grid_true_xy, grid_true_wh = tf_xywh_to_grid(all_true_xy, all_true_wh,
                                                 layer, h)
    # NOTE: when wh = 0, tf.log(0) = -inf, so cells without an object get an
    # explicit zero instead.
    grid_true_wh = K.switch(obj_mask_bool, grid_true_wh,
                            tf.zeros_like(grid_true_wh))

    # Define the loss terms.
    coord_weight = 2 - all_true_wh[..., 0:1] * all_true_wh[..., 1:2]

    xy_error = tf.nn.sigmoid_cross_entropy_with_logits(labels=grid_true_xy,
                                                       logits=grid_pred_xy)
    xy_loss = tf.reduce_sum(obj_mask * coord_weight * xy_error) / h.batch_size

    wh_error = tf.square(tf.subtract(x=grid_true_wh, y=grid_pred_wh))
    wh_loss = tf.reduce_sum(
        obj_mask * coord_weight * wh_weight * wh_error) / h.batch_size

    confidence_error = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=true_confidence, logits=pred_confidence)
    obj_loss = obj_weight * tf.reduce_sum(
        obj_mask * confidence_error) / h.batch_size
    noobj_loss = noobj_weight * tf.reduce_sum(
        (1 - obj_mask) * ignore_mask * confidence_error) / h.batch_size

    cls_error = tf.nn.sigmoid_cross_entropy_with_logits(labels=true_cls,
                                                        logits=pred_cls)
    cls_loss = tf.reduce_sum(obj_mask * cls_error) / h.batch_size

    total_loss = obj_loss + noobj_loss + cls_loss + xy_loss + wh_loss
    return total_loss
def mean_iou(y_true, y_pred):
    """Mean intersection-over-union across the classes seen in the batch.

    Args:
        y_true: true labels, tensor with shape (-1, num_labels)
        y_pred: predicted label probabilities from a softmax layer, tensor
            with shape (-1, num_labels, num_classes)

    Returns:
        The IoU summed over all classes whose union is non-empty, divided
        by the number of such classes; the (zero) sum itself when no class
        was seen.
    """
    iou_sum = K.variable(0.0, name='iou_sum')
    seen_classes = K.variable(0.0, name='seen_classes')
    y_pred_sparse = K.argmax(y_pred, axis=-1)

    for c in range(num_classes):
        true_c = K.cast(K.equal(y_true, c), K.floatx())
        pred_c = K.cast(K.equal(y_pred_sparse, c), K.floatx())

        # Element-wise intersection / union of the two binary masks.
        intersect = true_c * pred_c
        union = true_c + pred_c - intersect
        intersect_sum = K.sum(intersect)
        union_sum = K.sum(union)

        iou = intersect_sum / union_sum

        # A class that is absent from both labels and predictions has an
        # empty union; it is skipped rather than counted.
        union_sum_is_zero = K.equal(union_sum, 0)
        iou_sum = K.switch(union_sum_is_zero, iou_sum, iou_sum + iou)
        seen_classes = K.switch(union_sum_is_zero, seen_classes,
                                seen_classes + 1)

    # `seen_classes` can only be 0 if none of the true or predicted labels
    # in the batch contains a valid class; avoid dividing by zero then.
    return K.switch(K.equal(seen_classes, 0), iou_sum,
                    iou_sum / seen_classes)
def get_updates(self, loss, params):
    """Build the update ops for gradient accumulation.

    Every step increments ``self.iterations`` and updates the accumulators
    in ``self.accum_grads``: an accumulator is reset to zero when
    ``self.cond`` holds and otherwise has the current gradient added. The
    wrapped optimizer's iteration counter advances by ``self.cond`` cast to
    an integer, and its own updates (minus the first one) are appended.

    Args:
        loss: Loss tensor to differentiate.
        params: List of trainable variables.

    Returns:
        The list of update ops (also stored in ``self.updates``).
    """
    self.updates = [
        K.update_add(self.iterations, 1),
        # `self.cond` is a symbolic boolean tensor, so it has to be cast;
        # K.constant only accepts concrete values.
        K.update_add(self.optimizer.iterations, K.cast(self.cond, "int64")),
    ]

    # Accumulate gradients.
    self.accum_grads = [
        K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
    ]
    grads = self.get_gradients(loss, params)
    for g, ag in zip(grads, self.accum_grads):
        self.updates.append(
            K.update(ag, K.switch(self.cond, ag * 0, ag + g)))

    # Inherit the updates of the wrapped optimizer. `get_updates` needs the
    # loss and the parameters. Its first update is dropped, presumably
    # because it is the optimizer's own iteration increment, which is
    # already handled above -- TODO confirm against the optimizer in use.
    self.updates.extend(self.optimizer.get_updates(loss, params)[1:])
    self.weights.extend(self.optimizer.weights)
    return self.updates
def smoothL1(y_true, y_pred):
    """Summed smooth-L1 (Huber) loss between ``y_true`` and ``y_pred``.

    Absolute errors below ``HUBER_DELTA`` contribute ``0.5 * err**2``; all
    others contribute ``HUBER_DELTA * (err - 0.5 * HUBER_DELTA)``.
    """
    abs_error = k.abs(y_true - y_pred)
    is_small = abs_error < HUBER_DELTA
    quadratic = 0.5 * abs_error ** 2
    linear = HUBER_DELTA * (abs_error - 0.5 * HUBER_DELTA)
    return k.sum(k.switch(is_small, quadratic, linear))
def multi_inputs_multi_outputs_model():
    """Build and compile a small model with three inputs and two outputs.

    Inputs ``input_a`` and ``input_b`` (16 features each) go through the
    same ``dense_1`` layer; the ``input_a`` branch is additionally masked
    element-wise by the boolean input ``input_m`` (masked-out entries become
    zero). The two branches are concatenated and fed to two softmax heads,
    ``dense_2`` (3 units) and ``dense_3`` (2 units).

    Returns:
        The compiled ``keras.models.Model``.
    """
    layers = keras.layers

    input_a = layers.Input(shape=(16, ), name='input_a')
    input_b = layers.Input(shape=(16, ), name='input_b')
    input_m = layers.Input(shape=(8, ), dtype='bool', name='input_m')

    shared_dense = layers.Dense(8, name='dense_1')

    branch_a = shared_dense(input_a)
    # Apply a mask: keep values where the mask is true, zero elsewhere.
    masked_a = layers.Lambda(
        lambda pair: K.switch(pair[0], pair[1], K.zeros_like(pair[1])))(
            [input_m, branch_a])
    branch_b = shared_dense(input_b)

    merged = layers.concatenate([masked_a, branch_b], name='merge')
    head_c = layers.Dense(3, activation='softmax', name='dense_2')(merged)
    head_d = layers.Dense(2, activation='softmax', name='dense_3')(merged)

    model = keras.models.Model(inputs=[input_a, input_b, input_m],
                               outputs=[head_c, head_d])
    per_output_metrics = {
        'dense_2': 'categorical_accuracy',
        'dense_3': 'categorical_accuracy'
    }
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=per_output_metrics)
    return model
def __call__(self, *args, **kwargs):
    """Return the learning-rate tensor for the current global step.

    Before ``self.step_max_lr`` the rate is
    ``exp(step_accelerate_log * step) * min_lr``; from that step on it is
    ``exp(step_deccelerate_log * (step - step_max_lr)) * max_lr``.

    When TensorFlow has no global step registered, a private int64 counter
    is created and the step tensor is the op that increments it by one.
    Positional and keyword arguments are accepted but not used.
    """
    existing_step = tf.train.get_global_step()
    if existing_step is not None:
        self.global_step = gs = existing_step
    else:
        # No global step has been set: create our own counter variable.
        self.global_step = K.variable(tf.zeros(shape=(), dtype=tf.int64),
                                      dtype=tf.int64,
                                      name="lr_global_step")
        tf.train.global_step(K.get_session(), self.global_step)
        gs = K.update_add(self.global_step, 1)
    assert (gs is not None)

    step = tf.cast(gs, dtype=tf.float32)
    ramp_up = K.exp(self.step_accelerate_log * step) * self.min_lr
    ramp_down = K.exp(
        self.step_deccelerate_log * (step - self.step_max_lr)) * self.max_lr
    lr = K.switch(K.less(gs, self.step_max_lr), ramp_up, ramp_down)

    # Register the scalar with the tensorboard helper only once.
    if self.tensorboardimage and not self.added_scalar_to_tensorboard:
        self.tensorboardimage.add_scalar("learning_rate", lr)
        self.added_scalar_to_tensorboard = True
    return lr
def rpn_class_loss_graph(rpn_match, rpn_class_logits):
    """Classification loss of the RPN anchor classifier.

    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
        -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for BG/FG.

    Returns the mean sparse categorical cross-entropy over the non-neutral
    anchors, or 0.0 when there are none.
    """
    # Drop the trailing singleton dimension.
    rpn_match = tf.squeeze(rpn_match, -1)

    # Map the -1/+1 match values to 0/1 class labels.
    anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32)

    # Positive and negative anchors contribute to the loss; neutral anchors
    # (match value = 0) do not.
    contributing = tf.where(K.not_equal(rpn_match, 0))
    rpn_class_logits = tf.gather_nd(rpn_class_logits, contributing)
    anchor_class = tf.gather_nd(anchor_class, contributing)

    # Cross-entropy on the selected rows.
    per_anchor = K.sparse_categorical_crossentropy(target=anchor_class,
                                                   output=rpn_class_logits,
                                                   from_logits=True)
    has_entries = tf.size(per_anchor) > 0
    return K.switch(has_entries, K.mean(per_anchor), tf.constant(0.0))
def multi_inputs_multi_outputs_model():
    """Create a compiled three-input / two-output test model.

    ``input_a`` and ``input_b`` share the ``dense_1`` layer. The
    ``input_a`` branch is zeroed wherever the boolean ``input_m`` is false.
    Both branches are merged and classified by the softmax layers
    ``dense_2`` (3 classes) and ``dense_3`` (2 classes).

    Returns:
        The compiled ``keras.models.Model``.
    """
    Input, Dense = keras.layers.Input, keras.layers.Dense

    a = Input(shape=(16,), name='input_a')
    b = Input(shape=(16,), name='input_b')
    m = Input(shape=(8,), dtype='bool', name='input_m')

    dense = Dense(8, name='dense_1')
    a_2 = dense(a)

    # Apply a mask: entries whose mask is false are replaced by zeros.
    mask_layer = keras.layers.Lambda(
        lambda t: K.switch(t[0], t[1], K.zeros_like(t[1])))
    s_2 = mask_layer([m, a_2])

    b_2 = dense(b)
    merged = keras.layers.concatenate([s_2, b_2], name='merge')

    outputs = [
        Dense(3, activation='softmax', name='dense_2')(merged),
        Dense(2, activation='softmax', name='dense_3')(merged),
    ]

    model = keras.models.Model(inputs=[a, b, m], outputs=outputs)
    model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics={
            'dense_2': 'categorical_accuracy',
            'dense_3': 'categorical_accuracy'
        })
    return model
def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks):
    """Binary cross-entropy loss of the mask head.

    target_masks: [batch, num_rois, height, width].
        A float32 tensor of values 0 or 1. Uses zero padding to fill array.
    target_class_ids: [batch, num_rois]. Integer class IDs. Zero padded.
    pred_masks: [batch, proposals, height, width, num_classes] float32
        tensor with values from 0 to 1.

    Returns the mean binary cross-entropy over the masks of positive ROIs,
    or 0.0 when there are none.
    """
    # Merge the first two dimensions of every input.
    target_class_ids = K.reshape(target_class_ids, (-1, ))

    mask_shape = tf.shape(target_masks)
    target_masks = K.reshape(target_masks,
                             (-1, mask_shape[2], mask_shape[3]))

    pred_shape = tf.shape(pred_masks)
    pred_masks = K.reshape(
        pred_masks, (-1, pred_shape[2], pred_shape[3], pred_shape[4]))
    # Permute predicted masks to [N, num_classes, height, width].
    pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2])

    # Only positive ROIs contribute, and for each of them only the mask
    # predicted for its own class.
    positive_ix = tf.where(target_class_ids > 0)[:, 0]
    positive_class_ids = tf.cast(tf.gather(target_class_ids, positive_ix),
                                 tf.int64)
    roi_class_pairs = tf.stack([positive_ix, positive_class_ids], axis=1)

    # Gather the true and predicted masks that contribute to the loss.
    y_true = tf.gather(target_masks, positive_ix)
    y_pred = tf.gather_nd(pred_masks, roi_class_pairs)

    # Binary cross-entropy, or a constant zero without positive ROIs.
    has_positives = tf.size(y_true) > 0
    loss = K.switch(has_positives,
                    K.binary_crossentropy(target=y_true, output=y_pred),
                    tf.constant(0.0))
    return K.mean(loss)
def detect_regress_loss(predict_deltas, deltas, class_ids):
    """Class-specific box-regression loss of the detection network.

    :param predict_deltas: predicted regression values,
        (batch_num, train_roi_num, num_classes, (dy, dx, dh, dw))
    :param deltas: ground-truth regression parameters,
        (batch_num, train_roi_num, (dy, dx, dh, dw, tag));
        tag: 0 = padding, -1 = negative sample, 1 = positive sample
    :param class_ids: ground-truth classes,
        (batch_num, train_roi_num, (class_id, tag));
        tag: 0 = padding, -1 = negative sample, 1 = positive sample
    :return: mean smooth-L1 loss over the positive samples (0.0 if none)
    """
    # Drop padding and negative samples; keep the positive ones.
    positive_indices = tf.where(tf.equal(deltas[..., -1], 1))  # (N, 2)
    deltas = tf.gather_nd(deltas[..., :-1], positive_indices)
    class_ids = tf.gather_nd(class_ids[..., :-1], positive_indices)  # (N, 1)

    # Predictions are class-specific, so the class id is appended to the
    # (batch, roi) index of every positive sample.
    predict_indices = tf.concat(
        [positive_indices, tf.cast(class_ids, tf.int64)], axis=1)
    predict_deltas = tf.gather_nd(predict_deltas, predict_indices)

    # Smooth-L1. The empty-input guard is essential: without it the loss
    # becomes NaN when there are no positive samples.
    has_positives = tf.size(deltas) > 0
    loss = K.switch(has_positives,
                    smooth_l1_loss(deltas, predict_deltas),
                    tf.constant(0.0))
    return K.mean(loss)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    """Return the YOLO loss tensor.

    Parameters
    ----------
    args: list; the first ``len(anchors) // 3`` entries are the network
        outputs (yolo_outputs), the remaining entries are the matching
        ground-truth tensors (y_true)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, IoU threshold below which a predicted box counts
        towards the no-object confidence loss
    print_loss: when true, wrap the running loss in ``tf.Print``

    Returns
    -------
    loss: tensor, sum over all layers of the xy, wh, confidence and class
        losses, each divided by the batch size
    """
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs, y_true = args[:num_layers], args[num_layers:]
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], K.dtype(y_true[0]))
        for idx in range(num_layers)
    ]

    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        labels = y_true[l]
        layer_anchors = anchors[anchor_mask[l]]

        object_mask = labels[..., 4:5]
        true_class_probs = labels[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     layer_anchors,
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = labels[..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(labels[..., 2:4] / layer_anchors *
                            input_shape[::-1])
        # Cells without an object would give log(0) = -inf; zero them.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - labels[..., 2:3] * labels[..., 3:4]

        # Find the ignore mask, iterating over each image of the batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(labels[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        objectness_bce = K.binary_crossentropy(object_mask,
                                               raw_pred[..., 4:5],
                                               from_logits=True)
        confidence_loss = (object_mask * objectness_bce +
                           (1 - object_mask) * objectness_bce * ignore_mask)
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss

        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
def get_updates(self, loss, params):
    """Build the Adam-style update ops with warm-up and linear decay.

    The learning rate grows linearly up to ``self.lr`` during the first
    ``self.warmup_steps`` steps and afterwards decays linearly towards
    ``self.min_lr`` until ``self.decay_steps``. Weight decay (if enabled)
    is added to the update direction, optionally restricted to parameters
    whose name contains one of ``self.weight_decay_pattern``.

    Args:
        loss: Loss tensor to differentiate.
        params: List of trainable variables.

    Returns:
        The list of update ops (also stored in ``self.updates``).
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    t = K.cast(self.iterations, K.floatx()) + 1

    warmup_lr = self.lr * (t / self.warmup_steps)
    decay_lr = self.min_lr + (self.lr - self.min_lr) * (
        1.0 - K.minimum(t, self.decay_steps) / self.decay_steps)
    lr = K.switch(t <= self.warmup_steps, warmup_lr, decay_lr)

    # Bias-corrected step size.
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    def param_shaped_slots(prefix):
        # One zero-initialised slot per parameter, shaped like it.
        return [
            K.zeros(K.int_shape(p),
                    dtype=K.dtype(p),
                    name='{}_{}'.format(prefix, i))
            for i, p in enumerate(params)
        ]

    ms = param_shaped_slots('m')
    vs = param_shaped_slots('v')
    if self.amsgrad:
        vhats = param_shaped_slots('vh')
    else:
        # Placeholders of size 1; they are not used in the update itself.
        vhats = [
            K.zeros(1, dtype=K.dtype(p), name='vh_{}'.format(i))
            for i, p in enumerate(params)
        ]
    self.weights = [self.iterations] + ms + vs + vhats

    decay_enabled = self.initial_weight_decay > 0.0
    patterns = self.weight_decay_pattern

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

        if self.amsgrad:
            vhat_t = K.maximum(vhat, v_t)
            direction = m_t / (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            direction = m_t / (K.sqrt(v_t) + self.epsilon)

        # Decay every parameter when no pattern list is given, otherwise
        # only those whose name contains one of the patterns.
        if decay_enabled and (patterns is None or any(
                pattern in p.name for pattern in patterns)):
            direction = direction + self.weight_decay * p

        new_p = p - lr_t * direction

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))

        # Apply the parameter's constraint, if it has one.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def YOLOLoss(args, anchors, num_classes, ignore_threshold=0.5,
             print_loss=False):
    """Return the YOLO loss tensor.

    Args:
        args: list; the first ``len(anchors) // 3`` entries are the network
            outputs, the remaining entries are the matching ground-truth
            tensors.
        anchors: array of anchor sizes, indexed per layer through the
            anchor mask.
        num_classes: number of classes, forwarded to ``YOLOHead``.
        ignore_threshold: IoU threshold below which a predicted box counts
            towards the no-object confidence loss.
        print_loss: when true, wrap the running loss in ``tf.Print``.

    Returns:
        loss: tensor, sum over all layers of the xy, wh, confidence and
        class losses, each divided by the batch size.
    """
    num_layers = len(anchors) // 3
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shape = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0

    # Batch size as an integer tensor (loop bound) and as a float tensor
    # (loss normalisation).
    batch_size = K.shape(yolo_outputs[0])[0]
    fbatch_size = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_prob = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = YOLOHead(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes,
                                                    input_shape,
                                                    calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        raw_true_xy = y_true[l][..., :2] * grid_shape[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        # Cells without an object would give log(0) = -inf; zero them.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Build the ignore mask one batch element at a time.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_threshold, K.dtype(true_box)))
            return b + 1, ignore_mask

        # The loop counter is an integer, so it must be compared with the
        # integer batch size; comparing it with the float `fbatch_size` is
        # a dtype mismatch.
        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < batch_size, loop, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy on logits avoids exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * .5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5],
                from_logits=True) * ignore_mask
        # `raw_pred` holds logits (see the other terms), so the class term
        # needs from_logits=True as well.
        class_loss = object_mask * K.binary_crossentropy(
            true_class_prob, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / fbatch_size
        wh_loss = K.sum(wh_loss) / fbatch_size
        confidence_loss = K.sum(confidence_loss) / fbatch_size
        class_loss = K.sum(class_loss) / fbatch_size
        loss += xy_loss + wh_loss + confidence_loss + class_loss

        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='Loss: ')
    return loss
def _yolo_loss(args, anchors, num_classes, ignore_thresh, print_loss, prefix):
    """Compute the YOLO loss and a dictionary of its components.

    Args:
        args: list; the first ``len(anchors) // 3`` entries are the network
            outputs, the remaining entries are the ground-truth tensors.
        anchors: array of anchor sizes, indexed per layer through the
            anchor mask.
        num_classes: number of classes, forwarded to ``yolo_head``.
        ignore_thresh: IoU threshold below which a predicted box counts
            towards the no-object confidence loss.
        print_loss: when true, print the per-layer loss components to
            stdout via ``tf.print``.
        prefix: not used in this function.

    Returns:
        ``(loss, losses)``: the total loss tensor and a dict of the
        components computed for the last processed layer plus the running
        total. The dict keys come from splitting
        ``"xy_loss, wh_loss, confidence_loss, class_loss, loss"`` on commas,
        so every key except the first keeps its leading space.
    """
    from tensorflow.python.ops import control_flow_ops

    def pair_labels(label, tensors):
        # Pair each comma-separated label with the tensor at that position.
        return zip(label.split(","), tensors)

    num_layers = len(anchors) // 3  # default setting
    yolo_outputs, y_true = args[:num_layers], args[num_layers:]
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], K.dtype(y_true[0]))
        for idx in range(num_layers)
    ]

    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        labels = y_true[l]
        layer_anchors = anchors[anchor_mask[l]]

        object_mask = labels[..., 4:5]
        true_class_probs = labels[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     layer_anchors,
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = labels[..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(labels[..., 2:4] / layer_anchors *
                            input_shape[::-1])
        # Cells without an object would give log(0) = -inf; zero them.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - labels[..., 2:3] * labels[..., 3:4]

        # Find the ignore mask, iterating over each image of the batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(labels[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        objectness_bce = K.binary_crossentropy(object_mask,
                                               raw_pred[..., 4:5],
                                               from_logits=True)
        confidence_loss = (object_mask * objectness_bce +
                           (1 - object_mask) * objectness_bce * ignore_mask)
        # Only prediction channels 5 and 6 enter the class term.
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:7], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        # TODO: a multi-class term is planned but not part of the loss yet.
        loss += (xy_loss + wh_loss + confidence_loss + class_loss)

        if print_loss:
            print_op = tf.print(
                *pair_labels(
                    "[xy_loss, wh_loss, confidence_loss, class_loss, multi_class_loss]",
                    [xy_loss, wh_loss, confidence_loss, class_loss]),
                output_stream=sys.stdout)
            # Tie the print op to the loss so it runs whenever the loss is
            # evaluated.
            with tf.control_dependencies([print_op]):
                loss = loss * 1.0

        losses = dict(
            pair_labels(
                "xy_loss, wh_loss, confidence_loss, class_loss, loss",
                [xy_loss, wh_loss, confidence_loss, class_loss, loss]))
    return loss, losses
def smoothL1(y_true, y_pred):
    """Return the summed smooth-L1 (Huber-style) penalty of two tensors.

    Absolute errors below the externally defined ``HUBER_DELTA`` contribute
    ``0.5 * e ** 2``; all others contribute
    ``HUBER_DELTA * (e - 0.5 * HUBER_DELTA)``. The element-wise penalties
    are summed into a single value.
    """
    abs_error = k.abs(y_true - y_pred)
    is_small = abs_error < HUBER_DELTA
    quadratic = 0.5 * abs_error ** 2
    linear = HUBER_DELTA * (abs_error - 0.5 * HUBER_DELTA)
    return k.sum(k.switch(is_small, quadratic, linear))
def _yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False, summary_loss=True):
    """Return the YOLO loss tensor summed over all output layers.

    Parameters
    ----------
    args: sequence of tensors. With ``n = len(anchors) // 3``, entries
        ``[0, n)`` are the network outputs and entries ``[n, 2n)`` are the
        matching ``y_true`` tensors; anything after that is ignored.
    anchors: anchor sizes, indexed per layer with a list of anchor ids.
    num_classes: integer, number of class logits read from the raw
        prediction (channels ``5 .. 5 + num_classes``).
    ignore_thresh: float, IoU threshold below which a prediction keeps its
        "no object" confidence penalty.
    print_loss, summary_loss: accepted but not used by this implementation.

    Returns
    -------
    The accumulated loss: for every layer the xy, wh, confidence and class
    terms are summed and divided by the batch size.
    """
    from tensorflow.python.ops import control_flow_ops

    layer_count = len(anchors) // 3  # default setting
    outputs = args[:layer_count]
    targets = args[layer_count:layer_count * 2]
    if layer_count == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    input_shape = K.cast(K.shape(outputs[0])[1:3] * 32, K.dtype(targets[0]))
    grid_shapes = []
    for idx in range(layer_count):
        grid_shapes.append(
            K.cast(K.shape(outputs[idx])[1:3], K.dtype(targets[0])))

    total = 0
    batch = K.shape(outputs[0])[0]  # batch size, tensor
    batch_f = K.cast(batch, K.dtype(outputs[0]))

    for idx in range(layer_count):
        truth = targets[idx]
        layer_anchors = anchors[anchor_mask[idx]]
        has_object = truth[..., 4:5]
        class_truth = truth[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            outputs[idx], layer_anchors, num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Express the true boxes in the raw (Darknet) form the head outputs.
        raw_true_xy = truth[..., :2] * grid_shapes[idx][::-1] - grid
        raw_true_wh = K.log(truth[..., 2:4] / layer_anchors * input_shape[::-1])
        # Zero the entries without an object so log(0) = -inf never appears.
        raw_true_wh = K.switch(
            has_object, raw_true_wh, K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Build the ignore mask one batch element at a time.
        has_object_bool = K.cast(has_object, 'bool')

        def mask_one_image(b, mask_array):
            boxes = tf.boolean_mask(truth[b, ..., 0:4],
                                    has_object_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], boxes), axis=-1)
            below = K.cast(best_iou < ignore_thresh, K.dtype(boxes))
            return b + 1, mask_array.write(b, below)

        mask_array = tf.TensorArray(
            K.dtype(targets[0]), size=1, dynamic_size=True)
        _, mask_array = control_flow_ops.while_loop(
            lambda b, *_: b < batch, mask_one_image, [0, mask_array])
        ignore_mask = K.expand_dims(mask_array.stack(), -1)

        # K.binary_crossentropy on logits avoids exp overflow.
        xy_term = has_object * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_term = has_object * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        positive_conf = has_object * K.binary_crossentropy(
            has_object, raw_pred[..., 4:5], from_logits=True)
        negative_conf = (1 - has_object) * K.binary_crossentropy(
            has_object, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        conf_term = positive_conf + negative_conf
        class_term = has_object * K.binary_crossentropy(
            class_truth, raw_pred[..., 5:5 + num_classes], from_logits=True)

        xy_loss = K.sum(xy_term) / batch_f
        wh_loss = K.sum(wh_term) / batch_f
        confidence_loss = K.sum(conf_term) / batch_f
        class_loss = K.sum(class_term) / batch_f
        # TODO: an extra multi-class loss term is not part of the sum yet.
        total += xy_loss + wh_loss + confidence_loss + class_loss

    return total
def get_updates(self, loss, params):
    """Create the update ops for one optimisation step.

    For every parameter, exponential moving averages of the gradient and of
    the squared gradient are maintained. Depending on the ``N_sma`` term
    (computed from ``beta_2`` and the step count) the parameter is moved
    either with an adaptive, variance-rectified step (``N_sma > 5``) or with
    a plain bias-corrected momentum step. This looks like the Rectified
    Adam formulation -- confirm against the class documentation.

    Parameters
    ----------
    loss: loss tensor passed to ``self.get_gradients``.
    params: list of variables to optimise.

    Returns
    -------
    ``self.updates``: the iteration counter increment followed by, for each
    parameter, the updates of both moment slots and of the parameter itself.
    """
    gradients = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    learning_rate = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay driven by the iteration counter.
        decay_steps = K.cast(self.iterations, K.dtype(self.decay))
        learning_rate = learning_rate * (1. / (1. + self.decay * decay_steps))

    step = K.cast(self.iterations, K.floatx()) + 1

    first_moments = []
    for param in params:
        first_moments.append(K.zeros(K.int_shape(param), dtype=K.dtype(param)))
    second_moments = []
    for param in params:
        second_moments.append(K.zeros(K.int_shape(param), dtype=K.dtype(param)))
    self.weights = [self.iterations] + first_moments + second_moments

    slots = zip(params, gradients, first_moments, second_moments)
    for param, grad, mean, var in slots:
        mean_new = (self.beta_1 * mean) + (1. - self.beta_1) * grad
        var_new = (self.beta_2 * var) + (1. - self.beta_2) * K.square(grad)

        beta2_pow = self.beta_2 ** step
        sma_max = 2 / (1 - self.beta_2) - 1
        sma = sma_max - 2 * step * beta2_pow / (1 - beta2_pow)

        # Apply weight decay directly to the parameter value, if enabled.
        base = (param - self.weight_decay * learning_rate * param
                if self.weight_decay != 0. else param)

        def rectified_step():
            rectifier = K.sqrt(
                (1 - beta2_pow) * (sma - 4) / (sma_max - 4) *
                (sma - 2) / sma * sma_max / (sma_max - 2))
            scale = learning_rate * rectifier / (1 - self.beta_1 ** step)
            return base - scale * (mean_new / (K.sqrt(var_new) + self.epsilon))

        def momentum_step():
            scale = learning_rate / (1 - self.beta_1 ** step)
            return base - scale * mean_new

        updated = K.switch(sma > 5, rectified_step, momentum_step)

        self.updates.append(K.update(mean, mean_new))
        self.updates.append(K.update(var, var_new))

        # Apply constraints.
        constraint = getattr(param, 'constraint', None)
        if constraint is not None:
            updated = param.constraint(updated)

        self.updates.append(K.update(param, updated))

    return self.updates
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return the squared-error YOLO loss tensor for three output layers.

    Parameters
    ----------
    args: sequence of tensors; ``args[:3]`` are the network outputs
        (yolo_outputs) and ``args[3:]`` the matching y_true tensors.
    anchors: anchor sizes, indexed per layer with a list of anchor ids.
    num_classes: integer, forwarded to ``yolo_head``.
    ignore_thresh: float, the IoU threshold below which the "no object"
        confidence penalty is kept for a prediction.

    Returns
    -------
    loss: the sum of box, confidence and class losses over all three
        layers, divided by the batch size.

    '''
    outputs = args[:3]
    targets = args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(K.shape(outputs[0])[1:3] * 32, K.dtype(targets[0]))
    grid_shapes = []
    for idx in range(3):
        grid_shapes.append(
            K.cast(K.shape(outputs[idx])[1:3], K.dtype(targets[0])))

    total = 0
    batch = K.shape(outputs[0])[0]

    for idx in range(3):
        truth = targets[idx]
        has_object = truth[..., 4:5]
        class_truth = truth[..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            outputs[idx], anchors[anchor_mask[idx]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (truth[..., :2] - pred_xy) * grid_shapes[idx][::-1]
        wh_delta = K.log(truth[..., 2:4]) - K.log(pred_wh)
        # Zero the entries without an object so log(0) = -inf never appears.
        wh_delta = K.switch(has_object, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Build the ignore mask one batch element at a time.
        has_object_bool = K.cast(has_object, 'bool')

        def mask_one_image(b, mask_array):
            boxes = tf.boolean_mask(truth[b, ..., 0:4],
                                    has_object_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], boxes), axis=-1)
            below = K.cast(best_iou < ignore_thresh, K.dtype(boxes))
            return b + 1, mask_array.write(b, below)

        mask_array = tf.TensorArray(
            K.dtype(targets[0]), size=1, dynamic_size=True)
        _, mask_array = K.control_flow_ops.while_loop(
            lambda b, *_: b < batch, mask_one_image, [0, mask_array])
        ignore_mask = K.expand_dims(mask_array.stack(), -1)

        box_loss = has_object * K.square(box_delta * box_delta_scale)
        positive_conf = has_object * K.square(1 - pred_confidence)
        negative_conf = ((1 - has_object) * K.square(0 - pred_confidence)
                         * ignore_mask)
        confidence_loss = positive_conf + negative_conf
        class_loss = has_object * K.square(class_truth - pred_class_probs)

        total += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)

    return total / K.cast(batch, K.dtype(total))
def smoothL1(y_true, y_pred):
    """Return the summed smooth-L1 (Huber-style) penalty of two tensors.

    With a fixed threshold of 0.3, absolute errors below the threshold
    contribute ``0.5 * e ** 2`` and all others contribute
    ``0.3 * (e - 0.5 * 0.3)``; the element-wise penalties are then summed.
    """
    delta = 0.3  # 1.0
    abs_error = K.abs(y_true - y_pred)
    is_small = abs_error < delta
    quadratic = 0.5 * abs_error ** 2
    linear = delta * (abs_error - 0.5 * delta)
    return K.sum(K.switch(is_small, quadratic, linear))
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return the YOLO loss tensor (binary cross-entropy formulation).

    Parameters
    ----------
    args: sequence of tensors. With ``n = len(anchors) // 3``, ``args[:n]``
        are the network outputs (yolo_outputs, from yolo_body or
        tiny_yolo_body) and ``args[n:]`` the matching y_true tensors
        (the output of preprocess_true_boxes).
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the IoU threshold below which the "no object"
        confidence penalty is kept for a prediction.
    print_loss: accepted but not used by this implementation.

    Returns
    -------
    loss: for every layer the xy, wh, confidence and class terms are summed
        and divided by the batch size; the per-layer results are added up.

    '''
    layer_count = len(anchors) // 3  # default setting
    outputs = args[:layer_count]
    targets = args[layer_count:]
    if layer_count == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    input_shape = K.cast(K.shape(outputs[0])[1:3] * 32, K.dtype(targets[0]))
    grid_shapes = []
    for idx in range(layer_count):
        grid_shapes.append(
            K.cast(K.shape(outputs[idx])[1:3], K.dtype(targets[0])))

    total = 0
    batch = K.shape(outputs[0])[0]  # batch size, tensor
    batch_f = K.cast(batch, K.dtype(outputs[0]))

    for idx in range(layer_count):
        truth = targets[idx]
        layer_anchors = anchors[anchor_mask[idx]]
        has_object = truth[..., 4:5]
        class_truth = truth[..., 5:]

        # Per the original author's note, grid holds the cell coordinates
        #   [[0,0],[1,0],[2,0],...
        #    [0,1],[1,1],[2,1],...
        #    [0,2],[1,2],[2,2],...]
        # and grid_shapes is e.g. [[7,10],[14,20]].
        #
        # NOTE: pred_xy and pred_wh are only used to compute the ignore mask.
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            outputs[idx], layer_anchors, num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        true_xy = truth[..., :2]
        true_wh = truth[..., 2:4]

        # NOTE (original author): the [0, 1] coordinates times the grid
        # width/height give grid-scale values (e.g. [0, 7] and [0, 10]);
        # subtracting the grid leaves the [0, 1] offset inside each cell.
        raw_true_xy = true_xy * grid_shapes[idx][::-1] - grid
        # NOTE (original author): the [0, 1] sizes are scaled to the input
        # size, divided by the anchor and log-ed, i.e. the raw wh value the
        # network predicts.
        raw_true_wh = K.log(true_wh / layer_anchors * input_shape[::-1])
        # Zero the entries without an object so log(0) = -inf never appears.
        raw_true_wh = K.switch(
            has_object, raw_true_wh, K.zeros_like(raw_true_wh))
        # Two minus the product of the normalised true width and height.
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Build the ignore mask one batch element at a time.
        has_object_bool = K.cast(has_object, 'bool')

        def mask_one_image(b, mask_array):
            boxes = tf.boolean_mask(truth[b, ..., 0:4],
                                    has_object_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], boxes), axis=-1)
            below = K.cast(best_iou < ignore_thresh, K.dtype(boxes))
            return b + 1, mask_array.write(b, below)

        mask_array = tf.TensorArray(
            K.dtype(targets[0]), size=1, dynamic_size=True)
        _, mask_array = K.control_flow_ops.while_loop(
            lambda b, *_: b < batch, mask_one_image, [0, mask_array])
        ignore_mask = K.expand_dims(mask_array.stack(), -1)

        # K.binary_crossentropy on logits avoids exp overflow. "raw" refers
        # to the values exactly as output by the network head.
        xy_term = has_object * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_term = has_object * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        positive_conf = has_object * K.binary_crossentropy(
            has_object, raw_pred[..., 4:5], from_logits=True)
        negative_conf = (1 - has_object) * K.binary_crossentropy(
            has_object, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        conf_term = positive_conf + negative_conf
        class_term = has_object * K.binary_crossentropy(
            class_truth, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_term) / batch_f
        wh_loss = K.sum(wh_term) / batch_f
        confidence_loss = K.sum(conf_term) / batch_f
        class_loss = K.sum(class_term) / batch_f
        total += xy_loss + wh_loss + confidence_loss + class_loss

    return total